#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stddef.h>
#include <xmmintrin.h> /* __m128, _mm_setr_ps, _mm_storer_ps */
#include <x86intrin.h> /* __rdtsc, __rdtscp, _mm_mfence, _mm_clflush */

#if defined(__i386__) || defined(__amd64__)
#define CACHELINE_SIZE 64
#else
#error "unsupported architecture"
#endif

/* Pre-SSE2 processors have no rdtscp, mfence, or clflush. */
#if defined(__SSE__) && !defined(__SSE2__)
#define NORDTSCP
#define NOMFENCE
#define NOCLFLUSH
#endif

/* Measurement overhead subtracted from every timed access:
   two rdtscp reads, or two rdtsc reads on older processors. */
#ifndef LATENCY
#ifndef NORDTSCP
#define LATENCY (42 + 42)
#else
#define LATENCY (18 + 18)
#endif
#endif

#ifdef MASKING_MITIGATION
/* From https://github.com/torvalds/linux/blob/cb6416592bc2a8b731dabcec0d63cda270764fc6/arch/x86/include/asm/barrier.h#L27
 *
 * array_index_mask_nospec() - generate a mask that is ~0UL when the
 * bounds check succeeds and 0 otherwise
 * @index: array element index
 * @size: number of elements in array
 *
 * Returns:
 *     0 - (index < size)
 */
static inline unsigned long array_index_mask_nospec(unsigned long index,
                                                    unsigned long size)
{
    unsigned long mask;

    /* cmp sets the carry flag iff index < size; sbb then smears the
       carry into every bit of mask (all ones or all zeroes). */
    __asm__ __volatile__ ("cmp %1,%2; sbb %0,%0;"
                          : "=r" (mask)
                          : "g" (size), "r" (index)
                          : "cc");
    return mask;
}
#endif //MASKING_MITIGATION

#ifdef NOCLFLUSH
#define CACHE_FLUSH_ITERATIONS 2048
#define CACHE_FLUSH_STRIDE 4096

/* 16-byte alignment is required by the SSE stores in flush_memory_sse. */
__attribute__((aligned(16)))
uint8_t cache_flush_array[CACHE_FLUSH_STRIDE * CACHE_FLUSH_ITERATIONS];

/* Flush memory using long SSE instructions */
void flush_memory_sse(uint8_t *addr)
{
    float *p = (float *)addr;
    float c = 0.f;
    __m128 i = _mm_setr_ps(c, c, c, c);

    int k, l;
    /* Non-sequential memory addressing by looping through k by l:
       sixteen 16-byte stores covering 256 bytes starting at addr. */
    for (k = 0; k < 4; k++)
        for (l = 0; l < 4; l++)
            _mm_storer_ps(&p[(l * 4 + k) * 4], i);
}
#endif //NOCLFLUSH

static inline unsigned timed_access(volatile uint8_t *addr)
{
    uint64_t t0, t1;
#pragma GCC diagnostic ignored "-Wuninitialized"
    unsigned int junk = junk;

#ifndef NORDTSCP
    t0 = __rdtscp(&junk);
    junk |= *addr;
    t1 = __rdtscp(&junk);
#else
#ifndef NOMFENCE
    /*
     * Since the rdtsc instruction isn't serialized, newer processors will
     * try to reorder it, ruining its value as a timing mechanism. To get
     * around this, we use the mfence instruction to introduce a memory
     * barrier and force serialization. mfence is used because it is
     * portable across Intel and AMD.
     */
    _mm_mfence();
    t0 = __rdtsc();
    _mm_mfence();
    junk |= *addr;
    _mm_mfence();
    t1 = __rdtsc();
    _mm_mfence();
#else
    /*
     * The mfence instruction was introduced with the SSE2 instruction set,
     * so we have to ifdef it out on pre-SSE2 processors. Luckily, these
     * older processors don't seem to reorder the rdtsc instruction, so not
     * having mfence on older processors is less of an issue.
     */
    t0 = __rdtsc();
    junk |= *addr;
    t1 = __rdtsc();
#endif // NOMFENCE
#endif // NORDTSCP

    return (unsigned)(t1 - t0 - LATENCY);
}

static void calibrate_threshold(unsigned int *threshold)
{
    volatile char buf[2 * CACHELINE_SIZE];
    volatile uint8_t *bufp;
    int i;
    const int cnt = 10000;
    uint64_t tcache = 0;
    __attribute__((unused)) volatile int junk = 0;

    /* Round buf up to the next cache line boundary so every timed access
       hits exactly one line. */
    bufp = (volatile uint8_t *)(((unsigned long)buf + CACHELINE_SIZE) &
                                ~((unsigned long)CACHELINE_SIZE - 1));
    junk |= *bufp;
    for (i = 0, tcache = 0; i < cnt; i++) {
        tcache += timed_access(bufp);
    }
    tcache = tcache / cnt;
    if (threshold != NULL) {
        *threshold = tcache + LATENCY;
    }
}
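
#ifdef MASKING_MITIGATION
/*
 * A minimal usage sketch, not part of the original file: it mirrors how the
 * Linux kernel applies the mask via array_index_nospec(). The function
 * masked_read and its parameter names are hypothetical, chosen here for
 * illustration. ANDing the index with the mask clamps any index that is
 * speculatively out of bounds to 0, so a misspeculated load cannot read
 * attacker-chosen memory.
 */
static inline uint8_t masked_read(const uint8_t *array, unsigned long size,
                                  unsigned long index)
{
    /* mask is ~0UL when index < size and 0 otherwise */
    index &= array_index_mask_nospec(index, size);
    return array[index];
}
#endif //MASKING_MITIGATION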
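
#ifdef NOCLFLUSH
/*
 * A sketch of how cache_flush_array is presumably meant to be used on CPUs
 * without clflush; the helper name evict_cache is hypothetical. Storing
 * through all 2048 stride-4096 chunks (8 MiB in total) displaces previously
 * cached data, approximating a flush by eviction.
 */
static void evict_cache(void)
{
    int j;
    for (j = 0; j < CACHE_FLUSH_ITERATIONS; j++)
        flush_memory_sse(&cache_flush_array[CACHE_FLUSH_STRIDE * j]);
}
#endif //NOCLFLUSH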
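
/*
 * A minimal self-test sketch, not part of the original file: it assumes the
 * definitions above and adds a hypothetical main() plus a probe buffer. It
 * calibrates the hit threshold, times a cached access, then times the same
 * access after the line has been flushed (or evicted, on NOCLFLUSH builds).
 */
static uint8_t probe_buf[CACHELINE_SIZE];

int main(void)
{
    unsigned int threshold = 0;
    unsigned hit, miss;

    calibrate_threshold(&threshold);

    probe_buf[0] = 1;                   /* bring the line into the cache */
    hit = timed_access(&probe_buf[0]);

#ifndef NOCLFLUSH
    _mm_clflush(&probe_buf[0]);         /* drop the line from the cache */
#else
    evict_cache();                      /* see the eviction sketch above */
#endif
    miss = timed_access(&probe_buf[0]);

    /* Values are cycles with LATENCY subtracted, so a very fast hit can wrap
       around to a large unsigned number; only the relative order matters. */
    printf("threshold=%u hit=%u miss=%u\n", threshold, hit, miss);
    return 0;
}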