# 跳过缓存行 CLFLUSH

_mm_clflush

asm
CCEH 实现：每次 clflush 之后用 pause 忙等一段时间，以增加（模拟）写延迟

1
2
3
4
5
6
7
8
9
10
11
/*
 * Flush every cache line that overlaps [data, data + len), pausing after
 * each flush.
 *
 * The whole sequence is bracketed by mfence() calls. After each CLFLUSH the
 * function spins on CPUPause() until ReadTSC() reaches a deadline that was
 * taken just before the flush; the deadline offset is
 * kWriteLatencyInNS * CPU_FREQ_MHZ / 1000 (presumably a nanoseconds-to-TSC-
 * ticks conversion -- confirm against the definitions of those constants).
 * clflushCount is incremented once per flushed line.
 *
 * data: first byte of the region to flush.
 * len:  size of the region in bytes.
 */
inline void clflush(char* data, size_t len) {
  /* Round the start address down to the beginning of its cache line. */
  volatile char *line = (char*)((unsigned long)data & (~(kCacheLineSize-1)));
  char *const limit = data+len;

  mfence();
  while (line < limit) {
    /* Take the deadline first so the flush itself counts toward the delay. */
    unsigned long deadline = ReadTSC() + (unsigned long) (kWriteLatencyInNS*CPU_FREQ_MHZ/1000);

    asm volatile("clflush %0" : "+m" (*(volatile char*)line));

    /* Busy-wait until the deadline has passed. */
    while (ReadTSC() < deadline) {
      CPUPause();
    }

    clflushCount++;
    line += kCacheLineSize;
  }
  mfence();
}

Pacman实现

1
2
3
4
5
6
7
/*
 * Flush every cache line that overlaps [data, data + len) with CLFLUSH,
 * then issue a single sfence().
 *
 * data: first byte of the region to flush.
 * len:  size of the region in bytes.
 */
static inline void clflush(const void *data, int len) {
  char *const limit = (char *)data + len;
  /* Round the start address down to the beginning of its cache line. */
  volatile char *line = (char *)((unsigned long)data & ~(CACHE_LINE_SIZE - 1));

  while (line < limit) {
    asm volatile("clflush %0" : "+m"(*(volatile char *)line));
    line += CACHE_LINE_SIZE;
  }

  sfence();
}

三种实现方法

1
2
3
4
5
6
7
/*
 * Select one cache-line flush/write-back instruction at compile time, based
 * on which of CLFLUSH, CLFLUSH_OPT or CLWB is defined (first match wins).
 * `ptr` must point into the cache line to operate on. If none of the macros
 * is defined, this fragment expands to nothing.
 *
 * Every branch tests with defined(), so a macro defined without a value
 * (e.g. `#define CLFLUSH_OPT`) is accepted as well as `-DCLFLUSH_OPT`.
 */
#if defined(CLFLUSH)
/* CLFLUSH. */
asm volatile("clflush %0" : "+m" (*(volatile char *)ptr));
#elif defined(CLFLUSH_OPT)
/* CLFLUSHOPT, emitted as a 0x66 prefix byte followed by CLFLUSH. */
asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)(ptr)));
#elif defined(CLWB)
/* CLWB, emitted as a 0x66 prefix byte followed by XSAVEOPT. */
asm volatile(".byte 0x66; xsaveopt %0" : "+m" (*(volatile char *)(ptr)));
#endif

分别是：clflush；可乱序执行的 clflushopt；以及写回但不使缓存行失效的 clwb。