diff options
author | Pascal Massimino <pascal.massimino@gmail.com> | 2017-01-13 07:01:48 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2017-01-13 07:01:48 +0000 |
commit | 8fda56126efec2894503df26f38c50521e8a1afb (patch) | |
tree | 8f7ca423e6249c33d36e67ccbb68922479ca6abb | |
parent | 7c2779e95a0e03b5d108f7adc44f02c64f8aaa56 (diff) | |
parent | 86bbd2455226c632c1dfd0577e5daf40e4b32caf (diff) | |
download | libwebp-8fda56126efec2894503df26f38c50521e8a1afb.tar.gz |
Merge "add a kSlowSSSE3 feature for CPUInfo"
-rw-r--r-- | src/dsp/cpu.c | 45 | ||||
-rw-r--r-- | src/dsp/dsp.h | 1 |
2 files changed, 41 insertions, 5 deletions
```diff
diff --git a/src/dsp/cpu.c b/src/dsp/cpu.c
index cbb08db9..b5583b6e 100644
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -95,26 +95,62 @@ static WEBP_INLINE uint64_t xgetbv(void) {
 #endif
 
 #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
+
+// helper function for run-time detection of slow SSSE3 platforms
+static int CheckSlowModel(int info) {
+  // Table listing display models with longer latencies for the bsr instruction
+  // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
+  // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
+  static const uint8_t kSlowModels[] = {
+    0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
+    0x1c, 0x26, 0x27   // Atom Microarchitecture
+  };
+  const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);
+  const uint32_t family = (info >> 8) & 0xf;
+  if (family == 0x06) {
+    size_t i;
+    for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
+      if (model == kSlowModels[i]) return 1;
+    }
+  }
+  return 0;
+}
+
 static int x86CPUInfo(CPUFeature feature) {
   int max_cpuid_value;
   int cpu_info[4];
+  int is_intel = 0;
 
   // get the highest feature value cpuid supports
   GetCPUInfo(cpu_info, 0);
   max_cpuid_value = cpu_info[0];
   if (max_cpuid_value < 1) {
     return 0;
+  } else {
+    const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG
+    const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni
+    const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
+    is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
+                cpu_info[2] == VENDOR_ID_INTEL_ECX &&
+                cpu_info[3] == VENDOR_ID_INTEL_EDX);  // genuine Intel?
   }
 
   GetCPUInfo(cpu_info, 1);
   if (feature == kSSE2) {
-    return 0 != (cpu_info[3] & 0x04000000);
+    return !!(cpu_info[3] & (1 << 26));
   }
   if (feature == kSSE3) {
-    return 0 != (cpu_info[2] & 0x00000001);
+    return !!(cpu_info[2] & (1 << 0));
+  }
+  if (feature == kSlowSSSE3) {
+    if (is_intel && (cpu_info[2] & (1 << 0))) {  // SSSE3?
+      return CheckSlowModel(cpu_info[0]);
+    }
+    return 0;
   }
+
   if (feature == kSSE4_1) {
-    return 0 != (cpu_info[2] & 0x00080000);
+    return !!(cpu_info[2] & (1 << 19));
   }
   if (feature == kAVX) {
     // bits 27 (OSXSAVE) & 28 (256-bit AVX)
@@ -126,7 +162,7 @@ static int x86CPUInfo(CPUFeature feature) {
   if (feature == kAVX2) {
     if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
       GetCPUInfo(cpu_info, 7);
-      return ((cpu_info[1] & 0x00000020) == 0x00000020);
+      return !!(cpu_info[1] & (1 << 5));
     }
   }
   return 0;
@@ -184,4 +220,3 @@ VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
 #else
 VP8CPUInfo VP8GetCPUInfo = NULL;
 #endif
-
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index e0fb4e5d..d6d50e77 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -132,6 +132,7 @@ extern "C" {
 typedef enum {
   kSSE2,
   kSSE3,
+  kSlowSSSE3,  // special feature for slow SSSE3 architectures
   kSSE4_1,
   kAVX,
   kAVX2,
```