summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPascal Massimino <pascal.massimino@gmail.com>2017-01-13 07:01:48 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2017-01-13 07:01:48 +0000
commit8fda56126efec2894503df26f38c50521e8a1afb (patch)
tree8f7ca423e6249c33d36e67ccbb68922479ca6abb
parent7c2779e95a0e03b5d108f7adc44f02c64f8aaa56 (diff)
parent86bbd2455226c632c1dfd0577e5daf40e4b32caf (diff)
downloadlibwebp-8fda56126efec2894503df26f38c50521e8a1afb.tar.gz
Merge "add a kSlowSSSE3 feature for CPUInfo"
-rw-r--r--src/dsp/cpu.c45
-rw-r--r--src/dsp/dsp.h1
2 files changed, 41 insertions, 5 deletions
diff --git a/src/dsp/cpu.c b/src/dsp/cpu.c
index cbb08db9..b5583b6e 100644
--- a/src/dsp/cpu.c
+++ b/src/dsp/cpu.c
@@ -95,26 +95,62 @@ static WEBP_INLINE uint64_t xgetbv(void) {
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
+
+// Helper for run-time detection of slow SSSE3 platforms: returns 1 for a listed model, else 0.
+static int CheckSlowModel(int info) {
+ // Table listing display models with longer latencies for the bsr instruction
+ // (i.e. 2 cycles vs 10/16 cycles) and for some SSSE3 instructions like pshufb.
+ // Refer to the Intel 64 and IA-32 Architectures Optimization Reference Manual.
+ static const uint8_t kSlowModels[] = {
+ 0x37, 0x4a, 0x4d, // Silvermont Microarchitecture
+ 0x1c, 0x26, 0x27 // Atom Microarchitecture
+ };
+ const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf); // (bits 16-19) << 4 | bits 4-7
+ const uint32_t family = (info >> 8) & 0xf; // bits 8-11 of 'info'
+ if (family == 0x06) { // the table is only consulted for family 6
+ size_t i;
+ for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
+ if (model == kSlowModels[i]) return 1;
+ }
+ }
+ return 0; // other family, or model not in the table
+}
+
static int x86CPUInfo(CPUFeature feature) {
int max_cpuid_value;
int cpu_info[4];
+ int is_intel = 0;
// get the highest feature value cpuid supports
GetCPUInfo(cpu_info, 0);
max_cpuid_value = cpu_info[0];
if (max_cpuid_value < 1) {
return 0;
+ } else {
+ const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG
+ const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni
+ const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn
+ is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
+ cpu_info[2] == VENDOR_ID_INTEL_ECX &&
+ cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel?
}
GetCPUInfo(cpu_info, 1);
if (feature == kSSE2) {
- return 0 != (cpu_info[3] & 0x04000000);
+ return !!(cpu_info[3] & (1 << 26));
}
if (feature == kSSE3) {
- return 0 != (cpu_info[2] & 0x00000001);
+ return !!(cpu_info[2] & (1 << 0));
+ }
+ if (feature == kSlowSSSE3) {
+ if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3?
+ return CheckSlowModel(cpu_info[0]);
+ }
+ return 0;
}
+
if (feature == kSSE4_1) {
- return 0 != (cpu_info[2] & 0x00080000);
+ return !!(cpu_info[2] & (1 << 19));
}
if (feature == kAVX) {
// bits 27 (OSXSAVE) & 28 (256-bit AVX)
@@ -126,7 +162,7 @@ static int x86CPUInfo(CPUFeature feature) {
if (feature == kAVX2) {
if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
GetCPUInfo(cpu_info, 7);
- return ((cpu_info[1] & 0x00000020) == 0x00000020);
+ return !!(cpu_info[1] & (1 << 5));
}
}
return 0;
@@ -184,4 +220,3 @@ VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
#else
VP8CPUInfo VP8GetCPUInfo = NULL;
#endif
-
diff --git a/src/dsp/dsp.h b/src/dsp/dsp.h
index e0fb4e5d..d6d50e77 100644
--- a/src/dsp/dsp.h
+++ b/src/dsp/dsp.h
@@ -132,6 +132,7 @@ extern "C" {
typedef enum {
kSSE2,
kSSE3,
+ kSlowSSSE3, // special feature for slow SSSE3 architectures
kSSE4_1,
kAVX,
kAVX2,