Diffstat (limited to 'chromium/third_party/WebKit/Source/core/platform/audio/VectorMath.cpp')
-rw-r--r-- | chromium/third_party/WebKit/Source/core/platform/audio/VectorMath.cpp | 690 |
1 file changed, 0 insertions, 690 deletions
diff --git a/chromium/third_party/WebKit/Source/core/platform/audio/VectorMath.cpp b/chromium/third_party/WebKit/Source/core/platform/audio/VectorMath.cpp
deleted file mode 100644
index d16f3c813cd..00000000000
--- a/chromium/third_party/WebKit/Source/core/platform/audio/VectorMath.cpp
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
- * Copyright (C) 2010, Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#if ENABLE(WEB_AUDIO)
-
-#include "core/platform/audio/VectorMath.h"
-#include "wtf/Assertions.h"
-#include "wtf/CPU.h"
-#include <stdint.h>
-
-#if OS(MACOSX)
-#include <Accelerate/Accelerate.h>
-#endif
-
-#ifdef __SSE2__
-#include <emmintrin.h>
-#endif
-
-#if HAVE(ARM_NEON_INTRINSICS)
-#include <arm_neon.h>
-#endif
-
-#include <math.h>
-#include <algorithm>
-
-namespace WebCore {
-
-namespace VectorMath {
-
-#if OS(MACOSX)
-// On the Mac we use the highly optimized versions in Accelerate.framework
-// In 32-bit mode (__ppc__ or __i386__) <Accelerate/Accelerate.h> includes <vecLib/vDSP_translate.h> which defines macros of the same name as
-// our namespaced function names, so we must handle this case differently. Other architectures (64bit, ARM, etc.) do not include this header file.
-
-void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
-{
-#if CPU(X86)
-    ::vsmul(sourceP, sourceStride, scale, destP, destStride, framesToProcess);
-#else
-    vDSP_vsmul(sourceP, sourceStride, scale, destP, destStride, framesToProcess);
-#endif
-}
-
-void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
-{
-#if CPU(X86)
-    ::vadd(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
-#else
-    vDSP_vadd(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
-#endif
-}
-
-void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
-{
-#if CPU(X86)
-    ::vmul(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
-#else
-    vDSP_vmul(source1P, sourceStride1, source2P, sourceStride2, destP, destStride, framesToProcess);
-#endif
-}
-
-void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
-{
-    DSPSplitComplex sc1;
-    DSPSplitComplex sc2;
-    DSPSplitComplex dest;
-    sc1.realp = const_cast<float*>(real1P);
-    sc1.imagp = const_cast<float*>(imag1P);
-    sc2.realp = const_cast<float*>(real2P);
-    sc2.imagp = const_cast<float*>(imag2P);
-    dest.realp = realDestP;
-    dest.imagp = imagDestP;
-#if CPU(X86)
-    ::zvmul(&sc1, 1, &sc2, 1, &dest, 1, framesToProcess, 1);
-#else
-    vDSP_zvmul(&sc1, 1, &sc2, 1, &dest, 1, framesToProcess, 1);
-#endif
-}
-
-void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
-{
-    vDSP_vsma(sourceP, sourceStride, scale, destP, destStride, destP, destStride, framesToProcess);
-}
-
-void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)
-{
-    vDSP_maxmgv(sourceP, sourceStride, maxP, framesToProcess);
-}
-
-void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)
-{
-    vDSP_svesq(const_cast<float*>(sourceP), sourceStride, sumP, framesToProcess);
-}
-
-void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, const float* highThresholdP, float* destP, int destStride, size_t framesToProcess)
-{
-    vDSP_vclip(const_cast<float*>(sourceP), sourceStride, const_cast<float*>(lowThresholdP), const_cast<float*>(highThresholdP), destP, destStride, framesToProcess);
-}
-#else
-
-void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
-{
-    int n = framesToProcess;
-
-#ifdef __SSE2__
-    if ((sourceStride == 1) && (destStride == 1)) {
-        float k = *scale;
-
-        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
-        while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
-            *destP += k * *sourceP;
-            sourceP++;
-            destP++;
-            n--;
-        }
-
-        // Now the sourceP is aligned, use SSE.
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-
-        __m128 pSource;
-        __m128 dest;
-        __m128 temp;
-        __m128 mScale = _mm_set_ps1(k);
-
-        bool destAligned = !(reinterpret_cast<uintptr_t>(destP) & 0x0F);
-
-#define SSE2_MULT_ADD(loadInstr, storeInstr)        \
-            while (destP < endP)                    \
-            {                                       \
-                pSource = _mm_load_ps(sourceP);     \
-                temp = _mm_mul_ps(pSource, mScale); \
-                dest = _mm_##loadInstr##_ps(destP); \
-                dest = _mm_add_ps(dest, temp);      \
-                _mm_##storeInstr##_ps(destP, dest); \
-                sourceP += 4;                       \
-                destP += 4;                         \
-            }
-
-        if (destAligned)
-            SSE2_MULT_ADD(load, store)
-        else
-            SSE2_MULT_ADD(loadu, storeu)
-
-        n = tailFrames;
-    }
-#elif HAVE(ARM_NEON_INTRINSICS)
-    if ((sourceStride == 1) && (destStride == 1)) {
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-
-        float32x4_t k = vdupq_n_f32(*scale);
-        while (destP < endP) {
-            float32x4_t source = vld1q_f32(sourceP);
-            float32x4_t dest = vld1q_f32(destP);
-
-            dest = vmlaq_f32(dest, source, k);
-            vst1q_f32(destP, dest);
-
-            sourceP += 4;
-            destP += 4;
-        }
-        n = tailFrames;
-    }
-#endif
-    while (n) {
-        *destP += *sourceP * *scale;
-        sourceP += sourceStride;
-        destP += destStride;
-        n--;
-    }
-}
-
-void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
-{
-    int n = framesToProcess;
-
-#ifdef __SSE2__
-    if ((sourceStride == 1) && (destStride == 1)) {
-        float k = *scale;
-
-        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
-        while ((reinterpret_cast<size_t>(sourceP) & 0x0F) && n) {
-            *destP = k * *sourceP;
-            sourceP++;
-            destP++;
-            n--;
-        }
-
-        // Now the sourceP address is aligned and start to apply SSE.
-        int group = n / 4;
-        __m128 mScale = _mm_set_ps1(k);
-        __m128* pSource;
-        __m128* pDest;
-        __m128 dest;
-
-
-        if (reinterpret_cast<size_t>(destP) & 0x0F) {
-            while (group--) {
-                pSource = reinterpret_cast<__m128*>(const_cast<float*>(sourceP));
-                dest = _mm_mul_ps(*pSource, mScale);
-                _mm_storeu_ps(destP, dest);
-
-                sourceP += 4;
-                destP += 4;
-            }
-        } else {
-            while (group--) {
-                pSource = reinterpret_cast<__m128*>(const_cast<float*>(sourceP));
-                pDest = reinterpret_cast<__m128*>(destP);
-                *pDest = _mm_mul_ps(*pSource, mScale);
-
-                sourceP += 4;
-                destP += 4;
-            }
-        }
-
-        // Non-SSE handling for remaining frames which is less than 4.
-        n %= 4;
-        while (n) {
-            *destP = k * *sourceP;
-            sourceP++;
-            destP++;
-            n--;
-        }
-    } else { // If strides are not 1, rollback to normal algorithm.
-#elif HAVE(ARM_NEON_INTRINSICS)
-    if ((sourceStride == 1) && (destStride == 1)) {
-        float k = *scale;
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-
-        while (destP < endP) {
-            float32x4_t source = vld1q_f32(sourceP);
-            vst1q_f32(destP, vmulq_n_f32(source, k));
-
-            sourceP += 4;
-            destP += 4;
-        }
-        n = tailFrames;
-    }
-#endif
-    float k = *scale;
-    while (n--) {
-        *destP = k * *sourceP;
-        sourceP += sourceStride;
-        destP += destStride;
-    }
-#ifdef __SSE2__
-    }
-#endif
-}
-
-void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
-{
-    int n = framesToProcess;
-
-#ifdef __SSE2__
-    if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
-        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
-        while ((reinterpret_cast<size_t>(source1P) & 0x0F) && n) {
-            *destP = *source1P + *source2P;
-            source1P++;
-            source2P++;
-            destP++;
-            n--;
-        }
-
-        // Now the source1P address is aligned and start to apply SSE.
-        int group = n / 4;
-        __m128* pSource1;
-        __m128* pSource2;
-        __m128* pDest;
-        __m128 source2;
-        __m128 dest;
-
-        bool source2Aligned = !(reinterpret_cast<size_t>(source2P) & 0x0F);
-        bool destAligned = !(reinterpret_cast<size_t>(destP) & 0x0F);
-
-        if (source2Aligned && destAligned) { // all aligned
-            while (group--) {
-                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
-                pSource2 = reinterpret_cast<__m128*>(const_cast<float*>(source2P));
-                pDest = reinterpret_cast<__m128*>(destP);
-                *pDest = _mm_add_ps(*pSource1, *pSource2);
-
-                source1P += 4;
-                source2P += 4;
-                destP += 4;
-            }
-
-        } else if (source2Aligned && !destAligned) { // source2 aligned but dest not aligned
-            while (group--) {
-                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
-                pSource2 = reinterpret_cast<__m128*>(const_cast<float*>(source2P));
-                dest = _mm_add_ps(*pSource1, *pSource2);
-                _mm_storeu_ps(destP, dest);
-
-                source1P += 4;
-                source2P += 4;
-                destP += 4;
-            }
-
-        } else if (!source2Aligned && destAligned) { // source2 not aligned but dest aligned
-            while (group--) {
-                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
-                source2 = _mm_loadu_ps(source2P);
-                pDest = reinterpret_cast<__m128*>(destP);
-                *pDest = _mm_add_ps(*pSource1, source2);
-
-                source1P += 4;
-                source2P += 4;
-                destP += 4;
-            }
-        } else if (!source2Aligned && !destAligned) { // both source2 and dest not aligned
-            while (group--) {
-                pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
-                source2 = _mm_loadu_ps(source2P);
-                dest = _mm_add_ps(*pSource1, source2);
-                _mm_storeu_ps(destP, dest);
-
-                source1P += 4;
-                source2P += 4;
-                destP += 4;
-            }
-        }
-
-        // Non-SSE handling for remaining frames which is less than 4.
-        n %= 4;
-        while (n) {
-            *destP = *source1P + *source2P;
-            source1P++;
-            source2P++;
-            destP++;
-            n--;
-        }
-    } else { // if strides are not 1, rollback to normal algorithm
-#elif HAVE(ARM_NEON_INTRINSICS)
-    if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-
-        while (destP < endP) {
-            float32x4_t source1 = vld1q_f32(source1P);
-            float32x4_t source2 = vld1q_f32(source2P);
-            vst1q_f32(destP, vaddq_f32(source1, source2));
-
-            source1P += 4;
-            source2P += 4;
-            destP += 4;
-        }
-        n = tailFrames;
-    }
-#endif
-    while (n--) {
-        *destP = *source1P + *source2P;
-        source1P += sourceStride1;
-        source2P += sourceStride2;
-        destP += destStride;
-    }
-#ifdef __SSE2__
-    }
-#endif
-}
-
-void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
-{
-
-    int n = framesToProcess;
-
-#ifdef __SSE2__
-    if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
-        // If the source1P address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
-        while ((reinterpret_cast<uintptr_t>(source1P) & 0x0F) && n) {
-            *destP = *source1P * *source2P;
-            source1P++;
-            source2P++;
-            destP++;
-            n--;
-        }
-
-        // Now the source1P address aligned and start to apply SSE.
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-        __m128 pSource1;
-        __m128 pSource2;
-        __m128 dest;
-
-        bool source2Aligned = !(reinterpret_cast<uintptr_t>(source2P) & 0x0F);
-        bool destAligned = !(reinterpret_cast<uintptr_t>(destP) & 0x0F);
-
-#define SSE2_MULT(loadInstr, storeInstr)                \
-        while (destP < endP)                            \
-        {                                               \
-            pSource1 = _mm_load_ps(source1P);           \
-            pSource2 = _mm_##loadInstr##_ps(source2P);  \
-            dest = _mm_mul_ps(pSource1, pSource2);      \
-            _mm_##storeInstr##_ps(destP, dest);         \
-            source1P += 4;                              \
-            source2P += 4;                              \
-            destP += 4;                                 \
-        }
-
-        if (source2Aligned && destAligned) // Both aligned.
-            SSE2_MULT(load, store)
-        else if (source2Aligned && !destAligned) // Source2 is aligned but dest not.
-            SSE2_MULT(load, storeu)
-        else if (!source2Aligned && destAligned) // Dest is aligned but source2 not.
-            SSE2_MULT(loadu, store)
-        else // Neither aligned.
-            SSE2_MULT(loadu, storeu)
-
-        n = tailFrames;
-    }
-#elif HAVE(ARM_NEON_INTRINSICS)
-    if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-
-        while (destP < endP) {
-            float32x4_t source1 = vld1q_f32(source1P);
-            float32x4_t source2 = vld1q_f32(source2P);
-            vst1q_f32(destP, vmulq_f32(source1, source2));
-
-            source1P += 4;
-            source2P += 4;
-            destP += 4;
-        }
-        n = tailFrames;
-    }
-#endif
-    while (n) {
-        *destP = *source1P * *source2P;
-        source1P += sourceStride1;
-        source2P += sourceStride2;
-        destP += destStride;
-        n--;
-    }
-}
-
-void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
-{
-    unsigned i = 0;
-#ifdef __SSE2__
-    // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
-    // Otherwise, fall through to the scalar code below.
-    if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F)
-        && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F)
-        && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F)
-        && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F)
-        && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F)
-        && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) {
-
-        unsigned endSize = framesToProcess - framesToProcess % 4;
-        while (i < endSize) {
-            __m128 real1 = _mm_load_ps(real1P + i);
-            __m128 real2 = _mm_load_ps(real2P + i);
-            __m128 imag1 = _mm_load_ps(imag1P + i);
-            __m128 imag2 = _mm_load_ps(imag2P + i);
-            __m128 real = _mm_mul_ps(real1, real2);
-            real = _mm_sub_ps(real, _mm_mul_ps(imag1, imag2));
-            __m128 imag = _mm_mul_ps(real1, imag2);
-            imag = _mm_add_ps(imag, _mm_mul_ps(imag1, real2));
-            _mm_store_ps(realDestP + i, real);
-            _mm_store_ps(imagDestP + i, imag);
-            i += 4;
-        }
-    }
-#elif HAVE(ARM_NEON_INTRINSICS)
-    unsigned endSize = framesToProcess - framesToProcess % 4;
-    while (i < endSize) {
-        float32x4_t real1 = vld1q_f32(real1P + i);
-        float32x4_t real2 = vld1q_f32(real2P + i);
-        float32x4_t imag1 = vld1q_f32(imag1P + i);
-        float32x4_t imag2 = vld1q_f32(imag2P + i);
-
-        float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, imag2);
-        float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, real2);
-
-        vst1q_f32(realDestP + i, realResult);
-        vst1q_f32(imagDestP + i, imagResult);
-
-        i += 4;
-    }
-#endif
-    for (; i < framesToProcess; ++i) {
-        // Read and compute result before storing them, in case the
-        // destination is the same as one of the sources.
-        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
-        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
-
-        realDestP[i] = realResult;
-        imagDestP[i] = imagResult;
-    }
-}
-
-void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)
-{
-    int n = framesToProcess;
-    float sum = 0;
-
-#ifdef __SSE2__
-    if (sourceStride == 1) {
-        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
-        while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
-            float sample = *sourceP;
-            sum += sample * sample;
-            sourceP++;
-            n--;
-        }
-
-        // Now the sourceP is aligned, use SSE.
-        int tailFrames = n % 4;
-        const float* endP = sourceP + n - tailFrames;
-        __m128 source;
-        __m128 mSum = _mm_setzero_ps();
-
-        while (sourceP < endP) {
-            source = _mm_load_ps(sourceP);
-            source = _mm_mul_ps(source, source);
-            mSum = _mm_add_ps(mSum, source);
-            sourceP += 4;
-        }
-
-        // Summarize the SSE results.
-        const float* groupSumP = reinterpret_cast<float*>(&mSum);
-        sum += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
-
-        n = tailFrames;
-    }
-#elif HAVE(ARM_NEON_INTRINSICS)
-    if (sourceStride == 1) {
-        int tailFrames = n % 4;
-        const float* endP = sourceP + n - tailFrames;
-
-        float32x4_t fourSum = vdupq_n_f32(0);
-        while (sourceP < endP) {
-            float32x4_t source = vld1q_f32(sourceP);
-            fourSum = vmlaq_f32(fourSum, source, source);
-            sourceP += 4;
-        }
-        float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(fourSum));
-
-        float groupSum[2];
-        vst1_f32(groupSum, twoSum);
-        sum += groupSum[0] + groupSum[1];
-
-        n = tailFrames;
-    }
-#endif
-
-    while (n--) {
-        float sample = *sourceP;
-        sum += sample * sample;
-        sourceP += sourceStride;
-    }
-
-    ASSERT(sumP);
-    *sumP = sum;
-}
-
-void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)
-{
-    int n = framesToProcess;
-    float max = 0;
-
-#ifdef __SSE2__
-    if (sourceStride == 1) {
-        // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
-        while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
-            max = std::max(max, fabsf(*sourceP));
-            sourceP++;
-            n--;
-        }
-
-        // Now the sourceP is aligned, use SSE.
-        int tailFrames = n % 4;
-        const float* endP = sourceP + n - tailFrames;
-        __m128 source;
-        __m128 mMax = _mm_setzero_ps();
-        int mask = 0x7FFFFFFF;
-        __m128 mMask = _mm_set1_ps(*reinterpret_cast<float*>(&mask));
-
-        while (sourceP < endP) {
-            source = _mm_load_ps(sourceP);
-            // Calculate the absolute value by anding source with mask, the sign bit is set to 0.
-            source = _mm_and_ps(source, mMask);
-            mMax = _mm_max_ps(mMax, source);
-            sourceP += 4;
-        }
-
-        // Get max from the SSE results.
-        const float* groupMaxP = reinterpret_cast<float*>(&mMax);
-        max = std::max(max, groupMaxP[0]);
-        max = std::max(max, groupMaxP[1]);
-        max = std::max(max, groupMaxP[2]);
-        max = std::max(max, groupMaxP[3]);
-
-        n = tailFrames;
-    }
-#elif HAVE(ARM_NEON_INTRINSICS)
-    if (sourceStride == 1) {
-        int tailFrames = n % 4;
-        const float* endP = sourceP + n - tailFrames;
-
-        float32x4_t fourMax = vdupq_n_f32(0);
-        while (sourceP < endP) {
-            float32x4_t source = vld1q_f32(sourceP);
-            fourMax = vmaxq_f32(fourMax, vabsq_f32(source));
-            sourceP += 4;
-        }
-        float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourMax));
-
-        float groupMax[2];
-        vst1_f32(groupMax, twoMax);
-        max = std::max(groupMax[0], groupMax[1]);
-
-        n = tailFrames;
-    }
-#endif
-
-    while (n--) {
-        max = std::max(max, fabsf(*sourceP));
-        sourceP += sourceStride;
-    }
-
-    ASSERT(maxP);
-    *maxP = max;
-}
-
-void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, const float* highThresholdP, float* destP, int destStride, size_t framesToProcess)
-{
-    int n = framesToProcess;
-    float lowThreshold = *lowThresholdP;
-    float highThreshold = *highThresholdP;
-
-    // FIXME: Optimize for SSE2.
-#if HAVE(ARM_NEON_INTRINSICS)
-    if ((sourceStride == 1) && (destStride == 1)) {
-        int tailFrames = n % 4;
-        const float* endP = destP + n - tailFrames;
-
-        float32x4_t low = vdupq_n_f32(lowThreshold);
-        float32x4_t high = vdupq_n_f32(highThreshold);
-        while (destP < endP) {
-            float32x4_t source = vld1q_f32(sourceP);
-            vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));
-            sourceP += 4;
-            destP += 4;
-        }
-        n = tailFrames;
-    }
-#endif
-    while (n--) {
-        *destP = std::max(std::min(*sourceP, highThreshold), lowThreshold);
-        sourceP += sourceStride;
-        destP += destStride;
-    }
-}
-
-#endif // OS(MACOSX)
-
-} // namespace VectorMath
-
-} // namespace WebCore
-
-#endif // ENABLE(WEB_AUDIO)
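
The non-Mac code paths in the deleted file all share one three-phase shape: a scalar head loop that advances element by element until the source pointer reaches 16-byte alignment, a SIMD body that handles four floats per iteration, and a scalar tail for the remaining (at most three) frames. A minimal standalone sketch of that pattern, using a hypothetical scaleBuffer helper that is not part of the deleted file:

#include <emmintrin.h> // SSE2 intrinsics
#include <stdint.h>
#include <stddef.h>

// Hypothetical helper, for illustration only: destP[i] = k * sourceP[i].
void scaleBuffer(const float* sourceP, float k, float* destP, size_t n)
{
    // Head: step one frame at a time until sourceP is 16-byte aligned
    // (at most three iterations, since floats are 4 bytes).
    while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
        *destP++ = k * *sourceP++;
        --n;
    }

    // Body: four floats per iteration. Loads from sourceP can now use the
    // aligned form; destP may still be unaligned, so store unaligned.
    __m128 mScale = _mm_set1_ps(k);
    for (; n >= 4; n -= 4, sourceP += 4, destP += 4)
        _mm_storeu_ps(destP, _mm_mul_ps(_mm_load_ps(sourceP), mScale));

    // Tail: at most three leftover frames, handled scalar.
    while (n--)
        *destP++ = k * *sourceP++;
}

The deleted vsmul and vadd go further and dispatch on destination (and second-source) alignment so the fully aligned case can use aligned stores as well; the sketch keeps only the unaligned store for brevity.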
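The scalar loop in zvmul implements the usual complex product, (a + bi)(c + di) = (ac - bd) + (ad + bc)i, and computes both results before storing them so the function stays correct when a destination array aliases one of the inputs. A self-contained scalar reference with the same arithmetic as the deleted loop (the function name is hypothetical):

#include <stddef.h>

// Element-wise complex multiply of two split-complex arrays.
void complexMultiply(const float* real1P, const float* imag1P,
                     const float* real2P, const float* imag2P,
                     float* realDestP, float* imagDestP, size_t n)
{
    for (size_t i = 0; i < n; ++i) {
        // Compute both parts first: if realDestP == real1P, storing the
        // real result early would corrupt the imaginary computation.
        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
        realDestP[i] = realResult;
        imagDestP[i] = imagResult;
    }
}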
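The vmaxmgv path relies on two further SSE idioms: clearing the IEEE-754 sign bit by ANDing with 0x7FFFFFFF to get |x| without a branch, and reducing the four per-lane partial maxima to a single value after the loop. A sketch of both, assuming for brevity an aligned pointer and a frame count that is a multiple of four; it builds the mask with _mm_castsi128_ps, which avoids the float-pointer type punning used in the original:

#include <emmintrin.h>
#include <algorithm>
#include <stddef.h>

// Hypothetical helper: maximum magnitude of n floats (n % 4 == 0,
// p 16-byte aligned).
float maxMagnitude(const float* p, size_t n)
{
    // All-lanes mask with every bit set except the sign bit.
    __m128 mMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
    __m128 mMax = _mm_setzero_ps();
    for (size_t i = 0; i < n; i += 4)
        mMax = _mm_max_ps(mMax, _mm_and_ps(_mm_load_ps(p + i), mMask));

    // Horizontal reduction: spill the four lanes and fold them down.
    float lanes[4];
    _mm_storeu_ps(lanes, mMax);
    return std::max(std::max(lanes[0], lanes[1]),
                    std::max(lanes[2], lanes[3]));
}

vsvesq uses the same reduction shape with _mm_add_ps in place of _mm_max_ps, summing the four lane subtotals after the loop.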