summaryrefslogtreecommitdiff
path: root/silk
diff options
context:
space:
mode:
authorTimothy B. Terriberry <tterribe@xiph.org>2013-05-19 17:11:17 -0700
committerTimothy B. Terriberry <tterribe@xiph.org>2013-05-19 19:12:51 -0700
commit972a34ec2c79d241318af24389b8ee042d10556a (patch)
tree18894d8e576d351923ed57aacbdec125919d3ba8 /silk
parentb7bd4c20acfd951ba46647e07411285997d952f4 (diff)
downloadopus-972a34ec2c79d241318af24389b8ee042d10556a.tar.gz
Add ARMv4/ARMv5E macros.
Original patch by Aurélien Zanelli <aurelien.zanelli@parrot.com>: http://lists.xiph.org/pipermail/opus/2013-May/002078.html Revised version: - Add autoconf detection (ported from libtheora). - Rename ARM5E to ARMv5E (an ARM5 is not the same thing as ARMv5!). - Use actual macros so they can still be selectively overridden. - Split out ARMv4 parts and add a few more ARMv4 macros. - Label blocks to make them easy to find in generated assembly. - Fix MULT16_32_Q15() so we can pass make check. The MDCT test passes in values larger than 2**30 for b. The new version should be just as fast (or faster, since it's easier to merge the shift with following instructions), and there's no appreciable impact on accuracy (FFT/MDCT SNR actually goes up in most cases). - Fix register constraints. We were using early-clobber flags in a bunch of places that didn't need them, and commutative-pair flags in a bunch of places that weren't actually commutative. This was Jean-Marc's fault (the original code came from Speex). - Simplify silk_CLZ16(). - Port over iFFT C_MULC asm by Andree Buschmann <AndreeBuschmann@t-online.de> from Rockbox. - Speed up the C_MULC asm by using LDRD, allowing more flexible addressing, re-ordering instructions to avoid some stalls, allowing more flexible register allocation, and getting things out of the inline asm block so the compiler can schedule them better. - Add C_MUL and C_MUL4 asm for the FFT to the encoder, based on the new C_MULC. In total, this patch gives a 22.3% speed-up on test_opus_encoder on a 600 MHz Cortex A8 using gcc 4.2.1. When restricted to ARMv4 optimizations, it gives a 9.6% speed-up on the same processor/compiler. On the conformance test vectors: Average mono quality is 97.0583 % Average stereo quality is 97.775 %
Diffstat (limited to 'silk')
-rw-r--r--silk/SigProc_FIX.h8
-rw-r--r--silk/SigProc_FIX_armv4.h47
-rw-r--r--silk/SigProc_FIX_armv5e.h61
-rw-r--r--silk/macros.h8
-rw-r--r--silk/macros_armv4.h103
-rw-r--r--silk/macros_armv5e.h213
6 files changed, 440 insertions, 0 deletions
diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index cf1ab362..cf9f30c0 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -576,6 +576,14 @@ static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
#include "MacroCount.h"
#include "MacroDebug.h"
+#ifdef ARMv4_ASM
+#include "SigProc_FIX_armv4.h"
+#endif
+
+#ifdef ARMv5E_ASM
+#include "SigProc_FIX_armv5e.h"
+#endif
+
#ifdef __cplusplus
}
#endif
diff --git a/silk/SigProc_FIX_armv4.h b/silk/SigProc_FIX_armv4.h
new file mode 100644
index 00000000..ea372020
--- /dev/null
+++ b/silk/SigProc_FIX_armv4.h
@@ -0,0 +1,47 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors
+Copyright (c) 2013 Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv4_H
+#define SILK_SIGPROC_FIX_ARMv4_H
+
+/* a32 + b32 * c32, evaluated by a single ARM MLA instruction (the result is
+   the low 32 bits of the multiply-accumulate). */
+#undef silk_MLA
+static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  /* The output is early-clobber ("=&r") so that it is never allocated to the
+     same register as an input: before ARMv6, MLA with Rd == Rm is
+     architecturally UNPREDICTABLE and may be diagnosed by the assembler. */
+  __asm__(
+      "#silk_MLA\n\t"
+      "mla %0, %1, %2, %3\n\t"
+      : "=&r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))
+
+#endif
diff --git a/silk/SigProc_FIX_armv5e.h b/silk/SigProc_FIX_armv5e.h
new file mode 100644
index 00000000..804e2bc5
--- /dev/null
+++ b/silk/SigProc_FIX_armv5e.h
@@ -0,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013 Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv5E_H
+#define SILK_SIGPROC_FIX_ARMv5E_H
+
+/* (a32 >> 16) * (b32 >> 16): signed product of the top halfwords of a and b,
+   issued as one SMULTT instruction. */
+#undef silk_SMULTT
+static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+{
+  opus_int32 product;
+  __asm__("#silk_SMULTT\n\t"
+          "smultt %0, %1, %2\n\t"
+          : "=r"(product)     /* %0: destination */
+          : "r"(a), "r"(b));  /* %1, %2: multiplicands */
+  return product;
+}
+#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
+
+/* a32 + (b32 >> 16) * (c32 >> 16): top-halfword multiply of b and c
+   accumulated onto a, issued as one SMLATT instruction. */
+#undef silk_SMLATT
+static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+                                            opus_int32 c)
+{
+  opus_int32 acc;
+  __asm__("#silk_SMLATT\n\t"
+          "smlatt %0, %1, %2, %3\n\t"
+          : "=r"(acc)                 /* %0: destination */
+          : "r"(b), "r"(c), "r"(a));  /* %1, %2: multiplicands; %3: addend */
+  return acc;
+}
+#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))
+
+#endif
diff --git a/silk/macros.h b/silk/macros.h
index 31344cf4..6ac70057 100644
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -134,5 +134,13 @@ static inline opus_int32 silk_CLZ32(opus_int32 in32)
(*((Matrix_base_adr) + ((row)+(M)*(column))))
#endif
+#ifdef ARMv4_ASM
+#include "macros_armv4.h"
+#endif
+
+#ifdef ARMv5E_ASM
+#include "macros_armv5e.h"
+#endif
+
#endif /* SILK_MACROS_H */
diff --git a/silk/macros_armv4.h b/silk/macros_armv4.h
new file mode 100644
index 00000000..e5dfe69c
--- /dev/null
+++ b/silk/macros_armv4.h
@@ -0,0 +1,103 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv4_H
+#define SILK_MACROS_ARMv4_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; the output has to be a 32-bit int.
+   Implemented with SMULL: b is moved into the top halfword, so the high word
+   of the 64-bit product a * (b << 16) is the wanted result. */
+#undef silk_SMULWB
+static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+{
+  unsigned rd_lo; /* Low product word: written by SMULL but not used. */
+  int rd_hi;
+  /* Both outputs are early-clobber ("=&r"): before ARMv6, SMULL requires
+     RdLo, RdHi and Rm to be distinct registers.
+     The shift is done on an unsigned value because left-shifting a negative
+     int is undefined behaviour in C; the register contents are the same. */
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"((unsigned)b<<16)
+  );
+  return rd_hi;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
+
+/* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); the output has to be a 32-bit int.
+   Expands to silk_SMULWB(b, c) followed by an ordinary C addition of a. */
+#undef silk_SMLAWB
+#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
+
+/* (a32 * (b32 >> 16)) >> 16.
+   Implemented with SMULL: the bottom halfword of b is masked off, so the high
+   word of the 64-bit product is the wanted result. */
+#undef silk_SMULWT
+static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo; /* Low product word: written by SMULL but not used. */
+  int rd_hi;
+  /* Both outputs are early-clobber ("=&r"): before ARMv6, SMULL requires
+     RdLo, RdHi and Rm to be distinct registers. */
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"(b&~0xFFFF)
+  );
+  return rd_hi;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
+
+/* a32 + ((b32 * (c32 >> 16)) >> 16).
+   Expands to silk_SMULWT(b, c) followed by an ordinary C addition of a. */
+#undef silk_SMLAWT
+#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
+
+/* (a32 * b32) >> 16: bits 16..47 of the 64-bit signed product, assembled from
+   the two result words of SMULL. */
+#undef silk_SMULWW
+static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  /* Both outputs are early-clobber ("=&r"): before ARMv6, SMULL requires
+     RdLo, RdHi and Rm to be distinct registers. */
+  __asm__(
+      "#silk_SMULWW\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(a), "r"(b)
+  );
+  /* Shift the high word as unsigned: left-shifting a negative (or large)
+     signed int is undefined behaviour in C. The bit pattern is unchanged. */
+  return (opus_int32)((rd_lo>>16)|((unsigned)rd_hi<<16));
+}
+#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
+
+/* a32 + ((b32 * c32) >> 16): bits 16..47 of the 64-bit signed product of b and
+   c (computed with SMULL), added to a with 32-bit wrap-around. */
+#undef silk_SMLAWW
+static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  /* Both outputs are early-clobber ("=&r"): before ARMv6, SMULL requires
+     RdLo, RdHi and Rm to be distinct registers.
+     The label names this macro so the block can be found in generated
+     assembly. */
+  __asm__(
+      "#silk_SMLAWW\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=&r"(rd_lo), "=&r"(rd_hi)
+      : "%r"(b), "r"(c)
+  );
+  /* All arithmetic is done as unsigned: left-shifting a negative signed int
+     is undefined behaviour in C, and the unsigned sum wraps exactly as the
+     original mixed signed/unsigned expression did. */
+  return (opus_int32)((unsigned)a+((rd_lo>>16)|((unsigned)rd_hi<<16)));
+}
+#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
+
+#endif /* SILK_MACROS_ARMv4_H */
diff --git a/silk/macros_armv5e.h b/silk/macros_armv5e.h
new file mode 100644
index 00000000..a86586b3
--- /dev/null
+++ b/silk/macros_armv5e.h
@@ -0,0 +1,213 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013 Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv5E_H
+#define SILK_MACROS_ARMv5E_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; the output has to be a 32-bit int.
+   Issued as one SMULWB instruction. */
+#undef silk_SMULWB
+static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+{
+  int scaled;
+  __asm__("#silk_SMULWB\n\t"
+          "smulwb %0, %1, %2\n\t"
+          : "=r"(scaled)      /* %0: destination */
+          : "r"(a), "r"(b));  /* %1: 32-bit operand; %2: 16-bit operand */
+  return scaled;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
+
+/* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); the output has to be a 32-bit int.
+   Issued as one SMLAWB instruction. */
+#undef silk_SMLAWB
+static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+                                            opus_int16 c)
+{
+  int acc;
+  __asm__("#silk_SMLAWB\n\t"
+          "smlawb %0, %1, %2, %3\n\t"
+          : "=r"(acc)                 /* %0: destination */
+          : "r"(b), "r"(c), "r"(a));  /* %1, %2: multiplicands; %3: addend */
+  return acc;
+}
+#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
+
+/* (a32 * (b32 >> 16)) >> 16, issued as one SMULWT instruction. */
+#undef silk_SMULWT
+static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int scaled;
+  __asm__("#silk_SMULWT\n\t"
+          "smulwt %0, %1, %2\n\t"
+          : "=r"(scaled)      /* %0: destination */
+          : "r"(a), "r"(b));  /* %1: 32-bit operand; %2: top halfword used */
+  return scaled;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
+
+/* a32 + ((b32 * (c32 >> 16)) >> 16), issued as one SMLAWT instruction. */
+#undef silk_SMLAWT
+static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+                                            opus_int32 c)
+{
+  int acc;
+  __asm__("#silk_SMLAWT\n\t"
+          "smlawt %0, %1, %2, %3\n\t"
+          : "=r"(acc)                 /* %0: destination */
+          : "r"(b), "r"(c), "r"(a));  /* %1, %2: multiplicands; %3: addend */
+  return acc;
+}
+#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)); the output has to be a 32-bit int.
+   Issued as one SMULBB instruction. */
+#undef silk_SMULBB
+static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+{
+  int product;
+  __asm__("#silk_SMULBB\n\t"
+          "smulbb %0, %1, %2\n\t"
+          : "=r"(product)      /* %0: destination */
+          : "%r"(a), "r"(b));  /* %1, %2: marked as a commutative pair */
+  return product;
+}
+#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)); the output has to be a 32-bit int.
+   Issued as one SMLABB instruction. */
+#undef silk_SMLABB
+static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+                                            opus_int32 c)
+{
+  int acc;
+  __asm__("#silk_SMLABB\n\t"
+          "smlabb %0, %1, %2, %3\n\t"
+          : "=r"(acc)                  /* %0: destination */
+          : "%r"(b), "r"(c), "r"(a));  /* %1, %2: commutative pair; %3: addend */
+  return acc;
+}
+#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16), issued as one SMULBT
+   instruction. */
+#undef silk_SMULBT
+static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int product;
+  __asm__("#silk_SMULBT\n\t"
+          "smulbt %0, %1, %2\n\t"
+          : "=r"(product)     /* %0: destination */
+          : "r"(a), "r"(b));  /* %1: bottom halfword used; %2: top halfword used */
+  return product;
+}
+#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16), issued as one SMLABT
+   instruction. */
+#undef silk_SMLABT
+static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+                                            opus_int32 c)
+{
+  int acc;
+  __asm__("#silk_SMLABT\n\t"
+          "smlabt %0, %1, %2, %3\n\t"
+          : "=r"(acc)                 /* %0: destination */
+          : "r"(b), "r"(c), "r"(a));  /* %1, %2: multiplicands; %3: addend */
+  return acc;
+}
+#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
+
+/* Add with the output saturated: a + b, issued as one QADD instruction. */
+#undef silk_ADD_SAT32
+static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int sum;
+  __asm__("#silk_ADD_SAT32\n\t"
+          "qadd %0, %1, %2\n\t"
+          : "=r"(sum)          /* %0: destination */
+          : "%r"(a), "r"(b));  /* %1, %2: marked as a commutative pair */
+  return sum;
+}
+#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
+
+/* Subtract with the output saturated: a - b, issued as one QSUB instruction. */
+#undef silk_SUB_SAT32
+static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int diff;
+  __asm__("#silk_SUB_SAT32\n\t"
+          "qsub %0, %1, %2\n\t"
+          : "=r"(diff)        /* %0: destination */
+          : "r"(a), "r"(b));  /* %1: minuend; %2: subtrahend */
+  return diff;
+}
+#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
+
+/* Count the leading zeros of a 16-bit value with one CLZ instruction.
+   in16 is moved into the top halfword of the operand and bit 15 is set below
+   it, so the count stops at 16 when in16 is zero. */
+#undef silk_CLZ16
+static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+{
+  int res;
+  /* Shift as unsigned: left-shifting a negative int is undefined behaviour
+     in C. The register contents passed to CLZ are the same. */
+  __asm__(
+      "#silk_CLZ16\n\t"
+      "clz %0, %1;\n"
+      : "=r"(res)
+      : "r"((unsigned)in16<<16|0x8000)
+  );
+  return res;
+}
+#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
+
+/* Count the leading zeros of a 32-bit value with one CLZ instruction. */
+#undef silk_CLZ32
+static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+{
+  int res;
+  /* A plain "=r" output is sufficient: the single instruction reads its input
+     before writing the result, so no early-clobber is needed and the compiler
+     stays free to reuse the input register (matching silk_CLZ16). */
+  __asm__(
+      "#silk_CLZ32\n\t"
+      "clz %0, %1\n\t"
+      : "=r"(res)
+      : "r"(in32)
+  );
+  return res;
+}
+#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
+
+#endif /* SILK_MACROS_ARMv5E_H */