summaryrefslogtreecommitdiff
path: root/sbc/sbc_primitives_mmx.c
diff options
context:
space:
mode:
author: Siarhei Siamashka <siarhei.siamashka@nokia.com> 2010-06-29 16:48:46 +0300
committer: Johan Hedberg <johan.hedberg@nokia.com> 2010-06-30 10:30:12 +0300
commit: 95465b816f0ce7f0ec10a183ce7ff0c6f83d86eb (patch)
tree: 689c0bdec41b05ef36d36941c7142aca72f1e3d8 /sbc/sbc_primitives_mmx.c
parent: b26b60a734d63d878194018fd5b8291e67dc6d66 (diff)
download: bluez-95465b816f0ce7f0ec10a183ce7ff0c6f83d86eb.tar.gz
sbc: MMX optimization for scale factors calculation
Improves SBC encoding performance when joint stereo is not used.

Benchmarked on Pentium-M:

== Before: ==

$ time ./sbcenc -b53 -s8 test.au > /dev/null

real    0m1.439s
user    0m1.336s
sys     0m0.104s

samples  %        image name  symbol name
8642     33.7473  sbcenc      sbc_pack_frame
5873     22.9342  sbcenc      sbc_analyze_4b_8s_mmx
4435     17.3188  sbcenc      sbc_calc_scalefactors
4285     16.7331  sbcenc      sbc_calculate_bits
1942      7.5836  sbcenc      sbc_enc_process_input_8s_be
322       1.2574  sbcenc      sbc_encode

== After: ==

$ time ./sbcenc -b53 -s8 test.au > /dev/null

real    0m1.319s
user    0m1.220s
sys     0m0.084s

samples  %        image name  symbol name
8706     37.9959  sbcenc      sbc_pack_frame
5740     25.0513  sbcenc      sbc_analyze_4b_8s_mmx
4307     18.7972  sbcenc      sbc_calculate_bits
1937      8.4537  sbcenc      sbc_enc_process_input_8s_be
1801      7.8602  sbcenc      sbc_calc_scalefactors_mmx
307       1.3399  sbcenc      sbc_encode
Diffstat (limited to 'sbc/sbc_primitives_mmx.c')
-rw-r--r-- sbc/sbc_primitives_mmx.c 54
1 files changed, 54 insertions, 0 deletions
diff --git a/sbc/sbc_primitives_mmx.c b/sbc/sbc_primitives_mmx.c
index e6900bcb3..45c62ac89 100644
--- a/sbc/sbc_primitives_mmx.c
+++ b/sbc/sbc_primitives_mmx.c
@@ -276,6 +276,59 @@ static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
asm volatile ("emms\n");
}
+static void sbc_calc_scalefactors_mmx( /* MMX scale factor calculation: handles two adjacent subbands per asm pass */
+ int32_t sb_sample_f[16][2][8], /* in: samples, indexed [block][channel][subband] */
+ uint32_t scale_factor[2][8], /* out: results, indexed [channel][subband] */
+ int blocks, int channels, int subbands) /* blocks scanned per subband; upper bounds of the ch and sb loops */
+{
+ static const SBC_ALIGNED int32_t consts[2] = { /* initial accumulator value, one entry per 32-bit lane */
+ 1 << SCALE_OUT_BITS, /* bit SCALE_OUT_BITS is always set, so bsrl never operates on zero ... */
+ 1 << SCALE_OUT_BITS, /* ... and the value stored after the subtraction is never negative */
+ };
+ int ch, sb;
+ intptr_t blk; /* byte offset of the current block; its register is reused as scratch after the loop */
+ for (ch = 0; ch < channels; ch++) {
+ for (sb = 0; sb < subbands; sb += 2) { /* two subbands at a time; assumes subbands is even - TODO confirm */
+ blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] - /* start at the last block: */
+ (char *) &sb_sample_f[0][0][0])); /* (blocks - 1) * size of one block in bytes */
+ asm volatile (
+ "movq (%4), %%mm0\n" /* mm0 = consts: OR accumulator for both lanes */
+ "1:\n" /* loop over blocks, from the last one down to block 0 */
+ "movq (%1, %0), %%mm1\n" /* mm1 = samples x of subbands sb and sb + 1 in this block */
+ "pxor %%mm2, %%mm2\n" /* mm2 = 0 */
+ "pcmpgtd %%mm2, %%mm1\n" /* mm1 = (x > 0) ? -1 : 0 */
+ "paddd (%1, %0), %%mm1\n" /* mm1 = y, where y = x - 1 if x > 0, else x */
+ "pcmpgtd %%mm1, %%mm2\n" /* mm2 = (y < 0) ? -1 : 0 */
+ "pxor %%mm2, %%mm1\n" /* mm1 = y if y >= 0, else ~y; i.e. |x| - 1, or 0 when x == 0 */
+ /* merge this block's bits into the accumulator */
+ "por %%mm1, %%mm0\n"
+ /* step back by one block and repeat until the offset goes negative */
+ "sub %2, %0\n"
+ "jns 1b\n"
+ /* low lane: index of the highest set bit minus SCALE_OUT_BITS -> scale_factor[ch][sb] */
+ "movd %%mm0, %k0\n"
+ "psrlq $32, %%mm0\n" /* bring the high lane down for the second result */
+ "bsrl %k0, %k0\n"
+ "subl %5, %k0\n"
+ "movl %k0, (%3)\n"
+ /* high lane: same computation -> scale_factor[ch][sb + 1] */
+ "movd %%mm0, %k0\n"
+ "bsrl %k0, %k0\n"
+ "subl %5, %k0\n"
+ "movl %k0, 4(%3)\n"
+ : "+r" (blk) /* %0: block byte offset, read and written */
+ : "r" (&sb_sample_f[0][ch][sb]), /* %1: address of the block 0 sample for (ch, sb) */
+ "i" ((char *) &sb_sample_f[1][0][0] - /* %2: size of one block in bytes, */
+ (char *) &sb_sample_f[0][0][0]), /* passed as an immediate */
+ "r" (&scale_factor[ch][sb]), /* %3: where the two results are stored */
+ "r" (&consts), /* %4: address of the accumulator seed */
+ "i" (SCALE_OUT_BITS) /* %5: amount subtracted from each bit index */
+ : "memory"); /* memory is accessed via %1/%3/%4; NOTE(review): mm0-mm2 are modified but not listed as clobbers - confirm intended */
+ }
+ }
+ asm volatile ("emms\n"); /* EMMS issued once, after all MMX work in this function is done */
+}
+
static int check_mmx_support(void)
{
#ifdef __amd64__
@@ -314,6 +367,7 @@ void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
if (check_mmx_support()) {
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
+ state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
state->implementation_info = "MMX";
}
}