diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-05-21 12:58:41 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-06-08 13:14:38 -0400 |
commit | d5a7229ba4aabc2b6407c731d9175879ae54c5ea (patch) | |
tree | 0596aaae9622f0f7b7e5d689b20ab3dc82f59897 /libavcodec/arm | |
parent | 98db4e2a4e35ccc2406004216270ceaa1c6a7d00 (diff) | |
download | ffmpeg-d5a7229ba4aabc2b6407c731d9175879ae54c5ea.tar.gz |
Add a float DSP framework to libavutil
Move vector_fmul() from DSPContext to AVFloatDSPContext.
Diffstat (limited to 'libavcodec/arm')
-rw-r--r-- | libavcodec/arm/dsputil_init_neon.c | 2 |
-rw-r--r-- | libavcodec/arm/dsputil_init_vfp.c | 7 |
-rw-r--r-- | libavcodec/arm/dsputil_neon.S | 39 |
-rw-r--r-- | libavcodec/arm/dsputil_vfp.S | 47 |
4 files changed, 0 insertions, 95 deletions
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index d3ef85048f..398326c8c8 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -150,7 +150,6 @@ void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); void ff_vp3_v_loop_filter_neon(uint8_t *, int, int *); void ff_vp3_h_loop_filter_neon(uint8_t *, int, int *); -void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); void ff_vector_fmul_window_neon(float *dst, const float *src0, const float *src1, const float *win, int len); void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, @@ -328,7 +327,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->vp3_idct_dc_add = ff_vp3_idct_dc_add_neon; } - c->vector_fmul = ff_vector_fmul_neon; c->vector_fmul_window = ff_vector_fmul_window_neon; c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; c->vector_fmac_scalar = ff_vector_fmac_scalar_neon; diff --git a/libavcodec/arm/dsputil_init_vfp.c b/libavcodec/arm/dsputil_init_vfp.c index d5e2d3b4ef..d77d686578 100644 --- a/libavcodec/arm/dsputil_init_vfp.c +++ b/libavcodec/arm/dsputil_init_vfp.c @@ -18,20 +18,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/arm/cpu.h" #include "libavcodec/dsputil.h" #include "dsputil_arm.h" -void ff_vector_fmul_vfp(float *dst, const float *src0, - const float *src1, int len); void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len); void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) { - int cpu_flags = av_get_cpu_flags(); - - if (!have_vfpv3(cpu_flags)) - c->vector_fmul = ff_vector_fmul_vfp; c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; } diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index b2243c8ccb..9a5a40d6ac 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -534,45 
+534,6 @@ function ff_add_pixels_clamped_neon, export=1 bx lr endfunc -function ff_vector_fmul_neon, export=1 - subs r3, r3, #8 - vld1.32 {d0-d3}, [r1,:128]! - vld1.32 {d4-d7}, [r2,:128]! - vmul.f32 q8, q0, q2 - vmul.f32 q9, q1, q3 - beq 3f - bics ip, r3, #15 - beq 2f -1: subs ip, ip, #16 - vld1.32 {d0-d1}, [r1,:128]! - vld1.32 {d4-d5}, [r2,:128]! - vmul.f32 q10, q0, q2 - vld1.32 {d2-d3}, [r1,:128]! - vld1.32 {d6-d7}, [r2,:128]! - vmul.f32 q11, q1, q3 - vst1.32 {d16-d19},[r0,:128]! - vld1.32 {d0-d1}, [r1,:128]! - vld1.32 {d4-d5}, [r2,:128]! - vmul.f32 q8, q0, q2 - vld1.32 {d2-d3}, [r1,:128]! - vld1.32 {d6-d7}, [r2,:128]! - vmul.f32 q9, q1, q3 - vst1.32 {d20-d23},[r0,:128]! - bne 1b - ands r3, r3, #15 - beq 3f -2: vld1.32 {d0-d1}, [r1,:128]! - vld1.32 {d4-d5}, [r2,:128]! - vst1.32 {d16-d17},[r0,:128]! - vmul.f32 q8, q0, q2 - vld1.32 {d2-d3}, [r1,:128]! - vld1.32 {d6-d7}, [r2,:128]! - vst1.32 {d18-d19},[r0,:128]! - vmul.f32 q9, q1, q3 -3: vst1.32 {d16-d19},[r0,:128]! - bx lr -endfunc - function ff_vector_fmul_window_neon, export=1 push {r4,r5,lr} ldr lr, [sp, #12] diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S index 8d385c7e42..9df955dbf9 100644 --- a/libavcodec/arm/dsputil_vfp.S +++ b/libavcodec/arm/dsputil_vfp.S @@ -37,53 +37,6 @@ */ /** - * ARM VFP optimized implementation of 'vector_fmul_c' function. 
- * Assume that len is a positive number and is multiple of 8 - */ -@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len) -function ff_vector_fmul_vfp, export=1 - vpush {d8-d15} - fmrx r12, fpscr - orr r12, r12, #(3 << 16) /* set vector size to 4 */ - fmxr fpscr, r12 - - vldmia r1!, {s0-s3} - vldmia r2!, {s8-s11} - vldmia r1!, {s4-s7} - vldmia r2!, {s12-s15} - vmul.f32 s8, s0, s8 -1: - subs r3, r3, #16 - vmul.f32 s12, s4, s12 - itttt ge - vldmiage r1!, {s16-s19} - vldmiage r2!, {s24-s27} - vldmiage r1!, {s20-s23} - vldmiage r2!, {s28-s31} - it ge - vmulge.f32 s24, s16, s24 - vstmia r0!, {s8-s11} - vstmia r0!, {s12-s15} - it ge - vmulge.f32 s28, s20, s28 - itttt gt - vldmiagt r1!, {s0-s3} - vldmiagt r2!, {s8-s11} - vldmiagt r1!, {s4-s7} - vldmiagt r2!, {s12-s15} - ittt ge - vmulge.f32 s8, s0, s8 - vstmiage r0!, {s24-s27} - vstmiage r0!, {s28-s31} - bgt 1b - - bic r12, r12, #(7 << 16) /* set vector size back to 1 */ - fmxr fpscr, r12 - vpop {d8-d15} - bx lr -endfunc - -/** * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function. * Assume that len is a positive number and is multiple of 8 */ |