diff options
author | Clément Bœsch <u@pkh.me> | 2014-08-23 20:03:10 +0200 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-08-23 20:12:56 +0200 |
commit | 554d8190624f25cefe079bd7b9ad61a2ade8541a (patch) | |
tree | f4c2d420952d1d6780c14ee962f588028d8b58ab | |
parent | f4dec0dba0faa46758554ff6229a57c0bf833404 (diff) | |
download | ffmpeg-554d8190624f25cefe079bd7b9ad61a2ade8541a.tar.gz |
avutil/pixelutils: faster pixelutils_sad_16x16
Runtime of pixelutils_sad_16x16 drops from 501 to 439 decicycles.
See 45c7f3997ea11c3d1007b2126b1c0049a8c27105.
-rw-r--r-- | libavutil/x86/pixelutils.asm | 16 |
1 files changed, 11 insertions, 5 deletions
```diff
diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm
index 15213d92d8..7522f24a42 100644
--- a/libavutil/x86/pixelutils.asm
+++ b/libavutil/x86/pixelutils.asm
@@ -109,18 +109,24 @@ cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2
 ;-------------------------------------------------------------------------------
 INIT_XMM sse2
 cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2
-    pxor        m4, m4
-%rep 8
-    movu        m0, [src1q]
+    movu        m4, [src1q]
+    movu        m2, [src2q]
     movu        m1, [src1q + stride1q]
+    movu        m3, [src2q + stride2q]
+    psadbw      m4, m2
+    psadbw      m1, m3
+    paddw       m4, m1
+%rep 7
+    lea         src1q, [src1q + 2*stride1q]
+    lea         src2q, [src2q + 2*stride2q]
+    movu        m0, [src1q]
     movu        m2, [src2q]
+    movu        m1, [src1q + stride1q]
     movu        m3, [src2q + stride2q]
     psadbw      m0, m2
     psadbw      m1, m3
     paddw       m4, m0
     paddw       m4, m1
-    lea         src1q, [src1q + 2*stride1q]
-    lea         src2q, [src2q + 2*stride2q]
 %endrep
     movhlps     m0, m4
     paddw       m4, m0
```