diff options
Diffstat (limited to 'vp10/encoder/x86/ssim_opt_x86_64.asm')
-rw-r--r-- | vp10/encoder/x86/ssim_opt_x86_64.asm | 216 |
1 files changed, 0 insertions, 216 deletions
diff --git a/vp10/encoder/x86/ssim_opt_x86_64.asm b/vp10/encoder/x86/ssim_opt_x86_64.asm deleted file mode 100644 index b45f0095d..000000000 --- a/vp10/encoder/x86/ssim_opt_x86_64.asm +++ /dev/null @@ -1,216 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - -%include "vpx_ports/x86_abi_support.asm" - -; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr -%macro TABULATE_SSIM 0 - paddusw xmm15, xmm3 ; sum_s - paddusw xmm14, xmm4 ; sum_r - movdqa xmm1, xmm3 - pmaddwd xmm1, xmm1 - paddd xmm13, xmm1 ; sum_sq_s - movdqa xmm2, xmm4 - pmaddwd xmm2, xmm2 - paddd xmm12, xmm2 ; sum_sq_r - pmaddwd xmm3, xmm4 - paddd xmm11, xmm3 ; sum_sxr -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_Q 1 - movdqa xmm2,%1 - punpckldq %1,xmm0 - punpckhdq xmm2,xmm0 - paddq %1,xmm2 - movdqa xmm2,%1 - punpcklqdq %1,xmm0 - punpckhqdq xmm2,xmm0 - paddq %1,xmm2 -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_W 1 - movdqa xmm1, %1 - punpcklwd %1,xmm0 - punpckhwd xmm1,xmm0 - paddd %1, xmm1 - SUM_ACROSS_Q %1 -%endmacro -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. -global sym(vp10_ssim_parms_16x16_sse2) PRIVATE -sym(vp10_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 16 ;row counter -.NextRow: - - ;grab source and reference pixels - movdqu xmm5, [rsi] - movdqu xmm6, [rdi] - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpckhbw xmm3, xmm0 ; high_s - punpckhbw xmm4, xmm0 ; high_r - - TABULATE_SSIM - - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. -global sym(vp10_ssim_parms_8x8_sse2) PRIVATE -sym(vp10_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 8 ;row counter -.NextRow: - - ;grab source and reference pixels - movq xmm3, [rsi] - movq xmm4, [rdi] - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret |