diff options
author | Michael Jennings <mej@kainx.org> | 2000-05-04 07:22:07 +0000 |
---|---|---|
committer | Michael Jennings <mej@kainx.org> | 2000-05-04 07:22:07 +0000 |
commit | dda8ccf31f10240ffc682212a0fafe9b3e307e23 (patch) | |
tree | d234743ab0cfc3d5efade1d8b1f840f845cb383a /src/mmx_cmod.S | |
parent | e6facd5833ffa97a6728448a0f413d43f323722c (diff) | |
download | eterm-dda8ccf31f10240ffc682212a0fafe9b3e307e23.tar.gz |
Thu May 4 00:32:45 PDT 2000 Michael Jennings <mej@eterm.org>
Added new shade/tint routines, including some done in MMX assembly, by
Willem Monsuwe <willem@stack.nl>. Thanks Willem! These should prove
to be faster than the old stuff, probably by quite a bit.
SVN revision: 2595
Diffstat (limited to 'src/mmx_cmod.S')
-rw-r--r-- | src/mmx_cmod.S | 482 |
1 files changed, 482 insertions, 0 deletions
diff --git a/src/mmx_cmod.S b/src/mmx_cmod.S new file mode 100644 index 0000000..f875062 --- /dev/null +++ b/src/mmx_cmod.S @@ -0,0 +1,482 @@ +/* + * Copyright (C) 1997-2000, Michael Jennings + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies of the Software, its documentation and marketing & publicity + * materials, and acknowledgment shall be given in the documentation, materials + * and software packages that this Software was used. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "config.h" + +/* MMX routines for tinting XImages written by Willem Monsuwe <willem@stack.nl> */ + +/* Function calling conventions: + * shade_ximage_xx(void *data, int bpl, int w, int h, int rm, int gm, int bm); + */ + +#ifdef HAVE_MMX + +#define data 8(%ebp) +#define bpl 12(%ebp) +#define w 16(%ebp) +#define h 20(%ebp) +#define rm 24(%ebp) +#define gm 28(%ebp) +#define bm 32(%ebp) + +.global shade_ximage_15_mmx + .type shade_ximage_15_mmx,@function +.global shade_ximage_16_mmx + .type shade_ximage_16_mmx,@function +.global shade_ximage_32_mmx + .type shade_ximage_32_mmx,@function + +.bss +.text +.align 8 + +#define ENTER \ + pushl %ebp ;\ + movl %esp, %ebp ;\ + pushl %ebx ;\ + pushl %ecx ;\ + pushl %edx ;\ + pushl %edi ;\ + pushl %esi ;\ + movl data, %esi ;\ + movl w, %ebx ;\ + movl h, %edx + +#define LEAVE \ +4: ;\ + emms ;\ + popl %esi ;\ + popl %edi ;\ + popl %edx ;\ + popl %ecx ;\ + popl %ebx ;\ + movl %ebp, %esp ;\ + popl %ebp ;\ + ret + + +shade_ximage_15_mmx: + ENTER + + leal -6(%esi, %ebx, 2), %esi + negl %ebx + jz 5f + + /* Setup multipliers */ + movd rm, %mm5 + movd gm, %mm6 + movd bm, %mm7 + punpcklwd %mm5, %mm5 /* 00 00 00 00 rm rm rm rm */ + punpcklwd %mm6, %mm6 /* 00 00 00 00 gm gm gm gm */ + punpcklwd %mm7, %mm7 /* 00 00 00 00 bm bm bm bm */ + punpckldq %mm5, %mm5 /* rm rm rm rm rm rm rm rm */ + punpckldq %mm6, %mm6 /* gm gm gm gm gm gm gm gm */ + punpckldq %mm7, %mm7 /* bm bm bm bm bm bm bm bm */ + + cmpl $256, rm + jg shade_ximage_15_mmx_saturate + cmpl $256, gm + jg shade_ximage_15_mmx_saturate + cmpl $256, bm + jg shade_ximage_15_mmx_saturate + +1: movl %ebx, %ecx + addl $3, %ecx + jns 3f +2: + movq (%esi, %ecx, 2), %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $10, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $11, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $3, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* 00 0r */ + pmulhw %mm6, %mm1 /* 00 0g */ + pmulhw %mm7, %mm2 /* 00 0b */ + + psllw $10, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movq %mm0, (%esi, %ecx, 2) + + addl $4, %ecx + js 2b + jz 4f +3: + movw (%esi, %ecx, 2), %ax + movd %eax, %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $10, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $11, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $3, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* 00 0r */ + pmulhw %mm6, %mm1 /* 00 0g */ + pmulhw %mm7, %mm2 /* 00 0b */ + + psllw $10, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movd %mm0, %eax + movw %ax, (%esi, %ecx, 2) + + incl %ecx + cmpl $2, %ecx + jng 3b +4: + addl bpl, %esi + decl %edx + jnz 1b +5: + LEAVE + + +shade_ximage_15_mmx_saturate: + + pcmpeqw %mm3, %mm3 + psllw $5, %mm3 /* ff e0 ff e0 ff e0 ff e0 */ + +1: movl %ebx, %ecx + addl $3, %ecx + jns 3f +2: + movq (%esi, %ecx, 2), %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $10, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $11, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $3, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* xx xr */ + pmulhw %mm6, %mm1 /* xx xg */ + pmulhw %mm7, %mm2 /* xx xb */ + + /* Saturate upper */ + paddusw %mm3, %mm0 /* ff er */ + paddusw %mm3, %mm1 /* ff eg */ + paddusw %mm3, %mm2 /* ff eb */ + + psubw %mm3, %mm1 /* 00 0g */ + psubw %mm3, %mm2 /* 00 0b */ + + psllw $10, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movq %mm0, (%esi, %ecx, 2) + + addl $4, %ecx + js 2b + jz 4f +3: + movw (%esi, %ecx, 2), %ax + movd %eax, %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $10, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $11, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $3, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* xx xr */ + pmulhw %mm6, %mm1 /* xx xg */ + pmulhw %mm7, %mm2 /* xx xb */ + + /* Saturate upper */ + paddusw %mm3, %mm0 /* ff er */ + paddusw %mm3, %mm1 /* ff eg */ + paddusw %mm3, %mm2 /* ff eb */ + + psubw %mm3, %mm1 /* 00 0g */ + psubw %mm3, %mm2 /* 00 0b */ + + psllw $10, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movd %mm0, %eax + movw %ax, (%esi, %ecx, 2) + + incl %ecx + cmpl $2, %ecx + jng 3b +4: + addl bpl, %esi + decl %edx + jnz 1b +5: + LEAVE + + +shade_ximage_16_mmx: + ENTER + + leal -6(%esi, %ebx, 2), %esi + negl %ebx + jz 5f + + /* Setup multipliers */ + movd rm, %mm5 + movd gm, %mm6 + movd bm, %mm7 + punpcklwd %mm5, %mm5 /* 00 00 00 00 rm rm rm rm */ + punpcklwd %mm6, %mm6 /* 00 00 00 00 gm gm gm gm */ + punpcklwd %mm7, %mm7 /* 00 00 00 00 bm bm bm bm */ + punpckldq %mm5, %mm5 /* rm rm rm rm rm rm rm rm */ + punpckldq %mm6, %mm6 /* gm gm gm gm gm gm gm gm */ + punpckldq %mm7, %mm7 /* bm bm bm bm bm bm bm bm */ + + cmpl $256, rm + jg shade_ximage_16_mmx_saturate + cmpl $256, gm + jg shade_ximage_16_mmx_saturate + cmpl $256, bm + jg shade_ximage_16_mmx_saturate + +1: movl %ebx, %ecx + addl $3, %ecx + jns 3f +2: + movq (%esi, %ecx, 2), %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $11, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $10, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $2, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* 00 0r */ + pmulhw %mm6, %mm1 /* 00 0g */ + pmulhw %mm7, %mm2 /* 00 0b */ + + psllw $11, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movq %mm0, (%esi, %ecx, 2) + + addl $4, %ecx + js 2b + jz 4f +3: + movw (%esi, %ecx, 2), %ax + movd %eax, %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $11, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $10, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $2, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* 00 0r */ + pmulhw %mm6, %mm1 /* 00 0g */ + pmulhw %mm7, %mm2 /* 00 0b */ + + psllw $11, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movd %mm0, %eax + movw %ax, (%esi, %ecx, 2) + + incl %ecx + cmpl $2, %ecx + jng 3b +4: + addl bpl, %esi + decl %edx + jnz 1b +5: + LEAVE + + +shade_ximage_16_mmx_saturate: + + pcmpeqw %mm3, %mm3 + movq %mm3, %mm4 + psllw $5, %mm3 /* ff e0 ff e0 ff e0 ff e0 */ + psllw $6, %mm4 /* ff c0 ff c0 ff c0 ff c0 */ + +1: movl %ebx, %ecx + addl $3, %ecx + jns 3f +2: + movq (%esi, %ecx, 2), %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $11, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $10, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $2, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* xx xr */ + pmulhw %mm6, %mm1 /* xx xg */ + pmulhw %mm7, %mm2 /* xx xb */ + + /* Saturate upper */ + paddusw %mm3, %mm0 /* ff er */ + paddusw %mm4, %mm1 /* ff cg */ + paddusw %mm3, %mm2 /* ff eb */ + + psubw %mm4, %mm1 /* 00 0g */ + psubw %mm3, %mm2 /* 00 0b */ + + psllw $11, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movq %mm0, (%esi, %ecx, 2) + + addl $4, %ecx + js 2b + jz 4f +3: + movw (%esi, %ecx, 2), %ax + movd %eax, %mm0 + + movq %mm0, %mm1 /* rg gb */ + movq %mm0, %mm2 /* rg gb */ + psrlw $5, %mm1 /* 0r rg */ + psrlw $11, %mm0 /* 00 0r */ + psllw $11, %mm2 /* b0 00 */ + psllw $10, %mm1 /* g0 00 */ + psllw $8, %mm0 /* 0r 00 */ + psrlw $2, %mm1 /* 0g 00 */ + psrlw $3, %mm2 /* 0b 00 */ + + pmulhw %mm5, %mm0 /* xx xr */ + pmulhw %mm6, %mm1 /* xx xg */ + pmulhw %mm7, %mm2 /* xx xb */ + + /* Saturate upper */ + paddusw %mm3, %mm0 /* ff er */ + paddusw %mm4, %mm1 /* ff cg */ + paddusw %mm3, %mm2 /* ff eb */ + + psubw %mm4, %mm1 /* 00 0g */ + psubw %mm3, %mm2 /* 00 0b */ + + psllw $11, %mm0 /* r0 00 */ + psllw $5, %mm1 /* 0g g0 */ + por %mm2, %mm0 /* r0 0b */ + por %mm1, %mm0 /* rg gb */ + + movd %mm0, %eax + movw %ax, (%esi, %ecx, 2) + + incl %ecx + cmpl $2, %ecx + jng 3b +4: + addl bpl, %esi + decl %edx + jnz 1b +5: + LEAVE + + +shade_ximage_32_mmx: + ENTER + + leal (%esi, %ebx, 4), %esi + negl %ebx + jz 3f + + movd rm, %mm4 + movd gm, %mm5 + movd bm, %mm6 + psllq $32, %mm4 + psllq $16, %mm5 + por %mm6, %mm4 + por %mm5, %mm4 + + pcmpeqw %mm6, %mm6 + psllw $15, %mm6 /* 80 00 80 00 80 00 80 00 */ + movq %mm6, %mm5 + pmulhw %mm4, %mm5 /* Get correction factor */ +1: + movl %ebx, %ecx +2: + movd (%esi, %ecx, 4), %mm1 /* 00 rr gg bb */ + pxor %mm0, %mm0 + punpcklbw %mm1, %mm0 /* 00 00 rr 00 gg 00 bb 00 */ + pxor %mm6, %mm0 /* Flip sign */ + + pmulhw %mm4, %mm0 /* 00 00 xx rr xx gg xx bb */ + psubw %mm5, %mm0 /* Correct range */ + packuswb %mm0, %mm0 /* 00 rr gg bb 00 rr gg bb */ + + movd %mm0, (%esi, %ecx, 4) + + incl %ecx + jnz 2b + + addl bpl, %esi + decl %edx + jnz 1b +3: + LEAVE + +#endif /* HAVE_MMX */ |