diff options
author | David Schleef <ds@schleef.org> | 2005-04-30 20:15:43 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-04-30 20:15:43 +0000 |
commit | 2c6c33f31dcabcb4a9632e8088b2c7bf68566edd (patch) | |
tree | defc73ec8f80aeddc8dc9e3b615c9a5ee7f68cc9 /liboil | |
parent | 9652109d3de3a3557c5174c914d83a6b0b8e6c41 (diff) | |
download | liboil-2c6c33f31dcabcb4a9632e8088b2c7bf68566edd.tar.gz |
* liboil/copy/Makefile.am: Some cool new implementations.
* liboil/copy/trans8x8_c.c: (trans8x8_u16_c1), (trans8x8_u16_c2),
(trans8x8_u16_c3), (trans8x8_u16_c4):
* liboil/copy/trans8x8_i386.c: (trans8x8_u16_asm1),
(trans8x8_u16_asm2):
Diffstat (limited to 'liboil')
-rw-r--r-- | liboil/copy/Makefile.am | 3 | ||||
-rw-r--r-- | liboil/copy/trans8x8_c.c | 118 | ||||
-rw-r--r-- | liboil/copy/trans8x8_i386.c | 98 |
3 files changed, 218 insertions, 1 deletions
diff --git a/liboil/copy/Makefile.am b/liboil/copy/Makefile.am index 8601ff3..92b202e 100644 --- a/liboil/copy/Makefile.am +++ b/liboil/copy/Makefile.am @@ -27,7 +27,8 @@ c_sources = \ permute.c \ splat_ref.c \ tablelookup_ref.c \ - trans8x8.c + trans8x8.c \ + trans8x8_c.c libcopy_la_SOURCES = \ $(c_sources) \ diff --git a/liboil/copy/trans8x8_c.c b/liboil/copy/trans8x8_c.c new file mode 100644 index 0000000..76b90d0 --- /dev/null +++ b/liboil/copy/trans8x8_c.c @@ -0,0 +1,118 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2004 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <liboil/liboil.h> +#include <liboil/liboilfunction.h> + +OIL_DECLARE_CLASS (trans8x8_u16); + +void +trans8x8_u16_c1 (uint16_t *dest, int dstr, uint16_t *src, int sstr) +{ + int i; + int j; + uint16_t *d, *s; + + for(i=0;i<8;i++){ + d = OIL_OFFSET(dest, 2*i); + s = OIL_OFFSET(src, sstr*i); + for(j=0;j<8;j++){ + OIL_GET(d,dstr*j,uint16_t) = s[j]; + } + } +} +OIL_DEFINE_IMPL (trans8x8_u16_c1, trans8x8_u16); + +void +trans8x8_u16_c2 (uint16_t *dest, int dstr, uint16_t *src, int sstr) +{ + int i; + uint16_t *d, *s; + + for(i=0;i<8;i++){ + d = OIL_OFFSET(dest, 2*i); + s = OIL_OFFSET(src, sstr*i); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + *d = *s; s++; d = OIL_OFFSET(d, dstr); + } +} +OIL_DEFINE_IMPL (trans8x8_u16_c2, trans8x8_u16); + +void +trans8x8_u16_c3 (uint16_t *dest, int dstr, uint16_t *src, int sstr) +{ + int i; + uint16_t *d, *s; + + for(i=0;i<8;i++){ + d = OIL_OFFSET(dest, 2*i); + s = OIL_OFFSET(src, sstr*i); + OIL_GET(d,dstr*0,uint16_t) = s[0]; + OIL_GET(d,dstr*1,uint16_t) = s[1]; + OIL_GET(d,dstr*2,uint16_t) = s[2]; + OIL_GET(d,dstr*3,uint16_t) = s[3]; + OIL_GET(d,dstr*4,uint16_t) = s[4]; + OIL_GET(d,dstr*5,uint16_t) = s[5]; + OIL_GET(d,dstr*6,uint16_t) = s[6]; + OIL_GET(d,dstr*7,uint16_t) = s[7]; + } +} +OIL_DEFINE_IMPL (trans8x8_u16_c3, trans8x8_u16); + +void +trans8x8_u16_c4 (uint16_t *dest, int dstr, uint16_t *src, int sstr) +{ + int i; + uint16_t *d, *s; + + for(i=0;i<8;i++){ + d = OIL_OFFSET(dest, 2*i); + s = OIL_OFFSET(src, sstr*i); + OIL_GET(d,dstr*0,uint16_t) = *s++; + OIL_GET(d,dstr*1,uint16_t) = *s++; + OIL_GET(d,dstr*2,uint16_t) = *s++; + OIL_GET(d,dstr*3,uint16_t) = *s++; + OIL_GET(d,dstr*4,uint16_t) = *s++; + OIL_GET(d,dstr*5,uint16_t) = *s++; + OIL_GET(d,dstr*6,uint16_t) = *s++; + OIL_GET(d,dstr*7,uint16_t) = *s++; + } +} + +OIL_DEFINE_IMPL (trans8x8_u16_c4, trans8x8_u16); + + diff --git a/liboil/copy/trans8x8_i386.c b/liboil/copy/trans8x8_i386.c index 4ee5004..2a9a76c 100644 --- a/liboil/copy/trans8x8_i386.c +++ b/liboil/copy/trans8x8_i386.c @@ -34,6 +34,7 @@ OIL_DECLARE_CLASS(trans8x8_u16); +/* this could use additional work. */ static void trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) { @@ -135,4 +136,101 @@ trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) } OIL_DEFINE_IMPL (trans8x8_u16_mmx, trans8x8_u16); +void +trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr) +{ + int saved_ebx = 0; + asm ( + " movl %%ebx, %4 \n" + " movl %0, %%ecx \n" + " movl %2, %%ebx \n" + " movl %1, %%edx \n" + " lea (%%ecx,%%edx,8), %%esi \n" + " sub %%edx, %%esi\n " + " movl $7, %%edi \n" + "1: \n" + + " mov (%%ebx), %%ax \n" + " mov %%ax,(%%ecx) \n" + " mov 2(%%ebx), %%ax \n" + " mov %%ax,(%%ecx,%%edx,1) \n" + " mov 4(%%ebx), %%ax \n" + " mov %%ax,(%%ecx,%%edx,2) \n" + " mov 8(%%ebx), %%ax \n" + " mov %%ax,(%%ecx,%%edx,4) \n" + + " neg %%edx \n" + + " mov 6(%%ebx), %%ax \n" + " mov %%ax,(%%esi,%%edx,4) \n" + " mov 10(%%ebx), %%ax \n" + " mov %%ax,(%%esi,%%edx,2) \n" + " mov 12(%%ebx), %%ax \n" + " mov %%ax,(%%esi,%%edx,1) \n" + " mov 14(%%ebx), %%ax \n" + " mov %%ax,(%%esi) \n" + + " neg %%edx \n" + " add %3, %%ebx \n" + " add $2, %%ecx \n" + " add $2, %%esi \n" + + " dec %%edi \n" + " jge 1b \n" + " movl %4, %%ebx \n" + : + : "m" (dest), "m" (dstr), "m" (src), "m" (sstr), "m" (saved_ebx) + : "eax", "ecx", "edx", "esi", "edi"); +} +OIL_DEFINE_IMPL (trans8x8_u16_asm1, trans8x8_u16); + +void +trans8x8_u16_asm2 (uint16_t *dest, int dstr, uint16_t *src, int sstr) +{ + int i; + int saved_ebx = 0; + asm ( + " movl %%ebx, %5 \n" + " movl %0, %%ecx \n" + " movl %2, %%ebx \n" + " movl %1, %%edx \n" + " lea (%%ecx,%%edx,8), %%esi \n" + " sub %%edx, %%esi\n " + " movl $7, %4 \n" + " movl %%edx, %%edi \n" + " negl %%edi \n" + "1: \n" + + " movl (%%ebx), %%eax \n" + " mov %%ax,(%%ecx) \n" + " shr $16, %%eax \n" + " mov %%ax,(%%ecx,%%edx,1) \n" + + " movl 4(%%ebx), %%eax \n" + " mov %%ax,(%%ecx,%%edx,2) \n" + " shr $16, %%eax \n" + " mov %%ax,(%%esi,%%edi,4) \n" + + " movl 8(%%ebx), %%eax \n" + " mov %%ax,(%%ecx,%%edx,4) \n" + " shr $16, %%eax \n" + " mov %%ax,(%%esi,%%edi,2) \n" + + " movl 12(%%ebx), %%eax \n" + " mov %%ax,(%%esi,%%edi,1) \n" + " shr $16, %%eax \n" + " mov %%ax,(%%esi) \n" + + " add %3, %%ebx \n" + " add $2, %%ecx \n" + " add $2, %%esi \n" + + " decl %4 \n" + " jge 1b \n" + " movl %5, %%ebx \n" + : + : "m" (dest), "m" (dstr), "m" (src), "m" (sstr), "m" (i), "m" (saved_ebx) + : "eax", "ebx", "ecx", "edx", "esi", "edi"); +} +OIL_DEFINE_IMPL (trans8x8_u16_asm2, trans8x8_u16); |