diff options
author | Glenn Randers-Pehrson <glennrp at users.sourceforge.net> | 2000-07-08 13:19:41 -0500 |
---|---|---|
committer | Glenn Randers-Pehrson <glennrp at users.sourceforge.net> | 2009-04-06 16:05:23 -0500 |
commit | 316f97a063959781f805fd68c0d9266247b2804b (patch) | |
tree | 820ef1111ab6a24365991c6308cdc27cec120cf1 /pnggccrd.c | |
parent | 3d5a520610bf787e5c6e3fccf9814de73f548e64 (diff) | |
download | libpng-316f97a063959781f805fd68c0d9266247b2804b.tar.gz |
Imported from libpng-1.0.8beta1.tar v1.0.8beta1
Diffstat (limited to 'pnggccrd.c')
-rw-r--r-- | pnggccrd.c | 549 |
1 files changed, 320 insertions, 229 deletions
diff --git a/pnggccrd.c b/pnggccrd.c index 9edb89862..bb574be2c 100644 --- a/pnggccrd.c +++ b/pnggccrd.c @@ -6,7 +6,7 @@ * and http://www.intel.com/drg/pentiumII/appnotes/923/923.htm * for Intel's performance analysis of the MMX vs. non-MMX code. * - * libpng 1.0.7 - July 1, 2000 + * libpng version 1.0.8beta1 - July 8, 2000 * For conditions of distribution and use, see copyright notice in png.h * Copyright (c) 1998, 1999, 2000 Glenn Randers-Pehrson * Copyright (c) 1998, Intel Corporation @@ -122,11 +122,33 @@ * - fixed up both versions of mmxsupport() (ORIG_THAT_USED_TO_CLOBBER_EBX * macro determines which is used); original not yet tested. * + * 20000213: + * - When compiling with gcc, be sure to use -fomit-frame-pointer + * * 20000319: * - fixed a register-name typo in png_do_read_interlace(), default (MMX) case, * pass == 4 or 5, that caused visible corruption of interlaced images * - * - When compiling with gcc, be sure to use -fomit-frame-pointer + * 20000623: + * - Various problems were reported with gcc 2.95.2 in the Cygwin environment, + * many of the form "forbidden register 0 (ax) was spilled for class AREG." + * This is explained at http://gcc.gnu.org/fom_serv/cache/23.html, and + * Chuck Wilson supplied a patch involving dummy output registers. See + * http://sourceforge.net/bugs/?func=detailbug&bug_id=108741&group_id=5624 + * for the original (anonymous) SourceForge bug report. + * + * 20000706: + * - Chuck Wilson passed along these remaining gcc 2.95.2 errors: + * pnggccrd.c: In function `png_combine_row': + * pnggccrd.c:525: more than 10 operands in `asm' + * pnggccrd.c:669: more than 10 operands in `asm' + * pnggccrd.c:828: more than 10 operands in `asm' + * pnggccrd.c:994: more than 10 operands in `asm' + * pnggccrd.c:1177: more than 10 operands in `asm' + * They are all the same problem and can be worked around by using the + * global _unmask variable unconditionally, not just in the -fPIC case. 
+ * Apparently earlier versions of gcc also have the problem with more than + * 10 operands; they just don't report it. Much strangeness ensues, etc. */ #define PNG_INTERNAL @@ -174,9 +196,13 @@ static const int png_pass_width[7] = {8, 4, 4, 2, 2, 1, 1}; /* These constants are used in the inlined MMX assembly code. Ignore gcc's "At top level: defined but not used" warnings. */ -#ifdef __PIC__ -static int _unmask; // not enough regs when compiling with -fPIC, so... -#endif +/* GRR 20000706: originally _unmask was needed only when compiling with -fPIC, + * since that case uses the %ebx register for indexing the Global Offset Table + * and there were no other registers available. But gcc 2.95 and later emit + * "more than 10 operands in `asm'" errors when %ebx is used to preload unmask + * in the non-PIC case, so we'll just use the global unconditionally now. + */ +static int _unmask; static unsigned long long _mask8_0 = 0x0102040810204080LL; @@ -430,23 +456,19 @@ fflush(stderr); { png_uint_32 len; int diff; -#ifndef __PIC__ - int unmask = ~mask; -#else + int dummy_value_a; // fix 'forbidden register spilled' error + int dummy_value_d; + int dummy_value_c; + int dummy_value_S; + int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version -#endif srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = png_ptr->width & 7; // amount lost - __asm__ ( -#ifdef __PIC__ + __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern -#else -// preload "movd unmask, %%mm7 \n\t" // (unmask is in ebx) - "movd %%ebx, %%mm7 \n\t" // load bit pattern (unmask) -#endif "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 \n\t" @@ -498,21 +520,22 @@ fflush(stderr); "end8: \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=a" (dummy_value_a), // output regs (dummy) + "=d" (dummy_value_d), + "=c" (dummy_value_c), + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" 
(srcptr), // esi // input regs - "D" (dstptr), // edi - "a" (diff), // eax -#ifndef __PIC__ - "b" (unmask), // ebx // Global Offset Table idx -#endif - "c" (len), // ecx - "d" (mask) // edx + : "3" (srcptr), // esi // input regs + "4" (dstptr), // edi + "0" (diff), // eax +// was (unmask) "b" RESERVED // ebx // Global Offset Table idx + "2" (len), // ecx + "1" (mask) // edx - : "%esi", "%edi", "%eax", // clobber list - "%ecx", "%edx" +// : // clobber list #if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm4", "%mm6", "%mm7" + : "%mm0", "%mm4", "%mm6", "%mm7" #endif ); } @@ -550,23 +573,19 @@ fflush(stderr); { png_uint_32 len; int diff; -#ifndef __PIC__ - int unmask = ~mask; -#else + int dummy_value_a; // fix 'forbidden register spilled' error + int dummy_value_d; + int dummy_value_c; + int dummy_value_S; + int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version -#endif srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = png_ptr->width & 7; // amount lost - __asm__ ( -#ifdef __PIC__ + __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern -#else -// preload "movd unmask, %%mm7 \n\t" // (unmask is in ebx) - "movd %%ebx, %%mm7 \n\t" // load bit pattern (unmask) -#endif "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 \n\t" @@ -633,21 +652,22 @@ fflush(stderr); "end16: \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=a" (dummy_value_a), // output regs (dummy) + "=d" (dummy_value_d), + "=c" (dummy_value_c), + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (srcptr), // esi // input regs - "D" (dstptr), // edi - "a" (diff), // eax -#ifndef __PIC__ - "b" (unmask), // ebx // Global Offset Table idx -#endif - "c" (len), // ecx - "d" (mask) // edx + : "3" (srcptr), // esi // input regs + "4" (dstptr), // edi + "0" (diff), // eax +// was (unmask) "b" RESERVED // ebx // Global Offset Table idx + 
"2" (len), // ecx + "1" (mask) // edx - : "%esi", "%edi", "%eax", // clobber list - "%ecx", "%edx" +// : // clobber list #if 0 /* MMX regs (%mm0, etc.) not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", + : "%mm0", "%mm1", "%mm4", "%mm5", "%mm6", "%mm7" #endif ); @@ -686,23 +706,19 @@ fflush(stderr); { png_uint_32 len; int diff; -#ifndef __PIC__ - int unmask = ~mask; -#else + int dummy_value_a; // fix 'forbidden register spilled' error + int dummy_value_d; + int dummy_value_c; + int dummy_value_S; + int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version -#endif srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = png_ptr->width & 7; // amount lost - __asm__ ( -#ifdef __PIC__ + __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern -#else -// preload "movd unmask, %%mm7 \n\t" // (unmask is in ebx) - "movd %%ebx, %%mm7 \n\t" // load bit pattern (unmask) -#endif "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 \n\t" @@ -784,21 +800,22 @@ fflush(stderr); "end24: \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=a" (dummy_value_a), // output regs (dummy) + "=d" (dummy_value_d), + "=c" (dummy_value_c), + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (srcptr), // esi // input regs - "D" (dstptr), // edi - "a" (diff), // eax -#ifndef __PIC__ - "b" (unmask), // ebx // Global Offset Table idx -#endif - "c" (len), // ecx - "d" (mask) // edx + : "3" (srcptr), // esi // input regs + "4" (dstptr), // edi + "0" (diff), // eax +// was (unmask) "b" RESERVED // ebx // Global Offset Table idx + "2" (len), // ecx + "1" (mask) // edx - : "%esi", "%edi", "%eax", // clobber list - "%ecx", "%edx" +// : // clobber list #if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2", + : "%mm0", "%mm1", "%mm2", "%mm4", "%mm5", "%mm6", "%mm7" #endif ); @@ -837,23 +854,19 @@ fflush(stderr); { png_uint_32 len; int diff; -#ifndef __PIC__ - int unmask = ~mask; -#else + int dummy_value_a; // fix 'forbidden register spilled' error + int dummy_value_d; + int dummy_value_c; + int dummy_value_S; + int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version -#endif srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = png_ptr->width & 7; // amount lost - __asm__ ( -#ifdef __PIC__ + __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern -#else -// preload "movd unmask, %%mm7 \n\t" // (unmask is in ebx) - "movd %%ebx, %%mm7 \n\t" // load bit pattern (unmask) -#endif "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 \n\t" @@ -942,21 +955,22 @@ fflush(stderr); "end32: \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=a" (dummy_value_a), // output regs (dummy) + "=d" (dummy_value_d), + "=c" (dummy_value_c), + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (srcptr), // esi // input regs - "D" (dstptr), // edi - "a" (diff), // eax -#ifndef __PIC__ - "b" (unmask), // ebx // Global Offset Table idx -#endif - "c" (len), // ecx - "d" (mask) // edx + : "3" (srcptr), // esi // input regs + "4" (dstptr), // edi + "0" (diff), // eax +// was (unmask) "b" RESERVED // ebx // Global Offset Table idx + "2" (len), // ecx + "1" (mask) // edx - : "%esi", "%edi", "%eax", // clobber list - "%ecx", "%edx" +// : // clobber list #if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2", "%mm3", + : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" #endif ); @@ -995,23 +1009,19 @@ fflush(stderr); { png_uint_32 len; int diff; -#ifndef __PIC__ - int unmask = ~mask; -#else + int dummy_value_a; // fix 'forbidden register spilled' error + int dummy_value_d; + int dummy_value_c; + int dummy_value_S; + int dummy_value_D; _unmask = ~mask; // global variable for -fPIC version -#endif srcptr = png_ptr->row_buf + 1; dstptr = row; len = png_ptr->width &~7; // reduce to multiple of 8 diff = png_ptr->width & 7; // amount lost - __asm__ ( -#ifdef __PIC__ + __asm__ __volatile__ ( "movd _unmask, %%mm7 \n\t" // load bit pattern -#else -// preload "movd unmask, %%mm7 \n\t" // (unmask is in ebx) - "movd %%ebx, %%mm7 \n\t" // load bit pattern (unmask) -#endif "psubb %%mm6, %%mm6 \n\t" // zero mm6 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklwd %%mm7, %%mm7 \n\t" @@ -1117,21 +1127,22 @@ fflush(stderr); "end48: \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=a" (dummy_value_a), // output regs (dummy) + "=d" (dummy_value_d), + "=c" (dummy_value_c), + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (srcptr), // esi // input regs - "D" (dstptr), // edi - "a" (diff), // eax -#ifndef __PIC__ - "b" (unmask), // ebx // Global Offset Table idx -#endif - "c" (len), // ecx - "d" (mask) // edx + : "3" (srcptr), // esi // input regs + "4" (dstptr), // edi + "0" (diff), // eax +// was (unmask) "b" RESERVED // ebx // Global Offset Table idx + "2" (len), // ecx + "1" (mask) // edx - : "%esi", "%edi", "%eax", // clobber list - "%ecx", "%edx" +// : // clobber list #if 0 /* MMX regs (%mm0, etc.) 
not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2", "%mm3", + : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" #endif ); @@ -1453,7 +1464,10 @@ fflush(stderr); { if (((pass == 0) || (pass == 1)) && width) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $21, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes @@ -1482,22 +1496,27 @@ fflush(stderr); "jnz .loop3_pass0 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width) // ecx // doesn't work "i" (0x0000000000FFFFFFLL) // %1 (a.k.a. _const4) - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4" + : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4" #endif ); } else if (((pass == 2) || (pass == 3)) && width) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $9, %%edi \n\t" // (png_pass_inc[pass] - 1)*pixel_bytes @@ -1520,15 +1539,17 @@ fflush(stderr); "jnz .loop3_pass2 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, ..., %mm2 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2" + : "%mm0", "%mm1", "%mm2" #endif ); } @@ -1543,7 +1564,10 @@ fflush(stderr); // png_pass_inc[] = {8, 8, 4, 4, 2, 2, 1}; // sptr 
points at last pixel in pre-expanded row // dp points at last pixel position in expanded row - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $3, %%esi \n\t" "subl $9, %%edi \n\t" // (png_pass_inc[pass] + 1)*pixel_bytes @@ -1569,15 +1593,17 @@ fflush(stderr); "jnz .loop3_pass4 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, ..., %mm3 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2", "%mm3" + : "%mm0", "%mm1", "%mm2", "%mm3" #endif ); } @@ -1609,7 +1635,10 @@ fflush(stderr); width -= width_mmx; // 0-3 pixels => 0-3 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $3, %%esi \n\t" "subl $31, %%edi \n\t" @@ -1636,15 +1665,17 @@ fflush(stderr); "jnz .loop1_pass0 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, ..., %mm4 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4" + : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4" #endif ); } @@ -1684,7 +1715,10 @@ fflush(stderr); width -= width_mmx; // 0-3 pixels => 0-3 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + 
__asm__ __volatile__ ( "subl $3, %%esi \n\t" "subl $15, %%edi \n\t" @@ -1702,15 +1736,17 @@ fflush(stderr); "jnz .loop1_pass2 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -1732,7 +1768,10 @@ fflush(stderr); width -= width_mmx; // 0-3 pixels => 0-3 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $7, %%esi \n\t" "subl $15, %%edi \n\t" @@ -1749,15 +1788,17 @@ fflush(stderr); "jnz .loop1_pass4 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (none) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -1784,7 +1825,10 @@ fflush(stderr); width -= width_mmx; // 0,1 pixels => 0,2 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $2, %%esi \n\t" "subl $30, %%edi \n\t" @@ -1804,15 +1848,17 @@ fflush(stderr); "jnz .loop2_pass0 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" 
(width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -1838,7 +1884,10 @@ fflush(stderr); width -= width_mmx; // 0,1 pixels => 0,2 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $2, %%esi \n\t" "subl $14, %%edi \n\t" @@ -1856,15 +1905,17 @@ fflush(stderr); "jnz .loop2_pass2 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -1890,7 +1941,10 @@ fflush(stderr); width -= width_mmx; // 0,1 pixels => 0,2 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $2, %%esi \n\t" "subl $6, %%edi \n\t" @@ -1904,15 +1958,17 @@ fflush(stderr); "jnz .loop2_pass4 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0" + : "%mm0" #endif ); } @@ -1948,12 +2004,15 @@ fflush(stderr); */ if (width_mmx) { + int dummy_value_c; 
// fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; #ifdef GRR_DEBUG FILE *junk = fopen("junk.4bytes", "wb"); if (junk) fclose(junk); #endif /* GRR_DEBUG */ - __asm__ ( + __asm__ __volatile__ ( "subl $4, %%esi \n\t" "subl $60, %%edi \n\t" @@ -1976,15 +2035,17 @@ fflush(stderr); "jnz .loop4_pass0 \n\t" "EMMS \n\t" // DONE - : // output regs (none) - - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) + + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -2010,7 +2071,10 @@ fflush(stderr); width -= width_mmx; // 0,1 pixels => 0,4 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $4, %%esi \n\t" "subl $28, %%edi \n\t" @@ -2029,15 +2093,17 @@ fflush(stderr); "jnz .loop4_pass2 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) + + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx - - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -2063,7 +2129,10 @@ fflush(stderr); width -= width_mmx; // 0,1 pixels => 0,4 bytes if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $4, %%esi \n\t" "subl $12, %%edi \n\t" @@ -2080,15 +2149,17 @@ fflush(stderr); "jnz .loop4_pass4 \n\t" 
"EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width_mmx) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width_mmx) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0, %mm1 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0", "%mm1" + : "%mm0", "%mm1" #endif ); } @@ -2122,12 +2193,15 @@ fflush(stderr); { // source is 8-byte RRGGBBAA // dest is 64-byte RRGGBBAA RRGGBBAA RRGGBBAA RRGGBBAA ... + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; #ifdef GRR_DEBUG FILE *junk = fopen("junk.8bytes", "wb"); if (junk) fclose(junk); #endif /* GRR_DEBUG */ - __asm__ ( + __asm__ __volatile__ ( "subl $56, %%edi \n\t" // start of last block ".loop8_pass0: \n\t" @@ -2146,15 +2220,17 @@ fflush(stderr); "jnz .loop8_pass0 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0" + : "%mm0" #endif ); } @@ -2166,7 +2242,10 @@ fflush(stderr); width -= width_mmx; if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $24, %%edi \n\t" // start of last block ".loop8_pass2: \n\t" @@ -2181,15 +2260,17 @@ fflush(stderr); "jnz .loop8_pass2 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width) // ecx 
+ : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0" + : "%mm0" #endif ); } @@ -2202,7 +2283,10 @@ fflush(stderr); width -= width_mmx; if (width_mmx) { - __asm__ ( + int dummy_value_c; // fix 'forbidden register spilled' + int dummy_value_S; + int dummy_value_D; + __asm__ __volatile__ ( "subl $8, %%edi \n\t" // start of last block ".loop8_pass4: \n\t" @@ -2215,15 +2299,17 @@ fflush(stderr); "jnz .loop8_pass4 \n\t" "EMMS \n\t" // DONE - : // output regs (none) + : "=c" (dummy_value_c), // output regs (dummy) + "=S" (dummy_value_S), + "=D" (dummy_value_D) - : "S" (sptr), // esi // input regs - "D" (dp), // edi - "c" (width) // ecx + : "1" (sptr), // esi // input regs + "2" (dp), // edi + "0" (width) // ecx - : "%esi", "%edi", "%ecx" // clobber list +// : // clobber list #if 0 /* %mm0 not supported by gcc 2.7.2.3 or egcs 1.1 */ - , "%mm0" + : "%mm0" #endif ); } @@ -2408,11 +2494,14 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, png_bytep prev_row) { int bpp; + int dummy_value_c; // fix 'forbidden register 2 (cx) was spilled' error + int dummy_value_S; + int dummy_value_D; // int diff; GRR: global now (shortened to dif/_dif) bpp = (row_info->pixel_depth + 7) >> 3; // Get # bytes per pixel _FullLength = row_info->rowbytes; // # of bytes to filter - __asm__ ( + __asm__ __volatile__ ( // Init address pointers and offset //GRR "movl row, %%edi \n\t" // edi ==> Avg(x) "xorl %%ebx, %%ebx \n\t" // ebx ==> x @@ -2467,14 +2556,16 @@ png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row, "subl %%eax, %%ecx \n\t" // drop over bytes from original length "movl %%ecx, _MMXLength \n\t" - : // output regs/vars here, e.g., "=m" (_MMXLength) instead of final instr + : "=c" (dummy_value_c), // output regs/vars here, e.g., "=m" (_MMXLength) instead of final instr + "=S" (dummy_value_S), + "=D" 
(dummy_value_D) - : "S" (prev_row), // esi // input regs - "D" (row), // edi - "c" (bpp) // ecx + : "1" (prev_row), // esi // input regs + "2" (row), // edi + "0" (bpp) // ecx - : "%eax", "%ebx", "%ecx", // clobber list - "%edx", "%edi", "%esi" + : "%eax", "%ebx", // clobber list + "%edx" // GRR: INCLUDE "memory" as clobbered? (_dif, _MMXLength) PROBABLY ); |