author      gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4>   2012-11-22 10:00:13 +0000
committer   gjl <gjl@138bc75d-0d04-0410-961f-82ee72b054a4>   2012-11-22 10:00:13 +0000
commit      c8ec4eb62c989b8da4a8d4e6ba787e7785deab5a (patch)
tree        eabc8ea539916ca2cfbd3ccf1d3efabdd0791b90
parent      be87c9ab7a7f751e2b605e1faa6be98835c47514 (diff)
download    gcc-c8ec4eb62c989b8da4a8d4e6ba787e7785deab5a.tar.gz
libgcc/
Adjust decimal point of signed accum mode to GCC default.
PR target/54222
* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
_divqq_helper.
* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
(__mulha3, __mulsa3)
(__divqq3, __divha3, __divsa3): Adjust to new position of
decimal point of signed accum types.
(__mulusa3_round): New function.
(__mulusa3): Use it.
(__divqq_helper): New function.
(__udivuqq3): Use it.
gcc/
Adjust decimal point of signed accum mode to GCC default.
PR target/54222
* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
(TA): Move decimal point one bit to the right.
* config/avr/avr.c (avr_out_fract): Rewrite.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@193721 138bc75d-0d04-0410-961f-82ee72b054a4
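The gist of the change: AVR previously adjusted the signed accum modes so that the decimal point fell on a byte boundary (HA as sign + 7 integral + 8 fractional bits), while GCC's default for a 16-bit signed accum is sign + 8 integral + 7 fractional bits. This commit drops the adjustments for HA/SA/DA and moves TA's point accordingly, so every signed accum layout now differs from its unsigned counterpart by exactly one bit of shift. A minimal host-side sketch (plain C written for this note, not code from the commit) of how one value maps onto the two HA layouts:

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      double x = 1.5;
      /* Old AVR layout: 8 fractional bits, point on a byte boundary.  */
      uint16_t old_ha = (uint16_t) (x * (1u << 8));   /* 0x0180 */
      /* GCC default restored by this commit: 7 fractional bits.  */
      uint16_t new_ha = (uint16_t) (x * (1u << 7));   /* 0x00c0 */
      printf ("fbit=8: 0x%04x, fbit=7: 0x%04x\n", old_ha, new_ha);
      return 0;
    }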
-rw-r--r--   gcc/ChangeLog                        |   9
-rw-r--r--   gcc/config/avr/avr-modes.def         |  17
-rw-r--r--   gcc/config/avr/avr.c                 | 668
-rw-r--r--   libgcc/ChangeLog                     |  18
-rw-r--r--   libgcc/config/avr/lib1funcs-fixed.S  | 199
-rw-r--r--   libgcc/config/avr/t-avr              |   4
6 files changed, 499 insertions, 416 deletions
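Most of the churn is the rewritten avr_out_fract in avr.c below. With the new mode layouts, the fractional-bit counts of any two convertible fixed-point modes differ by a whole number of bytes plus a residue of 0 or 1 bit, so the conversion decomposes into byte-aligned moves (MOV/MOVW, with CLR for vacated low bytes and sign/zero extension at the top) followed by at most one single-bit shift pass. A rough host-side model of that decomposition (illustrative C only; fract_convert and its parameters are invented for this note, and an arithmetic right shift is assumed):

    #include <stdint.h>

    static int32_t
    fract_convert (int32_t v, unsigned src_fbit, unsigned dst_fbit)
    {
      /* Whole bytes of fractional part, as computed in the patch:
         fbyte = (1 + fbit) / BITS_PER_UNIT.  */
      unsigned src_fbyte = (1 + src_fbit) / 8;
      unsigned dst_fbyte = (1 + dst_fbit) / 8;

      /* Byte-aligned part: register moves plus clearing of low bytes.  */
      if (dst_fbyte >= src_fbyte)
        v <<= 8 * (dst_fbyte - src_fbyte);
      else
        v >>= 8 * (src_fbyte - dst_fbyte);

      /* Residual 1-bit correction: ASHIFT for signed -> unsigned layouts,
         ASHIFTRT for unsigned -> signed layouts.  */
      if (dst_fbit % 8 == 0 && src_fbit % 8 == 7)
        v <<= 1;
      else if (dst_fbit % 8 == 7 && src_fbit % 8 == 0)
        v >>= 1;

      return v;
    }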
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5854766ffdd..bad8461fa0c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2012-11-22  Georg-Johann Lay  <avr@gjlay.de>
+
+	Adjust decimal point of signed accum mode to GCC default.
+
+	PR target/54222
+	* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
+	(TA): Move decimal point one bit to the right.
+	* config/avr/avr.c (avr_out_fract): Rewrite.
+
 2012-11-21  Matthias Klose  <doko@ubuntu.com>
 
 	* config/alpha/t-linux: New file; define MULTIARCH_DIRNAME.
diff --git a/gcc/config/avr/avr-modes.def b/gcc/config/avr/avr-modes.def
index 09e6b4983f0..04268cdf5b3 100644
--- a/gcc/config/avr/avr-modes.def
+++ b/gcc/config/avr/avr-modes.def
@@ -1,26 +1,13 @@
 FRACTIONAL_INT_MODE (PSI, 24, 3);
 
-/* On 8 bit machines it requires fewer instructions for fixed point
-   routines if the decimal place is on a byte boundary which is not
-   the default for signed accum types.  */
-
-ADJUST_IBIT (HA, 7);
-ADJUST_FBIT (HA, 8);
-
-ADJUST_IBIT (SA, 15);
-ADJUST_FBIT (SA, 16);
-
-ADJUST_IBIT (DA, 31);
-ADJUST_FBIT (DA, 32);
-
 /* Make TA and UTA 64 bits wide.
    128 bit wide modes would be insane on an 8-bit machine.
    This needs special treatment in avr.c and avr-lib.h.  */
 
 ADJUST_BYTESIZE (TA, 8);
 ADJUST_ALIGNMENT (TA, 1);
-ADJUST_IBIT (TA, 15);
-ADJUST_FBIT (TA, 48);
+ADJUST_IBIT (TA, 16);
+ADJUST_FBIT (TA, 47);
 
 ADJUST_BYTESIZE (UTA, 8);
 ADJUST_ALIGNMENT (UTA, 1);
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 3d1a599265f..cc0290475cd 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -6974,6 +6974,332 @@ avr_out_addto_sp (rtx *op, int *plen)
 }
 
 
+/* Outputs instructions needed for fixed point type conversion.
+   This includes converting between any fixed point type, as well
+   as converting to any integer type.  Conversion between integer
+   types is not supported.
+
+   Converting signed fractional types requires a bit shift if converting
+   to or from any unsigned fractional type because the decimal place is
+   shifted by 1 bit.  When the destination is a signed fractional, the sign
+   is stored in either the carry or T bit.  */
+
+const char*
+avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
+{
+  size_t i;
+  rtx xop[6];
+  RTX_CODE shift = UNKNOWN;
+  bool sign_in_carry = false;
+  bool msb_in_carry = false;
+  bool lsb_in_carry = false;
+  const char *code_ashift = "lsl %0";
+
+
+#define MAY_CLOBBER(RR)                                                 \
+  /* Shorthand used below.  */                                          \
+  ((sign_bytes                                                          \
+    && IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb))  \
+   || (reg_unused_after (insn, all_regs_rtx[RR])                        \
+       && !IN_RANGE (RR, dest.regno, dest.regno_msb)))
+
+  struct
+  {
+    /* bytes       : Length of operand in bytes.
+       ibyte       : Length of integral part in bytes.
+       fbyte, fbit : Length of fractional part in bytes, bits.  */
+
+    bool sbit;
+    unsigned fbit, bytes, ibyte, fbyte;
+    unsigned regno, regno_msb;
+  } dest, src, *val[2] = { &dest, &src };
+
+  if (plen)
+    *plen = 0;
+
+  /* Step 0:  Determine information on source and destination operand we
+     ======   will need in the remainder.  */
+
+  for (i = 0; i < sizeof (val) / sizeof (*val); i++)
+    {
+      enum machine_mode mode;
+
+      xop[i] = operands[i];
+
+      mode = GET_MODE (xop[i]);
+
+      val[i]->bytes = GET_MODE_SIZE (mode);
+      val[i]->regno = REGNO (xop[i]);
+      val[i]->regno_msb = REGNO (xop[i]) + val[i]->bytes - 1;
+
+      if (SCALAR_INT_MODE_P (mode))
+        {
+          val[i]->sbit = intsigned;
+          val[i]->fbit = 0;
+        }
+      else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
+        {
+          val[i]->sbit = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
+          val[i]->fbit = GET_MODE_FBIT (mode);
+        }
+      else
+        fatal_insn ("unsupported fixed-point conversion", insn);
+
+      val[i]->fbyte = (1 + val[i]->fbit) / BITS_PER_UNIT;
+      val[i]->ibyte = val[i]->bytes - val[i]->fbyte;
+    }
+
+  // Byte offset of the decimal point taking into account different place
+  // of the decimal point in input and output and different register numbers
+  // of input and output.
+  int offset = dest.regno - src.regno + dest.fbyte - src.fbyte;
+
+  // Number of destination bytes that will come from sign / zero extension.
+  int sign_bytes = (dest.ibyte - src.ibyte) * (dest.ibyte > src.ibyte);
+
+  // Number of bytes at the low end to be filled with zeros.
+  int zero_bytes = (dest.fbyte - src.fbyte) * (dest.fbyte > src.fbyte);
+
+  // Do we have a 16-bit register that is cleared?
+  rtx clrw = NULL_RTX;
+
+  bool sign_extend = src.sbit && sign_bytes;
+
+  if (0 == dest.fbit % 8 && 7 == src.fbit % 8)
+    shift = ASHIFT;
+  else if (7 == dest.fbit % 8 && 0 == src.fbit % 8)
+    shift = ASHIFTRT;
+  else if (dest.fbit % 8 == src.fbit % 8)
+    shift = UNKNOWN;
+  else
+    gcc_unreachable();
+
+  /* Step 1:  Clear bytes at the low end and copy payload bits from source
+     ======   to destination.  */
+
+  int step = offset < 0 ? 1 : -1;
+  unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb;
+
+  // We cleared at least that number of registers.
+  int clr_n = 0;
+
+  for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step)
+    {
+      // Next regno of destination is needed for MOVW
+      unsigned d1 = d0 + step;
+
+      // Current and next regno of source
+      unsigned s0 = d0 - offset;
+      unsigned s1 = s0 + step;
+
+      // Must current resp. next regno be CLRed?  This applies to the low
+      // bytes of the destination that have no associated source bytes.
+      bool clr0 = s0 < src.regno;
+      bool clr1 = s1 < src.regno && d1 >= dest.regno;
+
+      // First gather what code to emit (if any) and additional step to
+      // apply if a MOVW is in use.  xop[2] is destination rtx and xop[3]
+      // is the source rtx for the current loop iteration.
+      const char *code = NULL;
+      int stepw = 0;
+
+      if (clr0)
+        {
+          if (AVR_HAVE_MOVW && clr1 && clrw)
+            {
+              xop[2] = all_regs_rtx[d0 & ~1];
+              xop[3] = clrw;
+              code = "movw %2,%3";
+              stepw = step;
+            }
+          else
+            {
+              xop[2] = all_regs_rtx[d0];
+              code = "clr %2";
+
+              if (++clr_n >= 2
+                  && !clrw
+                  && d0 % 2 == (step > 0))
+                {
+                  clrw = all_regs_rtx[d0 & ~1];
+                }
+            }
+        }
+      else if (offset && s0 <= src.regno_msb)
+        {
+          int movw = AVR_HAVE_MOVW && offset % 2 == 0
+            && d0 % 2 == (offset > 0)
+            && d1 <= dest.regno_msb && d1 >= dest.regno
+            && s1 <= src.regno_msb && s1 >= src.regno;
+
+          xop[2] = all_regs_rtx[d0 & ~movw];
+          xop[3] = all_regs_rtx[s0 & ~movw];
+          code = movw ? "movw %2,%3" : "mov %2,%3";
+          stepw = step * movw;
+        }
+
+      if (code)
+        {
+          if (sign_extend && shift != ASHIFT && !sign_in_carry
+              && (d0 == src.regno_msb || d0 + stepw == src.regno_msb))
+            {
+              /* We are going to override the sign bit.  If we sign-extend,
+                 store the sign in the Carry flag.  This is not needed if
+                 the destination will be ASHIFTed in the remainder because
+                 the ASHIFT will set Carry without extra instruction.  */
+
+              avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1);
+              sign_in_carry = true;
+            }
+
+          unsigned src_msb = dest.regno_msb - sign_bytes - offset + 1;
+
+          if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
+              && src.ibyte > dest.ibyte
+              && (d0 == src_msb || d0 + stepw == src_msb))
+            {
+              /* We are going to override the MSB.  If we shift right,
+                 store the MSB in the Carry flag.  This is only needed if
+                 we don't sign-extend because with sign-extension the MSB
+                 (the sign) will be produced by the sign extension.  */
+
+              avr_asm_len ("lsr %0", &all_regs_rtx[src_msb], plen, 1);
+              msb_in_carry = true;
+            }
+
+          unsigned src_lsb = dest.regno - offset - 1;
+
+          if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry
+              && (d0 == src_lsb || d0 + stepw == src_lsb))
+            {
+              /* We are going to override the new LSB; store it into carry.  */
+
+              avr_asm_len ("lsl %0", &all_regs_rtx[src_lsb], plen, 1);
+              code_ashift = "rol %0";
+              lsb_in_carry = true;
+            }
+
+          avr_asm_len (code, xop, plen, 1);
+          d0 += stepw;
+        }
+    }
+
+  /* Step 2:  Shift destination left by 1 bit position.  This might be needed
+     ======   for signed input and unsigned output.  */
+
+  if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry)
+    {
+      unsigned s0 = dest.regno - offset - 1;
+
+      if (MAY_CLOBBER (s0))
+        avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
+      else
+        avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
+                     "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
+
+      code_ashift = "rol %0";
+      lsb_in_carry = true;
+    }
+
+  if (shift == ASHIFT)
+    {
+      for (d0 = dest.regno + zero_bytes;
+           d0 <= dest.regno_msb - sign_bytes; d0++)
+        {
+          avr_asm_len (code_ashift, &all_regs_rtx[d0], plen, 1);
+          code_ashift = "rol %0";
+        }
+
+      lsb_in_carry = false;
+      sign_in_carry = true;
+    }
+
+  /* Step 4a:  Store MSB in carry if we don't already have it or will produce
+     =======   it in sign-extension below.  */
+
+  if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
+      && src.ibyte > dest.ibyte)
+    {
+      unsigned s0 = dest.regno_msb - sign_bytes - offset + 1;
+
+      if (MAY_CLOBBER (s0))
+        avr_asm_len ("lsr %0", &all_regs_rtx[s0], plen, 1);
+      else
+        avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
+                     "lsr __tmp_reg__", &all_regs_rtx[s0], plen, 2);
+
+      msb_in_carry = true;
+    }
+
+  /* Step 3:  Sign-extend or zero-extend the destination as needed.
+     ======   */
+
+  if (sign_extend && !sign_in_carry)
+    {
+      unsigned s0 = src.regno_msb;
+
+      if (MAY_CLOBBER (s0))
+        avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
+      else
+        avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
+                     "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
+
+      sign_in_carry = true;
+    }
+
+  gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1);
+
+  unsigned copies = 0;
+  rtx movw = sign_extend ? NULL_RTX : clrw;
+
+  for (d0 = dest.regno_msb - sign_bytes + 1; d0 <= dest.regno_msb; d0++)
+    {
+      if (AVR_HAVE_MOVW && movw
+          && d0 % 2 == 0 && d0 + 1 <= dest.regno_msb)
+        {
+          xop[2] = all_regs_rtx[d0];
+          xop[3] = movw;
+          avr_asm_len ("movw %2,%3", xop, plen, 1);
+          d0++;
+        }
+      else
+        {
+          avr_asm_len (sign_extend ? "sbc %0,%0" : "clr %0",
+                       &all_regs_rtx[d0], plen, 1);
+
+          if (++copies >= 2 && !movw && d0 % 2 == 1)
+            movw = all_regs_rtx[d0-1];
+        }
+    } /* for */
+
+
+  /* Step 4:  Right shift the destination.  This might be needed for
+     ======   conversions from unsigned to signed.  */
+
+  if (shift == ASHIFTRT)
+    {
+      const char *code_ashiftrt = "lsr %0";
+
+      if (sign_extend || msb_in_carry)
+        code_ashiftrt = "ror %0";
+
+      if (src.sbit && src.ibyte == dest.ibyte)
+        code_ashiftrt = "asr %0";
+
+      for (d0 = dest.regno_msb - sign_bytes;
+           d0 >= dest.regno + zero_bytes - 1 && d0 >= dest.regno; d0--)
+        {
+          avr_asm_len (code_ashiftrt, &all_regs_rtx[d0], plen, 1);
+          code_ashiftrt = "ror %0";
+        }
+    }
+
+#undef MAY_CLOBBER
+
+  return "";
+}
+
+
 /* Create RTL split patterns for byte sized rotate expressions.  This
    produces a series of move instructions and considers overlap situations.
    Overlapping non-HImode operands need a scratch register.  */
@@ -7123,348 +7449,6 @@ avr_rotate_bytes (rtx operands[])
 }
 
 
-/* Outputs instructions needed for fixed point type conversion.
-   This includes converting between any fixed point type, as well
-   as converting to any integer type.  Conversion between integer
-   types is not supported.
-
-   The number of instructions generated depends on the types
-   being converted and the registers assigned to them.
-
-   The number of instructions required to complete the conversion
-   is least if the registers for source and destination are overlapping
-   and are aligned at the decimal place as actual movement of data is
-   completely avoided.  In some cases, the conversion may already be
-   complete without any instructions needed.
-
-   When converting to signed types from signed types, sign extension
-   is implemented.
-
-   Converting signed fractional types requires a bit shift if converting
-   to or from any unsigned fractional type because the decimal place is
-   shifted by 1 bit.  When the destination is a signed fractional, the sign
-   is stored in either the carry or T bit.  */
-
-const char*
-avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
-{
-  int i;
-  bool sbit[2];
-  /* ilen: Length of integral part (in bytes)
-     flen: Length of fractional part (in bytes)
-     tlen: Length of operand (in bytes)
-     blen: Length of operand (in bits) */
-  int ilen[2], flen[2], tlen[2], blen[2];
-  int rdest, rsource, offset;
-  int start, end, dir;
-  bool sign_in_T = false, sign_in_Carry = false, sign_done = false;
-  bool widening_sign_extend = false;
-  int clrword = -1, lastclr = 0, clr = 0;
-  rtx xop[6];
-
-  const int dest = 0;
-  const int src = 1;
-
-  xop[dest] = operands[dest];
-  xop[src] = operands[src];
-
-  if (plen)
-    *plen = 0;
-
-  /* Determine format (integer and fractional parts)
-     of types needing conversion.  */
-
-  for (i = 0; i < 2; i++)
-    {
-      enum machine_mode mode = GET_MODE (xop[i]);
-
-      tlen[i] = GET_MODE_SIZE (mode);
-      blen[i] = GET_MODE_BITSIZE (mode);
-
-      if (SCALAR_INT_MODE_P (mode))
-        {
-          sbit[i] = intsigned;
-          ilen[i] = GET_MODE_SIZE (mode);
-          flen[i] = 0;
-        }
-      else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
-        {
-          sbit[i] = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
-          ilen[i] = (GET_MODE_IBIT (mode) + 1) / 8;
-          flen[i] = (GET_MODE_FBIT (mode) + 1) / 8;
-        }
-      else
-        fatal_insn ("unsupported fixed-point conversion", insn);
-    }
-
-  /* Perform sign extension if source and dest are both signed,
-     and there are more integer parts in dest than in source.  */
-
-  widening_sign_extend = sbit[dest] && sbit[src] && ilen[dest] > ilen[src];
-
-  rdest = REGNO (xop[dest]);
-  rsource = REGNO (xop[src]);
-  offset = flen[src] - flen[dest];
-
-  /* Position of MSB resp. sign bit.  */
-
-  xop[2] = GEN_INT (blen[dest] - 1);
-  xop[3] = GEN_INT (blen[src] - 1);
-
-  /* Store the sign bit if the destination is a signed fract and the source
-     has a sign in the integer part.  */
-
-  if (sbit[dest] && ilen[dest] == 0 && sbit[src] && ilen[src] > 0)
-    {
-      /* To avoid using BST and BLD if the source and destination registers
-         overlap or the source is unused after, we can use LSL to store the
-         sign bit in carry since we don't need the integral part of the
-         source.  Restoring the sign from carry saves one BLD instruction
-         below.  */
-
-      if (reg_unused_after (insn, xop[src])
-          || (rdest < rsource + tlen[src]
-              && rdest + tlen[dest] > rsource))
-        {
-          avr_asm_len ("lsl %T1%t3", xop, plen, 1);
-          sign_in_Carry = true;
-        }
-      else
-        {
-          avr_asm_len ("bst %T1%T3", xop, plen, 1);
-          sign_in_T = true;
-        }
-    }
-
-  /* Pick the correct direction to shift bytes.  */
-
-  if (rdest < rsource + offset)
-    {
-      dir = 1;
-      start = 0;
-      end = tlen[dest];
-    }
-  else
-    {
-      dir = -1;
-      start = tlen[dest] - 1;
-      end = -1;
-    }
-
-  /* Perform conversion by moving registers into place, clearing
-     destination registers that do not overlap with any source.  */
-
-  for (i = start; i != end; i += dir)
-    {
-      int destloc = rdest + i;
-      int sourceloc = rsource + i + offset;
-
-      /* Source register location is outside range of source register,
-         so clear this byte in the dest.  */
-
-      if (sourceloc < rsource
-          || sourceloc >= rsource + tlen[src])
-        {
-          if (AVR_HAVE_MOVW
-              && i + dir != end
-              && (sourceloc + dir < rsource
-                  || sourceloc + dir >= rsource + tlen[src])
-              && ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
-                  || (dir == -1 && (destloc % 2) && (sourceloc % 2)))
-              && clrword != -1)
-            {
-              /* Use already cleared word to clear two bytes at a time.  */
-
-              int even_i = i & ~1;
-              int even_clrword = clrword & ~1;
-
-              xop[4] = GEN_INT (8 * even_i);
-              xop[5] = GEN_INT (8 * even_clrword);
-              avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
-              i += dir;
-            }
-          else
-            {
-              if (i == tlen[dest] - 1
-                  && widening_sign_extend
-                  && blen[src] - 1 - 8 * offset < 0)
-                {
-                  /* The SBRC below that sign-extends would come
-                     up with a negative bit number because the sign
-                     bit is out of reach.  Also avoid some early-clobber
-                     situations because of premature CLR.  */
-
-                  if (reg_unused_after (insn, xop[src]))
-                    avr_asm_len ("lsl %T1%t3" CR_TAB
-                                 "sbc %T0%t2,%T0%t2", xop, plen, 2);
-                  else
-                    avr_asm_len ("mov __tmp_reg__,%T1%t3" CR_TAB
-                                 "lsl __tmp_reg__"        CR_TAB
-                                 "sbc %T0%t2,%T0%t2", xop, plen, 3);
-                  sign_done = true;
-
-                  continue;
-                }
-
-              /* Do not clear the register if it is going to get
-                 sign extended with a MOV later.  */
-
-              if (sbit[dest] && sbit[src]
-                  && i != tlen[dest] - 1
-                  && i >= flen[dest])
-                {
-                  continue;
-                }
-
-              xop[4] = GEN_INT (8 * i);
-              avr_asm_len ("clr %T0%t4", xop, plen, 1);
-
-              /* If the last byte was cleared too, we have a cleared
-                 word we can MOVW to clear two bytes at a time.  */
-
-              if (lastclr)
-                clrword = i;
-
-              clr = 1;
-            }
-        }
-      else if (destloc == sourceloc)
-        {
-          /* Source byte is already in destination:  Nothing needed.  */
-
-          continue;
-        }
-      else
-        {
-          /* Registers do not line up and source register location
-             is within range:  Perform move, shifting with MOV or MOVW.  */
-
-          if (AVR_HAVE_MOVW
-              && i + dir != end
-              && sourceloc + dir >= rsource
-              && sourceloc + dir < rsource + tlen[src]
-              && ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
-                  || (dir == -1 && (destloc % 2) && (sourceloc % 2))))
-            {
-              int even_i = i & ~1;
-              int even_i_plus_offset = (i + offset) & ~1;
-
-              xop[4] = GEN_INT (8 * even_i);
-              xop[5] = GEN_INT (8 * even_i_plus_offset);
-              avr_asm_len ("movw %T0%t4,%T1%t5", xop, plen, 1);
-              i += dir;
-            }
-          else
-            {
-              xop[4] = GEN_INT (8 * i);
-              xop[5] = GEN_INT (8 * (i + offset));
-              avr_asm_len ("mov %T0%t4,%T1%t5", xop, plen, 1);
-            }
-        }
-
-      lastclr = clr;
-      clr = 0;
-    }
-
-  /* Perform sign extension if source and dest are both signed,
-     and there are more integer parts in dest than in source.  */
-
-  if (widening_sign_extend)
-    {
-      if (!sign_done)
-        {
-          xop[4] = GEN_INT (blen[src] - 1 - 8 * offset);
-
-          /* Register was cleared above, so can become 0xff and extended.
-             Note:  Instead of the CLR/SBRC/COM the sign extension could
-             be performed after the LSL below by means of a SBC if only
-             one byte has to be shifted left.  */
-
-          avr_asm_len ("sbrc %T0%T4" CR_TAB
-                       "com %T0%t2", xop, plen, 2);
-        }
-
-      /* Sign extend additional bytes by MOV and MOVW.  */
-
-      start = tlen[dest] - 2;
-      end = flen[dest] + ilen[src] - 1;
-
-      for (i = start; i != end; i--)
-        {
-          if (AVR_HAVE_MOVW && i != start && i-1 != end)
-            {
-              i--;
-              xop[4] = GEN_INT (8 * i);
-              xop[5] = GEN_INT (8 * (tlen[dest] - 2));
-              avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
-            }
-          else
-            {
-              xop[4] = GEN_INT (8 * i);
-              xop[5] = GEN_INT (8 * (tlen[dest] - 1));
-              avr_asm_len ("mov %T0%t4,%T0%t5", xop, plen, 1);
-            }
-        }
-    }
-
-  /* If destination is a signed fract, and the source was not, a shift
-     by 1 bit is needed.  Also restore sign from carry or T.  */
-
-  if (sbit[dest] && !ilen[dest] && (!sbit[src] || ilen[src]))
-    {
-      /* We have flen[src] non-zero fractional bytes to shift.
-         Because of the right shift, handle one byte more so that the
-         LSB won't be lost.  */
-
-      int nonzero = flen[src] + 1;
-
-      /* If the LSB is in the T flag and there are no fractional
-         bits, the high byte is zero and no shift needed.  */
-
-      if (flen[src] == 0 && sign_in_T)
-        nonzero = 0;
-
-      start = flen[dest] - 1;
-      end = start - nonzero;
-
-      for (i = start; i > end && i >= 0; i--)
-        {
-          xop[4] = GEN_INT (8 * i);
-          if (i == start && !sign_in_Carry)
-            avr_asm_len ("lsr %T0%t4", xop, plen, 1);
-          else
-            avr_asm_len ("ror %T0%t4", xop, plen, 1);
-        }
-
-      if (sign_in_T)
-        {
-          avr_asm_len ("bld %T0%T2", xop, plen, 1);
-        }
-    }
-  else if (sbit[src] && !ilen[src] && (!sbit[dest] || ilen[dest]))
-    {
-      /* If source was a signed fract and dest was not, shift 1 bit
-         other way.  */
-
-      start = flen[dest] - flen[src];
-
-      if (start < 0)
-        start = 0;
-
-      for (i = start; i < flen[dest]; i++)
-        {
-          xop[4] = GEN_INT (8 * i);
-
-          if (i == start)
-            avr_asm_len ("lsl %T0%t4", xop, plen, 1);
-          else
-            avr_asm_len ("rol %T0%t4", xop, plen, 1);
-        }
-    }
-
-  return "";
-}
-
-
 /* Modifies the length assigned to instruction INSN
    LEN is the initially computed length of the insn.  */
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 9f704d83834..c547dcb06c6 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,21 @@
+2012-11-22  Georg-Johann Lay  <avr@gjlay.de>
+
+	Adjust decimal point of signed accum mode to GCC default.
+
+	PR target/54222
+	* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
+	_divqq_helper.
+	* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
+	(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
+	(__mulha3, __mulsa3)
+	(__divqq3, __divha3, __divsa3): Adjust to new position of
+	decimal point of signed accum types.
+
+	(__mulusa3_round): New function.
+	(__mulusa3): Use it.
+	(__divqq_helper): New function.
+	(__udivuqq3): Use it.
+
 2012-11-20  Jakub Jelinek  <jakub@redhat.com>
 
 	PR bootstrap/55370
diff --git a/libgcc/config/avr/lib1funcs-fixed.S b/libgcc/config/avr/lib1funcs-fixed.S
index ddcd02e197c..a9fd7d91f20 100644
--- a/libgcc/config/avr/lib1funcs-fixed.S
+++ b/libgcc/config/avr/lib1funcs-fixed.S
@@ -43,8 +43,8 @@ DEFUN __fractqqsf
     ;; Move in place for SA -> SF conversion
     clr     r22
     mov     r23, r24
-    lsl     r23
     ;; Sign-extend
+    lsl     r24
     sbc     r24, r24
     mov     r25, r24
     XJMP    __fractsasf
@@ -67,9 +67,8 @@ ENDF __fractuqqsf
 DEFUN __fracthqsf
     ;; Move in place for SA -> SF conversion
     wmov    22, 24
-    lsl     r22
-    rol     r23
     ;; Sign-extend
+    lsl     r25
     sbc     r24, r24
     mov     r25, r24
     XJMP    __fractsasf
@@ -140,11 +139,13 @@ ENDF __fractusqsf
 #if defined (L_fractsasf)
 DEFUN __fractsasf
     XCALL   __floatsisf
-    ;; Divide non-zero results by 2^16 to move the
+    ;; Divide non-zero results by 2^15 to move the
     ;; decimal point into place
-    cpse    r25, __zero_reg__
-    subi    r25, exp_hi (16)
-    ret
+    tst     r25
+    breq    0f
+    subi    r24, exp_lo (15)
+    sbci    r25, exp_hi (15)
+0:  ret
 ENDF __fractsasf
 #endif  /* L_fractsasf */
@@ -186,8 +187,9 @@ ENDF __fractsfuqq
 
 #if defined (L_fractsfha)
 DEFUN __fractsfha
-    ;; Multiply with 2^24 to get a HA result in r25:r24
-    subi    r25, exp_hi (-24)
+    ;; Multiply with 2^{16+7} to get a HA result in r25:r24
+    subi    r24, exp_lo (-23)
+    sbci    r25, exp_hi (-23)
     XJMP    __fixsfsi
 ENDF __fractsfha
 #endif  /* L_fractsfha */
@@ -201,8 +203,7 @@ ENDF __fractsfuha
 #endif  /* L_fractsfuha */
 
 #if defined (L_fractsfhq)
-DEFUN __fractsfsq
-ENDF __fractsfsq
+FALIAS __fractsfsq
 
 DEFUN __fractsfhq
     ;; Multiply with 2^{16+15} to get a HQ result in r25:r24
@@ -214,8 +215,7 @@ ENDF __fractsfhq
 #endif  /* L_fractsfhq */
 
 #if defined (L_fractsfuhq)
-DEFUN __fractsfusq
-ENDF __fractsfusq
+FALIAS __fractsfusq
 
 DEFUN __fractsfuhq
     ;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
@@ -227,8 +227,9 @@ ENDF __fractsfuhq
 
 #if defined (L_fractsfsa)
 DEFUN __fractsfsa
-    ;; Multiply with 2^16 to get a SA result in r25:r22
-    subi    r25, exp_hi (-16)
+    ;; Multiply with 2^15 to get a SA result in r25:r22
+    subi    r24, exp_lo (-15)
+    sbci    r25, exp_hi (-15)
     XJMP    __fixsfsi
 ENDF __fractsfsa
 #endif  /* L_fractsfsa */
@@ -325,6 +326,9 @@ ENDF __muluhq3
 ;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
 DEFUN __mulha3
     XCALL   __mulhisi3
+    lsl     r22
+    rol     r23
+    rol     r24
     XJMP    __muluha3_round
 ENDF __mulha3
 #endif  /* L_mulha3 */
@@ -359,6 +363,9 @@ ENDF __muluha3_round
     Fixed  Multiplication  16.16 x 16.16
 *******************************************************/
 
+;; Bits outside the result (below LSB), used in the signed version
+#define GUARD __tmp_reg__
+
 #if defined (__AVR_HAVE_MUL__)
 
 ;; Multiplier
@@ -381,9 +388,16 @@ ENDF __muluha3_round
 
 #if defined (L_mulusa3)
 ;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; Clobbers: __tmp_reg__
-;;; Rounding:  -0.5 LSB < error <= 0.5 LSB
-DEFUN __mulusa3
+DEFUN  __mulusa3
+    set
+    ;; Fallthru
+ENDF   __mulusa3
+
+;;; Round for last digit iff T = 1
+;;; Return guard bits in GUARD (__tmp_reg__).
+;;; Rounding, T = 0:  -1.0 LSB < error <= 0 LSB
+;;; Rounding, T = 1:  -0.5 LSB < error <= 0.5 LSB
+DEFUN __mulusa3_round
     ;; Some of the MUL instructions have LSBs outside the result.
    ;; Don't ignore these LSBs in order to tame rounding error.
     ;; Use C2/C3 for these LSBs.
@@ -395,9 +409,12 @@ DEFUN __mulusa3
     mul  A1, B0  $  add  C3, r0  $  adc  C0, r1
     mul  A0, B1  $  add  C3, r0  $  adc  C0, r1  $  rol  C1
 
-    ;; Round
+    ;; Round if T = 1.  Store guarding bits outside the result for rounding
+    ;; and left-shift by the signed version (function below).
+    brtc 0f
     sbrc C3, 7
     adiw C0, 1
+0:  push C3
 
     ;; The following MULs don't have LSBs outside the result.
     ;; C2/C3 is the high part.
@@ -420,25 +437,42 @@ DEFUN __mulusa3
     mul  A2, B3  $  add  C3, r0
     mul  A3, B2  $  add  C3, r0
 
+    ;; Guard bits used in the signed version below.
+    pop  GUARD
     clr  __zero_reg__
     ret
-ENDF __mulusa3
+ENDF __mulusa3_round
 #endif /* L_mulusa3 */
 
 #if defined (L_mulsa3)
 ;;; (C3:C0) = (A3:A0) * (B3:B0)
-;;; Clobbers: __tmp_reg__
+;;; Clobbers: __tmp_reg__, T
 ;;; Rounding:  -0.5 LSB <= error <= 0.5 LSB
 DEFUN __mulsa3
-    XCALL   __mulusa3
+    clt
+    XCALL   __mulusa3_round
+    ;; A posteriori sign extension of the operands
     tst     B3
-    brpl 1f
+    brpl    1f
     sub     C2, A0
     sbc     C3, A1
 1:  sbrs    A3, 7
-    ret
+    rjmp    2f
     sub     C2, B0
     sbc     C3, B1
+2:
+    ;; Shift 1 bit left to adjust for 15 fractional bits
+    lsl     GUARD
+    rol     C0
+    rol     C1
+    rol     C2
+    rol     C3
+    ;; Round last digit
+    lsl     GUARD
+    adc     C0, __zero_reg__
+    adc     C1, __zero_reg__
+    adc     C2, __zero_reg__
+    adc     C3, __zero_reg__
     ret
 ENDF __mulsa3
 #endif /* L_mulsa3 */
@@ -492,27 +526,56 @@ ENDF __mulsa3
 DEFUN __mulsa3
     push    B0
     push    B1
-    bst     B3, 7
-    XCALL   __mulusa3
-    ;; A survived in 31:30:27:26
-    rcall   1f
-    pop     AA1
-    pop     AA0
+    push    B3
+    clt
+    XCALL   __mulusa3_round
+    pop     r30
+    ;; sign-extend B
+    bst     r30, 7
+    brtc    1f
+    ;; A1, A0 survived in R27:R26
+    sub     C2, AA0
+    sbc     C3, AA1
+1:
+    pop     AA1     ;; B1
+    pop     AA0     ;; B0
+
+    ;; sign-extend A.  A3 survived in R31
     bst     AA3, 7
-1:  brtc    9f
-    ;; 1-extend A/B
+    brtc    2f
     sub     C2, AA0
     sbc     C3, AA1
-9:  ret
+2:
+    ;; Shift 1 bit left to adjust for 15 fractional bits
+    lsl     GUARD
+    rol     C0
+    rol     C1
+    rol     C2
+    rol     C3
+    ;; Round last digit
+    lsl     GUARD
+    adc     C0, __zero_reg__
+    adc     C1, __zero_reg__
+    adc     C2, __zero_reg__
+    adc     C3, __zero_reg__
+    ret
 ENDF __mulsa3
 #endif /* L_mulsa3 */
 
 #if defined (L_mulusa3)
 ;;; (R25:R22) *= (R21:R18)
-;;; Clobbers: ABI, called by optabs and __mulsua
+;;; Clobbers: ABI, called by optabs
 ;;; Rounding:  -1 LSB <= error <= 1 LSB
-;;; Does not clobber T and A[] survives in 26, 27, 30, 31
-DEFUN __mulusa3
+DEFUN  __mulusa3
+    set
+    ;; Fallthru
+ENDF   __mulusa3
+
+;;; A[] survives in 26, 27, 30, 31
+;;; Also used by __mulsa3 with T = 0
+;;; Round if T = 1
+;;; Return guard bits in GUARD (__tmp_reg__), used by signed version.
+DEFUN __mulusa3_round
    push    CC2
    push    CC3
    ; clear result
@@ -560,21 +623,26 @@ DEFUN __mulusa3
    sbci    B0, 0
    brne    5b
 
-    ;; Move result into place and round
+    ;; Save guard bits and set carry for rounding
+    push    B3
    lsl     B3
+    ;; Move result into place
    wmov    C2, CC2
    wmov    C0, CC0
    clr     __zero_reg__
+    brtc    6f
+    ;; Round iff T = 1
    adc     C0, __zero_reg__
    adc     C1, __zero_reg__
    adc     C2, __zero_reg__
    adc     C3, __zero_reg__
-
+6:
+    pop     GUARD
    ;; Epilogue
    pop     CC3
    pop     CC2
    ret
-ENDF __mulusa3
+ENDF __mulusa3_round
 #endif /* L_mulusa3 */
 
 #undef A0
@@ -600,6 +668,8 @@ ENDF __mulusa3
 
 #endif /* __AVR_HAVE_MUL__ */
 
+#undef GUARD
+
 /*******************************************************
       Fractional Division 8 / 8
 *******************************************************/
@@ -607,30 +677,38 @@ ENDF __mulusa3
 #define r_divd  r25     /* dividend */
 #define r_quo   r24     /* quotient */
 #define r_div   r22     /* divisor */
+#define r_sign  __tmp_reg__
 
 #if defined (L_divqq3)
 DEFUN __divqq3
-    mov     r0, r_divd
-    eor     r0, r_div
+    mov     r_sign, r_divd
+    eor     r_sign, r_div
    sbrc    r_div, 7
    neg     r_div
    sbrc    r_divd, 7
    neg     r_divd
-    cp      r_divd, r_div
-    breq    __divqq3_minus1 ; if equal return -1
-    XCALL   __udivuqq3
+    XCALL   __divqq_helper
    lsr     r_quo
-    sbrc    r0, 7   ; negate result if needed
+    sbrc    r_sign, 7   ; negate result if needed
    neg     r_quo
    ret
-__divqq3_minus1:
-    ldi     r_quo, 0x80
-    ret
 ENDF __divqq3
-#endif  /* defined (L_divqq3) */
+#endif  /* L_divqq3 */
 
 #if defined (L_udivuqq3)
 DEFUN __udivuqq3
+    cp      r_divd, r_div
+    brsh    0f
+    XJMP    __divqq_helper
+    ;; Result is out of [0, 1)  ==>  Return 1 - eps.
+0:  ldi     r_quo, 0xff
+    ret
+ENDF __udivuqq3
+#endif /* L_udivuqq3 */
+
+
+#if defined (L_divqq_helper)
+DEFUN __divqq_helper
    clr     r_quo           ; clear quotient
    inc     __zero_reg__    ; init loop counter, used per shift
 __udivuqq3_loop:
@@ -649,12 +727,13 @@ __udivuqq3_cont:
    com     r_quo           ; complement result
                            ; because C flag was complemented in loop
    ret
-ENDF __udivuqq3
-#endif  /* defined (L_udivuqq3) */
+ENDF __divqq_helper
+#endif  /* L_divqq_helper */
 
 #undef r_divd
 #undef r_quo
 #undef r_div
+#undef r_sign
 
@@ -746,6 +825,8 @@ DEFUN __divha3
    NEG2    r_divdL
 2:
    XCALL   __udivuha3
+    lsr     r_quoH      ; adjust to 7 fractional bits
+    ror     r_quoL
    sbrs    r0, 7       ; negate result if needed
    ret
    NEG2    r_quoL
@@ -806,6 +887,10 @@ DEFUN __divsa3
    NEG4    r_arg1L
 2:
    XCALL   __udivusa3
+    lsr     r_quoHH     ; adjust to 15 fractional bits
+    ror     r_quoHL
+    ror     r_quoH
+    ror     r_quoL
    sbrs    r0, 7       ; negate result if needed
    ret
    ;; negate r_quoL
@@ -1024,8 +1109,8 @@ DEFUN __usadd_8
    XCALL   __adddi3
    brcs    0f
    ret
-    ;; A[] = 0xffffffff
-0:  XJMP    __sbc_8
+0:  ;; A[] = 0xffffffff
+    XJMP    __sbc_8
 ENDF __usadd_8
 #endif /* L_usadd_8 */
@@ -1038,8 +1123,8 @@ DEFUN __ussub_8
    XCALL   __subdi3
    brcs    0f
    ret
-    ;; A[] = 0
-0:  XJMP    __clr_8
+0:  ;; A[] = 0
+    XJMP    __clr_8
 ENDF __ussub_8
 #endif /* L_ussub_8 */
@@ -1049,9 +1134,9 @@ FALIAS __ssaddda3
 FALIAS __ssadddq3
 
 DEFUN __ssadd_8
-    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    XCALL   __adddi3
    brvc    0f
+    ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
    cpi     B7, 0x80
    XCALL   __sbc_8
    subi    A7, 0x80
@@ -1067,7 +1152,7 @@ FALIAS __sssubdq3
 DEFUN __sssub_8
    XCALL   __subdi3
    brvc    0f
-    ;; A = (B < 0) ? INT64_MAX : INT64_MIN
+    ;; A = (B < 0) ?  INT64_MAX : INT64_MIN
    ldi     A7, 0x7f
    cp      A7, B7
    XCALL   __sbc_8
diff --git a/libgcc/config/avr/t-avr b/libgcc/config/avr/t-avr
index d1f55e408d3..d609199168d 100644
--- a/libgcc/config/avr/t-avr
+++ b/libgcc/config/avr/t-avr
@@ -64,12 +64,12 @@ LIB1ASMFUNCS += \
 	\
 	_fractsfqq _fractsfuqq \
 	_fractsfhq _fractsfuhq _fractsfha _fractsfuha \
-	_fractsfsa _fractsfusa \
+	_fractsfsq _fractsfusq _fractsfsa _fractsfusa \
 	_mulqq3 \
 	_mulhq3 _muluhq3 \
 	_mulha3 _muluha3 _muluha3_round \
 	_mulsa3 _mulusa3 \
-	_divqq3 _udivuqq3 \
+	_divqq3 _udivuqq3 _divqq_helper \
 	_divhq3 _udivuhq3 \
 	_divha3 _udivuha3 \
 	_divsa3 _udivusa3 \
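One subtlety worth calling out in lib1funcs-fixed.S: because the signed accum point moved by one bit, __mulsa3 can no longer use the unsigned product as-is. The new __mulusa3_round entry keeps the bits below the result in GUARD (__tmp_reg__), so the signed path can shift the whole product left by one and still round correctly. The net arithmetic, as a rough C model (mulsa3_model is a name made up for this note; it mirrors the intended value, not the register-level guard handling):

    #include <stdint.h>

    static int32_t
    mulsa3_model (int32_t a, int32_t b)     /* operands: s16.15 accum */
    {
      int64_t p = (int64_t) a * b;          /* 30 fractional bits */
      p += INT64_C (1) << 14;               /* round to nearest at bit 15 */
      return (int32_t) (p >> 15);           /* back to 15 fractional bits */
    }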