Diffstat (limited to 'libgcc/config')
-rw-r--r--  libgcc/config/tilegx/sfp-machine.h        5
-rw-r--r--  libgcc/config/tilegx/sfp-machine32.h     61
-rw-r--r--  libgcc/config/tilegx/sfp-machine64.h     61
-rw-r--r--  libgcc/config/tilegx/t-crtstuff           4
-rw-r--r--  libgcc/config/tilegx/t-softfp             1
-rw-r--r--  libgcc/config/tilegx/t-tilegx            25
-rw-r--r--  libgcc/config/tilepro/atomic.c          232
-rw-r--r--  libgcc/config/tilepro/atomic.h          428
-rw-r--r--  libgcc/config/tilepro/linux-unwind.h    100
-rw-r--r--  libgcc/config/tilepro/sfp-machine.h      56
-rw-r--r--  libgcc/config/tilepro/softdivide.c      354
-rw-r--r--  libgcc/config/tilepro/softmpy.S          95
-rw-r--r--  libgcc/config/tilepro/t-crtstuff          4
-rw-r--r--  libgcc/config/tilepro/t-tilepro          33
14 files changed, 1459 insertions, 0 deletions
diff --git a/libgcc/config/tilegx/sfp-machine.h b/libgcc/config/tilegx/sfp-machine.h new file mode 100644 index 00000000000..1ce84e10943 --- /dev/null +++ b/libgcc/config/tilegx/sfp-machine.h @@ -0,0 +1,5 @@ +#ifdef __tilegx32__ +#include "config/tilegx/sfp-machine32.h" +#else +#include "config/tilegx/sfp-machine64.h" +#endif diff --git a/libgcc/config/tilegx/sfp-machine32.h b/libgcc/config/tilegx/sfp-machine32.h new file mode 100644 index 00000000000..37841000167 --- /dev/null +++ b/libgcc/config/tilegx/sfp-machine32.h @@ -0,0 +1,61 @@ +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); + +#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype)) + +/* The type of the result of a floating point comparison. This must + match `__libgcc_cmp_return__' in GCC for the target. */ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_Q 1 + +#define _FP_KEEPNANFRACP 1 + +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#define __BYTE_ORDER __LITTLE_ENDIAN + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); + diff --git a/libgcc/config/tilegx/sfp-machine64.h b/libgcc/config/tilegx/sfp-machine64.h new file mode 100644 index 00000000000..88bdcf5a318 --- /dev/null +++ b/libgcc/config/tilegx/sfp-machine64.h @@ -0,0 +1,61 @@ +#define _FP_W_TYPE_SIZE 64 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +typedef int TItype __attribute__ ((mode (TI))); +typedef unsigned int UTItype __attribute__ ((mode (TI))); + +#define TI_BITS (__CHAR_BIT__ * (int)sizeof(TItype)) + +/* The type of the result of a floating point comparison. This must + match `__libgcc_cmp_return__' in GCC for the target. 
*/
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+  _FP_MUL_MEAT_1_imm(_FP_WFRACBITS_S,R,X,Y)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_S _FP_QNANBIT_S
+#define _FP_NANFRAC_D _FP_QNANBIT_D
+#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0
+#define _FP_NANSIGN_S 1
+#define _FP_NANSIGN_D 1
+#define _FP_NANSIGN_Q 1
+
+#define _FP_KEEPNANFRACP 1
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
+  do { \
+    if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
+        && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
+      { \
+        R##_s = Y##_s; \
+        _FP_FRAC_COPY_##wc(R,Y); \
+      } \
+    else \
+      { \
+        R##_s = X##_s; \
+        _FP_FRAC_COPY_##wc(R,X); \
+      } \
+    R##_c = FP_CLS_NAN; \
+  } while (0)
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#define __BYTE_ORDER __LITTLE_ENDIAN
+
+/* Define ALIASNAME as a strong alias for NAME. */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+
diff --git a/libgcc/config/tilegx/t-crtstuff b/libgcc/config/tilegx/t-crtstuff
new file mode 100644
index 00000000000..eddc45ce9d8
--- /dev/null
+++ b/libgcc/config/tilegx/t-crtstuff
@@ -0,0 +1,4 @@
+# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,
+# because then __FRAME_END__ might not be the last thing in .eh_frame
+# section.
+CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables
diff --git a/libgcc/config/tilegx/t-softfp b/libgcc/config/tilegx/t-softfp
new file mode 100644
index 00000000000..a1e3513e288
--- /dev/null
+++ b/libgcc/config/tilegx/t-softfp
@@ -0,0 +1 @@
+softfp_int_modes += ti
diff --git a/libgcc/config/tilegx/t-tilegx b/libgcc/config/tilegx/t-tilegx
new file mode 100644
index 00000000000..2fb64454c36
--- /dev/null
+++ b/libgcc/config/tilegx/t-tilegx
@@ -0,0 +1,25 @@
+LIB2ADD += \
+	$(srcdir)/config/tilepro/atomic.c
+
+SOFTDIVIDE_FUNCS := \
+	_tile_udivsi3 \
+	_tile_divsi3 \
+	_tile_udivdi3 \
+	_tile_divdi3 \
+	_tile_umodsi3 \
+	_tile_modsi3 \
+	_tile_umoddi3 \
+	_tile_moddi3
+
+softdivide-o = $(patsubst %,%$(objext),$(SOFTDIVIDE_FUNCS))
+$(softdivide-o): %$(objext): $(srcdir)/config/tilepro/softdivide.c
+	$(gcc_compile) -ffunction-sections -DMAYBE_STATIC= -DL$* -c $< \
+	  $(vis_hide)
+libgcc-objects += $(softdivide-o)
+
+ifeq ($(enable_shared),yes)
+softdivide-s-o = $(patsubst %,%_s$(objext),$(SOFTDIVIDE_FUNCS))
+$(softdivide-s-o): %_s$(objext): $(srcdir)/config/tilepro/softdivide.c
+	$(gcc_s_compile) -ffunction-sections -DMAYBE_STATIC= -DL$* -c $<
+libgcc-s-objects += $(softdivide-s-o)
+endif
diff --git a/libgcc/config/tilepro/atomic.c b/libgcc/config/tilepro/atomic.c
new file mode 100644
index 00000000000..cafbde8abee
--- /dev/null
+++ b/libgcc/config/tilepro/atomic.c
@@ -0,0 +1,232 @@
+/* TILE atomics.
+   Copyright (C) 2011, 2012
+   Free Software Foundation, Inc.
+   Contributed by Walter Lee (walt@tilera.com)
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "system.h"
+#include "coretypes.h"
+#include "atomic.h"
+
+/* This code should be inlined by the compiler, but for now support
+   it as out-of-line methods in libgcc.  */
+
+static void
+pre_atomic_barrier (int model)
+{
+  switch ((enum memmodel) model)
+    {
+    case MEMMODEL_RELEASE:
+    case MEMMODEL_ACQ_REL:
+    case MEMMODEL_SEQ_CST:
+      __atomic_thread_fence (model);
+      break;
+    default:
+      break;
+    }
+  return;
+}
+
+static void
+post_atomic_barrier (int model)
+{
+  switch ((enum memmodel) model)
+    {
+    case MEMMODEL_ACQUIRE:
+    case MEMMODEL_ACQ_REL:
+    case MEMMODEL_SEQ_CST:
+      __atomic_thread_fence (model);
+      break;
+    default:
+      break;
+    }
+  return;
+}
+
+#define __unused __attribute__((unused))
+
+/* Provide additional methods not implemented by atomic.h. */
+#define atomic_xor(mem, mask) \
+  __atomic_update_cmpxchg(mem, mask, __old ^ __value)
+#define atomic_nand(mem, mask) \
+  __atomic_update_cmpxchg(mem, mask, ~(__old & __value))
+
+#define __atomic_fetch_and_do(type, size, opname) \
+type \
+__atomic_fetch_##opname##_##size(type* p, type i, int model) \
+{ \
+  pre_atomic_barrier(model); \
+  type rv = atomic_##opname(p, i); \
+  post_atomic_barrier(model); \
+  return rv; \
+}
+__atomic_fetch_and_do (int, 4, add)
+__atomic_fetch_and_do (int, 4, sub)
+__atomic_fetch_and_do (int, 4, or)
+__atomic_fetch_and_do (int, 4, and)
+__atomic_fetch_and_do (int, 4, xor)
+__atomic_fetch_and_do (int, 4, nand)
+__atomic_fetch_and_do (long long, 8, add)
+__atomic_fetch_and_do (long long, 8, sub)
+__atomic_fetch_and_do (long long, 8, or)
+__atomic_fetch_and_do (long long, 8, and)
+__atomic_fetch_and_do (long long, 8, xor)
+__atomic_fetch_and_do (long long, 8, nand)
+
+#define __atomic_do_and_fetch(type, size, opname, op) \
+type \
+__atomic_##opname##_fetch_##size(type* p, type i, int model) \
+{ \
+  pre_atomic_barrier(model); \
+  type rv = atomic_##opname(p, i) op i; \
+  post_atomic_barrier(model); \
+  return rv; \
+}
+__atomic_do_and_fetch (int, 4, add, +)
+__atomic_do_and_fetch (int, 4, sub, -)
+__atomic_do_and_fetch (int, 4, or, |)
+__atomic_do_and_fetch (int, 4, and, &)
+__atomic_do_and_fetch (int, 4, xor, ^)
+__atomic_do_and_fetch (int, 4, nand, &)
+__atomic_do_and_fetch (long long, 8, add, +)
+__atomic_do_and_fetch (long long, 8, sub, -)
+__atomic_do_and_fetch (long long, 8, or, |)
+__atomic_do_and_fetch (long long, 8, and, &)
+__atomic_do_and_fetch (long long, 8, xor, ^)
+__atomic_do_and_fetch (long long, 8, nand, &)
+
+#define __atomic_exchange_methods(type, size) \
+bool \
+__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \
+				 type
newval, bool weak __unused, \ + int models, int modelf __unused) \ +{ \ + type oldval = *oldvalp; \ + pre_atomic_barrier(models); \ + type retval = atomic_val_compare_and_exchange(ptr, oldval, newval); \ + post_atomic_barrier(models); \ + bool success = (retval == oldval); \ + *oldvalp = retval; \ + return success; \ +} \ + \ +type \ +__atomic_exchange_##size(volatile type* ptr, type val, int model) \ +{ \ + pre_atomic_barrier(model); \ + type retval = atomic_exchange(ptr, val); \ + post_atomic_barrier(model); \ + return retval; \ +} +__atomic_exchange_methods (int, 4) +__atomic_exchange_methods (long long, 8) + +/* Subword methods require the same approach for both TILEPro and + TILE-Gx. We load the background data for the word, insert the + desired subword piece, then compare-and-exchange it into place. */ +#define u8 unsigned char +#define u16 unsigned short +#define __atomic_subword_cmpxchg(type, size) \ + \ +bool \ +__atomic_compare_exchange_##size(volatile type* ptr, type* guess, \ + type val, bool weak __unused, int models, \ + int modelf __unused) \ +{ \ + pre_atomic_barrier(models); \ + unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \ + const int shift = ((unsigned long)ptr & 3UL) * 8; \ + const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \ + const unsigned int bgmask = ~(valmask << shift); \ + unsigned int oldword = *p; \ + type oldval = (oldword >> shift) & valmask; \ + if (__builtin_expect((oldval == *guess), 1)) { \ + unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \ + oldword = atomic_val_compare_and_exchange(p, oldword, word); \ + oldval = (oldword >> shift) & valmask; \ + } \ + post_atomic_barrier(models); \ + bool success = (oldval == *guess); \ + *guess = oldval; \ + return success; \ +} +__atomic_subword_cmpxchg (u8, 1) +__atomic_subword_cmpxchg (u16, 2) +/* For the atomic-update subword methods, we use the same approach as + above, but we retry until we succeed if the compare-and-exchange + fails. 
*/ +#define __atomic_subword(type, proto, top, expr, bottom) \ +proto \ +{ \ + top \ + unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \ + const int shift = ((unsigned long)ptr & 3UL) * 8; \ + const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \ + const unsigned int bgmask = ~(valmask << shift); \ + unsigned int oldword, xword = *p; \ + type val, oldval; \ + do { \ + oldword = xword; \ + oldval = (oldword >> shift) & valmask; \ + val = expr; \ + unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \ + xword = atomic_val_compare_and_exchange(p, oldword, word); \ + } while (__builtin_expect(xword != oldword, 0)); \ + bottom \ +} +#define __atomic_subword_fetch(type, funcname, expr, retval) \ + __atomic_subword(type, \ + type __atomic_ ## funcname(volatile type *ptr, type i, int model), \ + pre_atomic_barrier(model);, \ + expr, \ + post_atomic_barrier(model); return retval;) +__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval) +__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval) +__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval) +__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval) +__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval) +__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval) +__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval) +__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval) +__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval) +__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval) +__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval) +__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval) +__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val) +__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val) +__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val) +__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val) +__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val) +__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val) +__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val) +__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val) +__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val) +__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val) +__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val) +__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val) +#define __atomic_subword_lock(type, size) \ + \ +__atomic_subword(type, \ + type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \ + pre_atomic_barrier(model);, \ + nval, \ + post_atomic_barrier(model); return oldval;) +__atomic_subword_lock (u8, 1) +__atomic_subword_lock (u16, 2) diff --git a/libgcc/config/tilepro/atomic.h b/libgcc/config/tilepro/atomic.h new file mode 100644 index 00000000000..16306fe0e76 --- /dev/null +++ b/libgcc/config/tilepro/atomic.h @@ -0,0 +1,428 @@ +/* Macros for atomic functionality for tile. + Copyright (C) 2011, 2012 + Free Software Foundation, Inc. + Contributed by Walter Lee (walt@tilera.com) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + + +/* Provides macros for common atomic functionality. */ + +#ifndef _ATOMIC_H_ +#define _ATOMIC_H_ + +#ifdef __tilegx__ +/* Atomic instruction macros + + The macros provided by atomic.h simplify access to the TILE-Gx + architecture's atomic instructions. The architecture provides a + variety of atomic instructions, including "exchange", "compare and + exchange", "fetch and ADD", "fetch and AND", "fetch and OR", and + "fetch and ADD if greater than or equal to zero". + + No barrier or fence semantics are implied by any of the atomic + instructions for manipulating memory; you must specify the barriers + that you wish explicitly, using the provided macros. + + Any integral 32- or 64-bit value can be used as the argument + to these macros, such as "int", "long long", "unsigned long", etc. + The pointers must be aligned to 4 or 8 bytes for 32- or 64-bit data. + The "exchange" and "compare and exchange" macros may also take + pointer values. We use the pseudo-type "VAL" in the documentation + to indicate the use of an appropriate type. */ +#else +/* Atomic instruction macros + + The macros provided by atomic.h simplify access to the Tile + architecture's atomic instructions. Since the architecture + supports test-and-set as its only in-silicon atomic operation, many + of the operations provided by this header are implemented as + fast-path calls to Linux emulation routines. + + Using the kernel for atomic operations allows userspace to take + advantage of the kernel's existing atomic-integer support (managed + by a distributed array of locks). The kernel provides proper + ordering among simultaneous atomic operations on different cores, + and guarantees a process can not be context-switched part way + through an atomic operation. By virtue of sharing the kernel + atomic implementation, the userspace atomic operations + are compatible with the atomic methods provided by the kernel's + futex() syscall API. Note that these operations never cause Linux + kernel scheduling, and are in fact invisible to the kernel; they + simply act as regular function calls but with an elevated privilege + level. Note that the kernel's distributed lock array is hashed by + using only VA bits from the atomic value's address (to avoid the + performance hit of page table locking and multiple page-table + lookups to get the PA) and only the VA bits that are below page + granularity (to properly lock simultaneous accesses to the same + page mapped at different VAs). As a result, simultaneous atomic + operations on values whose addresses are at the same offset on a + page will contend in the kernel for the same lock array element. + + No barrier or fence semantics are implied by any of the atomic + instructions for manipulating memory; you must specify the barriers + that you wish explicitly, using the provided macros. + + Any integral 32- or 64-bit value can be used as the argument + to these macros, such as "int", "long long", "unsigned long", etc. + The pointers must be aligned to 4 or 8 bytes for 32- or 64-bit data. 
+ The "exchange" and "compare and exchange" macros may also take + pointer values. We use the pseudo-type "VAL" in the documentation + to indicate the use of an appropriate type. + + The 32-bit routines are implemented using a single kernel fast + syscall, as is the 64-bit compare-and-exchange. The other 64-bit + routines are implemented by looping over the 64-bit + compare-and-exchange routine, so may be potentially less efficient. */ +#endif + +#include <stdint.h> +#include <features.h> +#ifdef __tilegx__ +#include <arch/spr_def.h> +#else +#include <asm/unistd.h> +#endif + + +/* 32-bit integer compare-and-exchange. */ +static __inline __attribute__ ((always_inline)) + int atomic_val_compare_and_exchange_4 (volatile int *mem, + int oldval, int newval) +{ +#ifdef __tilegx__ + __insn_mtspr (SPR_CMPEXCH_VALUE, oldval); + return __insn_cmpexch4 (mem, newval); +#else + int result; + __asm__ __volatile__ ("swint1":"=R00" (result), + "=m" (*mem):"R10" (__NR_FAST_cmpxchg), "R00" (mem), + "R01" (oldval), "R02" (newval), "m" (*mem):"r20", + "r21", "r22", "r23", "r24", "r25", "r26", "r27", + "r28", "r29", "memory"); + return result; +#endif +} + +/* 64-bit integer compare-and-exchange. */ +static __inline __attribute__ ((always_inline)) + int64_t atomic_val_compare_and_exchange_8 (volatile int64_t * mem, + int64_t oldval, + int64_t newval) +{ +#ifdef __tilegx__ + __insn_mtspr (SPR_CMPEXCH_VALUE, oldval); + return __insn_cmpexch (mem, newval); +#else + unsigned int result_lo, result_hi; + unsigned int oldval_lo = oldval & 0xffffffffu, oldval_hi = oldval >> 32; + unsigned int newval_lo = newval & 0xffffffffu, newval_hi = newval >> 32; + __asm__ __volatile__ ("swint1":"=R00" (result_lo), "=R01" (result_hi), + "=m" (*mem):"R10" (__NR_FAST_cmpxchg64), "R00" (mem), + "R02" (oldval_lo), "R03" (oldval_hi), + "R04" (newval_lo), "R05" (newval_hi), + "m" (*mem):"r20", "r21", "r22", "r23", "r24", "r25", + "r26", "r27", "r28", "r29", "memory"); + return ((uint64_t) result_hi) << 32 | result_lo; +#endif +} + +/* This non-existent symbol is called for sizes other than "4" and "8", + indicating a bug in the caller. */ +extern int __atomic_error_bad_argument_size (void) + __attribute__ ((warning ("sizeof atomic argument not 4 or 8"))); + + +#define atomic_val_compare_and_exchange(mem, o, n) \ + ({ \ + (__typeof(*(mem)))(__typeof(*(mem)-*(mem))) \ + ((sizeof(*(mem)) == 8) ? \ + atomic_val_compare_and_exchange_8( \ + (volatile int64_t*)(mem), (__typeof((o)-(o)))(o), \ + (__typeof((n)-(n)))(n)) : \ + (sizeof(*(mem)) == 4) ? \ + atomic_val_compare_and_exchange_4( \ + (volatile int*)(mem), (__typeof((o)-(o)))(o), \ + (__typeof((n)-(n)))(n)) : \ + __atomic_error_bad_argument_size()); \ + }) + +#define atomic_bool_compare_and_exchange(mem, o, n) \ + ({ \ + __typeof(o) __o = (o); \ + __builtin_expect( \ + __o == atomic_val_compare_and_exchange((mem), __o, (n)), 1); \ + }) + + +/* Loop with compare_and_exchange until we guess the correct value. + Normally "expr" will be an expression using __old and __value. */ +#define __atomic_update_cmpxchg(mem, value, expr) \ + ({ \ + __typeof(value) __value = (value); \ + __typeof(*(mem)) *__mem = (mem), __old = *__mem, __guess; \ + do { \ + __guess = __old; \ + __old = atomic_val_compare_and_exchange(__mem, __old, (expr)); \ + } while (__builtin_expect(__old != __guess, 0)); \ + __old; \ + }) + +#ifdef __tilegx__ + +/* Generic atomic op with 8- or 4-byte variant. + The _mask, _addend, and _expr arguments are ignored on tilegx. 
*/ +#define __atomic_update(mem, value, op, _mask, _addend, _expr) \ + ({ \ + ((__typeof(*(mem))) \ + ((sizeof(*(mem)) == 8) ? (__typeof(*(mem)-*(mem)))__insn_##op( \ + (void *)(mem), (int64_t)(__typeof((value)-(value)))(value)) : \ + (sizeof(*(mem)) == 4) ? (int)__insn_##op##4( \ + (void *)(mem), (int32_t)(__typeof((value)-(value)))(value)) : \ + __atomic_error_bad_argument_size())); \ + }) + +#else + +/* This uses TILEPro's fast syscall support to atomically compute: + + int old = *ptr; + *ptr = (old & mask) + addend; + return old; + + This primitive can be used for atomic exchange, add, or, and. + Only 32-bit support is provided. */ +static __inline __attribute__ ((always_inline)) + int + __atomic_update_4 (volatile int *mem, int mask, int addend) +{ + int result; + __asm__ __volatile__ ("swint1":"=R00" (result), + "=m" (*mem):"R10" (__NR_FAST_atomic_update), + "R00" (mem), "R01" (mask), "R02" (addend), + "m" (*mem):"r20", "r21", "r22", "r23", "r24", "r25", + "r26", "r27", "r28", "r29", "memory"); + return result; +} + +/* Generic atomic op with 8- or 4-byte variant. + The _op argument is ignored on tilepro. */ +#define __atomic_update(mem, value, _op, mask, addend, expr) \ + ({ \ + (__typeof(*(mem)))(__typeof(*(mem)-*(mem))) \ + ((sizeof(*(mem)) == 8) ? \ + __atomic_update_cmpxchg((mem), (value), (expr)) : \ + (sizeof(*(mem)) == 4) ? \ + __atomic_update_4((volatile int*)(mem), (__typeof((mask)-(mask)))(mask), \ + (__typeof((addend)-(addend)))(addend)) : \ + __atomic_error_bad_argument_size()); \ + }) + +#endif /* __tilegx__ */ + + +#define atomic_exchange(mem, newvalue) \ + __atomic_update(mem, newvalue, exch, 0, newvalue, __value) + +#define atomic_add(mem, value) \ + __atomic_update(mem, value, fetchadd, -1, value, __old + __value) + +#define atomic_sub(mem, value) atomic_add((mem), -(value)) + +#define atomic_increment(mem) atomic_add((mem), 1) + +#define atomic_decrement(mem) atomic_add((mem), -1) + +#define atomic_and(mem, mask) \ + __atomic_update(mem, mask, fetchand, mask, 0, __old & __value) + +#define atomic_or(mem, mask) \ + __atomic_update(mem, mask, fetchor, ~mask, mask, __old | __value) + +#define atomic_bit_set(mem, bit) \ + ({ \ + __typeof(*(mem)) __mask = (__typeof(*(mem)))1 << (bit); \ + __mask & atomic_or((mem), __mask); \ + }) + +#define atomic_bit_clear(mem, bit) \ + ({ \ + __typeof(*(mem)) __mask = (__typeof(*(mem)))1 << (bit); \ + __mask & atomic_and((mem), ~__mask); \ + }) + +#ifdef __tilegx__ +/* Atomically store a new value to memory. + Note that you can freely use types of any size here, unlike the + other atomic routines, which require 32- or 64-bit types. + This accessor is provided for compatibility with TILEPro, which + required an explicit atomic operation for stores that needed + to be atomic with respect to other atomic methods in this header. 
*/
+#define atomic_write(mem, value) ((void) (*(mem) = (value)))
+#else
+#define atomic_write(mem, value) \
+  do { \
+    __typeof(mem) __aw_mem = (mem); \
+    __typeof(value) __aw_val = (value); \
+    unsigned int *__aw_mem32, __aw_intval, __aw_val32, __aw_off, __aw_mask; \
+    __aw_intval = (__typeof((value) - (value)))__aw_val; \
+    switch (sizeof(*__aw_mem)) { \
+    case 8: \
+      __atomic_update_cmpxchg(__aw_mem, __aw_val, __value); \
+      break; \
+    case 4: \
+      __atomic_update_4((int *)__aw_mem, 0, __aw_intval); \
+      break; \
+    case 2: \
+      __aw_off = 8 * ((long)__aw_mem & 0x2); \
+      __aw_mask = 0xffffU << __aw_off; \
+      __aw_mem32 = (unsigned int *)((long)__aw_mem & ~0x2); \
+      __aw_val32 = (__aw_intval << __aw_off) & __aw_mask; \
+      __atomic_update_cmpxchg(__aw_mem32, __aw_val32, \
+                              (__old & ~__aw_mask) | __value); \
+      break; \
+    case 1: \
+      __aw_off = 8 * ((long)__aw_mem & 0x3); \
+      __aw_mask = 0xffU << __aw_off; \
+      __aw_mem32 = (unsigned int *)((long)__aw_mem & ~0x3); \
+      __aw_val32 = (__aw_intval << __aw_off) & __aw_mask; \
+      __atomic_update_cmpxchg(__aw_mem32, __aw_val32, \
+                              (__old & ~__aw_mask) | __value); \
+      break; \
+    } \
+  } while (0)
+#endif
+
+/* Compiler barrier.
+
+   This macro prevents loads or stores from being moved by the compiler
+   across the macro.  Any loaded value that was loaded before this
+   macro must then be reloaded by the compiler.  */
+#define atomic_compiler_barrier() __asm__ __volatile__("" ::: "memory")
+
+/* Full memory barrier.
+
+   This macro has the semantics of atomic_compiler_barrier(), but also
+   ensures that previous stores are visible to other cores, and that
+   all previous loaded values have been placed into their target
+   register on this core.  */
+#define atomic_full_barrier() __insn_mf()
+
+/* Read memory barrier.
+
+   Ensure that all reads by this processor that occurred prior to the
+   read memory barrier have completed, and that no reads that occur
+   after the read memory barrier on this processor are initiated
+   before the barrier.
+
+   On current TILE chips a read barrier is implemented as a full barrier,
+   but this may not be true in later versions of the architecture.
+
+   See also atomic_acquire_barrier() for the appropriate idiom to use
+   to ensure no reads are lifted above an atomic lock instruction.  */
+#define atomic_read_barrier() atomic_full_barrier()
+
+/* Write memory barrier.
+
+   Ensure that all writes by this processor that occurred prior to the
+   write memory barrier have completed, and that no writes that occur
+   after the write memory barrier on this processor are initiated
+   before the barrier.
+
+   On current TILE chips a write barrier is implemented as a full barrier,
+   but this may not be true in later versions of the architecture.
+
+   See also atomic_release_barrier() for the appropriate idiom to use
+   to ensure all writes are complete prior to an atomic unlock instruction.  */
+#define atomic_write_barrier() atomic_full_barrier()
+
+/* Lock acquisition barrier.
+
+   Ensure that no load operations that follow this macro in the
+   program can issue prior to the barrier.  Without such a barrier,
+   the compiler can reorder them to issue earlier, or the hardware can
+   issue them speculatively.  The latter is not currently done in the
+   Tile microarchitecture, but using this operation improves
+   portability to future implementations.
+
+   This operation is intended to be used as part of the "acquire"
+   path for locking, that is, when entering a critical section.
+ This should be done after the atomic operation that actually + acquires the lock, and in conjunction with a "control dependency" + that checks the atomic operation result to see if the lock was + in fact acquired. See the atomic_read_barrier() macro + for a heavier-weight barrier to use in certain unusual constructs, + or atomic_acquire_barrier_value() if no control dependency exists. */ +#define atomic_acquire_barrier() atomic_compiler_barrier() + +/* Lock release barrier. + + Ensure that no store operations that precede this macro in the + program complete subsequent to the barrier. Without such a + barrier, the compiler can reorder stores to issue later, or stores + can be still outstanding in the memory network. + + This operation is intended to be used as part of the "release" path + for locking, that is, when leaving a critical section. This should + be done before the operation (such as a store of zero) that + actually releases the lock. */ +#define atomic_release_barrier() atomic_write_barrier() + +/* Barrier until the read of a particular value is complete. + + This is occasionally useful when constructing certain locking + scenarios. For example, you might write a routine that issues an + atomic instruction to enter a critical section, then reads one or + more values within the critical section without checking to see if + the critical section was in fact acquired, and only later checks + the atomic instruction result to see if the lock was acquired. If + so the routine could properly release the lock and know that the + values that were read were valid. + + In this scenario, it is required to wait for the result of the + atomic instruction, even if the value itself is not checked. This + guarantees that if the atomic instruction succeeded in taking the lock, + the lock was held before any reads in the critical section issued. */ +#define atomic_acquire_barrier_value(val) \ + __asm__ __volatile__("move %0, %0" :: "r"(val)) + +/* Access the given variable in memory exactly once. + + In some contexts, an algorithm may need to force access to memory, + since otherwise the compiler may think it can optimize away a + memory load or store; for example, in a loop when polling memory to + see if another cpu has updated it yet. Generally this is only + required for certain very carefully hand-tuned algorithms; using it + unnecessarily may result in performance losses. + + A related use of this macro is to ensure that the compiler does not + rematerialize the value of "x" by reloading it from memory + unexpectedly; the "volatile" marking will prevent the compiler from + being able to rematerialize. This is helpful if an algorithm needs + to read a variable without locking, but needs it to have the same + value if it ends up being used several times within the algorithm. + + Note that multiple uses of this macro are guaranteed to be ordered, + i.e. the compiler will not reorder stores or loads that are wrapped + in atomic_access_once(). */ +#define atomic_access_once(x) (*(volatile __typeof(x) *)&(x)) + + +#endif /* !_ATOMIC_H_ */ diff --git a/libgcc/config/tilepro/linux-unwind.h b/libgcc/config/tilepro/linux-unwind.h new file mode 100644 index 00000000000..0ed662c6598 --- /dev/null +++ b/libgcc/config/tilepro/linux-unwind.h @@ -0,0 +1,100 @@ +/* DWARF2 EH unwinding support for TILEPro. + Copyright (C) 2011, 2012 + Free Software Foundation, Inc. + Contributed by Walter Lee (walt@tilera.com) + +This file is part of GCC. 
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef inhibit_libc
+
+#include <arch/abi.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <linux/unistd.h>
+
+/* Macro to define a copy of the kernel's __rt_sigreturn function
+   (in arch/tile/kernel/entry.S).  If that function is changed,
+   this one needs to be changed to match it.  */
+#define _sigreturn_asm(REG, NR) asm( \
+    ".pushsection .text.__rt_sigreturn,\"a\"\n" \
+    ".global __rt_sigreturn\n" \
+    ".type __rt_sigreturn,@function\n" \
+    "__rt_sigreturn:\n" \
+    "moveli " #REG ", " #NR "\n" \
+    "swint1\n" \
+    ".size __rt_sigreturn, . - __rt_sigreturn\n" \
+    ".popsection")
+#define sigreturn_asm(REG, NR) _sigreturn_asm(REG, NR)
+sigreturn_asm (TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn);
+#define SIGRETURN_LEN 16
+extern char __rt_sigreturn[];
+
+#define MD_FALLBACK_FRAME_STATE_FOR tile_fallback_frame_state
+
+static _Unwind_Reason_Code
+tile_fallback_frame_state (struct _Unwind_Context *context,
+			   _Unwind_FrameState *fs)
+{
+  unsigned char *pc = context->ra;
+  struct sigcontext *sc;
+  long new_cfa;
+  int i;
+
+  struct rt_sigframe {
+    unsigned char save_area[C_ABI_SAVE_AREA_SIZE];
+    struct siginfo info;
+    struct ucontext uc;
+  } *rt_;
+
+  /* Return if this is not a signal handler.  */
+  if (memcmp (pc, __rt_sigreturn, SIGRETURN_LEN) != 0)
+    return _URC_END_OF_STACK;
+
+  /* It was a signal handler; update the reported PC to point to our
+     copy, since that will be findable with dladdr() and therefore
+     somewhat easier to help understand what actually happened.  */
+  context->ra = __rt_sigreturn;
+
+  rt_ = context->cfa;
+  sc = &rt_->uc.uc_mcontext;
+
+  new_cfa = sc->sp;
+  fs->regs.cfa_how = CFA_REG_OFFSET;
+  fs->regs.cfa_reg = STACK_POINTER_REGNUM;
+  fs->regs.cfa_offset = new_cfa - (long) context->cfa;
+
+  for (i = 0; i < 56; ++i)
+    {
+      fs->regs.reg[i].how = REG_SAVED_OFFSET;
+      fs->regs.reg[i].loc.offset
+	= (long)&sc->gregs[i] - new_cfa;
+    }
+
+  fs->regs.reg[56].how = REG_SAVED_OFFSET;
+  fs->regs.reg[56].loc.offset = (long)&sc->pc - new_cfa;
+  fs->retaddr_column = 56;
+  fs->signal_frame = 1;
+
+  return _URC_NO_REASON;
+}
+
+#endif /* ifndef inhibit_libc */
diff --git a/libgcc/config/tilepro/sfp-machine.h b/libgcc/config/tilepro/sfp-machine.h
new file mode 100644
index 00000000000..ac5b8285e2e
--- /dev/null
+++ b/libgcc/config/tilepro/sfp-machine.h
@@ -0,0 +1,56 @@
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+/* The type of the result of a floating point comparison.  This must
+   match `__libgcc_cmp_return__' in GCC for the target.
*/ +typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__))); +#define CMPtype __gcc_CMPtype + +#define _FP_MUL_MEAT_S(R,X,Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_D(R,X,Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) +#define _FP_MUL_MEAT_Q(R,X,Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm) + +#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y) +#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) +#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y) + +#define _FP_NANFRAC_S _FP_QNANBIT_S +#define _FP_NANFRAC_D _FP_QNANBIT_D, 0 +#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0 +#define _FP_NANSIGN_S 1 +#define _FP_NANSIGN_D 1 +#define _FP_NANSIGN_Q 1 + +#define _FP_KEEPNANFRACP 1 + +#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ + do { \ + if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \ + && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \ + { \ + R##_s = Y##_s; \ + _FP_FRAC_COPY_##wc(R,Y); \ + } \ + else \ + { \ + R##_s = X##_s; \ + _FP_FRAC_COPY_##wc(R,X); \ + } \ + R##_c = FP_CLS_NAN; \ + } while (0) + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 + +#define __BYTE_ORDER __LITTLE_ENDIAN + +/* Define ALIASNAME as a strong alias for NAME. */ +# define strong_alias(name, aliasname) _strong_alias(name, aliasname) +# define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); + diff --git a/libgcc/config/tilepro/softdivide.c b/libgcc/config/tilepro/softdivide.c new file mode 100644 index 00000000000..f09b9a29406 --- /dev/null +++ b/libgcc/config/tilepro/softdivide.c @@ -0,0 +1,354 @@ +/* Division and remainder routines for Tile. + Copyright (C) 2011, 2012 + Free Software Foundation, Inc. + Contributed by Walter Lee (walt@tilera.com) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +typedef int int32_t; +typedef unsigned uint32_t; +typedef long long int64_t; +typedef unsigned long long uint64_t; + +/* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */ +static inline void +raise_intdiv (void) +{ + asm ("{ raise; moveli zero, 8 + (1 << 6) }"); +} + + +#ifndef __tilegx__ +/*__udivsi3 - 32 bit integer unsigned divide */ +static inline uint32_t __attribute__ ((always_inline)) +__udivsi3_inline (uint32_t dividend, uint32_t divisor) +{ + /* Divide out any power of two factor from dividend and divisor. + Note that when dividing by zero the divisor will remain zero, + which is all we need to detect that case below. 
*/ + const int power_of_two_factor = __insn_ctz (divisor); + divisor >>= power_of_two_factor; + dividend >>= power_of_two_factor; + + /* Checks for division by power of two or division by zero. */ + if (divisor <= 1) + { + if (divisor == 0) + { + raise_intdiv (); + return 0; + } + return dividend; + } + + /* Compute (a / b) by repeatedly finding the largest N + such that (b << N) <= a. For each such N, set bit N in the + quotient, subtract (b << N) from a, and keep going. Think of this as + the reverse of the "shift-and-add" that a multiply does. The values + of N are precisely those shift counts. + + Finding N is easy. First, use clz(b) - clz(a) to find the N + that lines up the high bit of (b << N) with the high bit of a. + Any larger value of N would definitely make (b << N) > a, + which is too big. + + Then, if (b << N) > a (because it has larger low bits), decrement + N by one. This adjustment will definitely make (b << N) less + than a, because a's high bit is now one higher than b's. */ + + /* Precomputing the max_ values allows us to avoid a subtract + in the inner loop and just right shift by clz(remainder). */ + const int divisor_clz = __insn_clz (divisor); + const uint32_t max_divisor = divisor << divisor_clz; + const uint32_t max_qbit = 1 << divisor_clz; + + uint32_t quotient = 0; + uint32_t remainder = dividend; + + while (remainder >= divisor) + { + int shift = __insn_clz (remainder); + uint32_t scaled_divisor = max_divisor >> shift; + uint32_t quotient_bit = max_qbit >> shift; + + int too_big = (scaled_divisor > remainder); + scaled_divisor >>= too_big; + quotient_bit >>= too_big; + remainder -= scaled_divisor; + quotient |= quotient_bit; + } + return quotient; +} +#endif /* !__tilegx__ */ + + +/* __udivdi3 - 64 bit integer unsigned divide */ +static inline uint64_t __attribute__ ((always_inline)) +__udivdi3_inline (uint64_t dividend, uint64_t divisor) +{ + /* Divide out any power of two factor from dividend and divisor. + Note that when dividing by zero the divisor will remain zero, + which is all we need to detect that case below. */ + const int power_of_two_factor = __builtin_ctzll (divisor); + divisor >>= power_of_two_factor; + dividend >>= power_of_two_factor; + + /* Checks for division by power of two or division by zero. */ + if (divisor <= 1) + { + if (divisor == 0) + { + raise_intdiv (); + return 0; + } + return dividend; + } + +#ifndef __tilegx__ + if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0) + { + /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */ + return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor); + } +#endif /* !__tilegx__ */ + + /* See algorithm description in __udivsi3 */ + + const int divisor_clz = __builtin_clzll (divisor); + const uint64_t max_divisor = divisor << divisor_clz; + const uint64_t max_qbit = 1ULL << divisor_clz; + + uint64_t quotient = 0; + uint64_t remainder = dividend; + + while (remainder >= divisor) + { + int shift = __builtin_clzll (remainder); + uint64_t scaled_divisor = max_divisor >> shift; + uint64_t quotient_bit = max_qbit >> shift; + + int too_big = (scaled_divisor > remainder); + scaled_divisor >>= too_big; + quotient_bit >>= too_big; + remainder -= scaled_divisor; + quotient |= quotient_bit; + } + return quotient; +} + + +#ifndef __tilegx__ +/* __umodsi3 - 32 bit integer unsigned modulo */ +static inline uint32_t __attribute__ ((always_inline)) +__umodsi3_inline (uint32_t dividend, uint32_t divisor) +{ + /* Shortcircuit mod by a power of two (and catch mod by zero). 
*/ + const uint32_t mask = divisor - 1; + if ((divisor & mask) == 0) + { + if (divisor == 0) + { + raise_intdiv (); + return 0; + } + return dividend & mask; + } + + /* We compute the remainder (a % b) by repeatedly subtracting off + multiples of b from a until a < b. The key is that subtracting + off a multiple of b does not affect the result mod b. + + To make the algorithm run efficiently, we need to subtract + off a large multiple of b at each step. We subtract the largest + (b << N) that is <= a. + + Finding N is easy. First, use clz(b) - clz(a) to find the N + that lines up the high bit of (b << N) with the high bit of a. + Any larger value of N would definitely make (b << N) > a, + which is too big. + + Then, if (b << N) > a (because it has larger low bits), decrement + N by one. This adjustment will definitely make (b << N) less + than a, because a's high bit is now one higher than b's. */ + const uint32_t max_divisor = divisor << __insn_clz (divisor); + + uint32_t remainder = dividend; + while (remainder >= divisor) + { + const int shift = __insn_clz (remainder); + uint32_t scaled_divisor = max_divisor >> shift; + scaled_divisor >>= (scaled_divisor > remainder); + remainder -= scaled_divisor; + } + + return remainder; +} +#endif /* !__tilegx__ */ + + +/* __umoddi3 - 64 bit integer unsigned modulo */ +static inline uint64_t __attribute__ ((always_inline)) +__umoddi3_inline (uint64_t dividend, uint64_t divisor) +{ +#ifndef __tilegx__ + if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0) + { + /* Operands both fit in 32 bits, so use faster 32 bit algorithm. */ + return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor); + } +#endif /* !__tilegx__ */ + + /* Shortcircuit mod by a power of two (and catch mod by zero). */ + const uint64_t mask = divisor - 1; + if ((divisor & mask) == 0) + { + if (divisor == 0) + { + raise_intdiv (); + return 0; + } + return dividend & mask; + } + + /* See algorithm description in __umodsi3 */ + const uint64_t max_divisor = divisor << __builtin_clzll (divisor); + + uint64_t remainder = dividend; + while (remainder >= divisor) + { + const int shift = __builtin_clzll (remainder); + uint64_t scaled_divisor = max_divisor >> shift; + scaled_divisor >>= (scaled_divisor > remainder); + remainder -= scaled_divisor; + } + + return remainder; +} + + +uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor); +#ifdef L_tile_udivsi3 +uint32_t +__udivsi3 (uint32_t dividend, uint32_t divisor) +{ +#ifndef __tilegx__ + return __udivsi3_inline (dividend, divisor); +#else /* !__tilegx__ */ + uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor)); + return (uint32_t) n; +#endif /* !__tilegx__ */ +} +#endif + +#define ABS(x) ((x) >= 0 ? 
(x) : -(x)) + +int32_t __divsi3 (int32_t dividend, int32_t divisor); +#ifdef L_tile_divsi3 +/* __divsi3 - 32 bit integer signed divide */ +int32_t +__divsi3 (int32_t dividend, int32_t divisor) +{ +#ifndef __tilegx__ + uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor)); +#else /* !__tilegx__ */ + uint64_t n = + __udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor)); +#endif /* !__tilegx__ */ + if ((dividend ^ divisor) < 0) + n = -n; + return (int32_t) n; +} +#endif + + +uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor); +#ifdef L_tile_udivdi3 +uint64_t +__udivdi3 (uint64_t dividend, uint64_t divisor) +{ + return __udivdi3_inline (dividend, divisor); +} +#endif + +/*__divdi3 - 64 bit integer signed divide */ +int64_t __divdi3 (int64_t dividend, int64_t divisor); +#ifdef L_tile_divdi3 +int64_t +__divdi3 (int64_t dividend, int64_t divisor) +{ + uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor)); + if ((dividend ^ divisor) < 0) + n = -n; + return (int64_t) n; +} +#endif + + +uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor); +#ifdef L_tile_umodsi3 +uint32_t +__umodsi3 (uint32_t dividend, uint32_t divisor) +{ +#ifndef __tilegx__ + return __umodsi3_inline (dividend, divisor); +#else /* !__tilegx__ */ + return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor); +#endif /* !__tilegx__ */ +} +#endif + + +/* __modsi3 - 32 bit integer signed modulo */ +int32_t __modsi3 (int32_t dividend, int32_t divisor); +#ifdef L_tile_modsi3 +int32_t +__modsi3 (int32_t dividend, int32_t divisor) +{ +#ifndef __tilegx__ + uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor)); +#else /* !__tilegx__ */ + uint64_t remainder = + __umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor)); +#endif /* !__tilegx__ */ + return (int32_t) ((dividend >= 0) ? remainder : -remainder); +} +#endif + + +uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor); +#ifdef L_tile_umoddi3 +uint64_t +__umoddi3 (uint64_t dividend, uint64_t divisor) +{ + return __umoddi3_inline (dividend, divisor); +} +#endif + + +/* __moddi3 - 64 bit integer signed modulo */ +int64_t __moddi3 (int64_t dividend, int64_t divisor); +#ifdef L_tile_moddi3 +int64_t +__moddi3 (int64_t dividend, int64_t divisor) +{ + uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor)); + return (int64_t) ((dividend >= 0) ? remainder : -remainder); +} +#endif diff --git a/libgcc/config/tilepro/softmpy.S b/libgcc/config/tilepro/softmpy.S new file mode 100644 index 00000000000..eb571f65a74 --- /dev/null +++ b/libgcc/config/tilepro/softmpy.S @@ -0,0 +1,95 @@ +/* 64-bit multiplication support for TILEPro. + Copyright (C) 2011, 2012 + Free Software Foundation, Inc. + Contributed by Walter Lee (walt@tilera.com) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* 64-bit multiplication support. */ + + .file "softmpy.S" + +/* Parameters */ +#define lo0 r9 /* low 32 bits of n0 */ +#define hi0 r1 /* high 32 bits of n0 */ +#define lo1 r2 /* low 32 bits of n1 */ +#define hi1 r3 /* high 32 bits of n1 */ + +/* temps */ +#define result1_a r4 +#define result1_b r5 + +#define tmp0 r6 +#define tmp0_left_16 r7 +#define tmp1 r8 + + .section .text.__muldi3, "ax" + .align 8 + .globl __muldi3 + .type __muldi3, @function +__muldi3: + { + move lo0, r0 /* so we can write "out r0" while "in r0" alive */ + mulhl_uu tmp0, lo1, r0 + } + { + mulll_uu result1_a, lo1, hi0 + } + { + move tmp1, tmp0 + mulhla_uu tmp0, lo0, lo1 + } + { + mulhlsa_uu result1_a, lo1, hi0 + } + { + mulll_uu result1_b, lo0, hi1 + slt_u tmp1, tmp0, tmp1 + } + { + mulhlsa_uu result1_a, lo0, hi1 + shli r0, tmp0, 16 + } + { + move tmp0_left_16, r0 + mulhha_uu result1_b, lo0, lo1 + } + { + mullla_uu r0, lo1, lo0 + shli tmp1, tmp1, 16 + } + { + mulhlsa_uu result1_b, hi0, lo1 + inthh tmp1, tmp1, tmp0 + } + { + mulhlsa_uu result1_a, hi1, lo0 + slt_u tmp0, r0, tmp0_left_16 + } + /* NOTE: this will stall for a cycle here. Oh well. */ + { + add r1, tmp0, tmp1 + add result1_a, result1_a, result1_b + } + { + add r1, r1, result1_a + jrp lr + } + .size __muldi3,.-__muldi3 diff --git a/libgcc/config/tilepro/t-crtstuff b/libgcc/config/tilepro/t-crtstuff new file mode 100644 index 00000000000..eddc45ce9d8 --- /dev/null +++ b/libgcc/config/tilepro/t-crtstuff @@ -0,0 +1,4 @@ +# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables, +# because then __FRAME_END__ might not be the last thing in .eh_frame +# section. +CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables diff --git a/libgcc/config/tilepro/t-tilepro b/libgcc/config/tilepro/t-tilepro new file mode 100644 index 00000000000..eb6894ce115 --- /dev/null +++ b/libgcc/config/tilepro/t-tilepro @@ -0,0 +1,33 @@ +LIB2ADD += \ + $(srcdir)/config/tilepro/softmpy.S \ + $(srcdir)/config/tilepro/atomic.c + +LIB2FUNCS_EXCLUDE += \ + _divdi3 \ + _moddi3 \ + _muldi3 \ + _udivdi3 \ + _umoddi3 + +SOFTDIVIDE_FUNCS := \ + _tile_udivsi3 \ + _tile_divsi3 \ + _tile_udivdi3 \ + _tile_divdi3 \ + _tile_umodsi3 \ + _tile_modsi3 \ + _tile_umoddi3 \ + _tile_moddi3 + +softdivide-o = $(patsubst %,%$(objext),$(SOFTDIVIDE_FUNCS)) +$(softdivide-o): %$(objext): $(srcdir)/config/tilepro/softdivide.c + $(gcc_compile) -ffunction-sections -DMAYBE_STATIC= -DL$* -c $< \ + $(vis_hide) +libgcc-objects += $(softdivide-o) + +ifeq ($(enable_shared),yes) +softdivide-s-o = $(patsubst %,%_s$(objext),$(SOFTDIVIDE_FUNCS)) +$(softdivide-s-o): %_s$(objext): $(srcdir)/config/tilepro/softdivide.c + $(gcc_s_compile) -ffunction-sections -DMAYBE_STATIC= -DL$* -c $< +libgcc-s-objects += $(softdivide-s-o) +endif |
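
The routines in this commit reduce to a few compact patterns that are easier to see outside the macro plumbing. The first is the retry loop behind __atomic_update_cmpxchg in atomic.h: re-run a compare-and-exchange until it observes the value the update was computed from. A minimal standalone sketch, using GCC's generic __sync_val_compare_and_swap builtin in place of the port-specific primitive, with XOR as the sample operation (the function name is illustrative, not part of the patch):

/* Atomically XOR `mask' into *mem; return the value seen before the
   update.  A failed compare-and-swap returns the value it did see,
   which doubles as the re-read for the next attempt.  */
static int
atomic_update_xor (volatile int *mem, int mask)
{
  int seen = *mem, guess;
  do
    {
      guess = seen;
      seen = __sync_val_compare_and_swap (mem, guess, guess ^ mask);
    }
  while (seen != guess);	/* lost a race; retry with the new value */
  return seen;
}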
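The subword (1- and 2-byte) methods in atomic.c layer masking on top of a word-wide compare-and-exchange: align down to the containing word, splice the new subword into it, and retry until the swap sticks. A sketch of the same technique for a byte-sized fetch-and-add, assuming a little-endian target and 4-byte words as on TILE; again __sync_val_compare_and_swap stands in for the real primitive, and fetch_add_u8 is a made-up name:

#include <stdint.h>

static uint8_t
fetch_add_u8 (volatile uint8_t *p, uint8_t i)
{
  /* The aligned word containing *p, and the byte's bit offset in it.  */
  volatile uint32_t *word = (volatile uint32_t *) ((uintptr_t) p & ~3UL);
  const int shift = ((uintptr_t) p & 3UL) * 8;
  const uint32_t mask = 0xffu << shift;

  uint32_t old = *word;
  for (;;)
    {
      uint8_t oldval = (old >> shift) & 0xff;
      uint32_t repl = (old & ~mask)
		      | ((uint32_t) (uint8_t) (oldval + i) << shift);
      uint32_t seen = __sync_val_compare_and_swap (word, old, repl);
      if (seen == old)
	return oldval;		/* fetch-and-add returns the old byte */
      old = seen;		/* the word changed underneath us; retry */
    }
}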
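On TILEPro, the word-sized operations all funnel into one kernel fast syscall, __NR_FAST_atomic_update, which atomically computes *ptr = (old & mask) + addend and returns old. Each macro in atomic.h is just a choice of (mask, addend); the primitive written out non-atomically for illustration (the kernel performs it under a lock):

static int
fast_atomic_update (int *p, int mask, int addend)
{
  int old = *p;
  *p = (old & mask) + addend;
  return old;
}

/* Encodings used by the macros above:
     exchange(v): mask = 0,  addend = v  ->  *p = v
     add(v):      mask = -1, addend = v  ->  *p = old + v
     and(m):      mask = m,  addend = 0  ->  *p = old & m
     or(m):       mask = ~m, addend = m  ->  *p = (old & ~m) + m
   The last equals old | m because (old & ~m) and m share no set bits,
   so the addition can never carry.  */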
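softdivide.c's inner loop implements the comment's description literally: use leading-zero counts to find the largest N with (divisor << N) <= remainder, subtract, and set quotient bit N. A portable rendering of __udivsi3_inline's loop using __builtin_clz (the power-of-two and divide-by-zero entry checks are omitted here):

#include <stdint.h>

static uint32_t
soft_udiv (uint32_t a, uint32_t b)	/* assumes b > 1 */
{
  const int bclz = __builtin_clz (b);
  const uint32_t max_divisor = b << bclz;   /* b scaled so bit 31 is set */
  const uint32_t max_qbit = 1u << bclz;	    /* quotient bit for that scale */

  uint32_t quotient = 0, remainder = a;
  while (remainder >= b)
    {
      const int shift = __builtin_clz (remainder);
      uint32_t scaled = max_divisor >> shift;	/* line up the high bits */
      uint32_t qbit = max_qbit >> shift;
      const int too_big = scaled > remainder;	/* low bits overshot by one */
      scaled >>= too_big;
      qbit >>= too_big;
      remainder -= scaled;
      quotient |= qbit;
    }
  return quotient;	/* remainder now holds a % b */
}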
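Finally, softmpy.S produces the low 64 bits of a 64x64 product. TILEPro's multiply instructions operate on 16-bit halves, so the assembly splits the operands further and tracks carries by hand with slt_u; the shape of the computation, sketched with 32x32 partial products (the a_hi * b_hi term only affects bits 64 and up, so it is never computed):

#include <stdint.h>

static uint64_t
soft_muldi3 (uint64_t a, uint64_t b)
{
  uint32_t a_lo = (uint32_t) a, a_hi = (uint32_t) (a >> 32);
  uint32_t b_lo = (uint32_t) b, b_hi = (uint32_t) (b >> 32);

  uint64_t low = (uint64_t) a_lo * b_lo;	/* full 64-bit partial */
  /* The cross terms are shifted up by 32, so only their low 32 bits
     survive; letting the 32-bit multiply-add wrap is exactly right.  */
  uint32_t cross = a_lo * b_hi + a_hi * b_lo;
  return low + ((uint64_t) cross << 32);
}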