diff options
-rw-r--r-- | gcc/ChangeLog | 58 | ||||
-rw-r--r-- | gcc/Makefile.in | 12 | ||||
-rw-r--r-- | gcc/builtin-types.def | 2 | ||||
-rw-r--r-- | gcc/builtins.c | 48 | ||||
-rw-r--r-- | gcc/builtins.def | 59 | ||||
-rw-r--r-- | gcc/c-common.c | 1 | ||||
-rw-r--r-- | gcc/combine.c | 14 | ||||
-rw-r--r-- | gcc/config/alpha/alpha.md | 23 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 2 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 75 | ||||
-rw-r--r-- | gcc/doc/md.texi | 30 | ||||
-rw-r--r-- | gcc/doc/rtl.texi | 27 | ||||
-rw-r--r-- | gcc/expr.c | 28 | ||||
-rw-r--r-- | gcc/fold-const.c | 4 | ||||
-rw-r--r-- | gcc/function.c | 2 | ||||
-rw-r--r-- | gcc/genattrtab.c | 4 | ||||
-rw-r--r-- | gcc/genopinit.c | 4 | ||||
-rw-r--r-- | gcc/libgcc-std.ver | 13 | ||||
-rw-r--r-- | gcc/libgcc2.c | 152 | ||||
-rw-r--r-- | gcc/libgcc2.h | 27 | ||||
-rw-r--r-- | gcc/mklibgcc.in | 25 | ||||
-rw-r--r-- | gcc/optabs.c | 135 | ||||
-rw-r--r-- | gcc/optabs.h | 10 | ||||
-rw-r--r-- | gcc/reload1.c | 8 | ||||
-rw-r--r-- | gcc/rtl.def | 12 | ||||
-rw-r--r-- | gcc/simplify-rtx.c | 77 | ||||
-rw-r--r-- | gcc/testsuite/gcc.c-torture/execute/builtin-bitops-1.c | 198 | ||||
-rw-r--r-- | gcc/tree.def | 5 |
29 files changed, 1019 insertions, 38 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a36da64ac73..a368069fd31 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,61 @@ +2003-02-01 Richard Henderson <rth@redhat.com> + + * optabs.c (expand_unop): Use word_mode for outmode of bit scanners. + * libgcc2.c (__ffsdi2, __clzsi2, __clzdi2, __ctzsi2, __ctzdi2, + __popcountsi2, __popcountdi2, __paritysi2, __paritydi2): Change + return type to Wtype. + + * libgcc-std.ver (GCC_3.4): Fix inheritance. + + * config/i386/i386.md (ffssi2): Use nonimmediate_operand for + expander input constraint. + +2003-02-01 Falk Hueffner <falk.hueffner@student.uni-tuebingen.de> + + * optabs.h (optab_index): Add OTI_clz, OTI_ctz, OTI_popcount and + OTI_parity. + (clz_optab, ctz_optab, popcount_optab, parity_optab): New. + * optabs.c (widen_clz, expand_parity): New. + (expand_unop): Handle clz and parity. Hardcode SImode as outmode + for libcalls to clz, ctz, popcount, and parity. + (init_optabs): Init clz_optab, ctz_optab, popcount_optab and + parity_optab, and set up libfunc handlers. + * libgcc2.c (__clzsi2, __clzdi2, __ctzsi2, __ctzdi2, + __popcountsi2, __popcountdi2, __paritysi2, __paritydi2, + __popcount_tab): New. + * libgcc2.h: Declare them. + * libgcc-std.ver (GCC_3.4): Add new functions from libgcc2.c. + * genopinit.c (optabs): Add clz_optab, ctz_optab, popcount_optab + and parity_optab. + * builtin-types.def (BT_FN_INT_LONG, BT_FN_INT_LONGLONG): New. + * builtins.def (BUILT_IN_CLZ, BUILT_IN_CTZ, BUILT_IN_POPCOUNT, + BUILT_IN_PARITY, BUILT_IN_FFSL, BUILT_IN_CLZL, BUILT_IN_CTZL, + BUILT_IN_POPCOUNTL, BUILT_IN_PARITYL, BUILT_IN_FFSLL, + BUILT_IN_CLZLL, BUILT_IN_CTZLL, BUILT_IN_POPCOUNTLL, + BUILT_IN_PARITYLL): New. + * builtins.c (expand_builtin_unop): Rename from expand_builtin_ffs + and add optab argument. + (expand_builtin): Expand BUILT_IN_{FFS,CLZ,CTZ,POPCOUNT,PARITY}*. + * tree.def (CLZ_EXPR, CTZ_EXPR, POPCOUNT_EXPR, PARITY_EXPR): New. + * expr.c (expand_expr): Handle them. 
+ * fold-const.c (tree_expr_nonnegative_p): Likewise. + * rtl.def (CLZ, CTZ, POPCOUNT, PARITY): New. + * reload1.c (eliminate_regs): Handle them. + (elimination_effects): Likewise. + * function.c (instantiate_virtual_regs_1): Likewise. + * genattrtab.c (check_attr_value): Likewise. + * simplify-rtx.c (simplify_unary_operation): Likewise. + * c-common.c (c_common_truthvalue_conversion): Handle POPCOUNT_EXPR. + * combine.c (combine_simplify_rtx): Handle POPCOUNT and PARITY. + (nonzero_bits): Handle CLZ, CTZ, POPCOUNT and PARITY. + * config/alpha/alpha.md (clzdi2, ctzdi2, popcountdi2): New. + * config/arm/arm.c (arm_init_builtins): Rename __builtin_clz to + __builtin_arm_clz. + * Makefile.in (LIB2FUNCS_1, LIB2FUNCS_2): Move... + * mklibgcc.in (lib2funcs): ...here and merge. Add new members. + * doc/extend.texi (Other Builtins): Add new builtins. + * doc/md.texi (Standard Names): Add new patterns. + 2003-02-01 Ulrich Weigand <uweigand@de.ibm.com> * reload.c: Revert 2003-01-31 change. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 8dae5fecdbf..cc444045e19 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -811,16 +811,6 @@ STAGESTUFF = *$(objext) insn-flags.h insn-config.h insn-codes.h \ *.[0-9][0-9].* *.[si] libcpp.a libbackend.a libgcc.mk \ $(LANG_STAGESTUFF) -# Library members defined in libgcc2.c. -# Variable length limited to 255 charactes when passed to a shell script. -LIB2FUNCS_1 = _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 _ffsdi2 _clz \ - _cmpdi2 _ucmpdi2 _floatdidf _floatdisf _fixunsdfsi _fixunssfsi \ - _fixunsdfdi _fixdfdi _fixunssfdi _fixsfdi _fixxfdi _fixunsxfdi - -LIB2FUNCS_2 = _floatdixf _fixunsxfsi _fixtfdi _fixunstfdi _floatditf \ - _clear_cache _trampoline __main _exit _absvsi2 _absvdi2 _addvsi3 \ - _addvdi3 _subvsi3 _subvdi3 _mulvsi3 _mulvdi3 _negvsi2 _negvdi2 _ctors - # Defined in libgcc2.c, included only in the static library. 
LIB2FUNCS_ST = _eprintf __gcc_bcmp @@ -1051,8 +1041,6 @@ LIB2ADD_ST = $(LIB2FUNCS_STATIC_EXTRA) libgcc.mk: config.status Makefile mklibgcc $(LIB2ADD) $(LIB2ADD_ST) xgcc$(exeext) specs objext='$(objext)' \ LIB1ASMFUNCS='$(LIB1ASMFUNCS)' \ - LIB2FUNCS_1='$(LIB2FUNCS_1)' \ - LIB2FUNCS_2='$(LIB2FUNCS_2)' \ LIB2FUNCS_ST='$(LIB2FUNCS_ST)' \ LIBGCOV='$(LIBGCOV)' \ LIB2ADD='$(LIB2ADD)' \ diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index 23b4336c2b3..b8b9b29929b 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -110,6 +110,8 @@ DEF_FUNCTION_TYPE_1 (BT_FN_LONG_DOUBLE_COMPLEX_LONG_DOUBLE, DEF_FUNCTION_TYPE_1 (BT_FN_PTR_UNSIGNED, BT_PTR, BT_UNSIGNED) DEF_FUNCTION_TYPE_1 (BT_FN_PTR_SIZE, BT_PTR, BT_SIZE) DEF_FUNCTION_TYPE_1 (BT_FN_INT_INT, BT_INT, BT_INT) +DEF_FUNCTION_TYPE_1 (BT_FN_INT_LONG, BT_INT, BT_LONG) +DEF_FUNCTION_TYPE_1 (BT_FN_INT_LONGLONG, BT_INT, BT_LONGLONG) DEF_FUNCTION_TYPE_1 (BT_FN_INT_PTR, BT_INT, BT_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTR, BT_VOID, BT_PTR) DEF_FUNCTION_TYPE_1 (BT_FN_SIZE_CONST_STRING, BT_SIZE, BT_CONST_STRING) diff --git a/gcc/builtins.c b/gcc/builtins.c index d7d8054d539..47d93e21cee 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -147,7 +147,7 @@ static rtx expand_builtin_strchr PARAMS ((tree, rtx, static rtx expand_builtin_strrchr PARAMS ((tree, rtx, enum machine_mode)); static rtx expand_builtin_alloca PARAMS ((tree, rtx)); -static rtx expand_builtin_ffs PARAMS ((tree, rtx, rtx)); +static rtx expand_builtin_unop PARAMS ((tree, rtx, rtx, optab)); static rtx expand_builtin_frame_address PARAMS ((tree)); static rtx expand_builtin_fputs PARAMS ((tree, int, int)); static tree stabilize_va_list PARAMS ((tree, int)); @@ -3581,15 +3581,16 @@ expand_builtin_alloca (arglist, target) return result; } -/* Expand a call to the ffs builtin. The arguments are in ARGLIST. +/* Expand a call to a unary builtin. The arguments are in ARGLIST. 
Return 0 if a normal call should be emitted rather than expanding the function in-line. If convenient, the result should be placed in TARGET. SUBTARGET may be used as the target for computing one of EXP's operands. */ static rtx -expand_builtin_ffs (arglist, target, subtarget) +expand_builtin_unop (arglist, target, subtarget, op_optab) tree arglist; rtx target, subtarget; + optab op_optab; { rtx op0; if (!validate_arglist (arglist, INTEGER_TYPE, VOID_TYPE)) @@ -3597,10 +3598,10 @@ expand_builtin_ffs (arglist, target, subtarget) /* Compute the argument. */ op0 = expand_expr (TREE_VALUE (arglist), subtarget, VOIDmode, 0); - /* Compute ffs, into TARGET if possible. + /* Compute op, into TARGET if possible. Set TARGET to wherever the result comes back. */ target = expand_unop (TYPE_MODE (TREE_TYPE (TREE_VALUE (arglist))), - ffs_optab, op0, target, 1); + op_optab, op0, target, 1); if (target == 0) abort (); return target; @@ -4099,7 +4100,42 @@ expand_builtin (exp, target, subtarget, mode, ignore) break; case BUILT_IN_FFS: - target = expand_builtin_ffs (arglist, target, subtarget); + case BUILT_IN_FFSL: + case BUILT_IN_FFSLL: + target = expand_builtin_unop (arglist, target, subtarget, ffs_optab); + if (target) + return target; + break; + + case BUILT_IN_CLZ: + case BUILT_IN_CLZL: + case BUILT_IN_CLZLL: + target = expand_builtin_unop (arglist, target, subtarget, clz_optab); + if (target) + return target; + break; + + case BUILT_IN_CTZ: + case BUILT_IN_CTZL: + case BUILT_IN_CTZLL: + target = expand_builtin_unop (arglist, target, subtarget, ctz_optab); + if (target) + return target; + break; + + case BUILT_IN_POPCOUNT: + case BUILT_IN_POPCOUNTL: + case BUILT_IN_POPCOUNTLL: + target = expand_builtin_unop (arglist, target, subtarget, + popcount_optab); + if (target) + return target; + break; + + case BUILT_IN_PARITY: + case BUILT_IN_PARITYL: + case BUILT_IN_PARITYLL: + target = expand_builtin_unop (arglist, target, subtarget, parity_optab); if (target) return target; break; 
diff --git a/gcc/builtins.def b/gcc/builtins.def index 23a69df8b67..60a962a9abd 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -322,6 +322,65 @@ DEF_EXT_LIB_BUILTIN(BUILT_IN_FFS, "__builtin_ffs", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_CLZ, + "__builtin_clz", + BT_FN_INT_INT, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_CTZ, + "__builtin_ctz", + BT_FN_INT_INT, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_POPCOUNT, + "__builtin_popcount", + BT_FN_INT_INT, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_PARITY, + "__builtin_parity", + BT_FN_INT_INT, + ATTR_CONST_NOTHROW_LIST) + +DEF_GCC_BUILTIN(BUILT_IN_FFSL, + "__builtin_ffsl", + BT_FN_INT_LONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_CLZL, + "__builtin_clzl", + BT_FN_INT_LONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_CTZL, + "__builtin_ctzl", + BT_FN_INT_LONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_POPCOUNTL, + "__builtin_popcountl", + BT_FN_INT_LONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_PARITYL, + "__builtin_parityl", + BT_FN_INT_LONG, + ATTR_CONST_NOTHROW_LIST) + +DEF_EXT_LIB_BUILTIN(BUILT_IN_FFSLL, + "__builtin_ffsll", + BT_FN_INT_LONGLONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_CLZLL, + "__builtin_clzll", + BT_FN_INT_LONGLONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_CTZLL, + "__builtin_ctzll", + BT_FN_INT_LONGLONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_POPCOUNTLL, + "__builtin_popcountll", + BT_FN_INT_LONGLONG, + ATTR_CONST_NOTHROW_LIST) +DEF_GCC_BUILTIN(BUILT_IN_PARITYLL, + "__builtin_parityll", + BT_FN_INT_LONGLONG, + ATTR_CONST_NOTHROW_LIST) + DEF_EXT_LIB_BUILTIN(BUILT_IN_INDEX, "__builtin_index", BT_FN_STRING_CONST_STRING_INT, diff --git a/gcc/c-common.c b/gcc/c-common.c index 7c6fc7889fc..c78692fd959 100644 --- a/gcc/c-common.c +++ b/gcc/c-common.c @@ -2817,6 +2817,7 @@ c_common_truthvalue_conversion (expr) case ABS_EXPR: case FLOAT_EXPR: case FFS_EXPR: + 
case POPCOUNT_EXPR: /* These don't change whether an object is nonzero or zero. */ return c_common_truthvalue_conversion (TREE_OPERAND (expr, 0)); diff --git a/gcc/combine.c b/gcc/combine.c index d6915c15178..aeb5c2f1b98 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -4634,6 +4634,13 @@ combine_simplify_rtx (x, op0_mode, last, in_dest) SUBST (XEXP (x, 0), XEXP (XEXP (x, 0), 0)); break; + case POPCOUNT: + case PARITY: + /* (pop* (zero_extend <X>)) = (pop* <X>) */ + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) + SUBST (XEXP (x, 0), XEXP (XEXP (x, 0), 0)); + break; + case FLOAT: /* (float (sign_extend <X>)) = (float <X>). */ if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) @@ -8540,10 +8547,17 @@ nonzero_bits (x, mode) break; case FFS: + case CLZ: + case CTZ: + case POPCOUNT: /* This is at most the number of bits in the mode. */ nonzero = ((HOST_WIDE_INT) 1 << (floor_log2 (mode_width) + 1)) - 1; break; + case PARITY: + nonzero = 1; + break; + case IF_THEN_ELSE: nonzero &= (nonzero_bits (XEXP (x, 1), mode) | nonzero_bits (XEXP (x, 2), mode)); diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index c943527fa18..2d5285a688b 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -1323,7 +1323,7 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none" "eqv %r1,%2,%0" [(set_attr "type" "ilog")]) -;; Handle the FFS insn iff we support CIX. +;; Handle FFS and related insns iff we support CIX. (define_expand "ffsdi2" [(set (match_dup 2) @@ -1347,6 +1347,27 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none" ; EV6 calls all mvi and cttz/ctlz/popc class imisc, so just ; reuse the existing type name. 
[(set_attr "type" "mvi")]) + +(define_insn "clzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctlz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "cttz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_CIX" + "ctpop %1,%0" + [(set_attr "type" "mvi")]) ;; Next come the shifts and the various extract and insert operations. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index f9045993a86..fe306fd6391 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -10004,7 +10004,7 @@ arm_init_builtins () /* Initialize arm V5 builtins. */ if (arm_arch5) - def_builtin ("__builtin_clz", int_ftype_int, ARM_BUILTIN_CLZ); + def_builtin ("__builtin_arm_clz", int_ftype_int, ARM_BUILTIN_CLZ); } /* Expand an expression EXP that calls a built-in function, diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 26cb0a947fc..a888bc2e168 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14109,7 +14109,7 @@ (define_expand "ffssi2" [(set (match_operand:SI 0 "nonimmediate_operand" "") - (ffs:SI (match_operand:SI 1 "general_operand" "")))] + (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] "" { rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode); diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index c5ad0940b6f..27a2e1ffb61 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -4986,6 +4986,81 @@ Similar to @code{__builtin_nans}, except the return type is @code{float}. Similar to @code{__builtin_nans}, except the return type is @code{long double}. 
@end deftypefn +@deftypefn {Built-in Function} int __builtin_ffs (unsigned int x) +Returns one plus the index of the least significant 1-bit of @var{x}, or +if @var{x} is zero, returns zero. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_clz (unsigned int x) +Returns the number of leading 0-bits in @var{x}, starting at the most +significant bit position. If @var{x} is 0, the result is undefined. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_ctz (unsigned int x) +Returns the number of trailing 0-bits in @var{x}, starting at the least +significant bit position. If @var{x} is 0, the result is undefined. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_popcount (unsigned int x) +Returns the number of 1-bits in @var{x}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_parity (unsigned int x) +Returns the parity of @var{x}, i.@:e. the number of 1-bits in @var{x} +modulo 2. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_ffsl (unsigned long) +Similar to @code{__builtin_ffs}, except the argument type is +@code{unsigned long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_clzl (unsigned long) +Similar to @code{__builtin_clz}, except the argument type is +@code{unsigned long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_ctzl (unsigned long) +Similar to @code{__builtin_ctz}, except the argument type is +@code{unsigned long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_popcountl (unsigned long) +Similar to @code{__builtin_popcount}, except the argument type is +@code{unsigned long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_parityl (unsigned long) +Similar to @code{__builtin_parity}, except the argument type is +@code{unsigned long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_ffsll (unsigned long long) +Similar to @code{__builtin_ffs}, except the argument type is +@code{unsigned long long}. 
+@end deftypefn + +@deftypefn {Built-in Function} int __builtin_clzll (unsigned long long) +Similar to @code{__builtin_clz}, except the argument type is +@code{unsigned long long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_ctzll (unsigned long long) +Similar to @code{__builtin_ctz}, except the argument type is +@code{unsigned long long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_popcountll (unsigned long long) +Similar to @code{__builtin_popcount}, except the argument type is +@code{unsigned long long}. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_parityll (unsigned long long) +Similar to @code{__builtin_parity}, except the argument type is +@code{unsigned long long}. +@end deftypefn + + @node Target Builtins @section Built-in Functions Specific to Particular Target Machines diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 5b91c082680..ce3eb14aeb7 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -2663,6 +2663,36 @@ generating the instruction. The @code{ffs} built-in function of C always uses the mode which corresponds to the C data type @code{int}. +@cindex @code{clz@var{m}2} instruction pattern +@item @samp{clz@var{m}2} +Store into operand 0 the number of leading 0-bits in operand 1, starting +at the most significant bit position. If operand 1 is 0, the result is +undefined. @var{m} is the mode of operand 0; operand 1's mode is +specified by the instruction pattern, and the compiler will convert the +operand to that mode before generating the instruction. + +@cindex @code{ctz@var{m}2} instruction pattern +@item @samp{ctz@var{m}2} +Store into operand 0 the number of trailing 0-bits in operand 1, starting +at the least significant bit position. If operand 1 is 0, the result is +undefined. @var{m} is the mode of operand 0; operand 1's mode is +specified by the instruction pattern, and the compiler will convert the +operand to that mode before generating the instruction. 
+ +@cindex @code{popcount@var{m}2} instruction pattern +@item @samp{popcount@var{m}2} +Store into operand 0 the number of 1-bits in operand 1. @var{m} is the +mode of operand 0; operand 1's mode is specified by the instruction +pattern, and the compiler will convert the operand to that mode before +generating the instruction. + +@cindex @code{parity@var{m}2} instruction pattern +@item @samp{parity@var{m}2} +Store into operand 0 the parity of operand 1, i.@:e. the number of 1-bits +in operand 1 modulo 2. @var{m} is the mode of operand 0; operand 1's mode +is specified by the instruction pattern, and the compiler will convert +the operand to that mode before generating the instruction. + @cindex @code{one_cmpl@var{m}2} instruction pattern @item @samp{one_cmpl@var{m}2} Store the bitwise-complement of operand 1 into operand 0. diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi index a0c1f39403d..16985176ed2 100644 --- a/gcc/doc/rtl.texi +++ b/gcc/doc/rtl.texi @@ -1845,6 +1845,33 @@ Represents one plus the index of the least significant 1-bit in zero if @var{x} is zero.) The mode of @var{x} need not be @var{m}; depending on the target machine, various mode combinations may be valid. + +@findex clz +@item (clz:@var{m} @var{x}) +Represents the number of leading 0-bits in @var{x}, represented as an +integer of mode @var{m}, starting at the most significant bit position. +If @var{x} is zero, the value is undefined. Note that this is one of +the few expressions that is not invariant under widening. The mode of +@var{x} will usually be an integer mode. + +@findex ctz +@item (ctz:@var{m} @var{x}) +Represents the number of trailing 0-bits in @var{x}, represented as an +integer of mode @var{m}, starting at the least significant bit position. +If @var{x} is zero, the value is undefined. Except for this case, +@code{ctz(@var{x})} is equivalent to @code{ffs(@var{x}) - 1}. The mode of +@var{x} will usually be an integer mode. 
+ +@findex popcount +@item (popcount:@var{m} @var{x}) +Represents the number of 1-bits in @var{x}, represented as an integer of +mode @var{m}. The mode of @var{x} will usually be an integer mode. + +@findex parity +@item (parity:@var{m} @var{x}) +Represents the number of 1-bits modulo 2 in @var{x}, represented as an +integer of mode @var{m}. The mode of @var{x} will usually be an integer +mode. @end table @node Comparisons diff --git a/gcc/expr.c b/gcc/expr.c index 8d1fd4d4d1d..601c5e991ab 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8353,6 +8353,34 @@ expand_expr (exp, target, tmode, modifier) abort (); return temp; + case CLZ_EXPR: + op0 = expand_expr (TREE_OPERAND (exp, 0), subtarget, VOIDmode, 0); + temp = expand_unop (mode, clz_optab, op0, target, 1); + if (temp == 0) + abort (); + return temp; + + case CTZ_EXPR: + op0 = expand_expr (TREE_OPERAND (exp, 0), subtarget, VOIDmode, 0); + temp = expand_unop (mode, ctz_optab, op0, target, 1); + if (temp == 0) + abort (); + return temp; + + case POPCOUNT_EXPR: + op0 = expand_expr (TREE_OPERAND (exp, 0), subtarget, VOIDmode, 0); + temp = expand_unop (mode, popcount_optab, op0, target, 1); + if (temp == 0) + abort (); + return temp; + + case PARITY_EXPR: + op0 = expand_expr (TREE_OPERAND (exp, 0), subtarget, VOIDmode, 0); + temp = expand_unop (mode, parity_optab, op0, target, 1); + if (temp == 0) + abort (); + return temp; + /* ??? Can optimize bitwise operations with one arg constant. 
Can optimize (a bitwise1 n) bitwise2 (a bitwise3 b) and (a bitwise1 b) bitwise2 b (etc) diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 3ab360b0dd3..55f4ce46c41 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -7436,6 +7436,10 @@ tree_expr_nonnegative_p (t) { case ABS_EXPR: case FFS_EXPR: + case CLZ_EXPR: + case CTZ_EXPR: + case POPCOUNT_EXPR: + case PARITY_EXPR: return 1; case INTEGER_CST: return tree_int_cst_sgn (t) >= 0; diff --git a/gcc/function.c b/gcc/function.c index fe5394ec036..cb5cf64f4be 100644 --- a/gcc/function.c +++ b/gcc/function.c @@ -4004,6 +4004,8 @@ instantiate_virtual_regs_1 (loc, object, extra_insns) case ABS: case SQRT: case FFS: + case CLZ: case CTZ: + case POPCOUNT: case PARITY: /* These case either have just one operand or we know that we need not check the rest of the operands. */ loc = &XEXP (x, 0); diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c index 2c07d87a329..0270945abff 100644 --- a/gcc/genattrtab.c +++ b/gcc/genattrtab.c @@ -1161,6 +1161,10 @@ check_attr_value (exp, attr) break; case FFS: + case CLZ: + case CTZ: + case POPCOUNT: + case PARITY: XEXP (exp, 0) = check_attr_value (XEXP (exp, 0), attr); break; diff --git a/gcc/genopinit.c b/gcc/genopinit.c index 1a10f0342e3..1a0070df064 100644 --- a/gcc/genopinit.c +++ b/gcc/genopinit.c @@ -126,6 +126,10 @@ static const char * const optabs[] = "strlen_optab->handlers[$A].insn_code = CODE_FOR_$(strlen$a$)", "one_cmpl_optab->handlers[$A].insn_code = CODE_FOR_$(one_cmpl$a2$)", "ffs_optab->handlers[$A].insn_code = CODE_FOR_$(ffs$a2$)", + "clz_optab->handlers[$A].insn_code = CODE_FOR_$(clz$a2$)", + "ctz_optab->handlers[$A].insn_code = CODE_FOR_$(ctz$a2$)", + "popcount_optab->handlers[$A].insn_code = CODE_FOR_$(popcount$a2$)", + "parity_optab->handlers[$A].insn_code = CODE_FOR_$(parity$a2$)", "mov_optab->handlers[$A].insn_code = CODE_FOR_$(mov$a$)", "movstrict_optab->handlers[$A].insn_code = CODE_FOR_$(movstrict$a$)", "cmp_optab->handlers[$A].insn_code = CODE_FOR_$(cmp$a$)", 
diff --git a/gcc/libgcc-std.ver b/gcc/libgcc-std.ver index abbab6ce941..360e528ba01 100644 --- a/gcc/libgcc-std.ver +++ b/gcc/libgcc-std.ver @@ -180,3 +180,16 @@ GCC_3.0 { GCC_3.3 { _Unwind_FindEnclosingFunction } + +%inherit GCC_3.4 GCC_3.3 +GCC_3.4 { + # bit scanning and counting built-ins + __clzsi2 + __clzdi2 + __ctzsi2 + __ctzdi2 + __popcountsi2 + __popcountdi2 + __paritysi2 + __paritydi2 +} diff --git a/gcc/libgcc2.c b/gcc/libgcc2.c index deb772fa629..5de1edecd8c 100644 --- a/gcc/libgcc2.c +++ b/gcc/libgcc2.c @@ -331,7 +331,7 @@ __ashrdi3 (DWtype u, word_type b) #endif #ifdef L_ffsdi2 -DWtype +Wtype __ffsdi2 (DWtype u) { DWunion uu; @@ -495,6 +495,11 @@ __udiv_w_sdiv (UWtype *rp __attribute__ ((__unused__)), #define L_udivmoddi4 #endif +#if (defined (L_clzsi2) || defined (L_clzdi2) || \ + defined (L_ctzsi2) || defined (L_ctzdi2)) +extern const UQItype __clz_tab[]; +#endif + #ifdef L_clz const UQItype __clz_tab[] = { @@ -508,6 +513,151 @@ const UQItype __clz_tab[] = 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, }; #endif + +#ifdef L_clzsi2 +Wtype +__clzsi2 (USItype x) +{ + Wtype a; + + /* Note that we've already verified that BITS_PER_UNIT == 8, and + thus SItype is 32 bits wide. */ + if (x < (1 << 2 * 8)) + if (x < (1 << 1 * 8)) + a = 0 * 8; + else + a = 1 * 8; + else + if (x < (1 << 3 * 8)) + a = 2 * 8; + else + a = 3 * 8; + + return 32 - (__clz_tab[x >> a] + a); +} +#endif + +#ifdef L_clzdi2 +Wtype +__clzdi2 (UDItype x) +{ + Wtype a; + + /* Note that we've already verified that BITS_PER_UNIT == 8, and + thus DItype is 64 bits wide. */ + for (a = 64 - 8; a > 0; a -= 8) + if (((x >> a) & 0xff) != 0) + break; + + return 64 - (__clz_tab[x >> a] + a); +} +#endif + +#ifdef L_ctzsi2 +Wtype +__ctzsi2 (USItype x) +{ + Wtype a; + + x = x & -x; + + /* Note that we've already verified that BITS_PER_UNIT == 8, and + thus SItype is 32 bits wide. 
*/ + if (x < (1 << 2 * 8)) + if (x < (1 << 1 * 8)) + a = 0 * 8; + else + a = 1 * 8; + else + if (x < (1 << 3 * 8)) + a = 2 * 8; + else + a = 3 * 8; + + return __clz_tab[x >> a] + a - 1; +} +#endif + +#ifdef L_ctzdi2 +Wtype +__ctzdi2 (UDItype x) +{ + Wtype a; + + x = x & -x; + for (a = 64 - 8; a > 0; a -= 8) + if (((x >> a) & 0xff) != 0) + break; + + return __clz_tab[x >> a] + a - 1; +} +#endif + +#if (defined (L_popcountsi2) || defined (L_popcountdi2) || \ + defined (L_paritysi2) || defined (L_paritydi2)) +extern const UQItype __popcount_tab[]; +#endif + +#ifdef L_popcount_tab +const UQItype __popcount_tab[] = +{ + 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8, +}; +#endif + +#ifdef L_popcountsi2 +Wtype +__popcountsi2 (USItype x) +{ + return __popcount_tab[(x >> 0) & 0xff] + + __popcount_tab[(x >> 8) & 0xff] + + __popcount_tab[(x >> 16) & 0xff] + + __popcount_tab[(x >> 24) & 0xff]; +} +#endif + +#ifdef L_popcountdi2 +Wtype +__popcountdi2 (UDItype x) +{ + return __popcount_tab[(x >> 0) & 0xff] + + __popcount_tab[(x >> 8) & 0xff] + + __popcount_tab[(x >> 16) & 0xff] + + __popcount_tab[(x >> 24) & 0xff] + + __popcount_tab[(x >> 32) & 0xff] + + __popcount_tab[(x >> 40) & 0xff] + + __popcount_tab[(x >> 48) & 0xff] + + __popcount_tab[(x >> 56) & 0xff]; +} +#endif + +#ifdef L_paritysi2 +Wtype +__paritysi2 (USItype x) +{ + x ^= x >> 16; + x ^= x >> 8; + return __popcount_tab[x & 0xff] & 1; +} +#endif + +#ifdef L_paritydi2 +Wtype +__paritydi2 (UDItype x) +{ + Wtype nx = x ^ (x >> 32); + nx ^= nx 
>> 16; + nx ^= nx >> 8; + return __popcount_tab[nx & 0xff] & 1; +} +#endif #ifdef L_udivmoddi4 diff --git a/gcc/libgcc2.h b/gcc/libgcc2.h index 260cd7c9f57..c71fcce7824 100644 --- a/gcc/libgcc2.h +++ b/gcc/libgcc2.h @@ -223,7 +223,32 @@ extern DWtype __negdi2 (DWtype); extern DWtype __lshrdi3 (DWtype, word_type); extern DWtype __ashldi3 (DWtype, word_type); extern DWtype __ashrdi3 (DWtype, word_type); -extern DWtype __ffsdi2 (DWtype); +extern Wtype __ffsdi2 (DWtype); + +/* ??? Ought to get these named properly for DSPs. */ +#if BITS_PER_UNIT == 8 && MIN_UNITS_PER_WORD >= 4 +extern Wtype __clzsi2 (USItype); +extern Wtype __ctzsi2 (USItype); +extern Wtype __popcountsi2 (USItype x); +extern Wtype __paritysi2 (USItype x); +#else +#undef L_clzsi2 +#undef L_ctzsi2 +#undef L_popcountsi2 +#undef L_paritysi2 +#endif + +#if BITS_PER_UNIT == 8 && MIN_UNITS_PER_WORD >= 4 && LONG_LONG_TYPE_SIZE > 32 +extern Wtype __clzdi2 (UDItype); +extern Wtype __ctzdi2 (UDItype); +extern Wtype __popcountdi2 (UDItype x); +extern Wtype __paritydi2 (UDItype x); +#else +#undef L_clzdi2 +#undef L_ctzdi2 +#undef L_popcountdi2 +#undef L_paritydi2 +#endif /* __udiv_w_sdiv is static inline when building other libgcc2 portions. */ #if (!defined(L_udivdi3) && !defined(L_divdi3) && \ diff --git a/gcc/mklibgcc.in b/gcc/mklibgcc.in index a18b8dd0a8b..74f72035be2 100644 --- a/gcc/mklibgcc.in +++ b/gcc/mklibgcc.in @@ -9,8 +9,6 @@ # # objext # LIB1ASMFUNCS -# LIB2FUNCS_1 -# LIB2FUNCS_2 # LIB2FUNCS_ST # LIBGCOV # LIB2ADD @@ -45,6 +43,16 @@ echo echo 'force:' echo +# Library members defined in libgcc2.c. 
+lib2funcs='_muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 _ffsdi2 _clz + _cmpdi2 _ucmpdi2 _floatdidf _floatdisf _fixunsdfsi _fixunssfsi + _fixunsdfdi _fixdfdi _fixunssfdi _fixsfdi _fixxfdi _fixunsxfdi + _floatdixf _fixunsxfsi _fixtfdi _fixunstfdi _floatditf _clear_cache + _trampoline __main _exit _absvsi2 _absvdi2 _addvsi3 _addvdi3 + _subvsi3 _subvdi3 _mulvsi3 _mulvdi3 _negvsi2 _negvdi2 _ctors + _clzsi2 _clzdi2 _ctzsi2 _ctzdi2 _popcount_tab _popcountsi2 + _popcountdi2 _paritysi2 _paritydi2' + # Disable SHLIB_LINK if shared libgcc not enabled. if [ "@enable_shared@" = "no" ]; then SHLIB_LINK="" @@ -88,14 +96,11 @@ for name in $LIB1ASMFUNCS; do echo " $gcc_compile" $flags -DL$name -xassembler-with-cpp \ -c '$(srcdir)/config/$(LIB1ASMSRC)' -o $out - # Remove any objects from LIB2FUNCS and LIB2_DIVMOD_FUNCS that are + # Remove any objects from lib2funcs and LIB2_DIVMOD_FUNCS that are # defined as optimized assembly code in LIB1ASMFUNCS. - LIB2FUNCS_1=`echo $LIB2FUNCS_1 | sed -e 's/^'$name' //' \ - -e 's/ '$name' / /' \ - -e 's/ '$name'$//'` - LIB2FUNCS_2=`echo $LIB2FUNCS_2 | sed -e 's/^'$name' //' \ - -e 's/ '$name' / /' \ - -e 's/ '$name'$//'` + lib2funcs=`echo $lib2funcs | sed -e 's/^'$name' //' \ + -e 's/ '$name' / /' \ + -e 's/ '$name'$//'` LIB2_DIVMOD_FUNCS=`echo $LIB2_DIVMOD_FUNCS | sed -e 's/^'$name' //' \ -e 's/ '$name' / /' \ -e 's/ '$name'$//'` @@ -111,7 +116,7 @@ libgcc2_objs="" libgcc2_st_objs="" libgcc2_eh_objs="" -for name in $LIB2FUNCS_1 $LIB2FUNCS_2; do +for name in $lib2funcs; do for ml in $MULTILIBS; do dir=`echo ${ml} | sed -e 's/;.*$//' -e 's/=/$(EQ)/g'` flags=`echo ${ml} | sed -e 's/^[^;]*;//' -e 's/@/ -/g'`; diff --git a/gcc/optabs.c b/gcc/optabs.c index ea045f455e0..2b08f8efa57 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -126,6 +126,8 @@ static rtx expand_vector_binop PARAMS ((enum machine_mode, optab, enum optab_methods)); static rtx expand_vector_unop PARAMS ((enum machine_mode, optab, rtx, rtx, int)); +static rtx widen_clz PARAMS ((enum 
machine_mode, rtx, rtx)); +static rtx expand_parity PARAMS ((enum machine_mode, rtx, rtx)); /* Add a REG_EQUAL note to the last insn in INSNS. TARGET is being set to the result of operation CODE applied to OP0 (and OP1 if it is a binary @@ -2325,6 +2327,89 @@ expand_simple_unop (mode, code, op0, target, unsignedp) return expand_unop (mode, unop, op0, target, unsignedp); } +/* Try calculating + (clz:narrow x) + as + (clz:wide (zero_extend:wide x)) - ((width wide) - (width narrow)). */ +static rtx +widen_clz (mode, op0, target) + enum machine_mode mode; + rtx op0; + rtx target; +{ + enum mode_class class = GET_MODE_CLASS (mode); + if (class == MODE_INT || class == MODE_FLOAT || class == MODE_COMPLEX_FLOAT) + { + enum machine_mode wider_mode; + for (wider_mode = GET_MODE_WIDER_MODE (mode); wider_mode != VOIDmode; + wider_mode = GET_MODE_WIDER_MODE (wider_mode)) + { + if (clz_optab->handlers[(int) wider_mode].insn_code + != CODE_FOR_nothing) + { + rtx xop0, temp, last; + + last = get_last_insn (); + + if (target == 0) + target = gen_reg_rtx (mode); + xop0 = widen_operand (op0, wider_mode, mode, true, false); + temp = expand_unop (wider_mode, clz_optab, xop0, NULL_RTX, true); + if (temp != 0) + temp = expand_binop (wider_mode, sub_optab, temp, + GEN_INT (GET_MODE_BITSIZE (wider_mode) + - GET_MODE_BITSIZE (mode)), + target, true, OPTAB_DIRECT); + if (temp == 0) + delete_insns_since (last); + + return temp; + } + } + } + return 0; +} + +/* Try calculating (parity x) as (and (popcount x) 1), where + popcount can also be done in a wider mode. 
*/ +static rtx +expand_parity (mode, op0, target) + enum machine_mode mode; + rtx op0; + rtx target; +{ + enum mode_class class = GET_MODE_CLASS (mode); + if (class == MODE_INT || class == MODE_FLOAT || class == MODE_COMPLEX_FLOAT) + { + enum machine_mode wider_mode; + for (wider_mode = mode; wider_mode != VOIDmode; + wider_mode = GET_MODE_WIDER_MODE (wider_mode)) + { + if (popcount_optab->handlers[(int) wider_mode].insn_code + != CODE_FOR_nothing) + { + rtx xop0, temp, last; + + last = get_last_insn (); + + if (target == 0) + target = gen_reg_rtx (mode); + xop0 = widen_operand (op0, wider_mode, mode, true, false); + temp = expand_unop (wider_mode, popcount_optab, xop0, NULL_RTX, + true); + if (temp != 0) + temp = expand_binop (wider_mode, and_optab, temp, GEN_INT (1), + target, true, OPTAB_DIRECT); + if (temp == 0) + delete_insns_since (last); + + return temp; + } + } + } + return 0; +} + /* Generate code to perform an operation specified by UNOPTAB on operand OP0, with result having machine-mode MODE. @@ -2405,6 +2490,16 @@ expand_unop (mode, unoptab, op0, target, unsignedp) /* It can't be done in this mode. Can we open-code it in a wider mode? */ + /* Widening clz needs special treatment. */ + if (unoptab == clz_optab) + { + temp = widen_clz (mode, op0, target); + if (temp) + return temp; + else + goto try_libcall; + } + if (class == MODE_INT || class == MODE_FLOAT || class == MODE_COMPLEX_FLOAT) for (wider_mode = GET_MODE_WIDER_MODE (mode); wider_mode != VOIDmode; wider_mode = GET_MODE_WIDER_MODE (wider_mode)) @@ -2560,22 +2655,39 @@ expand_unop (mode, unoptab, op0, target, unsignedp) } } + /* Try calculating parity (x) as popcount (x) % 2. */ + if (unoptab == parity_optab) + { + temp = expand_parity (mode, op0, target); + if (temp) + return temp; + } + + try_libcall: /* Now try a library call in this mode. */ if (unoptab->handlers[(int) mode].libfunc) { rtx insns; rtx value; + enum machine_mode outmode = mode; + + /* All of these functions return small values. 
Thus we choose to + have them return something that isn't a double-word. */ + if (unoptab == ffs_optab || unoptab == clz_optab || unoptab == ctz_optab + || unoptab == popcount_optab || unoptab == parity_optab) + outmode = word_mode; start_sequence (); /* Pass 1 for NO_QUEUE so we don't lose any increments if the libcall is cse'd or moved. */ value = emit_library_call_value (unoptab->handlers[(int) mode].libfunc, - NULL_RTX, LCT_CONST, mode, 1, op0, mode); + NULL_RTX, LCT_CONST, outmode, + 1, op0, mode); insns = get_insns (); end_sequence (); - target = gen_reg_rtx (mode); + target = gen_reg_rtx (outmode); emit_libcall_block (insns, target, value, gen_rtx_fmt_e (unoptab->code, mode, op0)); @@ -5395,6 +5507,10 @@ init_optabs () addcc_optab = init_optab (UNKNOWN); one_cmpl_optab = init_optab (NOT); ffs_optab = init_optab (FFS); + clz_optab = init_optab (CLZ); + ctz_optab = init_optab (CTZ); + popcount_optab = init_optab (POPCOUNT); + parity_optab = init_optab (PARITY); sqrt_optab = init_optab (SQRT); floor_optab = init_optab (UNKNOWN); ceil_optab = init_optab (UNKNOWN); @@ -5472,6 +5588,10 @@ init_optabs () init_floating_libfuncs (negv_optab, "neg", '2'); init_integral_libfuncs (one_cmpl_optab, "one_cmpl", '2'); init_integral_libfuncs (ffs_optab, "ffs", '2'); + init_integral_libfuncs (clz_optab, "clz", '2'); + init_integral_libfuncs (ctz_optab, "ctz", '2'); + init_integral_libfuncs (popcount_optab, "popcount", '2'); + init_integral_libfuncs (parity_optab, "parity", '2'); /* Comparison libcalls for integers MUST come in pairs, signed/unsigned. */ init_integral_libfuncs (cmp_optab, "cmp", '2'); @@ -5531,6 +5651,17 @@ init_optabs () /* The ffs function operates on `int'. 
*/ ffs_optab->handlers[(int) mode_for_size (INT_TYPE_SIZE, MODE_INT, 0)].libfunc = init_one_libfunc ("ffs"); + ffs_optab->handlers[(int) DImode].libfunc = init_one_libfunc ("__ffsdi2"); + clz_optab->handlers[(int) SImode].libfunc = init_one_libfunc ("__clzsi2"); + clz_optab->handlers[(int) DImode].libfunc = init_one_libfunc ("__clzdi2"); + ctz_optab->handlers[(int) SImode].libfunc = init_one_libfunc ("__ctzsi2"); + ctz_optab->handlers[(int) DImode].libfunc = init_one_libfunc ("__ctzdi2"); + popcount_optab->handlers[(int) SImode].libfunc + = init_one_libfunc ("__popcountsi2"); + popcount_optab->handlers[(int) DImode].libfunc + = init_one_libfunc ("__popcountdi2"); + parity_optab->handlers[(int) SImode].libfunc = init_one_libfunc ("__paritysi2"); + parity_optab->handlers[(int) DImode].libfunc = init_one_libfunc ("__paritydi2"); extendsfdf2_libfunc = init_one_libfunc ("__extendsfdf2"); extendsfxf2_libfunc = init_one_libfunc ("__extendsfxf2"); diff --git a/gcc/optabs.h b/gcc/optabs.h index 2ff8fec62dd..5fda4dabf74 100644 --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -123,8 +123,12 @@ enum optab_index OTI_absv, /* Bitwise not */ OTI_one_cmpl, - /* Find first bit set */ + /* Bit scanning and counting */ OTI_ffs, + OTI_clz, + OTI_ctz, + OTI_popcount, + OTI_parity, /* Square root */ OTI_sqrt, /* Sine */ @@ -208,6 +212,10 @@ extern GTY(()) optab optab_table[OTI_MAX]; #define absv_optab (optab_table[OTI_absv]) #define one_cmpl_optab (optab_table[OTI_one_cmpl]) #define ffs_optab (optab_table[OTI_ffs]) +#define clz_optab (optab_table[OTI_clz]) +#define ctz_optab (optab_table[OTI_ctz]) +#define popcount_optab (optab_table[OTI_popcount]) +#define parity_optab (optab_table[OTI_parity]) #define sqrt_optab (optab_table[OTI_sqrt]) #define sin_optab (optab_table[OTI_sin]) #define cos_optab (optab_table[OTI_cos]) diff --git a/gcc/reload1.c b/gcc/reload1.c index 90625dbee67..f6498f10e29 100644 --- a/gcc/reload1.c +++ b/gcc/reload1.c @@ -2545,6 +2545,10 @@ eliminate_regs (x, mem_mode, insn) 
case ABS: case SQRT: case FFS: + case CLZ: + case CTZ: + case POPCOUNT: + case PARITY: new = eliminate_regs (XEXP (x, 0), mem_mode, insn); if (new != XEXP (x, 0)) return gen_rtx_fmt_e (code, GET_MODE (x), new); @@ -2776,6 +2780,10 @@ elimination_effects (x, mem_mode) case ABS: case SQRT: case FFS: + case CLZ: + case CTZ: + case POPCOUNT: + case PARITY: elimination_effects (XEXP (x, 0), mem_mode); return; diff --git a/gcc/rtl.def b/gcc/rtl.def index e3be833e15f..ff19dd95d65 100644 --- a/gcc/rtl.def +++ b/gcc/rtl.def @@ -1070,6 +1070,18 @@ DEF_RTL_EXPR(SQRT, "sqrt", "e", '1') or 0 if arg is 0. */ DEF_RTL_EXPR(FFS, "ffs", "e", '1') +/* Count leading zeros. */ +DEF_RTL_EXPR(CLZ, "clz", "e", '1') + +/* Count trailing zeros. */ +DEF_RTL_EXPR(CTZ, "ctz", "e", '1') + +/* Population count (number of 1 bits). */ +DEF_RTL_EXPR(POPCOUNT, "popcount", "e", '1') + +/* Population parity (number of 1 bits modulo 2). */ +DEF_RTL_EXPR(PARITY, "parity", "e", '1') + /* Reference to a signed bit-field of specified size and position. Operand 0 is the memory unit (usually SImode or QImode) which contains the field's first bit. Operand 1 is the width, in bits. diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c index d563caeae2f..47dd7a07038 100644 --- a/gcc/simplify-rtx.c +++ b/gcc/simplify-rtx.c @@ -423,6 +423,33 @@ simplify_unary_operation (code, mode, op, op_mode) val = exact_log2 (arg0 & (- arg0)) + 1; break; + case CLZ: + arg0 &= GET_MODE_MASK (mode); + val = GET_MODE_BITSIZE (mode) - floor_log2 (arg0) - 1; + break; + + case CTZ: + arg0 &= GET_MODE_MASK (mode); + val = arg0 == 0 + ? 
GET_MODE_BITSIZE (mode) + : exact_log2 (arg0 & -arg0); + break; + + case POPCOUNT: + arg0 &= GET_MODE_MASK (mode); + val = 0; + while (arg0) + val++, arg0 &= arg0 - 1; + break; + + case PARITY: + arg0 &= GET_MODE_MASK (mode); + val = 0; + while (arg0) + val++, arg0 &= arg0 - 1; + val &= 1; + break; + case TRUNCATE: val = arg0; break; @@ -523,9 +550,55 @@ simplify_unary_operation (code, mode, op, op_mode) case FFS: hv = 0; if (l1 == 0) - lv = HOST_BITS_PER_WIDE_INT + exact_log2 (h1 & (-h1)) + 1; + { + if (h1 == 0) + lv = 0; + else + lv = HOST_BITS_PER_WIDE_INT + exact_log2 (h1 & -h1) + 1; + } else - lv = exact_log2 (l1 & (-l1)) + 1; + lv = exact_log2 (l1 & -l1) + 1; + break; + + case CLZ: + hv = 0; + if (h1 == 0) + lv = GET_MODE_BITSIZE (mode) - floor_log2 (l1) - 1; + else + lv = GET_MODE_BITSIZE (mode) - floor_log2 (h1) - 1 + - HOST_BITS_PER_WIDE_INT; + break; + + case CTZ: + hv = 0; + if (l1 == 0) + { + if (h1 == 0) + lv = GET_MODE_BITSIZE (mode); + else + lv = HOST_BITS_PER_WIDE_INT + exact_log2 (h1 & -h1); + } + else + lv = exact_log2 (l1 & -l1); + break; + + case POPCOUNT: + hv = 0; + lv = 0; + while (l1) + lv++, l1 &= l1 - 1; + while (h1) + lv++, h1 &= h1 - 1; + break; + + case PARITY: + hv = 0; + lv = 0; + while (l1) + lv++, l1 &= l1 - 1; + while (h1) + lv++, h1 &= h1 - 1; + lv &= 1; break; case TRUNCATE: diff --git a/gcc/testsuite/gcc.c-torture/execute/builtin-bitops-1.c b/gcc/testsuite/gcc.c-torture/execute/builtin-bitops-1.c new file mode 100644 index 00000000000..b28a2fe07ac --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/builtin-bitops-1.c @@ -0,0 +1,198 @@ +#include <limits.h> +#include <assert.h> + +#define MAKE_FUNS(suffix, type) \ +int my_ffs##suffix(type x) { \ + int i; \ + if (x == 0) \ + return 0; \ + for (i = 0; i < CHAR_BIT * sizeof (type); i++) \ + if (x & ((type) 1 << i)) \ + break; \ + return i + 1; \ +} \ + \ +int my_ctz##suffix(type x) { \ + int i; \ + for (i = 0; i < CHAR_BIT * sizeof (type); i++) \ + if (x & ((type) 1 << i)) \ + 
break; \ + return i; \ +} \ + \ +int my_clz##suffix(type x) { \ + int i; \ + for (i = 0; i < CHAR_BIT * sizeof (type); i++) \ + if (x & ((type) 1 << ((CHAR_BIT * sizeof (type)) - i - 1))) \ + break; \ + return i; \ +} \ + \ +int my_popcount##suffix(type x) { \ + int i; \ + int count = 0; \ + for (i = 0; i < CHAR_BIT * sizeof (type); i++) \ + if (x & ((type) 1 << i)) \ + count++; \ + return count; \ +} \ + \ +int my_parity##suffix(type x) { \ + int i; \ + int count = 0; \ + for (i = 0; i < CHAR_BIT * sizeof (type); i++) \ + if (x & ((type) 1 << i)) \ + count++; \ + return count & 1; \ +} + +MAKE_FUNS (, unsigned); +MAKE_FUNS (l, unsigned long); +MAKE_FUNS (ll, unsigned long long); + +extern void abort (void); +extern void exit (int); + +#define NUMS32 \ + { \ + 0x00000000U, \ + 0x00000001U, \ + 0x80000000U, \ + 0x00000002U, \ + 0x40000000U, \ + 0x00010000U, \ + 0x00008000U, \ + 0xa5a5a5a5U, \ + 0x5a5a5a5aU, \ + 0xcafe0000U, \ + 0x00cafe00U, \ + 0x0000cafeU, \ + 0xffffffffU \ + } + +#define NUMS64 \ + { \ + 0x0000000000000000ULL, \ + 0x0000000000000001ULL, \ + 0x8000000000000000ULL, \ + 0x0000000000000002ULL, \ + 0x4000000000000000ULL, \ + 0x0000000100000000ULL, \ + 0x0000000080000000ULL, \ + 0xa5a5a5a5a5a5a5a5ULL, \ + 0x5a5a5a5a5a5a5a5aULL, \ + 0xcafecafe00000000ULL, \ + 0x0000cafecafe0000ULL, \ + 0x00000000cafecafeULL, \ + 0xffffffffffffffffULL \ + } + +unsigned int ints[] = NUMS32; + +unsigned long longs[] = +#if __LONG_MAX__ >= 9223372036854775807L +NUMS64; +#else +NUMS32; +#endif + +unsigned long long longlongs[] = NUMS64; + +#define N(table) (sizeof (table) / sizeof (table[0])) + +int +main (void) +{ + int i; + + for (i = 0; i < N(ints); i++) + { + if (__builtin_ffs (ints[i]) != my_ffs (ints[i])) + abort (); + if (ints[i] != 0 + && __builtin_clz (ints[i]) != my_clz (ints[i])) + abort (); + if (ints[i] != 0 + && __builtin_ctz (ints[i]) != my_ctz (ints[i])) + abort (); + if (__builtin_popcount (ints[i]) != my_popcount (ints[i])) + abort (); + if (__builtin_parity 
(ints[i]) != my_parity (ints[i])) + abort (); + } + + for (i = 0; i < N(longs); i++) + { + if (__builtin_ffsl (longs[i]) != my_ffsl (longs[i])) + abort (); + if (longs[i] != 0 + && __builtin_clzl (longs[i]) != my_clzl (longs[i])) + abort (); + if (longs[i] != 0 + && __builtin_ctzl (longs[i]) != my_ctzl (longs[i])) + abort (); + if (__builtin_popcountl (longs[i]) != my_popcountl (longs[i])) + abort (); + if (__builtin_parityl (longs[i]) != my_parityl (longs[i])) + abort (); + } + + for (i = 0; i < N(longlongs); i++) + { + if (__builtin_ffsll (longlongs[i]) != my_ffsll (longlongs[i])) + abort (); + if (longlongs[i] != 0 + && __builtin_clzll (longlongs[i]) != my_clzll (longlongs[i])) + abort (); + if (longlongs[i] != 0 + && __builtin_ctzll (longlongs[i]) != my_ctzll (longlongs[i])) + abort (); + if (__builtin_popcountll (longlongs[i]) != my_popcountll (longlongs[i])) + abort (); + if (__builtin_parityll (longlongs[i]) != my_parityll (longlongs[i])) + abort (); + } + + /* Test constant folding. 
*/ + +#define TEST(x, suffix) \ + if (__builtin_ffs##suffix (x) != my_ffs##suffix (x)) \ + abort (); \ + if (x != 0 && __builtin_clz##suffix (x) != my_clz##suffix (x)) \ + abort (); \ + if (x != 0 && __builtin_ctz##suffix (x) != my_ctz##suffix (x)) \ + abort (); \ + if (__builtin_popcount##suffix (x) != my_popcount##suffix (x)) \ + abort (); \ + if (__builtin_parity##suffix (x) != my_parity##suffix (x)) \ + abort (); + + TEST(0x00000000UL,); + TEST(0x00000001UL,); + TEST(0x80000000UL,); + TEST(0x40000000UL,); + TEST(0x00010000UL,); + TEST(0x00008000UL,); + TEST(0xa5a5a5a5UL,); + TEST(0x5a5a5a5aUL,); + TEST(0xcafe0000UL,); + TEST(0x00cafe00UL,); + TEST(0x0000cafeUL,); + TEST(0xffffffffUL,); + + TEST(0x0000000000000000ULL, ll); + TEST(0x0000000000000001ULL, ll); + TEST(0x8000000000000000ULL, ll); + TEST(0x0000000000000002ULL, ll); + TEST(0x4000000000000000ULL, ll); + TEST(0x0000000100000000ULL, ll); + TEST(0x0000000080000000ULL, ll); + TEST(0xa5a5a5a5a5a5a5a5ULL, ll); + TEST(0x5a5a5a5a5a5a5a5aULL, ll); + TEST(0xcafecafe00000000ULL, ll); + TEST(0x0000cafecafe0000ULL, ll); + TEST(0x00000000cafecafeULL, ll); + TEST(0xffffffffffffffffULL, ll); + + exit (0); +} diff --git a/gcc/tree.def b/gcc/tree.def index e05ec281dc4..03e44bcf242 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -614,7 +614,12 @@ DEFTREECODE (MAX_EXPR, "max_expr", '2', 2) operand of the ABS_EXPR must have the same type. */ DEFTREECODE (ABS_EXPR, "abs_expr", '1', 1) +/* Bit scanning and counting. */ DEFTREECODE (FFS_EXPR, "ffs_expr", '1', 1) +DEFTREECODE (CLZ_EXPR, "clz_expr", '1', 1) +DEFTREECODE (CTZ_EXPR, "ctz_expr", '1', 1) +DEFTREECODE (POPCOUNT_EXPR, "popcount_expr", '1', 1) +DEFTREECODE (PARITY_EXPR, "parity_expr", '1', 1) /* Shift operations for shift and rotate. Shift means logical shift if done on an |