diff options
94 files changed, 27465 insertions, 192 deletions
diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/Makefile.am diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am index b7ef912a440..71139abcf69 100644 --- a/libgfortran/Makefile.am +++ b/libgfortran/Makefile.am @@ -243,7 +243,8 @@ i_bessel_c= \ $(srcdir)/generated/bessel_r4.c \ $(srcdir)/generated/bessel_r8.c \ $(srcdir)/generated/bessel_r10.c \ -$(srcdir)/generated/bessel_r16.c +$(srcdir)/generated/bessel_r16.c \ +$(srcdir)/generated/bessel_r17.c i_count_c= \ $(srcdir)/generated/count_1_l.c \ @@ -283,10 +284,12 @@ $(srcdir)/generated/findloc0_r4.c \ $(srcdir)/generated/findloc0_r8.c \ $(srcdir)/generated/findloc0_r10.c \ $(srcdir)/generated/findloc0_r16.c \ +$(srcdir)/generated/findloc0_r17.c \ $(srcdir)/generated/findloc0_c4.c \ $(srcdir)/generated/findloc0_c8.c \ $(srcdir)/generated/findloc0_c10.c \ -$(srcdir)/generated/findloc0_c16.c +$(srcdir)/generated/findloc0_c16.c \ +$(srcdir)/generated/findloc0_c17.c i_findloc0s_c= \ $(srcdir)/generated/findloc0_s1.c \ @@ -302,10 +305,12 @@ $(srcdir)/generated/findloc1_r4.c \ $(srcdir)/generated/findloc1_r8.c \ $(srcdir)/generated/findloc1_r10.c \ $(srcdir)/generated/findloc1_r16.c \ +$(srcdir)/generated/findloc1_r17.c \ $(srcdir)/generated/findloc1_c4.c \ $(srcdir)/generated/findloc1_c8.c \ $(srcdir)/generated/findloc1_c10.c \ -$(srcdir)/generated/findloc1_c16.c +$(srcdir)/generated/findloc1_c16.c \ +$(srcdir)/generated/findloc1_c17.c i_findloc1s_c= \ $(srcdir)/generated/findloc1_s1.c \ @@ -342,7 +347,10 @@ $(srcdir)/generated/maxloc0_8_r10.c \ $(srcdir)/generated/maxloc0_16_r10.c \ $(srcdir)/generated/maxloc0_4_r16.c \ $(srcdir)/generated/maxloc0_8_r16.c \ -$(srcdir)/generated/maxloc0_16_r16.c +$(srcdir)/generated/maxloc0_16_r16.c \ +$(srcdir)/generated/maxloc0_4_r17.c \ +$(srcdir)/generated/maxloc0_8_r17.c \ +$(srcdir)/generated/maxloc0_16_r17.c i_maxloc0s_c = \ $(srcdir)/generated/maxloc0_4_s1.c \ @@ -379,7 +387,10 @@ $(srcdir)/generated/maxloc1_8_r10.c \ $(srcdir)/generated/maxloc1_16_r10.c \ $(srcdir)/generated/maxloc1_4_r16.c \ $(srcdir)/generated/maxloc1_8_r16.c \ -$(srcdir)/generated/maxloc1_16_r16.c +$(srcdir)/generated/maxloc1_16_r16.c \ +$(srcdir)/generated/maxloc1_4_r17.c \ +$(srcdir)/generated/maxloc1_8_r17.c \ +$(srcdir)/generated/maxloc1_16_r17.c i_maxloc1s_c= \ $(srcdir)/generated/maxloc1_4_s1.c \ @@ -406,7 +417,8 @@ $(srcdir)/generated/maxval_i16.c \ $(srcdir)/generated/maxval_r4.c \ $(srcdir)/generated/maxval_r8.c \ $(srcdir)/generated/maxval_r10.c \ -$(srcdir)/generated/maxval_r16.c +$(srcdir)/generated/maxval_r16.c \ +$(srcdir)/generated/maxval_r17.c i_maxval0s_c=\ $(srcdir)/generated/maxval0_s1.c \ @@ -443,7 +455,10 @@ $(srcdir)/generated/minloc0_8_r10.c \ $(srcdir)/generated/minloc0_16_r10.c \ $(srcdir)/generated/minloc0_4_r16.c \ $(srcdir)/generated/minloc0_8_r16.c \ -$(srcdir)/generated/minloc0_16_r16.c +$(srcdir)/generated/minloc0_16_r16.c \ +$(srcdir)/generated/minloc0_4_r17.c \ +$(srcdir)/generated/minloc0_8_r17.c \ +$(srcdir)/generated/minloc0_16_r17.c i_minloc0s_c = \ $(srcdir)/generated/minloc0_4_s1.c \ @@ -480,7 +495,10 @@ $(srcdir)/generated/minloc1_8_r10.c \ $(srcdir)/generated/minloc1_16_r10.c \ $(srcdir)/generated/minloc1_4_r16.c \ $(srcdir)/generated/minloc1_8_r16.c \ -$(srcdir)/generated/minloc1_16_r16.c +$(srcdir)/generated/minloc1_16_r16.c \ +$(srcdir)/generated/minloc1_4_r17.c \ +$(srcdir)/generated/minloc1_8_r17.c \ +$(srcdir)/generated/minloc1_16_r17.c i_minloc1s_c= \ $(srcdir)/generated/minloc1_4_s1.c \ @@ -507,7 +525,8 @@ $(srcdir)/generated/minval_i16.c \ $(srcdir)/generated/minval_r4.c \ $(srcdir)/generated/minval_r8.c \ $(srcdir)/generated/minval_r10.c \ -$(srcdir)/generated/minval_r16.c +$(srcdir)/generated/minval_r16.c \ +$(srcdir)/generated/minval_r17.c i_minval0s_c=\ $(srcdir)/generated/minval0_s1.c \ @@ -521,7 +540,8 @@ i_norm2_c= \ $(srcdir)/generated/norm2_r4.c \ $(srcdir)/generated/norm2_r8.c \ $(srcdir)/generated/norm2_r10.c \ -$(srcdir)/generated/norm2_r16.c +$(srcdir)/generated/norm2_r16.c \ +$(srcdir)/generated/norm2_r17.c i_parity_c = \ $(srcdir)/generated/parity_l1.c \ @@ -540,10 +560,12 @@ $(srcdir)/generated/sum_r4.c \ $(srcdir)/generated/sum_r8.c \ $(srcdir)/generated/sum_r10.c \ $(srcdir)/generated/sum_r16.c \ +$(srcdir)/generated/sum_r17.c \ $(srcdir)/generated/sum_c4.c \ $(srcdir)/generated/sum_c8.c \ $(srcdir)/generated/sum_c10.c \ -$(srcdir)/generated/sum_c16.c +$(srcdir)/generated/sum_c16.c \ +$(srcdir)/generated/sum_c17.c i_product_c= \ $(srcdir)/generated/product_i1.c \ @@ -555,10 +577,12 @@ $(srcdir)/generated/product_r4.c \ $(srcdir)/generated/product_r8.c \ $(srcdir)/generated/product_r10.c \ $(srcdir)/generated/product_r16.c \ +$(srcdir)/generated/product_r17.c \ $(srcdir)/generated/product_c4.c \ $(srcdir)/generated/product_c8.c \ $(srcdir)/generated/product_c10.c \ -$(srcdir)/generated/product_c16.c +$(srcdir)/generated/product_c16.c \ +$(srcdir)/generated/product_c17.c i_matmul_c= \ $(srcdir)/generated/matmul_i1.c \ @@ -570,10 +594,12 @@ $(srcdir)/generated/matmul_r4.c \ $(srcdir)/generated/matmul_r8.c \ $(srcdir)/generated/matmul_r10.c \ $(srcdir)/generated/matmul_r16.c \ +$(srcdir)/generated/matmul_r17.c \ $(srcdir)/generated/matmul_c4.c \ $(srcdir)/generated/matmul_c8.c \ $(srcdir)/generated/matmul_c10.c \ -$(srcdir)/generated/matmul_c16.c +$(srcdir)/generated/matmul_c16.c \ +$(srcdir)/generated/matmul_c17.c i_matmulavx128_c= \ $(srcdir)/generated/matmulavx128_i1.c \ @@ -585,10 +611,12 @@ $(srcdir)/generated/matmulavx128_r4.c \ $(srcdir)/generated/matmulavx128_r8.c \ $(srcdir)/generated/matmulavx128_r10.c \ $(srcdir)/generated/matmulavx128_r16.c \ +$(srcdir)/generated/matmulavx128_r17.c \ $(srcdir)/generated/matmulavx128_c4.c \ $(srcdir)/generated/matmulavx128_c8.c \ $(srcdir)/generated/matmulavx128_c10.c \ -$(srcdir)/generated/matmulavx128_c16.c +$(srcdir)/generated/matmulavx128_c16.c \ +$(srcdir)/generated/matmulavx128_c17.c i_matmull_c= \ $(srcdir)/generated/matmul_l4.c \ @@ -610,10 +638,12 @@ $(srcdir)/generated/reshape_r4.c \ $(srcdir)/generated/reshape_r8.c \ $(srcdir)/generated/reshape_r10.c \ $(srcdir)/generated/reshape_r16.c \ +$(srcdir)/generated/reshape_r17.c \ $(srcdir)/generated/reshape_c4.c \ $(srcdir)/generated/reshape_c8.c \ $(srcdir)/generated/reshape_c10.c \ -$(srcdir)/generated/reshape_c16.c +$(srcdir)/generated/reshape_c16.c \ +$(srcdir)/generated/reshape_c17.c i_eoshift1_c= \ $(srcdir)/generated/eoshift1_4.c \ @@ -635,10 +665,12 @@ $(srcdir)/generated/cshift0_r4.c \ $(srcdir)/generated/cshift0_r8.c \ $(srcdir)/generated/cshift0_r10.c \ $(srcdir)/generated/cshift0_r16.c \ +$(srcdir)/generated/cshift0_r17.c \ $(srcdir)/generated/cshift0_c4.c \ $(srcdir)/generated/cshift0_c8.c \ $(srcdir)/generated/cshift0_c10.c \ -$(srcdir)/generated/cshift0_c16.c +$(srcdir)/generated/cshift0_c16.c \ +$(srcdir)/generated/cshift0_c17.c i_cshift1_c= \ $(srcdir)/generated/cshift1_4.c \ @@ -655,10 +687,12 @@ $(srcdir)/generated/cshift1_4_r4.c \ $(srcdir)/generated/cshift1_4_r8.c \ $(srcdir)/generated/cshift1_4_r10.c \ $(srcdir)/generated/cshift1_4_r16.c \ +$(srcdir)/generated/cshift1_4_r17.c \ $(srcdir)/generated/cshift1_4_c4.c \ $(srcdir)/generated/cshift1_4_c8.c \ $(srcdir)/generated/cshift1_4_c10.c \ $(srcdir)/generated/cshift1_4_c16.c \ +$(srcdir)/generated/cshift1_4_c17.c \ $(srcdir)/generated/cshift1_8_i1.c \ $(srcdir)/generated/cshift1_8_i2.c \ $(srcdir)/generated/cshift1_8_i4.c \ @@ -668,10 +702,12 @@ $(srcdir)/generated/cshift1_8_r4.c \ $(srcdir)/generated/cshift1_8_r8.c \ $(srcdir)/generated/cshift1_8_r10.c \ $(srcdir)/generated/cshift1_8_r16.c \ +$(srcdir)/generated/cshift1_8_r17.c \ $(srcdir)/generated/cshift1_8_c4.c \ $(srcdir)/generated/cshift1_8_c8.c \ $(srcdir)/generated/cshift1_8_c10.c \ $(srcdir)/generated/cshift1_8_c16.c \ +$(srcdir)/generated/cshift1_8_c17.c \ $(srcdir)/generated/cshift1_16_i1.c \ $(srcdir)/generated/cshift1_16_i2.c \ $(srcdir)/generated/cshift1_16_i4.c \ @@ -681,10 +717,12 @@ $(srcdir)/generated/cshift1_16_r4.c \ $(srcdir)/generated/cshift1_16_r8.c \ $(srcdir)/generated/cshift1_16_r10.c \ $(srcdir)/generated/cshift1_16_r16.c \ +$(srcdir)/generated/cshift1_16_r17.c \ $(srcdir)/generated/cshift1_16_c4.c \ $(srcdir)/generated/cshift1_16_c8.c \ $(srcdir)/generated/cshift1_16_c10.c \ -$(srcdir)/generated/cshift1_16_c16.c +$(srcdir)/generated/cshift1_16_c16.c \ +$(srcdir)/generated/cshift1_16_c17.c in_pack_c = \ $(srcdir)/generated/in_pack_i1.c \ @@ -696,10 +734,12 @@ $(srcdir)/generated/in_pack_r4.c \ $(srcdir)/generated/in_pack_r8.c \ $(srcdir)/generated/in_pack_r10.c \ $(srcdir)/generated/in_pack_r16.c \ +$(srcdir)/generated/in_pack_r17.c \ $(srcdir)/generated/in_pack_c4.c \ $(srcdir)/generated/in_pack_c8.c \ $(srcdir)/generated/in_pack_c10.c \ -$(srcdir)/generated/in_pack_c16.c +$(srcdir)/generated/in_pack_c16.c \ +$(srcdir)/generated/in_pack_c17.c in_unpack_c = \ $(srcdir)/generated/in_unpack_i1.c \ @@ -711,20 +751,24 @@ $(srcdir)/generated/in_unpack_r4.c \ $(srcdir)/generated/in_unpack_r8.c \ $(srcdir)/generated/in_unpack_r10.c \ $(srcdir)/generated/in_unpack_r16.c \ +$(srcdir)/generated/in_unpack_r17.c \ $(srcdir)/generated/in_unpack_c4.c \ $(srcdir)/generated/in_unpack_c8.c \ $(srcdir)/generated/in_unpack_c10.c \ -$(srcdir)/generated/in_unpack_c16.c +$(srcdir)/generated/in_unpack_c16.c \ +$(srcdir)/generated/in_unpack_c17.c i_pow_c = \ $(srcdir)/generated/pow_i4_i4.c \ $(srcdir)/generated/pow_i8_i4.c \ $(srcdir)/generated/pow_i16_i4.c \ $(srcdir)/generated/pow_r16_i4.c \ +$(srcdir)/generated/pow_r17_i4.c \ $(srcdir)/generated/pow_c4_i4.c \ $(srcdir)/generated/pow_c8_i4.c \ $(srcdir)/generated/pow_c10_i4.c \ $(srcdir)/generated/pow_c16_i4.c \ +$(srcdir)/generated/pow_c17_i4.c \ $(srcdir)/generated/pow_i4_i8.c \ $(srcdir)/generated/pow_i8_i8.c \ $(srcdir)/generated/pow_i16_i8.c \ @@ -732,10 +776,12 @@ $(srcdir)/generated/pow_r4_i8.c \ $(srcdir)/generated/pow_r8_i8.c \ $(srcdir)/generated/pow_r10_i8.c \ $(srcdir)/generated/pow_r16_i8.c \ +$(srcdir)/generated/pow_r17_i8.c \ $(srcdir)/generated/pow_c4_i8.c \ $(srcdir)/generated/pow_c8_i8.c \ $(srcdir)/generated/pow_c10_i8.c \ $(srcdir)/generated/pow_c16_i8.c \ +$(srcdir)/generated/pow_c17_i8.c \ $(srcdir)/generated/pow_i4_i16.c \ $(srcdir)/generated/pow_i8_i16.c \ $(srcdir)/generated/pow_i16_i16.c \ @@ -743,10 +789,12 @@ $(srcdir)/generated/pow_r4_i16.c \ $(srcdir)/generated/pow_r8_i16.c \ $(srcdir)/generated/pow_r10_i16.c \ $(srcdir)/generated/pow_r16_i16.c \ +$(srcdir)/generated/pow_r17_i16.c \ $(srcdir)/generated/pow_c4_i16.c \ $(srcdir)/generated/pow_c8_i16.c \ $(srcdir)/generated/pow_c10_i16.c \ -$(srcdir)/generated/pow_c16_i16.c +$(srcdir)/generated/pow_c16_i16.c \ +$(srcdir)/generated/pow_c17_i16.c i_pack_c = \ $(srcdir)/generated/pack_i1.c \ @@ -758,10 +806,12 @@ $(srcdir)/generated/pack_r4.c \ $(srcdir)/generated/pack_r8.c \ $(srcdir)/generated/pack_r10.c \ $(srcdir)/generated/pack_r16.c \ +$(srcdir)/generated/pack_r17.c \ $(srcdir)/generated/pack_c4.c \ $(srcdir)/generated/pack_c8.c \ $(srcdir)/generated/pack_c10.c \ -$(srcdir)/generated/pack_c16.c +$(srcdir)/generated/pack_c16.c \ +$(srcdir)/generated/pack_c17.c i_unpack_c = \ $(srcdir)/generated/unpack_i1.c \ @@ -773,10 +823,12 @@ $(srcdir)/generated/unpack_r4.c \ $(srcdir)/generated/unpack_r8.c \ $(srcdir)/generated/unpack_r10.c \ $(srcdir)/generated/unpack_r16.c \ +$(srcdir)/generated/unpack_r17.c \ $(srcdir)/generated/unpack_c4.c \ $(srcdir)/generated/unpack_c8.c \ $(srcdir)/generated/unpack_c10.c \ -$(srcdir)/generated/unpack_c16.c +$(srcdir)/generated/unpack_c16.c \ +$(srcdir)/generated/unpack_c17.c i_spread_c = \ $(srcdir)/generated/spread_i1.c \ @@ -788,43 +840,45 @@ $(srcdir)/generated/spread_r4.c \ $(srcdir)/generated/spread_r8.c \ $(srcdir)/generated/spread_r10.c \ $(srcdir)/generated/spread_r16.c \ +$(srcdir)/generated/spread_r17.c \ $(srcdir)/generated/spread_c4.c \ $(srcdir)/generated/spread_c8.c \ $(srcdir)/generated/spread_c10.c \ -$(srcdir)/generated/spread_c16.c +$(srcdir)/generated/spread_c16.c \ +$(srcdir)/generated/spread_c17.c i_isobinding_c = \ $(srcdir)/runtime/ISO_Fortran_binding.c m4_files= m4/iparm.m4 m4/ifunction.m4 m4/iforeach.m4 m4/all.m4 \ - m4/any.m4 m4/count.m4 m4/maxloc0.m4 m4/maxloc1.m4 m4/maxval.m4 \ - m4/minloc0.m4 m4/minloc1.m4 m4/minval.m4 m4/product.m4 m4/sum.m4 \ - m4/matmul.m4 m4/matmull.m4 m4/ifunction_logical.m4 \ - m4/ctrig.m4 m4/cexp.m4 m4/chyp.m4 m4/mtype.m4 \ - m4/specific.m4 m4/specific2.m4 m4/head.m4 m4/shape.m4 m4/reshape.m4 \ - m4/eoshift1.m4 m4/eoshift3.m4 \ - m4/pow.m4 \ - m4/misc_specifics.m4 m4/pack.m4 \ - m4/unpack.m4 m4/spread.m4 m4/bessel.m4 m4/norm2.m4 m4/parity.m4 \ - m4/iall.m4 m4/iany.m4 m4/iparity.m4 m4/iforeach-s.m4 m4/findloc0.m4 \ - m4/findloc0s.m4 m4/ifindloc0.m4 m4/findloc1.m4 m4/ifindloc1.m4 \ - m4/findloc2s.m4 m4/ifindloc2.m4 + m4/any.m4 m4/count.m4 m4/maxloc0.m4 m4/maxloc1.m4 m4/maxval.m4 \ + m4/minloc0.m4 m4/minloc1.m4 m4/minval.m4 m4/product.m4 m4/sum.m4 \ + m4/matmul.m4 m4/matmull.m4 m4/ifunction_logical.m4 \ + m4/ctrig.m4 m4/cexp.m4 m4/chyp.m4 m4/mtype.m4 \ + m4/specific.m4 m4/specific2.m4 m4/head.m4 m4/shape.m4 m4/reshape.m4 \ + m4/eoshift1.m4 m4/eoshift3.m4 \ + m4/pow.m4 \ + m4/misc_specifics.m4 m4/pack.m4 \ + m4/unpack.m4 m4/spread.m4 m4/bessel.m4 m4/norm2.m4 m4/parity.m4 \ + m4/iall.m4 m4/iany.m4 m4/iparity.m4 m4/iforeach-s.m4 m4/findloc0.m4 \ + m4/findloc0s.m4 m4/ifindloc0.m4 m4/findloc1.m4 m4/ifindloc1.m4 \ + m4/findloc2s.m4 m4/ifindloc2.m4 gfor_built_src= $(i_all_c) $(i_any_c) $(i_count_c) $(i_maxloc0_c) \ - $(i_maxloc1_c) $(i_maxval_c) $(i_minloc0_c) $(i_minloc1_c) $(i_minval_c) \ - $(i_product_c) $(i_sum_c) $(i_bessel_c) $(i_iall_c) $(i_iany_c) \ - $(i_iparity_c) $(i_norm2_c) $(i_parity_c) \ - $(i_matmul_c) $(i_matmull_c) $(i_shape_c) $(i_eoshift1_c) \ - $(i_eoshift3_c) $(i_cshift1_c) $(i_reshape_c) $(in_pack_c) $(in_unpack_c) \ - $(i_pow_c) $(i_pack_c) $(i_unpack_c) $(i_matmulavx128_c) \ - $(i_spread_c) selected_int_kind.inc selected_real_kind.inc kinds.h \ - $(i_cshift0_c) kinds.inc c99_protos.inc fpu-target.h fpu-target.inc \ - include/ISO_Fortran_binding.h \ - $(i_cshift1a_c) $(i_maxloc0s_c) $(i_minloc0s_c) $(i_maxloc1s_c) \ - $(i_minloc1s_c) $(i_maxloc2s_c) $(i_minloc2s_c) $(i_maxvals_c) \ - $(i_maxval0s_c) $(i_minval0s_c) $(i_maxval1s_c) $(i_minval1s_c) \ - $(i_findloc0_c) $(i_findloc0s_c) $(i_findloc1_c) $(i_findloc1s_c) \ - $(i_findloc2s_c) $(i_isobinding_c) + $(i_maxloc1_c) $(i_maxval_c) $(i_minloc0_c) $(i_minloc1_c) $(i_minval_c) \ + $(i_product_c) $(i_sum_c) $(i_bessel_c) $(i_iall_c) $(i_iany_c) \ + $(i_iparity_c) $(i_norm2_c) $(i_parity_c) \ + $(i_matmul_c) $(i_matmull_c) $(i_shape_c) $(i_eoshift1_c) \ + $(i_eoshift3_c) $(i_cshift1_c) $(i_reshape_c) $(in_pack_c) $(in_unpack_c) \ + $(i_pow_c) $(i_pack_c) $(i_unpack_c) $(i_matmulavx128_c) \ + $(i_spread_c) selected_int_kind.inc selected_real_kind.inc kinds.h \ + $(i_cshift0_c) kinds.inc c99_protos.inc fpu-target.h fpu-target.inc \ + include/ISO_Fortran_binding.h \ + $(i_cshift1a_c) $(i_maxloc0s_c) $(i_minloc0s_c) $(i_maxloc1s_c) \ + $(i_minloc1s_c) $(i_maxloc2s_c) $(i_minloc2s_c) $(i_maxvals_c) \ + $(i_maxval0s_c) $(i_minval0s_c) $(i_maxval1s_c) $(i_minval1s_c) \ + $(i_findloc0_c) $(i_findloc0s_c) $(i_findloc1_c) $(i_findloc1s_c) \ + $(i_findloc2s_c) $(i_isobinding_c) # Machine generated specifics gfor_built_specific_src= \ @@ -832,6 +886,7 @@ $(srcdir)/generated/_abs_c4.F90 \ $(srcdir)/generated/_abs_c8.F90 \ $(srcdir)/generated/_abs_c10.F90 \ $(srcdir)/generated/_abs_c16.F90 \ +$(srcdir)/generated/_abs_c17.F90 \ $(srcdir)/generated/_abs_i4.F90 \ $(srcdir)/generated/_abs_i8.F90 \ $(srcdir)/generated/_abs_i16.F90 \ @@ -839,106 +894,132 @@ $(srcdir)/generated/_abs_r4.F90 \ $(srcdir)/generated/_abs_r8.F90 \ $(srcdir)/generated/_abs_r10.F90 \ $(srcdir)/generated/_abs_r16.F90 \ +$(srcdir)/generated/_abs_r17.F90 \ $(srcdir)/generated/_aimag_c4.F90 \ $(srcdir)/generated/_aimag_c8.F90 \ $(srcdir)/generated/_aimag_c10.F90 \ $(srcdir)/generated/_aimag_c16.F90 \ +$(srcdir)/generated/_aimag_c17.F90 \ $(srcdir)/generated/_exp_r4.F90 \ $(srcdir)/generated/_exp_r8.F90 \ $(srcdir)/generated/_exp_r10.F90 \ $(srcdir)/generated/_exp_r16.F90 \ +$(srcdir)/generated/_exp_r17.F90 \ $(srcdir)/generated/_exp_c4.F90 \ $(srcdir)/generated/_exp_c8.F90 \ $(srcdir)/generated/_exp_c10.F90 \ $(srcdir)/generated/_exp_c16.F90 \ +$(srcdir)/generated/_exp_c17.F90 \ $(srcdir)/generated/_log_r4.F90 \ $(srcdir)/generated/_log_r8.F90 \ $(srcdir)/generated/_log_r10.F90 \ $(srcdir)/generated/_log_r16.F90 \ +$(srcdir)/generated/_log_r17.F90 \ $(srcdir)/generated/_log_c4.F90 \ $(srcdir)/generated/_log_c8.F90 \ $(srcdir)/generated/_log_c10.F90 \ $(srcdir)/generated/_log_c16.F90 \ +$(srcdir)/generated/_log_c17.F90 \ $(srcdir)/generated/_log10_r4.F90 \ $(srcdir)/generated/_log10_r8.F90 \ $(srcdir)/generated/_log10_r10.F90 \ $(srcdir)/generated/_log10_r16.F90 \ +$(srcdir)/generated/_log10_r17.F90 \ $(srcdir)/generated/_sqrt_r4.F90 \ $(srcdir)/generated/_sqrt_r8.F90 \ $(srcdir)/generated/_sqrt_r10.F90 \ $(srcdir)/generated/_sqrt_r16.F90 \ +$(srcdir)/generated/_sqrt_r17.F90 \ $(srcdir)/generated/_sqrt_c4.F90 \ $(srcdir)/generated/_sqrt_c8.F90 \ $(srcdir)/generated/_sqrt_c10.F90 \ $(srcdir)/generated/_sqrt_c16.F90 \ +$(srcdir)/generated/_sqrt_c17.F90 \ $(srcdir)/generated/_asin_r4.F90 \ $(srcdir)/generated/_asin_r8.F90 \ $(srcdir)/generated/_asin_r10.F90 \ $(srcdir)/generated/_asin_r16.F90 \ +$(srcdir)/generated/_asin_r17.F90 \ $(srcdir)/generated/_asinh_r4.F90 \ $(srcdir)/generated/_asinh_r8.F90 \ $(srcdir)/generated/_asinh_r10.F90 \ $(srcdir)/generated/_asinh_r16.F90 \ +$(srcdir)/generated/_asinh_r17.F90 \ $(srcdir)/generated/_acos_r4.F90 \ $(srcdir)/generated/_acos_r8.F90 \ $(srcdir)/generated/_acos_r10.F90 \ $(srcdir)/generated/_acos_r16.F90 \ +$(srcdir)/generated/_acos_r17.F90 \ $(srcdir)/generated/_acosh_r4.F90 \ $(srcdir)/generated/_acosh_r8.F90 \ $(srcdir)/generated/_acosh_r10.F90 \ $(srcdir)/generated/_acosh_r16.F90 \ +$(srcdir)/generated/_acosh_r17.F90 \ $(srcdir)/generated/_atan_r4.F90 \ $(srcdir)/generated/_atan_r8.F90 \ $(srcdir)/generated/_atan_r10.F90 \ $(srcdir)/generated/_atan_r16.F90 \ +$(srcdir)/generated/_atan_r17.F90 \ $(srcdir)/generated/_atanh_r4.F90 \ $(srcdir)/generated/_atanh_r8.F90 \ $(srcdir)/generated/_atanh_r10.F90 \ $(srcdir)/generated/_atanh_r16.F90 \ +$(srcdir)/generated/_atanh_r17.F90 \ $(srcdir)/generated/_sin_r4.F90 \ $(srcdir)/generated/_sin_r8.F90 \ $(srcdir)/generated/_sin_r10.F90 \ $(srcdir)/generated/_sin_r16.F90 \ +$(srcdir)/generated/_sin_r17.F90 \ $(srcdir)/generated/_sin_c4.F90 \ $(srcdir)/generated/_sin_c8.F90 \ $(srcdir)/generated/_sin_c10.F90 \ $(srcdir)/generated/_sin_c16.F90 \ +$(srcdir)/generated/_sin_c17.F90 \ $(srcdir)/generated/_cos_r4.F90 \ $(srcdir)/generated/_cos_r8.F90 \ $(srcdir)/generated/_cos_r10.F90 \ $(srcdir)/generated/_cos_r16.F90 \ +$(srcdir)/generated/_cos_r17.F90 \ $(srcdir)/generated/_cos_c4.F90 \ $(srcdir)/generated/_cos_c8.F90 \ $(srcdir)/generated/_cos_c10.F90 \ $(srcdir)/generated/_cos_c16.F90 \ +$(srcdir)/generated/_cos_c17.F90 \ $(srcdir)/generated/_tan_r4.F90 \ $(srcdir)/generated/_tan_r8.F90 \ $(srcdir)/generated/_tan_r10.F90 \ $(srcdir)/generated/_tan_r16.F90 \ +$(srcdir)/generated/_tan_r17.F90 \ $(srcdir)/generated/_sinh_r4.F90 \ $(srcdir)/generated/_sinh_r8.F90 \ $(srcdir)/generated/_sinh_r10.F90 \ $(srcdir)/generated/_sinh_r16.F90 \ +$(srcdir)/generated/_sinh_r17.F90 \ $(srcdir)/generated/_cosh_r4.F90 \ $(srcdir)/generated/_cosh_r8.F90 \ $(srcdir)/generated/_cosh_r10.F90 \ $(srcdir)/generated/_cosh_r16.F90 \ +$(srcdir)/generated/_cosh_r17.F90 \ $(srcdir)/generated/_tanh_r4.F90 \ $(srcdir)/generated/_tanh_r8.F90 \ $(srcdir)/generated/_tanh_r10.F90 \ $(srcdir)/generated/_tanh_r16.F90 \ +$(srcdir)/generated/_tanh_r17.F90 \ $(srcdir)/generated/_conjg_c4.F90 \ $(srcdir)/generated/_conjg_c8.F90 \ $(srcdir)/generated/_conjg_c10.F90 \ $(srcdir)/generated/_conjg_c16.F90 \ +$(srcdir)/generated/_conjg_c17.F90 \ $(srcdir)/generated/_aint_r4.F90 \ $(srcdir)/generated/_aint_r8.F90 \ $(srcdir)/generated/_aint_r10.F90 \ $(srcdir)/generated/_aint_r16.F90 \ +$(srcdir)/generated/_aint_r17.F90 \ $(srcdir)/generated/_anint_r4.F90 \ $(srcdir)/generated/_anint_r8.F90 \ $(srcdir)/generated/_anint_r10.F90 \ -$(srcdir)/generated/_anint_r16.F90 +$(srcdir)/generated/_anint_r16.F90 \ +$(srcdir)/generated/_anint_r17.F90 gfor_built_specific2_src= \ $(srcdir)/generated/_sign_i4.F90 \ @@ -948,6 +1029,7 @@ $(srcdir)/generated/_sign_r4.F90 \ $(srcdir)/generated/_sign_r8.F90 \ $(srcdir)/generated/_sign_r10.F90 \ $(srcdir)/generated/_sign_r16.F90 \ +$(srcdir)/generated/_sign_r17.F90 \ $(srcdir)/generated/_dim_i4.F90 \ $(srcdir)/generated/_dim_i8.F90 \ $(srcdir)/generated/_dim_i16.F90 \ @@ -955,17 +1037,20 @@ $(srcdir)/generated/_dim_r4.F90 \ $(srcdir)/generated/_dim_r8.F90 \ $(srcdir)/generated/_dim_r10.F90 \ $(srcdir)/generated/_dim_r16.F90 \ +$(srcdir)/generated/_dim_r17.F90 \ $(srcdir)/generated/_atan2_r4.F90 \ $(srcdir)/generated/_atan2_r8.F90 \ $(srcdir)/generated/_atan2_r10.F90 \ $(srcdir)/generated/_atan2_r16.F90 \ +$(srcdir)/generated/_atan2_r17.F90 \ $(srcdir)/generated/_mod_i4.F90 \ $(srcdir)/generated/_mod_i8.F90 \ $(srcdir)/generated/_mod_i16.F90 \ $(srcdir)/generated/_mod_r4.F90 \ $(srcdir)/generated/_mod_r8.F90 \ $(srcdir)/generated/_mod_r10.F90 \ -$(srcdir)/generated/_mod_r16.F90 +$(srcdir)/generated/_mod_r16.F90 \ +$(srcdir)/generated/_mod_r17.F90 gfor_misc_specifics = $(srcdir)/generated/misc_specifics.F90 @@ -991,6 +1076,11 @@ $(patsubst %.c,%.lo,$(notdir $(i_matmull_c))): AM_CFLAGS += -funroll-loops $(patsubst %.F90,%.lo,$(patsubst %.f90,%.lo,$(notdir $(gfor_specific_src)))): AM_FCFLAGS += -fallow-leading-underscore selected_real_kind.lo selected_int_kind.lo: AM_FCFLAGS += -fallow-leading-underscore +# Build *_r17.F90 and *_c17.F90 with additional -mabi=ieeelongdouble on powerpc64le-linux. +if HAVE_REAL_17 +$(patsubst %.F90,%.lo,$(filter %_r17.F90 %_c17.F90,$(notdir $(gfor_specific_src)))): AM_FCFLAGS += -mabi=ieeelongdouble +endif + if IEEE_SUPPORT # Add flags for IEEE modules $(patsubst %.F90,%.lo,$(notdir $(gfor_ieee_src))): AM_FCFLAGS += -Wno-unused-dummy-argument -Wno-c-binding-type -ffree-line-length-0 -fallow-leading-underscore -fsignaling-nans @@ -1012,10 +1102,10 @@ ieee_arithmetic.mod: ieee_arithmetic.lo : BUILT_SOURCES=$(gfor_built_src) $(gfor_built_specific_src) \ - $(gfor_built_specific2_src) $(gfor_misc_specifics) + $(gfor_built_specific2_src) $(gfor_misc_specifics) prereq_SRC = $(gfor_src) $(gfor_built_src) $(gfor_io_src) \ - $(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src) + $(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src) if onestep # dummy sources for libtool @@ -1048,7 +1138,7 @@ libgfortran_la_SOURCES = $(prereq_SRC) endif -I_M4_DEPS=m4/iparm.m4 +I_M4_DEPS=m4/iparm.m4 m4/mtype.m4 I_M4_DEPS0=$(I_M4_DEPS) m4/iforeach.m4 I_M4_DEPS1=$(I_M4_DEPS) m4/ifunction.m4 I_M4_DEPS2=$(I_M4_DEPS) m4/ifunction_logical.m4 diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in index 3684b2aaa75..2c7d59ab138 100644 --- a/libgfortran/Makefile.in +++ b/libgfortran/Makefile.in @@ -240,7 +240,8 @@ am__objects_7 = maxloc0_4_i1.lo maxloc0_8_i1.lo maxloc0_16_i1.lo \ maxloc0_4_r4.lo maxloc0_8_r4.lo maxloc0_16_r4.lo \ maxloc0_4_r8.lo maxloc0_8_r8.lo maxloc0_16_r8.lo \ maxloc0_4_r10.lo maxloc0_8_r10.lo maxloc0_16_r10.lo \ - maxloc0_4_r16.lo maxloc0_8_r16.lo maxloc0_16_r16.lo + maxloc0_4_r16.lo maxloc0_8_r16.lo maxloc0_16_r16.lo \ + maxloc0_4_r17.lo maxloc0_8_r17.lo maxloc0_16_r17.lo am__objects_8 = maxloc1_4_i1.lo maxloc1_8_i1.lo maxloc1_16_i1.lo \ maxloc1_4_i2.lo maxloc1_8_i2.lo maxloc1_16_i2.lo \ maxloc1_4_i4.lo maxloc1_8_i4.lo maxloc1_16_i4.lo \ @@ -249,10 +250,11 @@ am__objects_8 = maxloc1_4_i1.lo maxloc1_8_i1.lo maxloc1_16_i1.lo \ maxloc1_4_r4.lo maxloc1_8_r4.lo maxloc1_16_r4.lo \ maxloc1_4_r8.lo maxloc1_8_r8.lo maxloc1_16_r8.lo \ maxloc1_4_r10.lo maxloc1_8_r10.lo maxloc1_16_r10.lo \ - maxloc1_4_r16.lo maxloc1_8_r16.lo maxloc1_16_r16.lo + maxloc1_4_r16.lo maxloc1_8_r16.lo maxloc1_16_r16.lo \ + maxloc1_4_r17.lo maxloc1_8_r17.lo maxloc1_16_r17.lo am__objects_9 = maxval_i1.lo maxval_i2.lo maxval_i4.lo maxval_i8.lo \ maxval_i16.lo maxval_r4.lo maxval_r8.lo maxval_r10.lo \ - maxval_r16.lo + maxval_r16.lo maxval_r17.lo am__objects_10 = minloc0_4_i1.lo minloc0_8_i1.lo minloc0_16_i1.lo \ minloc0_4_i2.lo minloc0_8_i2.lo minloc0_16_i2.lo \ minloc0_4_i4.lo minloc0_8_i4.lo minloc0_16_i4.lo \ @@ -261,7 +263,8 @@ am__objects_10 = minloc0_4_i1.lo minloc0_8_i1.lo minloc0_16_i1.lo \ minloc0_4_r4.lo minloc0_8_r4.lo minloc0_16_r4.lo \ minloc0_4_r8.lo minloc0_8_r8.lo minloc0_16_r8.lo \ minloc0_4_r10.lo minloc0_8_r10.lo minloc0_16_r10.lo \ - minloc0_4_r16.lo minloc0_8_r16.lo minloc0_16_r16.lo + minloc0_4_r16.lo minloc0_8_r16.lo minloc0_16_r16.lo \ + minloc0_4_r17.lo minloc0_8_r17.lo minloc0_16_r17.lo am__objects_11 = minloc1_4_i1.lo minloc1_8_i1.lo minloc1_16_i1.lo \ minloc1_4_i2.lo minloc1_8_i2.lo minloc1_16_i2.lo \ minloc1_4_i4.lo minloc1_8_i4.lo minloc1_16_i4.lo \ @@ -270,31 +273,34 @@ am__objects_11 = minloc1_4_i1.lo minloc1_8_i1.lo minloc1_16_i1.lo \ minloc1_4_r4.lo minloc1_8_r4.lo minloc1_16_r4.lo \ minloc1_4_r8.lo minloc1_8_r8.lo minloc1_16_r8.lo \ minloc1_4_r10.lo minloc1_8_r10.lo minloc1_16_r10.lo \ - minloc1_4_r16.lo minloc1_8_r16.lo minloc1_16_r16.lo + minloc1_4_r16.lo minloc1_8_r16.lo minloc1_16_r16.lo \ + minloc1_4_r17.lo minloc1_8_r17.lo minloc1_16_r17.lo am__objects_12 = minval_i1.lo minval_i2.lo minval_i4.lo minval_i8.lo \ minval_i16.lo minval_r4.lo minval_r8.lo minval_r10.lo \ - minval_r16.lo + minval_r16.lo minval_r17.lo am__objects_13 = product_i1.lo product_i2.lo product_i4.lo \ product_i8.lo product_i16.lo product_r4.lo product_r8.lo \ - product_r10.lo product_r16.lo product_c4.lo product_c8.lo \ - product_c10.lo product_c16.lo + product_r10.lo product_r16.lo product_r17.lo product_c4.lo \ + product_c8.lo product_c10.lo product_c16.lo product_c17.lo am__objects_14 = sum_i1.lo sum_i2.lo sum_i4.lo sum_i8.lo sum_i16.lo \ - sum_r4.lo sum_r8.lo sum_r10.lo sum_r16.lo sum_c4.lo sum_c8.lo \ - sum_c10.lo sum_c16.lo -am__objects_15 = bessel_r4.lo bessel_r8.lo bessel_r10.lo bessel_r16.lo + sum_r4.lo sum_r8.lo sum_r10.lo sum_r16.lo sum_r17.lo sum_c4.lo \ + sum_c8.lo sum_c10.lo sum_c16.lo sum_c17.lo +am__objects_15 = bessel_r4.lo bessel_r8.lo bessel_r10.lo bessel_r16.lo \ + bessel_r17.lo am__objects_16 = iall_i1.lo iall_i2.lo iall_i4.lo iall_i8.lo \ iall_i16.lo am__objects_17 = iany_i1.lo iany_i2.lo iany_i4.lo iany_i8.lo \ iany_i16.lo am__objects_18 = iparity_i1.lo iparity_i2.lo iparity_i4.lo \ iparity_i8.lo iparity_i16.lo -am__objects_19 = norm2_r4.lo norm2_r8.lo norm2_r10.lo norm2_r16.lo +am__objects_19 = norm2_r4.lo norm2_r8.lo norm2_r10.lo norm2_r16.lo \ + norm2_r17.lo am__objects_20 = parity_l1.lo parity_l2.lo parity_l4.lo parity_l8.lo \ parity_l16.lo am__objects_21 = matmul_i1.lo matmul_i2.lo matmul_i4.lo matmul_i8.lo \ matmul_i16.lo matmul_r4.lo matmul_r8.lo matmul_r10.lo \ - matmul_r16.lo matmul_c4.lo matmul_c8.lo matmul_c10.lo \ - matmul_c16.lo + matmul_r16.lo matmul_r17.lo matmul_c4.lo matmul_c8.lo \ + matmul_c10.lo matmul_c16.lo matmul_c17.lo am__objects_22 = matmul_l4.lo matmul_l8.lo matmul_l16.lo am__objects_23 = shape_i1.lo shape_i2.lo shape_i4.lo shape_i8.lo \ shape_i16.lo @@ -303,57 +309,63 @@ am__objects_25 = eoshift3_4.lo eoshift3_8.lo eoshift3_16.lo am__objects_26 = cshift1_4.lo cshift1_8.lo cshift1_16.lo am__objects_27 = reshape_i4.lo reshape_i8.lo reshape_i16.lo \ reshape_r4.lo reshape_r8.lo reshape_r10.lo reshape_r16.lo \ - reshape_c4.lo reshape_c8.lo reshape_c10.lo reshape_c16.lo + reshape_r17.lo reshape_c4.lo reshape_c8.lo reshape_c10.lo \ + reshape_c16.lo reshape_c17.lo am__objects_28 = in_pack_i1.lo in_pack_i2.lo in_pack_i4.lo \ in_pack_i8.lo in_pack_i16.lo in_pack_r4.lo in_pack_r8.lo \ - in_pack_r10.lo in_pack_r16.lo in_pack_c4.lo in_pack_c8.lo \ - in_pack_c10.lo in_pack_c16.lo + in_pack_r10.lo in_pack_r16.lo in_pack_r17.lo in_pack_c4.lo \ + in_pack_c8.lo in_pack_c10.lo in_pack_c16.lo in_pack_c17.lo am__objects_29 = in_unpack_i1.lo in_unpack_i2.lo in_unpack_i4.lo \ in_unpack_i8.lo in_unpack_i16.lo in_unpack_r4.lo \ in_unpack_r8.lo in_unpack_r10.lo in_unpack_r16.lo \ - in_unpack_c4.lo in_unpack_c8.lo in_unpack_c10.lo \ - in_unpack_c16.lo + in_unpack_r17.lo in_unpack_c4.lo in_unpack_c8.lo \ + in_unpack_c10.lo in_unpack_c16.lo in_unpack_c17.lo am__objects_30 = pow_i4_i4.lo pow_i8_i4.lo pow_i16_i4.lo pow_r16_i4.lo \ - pow_c4_i4.lo pow_c8_i4.lo pow_c10_i4.lo pow_c16_i4.lo \ - pow_i4_i8.lo pow_i8_i8.lo pow_i16_i8.lo pow_r4_i8.lo \ - pow_r8_i8.lo pow_r10_i8.lo pow_r16_i8.lo pow_c4_i8.lo \ - pow_c8_i8.lo pow_c10_i8.lo pow_c16_i8.lo pow_i4_i16.lo \ + pow_r17_i4.lo pow_c4_i4.lo pow_c8_i4.lo pow_c10_i4.lo \ + pow_c16_i4.lo pow_c17_i4.lo pow_i4_i8.lo pow_i8_i8.lo \ + pow_i16_i8.lo pow_r4_i8.lo pow_r8_i8.lo pow_r10_i8.lo \ + pow_r16_i8.lo pow_r17_i8.lo pow_c4_i8.lo pow_c8_i8.lo \ + pow_c10_i8.lo pow_c16_i8.lo pow_c17_i8.lo pow_i4_i16.lo \ pow_i8_i16.lo pow_i16_i16.lo pow_r4_i16.lo pow_r8_i16.lo \ - pow_r10_i16.lo pow_r16_i16.lo pow_c4_i16.lo pow_c8_i16.lo \ - pow_c10_i16.lo pow_c16_i16.lo + pow_r10_i16.lo pow_r16_i16.lo pow_r17_i16.lo pow_c4_i16.lo \ + pow_c8_i16.lo pow_c10_i16.lo pow_c16_i16.lo pow_c17_i16.lo am__objects_31 = pack_i1.lo pack_i2.lo pack_i4.lo pack_i8.lo \ pack_i16.lo pack_r4.lo pack_r8.lo pack_r10.lo pack_r16.lo \ - pack_c4.lo pack_c8.lo pack_c10.lo pack_c16.lo + pack_r17.lo pack_c4.lo pack_c8.lo pack_c10.lo pack_c16.lo \ + pack_c17.lo am__objects_32 = unpack_i1.lo unpack_i2.lo unpack_i4.lo unpack_i8.lo \ unpack_i16.lo unpack_r4.lo unpack_r8.lo unpack_r10.lo \ - unpack_r16.lo unpack_c4.lo unpack_c8.lo unpack_c10.lo \ - unpack_c16.lo + unpack_r16.lo unpack_r17.lo unpack_c4.lo unpack_c8.lo \ + unpack_c10.lo unpack_c16.lo unpack_c17.lo am__objects_33 = matmulavx128_i1.lo matmulavx128_i2.lo \ matmulavx128_i4.lo matmulavx128_i8.lo matmulavx128_i16.lo \ matmulavx128_r4.lo matmulavx128_r8.lo matmulavx128_r10.lo \ - matmulavx128_r16.lo matmulavx128_c4.lo matmulavx128_c8.lo \ - matmulavx128_c10.lo matmulavx128_c16.lo + matmulavx128_r16.lo matmulavx128_r17.lo matmulavx128_c4.lo \ + matmulavx128_c8.lo matmulavx128_c10.lo matmulavx128_c16.lo \ + matmulavx128_c17.lo am__objects_34 = spread_i1.lo spread_i2.lo spread_i4.lo spread_i8.lo \ spread_i16.lo spread_r4.lo spread_r8.lo spread_r10.lo \ - spread_r16.lo spread_c4.lo spread_c8.lo spread_c10.lo \ - spread_c16.lo + spread_r16.lo spread_r17.lo spread_c4.lo spread_c8.lo \ + spread_c10.lo spread_c16.lo spread_c17.lo am__objects_35 = cshift0_i1.lo cshift0_i2.lo cshift0_i4.lo \ cshift0_i8.lo cshift0_i16.lo cshift0_r4.lo cshift0_r8.lo \ - cshift0_r10.lo cshift0_r16.lo cshift0_c4.lo cshift0_c8.lo \ - cshift0_c10.lo cshift0_c16.lo + cshift0_r10.lo cshift0_r16.lo cshift0_r17.lo cshift0_c4.lo \ + cshift0_c8.lo cshift0_c10.lo cshift0_c16.lo cshift0_c17.lo am__objects_36 = cshift1_4_i1.lo cshift1_4_i2.lo cshift1_4_i4.lo \ cshift1_4_i8.lo cshift1_4_i16.lo cshift1_4_r4.lo \ cshift1_4_r8.lo cshift1_4_r10.lo cshift1_4_r16.lo \ - cshift1_4_c4.lo cshift1_4_c8.lo cshift1_4_c10.lo \ - cshift1_4_c16.lo cshift1_8_i1.lo cshift1_8_i2.lo \ - cshift1_8_i4.lo cshift1_8_i8.lo cshift1_8_i16.lo \ - cshift1_8_r4.lo cshift1_8_r8.lo cshift1_8_r10.lo \ - cshift1_8_r16.lo cshift1_8_c4.lo cshift1_8_c8.lo \ - cshift1_8_c10.lo cshift1_8_c16.lo cshift1_16_i1.lo \ - cshift1_16_i2.lo cshift1_16_i4.lo cshift1_16_i8.lo \ - cshift1_16_i16.lo cshift1_16_r4.lo cshift1_16_r8.lo \ - cshift1_16_r10.lo cshift1_16_r16.lo cshift1_16_c4.lo \ - cshift1_16_c8.lo cshift1_16_c10.lo cshift1_16_c16.lo + cshift1_4_r17.lo cshift1_4_c4.lo cshift1_4_c8.lo \ + cshift1_4_c10.lo cshift1_4_c16.lo cshift1_4_c17.lo \ + cshift1_8_i1.lo cshift1_8_i2.lo cshift1_8_i4.lo \ + cshift1_8_i8.lo cshift1_8_i16.lo cshift1_8_r4.lo \ + cshift1_8_r8.lo cshift1_8_r10.lo cshift1_8_r16.lo \ + cshift1_8_r17.lo cshift1_8_c4.lo cshift1_8_c8.lo \ + cshift1_8_c10.lo cshift1_8_c16.lo cshift1_8_c17.lo \ + cshift1_16_i1.lo cshift1_16_i2.lo cshift1_16_i4.lo \ + cshift1_16_i8.lo cshift1_16_i16.lo cshift1_16_r4.lo \ + cshift1_16_r8.lo cshift1_16_r10.lo cshift1_16_r16.lo \ + cshift1_16_r17.lo cshift1_16_c4.lo cshift1_16_c8.lo \ + cshift1_16_c10.lo cshift1_16_c16.lo cshift1_16_c17.lo am__objects_37 = maxloc0_4_s1.lo maxloc0_4_s4.lo maxloc0_8_s1.lo \ maxloc0_8_s4.lo maxloc0_16_s1.lo maxloc0_16_s4.lo am__objects_38 = minloc0_4_s1.lo minloc0_4_s4.lo minloc0_8_s1.lo \ @@ -372,13 +384,13 @@ am__objects_45 = maxval1_s1.lo maxval1_s4.lo am__objects_46 = minval1_s1.lo minval1_s4.lo am__objects_47 = findloc0_i1.lo findloc0_i2.lo findloc0_i4.lo \ findloc0_i8.lo findloc0_i16.lo findloc0_r4.lo findloc0_r8.lo \ - findloc0_r10.lo findloc0_r16.lo findloc0_c4.lo findloc0_c8.lo \ - findloc0_c10.lo findloc0_c16.lo + findloc0_r10.lo findloc0_r16.lo findloc0_r17.lo findloc0_c4.lo \ + findloc0_c8.lo findloc0_c10.lo findloc0_c16.lo findloc0_c17.lo am__objects_48 = findloc0_s1.lo findloc0_s4.lo am__objects_49 = findloc1_i1.lo findloc1_i2.lo findloc1_i4.lo \ findloc1_i8.lo findloc1_i16.lo findloc1_r4.lo findloc1_r8.lo \ - findloc1_r10.lo findloc1_r16.lo findloc1_c4.lo findloc1_c8.lo \ - findloc1_c10.lo findloc1_c16.lo + findloc1_r10.lo findloc1_r16.lo findloc1_r17.lo findloc1_c4.lo \ + findloc1_c8.lo findloc1_c10.lo findloc1_c16.lo findloc1_c17.lo am__objects_50 = findloc1_s1.lo findloc1_s4.lo am__objects_51 = findloc2_s1.lo findloc2_s4.lo am__objects_52 = ISO_Fortran_binding.lo @@ -429,35 +441,43 @@ am__objects_58 = associated.lo abort.lo args.lo cshift0.lo eoshift0.lo \ @IEEE_SUPPORT_TRUE@ ieee_exceptions.lo ieee_features.lo am__objects_60 = am__objects_61 = _abs_c4.lo _abs_c8.lo _abs_c10.lo _abs_c16.lo \ - _abs_i4.lo _abs_i8.lo _abs_i16.lo _abs_r4.lo _abs_r8.lo \ - _abs_r10.lo _abs_r16.lo _aimag_c4.lo _aimag_c8.lo \ - _aimag_c10.lo _aimag_c16.lo _exp_r4.lo _exp_r8.lo _exp_r10.lo \ - _exp_r16.lo _exp_c4.lo _exp_c8.lo _exp_c10.lo _exp_c16.lo \ - _log_r4.lo _log_r8.lo _log_r10.lo _log_r16.lo _log_c4.lo \ - _log_c8.lo _log_c10.lo _log_c16.lo _log10_r4.lo _log10_r8.lo \ - _log10_r10.lo _log10_r16.lo _sqrt_r4.lo _sqrt_r8.lo \ - _sqrt_r10.lo _sqrt_r16.lo _sqrt_c4.lo _sqrt_c8.lo _sqrt_c10.lo \ - _sqrt_c16.lo _asin_r4.lo _asin_r8.lo _asin_r10.lo _asin_r16.lo \ - _asinh_r4.lo _asinh_r8.lo _asinh_r10.lo _asinh_r16.lo \ - _acos_r4.lo _acos_r8.lo _acos_r10.lo _acos_r16.lo _acosh_r4.lo \ - _acosh_r8.lo _acosh_r10.lo _acosh_r16.lo _atan_r4.lo \ - _atan_r8.lo _atan_r10.lo _atan_r16.lo _atanh_r4.lo \ - _atanh_r8.lo _atanh_r10.lo _atanh_r16.lo _sin_r4.lo _sin_r8.lo \ - _sin_r10.lo _sin_r16.lo _sin_c4.lo _sin_c8.lo _sin_c10.lo \ - _sin_c16.lo _cos_r4.lo _cos_r8.lo _cos_r10.lo _cos_r16.lo \ - _cos_c4.lo _cos_c8.lo _cos_c10.lo _cos_c16.lo _tan_r4.lo \ - _tan_r8.lo _tan_r10.lo _tan_r16.lo _sinh_r4.lo _sinh_r8.lo \ - _sinh_r10.lo _sinh_r16.lo _cosh_r4.lo _cosh_r8.lo _cosh_r10.lo \ - _cosh_r16.lo _tanh_r4.lo _tanh_r8.lo _tanh_r10.lo _tanh_r16.lo \ + _abs_c17.lo _abs_i4.lo _abs_i8.lo _abs_i16.lo _abs_r4.lo \ + _abs_r8.lo _abs_r10.lo _abs_r16.lo _abs_r17.lo _aimag_c4.lo \ + _aimag_c8.lo _aimag_c10.lo _aimag_c16.lo _aimag_c17.lo \ + _exp_r4.lo _exp_r8.lo _exp_r10.lo _exp_r16.lo _exp_r17.lo \ + _exp_c4.lo _exp_c8.lo _exp_c10.lo _exp_c16.lo _exp_c17.lo \ + _log_r4.lo _log_r8.lo _log_r10.lo _log_r16.lo _log_r17.lo \ + _log_c4.lo _log_c8.lo _log_c10.lo _log_c16.lo _log_c17.lo \ + _log10_r4.lo _log10_r8.lo _log10_r10.lo _log10_r16.lo \ + _log10_r17.lo _sqrt_r4.lo _sqrt_r8.lo _sqrt_r10.lo \ + _sqrt_r16.lo _sqrt_r17.lo _sqrt_c4.lo _sqrt_c8.lo _sqrt_c10.lo \ + _sqrt_c16.lo _sqrt_c17.lo _asin_r4.lo _asin_r8.lo _asin_r10.lo \ + _asin_r16.lo _asin_r17.lo _asinh_r4.lo _asinh_r8.lo \ + _asinh_r10.lo _asinh_r16.lo _asinh_r17.lo _acos_r4.lo \ + _acos_r8.lo _acos_r10.lo _acos_r16.lo _acos_r17.lo \ + _acosh_r4.lo _acosh_r8.lo _acosh_r10.lo _acosh_r16.lo \ + _acosh_r17.lo _atan_r4.lo _atan_r8.lo _atan_r10.lo \ + _atan_r16.lo _atan_r17.lo _atanh_r4.lo _atanh_r8.lo \ + _atanh_r10.lo _atanh_r16.lo _atanh_r17.lo _sin_r4.lo \ + _sin_r8.lo _sin_r10.lo _sin_r16.lo _sin_r17.lo _sin_c4.lo \ + _sin_c8.lo _sin_c10.lo _sin_c16.lo _sin_c17.lo _cos_r4.lo \ + _cos_r8.lo _cos_r10.lo _cos_r16.lo _cos_r17.lo _cos_c4.lo \ + _cos_c8.lo _cos_c10.lo _cos_c16.lo _cos_c17.lo _tan_r4.lo \ + _tan_r8.lo _tan_r10.lo _tan_r16.lo _tan_r17.lo _sinh_r4.lo \ + _sinh_r8.lo _sinh_r10.lo _sinh_r16.lo _sinh_r17.lo _cosh_r4.lo \ + _cosh_r8.lo _cosh_r10.lo _cosh_r16.lo _cosh_r17.lo _tanh_r4.lo \ + _tanh_r8.lo _tanh_r10.lo _tanh_r16.lo _tanh_r17.lo \ _conjg_c4.lo _conjg_c8.lo _conjg_c10.lo _conjg_c16.lo \ - _aint_r4.lo _aint_r8.lo _aint_r10.lo _aint_r16.lo _anint_r4.lo \ - _anint_r8.lo _anint_r10.lo _anint_r16.lo + _conjg_c17.lo _aint_r4.lo _aint_r8.lo _aint_r10.lo \ + _aint_r16.lo _aint_r17.lo _anint_r4.lo _anint_r8.lo \ + _anint_r10.lo _anint_r16.lo _anint_r17.lo am__objects_62 = _sign_i4.lo _sign_i8.lo _sign_i16.lo _sign_r4.lo \ - _sign_r8.lo _sign_r10.lo _sign_r16.lo _dim_i4.lo _dim_i8.lo \ - _dim_i16.lo _dim_r4.lo _dim_r8.lo _dim_r10.lo _dim_r16.lo \ - _atan2_r4.lo _atan2_r8.lo _atan2_r10.lo _atan2_r16.lo \ - _mod_i4.lo _mod_i8.lo _mod_i16.lo _mod_r4.lo _mod_r8.lo \ - _mod_r10.lo _mod_r16.lo + _sign_r8.lo _sign_r10.lo _sign_r16.lo _sign_r17.lo _dim_i4.lo \ + _dim_i8.lo _dim_i16.lo _dim_r4.lo _dim_r8.lo _dim_r10.lo \ + _dim_r16.lo _dim_r17.lo _atan2_r4.lo _atan2_r8.lo \ + _atan2_r10.lo _atan2_r16.lo _atan2_r17.lo _mod_i4.lo \ + _mod_i8.lo _mod_i16.lo _mod_r4.lo _mod_r8.lo _mod_r10.lo \ + _mod_r16.lo _mod_r17.lo am__objects_63 = misc_specifics.lo am__objects_64 = $(am__objects_61) $(am__objects_62) $(am__objects_63) \ dprod_r8.lo f2c_specifics.lo random_init.lo @@ -807,7 +827,8 @@ i_bessel_c = \ $(srcdir)/generated/bessel_r4.c \ $(srcdir)/generated/bessel_r8.c \ $(srcdir)/generated/bessel_r10.c \ -$(srcdir)/generated/bessel_r16.c +$(srcdir)/generated/bessel_r16.c \ +$(srcdir)/generated/bessel_r17.c i_count_c = \ $(srcdir)/generated/count_1_l.c \ @@ -847,10 +868,12 @@ $(srcdir)/generated/findloc0_r4.c \ $(srcdir)/generated/findloc0_r8.c \ $(srcdir)/generated/findloc0_r10.c \ $(srcdir)/generated/findloc0_r16.c \ +$(srcdir)/generated/findloc0_r17.c \ $(srcdir)/generated/findloc0_c4.c \ $(srcdir)/generated/findloc0_c8.c \ $(srcdir)/generated/findloc0_c10.c \ -$(srcdir)/generated/findloc0_c16.c +$(srcdir)/generated/findloc0_c16.c \ +$(srcdir)/generated/findloc0_c17.c i_findloc0s_c = \ $(srcdir)/generated/findloc0_s1.c \ @@ -866,10 +889,12 @@ $(srcdir)/generated/findloc1_r4.c \ $(srcdir)/generated/findloc1_r8.c \ $(srcdir)/generated/findloc1_r10.c \ $(srcdir)/generated/findloc1_r16.c \ +$(srcdir)/generated/findloc1_r17.c \ $(srcdir)/generated/findloc1_c4.c \ $(srcdir)/generated/findloc1_c8.c \ $(srcdir)/generated/findloc1_c10.c \ -$(srcdir)/generated/findloc1_c16.c +$(srcdir)/generated/findloc1_c16.c \ +$(srcdir)/generated/findloc1_c17.c i_findloc1s_c = \ $(srcdir)/generated/findloc1_s1.c \ @@ -906,7 +931,10 @@ $(srcdir)/generated/maxloc0_8_r10.c \ $(srcdir)/generated/maxloc0_16_r10.c \ $(srcdir)/generated/maxloc0_4_r16.c \ $(srcdir)/generated/maxloc0_8_r16.c \ -$(srcdir)/generated/maxloc0_16_r16.c +$(srcdir)/generated/maxloc0_16_r16.c \ +$(srcdir)/generated/maxloc0_4_r17.c \ +$(srcdir)/generated/maxloc0_8_r17.c \ +$(srcdir)/generated/maxloc0_16_r17.c i_maxloc0s_c = \ $(srcdir)/generated/maxloc0_4_s1.c \ @@ -943,7 +971,10 @@ $(srcdir)/generated/maxloc1_8_r10.c \ $(srcdir)/generated/maxloc1_16_r10.c \ $(srcdir)/generated/maxloc1_4_r16.c \ $(srcdir)/generated/maxloc1_8_r16.c \ -$(srcdir)/generated/maxloc1_16_r16.c +$(srcdir)/generated/maxloc1_16_r16.c \ +$(srcdir)/generated/maxloc1_4_r17.c \ +$(srcdir)/generated/maxloc1_8_r17.c \ +$(srcdir)/generated/maxloc1_16_r17.c i_maxloc1s_c = \ $(srcdir)/generated/maxloc1_4_s1.c \ @@ -970,7 +1001,8 @@ $(srcdir)/generated/maxval_i16.c \ $(srcdir)/generated/maxval_r4.c \ $(srcdir)/generated/maxval_r8.c \ $(srcdir)/generated/maxval_r10.c \ -$(srcdir)/generated/maxval_r16.c +$(srcdir)/generated/maxval_r16.c \ +$(srcdir)/generated/maxval_r17.c i_maxval0s_c = \ $(srcdir)/generated/maxval0_s1.c \ @@ -1007,7 +1039,10 @@ $(srcdir)/generated/minloc0_8_r10.c \ $(srcdir)/generated/minloc0_16_r10.c \ $(srcdir)/generated/minloc0_4_r16.c \ $(srcdir)/generated/minloc0_8_r16.c \ -$(srcdir)/generated/minloc0_16_r16.c +$(srcdir)/generated/minloc0_16_r16.c \ +$(srcdir)/generated/minloc0_4_r17.c \ +$(srcdir)/generated/minloc0_8_r17.c \ +$(srcdir)/generated/minloc0_16_r17.c i_minloc0s_c = \ $(srcdir)/generated/minloc0_4_s1.c \ @@ -1044,7 +1079,10 @@ $(srcdir)/generated/minloc1_8_r10.c \ $(srcdir)/generated/minloc1_16_r10.c \ $(srcdir)/generated/minloc1_4_r16.c \ $(srcdir)/generated/minloc1_8_r16.c \ -$(srcdir)/generated/minloc1_16_r16.c +$(srcdir)/generated/minloc1_16_r16.c \ +$(srcdir)/generated/minloc1_4_r17.c \ +$(srcdir)/generated/minloc1_8_r17.c \ +$(srcdir)/generated/minloc1_16_r17.c i_minloc1s_c = \ $(srcdir)/generated/minloc1_4_s1.c \ @@ -1071,7 +1109,8 @@ $(srcdir)/generated/minval_i16.c \ $(srcdir)/generated/minval_r4.c \ $(srcdir)/generated/minval_r8.c \ $(srcdir)/generated/minval_r10.c \ -$(srcdir)/generated/minval_r16.c +$(srcdir)/generated/minval_r16.c \ +$(srcdir)/generated/minval_r17.c i_minval0s_c = \ $(srcdir)/generated/minval0_s1.c \ @@ -1085,7 +1124,8 @@ i_norm2_c = \ $(srcdir)/generated/norm2_r4.c \ $(srcdir)/generated/norm2_r8.c \ $(srcdir)/generated/norm2_r10.c \ -$(srcdir)/generated/norm2_r16.c +$(srcdir)/generated/norm2_r16.c \ +$(srcdir)/generated/norm2_r17.c i_parity_c = \ $(srcdir)/generated/parity_l1.c \ @@ -1104,10 +1144,12 @@ $(srcdir)/generated/sum_r4.c \ $(srcdir)/generated/sum_r8.c \ $(srcdir)/generated/sum_r10.c \ $(srcdir)/generated/sum_r16.c \ +$(srcdir)/generated/sum_r17.c \ $(srcdir)/generated/sum_c4.c \ $(srcdir)/generated/sum_c8.c \ $(srcdir)/generated/sum_c10.c \ -$(srcdir)/generated/sum_c16.c +$(srcdir)/generated/sum_c16.c \ +$(srcdir)/generated/sum_c17.c i_product_c = \ $(srcdir)/generated/product_i1.c \ @@ -1119,10 +1161,12 @@ $(srcdir)/generated/product_r4.c \ $(srcdir)/generated/product_r8.c \ $(srcdir)/generated/product_r10.c \ $(srcdir)/generated/product_r16.c \ +$(srcdir)/generated/product_r17.c \ $(srcdir)/generated/product_c4.c \ $(srcdir)/generated/product_c8.c \ $(srcdir)/generated/product_c10.c \ -$(srcdir)/generated/product_c16.c +$(srcdir)/generated/product_c16.c \ +$(srcdir)/generated/product_c17.c i_matmul_c = \ $(srcdir)/generated/matmul_i1.c \ @@ -1134,10 +1178,12 @@ $(srcdir)/generated/matmul_r4.c \ $(srcdir)/generated/matmul_r8.c \ $(srcdir)/generated/matmul_r10.c \ $(srcdir)/generated/matmul_r16.c \ +$(srcdir)/generated/matmul_r17.c \ $(srcdir)/generated/matmul_c4.c \ $(srcdir)/generated/matmul_c8.c \ $(srcdir)/generated/matmul_c10.c \ -$(srcdir)/generated/matmul_c16.c +$(srcdir)/generated/matmul_c16.c \ +$(srcdir)/generated/matmul_c17.c i_matmulavx128_c = \ $(srcdir)/generated/matmulavx128_i1.c \ @@ -1149,10 +1195,12 @@ $(srcdir)/generated/matmulavx128_r4.c \ $(srcdir)/generated/matmulavx128_r8.c \ $(srcdir)/generated/matmulavx128_r10.c \ $(srcdir)/generated/matmulavx128_r16.c \ +$(srcdir)/generated/matmulavx128_r17.c \ $(srcdir)/generated/matmulavx128_c4.c \ $(srcdir)/generated/matmulavx128_c8.c \ $(srcdir)/generated/matmulavx128_c10.c \ -$(srcdir)/generated/matmulavx128_c16.c +$(srcdir)/generated/matmulavx128_c16.c \ +$(srcdir)/generated/matmulavx128_c17.c i_matmull_c = \ $(srcdir)/generated/matmul_l4.c \ @@ -1174,10 +1222,12 @@ $(srcdir)/generated/reshape_r4.c \ $(srcdir)/generated/reshape_r8.c \ $(srcdir)/generated/reshape_r10.c \ $(srcdir)/generated/reshape_r16.c \ +$(srcdir)/generated/reshape_r17.c \ $(srcdir)/generated/reshape_c4.c \ $(srcdir)/generated/reshape_c8.c \ $(srcdir)/generated/reshape_c10.c \ -$(srcdir)/generated/reshape_c16.c +$(srcdir)/generated/reshape_c16.c \ +$(srcdir)/generated/reshape_c17.c i_eoshift1_c = \ $(srcdir)/generated/eoshift1_4.c \ @@ -1199,10 +1249,12 @@ $(srcdir)/generated/cshift0_r4.c \ $(srcdir)/generated/cshift0_r8.c \ $(srcdir)/generated/cshift0_r10.c \ $(srcdir)/generated/cshift0_r16.c \ +$(srcdir)/generated/cshift0_r17.c \ $(srcdir)/generated/cshift0_c4.c \ $(srcdir)/generated/cshift0_c8.c \ $(srcdir)/generated/cshift0_c10.c \ -$(srcdir)/generated/cshift0_c16.c +$(srcdir)/generated/cshift0_c16.c \ +$(srcdir)/generated/cshift0_c17.c i_cshift1_c = \ $(srcdir)/generated/cshift1_4.c \ @@ -1219,10 +1271,12 @@ $(srcdir)/generated/cshift1_4_r4.c \ $(srcdir)/generated/cshift1_4_r8.c \ $(srcdir)/generated/cshift1_4_r10.c \ $(srcdir)/generated/cshift1_4_r16.c \ +$(srcdir)/generated/cshift1_4_r17.c \ $(srcdir)/generated/cshift1_4_c4.c \ $(srcdir)/generated/cshift1_4_c8.c \ $(srcdir)/generated/cshift1_4_c10.c \ $(srcdir)/generated/cshift1_4_c16.c \ +$(srcdir)/generated/cshift1_4_c17.c \ $(srcdir)/generated/cshift1_8_i1.c \ $(srcdir)/generated/cshift1_8_i2.c \ $(srcdir)/generated/cshift1_8_i4.c \ @@ -1232,10 +1286,12 @@ $(srcdir)/generated/cshift1_8_r4.c \ $(srcdir)/generated/cshift1_8_r8.c \ $(srcdir)/generated/cshift1_8_r10.c \ $(srcdir)/generated/cshift1_8_r16.c \ +$(srcdir)/generated/cshift1_8_r17.c \ $(srcdir)/generated/cshift1_8_c4.c \ $(srcdir)/generated/cshift1_8_c8.c \ $(srcdir)/generated/cshift1_8_c10.c \ $(srcdir)/generated/cshift1_8_c16.c \ +$(srcdir)/generated/cshift1_8_c17.c \ $(srcdir)/generated/cshift1_16_i1.c \ $(srcdir)/generated/cshift1_16_i2.c \ $(srcdir)/generated/cshift1_16_i4.c \ @@ -1245,10 +1301,12 @@ $(srcdir)/generated/cshift1_16_r4.c \ $(srcdir)/generated/cshift1_16_r8.c \ $(srcdir)/generated/cshift1_16_r10.c \ $(srcdir)/generated/cshift1_16_r16.c \ +$(srcdir)/generated/cshift1_16_r17.c \ $(srcdir)/generated/cshift1_16_c4.c \ $(srcdir)/generated/cshift1_16_c8.c \ $(srcdir)/generated/cshift1_16_c10.c \ -$(srcdir)/generated/cshift1_16_c16.c +$(srcdir)/generated/cshift1_16_c16.c \ +$(srcdir)/generated/cshift1_16_c17.c in_pack_c = \ $(srcdir)/generated/in_pack_i1.c \ @@ -1260,10 +1318,12 @@ $(srcdir)/generated/in_pack_r4.c \ $(srcdir)/generated/in_pack_r8.c \ $(srcdir)/generated/in_pack_r10.c \ $(srcdir)/generated/in_pack_r16.c \ +$(srcdir)/generated/in_pack_r17.c \ $(srcdir)/generated/in_pack_c4.c \ $(srcdir)/generated/in_pack_c8.c \ $(srcdir)/generated/in_pack_c10.c \ -$(srcdir)/generated/in_pack_c16.c +$(srcdir)/generated/in_pack_c16.c \ +$(srcdir)/generated/in_pack_c17.c in_unpack_c = \ $(srcdir)/generated/in_unpack_i1.c \ @@ -1275,20 +1335,24 @@ $(srcdir)/generated/in_unpack_r4.c \ $(srcdir)/generated/in_unpack_r8.c \ $(srcdir)/generated/in_unpack_r10.c \ $(srcdir)/generated/in_unpack_r16.c \ +$(srcdir)/generated/in_unpack_r17.c \ $(srcdir)/generated/in_unpack_c4.c \ $(srcdir)/generated/in_unpack_c8.c \ $(srcdir)/generated/in_unpack_c10.c \ -$(srcdir)/generated/in_unpack_c16.c +$(srcdir)/generated/in_unpack_c16.c \ +$(srcdir)/generated/in_unpack_c17.c i_pow_c = \ $(srcdir)/generated/pow_i4_i4.c \ $(srcdir)/generated/pow_i8_i4.c \ $(srcdir)/generated/pow_i16_i4.c \ $(srcdir)/generated/pow_r16_i4.c \ +$(srcdir)/generated/pow_r17_i4.c \ $(srcdir)/generated/pow_c4_i4.c \ $(srcdir)/generated/pow_c8_i4.c \ $(srcdir)/generated/pow_c10_i4.c \ $(srcdir)/generated/pow_c16_i4.c \ +$(srcdir)/generated/pow_c17_i4.c \ $(srcdir)/generated/pow_i4_i8.c \ $(srcdir)/generated/pow_i8_i8.c \ $(srcdir)/generated/pow_i16_i8.c \ @@ -1296,10 +1360,12 @@ $(srcdir)/generated/pow_r4_i8.c \ $(srcdir)/generated/pow_r8_i8.c \ $(srcdir)/generated/pow_r10_i8.c \ $(srcdir)/generated/pow_r16_i8.c \ +$(srcdir)/generated/pow_r17_i8.c \ $(srcdir)/generated/pow_c4_i8.c \ $(srcdir)/generated/pow_c8_i8.c \ $(srcdir)/generated/pow_c10_i8.c \ $(srcdir)/generated/pow_c16_i8.c \ +$(srcdir)/generated/pow_c17_i8.c \ $(srcdir)/generated/pow_i4_i16.c \ $(srcdir)/generated/pow_i8_i16.c \ $(srcdir)/generated/pow_i16_i16.c \ @@ -1307,10 +1373,12 @@ $(srcdir)/generated/pow_r4_i16.c \ $(srcdir)/generated/pow_r8_i16.c \ $(srcdir)/generated/pow_r10_i16.c \ $(srcdir)/generated/pow_r16_i16.c \ +$(srcdir)/generated/pow_r17_i16.c \ $(srcdir)/generated/pow_c4_i16.c \ $(srcdir)/generated/pow_c8_i16.c \ $(srcdir)/generated/pow_c10_i16.c \ -$(srcdir)/generated/pow_c16_i16.c +$(srcdir)/generated/pow_c16_i16.c \ +$(srcdir)/generated/pow_c17_i16.c i_pack_c = \ $(srcdir)/generated/pack_i1.c \ @@ -1322,10 +1390,12 @@ $(srcdir)/generated/pack_r4.c \ $(srcdir)/generated/pack_r8.c \ $(srcdir)/generated/pack_r10.c \ $(srcdir)/generated/pack_r16.c \ +$(srcdir)/generated/pack_r17.c \ $(srcdir)/generated/pack_c4.c \ $(srcdir)/generated/pack_c8.c \ $(srcdir)/generated/pack_c10.c \ -$(srcdir)/generated/pack_c16.c +$(srcdir)/generated/pack_c16.c \ +$(srcdir)/generated/pack_c17.c i_unpack_c = \ $(srcdir)/generated/unpack_i1.c \ @@ -1337,10 +1407,12 @@ $(srcdir)/generated/unpack_r4.c \ $(srcdir)/generated/unpack_r8.c \ $(srcdir)/generated/unpack_r10.c \ $(srcdir)/generated/unpack_r16.c \ +$(srcdir)/generated/unpack_r17.c \ $(srcdir)/generated/unpack_c4.c \ $(srcdir)/generated/unpack_c8.c \ $(srcdir)/generated/unpack_c10.c \ -$(srcdir)/generated/unpack_c16.c +$(srcdir)/generated/unpack_c16.c \ +$(srcdir)/generated/unpack_c17.c i_spread_c = \ $(srcdir)/generated/spread_i1.c \ @@ -1352,43 +1424,45 @@ $(srcdir)/generated/spread_r4.c \ $(srcdir)/generated/spread_r8.c \ $(srcdir)/generated/spread_r10.c \ $(srcdir)/generated/spread_r16.c \ +$(srcdir)/generated/spread_r17.c \ $(srcdir)/generated/spread_c4.c \ $(srcdir)/generated/spread_c8.c \ $(srcdir)/generated/spread_c10.c \ -$(srcdir)/generated/spread_c16.c +$(srcdir)/generated/spread_c16.c \ +$(srcdir)/generated/spread_c17.c i_isobinding_c = \ $(srcdir)/runtime/ISO_Fortran_binding.c m4_files = m4/iparm.m4 m4/ifunction.m4 m4/iforeach.m4 m4/all.m4 \ - m4/any.m4 m4/count.m4 m4/maxloc0.m4 m4/maxloc1.m4 m4/maxval.m4 \ - m4/minloc0.m4 m4/minloc1.m4 m4/minval.m4 m4/product.m4 m4/sum.m4 \ - m4/matmul.m4 m4/matmull.m4 m4/ifunction_logical.m4 \ - m4/ctrig.m4 m4/cexp.m4 m4/chyp.m4 m4/mtype.m4 \ - m4/specific.m4 m4/specific2.m4 m4/head.m4 m4/shape.m4 m4/reshape.m4 \ - m4/eoshift1.m4 m4/eoshift3.m4 \ - m4/pow.m4 \ - m4/misc_specifics.m4 m4/pack.m4 \ - m4/unpack.m4 m4/spread.m4 m4/bessel.m4 m4/norm2.m4 m4/parity.m4 \ - m4/iall.m4 m4/iany.m4 m4/iparity.m4 m4/iforeach-s.m4 m4/findloc0.m4 \ - m4/findloc0s.m4 m4/ifindloc0.m4 m4/findloc1.m4 m4/ifindloc1.m4 \ - m4/findloc2s.m4 m4/ifindloc2.m4 + m4/any.m4 m4/count.m4 m4/maxloc0.m4 m4/maxloc1.m4 m4/maxval.m4 \ + m4/minloc0.m4 m4/minloc1.m4 m4/minval.m4 m4/product.m4 m4/sum.m4 \ + m4/matmul.m4 m4/matmull.m4 m4/ifunction_logical.m4 \ + m4/ctrig.m4 m4/cexp.m4 m4/chyp.m4 m4/mtype.m4 \ + m4/specific.m4 m4/specific2.m4 m4/head.m4 m4/shape.m4 m4/reshape.m4 \ + m4/eoshift1.m4 m4/eoshift3.m4 \ + m4/pow.m4 \ + m4/misc_specifics.m4 m4/pack.m4 \ + m4/unpack.m4 m4/spread.m4 m4/bessel.m4 m4/norm2.m4 m4/parity.m4 \ + m4/iall.m4 m4/iany.m4 m4/iparity.m4 m4/iforeach-s.m4 m4/findloc0.m4 \ + m4/findloc0s.m4 m4/ifindloc0.m4 m4/findloc1.m4 m4/ifindloc1.m4 \ + m4/findloc2s.m4 m4/ifindloc2.m4 gfor_built_src = $(i_all_c) $(i_any_c) $(i_count_c) $(i_maxloc0_c) \ - $(i_maxloc1_c) $(i_maxval_c) $(i_minloc0_c) $(i_minloc1_c) $(i_minval_c) \ - $(i_product_c) $(i_sum_c) $(i_bessel_c) $(i_iall_c) $(i_iany_c) \ - $(i_iparity_c) $(i_norm2_c) $(i_parity_c) \ - $(i_matmul_c) $(i_matmull_c) $(i_shape_c) $(i_eoshift1_c) \ - $(i_eoshift3_c) $(i_cshift1_c) $(i_reshape_c) $(in_pack_c) $(in_unpack_c) \ - $(i_pow_c) $(i_pack_c) $(i_unpack_c) $(i_matmulavx128_c) \ - $(i_spread_c) selected_int_kind.inc selected_real_kind.inc kinds.h \ - $(i_cshift0_c) kinds.inc c99_protos.inc fpu-target.h fpu-target.inc \ - include/ISO_Fortran_binding.h \ - $(i_cshift1a_c) $(i_maxloc0s_c) $(i_minloc0s_c) $(i_maxloc1s_c) \ - $(i_minloc1s_c) $(i_maxloc2s_c) $(i_minloc2s_c) $(i_maxvals_c) \ - $(i_maxval0s_c) $(i_minval0s_c) $(i_maxval1s_c) $(i_minval1s_c) \ - $(i_findloc0_c) $(i_findloc0s_c) $(i_findloc1_c) $(i_findloc1s_c) \ - $(i_findloc2s_c) $(i_isobinding_c) + $(i_maxloc1_c) $(i_maxval_c) $(i_minloc0_c) $(i_minloc1_c) $(i_minval_c) \ + $(i_product_c) $(i_sum_c) $(i_bessel_c) $(i_iall_c) $(i_iany_c) \ + $(i_iparity_c) $(i_norm2_c) $(i_parity_c) \ + $(i_matmul_c) $(i_matmull_c) $(i_shape_c) $(i_eoshift1_c) \ + $(i_eoshift3_c) $(i_cshift1_c) $(i_reshape_c) $(in_pack_c) $(in_unpack_c) \ + $(i_pow_c) $(i_pack_c) $(i_unpack_c) $(i_matmulavx128_c) \ + $(i_spread_c) selected_int_kind.inc selected_real_kind.inc kinds.h \ + $(i_cshift0_c) kinds.inc c99_protos.inc fpu-target.h fpu-target.inc \ + include/ISO_Fortran_binding.h \ + $(i_cshift1a_c) $(i_maxloc0s_c) $(i_minloc0s_c) $(i_maxloc1s_c) \ + $(i_minloc1s_c) $(i_maxloc2s_c) $(i_minloc2s_c) $(i_maxvals_c) \ + $(i_maxval0s_c) $(i_minval0s_c) $(i_maxval1s_c) $(i_minval1s_c) \ + $(i_findloc0_c) $(i_findloc0s_c) $(i_findloc1_c) $(i_findloc1s_c) \ + $(i_findloc2s_c) $(i_isobinding_c) # Machine generated specifics @@ -1397,6 +1471,7 @@ $(srcdir)/generated/_abs_c4.F90 \ $(srcdir)/generated/_abs_c8.F90 \ $(srcdir)/generated/_abs_c10.F90 \ $(srcdir)/generated/_abs_c16.F90 \ +$(srcdir)/generated/_abs_c17.F90 \ $(srcdir)/generated/_abs_i4.F90 \ $(srcdir)/generated/_abs_i8.F90 \ $(srcdir)/generated/_abs_i16.F90 \ @@ -1404,106 +1479,132 @@ $(srcdir)/generated/_abs_r4.F90 \ $(srcdir)/generated/_abs_r8.F90 \ $(srcdir)/generated/_abs_r10.F90 \ $(srcdir)/generated/_abs_r16.F90 \ +$(srcdir)/generated/_abs_r17.F90 \ $(srcdir)/generated/_aimag_c4.F90 \ $(srcdir)/generated/_aimag_c8.F90 \ $(srcdir)/generated/_aimag_c10.F90 \ $(srcdir)/generated/_aimag_c16.F90 \ +$(srcdir)/generated/_aimag_c17.F90 \ $(srcdir)/generated/_exp_r4.F90 \ $(srcdir)/generated/_exp_r8.F90 \ $(srcdir)/generated/_exp_r10.F90 \ $(srcdir)/generated/_exp_r16.F90 \ +$(srcdir)/generated/_exp_r17.F90 \ $(srcdir)/generated/_exp_c4.F90 \ $(srcdir)/generated/_exp_c8.F90 \ $(srcdir)/generated/_exp_c10.F90 \ $(srcdir)/generated/_exp_c16.F90 \ +$(srcdir)/generated/_exp_c17.F90 \ $(srcdir)/generated/_log_r4.F90 \ $(srcdir)/generated/_log_r8.F90 \ $(srcdir)/generated/_log_r10.F90 \ $(srcdir)/generated/_log_r16.F90 \ +$(srcdir)/generated/_log_r17.F90 \ $(srcdir)/generated/_log_c4.F90 \ $(srcdir)/generated/_log_c8.F90 \ $(srcdir)/generated/_log_c10.F90 \ $(srcdir)/generated/_log_c16.F90 \ +$(srcdir)/generated/_log_c17.F90 \ $(srcdir)/generated/_log10_r4.F90 \ $(srcdir)/generated/_log10_r8.F90 \ $(srcdir)/generated/_log10_r10.F90 \ $(srcdir)/generated/_log10_r16.F90 \ +$(srcdir)/generated/_log10_r17.F90 \ $(srcdir)/generated/_sqrt_r4.F90 \ $(srcdir)/generated/_sqrt_r8.F90 \ $(srcdir)/generated/_sqrt_r10.F90 \ $(srcdir)/generated/_sqrt_r16.F90 \ +$(srcdir)/generated/_sqrt_r17.F90 \ $(srcdir)/generated/_sqrt_c4.F90 \ $(srcdir)/generated/_sqrt_c8.F90 \ $(srcdir)/generated/_sqrt_c10.F90 \ $(srcdir)/generated/_sqrt_c16.F90 \ +$(srcdir)/generated/_sqrt_c17.F90 \ $(srcdir)/generated/_asin_r4.F90 \ $(srcdir)/generated/_asin_r8.F90 \ $(srcdir)/generated/_asin_r10.F90 \ $(srcdir)/generated/_asin_r16.F90 \ +$(srcdir)/generated/_asin_r17.F90 \ $(srcdir)/generated/_asinh_r4.F90 \ $(srcdir)/generated/_asinh_r8.F90 \ $(srcdir)/generated/_asinh_r10.F90 \ $(srcdir)/generated/_asinh_r16.F90 \ +$(srcdir)/generated/_asinh_r17.F90 \ $(srcdir)/generated/_acos_r4.F90 \ $(srcdir)/generated/_acos_r8.F90 \ $(srcdir)/generated/_acos_r10.F90 \ $(srcdir)/generated/_acos_r16.F90 \ +$(srcdir)/generated/_acos_r17.F90 \ $(srcdir)/generated/_acosh_r4.F90 \ $(srcdir)/generated/_acosh_r8.F90 \ $(srcdir)/generated/_acosh_r10.F90 \ $(srcdir)/generated/_acosh_r16.F90 \ +$(srcdir)/generated/_acosh_r17.F90 \ $(srcdir)/generated/_atan_r4.F90 \ $(srcdir)/generated/_atan_r8.F90 \ $(srcdir)/generated/_atan_r10.F90 \ $(srcdir)/generated/_atan_r16.F90 \ +$(srcdir)/generated/_atan_r17.F90 \ $(srcdir)/generated/_atanh_r4.F90 \ $(srcdir)/generated/_atanh_r8.F90 \ $(srcdir)/generated/_atanh_r10.F90 \ $(srcdir)/generated/_atanh_r16.F90 \ +$(srcdir)/generated/_atanh_r17.F90 \ $(srcdir)/generated/_sin_r4.F90 \ $(srcdir)/generated/_sin_r8.F90 \ $(srcdir)/generated/_sin_r10.F90 \ $(srcdir)/generated/_sin_r16.F90 \ +$(srcdir)/generated/_sin_r17.F90 \ $(srcdir)/generated/_sin_c4.F90 \ $(srcdir)/generated/_sin_c8.F90 \ $(srcdir)/generated/_sin_c10.F90 \ $(srcdir)/generated/_sin_c16.F90 \ +$(srcdir)/generated/_sin_c17.F90 \ $(srcdir)/generated/_cos_r4.F90 \ $(srcdir)/generated/_cos_r8.F90 \ $(srcdir)/generated/_cos_r10.F90 \ $(srcdir)/generated/_cos_r16.F90 \ +$(srcdir)/generated/_cos_r17.F90 \ $(srcdir)/generated/_cos_c4.F90 \ $(srcdir)/generated/_cos_c8.F90 \ $(srcdir)/generated/_cos_c10.F90 \ $(srcdir)/generated/_cos_c16.F90 \ +$(srcdir)/generated/_cos_c17.F90 \ $(srcdir)/generated/_tan_r4.F90 \ $(srcdir)/generated/_tan_r8.F90 \ $(srcdir)/generated/_tan_r10.F90 \ $(srcdir)/generated/_tan_r16.F90 \ +$(srcdir)/generated/_tan_r17.F90 \ $(srcdir)/generated/_sinh_r4.F90 \ $(srcdir)/generated/_sinh_r8.F90 \ $(srcdir)/generated/_sinh_r10.F90 \ $(srcdir)/generated/_sinh_r16.F90 \ +$(srcdir)/generated/_sinh_r17.F90 \ $(srcdir)/generated/_cosh_r4.F90 \ $(srcdir)/generated/_cosh_r8.F90 \ $(srcdir)/generated/_cosh_r10.F90 \ $(srcdir)/generated/_cosh_r16.F90 \ +$(srcdir)/generated/_cosh_r17.F90 \ $(srcdir)/generated/_tanh_r4.F90 \ $(srcdir)/generated/_tanh_r8.F90 \ $(srcdir)/generated/_tanh_r10.F90 \ $(srcdir)/generated/_tanh_r16.F90 \ +$(srcdir)/generated/_tanh_r17.F90 \ $(srcdir)/generated/_conjg_c4.F90 \ $(srcdir)/generated/_conjg_c8.F90 \ $(srcdir)/generated/_conjg_c10.F90 \ $(srcdir)/generated/_conjg_c16.F90 \ +$(srcdir)/generated/_conjg_c17.F90 \ $(srcdir)/generated/_aint_r4.F90 \ $(srcdir)/generated/_aint_r8.F90 \ $(srcdir)/generated/_aint_r10.F90 \ $(srcdir)/generated/_aint_r16.F90 \ +$(srcdir)/generated/_aint_r17.F90 \ $(srcdir)/generated/_anint_r4.F90 \ $(srcdir)/generated/_anint_r8.F90 \ $(srcdir)/generated/_anint_r10.F90 \ -$(srcdir)/generated/_anint_r16.F90 +$(srcdir)/generated/_anint_r16.F90 \ +$(srcdir)/generated/_anint_r17.F90 gfor_built_specific2_src = \ $(srcdir)/generated/_sign_i4.F90 \ @@ -1513,6 +1614,7 @@ $(srcdir)/generated/_sign_r4.F90 \ $(srcdir)/generated/_sign_r8.F90 \ $(srcdir)/generated/_sign_r10.F90 \ $(srcdir)/generated/_sign_r16.F90 \ +$(srcdir)/generated/_sign_r17.F90 \ $(srcdir)/generated/_dim_i4.F90 \ $(srcdir)/generated/_dim_i8.F90 \ $(srcdir)/generated/_dim_i16.F90 \ @@ -1520,17 +1622,20 @@ $(srcdir)/generated/_dim_r4.F90 \ $(srcdir)/generated/_dim_r8.F90 \ $(srcdir)/generated/_dim_r10.F90 \ $(srcdir)/generated/_dim_r16.F90 \ +$(srcdir)/generated/_dim_r17.F90 \ $(srcdir)/generated/_atan2_r4.F90 \ $(srcdir)/generated/_atan2_r8.F90 \ $(srcdir)/generated/_atan2_r10.F90 \ $(srcdir)/generated/_atan2_r16.F90 \ +$(srcdir)/generated/_atan2_r17.F90 \ $(srcdir)/generated/_mod_i4.F90 \ $(srcdir)/generated/_mod_i8.F90 \ $(srcdir)/generated/_mod_i16.F90 \ $(srcdir)/generated/_mod_r4.F90 \ $(srcdir)/generated/_mod_r8.F90 \ $(srcdir)/generated/_mod_r10.F90 \ -$(srcdir)/generated/_mod_r16.F90 +$(srcdir)/generated/_mod_r16.F90 \ +$(srcdir)/generated/_mod_r17.F90 gfor_misc_specifics = $(srcdir)/generated/misc_specifics.F90 gfor_specific_src = \ @@ -1545,7 +1650,7 @@ BUILT_SOURCES = $(gfor_built_src) $(gfor_built_specific_src) \ $(gfor_built_specific2_src) $(gfor_misc_specifics) \ $(am__append_7) prereq_SRC = $(gfor_src) $(gfor_built_src) $(gfor_io_src) \ - $(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src) + $(gfor_helper_src) $(gfor_ieee_src) $(gfor_io_headers) $(gfor_specific_src) @onestep_FALSE@libgfortran_la_SOURCES = $(prereq_SRC) @@ -1561,7 +1666,7 @@ prereq_SRC = $(gfor_src) $(gfor_built_src) $(gfor_io_src) \ #libgfortran_F.lo: # $(LTPPFCCOMPILE) -c -o $@ $^ -combine @onestep_TRUE@libgfortran_la_SOURCES = libgfortran_c.c $(filter-out %.c,$(prereq_SRC)) -I_M4_DEPS = m4/iparm.m4 +I_M4_DEPS = m4/iparm.m4 m4/mtype.m4 I_M4_DEPS0 = $(I_M4_DEPS) m4/iforeach.m4 I_M4_DEPS1 = $(I_M4_DEPS) m4/ifunction.m4 I_M4_DEPS2 = $(I_M4_DEPS) m4/ifunction_logical.m4 @@ -1737,6 +1842,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/backtrace.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bessel_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bessel_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bessel_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bessel_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bessel_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bounds.Plo@am__quote@ @@ -1756,6 +1862,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_i1.Plo@am__quote@ @@ -1765,11 +1872,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift0_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_i1.Plo@am__quote@ @@ -1779,11 +1888,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_16_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_i1.Plo@am__quote@ @@ -1793,11 +1904,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_4_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_i1.Plo@am__quote@ @@ -1807,6 +1920,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cshift1_8_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ctime.Plo@am__quote@ @@ -1832,6 +1946,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/file_pos.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_i1.Plo@am__quote@ @@ -1841,12 +1956,14 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_s1.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc0_s4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_i1.Plo@am__quote@ @@ -1856,6 +1973,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/findloc1_s1.Plo@am__quote@ @@ -1884,6 +2002,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ierrno.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_generic.Plo@am__quote@ @@ -1894,10 +2013,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_pack_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_generic.Plo@am__quote@ @@ -1908,6 +2029,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/in_unpack_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/inquire.Plo@am__quote@ @@ -1927,6 +2049,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_i1.Plo@am__quote@ @@ -1939,10 +2062,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_l8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmul_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_i1.Plo@am__quote@ @@ -1952,6 +2077,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/matmulavx128_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_i1.Plo@am__quote@ @@ -1961,6 +2087,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_16_s1.Plo@am__quote@ @@ -1972,6 +2099,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_4_s1.Plo@am__quote@ @@ -1983,6 +2111,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc0_8_s1.Plo@am__quote@ @@ -1994,6 +2123,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_16_s1.Plo@am__quote@ @@ -2005,6 +2135,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_4_s1.Plo@am__quote@ @@ -2016,6 +2147,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxloc1_8_s1.Plo@am__quote@ @@ -2037,6 +2169,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxval_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxval_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxval_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxval_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxval_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/maxval_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/memory.Plo@am__quote@ @@ -2048,6 +2181,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_16_s1.Plo@am__quote@ @@ -2059,6 +2193,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_4_s1.Plo@am__quote@ @@ -2070,6 +2205,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc0_8_s1.Plo@am__quote@ @@ -2081,6 +2217,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_16_s1.Plo@am__quote@ @@ -2092,6 +2229,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_4_s1.Plo@am__quote@ @@ -2103,6 +2241,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minloc1_8_s1.Plo@am__quote@ @@ -2124,17 +2263,20 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/minval_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/move_alloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mvbits.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/norm2_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/open.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_generic.Plo@am__quote@ @@ -2145,6 +2287,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pack_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parity_l1.Plo@am__quote@ @@ -2160,6 +2303,9 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c16_i16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c16_i4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c16_i8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c17_i16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c17_i4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c17_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c4_i16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c4_i4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_c4_i8.Plo@am__quote@ @@ -2180,12 +2326,16 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r16_i16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r16_i4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r16_i8.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r17_i16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r17_i4.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r17_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r4_i16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r4_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r8_i16.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pow_r8_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_i1.Plo@am__quote@ @@ -2195,6 +2345,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/product_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rand.Plo@am__quote@ @@ -2203,6 +2354,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rename.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_generic.Plo@am__quote@ @@ -2212,6 +2364,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_packed.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reshape_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/select.Plo@am__quote@ @@ -2228,6 +2381,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sleep.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_generic.Plo@am__quote@ @@ -2238,6 +2392,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spread_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stat.Plo@am__quote@ @@ -2246,6 +2401,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/string_intrinsics.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_i1.Plo@am__quote@ @@ -2255,6 +2411,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sum_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symlnk.Plo@am__quote@ @@ -2270,6 +2427,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unlink.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_c10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_c16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_c17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_c4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_c8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_generic.Plo@am__quote@ @@ -2280,6 +2438,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_i8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r10.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r16.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r17.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r4.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unpack_r8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/write.Plo@am__quote@ @@ -2311,6 +2470,9 @@ _abs_c10.lo: $(srcdir)/generated/_abs_c10.F90 _abs_c16.lo: $(srcdir)/generated/_abs_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _abs_c16.lo `test -f '$(srcdir)/generated/_abs_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_abs_c16.F90 +_abs_c17.lo: $(srcdir)/generated/_abs_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _abs_c17.lo `test -f '$(srcdir)/generated/_abs_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_abs_c17.F90 + _abs_i4.lo: $(srcdir)/generated/_abs_i4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _abs_i4.lo `test -f '$(srcdir)/generated/_abs_i4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_abs_i4.F90 @@ -2332,6 +2494,9 @@ _abs_r10.lo: $(srcdir)/generated/_abs_r10.F90 _abs_r16.lo: $(srcdir)/generated/_abs_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _abs_r16.lo `test -f '$(srcdir)/generated/_abs_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_abs_r16.F90 +_abs_r17.lo: $(srcdir)/generated/_abs_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _abs_r17.lo `test -f '$(srcdir)/generated/_abs_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_abs_r17.F90 + _aimag_c4.lo: $(srcdir)/generated/_aimag_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _aimag_c4.lo `test -f '$(srcdir)/generated/_aimag_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_aimag_c4.F90 @@ -2344,6 +2509,9 @@ _aimag_c10.lo: $(srcdir)/generated/_aimag_c10.F90 _aimag_c16.lo: $(srcdir)/generated/_aimag_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _aimag_c16.lo `test -f '$(srcdir)/generated/_aimag_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_aimag_c16.F90 +_aimag_c17.lo: $(srcdir)/generated/_aimag_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _aimag_c17.lo `test -f '$(srcdir)/generated/_aimag_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_aimag_c17.F90 + _exp_r4.lo: $(srcdir)/generated/_exp_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _exp_r4.lo `test -f '$(srcdir)/generated/_exp_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_exp_r4.F90 @@ -2356,6 +2524,9 @@ _exp_r10.lo: $(srcdir)/generated/_exp_r10.F90 _exp_r16.lo: $(srcdir)/generated/_exp_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _exp_r16.lo `test -f '$(srcdir)/generated/_exp_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_exp_r16.F90 +_exp_r17.lo: $(srcdir)/generated/_exp_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _exp_r17.lo `test -f '$(srcdir)/generated/_exp_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_exp_r17.F90 + _exp_c4.lo: $(srcdir)/generated/_exp_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _exp_c4.lo `test -f '$(srcdir)/generated/_exp_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_exp_c4.F90 @@ -2368,6 +2539,9 @@ _exp_c10.lo: $(srcdir)/generated/_exp_c10.F90 _exp_c16.lo: $(srcdir)/generated/_exp_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _exp_c16.lo `test -f '$(srcdir)/generated/_exp_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_exp_c16.F90 +_exp_c17.lo: $(srcdir)/generated/_exp_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _exp_c17.lo `test -f '$(srcdir)/generated/_exp_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_exp_c17.F90 + _log_r4.lo: $(srcdir)/generated/_log_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log_r4.lo `test -f '$(srcdir)/generated/_log_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log_r4.F90 @@ -2380,6 +2554,9 @@ _log_r10.lo: $(srcdir)/generated/_log_r10.F90 _log_r16.lo: $(srcdir)/generated/_log_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log_r16.lo `test -f '$(srcdir)/generated/_log_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log_r16.F90 +_log_r17.lo: $(srcdir)/generated/_log_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log_r17.lo `test -f '$(srcdir)/generated/_log_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log_r17.F90 + _log_c4.lo: $(srcdir)/generated/_log_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log_c4.lo `test -f '$(srcdir)/generated/_log_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log_c4.F90 @@ -2392,6 +2569,9 @@ _log_c10.lo: $(srcdir)/generated/_log_c10.F90 _log_c16.lo: $(srcdir)/generated/_log_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log_c16.lo `test -f '$(srcdir)/generated/_log_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log_c16.F90 +_log_c17.lo: $(srcdir)/generated/_log_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log_c17.lo `test -f '$(srcdir)/generated/_log_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log_c17.F90 + _log10_r4.lo: $(srcdir)/generated/_log10_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log10_r4.lo `test -f '$(srcdir)/generated/_log10_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log10_r4.F90 @@ -2404,6 +2584,9 @@ _log10_r10.lo: $(srcdir)/generated/_log10_r10.F90 _log10_r16.lo: $(srcdir)/generated/_log10_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log10_r16.lo `test -f '$(srcdir)/generated/_log10_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log10_r16.F90 +_log10_r17.lo: $(srcdir)/generated/_log10_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _log10_r17.lo `test -f '$(srcdir)/generated/_log10_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_log10_r17.F90 + _sqrt_r4.lo: $(srcdir)/generated/_sqrt_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sqrt_r4.lo `test -f '$(srcdir)/generated/_sqrt_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sqrt_r4.F90 @@ -2416,6 +2599,9 @@ _sqrt_r10.lo: $(srcdir)/generated/_sqrt_r10.F90 _sqrt_r16.lo: $(srcdir)/generated/_sqrt_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sqrt_r16.lo `test -f '$(srcdir)/generated/_sqrt_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sqrt_r16.F90 +_sqrt_r17.lo: $(srcdir)/generated/_sqrt_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sqrt_r17.lo `test -f '$(srcdir)/generated/_sqrt_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sqrt_r17.F90 + _sqrt_c4.lo: $(srcdir)/generated/_sqrt_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sqrt_c4.lo `test -f '$(srcdir)/generated/_sqrt_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sqrt_c4.F90 @@ -2428,6 +2614,9 @@ _sqrt_c10.lo: $(srcdir)/generated/_sqrt_c10.F90 _sqrt_c16.lo: $(srcdir)/generated/_sqrt_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sqrt_c16.lo `test -f '$(srcdir)/generated/_sqrt_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sqrt_c16.F90 +_sqrt_c17.lo: $(srcdir)/generated/_sqrt_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sqrt_c17.lo `test -f '$(srcdir)/generated/_sqrt_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sqrt_c17.F90 + _asin_r4.lo: $(srcdir)/generated/_asin_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _asin_r4.lo `test -f '$(srcdir)/generated/_asin_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_asin_r4.F90 @@ -2440,6 +2629,9 @@ _asin_r10.lo: $(srcdir)/generated/_asin_r10.F90 _asin_r16.lo: $(srcdir)/generated/_asin_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _asin_r16.lo `test -f '$(srcdir)/generated/_asin_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_asin_r16.F90 +_asin_r17.lo: $(srcdir)/generated/_asin_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _asin_r17.lo `test -f '$(srcdir)/generated/_asin_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_asin_r17.F90 + _asinh_r4.lo: $(srcdir)/generated/_asinh_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _asinh_r4.lo `test -f '$(srcdir)/generated/_asinh_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_asinh_r4.F90 @@ -2452,6 +2644,9 @@ _asinh_r10.lo: $(srcdir)/generated/_asinh_r10.F90 _asinh_r16.lo: $(srcdir)/generated/_asinh_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _asinh_r16.lo `test -f '$(srcdir)/generated/_asinh_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_asinh_r16.F90 +_asinh_r17.lo: $(srcdir)/generated/_asinh_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _asinh_r17.lo `test -f '$(srcdir)/generated/_asinh_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_asinh_r17.F90 + _acos_r4.lo: $(srcdir)/generated/_acos_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _acos_r4.lo `test -f '$(srcdir)/generated/_acos_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_acos_r4.F90 @@ -2464,6 +2659,9 @@ _acos_r10.lo: $(srcdir)/generated/_acos_r10.F90 _acos_r16.lo: $(srcdir)/generated/_acos_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _acos_r16.lo `test -f '$(srcdir)/generated/_acos_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_acos_r16.F90 +_acos_r17.lo: $(srcdir)/generated/_acos_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _acos_r17.lo `test -f '$(srcdir)/generated/_acos_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_acos_r17.F90 + _acosh_r4.lo: $(srcdir)/generated/_acosh_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _acosh_r4.lo `test -f '$(srcdir)/generated/_acosh_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_acosh_r4.F90 @@ -2476,6 +2674,9 @@ _acosh_r10.lo: $(srcdir)/generated/_acosh_r10.F90 _acosh_r16.lo: $(srcdir)/generated/_acosh_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _acosh_r16.lo `test -f '$(srcdir)/generated/_acosh_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_acosh_r16.F90 +_acosh_r17.lo: $(srcdir)/generated/_acosh_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _acosh_r17.lo `test -f '$(srcdir)/generated/_acosh_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_acosh_r17.F90 + _atan_r4.lo: $(srcdir)/generated/_atan_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atan_r4.lo `test -f '$(srcdir)/generated/_atan_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atan_r4.F90 @@ -2488,6 +2689,9 @@ _atan_r10.lo: $(srcdir)/generated/_atan_r10.F90 _atan_r16.lo: $(srcdir)/generated/_atan_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atan_r16.lo `test -f '$(srcdir)/generated/_atan_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atan_r16.F90 +_atan_r17.lo: $(srcdir)/generated/_atan_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atan_r17.lo `test -f '$(srcdir)/generated/_atan_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atan_r17.F90 + _atanh_r4.lo: $(srcdir)/generated/_atanh_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atanh_r4.lo `test -f '$(srcdir)/generated/_atanh_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atanh_r4.F90 @@ -2500,6 +2704,9 @@ _atanh_r10.lo: $(srcdir)/generated/_atanh_r10.F90 _atanh_r16.lo: $(srcdir)/generated/_atanh_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atanh_r16.lo `test -f '$(srcdir)/generated/_atanh_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atanh_r16.F90 +_atanh_r17.lo: $(srcdir)/generated/_atanh_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atanh_r17.lo `test -f '$(srcdir)/generated/_atanh_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atanh_r17.F90 + _sin_r4.lo: $(srcdir)/generated/_sin_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sin_r4.lo `test -f '$(srcdir)/generated/_sin_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sin_r4.F90 @@ -2512,6 +2719,9 @@ _sin_r10.lo: $(srcdir)/generated/_sin_r10.F90 _sin_r16.lo: $(srcdir)/generated/_sin_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sin_r16.lo `test -f '$(srcdir)/generated/_sin_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sin_r16.F90 +_sin_r17.lo: $(srcdir)/generated/_sin_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sin_r17.lo `test -f '$(srcdir)/generated/_sin_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sin_r17.F90 + _sin_c4.lo: $(srcdir)/generated/_sin_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sin_c4.lo `test -f '$(srcdir)/generated/_sin_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sin_c4.F90 @@ -2524,6 +2734,9 @@ _sin_c10.lo: $(srcdir)/generated/_sin_c10.F90 _sin_c16.lo: $(srcdir)/generated/_sin_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sin_c16.lo `test -f '$(srcdir)/generated/_sin_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sin_c16.F90 +_sin_c17.lo: $(srcdir)/generated/_sin_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sin_c17.lo `test -f '$(srcdir)/generated/_sin_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sin_c17.F90 + _cos_r4.lo: $(srcdir)/generated/_cos_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cos_r4.lo `test -f '$(srcdir)/generated/_cos_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cos_r4.F90 @@ -2536,6 +2749,9 @@ _cos_r10.lo: $(srcdir)/generated/_cos_r10.F90 _cos_r16.lo: $(srcdir)/generated/_cos_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cos_r16.lo `test -f '$(srcdir)/generated/_cos_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cos_r16.F90 +_cos_r17.lo: $(srcdir)/generated/_cos_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cos_r17.lo `test -f '$(srcdir)/generated/_cos_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cos_r17.F90 + _cos_c4.lo: $(srcdir)/generated/_cos_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cos_c4.lo `test -f '$(srcdir)/generated/_cos_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cos_c4.F90 @@ -2548,6 +2764,9 @@ _cos_c10.lo: $(srcdir)/generated/_cos_c10.F90 _cos_c16.lo: $(srcdir)/generated/_cos_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cos_c16.lo `test -f '$(srcdir)/generated/_cos_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cos_c16.F90 +_cos_c17.lo: $(srcdir)/generated/_cos_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cos_c17.lo `test -f '$(srcdir)/generated/_cos_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cos_c17.F90 + _tan_r4.lo: $(srcdir)/generated/_tan_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _tan_r4.lo `test -f '$(srcdir)/generated/_tan_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_tan_r4.F90 @@ -2560,6 +2779,9 @@ _tan_r10.lo: $(srcdir)/generated/_tan_r10.F90 _tan_r16.lo: $(srcdir)/generated/_tan_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _tan_r16.lo `test -f '$(srcdir)/generated/_tan_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_tan_r16.F90 +_tan_r17.lo: $(srcdir)/generated/_tan_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _tan_r17.lo `test -f '$(srcdir)/generated/_tan_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_tan_r17.F90 + _sinh_r4.lo: $(srcdir)/generated/_sinh_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sinh_r4.lo `test -f '$(srcdir)/generated/_sinh_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sinh_r4.F90 @@ -2572,6 +2794,9 @@ _sinh_r10.lo: $(srcdir)/generated/_sinh_r10.F90 _sinh_r16.lo: $(srcdir)/generated/_sinh_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sinh_r16.lo `test -f '$(srcdir)/generated/_sinh_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sinh_r16.F90 +_sinh_r17.lo: $(srcdir)/generated/_sinh_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sinh_r17.lo `test -f '$(srcdir)/generated/_sinh_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sinh_r17.F90 + _cosh_r4.lo: $(srcdir)/generated/_cosh_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cosh_r4.lo `test -f '$(srcdir)/generated/_cosh_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cosh_r4.F90 @@ -2584,6 +2809,9 @@ _cosh_r10.lo: $(srcdir)/generated/_cosh_r10.F90 _cosh_r16.lo: $(srcdir)/generated/_cosh_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cosh_r16.lo `test -f '$(srcdir)/generated/_cosh_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cosh_r16.F90 +_cosh_r17.lo: $(srcdir)/generated/_cosh_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _cosh_r17.lo `test -f '$(srcdir)/generated/_cosh_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_cosh_r17.F90 + _tanh_r4.lo: $(srcdir)/generated/_tanh_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _tanh_r4.lo `test -f '$(srcdir)/generated/_tanh_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_tanh_r4.F90 @@ -2596,6 +2824,9 @@ _tanh_r10.lo: $(srcdir)/generated/_tanh_r10.F90 _tanh_r16.lo: $(srcdir)/generated/_tanh_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _tanh_r16.lo `test -f '$(srcdir)/generated/_tanh_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_tanh_r16.F90 +_tanh_r17.lo: $(srcdir)/generated/_tanh_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _tanh_r17.lo `test -f '$(srcdir)/generated/_tanh_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_tanh_r17.F90 + _conjg_c4.lo: $(srcdir)/generated/_conjg_c4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _conjg_c4.lo `test -f '$(srcdir)/generated/_conjg_c4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_conjg_c4.F90 @@ -2608,6 +2839,9 @@ _conjg_c10.lo: $(srcdir)/generated/_conjg_c10.F90 _conjg_c16.lo: $(srcdir)/generated/_conjg_c16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _conjg_c16.lo `test -f '$(srcdir)/generated/_conjg_c16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_conjg_c16.F90 +_conjg_c17.lo: $(srcdir)/generated/_conjg_c17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _conjg_c17.lo `test -f '$(srcdir)/generated/_conjg_c17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_conjg_c17.F90 + _aint_r4.lo: $(srcdir)/generated/_aint_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _aint_r4.lo `test -f '$(srcdir)/generated/_aint_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_aint_r4.F90 @@ -2620,6 +2854,9 @@ _aint_r10.lo: $(srcdir)/generated/_aint_r10.F90 _aint_r16.lo: $(srcdir)/generated/_aint_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _aint_r16.lo `test -f '$(srcdir)/generated/_aint_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_aint_r16.F90 +_aint_r17.lo: $(srcdir)/generated/_aint_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _aint_r17.lo `test -f '$(srcdir)/generated/_aint_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_aint_r17.F90 + _anint_r4.lo: $(srcdir)/generated/_anint_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _anint_r4.lo `test -f '$(srcdir)/generated/_anint_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_anint_r4.F90 @@ -2632,6 +2869,9 @@ _anint_r10.lo: $(srcdir)/generated/_anint_r10.F90 _anint_r16.lo: $(srcdir)/generated/_anint_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _anint_r16.lo `test -f '$(srcdir)/generated/_anint_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_anint_r16.F90 +_anint_r17.lo: $(srcdir)/generated/_anint_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _anint_r17.lo `test -f '$(srcdir)/generated/_anint_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_anint_r17.F90 + _sign_i4.lo: $(srcdir)/generated/_sign_i4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sign_i4.lo `test -f '$(srcdir)/generated/_sign_i4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sign_i4.F90 @@ -2653,6 +2893,9 @@ _sign_r10.lo: $(srcdir)/generated/_sign_r10.F90 _sign_r16.lo: $(srcdir)/generated/_sign_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sign_r16.lo `test -f '$(srcdir)/generated/_sign_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sign_r16.F90 +_sign_r17.lo: $(srcdir)/generated/_sign_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _sign_r17.lo `test -f '$(srcdir)/generated/_sign_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_sign_r17.F90 + _dim_i4.lo: $(srcdir)/generated/_dim_i4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _dim_i4.lo `test -f '$(srcdir)/generated/_dim_i4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_dim_i4.F90 @@ -2674,6 +2917,9 @@ _dim_r10.lo: $(srcdir)/generated/_dim_r10.F90 _dim_r16.lo: $(srcdir)/generated/_dim_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _dim_r16.lo `test -f '$(srcdir)/generated/_dim_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_dim_r16.F90 +_dim_r17.lo: $(srcdir)/generated/_dim_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _dim_r17.lo `test -f '$(srcdir)/generated/_dim_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_dim_r17.F90 + _atan2_r4.lo: $(srcdir)/generated/_atan2_r4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atan2_r4.lo `test -f '$(srcdir)/generated/_atan2_r4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atan2_r4.F90 @@ -2686,6 +2932,9 @@ _atan2_r10.lo: $(srcdir)/generated/_atan2_r10.F90 _atan2_r16.lo: $(srcdir)/generated/_atan2_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atan2_r16.lo `test -f '$(srcdir)/generated/_atan2_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atan2_r16.F90 +_atan2_r17.lo: $(srcdir)/generated/_atan2_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _atan2_r17.lo `test -f '$(srcdir)/generated/_atan2_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_atan2_r17.F90 + _mod_i4.lo: $(srcdir)/generated/_mod_i4.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _mod_i4.lo `test -f '$(srcdir)/generated/_mod_i4.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_mod_i4.F90 @@ -2707,6 +2956,9 @@ _mod_r10.lo: $(srcdir)/generated/_mod_r10.F90 _mod_r16.lo: $(srcdir)/generated/_mod_r16.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _mod_r16.lo `test -f '$(srcdir)/generated/_mod_r16.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_mod_r16.F90 +_mod_r17.lo: $(srcdir)/generated/_mod_r17.F90 + $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o _mod_r17.lo `test -f '$(srcdir)/generated/_mod_r17.F90' || echo '$(srcdir)/'`$(srcdir)/generated/_mod_r17.F90 + misc_specifics.lo: $(srcdir)/generated/misc_specifics.F90 $(AM_V_PPFC)$(LIBTOOL) $(AM_V_lt) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(FC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_FCFLAGS) $(FCFLAGS) -c -o misc_specifics.lo `test -f '$(srcdir)/generated/misc_specifics.F90' || echo '$(srcdir)/'`$(srcdir)/generated/misc_specifics.F90 @@ -3133,6 +3385,27 @@ maxloc0_16_r16.lo: $(srcdir)/generated/maxloc0_16_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc0_16_r16.lo `test -f '$(srcdir)/generated/maxloc0_16_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_16_r16.c +maxloc0_4_r17.lo: $(srcdir)/generated/maxloc0_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc0_4_r17.lo -MD -MP -MF $(DEPDIR)/maxloc0_4_r17.Tpo -c -o maxloc0_4_r17.lo `test -f '$(srcdir)/generated/maxloc0_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc0_4_r17.Tpo $(DEPDIR)/maxloc0_4_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxloc0_4_r17.c' object='maxloc0_4_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc0_4_r17.lo `test -f '$(srcdir)/generated/maxloc0_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_4_r17.c + +maxloc0_8_r17.lo: $(srcdir)/generated/maxloc0_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc0_8_r17.lo -MD -MP -MF $(DEPDIR)/maxloc0_8_r17.Tpo -c -o maxloc0_8_r17.lo `test -f '$(srcdir)/generated/maxloc0_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc0_8_r17.Tpo $(DEPDIR)/maxloc0_8_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxloc0_8_r17.c' object='maxloc0_8_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc0_8_r17.lo `test -f '$(srcdir)/generated/maxloc0_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_8_r17.c + +maxloc0_16_r17.lo: $(srcdir)/generated/maxloc0_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc0_16_r17.lo -MD -MP -MF $(DEPDIR)/maxloc0_16_r17.Tpo -c -o maxloc0_16_r17.lo `test -f '$(srcdir)/generated/maxloc0_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc0_16_r17.Tpo $(DEPDIR)/maxloc0_16_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxloc0_16_r17.c' object='maxloc0_16_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc0_16_r17.lo `test -f '$(srcdir)/generated/maxloc0_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_16_r17.c + maxloc1_4_i1.lo: $(srcdir)/generated/maxloc1_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc1_4_i1.lo -MD -MP -MF $(DEPDIR)/maxloc1_4_i1.Tpo -c -o maxloc1_4_i1.lo `test -f '$(srcdir)/generated/maxloc1_4_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc1_4_i1.Tpo $(DEPDIR)/maxloc1_4_i1.Plo @@ -3322,6 +3595,27 @@ maxloc1_16_r16.lo: $(srcdir)/generated/maxloc1_16_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc1_16_r16.lo `test -f '$(srcdir)/generated/maxloc1_16_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_16_r16.c +maxloc1_4_r17.lo: $(srcdir)/generated/maxloc1_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc1_4_r17.lo -MD -MP -MF $(DEPDIR)/maxloc1_4_r17.Tpo -c -o maxloc1_4_r17.lo `test -f '$(srcdir)/generated/maxloc1_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc1_4_r17.Tpo $(DEPDIR)/maxloc1_4_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxloc1_4_r17.c' object='maxloc1_4_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc1_4_r17.lo `test -f '$(srcdir)/generated/maxloc1_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_4_r17.c + +maxloc1_8_r17.lo: $(srcdir)/generated/maxloc1_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc1_8_r17.lo -MD -MP -MF $(DEPDIR)/maxloc1_8_r17.Tpo -c -o maxloc1_8_r17.lo `test -f '$(srcdir)/generated/maxloc1_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc1_8_r17.Tpo $(DEPDIR)/maxloc1_8_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxloc1_8_r17.c' object='maxloc1_8_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc1_8_r17.lo `test -f '$(srcdir)/generated/maxloc1_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_8_r17.c + +maxloc1_16_r17.lo: $(srcdir)/generated/maxloc1_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc1_16_r17.lo -MD -MP -MF $(DEPDIR)/maxloc1_16_r17.Tpo -c -o maxloc1_16_r17.lo `test -f '$(srcdir)/generated/maxloc1_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc1_16_r17.Tpo $(DEPDIR)/maxloc1_16_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxloc1_16_r17.c' object='maxloc1_16_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxloc1_16_r17.lo `test -f '$(srcdir)/generated/maxloc1_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc1_16_r17.c + maxval_i1.lo: $(srcdir)/generated/maxval_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxval_i1.lo -MD -MP -MF $(DEPDIR)/maxval_i1.Tpo -c -o maxval_i1.lo `test -f '$(srcdir)/generated/maxval_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxval_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxval_i1.Tpo $(DEPDIR)/maxval_i1.Plo @@ -3385,6 +3679,13 @@ maxval_r16.lo: $(srcdir)/generated/maxval_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxval_r16.lo `test -f '$(srcdir)/generated/maxval_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxval_r16.c +maxval_r17.lo: $(srcdir)/generated/maxval_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxval_r17.lo -MD -MP -MF $(DEPDIR)/maxval_r17.Tpo -c -o maxval_r17.lo `test -f '$(srcdir)/generated/maxval_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxval_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxval_r17.Tpo $(DEPDIR)/maxval_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/maxval_r17.c' object='maxval_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o maxval_r17.lo `test -f '$(srcdir)/generated/maxval_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxval_r17.c + minloc0_4_i1.lo: $(srcdir)/generated/minloc0_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc0_4_i1.lo -MD -MP -MF $(DEPDIR)/minloc0_4_i1.Tpo -c -o minloc0_4_i1.lo `test -f '$(srcdir)/generated/minloc0_4_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc0_4_i1.Tpo $(DEPDIR)/minloc0_4_i1.Plo @@ -3574,6 +3875,27 @@ minloc0_16_r16.lo: $(srcdir)/generated/minloc0_16_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc0_16_r16.lo `test -f '$(srcdir)/generated/minloc0_16_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_16_r16.c +minloc0_4_r17.lo: $(srcdir)/generated/minloc0_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc0_4_r17.lo -MD -MP -MF $(DEPDIR)/minloc0_4_r17.Tpo -c -o minloc0_4_r17.lo `test -f '$(srcdir)/generated/minloc0_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc0_4_r17.Tpo $(DEPDIR)/minloc0_4_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minloc0_4_r17.c' object='minloc0_4_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc0_4_r17.lo `test -f '$(srcdir)/generated/minloc0_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_4_r17.c + +minloc0_8_r17.lo: $(srcdir)/generated/minloc0_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc0_8_r17.lo -MD -MP -MF $(DEPDIR)/minloc0_8_r17.Tpo -c -o minloc0_8_r17.lo `test -f '$(srcdir)/generated/minloc0_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc0_8_r17.Tpo $(DEPDIR)/minloc0_8_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minloc0_8_r17.c' object='minloc0_8_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc0_8_r17.lo `test -f '$(srcdir)/generated/minloc0_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_8_r17.c + +minloc0_16_r17.lo: $(srcdir)/generated/minloc0_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc0_16_r17.lo -MD -MP -MF $(DEPDIR)/minloc0_16_r17.Tpo -c -o minloc0_16_r17.lo `test -f '$(srcdir)/generated/minloc0_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc0_16_r17.Tpo $(DEPDIR)/minloc0_16_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minloc0_16_r17.c' object='minloc0_16_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc0_16_r17.lo `test -f '$(srcdir)/generated/minloc0_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc0_16_r17.c + minloc1_4_i1.lo: $(srcdir)/generated/minloc1_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc1_4_i1.lo -MD -MP -MF $(DEPDIR)/minloc1_4_i1.Tpo -c -o minloc1_4_i1.lo `test -f '$(srcdir)/generated/minloc1_4_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc1_4_i1.Tpo $(DEPDIR)/minloc1_4_i1.Plo @@ -3763,6 +4085,27 @@ minloc1_16_r16.lo: $(srcdir)/generated/minloc1_16_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc1_16_r16.lo `test -f '$(srcdir)/generated/minloc1_16_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_16_r16.c +minloc1_4_r17.lo: $(srcdir)/generated/minloc1_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc1_4_r17.lo -MD -MP -MF $(DEPDIR)/minloc1_4_r17.Tpo -c -o minloc1_4_r17.lo `test -f '$(srcdir)/generated/minloc1_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc1_4_r17.Tpo $(DEPDIR)/minloc1_4_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minloc1_4_r17.c' object='minloc1_4_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc1_4_r17.lo `test -f '$(srcdir)/generated/minloc1_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_4_r17.c + +minloc1_8_r17.lo: $(srcdir)/generated/minloc1_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc1_8_r17.lo -MD -MP -MF $(DEPDIR)/minloc1_8_r17.Tpo -c -o minloc1_8_r17.lo `test -f '$(srcdir)/generated/minloc1_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc1_8_r17.Tpo $(DEPDIR)/minloc1_8_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minloc1_8_r17.c' object='minloc1_8_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc1_8_r17.lo `test -f '$(srcdir)/generated/minloc1_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_8_r17.c + +minloc1_16_r17.lo: $(srcdir)/generated/minloc1_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minloc1_16_r17.lo -MD -MP -MF $(DEPDIR)/minloc1_16_r17.Tpo -c -o minloc1_16_r17.lo `test -f '$(srcdir)/generated/minloc1_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minloc1_16_r17.Tpo $(DEPDIR)/minloc1_16_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minloc1_16_r17.c' object='minloc1_16_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minloc1_16_r17.lo `test -f '$(srcdir)/generated/minloc1_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minloc1_16_r17.c + minval_i1.lo: $(srcdir)/generated/minval_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minval_i1.lo -MD -MP -MF $(DEPDIR)/minval_i1.Tpo -c -o minval_i1.lo `test -f '$(srcdir)/generated/minval_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/minval_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minval_i1.Tpo $(DEPDIR)/minval_i1.Plo @@ -3826,6 +4169,13 @@ minval_r16.lo: $(srcdir)/generated/minval_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minval_r16.lo `test -f '$(srcdir)/generated/minval_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/minval_r16.c +minval_r17.lo: $(srcdir)/generated/minval_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT minval_r17.lo -MD -MP -MF $(DEPDIR)/minval_r17.Tpo -c -o minval_r17.lo `test -f '$(srcdir)/generated/minval_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minval_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/minval_r17.Tpo $(DEPDIR)/minval_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/minval_r17.c' object='minval_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o minval_r17.lo `test -f '$(srcdir)/generated/minval_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/minval_r17.c + product_i1.lo: $(srcdir)/generated/product_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT product_i1.lo -MD -MP -MF $(DEPDIR)/product_i1.Tpo -c -o product_i1.lo `test -f '$(srcdir)/generated/product_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/product_i1.Tpo $(DEPDIR)/product_i1.Plo @@ -3889,6 +4239,13 @@ product_r16.lo: $(srcdir)/generated/product_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o product_r16.lo `test -f '$(srcdir)/generated/product_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_r16.c +product_r17.lo: $(srcdir)/generated/product_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT product_r17.lo -MD -MP -MF $(DEPDIR)/product_r17.Tpo -c -o product_r17.lo `test -f '$(srcdir)/generated/product_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/product_r17.Tpo $(DEPDIR)/product_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/product_r17.c' object='product_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o product_r17.lo `test -f '$(srcdir)/generated/product_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_r17.c + product_c4.lo: $(srcdir)/generated/product_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT product_c4.lo -MD -MP -MF $(DEPDIR)/product_c4.Tpo -c -o product_c4.lo `test -f '$(srcdir)/generated/product_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/product_c4.Tpo $(DEPDIR)/product_c4.Plo @@ -3917,6 +4274,13 @@ product_c16.lo: $(srcdir)/generated/product_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o product_c16.lo `test -f '$(srcdir)/generated/product_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_c16.c +product_c17.lo: $(srcdir)/generated/product_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT product_c17.lo -MD -MP -MF $(DEPDIR)/product_c17.Tpo -c -o product_c17.lo `test -f '$(srcdir)/generated/product_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/product_c17.Tpo $(DEPDIR)/product_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/product_c17.c' object='product_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o product_c17.lo `test -f '$(srcdir)/generated/product_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/product_c17.c + sum_i1.lo: $(srcdir)/generated/sum_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sum_i1.lo -MD -MP -MF $(DEPDIR)/sum_i1.Tpo -c -o sum_i1.lo `test -f '$(srcdir)/generated/sum_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sum_i1.Tpo $(DEPDIR)/sum_i1.Plo @@ -3980,6 +4344,13 @@ sum_r16.lo: $(srcdir)/generated/sum_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sum_r16.lo `test -f '$(srcdir)/generated/sum_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_r16.c +sum_r17.lo: $(srcdir)/generated/sum_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sum_r17.lo -MD -MP -MF $(DEPDIR)/sum_r17.Tpo -c -o sum_r17.lo `test -f '$(srcdir)/generated/sum_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sum_r17.Tpo $(DEPDIR)/sum_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/sum_r17.c' object='sum_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sum_r17.lo `test -f '$(srcdir)/generated/sum_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_r17.c + sum_c4.lo: $(srcdir)/generated/sum_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sum_c4.lo -MD -MP -MF $(DEPDIR)/sum_c4.Tpo -c -o sum_c4.lo `test -f '$(srcdir)/generated/sum_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sum_c4.Tpo $(DEPDIR)/sum_c4.Plo @@ -4008,6 +4379,13 @@ sum_c16.lo: $(srcdir)/generated/sum_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sum_c16.lo `test -f '$(srcdir)/generated/sum_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_c16.c +sum_c17.lo: $(srcdir)/generated/sum_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sum_c17.lo -MD -MP -MF $(DEPDIR)/sum_c17.Tpo -c -o sum_c17.lo `test -f '$(srcdir)/generated/sum_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/sum_c17.Tpo $(DEPDIR)/sum_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/sum_c17.c' object='sum_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sum_c17.lo `test -f '$(srcdir)/generated/sum_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/sum_c17.c + bessel_r4.lo: $(srcdir)/generated/bessel_r4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT bessel_r4.lo -MD -MP -MF $(DEPDIR)/bessel_r4.Tpo -c -o bessel_r4.lo `test -f '$(srcdir)/generated/bessel_r4.c' || echo '$(srcdir)/'`$(srcdir)/generated/bessel_r4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bessel_r4.Tpo $(DEPDIR)/bessel_r4.Plo @@ -4036,6 +4414,13 @@ bessel_r16.lo: $(srcdir)/generated/bessel_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o bessel_r16.lo `test -f '$(srcdir)/generated/bessel_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/bessel_r16.c +bessel_r17.lo: $(srcdir)/generated/bessel_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT bessel_r17.lo -MD -MP -MF $(DEPDIR)/bessel_r17.Tpo -c -o bessel_r17.lo `test -f '$(srcdir)/generated/bessel_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/bessel_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/bessel_r17.Tpo $(DEPDIR)/bessel_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/bessel_r17.c' object='bessel_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o bessel_r17.lo `test -f '$(srcdir)/generated/bessel_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/bessel_r17.c + iall_i1.lo: $(srcdir)/generated/iall_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT iall_i1.lo -MD -MP -MF $(DEPDIR)/iall_i1.Tpo -c -o iall_i1.lo `test -f '$(srcdir)/generated/iall_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/iall_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/iall_i1.Tpo $(DEPDIR)/iall_i1.Plo @@ -4169,6 +4554,13 @@ norm2_r16.lo: $(srcdir)/generated/norm2_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o norm2_r16.lo `test -f '$(srcdir)/generated/norm2_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/norm2_r16.c +norm2_r17.lo: $(srcdir)/generated/norm2_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT norm2_r17.lo -MD -MP -MF $(DEPDIR)/norm2_r17.Tpo -c -o norm2_r17.lo `test -f '$(srcdir)/generated/norm2_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/norm2_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/norm2_r17.Tpo $(DEPDIR)/norm2_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/norm2_r17.c' object='norm2_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o norm2_r17.lo `test -f '$(srcdir)/generated/norm2_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/norm2_r17.c + parity_l1.lo: $(srcdir)/generated/parity_l1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT parity_l1.lo -MD -MP -MF $(DEPDIR)/parity_l1.Tpo -c -o parity_l1.lo `test -f '$(srcdir)/generated/parity_l1.c' || echo '$(srcdir)/'`$(srcdir)/generated/parity_l1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/parity_l1.Tpo $(DEPDIR)/parity_l1.Plo @@ -4267,6 +4659,13 @@ matmul_r16.lo: $(srcdir)/generated/matmul_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmul_r16.lo `test -f '$(srcdir)/generated/matmul_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_r16.c +matmul_r17.lo: $(srcdir)/generated/matmul_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmul_r17.lo -MD -MP -MF $(DEPDIR)/matmul_r17.Tpo -c -o matmul_r17.lo `test -f '$(srcdir)/generated/matmul_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmul_r17.Tpo $(DEPDIR)/matmul_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/matmul_r17.c' object='matmul_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmul_r17.lo `test -f '$(srcdir)/generated/matmul_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_r17.c + matmul_c4.lo: $(srcdir)/generated/matmul_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmul_c4.lo -MD -MP -MF $(DEPDIR)/matmul_c4.Tpo -c -o matmul_c4.lo `test -f '$(srcdir)/generated/matmul_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmul_c4.Tpo $(DEPDIR)/matmul_c4.Plo @@ -4295,6 +4694,13 @@ matmul_c16.lo: $(srcdir)/generated/matmul_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmul_c16.lo `test -f '$(srcdir)/generated/matmul_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_c16.c +matmul_c17.lo: $(srcdir)/generated/matmul_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmul_c17.lo -MD -MP -MF $(DEPDIR)/matmul_c17.Tpo -c -o matmul_c17.lo `test -f '$(srcdir)/generated/matmul_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmul_c17.Tpo $(DEPDIR)/matmul_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/matmul_c17.c' object='matmul_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmul_c17.lo `test -f '$(srcdir)/generated/matmul_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_c17.c + matmul_l4.lo: $(srcdir)/generated/matmul_l4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmul_l4.lo -MD -MP -MF $(DEPDIR)/matmul_l4.Tpo -c -o matmul_l4.lo `test -f '$(srcdir)/generated/matmul_l4.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmul_l4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmul_l4.Tpo $(DEPDIR)/matmul_l4.Plo @@ -4463,6 +4869,13 @@ reshape_r16.lo: $(srcdir)/generated/reshape_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o reshape_r16.lo `test -f '$(srcdir)/generated/reshape_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_r16.c +reshape_r17.lo: $(srcdir)/generated/reshape_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT reshape_r17.lo -MD -MP -MF $(DEPDIR)/reshape_r17.Tpo -c -o reshape_r17.lo `test -f '$(srcdir)/generated/reshape_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/reshape_r17.Tpo $(DEPDIR)/reshape_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/reshape_r17.c' object='reshape_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o reshape_r17.lo `test -f '$(srcdir)/generated/reshape_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_r17.c + reshape_c4.lo: $(srcdir)/generated/reshape_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT reshape_c4.lo -MD -MP -MF $(DEPDIR)/reshape_c4.Tpo -c -o reshape_c4.lo `test -f '$(srcdir)/generated/reshape_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/reshape_c4.Tpo $(DEPDIR)/reshape_c4.Plo @@ -4491,6 +4904,13 @@ reshape_c16.lo: $(srcdir)/generated/reshape_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o reshape_c16.lo `test -f '$(srcdir)/generated/reshape_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_c16.c +reshape_c17.lo: $(srcdir)/generated/reshape_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT reshape_c17.lo -MD -MP -MF $(DEPDIR)/reshape_c17.Tpo -c -o reshape_c17.lo `test -f '$(srcdir)/generated/reshape_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/reshape_c17.Tpo $(DEPDIR)/reshape_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/reshape_c17.c' object='reshape_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o reshape_c17.lo `test -f '$(srcdir)/generated/reshape_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/reshape_c17.c + in_pack_i1.lo: $(srcdir)/generated/in_pack_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_pack_i1.lo -MD -MP -MF $(DEPDIR)/in_pack_i1.Tpo -c -o in_pack_i1.lo `test -f '$(srcdir)/generated/in_pack_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_pack_i1.Tpo $(DEPDIR)/in_pack_i1.Plo @@ -4554,6 +4974,13 @@ in_pack_r16.lo: $(srcdir)/generated/in_pack_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_pack_r16.lo `test -f '$(srcdir)/generated/in_pack_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_r16.c +in_pack_r17.lo: $(srcdir)/generated/in_pack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_pack_r17.lo -MD -MP -MF $(DEPDIR)/in_pack_r17.Tpo -c -o in_pack_r17.lo `test -f '$(srcdir)/generated/in_pack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_pack_r17.Tpo $(DEPDIR)/in_pack_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/in_pack_r17.c' object='in_pack_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_pack_r17.lo `test -f '$(srcdir)/generated/in_pack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_r17.c + in_pack_c4.lo: $(srcdir)/generated/in_pack_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_pack_c4.lo -MD -MP -MF $(DEPDIR)/in_pack_c4.Tpo -c -o in_pack_c4.lo `test -f '$(srcdir)/generated/in_pack_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_pack_c4.Tpo $(DEPDIR)/in_pack_c4.Plo @@ -4582,6 +5009,13 @@ in_pack_c16.lo: $(srcdir)/generated/in_pack_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_pack_c16.lo `test -f '$(srcdir)/generated/in_pack_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_c16.c +in_pack_c17.lo: $(srcdir)/generated/in_pack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_pack_c17.lo -MD -MP -MF $(DEPDIR)/in_pack_c17.Tpo -c -o in_pack_c17.lo `test -f '$(srcdir)/generated/in_pack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_pack_c17.Tpo $(DEPDIR)/in_pack_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/in_pack_c17.c' object='in_pack_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_pack_c17.lo `test -f '$(srcdir)/generated/in_pack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_pack_c17.c + in_unpack_i1.lo: $(srcdir)/generated/in_unpack_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_unpack_i1.lo -MD -MP -MF $(DEPDIR)/in_unpack_i1.Tpo -c -o in_unpack_i1.lo `test -f '$(srcdir)/generated/in_unpack_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_unpack_i1.Tpo $(DEPDIR)/in_unpack_i1.Plo @@ -4645,6 +5079,13 @@ in_unpack_r16.lo: $(srcdir)/generated/in_unpack_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_unpack_r16.lo `test -f '$(srcdir)/generated/in_unpack_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_r16.c +in_unpack_r17.lo: $(srcdir)/generated/in_unpack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_unpack_r17.lo -MD -MP -MF $(DEPDIR)/in_unpack_r17.Tpo -c -o in_unpack_r17.lo `test -f '$(srcdir)/generated/in_unpack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_unpack_r17.Tpo $(DEPDIR)/in_unpack_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/in_unpack_r17.c' object='in_unpack_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_unpack_r17.lo `test -f '$(srcdir)/generated/in_unpack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_r17.c + in_unpack_c4.lo: $(srcdir)/generated/in_unpack_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_unpack_c4.lo -MD -MP -MF $(DEPDIR)/in_unpack_c4.Tpo -c -o in_unpack_c4.lo `test -f '$(srcdir)/generated/in_unpack_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_unpack_c4.Tpo $(DEPDIR)/in_unpack_c4.Plo @@ -4673,6 +5114,13 @@ in_unpack_c16.lo: $(srcdir)/generated/in_unpack_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_unpack_c16.lo `test -f '$(srcdir)/generated/in_unpack_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_c16.c +in_unpack_c17.lo: $(srcdir)/generated/in_unpack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT in_unpack_c17.lo -MD -MP -MF $(DEPDIR)/in_unpack_c17.Tpo -c -o in_unpack_c17.lo `test -f '$(srcdir)/generated/in_unpack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/in_unpack_c17.Tpo $(DEPDIR)/in_unpack_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/in_unpack_c17.c' object='in_unpack_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o in_unpack_c17.lo `test -f '$(srcdir)/generated/in_unpack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/in_unpack_c17.c + pow_i4_i4.lo: $(srcdir)/generated/pow_i4_i4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_i4_i4.lo -MD -MP -MF $(DEPDIR)/pow_i4_i4.Tpo -c -o pow_i4_i4.lo `test -f '$(srcdir)/generated/pow_i4_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_i4_i4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_i4_i4.Tpo $(DEPDIR)/pow_i4_i4.Plo @@ -4701,6 +5149,13 @@ pow_r16_i4.lo: $(srcdir)/generated/pow_r16_i4.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_r16_i4.lo `test -f '$(srcdir)/generated/pow_r16_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r16_i4.c +pow_r17_i4.lo: $(srcdir)/generated/pow_r17_i4.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_r17_i4.lo -MD -MP -MF $(DEPDIR)/pow_r17_i4.Tpo -c -o pow_r17_i4.lo `test -f '$(srcdir)/generated/pow_r17_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r17_i4.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_r17_i4.Tpo $(DEPDIR)/pow_r17_i4.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pow_r17_i4.c' object='pow_r17_i4.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_r17_i4.lo `test -f '$(srcdir)/generated/pow_r17_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r17_i4.c + pow_c4_i4.lo: $(srcdir)/generated/pow_c4_i4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_c4_i4.lo -MD -MP -MF $(DEPDIR)/pow_c4_i4.Tpo -c -o pow_c4_i4.lo `test -f '$(srcdir)/generated/pow_c4_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c4_i4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_c4_i4.Tpo $(DEPDIR)/pow_c4_i4.Plo @@ -4729,6 +5184,13 @@ pow_c16_i4.lo: $(srcdir)/generated/pow_c16_i4.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_c16_i4.lo `test -f '$(srcdir)/generated/pow_c16_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c16_i4.c +pow_c17_i4.lo: $(srcdir)/generated/pow_c17_i4.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_c17_i4.lo -MD -MP -MF $(DEPDIR)/pow_c17_i4.Tpo -c -o pow_c17_i4.lo `test -f '$(srcdir)/generated/pow_c17_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c17_i4.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_c17_i4.Tpo $(DEPDIR)/pow_c17_i4.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pow_c17_i4.c' object='pow_c17_i4.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_c17_i4.lo `test -f '$(srcdir)/generated/pow_c17_i4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c17_i4.c + pow_i4_i8.lo: $(srcdir)/generated/pow_i4_i8.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_i4_i8.lo -MD -MP -MF $(DEPDIR)/pow_i4_i8.Tpo -c -o pow_i4_i8.lo `test -f '$(srcdir)/generated/pow_i4_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_i4_i8.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_i4_i8.Tpo $(DEPDIR)/pow_i4_i8.Plo @@ -4778,6 +5240,13 @@ pow_r16_i8.lo: $(srcdir)/generated/pow_r16_i8.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_r16_i8.lo `test -f '$(srcdir)/generated/pow_r16_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r16_i8.c +pow_r17_i8.lo: $(srcdir)/generated/pow_r17_i8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_r17_i8.lo -MD -MP -MF $(DEPDIR)/pow_r17_i8.Tpo -c -o pow_r17_i8.lo `test -f '$(srcdir)/generated/pow_r17_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r17_i8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_r17_i8.Tpo $(DEPDIR)/pow_r17_i8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pow_r17_i8.c' object='pow_r17_i8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_r17_i8.lo `test -f '$(srcdir)/generated/pow_r17_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r17_i8.c + pow_c4_i8.lo: $(srcdir)/generated/pow_c4_i8.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_c4_i8.lo -MD -MP -MF $(DEPDIR)/pow_c4_i8.Tpo -c -o pow_c4_i8.lo `test -f '$(srcdir)/generated/pow_c4_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c4_i8.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_c4_i8.Tpo $(DEPDIR)/pow_c4_i8.Plo @@ -4806,6 +5275,13 @@ pow_c16_i8.lo: $(srcdir)/generated/pow_c16_i8.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_c16_i8.lo `test -f '$(srcdir)/generated/pow_c16_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c16_i8.c +pow_c17_i8.lo: $(srcdir)/generated/pow_c17_i8.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_c17_i8.lo -MD -MP -MF $(DEPDIR)/pow_c17_i8.Tpo -c -o pow_c17_i8.lo `test -f '$(srcdir)/generated/pow_c17_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c17_i8.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_c17_i8.Tpo $(DEPDIR)/pow_c17_i8.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pow_c17_i8.c' object='pow_c17_i8.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_c17_i8.lo `test -f '$(srcdir)/generated/pow_c17_i8.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c17_i8.c + pow_i4_i16.lo: $(srcdir)/generated/pow_i4_i16.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_i4_i16.lo -MD -MP -MF $(DEPDIR)/pow_i4_i16.Tpo -c -o pow_i4_i16.lo `test -f '$(srcdir)/generated/pow_i4_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_i4_i16.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_i4_i16.Tpo $(DEPDIR)/pow_i4_i16.Plo @@ -4855,6 +5331,13 @@ pow_r16_i16.lo: $(srcdir)/generated/pow_r16_i16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_r16_i16.lo `test -f '$(srcdir)/generated/pow_r16_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r16_i16.c +pow_r17_i16.lo: $(srcdir)/generated/pow_r17_i16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_r17_i16.lo -MD -MP -MF $(DEPDIR)/pow_r17_i16.Tpo -c -o pow_r17_i16.lo `test -f '$(srcdir)/generated/pow_r17_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r17_i16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_r17_i16.Tpo $(DEPDIR)/pow_r17_i16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pow_r17_i16.c' object='pow_r17_i16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_r17_i16.lo `test -f '$(srcdir)/generated/pow_r17_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_r17_i16.c + pow_c4_i16.lo: $(srcdir)/generated/pow_c4_i16.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_c4_i16.lo -MD -MP -MF $(DEPDIR)/pow_c4_i16.Tpo -c -o pow_c4_i16.lo `test -f '$(srcdir)/generated/pow_c4_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c4_i16.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_c4_i16.Tpo $(DEPDIR)/pow_c4_i16.Plo @@ -4883,6 +5366,13 @@ pow_c16_i16.lo: $(srcdir)/generated/pow_c16_i16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_c16_i16.lo `test -f '$(srcdir)/generated/pow_c16_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c16_i16.c +pow_c17_i16.lo: $(srcdir)/generated/pow_c17_i16.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pow_c17_i16.lo -MD -MP -MF $(DEPDIR)/pow_c17_i16.Tpo -c -o pow_c17_i16.lo `test -f '$(srcdir)/generated/pow_c17_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c17_i16.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pow_c17_i16.Tpo $(DEPDIR)/pow_c17_i16.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pow_c17_i16.c' object='pow_c17_i16.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pow_c17_i16.lo `test -f '$(srcdir)/generated/pow_c17_i16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pow_c17_i16.c + pack_i1.lo: $(srcdir)/generated/pack_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pack_i1.lo -MD -MP -MF $(DEPDIR)/pack_i1.Tpo -c -o pack_i1.lo `test -f '$(srcdir)/generated/pack_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pack_i1.Tpo $(DEPDIR)/pack_i1.Plo @@ -4946,6 +5436,13 @@ pack_r16.lo: $(srcdir)/generated/pack_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pack_r16.lo `test -f '$(srcdir)/generated/pack_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_r16.c +pack_r17.lo: $(srcdir)/generated/pack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pack_r17.lo -MD -MP -MF $(DEPDIR)/pack_r17.Tpo -c -o pack_r17.lo `test -f '$(srcdir)/generated/pack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pack_r17.Tpo $(DEPDIR)/pack_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pack_r17.c' object='pack_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pack_r17.lo `test -f '$(srcdir)/generated/pack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_r17.c + pack_c4.lo: $(srcdir)/generated/pack_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pack_c4.lo -MD -MP -MF $(DEPDIR)/pack_c4.Tpo -c -o pack_c4.lo `test -f '$(srcdir)/generated/pack_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pack_c4.Tpo $(DEPDIR)/pack_c4.Plo @@ -4974,6 +5471,13 @@ pack_c16.lo: $(srcdir)/generated/pack_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pack_c16.lo `test -f '$(srcdir)/generated/pack_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_c16.c +pack_c17.lo: $(srcdir)/generated/pack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pack_c17.lo -MD -MP -MF $(DEPDIR)/pack_c17.Tpo -c -o pack_c17.lo `test -f '$(srcdir)/generated/pack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pack_c17.Tpo $(DEPDIR)/pack_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/pack_c17.c' object='pack_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pack_c17.lo `test -f '$(srcdir)/generated/pack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/pack_c17.c + unpack_i1.lo: $(srcdir)/generated/unpack_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unpack_i1.lo -MD -MP -MF $(DEPDIR)/unpack_i1.Tpo -c -o unpack_i1.lo `test -f '$(srcdir)/generated/unpack_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/unpack_i1.Tpo $(DEPDIR)/unpack_i1.Plo @@ -5037,6 +5541,13 @@ unpack_r16.lo: $(srcdir)/generated/unpack_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unpack_r16.lo `test -f '$(srcdir)/generated/unpack_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_r16.c +unpack_r17.lo: $(srcdir)/generated/unpack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unpack_r17.lo -MD -MP -MF $(DEPDIR)/unpack_r17.Tpo -c -o unpack_r17.lo `test -f '$(srcdir)/generated/unpack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/unpack_r17.Tpo $(DEPDIR)/unpack_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/unpack_r17.c' object='unpack_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unpack_r17.lo `test -f '$(srcdir)/generated/unpack_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_r17.c + unpack_c4.lo: $(srcdir)/generated/unpack_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unpack_c4.lo -MD -MP -MF $(DEPDIR)/unpack_c4.Tpo -c -o unpack_c4.lo `test -f '$(srcdir)/generated/unpack_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/unpack_c4.Tpo $(DEPDIR)/unpack_c4.Plo @@ -5065,6 +5576,13 @@ unpack_c16.lo: $(srcdir)/generated/unpack_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unpack_c16.lo `test -f '$(srcdir)/generated/unpack_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_c16.c +unpack_c17.lo: $(srcdir)/generated/unpack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT unpack_c17.lo -MD -MP -MF $(DEPDIR)/unpack_c17.Tpo -c -o unpack_c17.lo `test -f '$(srcdir)/generated/unpack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/unpack_c17.Tpo $(DEPDIR)/unpack_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/unpack_c17.c' object='unpack_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o unpack_c17.lo `test -f '$(srcdir)/generated/unpack_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/unpack_c17.c + matmulavx128_i1.lo: $(srcdir)/generated/matmulavx128_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmulavx128_i1.lo -MD -MP -MF $(DEPDIR)/matmulavx128_i1.Tpo -c -o matmulavx128_i1.lo `test -f '$(srcdir)/generated/matmulavx128_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmulavx128_i1.Tpo $(DEPDIR)/matmulavx128_i1.Plo @@ -5128,6 +5646,13 @@ matmulavx128_r16.lo: $(srcdir)/generated/matmulavx128_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmulavx128_r16.lo `test -f '$(srcdir)/generated/matmulavx128_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_r16.c +matmulavx128_r17.lo: $(srcdir)/generated/matmulavx128_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmulavx128_r17.lo -MD -MP -MF $(DEPDIR)/matmulavx128_r17.Tpo -c -o matmulavx128_r17.lo `test -f '$(srcdir)/generated/matmulavx128_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmulavx128_r17.Tpo $(DEPDIR)/matmulavx128_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/matmulavx128_r17.c' object='matmulavx128_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmulavx128_r17.lo `test -f '$(srcdir)/generated/matmulavx128_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_r17.c + matmulavx128_c4.lo: $(srcdir)/generated/matmulavx128_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmulavx128_c4.lo -MD -MP -MF $(DEPDIR)/matmulavx128_c4.Tpo -c -o matmulavx128_c4.lo `test -f '$(srcdir)/generated/matmulavx128_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmulavx128_c4.Tpo $(DEPDIR)/matmulavx128_c4.Plo @@ -5156,6 +5681,13 @@ matmulavx128_c16.lo: $(srcdir)/generated/matmulavx128_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmulavx128_c16.lo `test -f '$(srcdir)/generated/matmulavx128_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_c16.c +matmulavx128_c17.lo: $(srcdir)/generated/matmulavx128_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT matmulavx128_c17.lo -MD -MP -MF $(DEPDIR)/matmulavx128_c17.Tpo -c -o matmulavx128_c17.lo `test -f '$(srcdir)/generated/matmulavx128_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/matmulavx128_c17.Tpo $(DEPDIR)/matmulavx128_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/matmulavx128_c17.c' object='matmulavx128_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o matmulavx128_c17.lo `test -f '$(srcdir)/generated/matmulavx128_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/matmulavx128_c17.c + spread_i1.lo: $(srcdir)/generated/spread_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spread_i1.lo -MD -MP -MF $(DEPDIR)/spread_i1.Tpo -c -o spread_i1.lo `test -f '$(srcdir)/generated/spread_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/spread_i1.Tpo $(DEPDIR)/spread_i1.Plo @@ -5219,6 +5751,13 @@ spread_r16.lo: $(srcdir)/generated/spread_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o spread_r16.lo `test -f '$(srcdir)/generated/spread_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_r16.c +spread_r17.lo: $(srcdir)/generated/spread_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spread_r17.lo -MD -MP -MF $(DEPDIR)/spread_r17.Tpo -c -o spread_r17.lo `test -f '$(srcdir)/generated/spread_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/spread_r17.Tpo $(DEPDIR)/spread_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/spread_r17.c' object='spread_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o spread_r17.lo `test -f '$(srcdir)/generated/spread_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_r17.c + spread_c4.lo: $(srcdir)/generated/spread_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spread_c4.lo -MD -MP -MF $(DEPDIR)/spread_c4.Tpo -c -o spread_c4.lo `test -f '$(srcdir)/generated/spread_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/spread_c4.Tpo $(DEPDIR)/spread_c4.Plo @@ -5247,6 +5786,13 @@ spread_c16.lo: $(srcdir)/generated/spread_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o spread_c16.lo `test -f '$(srcdir)/generated/spread_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_c16.c +spread_c17.lo: $(srcdir)/generated/spread_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spread_c17.lo -MD -MP -MF $(DEPDIR)/spread_c17.Tpo -c -o spread_c17.lo `test -f '$(srcdir)/generated/spread_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/spread_c17.Tpo $(DEPDIR)/spread_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/spread_c17.c' object='spread_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o spread_c17.lo `test -f '$(srcdir)/generated/spread_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/spread_c17.c + cshift0_i1.lo: $(srcdir)/generated/cshift0_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift0_i1.lo -MD -MP -MF $(DEPDIR)/cshift0_i1.Tpo -c -o cshift0_i1.lo `test -f '$(srcdir)/generated/cshift0_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift0_i1.Tpo $(DEPDIR)/cshift0_i1.Plo @@ -5310,6 +5856,13 @@ cshift0_r16.lo: $(srcdir)/generated/cshift0_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift0_r16.lo `test -f '$(srcdir)/generated/cshift0_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_r16.c +cshift0_r17.lo: $(srcdir)/generated/cshift0_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift0_r17.lo -MD -MP -MF $(DEPDIR)/cshift0_r17.Tpo -c -o cshift0_r17.lo `test -f '$(srcdir)/generated/cshift0_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift0_r17.Tpo $(DEPDIR)/cshift0_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift0_r17.c' object='cshift0_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift0_r17.lo `test -f '$(srcdir)/generated/cshift0_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_r17.c + cshift0_c4.lo: $(srcdir)/generated/cshift0_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift0_c4.lo -MD -MP -MF $(DEPDIR)/cshift0_c4.Tpo -c -o cshift0_c4.lo `test -f '$(srcdir)/generated/cshift0_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift0_c4.Tpo $(DEPDIR)/cshift0_c4.Plo @@ -5338,6 +5891,13 @@ cshift0_c16.lo: $(srcdir)/generated/cshift0_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift0_c16.lo `test -f '$(srcdir)/generated/cshift0_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_c16.c +cshift0_c17.lo: $(srcdir)/generated/cshift0_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift0_c17.lo -MD -MP -MF $(DEPDIR)/cshift0_c17.Tpo -c -o cshift0_c17.lo `test -f '$(srcdir)/generated/cshift0_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift0_c17.Tpo $(DEPDIR)/cshift0_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift0_c17.c' object='cshift0_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift0_c17.lo `test -f '$(srcdir)/generated/cshift0_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift0_c17.c + cshift1_4_i1.lo: $(srcdir)/generated/cshift1_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_4_i1.lo -MD -MP -MF $(DEPDIR)/cshift1_4_i1.Tpo -c -o cshift1_4_i1.lo `test -f '$(srcdir)/generated/cshift1_4_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_4_i1.Tpo $(DEPDIR)/cshift1_4_i1.Plo @@ -5401,6 +5961,13 @@ cshift1_4_r16.lo: $(srcdir)/generated/cshift1_4_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_4_r16.lo `test -f '$(srcdir)/generated/cshift1_4_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_r16.c +cshift1_4_r17.lo: $(srcdir)/generated/cshift1_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_4_r17.lo -MD -MP -MF $(DEPDIR)/cshift1_4_r17.Tpo -c -o cshift1_4_r17.lo `test -f '$(srcdir)/generated/cshift1_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_4_r17.Tpo $(DEPDIR)/cshift1_4_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift1_4_r17.c' object='cshift1_4_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_4_r17.lo `test -f '$(srcdir)/generated/cshift1_4_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_r17.c + cshift1_4_c4.lo: $(srcdir)/generated/cshift1_4_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_4_c4.lo -MD -MP -MF $(DEPDIR)/cshift1_4_c4.Tpo -c -o cshift1_4_c4.lo `test -f '$(srcdir)/generated/cshift1_4_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_4_c4.Tpo $(DEPDIR)/cshift1_4_c4.Plo @@ -5429,6 +5996,13 @@ cshift1_4_c16.lo: $(srcdir)/generated/cshift1_4_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_4_c16.lo `test -f '$(srcdir)/generated/cshift1_4_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_c16.c +cshift1_4_c17.lo: $(srcdir)/generated/cshift1_4_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_4_c17.lo -MD -MP -MF $(DEPDIR)/cshift1_4_c17.Tpo -c -o cshift1_4_c17.lo `test -f '$(srcdir)/generated/cshift1_4_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_4_c17.Tpo $(DEPDIR)/cshift1_4_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift1_4_c17.c' object='cshift1_4_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_4_c17.lo `test -f '$(srcdir)/generated/cshift1_4_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_4_c17.c + cshift1_8_i1.lo: $(srcdir)/generated/cshift1_8_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_8_i1.lo -MD -MP -MF $(DEPDIR)/cshift1_8_i1.Tpo -c -o cshift1_8_i1.lo `test -f '$(srcdir)/generated/cshift1_8_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_8_i1.Tpo $(DEPDIR)/cshift1_8_i1.Plo @@ -5492,6 +6066,13 @@ cshift1_8_r16.lo: $(srcdir)/generated/cshift1_8_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_8_r16.lo `test -f '$(srcdir)/generated/cshift1_8_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_r16.c +cshift1_8_r17.lo: $(srcdir)/generated/cshift1_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_8_r17.lo -MD -MP -MF $(DEPDIR)/cshift1_8_r17.Tpo -c -o cshift1_8_r17.lo `test -f '$(srcdir)/generated/cshift1_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_8_r17.Tpo $(DEPDIR)/cshift1_8_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift1_8_r17.c' object='cshift1_8_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_8_r17.lo `test -f '$(srcdir)/generated/cshift1_8_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_r17.c + cshift1_8_c4.lo: $(srcdir)/generated/cshift1_8_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_8_c4.lo -MD -MP -MF $(DEPDIR)/cshift1_8_c4.Tpo -c -o cshift1_8_c4.lo `test -f '$(srcdir)/generated/cshift1_8_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_8_c4.Tpo $(DEPDIR)/cshift1_8_c4.Plo @@ -5520,6 +6101,13 @@ cshift1_8_c16.lo: $(srcdir)/generated/cshift1_8_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_8_c16.lo `test -f '$(srcdir)/generated/cshift1_8_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_c16.c +cshift1_8_c17.lo: $(srcdir)/generated/cshift1_8_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_8_c17.lo -MD -MP -MF $(DEPDIR)/cshift1_8_c17.Tpo -c -o cshift1_8_c17.lo `test -f '$(srcdir)/generated/cshift1_8_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_8_c17.Tpo $(DEPDIR)/cshift1_8_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift1_8_c17.c' object='cshift1_8_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_8_c17.lo `test -f '$(srcdir)/generated/cshift1_8_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_8_c17.c + cshift1_16_i1.lo: $(srcdir)/generated/cshift1_16_i1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_16_i1.lo -MD -MP -MF $(DEPDIR)/cshift1_16_i1.Tpo -c -o cshift1_16_i1.lo `test -f '$(srcdir)/generated/cshift1_16_i1.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_i1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_16_i1.Tpo $(DEPDIR)/cshift1_16_i1.Plo @@ -5583,6 +6171,13 @@ cshift1_16_r16.lo: $(srcdir)/generated/cshift1_16_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_16_r16.lo `test -f '$(srcdir)/generated/cshift1_16_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_r16.c +cshift1_16_r17.lo: $(srcdir)/generated/cshift1_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_16_r17.lo -MD -MP -MF $(DEPDIR)/cshift1_16_r17.Tpo -c -o cshift1_16_r17.lo `test -f '$(srcdir)/generated/cshift1_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_16_r17.Tpo $(DEPDIR)/cshift1_16_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift1_16_r17.c' object='cshift1_16_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_16_r17.lo `test -f '$(srcdir)/generated/cshift1_16_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_r17.c + cshift1_16_c4.lo: $(srcdir)/generated/cshift1_16_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_16_c4.lo -MD -MP -MF $(DEPDIR)/cshift1_16_c4.Tpo -c -o cshift1_16_c4.lo `test -f '$(srcdir)/generated/cshift1_16_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_16_c4.Tpo $(DEPDIR)/cshift1_16_c4.Plo @@ -5611,6 +6206,13 @@ cshift1_16_c16.lo: $(srcdir)/generated/cshift1_16_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_16_c16.lo `test -f '$(srcdir)/generated/cshift1_16_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_c16.c +cshift1_16_c17.lo: $(srcdir)/generated/cshift1_16_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cshift1_16_c17.lo -MD -MP -MF $(DEPDIR)/cshift1_16_c17.Tpo -c -o cshift1_16_c17.lo `test -f '$(srcdir)/generated/cshift1_16_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/cshift1_16_c17.Tpo $(DEPDIR)/cshift1_16_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/cshift1_16_c17.c' object='cshift1_16_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cshift1_16_c17.lo `test -f '$(srcdir)/generated/cshift1_16_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/cshift1_16_c17.c + maxloc0_4_s1.lo: $(srcdir)/generated/maxloc0_4_s1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT maxloc0_4_s1.lo -MD -MP -MF $(DEPDIR)/maxloc0_4_s1.Tpo -c -o maxloc0_4_s1.lo `test -f '$(srcdir)/generated/maxloc0_4_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/maxloc0_4_s1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/maxloc0_4_s1.Tpo $(DEPDIR)/maxloc0_4_s1.Plo @@ -5982,6 +6584,13 @@ findloc0_r16.lo: $(srcdir)/generated/findloc0_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc0_r16.lo `test -f '$(srcdir)/generated/findloc0_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_r16.c +findloc0_r17.lo: $(srcdir)/generated/findloc0_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc0_r17.lo -MD -MP -MF $(DEPDIR)/findloc0_r17.Tpo -c -o findloc0_r17.lo `test -f '$(srcdir)/generated/findloc0_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc0_r17.Tpo $(DEPDIR)/findloc0_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/findloc0_r17.c' object='findloc0_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc0_r17.lo `test -f '$(srcdir)/generated/findloc0_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_r17.c + findloc0_c4.lo: $(srcdir)/generated/findloc0_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc0_c4.lo -MD -MP -MF $(DEPDIR)/findloc0_c4.Tpo -c -o findloc0_c4.lo `test -f '$(srcdir)/generated/findloc0_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc0_c4.Tpo $(DEPDIR)/findloc0_c4.Plo @@ -6010,6 +6619,13 @@ findloc0_c16.lo: $(srcdir)/generated/findloc0_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc0_c16.lo `test -f '$(srcdir)/generated/findloc0_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_c16.c +findloc0_c17.lo: $(srcdir)/generated/findloc0_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc0_c17.lo -MD -MP -MF $(DEPDIR)/findloc0_c17.Tpo -c -o findloc0_c17.lo `test -f '$(srcdir)/generated/findloc0_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc0_c17.Tpo $(DEPDIR)/findloc0_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/findloc0_c17.c' object='findloc0_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc0_c17.lo `test -f '$(srcdir)/generated/findloc0_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_c17.c + findloc0_s1.lo: $(srcdir)/generated/findloc0_s1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc0_s1.lo -MD -MP -MF $(DEPDIR)/findloc0_s1.Tpo -c -o findloc0_s1.lo `test -f '$(srcdir)/generated/findloc0_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc0_s1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc0_s1.Tpo $(DEPDIR)/findloc0_s1.Plo @@ -6087,6 +6703,13 @@ findloc1_r16.lo: $(srcdir)/generated/findloc1_r16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc1_r16.lo `test -f '$(srcdir)/generated/findloc1_r16.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_r16.c +findloc1_r17.lo: $(srcdir)/generated/findloc1_r17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc1_r17.lo -MD -MP -MF $(DEPDIR)/findloc1_r17.Tpo -c -o findloc1_r17.lo `test -f '$(srcdir)/generated/findloc1_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_r17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc1_r17.Tpo $(DEPDIR)/findloc1_r17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/findloc1_r17.c' object='findloc1_r17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc1_r17.lo `test -f '$(srcdir)/generated/findloc1_r17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_r17.c + findloc1_c4.lo: $(srcdir)/generated/findloc1_c4.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc1_c4.lo -MD -MP -MF $(DEPDIR)/findloc1_c4.Tpo -c -o findloc1_c4.lo `test -f '$(srcdir)/generated/findloc1_c4.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_c4.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc1_c4.Tpo $(DEPDIR)/findloc1_c4.Plo @@ -6115,6 +6738,13 @@ findloc1_c16.lo: $(srcdir)/generated/findloc1_c16.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc1_c16.lo `test -f '$(srcdir)/generated/findloc1_c16.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_c16.c +findloc1_c17.lo: $(srcdir)/generated/findloc1_c17.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc1_c17.lo -MD -MP -MF $(DEPDIR)/findloc1_c17.Tpo -c -o findloc1_c17.lo `test -f '$(srcdir)/generated/findloc1_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_c17.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc1_c17.Tpo $(DEPDIR)/findloc1_c17.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$(srcdir)/generated/findloc1_c17.c' object='findloc1_c17.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o findloc1_c17.lo `test -f '$(srcdir)/generated/findloc1_c17.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_c17.c + findloc1_s1.lo: $(srcdir)/generated/findloc1_s1.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT findloc1_s1.lo -MD -MP -MF $(DEPDIR)/findloc1_s1.Tpo -c -o findloc1_s1.lo `test -f '$(srcdir)/generated/findloc1_s1.c' || echo '$(srcdir)/'`$(srcdir)/generated/findloc1_s1.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/findloc1_s1.Tpo $(DEPDIR)/findloc1_s1.Plo @@ -6999,6 +7629,9 @@ $(patsubst %.c,%.lo,$(notdir $(i_matmull_c))): AM_CFLAGS += -funroll-loops $(patsubst %.F90,%.lo,$(patsubst %.f90,%.lo,$(notdir $(gfor_specific_src)))): AM_FCFLAGS += -fallow-leading-underscore selected_real_kind.lo selected_int_kind.lo: AM_FCFLAGS += -fallow-leading-underscore +# Build *_r17.F90 and *_c17.F90 with additional -mabi=ieeelongdouble on powerpc64le-linux. +@HAVE_REAL_17_TRUE@$(patsubst %.F90,%.lo,$(filter %_r17.F90 %_c17.F90,$(notdir $(gfor_specific_src)))): AM_FCFLAGS += -mabi=ieeelongdouble + # Add flags for IEEE modules @IEEE_SUPPORT_TRUE@$(patsubst %.F90,%.lo,$(notdir $(gfor_ieee_src))): AM_FCFLAGS += -Wno-unused-dummy-argument -Wno-c-binding-type -ffree-line-length-0 -fallow-leading-underscore -fsignaling-nans diff --git a/libgfortran/configure b/libgfortran/configure index 4810b9b032e..cc32e06b483 100755 --- a/libgfortran/configure +++ b/libgfortran/configure @@ -692,6 +692,8 @@ LIBGFOR_USE_SYMVER_FALSE LIBGFOR_USE_SYMVER_TRUE AM_CFLAGS AM_FCFLAGS +HAVE_REAL_17_FALSE +HAVE_REAL_17_TRUE toolexeclibdir toolexecdir EGREP @@ -5984,6 +5986,7 @@ fi # Add -Wall -fno-repack-arrays -fno-underscoring if we are using GCC. +have_real_17=no if test "x$GCC" = "xyes"; then AM_FCFLAGS="-I . -Wall -Werror -fimplicit-none -fno-repack-arrays -fno-underscoring" ## We like to use C11 and C99 routines when available. This makes @@ -5993,6 +5996,40 @@ if test "x$GCC" = "xyes"; then ## Compile the following tests with the same system header contents ## that we'll encounter when compiling our own source files. CFLAGS="-std=gnu11 $CFLAGS" + + case x$target in + xpowerpc64le*-linux*) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#if __SIZEOF_LONG_DOUBLE__ != 16 + #error long double is double + #endif +int +main () +{ +(void) 0; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_cpp "$LINENO"; then : + AM_FCFLAGS="$AM_FCFLAGS -mabi=ibmlongdouble"; + AM_CFLAGS="$AM_CFLAGS -mabi=ibmlongdouble"; + CFLAGS="$CFLAGS -mabi=ibmlongdouble"; + have_real_17=yes +fi +rm -f conftest.err conftest.i conftest.$ac_ext + ;; + *) + ;; + esac +fi + if test "x$have_real_17" != xno; then + HAVE_REAL_17_TRUE= + HAVE_REAL_17_FALSE='#' +else + HAVE_REAL_17_TRUE='#' + HAVE_REAL_17_FALSE= fi # Add CET specific flags if CET is enabled @@ -12728,7 +12765,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12731 "configure" +#line 12769 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12834,7 +12871,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 12837 "configure" +#line 12875 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -27093,7 +27130,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS="$ac_save_CFLAGS" -# Check wether we support AVX2 extensions +# Check whether we support AVX2 extensions ac_save_CFLAGS="$CFLAGS" CFLAGS="-O2 -mavx2" @@ -27123,7 +27160,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS="$ac_save_CFLAGS" -# Check wether we support AVX512f extensions +# Check whether we support AVX512f extensions ac_save_CFLAGS="$CFLAGS" CFLAGS="-O2 -mavx512f" @@ -27522,6 +27559,10 @@ if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then as_fn_error $? "conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_REAL_17_TRUE}" && test -z "${HAVE_REAL_17_FALSE}"; then + as_fn_error $? "conditional \"HAVE_REAL_17\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${LIBGFOR_USE_SYMVER_TRUE}" && test -z "${LIBGFOR_USE_SYMVER_FALSE}"; then as_fn_error $? "conditional \"LIBGFOR_USE_SYMVER\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac index a77509801e6..97439a805d5 100644 --- a/libgfortran/configure.ac +++ b/libgfortran/configure.ac @@ -145,6 +145,7 @@ AC_SUBST(CFLAGS) AM_PROG_CC_C_O # Add -Wall -fno-repack-arrays -fno-underscoring if we are using GCC. +have_real_17=no if test "x$GCC" = "xyes"; then AM_FCFLAGS="-I . -Wall -Werror -fimplicit-none -fno-repack-arrays -fno-underscoring" ## We like to use C11 and C99 routines when available. This makes @@ -154,8 +155,24 @@ if test "x$GCC" = "xyes"; then ## Compile the following tests with the same system header contents ## that we'll encounter when compiling our own source files. CFLAGS="-std=gnu11 $CFLAGS" -fi + case x$target in + xpowerpc64le*-linux*) + AC_PREPROC_IFELSE( + [AC_LANG_PROGRAM([[#if __SIZEOF_LONG_DOUBLE__ != 16 + #error long double is double + #endif]], + [[(void) 0;]])], + [AM_FCFLAGS="$AM_FCFLAGS -mabi=ibmlongdouble"; + AM_CFLAGS="$AM_CFLAGS -mabi=ibmlongdouble"; + CFLAGS="$CFLAGS -mabi=ibmlongdouble"; + have_real_17=yes]) + ;; + *) + ;; + esac +fi +AM_CONDITIONAL([HAVE_REAL_17], [test "x$have_real_17" != xno]) # Add CET specific flags if CET is enabled GCC_CET_FLAGS(CET_FLAGS) AM_FCFLAGS="$AM_FCFLAGS $CET_FLAGS" @@ -665,10 +682,10 @@ LIBGFOR_CHECK_CRLF # Check whether we support AVX extensions LIBGFOR_CHECK_AVX -# Check wether we support AVX2 extensions +# Check whether we support AVX2 extensions LIBGFOR_CHECK_AVX2 -# Check wether we support AVX512f extensions +# Check whether we support AVX512f extensions LIBGFOR_CHECK_AVX512F # Check for FMA3 extensions diff --git a/libgfortran/generated/_abs_c17.F90 b/libgfortran/generated/_abs_c17.F90 new file mode 100644 index 00000000000..3c186a298b7 --- /dev/null +++ b/libgfortran/generated/_abs_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CABS_17 + +elemental function _gfortran_specific__abs_c17 (parm) + complex (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__abs_c17 + + _gfortran_specific__abs_c17 = abs (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_abs_r17.F90 b/libgfortran/generated/_abs_r17.F90 new file mode 100644 index 00000000000..bc8f3eb7a2e --- /dev/null +++ b/libgfortran/generated/_abs_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_FABS_17 + +elemental function _gfortran_specific__abs_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__abs_r17 + + _gfortran_specific__abs_r17 = abs (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_acos_r17.F90 b/libgfortran/generated/_acos_r17.F90 new file mode 100644 index 00000000000..f9e928c5d20 --- /dev/null +++ b/libgfortran/generated/_acos_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ACOS_17 + +elemental function _gfortran_specific__acos_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__acos_r17 + + _gfortran_specific__acos_r17 = acos (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_acosh_r17.F90 b/libgfortran/generated/_acosh_r17.F90 new file mode 100644 index 00000000000..4508691c4ea --- /dev/null +++ b/libgfortran/generated/_acosh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ACOSH_17 + +elemental function _gfortran_specific__acosh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__acosh_r17 + + _gfortran_specific__acosh_r17 = acosh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_aimag_c17.F90 b/libgfortran/generated/_aimag_c17.F90 new file mode 100644 index 00000000000..b5a84fa0a78 --- /dev/null +++ b/libgfortran/generated/_aimag_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) + + +elemental function _gfortran_specific__aimag_c17 (parm) + complex (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__aimag_c17 + + _gfortran_specific__aimag_c17 = aimag (parm) +end function + + +#endif diff --git a/libgfortran/generated/_aint_r17.F90 b/libgfortran/generated/_aint_r17.F90 new file mode 100644 index 00000000000..4e73b24946d --- /dev/null +++ b/libgfortran/generated/_aint_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_TRUNC_17 + +elemental function _gfortran_specific__aint_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__aint_r17 + + _gfortran_specific__aint_r17 = aint (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_anint_r17.F90 b/libgfortran/generated/_anint_r17.F90 new file mode 100644 index 00000000000..6b60e11e607 --- /dev/null +++ b/libgfortran/generated/_anint_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ROUND_17 + +elemental function _gfortran_specific__anint_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__anint_r17 + + _gfortran_specific__anint_r17 = anint (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_asin_r17.F90 b/libgfortran/generated/_asin_r17.F90 new file mode 100644 index 00000000000..120be0494c5 --- /dev/null +++ b/libgfortran/generated/_asin_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ASIN_17 + +elemental function _gfortran_specific__asin_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__asin_r17 + + _gfortran_specific__asin_r17 = asin (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_asinh_r17.F90 b/libgfortran/generated/_asinh_r17.F90 new file mode 100644 index 00000000000..64f18d2198f --- /dev/null +++ b/libgfortran/generated/_asinh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ASINH_17 + +elemental function _gfortran_specific__asinh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__asinh_r17 + + _gfortran_specific__asinh_r17 = asinh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_atan2_r17.F90 b/libgfortran/generated/_atan2_r17.F90 new file mode 100644 index 00000000000..f0fc9b88648 --- /dev/null +++ b/libgfortran/generated/_atan2_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + +#ifdef HAVE_ATAN2_17 + +elemental function _gfortran_specific__atan2_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__atan2_r17 + + _gfortran_specific__atan2_r17 = atan2 (p1, p2) +end function + +#endif + +#endif diff --git a/libgfortran/generated/_atan_r17.F90 b/libgfortran/generated/_atan_r17.F90 new file mode 100644 index 00000000000..a8b0bbd2d71 --- /dev/null +++ b/libgfortran/generated/_atan_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ATAN_17 + +elemental function _gfortran_specific__atan_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__atan_r17 + + _gfortran_specific__atan_r17 = atan (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_atanh_r17.F90 b/libgfortran/generated/_atanh_r17.F90 new file mode 100644 index 00000000000..dc4d8a306d2 --- /dev/null +++ b/libgfortran/generated/_atanh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_ATANH_17 + +elemental function _gfortran_specific__atanh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__atanh_r17 + + _gfortran_specific__atanh_r17 = atanh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_conjg_c17.F90 b/libgfortran/generated/_conjg_c17.F90 new file mode 100644 index 00000000000..86a237c6442 --- /dev/null +++ b/libgfortran/generated/_conjg_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) + + +elemental function _gfortran_specific__conjg_17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__conjg_17 + + _gfortran_specific__conjg_17 = conjg (parm) +end function + + +#endif diff --git a/libgfortran/generated/_cos_c17.F90 b/libgfortran/generated/_cos_c17.F90 new file mode 100644 index 00000000000..8d50ac8f556 --- /dev/null +++ b/libgfortran/generated/_cos_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CCOS_17 + +elemental function _gfortran_specific__cos_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__cos_c17 + + _gfortran_specific__cos_c17 = cos (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_cos_r17.F90 b/libgfortran/generated/_cos_r17.F90 new file mode 100644 index 00000000000..fde038e15b9 --- /dev/null +++ b/libgfortran/generated/_cos_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_COS_17 + +elemental function _gfortran_specific__cos_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__cos_r17 + + _gfortran_specific__cos_r17 = cos (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_cosh_r17.F90 b/libgfortran/generated/_cosh_r17.F90 new file mode 100644 index 00000000000..c8fe7c8dbb6 --- /dev/null +++ b/libgfortran/generated/_cosh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_COSH_17 + +elemental function _gfortran_specific__cosh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__cosh_r17 + + _gfortran_specific__cosh_r17 = cosh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_dim_r17.F90 b/libgfortran/generated/_dim_r17.F90 new file mode 100644 index 00000000000..d4b78c17dc5 --- /dev/null +++ b/libgfortran/generated/_dim_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + + + +elemental function _gfortran_specific__dim_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__dim_r17 + + _gfortran_specific__dim_r17 = dim (p1, p2) +end function + + + +#endif diff --git a/libgfortran/generated/_exp_c17.F90 b/libgfortran/generated/_exp_c17.F90 new file mode 100644 index 00000000000..a2672a4ab98 --- /dev/null +++ b/libgfortran/generated/_exp_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CEXP_17 + +elemental function _gfortran_specific__exp_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__exp_c17 + + _gfortran_specific__exp_c17 = exp (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_exp_r17.F90 b/libgfortran/generated/_exp_r17.F90 new file mode 100644 index 00000000000..14cac70375a --- /dev/null +++ b/libgfortran/generated/_exp_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_EXP_17 + +elemental function _gfortran_specific__exp_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__exp_r17 + + _gfortran_specific__exp_r17 = exp (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_log10_r17.F90 b/libgfortran/generated/_log10_r17.F90 new file mode 100644 index 00000000000..ef3481af273 --- /dev/null +++ b/libgfortran/generated/_log10_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_LOG10_17 + +elemental function _gfortran_specific__log10_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__log10_r17 + + _gfortran_specific__log10_r17 = log10 (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_log_c17.F90 b/libgfortran/generated/_log_c17.F90 new file mode 100644 index 00000000000..65c758dad4c --- /dev/null +++ b/libgfortran/generated/_log_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CLOG_17 + +elemental function _gfortran_specific__log_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__log_c17 + + _gfortran_specific__log_c17 = log (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_log_r17.F90 b/libgfortran/generated/_log_r17.F90 new file mode 100644 index 00000000000..95cc9efeebd --- /dev/null +++ b/libgfortran/generated/_log_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_LOG_17 + +elemental function _gfortran_specific__log_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__log_r17 + + _gfortran_specific__log_r17 = log (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_mod_r17.F90 b/libgfortran/generated/_mod_r17.F90 new file mode 100644 index 00000000000..23973950a3b --- /dev/null +++ b/libgfortran/generated/_mod_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + + + +elemental function _gfortran_specific__mod_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__mod_r17 + + _gfortran_specific__mod_r17 = mod (p1, p2) +end function + + + +#endif diff --git a/libgfortran/generated/_sign_r17.F90 b/libgfortran/generated/_sign_r17.F90 new file mode 100644 index 00000000000..d8afb5ec599 --- /dev/null +++ b/libgfortran/generated/_sign_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) + + + +elemental function _gfortran_specific__sign_r17 (p1, p2) + real (kind=17), intent (in) :: p1, p2 + real (kind=17) :: _gfortran_specific__sign_r17 + + _gfortran_specific__sign_r17 = sign (p1, p2) +end function + + + +#endif diff --git a/libgfortran/generated/_sin_c17.F90 b/libgfortran/generated/_sin_c17.F90 new file mode 100644 index 00000000000..6453ec70838 --- /dev/null +++ b/libgfortran/generated/_sin_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CSIN_17 + +elemental function _gfortran_specific__sin_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__sin_c17 + + _gfortran_specific__sin_c17 = sin (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sin_r17.F90 b/libgfortran/generated/_sin_r17.F90 new file mode 100644 index 00000000000..8632c4d7a80 --- /dev/null +++ b/libgfortran/generated/_sin_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_SIN_17 + +elemental function _gfortran_specific__sin_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__sin_r17 + + _gfortran_specific__sin_r17 = sin (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sinh_r17.F90 b/libgfortran/generated/_sinh_r17.F90 new file mode 100644 index 00000000000..bfe0f2a2bf6 --- /dev/null +++ b/libgfortran/generated/_sinh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_SINH_17 + +elemental function _gfortran_specific__sinh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__sinh_r17 + + _gfortran_specific__sinh_r17 = sinh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sqrt_c17.F90 b/libgfortran/generated/_sqrt_c17.F90 new file mode 100644 index 00000000000..5878b87c421 --- /dev/null +++ b/libgfortran/generated/_sqrt_c17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_COMPLEX_17) +#ifdef HAVE_CSQRT_17 + +elemental function _gfortran_specific__sqrt_c17 (parm) + complex (kind=17), intent (in) :: parm + complex (kind=17) :: _gfortran_specific__sqrt_c17 + + _gfortran_specific__sqrt_c17 = sqrt (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_sqrt_r17.F90 b/libgfortran/generated/_sqrt_r17.F90 new file mode 100644 index 00000000000..01c3a04a6d4 --- /dev/null +++ b/libgfortran/generated/_sqrt_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_SQRT_17 + +elemental function _gfortran_specific__sqrt_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__sqrt_r17 + + _gfortran_specific__sqrt_r17 = sqrt (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_tan_r17.F90 b/libgfortran/generated/_tan_r17.F90 new file mode 100644 index 00000000000..01f1bfa921a --- /dev/null +++ b/libgfortran/generated/_tan_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_TAN_17 + +elemental function _gfortran_specific__tan_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__tan_r17 + + _gfortran_specific__tan_r17 = tan (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/_tanh_r17.F90 b/libgfortran/generated/_tanh_r17.F90 new file mode 100644 index 00000000000..e04aae38499 --- /dev/null +++ b/libgfortran/generated/_tanh_r17.F90 @@ -0,0 +1,46 @@ +! Copyright (C) 2002-2022 Free Software Foundation, Inc. +! Contributed by Paul Brook <paul@nowt.org> +! +!This file is part of the GNU Fortran 95 runtime library (libgfortran). +! +!GNU libgfortran is free software; you can redistribute it and/or +!modify it under the terms of the GNU General Public +!License as published by the Free Software Foundation; either +!version 3 of the License, or (at your option) any later version. + +!GNU libgfortran is distributed in the hope that it will be useful, +!but WITHOUT ANY WARRANTY; without even the implied warranty of +!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +!GNU General Public License for more details. +! +!Under Section 7 of GPL version 3, you are granted additional +!permissions described in the GCC Runtime Library Exception, version +!3.1, as published by the Free Software Foundation. +! +!You should have received a copy of the GNU General Public License and +!a copy of the GCC Runtime Library Exception along with this program; +!see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +!<http://www.gnu.org/licenses/>. +! +!This file is machine generated. + + + + + +#include "config.h" +#include "kinds.inc" +#include "c99_protos.inc" + +#if defined (HAVE_GFC_REAL_17) +#ifdef HAVE_TANH_17 + +elemental function _gfortran_specific__tanh_r17 (parm) + real (kind=17), intent (in) :: parm + real (kind=17) :: _gfortran_specific__tanh_r17 + + _gfortran_specific__tanh_r17 = tanh (parm) +end function + +#endif +#endif diff --git a/libgfortran/generated/bessel_r17.c b/libgfortran/generated/bessel_r17.c new file mode 100644 index 00000000000..cca2623e4b2 --- /dev/null +++ b/libgfortran/generated/bessel_r17.c @@ -0,0 +1,186 @@ +/* Implementation of the BESSEL_JN and BESSEL_YN transformational + function using a recurrence algorithm. + Copyright (C) 2010-2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus <burnus@net-b.de> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + + +#if defined(POWER_IEEE128) +#define MATHFUNC(funcname) __ ## funcname ## ieee128 +#else +#define MATHFUNC(funcname) funcname ## q +#endif + +#if defined (HAVE_GFC_REAL_17) + + + +#if 1 /* FIXME: figure this out later. */ +extern void bessel_jn_r17 (gfc_array_r17 * const restrict ret, int n1, + int n2, GFC_REAL_17 x); +export_proto(bessel_jn_r17); + +void +bessel_jn_r17 (gfc_array_r17 * const restrict ret, int n1, int n2, GFC_REAL_17 x) +{ + int i; + index_type stride; + + GFC_REAL_17 last1, last2, x2rev; + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (ret->base_addr == NULL) + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); + ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_17)); + ret->offset = 0; + } + + if (unlikely (n2 < n1)) + return; + + if (unlikely (compile_options.bounds_check) + && GFC_DESCRIPTOR_EXTENT(ret,0) != (n2-n1+1)) + runtime_error("Incorrect extent in return value of BESSEL_JN " + "(%ld vs. %ld)", (long int) n2-n1, + (long int) GFC_DESCRIPTOR_EXTENT(ret,0)); + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (unlikely (x == 0)) + { + ret->base_addr[0] = 1; + for (i = 1; i <= n2-n1; i++) + ret->base_addr[i*stride] = 0; + return; + } + + last1 = MATHFUNC(jn) (n2, x); + ret->base_addr[(n2-n1)*stride] = last1; + + if (n1 == n2) + return; + + last2 = MATHFUNC(jn) (n2 - 1, x); + ret->base_addr[(n2-n1-1)*stride] = last2; + + if (n1 + 1 == n2) + return; + + x2rev = GFC_REAL_17_LITERAL(2.)/x; + + for (i = n2-n1-2; i >= 0; i--) + { + ret->base_addr[i*stride] = x2rev * (i+1+n1) * last2 - last1; + last1 = last2; + last2 = ret->base_addr[i*stride]; + } +} + +#endif + +#if 1 /* FIXME: figure this out later. */ +extern void bessel_yn_r17 (gfc_array_r17 * const restrict ret, + int n1, int n2, GFC_REAL_17 x); +export_proto(bessel_yn_r17); + +void +bessel_yn_r17 (gfc_array_r17 * const restrict ret, int n1, int n2, + GFC_REAL_17 x) +{ + int i; + index_type stride; + + GFC_REAL_17 last1, last2, x2rev; + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (ret->base_addr == NULL) + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); + ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_17)); + ret->offset = 0; + } + + if (unlikely (n2 < n1)) + return; + + if (unlikely (compile_options.bounds_check) + && GFC_DESCRIPTOR_EXTENT(ret,0) != (n2-n1+1)) + runtime_error("Incorrect extent in return value of BESSEL_JN " + "(%ld vs. %ld)", (long int) n2-n1, + (long int) GFC_DESCRIPTOR_EXTENT(ret,0)); + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + if (unlikely (x == 0)) + { + for (i = 0; i <= n2-n1; i++) +#if defined(GFC_REAL_17_INFINITY) + ret->base_addr[i*stride] = -GFC_REAL_17_INFINITY; +#else + ret->base_addr[i*stride] = -GFC_REAL_17_HUGE; +#endif + return; + } + + last1 = MATHFUNC(yn) (n1, x); + ret->base_addr[0] = last1; + + if (n1 == n2) + return; + + last2 = MATHFUNC(yn) (n1 + 1, x); + ret->base_addr[1*stride] = last2; + + if (n1 + 1 == n2) + return; + + x2rev = GFC_REAL_17_LITERAL(2.)/x; + + for (i = 2; i <= n2 - n1; i++) + { +#if defined(GFC_REAL_17_INFINITY) + if (unlikely (last2 == -GFC_REAL_17_INFINITY)) + { + ret->base_addr[i*stride] = -GFC_REAL_17_INFINITY; + } + else +#endif + { + ret->base_addr[i*stride] = x2rev * (i-1+n1) * last2 - last1; + last1 = last2; + last2 = ret->base_addr[i*stride]; + } + } +} +#endif + +#endif + diff --git a/libgfortran/generated/cshift0_c17.c b/libgfortran/generated/cshift0_c17.c new file mode 100644 index 00000000000..73de471cac0 --- /dev/null +++ b/libgfortran/generated/cshift0_c17.c @@ -0,0 +1,242 @@ +/* Helper function for cshift functions. + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +cshift0_c17 (gfc_array_c17 *ret, const gfc_array_c17 *array, ptrdiff_t shift, + int which) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dim; + index_type len; + index_type n; + + bool do_blocked; + index_type r_ex, a_ex; + + which = which - 1; + sstride[0] = 0; + rstride[0] = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + r_ex = 1; + a_ex = 1; + + if (which > 0) + { + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } + } + else + do_blocked = false; + + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + + rstride0 = rstride[0]; + sstride0 = sstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + + /* Avoid the costly modulo for trivially in-bound shifts. */ + if (shift < 0 || shift >= len) + { + shift = len == 0 ? 0 : shift % (ptrdiff_t)len; + if (shift < 0) + shift += len; + } + + while (rptr) + { + /* Do the shift for this dimension. */ + + /* If elements are contiguous, perform the operation + in two block moves. */ + if (soffset == 1 && roffset == 1) + { + size_t len1 = shift * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - shift) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + shift, len2); + memcpy (rptr + (len - shift), sptr, len1); + } + else + { + /* Otherwise, we will have to perform the copy one element at + a time. */ + GFC_COMPLEX_17 *dest = rptr; + const GFC_COMPLEX_17 *src = &sptr[shift * soffset]; + + for (n = 0; n < len - shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + sptr -= sstride[n] * extent[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + } + } + } + + return; +} + +#endif diff --git a/libgfortran/generated/cshift0_r17.c b/libgfortran/generated/cshift0_r17.c new file mode 100644 index 00000000000..670f2b7b3a0 --- /dev/null +++ b/libgfortran/generated/cshift0_r17.c @@ -0,0 +1,242 @@ +/* Helper function for cshift functions. + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +cshift0_r17 (gfc_array_r17 *ret, const gfc_array_r17 *array, ptrdiff_t shift, + int which) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dim; + index_type len; + index_type n; + + bool do_blocked; + index_type r_ex, a_ex; + + which = which - 1; + sstride[0] = 0; + rstride[0] = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + r_ex = 1; + a_ex = 1; + + if (which > 0) + { + /* Test if both ret and array are contiguous. */ + do_blocked = true; + dim = GFC_DESCRIPTOR_RANK (array); + for (n = 0; n < dim; n ++) + { + index_type rs, as; + rs = GFC_DESCRIPTOR_STRIDE (ret, n); + if (rs != r_ex) + { + do_blocked = false; + break; + } + as = GFC_DESCRIPTOR_STRIDE (array, n); + if (as != a_ex) + { + do_blocked = false; + break; + } + r_ex *= GFC_DESCRIPTOR_EXTENT (ret, n); + a_ex *= GFC_DESCRIPTOR_EXTENT (array, n); + } + } + else + do_blocked = false; + + n = 0; + + if (do_blocked) + { + /* For contiguous arrays, use the relationship that + + dimension(n1,n2,n3) :: a, b + b = cshift(a,sh,3) + + can be dealt with as if + + dimension(n1*n2*n3) :: an, bn + bn = cshift(a,sh*n1*n2,1) + + we can used a more blocked algorithm for dim>1. */ + sstride[0] = 1; + rstride[0] = 1; + roffset = 1; + soffset = 1; + len = GFC_DESCRIPTOR_STRIDE(array, which) + * GFC_DESCRIPTOR_EXTENT(array, which); + shift *= GFC_DESCRIPTOR_STRIDE(array, which); + for (dim = which + 1; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + dim = GFC_DESCRIPTOR_RANK (array) - which; + } + else + { + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + } + + rstride0 = rstride[0]; + sstride0 = sstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + + /* Avoid the costly modulo for trivially in-bound shifts. */ + if (shift < 0 || shift >= len) + { + shift = len == 0 ? 0 : shift % (ptrdiff_t)len; + if (shift < 0) + shift += len; + } + + while (rptr) + { + /* Do the shift for this dimension. */ + + /* If elements are contiguous, perform the operation + in two block moves. */ + if (soffset == 1 && roffset == 1) + { + size_t len1 = shift * sizeof (GFC_REAL_17); + size_t len2 = (len - shift) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + shift, len2); + memcpy (rptr + (len - shift), sptr, len1); + } + else + { + /* Otherwise, we will have to perform the copy one element at + a time. */ + GFC_REAL_17 *dest = rptr; + const GFC_REAL_17 *src = &sptr[shift * soffset]; + + for (n = 0; n < len - shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < shift; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + sptr -= sstride[n] * extent[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + } + } + } + + return; +} + +#endif diff --git a/libgfortran/generated/cshift1_16_c17.c b/libgfortran/generated/cshift1_16_c17.c new file mode 100644 index 00000000000..d646f6d0e7d --- /dev/null +++ b/libgfortran/generated/cshift1_16_c17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_16) + +void +cshift1_16_c17 (gfc_array_c17 * const restrict ret, + const gfc_array_c17 * const restrict array, + const gfc_array_i16 * const restrict h, + const GFC_INTEGER_16 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + const GFC_COMPLEX_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_16 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_16 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - sh) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_16_r17.c b/libgfortran/generated/cshift1_16_r17.c new file mode 100644 index 00000000000..fe3fa273bc9 --- /dev/null +++ b/libgfortran/generated/cshift1_16_r17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +void +cshift1_16_r17 (gfc_array_r17 * const restrict ret, + const gfc_array_r17 * const restrict array, + const gfc_array_i16 * const restrict h, + const GFC_INTEGER_16 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + GFC_REAL_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + const GFC_REAL_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_16 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_16 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_REAL_17); + size_t len2 = (len - sh) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_4_c17.c b/libgfortran/generated/cshift1_4_c17.c new file mode 100644 index 00000000000..c01f02dc1da --- /dev/null +++ b/libgfortran/generated/cshift1_4_c17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_4) + +void +cshift1_4_c17 (gfc_array_c17 * const restrict ret, + const gfc_array_c17 * const restrict array, + const gfc_array_i4 * const restrict h, + const GFC_INTEGER_4 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + const GFC_COMPLEX_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_4 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_4 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - sh) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_4_r17.c b/libgfortran/generated/cshift1_4_r17.c new file mode 100644 index 00000000000..7262627f97a --- /dev/null +++ b/libgfortran/generated/cshift1_4_r17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +void +cshift1_4_r17 (gfc_array_r17 * const restrict ret, + const gfc_array_r17 * const restrict array, + const gfc_array_i4 * const restrict h, + const GFC_INTEGER_4 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + GFC_REAL_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + const GFC_REAL_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_4 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_4 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_REAL_17); + size_t len2 = (len - sh) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_8_c17.c b/libgfortran/generated/cshift1_8_c17.c new file mode 100644 index 00000000000..85610a5f0fb --- /dev/null +++ b/libgfortran/generated/cshift1_8_c17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_8) + +void +cshift1_8_c17 (gfc_array_c17 * const restrict ret, + const gfc_array_c17 * const restrict array, + const gfc_array_i8 * const restrict h, + const GFC_INTEGER_8 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_COMPLEX_17 *sptr; + const GFC_COMPLEX_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_8 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_8 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_COMPLEX_17); + size_t len2 = (len - sh) * sizeof (GFC_COMPLEX_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/cshift1_8_r17.c b/libgfortran/generated/cshift1_8_r17.c new file mode 100644 index 00000000000..cc58d3cb7f0 --- /dev/null +++ b/libgfortran/generated/cshift1_8_r17.c @@ -0,0 +1,193 @@ +/* Implementation of the CSHIFT intrinsic. + Copyright (C) 2017-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +void +cshift1_8_r17 (gfc_array_r17 * const restrict ret, + const gfc_array_r17 * const restrict array, + const gfc_array_i8 * const restrict h, + const GFC_INTEGER_8 * const restrict pwhich) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type roffset; + GFC_REAL_17 *rptr; + GFC_REAL_17 *dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type soffset; + const GFC_REAL_17 *sptr; + const GFC_REAL_17 *src; + /* h.* indicates the shift array. */ + index_type hstride[GFC_MAX_DIMENSIONS]; + index_type hstride0; + const GFC_INTEGER_8 *hptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type rs_ex[GFC_MAX_DIMENSIONS]; + index_type ss_ex[GFC_MAX_DIMENSIONS]; + index_type hs_ex[GFC_MAX_DIMENSIONS]; + + index_type dim; + index_type len; + index_type n; + int which; + GFC_INTEGER_8 sh; + + /* Bounds checking etc is already done by the caller. */ + + if (pwhich) + which = *pwhich - 1; + else + which = 0; + + extent[0] = 1; + count[0] = 0; + n = 0; + + /* Initialized for avoiding compiler warnings. */ + roffset = 1; + soffset = 1; + len = 0; + + for (dim = 0; dim < GFC_DESCRIPTOR_RANK (array); dim++) + { + if (dim == which) + { + roffset = GFC_DESCRIPTOR_STRIDE(ret,dim); + if (roffset == 0) + roffset = 1; + soffset = GFC_DESCRIPTOR_STRIDE(array,dim); + if (soffset == 0) + soffset = 1; + len = GFC_DESCRIPTOR_EXTENT(array,dim); + } + else + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,dim); + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,dim); + hstride[n] = GFC_DESCRIPTOR_STRIDE(h,n); + rs_ex[n] = rstride[n] * extent[n]; + ss_ex[n] = sstride[n] * extent[n]; + hs_ex[n] = hstride[n] * extent[n]; + n++; + } + } + if (sstride[0] == 0) + sstride[0] = 1; + if (rstride[0] == 0) + rstride[0] = 1; + if (hstride[0] == 0) + hstride[0] = 1; + + dim = GFC_DESCRIPTOR_RANK (array); + rstride0 = rstride[0]; + sstride0 = sstride[0]; + hstride0 = hstride[0]; + rptr = ret->base_addr; + sptr = array->base_addr; + hptr = h->base_addr; + + while (rptr) + { + /* Do the shift for this dimension. */ + sh = *hptr; + /* Normal case should be -len < sh < len; try to + avoid the expensive remainder operation if possible. */ + if (sh < 0) + sh += len; + if (unlikely(sh >= len || sh < 0)) + { + sh = sh % len; + if (sh < 0) + sh += len; + } + src = &sptr[sh * soffset]; + dest = rptr; + if (soffset == 1 && roffset == 1) + { + size_t len1 = sh * sizeof (GFC_REAL_17); + size_t len2 = (len - sh) * sizeof (GFC_REAL_17); + memcpy (rptr, sptr + sh, len2); + memcpy (rptr + (len - sh), sptr, len1); + } + else + { + for (n = 0; n < len - sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + for (src = sptr, n = 0; n < sh; n++) + { + *dest = *src; + dest += roffset; + src += soffset; + } + } + + /* Advance to the next section. */ + rptr += rstride0; + sptr += sstride0; + hptr += hstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + rptr -= rs_ex[n]; + sptr -= ss_ex[n]; + hptr -= hs_ex[n]; + n++; + if (n >= dim - 1) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + sptr += sstride[n]; + hptr += hstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/findloc0_c17.c b/libgfortran/generated/findloc0_c17.c new file mode 100644 index 00000000000..2d8d25715ed --- /dev/null +++ b/libgfortran/generated/findloc0_c17.c @@ -0,0 +1,375 @@ + +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_COMPLEX_17) +extern void findloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4); +export_proto(findloc0_c17); + +void +findloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_COMPLEX_17 *base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void mfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + gfc_array_l1 *const restrict, GFC_LOGICAL_4); +export_proto(mfindloc0_c17); + +void +mfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + gfc_array_l1 *const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_COMPLEX_17 *base; + index_type * restrict dest; + GFC_LOGICAL_1 *mbase; + index_type rank; + index_type n; + int mask_kind; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "FINDLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + mbase = mbase + (sz - 1) * mask_kind; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + mbase -= mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + mbase += mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n]* 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void sfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sfindloc0_c17); + +void +sfindloc0_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type * restrict dest; + index_type n; + + if (mask == NULL || *mask) + { + findloc0_c17 (retarray, array, value, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + internal_error (NULL, "Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} + +#endif diff --git a/libgfortran/generated/findloc0_r17.c b/libgfortran/generated/findloc0_r17.c new file mode 100644 index 00000000000..cdac81db142 --- /dev/null +++ b/libgfortran/generated/findloc0_r17.c @@ -0,0 +1,375 @@ + +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_REAL_17) +extern void findloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4); +export_proto(findloc0_r17); + +void +findloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n] * 1; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void mfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + gfc_array_l1 *const restrict, GFC_LOGICAL_4); +export_proto(mfindloc0_r17); + +void +mfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + gfc_array_l1 *const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + index_type * restrict dest; + GFC_LOGICAL_1 *mbase; + index_type rank; + index_type n; + int mask_kind; + index_type sz; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "FINDLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + + sz = 1; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + sz *= extent[n]; + if (extent[n] <= 0) + return; + } + + for (n = 0; n < rank; n++) + count[n] = 0; + + if (back) + { + base = array->base_addr + (sz - 1) * 1; + mbase = mbase + (sz - 1) * mask_kind; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = extent[n] - count[n]; + + return; + } + base -= sstride[0] * 1; + mbase -= mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base += sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base -= sstride[n] * 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + else + { + base = array->base_addr; + while (1) + { + do + { + if (unlikely(*mbase && *base == value)) + { + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + + return; + } + base += sstride[0] * 1; + mbase += mstride[0]; + } while(++count[0] != extent[0]); + + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + base += sstride[n]* 1; + mbase += mstride[n]; + } + } while (count[n] == extent[n]); + } + } + return; +} + +extern void sfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sfindloc0_r17); + +void +sfindloc0_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type * restrict dest; + index_type n; + + if (mask == NULL || *mask) + { + findloc0_r17 (retarray, array, value, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + internal_error (NULL, "Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (index_type)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "FINDLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} + +#endif diff --git a/libgfortran/generated/findloc1_c17.c b/libgfortran/generated/findloc1_c17.c new file mode 100644 index 00000000000..3021eb41754 --- /dev/null +++ b/libgfortran/generated/findloc1_c17.c @@ -0,0 +1,523 @@ +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_COMPLEX_17) +extern void findloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back); +export_proto(findloc1_c17); + +extern void +findloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + for (n = len; n > 0; n--, src -= delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + for (n = 1; n <= len; n++, src += delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void mfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(mfindloc1_c17); + +extern void +mfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + index_type dim; + int mask_kind; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + msrc = mbase + (len - 1) * mdelta; + for (n = len; n > 0; n--, src -= delta * 1, msrc -= mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + msrc = mbase; + for (n = 1; n <= len; n++, src += delta * 1, msrc += mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void sfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(sfindloc1_c17); + +extern void +sfindloc1_c17 (gfc_array_index_type * const restrict retarray, + gfc_array_c17 * const restrict array, GFC_COMPLEX_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type dim; + bool continue_loop; + + if (mask == NULL || *mask) + { + findloc1_c17 (retarray, array, value, pdim, back); + return; + } + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + dest = retarray->base_addr; + continue_loop = 1; + + while (continue_loop) + { + *dest = 0; + + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} +#endif diff --git a/libgfortran/generated/findloc1_r17.c b/libgfortran/generated/findloc1_r17.c new file mode 100644 index 00000000000..e32fd6947d7 --- /dev/null +++ b/libgfortran/generated/findloc1_r17.c @@ -0,0 +1,523 @@ +/* Implementation of the FINDLOC intrinsic + Copyright (C) 2018-2022 Free Software Foundation, Inc. + Contributed by Thomas König <tk@tkoenig.net> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + +#if defined (HAVE_GFC_REAL_17) +extern void findloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back); +export_proto(findloc1_r17); + +extern void +findloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + for (n = len; n > 0; n--, src -= delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + for (n = 1; n <= len; n++, src += delta * 1) + { + if (*src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void mfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(mfindloc1_r17); + +extern void +mfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, gfc_array_l1 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + index_type dim; + int mask_kind; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + internal_error (NULL, "Funny sized logical array"); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + continue_loop = 1; + + base = array->base_addr; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + index_type result; + + result = 0; + if (back) + { + src = base + (len - 1) * delta * 1; + msrc = mbase + (len - 1) * mdelta; + for (n = len; n > 0; n--, src -= delta * 1, msrc -= mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + else + { + src = base; + msrc = mbase; + for (n = 1; n <= len; n++, src += delta * 1, msrc += mdelta) + { + if (*msrc && *src == value) + { + result = n; + break; + } + } + } + *dest = result; + + count[0]++; + base += sstride[0] * 1; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + base -= sstride[n] * extent[n] * 1; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n] * 1; + dest += dstride[n]; + } + } + } +} +extern void sfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back); +export_proto(sfindloc1_r17); + +extern void +sfindloc1_r17 (gfc_array_index_type * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_REAL_17 value, + const index_type * restrict pdim, GFC_LOGICAL_4 *const restrict mask, + GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type dim; + bool continue_loop; + + if (mask == NULL || *mask) + { + findloc1_r17 (retarray, array, value, pdim, back); + return; + } + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in FINDLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (index_type)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " FINDLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "FINDLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + dest = retarray->base_addr; + continue_loop = 1; + + while (continue_loop) + { + *dest = 0; + + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + count[n] = 0; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + continue_loop = 0; + break; + } + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} +#endif diff --git a/libgfortran/generated/in_pack_c17.c b/libgfortran/generated/in_pack_c17.c new file mode 100644 index 00000000000..fcc655212d0 --- /dev/null +++ b/libgfortran/generated/in_pack_c17.c @@ -0,0 +1,116 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) + +/* Allocates a block of memory with internal_malloc if the array needs + repacking. */ + +GFC_COMPLEX_17 * +internal_pack_c17 (gfc_array_c17 * source) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type ssize; + const GFC_COMPLEX_17 *src; + GFC_COMPLEX_17 * restrict dest; + GFC_COMPLEX_17 *destptr; + int packed; + + /* TODO: Investigate how we can figure out if this is a temporary + since the stride=0 thing has been removed from the frontend. */ + + dim = GFC_DESCRIPTOR_RANK (source); + ssize = 1; + packed = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (extent[n] <= 0) + { + /* Do nothing. */ + packed = 1; + break; + } + + if (ssize != stride[n]) + packed = 0; + + ssize *= extent[n]; + } + + if (packed) + return source->base_addr; + + /* Allocate storage for the destination. */ + destptr = xmallocarray (ssize, sizeof (GFC_COMPLEX_17)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; + + + while (src) + { + /* Copy the data. */ + *(dest++) = *src; + /* Advance to the next element. */ + src += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + return destptr; +} + +#endif + diff --git a/libgfortran/generated/in_pack_r17.c b/libgfortran/generated/in_pack_r17.c new file mode 100644 index 00000000000..759094c5e46 --- /dev/null +++ b/libgfortran/generated/in_pack_r17.c @@ -0,0 +1,116 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) + +/* Allocates a block of memory with internal_malloc if the array needs + repacking. */ + +GFC_REAL_17 * +internal_pack_r17 (gfc_array_r17 * source) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type ssize; + const GFC_REAL_17 *src; + GFC_REAL_17 * restrict dest; + GFC_REAL_17 *destptr; + int packed; + + /* TODO: Investigate how we can figure out if this is a temporary + since the stride=0 thing has been removed from the frontend. */ + + dim = GFC_DESCRIPTOR_RANK (source); + ssize = 1; + packed = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (extent[n] <= 0) + { + /* Do nothing. */ + packed = 1; + break; + } + + if (ssize != stride[n]) + packed = 0; + + ssize *= extent[n]; + } + + if (packed) + return source->base_addr; + + /* Allocate storage for the destination. */ + destptr = xmallocarray (ssize, sizeof (GFC_REAL_17)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; + + + while (src) + { + /* Copy the data. */ + *(dest++) = *src; + /* Advance to the next element. */ + src += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= stride[n] * extent[n]; + n++; + if (n == dim) + { + src = NULL; + break; + } + else + { + count[n]++; + src += stride[n]; + } + } + } + return destptr; +} + +#endif + diff --git a/libgfortran/generated/in_unpack_c17.c b/libgfortran/generated/in_unpack_c17.c new file mode 100644 index 00000000000..9d74b3060af --- /dev/null +++ b/libgfortran/generated/in_unpack_c17.c @@ -0,0 +1,104 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +internal_unpack_c17 (gfc_array_c17 * d, const GFC_COMPLEX_17 * src) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type dsize; + GFC_COMPLEX_17 * restrict dest; + + dest = d->base_addr; + if (src == dest || !src) + return; + + dim = GFC_DESCRIPTOR_RANK (d); + dsize = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(d,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(d,n); + if (extent[n] <= 0) + return; + + if (dsize == stride[n]) + dsize *= extent[n]; + else + dsize = 0; + } + + if (dsize != 0) + { + memcpy (dest, src, dsize * sizeof (GFC_COMPLEX_17)); + return; + } + + stride0 = stride[0]; + + while (dest) + { + /* Copy the data. */ + *dest = *(src++); + /* Advance to the next element. */ + dest += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/in_unpack_r17.c b/libgfortran/generated/in_unpack_r17.c new file mode 100644 index 00000000000..574215183c8 --- /dev/null +++ b/libgfortran/generated/in_unpack_r17.c @@ -0,0 +1,104 @@ +/* Helper function for repacking arrays. + Copyright (C) 2003-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +internal_unpack_r17 (gfc_array_r17 * d, const GFC_REAL_17 * src) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type stride[GFC_MAX_DIMENSIONS]; + index_type stride0; + index_type dim; + index_type dsize; + GFC_REAL_17 * restrict dest; + + dest = d->base_addr; + if (src == dest || !src) + return; + + dim = GFC_DESCRIPTOR_RANK (d); + dsize = 1; + for (index_type n = 0; n < dim; n++) + { + count[n] = 0; + stride[n] = GFC_DESCRIPTOR_STRIDE(d,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(d,n); + if (extent[n] <= 0) + return; + + if (dsize == stride[n]) + dsize *= extent[n]; + else + dsize = 0; + } + + if (dsize != 0) + { + memcpy (dest, src, dsize * sizeof (GFC_REAL_17)); + return; + } + + stride0 = stride[0]; + + while (dest) + { + /* Copy the data. */ + *dest = *(src++); + /* Advance to the next element. */ + dest += stride0; + count[0]++; + /* Advance to the next source element. */ + index_type n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= stride[n] * extent[n]; + n++; + if (n == dim) + { + dest = NULL; + break; + } + else + { + count[n]++; + dest += stride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/matmul_c17.c b/libgfortran/generated/matmul_c17.c new file mode 100644 index 00000000000..a1a7d42f2e3 --- /dev/null +++ b/libgfortran/generated/matmul_c17.c @@ -0,0 +1,3013 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *, + const int *, const GFC_COMPLEX_17 *, const int *, + const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *, + int, int); + +/* The order of loops is different in the case of plain matrix + multiplication C=MATMUL(A,B), and in the frequent special case where + the argument A is the temporary result of a TRANSPOSE intrinsic: + C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by + looking at their strides. + + The equivalent Fortran pseudo-code is: + + DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) + IF (.NOT.IS_TRANSPOSED(A)) THEN + C = 0 + DO J=1,N + DO K=1,COUNT + DO I=1,M + C(I,J) = C(I,J)+A(I,K)*B(K,J) + ELSE + DO J=1,N + DO I=1,M + S = 0 + DO K=1,COUNT + S = S+A(I,K)*B(K,J) + C(I,J) = S + ENDIF +*/ + +/* If try_blas is set to a nonzero value, then the matmul function will + see if there is a way to perform the matrix multiplication by a call + to the BLAS gemm function. */ + +extern void matmul_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); +export_proto(matmul_c17); + +/* Put exhaustive list of possible architectures here here, ORed together. */ + +#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + +#ifdef HAVE_AVX +static void +matmul_c17_avx (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); +static void +matmul_c17_avx (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX */ + +#ifdef HAVE_AVX2 +static void +matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); +static void +matmul_c17_avx2 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX2 */ + +#ifdef HAVE_AVX512F +static void +matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); +static void +matmul_c17_avx512f (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX512F */ + +/* AMD-specifix funtions with AVX128 and FMA3/FMA4. */ + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_c17_avx128_fma3); +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_c17_avx128_fma4); +#endif + +/* Function to fall back to if there is no special processor-specific version. */ +static void +matmul_c17_vanilla (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + + +/* Compiling main function, with selection code for the processor. */ + +/* Currently, this is i386 only. Adjust for other architectures. */ + +void matmul_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + static void (*matmul_p) (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + void (*matmul_fn) (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) + { + matmul_fn = matmul_c17_vanilla; + if (__builtin_cpu_is ("intel")) + { + /* Run down the available processors in order of preference. */ +#ifdef HAVE_AVX512F + if (__builtin_cpu_supports ("avx512f")) + { + matmul_fn = matmul_c17_avx512f; + goto store; + } + +#endif /* HAVE_AVX512F */ + +#ifdef HAVE_AVX2 + if (__builtin_cpu_supports ("avx2") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_c17_avx2; + goto store; + } + +#endif + +#ifdef HAVE_AVX + if (__builtin_cpu_supports ("avx")) + { + matmul_fn = matmul_c17_avx; + goto store; + } +#endif /* HAVE_AVX */ + } + else if (__builtin_cpu_is ("amd")) + { +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_c17_avx128_fma3; + goto store; + } +#endif +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma4")) + { + matmul_fn = matmul_c17_avx128_fma4; + goto store; + } +#endif + + } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); + } + + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); +} + +#else /* Just the vanilla function. */ + +void +matmul_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif +#endif + diff --git a/libgfortran/generated/matmul_r17.c b/libgfortran/generated/matmul_r17.c new file mode 100644 index 00000000000..db7f05b524a --- /dev/null +++ b/libgfortran/generated/matmul_r17.c @@ -0,0 +1,3013 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_REAL_17 *, const GFC_REAL_17 *, + const int *, const GFC_REAL_17 *, const int *, + const GFC_REAL_17 *, GFC_REAL_17 *, const int *, + int, int); + +/* The order of loops is different in the case of plain matrix + multiplication C=MATMUL(A,B), and in the frequent special case where + the argument A is the temporary result of a TRANSPOSE intrinsic: + C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by + looking at their strides. + + The equivalent Fortran pseudo-code is: + + DIMENSION A(M,COUNT), B(COUNT,N), C(M,N) + IF (.NOT.IS_TRANSPOSED(A)) THEN + C = 0 + DO J=1,N + DO K=1,COUNT + DO I=1,M + C(I,J) = C(I,J)+A(I,K)*B(K,J) + ELSE + DO J=1,N + DO I=1,M + S = 0 + DO K=1,COUNT + S = S+A(I,K)*B(K,J) + C(I,J) = S + ENDIF +*/ + +/* If try_blas is set to a nonzero value, then the matmul function will + see if there is a way to perform the matrix multiplication by a call + to the BLAS gemm function. */ + +extern void matmul_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); +export_proto(matmul_r17); + +/* Put exhaustive list of possible architectures here here, ORed together. */ + +#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F) + +#ifdef HAVE_AVX +static void +matmul_r17_avx (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx"))); +static void +matmul_r17_avx (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX */ + +#ifdef HAVE_AVX2 +static void +matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma"))); +static void +matmul_r17_avx2 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX2 */ + +#ifdef HAVE_AVX512F +static void +matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx512f"))); +static void +matmul_r17_avx512f (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif /* HAVE_AVX512F */ + +/* AMD-specifix funtions with AVX128 and FMA3/FMA4. */ + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_r17_avx128_fma3); +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_r17_avx128_fma4); +#endif + +/* Function to fall back to if there is no special processor-specific version. */ +static void +matmul_r17_vanilla (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + + +/* Compiling main function, with selection code for the processor. */ + +/* Currently, this is i386 only. Adjust for other architectures. */ + +void matmul_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + static void (*matmul_p) (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + void (*matmul_fn) (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm); + + matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED); + if (matmul_fn == NULL) + { + matmul_fn = matmul_r17_vanilla; + if (__builtin_cpu_is ("intel")) + { + /* Run down the available processors in order of preference. */ +#ifdef HAVE_AVX512F + if (__builtin_cpu_supports ("avx512f")) + { + matmul_fn = matmul_r17_avx512f; + goto store; + } + +#endif /* HAVE_AVX512F */ + +#ifdef HAVE_AVX2 + if (__builtin_cpu_supports ("avx2") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_r17_avx2; + goto store; + } + +#endif + +#ifdef HAVE_AVX + if (__builtin_cpu_supports ("avx")) + { + matmul_fn = matmul_r17_avx; + goto store; + } +#endif /* HAVE_AVX */ + } + else if (__builtin_cpu_is ("amd")) + { +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma")) + { + matmul_fn = matmul_r17_avx128_fma3; + goto store; + } +#endif +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) + if (__builtin_cpu_supports ("avx") + && __builtin_cpu_supports ("fma4")) + { + matmul_fn = matmul_r17_avx128_fma4; + goto store; + } +#endif + + } + store: + __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED); + } + + (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm); +} + +#else /* Just the vanilla function. */ + +void +matmul_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif +#endif + diff --git a/libgfortran/generated/matmulavx128_c17.c b/libgfortran/generated/matmulavx128_c17.c new file mode 100644 index 00000000000..79cfe5b4b2d --- /dev/null +++ b/libgfortran/generated/matmulavx128_c17.c @@ -0,0 +1,1186 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +/* These are the specific versions of matmul with -mprefer-avx128. */ + +#if defined (HAVE_GFC_COMPLEX_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_COMPLEX_17 *, const GFC_COMPLEX_17 *, + const int *, const GFC_COMPLEX_17 *, const int *, + const GFC_COMPLEX_17 *, GFC_COMPLEX_17 *, const int *, + int, int); + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_c17_avx128_fma3); +void +matmul_c17_avx128_fma3 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_c17_avx128_fma4); +void +matmul_c17_avx128_fma4 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict a, gfc_array_c17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_COMPLEX_17 * restrict abase; + const GFC_COMPLEX_17 * restrict bbase; + GFC_COMPLEX_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_COMPLEX_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_COMPLEX_17 *a, *b; + GFC_COMPLEX_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_COMPLEX_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_COMPLEX_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_COMPLEX_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_COMPLEX_17 *restrict abase_x; + const GFC_COMPLEX_17 *restrict bbase_y; + GFC_COMPLEX_17 *restrict dest_y; + GFC_COMPLEX_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_COMPLEX_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#endif + diff --git a/libgfortran/generated/matmulavx128_r17.c b/libgfortran/generated/matmulavx128_r17.c new file mode 100644 index 00000000000..a3a04806a5c --- /dev/null +++ b/libgfortran/generated/matmulavx128_r17.c @@ -0,0 +1,1186 @@ +/* Implementation of the MATMUL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> +#include <assert.h> + + +/* These are the specific versions of matmul with -mprefer-avx128. */ + +#if defined (HAVE_GFC_REAL_17) + +/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be + passed to us by the front-end, in which case we call it for large + matrices. */ + +typedef void (*blas_call)(const char *, const char *, const int *, const int *, + const int *, const GFC_REAL_17 *, const GFC_REAL_17 *, + const int *, const GFC_REAL_17 *, const int *, + const GFC_REAL_17 *, GFC_REAL_17 *, const int *, + int, int); + +#if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma"))); +internal_proto(matmul_r17_avx128_fma3); +void +matmul_r17_avx128_fma3 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128) +void +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4"))); +internal_proto(matmul_r17_avx128_fma4); +void +matmul_r17_avx128_fma4 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict a, gfc_array_r17 * const restrict b, int try_blas, + int blas_limit, blas_call gemm) +{ + const GFC_REAL_17 * restrict abase; + const GFC_REAL_17 * restrict bbase; + GFC_REAL_17 * restrict dest; + + index_type rxstride, rystride, axstride, aystride, bxstride, bystride; + index_type x, y, n, count, xcount, ycount; + + assert (GFC_DESCRIPTOR_RANK (a) == 2 + || GFC_DESCRIPTOR_RANK (b) == 2); + +/* C[xcount,ycount] = A[xcount, count] * B[count,ycount] + + Either A or B (but not both) can be rank 1: + + o One-dimensional argument A is implicitly treated as a row matrix + dimensioned [1,count], so xcount=1. + + o One-dimensional argument B is implicitly treated as a column matrix + dimensioned [count, 1], so ycount=1. +*/ + + if (retarray->base_addr == NULL) + { + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + } + else + { + GFC_DIMENSION_SET(retarray->dim[0], 0, + GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1); + + GFC_DIMENSION_SET(retarray->dim[1], 0, + GFC_DESCRIPTOR_EXTENT(b,1) - 1, + GFC_DESCRIPTOR_EXTENT(retarray,0)); + } + + retarray->base_addr + = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_17)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, arg_extent; + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else if (GFC_DESCRIPTOR_RANK (b) == 1) + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + else + { + arg_extent = GFC_DESCRIPTOR_EXTENT(a,0); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 1 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + + arg_extent = GFC_DESCRIPTOR_EXTENT(b,1); + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1); + if (arg_extent != ret_extent) + runtime_error ("Array bound mismatch for dimension 2 of " + "array (%ld/%ld) ", + (long int) ret_extent, (long int) arg_extent); + } + } + + + if (GFC_DESCRIPTOR_RANK (retarray) == 1) + { + /* One-dimensional result may be addressed in the code below + either as a row or a column matrix. We want both cases to + work. */ + rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0); + } + else + { + rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + rystride = GFC_DESCRIPTOR_STRIDE(retarray,1); + } + + + if (GFC_DESCRIPTOR_RANK (a) == 1) + { + /* Treat it as a a row matrix A[1,count]. */ + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = 1; + + xcount = 1; + count = GFC_DESCRIPTOR_EXTENT(a,0); + } + else + { + axstride = GFC_DESCRIPTOR_STRIDE(a,0); + aystride = GFC_DESCRIPTOR_STRIDE(a,1); + + count = GFC_DESCRIPTOR_EXTENT(a,1); + xcount = GFC_DESCRIPTOR_EXTENT(a,0); + } + + if (count != GFC_DESCRIPTOR_EXTENT(b,0)) + { + if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0) + runtime_error ("Incorrect extent in argument B in MATMUL intrinsic " + "in dimension 1: is %ld, should be %ld", + (long int) GFC_DESCRIPTOR_EXTENT(b,0), (long int) count); + } + + if (GFC_DESCRIPTOR_RANK (b) == 1) + { + /* Treat it as a column matrix B[count,1] */ + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + + /* bystride should never be used for 1-dimensional b. + The value is only used for calculation of the + memory by the buffer. */ + bystride = 256; + ycount = 1; + } + else + { + bxstride = GFC_DESCRIPTOR_STRIDE(b,0); + bystride = GFC_DESCRIPTOR_STRIDE(b,1); + ycount = GFC_DESCRIPTOR_EXTENT(b,1); + } + + abase = a->base_addr; + bbase = b->base_addr; + dest = retarray->base_addr; + + /* Now that everything is set up, we perform the multiplication + itself. */ + +#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x))) +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#define max(a,b) ((a) >= (b) ? (a) : (b)) + + if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1) + && (bxstride == 1 || bystride == 1) + && (((float) xcount) * ((float) ycount) * ((float) count) + > POW3(blas_limit))) + { + const int m = xcount, n = ycount, k = count, ldc = rystride; + const GFC_REAL_17 one = 1, zero = 0; + const int lda = (axstride == 1) ? aystride : axstride, + ldb = (bxstride == 1) ? bystride : bxstride; + + if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1) + { + assert (gemm != NULL); + const char *transa, *transb; + if (try_blas & 2) + transa = "C"; + else + transa = axstride == 1 ? "N" : "T"; + + if (try_blas & 4) + transb = "C"; + else + transb = bxstride == 1 ? "N" : "T"; + + gemm (transa, transb , &m, + &n, &k, &one, abase, &lda, bbase, &ldb, &zero, dest, + &ldc, 1, 1); + return; + } + } + + if (rxstride == 1 && axstride == 1 && bxstride == 1 + && GFC_DESCRIPTOR_RANK (b) != 1) + { + /* This block of code implements a tuned matmul, derived from + Superscalar GEMM-based level 3 BLAS, Beta version 0.1 + + Bo Kagstrom and Per Ling + Department of Computing Science + Umea University + S-901 87 Umea, Sweden + + from netlib.org, translated to C, and modified for matmul.m4. */ + + const GFC_REAL_17 *a, *b; + GFC_REAL_17 *c; + const index_type m = xcount, n = ycount, k = count; + + /* System generated locals */ + index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, + i1, i2, i3, i4, i5, i6; + + /* Local variables */ + GFC_REAL_17 f11, f12, f21, f22, f31, f32, f41, f42, + f13, f14, f23, f24, f33, f34, f43, f44; + index_type i, j, l, ii, jj, ll; + index_type isec, jsec, lsec, uisec, ujsec, ulsec; + GFC_REAL_17 *t1; + + a = abase; + b = bbase; + c = retarray->base_addr; + + /* Parameter adjustments */ + c_dim1 = rystride; + c_offset = 1 + c_dim1; + c -= c_offset; + a_dim1 = aystride; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = bystride; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_17)0; + + /* Early exit if possible */ + if (m == 0 || n == 0 || k == 0) + return; + + /* Adjust size of t1 to what is needed. */ + index_type t1_dim, a_sz; + if (aystride == 1) + a_sz = rystride; + else + a_sz = a_dim1; + + t1_dim = a_sz * 256 + b_dim1; + if (t1_dim > 65536) + t1_dim = 65536; + + t1 = malloc (t1_dim * sizeof(GFC_REAL_17)); + + /* Start turning the crank. */ + i1 = n; + for (jj = 1; jj <= i1; jj += 512) + { + /* Computing MIN */ + i2 = 512; + i3 = n - jj + 1; + jsec = min(i2,i3); + ujsec = jsec - jsec % 4; + i2 = k; + for (ll = 1; ll <= i2; ll += 256) + { + /* Computing MIN */ + i3 = 256; + i4 = k - ll + 1; + lsec = min(i3,i4); + ulsec = lsec - lsec % 2; + + i3 = m; + for (ii = 1; ii <= i3; ii += 256) + { + /* Computing MIN */ + i4 = 256; + i5 = m - ii + 1; + isec = min(i4,i5); + uisec = isec - isec % 2; + i4 = ll + ulsec - 1; + for (l = ll; l <= i4; l += 2) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 2) + { + t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] = + a[i + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] = + a[i + (l + 1) * a_dim1]; + t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + l * a_dim1]; + t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] = + a[i + 1 + (l + 1) * a_dim1]; + } + if (uisec < isec) + { + t1[l - ll + 1 + (isec << 8) - 257] = + a[ii + isec - 1 + l * a_dim1]; + t1[l - ll + 2 + (isec << 8) - 257] = + a[ii + isec - 1 + (l + 1) * a_dim1]; + } + } + if (ulsec < lsec) + { + i4 = ii + isec - 1; + for (i = ii; i<= i4; ++i) + { + t1[lsec + ((i - ii + 1) << 8) - 257] = + a[i + (ll + lsec - 1) * a_dim1]; + } + } + + uisec = isec - isec % 4; + i4 = jj + ujsec - 1; + for (j = jj; j <= i4; j += 4) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f22 = c[i + 1 + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f23 = c[i + 1 + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + f24 = c[i + 1 + (j + 3) * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + f32 = c[i + 2 + (j + 1) * c_dim1]; + f42 = c[i + 3 + (j + 1) * c_dim1]; + f33 = c[i + 2 + (j + 2) * c_dim1]; + f43 = c[i + 3 + (j + 2) * c_dim1]; + f34 = c[i + 2 + (j + 3) * c_dim1]; + f44 = c[i + 3 + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + j * b_dim1]; + f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 1) * b_dim1]; + f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 2) * b_dim1]; + f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257] + * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + (j + 1) * c_dim1] = f12; + c[i + 1 + (j + 1) * c_dim1] = f22; + c[i + (j + 2) * c_dim1] = f13; + c[i + 1 + (j + 2) * c_dim1] = f23; + c[i + (j + 3) * c_dim1] = f14; + c[i + 1 + (j + 3) * c_dim1] = f24; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + c[i + 2 + (j + 1) * c_dim1] = f32; + c[i + 3 + (j + 1) * c_dim1] = f42; + c[i + 2 + (j + 2) * c_dim1] = f33; + c[i + 3 + (j + 2) * c_dim1] = f43; + c[i + 2 + (j + 3) * c_dim1] = f34; + c[i + 3 + (j + 3) * c_dim1] = f44; + } + if (uisec < isec) + { + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + f12 = c[i + (j + 1) * c_dim1]; + f13 = c[i + (j + 2) * c_dim1]; + f14 = c[i + (j + 3) * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 1) * b_dim1]; + f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 2) * b_dim1]; + f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + (j + 3) * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + (j + 1) * c_dim1] = f12; + c[i + (j + 2) * c_dim1] = f13; + c[i + (j + 3) * c_dim1] = f14; + } + } + } + if (ujsec < jsec) + { + i4 = jj + jsec - 1; + for (j = jj + ujsec; j <= i4; ++j) + { + i5 = ii + uisec - 1; + for (i = ii; i <= i5; i += 4) + { + f11 = c[i + j * c_dim1]; + f21 = c[i + 1 + j * c_dim1]; + f31 = c[i + 2 + j * c_dim1]; + f41 = c[i + 3 + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - + 257] * b[l + j * b_dim1]; + f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - + 257] * b[l + j * b_dim1]; + f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + c[i + 1 + j * c_dim1] = f21; + c[i + 2 + j * c_dim1] = f31; + c[i + 3 + j * c_dim1] = f41; + } + i5 = ii + isec - 1; + for (i = ii + uisec; i <= i5; ++i) + { + f11 = c[i + j * c_dim1]; + i6 = ll + lsec - 1; + for (l = ll; l <= i6; ++l) + { + f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - + 257] * b[l + j * b_dim1]; + } + c[i + j * c_dim1] = f11; + } + } + } + } + } + } + free(t1); + return; + } + else if (rxstride == 1 && aystride == 1 && bxstride == 1) + { + if (GFC_DESCRIPTOR_RANK (a) != 1) + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n] * bbase_y[n]; + dest_y[x] = s; + } + } + } + else + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n]; + dest[y*rystride] = s; + } + } + } + else if (GFC_DESCRIPTOR_RANK (a) == 1) + { + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase[n*axstride] * bbase_y[n*bxstride]; + dest[y*rxstride] = s; + } + } + else if (axstride < aystride) + { + for (y = 0; y < ycount; y++) + for (x = 0; x < xcount; x++) + dest[x*rxstride + y*rystride] = (GFC_REAL_17)0; + + for (y = 0; y < ycount; y++) + for (n = 0; n < count; n++) + for (x = 0; x < xcount; x++) + /* dest[x,y] += a[x,n] * b[n,y] */ + dest[x*rxstride + y*rystride] += + abase[x*axstride + n*aystride] * + bbase[n*bxstride + y*bystride]; + } + else + { + const GFC_REAL_17 *restrict abase_x; + const GFC_REAL_17 *restrict bbase_y; + GFC_REAL_17 *restrict dest_y; + GFC_REAL_17 s; + + for (y = 0; y < ycount; y++) + { + bbase_y = &bbase[y*bystride]; + dest_y = &dest[y*rystride]; + for (x = 0; x < xcount; x++) + { + abase_x = &abase[x*axstride]; + s = (GFC_REAL_17) 0; + for (n = 0; n < count; n++) + s += abase_x[n*aystride] * bbase_y[n*bxstride]; + dest_y[x*rxstride] = s; + } + } + } +} +#undef POW3 +#undef min +#undef max + +#endif + +#endif + diff --git a/libgfortran/generated/maxloc0_16_r17.c b/libgfortran/generated/maxloc0_16_r17.c new file mode 100644 index 00000000000..0bd627c5303 --- /dev/null +++ b/libgfortran/generated/maxloc0_16_r17.c @@ -0,0 +1,408 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + + +extern void maxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(maxloc0_16_r17); + +void +maxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 maxval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base >= maxval) + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base >= maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mmaxloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mmaxloc0_16_r17); + +void +mmaxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_16 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + maxloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 maxval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base >= maxval) +#endif + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (*mbase && *base >= maxval) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (*mbase && unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + + +extern void smaxloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(smaxloc0_16_r17); + +void +smaxloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_16 *dest; + + if (mask == NULL || *mask) + { + maxloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/maxloc0_4_r17.c b/libgfortran/generated/maxloc0_4_r17.c new file mode 100644 index 00000000000..cff16c77ce3 --- /dev/null +++ b/libgfortran/generated/maxloc0_4_r17.c @@ -0,0 +1,408 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + + +extern void maxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(maxloc0_4_r17); + +void +maxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 maxval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base >= maxval) + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base >= maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mmaxloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mmaxloc0_4_r17); + +void +mmaxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_4 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + maxloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 maxval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base >= maxval) +#endif + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (*mbase && *base >= maxval) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (*mbase && unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + + +extern void smaxloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(smaxloc0_4_r17); + +void +smaxloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_4 *dest; + + if (mask == NULL || *mask) + { + maxloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/maxloc0_8_r17.c b/libgfortran/generated/maxloc0_8_r17.c new file mode 100644 index 00000000000..910ac655438 --- /dev/null +++ b/libgfortran/generated/maxloc0_8_r17.c @@ -0,0 +1,408 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + + +extern void maxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(maxloc0_8_r17); + +void +maxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 maxval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base >= maxval) + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base >= maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mmaxloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mmaxloc0_8_r17); + +void +mmaxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_8 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + maxloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 maxval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base >= maxval) +#endif + { + fast = 1; + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (*mbase && *base >= maxval) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (*mbase && unlikely (*base > maxval)) + { + maxval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + + +extern void smaxloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(smaxloc0_8_r17); + +void +smaxloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_8 *dest; + + if (mask == NULL || *mask) + { + maxloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MAXLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/maxloc1_16_r17.c b/libgfortran/generated/maxloc1_16_r17.c new file mode 100644 index 00000000000..31f06e24d58 --- /dev/null +++ b/libgfortran/generated/maxloc1_16_r17.c @@ -0,0 +1,607 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +#define HAVE_BACK_ARG 1 + + +extern void maxloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(maxloc1_16_r17); + +void +maxloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_16 result; + src = base; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src >= maxval) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } +#else + n = 0; +#endif + for (; n < len; n++, src += delta) + { + if (back ? *src >= maxval : *src > maxval) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mmaxloc1_16_r17); + +void +mmaxloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxloc1_16_r17 (retarray, array, pdim, back); +#else + maxloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_16 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_16 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_16)n + 1; + if (*src >= maxval) +#endif + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src >= maxval)) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src > maxval)) + { + maxval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(smaxloc1_16_r17); + +void +smaxloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxloc1_16_r17 (retarray, array, pdim, back); +#else + maxloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/maxloc1_4_r17.c b/libgfortran/generated/maxloc1_4_r17.c new file mode 100644 index 00000000000..de825c52e34 --- /dev/null +++ b/libgfortran/generated/maxloc1_4_r17.c @@ -0,0 +1,607 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +#define HAVE_BACK_ARG 1 + + +extern void maxloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(maxloc1_4_r17); + +void +maxloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_4 result; + src = base; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src >= maxval) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } +#else + n = 0; +#endif + for (; n < len; n++, src += delta) + { + if (back ? *src >= maxval : *src > maxval) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mmaxloc1_4_r17); + +void +mmaxloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxloc1_4_r17 (retarray, array, pdim, back); +#else + maxloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_4 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_4 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_4)n + 1; + if (*src >= maxval) +#endif + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src >= maxval)) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src > maxval)) + { + maxval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(smaxloc1_4_r17); + +void +smaxloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxloc1_4_r17 (retarray, array, pdim, back); +#else + maxloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/maxloc1_8_r17.c b/libgfortran/generated/maxloc1_8_r17.c new file mode 100644 index 00000000000..666e3a61bc4 --- /dev/null +++ b/libgfortran/generated/maxloc1_8_r17.c @@ -0,0 +1,607 @@ +/* Implementation of the MAXLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +#define HAVE_BACK_ARG 1 + + +extern void maxloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(maxloc1_8_r17); + +void +maxloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_8 result; + src = base; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src >= maxval) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } +#else + n = 0; +#endif + for (; n < len; n++, src += delta) + { + if (back ? *src >= maxval : *src > maxval) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mmaxloc1_8_r17); + +void +mmaxloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxloc1_8_r17 (retarray, array, pdim, back); +#else + maxloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_8 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 maxval; +#if defined (GFC_REAL_17_INFINITY) + maxval = -GFC_REAL_17_INFINITY; +#else + maxval = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_8 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_8)n + 1; + if (*src >= maxval) +#endif + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src >= maxval)) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src > maxval)) + { + maxval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(smaxloc1_8_r17); + +void +smaxloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxloc1_8_r17 (retarray, array, pdim, back); +#else + maxloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/maxval_r17.c b/libgfortran/generated/maxval_r17.c new file mode 100644 index 00000000000..aafdfae8b61 --- /dev/null +++ b/libgfortran/generated/maxval_r17.c @@ -0,0 +1,578 @@ +/* Implementation of the MAXVAL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void maxval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(maxval_r17); + +void +maxval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXVAL"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + +#if defined (GFC_REAL_17_INFINITY) + result = -GFC_REAL_17_INFINITY; +#else + result = -GFC_REAL_17_HUGE; +#endif + if (len <= 0) + *dest = -GFC_REAL_17_HUGE; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + if (*src >= result) + break; + } + if (unlikely (n >= len)) + result = GFC_REAL_17_QUIET_NAN; + else for (; n < len; n++, src += delta) + { +#endif + if (*src > result) + result = *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mmaxval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mmaxval_r17); + +void +mmaxval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + maxval_r17 (retarray, array, pdim, back); +#else + maxval_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MAXVAL intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MAXVAL"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MAXVAL"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + +#if defined (GFC_REAL_17_INFINITY) + result = -GFC_REAL_17_INFINITY; +#else + result = -GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + int non_empty_p = 0; +#endif + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + +#if defined (GFC_REAL_17_INFINITY) || defined (GFC_REAL_17_QUIET_NAN) + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + non_empty_p = 1; + if (*src >= result) +#endif + break; + } + } + if (unlikely (n >= len)) + { +#if defined (GFC_REAL_17_QUIET_NAN) + result = non_empty_p ? GFC_REAL_17_QUIET_NAN : -GFC_REAL_17_HUGE; +#else + result = -GFC_REAL_17_HUGE; +#endif + } + else for (; n < len; n++, src += delta, msrc += mdelta) + { +#endif + if (*msrc && *src > result) + result = *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void smaxval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(smaxval_r17); + +void +smaxval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + maxval_r17 (retarray, array, pdim, back); +#else + maxval_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MAXVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MAXVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MAXVAL intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = -GFC_REAL_17_HUGE; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minloc0_16_r17.c b/libgfortran/generated/minloc0_16_r17.c new file mode 100644 index 00000000000..e6a77c4f4b6 --- /dev/null +++ b/libgfortran/generated/minloc0_16_r17.c @@ -0,0 +1,407 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + + +extern void minloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(minloc0_16_r17); + +void +minloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 minval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base <= minval) + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mminloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mminloc0_16_r17); + +void +mminloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_16 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + minloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 minval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base <= minval) +#endif + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (unlikely (*mbase && (*base <= minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*mbase && (*base < minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void sminloc0_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sminloc0_16_r17); + +void +sminloc0_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_16 *dest; + + if (mask == NULL || *mask) + { + minloc0_16_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/minloc0_4_r17.c b/libgfortran/generated/minloc0_4_r17.c new file mode 100644 index 00000000000..7718d5c07e5 --- /dev/null +++ b/libgfortran/generated/minloc0_4_r17.c @@ -0,0 +1,407 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + + +extern void minloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(minloc0_4_r17); + +void +minloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 minval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base <= minval) + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mminloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mminloc0_4_r17); + +void +mminloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_4 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + minloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 minval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base <= minval) +#endif + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (unlikely (*mbase && (*base <= minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*mbase && (*base < minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void sminloc0_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sminloc0_4_r17); + +void +sminloc0_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_4 *dest; + + if (mask == NULL || *mask) + { + minloc0_4_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/minloc0_8_r17.c b/libgfortran/generated/minloc0_8_r17.c new file mode 100644 index 00000000000..8c8f8bd5dbc --- /dev/null +++ b/libgfortran/generated/minloc0_8_r17.c @@ -0,0 +1,407 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + + +extern void minloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4); +export_proto(minloc0_8_r17); + +void +minloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + const GFC_REAL_17 *base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 1; + { + + GFC_REAL_17 minval; +#if defined(GFC_REAL_17_QUIET_NAN) + int fast = 0; +#endif + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (!fast)) + { + do + { + if (*base <= minval) + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else +#endif + if (back) + do + { + if (unlikely (*base <= minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*base < minval)) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void mminloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, gfc_array_l1 * const restrict, + GFC_LOGICAL_4); +export_proto(mminloc0_8_r17); + +void +mminloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type dstride; + GFC_INTEGER_8 *dest; + const GFC_REAL_17 *base; + GFC_LOGICAL_1 *mbase; + int rank; + index_type n; + int mask_kind; + + + if (mask == NULL) + { + minloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { + if (unlikely (compile_options.bounds_check)) + { + + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + mbase = mask->base_addr; + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + count[n] = 0; + if (extent[n] <= 0) + { + /* Set the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + return; + } + } + + base = array->base_addr; + + /* Initialize the return value. */ + for (n = 0; n < rank; n++) + dest[n * dstride] = 0; + { + + GFC_REAL_17 minval; + int fast = 0; + +#if defined(GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + while (base) + { + /* Implementation start. */ + + if (unlikely (!fast)) + { + do + { + if (*mbase) + { +#if defined(GFC_REAL_17_QUIET_NAN) + if (unlikely (dest[0] == 0)) + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + if (*base <= minval) +#endif + { + fast = 1; + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + break; + } + } + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + if (likely (fast)) + continue; + } + else + if (back) + do + { + if (unlikely (*mbase && (*base <= minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + base += sstride[0]; + } + while (++count[0] != extent[0]); + else + do + { + if (unlikely (*mbase && (*base < minval))) + { + minval = *base; + for (n = 0; n < rank; n++) + dest[n * dstride] = count[n] + 1; + } + /* Implementation end. */ + /* Advance to the next element. */ + base += sstride[0]; + mbase += mstride[0]; + } + while (++count[0] != extent[0]); + n = 0; + do + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + } + } + while (count[n] == extent[n]); + } + } +} + +extern void sminloc0_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, GFC_LOGICAL_4 *, GFC_LOGICAL_4); +export_proto(sminloc0_8_r17); + +void +sminloc0_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type rank; + index_type dstride; + index_type n; + GFC_INTEGER_8 *dest; + + if (mask == NULL || *mask) + { + minloc0_8_r17 (retarray, array, back); + return; + } + + rank = GFC_DESCRIPTOR_RANK (array); + + if (rank <= 0) + runtime_error ("Rank of array needs to be > 0"); + + if (retarray->base_addr == NULL) + { + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype.rank = 1; + retarray->offset = 0; + retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { + bounds_iforeach_return ((array_t *) retarray, (array_t *) array, + "MINLOC"); + } + + dstride = GFC_DESCRIPTOR_STRIDE(retarray,0); + dest = retarray->base_addr; + for (n = 0; n<rank; n++) + dest[n * dstride] = 0 ; +} +#endif diff --git a/libgfortran/generated/minloc1_16_r17.c b/libgfortran/generated/minloc1_16_r17.c new file mode 100644 index 00000000000..95f17e04bd4 --- /dev/null +++ b/libgfortran/generated/minloc1_16_r17.c @@ -0,0 +1,617 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +#define HAVE_BACK_ARG 1 + + +extern void minloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(minloc1_16_r17); + +void +minloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_16 result; + src = base; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src <= minval) + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } +#else + n = 0; +#endif + if (back) + for (; n < len; n++, src += delta) + { + if (unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + else + for (; n < len; n++, src += delta) + { + if (unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_16) n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mminloc1_16_r17); + +void +mminloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minloc1_16_r17 (retarray, array, pdim, back); +#else + minloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_16 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_16 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_16)n + 1; + if (*src <= minval) +#endif + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_16)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_16) n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminloc1_16_r17 (gfc_array_i16 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(sminloc1_16_r17); + +void +sminloc1_16_r17 (gfc_array_i16 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_16 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minloc1_16_r17 (retarray, array, pdim, back); +#else + minloc1_16_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minloc1_4_r17.c b/libgfortran/generated/minloc1_4_r17.c new file mode 100644 index 00000000000..4deb31798f7 --- /dev/null +++ b/libgfortran/generated/minloc1_4_r17.c @@ -0,0 +1,617 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +#define HAVE_BACK_ARG 1 + + +extern void minloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(minloc1_4_r17); + +void +minloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_4 result; + src = base; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src <= minval) + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } +#else + n = 0; +#endif + if (back) + for (; n < len; n++, src += delta) + { + if (unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + else + for (; n < len; n++, src += delta) + { + if (unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_4) n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mminloc1_4_r17); + +void +mminloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minloc1_4_r17 (retarray, array, pdim, back); +#else + minloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_4 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_4 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_4)n + 1; + if (*src <= minval) +#endif + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_4)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_4) n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminloc1_4_r17 (gfc_array_i4 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(sminloc1_4_r17); + +void +sminloc1_4_r17 (gfc_array_i4 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_4 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minloc1_4_r17 (retarray, array, pdim, back); +#else + minloc1_4_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minloc1_8_r17.c b/libgfortran/generated/minloc1_8_r17.c new file mode 100644 index 00000000000..4237bb3aff2 --- /dev/null +++ b/libgfortran/generated/minloc1_8_r17.c @@ -0,0 +1,617 @@ +/* Implementation of the MINLOC intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <assert.h> + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +#define HAVE_BACK_ARG 1 + + +extern void minloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, GFC_LOGICAL_4 back); +export_proto(minloc1_8_r17); + +void +minloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_INTEGER_8 result; + src = base; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif + result = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + for (n = 0; n < len; n++, src += delta) + { + if (*src <= minval) + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } +#else + n = 0; +#endif + if (back) + for (; n < len; n++, src += delta) + { + if (unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + else + for (; n < len; n++, src += delta) + { + if (unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_8) n + 1; + } + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict, GFC_LOGICAL_4 back); +export_proto(mminloc1_8_r17); + +void +mminloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minloc1_8_r17 (retarray, array, pdim, back); +#else + minloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINLOC intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINLOC"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINLOC"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_INTEGER_8 result; + src = base; + msrc = mbase; + { + + GFC_REAL_17 minval; +#if defined (GFC_REAL_17_INFINITY) + minval = GFC_REAL_17_INFINITY; +#else + minval = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + GFC_INTEGER_8 result2 = 0; +#endif + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + if (!result2) + result2 = (GFC_INTEGER_8)n + 1; + if (*src <= minval) +#endif + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + break; + } + } + } +#if defined (GFC_REAL_17_QUIET_NAN) + if (unlikely (n >= len)) + result = result2; + else +#endif + if (back) + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src <= minval)) + { + minval = *src; + result = (GFC_INTEGER_8)n + 1; + } + } + else + for (; n < len; n++, src += delta, msrc += mdelta) + { + if (*msrc && unlikely (*src < minval)) + { + minval = *src; + result = (GFC_INTEGER_8) n + 1; + } + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminloc1_8_r17 (gfc_array_i8 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *, GFC_LOGICAL_4 back); +export_proto(sminloc1_8_r17); + +void +sminloc1_8_r17 (gfc_array_i8 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask, GFC_LOGICAL_4 back) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_INTEGER_8 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minloc1_8_r17 (retarray, array, pdim, back); +#else + minloc1_8_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINLOC intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINLOC intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINLOC intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/minval_r17.c b/libgfortran/generated/minval_r17.c new file mode 100644 index 00000000000..970500340ae --- /dev/null +++ b/libgfortran/generated/minval_r17.c @@ -0,0 +1,578 @@ +/* Implementation of the MINVAL intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void minval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(minval_r17); + +void +minval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINVAL"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + +#if defined (GFC_REAL_17_INFINITY) + result = GFC_REAL_17_INFINITY; +#else + result = GFC_REAL_17_HUGE; +#endif + if (len <= 0) + *dest = GFC_REAL_17_HUGE; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + +#if defined (GFC_REAL_17_QUIET_NAN) + if (*src <= result) + break; + } + if (unlikely (n >= len)) + result = GFC_REAL_17_QUIET_NAN; + else for (; n < len; n++, src += delta) + { +#endif + if (*src < result) + result = *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mminval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mminval_r17); + +void +mminval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + minval_r17 (retarray, array, pdim, back); +#else + minval_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in MINVAL intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "MINVAL"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "MINVAL"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + +#if defined (GFC_REAL_17_INFINITY) + result = GFC_REAL_17_INFINITY; +#else + result = GFC_REAL_17_HUGE; +#endif +#if defined (GFC_REAL_17_QUIET_NAN) + int non_empty_p = 0; +#endif + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + +#if defined (GFC_REAL_17_INFINITY) || defined (GFC_REAL_17_QUIET_NAN) + if (*msrc) + { +#if defined (GFC_REAL_17_QUIET_NAN) + non_empty_p = 1; + if (*src <= result) +#endif + break; + } + } + if (unlikely (n >= len)) + { +#if defined (GFC_REAL_17_QUIET_NAN) + result = non_empty_p ? GFC_REAL_17_QUIET_NAN : GFC_REAL_17_HUGE; +#else + result = GFC_REAL_17_HUGE; +#endif + } + else for (; n < len; n++, src += delta, msrc += mdelta) + { +#endif + if (*msrc && *src < result) + result = *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sminval_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(sminval_r17); + +void +sminval_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + minval_r17 (retarray, array, pdim, back); +#else + minval_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in MINVAL intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " MINVAL intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " MINVAL intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = GFC_REAL_17_HUGE; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/norm2_r17.c b/libgfortran/generated/norm2_r17.c new file mode 100644 index 00000000000..d8a0fb7ee47 --- /dev/null +++ b/libgfortran/generated/norm2_r17.c @@ -0,0 +1,219 @@ +/* Implementation of the NORM2 intrinsic + Copyright (C) 2010-2022 Free Software Foundation, Inc. + Contributed by Tobias Burnus <burnus@net-b.de> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) && 1 /* FIXME: figure this out later. */ && 1 /* FIXME: figure this out later. */ + +#if defined(POWER_IEEE128) +#define MATHFUNC(funcname) __ ## funcname ## ieee128 +#else +#define MATHFUNC(funcname) funcname ## q +#endif + + +extern void norm2_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(norm2_r17); + +void +norm2_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in NORM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " NORM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "NORM"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + + GFC_REAL_17 scale; + result = 0; + scale = 1; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + if (*src != 0) + { + GFC_REAL_17 absX, val; + absX = MATHFUNC(fabs) (*src); + if (scale < absX) + { + val = scale / absX; + result = 1 + result * val * val; + scale = absX; + } + else + { + val = absX / scale; + result += val * val; + } + } + } + result = scale * MATHFUNC(sqrt) (result); + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/pack_c17.c b/libgfortran/generated/pack_c17.c new file mode 100644 index 00000000000..2084c5303b3 --- /dev/null +++ b/libgfortran/generated/pack_c17.c @@ -0,0 +1,257 @@ +/* Specific implementation of the PACK intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +/* PACK is specified as follows: + + 13.14.80 PACK (ARRAY, MASK, [VECTOR]) + + Description: Pack an array into an array of rank one under the + control of a mask. + + Class: Transformational function. + + Arguments: + ARRAY may be of any type. It shall not be scalar. + MASK shall be of type LOGICAL. It shall be conformable with ARRAY. + VECTOR (optional) shall be of the same type and type parameters + as ARRAY. VECTOR shall have at least as many elements as + there are true elements in MASK. If MASK is a scalar + with the value true, VECTOR shall have at least as many + elements as there are in ARRAY. + + Result Characteristics: The result is an array of rank one with the + same type and type parameters as ARRAY. If VECTOR is present, the + result size is that of VECTOR; otherwise, the result size is the + number /t/ of true elements in MASK unless MASK is scalar with the + value true, in which case the result size is the size of ARRAY. + + Result Value: Element /i/ of the result is the element of ARRAY + that corresponds to the /i/th true element of MASK, taking elements + in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is + present and has size /n/ > /t/, element /i/ of the result has the + value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/. + + Examples: The nonzero elements of an array M with the value + | 0 0 0 | + | 9 0 0 | may be "gathered" by the function PACK. The result of + | 0 0 7 | + PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0, + VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12]. + +There are two variants of the PACK intrinsic: one, where MASK is +array valued, and the other one where MASK is scalar. */ + +void +pack_c17 (gfc_array_c17 *ret, const gfc_array_c17 *array, + const gfc_array_l1 *mask, const gfc_array_c17 *vector) +{ + /* r.* indicates the return array. */ + index_type rstride0; + GFC_COMPLEX_17 * restrict rptr; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + const GFC_COMPLEX_17 *sptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + int zero_sized; + index_type n; + index_type dim; + index_type nelem; + index_type total; + int mask_kind; + + dim = GFC_DESCRIPTOR_RANK (array); + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + zero_sized = 0; + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + if (extent[n] <= 0) + zero_sized = 1; + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (sstride[0] == 0) + sstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = mask_kind; + + if (zero_sized) + sptr = NULL; + else + sptr = array->base_addr; + + if (ret->base_addr == NULL || unlikely (compile_options.bounds_check)) + { + /* Count the elements, either for allocating memory or + for bounds checking. */ + + if (vector != NULL) + { + /* The return array will have as many + elements as there are in VECTOR. */ + total = GFC_DESCRIPTOR_EXTENT(vector,0); + if (total < 0) + { + total = 0; + vector = NULL; + } + } + else + { + /* We have to count the true elements in MASK. */ + total = count_0 (mask); + } + + if (ret->base_addr == NULL) + { + /* Setup the array descriptor. */ + GFC_DIMENSION_SET(ret->dim[0], 0, total-1, 1); + + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (total, sizeof (GFC_COMPLEX_17)); + + if (total == 0) + return; + } + else + { + /* We come here because of range checking. */ + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,0); + if (total != ret_extent) + runtime_error ("Incorrect extent in return value of PACK intrinsic;" + " is %ld, should be %ld", (long int) total, + (long int) ret_extent); + } + } + + rstride0 = GFC_DESCRIPTOR_STRIDE(ret,0); + if (rstride0 == 0) + rstride0 = 1; + sstride0 = sstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + + while (sptr && mptr) + { + /* Test this element. */ + if (*mptr) + { + /* Add it. */ + *rptr = *sptr; + rptr += rstride0; + } + /* Advance to the next element. */ + sptr += sstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + mptr += mstride[n]; + } + } + } + + /* Add any remaining elements from VECTOR. */ + if (vector) + { + n = GFC_DESCRIPTOR_EXTENT(vector,0); + nelem = ((rptr - ret->base_addr) / rstride0); + if (n > nelem) + { + sstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (sstride0 == 0) + sstride0 = 1; + + sptr = vector->base_addr + sstride0 * nelem; + n -= nelem; + while (n--) + { + *rptr = *sptr; + rptr += rstride0; + sptr += sstride0; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/pack_r17.c b/libgfortran/generated/pack_r17.c new file mode 100644 index 00000000000..ce950114685 --- /dev/null +++ b/libgfortran/generated/pack_r17.c @@ -0,0 +1,257 @@ +/* Specific implementation of the PACK intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +/* PACK is specified as follows: + + 13.14.80 PACK (ARRAY, MASK, [VECTOR]) + + Description: Pack an array into an array of rank one under the + control of a mask. + + Class: Transformational function. + + Arguments: + ARRAY may be of any type. It shall not be scalar. + MASK shall be of type LOGICAL. It shall be conformable with ARRAY. + VECTOR (optional) shall be of the same type and type parameters + as ARRAY. VECTOR shall have at least as many elements as + there are true elements in MASK. If MASK is a scalar + with the value true, VECTOR shall have at least as many + elements as there are in ARRAY. + + Result Characteristics: The result is an array of rank one with the + same type and type parameters as ARRAY. If VECTOR is present, the + result size is that of VECTOR; otherwise, the result size is the + number /t/ of true elements in MASK unless MASK is scalar with the + value true, in which case the result size is the size of ARRAY. + + Result Value: Element /i/ of the result is the element of ARRAY + that corresponds to the /i/th true element of MASK, taking elements + in array element order, for /i/ = 1, 2, ..., /t/. If VECTOR is + present and has size /n/ > /t/, element /i/ of the result has the + value VECTOR(/i/), for /i/ = /t/ + 1, ..., /n/. + + Examples: The nonzero elements of an array M with the value + | 0 0 0 | + | 9 0 0 | may be "gathered" by the function PACK. The result of + | 0 0 7 | + PACK (M, MASK = M.NE.0) is [9,7] and the result of PACK (M, M.NE.0, + VECTOR = (/ 2,4,6,8,10,12 /)) is [9,7,6,8,10,12]. + +There are two variants of the PACK intrinsic: one, where MASK is +array valued, and the other one where MASK is scalar. */ + +void +pack_r17 (gfc_array_r17 *ret, const gfc_array_r17 *array, + const gfc_array_l1 *mask, const gfc_array_r17 *vector) +{ + /* r.* indicates the return array. */ + index_type rstride0; + GFC_REAL_17 * restrict rptr; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + const GFC_REAL_17 *sptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + int zero_sized; + index_type n; + index_type dim; + index_type nelem; + index_type total; + int mask_kind; + + dim = GFC_DESCRIPTOR_RANK (array); + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + zero_sized = 0; + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + if (extent[n] <= 0) + zero_sized = 1; + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (sstride[0] == 0) + sstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = mask_kind; + + if (zero_sized) + sptr = NULL; + else + sptr = array->base_addr; + + if (ret->base_addr == NULL || unlikely (compile_options.bounds_check)) + { + /* Count the elements, either for allocating memory or + for bounds checking. */ + + if (vector != NULL) + { + /* The return array will have as many + elements as there are in VECTOR. */ + total = GFC_DESCRIPTOR_EXTENT(vector,0); + if (total < 0) + { + total = 0; + vector = NULL; + } + } + else + { + /* We have to count the true elements in MASK. */ + total = count_0 (mask); + } + + if (ret->base_addr == NULL) + { + /* Setup the array descriptor. */ + GFC_DIMENSION_SET(ret->dim[0], 0, total-1, 1); + + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (total, sizeof (GFC_REAL_17)); + + if (total == 0) + return; + } + else + { + /* We come here because of range checking. */ + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,0); + if (total != ret_extent) + runtime_error ("Incorrect extent in return value of PACK intrinsic;" + " is %ld, should be %ld", (long int) total, + (long int) ret_extent); + } + } + + rstride0 = GFC_DESCRIPTOR_STRIDE(ret,0); + if (rstride0 == 0) + rstride0 = 1; + sstride0 = sstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + + while (sptr && mptr) + { + /* Test this element. */ + if (*mptr) + { + /* Add it. */ + *rptr = *sptr; + rptr += rstride0; + } + /* Advance to the next element. */ + sptr += sstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + mptr += mstride[n]; + } + } + } + + /* Add any remaining elements from VECTOR. */ + if (vector) + { + n = GFC_DESCRIPTOR_EXTENT(vector,0); + nelem = ((rptr - ret->base_addr) / rstride0); + if (n > nelem) + { + sstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (sstride0 == 0) + sstride0 = 1; + + sptr = vector->base_addr + sstride0 * nelem; + n -= nelem; + while (n--) + { + *rptr = *sptr; + rptr += rstride0; + sptr += sstride0; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/pow_c17_i16.c b/libgfortran/generated/pow_c17_i16.c new file mode 100644 index 00000000000..c7dccf57c6b --- /dev/null +++ b/libgfortran/generated/pow_c17_i16.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_16) + +GFC_COMPLEX_17 pow_c17_i16 (GFC_COMPLEX_17 a, GFC_INTEGER_16 b); +export_proto(pow_c17_i16); + +GFC_COMPLEX_17 +pow_c17_i16 (GFC_COMPLEX_17 a, GFC_INTEGER_16 b) +{ + GFC_COMPLEX_17 pow, x; + GFC_INTEGER_16 n; + GFC_UINTEGER_16 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_c17_i4.c b/libgfortran/generated/pow_c17_i4.c new file mode 100644 index 00000000000..74d092ac734 --- /dev/null +++ b/libgfortran/generated/pow_c17_i4.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_4) + +GFC_COMPLEX_17 pow_c17_i4 (GFC_COMPLEX_17 a, GFC_INTEGER_4 b); +export_proto(pow_c17_i4); + +GFC_COMPLEX_17 +pow_c17_i4 (GFC_COMPLEX_17 a, GFC_INTEGER_4 b) +{ + GFC_COMPLEX_17 pow, x; + GFC_INTEGER_4 n; + GFC_UINTEGER_4 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_c17_i8.c b/libgfortran/generated/pow_c17_i8.c new file mode 100644 index 00000000000..8dc30ee1cb4 --- /dev/null +++ b/libgfortran/generated/pow_c17_i8.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_INTEGER_8) + +GFC_COMPLEX_17 pow_c17_i8 (GFC_COMPLEX_17 a, GFC_INTEGER_8 b); +export_proto(pow_c17_i8); + +GFC_COMPLEX_17 +pow_c17_i8 (GFC_COMPLEX_17 a, GFC_INTEGER_8 b) +{ + GFC_COMPLEX_17 pow, x; + GFC_INTEGER_8 n; + GFC_UINTEGER_8 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_r17_i16.c b/libgfortran/generated/pow_r17_i16.c new file mode 100644 index 00000000000..817dd59ca35 --- /dev/null +++ b/libgfortran/generated/pow_r17_i16.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_16) + +GFC_REAL_17 pow_r17_i16 (GFC_REAL_17 a, GFC_INTEGER_16 b); +export_proto(pow_r17_i16); + +GFC_REAL_17 +pow_r17_i16 (GFC_REAL_17 a, GFC_INTEGER_16 b) +{ + GFC_REAL_17 pow, x; + GFC_INTEGER_16 n; + GFC_UINTEGER_16 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_r17_i4.c b/libgfortran/generated/pow_r17_i4.c new file mode 100644 index 00000000000..16ba682e418 --- /dev/null +++ b/libgfortran/generated/pow_r17_i4.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_4) + +GFC_REAL_17 pow_r17_i4 (GFC_REAL_17 a, GFC_INTEGER_4 b); +export_proto(pow_r17_i4); + +GFC_REAL_17 +pow_r17_i4 (GFC_REAL_17 a, GFC_INTEGER_4 b) +{ + GFC_REAL_17 pow, x; + GFC_INTEGER_4 n; + GFC_UINTEGER_4 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/pow_r17_i8.c b/libgfortran/generated/pow_r17_i8.c new file mode 100644 index 00000000000..d7bfce1fee7 --- /dev/null +++ b/libgfortran/generated/pow_r17_i8.c @@ -0,0 +1,75 @@ +/* Support routines for the intrinsic power (**) operator. + Copyright (C) 2004-2022 Free Software Foundation, Inc. + Contributed by Paul Brook + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +/* Use Binary Method to calculate the powi. This is not an optimal but + a simple and reasonable arithmetic. See section 4.6.3, "Evaluation of + Powers" of Donald E. Knuth, "Seminumerical Algorithms", Vol. 2, "The Art + of Computer Programming", 3rd Edition, 1998. */ + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_INTEGER_8) + +GFC_REAL_17 pow_r17_i8 (GFC_REAL_17 a, GFC_INTEGER_8 b); +export_proto(pow_r17_i8); + +GFC_REAL_17 +pow_r17_i8 (GFC_REAL_17 a, GFC_INTEGER_8 b) +{ + GFC_REAL_17 pow, x; + GFC_INTEGER_8 n; + GFC_UINTEGER_8 u; + + n = b; + x = a; + pow = 1; + if (n != 0) + { + if (n < 0) + { + + u = -n; + x = pow / x; + } + else + { + u = n; + } + for (;;) + { + if (u & 1) + pow *= x; + u >>= 1; + if (u) + x *= x; + else + break; + } + } + return pow; +} + +#endif diff --git a/libgfortran/generated/product_c17.c b/libgfortran/generated/product_c17.c new file mode 100644 index 00000000000..dee58a13d14 --- /dev/null +++ b/libgfortran/generated/product_c17.c @@ -0,0 +1,536 @@ +/* Implementation of the PRODUCT intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_COMPLEX_17) + + +extern void product_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict); +export_proto(product_c17); + +void +product_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + GFC_COMPLEX_17 result; + src = base; + { + + result = 1; + if (len <= 0) + *dest = 1; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result *= *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mproduct_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mproduct_c17); + +void +mproduct_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + const GFC_COMPLEX_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + product_c17 (retarray, array, pdim, back); +#else + product_c17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in PRODUCT intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "PRODUCT"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_COMPLEX_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_COMPLEX_17 result; + src = base; + msrc = mbase; + { + + result = 1; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result *= *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sproduct_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(sproduct_c17); + +void +sproduct_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + product_c17 (retarray, array, pdim, back); +#else + product_c17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " PRODUCT intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 1; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/product_r17.c b/libgfortran/generated/product_r17.c new file mode 100644 index 00000000000..1059533153d --- /dev/null +++ b/libgfortran/generated/product_r17.c @@ -0,0 +1,536 @@ +/* Implementation of the PRODUCT intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void product_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(product_r17); + +void +product_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + + result = 1; + if (len <= 0) + *dest = 1; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result *= *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void mproduct_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(mproduct_r17); + +void +mproduct_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + product_r17 (retarray, array, pdim, back); +#else + product_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in PRODUCT intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "PRODUCT"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "PRODUCT"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + + result = 1; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result *= *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void sproduct_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(sproduct_r17); + +void +sproduct_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + product_r17 (retarray, array, pdim, back); +#else + product_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in PRODUCT intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " PRODUCT intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " PRODUCT intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 1; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/reshape_c17.c b/libgfortran/generated/reshape_c17.c new file mode 100644 index 00000000000..b0d7ec8cd57 --- /dev/null +++ b/libgfortran/generated/reshape_c17.c @@ -0,0 +1,366 @@ +/* Implementation of the RESHAPE intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) + +typedef GFC_FULL_ARRAY_DESCRIPTOR(1, index_type) shape_type; + + +extern void reshape_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, + shape_type * const restrict, + gfc_array_c17 * const restrict, + shape_type * const restrict); +export_proto(reshape_c17); + +void +reshape_c17 (gfc_array_c17 * const restrict ret, + gfc_array_c17 * const restrict source, + shape_type * const restrict shape, + gfc_array_c17 * const restrict pad, + shape_type * const restrict order) +{ + /* r.* indicates the return array. */ + index_type rcount[GFC_MAX_DIMENSIONS]; + index_type rextent[GFC_MAX_DIMENSIONS]; + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdim; + index_type rsize; + index_type rs; + index_type rex; + GFC_COMPLEX_17 *rptr; + /* s.* indicates the source array. */ + index_type scount[GFC_MAX_DIMENSIONS]; + index_type sextent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type sdim; + index_type ssize; + const GFC_COMPLEX_17 *sptr; + /* p.* indicates the pad array. */ + index_type pcount[GFC_MAX_DIMENSIONS]; + index_type pextent[GFC_MAX_DIMENSIONS]; + index_type pstride[GFC_MAX_DIMENSIONS]; + index_type pdim; + index_type psize; + const GFC_COMPLEX_17 *pptr; + + const GFC_COMPLEX_17 *src; + int sempty, pempty, shape_empty; + index_type shape_data[GFC_MAX_DIMENSIONS]; + + rdim = GFC_DESCRIPTOR_EXTENT(shape,0); + /* rdim is always > 0; this lets the compiler optimize more and + avoids a potential warning. */ + GFC_ASSERT(rdim>0); + + if (rdim != GFC_DESCRIPTOR_RANK(ret)) + runtime_error("rank of return array incorrect in RESHAPE intrinsic"); + + shape_empty = 0; + + for (index_type n = 0; n < rdim; n++) + { + shape_data[n] = shape->base_addr[n * GFC_DESCRIPTOR_STRIDE(shape,0)]; + if (shape_data[n] <= 0) + { + shape_data[n] = 0; + shape_empty = 1; + } + } + + if (ret->base_addr == NULL) + { + index_type alloc_size; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rex = shape_data[n]; + + GFC_DIMENSION_SET(ret->dim[n], 0, rex - 1, rs); + + rs *= rex; + } + ret->offset = 0; + + if (unlikely (rs < 1)) + alloc_size = 0; + else + alloc_size = rs; + + ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + ret->dtype.rank = rdim; + } + + if (shape_empty) + return; + + if (pad) + { + pdim = GFC_DESCRIPTOR_RANK (pad); + psize = 1; + pempty = 0; + for (index_type n = 0; n < pdim; n++) + { + pcount[n] = 0; + pstride[n] = GFC_DESCRIPTOR_STRIDE(pad,n); + pextent[n] = GFC_DESCRIPTOR_EXTENT(pad,n); + if (pextent[n] <= 0) + { + pempty = 1; + pextent[n] = 0; + } + + if (psize == pstride[n]) + psize *= pextent[n]; + else + psize = 0; + } + pptr = pad->base_addr; + } + else + { + pdim = 0; + psize = 1; + pempty = 1; + pptr = NULL; + } + + if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, source_extent; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rs *= shape_data[n]; + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (ret_extent != shape_data[n]) + runtime_error("Incorrect extent in return value of RESHAPE" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) shape_data[n]); + } + + source_extent = 1; + sdim = GFC_DESCRIPTOR_RANK (source); + for (index_type n = 0; n < sdim; n++) + { + index_type se; + se = GFC_DESCRIPTOR_EXTENT(source,n); + source_extent *= se > 0 ? se : 0; + } + + if (rs > source_extent && (!pad || pempty)) + runtime_error("Incorrect size in SOURCE argument to RESHAPE" + " intrinsic: is %ld, should be %ld", + (long int) source_extent, (long int) rs); + + if (order) + { + int seen[GFC_MAX_DIMENSIONS]; + index_type v; + + for (index_type n = 0; n < rdim; n++) + seen[n] = 0; + + for (index_type n = 0; n < rdim; n++) + { + v = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + + if (v < 0 || v >= rdim) + runtime_error("Value %ld out of range in ORDER argument" + " to RESHAPE intrinsic", (long int) v + 1); + + if (seen[v] != 0) + runtime_error("Duplicate value %ld in ORDER argument to" + " RESHAPE intrinsic", (long int) v + 1); + + seen[v] = 1; + } + } + } + + rsize = 1; + for (index_type n = 0; n < rdim; n++) + { + index_type dim; + if (order) + dim = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + else + dim = n; + + rcount[n] = 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + rextent[n] = GFC_DESCRIPTOR_EXTENT(ret,dim); + if (rextent[n] < 0) + rextent[n] = 0; + + if (rextent[n] != shape_data[dim]) + runtime_error ("shape and target do not conform"); + + if (rsize == rstride[n]) + rsize *= rextent[n]; + else + rsize = 0; + if (rextent[n] <= 0) + return; + } + + sdim = GFC_DESCRIPTOR_RANK (source); + + /* sdim is always > 0; this lets the compiler optimize more and + avoids a warning. */ + GFC_ASSERT(sdim>0); + + ssize = 1; + sempty = 0; + for (index_type n = 0; n < sdim; n++) + { + scount[n] = 0; + sstride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + sextent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (sextent[n] <= 0) + { + sempty = 1; + sextent[n] = 0; + } + + if (ssize == sstride[n]) + ssize *= sextent[n]; + else + ssize = 0; + } + + if (rsize != 0 && ssize != 0 && psize != 0) + { + rsize *= sizeof (GFC_COMPLEX_17); + ssize *= sizeof (GFC_COMPLEX_17); + psize *= sizeof (GFC_COMPLEX_17); + reshape_packed ((char *)ret->base_addr, rsize, (char *)source->base_addr, + ssize, pad ? (char *)pad->base_addr : NULL, psize); + return; + } + rptr = ret->base_addr; + src = sptr = source->base_addr; + rstride0 = rstride[0]; + sstride0 = sstride[0]; + + if (sempty && pempty) + abort (); + + if (sempty) + { + /* Pretend we are using the pad array the first time around, too. */ + src = pptr; + sptr = pptr; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = pstride[0]; + } + } + + while (rptr) + { + /* Select between the source and pad arrays. */ + *rptr = *src; + /* Advance to the next element. */ + rptr += rstride0; + src += sstride0; + rcount[0]++; + scount[0]++; + + /* Advance to the next destination element. */ + index_type n = 0; + while (rcount[n] == rextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + rcount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * rextent[n]; + n++; + if (n == rdim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + rcount[n]++; + rptr += rstride[n]; + } + } + /* Advance to the next source element. */ + n = 0; + while (scount[n] == sextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + scount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= sstride[n] * sextent[n]; + n++; + if (n == sdim) + { + if (sptr && pad) + { + /* Switch to the pad array. */ + sptr = NULL; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = sstride[0]; + } + } + /* We now start again from the beginning of the pad array. */ + src = pptr; + break; + } + else + { + scount[n]++; + src += sstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/reshape_r17.c b/libgfortran/generated/reshape_r17.c new file mode 100644 index 00000000000..36425ca377f --- /dev/null +++ b/libgfortran/generated/reshape_r17.c @@ -0,0 +1,366 @@ +/* Implementation of the RESHAPE intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) + +typedef GFC_FULL_ARRAY_DESCRIPTOR(1, index_type) shape_type; + + +extern void reshape_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, + shape_type * const restrict, + gfc_array_r17 * const restrict, + shape_type * const restrict); +export_proto(reshape_r17); + +void +reshape_r17 (gfc_array_r17 * const restrict ret, + gfc_array_r17 * const restrict source, + shape_type * const restrict shape, + gfc_array_r17 * const restrict pad, + shape_type * const restrict order) +{ + /* r.* indicates the return array. */ + index_type rcount[GFC_MAX_DIMENSIONS]; + index_type rextent[GFC_MAX_DIMENSIONS]; + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdim; + index_type rsize; + index_type rs; + index_type rex; + GFC_REAL_17 *rptr; + /* s.* indicates the source array. */ + index_type scount[GFC_MAX_DIMENSIONS]; + index_type sextent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type sdim; + index_type ssize; + const GFC_REAL_17 *sptr; + /* p.* indicates the pad array. */ + index_type pcount[GFC_MAX_DIMENSIONS]; + index_type pextent[GFC_MAX_DIMENSIONS]; + index_type pstride[GFC_MAX_DIMENSIONS]; + index_type pdim; + index_type psize; + const GFC_REAL_17 *pptr; + + const GFC_REAL_17 *src; + int sempty, pempty, shape_empty; + index_type shape_data[GFC_MAX_DIMENSIONS]; + + rdim = GFC_DESCRIPTOR_EXTENT(shape,0); + /* rdim is always > 0; this lets the compiler optimize more and + avoids a potential warning. */ + GFC_ASSERT(rdim>0); + + if (rdim != GFC_DESCRIPTOR_RANK(ret)) + runtime_error("rank of return array incorrect in RESHAPE intrinsic"); + + shape_empty = 0; + + for (index_type n = 0; n < rdim; n++) + { + shape_data[n] = shape->base_addr[n * GFC_DESCRIPTOR_STRIDE(shape,0)]; + if (shape_data[n] <= 0) + { + shape_data[n] = 0; + shape_empty = 1; + } + } + + if (ret->base_addr == NULL) + { + index_type alloc_size; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rex = shape_data[n]; + + GFC_DIMENSION_SET(ret->dim[n], 0, rex - 1, rs); + + rs *= rex; + } + ret->offset = 0; + + if (unlikely (rs < 1)) + alloc_size = 0; + else + alloc_size = rs; + + ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + ret->dtype.rank = rdim; + } + + if (shape_empty) + return; + + if (pad) + { + pdim = GFC_DESCRIPTOR_RANK (pad); + psize = 1; + pempty = 0; + for (index_type n = 0; n < pdim; n++) + { + pcount[n] = 0; + pstride[n] = GFC_DESCRIPTOR_STRIDE(pad,n); + pextent[n] = GFC_DESCRIPTOR_EXTENT(pad,n); + if (pextent[n] <= 0) + { + pempty = 1; + pextent[n] = 0; + } + + if (psize == pstride[n]) + psize *= pextent[n]; + else + psize = 0; + } + pptr = pad->base_addr; + } + else + { + pdim = 0; + psize = 1; + pempty = 1; + pptr = NULL; + } + + if (unlikely (compile_options.bounds_check)) + { + index_type ret_extent, source_extent; + + rs = 1; + for (index_type n = 0; n < rdim; n++) + { + rs *= shape_data[n]; + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (ret_extent != shape_data[n]) + runtime_error("Incorrect extent in return value of RESHAPE" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) shape_data[n]); + } + + source_extent = 1; + sdim = GFC_DESCRIPTOR_RANK (source); + for (index_type n = 0; n < sdim; n++) + { + index_type se; + se = GFC_DESCRIPTOR_EXTENT(source,n); + source_extent *= se > 0 ? se : 0; + } + + if (rs > source_extent && (!pad || pempty)) + runtime_error("Incorrect size in SOURCE argument to RESHAPE" + " intrinsic: is %ld, should be %ld", + (long int) source_extent, (long int) rs); + + if (order) + { + int seen[GFC_MAX_DIMENSIONS]; + index_type v; + + for (index_type n = 0; n < rdim; n++) + seen[n] = 0; + + for (index_type n = 0; n < rdim; n++) + { + v = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + + if (v < 0 || v >= rdim) + runtime_error("Value %ld out of range in ORDER argument" + " to RESHAPE intrinsic", (long int) v + 1); + + if (seen[v] != 0) + runtime_error("Duplicate value %ld in ORDER argument to" + " RESHAPE intrinsic", (long int) v + 1); + + seen[v] = 1; + } + } + } + + rsize = 1; + for (index_type n = 0; n < rdim; n++) + { + index_type dim; + if (order) + dim = order->base_addr[n * GFC_DESCRIPTOR_STRIDE(order,0)] - 1; + else + dim = n; + + rcount[n] = 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,dim); + rextent[n] = GFC_DESCRIPTOR_EXTENT(ret,dim); + if (rextent[n] < 0) + rextent[n] = 0; + + if (rextent[n] != shape_data[dim]) + runtime_error ("shape and target do not conform"); + + if (rsize == rstride[n]) + rsize *= rextent[n]; + else + rsize = 0; + if (rextent[n] <= 0) + return; + } + + sdim = GFC_DESCRIPTOR_RANK (source); + + /* sdim is always > 0; this lets the compiler optimize more and + avoids a warning. */ + GFC_ASSERT(sdim>0); + + ssize = 1; + sempty = 0; + for (index_type n = 0; n < sdim; n++) + { + scount[n] = 0; + sstride[n] = GFC_DESCRIPTOR_STRIDE(source,n); + sextent[n] = GFC_DESCRIPTOR_EXTENT(source,n); + if (sextent[n] <= 0) + { + sempty = 1; + sextent[n] = 0; + } + + if (ssize == sstride[n]) + ssize *= sextent[n]; + else + ssize = 0; + } + + if (rsize != 0 && ssize != 0 && psize != 0) + { + rsize *= sizeof (GFC_REAL_17); + ssize *= sizeof (GFC_REAL_17); + psize *= sizeof (GFC_REAL_17); + reshape_packed ((char *)ret->base_addr, rsize, (char *)source->base_addr, + ssize, pad ? (char *)pad->base_addr : NULL, psize); + return; + } + rptr = ret->base_addr; + src = sptr = source->base_addr; + rstride0 = rstride[0]; + sstride0 = sstride[0]; + + if (sempty && pempty) + abort (); + + if (sempty) + { + /* Pretend we are using the pad array the first time around, too. */ + src = pptr; + sptr = pptr; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = pstride[0]; + } + } + + while (rptr) + { + /* Select between the source and pad arrays. */ + *rptr = *src; + /* Advance to the next element. */ + rptr += rstride0; + src += sstride0; + rcount[0]++; + scount[0]++; + + /* Advance to the next destination element. */ + index_type n = 0; + while (rcount[n] == rextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + rcount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * rextent[n]; + n++; + if (n == rdim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + rcount[n]++; + rptr += rstride[n]; + } + } + /* Advance to the next source element. */ + n = 0; + while (scount[n] == sextent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + scount[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + src -= sstride[n] * sextent[n]; + n++; + if (n == sdim) + { + if (sptr && pad) + { + /* Switch to the pad array. */ + sptr = NULL; + sdim = pdim; + for (index_type dim = 0; dim < pdim; dim++) + { + scount[dim] = pcount[dim]; + sextent[dim] = pextent[dim]; + sstride[dim] = pstride[dim]; + sstride0 = sstride[0]; + } + } + /* We now start again from the beginning of the pad array. */ + src = pptr; + break; + } + else + { + scount[n]++; + src += sstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/spread_c17.c b/libgfortran/generated/spread_c17.c new file mode 100644 index 00000000000..70de5871164 --- /dev/null +++ b/libgfortran/generated/spread_c17.c @@ -0,0 +1,266 @@ +/* Special implementation of the SPREAD intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + spread_generic.c written by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +spread_c17 (gfc_array_c17 *ret, const gfc_array_c17 *source, + const index_type along, const index_type pncopies) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdelta = 0; + index_type rrank; + index_type rs; + GFC_COMPLEX_17 *rptr; + GFC_COMPLEX_17 * restrict dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type srank; + const GFC_COMPLEX_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + index_type ncopies; + + srank = GFC_DESCRIPTOR_RANK(source); + + rrank = srank + 1; + if (rrank > GFC_MAX_DIMENSIONS) + runtime_error ("return rank too large in spread()"); + + if (along > rrank) + runtime_error ("dim outside of rank in spread()"); + + ncopies = pncopies; + + if (ret->base_addr == NULL) + { + + size_t ub, stride; + + /* The front end has signalled that we need to populate the + return array descriptor. */ + ret->dtype.rank = rrank; + + dim = 0; + rs = 1; + for (n = 0; n < rrank; n++) + { + stride = rs; + if (n == along - 1) + { + ub = ncopies - 1; + rdelta = rs; + rs *= ncopies; + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = rs; + + ub = extent[dim] - 1; + rs *= extent[dim]; + dim++; + } + GFC_DIMENSION_SET(ret->dim[n], 0, ub, stride); + } + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (rs, sizeof(GFC_COMPLEX_17)); + if (rs <= 0) + return; + } + else + { + int zero_sized; + + zero_sized = 0; + + dim = 0; + if (GFC_DESCRIPTOR_RANK(ret) != rrank) + runtime_error ("rank mismatch in spread()"); + + if (unlikely (compile_options.bounds_check)) + { + for (n = 0; n < rrank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + + if (ret_extent != ncopies) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) ncopies); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (ret_extent != extent[dim]) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, + (long int) extent[dim]); + + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + else + { + for (n = 0; n < rrank; n++) + { + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + + if (zero_sized) + return; + + if (sstride[0] == 0) + sstride[0] = 1; + } + sstride0 = sstride[0]; + rstride0 = rstride[0]; + rptr = ret->base_addr; + sptr = source->base_addr; + + while (sptr) + { + /* Spread this element. */ + dest = rptr; + for (n = 0; n < ncopies; n++) + { + *dest = *sptr; + dest += rdelta; + } + /* Advance to the next element. */ + sptr += sstride0; + rptr += rstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + rptr -= rstride[n] * extent[n]; + n++; + if (n >= srank) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + rptr += rstride[n]; + } + } + } +} + +/* This version of spread_internal treats the special case of a scalar + source. This is much simpler than the more general case above. */ + +void +spread_scalar_c17 (gfc_array_c17 *ret, const GFC_COMPLEX_17 *source, + const index_type along, const index_type ncopies) +{ + GFC_COMPLEX_17 * restrict dest; + index_type stride; + + if (GFC_DESCRIPTOR_RANK (ret) != 1) + runtime_error ("incorrect destination rank in spread()"); + + if (along > 1) + runtime_error ("dim outside of rank in spread()"); + + if (ret->base_addr == NULL) + { + ret->base_addr = xmallocarray (ncopies, sizeof (GFC_COMPLEX_17)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } + else + { + if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) + / GFC_DESCRIPTOR_STRIDE(ret,0)) + runtime_error ("dim too large in spread()"); + } + + dest = ret->base_addr; + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + for (index_type n = 0; n < ncopies; n++) + { + *dest = *source; + dest += stride; + } +} + +#endif + diff --git a/libgfortran/generated/spread_r17.c b/libgfortran/generated/spread_r17.c new file mode 100644 index 00000000000..4aab77df117 --- /dev/null +++ b/libgfortran/generated/spread_r17.c @@ -0,0 +1,266 @@ +/* Special implementation of the SPREAD intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + spread_generic.c written by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +spread_r17 (gfc_array_r17 *ret, const gfc_array_r17 *source, + const index_type along, const index_type pncopies) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rdelta = 0; + index_type rrank; + index_type rs; + GFC_REAL_17 *rptr; + GFC_REAL_17 * restrict dest; + /* s.* indicates the source array. */ + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type sstride0; + index_type srank; + const GFC_REAL_17 *sptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + index_type ncopies; + + srank = GFC_DESCRIPTOR_RANK(source); + + rrank = srank + 1; + if (rrank > GFC_MAX_DIMENSIONS) + runtime_error ("return rank too large in spread()"); + + if (along > rrank) + runtime_error ("dim outside of rank in spread()"); + + ncopies = pncopies; + + if (ret->base_addr == NULL) + { + + size_t ub, stride; + + /* The front end has signalled that we need to populate the + return array descriptor. */ + ret->dtype.rank = rrank; + + dim = 0; + rs = 1; + for (n = 0; n < rrank; n++) + { + stride = rs; + if (n == along - 1) + { + ub = ncopies - 1; + rdelta = rs; + rs *= ncopies; + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = rs; + + ub = extent[dim] - 1; + rs *= extent[dim]; + dim++; + } + GFC_DIMENSION_SET(ret->dim[n], 0, ub, stride); + } + ret->offset = 0; + + /* xmallocarray allocates a single byte for zero size. */ + ret->base_addr = xmallocarray (rs, sizeof(GFC_REAL_17)); + if (rs <= 0) + return; + } + else + { + int zero_sized; + + zero_sized = 0; + + dim = 0; + if (GFC_DESCRIPTOR_RANK(ret) != rrank) + runtime_error ("rank mismatch in spread()"); + + if (unlikely (compile_options.bounds_check)) + { + for (n = 0; n < rrank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(ret,n); + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + + if (ret_extent != ncopies) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, (long int) ncopies); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (ret_extent != extent[dim]) + runtime_error("Incorrect extent in return value of SPREAD" + " intrinsic in dimension %ld: is %ld," + " should be %ld", (long int) n+1, + (long int) ret_extent, + (long int) extent[dim]); + + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + else + { + for (n = 0; n < rrank; n++) + { + if (n == along - 1) + { + rdelta = GFC_DESCRIPTOR_STRIDE(ret,n); + } + else + { + count[dim] = 0; + extent[dim] = GFC_DESCRIPTOR_EXTENT(source,dim); + if (extent[dim] <= 0) + zero_sized = 1; + sstride[dim] = GFC_DESCRIPTOR_STRIDE(source,dim); + rstride[dim] = GFC_DESCRIPTOR_STRIDE(ret,n); + dim++; + } + } + } + + if (zero_sized) + return; + + if (sstride[0] == 0) + sstride[0] = 1; + } + sstride0 = sstride[0]; + rstride0 = rstride[0]; + rptr = ret->base_addr; + sptr = source->base_addr; + + while (sptr) + { + /* Spread this element. */ + dest = rptr; + for (n = 0; n < ncopies; n++) + { + *dest = *sptr; + dest += rdelta; + } + /* Advance to the next element. */ + sptr += sstride0; + rptr += rstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + sptr -= sstride[n] * extent[n]; + rptr -= rstride[n] * extent[n]; + n++; + if (n >= srank) + { + /* Break out of the loop. */ + sptr = NULL; + break; + } + else + { + count[n]++; + sptr += sstride[n]; + rptr += rstride[n]; + } + } + } +} + +/* This version of spread_internal treats the special case of a scalar + source. This is much simpler than the more general case above. */ + +void +spread_scalar_r17 (gfc_array_r17 *ret, const GFC_REAL_17 *source, + const index_type along, const index_type ncopies) +{ + GFC_REAL_17 * restrict dest; + index_type stride; + + if (GFC_DESCRIPTOR_RANK (ret) != 1) + runtime_error ("incorrect destination rank in spread()"); + + if (along > 1) + runtime_error ("dim outside of rank in spread()"); + + if (ret->base_addr == NULL) + { + ret->base_addr = xmallocarray (ncopies, sizeof (GFC_REAL_17)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } + else + { + if (ncopies - 1 > (GFC_DESCRIPTOR_EXTENT(ret,0) - 1) + / GFC_DESCRIPTOR_STRIDE(ret,0)) + runtime_error ("dim too large in spread()"); + } + + dest = ret->base_addr; + stride = GFC_DESCRIPTOR_STRIDE(ret,0); + + for (index_type n = 0; n < ncopies; n++) + { + *dest = *source; + dest += stride; + } +} + +#endif + diff --git a/libgfortran/generated/sum_c17.c b/libgfortran/generated/sum_c17.c new file mode 100644 index 00000000000..16514d3df03 --- /dev/null +++ b/libgfortran/generated/sum_c17.c @@ -0,0 +1,536 @@ +/* Implementation of the SUM intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_COMPLEX_17) && defined (HAVE_GFC_COMPLEX_17) + + +extern void sum_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict); +export_proto(sum_c17); + +void +sum_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_COMPLEX_17 * restrict base; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_COMPLEX_17 * restrict src; + GFC_COMPLEX_17 result; + src = base; + { + + result = 0; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result += *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void msum_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(msum_c17); + +void +msum_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + const GFC_COMPLEX_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + sum_c17 (retarray, array, pdim, back); +#else + sum_c17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in SUM intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "SUM"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_COMPLEX_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_COMPLEX_17 result; + src = base; + msrc = mbase; + { + + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result += *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void ssum_c17 (gfc_array_c17 * const restrict, + gfc_array_c17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(ssum_c17); + +void +ssum_c17 (gfc_array_c17 * const restrict retarray, + gfc_array_c17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_COMPLEX_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + sum_c17 (retarray, array, pdim, back); +#else + sum_c17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " SUM intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/sum_r17.c b/libgfortran/generated/sum_r17.c new file mode 100644 index 00000000000..a11550752c6 --- /dev/null +++ b/libgfortran/generated/sum_r17.c @@ -0,0 +1,536 @@ +/* Implementation of the SUM intrinsic + Copyright (C) 2002-2022 Free Software Foundation, Inc. + Contributed by Paul Brook <paul@nowt.org> + +This file is part of the GNU Fortran 95 runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Libgfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" + + +#if defined (HAVE_GFC_REAL_17) && defined (HAVE_GFC_REAL_17) + + +extern void sum_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict); +export_proto(sum_r17); + +void +sum_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + const GFC_REAL_17 * restrict base; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type len; + index_type delta; + index_type dim; + int continue_loop; + + /* Make dim zero based to avoid confusion. */ + rank = GFC_DESCRIPTOR_RANK (array) - 1; + dim = (*pdim) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len < 0) + len = 0; + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + + } + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + base = array->base_addr; + dest = retarray->base_addr; + + continue_loop = 1; + while (continue_loop) + { + const GFC_REAL_17 * restrict src; + GFC_REAL_17 result; + src = base; + { + + result = 0; + if (len <= 0) + *dest = 0; + else + { +#if ! defined HAVE_BACK_ARG + for (n = 0; n < len; n++, src += delta) + { +#endif + + result += *src; + } + + *dest = result; + } + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + continue_loop = 0; + break; + } + else + { + count[n]++; + base += sstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void msum_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + gfc_array_l1 * const restrict); +export_proto(msum_r17); + +void +msum_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + gfc_array_l1 * const restrict mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type sstride[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + index_type mstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + const GFC_REAL_17 * restrict base; + const GFC_LOGICAL_1 * restrict mbase; + index_type rank; + index_type dim; + index_type n; + index_type len; + index_type delta; + index_type mdelta; + int mask_kind; + + if (mask == NULL) + { +#ifdef HAVE_BACK_ARG + sum_r17 (retarray, array, pdim, back); +#else + sum_r17 (retarray, array, pdim); +#endif + return; + } + + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + len = GFC_DESCRIPTOR_EXTENT(array,dim); + if (len <= 0) + return; + + mbase = mask->base_addr; + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + mbase = GFOR_POINTER_TO_L1 (mbase, mask_kind); + else + runtime_error ("Funny sized logical array"); + + delta = GFC_DESCRIPTOR_STRIDE(array,dim); + mdelta = GFC_DESCRIPTOR_STRIDE_BYTES(mask,dim); + + for (n = 0; n < dim; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] < 0) + extent[n] = 0; + + } + for (n = dim; n < rank; n++) + { + sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n + 1); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask, n + 1); + extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1); + + if (extent[n] < 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str= GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype.rank = rank; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in SUM intrinsic"); + + if (unlikely (compile_options.bounds_check)) + { + bounds_ifunction_return ((array_t *) retarray, extent, + "return value", "SUM"); + bounds_equal_extents ((array_t *) mask, (array_t *) array, + "MASK argument", "SUM"); + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + if (extent[n] <= 0) + return; + } + + dest = retarray->base_addr; + base = array->base_addr; + + while (base) + { + const GFC_REAL_17 * restrict src; + const GFC_LOGICAL_1 * restrict msrc; + GFC_REAL_17 result; + src = base; + msrc = mbase; + { + + result = 0; + for (n = 0; n < len; n++, src += delta, msrc += mdelta) + { + + if (*msrc) + result += *src; + } + *dest = result; + } + /* Advance to the next element. */ + count[0]++; + base += sstride[0]; + mbase += mstride[0]; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + base -= sstride[n] * extent[n]; + mbase -= mstride[n] * extent[n]; + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + { + /* Break out of the loop. */ + base = NULL; + break; + } + else + { + count[n]++; + base += sstride[n]; + mbase += mstride[n]; + dest += dstride[n]; + } + } + } +} + + +extern void ssum_r17 (gfc_array_r17 * const restrict, + gfc_array_r17 * const restrict, const index_type * const restrict, + GFC_LOGICAL_4 *); +export_proto(ssum_r17); + +void +ssum_r17 (gfc_array_r17 * const restrict retarray, + gfc_array_r17 * const restrict array, + const index_type * const restrict pdim, + GFC_LOGICAL_4 * mask) +{ + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type dstride[GFC_MAX_DIMENSIONS]; + GFC_REAL_17 * restrict dest; + index_type rank; + index_type n; + index_type dim; + + + if (mask == NULL || *mask) + { +#ifdef HAVE_BACK_ARG + sum_r17 (retarray, array, pdim, back); +#else + sum_r17 (retarray, array, pdim); +#endif + return; + } + /* Make dim zero based to avoid confusion. */ + dim = (*pdim) - 1; + rank = GFC_DESCRIPTOR_RANK (array) - 1; + + if (unlikely (dim < 0 || dim > rank)) + { + runtime_error ("Dim argument incorrect in SUM intrinsic: " + "is %ld, should be between 1 and %ld", + (long int) dim + 1, (long int) rank + 1); + } + + for (n = 0; n < dim; n++) + { + extent[n] = GFC_DESCRIPTOR_EXTENT(array,n); + + if (extent[n] <= 0) + extent[n] = 0; + } + + for (n = dim; n < rank; n++) + { + extent[n] = + GFC_DESCRIPTOR_EXTENT(array,n + 1); + + if (extent[n] <= 0) + extent[n] = 0; + } + + if (retarray->base_addr == NULL) + { + size_t alloc_size, str; + + for (n = 0; n < rank; n++) + { + if (n == 0) + str = 1; + else + str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1]; + + GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str); + + } + + retarray->offset = 0; + retarray->dtype.rank = rank; + + alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ + GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1); + return; + } + else + retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_17)); + } + else + { + if (rank != GFC_DESCRIPTOR_RANK (retarray)) + runtime_error ("rank of return array incorrect in" + " SUM intrinsic: is %ld, should be %ld", + (long int) (GFC_DESCRIPTOR_RANK (retarray)), + (long int) rank); + + if (unlikely (compile_options.bounds_check)) + { + for (n=0; n < rank; n++) + { + index_type ret_extent; + + ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,n); + if (extent[n] != ret_extent) + runtime_error ("Incorrect extent in return value of" + " SUM intrinsic in dimension %ld:" + " is %ld, should be %ld", (long int) n + 1, + (long int) ret_extent, (long int) extent[n]); + } + } + } + + for (n = 0; n < rank; n++) + { + count[n] = 0; + dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n); + } + + dest = retarray->base_addr; + + while(1) + { + *dest = 0; + count[0]++; + dest += dstride[0]; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + dest -= dstride[n] * extent[n]; + n++; + if (n >= rank) + return; + else + { + count[n]++; + dest += dstride[n]; + } + } + } +} + +#endif diff --git a/libgfortran/generated/unpack_c17.c b/libgfortran/generated/unpack_c17.c new file mode 100644 index 00000000000..c19d8026317 --- /dev/null +++ b/libgfortran/generated/unpack_c17.c @@ -0,0 +1,333 @@ +/* Specific implementation of the UNPACK intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + unpack_generic.c by Paul Brook <paul@nowt.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_COMPLEX_17) + +void +unpack0_c17 (gfc_array_c17 *ret, const gfc_array_c17 *vector, + const gfc_array_l1 *mask, const GFC_COMPLEX_17 *fptr) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_COMPLEX_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_COMPLEX_17 *vptr; + /* Value for field, this is constant. */ + const GFC_COMPLEX_17 fval = *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = fval; + } + /* Advance to the next element. */ + rptr += rstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + mptr += mstride[n]; + } + } + } +} + +void +unpack1_c17 (gfc_array_c17 *ret, const gfc_array_c17 *vector, + const gfc_array_l1 *mask, const gfc_array_c17 *field) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_COMPLEX_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_COMPLEX_17 *vptr; + /* f.* indicates the field array. */ + index_type fstride[GFC_MAX_DIMENSIONS]; + index_type fstride0; + const GFC_COMPLEX_17 *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (fstride[0] == 0) + fstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + fstride0 = fstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + fptr = field->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = *fptr; + } + /* Advance to the next element. */ + rptr += rstride0; + fptr += fstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + fptr -= fstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + fptr += fstride[n]; + mptr += mstride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/generated/unpack_r17.c b/libgfortran/generated/unpack_r17.c new file mode 100644 index 00000000000..d09d3abbc77 --- /dev/null +++ b/libgfortran/generated/unpack_r17.c @@ -0,0 +1,333 @@ +/* Specific implementation of the UNPACK intrinsic + Copyright (C) 2008-2022 Free Software Foundation, Inc. + Contributed by Thomas Koenig <tkoenig@gcc.gnu.org>, based on + unpack_generic.c by Paul Brook <paul@nowt.org>. + +This file is part of the GNU Fortran runtime library (libgfortran). + +Libgfortran is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public +License as published by the Free Software Foundation; either +version 3 of the License, or (at your option) any later version. + +Ligbfortran is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +#include "libgfortran.h" +#include <string.h> + + +#if defined (HAVE_GFC_REAL_17) + +void +unpack0_r17 (gfc_array_r17 *ret, const gfc_array_r17 *vector, + const gfc_array_l1 *mask, const GFC_REAL_17 *fptr) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_REAL_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_REAL_17 *vptr; + /* Value for field, this is constant. */ + const GFC_REAL_17 fval = *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = fval; + } + /* Advance to the next element. */ + rptr += rstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + mptr += mstride[n]; + } + } + } +} + +void +unpack1_r17 (gfc_array_r17 *ret, const gfc_array_r17 *vector, + const gfc_array_l1 *mask, const gfc_array_r17 *field) +{ + /* r.* indicates the return array. */ + index_type rstride[GFC_MAX_DIMENSIONS]; + index_type rstride0; + index_type rs; + GFC_REAL_17 * restrict rptr; + /* v.* indicates the vector array. */ + index_type vstride0; + GFC_REAL_17 *vptr; + /* f.* indicates the field array. */ + index_type fstride[GFC_MAX_DIMENSIONS]; + index_type fstride0; + const GFC_REAL_17 *fptr; + /* m.* indicates the mask array. */ + index_type mstride[GFC_MAX_DIMENSIONS]; + index_type mstride0; + const GFC_LOGICAL_1 *mptr; + + index_type count[GFC_MAX_DIMENSIONS]; + index_type extent[GFC_MAX_DIMENSIONS]; + index_type n; + index_type dim; + + int empty; + int mask_kind; + + empty = 0; + + mptr = mask->base_addr; + + /* Use the same loop for all logical types, by using GFC_LOGICAL_1 + and using shifting to address size and endian issues. */ + + mask_kind = GFC_DESCRIPTOR_SIZE (mask); + + if (mask_kind == 1 || mask_kind == 2 || mask_kind == 4 || mask_kind == 8 +#ifdef HAVE_GFC_LOGICAL_16 + || mask_kind == 16 +#endif + ) + { + /* Do not convert a NULL pointer as we use test for NULL below. */ + if (mptr) + mptr = GFOR_POINTER_TO_L1 (mptr, mask_kind); + } + else + runtime_error ("Funny sized logical array"); + + /* Initialize to avoid -Wmaybe-uninitialized complaints. */ + rstride[0] = 1; + if (ret->base_addr == NULL) + { + /* The front end has signalled that we need to populate the + return array descriptor. */ + dim = GFC_DESCRIPTOR_RANK (mask); + rs = 1; + for (n = 0; n < dim; n++) + { + count[n] = 0; + GFC_DIMENSION_SET(ret->dim[n], 0, + GFC_DESCRIPTOR_EXTENT(mask,n) - 1, rs); + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + rs *= extent[n]; + } + ret->offset = 0; + ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_17)); + } + else + { + dim = GFC_DESCRIPTOR_RANK (ret); + for (n = 0; n < dim; n++) + { + count[n] = 0; + extent[n] = GFC_DESCRIPTOR_EXTENT(ret,n); + empty = empty || extent[n] <= 0; + rstride[n] = GFC_DESCRIPTOR_STRIDE(ret,n); + fstride[n] = GFC_DESCRIPTOR_STRIDE(field,n); + mstride[n] = GFC_DESCRIPTOR_STRIDE_BYTES(mask,n); + } + if (rstride[0] == 0) + rstride[0] = 1; + } + + if (empty) + return; + + if (fstride[0] == 0) + fstride[0] = 1; + if (mstride[0] == 0) + mstride[0] = 1; + + vstride0 = GFC_DESCRIPTOR_STRIDE(vector,0); + if (vstride0 == 0) + vstride0 = 1; + rstride0 = rstride[0]; + fstride0 = fstride[0]; + mstride0 = mstride[0]; + rptr = ret->base_addr; + fptr = field->base_addr; + vptr = vector->base_addr; + + while (rptr) + { + if (*mptr) + { + /* From vector. */ + *rptr = *vptr; + vptr += vstride0; + } + else + { + /* From field. */ + *rptr = *fptr; + } + /* Advance to the next element. */ + rptr += rstride0; + fptr += fstride0; + mptr += mstride0; + count[0]++; + n = 0; + while (count[n] == extent[n]) + { + /* When we get to the end of a dimension, reset it and increment + the next dimension. */ + count[n] = 0; + /* We could precalculate these products, but this is a less + frequently used path so probably not worth it. */ + rptr -= rstride[n] * extent[n]; + fptr -= fstride[n] * extent[n]; + mptr -= mstride[n] * extent[n]; + n++; + if (n >= dim) + { + /* Break out of the loop. */ + rptr = NULL; + break; + } + else + { + count[n]++; + rptr += rstride[n]; + fptr += fstride[n]; + mptr += mstride[n]; + } + } + } +} + +#endif + diff --git a/libgfortran/kinds-override.h b/libgfortran/kinds-override.h index 74b7b36baf6..efae0d034a4 100644 --- a/libgfortran/kinds-override.h +++ b/libgfortran/kinds-override.h @@ -29,3 +29,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see # error "Where has __float128 gone?" #endif +#if defined(__powerpc64__) \ + && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ \ + && __SIZEOF_LONG_DOUBLE__ == 16 \ + && defined(GFC_REAL_16_IS_LONG_DOUBLE) +typedef __float128 GFC_REAL_17; +typedef _Complex float __attribute__((mode(KC))) GFC_COMPLEX_17; +#define HAVE_GFC_REAL_17 +#define HAVE_GFC_COMPLEX_17 +#define GFC_REAL_17_HUGE 1.18973149535723176508575932662800702e4932q +#define GFC_REAL_17_LITERAL_SUFFIX q +#define GFC_REAL_17_LITERAL(X) (X ## q) +#define GFC_REAL_17_DIGITS 113 +#define GFC_REAL_17_RADIX 2 +#endif diff --git a/libgfortran/libgfortran.h b/libgfortran/libgfortran.h index 4cdd6a1a88a..f0bd50a7176 100644 --- a/libgfortran/libgfortran.h +++ b/libgfortran/libgfortran.h @@ -103,6 +103,10 @@ typedef off_t gfc_offset; #define NULL (void *) 0 #endif +#if defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ \ + && defined __GLIBC_PREREQ && __GLIBC_PREREQ (2, 32) +#define POWER_IEEE128 1 +#endif /* These functions from <ctype.h> should only be used on values that can be represented as unsigned char, otherwise the behavior is undefined. @@ -386,6 +390,9 @@ typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_10) gfc_array_r10; #ifdef HAVE_GFC_REAL_16 typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_16) gfc_array_r16; #endif +#ifdef HAVE_GFC_REAL_17 +typedef GFC_ARRAY_DESCRIPTOR (GFC_REAL_17) gfc_array_r17; +#endif typedef GFC_ARRAY_DESCRIPTOR (GFC_COMPLEX_4) gfc_array_c4; typedef GFC_ARRAY_DESCRIPTOR (GFC_COMPLEX_8) gfc_array_c8; #ifdef HAVE_GFC_COMPLEX_10 @@ -394,6 +401,9 @@ typedef GFC_ARRAY_DESCRIPTOR (GFC_COMPLEX_10) gfc_array_c10; #ifdef HAVE_GFC_COMPLEX_16 typedef GFC_ARRAY_DESCRIPTOR (GFC_COMPLEX_16) gfc_array_c16; #endif +#ifdef HAVE_GFC_COMPLEX_17 +typedef GFC_ARRAY_DESCRIPTOR (GFC_COMPLEX_17) gfc_array_c17; +#endif typedef GFC_ARRAY_DESCRIPTOR (GFC_LOGICAL_1) gfc_array_l1; typedef GFC_ARRAY_DESCRIPTOR (GFC_LOGICAL_2) gfc_array_l2; typedef GFC_ARRAY_DESCRIPTOR (GFC_LOGICAL_4) gfc_array_l4; @@ -507,6 +517,10 @@ typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, GFC_INTEGER_4) gfc_full_a #define GFC_DTYPE_REAL_16 ((BT_REAL << GFC_DTYPE_TYPE_SHIFT) \ | (sizeof(GFC_REAL_16) << GFC_DTYPE_SIZE_SHIFT)) #endif +#ifdef HAVE_GFC_REAL_17 +#define GFC_DTYPE_REAL_17 ((BT_REAL << GFC_DTYPE_TYPE_SHIFT) \ + | (sizeof(GFC_REAL_17) << GFC_DTYPE_SIZE_SHIFT)) +#endif #define GFC_DTYPE_COMPLEX_4 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \ | (sizeof(GFC_COMPLEX_4) << GFC_DTYPE_SIZE_SHIFT)) @@ -520,6 +534,10 @@ typedef GFC_FULL_ARRAY_DESCRIPTOR (GFC_MAX_DIMENSIONS, GFC_INTEGER_4) gfc_full_a #define GFC_DTYPE_COMPLEX_16 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \ | (sizeof(GFC_COMPLEX_16) << GFC_DTYPE_SIZE_SHIFT)) #endif +#ifdef HAVE_GFC_COMPLEX_17 +#define GFC_DTYPE_COMPLEX_17 ((BT_COMPLEX << GFC_DTYPE_TYPE_SHIFT) \ + | (sizeof(GFC_COMPLEX_17) << GFC_DTYPE_SIZE_SHIFT)) +#endif /* Macros to determine the alignment of pointers. */ @@ -1782,6 +1800,33 @@ void cshift1_16_c16 (gfc_array_c16 * const restrict, internal_proto(cshift1_16_c16); #endif +/* Prototypes for the POWER __ieee128 functions. */ +#ifdef POWER_IEEE128 +extern __float128 __acoshieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __acosieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __asinhieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __asinieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __atan2ieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __atanhieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __atanieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __coshieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __cosieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __erfieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __expieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __fabsieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __jnieee128 (int, __float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __log10ieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __logieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __powieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __sinhieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __sinieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __sqrtieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __tanhieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __tanieee128 (__float128) __attribute__ ((__nothrow__, __leaf__)); +extern __float128 __ynieee128 (int , __float128) __attribute__ ((__nothrow__, __leaf__)); + +#endif + /* We always have these. */ #define HAVE_GFC_UINTEGER_1 1 diff --git a/libgfortran/m4/ifunc.m4 b/libgfortran/m4/ifunc.m4 new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/libgfortran/m4/ifunc.m4 diff --git a/libgfortran/m4/mtype.m4 b/libgfortran/m4/mtype.m4 index b133e578bfa..184ca0a064f 100644 --- a/libgfortran/m4/mtype.m4 +++ b/libgfortran/m4/mtype.m4 @@ -5,9 +5,24 @@ define(real_type, `GFC_REAL_'kind)dnl define(`upcase', `translit(`$*', `a-z', `A-Z')')dnl define(q,ifelse(kind,4,f,ifelse(kind,8,`',ifelse(kind,10,l,ifelse(kind,16,l,`_'kind)))))dnl define(Q,translit(q,`a-z',`A-Z'))dnl -define(hasmathfunc,`ifelse(kind,4,`defined (HAVE_'upcase($1)`F)',ifelse(kind,8,`defined (HAVE_'upcase($1)`)',ifelse(kind,10,`defined (HAVE_'upcase($1)`L)',ifelse(kind,16,`(defined(GFC_REAL_16_IS_FLOAT128) || defined(HAVE_'upcase($1)`L))',`error out'))))') -define(mathfunc_macro,`ifelse(kind,16,`#if defined(GFC_REAL_16_IS_FLOAT128) +define(hasmathfunc,dnl +`ifelse(kind,4,`defined (HAVE_'upcase($1)`F)',dnl +ifelse(kind,8,`defined (HAVE_'upcase($1)`)',dnl +ifelse(kind,10,`defined (HAVE_'upcase($1)`L)',dnl +ifelse(kind,16,`(defined(GFC_REAL_16_IS_FLOAT128) || defined(HAVE_'upcase($1)`L))',dnl +ifelse(kind,17,`1 /* FIXME: figure this out later. */',dnl +`error out')))))') +define(mathfunc_macro,`ifelse(kind,17,dnl +`#if defined(POWER_IEEE128) +#define MATHFUNC(funcname) __ ## funcname ## ieee128 +#else +#define MATHFUNC(funcname) funcname ## q +#endif',dnl +`ifelse(kind,16,dnl +`#if defined(GFC_REAL_16_IS_FLOAT128) #define MATHFUNC(funcname) funcname ## q #else #define MATHFUNC(funcname) funcname ## l -#endif',ifelse(kind,8,``#''`define MATHFUNC(funcname) funcname',```#'''`define MATHFUNC(funcname) funcname '```#'''```#'''` 'q))')dnl +#endif',dnl +ifelse(kind,8,``#''`define MATHFUNC(funcname) funcname',dnl +```#'''`define MATHFUNC(funcname) funcname '```#'''```#'''` 'q))')')dnl |