summaryrefslogtreecommitdiff
path: root/gcc/fortran
diff options
context:
space:
mode:
authorfxcoudert <fxcoudert@138bc75d-0d04-0410-961f-82ee72b054a4>2006-10-22 07:41:48 +0000
committerfxcoudert <fxcoudert@138bc75d-0d04-0410-961f-82ee72b054a4>2006-10-22 07:41:48 +0000
commit4e8e57b0ce67551ca61b7883e73586ba805f0a61 (patch)
tree06dcf4cc8adb7c0677545a92f9a81873ed2e8051 /gcc/fortran
parent9c384dcabc464328ebe0e4da1da978416882e344 (diff)
downloadgcc-4e8e57b0ce67551ca61b7883e73586ba805f0a61.tar.gz
PR fortran/26025
* lang.opt: Add -fexternal-blas and -fblas-matmul-limit options. * options.c (gfc_init_options): Initialize new flags. (gfc_handle_option): Handle new flags. * gfortran.h (gfc_option): Add flag_external_blas and blas_matmul_limit flags. * trans-expr.c (gfc_conv_function_call): Use new argument append_args, appending it at the end of the argument list built for a function call. * trans-stmt.c (gfc_trans_call): Use NULL_TREE for the new append_args argument to gfc_conv_function_call. * trans.h (gfc_conv_function_call): Update prototype. * trans-decl.c (gfc_build_intrinsic_function_decls): Add prototypes for BLAS ?gemm routines. * trans-intrinsic.c (gfc_conv_intrinsic_funcall): Generate the extra arguments given to the library matmul function, and give them to gfc_conv_function_call. * invoke.texi: Add documentation for -fexternal-blas and -fblas-matmul-limit. * m4/matmul.m4: Add possible call to gemm routine. * generated/matmul_r8.c: Regenerate. * generated/matmul_r16.c: Regenerate. * generated/matmul_c8.c: Regenerate. * generated/matmul_i8.c: Regenerate. * generated/matmul_c16.c: Regenerate. * generated/matmul_r10.c: Regenerate. * generated/matmul_r4.c: Regenerate. * generated/matmul_c10.c: Regenerate. * generated/matmul_c4.c: Regenerate. * generated/matmul_i4.c: Regenerate. * generated/matmul_i16.c: Regenerate. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@117948 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/fortran')
-rw-r--r--gcc/fortran/ChangeLog22
-rw-r--r--gcc/fortran/gfortran.h2
-rw-r--r--gcc/fortran/invoke.texi24
-rw-r--r--gcc/fortran/lang.opt8
-rw-r--r--gcc/fortran/options.c10
-rw-r--r--gcc/fortran/trans-decl.c49
-rw-r--r--gcc/fortran/trans-expr.c9
-rw-r--r--gcc/fortran/trans-intrinsic.c50
-rw-r--r--gcc/fortran/trans-stmt.c6
-rw-r--r--gcc/fortran/trans.h9
10 files changed, 182 insertions, 7 deletions
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 94e4d6cd013..53b7b300764 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,25 @@
+2006-10-22 Francois-Xavier Coudert <coudert@clipper.ens.fr>
+
+ PR fortran/26025
+ * lang.opt: Add -fexternal-blas and -fblas-matmul-limit options.
+ * options.c (gfc_init_options): Initialize new flags.
+ (gfc_handle_option): Handle new flags.
+ * gfortran.h (gfc_option): Add flag_external_blas and
+ blas_matmul_limit flags.
+ * trans-expr.c (gfc_conv_function_call): Use new argument
+ append_args, appending it at the end of the argument list
+ built for a function call.
+ * trans-stmt.c (gfc_trans_call): Use NULL_TREE for the new
+ append_args argument to gfc_conv_function_call.
+ * trans.h (gfc_conv_function_call): Update prototype.
+ * trans-decl.c (gfc_build_intrinsic_function_decls): Add
+ prototypes for BLAS ?gemm routines.
+ * trans-intrinsic.c (gfc_conv_intrinsic_funcall): Generate the
+ extra arguments given to the library matmul function, and give
+ them to gfc_conv_function_call.
+ * invoke.texi: Add documentation for -fexternal-blas and
+ -fblas-matmul-limit.
+
2006-10-21 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* Make-lang.in (F95_LIBS): Delete.
diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
index c89c136f6c0..b34d1c28dcb 100644
--- a/gcc/fortran/gfortran.h
+++ b/gcc/fortran/gfortran.h
@@ -1652,6 +1652,8 @@ typedef struct
int flag_f2c;
int flag_automatic;
int flag_backslash;
+ int flag_external_blas;
+ int blas_matmul_limit;
int flag_cray_pointer;
int flag_d_lines;
int flag_openmp;
diff --git a/gcc/fortran/invoke.texi b/gcc/fortran/invoke.texi
index 51554a5ddcf..8c6aadd6153 100644
--- a/gcc/fortran/invoke.texi
+++ b/gcc/fortran/invoke.texi
@@ -152,7 +152,8 @@ by type. Explanations are in the following sections.
@gccoptlist{
-fno-automatic -ff2c -fno-underscoring -fsecond-underscore @gol
-fbounds-check -fmax-stack-var-size=@var{n} @gol
--fpack-derived -frepack-arrays -fshort-enums}
+-fpack-derived -frepack-arrays -fshort-enums -fexternal-blas @gol
+-fblas-matmul-limit=@var{n}}
@end table
@menu
@@ -859,6 +860,27 @@ This option is provided for interoperability with C code that was
compiled with the @command{-fshort-enums} option. It will make
GNU Fortran choose the smallest @code{INTEGER} kind a given
enumerator set will fit in, and give all its enumerators this kind.
+
+@cindex -fexternal-blas
+@item -fexternal-blas
+This option will make @command{gfortran} generate calls to BLAS functions
+for some matrix operations like @code{MATMUL}, instead of using its
+internal algorithms, if the size of the matrices involved is larger than
+or equal to a given limit (see @command{-fblas-matmul-limit}). This may
+be profitable if an optimized vendor BLAS library is available. The BLAS
+library will have to be specified at link time.
+
+@cindex -fblas-matmul-limit
+@item -fblas-matmul-limit=@var{n}
+Only significant when @command{-fexternal-blas} is in effect.
+Matrix multiplication of matrices with size larger than (or equal to) @var{n}
+will be performed by calls to BLAS functions, while others will be
+handled by @command{gfortran} internal algorithms. If the matrices
+involved are not square, the size comparison is performed using the
+geometric mean of the dimensions of the argument and result matrices.
+
+The default value for @var{n} is 30.
+
@end table
@xref{Code Gen Options,,Options for Code Generation Conventions,
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index cb8810ae62b..cbef46a040d 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -85,6 +85,14 @@ fbackslash
Fortran
Specify that backslash in string introduces an escape character
+fexternal-blas
+Fortran
+Specify that an external BLAS library should be used for matmul calls on large-size arrays
+
+fblas-matmul-limit=
+Fortran RejectNegative Joined UInteger
+-fblas-matmul-limit=<n> Size of the smallest matrix for which matmul will use BLAS
+
fdefault-double-8
Fortran
Set the default double precision kind to an 8 byte wide type
diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c
index 96347042bf3..f821d3e2695 100644
--- a/gcc/fortran/options.c
+++ b/gcc/fortran/options.c
@@ -80,6 +80,8 @@ gfc_init_options (unsigned int argc ATTRIBUTE_UNUSED,
gfc_option.flag_preprocessed = 0;
gfc_option.flag_automatic = 1;
gfc_option.flag_backslash = 1;
+ gfc_option.flag_external_blas = 0;
+ gfc_option.blas_matmul_limit = 30;
gfc_option.flag_cray_pointer = 0;
gfc_option.flag_d_lines = -1;
gfc_option.flag_openmp = 0;
@@ -450,6 +452,14 @@ gfc_handle_option (size_t scode, const char *arg, int value)
gfc_option.flag_dollar_ok = value;
break;
+ case OPT_fexternal_blas:
+ gfc_option.flag_external_blas = value;
+ break;
+
+ case OPT_fblas_matmul_limit_:
+ gfc_option.blas_matmul_limit = value;
+ break;
+
case OPT_fd_lines_as_code:
gfc_option.flag_d_lines = 1;
break;
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index d12b953cf9e..82315b708fc 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -143,6 +143,12 @@ tree gfor_fndecl_iargc;
tree gfor_fndecl_si_kind;
tree gfor_fndecl_sr_kind;
+/* BLAS gemm functions. */
+tree gfor_fndecl_sgemm;
+tree gfor_fndecl_dgemm;
+tree gfor_fndecl_cgemm;
+tree gfor_fndecl_zgemm;
+
static void
gfc_add_decl_to_parent_function (tree decl)
@@ -2186,6 +2192,49 @@ gfc_build_intrinsic_function_decls (void)
gfc_int4_type_node, 1,
gfc_real16_type_node);
+ /* BLAS functions. */
+ {
+ tree pint = build_pointer_type (gfc_c_int_type_node);
+ tree ps = build_pointer_type (gfc_get_real_type (gfc_default_real_kind));
+ tree pd = build_pointer_type (gfc_get_real_type (gfc_default_double_kind));
+ tree pc = build_pointer_type (gfc_get_complex_type (gfc_default_real_kind));
+ tree pz = build_pointer_type
+ (gfc_get_complex_type (gfc_default_double_kind));
+
+ gfor_fndecl_sgemm = gfc_build_library_function_decl
+ (get_identifier
+ (gfc_option.flag_underscoring ? "sgemm_"
+ : "sgemm"),
+ void_type_node, 15, pchar_type_node,
+ pchar_type_node, pint, pint, pint, ps, ps, pint,
+ ps, pint, ps, ps, pint, gfc_c_int_type_node,
+ gfc_c_int_type_node);
+ gfor_fndecl_dgemm = gfc_build_library_function_decl
+ (get_identifier
+ (gfc_option.flag_underscoring ? "dgemm_"
+ : "dgemm"),
+ void_type_node, 15, pchar_type_node,
+ pchar_type_node, pint, pint, pint, pd, pd, pint,
+ pd, pint, pd, pd, pint, gfc_c_int_type_node,
+ gfc_c_int_type_node);
+ gfor_fndecl_cgemm = gfc_build_library_function_decl
+ (get_identifier
+ (gfc_option.flag_underscoring ? "cgemm_"
+ : "cgemm"),
+ void_type_node, 15, pchar_type_node,
+ pchar_type_node, pint, pint, pint, pc, pc, pint,
+ pc, pint, pc, pc, pint, gfc_c_int_type_node,
+ gfc_c_int_type_node);
+ gfor_fndecl_zgemm = gfc_build_library_function_decl
+ (get_identifier
+ (gfc_option.flag_underscoring ? "zgemm_"
+ : "zgemm"),
+ void_type_node, 15, pchar_type_node,
+ pchar_type_node, pint, pint, pint, pz, pz, pint,
+ pz, pint, pz, pz, pint, gfc_c_int_type_node,
+ gfc_c_int_type_node);
+ }
+
/* Other functions. */
gfor_fndecl_size0 =
gfc_build_library_function_decl (get_identifier (PREFIX("size0")),
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 3e7844ed445..e5c9f2486bd 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -1853,7 +1853,7 @@ is_aliased_array (gfc_expr * e)
int
gfc_conv_function_call (gfc_se * se, gfc_symbol * sym,
- gfc_actual_arglist * arg)
+ gfc_actual_arglist * arg, tree append_args)
{
gfc_interface_mapping mapping;
tree arglist;
@@ -2226,6 +2226,11 @@ gfc_conv_function_call (gfc_se * se, gfc_symbol * sym,
/* Add the hidden string length parameters to the arguments. */
arglist = chainon (arglist, stringargs);
+ /* We may want to append extra arguments here. This is used e.g. for
+ calls to libgfortran_matmul_??, which need extra information. */
+ if (append_args != NULL_TREE)
+ arglist = chainon (arglist, append_args);
+
/* Generate the actual call. */
gfc_conv_function_val (se, sym);
/* If there are alternate return labels, function type should be
@@ -2545,7 +2550,7 @@ gfc_conv_function_expr (gfc_se * se, gfc_expr * expr)
sym = expr->value.function.esym;
if (!sym)
sym = expr->symtree->n.sym;
- gfc_conv_function_call (se, sym, expr->value.function.actual);
+ gfc_conv_function_call (se, sym, expr->value.function.actual, NULL_TREE);
}
diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 53c61c696d9..7dbd60e8096 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -1378,6 +1378,7 @@ static void
gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr)
{
gfc_symbol *sym;
+ tree append_args;
gcc_assert (!se->ss || se->ss->expr == expr);
@@ -1387,7 +1388,54 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr)
gcc_assert (expr->rank == 0);
sym = gfc_get_symbol_for_expr (expr);
- gfc_conv_function_call (se, sym, expr->value.function.actual);
+
+ /* Calls to libgfortran_matmul need special arguments appended,
+ to be able to call the BLAS ?gemm functions if required and possible. */
+ append_args = NULL_TREE;
+ if (expr->value.function.isym->generic_id == GFC_ISYM_MATMUL
+ && sym->ts.type != BT_LOGICAL)
+ {
+ tree cint = gfc_get_int_type (gfc_c_int_kind);
+
+ if (gfc_option.flag_external_blas
+ && (sym->ts.type == BT_REAL || sym->ts.type == BT_COMPLEX)
+ && (sym->ts.kind == gfc_default_real_kind
+ || sym->ts.kind == gfc_default_double_kind))
+ {
+ tree gemm_fndecl;
+
+ if (sym->ts.type == BT_REAL)
+ {
+ if (sym->ts.kind == gfc_default_real_kind)
+ gemm_fndecl = gfor_fndecl_sgemm;
+ else
+ gemm_fndecl = gfor_fndecl_dgemm;
+ }
+ else
+ {
+ if (sym->ts.kind == gfc_default_real_kind)
+ gemm_fndecl = gfor_fndecl_cgemm;
+ else
+ gemm_fndecl = gfor_fndecl_zgemm;
+ }
+
+ append_args = gfc_chainon_list (NULL_TREE, build_int_cst (cint, 1));
+ append_args = gfc_chainon_list
+ (append_args, build_int_cst
+ (cint, gfc_option.blas_matmul_limit));
+ append_args = gfc_chainon_list (append_args,
+ gfc_build_addr_expr (NULL_TREE,
+ gemm_fndecl));
+ }
+ else
+ {
+ append_args = gfc_chainon_list (NULL_TREE, build_int_cst (cint, 0));
+ append_args = gfc_chainon_list (append_args, build_int_cst (cint, 0));
+ append_args = gfc_chainon_list (append_args, null_pointer_node);
+ }
+ }
+
+ gfc_conv_function_call (se, sym, expr->value.function.actual, append_args);
gfc_free (sym);
}
diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c
index 08ba113cc07..03ff0fee92b 100644
--- a/gcc/fortran/trans-stmt.c
+++ b/gcc/fortran/trans-stmt.c
@@ -334,7 +334,8 @@ gfc_trans_call (gfc_code * code, bool dependency_check)
/* Translate the call. */
has_alternate_specifier
- = gfc_conv_function_call (&se, code->resolved_sym, code->ext.actual);
+ = gfc_conv_function_call (&se, code->resolved_sym, code->ext.actual,
+ NULL_TREE);
/* A subroutine without side-effect, by definition, does nothing! */
TREE_SIDE_EFFECTS (se.expr) = 1;
@@ -399,7 +400,8 @@ gfc_trans_call (gfc_code * code, bool dependency_check)
gfc_init_block (&block);
/* Add the subroutine call to the block. */
- gfc_conv_function_call (&loopse, code->resolved_sym, code->ext.actual);
+ gfc_conv_function_call (&loopse, code->resolved_sym, code->ext.actual,
+ NULL_TREE);
gfc_add_expr_to_block (&loopse.pre, loopse.expr);
gfc_add_block_to_block (&block, &loopse.pre);
diff --git a/gcc/fortran/trans.h b/gcc/fortran/trans.h
index 13c21aa2581..e8bb1d5d6aa 100644
--- a/gcc/fortran/trans.h
+++ b/gcc/fortran/trans.h
@@ -303,7 +303,8 @@ void gfc_conv_intrinsic_function (gfc_se *, gfc_expr *);
int gfc_is_intrinsic_libcall (gfc_expr *);
/* Also used to CALL subroutines. */
-int gfc_conv_function_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *);
+int gfc_conv_function_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *,
+ tree);
/* gfc_trans_* shouldn't call push/poplevel, use gfc_push/pop_scope */
/* Generate code for a scalar assignment. */
@@ -507,6 +508,12 @@ extern GTY(()) tree gfor_fndecl_math_exponent8;
extern GTY(()) tree gfor_fndecl_math_exponent10;
extern GTY(()) tree gfor_fndecl_math_exponent16;
+/* BLAS functions. */
+extern GTY(()) tree gfor_fndecl_sgemm;
+extern GTY(()) tree gfor_fndecl_dgemm;
+extern GTY(()) tree gfor_fndecl_cgemm;
+extern GTY(()) tree gfor_fndecl_zgemm;
+
/* String functions. */
extern GTY(()) tree gfor_fndecl_compare_string;
extern GTY(()) tree gfor_fndecl_concat_string;