diff options
author | tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-10-02 19:43:41 +0000 |
---|---|---|
committer | tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> | 2015-10-02 19:43:41 +0000 |
commit | 689db5ed20ee0ae1ca351fd6066c72c60aa43805 (patch) | |
tree | aa9ed3c30c222e71bcb81aabff26db2e11ffd802 /libgomp | |
parent | de74aa1822da6a22e7dc283bc9eee095971c5b3f (diff) | |
download | gcc-689db5ed20ee0ae1ca351fd6066c72c60aa43805.tar.gz |
nvptx offloading linking
gcc/
* config/nvptx/mkoffload.c (Kind, Vis): Remove enums.
(Token, Stmt): Remove structs.
(decls, vars, fns): Remove variables.
(alloc_comment, append_stmt, is_keyword): Remove macros.
(tokenize, write_token, write_tokens, alloc_stmt, rev_stmts)
(write_stmt, write_stmts, parse_insn, parse_list_nosemi)
(parse_init, parse_file): Remove functions.
(read_file): Accept a pointer to a length and store into it.
(process): Don't try to parse the input file, just write it out as
a string, but looking for maps. Also write out the length.
(main): Don't use "-S" to compile PTX code.
libgomp/
* oacc-ptx.h: Remove file, moving its content into...
* config/nvptx/fortran.c: ... here...
* config/nvptx/oacc-init.c: ..., here...
* config/nvptx/oacc-parallel.c: ..., and here.
* config/nvptx/openacc.f90: New file.
* plugin/plugin-nvptx.c: Don't include "oacc-ptx.h".
(link_ptx): Don't link in predefined bits of PTX code.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@228418 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp')
-rw-r--r-- | libgomp/ChangeLog | 10 | ||||
-rw-r--r-- | libgomp/config/nvptx/fortran.c | 40 | ||||
-rw-r--r-- | libgomp/config/nvptx/oacc-init.c | 42 | ||||
-rw-r--r-- | libgomp/config/nvptx/oacc-parallel.c | 358 | ||||
-rw-r--r-- | libgomp/config/nvptx/openacc.f90 | 102 | ||||
-rw-r--r-- | libgomp/oacc-init.c | 6 | ||||
-rw-r--r-- | libgomp/oacc-ptx.h | 426 | ||||
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 30 |
8 files changed, 557 insertions, 457 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index b38234b8788..191f21fd4e0 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,13 @@ +2015-10-02 Thomas Schwinge <thomas@codesourcery.com> + + * oacc-ptx.h: Remove file, moving its content into... + * config/nvptx/fortran.c: ... here... + * config/nvptx/oacc-init.c: ..., here... + * config/nvptx/oacc-parallel.c: ..., and here. + * config/nvptx/openacc.f90: New file. + * plugin/plugin-nvptx.c: Don't include "oacc-ptx.h". + (link_ptx): Don't link in predefined bits of PTX code. + 2015-09-30 Nathan Sidwell <nathan@codesourcery.com> Bernd Schmidt <bernds@codesourcery.com> diff --git a/libgomp/config/nvptx/fortran.c b/libgomp/config/nvptx/fortran.c index e69de29bb2d..58ca7905a76 100644 --- a/libgomp/config/nvptx/fortran.c +++ b/libgomp/config/nvptx/fortran.c @@ -0,0 +1,40 @@ +/* OpenACC Runtime Fortran wrapper routines + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Temporary hack; this will be provided by libgfortran. */ + +extern void _gfortran_abort (void); + +__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n" + ".visible .func _gfortran_abort;\n" + "// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n" + ".visible .func _gfortran_abort\n" + "{\n" + "trap;\n" + "ret;\n" + "}\n"); diff --git a/libgomp/config/nvptx/oacc-init.c b/libgomp/config/nvptx/oacc-init.c index e69de29bb2d..c57a3f3104d 100644 --- a/libgomp/config/nvptx/oacc-init.c +++ b/libgomp/config/nvptx/oacc-init.c @@ -0,0 +1,42 @@ +/* OpenACC Runtime initialization routines + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "openacc.h" + +/* For -O and higher, the compiler always attempts to expand acc_on_device, but + if the user disables the builtin, or calls it via a pointer, we'll need this + version. + + Compile this with optimization, so that the compiler expands + this, rather than generating infinitely recursive code. */ + +int __attribute__ ((__optimize__ ("O2"))) +acc_on_device (acc_device_t dev) +{ + return __builtin_acc_on_device (dev); +} diff --git a/libgomp/config/nvptx/oacc-parallel.c b/libgomp/config/nvptx/oacc-parallel.c index e69de29bb2d..b9712562e61 100644 --- a/libgomp/config/nvptx/oacc-parallel.c +++ b/libgomp/config/nvptx/oacc-parallel.c @@ -0,0 +1,358 @@ +/* OpenACC constructs + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp_g.h" + +__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" + ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" + ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" + ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" + "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" + "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" + "// BEGIN GLOBAL FUNCTION DECL: abort\n" + ".extern .func abort;\n" + ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L4;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L5;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L8;\n" + "mov.u32 %r23,%tid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L7;\n" + "$L4:\n" + "mov.u32 %r24,%tid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L7;\n" + "$L5:\n" + "mov.u32 %r25,%tid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L7;\n" + "$L8:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L7:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L11;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L12;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L15;\n" + "mov.u32 %r23,%ntid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L14;\n" + "$L11:\n" + "mov.u32 %r24,%ntid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L14;\n" + "$L12:\n" + "mov.u32 %r25,%ntid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L14;\n" + "$L15:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L14:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L18;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L19;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L22;\n" + "mov.u32 %r23,%ctaid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L21;\n" + "$L18:\n" + "mov.u32 %r24,%ctaid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L21;\n" + "$L19:\n" + "mov.u32 %r25,%ctaid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L21;\n" + "$L22:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L21:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L25;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L26;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L29;\n" + "mov.u32 %r23,%nctaid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L28;\n" + "$L25:\n" + "mov.u32 %r24,%nctaid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L28;\n" + "$L26:\n" + "mov.u32 %r25,%nctaid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L28;\n" + "$L29:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L28:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_num_threads\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" + "{\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + "mov.u32 %r26,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r26;\n" + "call (%retval_in),GOACC_ntid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r27,[%retval_in];\n" + "}\n" + "mov.u32 %r22,%r27;\n" + "mov.u32 %r28,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r28;\n" + "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r29,[%retval_in];\n" + "}\n" + "mov.u32 %r23,%r29;\n" + "mul.lo.u32 %r24,%r22,%r23;\n" + "mov.u32 %r25,%r24;\n" + "mov.u32 %retval,%r25;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" + "{\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .u32 %r30;\n" + ".reg .u32 %r31;\n" + ".reg .u32 %r32;\n" + ".reg .u32 %r33;\n" + "mov.u32 %r28,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r28;\n" + "call (%retval_in),GOACC_ntid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r29,[%retval_in];\n" + "}\n" + "mov.u32 %r22,%r29;\n" + "mov.u32 %r30,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r30;\n" + "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r31,[%retval_in];\n" + "}\n" + "mov.u32 %r23,%r31;\n" + "mul.lo.u32 %r24,%r22,%r23;\n" + "mov.u32 %r32,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r32;\n" + "call (%retval_in),GOACC_tid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r33,[%retval_in];\n" + "}\n" + "mov.u32 %r25,%r33;\n" + "add.u32 %r26,%r24,%r25;\n" + "mov.u32 %r27,%r26;\n" + "mov.u32 %retval,%r27;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n"); diff --git a/libgomp/config/nvptx/openacc.f90 b/libgomp/config/nvptx/openacc.f90 new file mode 100644 index 00000000000..d8b5c0610b2 --- /dev/null +++ b/libgomp/config/nvptx/openacc.f90 @@ -0,0 +1,102 @@ +! OpenACC Runtime Library Definitions. + +! Copyright (C) 2014-2015 Free Software Foundation, Inc. + +! Contributed by Tobias Burnus <burnus@net-b.de> +! and Mentor Embedded. + +! This file is part of the GNU Offloading and Multi Processing Library +! (libgomp). + +! Libgomp is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by +! the Free Software Foundation; either version 3, or (at your option) +! any later version. + +! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +! FOR A PARTICULAR PURPOSE. See the GNU General Public License for +! more details. + +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. + +! You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! <http://www.gnu.org/licenses/>. + +! Wrapper functions will be built from openacc.f90. We use a separate file +! here, because for using ../../openacc.f90, implementations are required for +! all the functions that it wraps, which we currently don't provide, so linking +! would fail. + +module openacc_kinds + use iso_fortran_env, only: int32 + implicit none + + private :: int32 + public :: acc_device_kind + + integer, parameter :: acc_device_kind = int32 + + public :: acc_device_none, acc_device_default, acc_device_host + public :: acc_device_not_host, acc_device_nvidia + + ! Keep in sync with include/gomp-constants.h. + integer (acc_device_kind), parameter :: acc_device_none = 0 + integer (acc_device_kind), parameter :: acc_device_default = 1 + integer (acc_device_kind), parameter :: acc_device_host = 2 + ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. + integer (acc_device_kind), parameter :: acc_device_not_host = 4 + integer (acc_device_kind), parameter :: acc_device_nvidia = 5 + +end module + +module openacc_internal + use openacc_kinds + implicit none + + interface + function acc_on_device_h (d) + import + integer (acc_device_kind) d + logical acc_on_device_h + end function + end interface + + interface + function acc_on_device_l (d) & + bind (C, name = "acc_on_device") + use iso_c_binding, only: c_int + integer (c_int) :: acc_on_device_l + integer (c_int), value :: d + end function + end interface +end module + +module openacc + use openacc_kinds + use openacc_internal + implicit none + + public :: acc_on_device + + interface acc_on_device + procedure :: acc_on_device_h + end interface + +end module openacc + +function acc_on_device_h (d) + use openacc_internal, only: acc_on_device_l + use openacc_kinds + integer (acc_device_kind) d + logical acc_on_device_h + if (acc_on_device_l (d) .eq. 1) then + acc_on_device_h = .TRUE. + else + acc_on_device_h = .FALSE. + end if +end function diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 28b9e7a90f1..a0e62a40cc4 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -620,7 +620,11 @@ acc_set_device_num (int ord, acc_device_t d) ialias (acc_set_device_num) -/* Compile on_device with optimization, so that the compiler expands +/* For -O and higher, the compiler always attempts to expand acc_on_device, but + if the user disables the builtin, or calls it via a pointer, we'll need this + version. + + Compile this with optimization, so that the compiler expands this, rather than generating infinitely recursive code. */ int __attribute__ ((__optimize__ ("O2"))) diff --git a/libgomp/oacc-ptx.h b/libgomp/oacc-ptx.h deleted file mode 100644 index 2419a460c72..00000000000 --- a/libgomp/oacc-ptx.h +++ /dev/null @@ -1,426 +0,0 @@ -/* Copyright (C) 2014-2015 Free Software Foundation, Inc. - - Contributed by Mentor Embedded. - - This file is part of the GNU Offloading and Multi Processing Library - (libgomp). - - Libgomp is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. */ - -#define ABORT_PTX \ - ".version 3.1\n" \ - ".target sm_30\n" \ - ".address_size 64\n" \ - ".visible .func abort;\n" \ - ".visible .func abort\n" \ - "{\n" \ - "trap;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func _gfortran_abort;\n" \ - ".visible .func _gfortran_abort\n" \ - "{\n" \ - "trap;\n" \ - "ret;\n" \ - "}\n" \ - -/* Generated with: - - $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline -*/ -#define ACC_ON_DEVICE_PTX \ - " .version 3.1\n" \ - " .target sm_30\n" \ - " .address_size 64\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \ - "{\n" \ - " .reg.u32 %ar1;\n" \ - ".reg.u32 %retval;\n" \ - " .reg.u64 %hr10;\n" \ - " .reg.u32 %r24;\n" \ - " .reg.u32 %r25;\n" \ - " .reg.pred %r27;\n" \ - " .reg.u32 %r30;\n" \ - " ld.param.u32 %ar1, [%in_ar1];\n" \ - " mov.u32 %r24, %ar1;\n" \ - " setp.ne.u32 %r27,%r24,4;\n" \ - " set.u32.eq.u32 %r30,%r24,5;\n" \ - " neg.s32 %r25, %r30;\n" \ - " @%r27 bra $L3;\n" \ - " mov.u32 %r25, 1;\n" \ - "$L3:\n" \ - " mov.u32 %retval, %r25;\n" \ - " st.param.u32 [%out_retval], %retval;\n" \ - " ret;\n" \ - " }\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \ - "{\n" \ - " .reg.u64 %ar1;\n" \ - ".reg.u32 %retval;\n" \ - " .reg.u64 %hr10;\n" \ - " .reg.u64 %r25;\n" \ - " .reg.u32 %r26;\n" \ - " .reg.u32 %r27;\n" \ - " ld.param.u64 %ar1, [%in_ar1];\n" \ - " mov.u64 %r25, %ar1;\n" \ - " ld.u32 %r26, [%r25];\n" \ - " {\n" \ - " .param.u32 %retval_in;\n" \ - " {\n" \ - " .param.u32 %out_arg0;\n" \ - " st.param.u32 [%out_arg0], %r26;\n" \ - " call (%retval_in), acc_on_device, (%out_arg0);\n" \ - " }\n" \ - " ld.param.u32 %r27, [%retval_in];\n" \ - "}\n" \ - " mov.u32 %retval, %r27;\n" \ - " st.param.u32 [%out_retval], %retval;\n" \ - " ret;\n" \ - " }" - - #define GOACC_INTERNAL_PTX \ - ".version 3.1\n" \ - ".target sm_30\n" \ - ".address_size 64\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \ - ".extern .func abort;\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L4;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L5;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L8;\n" \ - "mov.u32 %r23,%tid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L7;\n" \ - "$L4:\n" \ - "mov.u32 %r24,%tid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L7;\n" \ - "$L5:\n" \ - "mov.u32 %r25,%tid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L7;\n" \ - "$L8:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L7:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L11;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L12;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L15;\n" \ - "mov.u32 %r23,%ntid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L14;\n" \ - "$L11:\n" \ - "mov.u32 %r24,%ntid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L14;\n" \ - "$L12:\n" \ - "mov.u32 %r25,%ntid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L14;\n" \ - "$L15:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L14:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L18;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L19;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L22;\n" \ - "mov.u32 %r23,%ctaid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L21;\n" \ - "$L18:\n" \ - "mov.u32 %r24,%ctaid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L21;\n" \ - "$L19:\n" \ - "mov.u32 %r25,%ctaid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L21;\n" \ - "$L22:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L21:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L25;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L26;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L29;\n" \ - "mov.u32 %r23,%nctaid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L28;\n" \ - "$L25:\n" \ - "mov.u32 %r24,%nctaid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L28;\n" \ - "$L26:\n" \ - "mov.u32 %r25,%nctaid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L28;\n" \ - "$L29:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L28:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \ - "{\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - "mov.u32 %r26,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r26;\n" \ - "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r27,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r22,%r27;\n" \ - "mov.u32 %r28,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r28;\n" \ - "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r29,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r23,%r29;\n" \ - "mul.lo.u32 %r24,%r22,%r23;\n" \ - "mov.u32 %r25,%r24;\n" \ - "mov.u32 %retval,%r25;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \ - "{\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .u32 %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .u32 %r32;\n" \ - ".reg .u32 %r33;\n" \ - "mov.u32 %r28,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r28;\n" \ - "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r29,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r22,%r29;\n" \ - "mov.u32 %r30,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r30;\n" \ - "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r31,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r23,%r31;\n" \ - "mul.lo.u32 %r24,%r22,%r23;\n" \ - "mov.u32 %r32,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r32;\n" \ - "call (%retval_in),GOACC_tid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r33,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r25,%r33;\n" \ - "add.u32 %r26,%r24,%r25;\n" \ - "mov.u32 %r27,%r26;\n" \ - "mov.u32 %retval,%r27;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index cedcc592bf0..9b846378a11 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -34,7 +34,6 @@ #include "openacc.h" #include "config.h" #include "libgomp-plugin.h" -#include "oacc-ptx.h" #include "oacc-plugin.h" #include "gomp-constants.h" @@ -750,35 +749,6 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs, if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r)); - char *abort_ptx = ABORT_PTX; - r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx, - strlen (abort_ptx) + 1, 0, 0, 0, 0); - if (r != CUDA_SUCCESS) - { - GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); - GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r)); - } - - char *acc_on_device_ptx = ACC_ON_DEVICE_PTX; - r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx, - strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0); - if (r != CUDA_SUCCESS) - { - GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); - GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s", - cuda_error (r)); - } - - char *goacc_internal_ptx = GOACC_INTERNAL_PTX; - r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx, - strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0); - if (r != CUDA_SUCCESS) - { - GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); - GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s", - cuda_error (r)); - } - for (; num_objs--; ptx_objs++) { /* cuLinkAddData's 'data' argument erroneously omits the const |