3840d5ccf7
..., and enable if 'flock' is available for serializing execution testing.
Regarding the default of 19 parallel slots, this turned out to be a local
minimum for wall time when testing this on:
$ uname -srvi
Linux 4.2.0-42-generic #49~14.04.1-Ubuntu SMP Wed Jun 29 20:22:11 UTC 2016 x86_64
$ grep '^model name' < /proc/cpuinfo | uniq -c
32 model name : Intel(R) Xeon(R) CPU E5-2640 v3 @ 2.60GHz
... in two configurations: case (a) standard configuration, no offloading
configured, case (b) offloading for GCN and nvptx configured but no devices
available. For both cases, default plus '-m32' variant.
$ \time make check-target-libgomp RUNTESTFLAGS="--target_board=unix\{,-m32\}"
Case (a), baseline:
6432.23user 332.38system 47:32.28elapsed 237%CPU (0avgtext+0avgdata 505044maxresident)k
6382.43user 319.21system 47:06.04elapsed 237%CPU (0avgtext+0avgdata 505172maxresident)k
This is what people have been complaining about, rightly so, in
<https://gcc.gnu.org/PR66005> "libgomp make check time is excessive" and
elsewhere.
Case (a), parallelized:
-j12 GCC_TEST_PARALLEL_SLOTS=10
3088.49user 267.74system 6:43.82elapsed 831%CPU (0avgtext+0avgdata 505188maxresident)k
-j15 GCC_TEST_PARALLEL_SLOTS=15
3308.08user 294.79system 5:56.04elapsed 1011%CPU (0avgtext+0avgdata 505360maxresident)k
-j17 GCC_TEST_PARALLEL_SLOTS=17
3539.93user 298.99system 5:27.86elapsed 1170%CPU (0avgtext+0avgdata 505112maxresident)k
-j18 GCC_TEST_PARALLEL_SLOTS=18
3697.50user 317.18system 5:14.63elapsed 1275%CPU (0avgtext+0avgdata 505360maxresident)k
-j19 GCC_TEST_PARALLEL_SLOTS=19
3765.94user 324.27system 5:13.22elapsed 1305%CPU (0avgtext+0avgdata 505128maxresident)k
-j20 GCC_TEST_PARALLEL_SLOTS=20
3684.66user 312.32system 5:15.26elapsed 1267%CPU (0avgtext+0avgdata 505100maxresident)k
-j23 GCC_TEST_PARALLEL_SLOTS=23
4040.59user 347.10system 5:29.12elapsed 1333%CPU (0avgtext+0avgdata 505200maxresident)k
-j26 GCC_TEST_PARALLEL_SLOTS=26
3973.24user 377.96system 5:24.70elapsed 1340%CPU (0avgtext+0avgdata 505160maxresident)k
-j32 GCC_TEST_PARALLEL_SLOTS=32
4004.42user 346.10system 5:16.11elapsed 1376%CPU (0avgtext+0avgdata 505160maxresident)k
Yay!
Case (b), baseline; 2+ h:
7227.58user 700.54system 2:14:33elapsed 98%CPU (0avgtext+0avgdata 994264maxresident)k
Case (b), parallelized:
-j12 GCC_TEST_PARALLEL_SLOTS=10
7377.46user 777.52system 16:06.63elapsed 843%CPU (0avgtext+0avgdata 994344maxresident)k
-j15 GCC_TEST_PARALLEL_SLOTS=15
8019.18user 721.42system 12:13.56elapsed 1191%CPU (0avgtext+0avgdata 994228maxresident)k
-j17 GCC_TEST_PARALLEL_SLOTS=17
8530.11user 716.95system 10:45.92elapsed 1431%CPU (0avgtext+0avgdata 994176maxresident)k
-j18 GCC_TEST_PARALLEL_SLOTS=18
8776.79user 645.89system 10:27.20elapsed 1502%CPU (0avgtext+0avgdata 994248maxresident)k
-j19 GCC_TEST_PARALLEL_SLOTS=19
9332.37user 641.76system 10:15.09elapsed 1621%CPU (0avgtext+0avgdata 994260maxresident)k
-j20 GCC_TEST_PARALLEL_SLOTS=20
9609.54user 789.88system 10:26.94elapsed 1658%CPU (0avgtext+0avgdata 994284maxresident)k
-j23 GCC_TEST_PARALLEL_SLOTS=23
10362.40user 911.14system 10:44.47elapsed 1749%CPU (0avgtext+0avgdata 994208maxresident)k
-j26 GCC_TEST_PARALLEL_SLOTS=26
11159.44user 850.99system 11:09.25elapsed 1794%CPU (0avgtext+0avgdata 994256maxresident)k
-j32 GCC_TEST_PARALLEL_SLOTS=32
11453.50user 939.52system 11:00.38elapsed 1876%CPU (0avgtext+0avgdata 994240maxresident)k
On my Dell Precision 7530 laptop:
$ uname -srvi
Linux 5.15.0-71-generic #78-Ubuntu SMP Tue Apr 18 09:00:29 UTC 2023 x86_64
$ grep '^model name' < /proc/cpuinfo | uniq -c
12 model name : Intel(R) Core(TM) i7-8850H CPU @ 2.60GHz
$ nvidia-smi -L
GPU 0: Quadro P1000 (UUID: GPU-e043973b-b52a-d02b-c066-a8fdbf64e8ea)
... in two configurations: case (c) standard configuration, no offloading
configured, case (d) offloading for nvptx configured and device available.
For both cases, only default variant, no '-m32'.
$ \time make check-target-libgomp
Case (c), baseline; roughly half of case (a) (just one variant):
1180.98user 110.80system 19:36.40elapsed 109%CPU (0avgtext+0avgdata 505148maxresident)k
1133.22user 111.08system 19:35.75elapsed 105%CPU (0avgtext+0avgdata 505212maxresident)k
Case (c), parallelized:
-j12 GCC_TEST_PARALLEL_SLOTS=2
1143.83user 110.76system 10:20.46elapsed 202%CPU (0avgtext+0avgdata 505216maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=6
1737.08user 143.94system 4:59.48elapsed 628%CPU (0avgtext+0avgdata 505200maxresident)k
1730.31user 143.02system 4:58.75elapsed 627%CPU (0avgtext+0avgdata 505152maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=8
2192.63user 169.34system 4:52.96elapsed 806%CPU (0avgtext+0avgdata 505216maxresident)k
2219.04user 167.67system 4:53.19elapsed 814%CPU (0avgtext+0avgdata 505152maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=10
2463.93user 184.98system 4:48.39elapsed 918%CPU (0avgtext+0avgdata 505200maxresident)k
2455.62user 183.68system 4:47.40elapsed 918%CPU (0avgtext+0avgdata 505216maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=12
2591.04user 192.64system 4:44.98elapsed 976%CPU (0avgtext+0avgdata 505216maxresident)k
2581.23user 195.21system 4:47.51elapsed 965%CPU (0avgtext+0avgdata 505212maxresident)k
-j20 GCC_TEST_PARALLEL_SLOTS=20 [oversubscribe]
2613.18user 199.51system 4:44.06elapsed 990%CPU (0avgtext+0avgdata 505216maxresident)k
Case (d), baseline (compared to case (b): only nvptx offloading compilation,
but also nvptx offloading execution); ~1 h:
2841.93user 653.68system 1:02:26elapsed 93%CPU (0avgtext+0avgdata 909792maxresident)k
2842.03user 654.39system 1:02:24elapsed 93%CPU (0avgtext+0avgdata 909880maxresident)k
Case (d), parallelized:
-j12 GCC_TEST_PARALLEL_SLOTS=2
2856.39user 606.87system 33:58.64elapsed 169%CPU (0avgtext+0avgdata 909948maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=6
3444.90user 666.86system 18:37.57elapsed 367%CPU (0avgtext+0avgdata 909856maxresident)k
3462.13user 667.13system 18:36.87elapsed 369%CPU (0avgtext+0avgdata 909872maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=8
3929.74user 716.22system 18:02.36elapsed 429%CPU (0avgtext+0avgdata 909832maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=10
4152.84user 736.16system 17:43.05elapsed 459%CPU (0avgtext+0avgdata 909872maxresident)k
-j12 GCC_TEST_PARALLEL_SLOTS=12
4209.60user 749.00system 17:35.20elapsed 469%CPU (0avgtext+0avgdata 909840maxresident)k
-j20 GCC_TEST_PARALLEL_SLOTS=20 [oversubscribe]
4255.54user 756.78system 17:29.06elapsed 477%CPU (0avgtext+0avgdata 909868maxresident)k
Worth noting is that with nvptx offloading, there is one execution test case
that times out ('libgomp.fortran/reverse-offload-5.f90'). This effectively
stalls progress for almost 5 min: other execution test cases quickly queue up
on the lock in all parallel slots. That's working as expected; just noting
this as it accordingly skews the wall time numbers.
PR testsuite/66005
libgomp/
* configure.ac: Look for 'flock'.
* testsuite/Makefile.am (gcc_test_parallel_slots): Enable parallel testing.
* testsuite/config/default.exp: Don't 'load_lib "standard.exp"' here...
* testsuite/lib/libgomp.exp: ... but here, instead.
(libgomp_load): Override for parallel testing.
* testsuite/libgomp-site-extra.exp.in (FLOCK): Set.
* configure: Regenerate.
* Makefile.in: Regenerate.
* testsuite/Makefile.in: Regenerate.
(cherry picked from commit 6c3b30ef9e0578509bdaf59c13da4a212fe6c2ba)
554 lines
16 KiB
Plaintext
554 lines
16 KiB
Plaintext
# DejaGnu's 'load_lib' has no proper library search paths, so everything that
# gcc-dg.exp wants to load has to be loaded explicitly.

# Load FILENAME from the GCC testsuite 'lib' directory (addressed relative to
# the global 'srcdir') and record FILENAME as a key of the global
# 'loaded_libs' array.
proc load_gcc_lib { filename } {
    global srcdir
    global loaded_libs

    set gcc_lib_path $srcdir/../../gcc/testsuite/lib/$filename
    load_file $gcc_lib_path
    set loaded_libs($filename) ""
}
|
|
|
|
load_lib dg.exp
load_lib standard.exp

# The files below are required to use gcc-dg.exp.  gcc-dg.exp itself should
# NOT be loaded until ${tool}_target_compile is defined, since it uses that
# to determine default LTO options.
#
# The list is processed strictly in the order given.
foreach libgomp_gcc_lib_file {
    multiline.exp
    prune.exp
    target-libpath.exp
    wrapper.exp
    target-supports.exp
    target-utils.exp
    gcc-defs.exp
    timeout.exp
    file-format.exp
    target-supports-dg.exp
    scanasm.exp
    scandump.exp
    scanlang.exp
    scanrtl.exp
    scansarif.exp
    scantree.exp
    scanltranstree.exp
    scanoffload.exp
    scanoffloadipa.exp
    scanoffloadtree.exp
    scanoffloadrtl.exp
    scanipa.exp
    scanwpaipa.exp
    timeout-dg.exp
    torture-options.exp
    fortran-modules.exp
} {
    load_gcc_lib $libgomp_gcc_lib_file
}
# Do not leave the loop variable behind in the enclosing scope.
unset libgomp_gcc_lib_file
|
|
|
|
# Load the test support file that is built during libgomp configuration.
# It is looked up in '.' for sequential testing and in '..' for parallel
# testing (that is, when GCC_RUNTEST_PARALLELIZE_DIR is set in the
# environment).
if { ![info exists ::env(GCC_RUNTEST_PARALLELIZE_DIR)] } {
    load_file libgomp-test-support.exp
} else {
    load_file ../libgomp-test-support.exp
}
|
|
|
|
set dg-do-what-default run

#
# GCC_UNDER_TEST is the compiler under test.
#

set libgomp_compile_options ""

# Determine the multilibs; TOOL_OPTIONS is passed along only when it is set.
if { ![info exists TOOL_OPTIONS] } {
    set multilibs [get_multilibs]
} else {
    set multilibs [get_multilibs $TOOL_OPTIONS]
}

#
# libgomp_init
#
|
|
|
|
# Initialize the global state used for libgomp testing: the locale
# environment, GCC_UNDER_TEST, tmpdir, CFLAGS, the library search path
# ('always_ld_library_path'), the flags passed to every compilation
# ('ALWAYS_CFLAGS'), and the diagnostic prefixes.  ARGS is accepted but not
# used.
proc libgomp_init { args } {
    global srcdir blddir objdir tool_root_dir
    global libgomp_initialized
    global tmpdir
    global gluefile wrap_flags
    global ALWAYS_CFLAGS CFLAGS
    global TOOL_EXECUTABLE TOOL_OPTIONS
    global GCC_UNDER_TEST
    global TESTING_IN_BUILD_TREE
    global target_triplet
    global always_ld_library_path
    global offload_additional_lib_paths offload_additional_options
    global gcc_warning_prefix gcc_error_prefix

    set blddir [lookfor_file [get_multilibs] libgomp]

    # Set LC_ALL and LANG to C so that we get the same error messages as
    # expected.
    setenv LC_ALL C
    setenv LANG C

    # Many hosts now default to a non-ASCII C locale, however, so they can
    # set a charset encoding here if they need.
    if { [ishost "*-*-cygwin*"] } {
        setenv LC_ALL C.ASCII
        setenv LANG C.ASCII
    }

    # Choose the compiler under test unless one has been set already.
    if { ![info exists GCC_UNDER_TEST] } {
        if { ![info exists TOOL_EXECUTABLE] } {
            set GCC_UNDER_TEST [find_gcc]
        } else {
            set GCC_UNDER_TEST $TOOL_EXECUTABLE
        }
    }

    if { ![info exists tmpdir] } {
        set tmpdir "/tmp"
    }

    if { [info exists gluefile] } {
        unset gluefile
    }

    if { ![info exists CFLAGS] } {
        set CFLAGS ""
    }

    # Locate libgcc.a so we don't need to account for different values of
    # SHLIB_EXT on different platforms.
    set libgcc_dir [lookfor_file $tool_root_dir gcc/libgcc.a]
    if { $libgcc_dir != "" } {
        set libgcc_dir [file dirname $libgcc_dir]
    }

    # Compute what needs to be put into LD_LIBRARY_PATH.
    set always_ld_library_path ".:${blddir}/.libs"

    if { $offload_additional_lib_paths != "" } {
        append always_ld_library_path "${offload_additional_lib_paths}"
    }

    # Compute what needs to be added to the existing LD_LIBRARY_PATH.
    if { $libgcc_dir != "" } {
        # Add AIX pthread directory first.
        if { [llength [glob -nocomplain ${libgcc_dir}/pthread/libgcc_s*.a]] >= 1 } {
            append always_ld_library_path ":${libgcc_dir}/pthread"
        }
        append always_ld_library_path ":${libgcc_dir}"
        set driver [lindex $GCC_UNDER_TEST 0]

        if { [is_remote host] == 0 && [which $driver] != 0 } {
            # Add every non-default multilib directory that holds a shared
            # libgcc_s.
            foreach multilib_entry "[exec $driver --print-multi-lib]" {
                set mldir ""
                regexp -- "\[a-z0-9=_/\.-\]*;" $multilib_entry mldir
                set mldir [string trimright $mldir "\;@"]
                if { "$mldir" == "." } {
                    continue
                }
                if { [llength [glob -nocomplain ${libgcc_dir}/${mldir}/libgcc_s*.so.*]] >= 1 } {
                    append always_ld_library_path ":${libgcc_dir}/${mldir}"
                }
            }
        }
    }

    set ALWAYS_CFLAGS ""
    if { $blddir != "" } {
        # Targets that use libgomp.a%s in their specs need the second -B
        # option for uninstalled testing.
        lappend ALWAYS_CFLAGS \
            "additional_flags=-B${blddir}/" \
            "additional_flags=-B${blddir}/.libs" \
            "additional_flags=-I${blddir}" \
            "ldflags=-L${blddir}/.libs"
    }
    # The top-level include directory, for gomp-constants.h, followed by the
    # parent of the testsuite source directory.
    lappend ALWAYS_CFLAGS \
        "additional_flags=-I${srcdir}/../../include" \
        "additional_flags=-I${srcdir}/.."

    # For build-tree testing, also consider the library paths used for
    # building.  For installed testing, we assume all that to be provided in
    # the sysroot.
    if { $blddir != "" } {
        # The `-fopenacc' and `-fopenmp' options imply `-pthread', and
        # that implies `-latomic' on some hosts, so wire in libatomic
        # build directories.
        if { [ishost "riscv*-*-linux*"] } {
            set shlib_ext [get_shlib_extension]
            set atomic_dir "${blddir}/../libatomic/.libs"
            if { [file exists "${atomic_dir}/libatomic.a"]
                 || [file exists "${atomic_dir}/libatomic.${shlib_ext}"] } {
                lappend ALWAYS_CFLAGS "additional_flags=-L${atomic_dir}"
                append always_ld_library_path ":${atomic_dir}"
            }
        }
    }

    # We use atomic operations in the testcases to validate results.
    if { ([istarget i?86-*-*] || [istarget x86_64-*-*])
         && [check_effective_target_ia32]
         && ![check_effective_target_cas_char] } {
        lappend ALWAYS_CFLAGS "additional_flags=-march=i486"
    }

    if { [istarget *-*-darwin*] } {
        lappend ALWAYS_CFLAGS "additional_flags=-shared-libgcc"
    }

    if { [istarget sparc*-*-*] } {
        lappend ALWAYS_CFLAGS "additional_flags=-mcpu=v9"
    }

    if { [info exists TOOL_OPTIONS] } {
        lappend ALWAYS_CFLAGS "additional_flags=$TOOL_OPTIONS"
    }

    # Diagnostics formatting:
    #  - make sure that lines are not wrapped, as that can confuse the
    #    error-message parsing machinery;
    #  - disable caret;
    #  - disable color diagnostics.
    lappend ALWAYS_CFLAGS \
        "additional_flags=-fmessage-length=0" \
        "additional_flags=-fno-diagnostics-show-caret" \
        "additional_flags=-fdiagnostics-color=never"

    # Help GCC to find offload compilers' 'mkoffload'.
    if { $offload_additional_options != "" } {
        lappend ALWAYS_CFLAGS "additional_flags=${offload_additional_options}"
    }

    # Tell warning from error diagnostics.  This fits for C, C++, and Fortran.
    set gcc_warning_prefix "\[Ww\]arning:"
    set gcc_error_prefix "(\[Ff\]atal )?\[Ee\]rror:"
}
|
|
|
|
#
|
|
# libgomp_target_compile -- compile a source file
|
|
#
|
|
|
|
# Compile SOURCE into DEST as TYPE via 'target_compile' and return its
# result.  OPTIONS is extended with language-specific flags (when
# 'lang_test_file' is set), status-wrapper flags, the libio include flags, a
# timeout, and the compiler under test; 'libgomp_compile_options' and then
# 'ALWAYS_CFLAGS' are put in front of it.
proc libgomp_target_compile { source dest type options } {
    global blddir
    global libgomp_compile_options
    global gluefile wrap_flags
    global ALWAYS_CFLAGS
    global GCC_UNDER_TEST
    global lang_test_file lang_library_path lang_link_flags
    global lang_include_flags lang_source_re

    if { [info exists lang_test_file] } {
        if { $blddir != "" } {
            # Some targets use libgfortran.a%s in their specs, so they need
            # a -B option for uninstalled testing.
            lappend options \
                "additional_flags=-B${blddir}/${lang_library_path}" \
                "ldflags=-L${blddir}/${lang_library_path}"
        }
        lappend options "ldflags=${lang_link_flags}"
        # Language include flags only apply to sources matching
        # 'lang_source_re'.
        if { [info exists lang_include_flags]
             && [regexp ${lang_source_re} ${source}] } {
            lappend options "additional_flags=${lang_include_flags}"
        }
    }

    if { [target_info needs_status_wrapper] != "" && [info exists gluefile] } {
        lappend options "libs=${gluefile}" "ldflags=${wrap_flags}"
    }

    lappend options \
        "additional_flags=[libio_include_flags]" \
        "timeout=[timeout_value]" \
        "compiler=$GCC_UNDER_TEST"

    # Final order: ALWAYS_CFLAGS, libgomp_compile_options, then everything
    # collected above.
    set options [concat $libgomp_compile_options $options]
    if { [info exists ALWAYS_CFLAGS] } {
        set options [concat "$ALWAYS_CFLAGS" $options]
    }

    set options [dg-additional-files-options $options $source]

    return [target_compile $source $dest $type $options]
}
|
|
|
|
# Print the help text for the '--additional_options' option via 'send_user'.
proc libgomp_option_help { } {
    set help_text " --additional_options,OPTIONS\t\tUse OPTIONS to compile the testcase files. OPTIONS should be comma-separated.\n"
    send_user $help_text
}
|
|
|
|
# Handle the '--additional_options,OPT1,OPT2,...' option.
#
# If OPTION starts with '--additional_options,', each comma-separated item
# after that prefix is appended to the global 'libgomp_compile_options' as
# "additional_flags=ITEM", and 1 is returned.  For any other OPTION, nothing
# is changed and 0 is returned.
proc libgomp_option_proc { option } {
    if { ![regexp -- "^--additional_options," $option] } {
        return 0
    }

    global libgomp_compile_options

    # The pattern begins with '-', so switch parsing has to be terminated
    # with '--' explicitly; otherwise 'regsub' takes the pattern for an
    # (invalid) switch and raises an error.
    regsub -- "^--additional_options," $option "" option
    foreach x [split $option ","] {
        lappend libgomp_compile_options "additional_flags=$x"
    }
    return 1
}
|
|
|
|
if { ![info exists ::env(GCC_RUNTEST_PARALLELIZE_DIR)] } {
    # No parallel testing.
} elseif { $FLOCK == "" } {
    # Using just one parallel slot.
} else {
    # Using several parallel slots.  Override DejaGnu
    # 'standard.exp:${tool}_load'...
    rename libgomp_load standard_libgomp_load

    # ... in order to serialize execution testing via an exclusive lock.
    #
    # Takes an exclusive lock on '../lock' using the program named by the
    # global FLOCK, runs 'standard_libgomp_load' with PROGRAM and any further
    # arguments, releases the lock, and returns the wrapped procedure's
    # result.  An error raised while waiting for the lock or while loading
    # is re-raised after the lock file has been closed.
    proc libgomp_load { program args } {
        global FLOCK
        global errorInfo errorCode

        set lock_file ../lock
        set lock_kind --exclusive
        set lock_fd [open $lock_file a+]
        set lock_clock_begin [clock seconds]

        if { [catch {
            exec $FLOCK $lock_kind 0 <@ $lock_fd
            set lock_clock_end [clock seconds]
            verbose -log "Got ${FLOCK}('$lock_file', '$lock_kind') at [clock format $lock_clock_end] after [expr $lock_clock_end - $lock_clock_begin] s" 2

            # Expand ARGS so that the wrapped procedure sees the same
            # argument list that we were called with, instead of a single
            # list-valued argument.
            set result [eval [list standard_libgomp_load $program] $args]
        } message] } {
            # Do not leak the descriptor (and with it the lock) on errors.
            set saved_info $errorInfo
            set saved_code $errorCode
            catch { close $lock_fd }
            error $message $saved_info $saved_code
        }

        # Unlock (implicit with 'close').
        close $lock_fd

        return $result
    }
}
|
|
|
|
# Translate offload target to OpenACC device type: 'amdgcn*' gives "radeon",
# 'nvptx*' gives "nvidia", and 'disable' gives "host".  Any other offload
# target raises an error.
proc offload_target_to_openacc_device_type { offload_target } {
    if { [string match "amdgcn*" $offload_target] } {
        return "radeon"
    } elseif { [string match "disable" $offload_target] } {
        return "host"
    } elseif { [string match "nvptx*" $offload_target] } {
        return "nvidia"
    }
    error "Unknown offload target: $offload_target"
}
|
|
|
|
# Return 1 if compiling for the offload target TARGET_NAME, 0 otherwise.
# Takes -foffload=... into account by checking the OFFLOAD_TARGET_NAMES=
# line in the compiler's '-v' output.
proc libgomp_check_effective_target_offload_target { target_name } {
    # Consider all actual options, including the flags passed to
    # 'gcc-dg-runtest', or 'gfortran-dg-runtest' (see the 'libgomp.*/*.exp'
    # files; in particular, '-foffload', 'libgomp.oacc-*/*.exp'), which don't
    # get passed on to 'check_effective_target_*' functions.  (Not caching
    # the result due to that.)
    set verbose_flags [concat "-v" [current_compiler_flags]]
    set options [list "additional_flags=$verbose_flags"]

    # Instead of inspecting command-line options, look what the compiler
    # driver decides.  This is somewhat modelled after
    # 'gcc/testsuite/lib/target-supports.exp:check_configured_with'.
    set gcc_output [libgomp_target_compile "" "" "none" $options]

    if { ![regexp "(?n)^OFFLOAD_TARGET_NAMES=(.*)" $gcc_output dummy gcc_offload_targets] } {
        verbose "not compiling for $target_name offload target"
        return 0
    }

    verbose "compiling for offload targets: $gcc_offload_targets"
    # Match TARGET_NAME as the prefix of one entry of the ':'-separated
    # list.
    return [string match "*:$target_name*:*" ":$gcc_offload_targets:"]
}
|
|
|
|
# Return 1 if compiling for any offload target.  Implemented by querying
# 'libgomp_check_effective_target_offload_target' with an empty target name.
proc check_effective_target_offload_target_any { } {
    set any_offload_target [libgomp_check_effective_target_offload_target ""]
    return $any_offload_target
}
|
|
|
|
# Return 1 if compiling for offload target nvptx.
proc check_effective_target_offload_target_nvptx { } {
    set for_nvptx [libgomp_check_effective_target_offload_target "nvptx"]
    return $for_nvptx
}
|
|
|
|
# Return 1 if compiling for offload target amdgcn.
proc check_effective_target_offload_target_amdgcn { } {
    set for_amdgcn [libgomp_check_effective_target_offload_target "amdgcn"]
    return $for_amdgcn
}
|
|
|
|
# Return 1 if offload device is available.  The probe program exits with the
# value of 'omp_is_initial_device ()' as evaluated inside an 'omp target'
# region.
proc check_effective_target_offload_device { } {
    set device_available [check_runtime_nocache offload_device_available_ {
      #include <omp.h>
      int main ()
        {
          int a;
          #pragma omp target map(from: a)
            a = omp_is_initial_device ();
          return a;
        }
    } ]
    return $device_available
}
|
|
|
|
# Return 1 if offload device is available and it has non-shared address
# space.  The probe program increments a 'map(to:)' variable inside an
# 'omp target' region and exits with zero status only if the host copy still
# holds its original value afterwards.
proc check_effective_target_offload_device_nonshared_as { } {
    set nonshared [check_runtime_nocache offload_device_nonshared_as {
      int main ()
        {
          int a = 8;
          #pragma omp target map(to: a)
            a++;
          return a != 8;
        }
    } ]
    return $nonshared
}
|
|
|
|
# Return 1 if offload device is available and it has shared address space.
# The probe program increments a 'map(to:)' variable inside an 'omp target'
# region and exits with zero status only if the host copy has changed
# afterwards.
proc check_effective_target_offload_device_shared_as { } {
    set shared [check_runtime_nocache offload_device_shared_as {
      int main ()
        {
          int x = 10;
          #pragma omp target map(to: x)
            x++;
          return x == 10;
        }
    } ]
    return $shared
}
|
|
|
|
# Return 1 if using nvptx offload device.  The probe program exits with the
# negated result of 'on_device_arch_nvptx ()' from 'on_device_arch.h'.
proc check_effective_target_offload_device_nvptx { } {
    set using_nvptx [check_runtime_nocache offload_device_nvptx {
      #include <omp.h>
      #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
      int main ()
        {
          return !on_device_arch_nvptx ();
        }
    } ]
    return $using_nvptx
}
|
|
|
|
# Return 1 if using a GCN offload device.  The probe program exits with the
# negated result of 'on_device_arch_gcn ()' from 'on_device_arch.h'.
proc check_effective_target_offload_device_gcn { } {
    set using_gcn [check_runtime_nocache offload_device_gcn {
      #include <omp.h>
      #include "testsuite/libgomp.c-c++-common/on_device_arch.h"
      int main ()
        {
          return !on_device_arch_gcn ();
        }
    } ]
    return $using_gcn
}
|
|
|
|
# Return 1 if at least one Nvidia GPU is accessible.  The probe program exits
# with zero status only if 'acc_get_num_devices (acc_device_nvidia)' is
# greater than zero.
proc check_effective_target_openacc_nvidia_accel_present { } {
    set nvidia_present [check_runtime openacc_nvidia_accel_present {
        #include <openacc.h>
        int main () {
        return !(acc_get_num_devices (acc_device_nvidia) > 0);
        }
    } "" ]
    return $nvidia_present
}
|
|
|
|
# Return 1 if at least one Nvidia GPU is accessible, and the OpenACC 'nvidia'
# device type is selected (that is, the global 'openacc_device_type' is
# "nvidia").
proc check_effective_target_openacc_nvidia_accel_selected { } {
    global openacc_device_type

    if { [check_effective_target_openacc_nvidia_accel_present] } {
        return [string match "nvidia" $openacc_device_type]
    }
    return 0
}
|
|
|
|
# Return 1 if the OpenACC 'host' device type is selected (that is, the global
# 'openacc_device_type' is "host").
proc check_effective_target_openacc_host_selected { } {
    global openacc_device_type

    set host_selected [string match "host" $openacc_device_type]
    return $host_selected
}
|
|
|
|
# Return 1 if at least one AMD GPU is accessible.  The probe program exits
# with zero status only if 'acc_get_num_devices (acc_device_radeon)' is
# greater than zero.
proc check_effective_target_openacc_radeon_accel_present { } {
    set radeon_present [check_runtime openacc_radeon_accel_present {
        #include <openacc.h>
        int main () {
        return !(acc_get_num_devices (acc_device_radeon) > 0);
        }
    } "" ]
    return $radeon_present
}
|
|
|
|
# Return 1 if at least one AMD GPU is accessible, and the OpenACC 'radeon'
# device type is selected (that is, the global 'openacc_device_type' is
# "radeon").
proc check_effective_target_openacc_radeon_accel_selected { } {
    global openacc_device_type

    if { [check_effective_target_openacc_radeon_accel_present] } {
        return [string match "radeon" $openacc_device_type]
    }
    return 0
}
|
|
|
|
# Return 1 if cuda.h and -lcuda are available, determined by building an
# executable that calls 'cuDeviceGet' and checking that the compiler emits no
# messages.
proc check_effective_target_openacc_cuda { } {
    set cuda_usable [check_no_compiler_messages openacc_cuda executable {
#include <cuda.h>
int main() {
    CUdevice dev;
    CUresult r = cuDeviceGet (&dev, 0);
    if (r != CUDA_SUCCESS)
        return 1;
    return 0;
} } "-lcuda" ]
    return $cuda_usable
}
|
|
|
|
# Return 1 if cublas_v2.h and -lcublas are available, determined by building
# an executable that calls 'cuDeviceGet' and 'cublasCreate' and checking that
# the compiler emits no messages.
proc check_effective_target_openacc_cublas { } {
    set cublas_usable [check_no_compiler_messages openacc_cublas executable {
#include <cuda.h>
#include <cublas_v2.h>
int main() {
    cublasStatus_t s;
    cublasHandle_t h;
    CUdevice dev;
    CUresult r = cuDeviceGet (&dev, 0);
    if (r != CUDA_SUCCESS)
        return 1;
    s = cublasCreate (&h);
    if (s != CUBLAS_STATUS_SUCCESS)
        return 1;
    return 0;
} } "-lcuda -lcublas" ]
    return $cublas_usable
}
|
|
|
|
# Return 1 if cuda_runtime_api.h and -lcudart are available, determined by
# building an executable that calls 'cuDeviceGet' and 'cudaGetDevice' and
# checking that the compiler emits no messages.
proc check_effective_target_openacc_cudart { } {
    set cudart_usable [check_no_compiler_messages openacc_cudart executable {
#include <cuda.h>
#include <cuda_runtime_api.h>
int main() {
    cudaError_t e;
    int devn;
    CUdevice dev;
    CUresult r = cuDeviceGet (&dev, 0);
    if (r != CUDA_SUCCESS)
        return 1;
    e = cudaGetDevice (&devn);
    if (e != cudaSuccess)
        return 1;
    return 0;
} } "-lcuda -lcudart" ]
    return $cudart_usable
}
|