diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 7bdd21c82982d..d2f63aa260863 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6136,6 +6136,7 @@ def : Flag<["-"], "nogpulib">, def : Flag<["-"], "nocudalib">, Alias; def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Link the LLVM C Library for GPUs">; +def nogpuflangrt : Flag<["-"], "nogpuflangrt">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def nogpulibc : Flag<["-"], "nogpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def nodefaultlibs : Flag<["-"], "nodefaultlibs">, Visibility<[ClangOption, FlangOption]>; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9a1c454d5594a..8365861dfc123 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9409,7 +9409,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("--device-linker=" + TC.getTripleString() + "=" + "-lclang_rt.builtins")); bool HasFlangRT = HasCompilerRT && C.getDriver().IsFlangMode(); - if (HasFlangRT) + if (HasFlangRT && !Args.hasArg(options::OPT_nogpuflangrt)) CmdArgs.push_back( Args.MakeArgString("--device-linker=" + TC.getTripleString() + "=" + "-lflang_rt.runtime")); diff --git a/flang-rt/CMakeLists.txt b/flang-rt/CMakeLists.txt index 17a51759e1a0e..b65378f012ecd 100644 --- a/flang-rt/CMakeLists.txt +++ b/flang-rt/CMakeLists.txt @@ -152,6 +152,11 @@ if (NOT "${FLANG_RT_LIBCXX_PROVIDER}" IN_LIST FLANG_RT_SUPPORTED_PROVIDERS) endif () option(FLANG_RT_ENABLE_STATIC "Build Flang-RT as a static library." ON) +option(FLANG_RT_EMBED_GPU_LLVM_IR "Build Flang-RT as GPU LLVM IR library" ON) +if (FLANG_RT_EMBED_GPU_LLVM_IR) + add_compile_definitions(EMBED_FLANG_RT_GPU_LLVM_IR) +endif () + if (WIN32) # Windows DLL currently not implemented. set(FLANG_RT_ENABLE_SHARED OFF) diff --git a/flang-rt/include/flang-rt/runtime/lock.h b/flang-rt/include/flang-rt/runtime/lock.h index 7c88534245733..0cffcf5e5deab 100644 --- a/flang-rt/include/flang-rt/runtime/lock.h +++ b/flang-rt/include/flang-rt/runtime/lock.h @@ -23,7 +23,9 @@ #endif #if USE_PTHREADS +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include +#endif #elif defined(_WIN32) #include "flang/Common/windows-include.h" #else @@ -45,6 +47,7 @@ class Lock { RT_API_ATTRS void Drop() {} RT_API_ATTRS bool TakeIfNoDeadlock() { return true; } #elif USE_PTHREADS +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) Lock() { pthread_mutex_init(&mutex_, nullptr); } ~Lock() { pthread_mutex_destroy(&mutex_); } void Take() { @@ -68,6 +71,14 @@ class Lock { isBusy_ = false; pthread_mutex_unlock(&mutex_); } +#else + RT_API_ATTRS void Take(){} + RT_API_ATTRS bool TakeIfNoDeadlock() {return true;} + RT_API_ATTRS bool Try() {return true;} + RT_API_ATTRS void Drop() {} + Lock() {} + ~Lock() {} +#endif #elif defined(_WIN32) Lock() { InitializeCriticalSection(&cs_); } ~Lock() { DeleteCriticalSection(&cs_); } @@ -91,9 +102,11 @@ class Lock { #if RT_USE_PSEUDO_FILE_UNIT // No state. #elif USE_PTHREADS +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) pthread_mutex_t mutex_{}; volatile bool isBusy_{false}; volatile pthread_t holder_; +#endif #elif defined(_WIN32) CRITICAL_SECTION cs_; #else diff --git a/flang-rt/include/flang-rt/runtime/tools.h b/flang-rt/include/flang-rt/runtime/tools.h index 1939c4d907be4..1c5554515037f 100644 --- a/flang-rt/include/flang-rt/runtime/tools.h +++ b/flang-rt/include/flang-rt/runtime/tools.h @@ -42,6 +42,13 @@ #define RT_USE_PSEUDO_FILE_UNIT 1 #endif + +#if (defined(__AMDGPU__) || defined(__NVPTX__)) && defined(EMBED_FLANG_RT_GPU_LLVM_IR) +// Use the pseudo lock and pseudo file unit implementations +// for the device. +#define RT_USE_PSEUDO_LOCK 1 +#define RT_USE_PSEUDO_FILE_UNIT 1 +#endif namespace Fortran::runtime { class Terminator; diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index ef0f812eaca00..ad46e8d847e5b 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -178,7 +178,11 @@ else () endif () if ("${LLVM_RUNTIMES_TARGET}" MATCHES "^amdgcn|^nvptx") - set(sources ${gpu_sources}) + if (FLANG_RT_EMBED_GPU_LLVM_IR) + set(sources ${supported_sources} ${gpu_sources}) + else () + set(sources ${gpu_sources}) + endif () elseif(FLANG_RT_EXPERIMENTAL_OFFLOAD_SUPPORT STREQUAL "CUDA") set(sources ${supported_sources}) else () diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp index 303ec79de240c..4aa0d003dd163 100644 --- a/flang-rt/lib/runtime/assign.cpp +++ b/flang-rt/lib/runtime/assign.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "flang/Runtime/assign.h" +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include "flang/Runtime/stop.h" +#endif #include "flang-rt/runtime/assign-impl.h" #include "flang-rt/runtime/derived.h" #include "flang-rt/runtime/descriptor.h" @@ -862,17 +864,6 @@ void RTDEF(AssignPolymorphic)(Descriptor &to, const Descriptor &from, PolymorphicLHS); } -#if defined(OMP_OFFLOAD_BUILD) -// To support a recently added use of variant in the OpenMP offload build, -// added an abort wrapper which calls the flang-rt FortranAAbort. -// Avoids the following linker error: -// ld.lld: error: undefined symbol: abort -// >>> referenced by /tmp/device_aassign.amdgcn.gfx90a-34a7ed.img.lto.o:(std::__throw_bad_variant_access(char const*)) -extern "C" void abort(void) { - RTNAME(Abort)(); -} -#endif - RT_EXT_API_GROUP_END } // extern "C" } // namespace Fortran::runtime diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index c95da0a5371e5..147ff079fc2d9 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -8,7 +8,9 @@ #include "flang-rt/runtime/descriptor.h" #include "ISO_Fortran_util.h" +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include "memory.h" +#endif #include "flang-rt/runtime/allocator-registry.h" #include "flang-rt/runtime/derived.h" #include "flang-rt/runtime/stat.h" diff --git a/flang-rt/lib/runtime/edit-input.cpp b/flang-rt/lib/runtime/edit-input.cpp index 436fc3894d902..32ff42a4d0221 100644 --- a/flang-rt/lib/runtime/edit-input.cpp +++ b/flang-rt/lib/runtime/edit-input.cpp @@ -569,7 +569,11 @@ static RT_API_ATTRS void RaiseFPExceptions( #ifdef feraisexcept // a macro in some environments; omit std:: #define RAISE feraiseexcept #else +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined (EMBED_FLANG_RT_GPU_LLVM_IR) #define RAISE std::feraiseexcept +#else +#define RAISE +#endif #endif #endif // !defined(RT_DEVICE_COMPILATION) diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp index 97ac56236e799..17ac09f841a4f 100644 --- a/flang-rt/lib/runtime/environment.cpp +++ b/flang-rt/lib/runtime/environment.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#if (not defined (__AMDGPU__) && not defined(__NVPTX__)) || not defined (EMBED_FLANG_RT_GPU_LLVM_IR) #include "flang-rt/runtime/environment.h" #include "environment-default-list.h" #include "memory.h" @@ -314,3 +315,4 @@ bool RTNAME(RegisterConfigureEnv)( } // extern "C" } // namespace Fortran::runtime +#endif diff --git a/flang-rt/lib/runtime/file.cpp b/flang-rt/lib/runtime/file.cpp index 8255ec8691886..c3d9a5b8321a7 100644 --- a/flang-rt/lib/runtime/file.cpp +++ b/flang-rt/lib/runtime/file.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include "flang-rt/runtime/file.h" #include "flang-rt/runtime/memory.h" #include "flang-rt/runtime/tools.h" @@ -486,3 +487,4 @@ RT_API_ATTRS std::int64_t SizeInBytes(const char *path) { #endif // defined(RT_DEVICE_COMPILATION) } // namespace Fortran::runtime::io +#endif diff --git a/flang-rt/lib/runtime/io-api-minimal.cpp b/flang-rt/lib/runtime/io-api-minimal.cpp index f84b62d63baa1..7077c20f41b4e 100644 --- a/flang-rt/lib/runtime/io-api-minimal.cpp +++ b/flang-rt/lib/runtime/io-api-minimal.cpp @@ -23,6 +23,7 @@ namespace Fortran::runtime::io { RT_EXT_API_GROUP_BEGIN #endif +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) Cookie IODEF(BeginExternalListOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( @@ -33,6 +34,7 @@ enum Iostat IODEF(EndIoStatement)(Cookie cookie) { IoStatementState &io{*cookie}; return static_cast(io.EndIoStatement()); } +#endif template > inline RT_API_ATTRS bool FormattedScalarIntegerOutput( @@ -45,6 +47,7 @@ inline RT_API_ATTRS bool FormattedScalarIntegerOutput( } } +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { return FormattedScalarIntegerOutput<1>(*cookie, n, "OutputInteger8"); } @@ -60,6 +63,7 @@ bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) { return FormattedScalarIntegerOutput<8>(*cookie, n, "OutputInteger64"); } +#endif #ifdef __SIZEOF_INT128__ bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) { @@ -79,6 +83,7 @@ inline RT_API_ATTRS bool FormattedScalarRealOutput( } } +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) bool IODEF(OutputReal32)(Cookie cookie, float x) { return FormattedScalarRealOutput<4>(*cookie, x, "OutputReal32"); } @@ -86,6 +91,7 @@ bool IODEF(OutputReal32)(Cookie cookie, float x) { bool IODEF(OutputReal64)(Cookie cookie, double x) { return FormattedScalarRealOutput<8>(*cookie, x, "OutputReal64"); } +#endif template ::BinaryFloatingPoint> @@ -110,6 +116,7 @@ inline RT_API_ATTRS bool FormattedScalarComplexOutput( return false; } +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) { return FormattedScalarComplexOutput<4>(*cookie, re, im, "OutputComplex32"); } @@ -145,6 +152,7 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) { return false; } } +#endif } // namespace Fortran::runtime::io diff --git a/flang-rt/lib/runtime/io-api.cpp b/flang-rt/lib/runtime/io-api.cpp index 4c86fb9fdabf6..4eabde60a3a72 100644 --- a/flang-rt/lib/runtime/io-api.cpp +++ b/flang-rt/lib/runtime/io-api.cpp @@ -199,12 +199,14 @@ RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, } } +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) Cookie IODEF(BeginExternalFormattedOutput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, formatDescriptor, unitNumber, sourceFile, sourceLine); } +#endif Cookie IODEF(BeginExternalFormattedInput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, diff --git a/flang-rt/lib/runtime/stop.cpp b/flang-rt/lib/runtime/stop.cpp index a12e9f14d90ec..87681dc8a1b1e 100644 --- a/flang-rt/lib/runtime/stop.cpp +++ b/flang-rt/lib/runtime/stop.cpp @@ -13,11 +13,15 @@ #include "flang-rt/runtime/file.h" #include "flang-rt/runtime/io-error.h" #include "flang-rt/runtime/terminator.h" +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include "flang/Runtime/main.h" +#endif #include #include #include +#if not defined(__AMDGPU__) && not defined(__NVPTX__) #include +#endif #ifdef HAVE_BACKTRACE #include BACKTRACE_HEADER @@ -26,6 +30,7 @@ extern "C" { [[maybe_unused]] static void DescribeIEEESignaledExceptions() { +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #ifdef fetestexcept // a macro in some environments; omit std:: auto excepts{fetestexcept(FE_ALL_EXCEPT)}; #else @@ -60,6 +65,7 @@ extern "C" { #endif std::fputc('\n', stderr); } +#endif } static void CloseAllExternalUnits(const char *why) { @@ -67,6 +73,7 @@ static void CloseAllExternalUnits(const char *why) { Fortran::runtime::io::ExternalFileUnit::CloseAll(handler); } +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) [[noreturn]] RT_API_ATTRS void RTNAME(StopStatement)( int code, bool isErrorStop, bool quiet) { #if defined(RT_DEVICE_COMPILATION) @@ -103,7 +110,9 @@ static void CloseAllExternalUnits(const char *why) { std::exit(code); #endif } +#endif +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) [[noreturn]] RT_API_ATTRS void RTNAME(StopStatementText)( const char *code, std::size_t length, bool isErrorStop, bool quiet) { #if defined(RT_DEVICE_COMPILATION) @@ -136,6 +145,7 @@ static void CloseAllExternalUnits(const char *why) { } #endif } +#endif static bool StartPause() { if (Fortran::runtime::io::IsATerminal(0)) { @@ -218,13 +228,14 @@ static RT_NOINLINE_ATTR void PrintBacktrace() { #endif } - +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) [[noreturn]] RT_OPTNONE_ATTR void RTNAME(Abort)() { #ifdef HAVE_BACKTRACE PrintBacktrace(); #endif std::abort(); } +#endif RT_OPTNONE_ATTR void FORTRAN_PROCEDURE_NAME(backtrace)() { PrintBacktrace(); } diff --git a/flang/include/flang/Runtime/extensions.h b/flang/include/flang/Runtime/extensions.h index 9fd3e118a0f22..b357ca9d8fb3c 100644 --- a/flang/include/flang/Runtime/extensions.h +++ b/flang/include/flang/Runtime/extensions.h @@ -18,10 +18,13 @@ #define FORTRAN_PROCEDURE_NAME(name) name##_ -#ifdef _WIN32 +#if defined (_WIN32) // UID and GID don't exist on Windows, these exist to avoid errors. typedef std::uint32_t uid_t; typedef std::uint32_t gid_t; +#elif (defined(__AMDGPU__) || defined(__NVPTX__)) && defined (EMBED_FLANG_RT_GPU_LLVM_IR) +typedef std::uint32_t uid_t; +typedef std::uint32_t gid_t; #else #include "sys/types.h" //pid_t #endif diff --git a/flang/include/flang/Runtime/main.h b/flang/include/flang/Runtime/main.h index 40f7693221b6a..696ce466e67fc 100644 --- a/flang/include/flang/Runtime/main.h +++ b/flang/include/flang/Runtime/main.h @@ -11,11 +11,15 @@ #include "flang/Runtime/c-or-cpp.h" #include "flang/Runtime/entry-names.h" +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) #include +#endif struct EnvironmentDefaultList; +#if (not defined(__AMDGPU__) && not defined(__NVPTX__)) || not defined(EMBED_FLANG_RT_GPU_LLVM_IR) std::thread::id RTNAME(GetMainThreadId)(); +#endif FORTRAN_EXTERN_C_BEGIN void RTNAME(ProgramStart)( diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 14718e2090bde..c55b477949a78 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -69,6 +69,7 @@ if(NOT LIBC_NAMESPACE MATCHES "^__llvm_libc") message(FATAL_ERROR "Invalid LIBC_NAMESPACE. Must start with '__llvm_libc' was '${LIBC_NAMESPACE}'") endif() +string(REPLACE "." "_" LIBC_NAMESPACE "${LIBC_NAMESPACE}") message(STATUS "Setting LIBC_NAMESPACE namespace to '${LIBC_NAMESPACE}'") add_compile_definitions(LIBC_NAMESPACE=${LIBC_NAMESPACE}) diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp index 5028fc88fe46d..b9f8e01198c33 100644 --- a/libcxx/src/string.cpp +++ b/libcxx/src/string.cpp @@ -360,9 +360,11 @@ wstring to_wstring(unsigned long val) { return i_to_string(val); } wstring to_wstring(unsigned long long val) { return i_to_string(val); } #endif +#if not defined(__AMDGPU__) && not defined(__NVPTX__) string to_string(float val) { return as_string(snprintf, initial_string< string>()(), "%f", val); } string to_string(double val) { return as_string(snprintf, initial_string< string>()(), "%f", val); } string to_string(long double val) { return as_string(snprintf, initial_string< string>()(), "%Lf", val); } +#endif #if _LIBCPP_HAS_WIDE_CHARACTERS wstring to_wstring(float val) { return as_string(get_swprintf(), initial_string()(), L"%f", val); } diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index 198e2add6b2d3..fad361d943e91 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -48,7 +48,6 @@ if(OFFLOAD_ENABLE_EMISSARY_APIS AND LIBOMPTARGET_BUILD_DEVICE_FORTRT) target_link_libraries(PluginCommon PRIVATE flang_rt.runtime -L${CMAKE_BINARY_DIR}/../../lib -L${CMAKE_INSTALL_PREFIX}/lib) endif() - if (OMPT_TARGET_DEFAULT AND LIBOMPTARGET_OMPT_SUPPORT) add_library(PluginOmpt STATIC OMPT/OmptTracing.cpp OMPT/OmptProfiler.cpp) target_include_directories(PluginOmpt PUBLIC