diff --git a/include/RAJA/policy/sycl.hpp b/include/RAJA/policy/sycl.hpp index 4d9a7e1633..66eac6f939 100644 --- a/include/RAJA/policy/sycl.hpp +++ b/include/RAJA/policy/sycl.hpp @@ -29,9 +29,9 @@ #include "RAJA/policy/sycl/forall.hpp" #include "RAJA/policy/sycl/policy.hpp" #include "RAJA/policy/sycl/reduce.hpp" +#include "RAJA/policy/sycl/scan.hpp" +#include "RAJA/policy/sycl/sort.hpp" //#include "RAJA/policy/sycl/multi_reduce.hpp" -//#include "RAJA/policy/sycl/scan.hpp" -//#include "RAJA/policy/sycl/sort.hpp" #include "RAJA/policy/sycl/kernel.hpp" //#include "RAJA/policy/sycl/synchronize.hpp" #include "RAJA/policy/sycl/launch.hpp" diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp new file mode 100644 index 0000000000..81e5614b71 --- /dev/null +++ b/include/RAJA/policy/sycl/scan.hpp @@ -0,0 +1,242 @@ +/*! +****************************************************************************** +* +* \file +* +* \brief Header file providing RAJA scan declarations. +* +****************************************************************************** +*/ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-24, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_scan_sycl_HPP +#define RAJA_scan_sycl_HPP + +#include +#include +#include "RAJA/config.hpp" +#include "camp/resource/sycl.hpp" + +#if defined(RAJA_ENABLE_SYCL) + +#include "RAJA/policy/sycl/policy.hpp" + +namespace RAJA +{ +namespace impl +{ +namespace scan +{ + +template +RAJA_INLINE +camp::resources::EventProxy +inclusive_inplace( + camp::resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec, + InputIter begin, + InputIter end, + Function binary_op) +{ + ::sycl::queue* sycl_queue = sycl_res.get_queue(); + + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; + + // Calculate the size of the input range + size_t size = std::distance(begin, end); + + ::sycl::buffer buffer(begin, end); + ::sycl::buffer tempAccBuff(begin, ::sycl::range<1>(size)); + + int iterations = 0; + for (size_t ii = size >> 1; ii > 0; ii >>= 1) { + iterations++; + } + if ((size & (size - 1)) != 0) { + iterations++; + } + + auto buffPtr = &buffer; + auto tempPtr = &tempAccBuff; + + if (iterations % 2 == 0) { + tempPtr = &buffer; + buffPtr = &tempAccBuff; + } + + int ii = 1; + do { + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto buffAccessor = buffPtr->get_access(cgh); + auto tempAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t td = 1 << (ii - 1); + size_t thisID = idx.get_id(0); + if (thisID < size and thisID >= td) { + tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]); + } else { + tempAccessor[thisID] = buffAccessor[thisID]; + } + }); + }); + + std::swap(buffPtr, tempPtr); + ii++; + } while ( ii <= iterations); + + if (!Async) { sycl_res.wait(); } + return camp::resources::EventProxy(sycl_res); +} + +template +RAJA_INLINE +resources::EventProxy +exclusive_inplace( + resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec exec, + InputIter begin, + InputIter end, + Function binary_op, + TT initVal) +{ + ::sycl::queue* sycl_queue = sycl_res.get_queue(); + + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; + + + // Calculate the size of the input range + size_t size = std::distance(begin, end); + + ::sycl::buffer buffer(begin, end); + ::sycl::buffer tempAccBuff(begin, ::sycl::range<1>(size)); + + int iterations = 0; + for (size_t ii = size >> 1; ii > 0; ii >>= 1) { + iterations++; + } + if ((size & (size - 1)) != 0) { + iterations++; + } + + auto buffPtr = &buffer; + auto tempPtr = &tempAccBuff; + + if (iterations % 2 != 0) { + tempPtr = &buffer; + buffPtr = &tempAccBuff; + } + + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto inAccessor = buffPtr->get_access(cgh); + auto outAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t thisID = idx.get_id(0); + // size_t td = 1 << (ii - 1); + if (thisID > 0) { + outAccessor[thisID] = inAccessor[thisID - 1]; + } else { + outAccessor[thisID] = initVal; + } + }); + }); + + std::swap(buffPtr, tempPtr); + + int ii = 1; + do { + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto buffAccessor = buffPtr->get_access(cgh); + auto tempAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t td = 1 << (ii - 1); + size_t thisID = idx.get_id(0); + if (thisID < size and thisID >= td) { + tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]); + } else { + tempAccessor[thisID] = buffAccessor[thisID]; + } + }); + }); + + std::swap(buffPtr, tempPtr); + ii++; + } while ( ii <= iterations); + + if(!Async) { sycl_res.wait(); } + return camp::resources::EventProxy(sycl_res); + return inclusive_inplace(sycl_res, exec, begin, end, binary_op); +} + +template +RAJA_INLINE +resources::EventProxy +inclusive( + resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec exec, + InputIter begin, + InputIter end, + OutputIter out, + Function binary_op) +{ + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; + sycl_res.memcpy(out, begin, std::distance(begin, end) * sizeof(valueT)); + return inclusive_inplace(sycl_res, exec, out, out + std::distance(begin, end), binary_op); +} + +template +RAJA_INLINE +resources::EventProxy +exclusive( + resources::Sycl sycl_res, + ::RAJA::policy::sycl::sycl_exec exec, + InputIter begin, + InputIter end, + OutputIter out, + Function binary_op, + TT initVal) +{ + // using valueT = typename std::remove_reference::type; + using valueT = typename std::iterator_traits::value_type; + + sycl_res.memcpy(out, begin, std::distance(begin, end) * sizeof(valueT)); + return exclusive_inplace(sycl_res, exec, out, out + std::distance(begin, end), binary_op, initVal); +} + +} // namespace scan +} // namespace impl +} // namespace RAJA + +#endif // closing endif for RAJA enable Sycl guard + +#endif // closing endif for header include guard \ No newline at end of file diff --git a/test/functional/scan/CMakeLists.txt b/test/functional/scan/CMakeLists.txt index 87505f2b35..30a32b00bb 100644 --- a/test/functional/scan/CMakeLists.txt +++ b/test/functional/scan/CMakeLists.txt @@ -19,6 +19,10 @@ if(RAJA_ENABLE_HIP) list(APPEND SCAN_BACKENDS Hip) endif() +if(RAJA_ENABLE_SYCL) + list(APPEND SCAN_BACKENDS Sycl) +endif() + set(SCAN_TYPES Exclusive ExclusiveInplace Inclusive InclusiveInplace)