Skip to content

[SYCL] Move the NDRDescT class to a public header #19363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,101 @@ class __SYCL_EXPORT SubmissionInfo {
ext::oneapi::experimental::event_mode_enum::none;
};

// The structure represents NDRange - global, local sizes, global offset and
// number of dimensions.

// TODO: A lot of tests rely on particular values to be set for dimensions that
// are not used. To clarify, for example, if a 2D kernel is invoked, in
// NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0
// depending on which constructor is used for no clear reason.
// Instead, only sensible defaults should be used and tests should be updated
// to reflect this.
class NDRDescT {

public:
  // Plain aggregate-style holder: all state is in the public members below,
  // so the compiler-generated special members are exactly right.
  NDRDescT() = default;
  NDRDescT(const NDRDescT &Desc) = default;
  NDRDescT(NDRDescT &&Desc) = default;

  /// Builds a descriptor from a single range.
  /// \param N                either the global size or the number of
  ///                         work-groups, depending on \p SetNumWorkGroups.
  /// \param SetNumWorkGroups when true, \p N is recorded as the work-group
  ///                         count (parallel_for_work_group form) and the
  ///                         global size is left zeroed; when false, \p N is
  ///                         the global size and unused dimensions are padded
  ///                         with 1.
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} {
    if (SetNumWorkGroups) {
      for (int Idx = 0; Idx < Dims_; ++Idx)
        NumWorkGroups[Idx] = N[Idx];
      return;
    }
    // Single pass: copy the used dimensions, pad the rest with 1.
    for (int Idx = 0; Idx < 3; ++Idx)
      GlobalSize[Idx] = Idx < Dims_ ? N[Idx] : 1;
  }

  /// Builds a full ND-range descriptor from global size, local size and
  /// global offset. Unused dimensions of the global size are padded with 1;
  /// unused dimensions of the local size mirror whether a local size was
  /// actually provided (1 if LocalSizes[0] is non-zero, 0 otherwise). The
  /// offset for unused dimensions keeps its zero default.
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::range<Dims_> LocalSizes,
           sycl::id<Dims_> Offset)
      : Dims{size_t(Dims_)} {
    const size_t LocalPad = LocalSizes[0] ? 1 : 0;
    for (int Idx = 0; Idx < 3; ++Idx) {
      if (Idx < Dims_) {
        GlobalSize[Idx] = NumWorkItems[Idx];
        LocalSize[Idx] = LocalSizes[Idx];
        GlobalOffset[Idx] = Offset[Idx];
      } else {
        GlobalSize[Idx] = 1;
        LocalSize[Idx] = LocalPad;
      }
    }
  }

  /// Builds a descriptor from a global size and an offset only. Note: unlike
  /// the other range-based constructors, unused dimensions are NOT padded
  /// here and keep their zero defaults (see the TODO above the class).
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset)
      : Dims{size_t(Dims_)} {
    for (int Idx = 0; Idx < Dims_; ++Idx) {
      GlobalSize[Idx] = NumWorkItems[Idx];
      GlobalOffset[Idx] = Offset[Idx];
    }
  }

  /// Convenience: unpack a sycl::nd_range into its three components.
  template <int Dims_>
  NDRDescT(sycl::nd_range<Dims_> ExecutionRange)
      : NDRDescT(ExecutionRange.get_global_range(),
                 ExecutionRange.get_local_range(),
                 ExecutionRange.get_offset()) {}

  /// Convenience: a bare range is a global size (not a work-group count).
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> Range)
      : NDRDescT(Range, /*SetNumWorkGroups=*/false) {}

  /// Records cluster-launch dimensions. The cluster range must have the same
  /// dimensionality as the ranges this descriptor was built with.
  template <int Dims_> void setClusterDimensions(sycl::range<Dims_> N) {
    if (Dims != size_t(Dims_))
      throw std::runtime_error(
          "Dimensionality of cluster, global and local ranges must be same");

    for (int Idx = 0; Idx < Dims_; ++Idx)
      ClusterDimensions[Idx] = N[Idx];
  }

  NDRDescT &operator=(const NDRDescT &Desc) = default;
  NDRDescT &operator=(NDRDescT &&Desc) = default;

  std::array<size_t, 3> GlobalSize{0, 0, 0};
  std::array<size_t, 3> LocalSize{0, 0, 0};
  std::array<size_t, 3> GlobalOffset{0, 0, 0};
  /// Number of workgroups, used to record the number of workgroups from the
  /// simplest form of parallel_for_work_group. If set, all other fields must be
  /// zero
  std::array<size_t, 3> NumWorkGroups{0, 0, 0};
  std::array<size_t, 3> ClusterDimensions{1, 1, 1};
  size_t Dims = 0;
};

} // namespace v1
} // namespace detail

Expand Down
99 changes: 2 additions & 97 deletions sycl/source/detail/cg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,101 +60,6 @@ class ArgDesc {
int MIndex;
};

// The structure represents NDRange - global, local sizes, global offset and
// number of dimensions.

// TODO: A lot of tests rely on particular values to be set for dimensions that
// are not used. To clarify, for example, if a 2D kernel is invoked, in
// NDRDescT, the value of index 2 in GlobalSize must be set to either 1 or 0
// depending on which constructor is used for no clear reason.
// Instead, only sensible defaults should be used and tests should be updated
// to reflect this.
class NDRDescT {

public:
  // All state lives in the public members below, so the defaulted special
  // members are sufficient.
  NDRDescT() = default;
  NDRDescT(const NDRDescT &Desc) = default;
  NDRDescT(NDRDescT &&Desc) = default;

  // Constructs from a single range. When SetNumWorkGroups is true, N is
  // stored as the work-group count (parallel_for_work_group form) and
  // GlobalSize keeps its zero defaults; otherwise N is the global size and
  // the unused trailing dimensions are padded with 1.
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> N, bool SetNumWorkGroups) : Dims{size_t(Dims_)} {
    if (SetNumWorkGroups) {
      for (size_t I = 0; I < Dims_; ++I) {
        NumWorkGroups[I] = N[I];
      }
    } else {
      for (size_t I = 0; I < Dims_; ++I) {
        GlobalSize[I] = N[I];
      }

      // Pad unused global-size dimensions with 1.
      for (int I = Dims_; I < 3; ++I) {
        GlobalSize[I] = 1;
      }
    }
  }

  // Constructs a full ND-range: global size, local size and global offset.
  // Unused global-size dimensions are padded with 1; unused local-size
  // dimensions are padded with 1 only if a local size was provided
  // (LocalSizes[0] non-zero), otherwise 0. GlobalOffset keeps its zero
  // defaults for unused dimensions.
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::range<Dims_> LocalSizes,
           sycl::id<Dims_> Offset)
      : Dims{size_t(Dims_)} {
    for (size_t I = 0; I < Dims_; ++I) {
      GlobalSize[I] = NumWorkItems[I];
      LocalSize[I] = LocalSizes[I];
      GlobalOffset[I] = Offset[I];
    }

    for (int I = Dims_; I < 3; ++I) {
      LocalSize[I] = LocalSizes[0] ? 1 : 0;
    }

    for (int I = Dims_; I < 3; ++I) {
      GlobalSize[I] = 1;
    }
  }

  // Constructs from a global size and offset only. Note that, unlike the
  // other range-based constructors, unused dimensions are NOT padded here
  // and keep their zero defaults (see the TODO above the class).
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> NumWorkItems, sycl::id<Dims_> Offset)
      : Dims{size_t(Dims_)} {
    for (size_t I = 0; I < Dims_; ++I) {
      GlobalSize[I] = NumWorkItems[I];
      GlobalOffset[I] = Offset[I];
    }
  }

  // Convenience constructor: unpacks a sycl::nd_range into its components.
  template <int Dims_>
  NDRDescT(sycl::nd_range<Dims_> ExecutionRange)
      : NDRDescT(ExecutionRange.get_global_range(),
                 ExecutionRange.get_local_range(),
                 ExecutionRange.get_offset()) {}

  // Convenience constructor: a bare range is a global size, not a
  // work-group count.
  template <int Dims_>
  NDRDescT(sycl::range<Dims_> Range)
      : NDRDescT(Range, /*SetNumWorkGroups=*/false) {}

  // Records cluster-launch dimensions; throws if the cluster range's
  // dimensionality does not match this descriptor's.
  template <int Dims_> void setClusterDimensions(sycl::range<Dims_> N) {
    if (this->Dims != size_t(Dims_)) {
      throw std::runtime_error(
          "Dimensionality of cluster, global and local ranges must be same");
    }

    for (int I = 0; I < Dims_; ++I)
      ClusterDimensions[I] = N[I];
  }

  NDRDescT &operator=(const NDRDescT &Desc) = default;
  NDRDescT &operator=(NDRDescT &&Desc) = default;

  std::array<size_t, 3> GlobalSize{0, 0, 0};
  std::array<size_t, 3> LocalSize{0, 0, 0};
  std::array<size_t, 3> GlobalOffset{0, 0, 0};
  /// Number of workgroups, used to record the number of workgroups from the
  /// simplest form of parallel_for_work_group. If set, all other fields must be
  /// zero
  std::array<size_t, 3> NumWorkGroups{0, 0, 0};
  std::array<size_t, 3> ClusterDimensions{1, 1, 1};
  size_t Dims = 0;
};

/// Base class for all types of command groups.
class CG {
public:
Expand Down Expand Up @@ -248,7 +153,7 @@ class CG {
class CGExecKernel : public CG {
public:
/// Stores ND-range description.
NDRDescT MNDRDesc;
detail::v1::NDRDescT MNDRDesc;
std::shared_ptr<HostKernelBase> MHostKernel;
std::shared_ptr<detail::kernel_impl> MSyclKernel;
std::shared_ptr<detail::kernel_bundle_impl> MKernelBundle;
Expand All @@ -265,7 +170,7 @@ class CGExecKernel : public CG {
bool MKernelUsesClusterLaunch = false;
size_t MKernelWorkGroupMemorySize = 0;

CGExecKernel(NDRDescT NDRDesc, std::shared_ptr<HostKernelBase> HKernel,
CGExecKernel(detail::v1::NDRDescT NDRDesc, std::shared_ptr<HostKernelBase> HKernel,
std::shared_ptr<detail::kernel_impl> SyclKernel,
std::shared_ptr<detail::kernel_bundle_impl> KernelBundle,
CG::StorageInitHelper CGData, std::vector<ArgDesc> Args,
Expand Down
10 changes: 5 additions & 5 deletions sycl/source/detail/error_handling/error_handling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ inline namespace _V1 {
namespace detail::enqueue_kernel_launch {

void handleOutOfResources(const device_impl &DeviceImpl,
ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) {
ur_kernel_handle_t Kernel, const detail::v1::NDRDescT &NDRDesc) {
sycl::platform Platform = DeviceImpl.get_platform();
sycl::backend Backend = Platform.get_backend();
if (Backend == sycl::backend::ext_oneapi_cuda) {
Expand Down Expand Up @@ -75,7 +75,7 @@ void handleOutOfResources(const device_impl &DeviceImpl,

void handleInvalidWorkGroupSize(const device_impl &DeviceImpl,
ur_kernel_handle_t Kernel,
const NDRDescT &NDRDesc) {
const detail::v1::NDRDescT &NDRDesc) {
sycl::platform Platform = DeviceImpl.get_platform();

// Some of the error handling below is special for particular OpenCL
Expand Down Expand Up @@ -349,7 +349,7 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl,
}

void handleInvalidWorkItemSize(const device_impl &DeviceImpl,
const NDRDescT &NDRDesc) {
const detail::v1::NDRDescT &NDRDesc) {

const AdapterPtr &Adapter = DeviceImpl.getAdapter();
ur_device_handle_t Device = DeviceImpl.getHandleRef();
Expand All @@ -370,7 +370,7 @@ void handleInvalidWorkItemSize(const device_impl &DeviceImpl,
}

void handleInvalidValue(const device_impl &DeviceImpl,
const NDRDescT &NDRDesc) {
const detail::v1::NDRDescT &NDRDesc) {
const AdapterPtr &Adapter = DeviceImpl.getAdapter();
ur_device_handle_t Device = DeviceImpl.getHandleRef();

Expand All @@ -393,7 +393,7 @@ void handleInvalidValue(const device_impl &DeviceImpl,
}

void handleErrorOrWarning(ur_result_t Error, const device_impl &DeviceImpl,
ur_kernel_handle_t Kernel, const NDRDescT &NDRDesc) {
ur_kernel_handle_t Kernel, const detail::v1::NDRDescT &NDRDesc) {
assert(Error != UR_RESULT_SUCCESS &&
"Success is expected to be handled on caller side");
switch (Error) {
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/error_handling/error_handling.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace enqueue_kernel_launch {
/// This function actually never returns and always throws an exception with
/// error description.
void handleErrorOrWarning(ur_result_t, const device_impl &, ur_kernel_handle_t,
const NDRDescT &);
const detail::v1::NDRDescT &);
} // namespace enqueue_kernel_launch

namespace kernel_get_group_info {
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/graph/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1573,7 +1573,7 @@ void exec_graph_impl::populateURKernelUpdateStructs(
std::vector<ur_kernel_arg_mem_obj_properties_t> &MemobjProps,
std::vector<ur_exp_command_buffer_update_pointer_arg_desc_t> &PtrDescs,
std::vector<ur_exp_command_buffer_update_value_arg_desc_t> &ValueDescs,
sycl::detail::NDRDescT &NDRDesc,
sycl::detail::v1::NDRDescT &NDRDesc,
ur_exp_command_buffer_update_kernel_launch_desc_t &UpdateDesc) const {
sycl::detail::context_impl &ContextImpl =
*sycl::detail::getSyclObjImpl(MContext);
Expand Down Expand Up @@ -1800,7 +1800,7 @@ void exec_graph_impl::updateURImpl(
PtrDescsList(NumUpdatableNodes);
std::vector<std::vector<ur_exp_command_buffer_update_value_arg_desc_t>>
ValueDescsList(NumUpdatableNodes);
std::vector<sycl::detail::NDRDescT> NDRDescList(NumUpdatableNodes);
std::vector<sycl::detail::v1::NDRDescT> NDRDescList(NumUpdatableNodes);
std::vector<ur_exp_command_buffer_update_kernel_launch_desc_t> UpdateDescList(
NumUpdatableNodes);
std::vector<FastKernelCacheValPtr> KernelBundleObjList(NumUpdatableNodes);
Expand Down
3 changes: 1 addition & 2 deletions sycl/source/detail/graph/graph_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class handler;
namespace detail {
class SYCLMemObjT;
class queue_impl;
class NDRDescT;
class ArgDesc;
class CG;
} // namespace detail
Expand Down Expand Up @@ -869,7 +868,7 @@ class exec_graph_impl {
std::vector<ur_kernel_arg_mem_obj_properties_t> &MemobjProps,
std::vector<ur_exp_command_buffer_update_pointer_arg_desc_t> &PtrDescs,
std::vector<ur_exp_command_buffer_update_value_arg_desc_t> &ValueDescs,
sycl::detail::NDRDescT &NDRDesc,
sycl::detail::v1::NDRDescT &NDRDesc,
ur_exp_command_buffer_update_kernel_launch_desc_t &UpdateDesc) const;

/// Execution schedule of nodes in the graph.
Expand Down
4 changes: 2 additions & 2 deletions sycl/source/detail/graph/node_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ class node_impl : public std::enable_shared_from_this<node_impl> {
"the node was originally created with.");
}

NDRDesc = sycl::detail::NDRDescT{ExecutionRange};
NDRDesc = sycl::detail::v1::NDRDescT{ExecutionRange};
}

template <int Dimensions> void updateRange(range<Dimensions> ExecutionRange) {
Expand All @@ -458,7 +458,7 @@ class node_impl : public std::enable_shared_from_this<node_impl> {
"the node was originally created with.");
}

NDRDesc = sycl::detail::NDRDescT{ExecutionRange};
NDRDesc = sycl::detail::v1::NDRDescT{ExecutionRange};
}
/// Update this node with the command-group from another node.
/// @param Other The other node to update, must be of the same node type.
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/handler_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class handler_impl {
std::vector<detail::ArgDesc> MAssociatedAccesors;

/// Struct that encodes global size, local size, ...
detail::NDRDescT MNDRDesc;
detail::v1::NDRDescT MNDRDesc;

/// Type of the command group, e.g. kernel, fill. Can also encode version.
/// Use getType and setType methods to access this variable unless
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -862,7 +862,7 @@ static void setSpecializationConstants(device_image_impl &InputImpl,
// its ref count incremented.
ur_program_handle_t ProgramManager::getBuiltURProgram(
context_impl &ContextImpl, device_impl &DeviceImpl,
KernelNameStrRefT KernelName, const NDRDescT &NDRDesc) {
KernelNameStrRefT KernelName, const detail::v1::NDRDescT &NDRDesc) {
device_impl *RootDevImpl;
ur_bool_t MustBuildOnSubdevice = true;

Expand Down Expand Up @@ -1124,7 +1124,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
FastKernelCacheValPtr ProgramManager::getOrCreateKernel(
context_impl &ContextImpl, device_impl &DeviceImpl,
KernelNameStrRefT KernelName,
KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc) {
KernelNameBasedCacheT *KernelNameBasedCachePtr, const detail::v1::NDRDescT &NDRDesc) {
if constexpr (DbgProgMgr > 0) {
std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl
<< ", " << &DeviceImpl << ", " << KernelName << ")\n";
Expand Down Expand Up @@ -3593,7 +3593,7 @@ std::optional<sycl::exception> checkDevSupportJointMatrixMad(
std::optional<sycl::exception>
checkDevSupportDeviceRequirements(const device_impl &Dev,
const RTDeviceBinaryImage &Img,
const NDRDescT &NDRDesc) {
const detail::v1::NDRDescT &NDRDesc) {
auto getPropIt = [&Img](const std::string &PropName) {
auto &PropRange = Img.getDeviceRequirements();
RTDeviceBinaryImage::PropertyRange::ConstIterator PropIt = std::find_if(
Expand Down
6 changes: 3 additions & 3 deletions sycl/source/detail/program_manager/program_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ bool doesDevSupportDeviceRequirements(const device_impl &Dev,
std::optional<sycl::exception>
checkDevSupportDeviceRequirements(const device_impl &Dev,
const RTDeviceBinaryImage &BinImages,
const NDRDescT &NDRDesc = {});
const detail::v1::NDRDescT &NDRDesc = {});

bool doesImageTargetMatchDevice(const RTDeviceBinaryImage &Img,
const device_impl &DevImpl);
Expand Down Expand Up @@ -179,7 +179,7 @@ class ProgramManager {
ur_program_handle_t getBuiltURProgram(context_impl &ContextImpl,
device_impl &DeviceImpl,
KernelNameStrRefT KernelName,
const NDRDescT &NDRDesc = {});
const detail::v1::NDRDescT &NDRDesc = {});

/// Builds a program from a given set of images or retrieves that program from
/// cache.
Expand All @@ -201,7 +201,7 @@ class ProgramManager {
getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl,
KernelNameStrRefT KernelName,
KernelNameBasedCacheT *KernelNameBasedCachePtr,
const NDRDescT &NDRDesc = {});
const detail::v1::NDRDescT &NDRDesc = {});

ur_kernel_handle_t getCachedMaterializedKernel(
KernelNameStrRefT KernelName,
Expand Down
Loading
Loading