diff --git a/src/opencl/cl_algo_registry.cpp b/src/opencl/cl_algo_registry.cpp index 65726579b..8905e089b 100644 --- a/src/opencl/cl_algo_registry.cpp +++ b/src/opencl/cl_algo_registry.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -37,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +95,16 @@ namespace spla { g_registry->add(MAKE_KEY_CL_0("mxmT_masked", INT), std::make_shared>()); g_registry->add(MAKE_KEY_CL_0("mxmT_masked", UINT), std::make_shared>()); g_registry->add(MAKE_KEY_CL_0("mxmT_masked", FLOAT), std::make_shared>()); + + // algorthm m_extract_row + g_registry->add(MAKE_KEY_CL_0("m_extract_row", INT), std::make_shared>()); + g_registry->add(MAKE_KEY_CL_0("m_extract_row", UINT), std::make_shared>()); + g_registry->add(MAKE_KEY_CL_0("m_extract_row", FLOAT), std::make_shared>()); + + // algorthm v_emult + g_registry->add(MAKE_KEY_CL_0("v_emult", INT), std::make_shared>()); + g_registry->add(MAKE_KEY_CL_0("v_emult", UINT), std::make_shared>()); + g_registry->add(MAKE_KEY_CL_0("v_emult", FLOAT), std::make_shared>()); } }// namespace spla diff --git a/src/opencl/cl_m_extract_row.hpp b/src/opencl/cl_m_extract_row.hpp new file mode 100644 index 000000000..5a8f3b59b --- /dev/null +++ b/src/opencl/cl_m_extract_row.hpp @@ -0,0 +1,120 @@ +/**********************************************************************************/ +/* This file is part of spla project */ +/* https://github.com/SparseLinearAlgebra/spla */ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2025 SparseLinearAlgebra */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#ifndef SPLA_CL_M_EXTRACT_ROW_HPP +#define SPLA_CL_M_EXTRACT_ROW_HPP + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace spla { + + template + class Algo_m_extract_row_cl final : public RegistryAlgo { + public: + ~Algo_m_extract_row_cl() override = default; + + std::string get_name() override { + return "m_extract_row"; + } + + std::string get_description() override { + return "opencl extract row from matrix"; + } + + Status execute(const DispatchContext& ctx) override { + auto t = ctx.task.template cast_safe(); + + ref_ptr> r = t->r.template cast_safe>(); + ref_ptr> M = t->M.template cast_safe>(); + auto op_apply = t->op_apply.template cast_safe>(); + + r->validate_wd(FormatVector::AccDense); + M->validate_rw(FormatMatrix::AccCsr); + + auto* p_cl_r = r->template get>(); + auto* p_cl_M = M->template get>(); + auto* p_cl_acc = get_acc_cl(); + auto& queue = p_cl_acc->get_queue_default(); + + // get the row boundaries from M->Ap + uint row_bounds[2]; + cl::Buffer cl_row_bounds(p_cl_acc->get_context(), + CL_MEM_READ_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_USE_HOST_PTR, + sizeof(row_bounds), row_bounds); + + queue.enqueueCopyBuffer(p_cl_M->Ap, cl_row_bounds, t->index * sizeof(uint), 0, sizeof(row_bounds)); + queue.finish(); + + std::shared_ptr program; + ensure_kernel(op_apply, program); + + auto kernel = program->make_kernel("extract_row"); + kernel.setArg(0, p_cl_r->Ax); + kernel.setArg(1, p_cl_M->Ax); + kernel.setArg(2, p_cl_M->Aj); + kernel.setArg(3, row_bounds[1]); + + // amount of elements in the row + const uint n = row_bounds[1] - row_bounds[0] - 1; + + cl::NDRange global(p_cl_acc->get_default_wgs() * div_up_clamp(n, p_cl_acc->get_default_wgs(), 1u, 1024u)); + cl::NDRange local(p_cl_acc->get_default_wgs()); + queue.enqueueNDRangeKernel(kernel, cl::NDRange(row_bounds[0]), global, local); + + return Status::Ok; + } + + private: + void ensure_kernel(const ref_ptr>& op_apply, std::shared_ptr& program) { + CLProgramBuilder program_builder; + program_builder + .set_name("m_extract_row") + .add_type("TYPE", get_ttype().template as()) + .add_op("OP_APPLY", op_apply.template as()) + .set_source(source_m_extract_row) + .acquire(); + program = program_builder.get_program(); + } + }; + +}// namespace spla + +#endif//SPLA_CL_M_EXTRACT_ROW_HPP diff --git a/src/opencl/cl_v_emult.hpp b/src/opencl/cl_v_emult.hpp new file mode 100644 index 000000000..1edb91b09 --- /dev/null +++ b/src/opencl/cl_v_emult.hpp @@ -0,0 +1,128 @@ +/**********************************************************************************/ +/* This file is part of spla project */ +/* https://github.com/JetBrains-Research/spla */ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2025 SparseLinearAlgebra */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#ifndef SPLA_CL_VECTOR_EMULT_HPP +#define SPLA_CL_VECTOR_EMULT_HPP + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace spla { + + template + class Algo_v_emult_cl final : public RegistryAlgo { + public: + ~Algo_v_emult_cl() override = default; + + std::string get_name() override { + return "v_emult"; + } + + std::string get_description() override { + return "parallel vector element-wise mult on opencl device"; + } + + Status execute(const DispatchContext& ctx) override { + auto t = ctx.task.template cast_safe(); + ref_ptr> u = t->u.template cast_safe>(); + ref_ptr> v = t->v.template cast_safe>(); + + if (u->is_valid(FormatVector::AccDense) && v->is_valid(FormatVector::AccDense)) { + return execute_dn2dn(ctx); + } + + return execute_dn2dn(ctx); + } + + private: + Status execute_dn2dn(const DispatchContext& ctx) { + TIME_PROFILE_SCOPE("cl/vector_emult_dn2dn"); + + auto t = ctx.task.template cast_safe(); + ref_ptr> r = t->r.template cast_safe>(); + ref_ptr> u = t->u.template cast_safe>(); + ref_ptr> v = t->v.template cast_safe>(); + ref_ptr> op = t->op.template cast_safe>(); + + std::shared_ptr program; + if (!ensure_kernel(op, program)) return Status::CompilationError; + + r->validate_wd(FormatVector::AccDense); + u->validate_rw(FormatVector::AccDense); + v->validate_rw(FormatVector::AccDense); + + auto* p_cl_r = r->template get>(); + const auto* p_cl_u = u->template get>(); + const auto* p_cl_v = v->template get>(); + auto* p_cl_acc = get_acc_cl(); + auto& queue = p_cl_acc->get_queue_default(); + + const uint n = r->get_n_rows(); + + auto kernel = program->make_kernel("dense_to_dense"); + kernel.setArg(0, p_cl_r->Ax); + kernel.setArg(1, p_cl_u->Ax); + kernel.setArg(2, p_cl_v->Ax); + kernel.setArg(3, n); + kernel.setArg(4, r->get_fill_value()); + + cl::NDRange global(p_cl_acc->get_default_wgs() * div_up_clamp(n, p_cl_acc->get_default_wgs(), 1u, 1024u)); + cl::NDRange local(p_cl_acc->get_default_wgs()); + queue.enqueueNDRangeKernel(kernel, cl::NullRange, global, local); + + return Status::Ok; + } + + bool ensure_kernel(const ref_ptr>& op, std::shared_ptr& program) { + CLProgramBuilder program_builder; + program_builder + .set_name("vector_emult") + .add_type("TYPE", get_ttype().template as()) + .add_op("OP_BINARY", op.template as()) + .set_source(source_vector_emult) + .acquire(); + + program = program_builder.get_program(); + + return true; + } + }; + +}// namespace spla + +#endif diff --git a/src/opencl/generated/auto_m_extract_row.hpp b/src/opencl/generated/auto_m_extract_row.hpp new file mode 100644 index 000000000..3a83a96bc --- /dev/null +++ b/src/opencl/generated/auto_m_extract_row.hpp @@ -0,0 +1,23 @@ +//////////////////////////////////////////////////////////////////// +// Copyright (c) 2021 - 2025 SparseLinearAlgebra +// Autogenerated file, do not modify +//////////////////////////////////////////////////////////////////// + +#pragma once + +static const char source_m_extract_row[] = R"( + + +__kernel void extract_row(__global TYPE* g_rx, + __global const TYPE* g_Ax, + __global const uint* g_Aj, + const uint n) { + const uint gid = get_global_id(0); + const uint gsize = get_global_size(0); + + for (uint i = gid; i < n; i += gsize) { + g_rx[g_Aj[i]] = OP_APPLY(g_Ax[i]); + } +} + +)"; \ No newline at end of file diff --git a/src/opencl/generated/auto_vector_emult.hpp b/src/opencl/generated/auto_vector_emult.hpp new file mode 100644 index 000000000..598af5223 --- /dev/null +++ b/src/opencl/generated/auto_vector_emult.hpp @@ -0,0 +1,27 @@ +//////////////////////////////////////////////////////////////////// +// Copyright (c) 2021 - 2025 SparseLinearAlgebra +// Autogenerated file, do not modify +//////////////////////////////////////////////////////////////////// + +#pragma once + +static const char source_vector_emult[] = R"( + + + +__kernel void dense_to_dense(__global TYPE* g_rx, + __global const TYPE* g_ux, + __global const TYPE* g_vx, + const uint n, + const TYPE fill_value) { + const uint gid = get_global_id(0); + const uint gsize = get_global_size(0); + + for (uint i = gid; i < n; i += gsize) { + TYPE u = g_ux[i]; + TYPE v = g_vx[i]; + g_rx[i] = u != fill_value && v != fill_value ? OP_BINARY(u, v) : fill_value; + } +} + +)"; \ No newline at end of file diff --git a/src/opencl/kernels/m_extract_row.cl b/src/opencl/kernels/m_extract_row.cl new file mode 100644 index 000000000..eef00eec7 --- /dev/null +++ b/src/opencl/kernels/m_extract_row.cl @@ -0,0 +1,40 @@ +/**********************************************************************************/ +/* This file is part of spla project */ +/* https://github.com/SparseLinearAlgebra/spla */ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2025 SparseLinearAlgebra */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include "common_def.cl" + +__kernel void extract_row(__global TYPE* g_rx, + __global const TYPE* g_Ax, + __global const uint* g_Aj, + const uint n) { + const uint gid = get_global_id(0); + const uint gsize = get_global_size(0); + + for (uint i = gid; i < n; i += gsize) { + g_rx[g_Aj[i]] = OP_APPLY(g_Ax[i]); + } +} diff --git a/src/opencl/kernels/vector_emult.cl b/src/opencl/kernels/vector_emult.cl new file mode 100644 index 000000000..ae5873873 --- /dev/null +++ b/src/opencl/kernels/vector_emult.cl @@ -0,0 +1,44 @@ + +/**********************************************************************************/ +/* This file is part of spla project */ +/* https://github.com/SparseLinearAlgebra/spla */ +/**********************************************************************************/ +/* MIT License */ +/* */ +/* Copyright (c) 2023 SparseLinearAlgebra */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining a copy */ +/* of this software and associated documentation files (the "Software"), to deal */ +/* in the Software without restriction, including without limitation the rights */ +/* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell */ +/* copies of the Software, and to permit persons to whom the Software is */ +/* furnished to do so, subject to the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be included in all */ +/* copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR */ +/* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, */ +/* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE */ +/* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ +/* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, */ +/* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE */ +/* SOFTWARE. */ +/**********************************************************************************/ + +#include "common_def.cl" + +__kernel void dense_to_dense(__global TYPE* g_rx, + __global const TYPE* g_ux, + __global const TYPE* g_vx, + const uint n, + const TYPE fill_value) { + const uint gid = get_global_id(0); + const uint gsize = get_global_size(0); + + for (uint i = gid; i < n; i += gsize) { + TYPE u = g_ux[i]; + TYPE v = g_vx[i]; + g_rx[i] = u != fill_value && v != fill_value ? OP_BINARY(u, v) : fill_value; + } +}