Skip to content

Commit 62d92ad

Browse files
authored
Merge pull request STEllAR-GROUP#6608 from STEllAR-GROUP/for_loop_simd
Partially support data parallel for_loop
2 parents b3d750f + 57d3fac commit 62d92ad

File tree

11 files changed

+297
-29
lines changed

11 files changed

+297
-29
lines changed

libs/core/algorithms/include/hpx/parallel/datapar/iterator_helpers.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#if defined(HPX_HAVE_DATAPAR)
1212
#include <hpx/assert.hpp>
1313
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>
14+
#include <hpx/execution/traits/vector_pack_get_set.hpp>
1415
#include <hpx/execution/traits/vector_pack_load_store.hpp>
1516
#include <hpx/execution/traits/vector_pack_type.hpp>
1617
#include <hpx/functional/detail/invoke.hpp>
@@ -133,6 +134,31 @@ namespace hpx::parallel::util::detail {
133134
}
134135
};
135136

137+
template <typename I>
138+
struct datapar_loop_step<I, std::enable_if_t<std::is_integral_v<I>>>
139+
{
140+
using V1 = traits::vector_pack_type_t<I, 1>;
141+
using V = traits::vector_pack_type_t<I>;
142+
143+
template <typename F>
144+
HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void call1(F&& f, I& i)
145+
{
146+
V1 tmp(i);
147+
HPX_INVOKE(f, tmp);
148+
++i;
149+
}
150+
151+
template <typename F>
152+
HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void callv(F&& f, I& i)
153+
{
154+
V tmp;
155+
for (std::size_t e = 0; e != traits::size(tmp); ++e)
156+
traits::set(tmp, e, static_cast<I>(i + e));
157+
HPX_INVOKE(f, tmp);
158+
i += traits::vector_pack_size_v<V>;
159+
}
160+
};
161+
136162
///////////////////////////////////////////////////////////////////////////
137163
template <typename Iter, typename Enable = void>
138164
struct datapar_loop_pred_step

libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2007-2023 Hartmut Kaiser
1+
// Copyright (c) 2007-2025 Hartmut Kaiser
22
//
33
// SPDX-License-Identifier: BSL-1.0
44
// Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -15,6 +15,7 @@
1515
#include <hpx/execution/traits/vector_pack_load_store.hpp>
1616
#include <hpx/execution/traits/vector_pack_type.hpp>
1717
#include <hpx/executors/datapar/execution_policy.hpp>
18+
#include <hpx/iterator_support/traits/is_iterator.hpp>
1819
#include <hpx/parallel/datapar/iterator_helpers.hpp>
1920
#include <hpx/parallel/util/loop.hpp>
2021

@@ -238,8 +239,12 @@ namespace hpx::parallel::util {
238239
};
239240

240241
///////////////////////////////////////////////////////////////////////
242+
template <typename Iterator, typename Enable = void>
243+
struct datapar_loop_n;
244+
241245
template <typename Iterator>
242-
struct datapar_loop_n
246+
struct datapar_loop_n<Iterator,
247+
std::enable_if_t<hpx::traits::is_iterator_v<Iterator>>>
243248
{
244249
using iterator_type = std::decay_t<Iterator>;
245250
using value_type =
@@ -258,8 +263,9 @@ namespace hpx::parallel::util {
258263
{
259264
std::size_t len = count;
260265

266+
// clang-format off
261267
for (/* */; !detail::is_data_aligned(first) && len != 0;
262-
--len)
268+
--len)
263269
{
264270
datapar_loop_step<InIter>::call1(f, first);
265271
}
@@ -268,16 +274,18 @@ namespace hpx::parallel::util {
268274

269275
for (auto len_v =
270276
static_cast<std::int64_t>(len - (size + 1));
271-
len_v > 0;
272-
len_v -= static_cast<std::int64_t>(size), len -= size)
277+
len_v > 0;
278+
len_v -= static_cast<std::int64_t>(size), len -= size)
273279
{
274280
datapar_loop_step<InIter>::callv(f, first);
275281
}
282+
// clang-format on
276283

277284
for (/* */; len != 0; --len)
278285
{
279286
datapar_loop_step<InIter>::call1(f, first);
280287
}
288+
281289
return first;
282290
}
283291
else
@@ -302,6 +310,51 @@ namespace hpx::parallel::util {
302310
}
303311
};
304312

313+
template <typename I>
314+
struct datapar_loop_n<I, std::enable_if_t<std::is_integral_v<I>>>
315+
{
316+
using V = traits::vector_pack_type_t<I>;
317+
318+
template <typename Iter, typename F>
319+
HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
320+
Iter first, std::size_t count, F&& f)
321+
{
322+
std::size_t len = count;
323+
constexpr std::size_t size = traits::vector_pack_size_v<V>;
324+
325+
for (size_t i = first % size; i != 0 && len != 0; --i, --len)
326+
{
327+
datapar_loop_step<Iter>::call1(f, first);
328+
}
329+
330+
// clang-format off
331+
for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
332+
len_v > 0;
333+
len_v -= static_cast<std::int64_t>(size), len -= size)
334+
{
335+
datapar_loop_step<Iter>::callv(f, first);
336+
}
337+
// clang-format on
338+
339+
for (/* */; len != 0; --len)
340+
{
341+
datapar_loop_step<Iter>::call1(f, first);
342+
}
343+
return first;
344+
}
345+
346+
template <typename Iter, typename CancelToken, typename F>
347+
HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
348+
Iter first, std::size_t count, CancelToken& tok, F&& f)
349+
{
350+
// check at the start of a partition only
351+
if (tok.was_cancelled())
352+
return first;
353+
354+
return call(first, count, HPX_FORWARD(F, f));
355+
}
356+
};
357+
305358
///////////////////////////////////////////////////////////////////////
306359
template <typename Iterator>
307360
struct datapar_loop_n_ind
@@ -323,8 +376,9 @@ namespace hpx::parallel::util {
323376
{
324377
std::size_t len = count;
325378

379+
// clang-format off
326380
for (/* */; !detail::is_data_aligned(first) && len != 0;
327-
--len)
381+
--len)
328382
{
329383
datapar_loop_step_ind<InIter>::call1(f, first);
330384
}
@@ -333,11 +387,12 @@ namespace hpx::parallel::util {
333387

334388
for (auto len_v =
335389
static_cast<std::int64_t>(len - (size + 1));
336-
len_v > 0;
337-
len_v -= static_cast<std::int64_t>(size), len -= size)
390+
len_v > 0;
391+
len_v -= static_cast<std::int64_t>(size), len -= size)
338392
{
339393
datapar_loop_step_ind<InIter>::callv(f, first);
340394
}
395+
// clang-format on
341396

342397
for (/* */; len != 0; --len)
343398
{
@@ -381,14 +436,16 @@ namespace hpx::parallel::util {
381436

382437
constexpr std::size_t size = traits::vector_pack_size_v<V>;
383438

439+
// clang-format off
384440
for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
385-
len_v > 0;
386-
len_v -= static_cast<std::int64_t>(size), len -= size)
441+
len_v > 0;
442+
len_v -= static_cast<std::int64_t>(size), len -= size)
387443
{
388444
datapar_loop_idx_step<Iter>::callv(f, it, base_idx);
389445
std::advance(it, size);
390446
base_idx += size;
391447
}
448+
// clang-format on
392449

393450
for (/* */; len != 0; --len)
394451
{

libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ if(HPX_WITH_DATAPAR)
2929
foreach_datapar
3030
foreach_datapar_zipiter
3131
foreachn_datapar
32+
for_loop_datapar
3233
generate_datapar
3334
generaten_datapar
3435
mismatch_binary_datapar
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Copyright (c) 2016-2025 Hartmut Kaiser
2+
//
3+
// SPDX-License-Identifier: BSL-1.0
4+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
5+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6+
7+
#include <hpx/algorithm.hpp>
8+
#include <hpx/datapar.hpp>
9+
#include <hpx/init.hpp>
10+
#include <hpx/modules/testing.hpp>
11+
12+
#include <algorithm>
13+
#include <cstddef>
14+
#include <iostream>
15+
#include <numeric>
16+
#include <random>
17+
#include <string>
18+
#include <utility>
19+
#include <vector>
20+
21+
///////////////////////////////////////////////////////////////////////////////
22+
unsigned int seed = std::random_device{}();
23+
std::mt19937 gen(seed);
24+
25+
///////////////////////////////////////////////////////////////////////////////
26+
template <typename ExPolicy>
27+
void test_for_loop_idx(ExPolicy&& policy)
28+
{
29+
static_assert(hpx::is_execution_policy_v<ExPolicy>,
30+
"hpx::is_execution_policy_v<ExPolicy>");
31+
32+
std::vector<std::size_t> c(10007);
33+
std::iota(std::begin(c), std::end(c), gen());
34+
35+
hpx::experimental::for_loop(
36+
std::forward<ExPolicy>(policy), 0, int(c.size()), [&c](auto i) {
37+
for (std::size_t e = 0; e < hpx::parallel::traits::size(i); ++e)
38+
c[hpx::parallel::traits::get(i, e)] = 42;
39+
});
40+
41+
// verify values
42+
std::size_t count = 0;
43+
std::for_each(std::begin(c), std::end(c), [&count](std::size_t v) -> void {
44+
HPX_TEST_EQ(v, std::size_t(42));
45+
++count;
46+
});
47+
HPX_TEST_EQ(count, c.size());
48+
}
49+
50+
template <typename ExPolicy>
51+
void test_for_loop_idx_async(ExPolicy&& p)
52+
{
53+
std::vector<std::size_t> c(10007);
54+
std::iota(std::begin(c), std::end(c), gen());
55+
56+
auto f = hpx::experimental::for_loop(
57+
std::forward<ExPolicy>(p), 0, int(c.size()), [&c](auto i) {
58+
for (std::size_t e = 0; e < hpx::parallel::traits::size(i); ++e)
59+
c[hpx::parallel::traits::get(i, e)] = 42;
60+
});
61+
f.wait();
62+
63+
// verify values
64+
std::size_t count = 0;
65+
std::for_each(std::begin(c), std::end(c), [&count](std::size_t v) -> void {
66+
HPX_TEST_EQ(v, std::size_t(42));
67+
++count;
68+
});
69+
HPX_TEST_EQ(count, c.size());
70+
}
71+
72+
void for_loop_test_idx()
73+
{
74+
using namespace hpx::execution;
75+
76+
test_for_loop_idx(simd);
77+
test_for_loop_idx(par_simd);
78+
79+
test_for_loop_idx_async(simd(task));
80+
test_for_loop_idx_async(par_simd(task));
81+
}
82+
83+
///////////////////////////////////////////////////////////////////////////////
84+
// HPX entry point: picks up an optional --seed override, reseeds the global
// generator, runs the tests and returns the result of
// hpx::local::finalize().
int hpx_main(hpx::program_options::variables_map& vm)
{
    // a seed given on the command line replaces the random default
    if (vm.count("seed") != 0)
    {
        seed = vm["seed"].as<unsigned int>();
    }

    // print the seed so the run can be repeated with the same value
    std::cout << "using seed: " << seed << std::endl;
    gen.seed(seed);

    for_loop_test_idx();

    return hpx::local::finalize();
}
96+
97+
int main(int argc, char* argv[])
98+
{
99+
// add command line option which controls the random number generator seed
100+
using namespace hpx::program_options;
101+
options_description desc_commandline(
102+
"Usage: " HPX_APPLICATION_STRING " [options]");
103+
104+
desc_commandline.add_options()("seed,s", value<unsigned int>(),
105+
"the random number generator seed to use for this run");
106+
107+
// By default this test should run on all available cores
108+
std::vector<std::string> const cfg = {"hpx.os_threads=all"};
109+
110+
// Initialize and run HPX
111+
hpx::local::init_params init_args;
112+
init_args.desc_cmdline = desc_commandline;
113+
init_args.cfg = cfg;
114+
115+
HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0,
116+
"HPX main exited with non-zero status");
117+
118+
return hpx::util::report_errors();
119+
}

libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_get_set.hpp

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,50 @@
99
#include <hpx/config.hpp>
1010

1111
#if defined(HPX_HAVE_DATAPAR_EVE)
12+
13+
#include <hpx/assert.hpp>
14+
#include <hpx/concepts/concepts.hpp>
15+
#include <hpx/execution/traits/detail/simd/vector_pack_simd.hpp>
16+
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>
17+
1218
#include <cstddef>
1319

1420
namespace hpx::parallel::traits {
1521

1622
///////////////////////////////////////////////////////////////////////
17-
template <typename Vector>
23+
template <typename Vector, HPX_CONCEPT_REQUIRES_(is_vector_pack_v<Vector>)>
1824
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
1925
Vector& vec, std::size_t index) noexcept
2026
{
2127
return vec.get(index);
2228
}
2329

30+
template <typename Scalar,
31+
HPX_CONCEPT_REQUIRES_(is_scalar_vector_pack_v<Scalar>)>
32+
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
33+
Scalar& sc, [[maybe_unused]] std::size_t index) noexcept
34+
{
35+
HPX_ASSERT(index == 0);
36+
return sc;
37+
}
38+
2439
///////////////////////////////////////////////////////////////////////
25-
template <typename Vector, typename T>
40+
template <typename Vector, typename T,
41+
HPX_CONCEPT_REQUIRES_(is_vector_pack_v<Vector>)>
2642
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
2743
Vector& vec, std::size_t index, T val) noexcept
2844
{
2945
vec.set(index, val);
3046
}
47+
48+
template <typename Scalar, typename T,
49+
HPX_CONCEPT_REQUIRES_(is_scalar_vector_pack_v<Scalar>)>
50+
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
51+
Scalar& sc, [[maybe_unused]] std::size_t index, T val) noexcept
52+
{
53+
HPX_ASSERT(index == 0);
54+
sc = val;
55+
}
3156
} // namespace hpx::parallel::traits
3257

3358
#endif

0 commit comments

Comments
 (0)