1818#include < thrust/device_vector.h>
1919#include < thrust/host_vector.h>
2020
21- // for now, once finufft is demacroized we can test float
22- using T = double ;
23-
2421template <typename T, typename V> bool equal (V *d_vec, T *cpu, const std::size_t size) {
2522 // copy d_vec to cpu
2623 thrust::host_vector<T> h_vec (size);
@@ -75,10 +72,10 @@ auto almost_equal(V *d_vec,
7572 assert (cudaMemcpy (h_vec.data (), d_vec, size * sizeof (T), cudaMemcpyDeviceToHost) ==
7673 cudaSuccess);
7774 // print h_vec and cpu
78- // for (std::size_t i = 0; i < size; ++i) {
79- // std::cout << "gpu[" << i << "]: " << h_vec[i] << " cpu[" << i << "]: " << cpu[i]
80- // << '\n';
81- // }
75+ for (std::size_t i = 0 ; i < size; ++i) {
76+ std::cout << " gpu[" << i << " ]: " << h_vec[i] << " cpu[" << i << " ]: " << cpu[i]
77+ << ' \n ' ;
78+ }
8279 std::cout << " relerrtwonorm: " << infnorm (h_vec.data (), cpu, size) << std::endl;
8380 // compare the l2 norm of the difference between the two vectors
8481 if (relerrtwonorm (h_vec.data (), cpu, size) < tol) {
@@ -88,32 +85,39 @@ auto almost_equal(V *d_vec,
8885}
8986
9087int main () {
88+ // for now, once finufft is demacroized we can test float
89+ using test_t = double ;
90+
9191 // defaults. tests should shadow them to override
9292 cufinufft_opts opts;
9393 cufinufft_default_opts (&opts);
94- opts.debug = 2 ;
94+ opts.debug = 2 ;
95+ opts.upsampfac = 1.25 ;
96+ opts.gpu_kerevalmeth = 1 ;
9597 // opts.gpu_sort = 0;
9698 finufft_opts fin_opts;
9799 finufft_default_opts (&fin_opts);
98100 fin_opts.debug = 2 ;
99101 fin_opts.spread_kerevalmeth = 1 ;
102+ fin_opts.upsampfac = 1.25 ;
100103 const int iflag = 1 ;
101104 const int ntransf = 1 ;
102105 const int dim = 3 ;
103106 const double tol = 1e-9 ;
104- const int N = 1023 ;
107+ const int n_modes[] = {10 , 5 , 3 };
108+ const int N = n_modes[0 ] * n_modes[1 ] * n_modes[2 ];
105109 const int M = 1000 ;
106110 const double bandwidth = 50.0 ;
107111
108- thrust::host_vector<T > x (M * ntransf), y (M * ntransf), z (M * ntransf), s (N * ntransf),
109- t (N * ntransf), u (N * ntransf);
110- thrust::host_vector<std::complex <T >> c (M * ntransf), fk (N * ntransf);
112+ thrust::host_vector<test_t > x (M * ntransf), y (M * ntransf), z (M * ntransf),
113+ s (N * ntransf), t (N * ntransf), u (N * ntransf);
114+ thrust::host_vector<std::complex <test_t >> c (M * ntransf), fk (N * ntransf);
111115
112- thrust::device_vector<T > d_x{}, d_y{}, d_z{}, d_s{}, d_t {}, d_u{};
113- thrust::device_vector<std::complex <T >> d_c (M * ntransf), d_fk (N * ntransf);
116+ thrust::device_vector<test_t > d_x{}, d_y{}, d_z{}, d_s{}, d_t {}, d_u{};
117+ thrust::device_vector<std::complex <test_t >> d_c (M * ntransf), d_fk (N * ntransf);
114118
115119 std::default_random_engine eng (42 );
116- std::uniform_real_distribution<T > dist11 (-1 , 1 );
120+ std::uniform_real_distribution<test_t > dist11 (-1 , 1 );
117121 auto rand_util_11 = [&eng, &dist11]() {
118122 return dist11 (eng);
119123 };
@@ -161,11 +165,12 @@ int main() {
161165 cudaDeviceSynchronize ();
162166
163167 const auto cpu_planer =
164- [iflag, tol, ntransf, dim, M, N, &x, &y, &z, &s, &t, &u, &fin_opts](
168+ [iflag, tol, ntransf, dim, M, N, n_modes, &x, &y, &z, &s, &t, &u, &fin_opts](
165169 const auto type) {
166170 finufft_plan_s *plan{nullptr };
167- assert (finufft_makeplan (
168- type, dim, nullptr , iflag, ntransf, tol, &plan, &fin_opts) == 0 );
171+ std::int64_t nl[] = {n_modes[0 ], n_modes[1 ], n_modes[2 ]};
172+ assert (
173+ finufft_makeplan (type, dim, nl, iflag, ntransf, tol, &plan, &fin_opts) == 0 );
169174 assert (finufft_setpts (plan, M, x.data (), y.data (), z.data (), N, s.data (),
170175 t.data (), u.data ()) == 0 );
171176 return plan;
@@ -204,6 +209,7 @@ int main() {
204209 deconv_tol,
205210 M,
206211 N,
212+ n_modes,
207213 &d_x,
208214 &d_y,
209215 &d_z,
@@ -219,8 +225,8 @@ int main() {
219225 using T = typename std::remove_pointer<decltype (plan)>::type::real_t ;
220226 const int type = 3 ;
221227 const auto cpu_plan = cpu_planer (type);
222- assert (cufinufft_makeplan_impl<T>(type, dim, nullptr , iflag, ntransf, T (tol), &plan ,
223- &opts) == 0 );
228+ assert (cufinufft_makeplan_impl<T>(type, dim, ( int *)n_modes , iflag, ntransf, T (tol),
229+ &plan, & opts) == 0 );
224230 assert (cufinufft_setpts_impl<T>(M, d_x.data ().get (), d_y.data ().get (),
225231 d_z.data ().get (), N, d_s.data ().get (),
226232 d_t .data ().get (), d_u.data ().get (), plan) == 0 );
@@ -245,6 +251,11 @@ int main() {
245251 assert (equal (plan->kz , cpu_plan->Z , M));
246252 assert (equal (plan->d_s , cpu_plan->Sp , N));
247253 assert (equal (plan->d_t , cpu_plan->Tp , N));
254+ assert (plan->spopts .nspread == cpu_plan->spopts .nspread );
255+ assert (plan->spopts .upsampfac == cpu_plan->spopts .upsampfac );
256+ assert (plan->spopts .ES_beta == cpu_plan->spopts .ES_beta );
257+ assert (plan->spopts .ES_halfwidth == cpu_plan->spopts .ES_halfwidth );
258+ assert (plan->spopts .ES_c == cpu_plan->spopts .ES_c );
248259 assert (equal (plan->d_u , cpu_plan->Up , N));
249260 // NOTE:seems with infnorm we are getting at most 11 digits of precision
250261 std::cout << " prephase :\n " ;
@@ -258,10 +269,10 @@ int main() {
258269 c[i].imag (randm11 ());
259270 }
260271 d_c = c;
261- for (int i = 0 ; i < N; i++) {
262- fk[i] = {- 100 , - 100 };
263- }
264- d_fk = fk;
272+ // for (int i = 0; i < N; i++) {
273+ // fk[i] = {randm11(), randm11() };
274+ // }
275+ // d_fk = fk;
265276 cufinufft_execute_impl (
266277 (cuda_complex<T> *)d_c.data ().get (), (cuda_complex<T> *)d_fk.data ().get (), plan);
267278 finufft_execute (cpu_plan, (std::complex <T> *)c.data (), (std::complex <T> *)fk.data ());
@@ -273,7 +284,7 @@ int main() {
273284 };
274285 // testing correctness of the plan creation
275286 // cufinufft_plan_t<float> *single_plan{nullptr};
276- cufinufft_plan_t <T > *double_plan{nullptr };
287+ cufinufft_plan_t <test_t > *double_plan{nullptr };
277288 // test_type1(double_plan);
278289 // test_type2(double_plan);
279290 test_type3 (double_plan);
0 commit comments