Skip to content

Commit 7fbc700

Browse files
committed
Fix 740
1 parent 69a2788 commit 7fbc700

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+760
-696
lines changed

devel/foldrescale.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
The macro wins hands-down on i7, even for modern GCC9.
1616
This should be done in C++ not as a macro, someday.
1717
*/
18+
19+
using finufft::common::INV_2PI;
20+
1821
#define FOLDRESCALE(x, N, p) \
1922
(p ? (x + (x >= -PI ? (x < PI ? PI : -PI) : 3 * PI)) * ((FLT)INV_2PI * N) \
2023
: (x >= 0.0 ? (x < (FLT)N ? x : x - (FLT)N) : x + (FLT)N))

docs/opts.rst

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -238,17 +238,15 @@ However, with FFTW as the FFT library, FINUFFT is thread safe so long as no othe
238238
#include <finufft.h>
239239
#include <omp.h>
240240

241-
using namespace std;
242-
243241
constexpr int N = 65384;
244242

245-
void locker(void *lck) { reinterpret_cast<recursive_mutex *>(lck)->lock(); }
246-
void unlocker(void *lck) { reinterpret_cast<recursive_mutex *>(lck)->unlock(); }
243+
void locker(void *lck) { reinterpret_cast<std::recursive_mutex *>(lck)->lock(); }
244+
void unlocker(void *lck) { reinterpret_cast<std::recursive_mutex *>(lck)->unlock(); }
247245
248246
int main() {
249247
int64_t Ns[3]; // guru describes mode array by vector [N1,N2..]
250248
Ns[0] = N;
251-
recursive_mutex lck;
249+
std::recursive_mutex lck;
252250

253251
finufft_opts opts;
254252
finufft_default_opts(&opts);
@@ -259,7 +257,7 @@ However, with FFTW as the FFT library, FINUFFT is thread safe so long as no othe
259257
opts.fftw_lock_data = reinterpret_cast<void *>(&lck);
260258
261259
// random nonuniform points (x) and complex strengths (c)
262-
vector<complex<double>> c(N);
260+
std::vector<std::complex<double>> c(N);
263261

264262
// init FFTW threads
265263
fftw_init_threads();
@@ -268,7 +266,7 @@ However, with FFTW as the FFT library, FINUFFT is thread safe so long as no othe
268266
#pragma omp parallel for
269267
for (int j = 0; j < 100; ++j) {
270268
// allocate output array for FFTW...
271-
vector<complex<double>> F1(N);
269+
std::vector<std::complex<double>> F1(N);
272270

273271
// FFTW plan
274272
lck.lock();

examples/cuda/example2d1many.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,12 @@ int main(int argc, char *argv[])
100100
std::complex<float> Ft = std::complex<float>(0, 0),
101101
J = std::complex<float>(0, 1) * (float)iflag;
102102
for (auto j = 0UL; j < M; ++j)
103-
Ft += c[j + i * M] * exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
103+
Ft += c[j + i * M] * std::exp(J * (nt1 * x[j] + nt2 * y[j])); // crude direct
104104
int it = N1 / 2 + nt1 + N1 * (N2 / 2 + nt2); // index in complex F as 1d array
105105
printf("[gpu %3d] one mode: abs err in F[%d,%d] is %.3g\n", i, nt1, nt2,
106-
abs(Ft - fk[it + i * N]));
106+
std::abs(Ft - fk[it + i * N]));
107107
printf("[gpu %3d] one mode: rel err in F[%d,%d] is %.3g\n", i, nt1, nt2,
108-
abs(Ft - fk[it + i * N]) / infnorm(N, fk + i * N));
108+
std::abs(Ft - fk[it + i * N]) / infnorm(N, fk + i * N));
109109
}
110110

111111
cudaFreeHost(x);

examples/cuda/example2d2many.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,10 @@ int main(int argc, char *argv[])
109109
int m = 0;
110110
for (int m2 = -(N2 / 2); m2 <= (N2 - 1) / 2; ++m2) // loop in correct order over F
111111
for (int m1 = -(N1 / 2); m1 <= (N1 - 1) / 2; ++m1)
112-
ct += fkstart[m++] * exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
112+
ct += fkstart[m++] * std::exp(J * (m1 * x[jt] + m2 * y[jt])); // crude direct
113113

114114
printf("[gpu %3d] one targ: rel err in c[%d] is %.3g\n", t, jt,
115-
abs(cstart[jt] - ct) / infnorm(M, c));
115+
std::abs(cstart[jt] - ct) / infnorm(M, c));
116116
}
117117

118118
cudaFreeHost(x);

examples/cuda/example2d3many.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ int main(int argc, char *argv[])
112112
int jt = N / 2; // check arbitrary choice of one targ pt
113113
std::complex<double> J(0, iflag * 1);
114114
std::complex<double> fkt(0, 0);
115-
for (int m = 0; m < M; m++) fkt += cstart[m] * exp(J * (s[jt] * x[m] + t[jt] * y[m]));
115+
for (int m = 0; m < M; m++) fkt += cstart[m] * std::exp(J * (s[jt] * x[m] + t[jt] * y[m]));
116116

117117
printf("[gpu %3d] one targ: rel err in c[%d] is %.3g\n", tr, jt,
118-
abs(fkstart[jt] - fkt) / infnorm(N, fkstart));
118+
std::abs(fkstart[jt] - fkt) / infnorm(N, fkstart));
119119
}
120120

121121
cudaFreeHost(x);

examples/cuda/getting_started.cpp

Lines changed: 37 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,19 @@
11
/*
2-
3-
Simple example of the 1D type-1 transform. To compile, run
4-
5-
nvcc -o getting_started getting_started.cpp -lcufinufft
6-
7-
followed by
8-
9-
./getting_started
10-
11-
with the necessary paths set if the library is not installed in the standard
12-
directories. If the library has been compiled in the standard way, this means
13-
14-
export CPATH="${CPATH:+${CPATH}:}../../include"
15-
export LIBRARY_PATH="${LIBRARY_PATH:+${LIBRARY_PATH}:}../../build"
16-
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}../../build"
17-
18-
*/
19-
20-
#include <complex.h>
2+
Simple example of the 1D type-1 transform using std::complex.
3+
To compile:
4+
nvcc -o getting_started getting_started.cpp -lcufinufft
5+
*/
6+
7+
#include <cmath>
8+
#include <complex>
9+
#include <cstdio>
10+
#include <cstdlib>
2111
#include <cuComplex.h>
2212
#include <cuda_runtime.h>
2313
#include <cufinufft.h>
24-
#include <math.h>
25-
#include <stdio.h>
26-
#include <stdlib.h>
14+
#include <vector>
2715

28-
static const double PI = 3.141592653589793238462643383279502884;
16+
static constexpr double PI = 3.141592653589793238462643383279502884;
2917

3018
int main() {
3119
// Problem size: number of nonuniform points (M) and grid size (N).
@@ -35,9 +23,9 @@ int main() {
3523
int64_t modes[1] = {N};
3624

3725
// Host pointers: frequencies (x), coefficients (c), and output (f).
38-
float *x;
39-
float _Complex *c;
40-
float _Complex *f;
26+
std::vector<float> x(M);
27+
std::vector<std::complex<float>> c(M);
28+
std::vector<std::complex<float>> f(N);
4129

4230
// Device pointers.
4331
float *d_x;
@@ -48,71 +36,61 @@ int main() {
4836

4937
// Manual calculation at a single point idx.
5038
int idx;
51-
float _Complex f0;
52-
53-
// Allocate the host arrays.
54-
x = (float *)malloc(M * sizeof(float));
55-
c = (float _Complex *)malloc(M * sizeof(float _Complex));
56-
f = (float _Complex *)malloc(N * sizeof(float _Complex));
57-
58-
// Fill with random numbers. Frequencies must be in the interval [-pi, pi)
59-
// while strengths can be any value.
60-
srand(0);
39+
std::complex<float> f0;
6140

41+
// Fill with random numbers.
42+
std::srand(42);
6243
for (int j = 0; j < M; ++j) {
63-
x[j] = 2 * PI * (((float)rand()) / RAND_MAX - 1);
64-
c[j] =
65-
(2 * ((float)rand()) / RAND_MAX - 1) + I * (2 * ((float)rand()) / RAND_MAX - 1);
44+
x[j] = 2 * PI * ((float)std::rand() / RAND_MAX - 1);
45+
float re = 2 * ((float)std::rand()) / RAND_MAX - 1;
46+
float im = 2 * ((float)std::rand()) / RAND_MAX - 1;
47+
c[j] = std::complex<float>(re, im);
6648
}
6749

68-
// Allocate the device arrays and copy the x and c arrays.
50+
// Allocate the device arrays and copy x and c.
6951
cudaMalloc(&d_x, M * sizeof(float));
70-
cudaMalloc(&d_c, M * sizeof(float _Complex));
71-
cudaMalloc(&d_f, N * sizeof(float _Complex));
52+
cudaMalloc(&d_c, M * sizeof(cuFloatComplex));
53+
cudaMalloc(&d_f, N * sizeof(cuFloatComplex));
54+
55+
std::vector<cuFloatComplex> c_dev(M);
56+
for (int j = 0; j < M; ++j) c_dev[j] = make_cuFloatComplex(c[j].real(), c[j].imag());
7257

73-
cudaMemcpy(d_x, x, M * sizeof(float), cudaMemcpyHostToDevice);
74-
cudaMemcpy(d_c, c, M * sizeof(float _Complex), cudaMemcpyHostToDevice);
58+
cudaMemcpy(d_x, x.data(), M * sizeof(float), cudaMemcpyHostToDevice);
59+
cudaMemcpy(d_c, c_dev.data(), M * sizeof(cuFloatComplex), cudaMemcpyHostToDevice);
7560

76-
// Make the cufinufft plan for a 1D type-1 transform with six digits of
77-
// tolerance.
78-
cufinufftf_makeplan(1, 1, modes, 1, 1, 1e-6, &plan, NULL);
61+
// Make the cufinufft plan for 1D type-1 transform.
62+
cufinufftf_makeplan(1, 1, modes, 1, 1, 1e-6, &plan, nullptr);
7963

8064
// Set the frequencies of the nonuniform points.
81-
cufinufftf_setpts(plan, M, d_x, NULL, NULL, 0, NULL, NULL, NULL);
65+
cufinufftf_setpts(plan, M, d_x, nullptr, nullptr, 0, nullptr, nullptr, nullptr);
8266

8367
// Actually execute the plan on the given coefficients and store the result
8468
// in the d_f array.
8569
cufinufftf_execute(plan, d_c, d_f);
8670

8771
// Copy the result back onto the host.
88-
cudaMemcpy(f, d_f, N * sizeof(float _Complex), cudaMemcpyDeviceToHost);
72+
cudaMemcpy(f.data(), d_f, N * sizeof(cuFloatComplex), cudaMemcpyDeviceToHost);
8973

9074
// Destroy the plan and free the device arrays after we're done.
9175
cufinufftf_destroy(plan);
92-
9376
cudaFree(d_x);
9477
cudaFree(d_c);
9578
cudaFree(d_f);
9679

9780
// Pick an index to check the result of the calculation.
9881
idx = 4 * N / 7;
99-
100-
printf("f[%d] = %lf + %lfi\n", idx, crealf(f[idx]), cimagf(f[idx]));
82+
printf("f[%d] = %lf + %lfi\n", idx, std::real(f[idx]), std::imag(f[idx]));
10183

10284
// Calculate the result manually using the formula for the type-1
10385
// transform.
10486
f0 = 0;
10587

88+
std::complex<float> I(-0.0, 1.0);
10689
for (int j = 0; j < M; ++j) {
107-
f0 += c[j] * cexp(I * x[j] * (idx - N / 2));
90+
f0 += c[j] * std::exp(I * x[j] * float(idx - N / 2));
10891
}
10992

110-
printf("f0[%d] = %lf + %lfi\n", idx, crealf(f0), cimagf(f0));
111-
112-
// Finally free the host arrays.
113-
free(x);
114-
free(c);
115-
free(f);
93+
printf("f0[%d] = %lf + %lfi\n", idx, std::real(f0), std::imag(f0));
11694

11795
return 0;
11896
}

examples/guru1d1.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,13 @@
44

55
// specific to this example...
66
#include <cassert>
7-
#include <math.h>
8-
#include <stdio.h>
9-
#include <stdlib.h>
7+
#include <cmath>
8+
#include <cstdint>
9+
#include <cstdio>
10+
#include <cstdlib>
1011
#include <vector>
1112

12-
// only good for small projects...
13-
using namespace std;
14-
1513
static const double PI = 3.141592653589793238462643383279502884;
16-
// allows 1i to be the imaginary unit... (C++14 onwards)
17-
using namespace std::complex_literals;
1814

1915
int main(int argc, char *argv[])
2016
/* Example calling guru C++ interface to FINUFFT library, passing
@@ -42,43 +38,47 @@ int main(int argc, char *argv[])
4238
} else // or, NULL here means use default opts...
4339
finufft_makeplan(type, dim, Ns, +1, ntransf, tol, &plan, NULL);
4440

41+
const std::complex<double> I(0.0, 1.0);
42+
4543
// generate some random nonuniform points
46-
vector<double> x(M);
44+
std::vector<double> x(M);
4745
for (int j = 0; j < M; ++j)
48-
x[j] = PI * (2 * ((double)rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
46+
x[j] = PI * (2 * ((double)std::rand() / RAND_MAX) - 1); // uniform random in [-pi,pi)
4947
// note FINUFFT doesn't use std::vector types, so we need to make a pointer...
5048
finufft_setpts(plan, M, x.data(), NULL, NULL, 0, NULL, NULL, NULL);
5149

5250
// generate some complex strengths
53-
vector<complex<double>> c(M);
51+
std::vector<std::complex<double>> c(M);
5452
for (int j = 0; j < M; ++j)
55-
c[j] =
56-
2 * ((double)rand() / RAND_MAX) - 1 + 1i * (2 * ((double)rand() / RAND_MAX) - 1);
53+
c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
54+
std::complex<double>(0.0, 1.0) *
55+
(2 * ((double)std::rand() / RAND_MAX) - 1);
5756

5857
// alloc output array for the Fourier modes, then do the transform
59-
vector<complex<double>> F(N);
58+
std::vector<std::complex<double>> F(N);
6059
int ier = finufft_execute(plan, c.data(), F.data());
6160

6261
// for fun, do another with same NU pts (no re-sorting), but new strengths...
6362
for (int j = 0; j < M; ++j)
64-
c[j] =
65-
2 * ((double)rand() / RAND_MAX) - 1 + 1i * (2 * ((double)rand() / RAND_MAX) - 1);
63+
c[j] = 2 * ((double)std::rand() / RAND_MAX) - 1 +
64+
std::complex<double>(0.0, 1.0) *
65+
(2 * ((double)std::rand() / RAND_MAX) - 1);
6666
ier = finufft_execute(plan, c.data(), F.data());
6767

6868
finufft_destroy(plan); // don't forget! done with transforms of this size
6969

7070
// rest is math checking and reporting...
7171
int n = 142519; // check the answer just for this mode
7272
assert(n >= -(double)N / 2 && n < (double)N / 2); // ensure meaningful test
73-
complex<double> Ftest = complex<double>(0, 0);
74-
for (int j = 0; j < M; ++j) Ftest += c[j] * exp(1i * (double)n * x[j]);
73+
std::complex<double> Ftest(0, 0);
74+
for (int j = 0; j < M; ++j) Ftest += c[j] * std::exp(I * (double)n * x[j]);
7575
int nout = n + N / 2; // index in output array for freq mode n
7676
double Fmax = 0.0; // compute inf norm of F
7777
for (int m = 0; m < N; ++m) {
78-
double aF = abs(F[m]);
78+
double aF = std::abs(F[m]);
7979
if (aF > Fmax) Fmax = aF;
8080
}
81-
double err = abs(F[nout] - Ftest) / Fmax;
81+
double err = std::abs(F[nout] - Ftest) / Fmax;
8282
printf("guru 1D type-1 double-prec NUFFT done. ier=%d, rel err in F[%d] is %.3g\n", ier,
8383
n, err);
8484

0 commit comments

Comments
 (0)