Skip to content

Commit b4b8a10

Browse files
authored
Merge pull request #758 from DiamonDinoia/feature/hint-nj-density
Using density to determine upsampfact, deferring to setpts when auto Now type3 upsampfact may differs from inner type2 upsampfact if opts.upsampfac is set to 0. Previously type3 upsampfact is always same as the inner type2 upsampfact.
2 parents f7ef7fc + a9d3e57 commit b4b8a10

File tree

3 files changed

+171
-105
lines changed

3 files changed

+171
-105
lines changed

include/finufft/fft.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,15 @@ template<> struct Finufft_FFT_plan<float> {
7979
uint64_t nf = 1;
8080
for (auto i : dims) nf *= i;
8181
lock();
82+
// Destroy existing plans before creating new ones (handles re-planning)
83+
if (plan_) {
84+
fftwf_destroy_plan(plan_);
85+
plan_ = nullptr;
86+
}
87+
if (plan_adj_) {
88+
fftwf_destroy_plan(plan_adj_);
89+
plan_adj_ = nullptr;
90+
}
8291
#ifdef _OPENMP
8392
fftwf_plan_with_nthreads(nthreads);
8493
#endif
@@ -155,6 +164,15 @@ template<> struct Finufft_FFT_plan<double> {
155164
uint64_t nf = 1;
156165
for (auto i : dims) nf *= i;
157166
lock();
167+
// Destroy existing plans before creating new ones (handles re-planning)
168+
if (plan_) {
169+
fftw_destroy_plan(plan_);
170+
plan_ = nullptr;
171+
}
172+
if (plan_adj_) {
173+
fftw_destroy_plan(plan_adj_);
174+
plan_adj_ = nullptr;
175+
}
158176
#ifdef _OPENMP
159177
fftw_plan_with_nthreads(nthreads);
160178
#endif

include/finufft/finufft_core.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,16 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan class, fully C++
8484
int dim; // overall dimension: 1,2 or 3
8585

8686
private:
87-
int ntrans; // how many transforms to do at once (vector or "many" mode)
88-
BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts)
89-
BIGINT nk; // number of NU freq pts (type 3 only)
90-
TF tol; // relative user tolerance
91-
int batchSize; // # strength vectors to group together for FFTW, etc
92-
int nbatch; // how many batches done to cover all ntrans vectors
87+
int ntrans; // how many transforms to do at once (vector or "many" mode)
88+
BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts)
89+
BIGINT nk; // number of NU freq pts (type 3 only)
90+
TF tol; // relative user tolerance
91+
int batchSize; // # strength vectors to group together for FFTW, etc
92+
int nbatch; // how many batches done to cover all ntrans vectors
9393

94-
int nc = 0; // number of Horner coefficients used for ES kernel (<= MAX_NC)
94+
int nc = 0; // number of Horner coefficients used for ES kernel (<= MAX_NC)
95+
bool upsamp_locked = false; // true if user specified upsampfac != 0, prevents auto
96+
// update
9597

9698
public:
9799
std::array<BIGINT, 3> mstu; // number of modes in x,y,z directions
@@ -148,6 +150,10 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan class, fully C++
148150

149151
void precompute_horner_coeffs();
150152

153+
// Helper to initialize spreader, phiHat (Fourier series), and FFT plan.
154+
// Used by constructor (when upsampfac given) and setpts (when upsampfac deferred).
155+
int initSpreadAndFFT();
156+
151157
public:
152158
FINUFFT_PLAN_T(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, TF tol,
153159
const finufft_opts *opts, int &ier);

src/finufft_core.cpp

Lines changed: 140 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,88 @@ template<typename TF> void FINUFFT_PLAN_T<TF>::precompute_horner_coeffs() {
594594
}
595595
}
596596

597+
// Helper to initialize spreader, phiHat (Fourier series), and FFT plan.
598+
// Used by constructor (when upsampfac given) and setpts (when upsampfac deferred).
599+
// Returns 0 on success, or an error code if set_nf_type12 or alloc fails.
600+
template<typename TF> int FINUFFT_PLAN_T<TF>::initSpreadAndFFT() {
601+
CNTime timer{};
602+
spopts.spread_direction = type;
603+
constexpr TF EPSILON = std::numeric_limits<TF>::epsilon();
604+
605+
if (opts.spreadinterponly) { // (unusual case of no NUFFT, just report)
606+
// spreadinterp grid will simply be the user's "mode" grid...
607+
for (int idim = 0; idim < dim; ++idim) nfdim[idim] = mstu[idim];
608+
609+
if (opts.debug) { // "long long" here is to avoid warnings with printf...
610+
printf("[%s] %dd spreadinterponly(dir=%d): (ms,mt,mu)=(%lld,%lld,%lld)"
611+
"\n ntrans=%d nthr=%d batchSize=%d kernel width ns=%d",
612+
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
613+
(long long)mstu[2], ntrans, opts.nthreads, batchSize, spopts.nspread);
614+
if (batchSize == 1) // spread_thread has no effect in this case
615+
printf("\n");
616+
else
617+
printf(" spread_thread=%d\n", opts.spread_thread);
618+
}
619+
620+
} else { // ..... usual NUFFT: eval Fourier series, alloc workspace .....
621+
622+
if (opts.showwarn) { // user warn round-off error (due to prob condition #)...
623+
for (int idim = 0; idim < dim; ++idim)
624+
if (EPSILON * mstu[idim] > 1.0)
625+
fprintf(stderr,
626+
"%s warning: rounding err (due to cond # of prob) eps_mach*N%d = %.3g "
627+
"> 1 !\n",
628+
__func__, idim, (double)(EPSILON * mstu[idim]));
629+
}
630+
631+
// determine fine grid sizes, sanity check, then alloc...
632+
for (int idim = 0; idim < dim; ++idim) {
633+
int nfier = set_nf_type12(mstu[idim], opts, spopts, &nfdim[idim]);
634+
if (nfier) return nfier; // nf too big; we're done
635+
phiHat[idim].resize(nfdim[idim] / 2 + 1); // alloc fseries
636+
}
637+
638+
if (opts.debug) { // "long long" here is to avoid warnings with printf...
639+
printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) "
640+
"(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d "
641+
"batchSize=%d ",
642+
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
643+
(long long)mstu[2], (long long)nfdim[0], (long long)nfdim[1],
644+
(long long)nfdim[2], ntrans, opts.nthreads, batchSize);
645+
if (batchSize == 1) // spread_thread has no effect in this case
646+
printf("\n");
647+
else
648+
printf(" spread_thread=%d\n", opts.spread_thread);
649+
}
650+
651+
// STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim
652+
timer.restart();
653+
for (int idim = 0; idim < dim; ++idim)
654+
onedim_fseries_kernel(nfdim[idim], phiHat[idim], spopts, horner_coeffs.data(), nc);
655+
if (opts.debug)
656+
printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread,
657+
timer.elapsedsec());
658+
659+
if (nf() * batchSize > MAX_NF) {
660+
fprintf(stderr,
661+
"[%s] fwBatch would be bigger than MAX_NF, not attempting memory "
662+
"allocation!\n",
663+
__func__);
664+
return FINUFFT_ERR_MAXNALLOC;
665+
}
666+
667+
timer.restart(); // plan the FFTW (to act in-place on the workspace fwBatch)
668+
int nthr_fft = opts.nthreads;
669+
const auto ns = gridsize_for_fft(*this);
670+
std::vector<TC, xsimd::aligned_allocator<TC, 64>> fwBatch(nf() * batchSize);
671+
fftPlan->plan(ns, batchSize, fwBatch.data(), fftSign, opts.fftw, nthr_fft);
672+
if (opts.debug)
673+
printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw, nthr_fft,
674+
timer.elapsedsec());
675+
}
676+
return 0;
677+
}
678+
597679
// --------------- rest is the five user guru (plan) interface drivers ----------
598680
// (they are not namespaced since have safe names finufft{f}_* )
599681
using namespace finufft::utils; // AHB since already given at top, needed again?
@@ -749,106 +831,28 @@ FINUFFT_PLAN_T<TF>::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i
749831
}
750832
}
751833

752-
// heuristic to choose default upsampfac... (currently two poss)
753-
if (opts.upsampfac == 0.0) { // init to auto choice
754-
// Let assume density=1 as the average use case.
755-
// TODO: make a decision on how to choose density properly.
756-
const auto density = TF{1};
757-
opts.upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, density, dim, type, tol);
758-
if (opts.debug > 1)
759-
printf("[%s] threads %d, density %.3g, dim %d, nufft type %d, tol %.3g: auto "
760-
"upsampfac=%.2f\n",
761-
__func__, opts.nthreads, density, dim, type, tol, opts.upsampfac);
762-
}
763-
// use opts to choose and write into plan's spread options...
764-
ier = setup_spreader_for_nufft(spopts, tol, opts, dim);
765-
if (ier > 1) // proceed if success or warning
766-
throw int(ier);
767-
precompute_horner_coeffs();
768-
// ------------------------ types 1,2: planning needed ---------------------
769-
if (type == 1 || type == 2) {
770-
771-
int nthr_fft = nthr; // give FFT all threads (or use o.spread_thread?)
772-
// Note: batchSize not used since might be only 1.
773-
774-
spopts.spread_direction = type;
775-
constexpr TF EPSILON = std::numeric_limits<TF>::epsilon();
776-
777-
if (opts.spreadinterponly) { // (unusual case of no NUFFT, just report)
778-
779-
// spreadinterp grid will simply be the user's "mode" grid...
780-
for (int idim = 0; idim < dim; ++idim) nfdim[idim] = mstu[idim];
781-
782-
if (opts.debug) { // "long long" here is to avoid warnings with printf...
783-
printf("[%s] %dd spreadinterponly(dir=%d): (ms,mt,mu)=(%lld,%lld,%lld)"
784-
"\n ntrans=%d nthr=%d batchSize=%d kernel width ns=%d",
785-
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
786-
(long long)mstu[2], ntrans, nthr, batchSize, spopts.nspread);
787-
if (batchSize == 1) // spread_thread has no effect in this case
788-
printf("\n");
789-
else
790-
printf(" spread_thread=%d\n", opts.spread_thread);
791-
}
792-
793-
} else { // ..... usual NUFFT: eval Fourier series, alloc workspace .....
794-
795-
if (opts.showwarn) { // user warn round-off error (due to prob condition #)...
796-
for (int idim = 0; idim < dim; ++idim)
797-
if (EPSILON * mstu[idim] > 1.0)
798-
fprintf(
799-
stderr,
800-
"%s warning: rounding err (due to cond # of prob) eps_mach*N%d = %.3g "
801-
"> 1 !\n",
802-
__func__, idim, (double)(EPSILON * mstu[idim]));
803-
}
804-
805-
// determine fine grid sizes, sanity check, then alloc...
806-
for (int idim = 0; idim < dim; ++idim) {
807-
int nfier = set_nf_type12(mstu[idim], opts, spopts, &nfdim[idim]);
808-
if (nfier) throw nfier; // nf too big; we're done
809-
phiHat[idim].resize(nfdim[idim] / 2 + 1); // alloc fseries
810-
}
811-
812-
if (opts.debug) { // "long long" here is to avoid warnings with printf...
813-
printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) "
814-
"(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d "
815-
"batchSize=%d ",
816-
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
817-
(long long)mstu[2], (long long)nfdim[0], (long long)nfdim[1],
818-
(long long)nfdim[2], ntrans, nthr, batchSize);
819-
if (batchSize == 1) // spread_thread has no effect in this case
820-
printf("\n");
821-
else
822-
printf(" spread_thread=%d\n", opts.spread_thread);
823-
}
824-
825-
// STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim
826-
timer.restart();
827-
for (int idim = 0; idim < dim; ++idim)
828-
onedim_fseries_kernel(nfdim[idim], phiHat[idim], spopts, horner_coeffs.data(),
829-
nc);
830-
if (opts.debug)
831-
printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread,
832-
timer.elapsedsec());
833-
834-
if (nf() * batchSize > MAX_NF) {
835-
fprintf(stderr,
836-
"[%s] fwBatch would be bigger than MAX_NF, not attempting memory "
837-
"allocation!\n",
838-
__func__);
839-
throw int(FINUFFT_ERR_MAXNALLOC);
840-
}
841-
842-
timer.restart(); // plan the FFTW (to act in-place on the workspace fwBatch)
843-
const auto ns = gridsize_for_fft(*this);
844-
std::vector<TC, xsimd::aligned_allocator<TC, 64>> fwBatch(nf() * batchSize);
845-
fftPlan->plan(ns, batchSize, fwBatch.data(), fftSign, opts.fftw, nthr_fft);
846-
if (opts.debug)
847-
printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw,
848-
nthr_fft, timer.elapsedsec());
834+
// heuristic to choose default upsampfac: defer selection to setpts unless
835+
// the user explicitly forced a nonzero value in opts. In that case initialize
836+
// spreader/Horner internals now using the provided upsampfac.
837+
if (opts.upsampfac != 0.0) {
838+
upsamp_locked = true; // user explicitly set upsampfac, don't auto-update
839+
ier = setup_spreader_for_nufft(spopts, tol, opts, dim);
840+
if (ier > 1) // proceed if success or warning
841+
throw int(ier);
842+
precompute_horner_coeffs();
843+
844+
// ------------------------ types 1,2: planning needed ---------------------
845+
if (type == 1 || type == 2) {
846+
int code = initSpreadAndFFT();
847+
if (code) throw code;
849848
}
849+
} else {
850+
// If upsampfac was left as 0.0 (auto) we defer setup_spreader to setpts.
851+
// However, we must still check if tol is too small to warn the user now.
852+
if (tol < std::numeric_limits<TF>::epsilon()) ier = FINUFFT_WARN_EPS_TOO_SMALL;
853+
}
850854

851-
} else { // -------------------------- type 3 (no planning) ------------
855+
if (type == 3) { // -------------------------- type 3 (no planning) ------------
852856

853857
if (opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans);
854858
// Type 3 will call finufft_makeplan for type 2; no need to init FFTW
@@ -908,6 +912,27 @@ int FINUFFT_PLAN_T<TF>::setpts(BIGINT nj, const TF *xj, const TF *yj, const TF *
908912

909913
if (type != 3) { // ------------------ TYPE 1,2 SETPTS -------------------
910914
// (all we can do is check and maybe bin-sort the NU pts)
915+
// If upsampfac is not locked by user (auto mode), choose or update it now
916+
// based on the actual density nj/N(). Re-plan if density changed significantly.
917+
if (!upsamp_locked) {
918+
double density = double(nj) / double(N());
919+
double upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, density, dim, type, tol);
920+
// Re-plan if this is the first call (upsampfac==0) or if upsampfac changed
921+
if (upsampfac != opts.upsampfac) {
922+
opts.upsampfac = upsampfac;
923+
if (opts.debug > 1)
924+
printf("[setpts] selected upsampfac=%.2f (density=%.3f, nj=%lld)\n",
925+
opts.upsampfac, density, (long long)nj);
926+
int code = setup_spreader_for_nufft(spopts, tol, opts, dim);
927+
if (code > 1) return code;
928+
precompute_horner_coeffs();
929+
930+
// Perform the planning steps (first call or re-plan due to density change).
931+
code = initSpreadAndFFT();
932+
if (code) return code;
933+
}
934+
}
935+
911936
XYZ = {xj, yj, zj}; // plan must keep pointers to user's fixed NU pts
912937
int ier = spreadcheck(nfdim[0], nfdim[1], nfdim[2], spopts);
913938
if (ier) // no warnings allowed here
@@ -935,6 +960,21 @@ int FINUFFT_PLAN_T<TF>::setpts(BIGINT nj, const TF *xj, const TF *yj, const TF *
935960
this->nk = nk; // user set # targ freq pts
936961
STU = {s, t, u};
937962

963+
// For type 3 with deferred upsampfac (not locked by user), pick and persist
964+
// an upsamp now using density=1.0 so that subsequent steps (set_nhg_type3 etc.)
965+
// have a concrete upsampfac to work with. This choice is persisted so inner
966+
// type-2 plans will inherit it.
967+
double upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, 1.0, dim, type, tol);
968+
if (!upsamp_locked && (upsampfac != opts.upsampfac)) {
969+
opts.upsampfac = upsampfac;
970+
if (opts.debug > 1)
971+
printf("[setpts t3] selected upsampfac=%.2f (density=1 used; persisted)\n",
972+
opts.upsampfac);
973+
int sier = setup_spreader_for_nufft(spopts, tol, opts, dim);
974+
if (sier > 1) return sier;
975+
precompute_horner_coeffs();
976+
}
977+
938978
// pick x, s intervals & shifts & # fine grid pts (nf) in each dim...
939979
std::array<TF, 3> S = {0, 0, 0};
940980
if (opts.debug) printf("\tM=%lld N=%lld\n", (long long)nj, (long long)nk);
@@ -1038,6 +1078,8 @@ int FINUFFT_PLAN_T<TF>::setpts(BIGINT nj, const TF *xj, const TF *yj, const TF *
10381078
t2opts.debug = std::max(0, opts.debug - 1); // don't print as much detail
10391079
t2opts.spread_debug = std::max(0, opts.spread_debug - 1);
10401080
t2opts.showwarn = 0; // so don't see warnings 2x
1081+
if (!upsamp_locked) t2opts.upsampfac = 0.0; // if the upsampfac was auto, let inner
1082+
// t2 pick it again (from density=nj/Nf)
10411083
// (...could vary other t2opts here?)
10421084
// MR: temporary hack, until we have figured out the C++ interface.
10431085
FINUFFT_PLAN_T<TF> *tmpplan;

0 commit comments

Comments
 (0)