|
| 1 | +# |
| 2 | +# Copyright (C) 2013-2025 The ESPResSo project |
| 3 | +# |
| 4 | +# This file is part of ESPResSo. |
| 5 | +# |
| 6 | +# ESPResSo is free software: you can redistribute it and/or modify |
| 7 | +# it under the terms of the GNU General Public License as published by |
| 8 | +# the Free Software Foundation, either version 3 of the License, or |
| 9 | +# (at your option) any later version. |
| 10 | +# |
| 11 | +# ESPResSo is distributed in the hope that it will be useful, |
| 12 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | +# GNU General Public License for more details. |
| 15 | +# |
| 16 | +# You should have received a copy of the GNU General Public License |
| 17 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 18 | +# |
| 19 | + |
| 20 | +import os |
| 21 | +import time |
| 22 | +import argparse |
| 23 | +import numpy as np |
| 24 | +import espressomd |
| 25 | +import espressomd.lb |
| 26 | +import espressomd.version |
| 27 | + |
| 28 | +parser = argparse.ArgumentParser(description="Benchmark LB simulations.") |
| 29 | +parser.add_argument("--gpu", action=argparse.BooleanOptionalAction, |
| 30 | + default=False, required=False, help="Use GPU implementation") |
| 31 | +parser.add_argument("--topology", metavar=("X", "Y", "Z"), nargs=3, action="store", |
| 32 | + default=None, required=False, type=int, help="Cartesian topology") |
| 33 | +parser.add_argument("--unit-cell", action="store", nargs="+", |
| 34 | + type=int, default=[64, 64, 64], required=False, |
| 35 | + help="unit cell size") |
| 36 | +parser.add_argument("--single-precision", action="store_true", required=False, |
| 37 | + help="Using single-precision floating point accuracy") |
| 38 | +parser.add_argument("--kT", metavar="kT", action="store", |
| 39 | + type=float, default=0., required=False, |
| 40 | + help="Thermostat heat bath") |
| 41 | +group = parser.add_mutually_exclusive_group() |
| 42 | +group.add_argument("--weak-scaling", action="store_true", |
| 43 | + help="Weak scaling benchmark (Gustafson's law: constant work per core)") |
| 44 | +group.add_argument("--strong-scaling", action="store_true", |
| 45 | + help="Strong scaling benchmark (Amdahl's law: constant total work)") |
| 46 | +group = parser.add_mutually_exclusive_group() |
| 47 | +group.add_argument("--particles-per-rank", metavar="N", action="store", |
| 48 | + type=int, default=0, required=False, |
| 49 | + help="Number of particles per MPI rank") |
| 50 | +group.add_argument("--particles-total", metavar="N", action="store", |
| 51 | + type=int, default=0, required=False, |
| 52 | + help="Number of particles in total") |
| 53 | +args = parser.parse_args() |
| 54 | + |
| 55 | + |
| 56 | +required_features = [] |
| 57 | +if args.particles_per_rank or args.particles_total: |
| 58 | + required_features.append("LENNARD_JONES") |
| 59 | +if args.gpu: |
| 60 | + required_features.append("CUDA") |
| 61 | +espressomd.assert_features(required_features) |
| 62 | +espresso_version = (espressomd.version.major(), espressomd.version.minor()) |
| 63 | + |
| 64 | +# initialize system |
| 65 | +system = espressomd.System(box_l=[100., 100., 100.]) |
| 66 | +system.time_step = 0.01 |
| 67 | +system.cell_system.skin = 0.4 |
| 68 | + |
| 69 | +# set MPI Cartesian topology |
| 70 | +node_grid = np.array(system.cell_system.node_grid) |
| 71 | +n_mpi_ranks = int(np.prod(node_grid)) |
| 72 | +n_omp_threads = int(os.environ.get("OMP_NUM_THREADS", 1)) |
| 73 | +if args.topology: |
| 74 | + system.cell_system.node_grid = node_grid = args.topology |
| 75 | + |
| 76 | +if args.weak_scaling: |
| 77 | + system.box_l = np.multiply(np.array(args.unit_cell), node_grid) |
| 78 | +else: |
| 79 | + system.box_l = args.unit_cell |
| 80 | + |
| 81 | +if args.particles_total: |
| 82 | + n_part = args.particles_total |
| 83 | +else: |
| 84 | + n_part = n_mpi_ranks * args.particles_per_rank |
| 85 | + |
| 86 | +# set CUDA topology |
| 87 | +devices = {} |
| 88 | +if args.gpu: |
| 89 | + devices = espressomd.cuda_init.CudaInitHandle().list_devices() |
| 90 | + if len(devices) > 1 and espresso_version >= (5, 0): |
| 91 | + system.cuda_init_handle.call_method("set_device_id_per_rank") |
| 92 | + |
| 93 | +# place particles at random |
| 94 | +if n_part: |
| 95 | + # volume of N spheres with radius r: N * (4/3*pi*r^3) |
| 96 | + lj_sig = 1. |
| 97 | + lj_eps = 1. |
| 98 | + lj_cut = lj_sig * 2**(1. / 6.) |
| 99 | + volume = float(np.prod(system.box_l)) |
| 100 | + vfrac = n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3 / volume |
| 101 | + print(f"volume fraction: {100.*vfrac:.2f}%") |
| 102 | + assert vfrac < 0.74, "invalid volume fraction" |
| 103 | + system.non_bonded_inter[0, 0].lennard_jones.set_params( |
| 104 | + epsilon=lj_eps, sigma=lj_sig, cutoff=lj_cut, shift="auto") |
| 105 | + system.part.add(pos=np.random.random((n_part, 3)) * system.box_l) |
| 106 | + print("minimize") |
| 107 | + energy_target = n_part / 10. |
| 108 | + system.integrator.set_steepest_descent( |
| 109 | + f_max=0., gamma=0.001, max_displacement=0.01) |
| 110 | + system.integrator.run(200) |
| 111 | + energy = system.analysis.energy()["total"] |
| 112 | + assert energy < energy_target, f"Minimization failed to converge to {energy_target:.1f}" |
| 113 | + print("set Langevin") |
| 114 | + system.integrator.set_vv() |
| 115 | + system.thermostat.set_langevin(kT=1., gamma=1., seed=42) |
| 116 | + min_skin = 0.2 |
| 117 | + max_skin = 1.0 |
| 118 | + print("Tune skin: {:.3f}".format(system.cell_system.tune_skin( |
| 119 | + min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=10))) |
| 120 | + print("MD equilibration") |
| 121 | + system.integrator.run(500) |
| 122 | + print("Tune skin: {:.3f}".format(system.cell_system.tune_skin( |
| 123 | + min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=10))) |
| 124 | + print("MD equilibration") |
| 125 | + system.integrator.run(500) |
| 126 | + system.thermostat.turn_off() |
| 127 | + |
| 128 | +# setup LB solver |
| 129 | +lb_kwargs = {"agrid": 1., "tau": system.time_step, "kT": args.kT} |
| 130 | +if espresso_version == (4, 2): |
| 131 | + assert n_omp_threads == 1, "ESPResSo 4.2 doesn't support OpenMP" |
| 132 | + if args.gpu: |
| 133 | + lb_class = espressomd.lb.LBFluidGPU |
| 134 | + assert args.single_precision, "ESPResSo 4.2 LB GPU only available in single-precision" |
| 135 | + assert len(devices) == 1, "ESPResSo 4.2 LB GPU only supports 1 GPU accelerator" |
| 136 | + else: |
| 137 | + lb_class = espressomd.lb.LBFluid |
| 138 | + assert not args.single_precision, "ESPResSo 4.2 LB CPU only available in double-precision" |
| 139 | + lbf = lb_class(dens=1., visc=1., seed=42, **lb_kwargs) |
| 140 | + system.actors.add(lbf) |
| 141 | +else: |
| 142 | + if args.gpu: |
| 143 | + lb_class = espressomd.lb.LBFluidWalberlaGPU |
| 144 | + else: |
| 145 | + lb_class = espressomd.lb.LBFluidWalberla |
| 146 | + lbf = lb_class(density=1., kinematic_viscosity=1., |
| 147 | + single_precision=args.single_precision, **lb_kwargs) |
| 148 | + system.lb = lbf |
| 149 | + |
| 150 | +if n_part: |
| 151 | + system.thermostat.set_lb(LB_fluid=lbf, seed=42, gamma=1.) |
| 152 | + |
| 153 | +print("LB equilibration") |
| 154 | +system.integrator.run(100) |
| 155 | + |
| 156 | + |
| 157 | +def get_lb_kT(lbf): |
| 158 | + nodes_mass = lbf[:, :, :].density * lbf.agrid**3 |
| 159 | + nodes_vel_sq = np.sum(np.square(lbf[:, :, :].velocity), axis=3) |
| 160 | + return np.mean(nodes_mass * nodes_vel_sq) / 3. |
| 161 | + |
| 162 | + |
| 163 | +def get_md_kT(part): |
| 164 | + return np.mean(np.linalg.norm(part.all().v, axis=1)**2 * part.all().mass) / 3. |
| 165 | + |
| 166 | + |
| 167 | +print("Sanity checks") |
| 168 | +rtol_energy = 0.05 |
| 169 | +fluid_kTs = [] |
| 170 | +parts_kTs = [] |
| 171 | +for _ in range(30): |
| 172 | + fluid_kTs.append(get_lb_kT(lbf)) |
| 173 | + if n_part: |
| 174 | + parts_kTs.append(get_md_kT(system.part)) |
| 175 | + system.integrator.run(10) |
| 176 | +if args.kT == 0.: |
| 177 | + np.testing.assert_almost_equal(np.mean(fluid_kTs), args.kT, decimal=3) |
| 178 | +else: |
| 179 | + np.testing.assert_allclose(np.mean(fluid_kTs), args.kT, rtol=rtol_energy) |
| 180 | + if n_part: |
| 181 | + np.testing.assert_allclose(np.mean(parts_kTs), args.kT, rtol=rtol_energy) |
| 182 | + |
| 183 | +print("Final convergence met with tolerances: \n\ |
| 184 | + energy: ", rtol_energy, "\n") |
| 185 | + |
| 186 | +print("Benchmark") |
| 187 | +n_steps = 40 |
| 188 | +n_loops = 15 |
| 189 | +timings = [] |
| 190 | +for _ in range(n_loops): |
| 191 | + tick = time.time() |
| 192 | + system.integrator.run(n_steps) |
| 193 | + tock = time.time() |
| 194 | + timings.append((tock - tick) / n_steps) |
| 195 | + |
| 196 | +print(f"{n_loops * n_steps} steps executed...") |
| 197 | +print("Algorithm executed.") |
| 198 | +# write results to file |
| 199 | +header = '"mode","cores","mpi.x","mpi.y","mpi.z","omp.threads","gpus",\ |
| 200 | +"particles","mean","std","box.x","box.y","box.z","precision","hardware"' |
| 201 | +report = f'''"{"weak scaling" if args.weak_scaling else "strong scaling"}",\ |
| 202 | +{n_mpi_ranks * n_omp_threads},{node_grid[0]},{node_grid[1]},{node_grid[2]},\ |
| 203 | +{n_omp_threads},{len(devices)},{len(system.part)},\ |
| 204 | +{np.mean(timings):.3e},{np.std(timings,ddof=1):.3e},\ |
| 205 | +{system.box_l[0]:.2f},{system.box_l[1]:.2f},{system.box_l[2]:.2f},\ |
| 206 | +"{'sp' if args.single_precision else 'dp'}",\ |
| 207 | +"{'gpu' if args.gpu else 'cpu'}"''' |
| 208 | +print(header) |
| 209 | +print(report) |
| 210 | + |
| 211 | +print(f"Performance: {np.mean(timings):.3e} s/step") |
0 commit comments