Skip to content

Commit 1ca400d

Browse files
authored
Merge pull request #332 from anyangml2nd/chore/clean-up-LAMBenchV2
Chore: clean up dataset changes
2 parents 0eec2f7 + 131cc17 commit 1ca400d

File tree

6 files changed

+147
-318
lines changed

6 files changed

+147
-318
lines changed

lambench/metrics/direct_task_weights.yml

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,14 @@ HEA25_S:
1414
energy_std: 0.4030134901622356
1515
force_std: 1.5479359067976695
1616
virial_std: 1.4293255096528095
17-
HEA25_bulk:
17+
MoS2:
1818
domain: Inorganic Materials
1919
energy_weight: 1.0
2020
force_weight: 1.0
2121
virial_weight: 1.0
22-
energy_std: 0.4086027291354181
23-
force_std: 2.075184012071992
24-
virial_std: 2.065014356039771
25-
HEMC_HEMB:
26-
domain: Inorganic Materials
27-
energy_weight: 1.0
28-
force_weight: 1.0
29-
virial_weight: 1.0
30-
energy_std: 0.4750117425061965
31-
force_std: 1.8089415904253994
32-
virial_std: 0.4589409203427954
22+
energy_std: 0.08333066480136275
23+
force_std: 0.9536237886182164
24+
virial_std: 0.42877076652059987
3325
MD22:
3426
domain: Biomolecules & Supramolecules
3527
energy_weight: 1.0
@@ -118,24 +110,41 @@ Si_ZEO22:
118110
energy_std: 0.03534121167926313
119111
force_std: 1.2410267785352673
120112
virial_std: null
121-
WBM_downsampled:
122-
domain: Inorganic Materials
123-
energy_weight: 1.0
124-
force_weight: null
125-
virial_weight: null
126-
energy_std: 0.3743104865117501
127-
force_std: null
128-
virial_std: null
129-
Subalex_9k:
130-
domain: Inorganic Materials
131-
energy_weight: 1.0
132-
force_weight: 1.0
133-
virial_weight: 1.0
134-
energy_std: 0.7749643377228371
135-
force_std: 1.1503770816187873
136-
virial_std: 0.8678699239404154
113+
137114

138115
## DEPRECATED
116+
# WBM_downsampled:
117+
# domain: Inorganic Materials
118+
# energy_weight: 1.0
119+
# force_weight: null
120+
# virial_weight: null
121+
# energy_std: 0.3743104865117501
122+
# force_std: null
123+
# virial_std: null
124+
# Subalex_9k:
125+
# domain: Inorganic Materials
126+
# energy_weight: 1.0
127+
# force_weight: 1.0
128+
# virial_weight: 1.0
129+
# energy_std: 0.7749643377228371
130+
# force_std: 1.1503770816187873
131+
# virial_std: 0.8678699239404154
132+
# HEA25_bulk:
133+
# domain: Inorganic Materials
134+
# energy_weight: 1.0
135+
# force_weight: 1.0
136+
# virial_weight: 1.0
137+
# energy_std: 0.4086027291354181
138+
# force_std: 2.075184012071992
139+
# virial_std: 2.065014356039771
140+
# HEMC_HEMB:
141+
# domain: Inorganic Materials
142+
# energy_weight: 1.0
143+
# force_weight: 1.0
144+
# virial_weight: 1.0
145+
# energy_std: 0.4750117425061965
146+
# force_std: 1.8089415904253994
147+
# virial_std: 0.4589409203427954
139148
# Torsionnet500:
140149
# domain: Small Molecules
141150
# energy_weight: 1.0

lambench/metrics/results/metadata.json

Lines changed: 9 additions & 205 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@
200200
},
201201
"ANI": {
202202
"DISPLAY_NAME": "ANI-1x",
203-
"DESCRIPTION": "The training dataset of ANI-1x model containing 5.5 M structures, which is 25% the size of the dataset used in training the original ANI-1 potential (22 M). Calculations were performed using Gaussian with ωB97x/6-31G*. [https://doi.org/10.1063/1.5023802]",
203+
"DESCRIPTION": "A 997-frame dataset downsampled from the training dataset of the ANI-1x model. The dataset was relabeled using Gaussian with PBE/6-31G(d). [https://doi.org/10.1063/1.5023802]",
204204
"domain": "Small Molecules",
205205
"energy_rmse": {
206206
"DISPLAY_NAME": "E RMSE (meV)",
@@ -298,7 +298,7 @@
298298
},
299299
"Collision": {
300300
"DISPLAY_NAME": "Gasteiger2020Fast",
301-
"DESCRIPTION": "Validation set from COLL. Consists of configurations taken from molecular collisions of different small organic molecules. Energies and forces were recomputed with DFT using the revPBE functional and def2-TZVP basis, including D3 dispersion corrections. [https://arxiv.org/abs/2011.14115]",
301+
"DESCRIPTION": "A downsampled dataset from the validation set of COLL. The original dataset consists of configurations taken from molecular collisions of different small organic molecules. Energies and forces were recomputed with DFT using the revPBE functional and def2-TZVP basis, including D3 dispersion corrections. [https://arxiv.org/abs/2011.14115]. The dataset was relabeled using Gaussian with PBE/6-31G(d).",
302302
"domain": "Reactions",
303303
"energy_rmse": {
304304
"DISPLAY_NAME": "E RMSE (meV)",
@@ -394,9 +394,9 @@
394394
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
395395
}
396396
},
397-
"Subalex_9k": {
398-
"DISPLAY_NAME": "SubAlex_9k",
399-
"DESCRIPTION": "A subsampled, Matbench-Discovery compliant version of the original Alexandria dataset. Calculations were performed using VASP with PBE/PAW and 520 eV cutoff. [https://arxiv.org/abs/2410.12771]",
397+
"MoS2": {
398+
"DISPLAY_NAME": "Gao2025Spontaneous",
399+
"DESCRIPTION": "2D MoS2 structures. Calculations were performed using VASP with PBE/PAW and 600 eV cutoff. [https://www.nature.com/articles/s41467-025-56055-x#Sec3]",
400400
"domain": "Inorganic Materials",
401401
"energy_rmse": {
402402
"DISPLAY_NAME": "E RMSE (meV)",
@@ -443,55 +443,6 @@
443443
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
444444
}
445445
},
446-
"Torsionnet500": {
447-
"DISPLAY_NAME": "Torsionnet500",
448-
"DESCRIPTION": "TorsionNet500, a benchmark dataset comprising 500 chemically diverse fragments with DFT torsion profiles (12k MM- and DFT-optimized geometries and energies). Calculations were performed using Gaussian with ωB97xD/6-31G**. [https://pubs.acs.org/doi/abs/10.1021/acs.jcim.1c01346]",
449-
"domain": "Small Molecules",
450-
"energy_rmse": {
451-
"DISPLAY_NAME": "E RMSE (meV)",
452-
"DESCRIPTION": "The root mean squared error of the energy prediction.",
453-
"hide": true
454-
},
455-
"energy_mae": {
456-
"DISPLAY_NAME": "E MAE (meV)",
457-
"DESCRIPTION": "The mean absolute error of the energy prediction.",
458-
"hide": true
459-
},
460-
"energy_rmse_natoms": {
461-
"DISPLAY_NAME": "E RMSE (meV/atom)",
462-
"DESCRIPTION": "The root mean squared error of the energy prediction per atom."
463-
},
464-
"energy_mae_natoms": {
465-
"DISPLAY_NAME": "E MAE (meV/atom)",
466-
"DESCRIPTION": "The mean absolute error of the energy prediction per atom."
467-
},
468-
"force_rmse": {
469-
"DISPLAY_NAME": "F RMSE (meV/\u00c5)",
470-
"DESCRIPTION": "The root mean squared error of the force prediction."
471-
},
472-
"force_mae": {
473-
"DISPLAY_NAME": "F MAE (meV/\u00c5)",
474-
"DESCRIPTION": "The mean absolute error of the force prediction."
475-
},
476-
"virial_rmse": {
477-
"DISPLAY_NAME": "V RMSE (meV)",
478-
"DESCRIPTION": "The root mean squared error of the virial prediction.",
479-
"hide": true
480-
},
481-
"virial_mae": {
482-
"DISPLAY_NAME": "V MAE (meV)",
483-
"DESCRIPTION": "The mean absolute error of the virial prediction.",
484-
"hide": true
485-
},
486-
"virial_rmse_natoms": {
487-
"DISPLAY_NAME": "V RMSE (meV/atom)",
488-
"DESCRIPTION": "The root mean squared error of the virial prediction per atom."
489-
},
490-
"virial_mae_natoms": {
491-
"DISPLAY_NAME": "V MAE (meV/atom)",
492-
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
493-
}
494-
},
495446
"Cu_MgO_catalysts": {
496447
"DISPLAY_NAME": "Villanueva2024Water",
497448
"DESCRIPTION": "Selective carbon dioxide Hydrogenation to Methanol over Cu-MgO-Al2O3 Catalysts. Calculations were performed using VASP with PBE-D3/PAW and 450 eV cutoff. [https://pubs.acs.org/doi/10.1021/jacs.3c10685]",
@@ -592,7 +543,7 @@
592543
},
593544
"AIMD-Chig": {
594545
"DISPLAY_NAME": "AIMD-Chig",
595-
"DESCRIPTION": "MD dataset including 2 million conformations of 166-atom protein Chignolin sampled at the density functional theory (DFT) level. Ab initio simulations were driven by M062X/6-31G* with a Berendsen thermostat at 340 K.[https://www.nature.com/articles/s41597-023-02465-9]",
546+
"DESCRIPTION": "A downsampled dataset containing MD conformations of the 166-atom protein Chignolin. The original ab initio simulations were driven by M062X/6-31G* with a Berendsen thermostat at 340 K. [https://www.nature.com/articles/s41597-023-02465-9]. The dataset was relabeled using Gaussian with PBE/6-31G(d).",
596547
"domain": "Biomolecules & Supramolecules",
597548
"energy_rmse": {
598549
"DISPLAY_NAME": "E RMSE (meV)",
@@ -640,7 +591,7 @@
640591
}
641592
},
642593
"HEA25_S": {
643-
"DISPLAY_NAME": "Lopanitsyna2023Modeling_A",
594+
"DISPLAY_NAME": "Lopanitsyna2023Modeling",
644595
"DESCRIPTION": "A dataset of 25-atom high entropy alloy surfaces, focusing on 25 d-block transition metals, excluding Tc, Cd, Re, Os and Hg. Calculations were performed using VASP with PBEsol/PAW, 550 eV cutoff, and Γ-centered k-points. [https://arxiv.org/abs/2212.13254]",
645596
"domain": "Inorganic Materials",
646597
"energy_rmse": {
@@ -688,156 +639,9 @@
688639
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
689640
}
690641
},
691-
"HEMC_HEMB": {
692-
"DISPLAY_NAME": "Dai2024Deep",
693-
"DESCRIPTION": "DFT dataset of high-entropy transition metal diboride (HEMB₂) and carbide (HEMC) ceramics. Calculations were performed using VASP with PBE/PAW, 900 eV cutoff, and k-spacing of 0.15 Å⁻¹. [https://www.oaepublish.com/articles/jmi.2024.14]",
694-
"domain": "Inorganic Materials",
695-
"energy_rmse": {
696-
"DISPLAY_NAME": "E RMSE (meV)",
697-
"DESCRIPTION": "The root mean squared error of the energy prediction.",
698-
"hide": true
699-
},
700-
"energy_mae": {
701-
"DISPLAY_NAME": "E MAE (meV)",
702-
"DESCRIPTION": "The mean absolute error of the energy prediction.",
703-
"hide": true
704-
},
705-
"energy_rmse_natoms": {
706-
"DISPLAY_NAME": "E RMSE (meV/atom)",
707-
"DESCRIPTION": "The root mean squared error of the energy prediction per atom."
708-
},
709-
"energy_mae_natoms": {
710-
"DISPLAY_NAME": "E MAE (meV/atom)",
711-
"DESCRIPTION": "The mean absolute error of the energy prediction per atom."
712-
},
713-
"force_rmse": {
714-
"DISPLAY_NAME": "F RMSE (meV/\u00c5)",
715-
"DESCRIPTION": "The root mean squared error of the force prediction."
716-
},
717-
"force_mae": {
718-
"DISPLAY_NAME": "F MAE (meV/\u00c5)",
719-
"DESCRIPTION": "The mean absolute error of the force prediction."
720-
},
721-
"virial_rmse": {
722-
"DISPLAY_NAME": "V RMSE (meV)",
723-
"DESCRIPTION": "The root mean squared error of the virial prediction.",
724-
"hide": true
725-
},
726-
"virial_mae": {
727-
"DISPLAY_NAME": "V MAE (meV)",
728-
"DESCRIPTION": "The mean absolute error of the virial prediction.",
729-
"hide": true
730-
},
731-
"virial_rmse_natoms": {
732-
"DISPLAY_NAME": "V RMSE (meV/atom)",
733-
"DESCRIPTION": "The root mean squared error of the virial prediction per atom."
734-
},
735-
"virial_mae_natoms": {
736-
"DISPLAY_NAME": "V MAE (meV/atom)",
737-
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
738-
}
739-
},
740-
"HEA25_bulk": {
741-
"DISPLAY_NAME": "Lopanitsyna2023Modeling_B",
742-
"DESCRIPTION": "A dataset of 25-atom high entropy alloy bulk structures, focusing on 25 d-block transition metals, excluding Tc, Cd, Re, Os and Hg. Calculations were performed using VASP with PBEsol/PAW, 550 eV cutoff, and Γ-centered k-points. [https://arxiv.org/abs/2212.13254]",
743-
"domain": "Inorganic Materials",
744-
"energy_rmse": {
745-
"DISPLAY_NAME": "E RMSE (meV)",
746-
"DESCRIPTION": "The root mean squared error of the energy prediction.",
747-
"hide": true
748-
},
749-
"energy_mae": {
750-
"DISPLAY_NAME": "E MAE (meV)",
751-
"DESCRIPTION": "The mean absolute error of the energy prediction.",
752-
"hide": true
753-
},
754-
"energy_rmse_natoms": {
755-
"DISPLAY_NAME": "E RMSE (meV/atom)",
756-
"DESCRIPTION": "The root mean squared error of the energy prediction per atom."
757-
},
758-
"energy_mae_natoms": {
759-
"DISPLAY_NAME": "E MAE (meV/atom)",
760-
"DESCRIPTION": "The mean absolute error of the energy prediction per atom."
761-
},
762-
"force_rmse": {
763-
"DISPLAY_NAME": "F RMSE (meV/\u00c5)",
764-
"DESCRIPTION": "The root mean squared error of the force prediction."
765-
},
766-
"force_mae": {
767-
"DISPLAY_NAME": "F MAE (meV/\u00c5)",
768-
"DESCRIPTION": "The mean absolute error of the force prediction."
769-
},
770-
"virial_rmse": {
771-
"DISPLAY_NAME": "V RMSE (meV)",
772-
"DESCRIPTION": "The root mean squared error of the virial prediction.",
773-
"hide": true
774-
},
775-
"virial_mae": {
776-
"DISPLAY_NAME": "V MAE (meV)",
777-
"DESCRIPTION": "The mean absolute error of the virial prediction.",
778-
"hide": true
779-
},
780-
"virial_rmse_natoms": {
781-
"DISPLAY_NAME": "V RMSE (meV/atom)",
782-
"DESCRIPTION": "The root mean squared error of the virial prediction per atom."
783-
},
784-
"virial_mae_natoms": {
785-
"DISPLAY_NAME": "V MAE (meV/atom)",
786-
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
787-
}
788-
},
789-
"WBM_downsampled": {
790-
"DISPLAY_NAME": "WBM_25k",
791-
"DESCRIPTION": "A downsampled version of the original WBM dataset. [https://www.nature.com/articles/s41524-020-00481-6]",
792-
"domain": "Inorganic Materials",
793-
"energy_rmse": {
794-
"DISPLAY_NAME": "E RMSE (meV)",
795-
"DESCRIPTION": "The root mean squared error of the energy prediction.",
796-
"hide": true
797-
},
798-
"energy_mae": {
799-
"DISPLAY_NAME": "E MAE (meV)",
800-
"DESCRIPTION": "The mean absolute error of the energy prediction.",
801-
"hide": true
802-
},
803-
"energy_rmse_natoms": {
804-
"DISPLAY_NAME": "E RMSE (meV/atom)",
805-
"DESCRIPTION": "The root mean squared error of the energy prediction per atom."
806-
},
807-
"energy_mae_natoms": {
808-
"DISPLAY_NAME": "E MAE (meV/atom)",
809-
"DESCRIPTION": "The mean absolute error of the energy prediction per atom."
810-
},
811-
"force_rmse": {
812-
"DISPLAY_NAME": "F RMSE (meV/\u00c5)",
813-
"DESCRIPTION": "The root mean squared error of the force prediction."
814-
},
815-
"force_mae": {
816-
"DISPLAY_NAME": "F MAE (meV/\u00c5)",
817-
"DESCRIPTION": "The mean absolute error of the force prediction."
818-
},
819-
"virial_rmse": {
820-
"DISPLAY_NAME": "V RMSE (meV)",
821-
"DESCRIPTION": "The root mean squared error of the virial prediction.",
822-
"hide": true
823-
},
824-
"virial_mae": {
825-
"DISPLAY_NAME": "V MAE (meV)",
826-
"DESCRIPTION": "The mean absolute error of the virial prediction.",
827-
"hide": true
828-
},
829-
"virial_rmse_natoms": {
830-
"DISPLAY_NAME": "V RMSE (meV/atom)",
831-
"DESCRIPTION": "The root mean squared error of the virial prediction per atom."
832-
},
833-
"virial_mae_natoms": {
834-
"DISPLAY_NAME": "V MAE (meV/atom)",
835-
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
836-
}
837-
},
838642
"H_nature_2022": {
839643
"DISPLAY_NAME": "Guan2022Benchmark",
840-
"DESCRIPTION": "Dataset of hydrogen combustion reactions. Calculations were performed using Q-Chem with ωB97X-V/cc-pVTZ. The dataset includes intrinsic reaction coordinate (IRC) calculations, ab initio MD simulations, and normal mode displacement calculations covering 19 reaction channels for hydrogen combustion. [https://www.nature.com/articles/s41597-022-01330-5]",
644+
"DESCRIPTION": "A downsampled dataset of hydrogen combustion reactions. The original dataset was constructed using Q-Chem with ωB97X-V/cc-pVTZ. The dataset includes intrinsic reaction coordinate (IRC) calculations, ab initio MD simulations, and normal mode displacement calculations covering 19 reaction channels for hydrogen combustion. [https://www.nature.com/articles/s41597-022-01330-5]. The dataset was relabeled using Gaussian with PBE/6-31G(d).",
841645
"domain": "Reactions",
842646
"energy_rmse": {
843647
"DISPLAY_NAME": "E RMSE (meV)",
@@ -962,7 +766,7 @@
962766
},
963767
"torsionnet": {
964768
"DISPLAY_NAME": "Torsional Barrier",
965-
"DESCRIPTION": "Evaluation of torsional barrier related metrics on the TorsionNet-500 dataset at the ωB97M-D3(BJ)/def2-TZVPPD level.",
769+
"DESCRIPTION": "Evaluation of torsional barrier related metrics on the TorsionNet-500 dataset at the CCSD(T)/CBS level.",
966770
"MAE": {
967771
"DISPLAY_NAME": "MAE (kcal/mol)",
968772
"DESCRIPTION": "The mean absolute error of the energy prediction across all configurations."

0 commit comments

Comments
 (0)