|
| 1 | +[ |
| 2 | + { |
| 3 | + "abstract": { |
| 4 | + "description": "<p>Flavour-tagging — the task of identifying the flavour of jets — is essential for many physics analyses at the ATLAS experiment. This dataset, released for public use, can be used to train and evaluate machine learning models for jet flavour-tagging at ATLAS. It aims to facilitate broader interest and further development of innovative machine learning techniques to improve flavour-tagging performance.</p>\n<p>The dataset consists of approximately 50 million events from simulated top quark pair production at a centre-of-mass energy of 13.6 TeV. It is stored in HDF5 format and contains structured event-level, jet-level, track-level and truth hadron information. This dataset is designed to be compatible with the flavour-tagging algorithm development pipeline used at ATLAS, and is supported by accompanying instructions and example configurations provided in open-source repositories.</p>\n<p>To improve usability, the dataset is split into three mutually exclusive HDF5 files:</p>\n<ul>\n<li><code>mc-flavtag-ttbar-small.h5</code> — ~1.36 million events (~5.6 million jets)</li>\n<li><code>mc-flavtag-ttbar-medium.h5</code> — ~6.23 million events (~25.6 million jets)</li>\n<li><code>mc-flavtag-ttbar-large.h5</code> — ~41.1 million events (~168 million jets)</li>\n</ul>\n<p>Downloading all three files will provide access to the complete dataset. The smaller subsets are useful for quick exploration or prototyping workflows.</p>" |
| 5 | + }, |
| 6 | + "accelerator": "CERN-LHC", |
| 7 | + "collaboration": { |
| 8 | + "name": "ATLAS collaboration" |
| 9 | + }, |
| 10 | + "collections": [ |
| 11 | + "ATLAS-Derived-Datasets" |
| 12 | + ], |
| 13 | + "date_published": "2025", |
| 14 | + "distribution": { |
| 15 | + "formats": [ |
| 16 | + "h5" |
| 17 | + ], |
| 18 | + "number_events": 48698675, |
| 19 | + "number_files": 3, |
| 20 | + "size": 107788972974 |
| 21 | + }, |
| 22 | + "doi": "10.7483/OPENDATA.ATLAS.QG8W.TO8P", |
| 23 | + "experiment": [ |
| 24 | + "ATLAS" |
| 25 | + ], |
| 26 | + "files": [ |
| 27 | + { |
| 28 | + "checksum": "adler32:8d7e9098", |
| 29 | + "size": 3055139850, |
| 30 | + "uri": "root://eospublic.cern.ch//eos/opendata/atlas/datascience/ATLAS-FTAG-2023-05/mc-flavtag-ttbar-small.h5" |
| 31 | + }, |
| 32 | + { |
| 33 | + "checksum": "adler32:e29be59c", |
| 34 | + "size": 13935881976, |
| 35 | + "uri": "root://eospublic.cern.ch//eos/opendata/atlas/datascience/ATLAS-FTAG-2023-05/mc-flavtag-ttbar-medium.h5" |
| 36 | + }, |
| 37 | + { |
| 38 | + "checksum": "adler32:61adcbcb", |
| 39 | + "size": 90797951148, |
| 40 | + "uri": "root://eospublic.cern.ch//eos/opendata/atlas/datascience/ATLAS-FTAG-2023-05/mc-flavtag-ttbar-large.h5" |
| 41 | + } |
| 42 | + ], |
| 43 | + "keywords": [ |
| 44 | + "datascience" |
| 45 | + ], |
| 46 | + "license": { |
| 47 | + "attribution": "CC0-1.0" |
| 48 | + }, |
| 49 | + "publisher": "CERN Open Data Portal", |
| 50 | + "recid": "93940", |
| 51 | + "title": "ATLAS $t\\bar{t}$ simulation for ML-based jet flavour tagging (JetSet)", |
| 52 | + "type": { |
| 53 | + "primary": "Dataset", |
| 54 | + "secondary": [ |
| 55 | + "Derived", |
| 56 | + "Simulated" |
| 57 | + ] |
| 58 | + }, |
| 59 | + "usage": { |
| 60 | + "description": "A detailed explanation of this dataset, and instructions for pre-processing, training, and evaluation workflows are provided in the accompanying GitLab repository. If this dataset is used in a publication, please cite this dataset record along with the accompanying ATLAS paper describing GN2, an ATLAS flavour-tagging algorithm with a transformer-like architecture.", |
| 61 | + "links": [ |
| 62 | + { |
| 63 | + "description": "Transforming Jet Flavour: Documentation and training pipeline", |
| 64 | + "url": "https://gitlab.cern.ch/atlas/open-data/transforming-jet-flavor" |
| 65 | + }, |
| 66 | + { |
| 67 | + "description": "ATLAS GN2 paper ATLAS-FTAG-2023-05", |
| 68 | + "url": "http://example.org" |
| 69 | + } |
| 70 | + ] |
| 71 | + } |
| 72 | + } |
| 73 | +] |
0 commit comments