Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ servicex.yaml

#Testing
samples_structure.txt

tmp_test.py
36 changes: 32 additions & 4 deletions servicex_analysis_utils/file_peeking.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
from servicex.dataset_identifier import DataSetIdentifier


def run_query(input_filenames):
def run_query(
input_filenames,
):
import uproot
import awkward as ak
import json
Expand All @@ -59,13 +61,24 @@ def is_tree(obj):
tree_dict = {}

with uproot.open(input_filenames) as file:

for tree_name in file.keys():
tree_name_clean = tree_name.rstrip(";1")
tree = file[tree_name]

if not is_tree(tree):
continue

if tree_name_clean == "MetaData":
fm_branches = [
b for b in tree.keys() if b.startswith("FileMetaDataAuxDyn.")
]
# remove the prefix in keys
meta_dict = {
p[19:]: str(tree[p].array(library="ak")[0]) for p in fm_branches
}
tree_dict["FileMetaData"] = meta_dict

branch_dict = {}
for branch_name, branch in tree.items():
branch_type = str(branch.interpretation)
Expand Down Expand Up @@ -174,9 +187,6 @@ def print_structure_from_str(
import json

output_lines = []
output_lines.append(
f"\nFile structure of all samples with branch filter '{filter_branch}':"
)

for sample_name, path in deliver_dict.items():
structure_str = open_delivered_file(sample_name, path)
Expand All @@ -191,6 +201,22 @@ def print_structure_from_str(
f"---------------------------"
)

# Get the metadata first
output_lines.append(f"\nFile Metadata \u2139\ufe0f :\n")
if "FileMetaData" not in structure_dict:
output_lines.append("No FileMetaData found in dataset.")
else:
for key, value in structure_dict.get("FileMetaData", {}).items():
output_lines.append(f"── {key}: {value}")
output_lines.append("\n---------------------------")

# drop the File metadata from the trees
structure_dict.pop("FileMetaData", {})

output_lines.append(
f"\nFile structure with branch filter \U0001f33f '{filter_branch}':\n"
)

for tree_name, branches in structure_dict.items():
output_lines.append(f"\n\U0001f333 Tree: {tree_name}")
output_lines.append(" ├── Branches:")
Expand Down Expand Up @@ -259,6 +285,8 @@ def str_to_array(encoded_json_str):
"""
reconstructed_data = {}
structure_dict = json.loads(encoded_json_str)
# drop the File metadata from the trees
structure_dict.pop("FileMetaData", {})

for treename, branch_dict in structure_dict.items():
branches = {}
Expand Down
19 changes: 19 additions & 0 deletions tests/data/expected_metadata.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

---------------------------
📁 Sample: test_file
---------------------------

File Metadata ℹ️ :

── test_100: 100
── test_abc: abc

---------------------------

File structure with branch filter 🌿 '':


🌳 Tree: MetaData
├── Branches:
│ ├── FileMetaDataAuxDyn.test_100 ; dtype: AsDtype('>i8')
│ ├── FileMetaDataAuxDyn.test_abc ; dtype: AsStrings()
11 changes: 9 additions & 2 deletions tests/data/expected_structure.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@

File structure of all samples with branch filter '':

---------------------------
📁 Sample: test_file
---------------------------

File Metadata ℹ️ :

No FileMetaData found in dataset.

---------------------------

File structure with branch filter 🌿 '':


🌳 Tree: background
├── Branches:
│ ├── branch1 ; dtype: AsDtype('>f8')
Expand Down
88 changes: 88 additions & 0 deletions tests/test_file_peeking_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) 2025, IRIS-HEP
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import pytest
import uproot
import json
import os
from servicex_analysis_utils import file_peeking
from pathlib import Path


@pytest.fixture
def build_test_samples(tmp_path):
    """Create a small ROOT file with a ``MetaData`` entry and return its path.

    The file is written to ``tmp_path`` as ``test_metadata.root``. It stores
    two single-entry ``FileMetaDataAuxDyn.*`` branches under ``MetaData``:
    one holding the integer ``100`` and one holding the string ``"abc"``.

    Returns:
        The path of the created file, as a string.
    """
    root_file = str(tmp_path / "test_metadata.root")

    # One value per branch: an integer field and a string field.
    metadata_branches = {
        "FileMetaDataAuxDyn.test_100": [100],
        "FileMetaDataAuxDyn.test_abc": ["abc"],
    }

    # Write the temporary .root file.
    with uproot.create(root_file) as out:
        out["MetaData"] = metadata_branches

    return root_file


# Test run_query and print_structure_from_str
def test_metadata_retrieval(build_test_samples, tmp_path, capsys):
    """Check that file metadata is extracted by ``run_query`` and rendered.

    The test runs in two stages:

    1. ``file_peeking.run_query`` is called on the fixture file and the JSON
       string in the first element of its output is decoded and compared to
       the expected dictionary (a ``FileMetaData`` entry plus the
       ``MetaData`` tree's branch interpretations).
    2. The query output is written to ``encoded.root`` under a ``servicex``
       key, wrapped in a ``{"sample name": [path]}`` dict, and passed to
       ``file_peeking.print_structure_from_str``; the returned string is
       compared with ``tests/data/expected_metadata.txt``.

    Args:
        build_test_samples: Path of the ROOT file produced by the fixture.
        tmp_path: pytest temporary directory used for the intermediate file.
        capsys: pytest capture fixture; not used in the body, kept so the
            test signature is unchanged.
    """
    path = build_test_samples
    query_output = file_peeking.run_query(path)

    # Check result
    expected_result = {
        "FileMetaData": {"test_100": "100", "test_abc": "abc"},
        "MetaData": {
            "FileMetaDataAuxDyn.test_100": "AsDtype('>i8')",
            "FileMetaDataAuxDyn.test_abc": "AsStrings()",
        },
    }
    encoded_result = json.loads(query_output[0])

    assert encoded_result == expected_result

    # Produce a servicex.deliver()-like dict,
    # i.e. {"Sample Name": ["Path"]}
    encoded_path = tmp_path / "encoded.root"
    with uproot.create(encoded_path) as file:
        file["servicex"] = {"branch": query_output}
    assert encoded_path.exists(), "servicex-like test file not found."
    deliver_dict = {"test_file": [str(encoded_path)]}

    # Test str formatting
    output_str = file_peeking.print_structure_from_str(deliver_dict)

    # Resolve the reference file relative to this test module rather than the
    # current working directory, so the test passes wherever pytest is run.
    expected_path = Path(__file__).parent / "data" / "expected_metadata.txt"
    expected = expected_path.read_text(encoding="utf-8")

    assert (
        expected == output_str
    ), f"Output does not match expected.\n Output: {output_str}"