34
34
import re
35
35
import filecmp
36
36
from servicex_analysis_utils import file_peeking
37
+ from servicex import dataset
38
+ from servicex .python_dataset import PythonFunction
39
+ from servicex .dataset_identifier import (
40
+ RucioDatasetIdentifier ,
41
+ FileListDataset ,
42
+ CERNOpenDataDatasetIdentifier ,
43
+ XRootDDatasetIdentifier ,
44
+ )
37
45
38
46
39
47
@pytest .fixture
@@ -146,14 +154,10 @@ def test_spec_builder():
146
154
assert isinstance (first_entry ["NFiles" ], int ), "'NFiles' should be an integer"
147
155
assert isinstance (first_entry ["Name" ], str ), "'Name' should be a string"
148
156
149
- from servicex .dataset_identifier import RucioDatasetIdentifier
150
-
151
157
assert isinstance (
152
158
first_entry ["Dataset" ], RucioDatasetIdentifier
153
159
), "'Dataset' should be a RucioDatasetIdentifier"
154
160
155
- from servicex .python_dataset import PythonFunction
156
-
157
161
assert isinstance (
158
162
first_entry ["Query" ], PythonFunction
159
163
), "'Query' should be a PythonFunction"
@@ -173,15 +177,65 @@ def test_spec_builder():
173
177
174
178
# wrong input type
175
179
wrong_did = 1234
180
+ expected_msg = (
181
+ f"Unsupported dataset input type: { type (wrong_did )} .\n "
182
+ "Input must be str or list of str of Rucio DIDs, "
183
+ "a DataSetIdentifier object or a dict "
184
+ "('sample_name':'dataset_id')"
185
+ )
186
+
176
187
with pytest .raises (
177
188
ValueError ,
178
- match = re .escape (
179
- f"Unsupported dataset input type: { type (wrong_did )} .\n Input must be dict ('sample_name':'dataset_id'), str or list of str"
180
- ),
189
+ match = re .escape (expected_msg ),
181
190
):
182
191
file_peeking .build_deliver_spec (wrong_did )
183
192
184
193
194
def test_spec_builder_with_dataset_identifier():
    """Check build_deliver_spec with each kind of dataset identifier object.

    One identifier of every kind (Rucio, XRootD, CERN Open Data, file list)
    is passed on its own to ``file_peeking.build_deliver_spec``. The returned
    spec is expected to hold exactly one sample whose ``Dataset`` entry is an
    instance of the matching identifier class.
    """
    # Pair every identifier with the class its 'Dataset' entry must have.
    cases = [
        (dataset.Rucio("random_space:did"), RucioDatasetIdentifier),
        (dataset.XRootD("root://server/file.root"), XRootDDatasetIdentifier),
        (dataset.CERNOpenData("cernopendata:12345"), CERNOpenDataDatasetIdentifier),
        (dataset.FileList(["file1.root", "file2.root"]), FileListDataset),
    ]

    for identifier, expected_type in cases:
        spec = file_peeking.build_deliver_spec(identifier)

        # Overall shape of the returned spec
        assert isinstance(spec, dict), "build_deliver_spec does not return a dict"
        assert "Sample" in spec, "Key 'Sample' is missing in the returned dict"
        samples = spec["Sample"]
        assert isinstance(samples, list), "'Sample' should be a list"

        # A single identifier must yield a single sample
        sample_count = len(samples)
        assert (
            sample_count == 1
        ), f"Only one did given but sample item of spec is not len 1: {sample_count} "

        # Inspect the only sample
        entry = samples[0]
        assert isinstance(entry, dict), "Each entry in 'Sample' should be a dict"

        # Type of each field in the sample
        assert isinstance(entry["NFiles"], int), "'NFiles' should be an integer"
        assert isinstance(entry["Name"], str), "'Name' should be a string"

        assert isinstance(
            entry["Query"], PythonFunction
        ), "'Query' should be a PythonFunction"

        # The identifier class must survive the trip through the builder
        assert isinstance(
            entry["Dataset"], expected_type
        ), f"Input Dataset identifier {identifier} should be a {expected_type} but is {type(entry['Dataset'])} "
237
+
238
+
185
239
def test_decoding_to_array (build_test_samples , array_out = True ):
186
240
path = build_test_samples
187
241
query_output = file_peeking .run_query (path )
0 commit comments