@@ -169,21 +169,25 @@ def no_uuid(record_identifier):
169169 # By default we assume source="gbv-ppn" mods:recordIdentifiers (= PPNs),
170170 # however, in mods:relatedItems, there may be source="dnb-ppn",
171171 # which we need to distinguish by using a separate field name.
172- try :
173- value ["recordIdentifier" ] = (
174- TagGroup (tag , group )
175- .filter (no_uuid )
176- .is_singleton ()
177- .has_attributes ({"source" : "gbv-ppn" })
178- .text ()
179- )
180- except ValueError :
181- value ["recordIdentifier-dnb-ppn" ] = (
182- TagGroup (tag , group )
183- .is_singleton ()
184- .has_attributes ({"source" : "dnb-ppn" })
185- .text ()
186- )
172+
173+ for field_name , source in \
174+ ("recordIdentifier" , "gbv-ppn" ), \
175+ ("recordIdentifier-dnb-ppn" , "dnb-ppn" ), \
176+ ("recordIdentifier-zdb" , "zdb" ):
177+ try :
178+ value [field_name ] = (
179+ TagGroup (tag , group )
180+ .filter (no_uuid )
181+ .fix_recordIdentifier_source_zdb ()
182+ .is_singleton ()
183+ .has_attributes ({"source" : source })
184+ .text ()
185+ )
186+ break
187+ except ValueError as e :
188+ pass
189+ if field_name not in value :
190+ raise ValueError ("Unknown recordIdentifier found" )
187191 elif tag == "{http://www.loc.gov/mods/v3}identifier" :
188192 for e in group :
189193 if len (e .attrib ) != 1 :
@@ -634,11 +638,18 @@ def process(mets_files: list[str], output_file: str, output_page_info: str, mets
634638 logger .exception ("Exception in {}" .format (mets_file ))
635639
636640 logger .info ("Writing DataFrame to {}" .format (output_file ))
637- try :
638- convert_db_to_parquet (con , "mods_info" , "recordInfo_recordIdentifier" , output_file )
639- except :
640- # FIXME: Fix missing mods:recordInfo instead, https://github.com/qurator-spk/mods4pandas/issues/60
641- convert_db_to_parquet (con , "mods_info" , "recordIdentifier" , output_file )
641+ considered_indexes = ("recordInfo_recordIdentifier" , "recordIdentifier-zdb" )
642+ success = False
643+ for considered_index in considered_indexes :
644+ try :
645+ convert_db_to_parquet (con , "mods_info" , considered_index , output_file )
646+ success = True
647+ break
648+ except :
649+ pass
650+ if not success :
651+ raise ValueError (f"None of { considered_indexes } found" )
652+
642653 if output_page_info :
643654 logger .info ("Writing DataFrame to {}" .format (output_page_info ))
644655 convert_db_to_parquet (
0 commit comments