1111from linkml_runtime .utils .schemaview import SchemaView
1212from pandas import DataFrame
1313from rdflib import BNode , Graph , Literal , Node , URIRef
14- from rdflib .namespace import RDF , XSD
15- from sssom_schema import EntityReference
14+ from rdflib .namespace import RDF , RDFS , XSD
1615from typing_extensions import override
1716
1817from .constants import (
4039EXTENSION_DEFINITION_IRI = URIRef (EXTENSION_DEFINITIONS , SSSOM_URI_PREFIX )
4140
4241
42+ class CurieConverterProvider (object ):
43+ """An interface for an object that can provide a CURIE converter.
44+
45+ We need this contraption because we have to create objects that
46+ will need to use a CURIE converter at some point, but we want to
47+ create such objects _before_ we get the converter -- because the
48+ converter to use will be specific to a given MSDF or a given RDF
49+ graph, which is not yet known at initialisation time.
50+ """
51+
52+ def get (self ) -> Converter :
53+ """Get the CURIE converter."""
54+ raise NotImplementedError
55+
56+
4357class ValueConverter (object ):
4458 """Base class for all value converters.
4559
@@ -72,32 +86,77 @@ def to_rdf(self, value: Any) -> Node:
7286 raise NotImplementedError
7387
7488
75- class StringValueConverter (ValueConverter ):
76- """Converter for string-typed slots.
89+ class BaseStringValueConverter (ValueConverter ):
90+ """Converter for all string-based slots."""
7791
78- A string-typed slot is quite naturally represented by a string
79- literal. Howver, for compatibility with the LinkML-based loader,
80- we also accept a named resource when converting from RDF.
81- """
92+ primary_type : URIRef
93+ allowed_types : Set [URIRef ]
94+
95+ def __init__ (
96+ self ,
97+ primary_type : URIRef = XSD .string ,
98+ allowed_types : Optional [List [URIRef ]] = None ,
99+ ):
100+ """Create a new instance.
101+
102+ :param primary_type: The datatype used to represent the value in
103+ RDF context, according to the SSSOM/RDF specification. A
104+ value of `rdfs:Resource` means the value is represented as
105+ a named resource rather than as a literal.
106+ :param allowed_types: Additional RDF types that are acceptable
107+ to represent the value in RDF context.
108+ """
109+ self .primary_type = primary_type
110+ if allowed_types is not None :
111+ self .allowed_types = set (allowed_types )
112+ else :
113+ self .allowed_types = set ()
82114
83115 @override
84116 def from_rdf (self , obj : Node ) -> str :
85- """Convert a RDF node into a SSSOM string value."""
86- if isinstance (obj , URIRef ):
117+ """Convert a RDF node into a string-based value."""
118+ if isinstance (obj , URIRef ) and (
119+ self .primary_type == RDFS .Resource or RDFS .Resource in self .allowed_types
120+ ):
87121 return str (obj )
88122 elif isinstance (obj , Literal ):
89- if obj .datatype is None or obj .datatype == XSD .string :
123+ # A "naked" literal is a xsd:string literal
124+ datatype = obj .datatype or XSD .string
125+ if datatype == self .primary_type or datatype in self .allowed_types :
90126 return str (obj .value )
91127
92- raise ValueError ("Invalid node type (string literal expected)" )
128+ if self .primary_type == RDFS .Resource :
129+ msg = "Invalid node type (named resource expected)"
130+ else :
131+ msg = f"Invalid node type ({ self .primary_type } literal expected)"
132+ raise ValueError (msg )
93133
94134 @override
95135 def to_rdf (self , value : str ) -> Node :
96- """Convert a SSSOM string value into a RDF node."""
97- return Literal (str (value ))
136+ """Convert a string-based value into a RDF node."""
137+ if self .primary_type == RDFS .Resource :
138+ return URIRef (value )
139+ elif self .primary_type == XSD .string :
140+ # Datatype is not needed for a xsd:string
141+ return Literal (value )
142+ else :
143+ return Literal (value , datatype = self .primary_type )
144+
145+
146+ class StringValueConverter (BaseStringValueConverter ):
147+ """Converter for string-typed slots.
148+
149+ A string-typed slot is quite naturally represented by a string
150+ literal. However, for compatibility with the LinkML-based loader,
151+ we also accept a named resource when converting from RDF.
152+ """
98153
154+ def __init__ (self ) -> None :
155+ """Create a new instance."""
156+ super ().__init__ (allowed_types = [RDFS .Resource ])
99157
100- class NonRelativeURIValueConverter (ValueConverter ):
158+
159+ class NonRelativeURIValueConverter (BaseStringValueConverter ):
101160 """Converter for SSSOM URI-typed slots.
102161
103162 As per the SSSOM/RDF specification, a URI-typed slot is represented
@@ -107,24 +166,12 @@ class NonRelativeURIValueConverter(ValueConverter):
107166 with the LinkML-based loader).
108167 """
109168
110- @override
111- def from_rdf (self , obj : Node ) -> str :
112- """Convert a RDF node into a SSSOM URI value."""
113- if isinstance (obj , URIRef ):
114- return str (obj )
115- elif isinstance (obj , Literal ):
116- if obj .datatype is None or (obj .datatype == XSD .string or obj .datatype == XSD .anyURI ):
117- return str (obj .value )
118-
119- raise ValueError ("Invalid node type (xsd:anyURI literal expected)" )
120-
121- @override
122- def to_rdf (self , value : str ) -> Node :
123- """Convert a SSSOM URI value into a RDF node."""
124- return URIRef (str (value ))
169+ def __init__ (self ) -> None :
170+ """Create a new instance."""
171+ super ().__init__ (primary_type = RDFS .Resource , allowed_types = [XSD .string , XSD .anyURI ])
125172
126173
127- class EntityReferenceValueConverter (ValueConverter ):
174+ class EntityReferenceValueConverter (BaseStringValueConverter ):
128175 """Converter for EntityReference-typed slots.
129176
130177 Entity references are represented as named resources in RDF, but we
@@ -136,38 +183,28 @@ class EntityReferenceValueConverter(ValueConverter):
136183 expanding them when converting to RDF.
137184 """
138185
139- prefix_manager : Converter
186+ cc_provider : CurieConverterProvider
140187
141- def __init__ (self , prefix_manager : Converter ):
188+ def __init__ (self , cc_provider : CurieConverterProvider ):
142189 """Create a new instance.
143190
144- :param prefix_manager: The CURIEs converter to use for
145- expanding and compressing entity references .
191+ :param cc_provider: An object that shall provide the CURIE
192+ converter to use for CURIE expansion/contraction .
146193 """
147- self .prefix_manager = prefix_manager
194+ super ().__init__ (primary_type = RDFS .Resource , allowed_types = [XSD .string ])
195+ self .cc_provider = cc_provider
148196
149197 @override
150198 def from_rdf (self , obj : Node ) -> str :
151- """Convert a RDF node into a SSSOM entity reference value."""
152- if isinstance (obj , URIRef ):
153- # Pass-through because we should probably not assume
154- # that the CURIE converter will know how to compress
155- # every single IRI found in the graph
156- return self .prefix_manager .compress (str (obj ), passthrough = True )
157- elif isinstance (obj , Literal ):
158- if obj .datatype is None or obj .datatype == XSD .string :
159- return self .prefix_manager .compress (obj .value , passthrough = True )
160-
161- raise ValueError ("Invalid node type (IRI expected)" )
199+ """Convert a RDF node into an entity reference value."""
200+ value = super ().from_rdf (obj )
201+ return self .cc_provider .get ().compress (value , passthrough = True )
162202
163203 @override
164- def to_rdf (self , value : Union [str , EntityReference ]) -> Node :
165- """Convert a SSSOM entity reference value into a RDF node."""
166- # Pass-through because even though all entity references in a
167- # MappingSetDataFrame should really be in CURIE form, it happens
168- # frequently that they are not -- including in some of our own
169- # test cases. :(
170- return URIRef (self .prefix_manager .expand (str (value ), passthrough = True ))
204+ def to_rdf (self , value : str ) -> Node :
205+ """Convert an entity reference value into a RDF node."""
206+ value = self .cc_provider .get ().expand (value , passthrough = True )
207+ return super ().to_rdf (value )
171208
172209
173210class DateValueConverter (ValueConverter ):
@@ -236,8 +273,8 @@ class EnumValueConverter(ValueConverter):
236273 def __init__ (self , schema : SchemaView , name : str ):
237274 """Create a new instance.
238275
239- :param enum : The class that implements the enum type to convert
240- to and from .
276+ :param schema : The SSSOM LinkML schema.
277+ :param name: The name of the enum type .
241278 """
242279 self .values_by_iri = {}
243280 self .uris_by_value = {}
@@ -302,33 +339,38 @@ def __init__(self) -> None:
302339 }
303340
304341 def create (
305- self , range_name : str , schema : SchemaView , curie_converter : Converter
306- ) -> ValueConverter :
342+ self , range_name : str , schema : SchemaView , cc_provider : CurieConverterProvider
343+ ) -> Optional [ ValueConverter ] :
307344 """Create a new value converter.
308345
309346 :param range_name: The range for which a value converter is
310347 wanted.
311348 :param schema: The SSSOM LinkML schema.
312- :param curie_converter : The CURIE converter to use, for the
313- converters that need one.
349+ :param cc_provider : The object that will provide the CURIE
350+ converter to use, for the value converters that need one.
314351
315352 :returns: A suitable value converter for the range.
316353 """
317- ctor = self .constructors .get (range_name )
318- if ctor is not None :
319- if ctor == EntityReferenceValueConverter :
320- return EntityReferenceValueConverter (curie_converter )
321- else :
322- return ctor ()
323- elif range_name .endswith ("_enum" ):
354+ if schema .get_class (range_name ) is not None :
355+ # This range is for objects, not scalar values
356+ return None
357+
358+ if range_name .endswith ("_enum" ):
324359 return EnumValueConverter (schema , range_name )
360+
361+ ctor = self .constructors .get (range_name )
362+ if ctor == EntityReferenceValueConverter :
363+ # CURIE provider needed
364+ return EntityReferenceValueConverter (cc_provider )
365+ elif ctor is not None :
366+ return ctor ()
325367 else :
326368 # This should only happen if a brand new type of slot has
327369 # been introduced in the SSSOM schema
328370 raise NotImplementedError (f"Range { range_name } is not supported" )
329371
330372
331- class ObjectConverter (object ):
373+ class ObjectConverter (CurieConverterProvider ):
332374 """Base class for conversion of SSSOM objects to and from RDF.
333375
334376 One instance of this class will handle the (de)serialisation of one
@@ -386,9 +428,9 @@ def __init__(self, class_name: str, curie_converter: Converter):
386428 self .value_converters = {}
387429 factory = ValueConverterFactory ()
388430 for rng in set (ranges ):
389- if self .schema .view . get_class ( rng ) is not None :
390- continue
391- self .value_converters [rng ] = factory . create ( rng , self . schema . view , curie_converter )
431+ vc = factory . create ( rng , self .schema .view , self )
432+ if vc is not None :
433+ self .value_converters [rng ] = vc
392434
393435 self .name = self ._fix_class_name (class_name )
394436 object_class = self .schema .view .get_class (class_name )
@@ -397,6 +439,12 @@ def __init__(self, class_name: str, curie_converter: Converter):
397439 else :
398440 self .object_uri = URIRef (self .name , SSSOM_URI_PREFIX )
399441
442+ # CurieConverterProvider implementation
443+ @override
444+ def get (self ) -> Converter :
445+ """Get the CURIE converter."""
446+ return self .curie_converter
447+
400448 # Methods for conversion from RDF
401449
402450 def dict_from_rdf (self , g : Graph , subject : Node ) -> Dict [str , Any ]:
0 commit comments