Skip to content

Commit 4acf200

Browse files
committed
Slightly overhaul the value converters.
Introduce a `CurieConverterProvider` interface so that we can construct all value converters even before we get the CURIE converter that the EntityReferenceValueConverter will need to convert EntityReference-typed values. This is the first step towards making it possible to create only one instance of the MappingSetRDFConverter class (maybe even making it a Singleton) that can be used to serialize/deserialize many different MSDF objects -- currently, the class can only really be used once, because it expects to get the CURIE converter at construction time, and in most cases the CURIE converter will be specific to only one MSDF or one RDF graph.
1 parent 448f364 commit 4acf200

File tree

1 file changed

+119
-71
lines changed

1 file changed

+119
-71
lines changed

src/sssom/rdf_internal.py

Lines changed: 119 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111
from linkml_runtime.utils.schemaview import SchemaView
1212
from pandas import DataFrame
1313
from rdflib import BNode, Graph, Literal, Node, URIRef
14-
from rdflib.namespace import RDF, XSD
15-
from sssom_schema import EntityReference
14+
from rdflib.namespace import RDF, RDFS, XSD
1615
from typing_extensions import override
1716

1817
from .constants import (
@@ -40,6 +39,21 @@
4039
EXTENSION_DEFINITION_IRI = URIRef(EXTENSION_DEFINITIONS, SSSOM_URI_PREFIX)
4140

4241

42+
class CurieConverterProvider(object):
43+
"""An interface for an object that can provide a CURIE converter.
44+
45+
We need this contraption because we have to create objects that
46+
will need to use a CURIE converter at some point, but we want to
47+
create such objects _before_ we get the converter -- because the
48+
converter to use will be specific to a given MSDF or a given RDF
49+
graph, which is not yet known at initialisation time.
50+
"""
51+
52+
def get(self) -> Converter:
53+
"""Get the CURIE converter."""
54+
raise NotImplementedError
55+
56+
4357
class ValueConverter(object):
4458
"""Base class for all value converters.
4559
@@ -72,32 +86,77 @@ def to_rdf(self, value: Any) -> Node:
7286
raise NotImplementedError
7387

7488

75-
class StringValueConverter(ValueConverter):
76-
"""Converter for string-typed slots.
89+
class BaseStringValueConverter(ValueConverter):
90+
"""Converter for all string-based slots."""
7791

78-
A string-typed slot is quite naturally represented by a string
79-
literal. However, for compatibility with the LinkML-based loader,
80-
we also accept a named resource when converting from RDF.
81-
"""
92+
primary_type: URIRef
93+
allowed_types: Set[URIRef]
94+
95+
def __init__(
96+
self,
97+
primary_type: URIRef = XSD.string,
98+
allowed_types: Optional[List[URIRef]] = None,
99+
):
100+
"""Create a new instance.
101+
102+
:param primary_type: The datatype used to represent the value in
103+
RDF context, according to the SSSOM/RDF specification. A
104+
value of `rdfs:Resource` means the value is represented as
105+
a named resource rather than as a literal.
106+
:param allowed_types: Additional RDF types that are acceptable
107+
to represent the value in RDF context.
108+
"""
109+
self.primary_type = primary_type
110+
if allowed_types is not None:
111+
self.allowed_types = set(allowed_types)
112+
else:
113+
self.allowed_types = set()
82114

83115
@override
84116
def from_rdf(self, obj: Node) -> str:
85-
"""Convert a RDF node into a SSSOM string value."""
86-
if isinstance(obj, URIRef):
117+
"""Convert a RDF node into a string-based value."""
118+
if isinstance(obj, URIRef) and (
119+
self.primary_type == RDFS.Resource or RDFS.Resource in self.allowed_types
120+
):
87121
return str(obj)
88122
elif isinstance(obj, Literal):
89-
if obj.datatype is None or obj.datatype == XSD.string:
123+
# A "naked" literal is a xsd:string literal
124+
datatype = obj.datatype or XSD.string
125+
if datatype == self.primary_type or datatype in self.allowed_types:
90126
return str(obj.value)
91127

92-
raise ValueError("Invalid node type (string literal expected)")
128+
if self.primary_type == RDFS.Resource:
129+
msg = "Invalid node type (named resource expected)"
130+
else:
131+
msg = f"Invalid node type ({self.primary_type} literal expected)"
132+
raise ValueError(msg)
93133

94134
@override
95135
def to_rdf(self, value: str) -> Node:
96-
"""Convert a SSSOM string value into a RDF node."""
97-
return Literal(str(value))
136+
"""Convert a string-based value into a RDF node."""
137+
if self.primary_type == RDFS.Resource:
138+
return URIRef(value)
139+
elif self.primary_type == XSD.string:
140+
# Datatype is not needed for a xsd:string
141+
return Literal(value)
142+
else:
143+
return Literal(value, datatype=self.primary_type)
144+
145+
146+
class StringValueConverter(BaseStringValueConverter):
147+
"""Converter for string-typed slots.
148+
149+
A string-typed slot is quite naturally represented by a string
150+
literal. However, for compatibility with the LinkML-based loader,
151+
we also accept a named resource when converting from RDF.
152+
"""
98153

154+
def __init__(self) -> None:
155+
"""Create a new instance."""
156+
super().__init__(allowed_types=[RDFS.Resource])
99157

100-
class NonRelativeURIValueConverter(ValueConverter):
158+
159+
class NonRelativeURIValueConverter(BaseStringValueConverter):
101160
"""Converter for SSSOM URI-typed slots.
102161
103162
As per the SSSOM/RDF specification, a URI-typed slot is represented
@@ -107,24 +166,12 @@ class NonRelativeURIValueConverter(ValueConverter):
107166
with the LinkML-based loader).
108167
"""
109168

110-
@override
111-
def from_rdf(self, obj: Node) -> str:
112-
"""Convert a RDF node into a SSSOM URI value."""
113-
if isinstance(obj, URIRef):
114-
return str(obj)
115-
elif isinstance(obj, Literal):
116-
if obj.datatype is None or (obj.datatype == XSD.string or obj.datatype == XSD.anyURI):
117-
return str(obj.value)
118-
119-
raise ValueError("Invalid node type (xsd:anyURI literal expected)")
120-
121-
@override
122-
def to_rdf(self, value: str) -> Node:
123-
"""Convert a SSSOM URI value into a RDF node."""
124-
return URIRef(str(value))
169+
def __init__(self) -> None:
170+
"""Create a new instance."""
171+
super().__init__(primary_type=RDFS.Resource, allowed_types=[XSD.string, XSD.anyURI])
125172

126173

127-
class EntityReferenceValueConverter(ValueConverter):
174+
class EntityReferenceValueConverter(BaseStringValueConverter):
128175
"""Converter for EntityReference-typed slots.
129176
130177
Entity references are represented as named resources in RDF, but we
@@ -136,38 +183,28 @@ class EntityReferenceValueConverter(ValueConverter):
136183
expanding them when converting to RDF.
137184
"""
138185

139-
prefix_manager: Converter
186+
cc_provider: CurieConverterProvider
140187

141-
def __init__(self, prefix_manager: Converter):
188+
def __init__(self, cc_provider: CurieConverterProvider):
142189
"""Create a new instance.
143190
144-
:param prefix_manager: The CURIEs converter to use for
145-
expanding and compressing entity references.
191+
:param cc_provider: An object that shall provide the CURIE
192+
converter to use for CURIE expansion/contraction.
146193
"""
147-
self.prefix_manager = prefix_manager
194+
super().__init__(primary_type=RDFS.Resource, allowed_types=[XSD.string])
195+
self.cc_provider = cc_provider
148196

149197
@override
150198
def from_rdf(self, obj: Node) -> str:
151-
"""Convert a RDF node into a SSSOM entity reference value."""
152-
if isinstance(obj, URIRef):
153-
# Pass-through because we should probably not assume
154-
# that the CURIE converter will know how to compress
155-
# every single IRI found in the graph
156-
return self.prefix_manager.compress(str(obj), passthrough=True)
157-
elif isinstance(obj, Literal):
158-
if obj.datatype is None or obj.datatype == XSD.string:
159-
return self.prefix_manager.compress(obj.value, passthrough=True)
160-
161-
raise ValueError("Invalid node type (IRI expected)")
199+
"""Convert a RDF node into an entity reference value."""
200+
value = super().from_rdf(obj)
201+
return self.cc_provider.get().compress(value, passthrough=True)
162202

163203
@override
164-
def to_rdf(self, value: Union[str, EntityReference]) -> Node:
165-
"""Convert a SSSOM entity reference value into a RDF node."""
166-
# Pass-through because even though all entity references in a
167-
# MappingSetDataFrame should really be in CURIE form, it happens
168-
# frequently that they are not -- including in some of our own
169-
# test cases. :(
170-
return URIRef(self.prefix_manager.expand(str(value), passthrough=True))
204+
def to_rdf(self, value: str) -> Node:
205+
"""Convert an entity reference value into a RDF node."""
206+
value = self.cc_provider.get().expand(value, passthrough=True)
207+
return super().to_rdf(value)
171208

172209

173210
class DateValueConverter(ValueConverter):
@@ -236,8 +273,8 @@ class EnumValueConverter(ValueConverter):
236273
def __init__(self, schema: SchemaView, name: str):
237274
"""Create a new instance.
238275
239-
:param enum: The class that implements the enum type to convert
240-
to and from.
276+
:param schema: The SSSOM LinkML schema.
277+
:param name: The name of the enum type.
241278
"""
242279
self.values_by_iri = {}
243280
self.uris_by_value = {}
@@ -302,33 +339,38 @@ def __init__(self) -> None:
302339
}
303340

304341
def create(
305-
self, range_name: str, schema: SchemaView, curie_converter: Converter
306-
) -> ValueConverter:
342+
self, range_name: str, schema: SchemaView, cc_provider: CurieConverterProvider
343+
) -> Optional[ValueConverter]:
307344
"""Create a new value converter.
308345
309346
:param range_name: The range for which a value converter is
310347
wanted.
311348
:param schema: The SSSOM LinkML schema.
312-
:param curie_converter: The CURIE converter to use, for the
313-
converters that need one.
349+
:param cc_provider: The object that will provide the CURIE
350+
converter to use, for the value converters that need one.
314351
315352
:returns: A suitable value converter for the range.
316353
"""
317-
ctor = self.constructors.get(range_name)
318-
if ctor is not None:
319-
if ctor == EntityReferenceValueConverter:
320-
return EntityReferenceValueConverter(curie_converter)
321-
else:
322-
return ctor()
323-
elif range_name.endswith("_enum"):
354+
if schema.get_class(range_name) is not None:
355+
# This range is for objects, not scalar values
356+
return None
357+
358+
if range_name.endswith("_enum"):
324359
return EnumValueConverter(schema, range_name)
360+
361+
ctor = self.constructors.get(range_name)
362+
if ctor == EntityReferenceValueConverter:
363+
# CURIE provider needed
364+
return EntityReferenceValueConverter(cc_provider)
365+
elif ctor is not None:
366+
return ctor()
325367
else:
326368
# This should only happen if a brand new type of slot has
327369
# been introduced in the SSSOM schema
328370
raise NotImplementedError(f"Range {range_name} is not supported")
329371

330372

331-
class ObjectConverter(object):
373+
class ObjectConverter(CurieConverterProvider):
332374
"""Base class for conversion of SSSOM objects to and from RDF.
333375
334376
One instance of this class will handle the (de)serialisation of one
@@ -386,9 +428,9 @@ def __init__(self, class_name: str, curie_converter: Converter):
386428
self.value_converters = {}
387429
factory = ValueConverterFactory()
388430
for rng in set(ranges):
389-
if self.schema.view.get_class(rng) is not None:
390-
continue
391-
self.value_converters[rng] = factory.create(rng, self.schema.view, curie_converter)
431+
vc = factory.create(rng, self.schema.view, self)
432+
if vc is not None:
433+
self.value_converters[rng] = vc
392434

393435
self.name = self._fix_class_name(class_name)
394436
object_class = self.schema.view.get_class(class_name)
@@ -397,6 +439,12 @@ def __init__(self, class_name: str, curie_converter: Converter):
397439
else:
398440
self.object_uri = URIRef(self.name, SSSOM_URI_PREFIX)
399441

442+
# CurieConverterProvider implementation
443+
@override
444+
def get(self) -> Converter:
445+
"""Get the CURIE converter."""
446+
return self.curie_converter
447+
400448
# Methods for conversion from RDF
401449

402450
def dict_from_rdf(self, g: Graph, subject: Node) -> Dict[str, Any]:

0 commit comments

Comments
 (0)