Skip to content

Commit a98a308

Browse files
committed
Add RDF parsing tests.
Add a couple of tests to check the behaviour of the new RDF parser. Also fix an issue (revealed by one of those tests) with the RECORD_ID slot, which must be compressed upon parsing since it is an EntityReference-typed slot.
1 parent 8826ed1 commit a98a308

File tree

6 files changed

+140
-4
lines changed

6 files changed

+140
-4
lines changed

src/sssom/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
PREDICATE_MODIFIER_NOT = "Not"
8383
PREDICATE_LABEL = "predicate_label"
8484
PREDICATE_TYPE = "predicate_type"
85+
PUBLICATION_DATE = "publication_date"
8586
OBJECT_ID = "object_id"
8687
OBJECT_LABEL = "object_label"
8788
OBJECT_CATEGORY = "object_category"

src/sssom/rdf_internal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -964,7 +964,7 @@ def _init_dict_from_rdf(self, g: Graph, subject: Node, dest: Dict[str, Any]) ->
964964
# If the root node is a named resource, then it is interpreted
965965
# as the RECORD_ID for the mapping.
966966
if isinstance(subject, URIRef):
967-
dest[RECORD_ID] = str(subject)
967+
dest[RECORD_ID] = self.ccp().compress(str(subject), passthrough=True)
968968
elif not isinstance(subject, BNode):
969969
raise ValueError(f"Invalid node type for a {self.name} object")
970970

tests/data/pre-standard-rdf.ttl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
@prefix COMENT: <https://example.com/entities/> .
2+
@prefix ORGENT: <https://example.org/entities/> .
3+
@prefix dcterms: <http://purl.org/dc/terms/> .
4+
@prefix owl: <http://www.w3.org/2002/07/owl#> .
5+
@prefix semapv: <https://w3id.org/semapv/vocab/> .
6+
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
7+
@prefix sssom: <https://w3id.org/sssom/> .
8+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
9+
10+
[] a sssom:MappingSet;
11+
dcterms:issued "2025-10-28"^^xsd:date;
12+
dcterms:license "https://creativecommons.org/licenses/by/4.0/"^^xsd:anyURI;
13+
sssom:mapping_set_id "https://example.org/sets/standard-rdf"^^xsd:anyURI;
14+
sssom:mappings [ a owl:Axiom;
15+
dcterms:created "2025-10-27"^^xsd:date;
16+
owl:annotatedProperty skos:closeMatch;
17+
owl:annotatedSource ORGENT:0001;
18+
owl:annotatedTarget COMENT:0011;
19+
sssom:confidence 7.0E-1;
20+
sssom:mapping_cardinality "1:1";
21+
sssom:mapping_justification semapv:ManualMappingCuration;
22+
sssom:object_label "alpha";
23+
sssom:predicate_modifier "Not";
24+
sssom:subject_label "alice";
25+
sssom:subject_type "owl class"
26+
] .

tests/data/record-id-as-node.ttl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
@prefix COMENT: <https://example.com/entities/> .
2+
@prefix ORGENT: <https://example.org/entities/> .
3+
@prefix dcterms: <http://purl.org/dc/terms/> .
4+
@prefix mymaps: <https://example.org/sets/mapping/> .
5+
@prefix owl: <http://www.w3.org/2002/07/owl#> .
6+
@prefix semapv: <https://w3id.org/semapv/vocab/> .
7+
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
8+
@prefix sssom: <https://w3id.org/sssom/> .
9+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
10+
11+
mymaps:0001 a owl:Axiom;
12+
owl:annotatedProperty skos:closeMatch;
13+
owl:annotatedSource ORGENT:0001;
14+
owl:annotatedTarget COMENT:0011;
15+
sssom:mapping_justification semapv:ManualMappingCuration .
16+
17+
<https://example.org/sets/standard-rdf> a sssom:MappingSet;
18+
dcterms:license <https://creativecommons.org/licenses/by/4.0/>;
19+
sssom:mappings mymaps:0001 .

tests/data/standard-rdf.ttl

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
@prefix COMENT: <https://example.com/entities/> .
2+
@prefix ORGENT: <https://example.org/entities/> .
3+
@prefix dcterms: <http://purl.org/dc/terms/> .
4+
@prefix owl: <http://www.w3.org/2002/07/owl#> .
5+
@prefix semapv: <https://w3id.org/semapv/vocab/> .
6+
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
7+
@prefix sssom: <https://w3id.org/sssom/> .
8+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
9+
10+
<https://example.org/sets/standard-rdf> a sssom:MappingSet;
11+
dcterms:issued "2025-10-28"^^xsd:date;
12+
dcterms:license <https://creativecommons.org/licenses/by/4.0/>;
13+
sssom:mappings [ a owl:Axiom;
14+
dcterms:created "2025-10-27"^^xsd:date;
15+
owl:annotatedProperty skos:closeMatch;
16+
owl:annotatedSource ORGENT:0001;
17+
owl:annotatedTarget COMENT:0011;
18+
sssom:confidence 7.0E-1;
19+
sssom:mapping_cardinality "1:1";
20+
sssom:mapping_justification semapv:ManualMappingCuration;
21+
sssom:object_label "alpha";
22+
sssom:predicate_modifier sssom:NegatedPredicate;
23+
sssom:subject_label "alice";
24+
sssom:subject_type owl:Class
25+
] .

tests/test_parsers.py

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@
44
import json
55
import math
66
import os
7-
import typing
87
import unittest
98
from collections import ChainMap
9+
from datetime import date
1010
from pathlib import Path
1111
from tempfile import TemporaryDirectory
1212
from textwrap import dedent
13+
from typing import Any, Dict, Union, get_args
1314
from xml.dom import minidom
1415

1516
import numpy as np
@@ -18,7 +19,25 @@
1819
from curies import Converter
1920
from rdflib import Graph
2021

21-
from sssom.constants import CURIE_MAP, get_default_metadata
22+
from sssom.constants import (
23+
CONFIDENCE,
24+
CURIE_MAP,
25+
LICENSE,
26+
MAPPING_CARDINALITY,
27+
MAPPING_DATE,
28+
MAPPING_JUSTIFICATION,
29+
MAPPING_SET_ID,
30+
OBJECT_ID,
31+
OBJECT_LABEL,
32+
PREDICATE_ID,
33+
PREDICATE_MODIFIER,
34+
PUBLICATION_DATE,
35+
RECORD_ID,
36+
SUBJECT_ID,
37+
SUBJECT_LABEL,
38+
SUBJECT_TYPE,
39+
get_default_metadata,
40+
)
2241
from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter, get_converter
2342
from sssom.io import parse_file
2443
from sssom.parsers import (
@@ -29,6 +48,7 @@
2948
from_sssom_dataframe,
3049
from_sssom_json,
3150
from_sssom_rdf,
51+
parse_sssom_rdf,
3252
parse_sssom_table,
3353
split_dataframe_by_prefix,
3454
)
@@ -38,6 +58,19 @@
3858
from tests.constants import test_out_dir
3959

4060

61+
def assert_dict_contains(
    test: unittest.TestCase,
    expected_values: Dict[str, Any],
    actual_values: Union[Dict[str, Any], pd.Series],
) -> None:
    """Check that a dictionary (or Series) contains the expected values.

    Only the keys listed in ``expected_values`` are checked; any additional
    keys present in ``actual_values`` are ignored.

    :param test: the running test case, used to report assertion failures
    :param expected_values: mapping of keys to the values they must have
    :param actual_values: the dictionary or pandas Series to inspect
    :raises AssertionError: if a key is missing or its value does not match
    """
    for key, expected in expected_values.items():
        # Report a missing key as a regular test failure rather than letting
        # the lookup below blow up with a bare KeyError.
        test.assertIn(key, actual_values, f"missing expected key {key!r}")
        actual = actual_values[key]
        if isinstance(expected, float):
            # Floats are compared with a tolerance instead of exact equality.
            test.assertTrue(
                np.isclose(expected, actual),
                f"value for {key!r}: expected {expected!r}, got {actual!r}",
            )
        else:
            test.assertEqual(expected, actual, f"unexpected value for {key!r}")
72+
73+
4174
class TestParse(unittest.TestCase):
4275
"""A test case for parser functionality."""
4376

@@ -277,6 +310,38 @@ def test_parse_sssom_rdf(self) -> None:
277310
f"{self.rdf_graph_file} has the wrong number of mappings.",
278311
)
279312

313+
def test_parse_standard_and_old_style_rdf(self) -> None:
    """Test parsing RDF in the standard and the pre-standard SSSOM/RDF serialisation.

    Both fixture files are expected to yield the same set-level metadata and
    the same values in the first mapping row.
    """
    expected_metadata = {
        MAPPING_SET_ID: "https://example.org/sets/standard-rdf",
        LICENSE: "https://creativecommons.org/licenses/by/4.0/",
        PUBLICATION_DATE: date(2025, 10, 28),
    }
    expected_values = {
        SUBJECT_ID: "ORGENT:0001",
        OBJECT_ID: "COMENT:0011",
        PREDICATE_ID: "skos:closeMatch",
        MAPPING_JUSTIFICATION: "semapv:ManualMappingCuration",
        SUBJECT_LABEL: "alice",
        OBJECT_LABEL: "alpha",
        SUBJECT_TYPE: "owl class",
        MAPPING_CARDINALITY: "1:1",
        PREDICATE_MODIFIER: "Not",
        CONFIDENCE: 0.7,
        MAPPING_DATE: date(2025, 10, 27),
    }
    # One sub-test per fixture, so that a failure names the offending file
    # and does not prevent the other serialisation from being checked.
    for filename in ("standard-rdf.ttl", "pre-standard-rdf.ttl"):
        with self.subTest(filename=filename):
            msdf = parse_sssom_rdf(f"{test_data_dir}/{filename}")
            for slot, expected in expected_metadata.items():
                self.assertEqual(msdf.metadata[slot], expected)
            assert_dict_contains(self, expected_values, msdf.df.loc[0])
339+
340+
def test_parse_rdf_with_record_id(self) -> None:
    """Test that a mapping given as a named resource gets a CURIE record ID."""
    fixture = f"{test_data_dir}/record-id-as-node.ttl"
    first_row = parse_sssom_rdf(fixture).df.loc[0]
    # The mapping node is a named resource in the fixture; its IRI is
    # expected back in compressed (CURIE) form.
    self.assertEqual("mymaps:0001", first_row[RECORD_ID])
344+
280345
def test_parse_sssom_json(self) -> None:
281346
"""Test parsing JSON."""
282347
msdf = from_sssom_json(
@@ -576,7 +641,7 @@ def test_split_df(self) -> None:
576641

577642
sdf = pd.DataFrame(subrows, columns=columns)
578643

579-
for method in [None, *typing.get_args(SplitMethod)]:
644+
for method in [None, *get_args(SplitMethod)]:
580645
with self.subTest(method=method):
581646
self.assert_msdf(msdf, sdf, method)
582647

0 commit comments

Comments
 (0)