|
4 | 4 | import json |
5 | 5 | import math |
6 | 6 | import os |
7 | | -import typing |
8 | 7 | import unittest |
9 | 8 | from collections import ChainMap |
| 9 | +from datetime import date |
10 | 10 | from pathlib import Path |
11 | 11 | from tempfile import TemporaryDirectory |
12 | 12 | from textwrap import dedent |
| 13 | +from typing import Any, Dict, Union, get_args |
13 | 14 | from xml.dom import minidom |
14 | 15 |
|
15 | 16 | import numpy as np |
|
18 | 19 | from curies import Converter |
19 | 20 | from rdflib import Graph |
20 | 21 |
|
21 | | -from sssom.constants import CURIE_MAP, get_default_metadata |
| 22 | +from sssom.constants import ( |
| 23 | + CONFIDENCE, |
| 24 | + CURIE_MAP, |
| 25 | + LICENSE, |
| 26 | + MAPPING_CARDINALITY, |
| 27 | + MAPPING_DATE, |
| 28 | + MAPPING_JUSTIFICATION, |
| 29 | + MAPPING_SET_ID, |
| 30 | + OBJECT_ID, |
| 31 | + OBJECT_LABEL, |
| 32 | + PREDICATE_ID, |
| 33 | + PREDICATE_MODIFIER, |
| 34 | + PUBLICATION_DATE, |
| 35 | + RECORD_ID, |
| 36 | + SUBJECT_ID, |
| 37 | + SUBJECT_LABEL, |
| 38 | + SUBJECT_TYPE, |
| 39 | + get_default_metadata, |
| 40 | +) |
22 | 41 | from sssom.context import SSSOM_BUILT_IN_PREFIXES, ensure_converter, get_converter |
23 | 42 | from sssom.io import parse_file |
24 | 43 | from sssom.parsers import ( |
|
29 | 48 | from_sssom_dataframe, |
30 | 49 | from_sssom_json, |
31 | 50 | from_sssom_rdf, |
| 51 | + parse_sssom_rdf, |
32 | 52 | parse_sssom_table, |
33 | 53 | split_dataframe_by_prefix, |
34 | 54 | ) |
|
38 | 58 | from tests.constants import test_out_dir |
39 | 59 |
|
40 | 60 |
|
def assert_dict_contains(
    test: unittest.TestCase,
    expected_values: Dict[str, Any],
    actual_values: Union[Dict[str, Any], pd.Series],
) -> None:
    """Check that a dictionary (or a pandas Series) contains expected values.

    Only the keys listed in ``expected_values`` are checked; any additional
    key present in ``actual_values`` is ignored.

    :param test: the test case through which assertions are reported
    :param expected_values: the keys to look up, with the values they must have
    :param actual_values: the mapping or series to inspect
    :raises AssertionError: (via ``test``) if an expected key is absent, or if
        its value differs from the expected one
    """
    for key, expected in expected_values.items():
        # Report an absent key as a regular test failure that names the key,
        # rather than letting the lookup below raise a bare KeyError.
        test.assertIn(key, actual_values, f"missing expected key {key!r}")
        actual = actual_values[key]
        if isinstance(expected, float):
            # Expected floats are compared with a tolerance, not exact equality.
            try:
                close = bool(np.isclose(expected, actual))
            except TypeError:
                # The actual value cannot be compared numerically at all.
                close = False
            test.assertTrue(
                close,
                f"value for {key!r} is {actual!r}, expected a value close to {expected!r}",
            )
        else:
            test.assertEqual(expected, actual, f"unexpected value for {key!r}")
| 72 | + |
| 73 | + |
41 | 74 | class TestParse(unittest.TestCase): |
42 | 75 | """A test case for parser functionality.""" |
43 | 76 |
|
@@ -277,6 +310,38 @@ def test_parse_sssom_rdf(self) -> None: |
277 | 310 | f"{self.rdf_graph_file} has the wrong number of mappings.", |
278 | 311 | ) |
279 | 312 |
|
| 313 | + def test_parse_standard_and_old_style_rdf(self) -> None: |
| 314 | + """Test parsing a RDF conforming to pre-standard and standard SSSOM/RDF serialisation.""" |
| 315 | + msdf = parse_sssom_rdf(f"{test_data_dir}/standard-rdf.ttl") |
| 316 | + self.assertEqual(msdf.metadata[MAPPING_SET_ID], "https://example.org/sets/standard-rdf") |
| 317 | + self.assertEqual(msdf.metadata[LICENSE], "https://creativecommons.org/licenses/by/4.0/") |
| 318 | + self.assertEqual(msdf.metadata[PUBLICATION_DATE], date(2025, 10, 28)) |
| 319 | + expected_values = { |
| 320 | + SUBJECT_ID: "ORGENT:0001", |
| 321 | + OBJECT_ID: "COMENT:0011", |
| 322 | + PREDICATE_ID: "skos:closeMatch", |
| 323 | + MAPPING_JUSTIFICATION: "semapv:ManualMappingCuration", |
| 324 | + SUBJECT_LABEL: "alice", |
| 325 | + OBJECT_LABEL: "alpha", |
| 326 | + SUBJECT_TYPE: "owl class", |
| 327 | + MAPPING_CARDINALITY: "1:1", |
| 328 | + PREDICATE_MODIFIER: "Not", |
| 329 | + CONFIDENCE: 0.7, |
| 330 | + MAPPING_DATE: date(2025, 10, 27), |
| 331 | + } |
| 332 | + assert_dict_contains(self, expected_values, msdf.df.loc[0]) |
| 333 | + |
| 334 | + msdf = parse_sssom_rdf(f"{test_data_dir}/pre-standard-rdf.ttl") |
| 335 | + self.assertEqual(msdf.metadata[MAPPING_SET_ID], "https://example.org/sets/standard-rdf") |
| 336 | + self.assertEqual(msdf.metadata[LICENSE], "https://creativecommons.org/licenses/by/4.0/") |
| 337 | + self.assertEqual(msdf.metadata[PUBLICATION_DATE], date(2025, 10, 28)) |
| 338 | + assert_dict_contains(self, expected_values, msdf.df.loc[0]) |
| 339 | + |
| 340 | + def test_parse_rdf_with_record_id(self) -> None: |
| 341 | + """Test parsing a RDF file containing record IDs as named resources.""" |
| 342 | + msdf = parse_sssom_rdf(f"{test_data_dir}/record-id-as-node.ttl") |
| 343 | + self.assertEqual("mymaps:0001", msdf.df.loc[0][RECORD_ID]) |
| 344 | + |
280 | 345 | def test_parse_sssom_json(self) -> None: |
281 | 346 | """Test parsing JSON.""" |
282 | 347 | msdf = from_sssom_json( |
@@ -576,7 +641,7 @@ def test_split_df(self) -> None: |
576 | 641 |
|
577 | 642 | sdf = pd.DataFrame(subrows, columns=columns) |
578 | 643 |
|
579 | | - for method in [None, *typing.get_args(SplitMethod)]: |
| 644 | + for method in [None, *get_args(SplitMethod)]: |
580 | 645 | with self.subTest(method=method): |
581 | 646 | self.assert_msdf(msdf, sdf, method) |
582 | 647 |
|
|
0 commit comments