diff --git a/src/sssom/util.py b/src/sssom/util.py
index 3dc20939..50a6f3a4 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -1712,3 +1712,90 @@ def pandas_set_no_silent_downcasting(no_silent_downcasting=True):
     except KeyError:
         # Option does not exist in this version of pandas
         pass
+
+
+#: A mapping from slots to the weight they have for calculating the FAIRness of a mapping
+FAIR_WEIGHTS: dict[str, float] = {
+    # required
+    "object_id": 1.0,
+    "predicate_id": 1.0,
+    "subject_id": 1.0,
+    "mapping_justification": 1.0,
+    # Not required, but important
+    "license": 1.0,
+    "author_id": 1.0,
+    "creator_id": 1.0,
+    "reviewer_id": 1.0,
+    "confidence": 1.0,
+    #
+    "publication_date": 1.0,
+    "mapping_date": 1.0,
+    "issue_tracker_item": 1.0,
+    "curation_rule": 1.0,
+    "curation_rule_text": 1.0,
+    "similarity_measure": 1.0,
+    "subject_preprocessing": 1.0,
+    "object_category": 1.0,
+    "subject_source_version": 1.0,
+    "mapping_source": 1.0,
+    "subject_match_field": 1.0,
+    "subject_source": 1.0,
+    "object_source": 1.0,
+    "object_source_version": 1.0,
+    "object_preprocessing": 1.0,
+    "object_match_field": 1.0,
+    "mapping_tool": 1.0,
+    "mapping_tool_version": 1.0,
+    "subject_type": 1.0,
+    "similarity_score": 1.0,
+    "mapping_provider": 1.0,
+    "match_string": 1.0,
+    "object_type": 1.0,
+    "subject_category": 1.0,
+    # These give extra context, but are not critical
+    "predicate_label": 0.1,
+    "object_label": 0.1,
+    "subject_label": 0.1,
+    # These don't matter / are not actionable for FAIR
+    "comment": 0.0,
+    "other": 0.0,
+    "creator_label": 0.0,
+    "reviewer_label": 0.0,
+    "author_label": 0.0,
+    # These might not be relevant, so don't penalize if missing
+    "predicate_modifier": 0.0,
+    "mapping_cardinality": 0.0,
+    "see_also": 0.0,
+}
+FAIR_TOTAL_WEIGHT = sum(FAIR_WEIGHTS.values())
+
+
+def _is_populated(value: object) -> bool:
+    """Check whether a slot value counts as filled in.
+
+    :param value: The value stored in a mapping slot
+    :returns: False for None and for empty strings or collections, True otherwise.
+        Numbers always count, so a zero confidence is not mistaken for a missing value.
+    """
+    if value is None:
+        return False
+    if isinstance(value, (int, float)):
+        # zero is a legitimate score, unlike an empty string or list
+        return True
+    return bool(value)
+
+
+def calculate_fairness(mapping: SSSOM_Mapping) -> float:
+    """Calculate FAIRness of a mapping.
+
+    :param mapping: The mapping to score
+    :returns: The summed weight of the populated slots divided by the total
+        weight of all slots in ``FAIR_WEIGHTS``
+    """
+    achieved: float = sum(
+        weight
+        for key, weight in FAIR_WEIGHTS.items()
+        if _is_populated(getattr(mapping, key, None))
+    )
+    # TODO: penalize for using label fields instead of ID fields
+    return achieved / FAIR_TOTAL_WEIGHT
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d2da2703..03df2822 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -27,7 +27,10 @@
 from sssom.io import extract_iris
 from sssom.parsers import parse_sssom_table
 from sssom.util import (
+    FAIR_WEIGHTS,
     MappingSetDataFrame,
+    _get_sssom_schema_object,
+    calculate_fairness,
     filter_out_prefixes,
     filter_prefixes,
     get_dict_from_mapping,
@@ -635,3 +638,52 @@ def test_infer_scoped_cardinality(self) -> None:
         expected = ["1:n", "1:n", "1:n", "1:n", "1:n", "1:n"]
         self.assertEqual(expected, list(msdf.df[MAPPING_CARDINALITY].values))
         self.assertNotIn(CARDINALITY_SCOPE, msdf.df.columns)
+
+
+class TestFAIRScore(unittest.TestCase):
+    """Test the FAIRness score."""
+
+    def test_complete_weighting(self) -> None:
+        """Test that there are weights for all fields."""
+        missing = set(_get_sssom_schema_object().mapping_slots).difference(FAIR_WEIGHTS)
+        if missing:
+            msg = "\n".join(missing)
+            self.fail(msg=f"missing weights for mapping fields: {msg}")
+
+    def test_mapping_weight(self) -> None:
+        """Test calculating the weight on a mapping."""
+        m1 = SSSOM_Mapping(
+            subject_id="DOID:0050601",
+            predicate_id="skos:exactMatch",
+            object_id="UMLS:C1863204",
+            mapping_justification=SEMAPV.ManualMappingCuration.value,
+        )
+        m2 = SSSOM_Mapping(
+            subject_id="DOID:0050601",
+            subject_label="ADULT syndrome",
+            predicate_id="skos:exactMatch",
+            object_id="UMLS:C1863204",
+            object_label="ADULT syndrome",
+            mapping_justification=SEMAPV.ManualMappingCuration.value,
+        )
+        self.assertLess(calculate_fairness(m1), calculate_fairness(m2))
+
+    def test_zero_valued_slot(self) -> None:
+        """Test that a slot explicitly set to zero counts as populated."""
+        without_confidence = SSSOM_Mapping(
+            subject_id="DOID:0050601",
+            predicate_id="skos:exactMatch",
+            object_id="UMLS:C1863204",
+            mapping_justification=SEMAPV.ManualMappingCuration.value,
+        )
+        zero_confidence = SSSOM_Mapping(
+            subject_id="DOID:0050601",
+            predicate_id="skos:exactMatch",
+            object_id="UMLS:C1863204",
+            mapping_justification=SEMAPV.ManualMappingCuration.value,
+            confidence=0.0,
+        )
+        self.assertLess(
+            calculate_fairness(without_confidence),
+            calculate_fairness(zero_confidence),
+        )