Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1712,3 +1712,68 @@ def pandas_set_no_silent_downcasting(no_silent_downcasting=True):
except KeyError:
# Option does not exist in this version of pandas
pass


#: Weight contributed by each mapping slot when scoring the FAIRness of a mapping.
#: Insertion order is significant only in that it fixes the floating-point summation order.
FAIR_WEIGHTS: dict[str, float] = {
    # Required slots
    **dict.fromkeys(
        ("object_id", "predicate_id", "subject_id", "mapping_justification"),
        1.0,
    ),
    # Optional, but important
    **dict.fromkeys(
        ("license", "author_id", "creator_id", "reviewer_id", "confidence"),
        1.0,
    ),
    # Remaining full-weight slots
    **dict.fromkeys(
        (
            "publication_date",
            "mapping_date",
            "issue_tracker_item",
            "curation_rule",
            "curation_rule_text",
            "similarity_measure",
            "subject_preprocessing",
            "object_category",
            "subject_source_version",
            "mapping_source",
            "subject_match_field",
            "subject_source",
            "object_source",
            "object_source_version",
            "object_preprocessing",
            "object_match_field",
            "mapping_tool",
            "mapping_tool_version",
            "subject_type",
            "similarity_score",
            "mapping_provider",
            "match_string",
            "object_type",
            "subject_category",
        ),
        1.0,
    ),
    # Labels add extra context, but are not critical
    **dict.fromkeys(("predicate_label", "object_label", "subject_label"), 0.1),
    # Not actionable for FAIR, so these carry no weight
    **dict.fromkeys(
        ("comment", "other", "creator_label", "reviewer_label", "author_label"),
        0.0,
    ),
    # Might not be relevant to a given mapping, so absence is not penalized
    **dict.fromkeys(("predicate_modifier", "mapping_cardinality", "see_also"), 0.0),
}
#: The score a mapping would get if every weighted slot were filled in
FAIR_TOTAL_WEIGHT = sum(FAIR_WEIGHTS.values())


def calculate_fairness(mapping: SSSOM_Mapping) -> float:
    """Calculate FAIRness of a mapping.

    The score is the sum of the :data:`FAIR_WEIGHTS` of every slot that is
    filled in on the mapping, divided by :data:`FAIR_TOTAL_WEIGHT`.

    :param mapping: The mapping to score. Slots are looked up by attribute
        name; an attribute that is absent is treated the same as ``None``.
    :returns: The fraction of the total available weight that the mapping
        achieves (``0.0`` when no weighted slot is filled in).
    """
    achieved = 0.0
    for key, weight in FAIR_WEIGHTS.items():
        value = getattr(mapping, key, None)
        if value is None:
            continue
        # A numeric zero (e.g. ``confidence=0.0``) is an explicitly stated value,
        # not a missing one, so it must not be dropped by a truthiness check.
        # Empty strings and empty multivalued slots still count as missing.
        if isinstance(value, (int, float)) or value:
            achieved += weight

    # TODO: penalize for using label fields instead of ID fields (not implemented yet)

    return achieved / FAIR_TOTAL_WEIGHT
32 changes: 32 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
from sssom.io import extract_iris
from sssom.parsers import parse_sssom_table
from sssom.util import (
FAIR_WEIGHTS,
MappingSetDataFrame,
_get_sssom_schema_object,
calculate_fairness,
filter_out_prefixes,
filter_prefixes,
get_dict_from_mapping,
Expand Down Expand Up @@ -635,3 +638,32 @@ def test_infer_scoped_cardinality(self) -> None:
expected = ["1:n", "1:n", "1:n", "1:n", "1:n", "1:n"]
self.assertEqual(expected, list(msdf.df[MAPPING_CARDINALITY].values))
self.assertNotIn(CARDINALITY_SCOPE, msdf.df.columns)


class TestFAIRScore(unittest.TestCase):
    """Test the FAIRness score."""

    def test_complete_weighting(self) -> None:
        """Test that every mapping slot in the schema has a FAIR weight."""
        slots = set(_get_sssom_schema_object().mapping_slots)
        # Sorted so the failure message is stable between runs; iterating a set
        # of strings directly yields an order that varies with hash randomization.
        missing = sorted(slots.difference(FAIR_WEIGHTS))
        if missing:
            msg = "\n".join(missing)
            self.fail(msg=f"missing weights for mapping fields: {msg}")

    def test_mapping_weight(self) -> None:
        """Test that a mapping with labels scores higher than the same mapping without."""
        # Only the four required slots are filled in
        m1 = SSSOM_Mapping(
            subject_id="DOID:0050601",
            predicate_id="skos:exactMatch",
            object_id="UMLS:C1863204",
            mapping_justification=SEMAPV.ManualMappingCuration.value,
        )
        # Same mapping, additionally carrying subject and object labels
        m2 = SSSOM_Mapping(
            subject_id="DOID:0050601",
            subject_label="ADULT syndrome",
            predicate_id="skos:exactMatch",
            object_id="UMLS:C1863204",
            object_label="ADULT syndrome",
            mapping_justification=SEMAPV.ManualMappingCuration.value,
        )
        self.assertLess(calculate_fairness(m1), calculate_fairness(m2))
Loading