Skip to content

Commit f466a6a

Browse files
authored
Add more type checking (#613)
1 parent 0c88536 commit f466a6a

29 files changed

+255
-201
lines changed

src/sssom/cli.py

Lines changed: 43 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,15 @@
1616
import sys
1717
from operator import itemgetter
1818
from pathlib import Path
19-
from typing import Any, Callable, List, Optional, TextIO, Tuple, get_args
19+
from typing import Any, Callable, List, Optional, TextIO, Tuple, TypeVar, get_args
2020

2121
import click
2222
import curies
2323
import pandas as pd
2424
import yaml
2525
from curies import Converter
26-
from rdflib import Graph
26+
from rdflib import Graph, URIRef
27+
from typing_extensions import ParamSpec
2728

2829
from sssom.constants import (
2930
DEFAULT_VALIDATION_TYPES,
@@ -73,7 +74,7 @@
7374
"-I",
7475
"--input-format",
7576
help="The string denoting the input format.",
76-
type=click.Choice(PARSING_FUNCTIONS),
77+
type=click.Choice(list(PARSING_FUNCTIONS)),
7778
)
7879
output_option = click.option(
7980
"-o",
@@ -86,7 +87,7 @@
8687
"-O",
8788
"--output-format",
8889
help="Desired output format.",
89-
type=click.Choice(WRITER_FUNCTIONS),
90+
type=click.Choice(list(WRITER_FUNCTIONS)),
9091
)
9192
output_directory_option = click.option(
9293
"-d",
@@ -123,7 +124,7 @@
123124
@click.option("-v", "--verbose", count=True)
124125
@click.option("-q", "--quiet")
125126
@click.version_option(__version__)
126-
def main(verbose: int, quiet: bool):
127+
def main(verbose: int, quiet: bool) -> None:
127128
"""Run the SSSOM CLI."""
128129
logger = _logging.getLogger()
129130

@@ -142,7 +143,7 @@ def main(verbose: int, quiet: bool):
142143
@main.command()
143144
@click.argument("subcommand")
144145
@click.pass_context
145-
def help(ctx, subcommand):
146+
def help(ctx: click.Context, subcommand: str) -> None:
146147
"""Echoes help for subcommands."""
147148
subcommand_obj = main.get_command(ctx, subcommand)
148149
if subcommand_obj is None:
@@ -155,7 +156,7 @@ def help(ctx, subcommand):
155156
@input_argument
156157
@output_option
157158
@output_format_option
158-
def convert(input: str, output: TextIO, output_format: str):
159+
def convert(input: str, output: TextIO, output_format: str) -> None:
159160
"""Convert a file.
160161
161162
Example:
@@ -214,7 +215,7 @@ def parse(
214215
output: TextIO,
215216
embedded_mode: bool,
216217
mapping_predicate_filter: Optional[tuple],
217-
):
218+
) -> None:
218219
"""Parse a file in one of the supported formats (such as obographs) into an SSSOM TSV file."""
219220
parse_file(
220221
input_path=input,
@@ -234,7 +235,7 @@ def parse(
234235
@click.option(
235236
"--validation-types",
236237
"-V",
237-
type=click.Choice(SchemaValidationType),
238+
type=click.Choice(list(SchemaValidationType)),
238239
multiple=True,
239240
default=DEFAULT_VALIDATION_TYPES,
240241
)
@@ -247,7 +248,7 @@ def validate(input: str, validation_types: List[SchemaValidationType]):
247248
@main.command()
248249
@input_argument
249250
@output_directory_option
250-
def split(input: str, output_directory: str):
251+
def split(input: str, output_directory: str) -> None:
251252
"""Split input file into multiple output broken down by prefixes."""
252253
split_file(input_path=input, output_directory=output_directory)
253254

@@ -261,7 +262,7 @@ def split(input: str, output_directory: str):
261262
type=click.FloatRange(0, 1),
262263
help="Default confidence to be assigned if absent.",
263264
)
264-
def ptable(input, output: TextIO, inverse_factor: float, default_confidence: float):
265+
def ptable(input, output: TextIO, inverse_factor: float, default_confidence: float) -> None:
265266
"""Convert an SSSOM file to a ptable for kboom/`boomer <https://github.com/INCATools/boomer>`_."""
266267
# TODO should maybe move to boomer (but for now it can live here, so cjm can tweak)
267268
msdf = parse_sssom_table(input)
@@ -275,7 +276,7 @@ def ptable(input, output: TextIO, inverse_factor: float, default_confidence: flo
275276
@main.command()
276277
@input_argument
277278
@output_option
278-
def dedupe(input: str, output: TextIO):
279+
def dedupe(input: str, output: TextIO) -> None:
279280
"""Remove lower confidence duplicate lines from an SSSOM file."""
280281
# df = parse(input)
281282
msdf = parse_sssom_table(input)
@@ -291,7 +292,7 @@ def dedupe(input: str, output: TextIO):
291292
@click.option("-Q", "--query", help='SQL query. Use "df" as table name.')
292293
@click.argument("inputs", nargs=-1)
293294
@output_option
294-
def dosql(query: str, inputs: List[str], output: TextIO):
295+
def dosql(query: str, inputs: List[str], output: TextIO) -> None:
295296
"""Run a SQL query over one or more SSSOM files.
296297
297298
Each of the N inputs is assigned a table name df1, df2, ..., dfN
@@ -353,7 +354,7 @@ def sparql(
353354
object_labels: bool,
354355
prefix: List[Tuple[str, str]],
355356
output: TextIO,
356-
):
357+
) -> None:
357358
"""Run a SPARQL query."""
358359
# FIXME this usage needs _serious_ refactoring
359360
endpoint = EndpointConfig(converter=Converter.from_prefix_map(dict(prefix))) # type: ignore
@@ -363,7 +364,7 @@ def sparql(
363364
if url is not None:
364365
endpoint.url = url
365366
if graph is not None:
366-
endpoint.graph = graph
367+
endpoint.graph = URIRef(graph)
367368
if limit is not None:
368369
endpoint.limit = limit
369370
if object_labels is not None:
@@ -376,7 +377,7 @@ def sparql(
376377
@main.command()
377378
@output_option
378379
@click.argument("inputs", nargs=2)
379-
def diff(inputs: Tuple[str, str], output: TextIO):
380+
def diff(inputs: Tuple[str, str], output: TextIO) -> None:
380381
"""Compare two SSSOM files.
381382
382383
The output is a new SSSOM file with the union of all mappings, and
@@ -398,7 +399,7 @@ def diff(inputs: Tuple[str, str], output: TextIO):
398399
)
399400

400401
prefix_map_list = [msdf1, msdf2]
401-
converter = curies.chain(m.converter for m in prefix_map_list)
402+
converter = curies.chain([m.converter for m in prefix_map_list])
402403
msdf = MappingSetDataFrame.with_converter(
403404
df=d.combined_dataframe.drop_duplicates(), converter=converter
404405
)
@@ -413,7 +414,7 @@ def diff(inputs: Tuple[str, str], output: TextIO):
413414
@main.command()
414415
@output_directory_option
415416
@click.argument("inputs", nargs=-1)
416-
def partition(inputs: List[str], output_directory: str):
417+
def partition(inputs: List[str], output_directory: str) -> None:
417418
"""Partition an SSSOM into one file for each strongly connected component."""
418419
docs = [parse_sssom_table(input) for input in inputs]
419420
doc = docs.pop()
@@ -443,7 +444,7 @@ def partition(inputs: List[str], output_directory: str):
443444
@output_option
444445
@metadata_option
445446
@click.option("-s", "--statsfile")
446-
def cliquesummary(input: str, output: TextIO, metadata: str, statsfile: str):
447+
def cliquesummary(input: str, output: TextIO, metadata: str, statsfile: str) -> None:
447448
"""Calculate summaries for each clique in an SSSOM file."""
448449
if metadata is None:
449450
doc = parse_sssom_table(input)
@@ -470,7 +471,7 @@ def cliquesummary(input: str, output: TextIO, metadata: str, statsfile: str):
470471
@output_option
471472
@transpose_option
472473
@fields_option
473-
def crosstab(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str]):
474+
def crosstab(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str]) -> None:
474475
"""Write sssom summary cross-tabulated by categories."""
475476
df = remove_unmatched(parse_sssom_table(input).df)
476477
logging.info(f"#CROSSTAB ON {fields}")
@@ -486,7 +487,7 @@ def crosstab(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str
486487
@transpose_option
487488
@fields_option
488489
@input_argument
489-
def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str]):
490+
def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple[str, str]) -> None:
490491
"""Calculate correlations."""
491492
try:
492493
from scipy.stats import chi2_contingency
@@ -534,7 +535,7 @@ def correlations(input: str, output: TextIO, transpose: bool, fields: Tuple[str,
534535
help="Boolean indicating the need for reconciliation of the SSSOM tsv file.",
535536
)
536537
@output_option
537-
def merge(inputs: str, output: TextIO, reconcile: bool = False):
538+
def merge(inputs: str, output: TextIO, reconcile: bool = False) -> None:
538539
"""Merge multiple MappingSetDataFrames into one.
539540
540541
if reconcile=True, then dedupe(remove redundant lower confidence mappings) and
@@ -559,13 +560,13 @@ def merge(inputs: str, output: TextIO, reconcile: bool = False):
559560
)
560561
@output_option
561562
def rewire(
562-
input,
563-
mapping_file,
564-
precedence,
563+
input: str,
564+
mapping_file: str,
565+
precedence: list[str],
565566
output: TextIO,
566-
input_format,
567-
output_format,
568-
):
567+
input_format: str,
568+
output_format: str,
569+
) -> None:
569570
"""Rewire an ontology using equivalent classes/properties from a mapping file.
570571
571572
Example:
@@ -589,7 +590,7 @@ def rewire(
589590
help="Provide YAML file with prefix reconciliation information.",
590591
)
591592
@output_option
592-
def reconcile_prefixes(input: str, reconcile_prefix_file: Path, output: TextIO):
593+
def reconcile_prefixes(input: str, reconcile_prefix_file: Path, output: TextIO) -> None:
593594
"""
594595
Reconcile prefix_map based on provided YAML file.
595596
@@ -619,7 +620,7 @@ def reconcile_prefixes(input: str, reconcile_prefix_file: Path, output: TextIO):
619620
default=True,
620621
help="Sort rows by DataFrame column #1 (ascending).",
621622
)
622-
def sort(input: str, output: TextIO, by_columns: bool, by_rows: bool):
623+
def sort(input: str, output: TextIO, by_columns: bool, by_rows: bool) -> None:
623624
"""
624625
Sort DataFrame columns canonically.
625626
@@ -660,14 +661,20 @@ def sort(input: str, output: TextIO, by_columns: bool, by_rows: bool):
660661
# write_table(msdf=filtered_msdf, file=output)
661662

662663

663-
def dynamically_generate_sssom_options(options) -> Callable[[Any], Any]:
664+
P = ParamSpec("P")
665+
T = TypeVar("T")
666+
667+
668+
def dynamically_generate_sssom_options(
669+
options: List[str],
670+
) -> Callable[[Callable[P, T]], Callable[P, T]]:
664671
"""Dynamically generate click options.
665672
666673
:param options: List of all possible options.
667674
:return: Click options deduced from user input into parameters.
668675
"""
669676

670-
def _decorator(f):
677+
def _decorator(f: Callable[P, T]) -> Callable[P, T]:
671678
for sssom_slot in reversed(options):
672679
click.option("--" + sssom_slot, multiple=True)(f)
673680
return f
@@ -679,7 +686,7 @@ def _decorator(f):
679686
@input_argument
680687
@output_option
681688
@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_slots)
682-
def filter(input: str, output: TextIO, **kwargs):
689+
def filter(input: str, output: TextIO, **kwargs: Any) -> None:
683690
"""Filter a dataframe by dynamically generating queries based on user input.
684691
685692
e.g. sssom filter --subject_id x:% --subject_id y:% --object_id y:% --object_id z:% tests/data/basic.tsv
@@ -715,7 +722,7 @@ def filter(input: str, output: TextIO, **kwargs):
715722
help="Multivalued slots should be replaced or not. [default: False]",
716723
)
717724
@dynamically_generate_sssom_options(SSSOM_SV_OBJECT.mapping_set_slots)
718-
def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs):
725+
def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs: Any) -> None:
719726
"""Annotate metadata of a mapping set.
720727
721728
:param input: Input path of the SSSOM tsv file.
@@ -736,7 +743,7 @@ def annotate(input: str, output: TextIO, replace_multivalued: bool, **kwargs):
736743
help="Mapping file path that needs to be removed from input.",
737744
)
738745
@output_option
739-
def remove(input: str, output: TextIO, remove_map: str):
746+
def remove(input: str, output: TextIO, remove_map: str) -> None:
740747
"""Remove mappings from an input mapping.
741748
742749
:param input: Input SSSOM tsv file.
@@ -778,7 +785,7 @@ def invert(
778785
merge_inverted: bool,
779786
update_justification: bool,
780787
inverse_map: TextIO,
781-
):
788+
) -> None:
782789
"""
783790
Invert subject and object IDs such that all subjects have the prefix provided.
784791

src/sssom/cliques.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def group_values(d: Dict[str, str]) -> Dict[str, List[str]]:
126126
return dict(rv)
127127

128128

129-
def get_src(src: Optional[str], curie: str):
129+
def get_src(src: Optional[str], curie: str) -> str:
130130
"""Get prefix of subject/object in the MappingSetDataFrame.
131131
132132
:param src: Source
@@ -139,7 +139,7 @@ def get_src(src: Optional[str], curie: str):
139139
return src
140140

141141

142-
def summarize_cliques(doc: MappingSetDataFrame):
142+
def summarize_cliques(doc: MappingSetDataFrame) -> pd.DataFrame:
143143
"""Summarize stats on a clique doc."""
144144
cliquedocs = split_into_cliques(doc)
145145
items = []

src/sssom/constants.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Constants."""
22

3+
from __future__ import annotations
4+
35
import pathlib
46
import uuid
57
from enum import Enum
68
from functools import cached_property, lru_cache
7-
from typing import Any, Dict, List, Literal, Set, TextIO, Union
9+
from typing import Any, ClassVar, Dict, List, Literal, Mapping, Set, TextIO, Union, cast
810

911
import importlib_resources
1012
import yaml
@@ -155,7 +157,7 @@
155157

156158
PREDICATE_INVERT_DICTIONARY = inverse_map["inverse_predicate_map"]
157159

158-
COLUMN_INVERT_DICTIONARY = {
160+
COLUMN_INVERT_DICTIONARY: Mapping[str, str] = {
159161
SUBJECT_ID: OBJECT_ID,
160162
SUBJECT_LABEL: OBJECT_LABEL,
161163
SUBJECT_CATEGORY: OBJECT_CATEGORY,
@@ -207,7 +209,7 @@ class SchemaValidationType(str, Enum):
207209
StrictCurieFormat = "StrictCurieFormat"
208210

209211

210-
DEFAULT_VALIDATION_TYPES = [
212+
DEFAULT_VALIDATION_TYPES: List[SchemaValidationType] = [
211213
SchemaValidationType.JsonSchema,
212214
SchemaValidationType.PrefixMapCompleteness,
213215
SchemaValidationType.StrictCurieFormat,
@@ -222,7 +224,9 @@ class SSSOMSchemaView(object):
222224
Implemented via PR: https://github.com/mapping-commons/sssom-py/pull/323
223225
"""
224226

225-
def __new__(cls):
227+
instance: ClassVar[SSSOMSchemaView]
228+
229+
def __new__(cls) -> SSSOMSchemaView:
226230
"""Create an instance of the SSSOM schema view if non-existent."""
227231
if not hasattr(cls, "instance"):
228232
cls.instance = super(SSSOMSchemaView, cls).__new__(cls)
@@ -234,7 +238,7 @@ def view(self) -> SchemaView:
234238
return SchemaView(SCHEMA_YAML)
235239

236240
@cached_property
237-
def dict(self) -> dict:
241+
def dict(self) -> Dict[str, Any]:
238242
"""Return SchemaView as a dictionary."""
239243
return schema_as_dict(self.view.schema)
240244

@@ -246,7 +250,7 @@ def mapping_slots(self) -> List[str]:
246250
@cached_property
247251
def mapping_set_slots(self) -> List[str]:
248252
"""Return list of mapping set slots."""
249-
return self.view.get_class("mapping set").slots
253+
return cast(List[str], self.view.get_class("mapping set").slots)
250254

251255
@cached_property
252256
def multivalued_slots(self) -> Set[str]:
@@ -288,7 +292,9 @@ def propagatable_slots(self) -> List[str]:
288292
def _get_sssom_schema_object() -> SSSOMSchemaView:
289293
"""Get a view over the SSSOM schema."""
290294
sssom_sv_object = (
291-
SSSOMSchemaView.instance if hasattr(SSSOMSchemaView, "instance") else SSSOMSchemaView()
295+
SSSOMSchemaView.instance
296+
if hasattr(SSSOMSchemaView, "instance")
297+
else SSSOMSchemaView() # type:ignore[misc]
292298
)
293299
return sssom_sv_object
294300

0 commit comments

Comments
 (0)