Skip to content

Commit e2f67de

Browse files
authored
Merge pull request #567 from gouttegd/strip-trailing-tabs-in-header
Strip trailing tabs in embedded YAML header.
2 parents 02d6005 + 97b0039 commit e2f67de

File tree

3 files changed

+37
-1
lines changed

3 files changed

+37
-1
lines changed

src/sssom/parsers.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,11 @@ def _separate_metadata_and_table_from_stream(s: io.StringIO):
121 121
if header_section:
122 122
header_section = False
123 123
elif header_section:
124-
metadata_component.write(line)
124+
# We strip any trailing tabs. Such tabs may have been left
125+
# by a spreadsheet editor who treated the header lines as
126+
# if they were normal data lines; they would prevent the
127+
# YAML parser from correctly parsing the metadata block.
128+
metadata_component.write(line.rstrip("\t\n") + "\n")
125 129
else:
126 130
logging.info(
127 131
f"Line {line} is starting with hash symbol, but header section is already passed. "

tests/data/trailing-tabs.sssom.tsv

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#curie_map:
2+
# COMENT: https://example.com/entities/
3+
# COMPID: https://example.com/people/
4+
# ORGENT: https://example.org/entities/
5+
# ORGPID: https://example.org/people/
6+
#mapping_set_id: https://example.org/sets/exo2c
7+
#mapping_set_title: O2C set
8+
#creator_id:
9+
# - ORGPID:0000-0000-0001-1234
10+
# - COMPID:0000-0000-0002-5678
11+
#license: https://creativecommons.org/licenses/by/4.0/
12+
#publication_date: 2023-09-13
13+
subject_id subject_label predicate_id object_id object_label mapping_justification
14+
ORGENT:0001 alice skos:closeMatch COMENT:0011 alpha semapv:ManualMappingCuration
15+
ORGENT:0002 bob skos:closeMatch COMENT:0012 beta semapv:ManualMappingCuration
16+
ORGENT:0004 daphne skos:closeMatch COMENT:0014 delta semapv:ManualMappingCuration
17+
ORGENT:0005 eve skos:closeMatch COMENT:0015 epsilon semapv:ManualMappingCuration
18+
ORGENT:0006 fanny skos:closeMatch COMENT:0016 zeta semapv:ManualMappingCuration
19+
ORGENT:0007 gavin skos:exactMatch COMENT:0013 gamma semapv:ManualMappingCuration
20+
ORGENT:0008 hector skos:closeMatch COMENT:0017 eta semapv:ManualMappingCuration
21+
ORGENT:0009 ivan skos:exactMatch COMENT:0019 iota semapv:ManualMappingCuration

tests/test_parsers.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,17 @@ def test_parse_obographs_merged(self):
339 339
msdf = parse_sssom_table(outfile)
340 340
self.assertTrue(custom_curie_map.items() <= msdf.prefix_map.items())
341 341

342+
def test_parse_trailing_tabs_in_metadata_header(self):
343+
"""Test parsing a file containing trailing tabs in header."""
344+
input_path = f"{test_data_dir}/trailing-tabs.sssom.tsv"
345+
msdf = parse_sssom_table(input_path)
346+
self.assertEqual(msdf.metadata["mapping_set_id"], "https://example.org/sets/exo2c")
347+
self.assertEqual(
348+
len(msdf.df),
349+
8,
350+
f"{input_path} has the wrong number of mappings.",
351+
)
352+
342 353

343 354
class TestParseExplicit(unittest.TestCase):
344 355
"""This test case contains explicit tests for parsing."""

0 commit comments

Comments
 (0)