Skip to content

Commit 794bc7e

Browse files
etienneguevelpercevalw
authored and committed
fix: correct conversions for unitless patterns
1 parent c36f986 commit 794bc7e

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

edsnlp/pipes/misc/quantities/patterns.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,20 @@
18411841
"terms": ["gt", "goutte", "gouttes"],
18421842
"followed_by": None,
18431843
},
1844+
"µl": {
1845+
"dim": "length",
1846+
"degree": 3,
1847+
"scale": 0.001,
1848+
"terms": [
1849+
"micro litre",
1850+
"micro-litre",
1851+
"microlitre",
1852+
"microlitres",
1853+
"microlitres",
1854+
"µl",
1855+
],
1856+
"followed_by": None,
1857+
},
18441858
"ml": {
18451859
"dim": "length",
18461860
"degree": 3,

edsnlp/pipes/misc/quantities/quantities.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,9 @@ def parse_unit(self, unit: str) -> Tuple[str, float]:
116116
for part in regex.split("(?<!per)_", unit):
117117
unit_config = self.config[unicodedata.normalize("NFKC", part)]
118118
degrees[unit_config["dim"]].append(unit_config["degree"])
119-
scale *= unit_config["scale"] ** abs(unit_config["degree"])
119+
120+
# use a minimum exponent of 1 -> accounts for unitless patterns (degree 0) such as x10*9
121+
scale *= unit_config["scale"] ** max(1, abs(unit_config["degree"]))
120122
degrees = {
121123
k: sum(v) if len(set(v)) > 1 else v[0]
122124
for k, v in degrees.items()

tests/pipelines/misc/test_quantities.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ def test_conversions(blank_nlp, matcher: QuantitiesMatcher):
367367
("10 l", "cm3", 10000),
368368
("10 l", "cl", 1000),
369369
("25 kg/m2", "kg_per_cm2", 0.0025),
370+
("2.4 x10*9µl", "l", 2400),
370371
]
371372

372373
for text, unit, expected in tests:

0 commit comments

Comments
 (0)