Skip to content

Commit f200547

Browse files
committed
fix: support nlp-less components
1 parent 7e0ceeb commit f200547

File tree

8 files changed

+140
-30
lines changed

8 files changed

+140
-30
lines changed

docs/concepts/pipeline.md

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,42 @@ To create your first EDS-NLP pipeline, run the following code. We provide severa
6060
nlp.add_pipe("eds.negation")
6161
```
6262

63-
=== "From a config file"
63+
=== "From a YAML config file"
64+
65+
You can also create a pipeline from a configuration file. This is useful when you plan on changing the pipeline configuration often.
66+
67+
```{ .yaml title="config.yml" }
68+
nlp:
69+
"@core": pipeline
70+
lang: eds
71+
components:
72+
sentences:
73+
"@factory": eds.sentences
74+
75+
matcher:
76+
"@factory": eds.matcher
77+
regex:
78+
smoker: ["fume", "clope"]
79+
80+
negation:
81+
"@factory": eds.negation
82+
```
83+
84+
and then load the pipeline with:
85+
86+
```{ .python .no-check }
87+
import edsnlp
88+
89+
nlp = edsnlp.load("config.yml")
90+
```
91+
92+
=== "From an INI config file"
6493

6594
You can also create a pipeline from a configuration file. This is useful when you plan on changing the pipeline configuration often.
6695

6796
```{ .cfg title="config.cfg" }
6897
[nlp]
98+
@core = "pipeline"
6999
lang = "eds"
70100
pipeline = ["sentences", "matcher", "negation"]
71101

@@ -100,7 +130,7 @@ from pathlib import Path
100130
nlp("Le patient ne fume pas")
101131
102132
# Processing multiple documents
103-
model.pipe([text1, text2])
133+
nlp.pipe([text1, text2])
104134
```
105135

106136
For more information on how to use the pipeline, refer to the [Inference](/inference) page.

docs/scripts/clickable_snippets.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def replace_link(match):
184184

185185
# Re-insert soups into the output
186186
for soup, start, end in reversed(soups):
187-
output = output[:start] + str(soup) + output[end:]
187+
output = output[:start] + str(soup.find("code")) + output[end:]
188188

189189
output = regex.sub(HREF_REGEX, replace_link, output)
190190

@@ -202,7 +202,7 @@ def convert_html_to_code(
202202
cls, html_content: str
203203
) -> Tuple[BeautifulSoup, str, list, list]:
204204
pre_html_content = "<pre>" + html_content + "</pre>"
205-
soup = BeautifulSoup(pre_html_content, "html5lib")
205+
soup = list(BeautifulSoup(pre_html_content, "html5lib").children)[0]
206206
code_element = soup.find("code")
207207

208208
line_lengths = [0]

docs/tutorials/make-a-training-script.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,7 @@ print(nlp.config.to_yaml_str())
395395

396396
```yaml title="config.yml"
397397
nlp:
398+
"@core": "pipeline"
398399
lang: "eds"
399400
components:
400401
ner:

edsnlp/core/pipeline.py

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import contextlib
22
import functools
33
import importlib
4-
import inspect
54
import os
65
import re
76
import shutil
@@ -10,6 +9,7 @@
109
import sysconfig
1110
import warnings
1211
from enum import Enum
12+
from inspect import Parameter, signature
1313
from pathlib import Path
1414
from types import FunctionType
1515
from typing import (
@@ -105,7 +105,7 @@ def __init__(
105105
vocab_config: Type[BaseDefaults] = None,
106106
meta: Dict[str, Any] = None,
107107
pipeline: Optional[Sequence[str]] = None,
108-
components: Dict[str, CurriedFactory] = {},
108+
components: Dict[str, Any] = {},
109109
disable: AsList[str] = EMPTY_LIST,
110110
enable: AsList[str] = EMPTY_LIST,
111111
exclude: AsList = EMPTY_LIST,
@@ -232,17 +232,18 @@ def create_pipe(
232232
Pipe
233233
"""
234234
try:
235-
curried: CurriedFactory = Config(
235+
pipe = Config(
236236
{
237237
"@factory": factory,
238238
**(config if config is not None else {}),
239239
}
240240
).resolve(registry=registry)
241-
if name is None:
242-
name = inspect.signature(curried.factory).parameters.get("name").default
243-
if name is None or name == inspect.Parameter.empty:
244-
name = factory
245-
pipe = curried.instantiate(nlp=self, path=(name,))
241+
if isinstance(pipe, CurriedFactory):
242+
if name is None:
243+
name = signature(pipe.factory).parameters.get("name").default
244+
if name is None or name == Parameter.empty:
245+
name = factory
246+
pipe = pipe.instantiate(nlp=self, path=(name,))
246247
except ConfitValidationError as e:
247248
raise e.with_traceback(None)
248249
return pipe
@@ -413,8 +414,8 @@ def pipe(
413414
inputs: Iterable[Union[str, Doc]]
414415
The inputs to create the Docs from, or Docs directly.
415416
n_process: int
416-
Deprecated. Use the ".set(num_cpu_workers=n_process)" method on the returned
417-
data stream instead.
417+
Deprecated. Use the ".set_processing(num_cpu_workers=n_process)" method
418+
on the returned data stream instead.
418419
The number of parallel workers to use. If 0, the operations will be
419420
executed sequentially.
420421
@@ -589,16 +590,6 @@ def _add_pipes(
589590
enable: Container[str],
590591
disable: Container[str],
591592
):
592-
# Since components are actually resolved as curried factories,
593-
# we need to instantiate them here
594-
for name, component in components.items():
595-
if not isinstance(component, CurriedFactory):
596-
raise ValueError(
597-
f"Component {repr(name)} is not instantiable (got {component}). "
598-
f"Please make sure that you didn't forget to add a '@factory' "
599-
f"key to the component config."
600-
)
601-
602593
try:
603594
components = CurriedFactory.instantiate(components, nlp=self)
604595
except ConfitValidationError as e:
@@ -1215,7 +1206,7 @@ def load(
12151206
elif is_package:
12161207
# Load as package
12171208
available_kwargs = {"overrides": overrides, **pipe_selection}
1218-
signature_kwargs = inspect.signature(module.load).parameters
1209+
signature_kwargs = signature(module.load).parameters
12191210
kwargs = {
12201211
name: available_kwargs[name]
12211212
for name in signature_kwargs

edsnlp/core/registries.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,38 @@ def maybe_nlp(self) -> Union["CurriedFactory", Any]:
7575
-------
7676
Union["CurriedFactory", Any]
7777
"""
78+
from edsnlp.core.pipeline import Pipeline, PipelineProtocol
79+
7880
sig = inspect.signature(self.factory)
79-
# and sig.parameters["nlp"].default is sig.empty
80-
if "nlp" not in sig.parameters or "nlp" in self.kwargs:
81+
if (
82+
not (
83+
"nlp" in sig.parameters
84+
and (
85+
sig.parameters["nlp"].default is sig.empty
86+
or sig.parameters["nlp"].annotation in (Pipeline, PipelineProtocol)
87+
)
88+
)
89+
or "nlp" in self.kwargs
90+
) and not self.search_curried_factory(self.kwargs):
8191
return self.factory(**self.kwargs)
8292
return self
8393

94+
@classmethod
95+
def search_curried_factory(cls, obj):
96+
if isinstance(obj, CurriedFactory):
97+
return obj
98+
elif isinstance(obj, dict):
99+
for value in obj.values():
100+
result = cls.search_curried_factory(value)
101+
if result is not None:
102+
return result
103+
elif isinstance(obj, (tuple, list, set)):
104+
for value in obj:
105+
result = cls.search_curried_factory(value)
106+
if result is not None:
107+
return result
108+
return None
109+
84110
def instantiate(
85111
obj: Any,
86112
nlp: "edsnlp.Pipeline",
@@ -177,6 +203,9 @@ def __getattr__(self, name):
177203
raise AttributeError(name)
178204
self._raise_curried_factory_error()
179205

206+
def __repr__(self):
207+
return f"CurriedFactory({self.factory})"
208+
180209

181210
glob = []
182211

edsnlp/data/converters.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -644,7 +644,6 @@ def __call__(self, doc):
644644
def get_dict2doc_converter(
645645
converter: Union[str, Callable], kwargs
646646
) -> Tuple[Callable, Dict]:
647-
kwargs_to_init = False
648647
if not callable(converter):
649648
available = edsnlp.registry.factory.get_available()
650649
try:
@@ -666,7 +665,7 @@ def get_dict2doc_converter(
666665
f"Cannot find converter for format {converter}. "
667666
f"Available converters are {', '.join(available)}"
668667
)
669-
if isinstance(converter, type) or kwargs_to_init:
668+
if isinstance(converter, type):
670669
return converter(**kwargs), {}
671670
return converter, validate_kwargs(converter, kwargs)
672671

tests/test_pipeline.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,8 @@ def test_disk_serialization(tmp_path, ml_nlp):
130130
[components.ner]
131131
@factory = "eds.ner_crf"
132132
embedding = ${components.transformer}
133-
target_span_getter = ["ents", "ner-preds"]
134133
mode = "independent"
134+
target_span_getter = ["ents", "ner-preds"]
135135
labels = ["PERSON", "GIFT"]
136136
infer_span_setter = false
137137
window = 40
@@ -254,6 +254,41 @@ def test_config_validation_error():
254254
assert "got 'error-mode'" in str(e.value)
255255

256256

257+
@edsnlp.registry.factory.register("test_wrapper", spacy_compatible=False)
258+
class WrapperComponent:
259+
def __init__(self, *, copy_list, copy_dict, sub):
260+
pass
261+
262+
263+
fail_config_sub = """
264+
nlp:
265+
lang: "eds"
266+
components:
267+
wrapper:
268+
"@factory": "test_wrapper"
269+
270+
copy_list:
271+
- ${nlp.components.wrapper.sub}
272+
273+
copy_dict:
274+
key: ${nlp.components.wrapper.sub}
275+
276+
sub:
277+
"@factory": "eds.matcher"
278+
terms: 100.0 # clearly wrong
279+
280+
matcher_copy: ${nlp.components.wrapper.sub}
281+
"""
282+
283+
284+
def test_config_sub_validation_error():
285+
with pytest.raises(ConfitValidationError):
286+
Pipeline.from_config(Config.from_yaml_str(fail_config_sub))
287+
288+
fix = {"nlp": {"components": {"wrapper": {"sub": {"terms": {"pattern": ["ok"]}}}}}}
289+
Pipeline.from_config(Config.from_yaml_str(fail_config_sub).merge(fix))
290+
291+
257292
def test_add_pipe_validation_error():
258293
model = edsnlp.blank("eds")
259294
with pytest.raises(ConfitValidationError) as e:
@@ -407,3 +442,26 @@ def test_repr(frozen_ml_nlp):
407442
"ner": eds.ner_crf
408443
})"""
409444
)
445+
446+
447+
@edsnlp.registry.factory.register("test_nlp_less", spacy_compatible=False)
448+
class NlpLessComponent:
449+
def __init__(self, nlp=None, name: str = "nlp_less", *, value: int):
450+
self.value = value
451+
self.name = name
452+
453+
def __call__(self, doc):
454+
return doc
455+
456+
457+
def test_nlp_less_component():
458+
component = NlpLessComponent(value=42)
459+
assert component.value == 42
460+
461+
config = """
462+
[component]
463+
@factory = "test_nlp_less"
464+
value = 42
465+
"""
466+
component = Config.from_str(config).resolve(registry=registry)["component"]
467+
assert component.value == 42

tests/training/qlf_config.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# 🤖 PIPELINE DEFINITION
22
nlp:
3+
"@core": pipeline
4+
35
lang: eds
46

57
components:

0 commit comments

Comments
 (0)