Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ _build/
*.tar.gz
*.tsv
*.ann
!text.ann

# Editors
.idea
Expand Down
8 changes: 8 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@

- `eds.tables` accepts a minimum_table_size (default 2) argument to reduce pollution
- `RuleBasedQualifier` now exposes a `process` method that only returns qualified entities and tokens without actually tagging them, deferring this task to the `__call__` method.
- Relation implementation in `doc.spans["<label>"][i]._.rel = [{'type':'rel_type', 'target': <span>},]`
- Relation connector with brat2docs and docs2brat in `edsnlp.connectors.brat` compatible with `edsnlp.data.read_*` and `edsnlp.data.write_*` (modified files : `edsnlp.data.converters`, `edsnlp.data.standoff`)
- Rule-based relation model using proximity and/or sentences in `edsnlp.pipes.misc.relations`, registered as `eds.relations`
- Documentation using Mkdocs for relations `docs.pipes.misc.relations.md` and `docs.pipes.misc.index.md`
- Tests for relations `tests.pipelines.misc.test_relations` and resources `ressources.relations`
- `data.set_processing(...)` now exposes an `autocast` parameter to disable or tweak the automatic casting of tensors
  during processing. Autocasting should result in a slight speedup, but may lead to numerical instability.
- Use `torch.inference_mode` to disable view tracking and version counter bumps during inference.

### Fixed

Expand Down
1 change: 1 addition & 0 deletions docs/pipes/misc/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ For instance, the date detection and normalisation pipeline falls in this catego
| `eds.sections` | Section detection |
| `eds.reason` | Rule-based hospitalisation reason detection |
| `eds.tables` | Tables detection |
| `eds.relations` | Relations extraction |

<!-- --8<-- [end:components] -->
8 changes: 8 additions & 0 deletions docs/pipes/misc/relations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Relations {: #edsnlp.pipes.misc.relations.factory.create_component }

::: edsnlp.pipes.misc.relations.factory.create_component
    options:
        heading_level: 2
        show_bases: false
        show_source: false
        only_class_level: true
164 changes: 144 additions & 20 deletions edsnlp/data/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ class StandoffDict2DocConverter:
span_attributes : Optional[AttributesMappingArg]
Mapping from BRAT attributes to Span extensions (can be a list too).
By default, all attributes are imported as Span extensions with the same name.
span_rel : Optional[AttributesMappingArg]
Mapping from BRAT relations to Span extensions (can be a list too).
By default, all relations are imported as Span extensions with the name rel.
keep_raw_attribute_values : bool
Whether to keep the raw attribute values (as strings) or to convert them to
Python objects (e.g. booleans).
Expand All @@ -214,6 +217,7 @@ def __init__(
tokenizer: Optional[Tokenizer] = None,
span_setter: SpanSetterArg = {"ents": True, "*": True},
span_attributes: Optional[AttributesMappingArg] = None,
span_rel: Optional[AttributesMappingArg] = None, # to keep ?
keep_raw_attribute_values: bool = False,
bool_attributes: SequenceStr = [],
default_attributes: AttributesMappingArg = {},
Expand All @@ -223,6 +227,7 @@ def __init__(
self.tokenizer = tokenizer or (nlp.tokenizer if nlp is not None else None)
self.span_setter = span_setter
self.span_attributes = span_attributes # type: ignore
self.span_rel = span_rel # to keep ?
self.keep_raw_attribute_values = keep_raw_attribute_values
self.default_attributes = default_attributes
self.notes_as_span_attribute = notes_as_span_attribute
Expand All @@ -244,12 +249,19 @@ def __call__(self, obj):
if not Span.has_extension(dst):
Span.set_extension(dst, default=None)

############## Modifications for relations ###############
dict_entities = {} ## dict for entity storage
for ent in obj.get("entities") or ():
begin = min(f["begin"] for f in ent["fragments"]) # start of the entity
end = max(f["end"] for f in ent["fragments"]) # end of the entity
dict_entities[ent["entity_id"]] = (
ent["label"] + ";" + str(begin) + ";" + str(end)
)
fragments = (
[
{
"begin": min(f["begin"] for f in ent["fragments"]),
"end": max(f["end"] for f in ent["fragments"]),
"begin": begin,
"end": end,
}
]
if not self.split_fragments
Expand All @@ -267,6 +279,11 @@ def __call__(self, obj):
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
attributes = (
{a["label"]: a["value"] for a in ent["attributes"]}
if isinstance(ent["attributes"], list)
else ent["attributes"]
)
if self.notes_as_span_attribute and ent["notes"]:
ent["attributes"][self.notes_as_span_attribute] = "|".join(
note["value"] for note in ent["notes"]
Expand Down Expand Up @@ -302,6 +319,67 @@ def __call__(self, obj):
if span._.get(attr) is None:
span._.set(attr, value)

############## Modifications fo relations ###############
# add relations in spans
if self.span_rel is None and not Span.has_extension("rel"):
Span.set_extension("rel", default=[])

for rel in obj.get("relations") or (): # iterates relations
for label in doc.spans: # iterates source labels
for i, spa in enumerate(doc.spans[label]): # iterates source spans
bo = False

# relations
if dict_entities[rel["from_entity_id"]].split(";") == [
label,
str(spa.start_char),
str(spa.end_char),
]: # sif source entity is the same as the span
for label2 in doc.spans: # iiterates target labels
for j, spa2 in enumerate(
doc.spans[label2]
): # iterates target label
if dict_entities[rel["to_entity_id"]].split(";") == [
label2,
str(spa2.start_char),
str(spa2.end_char),
]: # if target entity is the same as the span
relation = {
"type": rel["relation_label"],
"target": doc.spans[label2][j],
} # create the relation
doc.spans[label][i]._.rel.append(
relation
) # add the relation to the span
bo = True
break
if bo:
break
bo = False

# inverse relations
if dict_entities[rel["to_entity_id"]].split(";") == [
label,
str(spa.start_char),
str(spa.end_char),
]:
for label2 in doc.spans:
for j, spa2 in enumerate(doc.spans[label2]):
if dict_entities[rel["from_entity_id"]].split(";") == [
label2,
str(spa2.start_char),
str(spa2.end_char),
]:
relation = {
"type": "inv_" + rel["relation_label"],
"target": doc.spans[label2][j],
}
doc.spans[label][i]._.rel.append(relation)
bo = True
break
if bo:
break

return doc


Expand Down Expand Up @@ -346,29 +424,75 @@ def __init__(

def __call__(self, doc):
    """
    Convert a document into a standoff-style dictionary.

    Parameters
    ----------
    doc : Doc
        The document to export. Its spans are collected with
        `get_spans(doc, self.span_getter)`, and relations are read from the
        `rel` extension of the spans stored in `doc.spans`.

    Returns
    -------
    dict
        A dictionary with the note id (under `FILENAME` and `"doc_id"`), the
        text, the exported `"entities"` and the `"relations"` between them.
    """
    spans = get_spans(doc, self.span_getter)

    # `dict.fromkeys` drops duplicated spans while keeping them hashable-unique;
    # sorting makes the numbering of `entity_id` deterministic.
    entities = [
        {
            "entity_id": i,
            "fragments": [
                {
                    "begin": ent.start_char,
                    "end": ent.end_char,
                }
            ],
            "attributes": {
                obj_name: getattr(ent._, ext_name)
                for ext_name, obj_name in self.span_attributes.items()
                if ent._.has(ext_name)
            },
            "label": ent.label_,
        }
        for i, ent in enumerate(sorted(dict.fromkeys(spans)))
    ]

    # Lookup table from (begin, end, label) to the exported `entity_id`,
    # used to resolve both ends of each relation.
    entity_map = {
        (
            ent["fragments"][0]["begin"],
            ent["fragments"][0]["end"],
            ent["label"],
        ): ent["entity_id"]
        for ent in entities
    }

    relations = []
    for span_list in doc.spans.values():
        for spa in span_list:
            source_entity_id = entity_map.get(
                (spa.start_char, spa.end_char, spa.label_)
            )
            # Skip spans that were not exported as entities, and do not fail
            # when the `rel` extension was never registered on Span.
            if source_entity_id is None or not spa._.has("rel"):
                continue
            # `or ()` tolerates a `rel` extension whose value is None.
            for rel in spa._.rel or ():
                # "inv_" relations mirror a forward relation stored on the
                # other span, so only the forward direction is exported.
                if rel["type"].startswith("inv_"):
                    continue
                target = rel["target"]
                target_entity_id = entity_map.get(
                    (target.start_char, target.end_char, target.label_)
                )
                if target_entity_id is None:
                    continue
                relations.append(
                    {
                        # 1-based, incremented for each exported relation
                        "rel_id": len(relations) + 1,
                        "from_entity_id": source_entity_id,
                        "relation_type": rel["type"],
                        "to_entity_id": target_entity_id,
                    }
                )

    # The entity list is built once above and reused here (a dict literal
    # with two "entities" keys would silently keep only the last one).
    return {
        FILENAME: doc._.note_id,
        "doc_id": doc._.note_id,
        "text": doc.text,
        "entities": entities,
        "relations": relations,
    }


Expand Down
28 changes: 14 additions & 14 deletions edsnlp/data/standoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,20 +264,20 @@ def dump_standoff_file(
file=f,
)
attribute_idx += 1

# fmt: off
# if "relations" in doc:
# for i, relation in enumerate(doc["relations"]):
# entity_from = entities_ids[relation["from_entity_id"]]
# entity_to = entities_ids[relation["to_entity_id"]]
# print(
# "R{}\t{} Arg1:{} Arg2:{}\t".format(
# i + 1, str(relation["label"]), entity_from,
# entity_to
# ),
# file=f,
# )
# fmt: on
# Ajout du traitement des relations
relation_idx = 1
if "relations" in doc:
for relation in doc["relations"]:
print(
"R{}\t{} Arg1:{} Arg2:{}".format(
relation_idx,
relation["relation_type"],
entities_ids[relation["from_entity_id"]],
entities_ids[relation["to_entity_id"]],
),
file=f,
)
relation_idx += 1


class StandoffReader(BaseReader):
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .misc.dates.factory import create_component as dates
from .misc.quantities.factory import create_component as quantities
from .misc.reason.factory import create_component as reason
from .misc.relations.factory import create_component as relations
from .misc.sections.factory import create_component as sections
from .misc.tables.factory import create_component as tables
from .ner.adicap.factory import create_component as adicap
Expand Down
1 change: 1 addition & 0 deletions edsnlp/pipes/misc/relations/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .relations import RelationsMatcher
17 changes: 17 additions & 0 deletions edsnlp/pipes/misc/relations/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from edsnlp.core import registry

from .relations import RelationsMatcher

# Default settings declared alongside the `eds.relations` factory.
# NOTE(review): this mapping is not passed to the registration call in this
# module — confirm where (or whether) it is consumed.
DEFAULT_CONFIG = {
    "scheme": None,
    "use_sentences": False,
    "clean_rel": False,
    "proximity_method": "right",
    "max_dist": 45,
}

# Build the registration decorator for the "eds.relations" factory name, then
# apply it to `RelationsMatcher`; the result is exposed as `create_component`.
# NOTE(review): "relations" is listed under `deprecated` — presumably a legacy
# alias for the factory name; confirm against the registry implementation.
_register_relations = registry.factory.register(
    "eds.relations",
    assigns=["doc.spans"],
    deprecated=["relations"],
)
create_component = _register_relations(RelationsMatcher)
17 changes: 17 additions & 0 deletions edsnlp/pipes/misc/relations/patterns.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Example relation scheme: a list of rules, each with "source" and "target"
# span descriptions (a label plus an "attr" mapping of attribute name to a
# list of values), a relation "type" and the name of its inverse "inv_type".
# NOTE(review): presumably consumed as the `scheme` argument of the relations
# component, with `None` standing for an unset attribute — confirm.
scheme = [
    {
        "source": [
            {"label": "Chemical_and_drugs", "attr": {"Tech": [None]}},
        ],
        "target": [
            {
                "label": "Temporal",
                "attr": {
                    "AttTemp": [None, "Duration", "Date", "Frequency"],
                },
            },
            {
                "label": "Chemical_and_drugs",
                "attr": {
                    "Tech": ["dosage", "route", "strength", "form"],
                },
            },
        ],
        "type": "Depend",
        "inv_type": "inv_Depend",
    },
]
Loading
Loading