From f16ff3d15bfdc8fc988ae039eb15e2bc8b2f663e Mon Sep 17 00:00:00 2001 From: rafelafrance Date: Wed, 9 Oct 2024 14:22:51 -0400 Subject: [PATCH] Allow a single pattern to match multiple traits --- traiter/pylib/pipes/trait.py | 22 +++++++++++++++++----- traiter/pylib/rules/base.py | 4 ---- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/traiter/pylib/pipes/trait.py b/traiter/pylib/pipes/trait.py index b374e63..d17e282 100644 --- a/traiter/pylib/pipes/trait.py +++ b/traiter/pylib/pipes/trait.py @@ -3,7 +3,7 @@ from spacy import util from spacy.language import Language from spacy.matcher import Matcher -from spacy.tokens import Doc +from spacy.tokens import Doc, Span from traiter.pylib.pipes.reject_match import RejectMatch @@ -64,17 +64,29 @@ def __call__(self, doc: Doc) -> Doc: if ent_tokens & used_tokens: continue + traits = None if action := self.dispatch_table.get(label): try: - ent._.trait = action(ent) + traits = action(ent) except RejectMatch: continue used_tokens |= ent_tokens - self.relabel_ent(ent, label) - - entities.append(ent) + if isinstance(traits, list): + for trait in traits: + sub_ent = Span( + doc=ent.doc, + start=trait._start_token, + end=trait._end_token, + label=trait._trait, + ) + sub_ent._.trait = trait + entities.append(sub_ent) + else: + ent._.trait = traits + self.relabel_ent(ent, label) + entities.append(ent) self.add_untouched_entities(doc, entities, used_tokens) diff --git a/traiter/pylib/rules/base.py b/traiter/pylib/rules/base.py index 3086396..38126a6 100644 --- a/traiter/pylib/rules/base.py +++ b/traiter/pylib/rules/base.py @@ -23,10 +23,6 @@ def from_ent(cls, ent, **kwargs): kwargs["_text"] = ent.text return cls(**kwargs) - @classmethod - def dummy_ent(cls, **kwargs): - return cls(**kwargs) - def to_dict(self) -> dict: return {k: v for k, v in asdict(self).items() if v is not None and k[0] != "_"}