From 48bd3e28dc0573bf13ba5152d539db5d02653997 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 14:47:49 +0200 Subject: [PATCH 01/14] Use titlecase in qualifier values --- scripts/mock_context_rules.json | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/scripts/mock_context_rules.json b/scripts/mock_context_rules.json index 2f41693..3466aa7 100644 --- a/scripts/mock_context_rules.json +++ b/scripts/mock_context_rules.json @@ -5,35 +5,35 @@ { "name": "Experiencer", "values": [ - "PATIENT", - "OTHER" + "Patient", + "Other" ] }, { "name": "Negation", "values": [ - "AFFIRMED", - "NEGATED" + "Affirmed", + "Negated" ] }, { "name": "Plausibility", "values": [ - "PLAUSIBLE", - "HYPOTHETICAL" + "Plausible", + "Hypothetical" ] }, { "name": "Temporality", "values": [ - "CURRENT", - "HISTORICAL" + "Current", + "Historical" ] } ], "rules": [ { - "qualifier": "Experiencer.OTHER", + "qualifier": "Experiencer.Other", "direction": "preceding", "max_scope": 5, "patterns": [ @@ -214,7 +214,7 @@ ] }, { - "qualifier": "Experiencer.OTHER", + "qualifier": "Experiencer.Other", "direction": "following", "max_scope": 5, "patterns": [ @@ -277,7 +277,7 @@ ] }, { - "qualifier": "Experiencer.OTHER", + "qualifier": "Experiencer.Other", "direction": "pseudo", "patterns": [ "door familie", @@ -485,7 +485,7 @@ ] }, { - "qualifier": "Experiencer.OTHER", + "qualifier": "Experiencer.Other", "direction": "termination", "patterns": [ "beslist", @@ -511,7 +511,7 @@ ] }, { - "qualifier": "Negation.NEGATED", + "qualifier": "Negation.Negated", "direction": "preceding", "max_scope": 5, "patterns": [ @@ -581,7 +581,7 @@ ] }, { - "qualifier": "Negation.NEGATED", + "qualifier": "Negation.Negated", "direction": "following", "max_scope": 5, "patterns": [ @@ -620,7 +620,7 @@ ] }, { - "qualifier": "Negation.NEGATED", + "qualifier": "Negation.Negated", "direction": "pseudo", "patterns": [ "geen afname", @@ -654,7 +654,7 @@ ] }, { - "qualifier": 
"Negation.NEGATED", + "qualifier": "Negation.Negated", "direction": "termination", "patterns": [ "aangezien er", @@ -742,7 +742,7 @@ ] }, { - "qualifier": "Plausibility.HYPOTHETICAL", + "qualifier": "Plausibility.Hypothetical", "direction": "preceding", "max_scope": 5, "patterns": [ @@ -807,7 +807,7 @@ ] }, { - "qualifier": "Plausibility.HYPOTHETICAL", + "qualifier": "Plausibility.Hypothetical", "direction": "following", "max_scope": 4, "patterns": [ @@ -841,7 +841,7 @@ ] }, { - "qualifier": "Plausibility.HYPOTHETICAL", + "qualifier": "Plausibility.Hypothetical", "direction": "pseudo", "patterns": [ "als baby", @@ -882,7 +882,7 @@ ] }, { - "qualifier": "Plausibility.HYPOTHETICAL", + "qualifier": "Plausibility.Hypothetical", "direction": "termination", "patterns": [ "als gevolg van", @@ -895,7 +895,7 @@ ] }, { - "qualifier": "Temporality.HISTORICAL", + "qualifier": "Temporality.Historical", "direction": "preceding", "max_scope": 5, "patterns": [ @@ -937,7 +937,7 @@ ] }, { - "qualifier": "Temporality.HISTORICAL", + "qualifier": "Temporality.Historical", "direction": "following", "max_scope": 5, "patterns": [ @@ -1044,7 +1044,7 @@ ] }, { - "qualifier": "Temporality.HISTORICAL", + "qualifier": "Temporality.Historical", "direction": "pseudo", "patterns": [ "blanco psychiatrische vg", @@ -1156,7 +1156,7 @@ ] }, { - "qualifier": "Temporality.HISTORICAL", + "qualifier": "Temporality.Historical", "direction": "termination", "patterns": [ "actueel", From 35cf18e551a5322ba9c0d8fddd3a4f38a0a87033 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 14:48:00 +0200 Subject: [PATCH 02/14] Update docs pickle --- tests/test_data/clinlp_docs.pickle | Bin 239942 -> 239942 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/test_data/clinlp_docs.pickle b/tests/test_data/clinlp_docs.pickle index 04dde8c5fa43db8787dad3a119e4b31bcd82ad69..9b75e21d549d7eea11da87d80d383771b360d23d 100644 GIT binary patch delta 1471 
zcmb_cO-NKx812nBO`0{LhGj0w$h3?cE2u@AC@3hRbp&x|-Z$^ve1F|{-p|njakplM2LHxQl36VQoT$T($|MeRZGDu?Z54TD`~R-h0k>&bjB!Z1l}+^hTf3 z7n*|ExN^7nAr7yU2;mehE;=ZjtT2e@(zt&1s0%^AoXH?BLNO9hZoOCdIMflXrsqvpP{l?_FpTbA8Y%-sy0$ z47o$JBW$j$IG#*WW=h1D{SvXg2kL%Nl1uv{0e*3gLN20Aiy!k!eRDQNtc-JL(qU*4 z?lH=d^7OF`vTPS|(|gmc>HVL?_h-tU$e8Oe(kYs{V3)XX2UWH}P| z24Q>gbq@?cLq^4=1iVPuHq+2e=oD47s4E8hpdB*Q3XGrb{SE1FJ^ykOQlT=Ye|x@q z0Olc_vB;EoKLA^z&yqIx4Q0fkI3u1+DvebWDP$(KRAlN(NXpe&6?4t`7#V~SsLnFm zDG*|5XriHh5^pD!wyil|zk-`!G7i+zN)Gzs<<>IynmK&F2kXN%QVE~Nc*3@1EDQ`- zRHYARY-h9!J7p#%^3LMfhwv@5ebnWI`H97Z&auR;6r-I&?rJO!kHA8xX(Em~ks2Cu zh;fHWl@qp+Gi8qQDcjKGy^Eu=2a1otXxel>s2plg-aS0DK1B=j z4dUZts0sW2t^ODnA440|$I&1zFC>X6Q3z#0Gp+dBBCb9C&n5664YlFjelLw_=+9hG uyIsGVJeK2}yW^&k*Yy(S~>mjJgfxrjVW;jlc|1DnfQ{pUMECtCyZ<6YQaay}e(uhsWf`8^I* zC4n>kNZ_=b?t!}eWdx9=eo%w6{fqfk^0-zqLr#xxk^<*ZkZ-%P|DC%W9fCS%^g7&w#wC4cNF0+& zYi<~<@Kb;kFJefLxT{XBOL~_~-GV!7k8awqJ{anVw1ta2uMDkp*-wt zSb}048MTUTXXBWqOb+!!yL{32yFsgxw9MZ{j&KJqde=s~)+0{U0DOQQPdW`@hy-$C zQk+R6Pf`{~uCOvPE8h-4jjSGodU<4Ag`#ipe?u`n1OrfCAcSd{L_NM_QNfCFl?lo; zd3*$#%X8c?ZC6Iu7<(DZe+c52QTPhA6W!Q_EORt#m{T!>sW9j+$Kp7jJIhOQyA>?F0sOcWDDX{fto zim`h65W4Ei&Y`C2Zm_Z8b!nofaBTQjVgF#8Y?JmQsP?Ifg%|4uk6|0sE^dy=DC)!# zg-(*BIwiEIT2S5`|4+N}b_RS6Q%OTpY_4`qYUmOz#BW&c4oPua2w^QQ5udw6oX-=m Y7q%>QjHBDJd=ypjJLT;WsPp21AFIRd8~^|S From 8f23fe6e209dc87189e916e2067324d4fb3c865d Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 14:48:23 +0200 Subject: [PATCH 03/14] Make default optional --- src/clinlp/ie/qualifier/qualifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clinlp/ie/qualifier/qualifier.py b/src/clinlp/ie/qualifier/qualifier.py index d336eda..e1605cc 100644 --- a/src/clinlp/ie/qualifier/qualifier.py +++ b/src/clinlp/ie/qualifier/qualifier.py @@ -108,7 +108,7 @@ class Qualifier: value: str = field(compare=True) """The value of the qualifier.""" - is_default: bool = field(compare=True) + is_default: Optional[bool] = field(default=None, compare=True) """Whether 
the value is the default value.""" priority: int = field(default=0, compare=False) From 49d57a6e6dadeb9c746e6c70ae361edd47d35cd6 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 14:50:53 +0200 Subject: [PATCH 04/14] Update path --- scripts/generate_clinlp_docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_clinlp_docs.py b/scripts/generate_clinlp_docs.py index fdf6697..72d4ff6 100644 --- a/scripts/generate_clinlp_docs.py +++ b/scripts/generate_clinlp_docs.py @@ -71,7 +71,7 @@ def get_model() -> Language: nlp.add_pipe( "clinlp_context_algorithm", config={ - "rules": "scripts/test_data/mock_context_rules.json", + "rules": "scripts/mock_context_rules.json", "phrase_matcher_attr": "NORM", }, ) From ee8969dfee512d47f6bdea8ed660824ffdd4d0c1 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 14:54:24 +0200 Subject: [PATCH 05/14] Update metric computation --- CHANGELOG.md | 13 ++ src/clinlp/metrics/ie.py | 294 ++++++++++++++++++---------------- tests/unit/metrics/test_ie.py | 231 +++++++++++++++++++------- 3 files changed, 338 insertions(+), 200 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50c68e4..32d2a2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 :exclamation: = Breaking change +## (unreleased) + +### Added + +* Loading and exporting `InfoExtractionDataset` as dictionaries or JSON files +* Metric support for multi-class qualifiers + +### Changed + +* Made the `default` field for `Qualifier` optional +* `InfoExtractionDataset` and `InfoExtractionMetrics` use `Qualifier` objects for qualifiers rather than `dict` +* `InfoExtractionDataset` and `InfoExtractionMetrics` no longer track or use qualifier defaults + ## 0.8.1 (2024-06-27) ### Added diff --git a/src/clinlp/metrics/ie.py b/src/clinlp/metrics/ie.py index baef3d3..8496ed2 100644 --- a/src/clinlp/metrics/ie.py +++ 
b/src/clinlp/metrics/ie.py @@ -2,7 +2,8 @@ import inspect import itertools -import warnings +import json +import pathlib from collections import Counter, defaultdict from dataclasses import dataclass, field from typing import Callable, ClassVar, Iterable, Optional @@ -12,6 +13,7 @@ from spacy.language import Doc from clinlp.ie import SPANS_KEY +from clinlp.ie.qualifier import Qualifier @dataclass @@ -30,7 +32,7 @@ class Annotation: label: str """The label/tag.""" - qualifiers: list[dict] = field(default_factory=list) + qualifiers: list[Qualifier] = field(default_factory=list) """The applicable qualifiers.""" def lstrip(self, chars: str = " ,") -> None: @@ -85,6 +87,23 @@ def to_nervaluate(self) -> dict: "label": self.label, } + def to_dict(self) -> dict: + """ + Convert to dictionary format. + + Returns + ------- + ``dict`` + A dictionary with the items of this annotation. + """ + return { + "text": self.text, + "start": self.start, + "end": self.end, + "label": self.label, + "qualifiers": [{"name": q.name, "value": q.value} for q in self.qualifiers], + } + @property def qualifier_names(self) -> set[str]: """ @@ -95,9 +114,9 @@ def qualifier_names(self) -> set[str]: ``set[str]`` A set of unique qualifier names, e.g. {"Presence", "Experiencer"}. """ - return {qualifier["name"] for qualifier in self.qualifiers} + return {qualifier.name for qualifier in self.qualifiers} - def get_qualifier_by_name(self, qualifier_name: str) -> dict: + def get_qualifier_by_name(self, qualifier_name: str) -> Qualifier: """ Get a qualifier by name. @@ -108,7 +127,7 @@ def get_qualifier_by_name(self, qualifier_name: str) -> dict: Returns ------- - ``dict`` + ``Qualifier`` The qualifier with the provided name. Raises @@ -117,7 +136,7 @@ def get_qualifier_by_name(self, qualifier_name: str) -> dict: If no qualifier with the provided name exists. 
""" for qualifier in self.qualifiers: - if qualifier["name"] == qualifier_name: + if qualifier.name == qualifier_name: return qualifier msg = f"No qualifier with name {qualifier_name}." @@ -158,6 +177,21 @@ def to_nervaluate( return [ann.to_nervaluate() for ann in self.annotations if ann_filter(ann)] + def to_dict(self) -> dict: + """ + Convert to dictionary format. + + Returns + ------- + ``dict`` + A dictionary with the items of this document. + """ + return { + "identifier": self.identifier, + "text": self.text, + "annotations": [ann.to_dict() for ann in self.annotations], + } + def labels( self, ann_filter: Optional[Callable[[Annotation], bool]] = None ) -> set[str]: @@ -214,12 +248,6 @@ class InfoExtractionDataset: docs: list[Document] """The annotated documents.""" - default_qualifiers: Optional[dict[str, str]] = None - """ - Mapping of qualifiers to their default value, e.g. - ``{"Presence": "Present"}``. - """ - _ALL_STATS: ClassVar[list] = [ "num_docs", "num_annotations", @@ -228,58 +256,9 @@ class InfoExtractionDataset: "qualifier_freqs", ] - def __post_init__(self) -> None: - """ - Initialize the dataset. - - Initializes the default qualifiers, from the annotations (if available) or - infers them from the majority class. - """ - self.default_qualifiers = {} - - try: - for doc in self.docs: - for annotation in doc.annotations: - for qualifier in annotation.qualifiers: - if qualifier["is_default"]: - self.default_qualifiers[qualifier["name"]] = qualifier[ - "value" - ] - except KeyError: - self.default_qualifiers = self.infer_default_qualifiers() - - def infer_default_qualifiers(self) -> dict: - """ - Infer and set Annotations' default qualifiers from majority classes. - - Returns - ------- - ``dict`` - A dictionary mapping qualifier names to their default values. 
- """ - default_qualifiers = { - name: max(counts, key=lambda item: counts[item]) - for name, counts in self.qualifier_freqs().items() - } - - warnings.warn( - f"Inferred the following qualifier defaults from the majority " - f"classes: {default_qualifiers}. ", - UserWarning, - stacklevel=2, - ) - - for doc in self.docs: - for annotation in doc.annotations: - for qualifier in annotation.qualifiers: - qualifier["is_default"] = ( - default_qualifiers[qualifier["name"]] == qualifier["value"] - ) - - return default_qualifiers - - @staticmethod + @classmethod def from_clinlp_docs( + cls, nlp_docs: Iterable[Doc], ids: Optional[Iterable[str]] = None, spans_key: str = SPANS_KEY, @@ -309,27 +288,16 @@ def from_clinlp_docs( docs = [] for doc, identifier in zip(nlp_docs, ids): - annotations = [] - - for ent in doc.spans[spans_key]: - qualifiers = [ - { - "name": qualifier.name.title(), - "value": qualifier.value.title(), - "is_default": qualifier.is_default, - } - for qualifier in ent._.qualifiers - ] - - annotations.append( - Annotation( - text=str(ent), - start=ent.start_char, - end=ent.end_char, - label=ent.label_, - qualifiers=qualifiers, - ) + annotations = [ + Annotation( + text=str(ent), + start=ent.start_char, + end=ent.end_char, + label=ent.label_, + qualifiers=ent._.qualifiers, ) + for ent in doc.spans[spans_key] + ] docs.append( Document( @@ -337,14 +305,14 @@ def from_clinlp_docs( ) ) - return InfoExtractionDataset(docs=docs) + return cls(docs=docs) - @staticmethod + @classmethod def from_medcattrainer( + cls, data: dict, *, strip_spans: bool = True, - default_qualifiers: Optional[dict[str, str]] = None, ) -> "InfoExtractionDataset": """ Create a dataset from a ``MedCATTrainer`` export. @@ -357,11 +325,6 @@ def from_medcattrainer( strip_spans Whether to remove punctuation and whitespaces from the beginning or end of annotations. Used to clean up accidental over-annotations. 
- default_qualifiers - The default qualifiers (which are not included in the ``MedCATTrainer`` - export), e.g. ``{"Presence": "Absent", "Experiencer": "Patient"}``, by - default ``None``. If ``None``, will infer the default qualifiers from the - majority class. Returns ------- @@ -378,6 +341,7 @@ def from_medcattrainer( raise ValueError(msg) data = data["projects"][0] + docs = [] for doc in data["documents"]: @@ -385,21 +349,13 @@ def from_medcattrainer( for annotation in doc["annotations"]: if not annotation["deleted"]: - qualifiers = [] - - for qualifier in annotation["meta_anns"].values(): - qualifier = { - "name": qualifier["name"].title(), - "value": qualifier["value"].title(), - } - - if default_qualifiers is not None: - qualifier["is_default"] = ( - default_qualifiers[qualifier["name"]] - == qualifier["value"] - ) - - qualifiers.append(qualifier) + qualifiers = [ + Qualifier( + name=qualifier["name"].title(), + value=qualifier["value"].title(), + ) + for qualifier in annotation["meta_anns"].values() + ] annotation = Annotation( text=annotation["value"], @@ -420,7 +376,55 @@ def from_medcattrainer( ) ) - return InfoExtractionDataset(docs) + return cls(docs) + + @classmethod + def from_dict(cls, data: dict) -> "InfoExtractionDataset": + """ + Create a dataset from dictionary. + + Parameters + ---------- + data + The data in dictionary format. + + Returns + ------- + ``InfoExtractionDataset`` + A dataset, corresponding to the provided dictionary data. + """ + data = data.copy() + + for doc in data["docs"]: + for ann in doc["annotations"]: + ann["qualifiers"] = [ + Qualifier(**qualifier) for qualifier in ann.get("qualifiers", []) + ] + doc["annotations"] = [Annotation(**ann) for ann in doc["annotations"]] + + docs = [Document(**doc) for doc in data["docs"]] + + return cls(docs=docs) + + @classmethod + def read_json(cls, file: str) -> "InfoExtractionDataset": + """ + Read a dataset from a ``JSON`` file. + + Parameters + ---------- + file + The path to the file. 
+ + Returns + ------- + ``InfoExtractionDataset`` + A dataset, corresponding to the data in the provided file. + """ + with pathlib.Path(file).open() as f: + data = json.load(f) + + return cls.from_dict(data) def to_nervaluate( self, ann_filter: Optional[Callable[[Annotation], bool]] = None @@ -443,6 +447,29 @@ def to_nervaluate( return [doc.to_nervaluate(ann_filter) for doc in self.docs] + def to_dict(self) -> dict: + """ + Convert to dictionary format. + + Returns + ------- + ``dict`` + A dictionary with the items of this dataset. + """ + return {"docs": [doc.to_dict() for doc in self.docs]} + + def write_json(self, file: str) -> None: + """ + Write the dataset to a ``JSON`` file. + + Parameters + ---------- + file + The path to the file. + """ + with pathlib.Path(file).open("w") as f: + json.dump(self.to_dict(), f) + def num_docs(self) -> int: """ Compute the number of documents in this dataset. @@ -547,7 +574,7 @@ def qualifier_freqs(self) -> dict: for doc in self.docs: for annotation in doc.annotations: for qualifier in annotation.qualifiers: - cntrs[qualifier["name"]].update([qualifier["value"]]) + cntrs[qualifier.name].update([qualifier.value]) return {name: dict(counts) for name, counts in cntrs.items()} @@ -699,7 +726,8 @@ def _aggregate_qualifier_values(self) -> dict[str, dict[str, list]]: "pred": ["Present", "Absent", "Absent"], "misses": [ {"doc.identifier": 1, annotation: {"start": 0, "end": 5, "text": - "test"}, true_label: "Present", pred_label: "Absent"}, ...] + "test"}, true_qualifier: "Present", pred_qualifier: "Absent"}, + ...] }, ... 
} @@ -721,19 +749,19 @@ def _aggregate_qualifier_values(self) -> dict[str, dict[str, list]]: ) for name in qualifier_names: - true_val = true_annotation.get_qualifier_by_name(name)["value"] - pred_val = pred_annotation.get_qualifier_by_name(name)["value"] + true_value = true_annotation.get_qualifier_by_name(name).value + pred_value = pred_annotation.get_qualifier_by_name(name).value - aggregation[name]["true"].append(true_val) - aggregation[name]["pred"].append(pred_val) + aggregation[name]["true"].append(true_value) + aggregation[name]["pred"].append(pred_value) - if true_val != pred_val: + if true_value != pred_value: aggregation[name]["misses"].append( { "doc.identifier": true_doc.identifier, "annotation": true_annotation.to_nervaluate(), - "true_qualifier": true_val, - "pred_qualifier": pred_val, + "true_qualifier": true_value, + "pred_qualifier": pred_value, } ) @@ -763,36 +791,18 @@ def qualifier_metrics(self, *, misses: bool = True) -> dict: result = {} for name, values in aggregation.items(): - true_unique_values = set(values["true"]) - pred_unique_values = set(values["pred"]) - - if max(len(true_unique_values), len(pred_unique_values)) > 2: - msg = "Can oly compute metrics for binary qualifier values." 
- raise ValueError(msg) - - pos_label = next( - val - for val in true_unique_values - if val != self.true.default_qualifiers[name] - ) - - result[name] = { - "metrics": { - "n": len(values["true"]), - "n_pos_true": sum(1 for v in values["true"] if v == pos_label), - "n_pos_pred": sum(1 for v in values["pred"] if v == pos_label), - }, - } + result[name] = {"metrics": {"n": len(values["true"])}} if misses: result[name]["misses"] = values["misses"] - for ( - metric_name, - metric_func, - ) in InfoExtractionMetrics._QUALIFIER_METRICS.items(): - result[name]["metrics"][metric_name] = metric_func( - values["true"], values["pred"], pos_label=pos_label + metrics = InfoExtractionMetrics._QUALIFIER_METRICS + + for metric_name, metric_func in metrics.items(): + metric_result = metric_func( + values["true"], values["pred"], average="micro" ) + result[name]["metrics"][metric_name] = metric_result + return result diff --git a/tests/unit/metrics/test_ie.py b/tests/unit/metrics/test_ie.py index b201e9d..e6becac 100644 --- a/tests/unit/metrics/test_ie.py +++ b/tests/unit/metrics/test_ie.py @@ -5,6 +5,7 @@ import pytest from tests.conftest import TEST_DATA_DIR +from clinlp.ie.qualifier import Qualifier from clinlp.metrics.ie import ( Annotation, Document, @@ -72,7 +73,7 @@ def test_annotation_strip(self): # Assert assert ann == Annotation(text="test", start=1, end=5, label="test") - def test_annotation_nervaluate(self): + def test_annotation_to_nervaluate(self): # Arrange ann = Annotation(text="test", start=0, end=5, label="test") @@ -87,7 +88,7 @@ def test_annotation_nervaluate(self): "label": "test", } - def test_annotation_qualifier_names(self): + def test_annotation_to_dict(self): # Arrange ann = Annotation( text="test", @@ -95,18 +96,27 @@ def test_annotation_qualifier_names(self): end=4, label="test", qualifiers=[ - {"name": "Negation", "value": "Affirmed"}, - {"name": "Experiencer", "value": "Other"}, + Qualifier(name="Negation", value="Affirmed"), + 
Qualifier(name="Experiencer", value="Other"), ], ) # Act - qualifier_names = ann.qualifier_names + ann_dict = ann.to_dict() # Assert - assert qualifier_names == {"Negation", "Experiencer"} + assert ann_dict == { + "text": "test", + "start": 0, + "end": 4, + "label": "test", + "qualifiers": [ + {"name": "Negation", "value": "Affirmed"}, + {"name": "Experiencer", "value": "Other"}, + ], + } - def test_annotation_get_qualifier_by_name(self): + def test_annotation_qualifier_names(self): # Arrange ann = Annotation( text="test", @@ -114,20 +124,32 @@ def test_annotation_get_qualifier_by_name(self): end=4, label="test", qualifiers=[ - {"name": "Negation", "value": "Affirmed"}, - {"name": "Experiencer", "value": "Other"}, + Qualifier(name="Negation", value="Affirmed"), + Qualifier(name="Experiencer", value="Other"), ], ) + # Act + qualifier_names = ann.qualifier_names + + # Assert + assert qualifier_names == {"Negation", "Experiencer"} + + def test_annotation_get_qualifier_by_name(self): + # Arrange + q1 = Qualifier(name="Negation", value="Affirmed") + q2 = Qualifier(name="Experiencer", value="Other") + ann = Annotation(text="test", start=0, end=4, label="test", qualifiers=[q1, q2]) + # Act qualifier = ann.get_qualifier_by_name(qualifier_name="Experiencer") # Assert - assert qualifier == {"name": "Experiencer", "value": "Other"} + assert qualifier == q2 class TestDocument: - def test_document_nervaluate(self): + def test_document_to_nervaluate(self): # Arrange doc = Document( identifier="1", @@ -147,6 +169,42 @@ def test_document_nervaluate(self): {"text": "test2", "start": 10, "end": 15, "label": "test2"}, ] + def test_document_to_dict(self): + # Arrange + doc = Document( + identifier="1", + text="test1 and test2", + annotations=[ + Annotation(text="test1", start=0, end=5, label="test1"), + Annotation(text="test2", start=10, end=15, label="test2"), + ], + ) + + # Act + doc_dict = doc.to_dict() + + # Assert + assert doc_dict == { + "identifier": "1", + "text": "test1 and 
test2", + "annotations": [ + { + "text": "test1", + "start": 0, + "end": 5, + "label": "test1", + "qualifiers": [], + }, + { + "text": "test2", + "start": 10, + "end": 15, + "label": "test2", + "qualifiers": [], + }, + ], + } + def test_document_labels(self): # Arrange doc = Document( @@ -210,18 +268,6 @@ def test_document_annotation_from_span(self, start, end, expected_ann): @pytest.mark.filterwarnings("ignore:Inferred.*:UserWarning") class TestDataset: - def test_infer_default_qualifiers(self, mctrainer_dataset): - # Act - default_qualifiers = mctrainer_dataset.infer_default_qualifiers() - - # Assert - assert default_qualifiers == { - "Negation": "Affirmed", - "Experiencer": "Patient", - "Temporality": "Current", - "Plausibility": "Plausible", - } - def test_dataset_from_clinlp_docs(self, clinlp_docs): # Act ied = InfoExtractionDataset.from_clinlp_docs(nlp_docs=clinlp_docs) @@ -254,16 +300,14 @@ def test_dataset_from_clinlp_docs_qualifiers(self, clinlp_docs): ied = InfoExtractionDataset.from_clinlp_docs(nlp_docs=clinlp_docs) # Act - qualifiers = sorted( - ied.docs[0].annotations[0].qualifiers, key=lambda q: q["name"] - ) + qualifiers = sorted(ied.docs[0].annotations[0].qualifiers, key=lambda q: q.name) # Assert assert qualifiers == [ - {"name": "Experiencer", "value": "Patient", "is_default": True}, - {"name": "Negation", "value": "Negated", "is_default": False}, - {"name": "Plausibility", "value": "Plausible", "is_default": True}, - {"name": "Temporality", "value": "Current", "is_default": True}, + Qualifier(name="Experiencer", value="Patient", is_default=True), + Qualifier(name="Negation", value="Negated", is_default=False), + Qualifier(name="Plausibility", value="Plausible", is_default=True), + Qualifier(name="Temporality", value="Current", is_default=True), ] def test_dataset_from_medcattrainer_docs(self, mctrainer_data): @@ -302,13 +346,73 @@ def test_dataset_from_medcatrainer_docs_qualifiers(self, mctrainer_data): # Assert assert qualifiers == [ - {"name": 
"Temporality", "value": "Current", "is_default": True}, - {"name": "Plausibility", "value": "Plausible", "is_default": True}, - {"name": "Experiencer", "value": "Patient", "is_default": True}, - {"name": "Negation", "value": "Negated", "is_default": False}, + Qualifier(name="Temporality", value="Current"), + Qualifier(name="Plausibility", value="Plausible"), + Qualifier(name="Experiencer", value="Patient"), + Qualifier(name="Negation", value="Negated"), ] - def test_dataset_nervaluate(self): + def test_dataset_from_dict(self): + # Arrange + data = { + "docs": [ + { + "identifier": "1", + "text": "test1", + "annotations": [ + { + "text": "test1", + "start": 0, + "end": 5, + "label": "test1", + "qualifiers": [ + {"name": "Negation", "value": "Negated"}, + ], + }, + ], + }, + { + "identifier": "2", + "text": "test2", + "annotations": [ + { + "text": "test2", + "start": 0, + "end": 5, + "label": "test2", + }, + ], + }, + ] + } + + # Act + ied = InfoExtractionDataset.from_dict(data) + + # Assert + assert len(ied.docs) == 2 + assert ied.docs[0].text == "test1" + assert len(ied.docs[0].annotations) == 1 + assert ied.docs[0].annotations[0] == Annotation( + text="test1", + start=0, + end=5, + label="test1", + qualifiers=[ + Qualifier(name="Negation", value="Negated"), + ], + ) + assert ied.docs[1].text == "test2" + assert len(ied.docs[1].annotations) == 1 + assert ied.docs[1].annotations[0] == Annotation( + text="test2", + start=0, + end=5, + label="test2", + qualifiers=[], + ) + + def test_dataset_to_nervaluate(self): # Arrange ied = InfoExtractionDataset( docs=[ @@ -322,11 +426,7 @@ def test_dataset_nervaluate(self): end=5, label="test1", qualifiers=[ - { - "name": "Negation", - "value": "Negated", - "is_default": False, - } + Qualifier(name="Negation", value="Negated"), ], ), ], @@ -342,10 +442,10 @@ def test_dataset_nervaluate(self): ) # Act - nervaluate = ied.to_nervaluate() + to_nervaluate = ied.to_nervaluate() # Assert - assert nervaluate == [ + assert to_nervaluate 
== [ [{"text": "test1", "start": 0, "end": 5, "label": "test1"}], [{"text": "test2", "start": 0, "end": 5, "label": "test2"}], ] @@ -353,7 +453,7 @@ def test_dataset_nervaluate(self): def test_dataset_to_nervaluate_with_filter(self, mctrainer_dataset): # Arrange def ann_filter(ann): - return any(not qualifier["is_default"] for qualifier in ann.qualifiers) + return any(qualifier.value == "Negated" for qualifier in ann.qualifiers) # Act to_nervaluate = mctrainer_dataset.to_nervaluate(ann_filter=ann_filter) @@ -364,6 +464,28 @@ def ann_filter(ann): ] assert to_nervaluate[1] == [] + def test_dataset_to_dict(self, mctrainer_dataset): + # Act + ied_docs = mctrainer_dataset.to_dict()["docs"] + + # Assert + assert len(ied_docs) == 14 + assert ied_docs[0]["identifier"] == "doc_0" + assert ied_docs[0]["text"] == "patient had geen anemie" + assert len(ied_docs[0]["annotations"]) == 1 + assert ied_docs[0]["annotations"][0] == { + "text": "anemie", + "start": 17, + "end": 23, + "label": "C0002871_anemie", + "qualifiers": [ + {"name": "Temporality", "value": "Current"}, + {"name": "Plausibility", "value": "Plausible"}, + {"name": "Experiencer", "value": "Patient"}, + {"name": "Negation", "value": "Negated"}, + ], + } + def test_num_docs(self, mctrainer_dataset): # Act num_docs = mctrainer_dataset.num_docs() @@ -525,7 +647,8 @@ def test_entity_metrics_filter(self, mctrainer_dataset, clinlp_dataset): iem = InfoExtractionMetrics(mctrainer_dataset, clinlp_dataset) def filter_default(ann): - return all(qualifier["is_default"] for qualifier in ann.qualifiers) + defaults = {"Affirmed", "Patient", "Current", "Plausible"} + return all(qualifier.value in defaults for qualifier in ann.qualifiers) # Act metrics = iem.entity_metrics(ann_filter=filter_default) @@ -562,35 +685,27 @@ def test_qualifier_metrics_metrics(self, mctrainer_dataset, clinlp_dataset): # Assert assert metrics["Negation"]["metrics"] == { "n": 10, - "n_pos_pred": 2, - "n_pos_true": 2, "precision": 1.0, "recall": 1.0, "f1": 
1.0, } assert metrics["Experiencer"]["metrics"] == { "n": 10, - "n_pos_pred": 1, - "n_pos_true": 1, "precision": 1.0, "recall": 1.0, "f1": 1.0, } assert metrics["Plausibility"]["metrics"] == { "n": 10, - "n_pos_pred": 3, - "n_pos_true": 2, - "precision": 0.6666666666666666, - "recall": 1.0, - "f1": 0.8, + "precision": 0.9, + "recall": 0.9, + "f1": 0.9, } assert metrics["Temporality"]["metrics"] == { "n": 10, - "n_pos_pred": 1, - "n_pos_true": 2, - "precision": 1.0, - "recall": 0.5, - "f1": 0.6666666666666666, + "precision": 0.9, + "recall": 0.9, + "f1": 0.9, } def test_qualifier_metrics_misses(self, mctrainer_dataset, clinlp_dataset): From a293f21c160e5984bc6f8c079935839460a32116 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 14:59:31 +0200 Subject: [PATCH 06/14] Update docs --- docs/source/metrics.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/source/metrics.md b/docs/source/metrics.md index 2af740a..c6878dc 100644 --- a/docs/source/metrics.md +++ b/docs/source/metrics.md @@ -43,6 +43,43 @@ clinlp_dataset = InfoExtractionDataset.from_clinlp_docs(nlp_docs) ``` +#### From `dict` + +```python +from clinlp.metrics import InfoExtractionDataset + +dataset_dict = { + "documents": [ + { + "identifier": "...", + "text": "...", + "annotations": + { + "text": "...", + "start": 0, + "end": 10, + "label": "...", + "qualifiers": { + "...": "...", + ... + } + }, ... + }, + ... + ] +} +``` + +#### From `json` + +```python +from clinlp.metrics import InfoExtractionDataset + +json_dataset = InfoExtractionDataset.read_json("dataset.json") +``` + +Note that this method assumes the JSON file has been written by `InfoExtractionDataset.write_json`. We use a simple custom `json` format with all the information present, but please inform us if you know a more open format or standard to use here. 
+ #### From other If your data is in a different format, you can manually convert it by creating `Annotation` and `Document` objects, and add those to a `InfoExtractionDataset`. Below are some pointers on how to create the appropriate objects: From a7d12e12d76ee89cef9dce596d2dbbd2f23a2263 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 15:07:53 +0200 Subject: [PATCH 07/14] Move test cases --- {tests/test_data => data}/qualifier_cases.json | 0 {tests/test_data => data}/sentencizer_cases.json | 0 {tests/test_data => data}/tokenizer_cases.json | 0 tests/regression/__init__.py | 4 +--- .../ie/qualifier/test_regression_context_algorithm.py | 2 +- tests/regression/ie/qualifier/test_regression_transformer.py | 2 +- tests/regression/test_regression_sentencizer.py | 2 +- tests/regression/test_regression_tokenizer.py | 2 +- 8 files changed, 5 insertions(+), 7 deletions(-) rename {tests/test_data => data}/qualifier_cases.json (100%) rename {tests/test_data => data}/sentencizer_cases.json (100%) rename {tests/test_data => data}/tokenizer_cases.json (100%) diff --git a/tests/test_data/qualifier_cases.json b/data/qualifier_cases.json similarity index 100% rename from tests/test_data/qualifier_cases.json rename to data/qualifier_cases.json diff --git a/tests/test_data/sentencizer_cases.json b/data/sentencizer_cases.json similarity index 100% rename from tests/test_data/sentencizer_cases.json rename to data/sentencizer_cases.json diff --git a/tests/test_data/tokenizer_cases.json b/data/tokenizer_cases.json similarity index 100% rename from tests/test_data/tokenizer_cases.json rename to data/tokenizer_cases.json diff --git a/tests/regression/__init__.py b/tests/regression/__init__.py index 55708a1..acd86f1 100644 --- a/tests/regression/__init__.py +++ b/tests/regression/__init__.py @@ -4,11 +4,9 @@ import pytest -from tests.conftest import TEST_DATA_DIR - def load_examples(filename: str) -> list[dict]: - with Path.open(TEST_DATA_DIR / filename, "rb") as file: + 
with Path(filename).open("rb") as file: return json.load(file)["examples"] diff --git a/tests/regression/ie/qualifier/test_regression_context_algorithm.py b/tests/regression/ie/qualifier/test_regression_context_algorithm.py index 7b8c499..1fe27ca 100644 --- a/tests/regression/ie/qualifier/test_regression_context_algorithm.py +++ b/tests/regression/ie/qualifier/test_regression_context_algorithm.py @@ -7,7 +7,7 @@ KNOWN_FAILURES = {9, 11, 12, 32} -examples = load_qualifier_examples("qualifier_cases.json", KNOWN_FAILURES) +examples = load_qualifier_examples("data/qualifier_cases.json", KNOWN_FAILURES) # Arrange diff --git a/tests/regression/ie/qualifier/test_regression_transformer.py b/tests/regression/ie/qualifier/test_regression_transformer.py index 6415698..3bd83f6 100644 --- a/tests/regression/ie/qualifier/test_regression_transformer.py +++ b/tests/regression/ie/qualifier/test_regression_transformer.py @@ -27,7 +27,7 @@ } examples = { - tr: load_qualifier_examples("qualifier_cases.json", KNOWN_FAILURES[tr]) + tr: load_qualifier_examples("data/qualifier_cases.json", KNOWN_FAILURES[tr]) for tr in KNOWN_FAILURES } diff --git a/tests/regression/test_regression_sentencizer.py b/tests/regression/test_regression_sentencizer.py index e6d8339..8f77e80 100644 --- a/tests/regression/test_regression_sentencizer.py +++ b/tests/regression/test_regression_sentencizer.py @@ -6,7 +6,7 @@ sentencizer_cases = [ pytest.param(example["tokens"], example["sentence_starts"], id="sentencizer_case_") - for example in load_examples("sentencizer_cases.json") + for example in load_examples("data/sentencizer_cases.json") ] diff --git a/tests/regression/test_regression_tokenizer.py b/tests/regression/test_regression_tokenizer.py index 243430f..ce1c80f 100644 --- a/tests/regression/test_regression_tokenizer.py +++ b/tests/regression/test_regression_tokenizer.py @@ -5,7 +5,7 @@ tokenizer_cases = [ pytest.param(example["text"], example["tokens"], id="tokenizer_case_") - for example in 
load_examples("tokenizer_cases.json") + for example in load_examples("data/tokenizer_cases.json") ] From 8ac9d4f585ec1d37826702d68f0ae1ebfb98d50d Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:18:19 +0200 Subject: [PATCH 08/14] Write nicer json --- src/clinlp/metrics/ie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clinlp/metrics/ie.py b/src/clinlp/metrics/ie.py index 8496ed2..fb3063e 100644 --- a/src/clinlp/metrics/ie.py +++ b/src/clinlp/metrics/ie.py @@ -468,7 +468,7 @@ def write_json(self, file: str) -> None: The path to the file. """ with pathlib.Path(file).open("w") as f: - json.dump(self.to_dict(), f) + json.dump(self.to_dict(), f, indent=4) def num_docs(self) -> int: """ From 447b6279e9cc57515e503a76042c3b5a696fd93d Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:18:35 +0200 Subject: [PATCH 09/14] Exclude is_default from compare --- src/clinlp/ie/qualifier/qualifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/clinlp/ie/qualifier/qualifier.py b/src/clinlp/ie/qualifier/qualifier.py index e1605cc..17525c4 100644 --- a/src/clinlp/ie/qualifier/qualifier.py +++ b/src/clinlp/ie/qualifier/qualifier.py @@ -108,7 +108,7 @@ class Qualifier: value: str = field(compare=True) """The value of the qualifier.""" - is_default: Optional[bool] = field(default=None, compare=True) + is_default: Optional[bool] = field(default=None, compare=False) """Whether the value is the default value.""" priority: int = field(default=0, compare=False) From db1f497dc2eb7903f34b8aa3151171f35e0b6a46 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:19:01 +0200 Subject: [PATCH 10/14] Update qualifier case format for consistencie with ie dataset --- data/qualifier_cases.json | 3084 +++++++++++------ scripts/generate_qualifier_regression_data.py | 23 +- tests/regression/__init__.py | 14 +- .../test_regression_context_algorithm.py | 14 +- 
.../qualifier/test_regression_transformer.py | 55 +- 5 files changed, 2063 insertions(+), 1127 deletions(-) diff --git a/data/qualifier_cases.json b/data/qualifier_cases.json index 2947e15..e2c221b 100644 --- a/data/qualifier_cases.json +++ b/data/qualifier_cases.json @@ -1,1082 +1,2006 @@ { - "examples": [ - { - "text": "2004 poli interne vanwege ENTITY.", - "example_id": 0, - "ent": { - "start": 4, - "end": 5, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Geen ENTITY klachten; niet koortsig.", - "example_id": 1, - "ent": { - "start": 1, - "end": 2, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Geen pijn klachten; niet ENTITY.", - "example_id": 2, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Gaat pas slapen als hij ENTITY is.", - "example_id": 3, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Future", - "Experiencer.Patient" - ] - } - }, - { - "text": "Bij kliniek sinds [DATUM-1] jaar continue en toenemende ENTITY in de benen", - "example_id": 4, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "2022 sept gastroscopie: 11 cm van de ENTITY is er een stenose", - "example_id": 5, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "2022 sept gastroscopie: 11 cm van de tandenrij is er een ENTITY", - "example_id": 6, - "ent": { - "start": 12, - "end": 13, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - 
"Experiencer.Patient" - ] - } - }, - { - "text": "resp: geen ENTITY, kan krachtig hoesten", - "example_id": 7, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "resp: geen benauwdheid, kan krachtig ENTITY", - "example_id": 8, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Gezien geen aanwijzingen voor cardiologische oorzaak thoracale ENTITY, geen poliklinische follow-up", - "example_id": 9, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Patiënt werd op [DATUM-10] opgenomen via de SEH in verband met ENTITY", - "example_id": 10, - "ent": { - "start": 11, - "end": 12, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Wordt u ziek 1 tot 2 weken voor de operatie (ENTITY, verkouden of andere gezondheidsproblemen)?", - "example_id": 11, - "ent": { - "start": 11, - "end": 12, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Future", - "Experiencer.Patient" - ] - } - }, - { - "text": "Wordt u ziek 1 tot 2 weken voor de operatie (koorts, ENTITY of andere gezondheidsproblemen)?", - "example_id": 12, - "ent": { - "start": 13, - "end": 14, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Future", - "Experiencer.Patient" - ] - } - }, - { - "text": "S: Dhr heeft geen ENTITY geuit. 
", - "example_id": 13, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Heden wel mild macrocytaire ENTITY, zonder zichtbaar bloedverlies.", - "example_id": 14, - "ent": { - "start": 4, - "end": 5, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Heden wel mild macrocytaire anemie, zonder zichtbaar ENTITY.", - "example_id": 15, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Interventieradioloog gebeld, nagaan post renale ENTITY?", - "example_id": 16, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Presence.Uncertain", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "S1S2, geen ENTITY, HA rond 70/min ", - "example_id": 17, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "vanochtend jammerig/ oncomfortabel, onduidelijk of dit ENTITY/ onbekendheid vpk is ", - "example_id": 18, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Presence.Uncertain", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "2022: milde ENTITY infcet (keepijn zonder koorts of hoesten).", - "example_id": 19, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "2022: milde covid infcet (ENTITY zonder koorts of hoesten).", - "example_id": 20, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": 
"2022: milde covid infcet (keepijn zonder ENTITY of hoesten).", - "example_id": 21, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Absent", - "Experiencer.Patient" - ] - } - }, - { - "text": "2022: milde covid infcet (keepijn zonder koorts of ENTITY).", - "example_id": 22, - "ent": { - "start": 10, - "end": 11, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Absent", - "Experiencer.Patient" - ] - } - }, - { - "text": "Dochter van patiente heeft ook vergelijkbare klachten van ENTITY en diarree.", - "example_id": 23, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Family", - "Presence.Present", - "Temporality.Current" - ] - } - }, - { - "text": "Dochter van patiente heeft ook vergelijkbare klachten van braken en ENTITY.", - "example_id": 24, - "ent": { - "start": 10, - "end": 11, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Family", - "Presence.Present", - "Temporality.Current" - ] - } - }, - { - "text": "Nuchter, niet gespuugd of ENTITY geweest. 
", - "example_id": 25, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Absent", - "Experiencer.Patient" - ] - } - }, - { - "text": "Extremiteiten: slanke, soepele ENTITY, geen oedeem", - "example_id": 26, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Extremiteiten: slanke, soepele kuiten, geen ENTITY", - "example_id": 27, - "ent": { - "start": 8, - "end": 9, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "U kan nog een paar dagen last hebben van ENTITY.", - "example_id": 28, - "ent": { - "start": 9, - "end": 10, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Positieve ENTITY op C4, maar geen pijnreductie.", - "example_id": 29, - "ent": { - "start": 1, - "end": 2, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Positieve proefwortelblokkade op C4, maar geen ENTITY.", - "example_id": 30, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Overname [INSTELLING-1] verdenking ENTITY", - "example_id": 31, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Presence.Uncertain", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Er zou een erfelijke vorm van ENTITY in de familie zitten, hier wordt patiënte nog op getest.", - "example_id": 32, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Presence.Uncertain", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "kan goed op de geopereerde kant 
liggen, dan geen ENTITY.", - "example_id": 33, - "ent": { - "start": 10, - "end": 11, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Familie anamnese: broer t-cel ENTITY dunnedarm coeliakie gerelateerd", - "example_id": 34, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Family", - "Presence.Present", - "Temporality.Current" - ] - } - }, - { - "text": "Familie anamnese: broer t-cel lymfoom dunnedarm ENTITY gerelateerd", - "example_id": 35, - "ent": { - "start": 9, - "end": 10, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Family", - "Presence.Present", - "Temporality.Current" - ] - } - }, - { - "text": "Ook een keer ENTITY en tintelingen gelaat gehad in 2014, weet niet welke kant.", - "example_id": 36, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Ook een keer pijn en ENTITY gelaat gehad in 2014, weet niet welke kant.", - "example_id": 37, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Historical", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Mw voelt zich nog beroerd, veel ENTITY li flank merkt weinig verbetering.", - "example_id": 38, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "In rust hijgt hij van de ENTITY.", - "example_id": 39, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "CRT-D drager in verband met ENTITY bij passagere totaal AV", - "example_id": 40, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", 
- "Experiencer.Patient" - ] - } - }, - { - "text": "CRT-D drager in verband met syncope bij ENTITY totaal AV", - "example_id": 41, - "ent": { - "start": 9, - "end": 10, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "LO/ Alg ind: niet acuut ENTITY, helder en alert", - "example_id": 42, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "LO/ Alg ind: niet acuut ziek, helder en ENTITY", - "example_id": 43, - "ent": { - "start": 11, - "end": 12, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Uiteindelijk is het fraai gelukt om de ENTITY macroscopisch compleet te verwijderen.", - "example_id": 44, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Hij is motorisch ENTITY en snel afgeleid.", - "example_id": 45, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Indicatie: analyse ENTITY, elder geduid als bij astma.", - "example_id": 46, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Indicatie: analyse dyspnoe, elder geduid als bij ENTITY.", - "example_id": 47, - "ent": { - "start": 9, - "end": 10, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Mw is ENTITY aanwezig, moe na het familiebezoek ", - "example_id": 48, - "ent": { - "start": 2, - "end": 3, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - 
"Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Exp: ENTITY en hoesten antalgisch beperkt", - "example_id": 49, - "ent": { - "start": 2, - "end": 3, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "Exp: huffen en ENTITY antalgisch beperkt", - "example_id": 50, - "ent": { - "start": 4, - "end": 5, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "DD recente ENTITY in peripelviene cyste", - "example_id": 51, - "ent": { - "start": 2, - "end": 3, - "text": "ENTITY", - "qualifiers": [ - "Presence.Uncertain", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "DD recente bloeding in peripelviene ENTITY", - "example_id": 52, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Uncertain", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "geen ENTITY, wel meer moe overdag, komt door haar spierziekte fibromyalgie", - "example_id": 53, - "ent": { - "start": 1, - "end": 2, - "text": "ENTITY", - "qualifiers": [ - "Presence.Absent", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "geen apneu, wel meer ENTITY overdag, komt door haar spierziekte fibromyalgie", - "example_id": 54, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "geen apneu, wel meer moe overdag, komt door haar spierziekte ENTITY", - "example_id": 55, - "ent": { - "start": 12, - "end": 13, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "4dd 1000mg ivm ENTITY", - "example_id": 56, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - 
"Temporality.Current", - "Experiencer.Patient" - ] - } - }, - { - "text": "CMV/EBV ENTITY negatief, lues negatief", - "example_id": 57, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Absent", - "Experiencer.Patient" - ] - } - }, - { - "text": "CMV/EBV HIV negatief, ENTITY negatief", - "example_id": 58, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Absent", - "Experiencer.Patient" - ] - } - }, - { - "text": "ENTITY verdween na gebruik diclofenac 50 mg 3dd grotendeels", - "example_id": 59, - "ent": { - "start": 0, - "end": 1, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Absent", - "Experiencer.Patient" - ] - } - }, - { - "text": "geen verklaring voor ENTITY AD", - "example_id": 60, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Geen genetische verklaring voor gevonden, dus losstaand van de ENTITY", - "example_id": 61, - "ent": { - "start": 10, - "end": 11, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Vannacht slapend gezien, leek niet ENTITY", - "example_id": 62, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Uncertain", - "Experiencer.Patient" - ] - } - }, - { - "text": "ENTITY: niet bekend", - "example_id": 63, - "ent": { - "start": 0, - "end": 1, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Uncertain", - "Experiencer.Patient" - ] - } - }, - { - "text": "- ENTITY (onwaarschijnlijk, laag d-dimeer)", - "example_id": 64, - "ent": { - "start": 1, - "end": 2, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Uncertain", - "Experiencer.Patient" - ] - } - }, - { - "text": "ENTITY 
100 ml 3 dd in beide oren", - "example_id": 65, - "ent": { - "start": 0, - "end": 1, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "1. dd ENTITY dd getraumatiseerde verruca dd AK? ", - "example_id": 66, - "ent": { - "start": 2, - "end": 3, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Uncertain", - "Experiencer.Patient" - ] - } - }, - { - "text": "1. dd pcc dd ENTITY dd AK? ", - "example_id": 67, - "ent": { - "start": 4, - "end": 5, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Uncertain", - "Experiencer.Patient" - ] - } - }, - { - "text": "1. dd pcc dd getraumatiseerde verruca dd ENTITY? ", - "example_id": 68, - "ent": { - "start": 7, - "end": 8, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Uncertain", - "Experiencer.Patient" - ] - } - }, - { - "text": "Nemen contact op indien er ENTITY zijn.", - "example_id": 69, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Patient", - "Presence.Present", - "Temporality.Future" - ] - } - }, - { - "text": "- Predocs verhoogd risico op ENTITY", - "example_id": 70, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Patient", - "Presence.Present", - "Temporality.Future" - ] - } - }, - { - "text": "Wel zijn er veel klachten van ENTITY, zelfs in die mate dat hij hierdoor een pneumonie kan ontwikkelen.", - "example_id": 71, - "ent": { - "start": 6, - "end": 7, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Present", - "Experiencer.Patient" - ] - } - }, - { - "text": "Wel zijn er veel klachten van maagzuur, zelfs in die mate dat hij hierdoor een ENTITY kan ontwikkelen.", - "example_id": 72, - "ent": { - "start": 16, - "end": 17, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Patient", - "Presence.Present", - "Temporality.Future" - ] - } - }, - { - 
"text": "Bij te verwachten ENTITY op iv laten.", - "example_id": 73, - "ent": { - "start": 3, - "end": 4, - "text": "ENTITY", - "qualifiers": [ - "Experiencer.Patient", - "Presence.Present", - "Temporality.Future" - ] - } - }, - { - "text": "psycholoog: als tiener: ENTITY, geen last meer", - "example_id": 74, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Presence.Present", - "Experiencer.Patient", - "Temporality.Historical" - ] - } - }, - { - "text": "Mw. kon niet slapen ivm ENTITY bij buurvrouw.", - "example_id": 75, - "ent": { - "start": 5, - "end": 6, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Present", - "Experiencer.Other" - ] - } - }, - { - "text": "Bij haar huisgenote is ENTITY gediagnosticeerd.", - "example_id": 76, - "ent": { - "start": 4, - "end": 5, - "text": "ENTITY", - "qualifiers": [ - "Temporality.Current", - "Presence.Present", - "Experiencer.Other" - ] - } - } - ] + "docs": [ + { + "identifier": "0", + "text": "2004 poli interne vanwege ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 4, + "end": 5, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "1", + "text": "Geen ENTITY klachten; niet koortsig.", + "annotations": [ + { + "text": "ENTITY", + "start": 1, + "end": 2, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "2", + "text": "Geen pijn klachten; niet ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": 
"Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "3", + "text": "Gaat pas slapen als hij ENTITY is.", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "4", + "text": "Bij kliniek sinds [DATUM-1] jaar continue en toenemende ENTITY in de benen", + "annotations": [ + { + "text": "ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "5", + "text": "2022 sept gastroscopie: 11 cm van de ENTITY is er een stenose", + "annotations": [ + { + "text": "ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "6", + "text": "2022 sept gastroscopie: 11 cm van de tandenrij is er een ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 12, + "end": 13, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "7", + "text": "resp: geen ENTITY, kan krachtig hoesten", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "8", + "text": 
"resp: geen benauwdheid, kan krachtig ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "9", + "text": "Gezien geen aanwijzingen voor cardiologische oorzaak thoracale ENTITY, geen poliklinische follow-up", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "10", + "text": "Pati\u00ebnt werd op [DATUM-10] opgenomen via de SEH in verband met ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 11, + "end": 12, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "11", + "text": "Wordt u ziek 1 tot 2 weken voor de operatie (ENTITY, verkouden of andere gezondheidsproblemen)?", + "annotations": [ + { + "text": "ENTITY", + "start": 11, + "end": 12, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "12", + "text": "Wordt u ziek 1 tot 2 weken voor de operatie (koorts, ENTITY of andere gezondheidsproblemen)?", + "annotations": [ + { + "text": "ENTITY", + "start": 13, + "end": 14, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + 
}, + { + "identifier": "13", + "text": "S: Dhr heeft geen ENTITY geuit. ", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "14", + "text": "Heden wel mild macrocytaire ENTITY, zonder zichtbaar bloedverlies.", + "annotations": [ + { + "text": "ENTITY", + "start": 4, + "end": 5, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "15", + "text": "Heden wel mild macrocytaire anemie, zonder zichtbaar ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "16", + "text": "Interventieradioloog gebeld, nagaan post renale ENTITY?", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "17", + "text": "S1S2, geen ENTITY, HA rond 70/min ", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "18", + "text": "vanochtend jammerig/ oncomfortabel, onduidelijk of dit ENTITY/ onbekendheid vpk is 
", + "annotations": [ + { + "text": "ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "19", + "text": "2022: milde ENTITY infcet (keepijn zonder koorts of hoesten).", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "20", + "text": "2022: milde covid infcet (ENTITY zonder koorts of hoesten).", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "21", + "text": "2022: milde covid infcet (keepijn zonder ENTITY of hoesten).", + "annotations": [ + { + "text": "ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "22", + "text": "2022: milde covid infcet (keepijn zonder koorts of ENTITY).", + "annotations": [ + { + "text": "ENTITY", + "start": 10, + "end": 11, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "23", + "text": "Dochter van patiente heeft ook vergelijkbare klachten van ENTITY en diarree.", + "annotations": [ + { + "text": 
"ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Family" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + } + ] + } + ] + }, + { + "identifier": "24", + "text": "Dochter van patiente heeft ook vergelijkbare klachten van braken en ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 10, + "end": 11, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Family" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + } + ] + } + ] + }, + { + "identifier": "25", + "text": "Nuchter, niet gespuugd of ENTITY geweest. ", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "26", + "text": "Extremiteiten: slanke, soepele ENTITY, geen oedeem", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "27", + "text": "Extremiteiten: slanke, soepele kuiten, geen ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 8, + "end": 9, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "28", + "text": "U kan nog een paar dagen last hebben van ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 9, + "end": 10, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + 
"value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "29", + "text": "Positieve ENTITY op C4, maar geen pijnreductie.", + "annotations": [ + { + "text": "ENTITY", + "start": 1, + "end": 2, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "30", + "text": "Positieve proefwortelblokkade op C4, maar geen ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "31", + "text": "Overname [INSTELLING-1] verdenking ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "32", + "text": "Er zou een erfelijke vorm van ENTITY in de familie zitten, hier wordt pati\u00ebnte nog op getest.", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "33", + "text": "kan goed op de geopereerde kant liggen, dan geen ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 10, + "end": 11, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": 
"Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "34", + "text": "Familie anamnese: broer t-cel ENTITY dunnedarm coeliakie gerelateerd", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Family" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + } + ] + } + ] + }, + { + "identifier": "35", + "text": "Familie anamnese: broer t-cel lymfoom dunnedarm ENTITY gerelateerd", + "annotations": [ + { + "text": "ENTITY", + "start": 9, + "end": 10, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Family" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + } + ] + } + ] + }, + { + "identifier": "36", + "text": "Ook een keer ENTITY en tintelingen gelaat gehad in 2014, weet niet welke kant.", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "37", + "text": "Ook een keer pijn en ENTITY gelaat gehad in 2014, weet niet welke kant.", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Historical" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "38", + "text": "Mw voelt zich nog beroerd, veel ENTITY li flank merkt weinig verbetering.", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": 
"Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "39", + "text": "In rust hijgt hij van de ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "40", + "text": "CRT-D drager in verband met ENTITY bij passagere totaal AV", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "41", + "text": "CRT-D drager in verband met syncope bij ENTITY totaal AV", + "annotations": [ + { + "text": "ENTITY", + "start": 9, + "end": 10, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "42", + "text": "LO/ Alg ind: niet acuut ENTITY, helder en alert", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "43", + "text": "LO/ Alg ind: niet acuut ziek, helder en ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 11, + "end": 12, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "44", + "text": 
"Uiteindelijk is het fraai gelukt om de ENTITY macroscopisch compleet te verwijderen.", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "45", + "text": "Hij is motorisch ENTITY en snel afgeleid.", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "46", + "text": "Indicatie: analyse ENTITY, elder geduid als bij astma.", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "47", + "text": "Indicatie: analyse dyspnoe, elder geduid als bij ENTITY.", + "annotations": [ + { + "text": "ENTITY", + "start": 9, + "end": 10, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "48", + "text": "Mw is ENTITY aanwezig, moe na het familiebezoek ", + "annotations": [ + { + "text": "ENTITY", + "start": 2, + "end": 3, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "49", + "text": "Exp: ENTITY en hoesten antalgisch beperkt", + "annotations": [ + { + "text": 
"ENTITY", + "start": 2, + "end": 3, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "50", + "text": "Exp: huffen en ENTITY antalgisch beperkt", + "annotations": [ + { + "text": "ENTITY", + "start": 4, + "end": 5, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "51", + "text": "DD recente ENTITY in peripelviene cyste", + "annotations": [ + { + "text": "ENTITY", + "start": 2, + "end": 3, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "52", + "text": "DD recente bloeding in peripelviene ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "53", + "text": "geen ENTITY, wel meer moe overdag, komt door haar spierziekte fibromyalgie", + "annotations": [ + { + "text": "ENTITY", + "start": 1, + "end": 2, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "54", + "text": "geen apneu, wel meer ENTITY overdag, komt door haar spierziekte fibromyalgie", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": 
"Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "55", + "text": "geen apneu, wel meer moe overdag, komt door haar spierziekte ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 12, + "end": 13, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "56", + "text": "4dd 1000mg ivm ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "57", + "text": "CMV/EBV ENTITY negatief, lues negatief", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "58", + "text": "CMV/EBV HIV negatief, ENTITY negatief", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "59", + "text": "ENTITY verdween na gebruik diclofenac 50 mg 3dd grotendeels", + "annotations": [ + { + "text": "ENTITY", + "start": 0, + "end": 1, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Absent" + }, + { + "name": "Experiencer", + "value": "Patient" + } + 
] + } + ] + }, + { + "identifier": "60", + "text": "geen verklaring voor ENTITY AD", + "annotations": [ + { + "text": "ENTITY", + "start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "61", + "text": "Geen genetische verklaring voor gevonden, dus losstaand van de ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 10, + "end": 11, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "62", + "text": "Vannacht slapend gezien, leek niet ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "63", + "text": "ENTITY: niet bekend", + "annotations": [ + { + "text": "ENTITY", + "start": 0, + "end": 1, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "64", + "text": "- ENTITY (onwaarschijnlijk, laag d-dimeer)", + "annotations": [ + { + "text": "ENTITY", + "start": 1, + "end": 2, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "65", + "text": "ENTITY 100 ml 3 dd in beide oren", + "annotations": [ + { + "text": "ENTITY", + "start": 0, + "end": 1, + 
"label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "66", + "text": "1. dd ENTITY dd getraumatiseerde verruca dd AK? ", + "annotations": [ + { + "text": "ENTITY", + "start": 2, + "end": 3, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "67", + "text": "1. dd pcc dd ENTITY dd AK? ", + "annotations": [ + { + "text": "ENTITY", + "start": 4, + "end": 5, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "68", + "text": "1. dd pcc dd getraumatiseerde verruca dd ENTITY? 
", + "annotations": [ + { + "text": "ENTITY", + "start": 7, + "end": 8, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Uncertain" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "69", + "text": "Nemen contact op indien er ENTITY zijn.", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Patient" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + } + ] + } + ] + }, + { + "identifier": "70", + "text": "- Predocs verhoogd risico op ENTITY", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Patient" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + } + ] + } + ] + }, + { + "identifier": "71", + "text": "Wel zijn er veel klachten van ENTITY, zelfs in die mate dat hij hierdoor een pneumonie kan ontwikkelen.", + "annotations": [ + { + "text": "ENTITY", + "start": 6, + "end": 7, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + } + ] + } + ] + }, + { + "identifier": "72", + "text": "Wel zijn er veel klachten van maagzuur, zelfs in die mate dat hij hierdoor een ENTITY kan ontwikkelen.", + "annotations": [ + { + "text": "ENTITY", + "start": 16, + "end": 17, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Patient" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + } + ] + } + ] + }, + { + "identifier": "73", + "text": "Bij te verwachten ENTITY op iv laten.", + "annotations": [ + { + "text": "ENTITY", + 
"start": 3, + "end": 4, + "label": "entity", + "qualifiers": [ + { + "name": "Experiencer", + "value": "Patient" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Temporality", + "value": "Future" + } + ] + } + ] + }, + { + "identifier": "74", + "text": "psycholoog: als tiener: ENTITY, geen last meer", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Patient" + }, + { + "name": "Temporality", + "value": "Historical" + } + ] + } + ] + }, + { + "identifier": "75", + "text": "Mw. kon niet slapen ivm ENTITY bij buurvrouw.", + "annotations": [ + { + "text": "ENTITY", + "start": 5, + "end": 6, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Other" + } + ] + } + ] + }, + { + "identifier": "76", + "text": "Bij haar huisgenote is ENTITY gediagnosticeerd.", + "annotations": [ + { + "text": "ENTITY", + "start": 4, + "end": 5, + "label": "entity", + "qualifiers": [ + { + "name": "Temporality", + "value": "Current" + }, + { + "name": "Presence", + "value": "Present" + }, + { + "name": "Experiencer", + "value": "Other" + } + ] + } + ] + } + ] } \ No newline at end of file diff --git a/scripts/generate_qualifier_regression_data.py b/scripts/generate_qualifier_regression_data.py index a9b4d78..a30fe15 100644 --- a/scripts/generate_qualifier_regression_data.py +++ b/scripts/generate_qualifier_regression_data.py @@ -60,16 +60,25 @@ def get_model() -> Language: ent = ents[0] + qualifiers = [] + + for qualifier in ent._.qualifiers: + name, value = qualifier.split(".", 1) + qualifiers.append({"name": name, "value": value}) + data.append( { + "identifier": start_example_id + next(cntr), "text": text, - "example_id": start_example_id + next(cntr), - "ent": { - "start": ent.start, 
- "end": ent.end, - "text": str(ent), - "qualifiers": list(ent._.qualifiers_str), - }, + "annotations": [ + { + "text": str(ent), + "start": ent.start, + "end": ent.end, + "label": "entity", + "qualifiers": qualifiers, + } + ], } ) diff --git a/tests/regression/__init__.py b/tests/regression/__init__.py index acd86f1..fd21d4f 100644 --- a/tests/regression/__init__.py +++ b/tests/regression/__init__.py @@ -4,6 +4,8 @@ import pytest +from clinlp.metrics import InfoExtractionDataset + def load_examples(filename: str) -> list[dict]: with Path(filename).open("rb") as file: @@ -13,18 +15,18 @@ def load_examples(filename: str) -> list[dict]: def load_qualifier_examples( filename: str, failures=Collection[int] ) -> list["pytest.param"]: - examples = load_examples(filename) + ied = InfoExtractionDataset.read_json(filename) examples_as_param = [] - for example in examples: - marks = pytest.mark.xfail if example["example_id"] in failures else [] + for doc in ied.docs: + marks = pytest.mark.xfail if doc.identifier in failures else [] examples_as_param.append( pytest.param( - example["text"], - example["ent"], - id=f"qualifier_case_{example['example_id']}", + doc.text, + doc.annotations[0], + id=f"qualifier_case_{doc.identifier}", marks=marks, ) ) diff --git a/tests/regression/ie/qualifier/test_regression_context_algorithm.py b/tests/regression/ie/qualifier/test_regression_context_algorithm.py index 1fe27ca..76d18c0 100644 --- a/tests/regression/ie/qualifier/test_regression_context_algorithm.py +++ b/tests/regression/ie/qualifier/test_regression_context_algorithm.py @@ -3,9 +3,9 @@ from tests.regression import load_qualifier_examples from clinlp.ie import SPANS_KEY -from clinlp.ie.qualifier.qualifier import ATTR_QUALIFIERS_STR +from clinlp.ie.qualifier.qualifier import ATTR_QUALIFIERS -KNOWN_FAILURES = {9, 11, 12, 32} +KNOWN_FAILURES = {"9", "11", "12", "32"} examples = load_qualifier_examples("data/qualifier_cases.json", KNOWN_FAILURES) @@ -42,9 +42,9 @@ def 
test_regression_context_algorithm(self, nlp_qualifier, text, expected_ent): # Assert assert len(doc.spans[SPANS_KEY]) == 1 - assert doc.spans[SPANS_KEY][0].start == expected_ent["start"] - assert doc.spans[SPANS_KEY][0].end == expected_ent["end"] - assert str(doc.spans[SPANS_KEY][0]) == expected_ent["text"] - assert getattr(doc.spans[SPANS_KEY][0]._, ATTR_QUALIFIERS_STR).issubset( - set(expected_ent["qualifiers"]) + assert str(doc.spans[SPANS_KEY][0]) == expected_ent.text + assert doc.spans[SPANS_KEY][0].start == expected_ent.start + assert doc.spans[SPANS_KEY][0].end == expected_ent.end + assert getattr(doc.spans[SPANS_KEY][0]._, ATTR_QUALIFIERS).issubset( + set(expected_ent.qualifiers) ) diff --git a/tests/regression/ie/qualifier/test_regression_transformer.py b/tests/regression/ie/qualifier/test_regression_transformer.py index 3bd83f6..fb9cd6e 100644 --- a/tests/regression/ie/qualifier/test_regression_transformer.py +++ b/tests/regression/ie/qualifier/test_regression_transformer.py @@ -3,26 +3,26 @@ from tests.regression import load_qualifier_examples from clinlp.ie import SPANS_KEY -from clinlp.ie.qualifier.qualifier import ATTR_QUALIFIERS_STR +from clinlp.ie.qualifier.qualifier import ATTR_QUALIFIERS KNOWN_FAILURES = { - "experiencer": {32, 75, 76}, + "experiencer": {"32", "75", "76"}, "negation": { - 9, - 16, - 18, - 31, - 32, - 43, - 51, - 52, - 59, - 62, - 63, - 64, - 66, - 67, - 68, + "9", + "16", + "18", + "31", + "32", + "43", + "51", + "52", + "59", + "62", + "63", + "64", + "66", + "67", + "68", }, } @@ -75,11 +75,11 @@ def test_regression_negation_transformer( # Assert assert len(doc.spans[SPANS_KEY]) == 1 - assert doc.spans[SPANS_KEY][0].start == expected_ent["start"] - assert doc.spans[SPANS_KEY][0].end == expected_ent["end"] - assert str(doc.spans[SPANS_KEY][0]) == expected_ent["text"] - assert getattr(doc.spans[SPANS_KEY][0]._, ATTR_QUALIFIERS_STR).issubset( - set(expected_ent["qualifiers"]) + assert str(doc.spans[SPANS_KEY][0]) == 
expected_ent.text + assert doc.spans[SPANS_KEY][0].start == expected_ent.start + assert doc.spans[SPANS_KEY][0].end == expected_ent.end + assert getattr(doc.spans[SPANS_KEY][0]._, ATTR_QUALIFIERS).issubset( + set(expected_ent.qualifiers) ) @@ -91,11 +91,12 @@ def test_regression_experiencer_transformer( # Act doc = nlp_qualifier_experiencer(text) + # Assert # Assert assert len(doc.spans[SPANS_KEY]) == 1 - assert doc.spans[SPANS_KEY][0].start == expected_ent["start"] - assert doc.spans[SPANS_KEY][0].end == expected_ent["end"] - assert str(doc.spans[SPANS_KEY][0]) == expected_ent["text"] - assert getattr(doc.spans[SPANS_KEY][0]._, ATTR_QUALIFIERS_STR).issubset( - set(expected_ent["qualifiers"]) + assert str(doc.spans[SPANS_KEY][0]) == expected_ent.text + assert doc.spans[SPANS_KEY][0].start == expected_ent.start + assert doc.spans[SPANS_KEY][0].end == expected_ent.end + assert getattr(doc.spans[SPANS_KEY][0]._, ATTR_QUALIFIERS).issubset( + set(expected_ent.qualifiers) ) From 2c3bc95186ddd6942bf96a15e9eaed1ed876e094 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:34:05 +0200 Subject: [PATCH 11/14] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32d2a2e..5d24b49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * Made the `default` field for `Qualifier` optional * `InfoExtractionDataset` and `InfoExtractionMetrics` use `Qualifier` objects for qualifiers rather than `dict` * `InfoExtractionDataset` and `InfoExtractionMetrics` no longer track or use qualifier defaults +* Moved test cases to data directory in more open format, so they can be used by others ## 0.8.1 (2024-06-27) From 4b3bb43267ee28f63d2cc536e4f70c68751e4474 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:35:03 +0200 Subject: [PATCH 12/14] Add data documentation --- data/README.md | 1 + 
docs/source/data.md | 25 +++++++++++++++++++++++++ docs/source/index.md | 1 + 3 files changed, 27 insertions(+) create mode 100644 data/README.md create mode 100644 docs/source/data.md diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..59a0f01 --- /dev/null +++ b/data/README.md @@ -0,0 +1 @@ +This folder contains some open data files. See https://clinlp.readthedocs.io/en/latest/data.html for more information. \ No newline at end of file diff --git a/docs/source/data.md b/docs/source/data.md new file mode 100644 index 0000000..d6d0d40 --- /dev/null +++ b/docs/source/data.md @@ -0,0 +1,25 @@ +# Data + +The `clinlp` repository contains some open data files with real (or semi-real) examples. Some of these are used by `clinlp` (for example in the tests), but they are also available for others to use. + +The files are located at: https://github.com/umcu/clinlp/tree/main/data + +## `tokenizer_cases.json` + +Some cases for testing tokenizers, collected during development of clinlp, often based on real examples. + +## `sentencizer_cases.json` + +Some cases for testing sentencizers, collected during development of clinlp, often based on real examples. + +## `qualifier_cases.json` + +Some cases for testing qualifier detectors, collected during development of clinlp, often based on real examples. Each doc contains exactly one entity, which makes it easier for our regression tests to mark skips. 
+ +You can load this file to an `InfoExtractionDataset` for further evaluation using: + +```python +from clinlp.metrics import InfoExtractionDataset + +dataset = InfoExtractionDataset.read_json("data/qualifier_cases.json") +``` \ No newline at end of file diff --git a/docs/source/index.md b/docs/source/index.md index 71fdbf5..28aa8bc 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -13,6 +13,7 @@ Introduction Installation Getting started Roadmap +Data Citing ``` From 612b207cdf1985d6377d870f9c06e457b5162901 Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:39:46 +0200 Subject: [PATCH 13/14] Fix links --- data/README.md | 4 +++- docs/source/data.md | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/data/README.md b/data/README.md index 59a0f01..1c19263 100644 --- a/data/README.md +++ b/data/README.md @@ -1 +1,3 @@ -This folder contains some open data files. See https://clinlp.readthedocs.io/en/latest/data.html for more information. \ No newline at end of file +# Data + +This folder contains some open data files. See [the Data page of the documentation](https://clinlp.readthedocs.io/en/latest/data.html) for more information. \ No newline at end of file diff --git a/docs/source/data.md b/docs/source/data.md index d6d0d40..1fac5ca 100644 --- a/docs/source/data.md +++ b/docs/source/data.md @@ -2,7 +2,7 @@ The `clinlp` repository contains some open data files with real (or semi-real) examples. Some of these are used by `clinlp` (for example in the tests), but they are also available for others to use. -The files are located at: https://github.com/umcu/clinlp/tree/main/data +The files are located at the [Data directory in the GitHub repo](https://github.com/umcu/clinlp/tree/main/data). 
## `tokenizer_cases.json` From ae0c7d6354a000551c57dd5971d83373f331ffba Mon Sep 17 00:00:00 2001 From: Vincent Menger Date: Thu, 4 Jul 2024 16:40:06 +0200 Subject: [PATCH 14/14] Fix newlines --- data/README.md | 2 +- docs/source/data.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data/README.md b/data/README.md index 1c19263..4e25b60 100644 --- a/data/README.md +++ b/data/README.md @@ -1,3 +1,3 @@ # Data -This folder contains some open data files. See [the Data page of the documentation](https://clinlp.readthedocs.io/en/latest/data.html) for more information. \ No newline at end of file +This folder contains some open data files. See [the Data page of the documentation](https://clinlp.readthedocs.io/en/latest/data.html) for more information. diff --git a/docs/source/data.md b/docs/source/data.md index 1fac5ca..2795126 100644 --- a/docs/source/data.md +++ b/docs/source/data.md @@ -22,4 +22,4 @@ You can load this file to an `InfoExtractionDataset` for further evaluation usin from clinlp.metrics import InfoExtractionDataset dataset = InfoExtractionDataset.read_json("data/qualifier_cases.json") -``` \ No newline at end of file +```