Skip to content

Commit

Permalink
use .resolve() directly on annotation layers (requires pytorch-ie 0.30.2)
Browse files Browse the repository at this point in the history
  • Loading branch information
ArneBinder committed Apr 4, 2024
1 parent 1dc1a21 commit f2f2b38
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 29 deletions.
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ datasets = ">=2.14.0,<2.16.0"
pyarrow = "^13"

[tool.poetry.group.dev.dependencies]
- pytorch-ie = {version = ">=0.30.1,<0.31.0", optional = true}
+ pytorch-ie = {version = ">=0.30.2,<0.31.0", optional = true}
pie-modules = ">=0.10.8,<0.12.0"
torch = {version = "^2.1.0+cpu", source = "pytorch"}
pytest = "^7.4.2"
Expand Down
35 changes: 11 additions & 24 deletions tests/dataset_builders/pie/test_drugprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,7 @@ def test_document(document, dataset_variant):
)
elif dataset_variant == "drugprot_bigbio_kb":
assert isinstance(document, DrugprotBigbioDocument)
- resolved_passages = [passage.resolve() for passage in document.passages]
- assert resolved_passages == [
+ assert document.passages.resolve() == [
(
"title",
"RDH12, a retinol dehydrogenase causing Leber's congenital amaurosis, is also involved in steroid metabolism.",
Expand All @@ -330,8 +329,7 @@ def test_document(document, dataset_variant):
)

# check the entities
- resolved_entities = [entity.resolve() for entity in document.entities]
- assert resolved_entities == [
+ assert document.entities.resolve() == [
("CHEMICAL", "androstanediol"),
("CHEMICAL", "retinol"),
("CHEMICAL", "retinol"),
Expand Down Expand Up @@ -364,8 +362,7 @@ def test_document(document, dataset_variant):
]

# check the relations
- resolved_relations = [relation.resolve() for relation in document.relations]
- assert resolved_relations == [
+ assert document.relations.resolve() == [
("PRODUCT-OF", (("CHEMICAL", "androstanediol"), ("GENE-Y", "human type 12 RDH")))
]

Expand Down Expand Up @@ -416,10 +413,7 @@ def test_converted_document(converted_document, converted_document_type):
if isinstance(
converted_document, TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions
):
- resolved_labeled_partitions = [
- partition.resolve() for partition in converted_document.labeled_partitions
- ]
- assert resolved_labeled_partitions == [
+ assert converted_document.labeled_partitions.resolve() == [
(
"title",
"RDH12, a retinol dehydrogenase causing Leber's congenital amaurosis, is also involved in steroid metabolism.",
Expand All @@ -435,8 +429,7 @@ def test_converted_document(converted_document, converted_document_type):
== "RDH12, a retinol dehydrogenase causing Leber's congenital amaurosis, is also involved in steroid metabolism. Three retinol dehydrogenases (RDHs) were tested for steroid converting abilities: human and murine RDH 12 and human RDH13. RDH12 is involved in retinal degeneration in Leber's congenital amaurosis (LCA). We show that murine Rdh12 and human RDH13 do not reveal activity towards the checked steroids, but that human type 12 RDH reduces dihydrotestosterone to androstanediol, and is thus also involved in steroid metabolism. Furthermore, we analyzed both expression and subcellular localization of these enzymes."
)
# check the entities
- resolved_entities = [entity.resolve() for entity in converted_document.labeled_spans]
- assert resolved_entities == [
+ assert converted_document.labeled_spans.resolve() == [
("CHEMICAL", "androstanediol"),
("CHEMICAL", "retinol"),
("CHEMICAL", "retinol"),
Expand Down Expand Up @@ -469,8 +462,7 @@ def test_converted_document(converted_document, converted_document_type):
]

# check the relations
- resolved_relations = [relation.resolve() for relation in converted_document.binary_relations]
- assert resolved_relations == [
+ assert converted_document.binary_relations.resolve() == [
("PRODUCT-OF", (("CHEMICAL", "androstanediol"), ("GENE-Y", "human type 12 RDH")))
]

Expand Down Expand Up @@ -518,8 +510,7 @@ def test_tokenize_document(converted_document, tokenizer):
doc: TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions = tokenized_docs[0]
assert len(doc.tokens) == 32
assert len(doc.labeled_spans) == 3
- resolved_labeled_spans = [ent.resolve() for ent in doc.labeled_spans]
- assert resolved_labeled_spans == [
+ assert doc.labeled_spans.resolve() == [
("GENE-Y", ("rd", "##h", "##12")),
("CHEMICAL", ("re", "##tino", "##l")),
("GENE-N", ("re", "##tino", "##l", "de", "##hy", "##dro", "##genase")),
Expand All @@ -530,8 +521,7 @@ def test_tokenize_document(converted_document, tokenizer):
doc: TokenDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions = tokenized_docs[1]
assert len(doc.tokens) == 132
assert len(doc.labeled_spans) == 10
- resolved_labeled_spans = [ent.resolve() for ent in doc.labeled_spans]
- assert resolved_labeled_spans == [
+ assert doc.labeled_spans.resolve() == [
("CHEMICAL", ("re", "##tino", "##l")),
("GENE-N", ("re", "##tino", "##l", "de", "##hy", "##dro", "##genase", "##s")),
("GENE-N", ("rd", "##hs")),
Expand All @@ -544,8 +534,7 @@ def test_tokenize_document(converted_document, tokenizer):
("CHEMICAL", ("and", "##ros", "##tan", "##ed", "##iol")),
]
assert len(doc.binary_relations) == 1
- resolved_relations = [rel.resolve() for rel in doc.binary_relations]
- assert resolved_relations == [
+ assert doc.binary_relations.resolve() == [
(
"PRODUCT-OF",
(
Expand All @@ -564,8 +553,7 @@ def test_tokenize_document(converted_document, tokenizer):
assert len(doc.tokens) == 162

assert len(doc.labeled_spans) == 13
- resolved_labeled_spans = [ent.resolve() for ent in doc.labeled_spans]
- assert resolved_labeled_spans == [
+ assert doc.labeled_spans.resolve() == [
("GENE-Y", ("rd", "##h", "##12")),
("CHEMICAL", ("re", "##tino", "##l")),
("GENE-N", ("re", "##tino", "##l", "de", "##hy", "##dro", "##genase")),
Expand All @@ -582,8 +570,7 @@ def test_tokenize_document(converted_document, tokenizer):
]

assert len(doc.binary_relations) == 1
- resolved_relations = [rel.resolve() for rel in doc.binary_relations]
- assert resolved_relations == [
+ assert doc.binary_relations.resolve() == [
(
"PRODUCT-OF",
(
Expand Down

0 comments on commit f2f2b38

Please sign in to comment.