diff --git a/configs/dataset/_add_candidate_relations.yaml b/configs/dataset/_add_candidate_relations.yaml deleted file mode 100644 index b5e8f61..0000000 --- a/configs/dataset/_add_candidate_relations.yaml +++ /dev/null @@ -1,8 +0,0 @@ -add_candidate_relations: - _processor_: pytorch_ie.DatasetDict.map - function: - # see this for further information and options: - # https://github.com/ArneBinder/pie-utils/blob/main/src/pie_utils/document/processors/candidate_relation_adder.py - _target_: pie_utils.document.processors.CandidateRelationAdder - #partition_layer: partitions - #max_distance: 177 diff --git a/configs/dataset/_add_partitions.yaml b/configs/dataset/_add_partitions.yaml index 0339558..a12e6c2 100644 --- a/configs/dataset/_add_partitions.yaml +++ b/configs/dataset/_add_partitions.yaml @@ -1,7 +1,7 @@ add_partitions: - _processor_: pytorch_ie.DatasetDict.map + _processor_: pie_datasets.DatasetDict.map function: # see this for further information and options: - # https://github.com/ArneBinder/pie-utils/blob/main/src/pie_utils/document/processors/regex_partitioner.py - _target_: pie_utils.document.processors.RegexPartitioner + # https://github.com/ArneBinder/pie-datasets/blob/main/src/pie_datasets/document/processing/regex_partitioner.py + _target_: pie_datasets.document.processing.RegexPartitioner pattern: ??? diff --git a/configs/dataset/_add_reversed_relations.yaml b/configs/dataset/_add_reversed_relations.yaml deleted file mode 100644 index 6c719b4..0000000 --- a/configs/dataset/_add_reversed_relations.yaml +++ /dev/null @@ -1,8 +0,0 @@ -add_reversed_relations: - _processor_: pytorch_ie.DatasetDict.map - function: - # see this for further information and options: - # https://github.com/ArneBinder/pie-utils/blob/main/src/pie_utils/document/processors/reversed_relation_adder.py - _target_: pie_utils.document.processors.ReversedRelationAdder - label_suffix: _reversed - # symmetric_relation_labels: Optional[List[str]] = None, diff --git a/configs/dataset/_convert_documents.yaml b/configs/dataset/_convert_documents.yaml index e3a0b81..cf6a385 100644 --- a/configs/dataset/_convert_documents.yaml +++ b/configs/dataset/_convert_documents.yaml @@ -1,3 +1,4 @@ convert_documents: - _processor_: pytorch_ie.DatasetDict.to_document_type + # see https://github.com/ArneBinder/pie-datasets/blob/main/src/pie_datasets/core/dataset_dict.py + _processor_: pie_datasets.DatasetDict.to_document_type document_type: ??? diff --git a/configs/dataset/_create_test_split.yaml b/configs/dataset/_create_test_split.yaml index 1f5023f..b957a72 100644 --- a/configs/dataset/_create_test_split.yaml +++ b/configs/dataset/_create_test_split.yaml @@ -1,5 +1,5 @@ create_test_split: - _processor_: pytorch_ie.DatasetDict.add_test_split + _processor_: pie_datasets.DatasetDict.add_test_split # take 10% of the train split as the test split test_size: 0.1 # set a fixed seed to make the splitting reproducible diff --git a/configs/dataset/_create_test_split_by_ids.yaml b/configs/dataset/_create_test_split_by_ids.yaml index d190b57..aad6656 100644 --- a/configs/dataset/_create_test_split_by_ids.yaml +++ b/configs/dataset/_create_test_split_by_ids.yaml @@ -1,5 +1,5 @@ create_test_split: - _processor_: pytorch_ie.DatasetDict.move_to_new_split + _processor_: pie_datasets.DatasetDict.move_to_new_split source_split: train target_split: test ids: ??? diff --git a/configs/dataset/_create_validation_split.yaml b/configs/dataset/_create_validation_split.yaml index d7272ab..f2fb2d9 100644 --- a/configs/dataset/_create_validation_split.yaml +++ b/configs/dataset/_create_validation_split.yaml @@ -1,5 +1,5 @@ create_validation_split: - _processor_: pytorch_ie.DatasetDict.add_test_split + _processor_: pie_datasets.DatasetDict.add_test_split # take 10% of the train split as the validation split test_size: 0.1 # set a fixed seed to make the splitting reproducible diff --git a/configs/dataset/_rename_splits.yaml b/configs/dataset/_rename_splits.yaml index de1f029..5ce692c 100644 --- a/configs/dataset/_rename_splits.yaml +++ b/configs/dataset/_rename_splits.yaml @@ -1,5 +1,5 @@ rename_splits: - _processor_: pytorch_ie.DatasetDict.rename_splits + _processor_: pie_datasets.DatasetDict.rename_splits # dictionary to map from original split names to new split names mapping: ??? # if true, keep all other splits that are not mentioned in the mapping diff --git a/configs/dataset/_select_n.yaml b/configs/dataset/_select_n.yaml index d28702b..9d61d70 100644 --- a/configs/dataset/_select_n.yaml +++ b/configs/dataset/_select_n.yaml @@ -1,17 +1,17 @@ select_n: - _processor_: pytorch_ie.DatasetDict.select + _processor_: pie_datasets.DatasetDict.select split: train # take all data per default stop: null select_n_test: - _processor_: pytorch_ie.DatasetDict.select + _processor_: pie_datasets.DatasetDict.select split: test # take all data per default stop: null select_n_validation: - _processor_: pytorch_ie.DatasetDict.select + _processor_: pie_datasets.DatasetDict.select split: validation # take all data per default stop: null diff --git a/configs/dataset/conll2003.yaml b/configs/dataset/conll2003.yaml index 5ad619e..15097eb 100644 --- a/configs/dataset/conll2003.yaml +++ b/configs/dataset/conll2003.yaml @@ -1,4 +1,4 @@ -_target_: pytorch_ie.DatasetDict.load_dataset +_target_: pie_datasets.DatasetDict.load_dataset path: pie/conll2003 -revision: 1eceef918e5e2acc4cb24d4594ba5551e8967e3a +revision: 0fa8689b44ca9885b77276205a7dab3b562266b9 diff --git a/configs/dataset/conll2003_base.yaml b/configs/dataset/conll2003_base.yaml index 85d56e4..7749740 100644 --- a/configs/dataset/conll2003_base.yaml +++ b/configs/dataset/conll2003_base.yaml @@ -3,6 +3,6 @@ _target_: src.utils.execute_pipeline input: - _target_: pytorch_ie.DatasetDict.load_dataset + _target_: pie_datasets.DatasetDict.load_dataset path: pie/conll2003 - revision: 1eceef918e5e2acc4cb24d4594ba5551e8967e3a + revision: 0fa8689b44ca9885b77276205a7dab3b562266b9 diff --git a/configs/dataset/from_serialized_documents.yaml b/configs/dataset/from_serialized_documents.yaml index ef6c463..36cb59f 100644 --- a/configs/dataset/from_serialized_documents.yaml +++ b/configs/dataset/from_serialized_documents.yaml @@ -1,4 +1,4 @@ -_target_: pytorch_ie.DatasetDict.from_json +_target_: pie_datasets.DatasetDict.from_json # either define data_files ... # data_files: # test: path/to/documents.jsonl @@ -7,4 +7,4 @@ _target_: pytorch_ie.DatasetDict.from_json # The document_type field is required if you do not use "data_dir" or have no metadata.json file in that directory: # the document type depends on the task and the dataset. For example, for relation extraction, it can be: -# document_type: pytorch_ie.documents.TextDocumentWithLabeledEntitiesRelationsAndLabeledPartitions +# document_type: pytorch_ie.documents.TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions diff --git a/requirements.txt b/requirements.txt index 034119d..e700e8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # --------- pytorch-ie --------- # -pytorch-ie>=0.24.2,<0.25.0 +pytorch-ie>=0.28.0,<0.29.0 +pie-datasets>=0.3.1,<0.4.0 # pie-utils provides some useful helper methods for pytorch-ie, # e.g. document processors or span utils (convert span annotations # to sequence encodings such as BIO, IO or BIOUL, and back). diff --git a/src/document/types.py b/src/document/types.py index 5d19fbb..77af983 100644 --- a/src/document/types.py +++ b/src/document/types.py @@ -3,7 +3,7 @@ from pytorch_ie.annotations import LabeledSpan from pytorch_ie.core import Annotation, AnnotationList, annotation_field -from pytorch_ie.documents import TextBasedDocument, TextDocumentWithLabeledEntitiesAndRelations +from pytorch_ie.documents import TextBasedDocument # =========================== Annotation Types ============================= # diff --git a/src/evaluate.py b/src/evaluate.py index 6163178..7df1db3 100644 --- a/src/evaluate.py +++ b/src/evaluate.py @@ -38,7 +38,7 @@ import hydra import pytorch_lightning as pl from omegaconf import DictConfig -from pytorch_ie import DatasetDict +from pie_datasets import DatasetDict from pytorch_ie.core import PyTorchIEModel, TaskModule from pytorch_lightning import Trainer diff --git a/src/evaluate_documents.py b/src/evaluate_documents.py index 808acf2..06d74e4 100644 --- a/src/evaluate_documents.py +++ b/src/evaluate_documents.py @@ -38,7 +38,7 @@ import hydra import pytorch_lightning as pl from omegaconf import DictConfig -from pytorch_ie import DatasetDict +from pie_datasets import DatasetDict from pytorch_ie.core import DocumentMetric from src import utils diff --git a/src/predict.py b/src/predict.py index d65ee6b..435e715 100644 --- a/src/predict.py +++ b/src/predict.py @@ -39,7 +39,8 @@ import hydra import pytorch_lightning as pl from omegaconf import DictConfig, OmegaConf -from pytorch_ie import DatasetDict, Pipeline +from pie_datasets import DatasetDict +from pytorch_ie import Pipeline from src import utils from src.models import * # noqa: F403 diff --git a/src/serializer/json.py b/src/serializer/json.py index c8a409b..0af044f 100644 --- a/src/serializer/json.py +++ b/src/serializer/json.py @@ -2,8 +2,8 @@ import os from typing import Dict, List, Optional, Sequence, Type, TypeVar +from pie_datasets.core.dataset_dict import METADATA_FILE_NAME from pytorch_ie.core import Document -from pytorch_ie.data.dataset_dict import METADATA_FILE_NAME from pytorch_ie.utils.hydra import resolve_optional_document_type, serialize_document_type from src.serializer.interface import DocumentSerializer diff --git a/src/train.py b/src/train.py index 8e8c836..31815dd 100644 --- a/src/train.py +++ b/src/train.py @@ -39,7 +39,7 @@ import pytorch_lightning as pl from hydra.utils import get_class from omegaconf import DictConfig -from pytorch_ie import DatasetDict +from pie_datasets import DatasetDict from pytorch_ie.core import PyTorchIEModel, TaskModule from pytorch_ie.models import * # noqa: F403 from pytorch_ie.models.interface import RequiresModelNameOrPath, RequiresNumClasses diff --git a/tests/unit/serializer/test_json.py b/tests/unit/serializer/test_json.py index e4ac533..0009e62 100644 --- a/tests/unit/serializer/test_json.py +++ b/tests/unit/serializer/test_json.py @@ -1,7 +1,7 @@ from dataclasses import dataclass import pytest -from pytorch_ie import DatasetDict +from pie_datasets import DatasetDict from pytorch_ie.annotations import BinaryRelation, LabeledSpan from pytorch_ie.core import AnnotationList, annotation_field from pytorch_ie.documents import TextDocument