upgrade pytorch-ie to 0.28.0 (#140)

* upgrade pytorch-ie to 0.28.0 and add pie-datasets 0.3.1 as requirement
* adjust dataset scripts
* adjust python files
ArneBinder · Nov 8, 2023 · a0036c9 · a0036c9
1 parent 7052cbe
commit a0036c9
Show file tree

Hide file tree

Showing 20 changed files with 28 additions and 41 deletions.
diff --git a/configs/dataset/_add_candidate_relations.yaml b/configs/dataset/_add_candidate_relations.yaml
diff --git a/configs/dataset/_add_partitions.yaml b/configs/dataset/_add_partitions.yaml
@@ -1,7 +1,7 @@
 add_partitions:
-  _processor_: pytorch_ie.DatasetDict.map
+  _processor_: pie_datasets.DatasetDict.map
   function:
     # see this for further information and options:
-    # https://github.com/ArneBinder/pie-utils/blob/main/src/pie_utils/document/processors/regex_partitioner.py
-    _target_: pie_utils.document.processors.RegexPartitioner
+    # https://github.com/ArneBinder/pie-datasets/blob/main/src/pie_datasets/document/processing/regex_partitioner.py
+    _target_: pie_datasets.document.processing.RegexPartitioner
     pattern: ???
diff --git a/configs/dataset/_add_reversed_relations.yaml b/configs/dataset/_add_reversed_relations.yaml
diff --git a/configs/dataset/_convert_documents.yaml b/configs/dataset/_convert_documents.yaml
@@ -1,3 +1,4 @@
 convert_documents:
-  _processor_: pytorch_ie.DatasetDict.to_document_type
+  # see https://github.com/ArneBinder/pie-datasets/blob/main/src/pie_datasets/core/dataset_dict.py
+  _processor_: pie_datasets.DatasetDict.to_document_type
   document_type: ???
diff --git a/configs/dataset/_create_test_split.yaml b/configs/dataset/_create_test_split.yaml
@@ -1,5 +1,5 @@
 create_test_split:
-  _processor_: pytorch_ie.DatasetDict.add_test_split
+  _processor_: pie_datasets.DatasetDict.add_test_split
   # take 10% of the train split as the test split
   test_size: 0.1
   # set a fixed seed to make the splitting reproducible

diff --git a/configs/dataset/_create_test_split_by_ids.yaml b/configs/dataset/_create_test_split_by_ids.yaml
@@ -1,5 +1,5 @@
 create_test_split:
-  _processor_: pytorch_ie.DatasetDict.move_to_new_split
+  _processor_: pie_datasets.DatasetDict.move_to_new_split
   source_split: train
   target_split: test
   ids: ???
diff --git a/configs/dataset/_create_validation_split.yaml b/configs/dataset/_create_validation_split.yaml
@@ -1,5 +1,5 @@
 create_validation_split:
-  _processor_: pytorch_ie.DatasetDict.add_test_split
+  _processor_: pie_datasets.DatasetDict.add_test_split
   # take 10% of the train split as the validation split
   test_size: 0.1
   # set a fixed seed to make the splitting reproducible

diff --git a/configs/dataset/_rename_splits.yaml b/configs/dataset/_rename_splits.yaml
@@ -1,5 +1,5 @@
 rename_splits:
-  _processor_: pytorch_ie.DatasetDict.rename_splits
+  _processor_: pie_datasets.DatasetDict.rename_splits
   # dictionary to map from original split names to new split names
   mapping: ???
   # if true, keep all other splits that are not mentioned in the mapping

diff --git a/configs/dataset/_select_n.yaml b/configs/dataset/_select_n.yaml
@@ -1,17 +1,17 @@
 select_n:
-  _processor_: pytorch_ie.DatasetDict.select
+  _processor_: pie_datasets.DatasetDict.select
   split: train
   # take all data per default
   stop: null
 
 select_n_test:
-  _processor_: pytorch_ie.DatasetDict.select
+  _processor_: pie_datasets.DatasetDict.select
   split: test
   # take all data per default
   stop: null
 
 select_n_validation:
-  _processor_: pytorch_ie.DatasetDict.select
+  _processor_: pie_datasets.DatasetDict.select
   split: validation
   # take all data per default
   stop: null
diff --git a/configs/dataset/conll2003.yaml b/configs/dataset/conll2003.yaml
@@ -1,4 +1,4 @@
-_target_: pytorch_ie.DatasetDict.load_dataset
+_target_: pie_datasets.DatasetDict.load_dataset
 
 path: pie/conll2003
-revision: 1eceef918e5e2acc4cb24d4594ba5551e8967e3a
+revision: 0fa8689b44ca9885b77276205a7dab3b562266b9
diff --git a/configs/dataset/conll2003_base.yaml b/configs/dataset/conll2003_base.yaml
@@ -3,6 +3,6 @@
 
 _target_: src.utils.execute_pipeline
 input:
-  _target_: pytorch_ie.DatasetDict.load_dataset
+  _target_: pie_datasets.DatasetDict.load_dataset
   path: pie/conll2003
-  revision: 1eceef918e5e2acc4cb24d4594ba5551e8967e3a
+  revision: 0fa8689b44ca9885b77276205a7dab3b562266b9
diff --git a/configs/dataset/from_serialized_documents.yaml b/configs/dataset/from_serialized_documents.yaml
@@ -1,4 +1,4 @@
-_target_: pytorch_ie.DatasetDict.from_json
+_target_: pie_datasets.DatasetDict.from_json
 # either define data_files ...
 # data_files:
 #   test: path/to/documents.jsonl
@@ -7,4 +7,4 @@ _target_: pytorch_ie.DatasetDict.from_json
 
 # The document_type field is required if you do not use "data_dir" or have no metadata.json file in that directory:
 # the document type depends on the task and the dataset. For example, for relation extraction, it can be:
-# document_type: pytorch_ie.documents.TextDocumentWithLabeledEntitiesRelationsAndLabeledPartitions
+# document_type: pytorch_ie.documents.TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,6 @@
 # --------- pytorch-ie --------- #
-pytorch-ie>=0.24.2,<0.25.0
+pytorch-ie>=0.28.0,<0.29.0
+pie-datasets>=0.3.1,<0.4.0
 # pie-utils provides some useful helper methods for pytorch-ie,
 # e.g. document processors or span utils (convert span annotations
 # to sequence encodings such as BIO, IO or BIOUL, and back).

diff --git a/src/document/types.py b/src/document/types.py
@@ -3,7 +3,7 @@
 
 from pytorch_ie.annotations import LabeledSpan
 from pytorch_ie.core import Annotation, AnnotationList, annotation_field
-from pytorch_ie.documents import TextBasedDocument, TextDocumentWithLabeledEntitiesAndRelations
+from pytorch_ie.documents import TextBasedDocument
 
 # =========================== Annotation Types ============================= #
 

diff --git a/src/evaluate.py b/src/evaluate.py
@@ -38,7 +38,7 @@
 import hydra
 import pytorch_lightning as pl
 from omegaconf import DictConfig
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
 from pytorch_ie.core import PyTorchIEModel, TaskModule
 from pytorch_lightning import Trainer
 

diff --git a/src/evaluate_documents.py b/src/evaluate_documents.py
@@ -38,7 +38,7 @@
 import hydra
 import pytorch_lightning as pl
 from omegaconf import DictConfig
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
 from pytorch_ie.core import DocumentMetric
 
 from src import utils

diff --git a/src/predict.py b/src/predict.py
@@ -39,7 +39,8 @@
 import hydra
 import pytorch_lightning as pl
 from omegaconf import DictConfig, OmegaConf
-from pytorch_ie import DatasetDict, Pipeline
+from pie_datasets import DatasetDict
+from pytorch_ie import Pipeline
 
 from src import utils
 from src.models import *  # noqa: F403

diff --git a/src/serializer/json.py b/src/serializer/json.py
@@ -2,8 +2,8 @@
 import os
 from typing import Dict, List, Optional, Sequence, Type, TypeVar
 
+from pie_datasets.core.dataset_dict import METADATA_FILE_NAME
 from pytorch_ie.core import Document
-from pytorch_ie.data.dataset_dict import METADATA_FILE_NAME
 from pytorch_ie.utils.hydra import resolve_optional_document_type, serialize_document_type
 
 from src.serializer.interface import DocumentSerializer

diff --git a/src/train.py b/src/train.py
@@ -39,7 +39,7 @@
 import pytorch_lightning as pl
 from hydra.utils import get_class
 from omegaconf import DictConfig
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
 from pytorch_ie.core import PyTorchIEModel, TaskModule
 from pytorch_ie.models import *  # noqa: F403
 from pytorch_ie.models.interface import RequiresModelNameOrPath, RequiresNumClasses

diff --git a/tests/unit/serializer/test_json.py b/tests/unit/serializer/test_json.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 
 import pytest
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
 from pytorch_ie.annotations import BinaryRelation, LabeledSpan
 from pytorch_ie.core import AnnotationList, annotation_field
 from pytorch_ie.documents import TextDocument