Skip to content

Commit

Permalink
upgrade pytorch-ie to 0.28.0 (#140)
Browse files Browse the repository at this point in the history
* upgrade pytorch-ie to 0.28.0 and add pie-datasets 0.3.1 as requirement

* adjust dataset scripts

* adjust python files
  • Loading branch information
ArneBinder authored Nov 8, 2023
1 parent 7052cbe commit a0036c9
Show file tree
Hide file tree
Showing 20 changed files with 28 additions and 41 deletions.
8 changes: 0 additions & 8 deletions configs/dataset/_add_candidate_relations.yaml

This file was deleted.

6 changes: 3 additions & 3 deletions configs/dataset/_add_partitions.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
add_partitions:
-_processor_: pytorch_ie.DatasetDict.map
+_processor_: pie_datasets.DatasetDict.map
function:
# see this for further information and options:
-# https://github.com/ArneBinder/pie-utils/blob/main/src/pie_utils/document/processors/regex_partitioner.py
-_target_: pie_utils.document.processors.RegexPartitioner
+# https://github.com/ArneBinder/pie-datasets/blob/main/src/pie_datasets/document/processing/regex_partitioner.py
+_target_: pie_datasets.document.processing.RegexPartitioner
pattern: ???
8 changes: 0 additions & 8 deletions configs/dataset/_add_reversed_relations.yaml

This file was deleted.

3 changes: 2 additions & 1 deletion configs/dataset/_convert_documents.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
convert_documents:
-_processor_: pytorch_ie.DatasetDict.to_document_type
+# see https://github.com/ArneBinder/pie-datasets/blob/main/src/pie_datasets/core/dataset_dict.py
+_processor_: pie_datasets.DatasetDict.to_document_type
document_type: ???
2 changes: 1 addition & 1 deletion configs/dataset/_create_test_split.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
create_test_split:
-_processor_: pytorch_ie.DatasetDict.add_test_split
+_processor_: pie_datasets.DatasetDict.add_test_split
# take 10% of the train split as the test split
test_size: 0.1
# set a fixed seed to make the splitting reproducible
Expand Down
2 changes: 1 addition & 1 deletion configs/dataset/_create_test_split_by_ids.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
create_test_split:
-_processor_: pytorch_ie.DatasetDict.move_to_new_split
+_processor_: pie_datasets.DatasetDict.move_to_new_split
source_split: train
target_split: test
ids: ???
2 changes: 1 addition & 1 deletion configs/dataset/_create_validation_split.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
create_validation_split:
-_processor_: pytorch_ie.DatasetDict.add_test_split
+_processor_: pie_datasets.DatasetDict.add_test_split
# take 10% of the train split as the validation split
test_size: 0.1
# set a fixed seed to make the splitting reproducible
Expand Down
2 changes: 1 addition & 1 deletion configs/dataset/_rename_splits.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rename_splits:
-_processor_: pytorch_ie.DatasetDict.rename_splits
+_processor_: pie_datasets.DatasetDict.rename_splits
# dictionary to map from original split names to new split names
mapping: ???
# if true, keep all other splits that are not mentioned in the mapping
Expand Down
6 changes: 3 additions & 3 deletions configs/dataset/_select_n.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
select_n:
-_processor_: pytorch_ie.DatasetDict.select
+_processor_: pie_datasets.DatasetDict.select
split: train
# take all data per default
stop: null

select_n_test:
-_processor_: pytorch_ie.DatasetDict.select
+_processor_: pie_datasets.DatasetDict.select
split: test
# take all data per default
stop: null

select_n_validation:
-_processor_: pytorch_ie.DatasetDict.select
+_processor_: pie_datasets.DatasetDict.select
split: validation
# take all data per default
stop: null
4 changes: 2 additions & 2 deletions configs/dataset/conll2003.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-_target_: pytorch_ie.DatasetDict.load_dataset
+_target_: pie_datasets.DatasetDict.load_dataset

path: pie/conll2003
-revision: 1eceef918e5e2acc4cb24d4594ba5551e8967e3a
+revision: 0fa8689b44ca9885b77276205a7dab3b562266b9
4 changes: 2 additions & 2 deletions configs/dataset/conll2003_base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

_target_: src.utils.execute_pipeline
input:
-_target_: pytorch_ie.DatasetDict.load_dataset
+_target_: pie_datasets.DatasetDict.load_dataset
path: pie/conll2003
-revision: 1eceef918e5e2acc4cb24d4594ba5551e8967e3a
+revision: 0fa8689b44ca9885b77276205a7dab3b562266b9
4 changes: 2 additions & 2 deletions configs/dataset/from_serialized_documents.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-_target_: pytorch_ie.DatasetDict.from_json
+_target_: pie_datasets.DatasetDict.from_json
# either define data_files ...
# data_files:
# test: path/to/documents.jsonl
Expand All @@ -7,4 +7,4 @@ _target_: pytorch_ie.DatasetDict.from_json

# The document_type field is required if you do not use "data_dir" or have no metadata.json file in that directory:
# the document type depends on the task and the dataset. For example, for relation extraction, it can be:
-# document_type: pytorch_ie.documents.TextDocumentWithLabeledEntitiesRelationsAndLabeledPartitions
+# document_type: pytorch_ie.documents.TextDocumentWithLabeledSpansBinaryRelationsAndLabeledPartitions
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# --------- pytorch-ie --------- #
-pytorch-ie>=0.24.2,<0.25.0
+pytorch-ie>=0.28.0,<0.29.0
+pie-datasets>=0.3.1,<0.4.0
# pie-utils provides some useful helper methods for pytorch-ie,
# e.g. document processors or span utils (convert span annotations
# to sequence encodings such as BIO, IO or BIOUL, and back).
Expand Down
2 changes: 1 addition & 1 deletion src/document/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from pytorch_ie.annotations import LabeledSpan
from pytorch_ie.core import Annotation, AnnotationList, annotation_field
-from pytorch_ie.documents import TextBasedDocument, TextDocumentWithLabeledEntitiesAndRelations
+from pytorch_ie.documents import TextBasedDocument

# =========================== Annotation Types ============================= #

Expand Down
2 changes: 1 addition & 1 deletion src/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import hydra
import pytorch_lightning as pl
from omegaconf import DictConfig
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
from pytorch_ie.core import PyTorchIEModel, TaskModule
from pytorch_lightning import Trainer

Expand Down
2 changes: 1 addition & 1 deletion src/evaluate_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
import hydra
import pytorch_lightning as pl
from omegaconf import DictConfig
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
from pytorch_ie.core import DocumentMetric

from src import utils
Expand Down
3 changes: 2 additions & 1 deletion src/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
import hydra
import pytorch_lightning as pl
from omegaconf import DictConfig, OmegaConf
-from pytorch_ie import DatasetDict, Pipeline
+from pie_datasets import DatasetDict
+from pytorch_ie import Pipeline

from src import utils
from src.models import * # noqa: F403
Expand Down
2 changes: 1 addition & 1 deletion src/serializer/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import os
from typing import Dict, List, Optional, Sequence, Type, TypeVar

+from pie_datasets.core.dataset_dict import METADATA_FILE_NAME
from pytorch_ie.core import Document
-from pytorch_ie.data.dataset_dict import METADATA_FILE_NAME
from pytorch_ie.utils.hydra import resolve_optional_document_type, serialize_document_type

from src.serializer.interface import DocumentSerializer
Expand Down
2 changes: 1 addition & 1 deletion src/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import pytorch_lightning as pl
from hydra.utils import get_class
from omegaconf import DictConfig
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
from pytorch_ie.core import PyTorchIEModel, TaskModule
from pytorch_ie.models import * # noqa: F403
from pytorch_ie.models.interface import RequiresModelNameOrPath, RequiresNumClasses
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/serializer/test_json.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass

import pytest
-from pytorch_ie import DatasetDict
+from pie_datasets import DatasetDict
from pytorch_ie.annotations import BinaryRelation, LabeledSpan
from pytorch_ie.core import AnnotationList, annotation_field
from pytorch_ie.documents import TextDocument
Expand Down

0 comments on commit a0036c9

Please sign in to comment.