Skip to content

Commit

Permalink
implement DatasetDict (#296)
Browse files Browse the repository at this point in the history
* add utils.hydra.resolve_target()

* implement DatasetDict

* add documentation to methods

* rename from_hf_dataset to from_hf and allow HF Dataset or HF IterableDataset as input

* improve documentation

* fix tests
  • Loading branch information
ArneBinder authored Jul 28, 2023
1 parent 3dcbb4c commit ca80fe8
Show file tree
Hide file tree
Showing 9 changed files with 1,129 additions and 6 deletions.
7 changes: 1 addition & 6 deletions src/pytorch_ie/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
from typing import Dict, Union

from datasets import Split

from .builder import GeneratorBasedBuilder
from .dataset import Dataset, IterableDataset
from .dataset_dict import DatasetDict
from .dataset_formatter import DocumentFormatter

DatasetDict = Dict[Union[str, Split], Dataset]

__all__ = [
"GeneratorBasedBuilder",
"Dataset",
Expand Down
40 changes: 40 additions & 0 deletions src/pytorch_ie/data/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from abc import ABC, abstractmethod
from typing import Optional, Union

from .dataset import Dataset, IterableDataset


class EnterDatasetMixin(ABC):
    """Interface for processors that enter a dataset context.

    The single hook, :meth:`enter_dataset`, is abstract, so a subclass has to
    override it before it can be instantiated.
    """

    @abstractmethod
    def enter_dataset(
        self,
        dataset: Union[Dataset, IterableDataset],
        name: Optional[str] = None,
    ) -> None:
        """Enter the context of the given dataset.

        Args:
            dataset: The ``Dataset`` or ``IterableDataset`` being entered.
            name: Optional name associated with the dataset; ``None`` when
                omitted.
                NOTE(review): presumably the split name when the dataset
                comes out of a dataset dict -- confirm against the caller.
        """


class ExitDatasetMixin(ABC):
    """Interface for processors that exit a dataset context.

    The single hook, :meth:`exit_dataset`, is abstract, so a subclass has to
    override it before it can be instantiated.
    """

    @abstractmethod
    def exit_dataset(
        self,
        dataset: Union[Dataset, IterableDataset],
        name: Optional[str] = None,
    ) -> None:
        """Exit the context of the given dataset.

        Args:
            dataset: The ``Dataset`` or ``IterableDataset`` being exited.
            name: Optional name associated with the dataset; ``None`` when
                omitted.
                NOTE(review): presumably the split name when the dataset
                comes out of a dataset dict -- confirm against the caller.
        """


class EnterDatasetDictMixin(ABC):
    """Interface for processors that enter a dataset dict context.

    The single hook, :meth:`enter_dataset_dict`, is abstract, so a subclass
    has to override it before it can be instantiated.
    """

    @abstractmethod
    def enter_dataset_dict(self, dataset_dict) -> None:
        """Enter the context of the given dataset dict.

        Args:
            dataset_dict: The dataset dict being entered. The parameter is
                deliberately left unannotated here.
                NOTE(review): presumably a ``DatasetDict`` instance --
                confirm against the caller.
        """


class ExitDatasetDictMixin(ABC):
    """Interface for processors that exit a dataset dict context.

    The single hook, :meth:`exit_dataset_dict`, is abstract, so a subclass
    has to override it before it can be instantiated.
    """

    @abstractmethod
    def exit_dataset_dict(self, dataset_dict) -> None:
        """Exit the context of the given dataset dict.

        Args:
            dataset_dict: The dataset dict being exited. The parameter is
                deliberately left unannotated here.
                NOTE(review): presumably a ``DatasetDict`` instance --
                confirm against the caller.
        """
Loading

0 comments on commit ca80fe8

Please sign in to comment.