diff --git a/datasets_old_2022_10/README.md b/datasets_old_2022_10/README.md new file mode 100644 index 00000000..8ed6d4a3 --- /dev/null +++ b/datasets_old_2022_10/README.md @@ -0,0 +1,2 @@ +This was an initial try to define a common interface for datasets. +We decided to go with a new interface, see [#231](https://github.com/rwth-i6/returnn_common/issues/231). diff --git a/datasets/__init__.py b/datasets_old_2022_10/__init__.py similarity index 100% rename from datasets/__init__.py rename to datasets_old_2022_10/__init__.py diff --git a/datasets/asr/__init__.py b/datasets_old_2022_10/asr/__init__.py similarity index 100% rename from datasets/asr/__init__.py rename to datasets_old_2022_10/asr/__init__.py diff --git a/datasets/asr/features.py b/datasets_old_2022_10/asr/features.py similarity index 100% rename from datasets/asr/features.py rename to datasets_old_2022_10/asr/features.py diff --git a/datasets/asr/librispeech/__init__.py b/datasets_old_2022_10/asr/librispeech/__init__.py similarity index 100% rename from datasets/asr/librispeech/__init__.py rename to datasets_old_2022_10/asr/librispeech/__init__.py diff --git a/datasets/asr/librispeech/lm.py b/datasets_old_2022_10/asr/librispeech/lm.py similarity index 100% rename from datasets/asr/librispeech/lm.py rename to datasets_old_2022_10/asr/librispeech/lm.py diff --git a/datasets/asr/librispeech/norm_stats/stats.40.mean.txt b/datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.mean.txt similarity index 100% rename from datasets/asr/librispeech/norm_stats/stats.40.mean.txt rename to datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.mean.txt diff --git a/datasets/asr/librispeech/norm_stats/stats.40.std_dev.txt b/datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.std_dev.txt similarity index 100% rename from datasets/asr/librispeech/norm_stats/stats.40.std_dev.txt rename to datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.std_dev.txt diff --git 
a/datasets/asr/librispeech/norm_stats/stats.40.txt b/datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.txt similarity index 100% rename from datasets/asr/librispeech/norm_stats/stats.40.txt rename to datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.txt diff --git a/datasets/asr/librispeech/oggzip.py b/datasets_old_2022_10/asr/librispeech/oggzip.py similarity index 99% rename from datasets/asr/librispeech/oggzip.py rename to datasets_old_2022_10/asr/librispeech/oggzip.py index ae0bcecf..0b7017c1 100644 --- a/datasets/asr/librispeech/oggzip.py +++ b/datasets_old_2022_10/asr/librispeech/oggzip.py @@ -72,7 +72,7 @@ def get_train_dataset(self) -> Dict[str, Any]: def get_eval_datasets(self) -> Dict[str, Dict[str, Any]]: """ - Get eval datasets + Get eval datasets """ return { "dev": self.get_dataset("dev", train=False, subset=3000), diff --git a/datasets/asr/librispeech/vocabs.py b/datasets_old_2022_10/asr/librispeech/vocabs.py similarity index 100% rename from datasets/asr/librispeech/vocabs.py rename to datasets_old_2022_10/asr/librispeech/vocabs.py diff --git a/datasets/asr/librispeech/vocabs/trans.bpe10k.codes b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.codes similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe10k.codes rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.codes diff --git a/datasets/asr/librispeech/vocabs/trans.bpe10k.vocab b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.vocab similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe10k.vocab rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.vocab diff --git a/datasets/asr/librispeech/vocabs/trans.bpe1k.codes b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.codes similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe1k.codes rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.codes diff --git 
a/datasets/asr/librispeech/vocabs/trans.bpe1k.vocab b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.vocab similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe1k.vocab rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.vocab diff --git a/datasets/asr/timit/__init__.py b/datasets_old_2022_10/asr/timit/__init__.py similarity index 100% rename from datasets/asr/timit/__init__.py rename to datasets_old_2022_10/asr/timit/__init__.py diff --git a/datasets/asr/timit/nltk.py b/datasets_old_2022_10/asr/timit/nltk.py similarity index 97% rename from datasets/asr/timit/nltk.py rename to datasets_old_2022_10/asr/timit/nltk.py index 8a80a990..90863805 100644 --- a/datasets/asr/timit/nltk.py +++ b/datasets_old_2022_10/asr/timit/nltk.py @@ -54,7 +54,7 @@ def get_train_dataset(self) -> Dict[str, Any]: def get_eval_datasets(self) -> Dict[str, Dict[str, Any]]: """ - Get eval datasets + Get eval datasets """ return { "dev": self.get_dataset("dev"), @@ -72,7 +72,7 @@ def get_main_dataset(self) -> Dict[str]: def get_dataset(self, key, subset=None): """ - Get datasets + Get datasets """ assert key in {"train", "dev"} assert not subset diff --git a/datasets/interface.py b/datasets_old_2022_10/interface.py similarity index 100% rename from datasets/interface.py rename to datasets_old_2022_10/interface.py diff --git a/nn_raw/lm.py b/nn_raw/lm.py index 0686eb82..b917d75b 100644 --- a/nn_raw/lm.py +++ b/nn_raw/lm.py @@ -3,7 +3,7 @@ Language model functions. 
""" -from ..datasets.interface import VocabConfig +from ..datasets_old_2022_10.interface import VocabConfig from typing import Dict, Any diff --git a/nn_raw/transducer/transducer_fullsum.py b/nn_raw/transducer/transducer_fullsum.py index 041aeff8..dde320e1 100644 --- a/nn_raw/transducer/transducer_fullsum.py +++ b/nn_raw/transducer/transducer_fullsum.py @@ -64,7 +64,7 @@ from .recomb_recog import targetb_recomb_recog from .loss import rnnt_loss, rnnt_loss_out_type from ..collect_out_str import make_out_str_func -from ...datasets.interface import TargetConfig +from ...datasets_old_2022_10.interface import TargetConfig class Context: