From 000ee8d83a0c863b4f601a47679062034ca16fe1 Mon Sep 17 00:00:00 2001 From: Albert Zeyer Date: Mon, 24 Oct 2022 08:34:59 +0000 Subject: [PATCH] Rename old datasets interface #231 --- datasets_old_2022_10/README.md | 2 ++ {datasets => datasets_old_2022_10}/__init__.py | 0 {datasets => datasets_old_2022_10}/asr/__init__.py | 0 {datasets => datasets_old_2022_10}/asr/features.py | 0 .../asr/librispeech/__init__.py | 0 {datasets => datasets_old_2022_10}/asr/librispeech/lm.py | 0 .../asr/librispeech/norm_stats/stats.40.mean.txt | 0 .../asr/librispeech/norm_stats/stats.40.std_dev.txt | 0 .../asr/librispeech/norm_stats/stats.40.txt | 0 {datasets => datasets_old_2022_10}/asr/librispeech/oggzip.py | 2 +- {datasets => datasets_old_2022_10}/asr/librispeech/vocabs.py | 0 .../asr/librispeech/vocabs/trans.bpe10k.codes | 0 .../asr/librispeech/vocabs/trans.bpe10k.vocab | 0 .../asr/librispeech/vocabs/trans.bpe1k.codes | 0 .../asr/librispeech/vocabs/trans.bpe1k.vocab | 0 {datasets => datasets_old_2022_10}/asr/timit/__init__.py | 0 {datasets => datasets_old_2022_10}/asr/timit/nltk.py | 4 ++-- {datasets => datasets_old_2022_10}/interface.py | 0 nn_raw/lm.py | 2 +- nn_raw/transducer/transducer_fullsum.py | 2 +- 20 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 datasets_old_2022_10/README.md rename {datasets => datasets_old_2022_10}/__init__.py (100%) rename {datasets => datasets_old_2022_10}/asr/__init__.py (100%) rename {datasets => datasets_old_2022_10}/asr/features.py (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/__init__.py (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/lm.py (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/norm_stats/stats.40.mean.txt (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/norm_stats/stats.40.std_dev.txt (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/norm_stats/stats.40.txt (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/oggzip.py (99%) rename {datasets => datasets_old_2022_10}/asr/librispeech/vocabs.py (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/vocabs/trans.bpe10k.codes (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/vocabs/trans.bpe10k.vocab (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/vocabs/trans.bpe1k.codes (100%) rename {datasets => datasets_old_2022_10}/asr/librispeech/vocabs/trans.bpe1k.vocab (100%) rename {datasets => datasets_old_2022_10}/asr/timit/__init__.py (100%) rename {datasets => datasets_old_2022_10}/asr/timit/nltk.py (97%) rename {datasets => datasets_old_2022_10}/interface.py (100%) diff --git a/datasets_old_2022_10/README.md b/datasets_old_2022_10/README.md new file mode 100644 index 00000000..8ed6d4a3 --- /dev/null +++ b/datasets_old_2022_10/README.md @@ -0,0 +1,2 @@ +This was an initial try to define a common interface for datasets. +We decided to go with a new interface, see [#231](https://github.com/rwth-i6/returnn_common/issues/231). diff --git a/datasets/__init__.py b/datasets_old_2022_10/__init__.py similarity index 100% rename from datasets/__init__.py rename to datasets_old_2022_10/__init__.py diff --git a/datasets/asr/__init__.py b/datasets_old_2022_10/asr/__init__.py similarity index 100% rename from datasets/asr/__init__.py rename to datasets_old_2022_10/asr/__init__.py diff --git a/datasets/asr/features.py b/datasets_old_2022_10/asr/features.py similarity index 100% rename from datasets/asr/features.py rename to datasets_old_2022_10/asr/features.py diff --git a/datasets/asr/librispeech/__init__.py b/datasets_old_2022_10/asr/librispeech/__init__.py similarity index 100% rename from datasets/asr/librispeech/__init__.py rename to datasets_old_2022_10/asr/librispeech/__init__.py diff --git a/datasets/asr/librispeech/lm.py b/datasets_old_2022_10/asr/librispeech/lm.py similarity index 100% rename from datasets/asr/librispeech/lm.py rename to datasets_old_2022_10/asr/librispeech/lm.py diff --git a/datasets/asr/librispeech/norm_stats/stats.40.mean.txt b/datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.mean.txt similarity index 100% rename from datasets/asr/librispeech/norm_stats/stats.40.mean.txt rename to datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.mean.txt diff --git a/datasets/asr/librispeech/norm_stats/stats.40.std_dev.txt b/datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.std_dev.txt similarity index 100% rename from datasets/asr/librispeech/norm_stats/stats.40.std_dev.txt rename to datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.std_dev.txt diff --git a/datasets/asr/librispeech/norm_stats/stats.40.txt b/datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.txt similarity index 100% rename from datasets/asr/librispeech/norm_stats/stats.40.txt rename to datasets_old_2022_10/asr/librispeech/norm_stats/stats.40.txt diff --git a/datasets/asr/librispeech/oggzip.py b/datasets_old_2022_10/asr/librispeech/oggzip.py similarity index 99% rename from datasets/asr/librispeech/oggzip.py rename to datasets_old_2022_10/asr/librispeech/oggzip.py index ae0bcecf..0b7017c1 100644 --- a/datasets/asr/librispeech/oggzip.py +++ b/datasets_old_2022_10/asr/librispeech/oggzip.py @@ -72,7 +72,7 @@ def get_train_dataset(self) -> Dict[str, Any]: def get_eval_datasets(self) -> Dict[str, Dict[str, Any]]: """ - Get eval datasets + Get eval datasets_old_2022_10 """ return { "dev": self.get_dataset("dev", train=False, subset=3000), diff --git a/datasets/asr/librispeech/vocabs.py b/datasets_old_2022_10/asr/librispeech/vocabs.py similarity index 100% rename from datasets/asr/librispeech/vocabs.py rename to datasets_old_2022_10/asr/librispeech/vocabs.py diff --git a/datasets/asr/librispeech/vocabs/trans.bpe10k.codes b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.codes similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe10k.codes rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.codes diff --git a/datasets/asr/librispeech/vocabs/trans.bpe10k.vocab b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.vocab similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe10k.vocab rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe10k.vocab diff --git a/datasets/asr/librispeech/vocabs/trans.bpe1k.codes b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.codes similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe1k.codes rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.codes diff --git a/datasets/asr/librispeech/vocabs/trans.bpe1k.vocab b/datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.vocab similarity index 100% rename from datasets/asr/librispeech/vocabs/trans.bpe1k.vocab rename to datasets_old_2022_10/asr/librispeech/vocabs/trans.bpe1k.vocab diff --git a/datasets/asr/timit/__init__.py b/datasets_old_2022_10/asr/timit/__init__.py similarity index 100% rename from datasets/asr/timit/__init__.py rename to datasets_old_2022_10/asr/timit/__init__.py diff --git a/datasets/asr/timit/nltk.py b/datasets_old_2022_10/asr/timit/nltk.py similarity index 97% rename from datasets/asr/timit/nltk.py rename to datasets_old_2022_10/asr/timit/nltk.py index 8a80a990..90863805 100644 --- a/datasets/asr/timit/nltk.py +++ b/datasets_old_2022_10/asr/timit/nltk.py @@ -54,7 +54,7 @@ def get_train_dataset(self) -> Dict[str, Any]: def get_eval_datasets(self) -> Dict[str, Dict[str, Any]]: """ - Get eval datasets + Get eval datasets_old_2022_10 """ return { "dev": self.get_dataset("dev"), @@ -72,7 +72,7 @@ def get_main_dataset(self) -> Dict[str]: def get_dataset(self, key, subset=None): """ - Get datasets + Get datasets_old_2022_10 """ assert key in {"train", "dev"} assert not subset diff --git a/datasets/interface.py b/datasets_old_2022_10/interface.py similarity index 100% rename from datasets/interface.py rename to datasets_old_2022_10/interface.py diff --git a/nn_raw/lm.py b/nn_raw/lm.py index 0686eb82..b917d75b 100644 --- a/nn_raw/lm.py +++ b/nn_raw/lm.py @@ -3,7 +3,7 @@ Language model functions. """ -from ..datasets.interface import VocabConfig +from ..datasets_old_2022_10.interface import VocabConfig from typing import Dict, Any diff --git a/nn_raw/transducer/transducer_fullsum.py b/nn_raw/transducer/transducer_fullsum.py index 041aeff8..dde320e1 100644 --- a/nn_raw/transducer/transducer_fullsum.py +++ b/nn_raw/transducer/transducer_fullsum.py @@ -64,7 +64,7 @@ from .recomb_recog import targetb_recomb_recog from .loss import rnnt_loss, rnnt_loss_out_type from ..collect_out_str import make_out_str_func -from ...datasets.interface import TargetConfig +from ...datasets_old_2022_10.interface import TargetConfig class Context: