-
Notifications
You must be signed in to change notification settings - Fork 4
/
preprocess.py
59 lines (46 loc) · 1.92 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
""" preprocess.py
Preprocesses raw wavs and texts in a given experiment folder.
"""
import os
import sys
import argparse
import importlib
import logging
import tacorn.fileutils as fu
import tacorn.constants as consts
import tacorn.experiment as experiment
import tacorn.wrappers as wrappers
# Configure the root logger when this module is loaded: DEBUG level, and
# every record is rendered as "<timestamp> <level name> <message>".
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(levelname)s %(message)s')
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def _get_raw(exp: experiment.Experiment, args):
    """ Placeholder hook for fetching the raw data (and doing any
    pre-preprocessing) before it is passed on to the preprocessing of the
    feature prediction model.

    Neither exp nor args is used at the moment; the result is always None. """
    # The intention is to check the text file format here; no check is
    # implemented yet.
    return None
def preprocess_acoustic_model(exp: experiment.Experiment, args):
    """ Preprocesses data given in args using the experiment stored in exp.

    The wrapper named by exp.config["acoustic_model"] is loaded through
    wrappers.load() and its preprocess() is called with the experiment and
    the command line arguments converted to a dict via vars().

    Args:
        exp: the loaded experiment; its config must provide the key
            "acoustic_model".
        args: parsed command line arguments (any object vars() accepts).
    """
    # NOTE: the _get_raw(exp, args) pre-check is currently disabled.
    model_name = exp.config["acoustic_model"]
    # Hand the value to the logger as an argument so that formatting is
    # deferred to the logging framework rather than done eagerly with "%".
    logger.info("Loading feature model wrapper %s for preprocessing",
                model_name)
    wrappers.load(model_name).preprocess(exp, vars(args))
    logger.info("Preprocessing done")
def main(argv=None):
    """ Main function for preprocessing data.

    Parses the command line, loads the experiment found in the given
    experiment directory and runs the acoustic model preprocessing on it.

    Args:
        argv: optional list of command line arguments (without the program
            name). When None, argparse reads them from sys.argv.

    Exits the process with status 1 if the experiment cannot be loaded.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('experiment_dir',
                        help='Experiment directory.')
    parser.add_argument('--wav_dir', default=None,
                        help='Folder containing wavefiles, if not given the corpus '
                             'is assumed to already be in experiment/raw')
    parser.add_argument('--text_file', default=None,
                        help='Text file containing transcriptions, if not given the corpus '
                             'is assumed to already be in experiment/raw')
    args = parser.parse_args(argv)

    try:
        exp = experiment.load(args.experiment_dir)
    except Exception:
        # Top-level boundary: any failure to load is reported as an invalid
        # experiment folder, but the traceback is logged first so the actual
        # cause is not lost.
        logger.exception("Could not load experiment from %s",
                         args.experiment_dir)
        print("Invalid experiment folder given: %s" % (args.experiment_dir))
        sys.exit(1)

    preprocess_acoustic_model(exp, args)
# Run the preprocessing only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()