forked from keithito/tacotron
-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for CSS10 datasets and improved docker image to reuse pip install
- Loading branch information
1 parent
00b92b5
commit dba5582
Showing
5 changed files
with
74 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.git/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,12 @@ | ||
FROM tensorflow/tensorflow:1.8.0-py3 | ||
|
||
RUN mkdir /root/mimic2 | ||
COPY . /root/mimic2 | ||
WORKDIR /root/mimic2 | ||
|
||
COPY requirements.txt /root/mimic2/requirements.txt | ||
RUN pip install --no-cache-dir -r requirements.txt | ||
RUN apt update && apt install -y ffmpeg | ||
|
||
COPY . /root/mimic2 | ||
|
||
ENTRYPOINT [ "/bin/bash" ] | ||
ENTRYPOINT [ "/bin/bash" ] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from concurrent.futures import ProcessPoolExecutor | ||
from functools import partial | ||
import numpy as np | ||
import os | ||
|
||
from util import audio | ||
|
||
|
||
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  '''Preprocesses the css10 dataset from a given input path into a given output directory.

  Reads `transcript.txt` from `in_dir`. Each non-blank line is split on '|':
  the first field is the wav path relative to `in_dir`, the second field is
  the text. Every utterance is handed to _process_utterance in a worker process.

  Args:
    in_dir: Directory containing `transcript.txt` and the audio files it references.
    out_dir: Directory passed to _process_utterance as the output location.
    num_workers: Maximum number of worker processes.
    tqdm: Optional wrapper applied to the list of futures (e.g. a progress bar).

  Returns:
    A list holding the result of _process_utterance for each utterance, in
    transcript order.
  '''
  futures = []

  # The context manager shuts the pool down once all results are collected,
  # so worker processes do not outlive this call.
  with ProcessPoolExecutor(max_workers=num_workers) as executor:
    # Read the transcript file
    with open(os.path.join(in_dir, 'transcript.txt'), encoding='utf-8') as f:
      for line in f:
        line = line.strip()
        if not line:
          # Blank lines (e.g. a trailing newline) carry no utterance.
          continue
        parts = line.split('|')
        path = os.path.join(in_dir, parts[0])
        text = parts[1]
        # The second '/'-separated segment of the relative path names the utterance.
        prompt_id = parts[0].split('/')[1]
        futures.append(executor.submit(partial(_process_utterance, out_dir, prompt_id, path, text)))

    return [future.result() for future in tqdm(futures)]
|
||
|
||
def _process_utterance(out_dir, prompt_id, wav_path, text):
  '''Preprocesses a single utterance audio/text pair.

  Writes the linear and mel spectrograms of the wav to `out_dir` as .npy files.

  Args:
    out_dir: Directory the spectrogram files are written to.
    prompt_id: Identifier embedded in the output filenames.
    wav_path: Path to the audio file to load.
    text: Text of the utterance; returned unchanged.

  Returns:
    A (spectrogram_filename, mel_filename, n_frames, text) tuple describing
    this training example. The filenames are relative to `out_dir`.
  '''
  # Load the audio to a numpy array:
  wav = audio.load_wav(wav_path)

  # Compute the linear-scale spectrogram from the wav:
  spectrogram = audio.spectrogram(wav).astype(np.float32)

  # Compute a mel-scale spectrogram from the wav:
  mel_spectrogram = audio.melspectrogram(wav).astype(np.float32)

  # Write the spectrograms to disk. Both files share the 'css10-' prefix;
  # callers get the exact names back through the returned tuple.
  spectrogram_filename = 'css10-spec-%s.npy' % prompt_id
  mel_filename = 'css10-mel-%s.npy' % prompt_id
  np.save(os.path.join(out_dir, spectrogram_filename), spectrogram.T, allow_pickle=False)
  np.save(os.path.join(out_dir, mel_filename), mel_spectrogram.T, allow_pickle=False)

  # Return a tuple describing this training example.
  # n_frames is the second axis of the spectrogram (the first axis of the saved, transposed array).
  n_frames = spectrogram.shape[1]
  return (spectrogram_filename, mel_filename, n_frames, text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters