From 7d2355c90e087c8b75ce42f15f590cf9af110a0f Mon Sep 17 00:00:00 2001 From: Vichoko Date: Sat, 22 Aug 2020 17:11:41 -0400 Subject: [PATCH 1/2] Updated 1DPadding due deprecation of class-based PyTorch Functions. --- wavenet_model.py | 18 +++++++------ wavenet_modules.py | 64 ++++++++++++---------------------------------- 2 files changed, 27 insertions(+), 55 deletions(-) diff --git a/wavenet_model.py b/wavenet_model.py index 95301f809..77af306b7 100644 --- a/wavenet_model.py +++ b/wavenet_model.py @@ -1,8 +1,10 @@ import os import os.path import time -from wavenet_modules import * +import torch.nn.functional as F from audio_data import * +from torch import nn +from wavenet_modules import * class WaveNetModel(nn.Module): @@ -25,6 +27,7 @@ class WaveNetModel(nn.Module): - Output: :math:`()` L should be the length of the receptive field """ + def __init__(self, layers=10, blocks=4, @@ -109,9 +112,9 @@ def __init__(self, new_dilation *= 2 self.end_conv_1 = nn.Conv1d(in_channels=skip_channels, - out_channels=end_channels, - kernel_size=1, - bias=True) + out_channels=end_channels, + kernel_size=1, + bias=True) self.end_conv_2 = nn.Conv1d(in_channels=end_channels, out_channels=classes, @@ -153,7 +156,7 @@ def wavenet(self, input, dilation_func): # parametrized skip connection s = x if x.size(2) != 1: - s = dilate(x, 1, init_dilation=dilation) + s = dilate(x, 1, init_dilation=dilation) s = self.skip_convs[i](s) try: skip = skip[:, :, -s.size(2):] @@ -222,7 +225,7 @@ def generate(self, prob = prob.cpu() np_prob = prob.data.numpy() x = np.random.choice(self.classes, p=np_prob) - x = Variable(torch.LongTensor([x]))#np.array([x]) + x = Variable(torch.LongTensor([x])) # np.array([x]) else: x = torch.max(x, 0)[1].float() @@ -301,7 +304,7 @@ def generate_fast(self, input.zero_() input = input.scatter_(1, x.view(1, -1, 1), 1.).view(1, self.classes, 1) - if (i+1) == 100: + if (i + 1) == 100: toc = time.time() print("one generating step does take approximately " + str((toc - tic) * 
0.01) + " seconds)") @@ -314,7 +317,6 @@ def generate_fast(self, mu_gen = mu_law_expansion(generated, self.classes) return mu_gen - def parameter_count(self): par = list(self.parameters()) s = sum([np.prod(list(d.size())) for d in par]) diff --git a/wavenet_modules.py b/wavenet_modules.py index e3eb022f1..e985f4b34 100644 --- a/wavenet_modules.py +++ b/wavenet_modules.py @@ -1,10 +1,10 @@ import math +import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -from torch.nn import Parameter from torch.autograd import Variable, Function -import numpy as np +from torch.nn import Parameter def dilate(x, dilation, init_dilation=1, pad_start=True): @@ -24,7 +24,7 @@ def dilate(x, dilation, init_dilation=1, pad_start=True): new_l = int(np.ceil(l / dilation_factor) * dilation_factor) if new_l != l: l = new_l - x = constant_pad_1d(x, new_l, dimension=2, pad_start=pad_start) + x = constant_pad_1d(x, new_l, pad_start=pad_start) l_old = int(round(l / dilation_factor)) n_old = int(round(n * dilation_factor)) @@ -77,51 +77,21 @@ def reset(self): self.out_pos = 0 -class ConstantPad1d(Function): - def __init__(self, target_size, dimension=0, value=0, pad_start=False): - super(ConstantPad1d, self).__init__() - self.target_size = target_size - self.dimension = dimension - self.value = value - self.pad_start = pad_start - - def forward(self, input): - self.num_pad = self.target_size - input.size(self.dimension) - assert self.num_pad >= 0, 'target size has to be greater than input size' - - self.input_size = input.size() - - size = list(input.size()) - size[self.dimension] = self.target_size - output = input.new(*tuple(size)).fill_(self.value) - c_output = output - - # crop output - if self.pad_start: - c_output = c_output.narrow(self.dimension, self.num_pad, c_output.size(self.dimension) - self.num_pad) - else: - c_output = c_output.narrow(self.dimension, 0, c_output.size(self.dimension) - self.num_pad) - - c_output.copy_(input) - return output - - def 
backward(self, grad_output): - grad_input = grad_output.new(*self.input_size).zero_() - cg_output = grad_output - - # crop grad_output - if self.pad_start: - cg_output = cg_output.narrow(self.dimension, self.num_pad, cg_output.size(self.dimension) - self.num_pad) - else: - cg_output = cg_output.narrow(self.dimension, 0, cg_output.size(self.dimension) - self.num_pad) - - grad_input.copy_(cg_output) - return grad_input - - def constant_pad_1d(input, target_size, - dimension=0, value=0, pad_start=False): - return ConstantPad1d(target_size, dimension, value, pad_start)(input) + """ + Assumes that the padded dimension is 2, following the PyTorch ConstantPad1d specification. + Input: (N, C, W_{in}) + Output: (N, C, W_{out}), where W_{out} = target_size + :param input: + :param target_size: + :param value: + :param pad_start: + :return: + """ + num_pad = target_size - input.size(2) + assert num_pad >= 0, 'target size has to be greater than input size' + padding = (num_pad, 0) if pad_start else (0, num_pad) + return torch.nn.ConstantPad1d(padding, value)(input) From c7fe85d56c14d268f265cf7a4d2dab6e443d05ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicente=20Oyanedel=20Mu=C3=B1oz?= Date: Sat, 22 Aug 2020 19:50:36 -0400 Subject: [PATCH 2/2] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e8754944f..1a35de8ed 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # pytorch-wavenet This is an implementation of the WaveNet architecture, as described in the [original paper](https://arxiv.org/abs/1609.03499). +Updated to work on PyTorch 1.6. ## Features - Automatic creation of a dataset (training and validation/test set) from all sound files (.wav, .aiff, .mp3) in a directory @@ -9,7 +10,7 @@ This is an implementation of the WaveNet architecture, as described in the [orig ## Requirements - python 3 -- pytorch 0.3 +- pytorch 1.6 - numpy - librosa - jupyter