-
Notifications
You must be signed in to change notification settings - Fork 4
/
SLAM.py
272 lines (218 loc) · 9.61 KB
/
SLAM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# SLAM : a method for the automatic Stylization and LAbelling of speech Melody
# Copyright (C) 2014 Julie BELIAO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -*- coding: utf-8 -*-
"""
#####################################################################
Automatic Stylizer.
#####################################################################
Takes a wave (or PitchTier) file and a TextGrid file as input and computes the
styles of all the intervals of a desired tier with the SLAM algorithm.
PARAMETERS:
I/O:
---
* srcFile : path to the wave or PitchTier file to process
* inputTextgridFile : path to the input TextGrid file
* outputTextgridFile : path to the output TextGrid file
tiers of interest:
------------------
* speakerTier : average register of each speaker is computed
using this tier. For each different label in
this tier, we assume a different speaker, for
whom the average register is computed.
* targetTier : The tier whose intervals will be stylized using
SLAM
display:
-------
* displayExamples : True or False: whether or not to display examples
of stylized f0 segments
* displaySummary : True or False: whether or not to display a small
summary of the distribution of the styles
#####################################################################"""
# ---- pitch analysis (swipe) ----
timeStep = 0.001         # analysis step handed to swipe, in seconds
voicedThreshold = 0.2    # threshold handed to swipe

# ---- tiers: replace these with the tier names of your own TextGrids ----
speakerTier, targetTier = 'locuteur', 'syll'   # speaker tier, tier to stylize

# ---- display ----
examplesDisplayCount = 3   # how many example plots to draw (0 = none)
minLengthDisplay = 30      # an interval needs at least this many f0 points to be plotted
#END OF PARAMETERS (don't touch below please)
#------------------------------------------------------
#imports
from SLAM_utils import TextGrid, swipe, stylize, praatUtil
import sys, glob, os, re
import numpy as np
# Show the current settings. Any non-empty answer to this first prompt
# switches to the per-setting questions below; an empty answer keeps everything.
change = stylize.input_SLAM("""
Current parameters are:
tier to use for categorizing registers : %s
tier to stylize : %s
Number of examples to display : %d
ENTER = ok
anything+ENTER = change
"""%(speakerTier, targetTier,examplesDisplayCount))
print(change)
if change:
    # For each setting, an empty answer keeps the current value.
    new = stylize.input_SLAM('reference tier (empty = keep %s) : '%speakerTier)
    if new:
        speakerTier = new
    new = stylize.input_SLAM('target tier (empty = keep %s) : '%targetTier)
    if new:
        targetTier = new
    # The display count must be an integer: ask again on a bad answer instead
    # of letting int() abort the whole script with a ValueError.
    while True:
        new = stylize.input_SLAM('number of displays (empty = keep %d) : '%examplesDisplayCount)
        if not new:
            break
        try:
            examplesDisplayCount = int(new)
        except ValueError:
            print('Please type an integer number (got %r).' % new)
            continue
        break
# All computed styles and total number of target intervals, accumulated over
# every processed file and used for the final statistics.
styles = []
totalN = 0


def splitInputFiles(paths):
    """Separate TextGrid files from the other (source) files.

    A path counts as a TextGrid file when it matches ``\\.TEXTGRID$``
    case-insensitively; every other path is treated as a source file.

    paths : iterable of file paths
    returns : (tgFiles, srcFiles), two new lists that keep the relative
              order of *paths*; *paths* itself is not modified.
    """
    textgrids = []
    sources = []
    for path in paths:
        if re.search(r'\.TEXTGRID$', path, re.IGNORECASE):
            textgrids.append(path)
        else:
            sources.append(path)
    return textgrids, sources


# Separate input files into tgFiles and srcFiles.
# glob returns its matches in arbitrary order, so sort them: files are then
# processed (and example plots chosen) in the same order on every machine.
tmpFiles = sorted(glob.glob('./data/*.*'))
tgFiles, srcFiles = splitInputFiles(tmpFiles)
# Process every TextGrid queued in tgFiles: load it, obtain a pitch object
# from one of its source files, stylize each interval of the target tier and
# write the TextGrid, extended with a new style tier, to ./output.
while tgFiles:
    # Take the next TextGrid and pull its related source file(s) out of
    # srcFiles (same basename, compared case-insensitively).
    inputTextgridFile = tgFiles.pop(0)
    basename = stylize.get_basename(inputTextgridFile)
    extension = stylize.get_extension(inputTextgridFile)
    outputTextgridFile = './output/{}{}'.format(basename, extension)
    wantedName = basename.lower()
    srcFile = [filename for filename in srcFiles
               if stylize.get_basename(filename).lower() == wantedName]
    for filename in srcFile:
        srcFiles.remove(filename)

    # Create the TextGrid object and list its tier names.
    print('')
    print('Handling %s....'%basename)
    print('Loading input TextGrid...')
    tg = TextGrid.TextGrid()
    tg.read(inputTextgridFile)
    tierNames = [t.name() for t in tg]

    # Ask until the target tier exists in this TextGrid. The answer replaces
    # the module-level setting, so it is also used for the following files.
    while targetTier not in tierNames:
        print(' TextGrid does not have a tier named %s. Available tiers are:'%targetTier)
        for t in tierNames:
            print(' %s'%t)
        targetTier = stylize.input_SLAM('Type the tier name to use (+ENTER):')
    # Same for the speaker tier, except that an empty answer ends the loop.
    # NOTE(review): an empty speakerTier is still used as tg[speakerTier]
    # below; confirm that TextGrid accepts it.
    while speakerTier and speakerTier not in tierNames:
        print(' TextGrid does not have a tier named %s. Available tiers are:'%speakerTier)
        for t in tierNames:
            print(' %s'%t)
        speakerTier = stylize.input_SLAM('Type the tier name indicating speaker (or any categorizing variable):')

    # Create the interval tier for output, spanning the same time range as
    # the target tier.
    newTier = TextGrid.IntervalTier(name = '%sStyle'%targetTier,
                                    xmin = tg[targetTier].xmin(), xmax=tg[targetTier].xmax())

    # Create the pitch object from an external PitchTier file or a wave file.
    sf = None
    # First try every source file as a PitchTier file; a file that cannot be
    # read that way is skipped. Only Exception is caught so that Ctrl-C
    # still interrupts the script.
    for srcPath in srcFile:
        try:
            sf = stylize.readPitchtier(srcPath)
        except Exception:
            sf = None
            continue
        print('Reading pitch from PitchTier file {}'.format(srcPath))
        break
    # Otherwise try the source files as wave files.
    if not sf:
        for srcPath in srcFile:
            # check the header of WAVE
            if not praatUtil.isGoodMonoWav(srcPath):
                continue
            try:
                sf = swipe.Swipe(srcPath, pMin=75, pMax=500, s=timeStep, t=voicedThreshold, mel=False)
            except Exception:
                sf = None
                continue
            print('Computing pitch on wave file {}'.format(srcPath))
            break
    # Unknown format: report it and go on with the next TextGrid.
    if not sf:
        print('Error: source files {} are not supported !'.format(srcFile))
        continue

    print('Computing average register for each speaker')
    registers = stylize.averageRegisters(sf, tg[speakerTier])

    print('Stylizing each interval of the target tier')
    # Positions at which progress is reported (every 10 percent).
    LEN = float(len(tg[targetTier]))
    totalN += LEN
    POSdisplay = {int(i/100.0*LEN) for i in range(0, 100, 10)}
    for pos, interval in enumerate(tg[targetTier]):
        if pos in POSdisplay:
            print('stylizing: %d percents'%(pos/LEN*100.0))

        # Compute the style of the current interval.
        (style, original, smooth) = stylize.stylizeObject(interval, sf, tg[speakerTier], registers)

        # Keep computed styles (neither empty nor '_') for the statistics.
        if len(style) and (style != '_'):
            styles.append(style)

        # Add an interval carrying that style to the new style tier.
        newInterval = TextGrid.Interval(interval.xmin(), interval.xmax(), style)
        newTier.append(newInterval)

        # Plot the interval while examples remain to be shown, provided it
        # has a style and enough f0 points.
        if (examplesDisplayCount > 0) and len(style) and len(original) >= minLengthDisplay:
            # Imported here so that pylab is only loaded when a plot is drawn.
            import pylab as pl
            pl.figure(1)
            pl.clf()
            pl.plot(np.linspace(0, 1, len(original)), original, 'b')
            try:
                pl.hold(True)
            except Exception:
                # hold() no longer exists in recent matplotlib releases;
                # the second plot is drawn either way.
                pass
            pl.plot(np.linspace(0, 1, len(smooth)), smooth, 'r')
            pl.title(style)
            pl.grid(True)
            pl.show()
            examplesDisplayCount -= 1

    # Done: add the style tier to the TextGrid and save it.
    print('Saving computed styles in file %s'%outputTextgridFile)
    tg.append(newTier)
    tg.write(outputTextgridFile)
#Now output statistics
#---------------------
# Prints an overall summary of the styles collected in `styles`: the most
# frequent ones, then how many styles are needed to explain a given
# proportion of the observations.
from collections import Counter

# Number of occurrences of every non-empty style, counted in a single pass.
count = Counter(name for name in styles if len(name))
total = float(sum(count.values()))

# Remove styles that appear less than 0.5 percent of the time.
count = {name: n for name, n in count.items() if n/total >= 0.005}

# Occurrence counts of the remaining styles, from most to least important.
sorted_values = np.sort(np.array(list(count.values())))[::-1]
# From here on `total` only covers the nonnegligible styles, so the
# percentages printed below are relative to them.
total = float(sum(count.values()))
L = min(len(count), 20)
print("""
------------------------------------------------------------------
SLAM analysis overall summary:
------------------------------------------------------------------
- %d intervals to stylize.
- %d intervals with a non empty style (others are unvoiced)
- %d resulting styles appearing in total
- %d resulting nonnegligible styles (appearing more than 0.5%% of the time)
------------------------------------------------------------------
- The %d most important nonnegligible styles along with their frequency are:"""%(
    totalN,
    len(styles),
    len(set(styles)),
    len(count),
    L))

# Most frequent first; styles with equal counts keep their order of first
# appearance in `styles`.
styleNames = sorted(count, key=count.get, reverse=True)
for styleName in styleNames[:L]:
    print('\t%s\t:\t:%0.1f%% (%d occurrences)'%(styleName,count[styleName]/total*100.0,count[styleName]))

print('''
x------------------------------------------x---------------------x
| explained proportion of the observations | number of styles |
| (percents) | |
x------------------------------------------x---------------------x''')
# The table rows need at least one style: when nothing was stylized (or
# every style is negligible) there is no last cumulative value to normalize
# by, so the rows are skipped instead of failing with an IndexError.
if len(sorted_values):
    cumulative_values = np.cumsum(sorted_values)
    cumulative_values = cumulative_values/float(cumulative_values[-1])
    for P in [70, 75, 80, 85, 90, 95, 99]:
        # 1-based rank of the first style at which the cumulative
        # proportion exceeds P percent.
        N = np.nonzero(cumulative_values > P/100.0)[0][0]+1
        print('| %d | %d |'%(P,N))
print('x------------------------------------------x---------------------x')