-
Notifications
You must be signed in to change notification settings - Fork 33
/
SRMR_CI.m
150 lines (124 loc) · 4.38 KB
/
SRMR_CI.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
function [ratio, energy] = SRMR_CI(s,varargin)
%SRMR Computes the SRMR-CI score of the given signal.
% [ratio, energy] = SRMR_CI(s, fs, 'norm', 0, 'minCF', 4, 'maxCF', 128)
%
% s: either the path to a WAV file or an array containing a single-channel speech sentence.
% fs: sampling rate of the data in `s`. If `s` is the path to a WAV file, this parameter
% has to be omitted.
% 'norm', N: flag to activate (`N = 1`)/deactivate (`N = 0`) the normalization step in the
% modulation spectrum representation, used for variability reduction. The default is `'norm', 0`.
% 'minCF', cf1: value of the center frequency of the first filter in the modulation filterbank.
% The default value is 4 Hz.
% 'maxCF', cf8: value of the center frequency of the first filter in the modulation filterbank.
% The default value is 128 Hz if the normalization is off and 30 Hz if normalization is on.
%
% ratio: the SRMR-CI score.
% energy: a 3D matrix with the per-frame modulation spectrum extracted from the input.
% Load (if needed) and preprocess file
if ischar(s)
fs = [];
args = {varargin{:}};
else
if isempty(varargin)
error('Second argument must be the sampling rate if input is a vector');
else
if ~isnumeric(varargin{1})
error('Second argument must be the sampling rate if input is a vector');
else
fs = varargin{1};
end
end
args = {varargin{2:end}};
end
[s, fs] = preprocess(s, fs);
if fs ~= 8000 && fs ~= 16000
warning('fs = %d, but SRMR has only been tested for fs = 8/16 kHz.', fs);
end
fast = 0;
norm = 0;
% Parameter parsing
for i = 1 : 2 : length(args)
name = args{i};
value = args{i+1};
switch name
case 'fast'
error('SRMR-CI does not have a fast implementation yet.');
case 'norm'
norm = value;
case 'minCF'
minCF = value;
case 'maxCF'
maxCF = value;
otherwise
error('Wrong parameter in parameter list');
end
end
%% Fixed parameters:
% Cochlear filterbank
nCochlearFilters = 22;
lowFreq = 150;
% Modulation filterbank
nModFilters = 8;
if ~exist('minCF','var')
minCF = 4;
end
if ~exist('maxCF', 'var')
if norm == 1
maxCF = 30;
else
maxCF = 64;
end
end
wLengthS = 0.256; % Window length in seconds.
wIncS = 0.064; % Window increment in seconds;
wLength = ceil(wLengthS*fs); % Window length in samples
wInc = ceil(wIncS*fs); % window increment in samples
%% Cochlear filterbank/envelope computation
% Pass the signal through the cochlear filter bank to produce the cochlear
% outputs at each critical band.
% The dimensions will be: cochlearOutputs(nCochlearFilters, nSamples), with
% the last being lowest frequency, and the first being highest frequency.
[cochlearOutputs, bandwidths] = CIFilterBank(fs, nCochlearFilters, lowFreq, s);
% Compute the temporal envelope for each critical band. The dimensions will
% be: temporalEnvelopes(nCochlearFilters, nSamples)
tempEnv = abs(hilbert(cochlearOutputs'))';
%% Modulation spectrum
modFilterCFs = computeModulationCFs(minCF, maxCF, nModFilters);
w = hamming(wLength);
for k=1:nCochlearFilters
modulationOutput = modulationFilterBank(tempEnv(k,:), modFilterCFs, fs, 2);
for m=1:nModFilters
% Window frames with Hamming window
modOutFrame = buffer(modulationOutput(m,:), wLength, (wLength-wInc));
energy(k,m,:) = sum((w(:,ones(1,size(modOutFrame,2))).*modOutFrame).^ 2);
end
end
%% Modulation energy thresholding
if norm
peak_energy = max(max(mean(energy)));
min_energy = peak_energy*0.001;
energy(energy < min_energy) = min_energy;
energy(energy > peak_energy) = peak_energy;
end
%% Computation of K*
cochFilt_BW = flipud(bandwidths);
avg_energy = mean(energy,3);
total_energy = sum(sum(avg_energy));
AC_energy = sum(avg_energy,2);
AC_perc = AC_energy*100./total_energy;
AC_perc_cumsum=cumsum(flipud(AC_perc));
K90perc = find(AC_perc_cumsum>90);
BW = cochFilt_BW(K90perc(1));
cutoffs = calc_cutoffs(modFilterCFs, fs, 2);
if (BW >= cutoffs(5)) && (BW < cutoffs(6))
Kstar=5;
elseif (BW >= cutoffs(6)) && (BW < cutoffs(7))
Kstar=6;
elseif (BW >= cutoffs(7)) && (BW < cutoffs(8))
Kstar=7;
elseif (BW >= cutoffs(8))
Kstar=8;
end
%% Modulation energy ratio
ratio = sum(sum(avg_energy(:,1:4)))/sum(sum(avg_energy(:,5:Kstar)));
end