% mybib.bib
%% Created for Prasanta Ghosh on 2018-01-20
% Journal article on front-end factor analysis for speaker verification (key: ivector).
% Page range uses an en-dash (--) and month uses the predefined macro so styles can format both.
@article{ivector,
  author   = {N. Dehak and P. J. Kenny and R. Dehak and P. Dumouchel and P. Ouellet},
  title    = {Front-End Factor Analysis for Speaker Verification},
  journal  = {IEEE Transactions on Audio, Speech, and Language Processing},
  year     = {2011},
  month    = may,
  volume   = {19},
  number   = {4},
  pages    = {788--798},
  doi      = {10.1109/TASL.2010.2064307},
  issn     = {1558-7916},
  keywords = {speaker recognition;support vector machines;front end factor analysis;speaker representation;low dimensional speaker;channel dependent space;variability space;support vector machine;cosine kernel;similarity estimation;decision score;channel compensation technique;within class covariance normalization;linear discriminate analysis;nuisance attribute projection;speaker recognition evaluation;speaker verification;Support vector machines;Kernel;Testing;Linear discriminant analysis;NIST;Speaker recognition;Permission;Natural languages;Speech analysis;Context modeling;Cosine distance scoring;joint factor analysis (JFA);support vector machines (SVMs);total variability space}
}
% Online catalog entry (catalog.ldc.upenn.edu) for HKUST Mandarin Telephone Speech, Part 1.
% The acronym and the language name are braced so sentence-casing styles keep their capitals.
@online{hkust1,
  author = {Fung, Pascale and Huang, Shudong and Graff, David},
  title  = {{HKUST} {Mandarin} Telephone Speech, Part 1},
  year   = {2005},
  url    = {https://catalog.ldc.upenn.edu/LDC2005S15},
}
% arXiv preprint (dblp export) introducing the THCHS-30 speech corpus.
% "Chinese" is braced so sentence-casing styles do not lowercase the proper noun.
@article{thchs30,
  author        = {Dong Wang and
                   Xuewei Zhang},
  title         = {{THCHS-30} : {A} Free {Chinese} Speech Corpus},
  journal       = {CoRR},
  volume        = {abs/1512.01882},
  year          = {2015},
  url           = {http://arxiv.org/abs/1512.01882},
  archivePrefix = {arXiv},
  eprint        = {1512.01882},
  timestamp     = {Mon, 13 Aug 2018 16:46:59 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/WangZ15e},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2016 paper on lattice-free MMI sequence training (key: lfmmi).
% Acronyms are braced against sentence-casing; the page range uses an en-dash (--).
@inproceedings{lfmmi,
  author    = {Povey, Daniel and Peddinti, Vijayaditya and Galvez, Daniel and Ghahremani, Pegah and Manohar, Vimal and Na, Xingyu and Wang, Yiming and Khudanpur, Sanjeev},
  title     = {Purely Sequence-Trained Neural Networks for {ASR} Based on Lattice-Free {MMI}},
  booktitle = {Proceedings of Interspeech},
  publisher = {ISCA},
  year      = {2016},
  pages     = {2751--2755}
}
% Interspeech 2015 paper on a time delay neural network architecture (key: tdnn).
@inproceedings{tdnn,
  title     = {A time delay neural network architecture for efficient modeling of long temporal contexts},
  author    = {Peddinti, Vijayaditya and Povey, Daniel and Khudanpur, Sanjeev},
  year      = {2015},
  booktitle = {Proceedings of Interspeech},
  publisher = {ISCA}
}
% Journal article on adversarial-training data augmentation for language models (key: do2018_2).
% The original volume value "44.5" fused volume and issue; they are stored separately here.
% The journal name is stored in normal capitalisation rather than forced upper case.
@article{do2018_2,
  author  = {Zhang, Y and Zhang, P and Yan, Y},
  title   = {Data Augmentation for Language Models via Adversarial Training},
  journal = {Acta Automatica Sinica},
  volume  = {44},
  number  = {5},
  pages   = {891--900},
  year    = {2018}
}
% arXiv preprint (dblp export) on GRU-based acoustic modeling with future context (key: do2018_1).
@article{do2018_1,
  title         = {Gated Recurrent Unit Based Acoustic Modeling with Future Context},
  author        = {Li, Jie and Wang, Xiaorui and Zhao, Yuanyuan and Li, Yan},
  year          = {2018},
  journal       = {CoRR},
  volume        = {abs/1805.07024},
  eprint        = {1805.07024},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1805.07024},
  timestamp     = {Tue, 05 Jun 2018 18:50:11 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1805-07024},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% CVPR 2009 paper introducing the ImageNet database (key: imagenet).
% Only the proper noun is braced (not the whole title) so styles can still apply their casing.
% The informal venue tag "CVPR09" is spelled out as the conference name.
@inproceedings{imagenet,
  author    = {Deng, J. and Dong, W. and Socher, R. and Li, L.-J. and Li, K. and Fei-Fei, L.},
  title     = {{ImageNet}: A Large-Scale Hierarchical Image Database},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2009}
}
% arXiv preprint on the Microsoft COCO dataset (key: coco).
% Only the proper nouns are braced instead of the whole title.
% The eprint fields are derived from the arXiv identifier already present in the volume field,
% matching the other CoRR entries in this file.
@article{coco,
  author        = {Tsung{-}Yi Lin and
                   Michael Maire and
                   Serge J. Belongie and
                   Lubomir D. Bourdev and
                   Ross B. Girshick and
                   James Hays and
                   Pietro Perona and
                   Deva Ramanan and
                   Piotr Doll{\'{a}}r and
                   C. Lawrence Zitnick},
  title         = {{Microsoft} {COCO}: Common Objects in Context},
  journal       = {CoRR},
  volume        = {abs/1405.0312},
  year          = {2014},
  url           = {http://arxiv.org/abs/1405.0312},
  archivePrefix = {arXiv},
  eprint        = {1405.0312}
}
% Conference paper describing the AIShell-1 corpus and baseline (key: aishell1).
% The status text "Submitted" is not a page range, so it is stored in note instead of pages.
% Only proper nouns and acronyms are braced instead of the whole title.
@inproceedings{aishell1,
  author    = {Hui Bu and Jiayu Du and Xingyu Na and Bengu Wu and Hao Zheng},
  title     = {{AIShell-1}: An Open-Source {Mandarin} Speech Corpus and A Speech Recognition Baseline},
  booktitle = {Oriental {COCOSDA} 2017},
  year      = {2017},
  note      = {Submitted}
}
% Workshop paper presenting the Kaldi speech recognition toolkit (key: kaldi).
% Only the toolkit name is braced in the title so styles can still apply their casing.
@inproceedings{kaldi,
  author    = {Povey, Daniel and Ghoshal, Arnab and Boulianne, Gilles and Burget, Lukas and Glembek, Ondrej and Goel, Nagendra and Hannemann, Mirko and Motlicek, Petr and Qian, Yanmin and Schwarz, Petr and Silovsky, Jan and Stemmer, Georg and Vesely, Karel},
  title     = {The {Kaldi} Speech Recognition Toolkit},
  booktitle = {{IEEE} 2011 Workshop on Automatic Speech Recognition and Understanding},
  year      = {2011}
}
% Journal article on multitask learning for phone recognition with mismatched transcription (key: do2017).
% Whole-title braces removed, page range uses an en-dash (--), month uses the predefined macro.
@article{do2017,
  author  = {V. H. Do and N. F. Chen and B. P. Lim and M. A. Hasegawa-Johnson},
  title   = {Multitask Learning for Phone Recognition of Underresourced Languages Using Mismatched Transcription},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  year    = {2018},
  month   = mar,
  volume  = {26},
  number  = {3},
  pages   = {501--514}
}
% Book by San Duanmu on the phonology of standard Chinese (key: pinyin).
% Title is entered in Title Case with only the proper noun braced, so each style can
% choose its own casing without losing the capital in "Chinese".
@book{pinyin,
  author    = {Duanmu, San},
  title     = {The Phonology of Standard {Chinese}},
  publisher = {Oxford University Press},
  year      = {2007}
}
% Software reference for the Jieba word segmentation tool (key: jieba).
% Stored as misc because there is no journal, which the article type requires.
% NOTE(review): the url is the project's public repository as best known; confirm before publishing.
@misc{jieba,
  author = {Sun, J},
  title  = {{Jieba} {Chinese} word segmentation tool},
  year   = {2012},
  url    = {https://github.com/fxsjy/jieba}
}
% ICASSP 2017 paper on data augmentation of reverberant speech (key: tomko).
% Whole-title braces removed; the title contains nothing that needs case protection.
@inproceedings{tomko,
  author       = {Ko, Tom and Peddinti, Vijayaditya and Povey, Daniel and Seltzer, Michael L and Khudanpur, Sanjeev},
  title        = {A study on data augmentation of reverberant speech for robust speech recognition},
  booktitle    = {Acoustics, Speech and Signal Processing (ICASSP), 2017 IEEE International Conference on},
  pages        = {5220--5224},
  year         = {2017},
  organization = {IEEE}
}
% ICASSP 2013 paper on rectified linear units and dropout for LVCSR (key: relu).
% Only the acronym is braced instead of the whole title.
@inproceedings{relu,
  author       = {Dahl, George E and Sainath, Tara N and Hinton, Geoffrey E},
  title        = {Improving deep neural networks for {LVCSR} using rectified linear units and dropout},
  booktitle    = {Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on},
  pages        = {8609--8613},
  year         = {2013},
  organization = {IEEE}
}
% ICASSP 2013 paper on speech recognition with deep recurrent neural networks (key: lstm).
% The booktitle capitalisation is restored so it matches the other ICASSP 2013 entries in this file.
@inproceedings{lstm,
  author       = {Graves, Alex and Mohamed, Abdel-rahman and Hinton, Geoffrey},
  title        = {Speech recognition with deep recurrent neural networks},
  booktitle    = {Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on},
  pages        = {6645--6649},
  year         = {2013},
  organization = {IEEE}
}
% Conference paper on LSTM recurrent network architectures for large scale acoustic modeling (key: lstmp).
% The booktitle names a conference and an association, so its capitalisation is restored.
@inproceedings{lstmp,
  author    = {Sak, Ha{\c{s}}im and Senior, Andrew and Beaufays, Fran{\c{c}}oise},
  title     = {Long short-term memory recurrent neural network architectures for large scale acoustic modeling},
  booktitle = {Fifteenth Annual Conference of the International Speech Communication Association},
  year      = {2014}
}
% ICASSP 2013 paper investigating deep neural networks for noise robust speech recognition (key: adapt).
@inproceedings{adapt,
  author       = {Seltzer, Michael L and Yu, Dong and Wang, Yongqiang},
  title        = {An investigation of deep neural networks for noise robust speech recognition},
  year         = {2013},
  booktitle    = {Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on},
  organization = {IEEE},
  pages        = {7398--7402}
}
% Journal article: empirical study of smoothing techniques for language modeling (key: kn).
@article{kn,
  author    = {Chen, Stanley F and Goodman, Joshua},
  title     = {An empirical study of smoothing techniques for language modeling},
  year      = {1999},
  journal   = {Computer Speech \& Language},
  publisher = {Elsevier},
  volume    = {13},
  number    = {4},
  pages     = {359--394}
}