dl4m.bib

@inproceedings{Bharucha1988,
  author    = {Bharucha, J.},
  booktitle = {Proceedings of the First Workshop on Artificial Intelligence and Music},
  pages     = {173--182},
  title     = {Neural net modeling of music},
  year      = {1988}
}

@inproceedings{Lewis1988,
  abstract  = {The author describes a paradigm for creating novel examples from the class of patterns recognized by a trained gradient-descent associative learning network. The paradigm consists of a learning phase, in which the network learns to identify patterns of the desired class, followed by a simple synthesis algorithm, in which a haphazard 'creation' is refined by a gradient-descent search complementary to the one used in learning. This paradigm is an alternative to one in which novel patterns are obtained by applying novel inputs to a learned mapping, and can be used for creative problems, such as music composition, which are not described by an input-output mapping. A simple simulation is shown in which a back-propagation network learns to judge simple patterns representing musical motifs, and then creates similar motifs.},
  author    = {Lewis, J. P.},
  booktitle = {IEEE_ICNN},
  doi       = {10.1109/ICNN.1988.23933},
  link      = {http://ieeexplore.ieee.org/stamp/stamp.jsp?arnumber=23933},
  month     = {Jul.},
  pages     = {229--233},
  title     = {Creation by refinement: A creativity paradigm for gradient descent learning networks},
  volume    = {2},
  year      = {1988}
}

@inproceedings{Todd1988,
  author    = {Todd, Peter M.},
  booktitle = {Connectionist Models Summer School},
  pages     = {76--84},
  task      = {Composition},
  title     = {A sequential network design for musical applications},
  year      = {1988}
}

@article{Laden1989,
  author    = {Laden, Bernice and Keefe, Douglas H.},
  issn      = {01489267, 15315169},
  journal   = {[Computer Music Journal](http://computermusicjournal.org/)},
  link      = {http://www.jstor.org/stable/3679550},
  number    = {4},
  pages     = {12--26},
  publisher = {The MIT Press},
  task      = {Chord recognition},
  title     = {The representation of pitch in a neural net model of chord classification},
  volume    = {13},
  year      = {1989}
}

@inproceedings{Lewis1989,
  author    = {Lewis, J. P.},
  booktitle = {ICMC},
  link      = {https://quod.lib.umich.edu/cgi/p/pod/dod-idx/algorithms-for-music-composition.pdf?c=icmc;idno=bbp2372.1989.044;format=pdf},
  publisher = {Ann Arbor, MI: Michigan Publishing, University of Michigan Library},
  task      = {Composition},
  title     = {Algorithms for music composition by neural nets: Improved CBR paradigms},
  year      = {1989}
}

@article{Todd1989,
  author    = {Todd, Peter M.},
  issn      = {01489267, 15315169},
  journal   = {[Computer Music Journal](http://computermusicjournal.org/)},
  link      = {http://www.jstor.org/stable/3679551},
  number    = {4},
  pages     = {27--43},
  publisher = {The MIT Press},
  task      = {Composition},
  title     = {A connectionist approach to algorithmic composition},
  volume    = {13},
  year      = {1989}
}

% NOTE(review): the citation key below says 1999 but the year field is 1994, and this
% record sits between the 1989 and 1995 records. The key is left untouched because
% renaming it would break existing citations; confirm which of the two is intended.
@article{Mozer1999,
  author = {Mozer, Michael C.},
  journal = {[Connection Science](http://www.tandfonline.com/toc/ccos20/current)},
  link = {http://www-labs.iro.umontreal.ca/~pift6080/H09/documents/papers/mozer-music.pdf},
  number = {2-3},
  pages = {247--280},
  publisher = {Taylor \& Francis},
  task = {Composition},
  title = {Neural network music composition by prediction: Exploring the benefits of psychoacoustic constraints and multi-scale processing},
  volume = {6},
  year = {1994}
}

@inproceedings{Kaminsky1995,
  activation       = {Sigmoid},
  address          = {Perth, WA, Australia},
  architecture     = {No},
  author           = {Kaminsky, I. and Materka, Andrzej},
  batch            = {No},
  booktitle        = {IEEE_ICNN},
  code             = {No},
  computationtime  = {No},
  dataaugmentation = {No},
  dataset          = {Inhouse},
  dimension        = {1D},
  doi              = {10.1109/ICNN.1995.488091},
  dropout          = {No},
  epochs           = {No},
  framework        = {No},
  gpu              = {No},
  input            = {Raw audio},
  layers           = {1},
  learningrate     = {0.25},
  link             = {https://www.researchgate.net/publication/3622871_Automatic_source_identification_of_monophonic_musical_instrument_sounds},
  loss             = {No},
  metric           = {No},
  momentum         = {0.15},
  month            = {Nov.},
  note             = {https://ieeexplore.ieee.org/document/488091},
  optimizer        = {No},
  pages            = {189--194},
  reproducible     = {No},
  task             = {Instrument recognition},
  title            = {Automatic source identification of monophonic musical instrument sounds},
  volume           = {1},
  year             = {1995}
}

@inproceedings{Matityaho1995,
  address      = {Israel},
  author       = {Matityaho, Benyamin and Furst, Miriam},
  booktitle    = {Convention of Electrical and Electronics Engineers},
  link         = {http://ieeexplore.ieee.org/abstract/document/514161/},
  organization = {IEEE},
  pages        = {4.3.4/1--4.3.4/5},
  task         = {MGR},
  title        = {Neural network based model for classification of music type},
  year         = {1995}
}

@inproceedings{Dannenberg1997,
  author    = {Dannenberg, Roger B and Thom, Belinda and Watson, David},
  booktitle = {ICMC},
  link      = {http://repository.cmu.edu/cgi/viewcontent.cgi?article=1496&context=compsci},
  publisher = {University of Michigan},
  task      = {MSR},
  title     = {A machine learning approach to musical style recognition},
  year      = {1997}
}

@inproceedings{Soltau1998,
  activation       = {No},
  address          = {Seattle, Washington, USA},
  architecture     = {DNN},
  author           = {Soltau, Hagen and Schultz, Tanja and Westphal, Martin and Waibel, Alex},
  batch            = {No},
  booktitle        = {ICASSP},
  code             = {No},
  computationtime  = {No},
  dataaugmentation = {No},
  dataset          = {Inhouse},
  dimension        = {2D},
  dropout          = {No},
  epochs           = {No},
  framework        = {No},
  gpu              = {No},
  input            = {10x5 cepstral coefficients},
  layers           = {3},
  learningrate     = {No},
  link             = {https://www.ri.cmu.edu/pub_files/pub1/soltau_hagen_1998_2/soltau_hagen_1998_2.pdf},
  loss             = {No},
  metric           = {No},
  momentum         = {No},
  month            = {May},
  note             = {10 units in hidden layer},
  optimizer        = {No},
  organization     = {IEEE},
  pages            = {1137--1140},
  reproducible     = {No},
  task             = {MGR},
  title            = {Recognition of music types},
  volume           = {2},
  year             = {1998}
}

@book{Griffith1999,
  author    = {Griffith, Niall and Todd, Peter M.},
  link      = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.39.6248&rep=rep1&type=pdf},
  publisher = {MIT Press},
  title     = {Musical networks: Parallel distributed perception and performance},
  year      = {1999}
}

@inproceedings{Franklin2001,
  architecture = {RNN},
  author       = {Franklin, Judy A},
  booktitle    = {Biennial Symposium for Arts and Technology},
  link         = {http://www.cs.smith.edu/~jfrankli/papers/CtColl01.pdf},
  task         = {Composition},
  title        = {Multi-phase learning for jazz improvisation and interaction},
  year         = {2001}
}

@inproceedings{Buzzanca2002,
  author    = {Buzzanca, Giuseppe},
  booktitle = {Music and Artificial Intelligence. Additional Proceedings of the Second International Conference, ICMAI},
  link      = {https://www.researchgate.net/profile/Giuseppe_Buzzanca/publication/228588086_A_supervised_learning_approach_to_musical_style_recognition/links/54b43ee90cf26833efd0109f.pdf},
  pages     = {167},
  task      = {MGR},
  title     = {A supervised learning approach to musical style recognition},
  volume    = {2002},
  year      = {2002}
}

@inproceedings{Eck2002,
  activation       = {Logistic Sigmoid},
  address          = {Martigny, Valais, Switzerland},
  architecture     = {RNN-LSTM},
  author           = {Eck, Douglas and Schmidhuber, Juergen},
  batch            = {No},
  booktitle        = {[NNSP](http://cogsys.imm.dtu.dk/nnsp2002/)},
  code             = {No},
  computationtime  = {15-45 min 1Ghz Pentium},
  dataaugmentation = {No},
  dataset          = {Inhouse},
  dimension        = {1D},
  dropout          = {No},
  epochs           = {No},
  framework        = {No},
  gpu              = {No},
  input            = {Midi Chords & Midi notes},
  layers           = {1},
  learningrate     = {0.00001},
  link             = {http://www-perso.iro.umontreal.ca/~eckdoug/papers/2002_ieee.pdf},
  loss             = {cross-entropy},
  metric           = {cross-entropy},
  momentum         = {0.9},
  month            = {Sep.},
  optimizer        = {SGD},
  organization     = {IEEE},
  pages            = {747--756},
  reproducible     = {No},
  task             = {Composition},
  title            = {Finding temporal structure in music: Blues improvisation with LSTM recurrent networks},
  year             = {2002}
}

@unpublished{Marolt2002,
  activation       = {No},
  address          = {Gothenburg, Sweden},
  architecture     = {MLP},
  author           = {Marolt, Matija and Kavcic, Alenka and Privosnik, Marko},
  batch            = {No},
  code             = {No},
  computationtime  = {No},
  dataaugmentation = {No},
  dataset          = {Inhouse},
  dimension        = {1D},
  dropout          = {No},
  epochs           = {No},
  framework        = {No},
  gpu              = {No},
  input            = {Raw audio signal and synthesized},
  layers           = {1},
  learningrate     = {No},
  link             = {https://www.researchgate.net/profile/Matija_Marolt/publication/2473938_Neural_Networks_for_Note_Onset_Detection_in_Piano_Music/links/00b49525efccc79fed000000.pdf},
  loss             = {No},
  metric           = {No},
  month            = {Sep.},
  note             = {One should take care when citing this article as it is referenced to be published in ICMC 2002 (cf https://scholar.google.fr/scholar?hl=fr&as_sdt=0%2C5&q=Neural+Networks+for+Note+Onset+Detection+in+Piano+Music&btnG=) but it is not in the proceedings of this conference (cf http://dblp.uni-trier.de/db/conf/icmc/icmc2002). If you have more info please get in touch.},
  optimizer        = {No},
  pages            = {1--4},
  reproducible     = {No},
  task             = {Onset detection},
  title            = {Neural networks for note onset detection in piano music},
  year             = {2002}
}

@inproceedings{Nava2004,
  author    = {Nava, Gabriel Pablo and Tanaka, Hidehiko and Ide, Ichiro},
  booktitle = {ISMA},
  link      = {http://www.murase.nuie.nagoya-u.ac.jp/~ide/res/paper/E04-conference-pablo-1.pdf},
  pages     = {289--292},
  task      = {Onset detection},
  title     = {A convolutional-kernel based approach for note onset detection in piano-solo audio signals},
  year      = {2004}
}

@inproceedings{Lee2009,
  architecture = {CDBN},
  author       = {Lee, Honglak and Pham, Peter and Largman, Yan and Ng, Andrew Y},
  booktitle    = {[NIPS](https://nips.cc/)},
  dataset      = {[TIMIT](https://catalog.ldc.upenn.edu/LDC93S1)},
  link         = {http://papers.nips.cc/paper/3674-unsupervised-feature-learning-for-audio-classification-using-convolutional-deep-belief-networks.pdf},
  pages        = {1096--1104},
  task         = {Speaker gender recognition},
  title        = {Unsupervised feature learning for audio classification using convolutional deep belief networks},
  year         = {2009}
}

@phdthesis{Li2010a,
  author    = {Li, Lihua},
  dataset   = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  input     = {MFCC},
  link      = {http://lbms03.cityu.edu.hk/theses/c_ftt/mphil-cs-b39478026f.pdf},
  publisher = {City University of Hong Kong},
  school    = {City University of Hong Kong},
  title     = {Audio musical genre classification using convolutional neural networks and pitch and tempo transformations},
  year      = {2010}
}

@inproceedings{Li2010b,
  author    = {Li, Tom LH and Chan, Antoni B and Chun, A},
  booktitle = {Int. Conf. Data Mining and Applications},
  dataset   = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  input     = {MFCC},
  link      = {https://www.researchgate.net/profile/Antoni_Chan2/publication/44260643_Automatic_Musical_Pattern_Feature_Extraction_Using_Convolutional_Neural_Network/links/02e7e523dac6bb86b0000000.pdf},
  task      = {MGR},
  title     = {Automatic musical pattern feature extraction using convolutional neural network},
  year      = {2010}
}

@inproceedings{Dieleman2011,
  activation       = {Custom},
  architecture     = {CNN & MLP},
  author           = {Dieleman, Sander and Brakel, Philémon and Schrauwen, Benjamin},
  batch            = {No},
  booktitle        = {ISMIR},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {[MSD](https://labrosa.ee.columbia.edu/millionsong/)},
  dropout          = {0.3},
  epochs           = {1},
  framework        = {Theano},
  gpu              = {No},
  learningrate     = {0.005 & 0.0001},
  link             = {http://www.ismir2011.ismir.net/papers/PS6-3.pdf},
  optimizer        = {No},
  pages            = {669--674},
  reproducible     = {No},
  task             = {MGR & Artist recognition},
  title            = {Audio-based music classification with a pretrained convolutional network},
  year             = {2011}
}

@inproceedings{Humphrey2012b,
  architecture = {CNN},
  author       = {Humphrey, Eric J. and Bello, Juan Pablo},
  booktitle    = {ICMLA},
  dataset      = {[Beatles](http://isophonics.net/content/reference-annotations-beatles) & [RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/) & [US Pop](https://labrosa.ee.columbia.edu/projects/musicsim/uspop2002.html)},
  link         = {http://ieeexplore.ieee.org/abstract/document/6406762/},
  loss         = {Cross-entropy},
  organization = {IEEE},
  pages        = {357--362},
  task         = {Chord recognition},
  title        = {Rethinking automatic chord recognition with convolutional neural networks},
  volume       = {2},
  year         = {2012}
}

@inproceedings{Humphrey2012a,
  author    = {Humphrey, Eric J. and Bello, Juan Pablo and LeCun, Yann},
  booktitle = {ISMIR},
  link      = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.294.2304&rep=rep1&type=pdf},
  pages     = {403--408},
  title     = {Moving beyond feature design: Deep architectures and automatic feature learning in music informatics},
  year      = {2012}
}

@inproceedings{Nakashika2012,
  author    = {Nakashika, Toru and Garcia, Christophe and Takiguchi, Tetsuya},
  booktitle = {INTERSPEECH},
  dataset   = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  input     = {GLCM},
  link      = {http://liris.cnrs.fr/Documents/Liris-5602.pdf},
  task      = {MGR},
  title     = {Local-feature-map integration using convolutional neural networks for music genre classification},
  year      = {2012}
}

@inproceedings{Nam2012,
  author    = {Nam, Juhan and Herrera, Jorge and Slaney, Malcolm and Smith, Julius O},
  booktitle = {ISMIR},
  link      = {https://pdfs.semanticscholar.org/099d/85f25e9336f48ff64287a4b53ee5fb64ab51.pdf},
  pages     = {565--570},
  title     = {Learning sparse feature representations for music annotation and retrieval},
  year      = {2012}
}

@inproceedings{Wulfing2012,
  author    = {Wülfing, Jan and Riedmiller, Martin},
  booktitle = {ISMIR},
  dataset   = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  input     = {CQT},
  link      = {http://www.ismir2012.ismir.net/event/papers/139_ISMIR_2012.pdf},
  pages     = {139--144},
  task      = {MGR},
  title     = {Unsupervised learning of local features for music classification},
  year      = {2012}
}

@inproceedings{Dieleman2013,
  author    = {Dieleman, Sander and Schrauwen, Benjamin},
  booktitle = {ISMIR},
  dataset   = {[Magnatagatune](http://mirg.city.ac.uk/codeapps/the-magnatagatune-dataset)},
  input     = {Mel-spectrogram},
  link      = {http://ismir2013.ismir.net/wp-content/uploads/2013/09/69_Paper.pdf},
  loss      = {cross-entropy},
  pages     = {3--8},
  title     = {Multiscale approaches to music audio feature learning},
  year      = {2013}
}

@inproceedings{Schluter2013,
  address   = {Prague, Czech Republic},
  author    = {Schlüter, Jan and Böck, Sebastian},
  booktitle = {International Workshop on Machine Learning and Music},
  input     = {Mel-spectrogram},
  link      = {http://phenicx.upf.edu/system/files/publications/Schlueter_MML13.pdf},
  loss      = {cross-entropy},
  task      = {Onset detection},
  title     = {Musical onset detection with convolutional neural networks},
  year      = {2013}
}

@inproceedings{Oord2013,
  activation   = {ReLU},
  architecture = {CNN},
  author       = {Van den Oord, Aaron and Dieleman, Sander and Schrauwen, Benjamin},
  booktitle    = {[NIPS](https://nips.cc/)},
  dataset      = {[MSD](https://labrosa.ee.columbia.edu/millionsong/) & [Echo Nest Taste Profile Subset](https://labrosa.ee.columbia.edu/millionsong/tasteprofile) & [Last.fm](https://www.last.fm/)},
  framework    = {Theano},
  input        = {MFCC & Mel-Spectro},
  link         = {http://papers.nips.cc/paper/5004-deep-content-based-music-recommendation.pdf},
  metric       = {MSE},
  pages        = {2643--2651},
  task         = {Recommendation},
  title        = {Deep content-based music recommendation},
  year         = {2013}
}

@inproceedings{Coutinho2014,
  architecture = {RNN-LSTM},
  author       = {Coutinho, Eduardo and Weninger, Felix and Schuller, Björn W and Scherer, Klaus R},
  booktitle    = {MediaEval},
  link         = {https://pdfs.semanticscholar.org/8a24/c5131d5a28165f719697028c34b00e6d3f60.pdf},
  task         = {MER},
  title        = {The Munich LSTM-RNN approach to the MediaEval 2014 Emotion In Music task},
  year         = {2014}
}

@inproceedings{Dieleman2014,
  architecture = {CNN},
  author       = {Dieleman, Sander and Schrauwen, Benjamin},
  booktitle    = {ICASSP},
  dataset      = {[Magnatagatune](http://mirg.city.ac.uk/codeapps/the-magnatagatune-dataset)},
  input        = {Raw & Mel-spectrogram},
  link         = {http://ieeexplore.ieee.org/abstract/document/6854950/},
  organization = {IEEE},
  pages        = {6964--6968},
  task         = {MGR},
  title        = {End-to-end learning for music audio},
  year         = {2014}
}

@techreport{Feng2014,
  author      = {Feng, Tao},
  dataset     = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  institution = {University of Illinois at Urbana-Champaign},
  link        = {https://courses.engr.illinois.edu/ece544na/fa2014/Tao_Feng.pdf},
  task        = {MGR},
  title       = {Deep learning for music genre classification},
  year        = {2014}
}

@inproceedings{Gencoglu2014,
  author       = {Gencoglu, Oguzhan and Virtanen, Tuomas and Huttunen, Heikki},
  booktitle    = {EUSIPCO},
  link         = {https://www.cs.tut.fi/sgn/arg/music/tuomasv/dnn_eusipco2014.pdf},
  organization = {IEEE},
  pages        = {506--510},
  title        = {Recognition of acoustic events using deep neural networks},
  year         = {2014}
}

@article{Gwardys2014,
  author  = {Gwardys, Grzegorz and Grzywczak, Daniel},
  dataset = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  journal = {[International Journal of Electronics and Telecommunications](http://ijet.pl/index.php/ijet)},
  link    = {https://www.degruyter.com/downloadpdf/j/eletel.2014.60.issue-4/eletel-2014-0042/eletel-2014-0042.pdf},
  number  = {4},
  pages   = {321--326},
  title   = {Deep image features in music information retrieval},
  volume  = {60},
  year    = {2014}
}

@inproceedings{Humphrey2014,
  author       = {Humphrey, Eric J. and Bello, Juan Pablo},
  booktitle    = {ICASSP},
  dataset      = {[Beatles](http://isophonics.net/content/reference-annotations-beatles) & [RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/) & [US Pop](https://labrosa.ee.columbia.edu/projects/musicsim/uspop2002.html)},
  input        = {CQT},
  link         = {http://www.mirlab.org/conference_papers/International_Conference/ICASSP%202014/papers/p7024-humphrey.pdf},
  organization = {IEEE},
  pages        = {6974--6978},
  task         = {Chord recognition},
  title        = {From music audio to chord tablature: Teaching deep convolutional networks to play guitar},
  year         = {2014}
}

@inproceedings{Schluter2014,
  architecture = {CNN},
  author       = {Schlüter, Jan and Böck, Sebastian},
  booktitle    = {ICASSP},
  dataset      = {Inhouse},
  dimension    = {3D},
  input        = {Mel-spectrogram},
  link         = {http://www.mirlab.org/conference_papers/International_Conference/ICASSP%202014/papers/p7029-schluter.pdf},
  note         = {3D representation as input: 3 STFT signals computed with different windows, i.e. different time-freq resolutions},
  organization = {IEEE},
  pages        = {6979--6983},
  task         = {Onset detection},
  title        = {Improved musical onset detection with convolutional neural networks},
  year         = {2014}
}

@inproceedings{Ullrich2014,
  address   = {Taipei, Taiwan},
  author    = {Ullrich, Karen and Schlüter, Jan and Grill, Thomas},
  booktitle = {ISMIR},
  dataset   = {[SALAMI](http://ddmal.music.mcgill.ca/research/salami/annotations)},
  input     = {Mel-spectrogram},
  link      = {https://dav.grrrr.org/public/pub/ullrich_schlueter_grill-2014-ismir.pdf},
  loss      = {Cross-entropy},
  task      = {Boundary detection},
  title     = {Boundary detection in music structure analysis using convolutional neural networks},
  year      = {2014}
}

@inproceedings{Wang2014,
  architecture    = {DBN},
  author          = {Wang, Xinxi and Wang, Ye},
  booktitle       = {ACM_MM},
  computationtime = {4 hours single GPU},
  dataset         = {[Echo Nest Taste Profile Subset](https://labrosa.ee.columbia.edu/millionsong/tasteprofile) & [7digital](https://7digital.com)},
  epochs          = {No},
  framework       = {Theano},
  gpu             = {15 nodes of 2 Tesla M2090},
  link            = {http://www.smcnus.org/wp-content/uploads/2014/08/reco_MM14.pdf},
  metric          = {RMSE},
  organization    = {ACM},
  pages           = {627--636},
  task            = {Recommendation},
  title           = {Improving content-based and hybrid music recommendation using deep learning},
  year            = {2014}
}

@inproceedings{Zhang2014,
  architecture = {CNN},
  author       = {Zhang, Chiyuan and Evangelopoulos, Georgios and Voinea, Stephen and Rosasco, Lorenzo and Poggio, Tomaso},
  booktitle    = {ICASSP},
  dataset      = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  link         = {http://www.mirlab.org/conference_papers/International_Conference/ICASSP%202014/papers/p7034-zhang.pdf},
  organization = {IEEE},
  pages        = {6984--6988},
  task         = {MGR},
  title        = {A deep representation for invariance and music classification},
  year         = {2014}
}

@inproceedings{Choi2015,
  author    = {Choi, Keunwoo and Fazekas, György and Sandler, Mark Brian and Kim, Jeonghee},
  booktitle = {ISMIR},
  code      = {https://github.com/keunwoochoi/Auralisation},
  dataset   = {Inhouse},
  input     = {STFT},
  link      = {http://ismir2015.uma.es/LBD/LBD24.pdf},
  pages     = {26--30},
  task      = {MGR},
  title     = {Auralisation of deep convolutional neural networks: Listening to learned features},
  year      = {2015}
}

@inproceedings{Durand2015,
  author       = {Durand, Simon and Bello, Juan Pablo and David, Bertrand and Richard, Gaël},
  booktitle    = {ICASSP},
  link         = {http://perso.telecom-paristech.fr/~grichard/Publications/2015-durand-icassp.pdf},
  organization = {IEEE},
  pages        = {409--413},
  task         = {Beat detection},
  title        = {Downbeat tracking with multiple features and deep neural networks},
  year         = {2015}
}

@inproceedings{Grill2015,
  address   = {Nice, France},
  author    = {Grill, Thomas and Schlüter, Jan},
  booktitle = {EUSIPCO},
  dataset   = {[SALAMI](http://ddmal.music.mcgill.ca/research/salami/annotations)},
  input     = {STFT},
  link      = {http://www.ofai.at/~jan.schlueter/pubs/2015_eusipco.pdf},
  task      = {Boundary detection},
  title     = {Music boundary detection using neural networks on spectrograms and self-similarity lag matrices},
  year      = {2015}
}

@inproceedings{Hirvonen2015,
  author       = {Hirvonen, Toni},
  booktitle    = {Audio Engineering Society Convention},
  link         = {https://www.researchgate.net/profile/Toni_Hirvonen/publication/276061831_Classification_of_Spatial_Audio_Location_and_Content_Using_Convolutional_Neural_Networks/links/5550665908ae12808b37fe5a/Classification-of-Spatial-Audio-Location-and-Content-Using-Convolutional-Neural-Networks.pdf},
  organization = {Audio Engineering Society},
  title        = {Classification of spatial audio location and content using convolutional neural networks},
  year         = {2015}
}

@inproceedings{Kereliuk2015a,
  author       = {Kereliuk, Corey and Sturm, Bob L. and Larsen, Jan},
  booktitle    = {WASPAA},
  link         = {http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/6905/pdf/imm6905.pdf},
  organization = {IEEE},
  pages        = {1--5},
  title        = {Deep learning, audio adversaries, and music content analysis},
  year         = {2015}
}

@article{Kereliuk2015b,
  architecture = {CNN},
  author       = {Kereliuk, Corey and Sturm, Bob L. and Larsen, Jan},
  code         = {https://github.com/coreyker/dnn-mgr},
  dataset      = {[GTzan](http://marsyas.info/downloads/datasets.html) & [LMD](https://sites.google.com/site/carlossillajr/resources/the-latin-music-database-lmd)},
  input        = {Magnitude spectral frames},
  journal      = {[IEEE Transactions on Multimedia](https://signalprocessingsociety.org/publications-resources/ieee-transactions-multimedia)},
  link         = {https://arxiv.org/pdf/1507.04761.pdf},
  number       = {11},
  pages        = {2059--2071},
  publisher    = {IEEE},
  task         = {MGR},
  title        = {Deep learning and music adversaries},
  volume       = {17},
  year         = {2015}
}

@inproceedings{Leglaive2015,
  author       = {Leglaive, Simon and Hennequin, Romain and Badeau, Roland},
  booktitle    = {ICASSP},
  link         = {https://hal-imt.archives-ouvertes.fr/hal-01110035/},
  organization = {IEEE},
  pages        = {121--125},
  task         = {SVD},
  title        = {Singing voice detection with deep recurrent neural networks},
  year         = {2015}
}

@unpublished{Li2015,
  archiveprefix = {arXiv},
  author        = {Li, Peter and Qian, Jiyuan and Wang, Tian},
  dataset       = {[MedleyDB](http://medleydb.weebly.com/)},
  eprint        = {1511.05520},
  input         = {1D Freq Raw audio},
  journal       = {arXiv preprint arXiv:1511.05520},
  link          = {https://arxiv.org/pdf/1511.05520.pdf},
  loss          = {Cross-entropy},
  task          = {Instrument recognition},
  title         = {Automatic instrument recognition in polyphonic music using convolutional neural networks},
  year          = {2015}
}

@inproceedings{Mcfee2015,
  author    = {McFee, Brian and Humphrey, Eric J. and Bello, Juan Pablo},
  booktitle = {ISMIR},
  dataset   = {[MedleyDB](http://medleydb.weebly.com/)},
  link      = {https://bmcfee.github.io/papers/ismir2015_augmentation.pdf},
  pages     = {248--254},
  task      = {Instrument recognition},
  title     = {A software framework for musical data augmentation},
  year      = {2015}
}

@unpublished{Nam2015,
  archiveprefix = {arXiv},
  author        = {Nam, Juhan and Herrera, Jorge and Lee, Kyogu},
  eprint        = {1508.04999},
  journal       = {arXiv preprint arXiv:1508.04999},
  link          = {https://arxiv.org/pdf/1508.04999v1.pdf},
  title         = {A deep bag-of-features model for music auto-tagging},
  year          = {2015}
}

@inproceedings{Park2015a,
  author    = {Park, Taejin and Lee, Taejin},
  batch     = {No},
  booktitle = {ISMIR},
  input     = {2D},
  link      = {http://ismir2015.uma.es/LBD/LBD27.pdf},
  loss      = {Cross-entropy},
  task      = {Music/Noise segmentation},
  title     = {Music-noise segmentation in spectrotemporal domain using convolutional neural networks},
  year      = {2015}
}

@unpublished{Park2015b,
  archiveprefix = {arXiv},
  author        = {Park, Taejin and Lee, Taejin},
  dataset       = {[UIOWA MIS](http://theremin.music.uiowa.edu/mis.html)},
  eprint        = {1512.07370},
  journal       = {arXiv preprint arXiv:1512.07370},
  link          = {https://arxiv.org/ftp/arxiv/papers/1512/1512.07370.pdf},
  task          = {Instrument recognition},
  title         = {Musical instrument sound classification with deep convolutional neural network using feature fusion approach},
  year          = {2015}
}

@inproceedings{piczak2015environmental,
  author       = {Piczak, Karol J},
  booktitle    = {IEEE_MLSP},
  link         = {http://karol.piczak.com/papers/Piczak2015-ESC-ConvNet.pdf},
  organization = {IEEE},
  pages        = {1--6},
  title        = {Environmental sound classification with convolutional neural networks},
  year         = {2015}
}

@inproceedings{Schluter2015,
  architecture     = {CNN},
  author           = {Schlüter, Jan and Grill, Thomas},
  booktitle        = {ISMIR},
  code             = {https://github.com/f0k/ismir2015},
  dataaugmentation = {Dropout {5%, 10%, 20%} & Noise {Gaussian sigma={0.05, 0.1, 0.2}} & Pitch shift +-{10, 20, 30, 50} & Time stretch +-{10, 20, 30, 50} & Loudness +-{5dB, 10dB, 20dB} & Frequency filter +-{5dB, 10dB, 20dB} & Mix {10%, 20%, 30%, 50%} & Combined & Test and train},
  dataset          = {Inhouse & [Jamendo](http://www.mathieuramona.com/wp/data/jamendo/) & [RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/)},
  input            = {Spectrogram},
  link             = {https://grrrr.org/pub/schlueter-2015-ismir.pdf},
  pages            = {121--126},
  task             = {SVD},
  title            = {Exploring data augmentation for improved singing voice detection with neural networks},
  year             = {2015}
}

@techreport{Shi2015,
  author      = {Shi, Zhengshan},
  institution = {Stanford University},
  link        = {https://cs224d.stanford.edu/reports/SkiZhengshan.pdf},
  title       = {Singer traits identification using deep neural network},
  year        = {2015}
}

@inproceedings{Sigtia2015a,
  architecture = {RNN},
  author       = {Sigtia, Siddharth and Benetos, Emmanouil and Boulanger-Lewandowski, Nicolas and Weyde, Tillman and Garcez, Artur S d'Avila and Dixon, Simon},
  batch        = {No},
  booktitle    = {ICASSP},
  dataset      = {[MAPS](http://www.tsi.telecom-paristech.fr/aao/en/2010/07/08/maps-database-a-piano-database-for-multipitch-estimation-and-automatic-transcription-of-music/)},
  epochs       = {No},
  gpu          = {No},
  learningrate = {No},
  link         = {https://arxiv.org/pdf/1411.1623.pdf},
  organization = {IEEE},
  pages        = {2061--2065},
  task         = {Transcription},
  title        = {A hybrid recurrent neural network for music transcription},
  year         = {2015}
}

@unpublished{Sigtia2015b,
  archiveprefix = {arXiv},
  author        = {Sigtia, Siddharth and Benetos, Emmanouil and Dixon, Simon},
  eprint        = {1508.01774},
  input         = {CQT},
  journal       = {arXiv preprint arXiv:1508.01774},
  link          = {https://arxiv.org/pdf/1508.01774.pdf},
  task          = {Transcription},
  title         = {An end-to-end neural network for polyphonic music transcription},
  year          = {2015}
}

@unpublished{Simpson2015,
  archiveprefix = {arXiv},
  author        = {Simpson, Andrew J. R. and Roma, Gerard and Plumbley, Mark D.},
  dataset       = {[MedleyDB](http://medleydb.weebly.com/)},
  eprint        = {1504.04658},
  input         = {STFT},
  journal       = {arXiv preprint arXiv:1504.04658},
  link          = {https://link.springer.com/chapter/10.1007/978-3-319-22482-4_50},
  task          = {Source separation},
  title         = {Deep karaoke: Extracting vocals from musical mixtures using a convolutional deep neural network},
  year          = {2015}
}

@inproceedings{Sturm2015,
  author    = {Sturm, Bob L. and Santos, João Felipe and Korshunova, Iryna},
  booktitle = {ISMIR},
  code      = {https://github.com/IraKorshunova/folk-rnn},
  link      = {http://ismir2015.uma.es/LBD/LBD13.pdf},
  task      = {Composition},
  title     = {Folk music style modelling by recurrent neural networks with long short term memory units},
  year      = {2015}
}

@inproceedings{Uhlich2015,
  author       = {Uhlich, Stefan and Giron, Franck and Mitsufuji, Yuki},
  booktitle    = {ICASSP},
  input        = {STFT},
  link         = {https://www.researchgate.net/profile/Stefan_Uhlich/publication/282001406_Deep_neural_network_based_instrument_extraction_from_music/links/5600eeda08ae07629e52b397/Deep-neural-network-based-instrument-extraction-from-music.pdf},
  organization = {IEEE},
  pages        = {2135--2139},
  task         = {Source separation},
  title        = {Deep neural network based instrument extraction from music},
  year         = {2015}
}

@inproceedings{Zhang2015,
  architecture = {CNN},
  author       = {Zhang, Pengjing and Zheng, Xiaoqing and Zhang, Wenqiang and Li, Siyan and Qian, Sheng and He, Wenqi and Zhang, Shangtong and Wang, Ziyuan},
  booktitle    = {ICMR},
  link         = {https://www.researchgate.net/profile/Xiaoqing_Zheng3/publication/275347034_A_Deep_Neural_Network_for_Modeling_Music/links/5539d2060cf2239f4e7dad0d/A-Deep-Neural-Network-for-Modeling-Music.pdf},
  task         = {MGR},
  title        = {A deep neural network for modeling music},
  year         = {2015}
}

@article{Arumugam2016,
  architecture = {PNN},
  author       = {Arumugam, Muthumari and Kaliappan, Mala},
  dataset      = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  journal      = {[Circuits and Systems](http://www.scirp.org/journal/cs/)},
  link         = {http://file.scirp.org/pdf/CS_2016042615054817.pdf},
  number       = {04},
  pages        = {255},
  publisher    = {Scientific Research Publishing},
  task         = {MGR & Instrument recognition},
  title        = {An efficient approach for segmentation, feature extraction and classification of audio signals},
  volume       = {7},
  year         = {2016}
}

@inproceedings{Choi2016a,
  author    = {Choi, Keunwoo and Fazekas, György and Sandler, Mark Brian},
  booktitle = {CSMC},
  link      = {https://drive.google.com/file/d/0B1OooSxEtl0FcG9MYnY2Ylh5c0U/view},
  task      = {Composition},
  title     = {Text-based LSTM networks for automatic music composition},
  year      = {2016}
}

@unpublished{Choi2016b,
  architecture  = {RNN},
  archiveprefix = {arXiv},
  author        = {Choi, Keunwoo and Fazekas, György and Sandler, Mark Brian},
  eprint        = {1606.02096},
  journal       = {arXiv preprint arXiv:1606.02096},
  link          = {https://arxiv.org/pdf/1606.02096.pdf},
  task          = {Playlist generation},
  title         = {Towards playlist generation algorithms using RNNs trained on within-track transitions},
  year          = {2016}
}

@inproceedings{Choi2016c,
  address      = {New York, NY, USA},
  architecture = {FCN},
  author       = {Choi, Keunwoo and Fazekas, György and Sandler, Mark Brian},
  booktitle    = {ISMIR},
  link         = {https://arxiv.org/pdf/1606.00298.pdf},
  pages        = {805--811},
  task         = {MGR},
  title        = {Automatic tagging using deep convolutional neural networks},
  year         = {2016}
}

@inproceedings{Deng2016,
  author       = {Deng, Junqi and Kwok, Yu-Kwong},
  booktitle    = {ICASSP},
  link         = {http://ieeexplore.ieee.org/abstract/document/7471677/},
  organization = {IEEE},
  pages        = {261--265},
  task         = {Chord recognition},
  title        = {Automatic chord estimation on seventhsbass chord vocabulary using deep neural network},
  year         = {2016}
}

@inproceedings{Hadjeres2016d,
  author    = {Hadjeres, Gaëtan and Pachet, François},
  booktitle = {ICML},
  code      = {https://github.com/Ghadjeres/DeepBach},
  link      = {https://arxiv.org/pdf/1612.01010.pdf},
  title     = {DeepBach: A steerable model for Bach chorales generation},
  year      = {2016},
}

@inproceedings{Holzapfel2016,
  architecture = {CNN},
  author       = {Holzapfel, Andre and Grill, Thomas},
  booktitle    = {ISMIR},
  link         = {http://www.rhythmos.org/MMILab-Andre_files/ISMIR2016_CNNDBNbeats_camready.pdf},
  pages        = {262--268},
  task         = {Beat detection},
  title        = {Bayesian meter tracking on learned signal representations},
  year         = {2016},
}

@unpublished{Huang2016,
  architecture = {RNN-LSTM},
  author       = {Huang, Allen and Wu, Raymond},
  dataset      = {[Bach Corpus](http://musedata.org/)},
  journal      = {arXiv preprint arXiv:1606.04930},
  link         = {https://arxiv.org/pdf/1606.04930.pdf},
  task         = {Composition},
  title        = {Deep learning for music},
  year         = {2016},
}

@inproceedings{Jeong2016,
  author    = {Jeong, Il-Young and Lee, Kyogu},
  booktitle = {ISMIR},
  input     = {STFT & Cepstrum},
  link      = {https://www.researchgate.net/profile/Il_Young_Jeong/publication/305683876_Learning_temporal_features_using_a_deep_neural_network_and_its_application_to_music_genre_classification/links/5799a27c08aec89db7bb9f92.pdf},
  title     = {Learning temporal features using a deep neural network and its application to music genre classification},
  year      = {2016},
}

@unpublished{Kelz2016,
  architecture = {DNN & ConvNet},
  author       = {Kelz, Rainer and Dorfer, Matthias and Korzeniowski, Filip and Böck, Sebastian and Arzt, Andreas and Widmer, Gerhard},
  journal      = {arXiv preprint arXiv:1612.05153},
  link         = {https://arxiv.org/pdf/1612.05153.pdf},
  title        = {On the potential of simple framewise approaches to piano transcription},
  year         = {2016},
}

@inproceedings{Korzeniowski2016a,
  address   = {New York, NY, USA},
  author    = {Korzeniowski, Filip and Widmer, Gerhard},
  booktitle = {ISMIR},
  code      = {https://github.com/fdlm/chordrec/tree/master/experiments/ismir2016},
  link      = {https://arxiv.org/pdf/1612.05065.pdf},
  note      = {http://fdlm.github.io/post/deepchroma},
  task      = {Chord recognition},
  title     = {Feature learning for chord recognition: The deep chroma extractor},
  year      = {2016},
}

@inproceedings{Korzeniowski2016b,
  address = {Salerno, Italy},
  author = {Korzeniowski, Filip and Widmer, Gerhard},
  booktitle = {IEEE_MLSP},
  link = {https://www.researchgate.net/profile/Filip_Korzeniowski/publication/305590295_A_Fully_Convolutional_Deep_Auditory_Model_for_Musical_Chord_Recognition/links/579486ba08aed51475cc6958/A-Fully-Convolutional-Deep-Auditory-Model-for-Musical-Chord-Recognition.pdf},
  organization = {IEEE},
  pages = {1--6},
  task = {Chord recognition},
  title = {A fully convolutional deep auditory model for musical chord recognition},
  year = {2016}
}

@inproceedings{Li2016,
  architecture = {RNN & BILSTM & ELM},
  author       = {Li, Xinxing and Xianyu, Haishu and Tian, Jiashen and Chen, Wenxiao and Meng, Fanhang and Xu, Mingxing and Cai, Lianhong},
  booktitle    = {ICASSP},
  link         = {http://ieeexplore.ieee.org/document/7471734/},
  organization = {IEEE},
  pages        = {544--548},
  task         = {MER},
  title        = {A deep bidirectional long short-term memory based multi-scale approach for music dynamic emotion prediction},
  year         = {2016},
}

@inproceedings{Liu2016,
  architecture = {CNN},
  author       = {Liu, Jen-Yu and Yang, Yi-Hsuan},
  booktitle    = {ACM_MM},
  code         = {https://github.com/ciaua/clip2frame},
  link         = {http://mac.citi.sinica.edu.tw/~yang/pub/liu16mm.pdf},
  organization = {ACM},
  pages        = {1048--1057},
  title        = {Event localization in music auto-tagging},
  year         = {2016},
}

@inproceedings{Lostanlen2016,
  author    = {Lostanlen, Vincent and Cella, Carmine-Emanuele},
  booktitle = {ISMIR},
  code      = {https://github.com/lostanlen/ismir2016},
  link      = {https://github.com/lostanlen/ismir2016/blob/master/paper/lostanlen_ismir2016.pdf},
  task      = {Instrument recognition},
  title     = {Deep convolutional networks on the pitch spiral for musical instrument recognition},
  year      = {2016},
}

@inproceedings{Mehri2017,
  architecture = {RNN},
  author = {Mehri, Soroush and Kumar, Kundan and Gulrajani, Ishaan and Kumar, Rithesh and Jain, Shubham and Sotelo, Jose and Courville, Aaron and Bengio, Yoshua},
  booktitle = {ICLR},
  code = {https://github.com/soroushmehr/sampleRNN_ICLR2017},
  dataset = {[32 Beethoven’s piano sonatas gathered from https://archive.org](https://soundcloud.com/samplernn/sets)},
  link = {https://openreview.net/pdf?id=SkxKPDv5xl},
  note = {https://arxiv.org/pdf/1612.07837.pdf},
  task = {Composition},
  title = {SampleRNN: An unconditional end-to-end neural audio generation model},
  year = {2017}
}

@unpublished{Phan2016,
  architecture = {CNN},
  author       = {Phan, Huy and Hertel, Lars and Maass, Marco and Mertins, Alfred},
  dataset      = {[RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/)},
  journal      = {arXiv preprint arXiv:1604.06338},
  link         = {https://arxiv.org/pdf/1604.06338.pdf},
  note         = {Compare MFCC & deep learning},
  task         = {Event recognition},
  title        = {Robust audio event recognition with 1-max pooling convolutional neural networks},
  year         = {2016},
}

@inproceedings{Pons2016,
  address   = {Bucharest, Romania},
  author    = {Pons, Jordi and Lidy, Thomas and Serra, Xavier},
  booktitle = {CBMI},
  code      = {https://github.com/jordipons/},
  dataset   = {[Ballroom](http://mtg.upf.edu/ismir2004/contest/tempoContest/node5.html)},
  doi       = {10.1109/CBMI.2016.7500246},
  isbn      = {978-1-4673-8695-1},
  link      = {http://jordipons.me/media/CBMI16.pdf},
  month     = {Jun.},
  title     = {Experimenting with musically motivated convolutional neural networks},
  year      = {2016},
}

@inproceedings{Rigaud2016,
  address      = {New York, NY, USA},
  architecture = {DNN & RNN-LSTM},
  author       = {Rigaud, François and Radenen, Mathieu},
  booktitle    = {ISMIR},
  link         = {https://wp.nyu.edu/ismir2016/wp-content/uploads/sites/2294/2016/07/163_Paper.pdf},
  pages        = {737--743},
  task         = {F0 & VAD},
  title        = {Singing voice melody transcription using deep neural networks},
  year         = {2016},
}

@inproceedings{Roma2016,
  author    = {Roma, Gerard and Grais, Emad M. and Simpson, Andrew J. R. and Plumbley, Mark D.},
  booktitle = {MIREX},
  code      = {http://cvssp.org/projects/maruss/mirex2016/},
  dataset   = {[iKala](http://mac.citi.sinica.edu.tw/ikala/)},
  link      = {http://www.music-ir.org/mirex/abstracts/2016/RSGP1.pdf},
  task      = {SVS},
  title     = {Singing voice separation using deep neural networks and F0 estimation},
  year      = {2016},
}

@inproceedings{Schluter2016,
  address      = {New York, NY, USA},
  architecture = {CNN},
  author       = {Schlüter, Jan},
  booktitle    = {ISMIR},
  input        = {STFT},
  link         = {http://www.ofai.at/~jan.schlueter/pubs/2016_ismir.pdf},
  month        = {Aug.},
  pages        = {44--50},
  title        = {Learning to pinpoint singing voice from weakly labeled examples},
  year         = {2016},
}

@inproceedings{Stasiak2016b,
  address      = {Gdansk, Poland},
  author       = {Stasiak, Bartłomiej and Mońko, Jędrzej},
  booktitle    = {IEEE_FedCSIS},
  link         = {http://ieeexplore.ieee.org/abstract/document/7733228/},
  organization = {IEEE},
  pages        = {147--152},
  task         = {Onset detection},
  title        = {Analysis of time-frequency representations for musical onset detection with convolutional neural network},
  year         = {2016},
}

@article{Stasiak2016a,
  architecture = {NNMODFF},
  author       = {Stasiak, Bartłomiej and Mońko, Jędrzej and Niewiadomski, Adam},
  dataset      = {Inhouse & [RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/)},
  input        = {Onset activation},
  journal      = {[International Journal of Applied Mathematics and Computer Science](https://www.amcs.uz.zgora.pl/)},
  link         = {https://www.degruyter.com/downloadpdf/j/amcs.2016.26.issue-1/amcs-2016-0014/amcs-2016-0014.pdf},
  number       = {1},
  pages        = {203--213},
  task         = {Onset detection},
  title        = {Note onset detection in musical signals via neural-network-based multi-ODF fusion},
  volume       = {26},
  year         = {2016},
}

@inproceedings{Sturm2016,
  author    = {Sturm, Bob L. and Santos, João Felipe and Ben-Tal, Oded and Korshunova, Iryna},
  booktitle = {CSMC},
  code      = {https://github.com/IraKorshunova/folk-rnn},
  link      = {https://drive.google.com/file/d/0B1OooSxEtl0FcTBiOGdvSTBmWnc/view},
  task      = {Composition},
  title     = {Music transcription modelling and composition using deep learning},
  year      = {2016},
}

@inproceedings{Su2016,
  architecture = {CNN & MLP},
  author       = {Su, Hong and Zhang, Hui and Zhang, Xueliang and Gao, Guanglai},
  booktitle    = {ICASSP},
  link         = {http://www.mirlab.org/conference_papers/International_Conference/ICASSP%202016/pdfs/0000579.pdf},
  organization = {IEEE},
  pages        = {579--583},
  task         = {Pitch determination},
  title        = {Convolutional neural network for robust pitch determination},
  year         = {2016},
}

@unpublished{Takahashi2016,
  architecture     = {CNN},
  author           = {Takahashi, Naoya and Gygli, Michael and Pfister, Beat and Van Gool, Luc},
  code             = {https://bitbucket.org/naoya1/aenet_release},
  dataaugmentation = {Mixing},
  dataset          = {[Acoustic Event](https://data.vision.ee.ethz.ch/cvl/ae_dataset/)},
  journal          = {arXiv preprint arXiv:1604.07160},
  link             = {https://arxiv.org/pdf/1604.07160.pdf},
  task             = {Event recognition},
  title            = {Deep convolutional neural networks and data augmentation for acoustic event detection},
  year             = {2016},
}

@unpublished{Bammer2017,
  author = {Bammer, Roswitha and Doerfler, Monika},
  journal = {arXiv preprint arXiv:1706.08818},
  link = {https://arxiv.org/pdf/1706.08818.pdf},
  title = {Gabor frames and deep scattering networks in audio processing},
  year = {2017}
}

@inproceedings{Bazzica2017,
  architecture = {CNN},
  author = {Bazzica, Alessio and Van Gemert, J. C. and Liem, C. C. S. and Hanjalic, A.},
  booktitle = {IWDLM},
  dataset = {[C4S](http://mmc.tudelft.nl/users/alessio-bazzica#C4S-dataset)},
  link = {http://dorienherremans.com/dlm2017/papers/bazzica2017clarinet.pdf},
  task = {Onset detection},
  title = {Vision-based detection of acoustic timed events: A case study on clarinet note onsets},
  year = {2017}
}

@unpublished{Briot2017,
  architecture = {No},
  author = {Briot, Jean-Pierre and Hadjeres, Gaëtan and Pachet, François},
  dataset = {[JSB Chorales](https://github.com/czhuang/JSB-Chorales-dataset) & [MusicNet](https://homes.cs.washington.edu/~thickstn/musicnet.html) & [Symbolic music data](http://users.cecs.anu.edu.au/~christian.walder/) & [LSDB](http://lsdb.flow-machines.com/)},
  journal = {arXiv preprint arXiv:1709.01620},
  link = {https://arxiv.org/pdf/1709.01620.pdf},
  task = {Survey & Composition},
  title = {Deep learning techniques for music generation - A survey},
  year = {2017}
}

@inproceedings{Brunner2017,
  activation       = {Softmax},
  address          = {Boston, MA, USA},
  architecture     = {RNN-LSTM},
  author           = {Brunner, Gino and Wang, Yuyi and Wattenhofer, Roger and Wiesendanger, Jonas},
  batch            = {No},
  booktitle        = {[IEEE_ICTAI](http://ictai2017.org/)},
  code             = {https://github.com/brunnergino/JamBot},
  dataaugmentation = {No},
  dataset          = {[Lakh MIDI](https://labrosa.ee.columbia.edu/sounds/music/)},
  dropout          = {No},
  epochs           = {4},
  framework        = {Keras-TensorFlow},
  gpu              = {1},
  learningrate     = {0.00001},
  link             = {https://arxiv.org/pdf/1711.07682.pdf},
  month            = {Nov.},
  optimizer        = {Adam},
  pages            = {1--8},
  task             = {Composition},
  title            = {JamBot: Music theory aware chord based generation of polyphonic music with LSTMs},
  year             = {2017},
}

@unpublished{Cangea2017,
  author  = {Cangea, Cătălina and Veličković, Petar and Liò, Pietro},
  journal = {arXiv preprint arXiv:1709.00572},
  link    = {https://arxiv.org/pdf/1709.00572.pdf},
  title   = {XFlow: 1D <-> 2D cross-modal deep neural networks for audiovisual classification},
  year    = {2017},
}

@inproceedings{Cella2017,
  activation       = {No},
  architecture     = {No},
  author           = {Cella, Carmine-Emanuele},
  batch            = {No},
  booktitle        = {IWDLM},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {No},
  dropout          = {No},
  epochs           = {No},
  framework        = {No},
  gpu              = {No},
  learningrate     = {No},
  link             = {http://dorienherremans.com/dlm2017/papers/cella2017mli.pdf},
  optimizer        = {No},
  reproducible     = {No},
  task             = {Manifesto},
  title            = {Machine listening intelligence},
  year             = {2017},
}

@inproceedings{Chandna2017,
  architecture = {CNN},
  author       = {Chandna, Pritish and Miron, Marius and Janer, Jordi and Gómez, Emilia},
  booktitle    = {International Conference on Latent Variable Analysis and Signal Separation (LVA ICA)},
  code         = {https://github.com/MTG/DeepConvSep},
  dataset      = {[DSD100](http://sisec17.audiolabs-erlangen.de/#/dataset)},
  link         = {http://mtg.upf.edu/system/files/publications/monoaural-audio-source_0.pdf},
  month        = {Feb.},
  task         = {Source separation},
  title        = {Monoaural audio source separation using deep convolutional neural networks},
  year         = {2017},
}

@inproceedings{Chen2017,
  author       = {Chen, Tanfang and Wang, Shangfei and Chen, Shiyu},
  booktitle    = {ICME},
  link         = {http://ieeexplore.ieee.org/abstract/document/8019322/},
  organization = {IEEE},
  pages        = {955--960},
  task         = {General audio classification},
  title        = {Deep multimodal network for multi-label classification},
  year         = {2017},
}

@unpublished{Choi2017a,
  author  = {Choi, Keunwoo and Fazekas, György and Cho, Kyunghyun and Sandler, Mark Brian},
  code    = {https://github.com/keunwoochoi/dl4mir},
  journal = {arXiv preprint arXiv:1709.04396},
  link    = {https://arxiv.org/pdf/1709.04396.pdf},
  task    = {General audio classification},
  title   = {A tutorial on deep learning for music information retrieval},
  year    = {2017},
}

@unpublished{Choi2017b,
  author  = {Choi, Keunwoo and Fazekas, György and Cho, Kyunghyun and Sandler, Mark Brian},
  code    = {https://github.com/keunwoochoi/transfer_learning_music},
  dataset = {[MSD](https://labrosa.ee.columbia.edu/millionsong/)},
  journal = {arXiv preprint arXiv:1709.01922},
  link    = {https://arxiv.org/pdf/1709.01922.pdf},
  task    = {MGR},
  title   = {A comparison on audio signal preprocessing methods for deep neural networks on music tagging},
  year    = {2017},
}

@inproceedings{Choi2017c,
  author    = {Choi, Keunwoo and Fazekas, György and Sandler, Mark Brian and Cho, Kyunghyun},
  batch     = {No},
  booktitle = {ISMIR},
  code      = {https://github.com/keunwoochoi/transfer_learning_music},
  dataset   = {[MSD](https://labrosa.ee.columbia.edu/millionsong/)},
  link      = {https://arxiv.org/pdf/1703.09179v3.pdf},
  optimizer = {Adam},
  task      = {General audio classification},
  title     = {Transfer learning for music classification and regression tasks},
  year      = {2017},
}

@inproceedings{Choi2017d,
  architecture = {CRNN},
  author       = {Choi, Keunwoo and Fazekas, György and Sandler, Mark Brian and Cho, Kyunghyun},
  booktitle    = {ICASSP},
  code         = {https://github.com/keunwoochoi/icassp_2017},
  link         = {http://ieeexplore.ieee.org/abstract/document/7952585/},
  organization = {IEEE},
  pages        = {2392--2396},
  reproducible = {Models & split sets only},
  task         = {MGR},
  title        = {Convolutional recurrent neural networks for music classification},
  year         = {2017},
}

@article{Costa2017,
  architecture = {CNN},
  author = {Costa, Yandre M. G. and Oliveira, Luiz S. and Silla, Carlos N.},
  batch = {128},
  dataset = {[LMD](https://sites.google.com/site/carlossillajr/resources/the-latin-music-database-lmd)},
  framework = {Caffe},
  gpu = {Tesla C2050},
  journal = {[Applied Soft Computing](https://www.journals.elsevier.com/applied-soft-computing/)},
  learningrate = {0.001},
  link = {http://www.inf.ufpr.br/lesoliveira/download/ASOC2017.pdf},
  pages = {28--38},
  publisher = {Elsevier},
  task = {General audio classification},
  title = {An evaluation of convolutional neural networks for music classification using spectrograms},
  volume = {52},
  year = {2017}
}

@unpublished{Deng2017,
  author  = {Deng, Junqi and Kwok, Yu-Kwong},
  journal = {arXiv preprint arXiv:1709.07153},
  link    = {https://arxiv.org/pdf/1709.07153.pdf},
  task    = {Chord recognition},
  title   = {Large vocabulary automatic chord estimation using deep neural nets: Design framework, system variations and limitations},
  year    = {2017},
}

@unpublished{Doerfler2017,
  author       = {Doerfler, Monika and Grill, Thomas and Bammer, Roswitha and Flexer, Arthur},
  dataset      = {Inhouse},
  input        = {Raw & Mel-spectrogram},
  journal      = {arXiv preprint arXiv:1709.02291},
  learningrate = {0.001},
  link         = {https://arxiv.org/pdf/1709.02291.pdf},
  optimizer    = {Adam},
  task         = {SVD},
  title        = {Basic filters for convolutional neural networks: Training or design?},
  year         = {2017},
}

@unpublished{Duppada2017,
  author = {Duppada, Venkatesh and Hiray, Sushant},
  journal = {arXiv preprint arXiv:1708.05826},
  link = {https://arxiv.org/pdf/1708.05826.pdf},
  title = {Ensemble of deep neural networks for acoustic scene classification},
  year = {2017}
}

@article{Durand2017,
  architecture = {CNN},
  author       = {Durand, Simon and Bello, Juan Pablo and David, Bertrand and Richard, Gaël},
  journal      = {[IEEE/ACM Transactions on Audio, Speech, and Language Processing](https://signalprocessingsociety.org/publications-resources/ieeeacm-transactions-audio-speech-and-language-processing/ieeeacm)},
  link         = {http://ieeexplore.ieee.org/abstract/document/7728057/},
  number       = {1},
  pages        = {76--89},
  publisher    = {IEEE},
  task         = {Beat detection},
  title        = {Robust downbeat tracking using an ensemble of convolutional networks},
  volume       = {25},
  year         = {2017},
}

@inproceedings{Fan2017,
  architecture = {VPNN & DNN},
  author = {Fan, Zhe-Cheng and Chan, T. S. and Yang, Yi-Hsuan and Jang, Jyh-Shing R.},
  booktitle = {IWDLM},
  dataset = {[iKala](http://mac.citi.sinica.edu.tw/ikala/)},
  link = {http://dorienherremans.com/dlm2017/papers/fan2017vector.pdf},
  task = {SVS},
  title = {Music signal processing using vector product neural networks},
  year = {2017}
}

@inproceedings{Geng2017,
  architecture = {CNN},
  author       = {Geng, Shijia and Ren, Gang and Ogihara, Mitsunori},
  booktitle    = {IWDLM},
  dataset      = {Inhouse},
  link         = {http://dorienherremans.com/dlm2017/papers/geng2017genre.pdf},
  task         = {Composition},
  title        = {Transforming musical signals through a genre classifying convolutional neural network},
  year         = {2017},
}

@inproceedings{Gong2017,
  author    = {Gong, Rong and Pons, Jordi and Serra, Xavier},
  booktitle = {ISMIR},
  code      = {https://github.com/ronggong/jingjuSingingPhraseMatching/tree/v0.1.0},
  link      = {https://arxiv.org/pdf/1707.03547.pdf},
  title     = {Audio to score matching by combining phonetic and duration information},
  year      = {2017},
}

@unpublished{Hadjeres2017a,
  architecture = {ARNN},
  author       = {Hadjeres, Gaëtan and Nielsen, Frank},
  journal      = {arXiv preprint arXiv:1709.06404},
  link         = {https://arxiv.org/pdf/1709.06404.pdf},
  task         = {Composition},
  title        = {Interactive music generation with positional constraints using anticipation-RNNs},
  year         = {2017},
}

@unpublished{Hadjeres2017b,
  author  = {Hadjeres, Gaëtan and Nielsen, Frank},
  journal = {arXiv preprint arXiv:1709.00740},
  link    = {https://arxiv.org/pdf/1709.00740.pdf},
  title   = {Deep rank-based transposition-invariant distances on musical sequences},
  year    = {2017},
}

@unpublished{Hadjeres2017c,
  author  = {Hadjeres, Gaëtan and Nielsen, Frank and Pachet, François},
  journal = {arXiv preprint arXiv:1707.04588},
  link    = {https://arxiv.org/pdf/1707.04588.pdf},
  title   = {GLSR-VAE: Geodesic latent space regularization for variational autoencoder architectures},
  year    = {2017},
}

@article{Han2017,
  architecture = {CNN},
  author = {Han, Yoonchang and Kim, Jaehun and Lee, Kyogu},
  dataset = {[IRMAS](https://www.upf.edu/web/mtg/irmas)},
  journal = {[IEEE/ACM Transactions on Audio, Speech, and Language Processing](https://signalprocessingsociety.org/publications-resources/ieeeacm-transactions-audio-speech-and-language-processing/ieeeacm)},
  link = {http://dl.acm.org/citation.cfm?id=3068697},
  number = {1},
  pages = {208--221},
  publisher = {IEEE Press},
  task = {Instrument recognition},
  title = {Deep convolutional neural networks for predominant instrument recognition in polyphonic music},
  volume = {25},
  year = {2017}
}

@inproceedings{Hershey2017,
  architecture = {CNN},
  author       = {Hershey, Shawn and Chaudhuri, Sourish and Ellis, Daniel P. W. and Gemmeke, Jort F. and Jansen, Aren and Moore, R. Channing and Plakal, Manoj and Platt, Devin and Saurous, Rif A. and Seybold, Bryan and Slaney, Malcolm and Weiss, Ron J. and Wilson, Kevin},
  booktitle    = {ICASSP},
  link         = {https://arxiv.org/pdf/1609.09430v2.pdf},
  organization = {IEEE},
  pages        = {131--135},
  title        = {CNN architectures for large-scale audio classification},
  year         = {2017},
}

@inproceedings{Hsu2017,
  author       = {Hsu, Yu-Lun and Lin, Chi-Po and Lin, Bo-Chen and Kuo, Hsu-Chan and Cheng, Wen-Huang and Hu, Min-Chun},
  booktitle    = {IEEE_ICME},
  link         = {http://ieeexplore.ieee.org/abstract/document/8026272/},
  organization = {IEEE},
  pages        = {285--290},
  task         = {Composition},
  title        = {DeepSheet: A sheet music generator based on deep learning},
  year         = {2017},
}

@inproceedings{Hutchings2017,
  architecture = {RNN},
  author       = {Hutchings, P.},
  booktitle    = {IWDLM},
  link         = {http://dorienherremans.com/dlm2017/papers/hutchings2017drums.pdf},
  task         = {Composition},
  title        = {Talking Drums: Generating drum grooves with neural networks},
  year         = {2017},
}

@inproceedings{Jansson2017,
  activation = {Sigmoid},
  address = {Suzhou, China},
  architecture = {CNN & U-Net},
  author = {Jansson, Andreas and Humphrey, Eric J. and Montecchio, Nicola and Bittner, Rachel and Kumar, Aparna and Weyde, Tillman},
  booktitle = {ISMIR},
  code = {https://github.com/Xiao-Ming/UNet-VocalSeparation-Chainer},
  dataaugmentation = {No},
  dataset = {[iKala](http://mac.citi.sinica.edu.tw/ikala/) & [MedleyDB](http://medleydb.weebly.com/)},
  link = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/171_Paper.pdf},
  month = {Oct.},
  optimizer = {Adam},
  pages = {745--751},
  reproducible = {No},
  task = {SVS},
  title = {Singing voice separation with deep U-Net convolutional networks},
  year = {2017}
}

@inproceedings{Jeon2017,
  architecture = {CNN},
  author       = {Jeon, Byungsoo and Kim, Chanju and Kim, Adrian and Kim, Dongwon and Park, Jangyeon and Ha, Jung-Woo},
  booktitle    = {RECSYS},
  dataset      = {Inhouse},
  link         = {http://ceur-ws.org/Vol-1905/recsys2017_poster18.pdf},
  task         = {MER},
  title        = {Music emotion recognition via end-to-end multimodal neural networks},
  year         = {2017},
}

@inproceedings{Koops2017,
  author = {Koops, Hendrik Vincent and De Haas, W. Bas and Bransen, Jeroen and Volk, Anja},
  booktitle = {IWDLM},
  link = {http://dorienherremans.com/dlm2017/papers/koops2017pers.pdf},
  title = {Chord label personalization through deep learning of integrated harmonic interval-based representations},
  year = {2017}
}

@unpublished{Korzeniowski2017,
  author  = {Korzeniowski, Filip and Widmer, Gerhard},
  journal = {arXiv preprint arXiv:1706.02921},
  link    = {https://arxiv.org/pdf/1706.02921.pdf},
  title   = {End-to-end musical key estimation using a convolutional neural network},
  year    = {2017},
}

@inproceedings{Koutini2017,
  author    = {Koutini, Khaled and Imenina, Alina and Dorfer, Matthias and Gruber, Alexander Rudolf and Schedl, Markus},
  booktitle = {MediaEval},
  link      = {http://www.cp.jku.at/research/papers/Koutini_2017_mediaeval-acousticbrainz.pdf},
  title     = {MediaEval 2017 AcousticBrainz genre task: Multilayer perceptron approach},
  year      = {2017},
}

@unpublished{Kum2017,
  activation = {Leaky ReLU},
  architecture = {CNN},
  author = {Kum, Sangeun and Nam, Juhan},
  batch = {No},
  code = {No},
  dataaugmentation = {Pitch shift -2, -1, +1, +2 semitones},
  dataset = {[LabROSA](http://labrosa.ee.columbia.edu/projects/melody/) & [MedleyDB](http://medleydb.weebly.com/) & [Jamendo](http://www.mathieuramona.com/wp/data/jamendo/) & [RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/) & [iKala](http://mac.citi.sinica.edu.tw/ikala/) & [MIR-1K](https://sites.google.com/site/unvoicedsoundseparation/mir-1k) & [ADC2004](http://labrosa.ee.columbia.edu/projects/melody/)},
  doi = {10.20944/preprints201711.0027.v1},
  dropout = {0.3},
  epochs = {100},
  framework = {Keras},
  gpu = {2},
  learningrate = {0.02},
  link = {https://www.preprints.org/manuscript/201711.0027/v1},
  month = {Nov.},
  optimizer = {SGD},
  pages = {1--14},
  reproducible = {No},
  task = {F0},
  title = {Classification-based singing melody extraction using deep convolutional neural networks},
  year = {2017}
}

@article{Lee2017b,
  author  = {Lee, Jongpil and Nam, Juhan},
  journal = {[IEEE Signal Processing Letters](https://signalprocessingsociety.org/publications-resources/ieee-signal-processing-letters/ieee-signal-processing-letters)},
  link    = {https://arxiv.org/pdf/1703.01793v2.pdf},
  title   = {Multi-level and multi-scale feature aggregation using pre-trained convolutional neural networks for music auto-tagging},
  year    = {2017},
}

@unpublished{Lee2017c,
  author  = {Lee, Jongpil and Nam, Juhan},
  code    = {https://github.com/jongpillee/musicTagging_MSD},
  dataset = {[MSD](https://labrosa.ee.columbia.edu/millionsong/)},
  journal = {arXiv preprint arXiv:1706.06810},
  link    = {https://arxiv.org/pdf/1706.06810.pdf},
  title   = {Multi-level and multi-scale feature aggregation using sample-level deep convolutional neural networks for music classification},
  year    = {2017},
}

@inproceedings{Lee2017a,
  author    = {Lee, Jongpil and Park, Jiyoung and Kim, Keunhyoung Luke and Nam, Juhan},
  booktitle = {SMC},
  link      = {https://arxiv.org/pdf/1703.01789v2.pdf},
  title     = {Sample-level deep convolutional neural networks for music auto-tagging using raw waveforms},
  year      = {2017},
}

@unpublished{Lee2017d,
  activation = {No},
  architecture = {SeqGAN},
  author = {Lee, Sang-gil and Hwang, Uiwon and Min, Seonwoo and Yoon, Sungroh},
  batch = {No},
  booktitle = {CoRR},
  code = {https://github.com/L0SG/seqgan-music},
  computationtime = {No},
  dataaugmentation = {No},
  dataset = {[Nottingham dataset](http://abc.sourceforge.net/NMD/)},
  dimension = {1D},
  dropout = {No},
  epochs = {100},
  framework = {TensorFlow},
  gpu = {No},
  input = {MIDI},
  layers = {5},
  learningrate = {0.01 & 0.001 & 0.0001},
  link = {https://arxiv.org/pdf/1710.11418.pdf},
  loss = {No},
  metric = {No},
  momentum = {No},
  optimizer = {No},
  pages = {1--8},
  reproducible = {No},
  task = {Polyphonic music sequence modelling},
  title = {A SeqGAN for Polyphonic Music Generation},
  year = {2017}
}

@inproceedings{Lim2017,
  architecture = {CNN},
  author       = {Lim, Wootaek and Lee, Taejin},
  booktitle    = {EUSIPCO},
  dataset      = {[DSD100](http://sisec17.audiolabs-erlangen.de/#/dataset)},
  link         = {http://www.eurasip.org/Proceedings/Eusipco/Eusipco2017/papers/1570346835.pdf},
  task         = {Source separation},
  title        = {Harmonic and percussive source separation using a convolutional auto encoder},
  year         = {2017},
}

@unpublished{Malik2017,
  architecture     = {CRNN},
  author           = {Malik, Miroslav and Adavanne, Sharath and Drossos, Konstantinos and Virtanen, Tuomas and Ticha, Dasa and Jarina, Roman},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {[Free music archive](http://freemusicarchive.org/) & [MedleyDB](http://medleydb.weebly.com/) & [Jamendo](http://www.mathieuramona.com/wp/data/jamendo/)},
  framework        = {Keras-Theano},
  journal          = {arXiv preprint arXiv:1706.02292},
  link             = {https://arxiv.org/pdf/1706.02292.pdf},
  loss             = {RMSE},
  optimizer        = {Adam},
  task             = {MER},
  title            = {Stacked convolutional and recurrent neural networks for music emotion recognition},
  year             = {2017},
}

@mastersthesis{Martelbaro2017,
  architecture     = {DNN & CNN & RNN},
  author           = {Martel Baro, Héctor},
  dataaugmentation = {Mixing & Circular Shift & Instrument augmentation},
  dataset          = {[DSD100](http://sisec17.audiolabs-erlangen.de/#/dataset) & [HHDS](https://drive.google.com/drive/folders/0B1zpiGdDzFNlbmJyYU1VVFR3OEE)},
  link             = {https://repositori.upf.edu/bitstream/handle/10230/32919/Martel_2017.pdf?sequence=1&isAllowed=y},
  pages            = {1--79},
  school           = {Escola Superior Politècnica UPF},
  task             = {Source separation & Remixing},
  title            = {A deep learning approach to source separation and remixing of hiphop music},
  year             = {2017},
}

@inproceedings{Medhat2017,
  activation       = {No},
  address          = {Guangzhou, China},
  architecture     = {MCLNN & CLNN},
  author           = {Medhat, Fady and Chesmore, David and Robinson, John},
  batch            = {No},
  booktitle        = {ICONIP},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {[Ballroom](http://mtg.upf.edu/ismir2004/contest/tempoContest/node5.html) & [Homburg](http://www-ai.cs.uni-dortmund.de/audio.html)},
  doi              = {10.1007/978-3-319-70096-0_49},
  epochs           = {No},
  framework        = {Not disclosed},
  gpu              = {No},
  isbn             = {978-3-319-70096-0},
  learningrate     = {No},
  link             = {https://link.springer.com/chapter/10.1007%2F978-3-319-70096-0_49},
  month            = {Nov.},
  optimizer        = {Adam},
  pages            = {470--481},
  publisher        = {Springer International Publishing},
  task             = {MGR},
  title            = {Music Genre Classification Using Masked Conditional Neural Networks},
  year             = {2017},
}

@unpublished{Mimilakis2017,
  activation = {ReLU},
  architecture = {RNN & DNN},
  author = {Mimilakis, Stylianos Ioannis and Drossos, Konstantinos and Santos, João Felipe and Schuller, Gerald and Virtanen, Tuomas and Bengio, Yoshua},
  batch = {16},
  code = {https://github.com/Js-Mim/mss_pytorch},
  dataaugmentation = {No},
  dataset = {[MedleyDB](http://medleydb.weebly.com/) & [DSD100](http://sisec17.audiolabs-erlangen.de/#/dataset)},
  epochs = {100},
  framework = {PyTorch},
  gpu = {No},
  journal = {arXiv preprint arXiv:1711.01437},
  learningrate = {0.0001},
  link = {https://arxiv.org/pdf/1711.01437.pdf},
  month = {Nov.},
  note = {https://js-mim.github.io/mss_pytorch/},
  optimizer = {Adam},
  pages = {1--6},
  task = {SVS},
  title = {Monaural Singing Voice Separation with Skip-Filtering Connections and Recurrent Inference of Time-Frequency Mask},
  year = {2017}
}

@inproceedings{Miron2017a,
  address = {Espoo, Finland},
  architecture = {CNN},
  author = {Miron, Marius and Janer, Jordi and Gómez, Emilia},
  batch = {32},
  booktitle = {SMC},
  code = {https://github.com/MTG/DeepConvSep},
  dataset = {[RWC](https://staff.aist.go.jp/m.goto/RWC-MDB/) & [Bach10](http://music.cs.northwestern.edu/data/Bach10.html)},
  link = {https://www.researchgate.net/profile/Marius_Miron/publication/318322107_Generating_data_to_train_convolutional_neural_networks_for_classical_music_source_separation/links/59637cc3458515a3575b93c6/Generating-data-to-train-convolutional-neural-networks-for-classical-music-source-separation.pdf},
  month = {Jul.},
  organization = {Aalto University},
  pages = {227},
  task = {Source separation},
  title = {Generating data to train convolutional neural networks for classical music source separation},
  year = {2017}
}

@inproceedings{Miron2017b,
  address = {Suzhou, China},
  architecture = {CNN},
  author = {Miron, Marius and Janer, Jordi and Gómez, Emilia},
  booktitle = {ISMIR},
  code = {https://github.com/MTG/DeepConvSep},
  dataset = {[Bach10](http://music.cs.northwestern.edu/data/Bach10.html)},
  link = {https://www.researchgate.net/profile/Marius_Miron/publication/318637038_Monaural_score-informed_source_separation_for_classical_music_using_convolutional_neural_networks/links/597327c6458515e26dfdb007/Monaural-score-informed-source-separation-for-classical-music-using-convolutional-neural-networks.pdf},
  month = {Oct.},
  task = {Source separation},
  title = {Monaural score-informed source separation for classical music using convolutional neural networks},
  year = {2017}
}

@inproceedings{Oramas2017b,
  address      = {Suzhou, China},
  architecture = {CNN},
  author       = {Oramas, Sergio and Nieto, Oriol and Barbieri, Francesco and Serra, Xavier},
  batch        = {No},
  booktitle    = {ISMIR},
  code         = {https://github.com/sergiooramas/tartarus},
  dataset      = {[MSD](https://labrosa.ee.columbia.edu/millionsong/)},
  link         = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/126_Paper.pdf},
  month        = {Oct.},
  task         = {MGR},
  title        = {Multi-label music genre classification from audio, text, and images using deep features},
  year         = {2017}
}

@inproceedings{Oramas2017a,
  architecture = {CNN},
  author       = {Oramas, Sergio and Nieto, Oriol and Sordo, Mohamed and Serra, Xavier},
  batch        = {32},
  booktitle    = {DLRS},
  code         = {https://github.com/sergiooramas/tartarus},
  dataset      = {[MSD](https://labrosa.ee.columbia.edu/millionsong/)},
  link         = {https://arxiv.org/pdf/1706.09739.pdf},
  note         = {http://dlrs-workshop.org/wp-content/uploads/2017/09/oramas_music_cold_start_dlrs2017.pdf},
  task         = {Recommendation},
  title        = {A deep multimodal approach for cold-start music recommendation},
  year         = {2017}
}

@inproceedings{Park2017a,
  address = {New Orleans, USA},
  author = {Park, Hyunsin and Yoo, Chang D.},
  batch = {No},
  booktitle = {ICASSP},
  link = {http://ieeexplore.ieee.org/abstract/document/7952660/},
  organization = {IEEE},
  pages = {2766--2770},
  task = {Melody extraction},
  title = {Melody extraction and detection through LSTM-RNN with harmonic sum loss},
  year = {2017}
}

@unpublished{Park2017b,
  architecture = {CNN},
  author = {Park, Jiyoung and Lee, Jongpil and Park, Jangyeon and Ha, Jung-Woo and Nam, Juhan},
  dataset = {[MSD](https://labrosa.ee.columbia.edu/millionsong/) & [GTzan](http://marsyas.info/downloads/datasets.html) & [Magnatagatune](http://mirg.city.ac.uk/codeapps/the-magnatagatune-dataset)},
  link = {https://arxiv.org/pdf/1710.06648.pdf},
  pages = {1--5},
  task = {Artist recognition & MGR},
  title = {Representation learning of music using artist labels},
  year = {2017}
}

@inproceedings{Pfalz2017,
  author = {Pfalz, A. and Berdahl, E.},
  booktitle = {IWDLM},
  code = {https://www.cct.lsu.edu/~apfalz/inverse_control.html},
  link = {http://dorienherremans.com/dlm2017/papers/pfalz2017synthesis.pdf},
  title = {Toward inverse control of physics-based sound synthesis},
  year = {2017}
}

@techreport{Phan2017,
  architecture = {CNN & DNN},
  author       = {Phan, Huy and Krawczyk-Becker, Martin and Gerkmann, Timo and Mertins, Alfred},
  link         = {https://arxiv.org/pdf/1708.03211.pdf},
  task         = {Event recognition},
  title        = {DNN and CNN with weighted and multi-task loss functions for audio event detection},
  year         = {2017}
}

@inproceedings{Pons2017a,
  address   = {Suzhou, China},
  author    = {Pons, Jordi and Gong, Rong and Serra, Xavier},
  batch     = {128},
  booktitle = {ISMIR},
  code      = {https://github.com/ronggong/jingjuSyllabicSegmentaion/tree/v0.1.0},
  link      = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/46_Paper.pdf},
  month     = {Oct.},
  optimizer = {Adam},
  task      = {Syllable segmentation},
  title     = {Score-informed syllable segmentation for a cappella singing voice with convolutional neural networks},
  year      = {2017}
}

@inproceedings{Pons2017d,
  activation       = {ReLU},
  address          = {Long Beach, CA, USA},
  architecture     = {CNN},
  author           = {Pons, Jordi and Nieto, Oriol and Prockup, Matthew and Schmidt, Erik M. and Ehmann, Andreas F. and Serra, Xavier},
  batch            = {16},
  booktitle        = {[NIPS_ML4Audio](https://nips.cc/Conferences/2017/Schedule?showEvent=8790)},
  code             = {https://github.com/jordipons/music-audio-tagging-at-scale-models},
  dataaugmentation = {No},
  dataset          = {Inhouse},
  epochs           = {No},
  framework        = {Tensorflow},
  gpu              = {No},
  learningrate     = {0.001},
  link             = {https://arxiv.org/pdf/1711.02520.pdf},
  month            = {Dec.},
  note             = {Demo: http://www.jordipons.me/apps/music-audio-tagging-at-scale-demo/ and 15sec moving window to process the whole song},
  optimizer        = {Adam},
  pages            = {1--5},
  task             = {General audio classification},
  title            = {End-to-end learning for music audio tagging at scale},
  year             = {2017}
}

@inproceedings{Pons2017c,
  address      = {New Orleans, USA},
  architecture = {CNN},
  author       = {Pons, Jordi and Serra, Xavier},
  booktitle    = {ICASSP},
  code         = {https://github.com/jordipons/ICASSP2017},
  dataset      = {[Ballroom](http://mtg.upf.edu/ismir2004/contest/tempoContest/node5.html)},
  link         = {http://ieeexplore.ieee.org/document/7952601/},
  task         = {MGR},
  title        = {Designing efficient architectures for modeling temporal features with convolutional neural networks},
  year         = {2017}
}

@inproceedings{Pons2017b,
  architecture = {CNN},
  author       = {Pons, Jordi and Slizovskaia, Olga and Gong, Rong and Gómez, Emilia and Serra, Xavier},
  booktitle    = {EUSIPCO},
  code         = {https://github.com/jordipons/EUSIPCO2017},
  link         = {https://github.com/ronggong/EUSIPCO2017},
  title        = {Timbre analysis of music audio signals with convolutional neural networks},
  year         = {2017}
}

@inproceedings{Ramirez2017,
  address          = {Salford, UK},
  architecture     = {DAE},
  author           = {Ramírez, Marco A. Martínez and Reiss, Joshua D.},
  booktitle        = {WIMP},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {[Open Multitrack Testbed](http://www.semanticaudio.co.uk/projects/omtb/)},
  link             = {http://www.semanticaudio.co.uk/wp-content/uploads/2017/09/WIMP2017_Martinez-RamirezReiss.pdf},
  month            = {Sep.},
  optimizer        = {Adam},
  task             = {Mixing},
  title            = {Deep learning and intelligent audio mixing},
  year             = {2017}
}

@phdthesis{Schlueter2017,
  author = {Schlüter, Jan},
  link = {http://ofai.at/~jan.schlueter/pubs/phd/phd.pdf},
  school = {Johannes Kepler University Linz},
  title = {Deep learning for event detection, sequence labelling and similarity estimation in music signals},
  year = {2017}
}

@inproceedings{Senac2017,
  architecture = {CNN},
  author = {Senac, Christine and Pellegrini, Thomas and Mouret, Florian and Pinquier, Julien},
  booktitle = {CBMI},
  dataset = {[GTzan](http://marsyas.info/downloads/datasets.html)},
  input = {Spectrograms & common audio features},
  link = {https://www.researchgate.net/profile/Thomas_Pellegrini/publication/319326354_Music_Feature_Maps_with_Convolutional_Neural_Networks_for_Music_Genre_Classification/links/59ba5ae3458515bb9c4c6724/Music-Feature-Maps-with-Convolutional-Neural-Networks-for-Music-Genre-Classification.pdf},
  organization = {ACM},
  pages = {19},
  task = {MGR},
  title = {Music feature maps with convolutional neural networks for music genre classification},
  year = {2017}
}

@inproceedings{Southall2017,
  address = {Suzhou, China},
  architecture = {CNN & BRNN},
  author = {Southall, Carl and Stables, Ryan and Hockman, Jason},
  booktitle = {ISMIR},
  code = {https://github.com/CarlSouthall/ADTLib},
  dataset = {[IDMT-SMT-Drums](https://www.idmt.fraunhofer.de/en/business_units/m2d/smt/drums.html)},
  link = {https://carlsouthall.files.wordpress.com/2017/12/ismir2017adt.pdf},
  month = {Oct.},
  task = {Transcription},
  title = {Automatic drum transcription for polyphonic recordings using soft attention mechanisms and convolutional neural networks},
  year = {2017}
}

@unpublished{Stoller2017,
  activation       = {ReLU & Leaky ReLU},
  architecture     = {CNN & U-Net},
  author           = {Stoller, Daniel and Ewert, Sebastian and Dixon, Simon},
  batch            = {No},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {[iKala](http://mac.citi.sinica.edu.tw/ikala/) & [MedleyDB](http://medleydb.weebly.com/) & [DSD100](http://sisec17.audiolabs-erlangen.de/#/dataset) & [CCMixter](https://members.loria.fr/ALiutkus/kam/)},
  epochs           = {6},
  gpu              = {NVIDIA GTX1080},
  link             = {https://arxiv.org/pdf/1711.00048.pdf},
  month            = {Oct.},
  note             = {Follows U-Net architecture},
  optimizer        = {Adam},
  pages            = {1--5},
  reproducible     = {No},
  task             = {Source separation},
  title            = {Adversarial semi-supervised audio source separation applied to singing voice extraction},
  year             = {2017}
}

@article{Sturm2017,
  author    = {Sturm, Bob L. and Ben-Tal, Oded},
  code      = {https://github.com/IraKorshunova/folk-rnn},
  journal   = {[Journal of Creative Music Systems](http://jcms.org.uk/)},
  link      = {http://jcms.org.uk/issues/Vol2Issue1/taking-models-back-to-music-practice/Taking%20the%20Models%20back%20to%20Music%20Practice:%20Evaluating%20Generative%20Transcription%20Models%20built%20using%20Deep%20Learning.pdf},
  number    = {1},
  publisher = {University of Huddersfield Press},
  task      = {Composition},
  title     = {Taking the models back to music practice: Evaluating generative transcription models built using deep learning},
  volume    = {2},
  year      = {2017}
}

@inproceedings{Teng2017,
  address   = {Suzhou, China},
  author    = {Teng, Yifei and Zhao, An and Goudeseune, Camille},
  booktitle = {ISMIR},
  link      = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/178_Paper.pdf},
  month     = {Oct.},
  note      = {https://goo.gl/VezNNA and https://composing.ai/tsne},
  pages     = {657--663},
  task      = {Composition},
  title     = {Generating nontrivial melodies for music as a service},
  year      = {2017}
}

@unpublished{Thickstun2017,
  activation       = {No},
  architecture     = {CNN},
  author           = {Thickstun, John and Harchaoui, Zaid and Foster, Dean and Kakade, Sham M.},
  batch            = {150},
  code             = {https://github.com/jthickstun/thickstun2018invariances/},
  dataaugmentation = {Pitch shift integer in [-5, 5] semitones and continuous in [-0.1, 0.1]},
  dataset          = {[MusicNet](https://homes.cs.washington.edu/~thickstn/musicnet.html)},
  epochs           = {No},
  framework        = {Tensorflow},
  gpu              = {1 NVIDIA 1080Ti},
  learningrate     = {No},
  link             = {https://arxiv.org/pdf/1711.04845.pdf},
  month            = {Nov.},
  optimizer        = {No},
  pages            = {1--6},
  task             = {Transcription},
  title            = {Invariances and data augmentation for supervised music transcription},
  year             = {2017}
}

@inproceedings{Tsaptsinos2017,
  address = {Suzhou, China},
  architecture = {HAN},
  author = {Tsaptsinos, Alexandros},
  booktitle = {ISMIR},
  code = {https://github.com/alexTsaptsinos/lyricsHAN},
  dataset = {[LyricFind](http://lyricfind.com/)},
  framework = {Tensorflow},
  link = {https://ismir2017.smcnus.org/wp-content/uploads/2017/10/43_Paper.pdf},
  loss = {cross-entropy},
  month = {Oct.},
  pages = {694--701},
  task = {MGR},
  title = {Lyrics-based music genre classification using a hierarchical attention network},
  year = {2017}
}

@unpublished{Valin2017,
  architecture = {RNN},
  archiveprefix = {arXiv},
  author = {Valin, Jean-Marc},
  code = {https://github.com/xiph/rnnoise/},
  dataaugmentation = {No},
  dataset = {[TSP](http://www-mmsp.ece.mcgill.ca/Documents/Data/) & [NTT MLS](http://www.ntt-at.com/product/speech/)},
  eprint = {1709.08243},
  input = {BFCC (22), 1st and 2nd derivatives of first 6 BFCCs, 6 coefficients of DCT of pitch correlation, pitch period, spectral non-stationary metric},
  layers = {4},
  link = {https://arxiv.org/pdf/1709.08243.pdf},
  loss = {Custom},
  task = {Noise suppression},
  title = {A hybrid DSP/deep learning approach to real-time full-band speech enhancement},
  year = {2017}
}

@phdthesis{Velarde2017,
  author = {Velarde, Gissel},
  link = {http://vbn.aau.dk/files/260308151/PHD_Gissel_Velarde_E_pdf.pdf},
  publisher = {Aalborg Universitetsforlag},
  school = {Aalborg University},
  title = {Convolutional methods for music analysis},
  year = {2017}
}

@inproceedings{Vrysis2017,
  architecture = {ANN},
  author       = {Vrysis, Lazaros and Tsipas, Nikolaos and Dimoulas, Charalampos and Papanikolaou, George},
  booktitle    = {Audio Engineering Society Convention},
  link         = {http://www.aes.org/e-lib/browse.cfm?elib=18682},
  organization = {Audio Engineering Society},
  title        = {Extending temporal feature integration for semantic audio analysis},
  year         = {2017}
}

@inproceedings{Wang2017b,
  architecture = {CNN},
  author       = {Wang, Chien-Yao and Santoso, Andri and Mathulaprangsan, Seksan and Chiang, Chin-Chin and Wu, Chung-Hsien and Wang, Jia-Ching},
  booktitle    = {ICME},
  link         = {http://ieeexplore.ieee.org/abstract/document/8019552/},
  organization = {IEEE},
  pages        = {589--594},
  task         = {Event recognition},
  title        = {Recognition and retrieval of sound events using sparse coding convolutional neural network},
  year         = {2017}
}

@article{Wang2017a,
  author    = {Wang, Qi and Zhou, Ruohua and Yan, Yonghong},
  journal   = {[Applied Sciences](http://www.mdpi.com/journal/applsci)},
  link      = {http://www.mdpi.com/2076-3417/7/9/901/htm},
  number    = {9},
  pages     = {901},
  publisher = {Multidisciplinary Digital Publishing Institute},
  task      = {Transcription},
  title     = {A two-stage approach to note-level transcription of a specific piano},
  volume    = {7},
  year      = {2017}
}

@unpublished{Wu2017,
  activation       = {ReLU},
  architecture     = {CNN & RNN & MLP & AlexNet & ResNet},
  author           = {Wu, Yuzhong and Lee, Tan},
  code             = {No},
  dataaugmentation = {No},
  dataset          = {[AudioSet](https://research.google.com/audioset/index.html) & [TUT Acoustic Scenes 2016](http://www.cs.tut.fi/~mesaros/pubs/mesaros_eusipco2016-dcase.pdf)},
  link             = {https://arxiv.org/pdf/1711.00229.pdf},
  month            = {Nov.},
  optimizer        = {Adam},
  pages            = {1--5},
  reproducible     = {No},
  task             = {General audio classification},
  title            = {Reducing model complexity for DNN based large-scale audio classification},
  year             = {2017}
}

@inproceedings{Wyse2017,
  architecture = {CNN},
  author       = {Wyse, Lonce},
  booktitle    = {IWDLM},
  code         = {http://lonce.org/research/audioST/},
  link         = {http://dorienherremans.com/dlm2017/papers/wyse2017spect.pdf},
  task         = {Review & Comparison},
  title        = {Audio spectrogram representations for processing with convolutional neural networks},
  year         = {2017}
}

@article{Xu2017c,
  author    = {Xu, Yong and Huang, Qiang and Wang, Wenwu and Foster, Peter and Sigtia, Siddharth and Jackson, Philip J. B. and Plumbley, Mark D.},
  journal   = {[IEEE/ACM Transactions on Audio, Speech, and Language Processing](https://signalprocessingsociety.org/publications-resources/ieeeacm-transactions-audio-speech-and-language-processing/ieeeacm)},
  link      = {https://arxiv.org/pdf/1607.03681.pdf},
  number    = {6},
  pages     = {1230--1241},
  publisher = {IEEE},
  title     = {Unsupervised feature learning based on deep models for environmental audio tagging},
  volume    = {25},
  year      = {2017}
}

@inproceedings{Xu2017a,
  architecture = {CRNN},
  author       = {Xu, Yong and Kong, Qiuqiang and Huang, Qiang and Wang, Wenwu and Plumbley, Mark D.},
  booktitle    = {INTERSPEECH},
  code         = {https://github.com/yongxuUSTC/att_loc_cgrnn},
  doi          = {10.21437/Interspeech.2017-486},
  link         = {https://arxiv.org/pdf/1703.06052.pdf},
  note         = {https://sites.google.com/view/xuyong/demos/attention_model},
  pages        = {3083--3087},
  task         = {DCASE 2016 Task 4 Domestic audio tagging},
  title        = {Attention and localization based on a deep convolutional recurrent model for weakly supervised audio tagging},
  year         = {2017}
}

@techreport{Xu2017b,
  author = {Xu, Yong and Kong, Qiuqiang and Wang, Wenwu and Plumbley, Mark D.},
  code   = {https://github.com/yongxuUSTC/dcase2017_task4_cvssp},
  link   = {https://www.cs.tut.fi/sgn/arg/dcase2017/documents/challenge_technical_reports/DCASE2017_Xu_146.pdf},
  task   = {Event recognition},
  title  = {Surrey-CVSSP system for DCASE2017 challenge task4},
  year   = {2017}
}

@inproceedings{Ycart2017,
  address = {Suzhou, China},
  architecture = {RNN-LSTM},
  author = {Ycart, Adrien and Benetos, Emmanouil},
  booktitle = {ISMIR},
  code = {http://www.eecs.qmul.ac.uk/~ay304/code/ismir17},
  dataaugmentation = {Pitch shift},
  dataset = {Inhouse & [Piano-midi.de](http://www.piano-midi.de/)},
  link = {https://qmro.qmul.ac.uk/xmlui/handle/123456789/24946},
  month = {Oct.},
  task = {Polyphonic music sequence modelling},
  title = {A study on LSTM networks for polyphonic music sequence modelling},
  year = {2017}
}

@inproceedings{Dong2018,
  activation = {ReLU & Leaky ReLU},
  architecture = {GAN & CNN},
  author = {Dong, Hao-Wen and Hsiao, Wen-Yi and Yang, Li-Chia and Yang, Yi-Hsuan},
  batch = {No},
  booktitle = {AAAI},
  code = {https://github.com/salu133445/musegan},
  computationtime = {24 hours},
  dataaugmentation = {No},
  dataset = {[Lakh Pianoroll Dataset](https://github.com/salu133445/musegan/blob/master/docs/dataset.md)},
  dimension = {1D},
  dropout = {No},
  epochs = {No},
  framework = {No},
  gpu = {1 Tesla K40m},
  input = {Piano-roll},
  layers = {9},
  learningrate = {No},
  link = {https://arxiv.org/pdf/1709.06298.pdf},
  loss = {No},
  metric = {No},
  month = {Nov.},
  note = {5 metrics proposed for evaluation},
  optimizer = {Adam},
  pages = {1--13},
  reproducible = {No},
  task = {Composition},
  title = {MuseGAN: Multi-track sequential generative adversarial networks for symbolic music generation and accompaniment},
  year = {2018}
}

@unpublished{Huang2018,
  activation = {No},
  address = {No},
  architecture = {Transformer & RNN & tensor2tensor},
  author = {Huang, Cheng-Zhi Anna and Vaswani, Ashish and Uszkoreit, Jakob and Shazeer, Noam and Simon, Ian and Hawthorne, Curtis and Dai, Andrew M. and Hoffman, Matthew D. and Dinculescu, Monica and Eck, Douglas},
  batch = {1},
  code = {No},
  computationtime = {No},
  dataaugmentation = {Time Stretches & pitch transcription},
  dataset = {[J.S. Bach chorales dataset](https://github.com/czhuang/JSB-Chorales-dataset) & [Piano-e-Competition dataset (competition history)](http://www.piano-e-competition.com/)},
  dimension = {1D},
  dropout = {0.1},
  epochs = {No},
  framework = {No},
  gpu = {No},
  input = {MIDI},
  layers = {4 & 5 & 6},
  learningrate = {0.1},
  link = {https://arxiv.org/pdf/1809.04281.pdf},
  loss = {No},
  metric = {Negative Log-likelihood},
  momentum = {},
  note = {Submitted to ICLR 2019 Conference Paper1531 Area Chair1 cf https://openreview.net/forum?id=rJe4ShAcF7},
  optimizer = {No},
  pages = {1--14},
  reproducible = {No},
  task = {Polyphonic music sequence modelling},
  title = {Music transformer: Generating music with long-term structure},
  year = {2018}
}

@inproceedings{Li2018,
  activation = {No},
  address = {Montreal, Canada},
  architecture = {CNN & RNN},
  author = {Li, Juncheng and Qu, Shuhui and Wang, Yun and Li, Xinjian and Das, Samarjit and Metze, Florian},
  batch = {8},
  booktitle = {[NIPS](https://nips.cc/)},
  code = {No},
  computationtime = {No},
  dataaugmentation = {No},
  dataset = {[MAPS](http://www.tsi.telecom-paristech.fr/aao/en/2010/07/08/maps-database-a-piano-database-for-multipitch-estimation-and-automatic-transcription-of-music/)},
  dimension = {2D},
  dropout = {No},
  epochs = {No},
  framework = {No},
  gpu = {No},
  input = {Log Mel-spectrogram with 48 bins per octave and 512 hop-size and 2018 window size and 16 kHz sample rate},
  layers = {4},
  learningrate = {0.0006},
  link = {https://nips2018creativity.github.io/doc/music_theory_inspired_policy_gradient.pdf},
  loss = {binary cross-entropy},
  metric = {Precision & Recall & F1},
  momentum = {No},
  month = {Dec.},
  note = {RL Reinforcement Learning},
  optimizer = {Adam},
  pages = {1--10},
  reproducible = {No},
  task = {Transcription},
  title = {Music theory inspired policy gradient method for piano music transcription},
  year = {2018}
}

@inproceedings{Hawthorne2019,
  activation = {No},
  address = {New Orleans, USA},
  architecture = {No},
  author = {Hawthorne, Curtis and Stasyuk, Andriy and Roberts, Adam and Simon, Ian and Huang, Cheng-Zhi Anna and Dieleman, Sander and Elsen, Erich and Engel, Jesse and Eck, Douglas},
  batch = {No},
  booktitle = {ICLR},
  code = {No},
  computationtime = {No},
  dataaugmentation = {pitch-shift {+-0.1 semitones} & compression {0 - 100} & EQ {32 - 4096} & Reverb {0 - 70} & Pink-noise {0 - 0.04}},
  dataset = {[MAESTRO](https://magenta.tensorflow.org/datasets/maestro/)},
  dimension = {1D},
  dropout = {No},
  epochs = {No},
  framework = {No},
  gpu = {No},
  input = {MIDI},
  layers = {6},
  learningrate = {No},
  link = {https://arxiv.org/pdf/1810.12247.pdf},
  loss = {No},
  metric = {MSE & Precision & Recall & F1},
  momentum = {No},
  month = {May},
  note = {Wave2Midi2Wave},
  optimizer = {No},
  pages = {1--12},
  reproducible = {No},
  task = {Transcription},
  title = {Enabling factorized piano music modeling and generation with the MAESTRO dataset},
  year = {2019}
}

@unpublished{Child2019,
  activation = {No},
  architecture = {Transformer},
  author = {Child, Rewon and Gray, Scott and Radford, Alec and Sutskever, Ilya},
  batch = {No},
  code = {https://github.com/openai/sparse_attention},
  dataaugmentation = {No},
  dataset = {[413 hours of recorded solo piano music](http://papers.nips.cc/paper/8023-the-challenge-of-realistic-music-generation-modelling-raw-audio-at-scale-supplemental.zip)},
  dimension = {1D},
  dropout = {0.25},
  epochs = {120},
  framework = {Tensorflow},
  gpu = {8 NVIDIA Tesla V100},
  input = {Raw Audio},
  layers = {128},
  learningrate = {0.00035},
  link = {https://arxiv.org/pdf/1904.10509.pdf},
  loss = {No},
  metric = {Human listening},
  momentum = {No},
  note = {this paper is mainly about how sparse transformer are implemented},
  optimizer = {Adam},
  pages = {8--9},
  reproducible = {No},
  task = {Audio generation},
  title = {Generating Long Sequences with Sparse Transformers},
  year = {2019}
}