-  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX   TR2017-014 PDF
@inproceedings{Hayashi2017mar,
  author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
  title     = {{BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2017,
  month     = mar,
  url       = {https://www.merl.com/publications/TR2017-014}
}
 
  -  Kim, S., Hori, T., Watanabe, S., "Joint CTC- Attention Based End-to-End Speech Recognition Using Multi-task Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX   TR2017-016 PDF  Video
@inproceedings{Kim2017mar,
  author    = {Kim, Suyoun and Hori, Takaaki and Watanabe, Shinji},
  title     = {{Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-task Learning}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2017,
  month     = mar,
  url       = {https://www.merl.com/publications/TR2017-016}
}
 
  -  Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX   TR2017-010 PDF
@inproceedings{Luo2017mar,
  author    = {Luo, Yi and Chen, Zhuo and Hershey, John R. and {Le Roux}, Jonathan and Mesgarani, Nima},
  title     = {{Deep Clustering and Conventional Networks for Music Separation: Strong Together}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2017,
  month     = mar,
  url       = {https://www.merl.com/publications/TR2017-010}
}
 
  -  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX   TR2017-012 PDF
@inproceedings{Meng2017mar,
  author    = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
  title     = {{Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2017,
  month     = mar,
  url       = {https://www.merl.com/publications/TR2017-012}
}
 
  -  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX   TR2017-011 PDF
@inproceedings{Watanabe2017mar,
  author    = {Watanabe, Shinji and Hori, Takaaki and {Le Roux}, Jonathan and Hershey, John R.},
  title     = {{Student-Teacher Network Learning with Enhanced Features}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2017,
  month     = mar,
  url       = {https://www.merl.com/publications/TR2017-011}
}
 
  -  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)<br />
, DOI: 10.1109/APSIPA.2016.7820724, December 2016.
BibTeX   TR2016-162 PDF
@inproceedings{Xiao2016dec,
  author    = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
  title     = {{Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition}},
  booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
  year      = 2016,
  month     = dec,
  doi       = {10.1109/APSIPA.2016.7820724},
  url       = {https://www.merl.com/publications/TR2016-162}
}
 
  -  Hori, T., Wang, H., Hori, C., Watanabe, S., Harsham, B.A., Le Roux, J., Hershey, J.R., Koji, Y., Jing, Y., Zhu, Z., Aikawa, T., "Dialog State Tracking with Attention-based Sequence-to-sequence Learning", IEEE Workshop on Spoken Language Technology (SLT), DOI: 10.1109/SLT.2016.7846317, December 2016, pp. 552-558.
BibTeX   TR2016-163 PDF
@inproceedings{Hori2016dec,
  author    = {Hori, Takaaki and Wang, Hai and Hori, Chiori and Watanabe, Shinji and Harsham, Bret A. and {Le Roux}, Jonathan and Hershey, John R. and Koji, Yusuke and Jing, Yi and Zhu, Zhaocheng and Aikawa, Takeyuki},
  title     = {{Dialog State Tracking with Attention-based Sequence-to-sequence Learning}},
  booktitle = {IEEE Workshop on Spoken Language Technology (SLT)},
  year      = 2016,
  pages     = {552--558},
  month     = dec,
  doi       = {10.1109/SLT.2016.7846317},
  url       = {https://www.merl.com/publications/TR2016-163}
}
 
  -  Takano, T., Moriya, T., Shinozaki, T., Watanabe, S., Hori, T., Duh, K., "Automated Structure Discovery and Parameter Tuning of Neural Network Language Model Based on Evolution Strategy", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2016.7846334, December 2016.
BibTeX   TR2016-173 PDF
@inproceedings{Takano2016dec,
  author    = {Takano, Tomihiro and Moriya, Takafumi and Shinozaki, Takahiro and Watanabe, Shinji and Hori, Takaaki and Duh, Kevin},
  title     = {{Automated Structure Discovery and Parameter Tuning of Neural Network Language Model Based on Evolution Strategy}},
  booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
  year      = 2016,
  month     = dec,
  doi       = {10.1109/SLT.2016.7846334},
  url       = {https://www.merl.com/publications/TR2016-173}
}
 
  -  Barker, J., Marxer, R., Vincent, E., Watanabe, S., "The Third 'CHIME' Speech Separation and Recognition Challenge: Analysis and Outcomes", Computer Speech & Language, DOI: 10.1016/j.csl.2016.10.005, December 2016.
BibTeX   TR2016-171 PDF
@article{Barker2016dec,
  author    = {Barker, Jon and Marxer, Ricard and Vincent, Emmanuel and Watanabe, Shinji},
  title     = {{The Third 'CHIME' Speech Separation and Recognition Challenge: Analysis and Outcomes}},
  journal   = {Computer Speech \& Language},
  year      = 2016,
  month     = dec,
  publisher = {Elsevier},
  doi       = {10.1016/j.csl.2016.10.005},
  url       = {https://www.merl.com/publications/TR2016-171}
}
 
  -  Wisdom, S., Powers, T., Hershey, J.R., Le Roux, J., Atlas, L., "Full-Capacity Unitary Recurrent Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2016.
BibTeX   TR2016-155 PDF
@inproceedings{Wisdom2016dec,
  author    = {Wisdom, Scott and Powers, Thomas and Hershey, John R. and {Le Roux}, Jonathan and Atlas, Les},
  title     = {{Full-Capacity Unitary Recurrent Neural Networks}},
  booktitle = {Advances in Neural Information Processing Systems (NIPS)},
  year      = 2016,
  month     = dec,
  url       = {https://www.merl.com/publications/TR2016-155}
}
 
  -  Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R., "An analysis of environment, microphone and data simulation mismatches in robust speech recognition", Computer Speech & Language, DOI: 10.1016/j.csl.2016.11.005, December 2016.
BibTeX   TR2016-172 PDF
@article{Vincent2016dec,
  author    = {Vincent, Emmanuel and Watanabe, Shinji and Nugraha, Aditya Arie and Barker, Jon and Marxer, Ricard},
  title     = {{An analysis of environment, microphone and data simulation mismatches in robust speech recognition}},
  journal   = {Computer Speech \& Language},
  year      = 2016,
  month     = dec,
  publisher = {Elsevier},
  doi       = {10.1016/j.csl.2016.11.005},
  url       = {https://www.merl.com/publications/TR2016-172}
}
 
  -  Tawara, N., Ogawa, T., Watanabe, S., Kobayashi, T., "Nested Gibbs sampling for mixture-of-mixture model and its application to speaker clustering", APSIPA Transactions on Signal and Information Processing, DOI: 10.1017/ATSIP.2016.15, Vol. 5, October 2016.
BibTeX   TR2016-138 PDF
@article{Tawara2016oct,
  author  = {Tawara, Naohiro and Ogawa, Tetsuji and Watanabe, Shinji and Kobayashi, Tetsunori},
  title   = {{Nested Gibbs sampling for mixture-of-mixture model and its application to speaker clustering}},
  journal = {APSIPA Transactions on Signal and Information Processing},
  year    = 2016,
  volume  = 5,
  month   = oct,
  doi     = {10.1017/ATSIP.2016.15},
  url     = {https://www.merl.com/publications/TR2016-138}
}
 
  -  Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
BibTeX   TR2016-115 PDF
@comment{The number field carries the report id shown in the citation line for this entry. The booktitle field is kept from the source data; standard techreport styles do not print it.}
@techreport{Delcroix2016sep,
  author      = {Delcroix, Marc and Watanabe, Shinji},
  title       = {{Recent Advances in Distant Speech Recognition}},
  booktitle   = {Interspeech Tutorials},
  institution = {Interspeech},
  number      = {TR2016-115},
  year        = 2016,
  month       = sep,
  url         = {https://www.merl.com/publications/TR2016-115}
}
 
  -  Le Roux, J., Vincent, E., Erdogan, H., "Learning-Based Approaches to Speech Enhancement and Separation," Tech. Rep. TR2016-113, Interspeech Tutorials, September 2016.
BibTeX   TR2016-113 PDF
@comment{The number field carries the report id shown in the citation line for this entry. NOTE(review): institution is required for techreport and was missing; the value mirrors the other Interspeech Tutorials report in this list - confirm the issuing institution. The booktitle field is kept from the source data; standard techreport styles do not print it.}
@techreport{LeRoux2016sep,
  author      = {{Le Roux}, Jonathan and Vincent, Emmanuel and Erdogan, Hakan},
  title       = {{Learning-Based Approaches to Speech Enhancement and Separation}},
  booktitle   = {Interspeech Tutorials},
  institution = {Interspeech},
  number      = {TR2016-113},
  year        = 2016,
  month       = sep,
  url         = {https://www.merl.com/publications/TR2016-113}
}
 
  -  Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
BibTeX   TR2016-072 PDF
@inproceedings{Erdogan2016sep,
  author    = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and {Le Roux}, Jonathan},
  title     = {{Improved MVDR beamforming using single-channel mask prediction networks}},
  booktitle = {Interspeech},
  year      = 2016,
  pages     = {1981--1985},
  month     = sep,
  doi       = {10.21437/Interspeech.2016-552},
  url       = {https://www.merl.com/publications/TR2016-072}
}
 
  -  Hori, C., Hori, T., Watanabe, S., Hershey, J.R., "Context-Sensitive and Role-Dependent Spoken Language Understanding using Bidirectional and Attention LSTMs", Interspeech, DOI: 10.21437/Interspeech.2016-1171, September 2016, pp. 3236-3240.
BibTeX   TR2016-074 PDF
@inproceedings{Hori2016sep,
  author    = {Hori, Chiori and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
  title     = {{Context-Sensitive and Role-Dependent Spoken Language Understanding using Bidirectional and Attention LSTMs}},
  booktitle = {Interspeech},
  year      = 2016,
  pages     = {3236--3240},
  month     = sep,
  doi       = {10.21437/Interspeech.2016-1171},
  url       = {https://www.merl.com/publications/TR2016-074}
}
 
  -  Isik, Y., Le Roux, J., Chen, Z., Watanabe, S., Hershey, J.R., "Single-Channel Multi-Speaker Separation using Deep Clustering", Interspeech, DOI: 10.21437/Interspeech.2016-1176, September 2016, pp. 545-549.
BibTeX   TR2016-073 PDF
@inproceedings{Isik2016sep,
  author    = {Isik, Yusuf and {Le Roux}, Jonathan and Chen, Zhuo and Watanabe, Shinji and Hershey, John R.},
  title     = {{Single-Channel Multi-Speaker Separation using Deep Clustering}},
  booktitle = {Interspeech},
  year      = 2016,
  pages     = {545--549},
  month     = sep,
  doi       = {10.21437/Interspeech.2016-1176},
  url       = {https://www.merl.com/publications/TR2016-073}
}
 
  -  Zmolikova, K., Karafiat, M., Vesely, K., Delcroix, M., Watanabe, S., Burget, L., Cernocky, J.H., "Data selection by sequence summarizing neural network in mismatch condition training", Interspeech, DOI: 10.21437/Interspeech.2016-741, September 2016, pp. 2354-2358.
BibTeX   TR2016-075 PDF
@comment{Last author: the family name is followed by a single comma and then both given names. A second comma would select BibTeX's "von Last, Jr, First" form and make "Jan" a Jr part.}
@inproceedings{Zmolikova2016sep,
  author    = {Zmolikova, Katerina and Karafiat, Martin and Vesely, Karel and Delcroix, Marc and Watanabe, Shinji and Burget, Lukas and Cernocky, Jan Honza},
  title     = {{Data selection by sequence summarizing neural network in mismatch condition training}},
  booktitle = {Interspeech},
  year      = 2016,
  pages     = {2354--2358},
  month     = sep,
  doi       = {10.21437/Interspeech.2016-741},
  url       = {https://www.merl.com/publications/TR2016-075}
}
 
  -  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
BibTeX   TR2016-114 PDF
@inproceedings{Hayashi2016sep,
  author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
  title     = {{Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection}},
  booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
  year      = 2016,
  pages     = {35--39},
  month     = sep,
  url       = {https://www.merl.com/publications/TR2016-114}
}
 
  -  Hori, C., Watanabe, S., Hori, T., Harsham, B.A., Hershey, J.R., Koji, Y., Fujii, Y., Furumoto, Y., "Driver Confusion Status Detection Using Recurrent Neural Networks", IEEE International Conference on Multimedia and Expo (ICME), DOI: 10.1109/ICME.2016.7552966, July 2016.
BibTeX   TR2016-088 PDF
@inproceedings{Hori2016jul,
  author    = {Hori, Chiori and Watanabe, Shinji and Hori, Takaaki and Harsham, Bret A. and Hershey, John R. and Koji, Yusuke and Fujii, Youichi and Furumoto, Yuki},
  title     = {{Driver Confusion Status Detection Using Recurrent Neural Networks}},
  booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
  year      = 2016,
  month     = jul,
  doi       = {10.1109/ICME.2016.7552966},
  url       = {https://www.merl.com/publications/TR2016-088}
}
 
  -  Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
BibTeX   TR2016-003 PDF
@inproceedings{Hershey2016mar,
  author    = {Hershey, John R. and Chen, Zhuo and {Le Roux}, Jonathan and Watanabe, Shinji},
  title     = {{Deep Clustering: Discriminative Embeddings for Segmentation and Separation}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2016,
  pages     = {31--35},
  month     = mar,
  doi       = {10.1109/ICASSP.2016.7471631},
  url       = {https://www.merl.com/publications/TR2016-003}
}
 
  -  Hori, T., Hori, C., Watanabe, S., Hershey, J.R., "Minimum Word Error Training of Long Short-Term Memory Recurrent Neural Network Language Models for Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472827, March 2016, pp. 5990-5994.
BibTeX   TR2016-011 PDF
@inproceedings{Hori2016mar,
  author    = {Hori, Takaaki and Hori, Chiori and Watanabe, Shinji and Hershey, John R.},
  title     = {{Minimum Word Error Training of Long Short-Term Memory Recurrent Neural Network Language Models for Speech Recognition}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2016,
  pages     = {5990--5994},
  month     = mar,
  doi       = {10.1109/ICASSP.2016.7472827},
  url       = {https://www.merl.com/publications/TR2016-011}
}
 
  -  Vesely, K., Watanabe, S., Zmolikova, K., Karafiat, M., Burget, L., Cernocky, J.H., "Sequence Summarizing Neural Network for Speaker Adaptation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472692, March 2016, pp. 5315-5319.
BibTeX   TR2016-001 PDF
@comment{Last author: the family name is followed by a single comma and then both given names. A second comma would select BibTeX's "von Last, Jr, First" form and make "Jan" a Jr part.}
@inproceedings{Vesely2016mar,
  author    = {Vesely, Karel and Watanabe, Shinji and Zmolikova, Katerina and Karafiat, Martin and Burget, Lukas and Cernocky, Jan Honza},
  title     = {{Sequence Summarizing Neural Network for Speaker Adaptation}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2016,
  pages     = {5315--5319},
  month     = mar,
  doi       = {10.1109/ICASSP.2016.7472692},
  url       = {https://www.merl.com/publications/TR2016-001}
}
 
  -  Wisdom, S., Hershey, J.R., Le Roux, J., Watanabe, S., "Deep Unfolding for Multichannel Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471649, March 2016, pp. 121-125.
BibTeX   TR2016-008 PDF
@inproceedings{Wisdom2016mar,
  author    = {Wisdom, Scott and Hershey, John R. and {Le Roux}, Jonathan and Watanabe, Shinji},
  title     = {{Deep Unfolding for Multichannel Source Separation}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2016,
  pages     = {121--125},
  month     = mar,
  doi       = {10.1109/ICASSP.2016.7471649},
  url       = {https://www.merl.com/publications/TR2016-008}
}
 
  -  Xiao, X., Watanabe, S., Erdogan, H., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Mandel, M., Yu, D., "Deep Beamforming Networks for Multi-Channel Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472778, March 2016, pp. 5745-5749.
BibTeX   TR2016-002 PDF
@inproceedings{Xiao2016mar,
  author    = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Mandel, Michael and Yu, Dong},
  title     = {{Deep Beamforming Networks for Multi-Channel Speech Recognition}},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  year      = 2016,
  pages     = {5745--5749},
  month     = mar,
  doi       = {10.1109/ICASSP.2016.7472778},
  url       = {https://www.merl.com/publications/TR2016-002}
}