Publications

314 / 3,599 publications found.


  •  Hori, C., Hori, T., Lee, T.-Y., Zhang, Z., Harsham, B.A., Sumi, K., Marks, T.K., Hershey, J.R., "Attention-Based Multimodal Fusion for Video Description", IEEE International Conference on Computer Vision (ICCV), DOI: 10.1109/​ICCV.2017.450, October 2017.
    BibTeX TR2017-156 PDF
    • @inproceedings{Hori2017oct,
    • author = {Hori, Chiori and Hori, Takaaki and Lee, Teng-Yok and Zhang, Ziming and Harsham, Bret A. and Sumi, Kazuhiko and Marks, Tim K. and Hershey, John R.},
    • title = {Attention-Based Multimodal Fusion for Video Description},
    • booktitle = {IEEE International Conference on Computer Vision (ICCV)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/ICCV.2017.450},
    • url = {https://www.merl.com/publications/TR2017-156}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., "Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/​JSTSP.2017.2764276, October 2017, vol. 11, pp. 1274-1288.
    BibTeX TR2017-139 PDF
    • @inproceedings{Ochiai2017oct,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru},
    • title = {Does speech enhancement work with end-to-end ASR objectives?: Experimental analysis of multichannel end-to-end ASR},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • url = {https://www.merl.com/publications/TR2017-139}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Magron, P., Le Roux, J., Virtanen, T., "Consistent Anisotropic Wiener Filtering for Audio Source", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/​WASPAA.2017.8170037, October 2017.
    BibTeX TR2017-151 PDF
    • @inproceedings{Magron2017oct,
    • author = {Magron, Paul and Le Roux, Jonathan and Virtanen, Tuomas},
    • title = {Consistent Anisotropic Wiener Filtering for Audio Source},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2017,
    • month = oct,
    • doi = {10.1109/WASPAA.2017.8170037},
    • url = {https://www.merl.com/publications/TR2017-151}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX TR2017-132 PDF Video
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Shinozaki, T., Watanabe, S., Mochihashi, D., Neubig, G., "Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text", Interspeech, August 2017.
    BibTeX TR2017-133 PDF
    • @inproceedings{Shinozaki2017aug,
    • author = {Shinozaki, Takahiro and Watanabe, Shinji and Mochihashi, Daichi and Neubig, Graham},
    • title = {Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-133}
    • }
  •  Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
    BibTeX TR2017-134 PDF
    • @inproceedings{Tachioka2017aug,
    • author = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-134}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
    BibTeX TR2017-150 PDF
    • @article{Hayashi2017aug,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {Duration-Controlled LSTM for Polyphonic Sound Event Detection},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2017,
    • volume = 25,
    • number = 11,
    • month = aug,
    • doi = {10.1109/TASLP.2017.2740002},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2017-150}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    • title = {Multichannel End-to-end Speech Recognition},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-107}
    • }
  •  Hori, T., Watanabe, S., Hershey, J.R., "Joint CTC/attention decoding for end-to-end speech recognition", Association for Computational Linguistics (ACL), DOI: 10.18653/​v1/​P17-1048, July 2017, pp. 518-529.
    BibTeX TR2017-103 PDF Video
    • @inproceedings{Hori2017jul,
    • author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {Joint CTC/attention decoding for end-to-end speech recognition},
    • booktitle = {Association for Computational Linguistics (ACL)},
    • year = 2017,
    • pages = {518--529},
    • month = jul,
    • doi = {10.18653/v1/P17-1048},
    • url = {https://www.merl.com/publications/TR2017-103}
    • }
  •  Watanabe, S., Hori, T., Hayashi, T., Kim, S., "End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model", Acoustical Society of Japan Spring Meeting (ASJ), March 2017.
    BibTeX TR2017-021 PDF
    • @inproceedings{Watanabe2017mar2,
    • author = {Watanabe, Shinji and Hori, Takaaki and Hayashi, Tomoki and Kim, Suyoun},
    • title = {End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model},
    • booktitle = {Acoustical Society of Japan Spring Meeting (ASJ)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-021}
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-014 PDF
    • @inproceedings{Hayashi2017mar,
    • author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    • title = {BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-014}
    • }
  •  Kim, S., Hori, T., Watanabe, S., "Joint CTC- Attention Based End-to-End Speech Recognition Using Multi-task Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-016 PDF Video
    • @inproceedings{Kim2017mar,
    • author = {Kim, Suyoun and Hori, Takaaki and Watanabe, Shinji},
    • title = {Joint CTC- Attention Based End-to-End Speech Recognition Using Multi-task Learning},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-016}
    • }
  •  Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-010 PDF
    • @inproceedings{Luo2017mar,
    • author = {Luo, Yi and Chen, Zhuo and Hershey, John R. and Le Roux, Jonathan and Mesgarani, Nima},
    • title = {Deep Clustering and Conventional Networks for Music Separation: Strong Together},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-010}
    • }
  •  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-012 PDF
    • @inproceedings{Meng2017mar,
    • author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
    • title = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-012}
    • }
  •  Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-011 PDF
    • @inproceedings{Watanabe2017mar,
    • author = {Watanabe, Shinji and Hori, Takaaki and Le Roux, Jonathan and Hershey, John R.},
    • title = {Student-Teacher Network Learning with Enhanced Features},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-011}
    • }
  •  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)<br /> , DOI: 10.1109/​APSIPA.2016.7820724, December 2016.
    BibTeX TR2016-162 PDF
    • @inproceedings{Xiao2016dec,
    • author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
    • title = {Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition},
    • booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)
      },
    • year = 2016,
    • month = dec,
    • doi = {10.1109/APSIPA.2016.7820724},
    • url = {https://www.merl.com/publications/TR2016-162}
    • }
  •  Hori, T., Wang, H., Hori, C., Watanabe, S., Harsham, B.A., Le Roux, J., Hershey, J.R., Koji, Y., Jing, Y., Zhu, Z., Aikawa, T., "Dialog State Tracking with Attention-based Sequence-to-sequence Learning", IEEE Workshop on Spoken Language Technology (SLT), DOI: 10.1109/​SLT.2016.7846317, December 2016, pp. 552-558.
    BibTeX TR2016-163 PDF
    • @inproceedings{Hori2016dec,
    • author = {Hori, Takaaki and Wang, Hai and Hori, Chiori and Watanabe, Shinji and Harsham, Bret A. and Le Roux, Jonathan and Hershey, John R. and Koji, Yusuke and Jing, Yi and Zhu, Zhaocheng and Aikawa, Takeyuki},
    • title = {Dialog State Tracking with Attention-based Sequence-to-sequence Learning},
    • booktitle = {IEEE Workshop on Spoken Language Technology (SLT)},
    • year = 2016,
    • pages = {552--558},
    • month = dec,
    • doi = {10.1109/SLT.2016.7846317},
    • url = {https://www.merl.com/publications/TR2016-163}
    • }
  •  Takano, T., Moriya, T., Shinozaki, T., Watanabe, S., Hori, T., Duh, K., "Automated Structure Discovery and Parameter Tuning of Neural Network Language Model Based on Evolution Strategy", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/​SLT.2016.7846334, December 2016.
    BibTeX TR2016-173 PDF
    • @inproceedings{Takano2016dec,
    • author = {Takano, Tomihiro and Moriya, Takafumi and Shinozaki, Takahiro and Watanabe, Shinji and Hori, Takaaki and Duh, Kevin},
    • title = {Automated Structure Discovery and Parameter Tuning of Neural Network Language Model Based on Evolution Strategy},
    • booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/SLT.2016.7846334},
    • url = {https://www.merl.com/publications/TR2016-173}
    • }
  •  Barker, J., Marxer, R., Vincent, E., Watanabe, S., "The Third 'CHIME' Speech Separation and Recognition Challenge: Analysis and Outcomes", Computer Speech & Language, DOI: 10.1016/​j.csl.2016.10.005, December 2016.
    BibTeX TR2016-171 PDF
    • @article{Barker2016dec,
    • author = {Barker, Jon and Marxer, Ricard and Vincent, Emmanuel and Watanabe, Shinji},
    • title = {The Third 'CHIME' Speech Separation and Recognition Challenge: Analysis and Outcomes},
    • journal = {Computer Speech \& Language},
    • year = 2016,
    • month = dec,
    • publisher = {Elsevier},
    • doi = {10.1016/j.csl.2016.10.005},
    • url = {https://www.merl.com/publications/TR2016-171}
    • }
  •  Wisdom, S., Powers, T., Hershey, J.R., Le Roux, J., Atlas, L., "Full-Capacity Unitary Recurrent Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2016.
    BibTeX TR2016-155 PDF
    • @inproceedings{Wisdom2016dec,
    • author = {Wisdom, Scott and Powers, Thomas and Hershey, John R. and Le Roux, Jonathan and Atlas, Les},
    • title = {Full-Capacity Unitary Recurrent Neural Networks},
    • booktitle = {Advances in Neural Information Processing Systems (NIPS)},
    • year = 2016,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2016-155}
    • }
  •  Vincent, E., Watanabe, S., Nugraha, A.A., Barker, J., Marxer, R., "An analysis of environment, microphone and data simulation mismatches in robust speech recognition", Computer Speech & Language, DOI: 10.1016/​j.csl.2016.11.005, December 2016.
    BibTeX TR2016-172 PDF
    • @article{Vincent2016dec,
    • author = {Vincent, Emmanuel and Watanabe, Shinji and Nugraha, Aditya Arie and Barker, Jon and Marxer, Ricard},
    • title = {An analysis of environment, microphone and data simulation mismatches in robust speech recognition},
    • journal = {Computer Speech \& Language},
    • year = 2016,
    • month = dec,
    • publisher = {Elsevier},
    • doi = {10.1016/j.csl.2016.11.005},
    • url = {https://www.merl.com/publications/TR2016-172}
    • }
  •  Tawara, N., Ogawa, T., Watanabe, S., Kobayashi, T., "Nested Gibbs sampling for mixture-of-mixture model and its application to speaker clustering", APSIPA Transactions on Signal and Information Processing, DOI: 10.1017/​ATSIP.2016.15, Vol. 5, October 2016.
    BibTeX TR2016-138 PDF
    • @article{Tawara2016oct,
    • author = {Tawara, Naohiro and Ogawa, Tetsuji and Watanabe, Shinji and Kobayashi, Tetsunori},
    • title = {Nested Gibbs sampling for mixture-of-mixture model and its application to speaker clustering},
    • journal = {APSIPA Transactions on Signal and Information Processing},
    • year = 2016,
    • volume = 5,
    • month = oct,
    • doi = {10.1017/ATSIP.2016.15},
    • url = {https://www.merl.com/publications/TR2016-138}
    • }
  •  Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
    BibTeX TR2016-115 PDF
    • @techreport{Delcroix2016sep,
    • author = {Delcroix, Marc and Watanabe, Shinji},
    • title = {Recent Advances in Distant Speech Recognition},
    • booktitle = {Interspeech Tutorials},
    • institution = {Interspeech},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-115}
    • }