Publications

Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
BibTeX TR2017-190 PDF Video
- @article{Watanabe2017oct,
- author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
- title = {{Hybrid CTC/Attention Architecture for End-to-End Speech Recognition}},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1240--1253},
- month = oct,
- doi = {10.1109/JSTSP.2017.2763455},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-190}
- }
Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
BibTeX TR2017-132 PDF Video
- @inproceedings{Hori2017aug,
- author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
- title = {{Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM}},
- booktitle = {Interspeech},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-132}
- }
Shinozaki, T., Watanabe, S., Mochihashi, D., Neubig, G., "Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text", Interspeech, August 2017.
BibTeX TR2017-133 PDF
- @inproceedings{Shinozaki2017aug,
- author = {Shinozaki, Takahiro and Watanabe, Shinji and Mochihashi, Daichi and Neubig, Graham},
- title = {{Semi-Supervised Learning of a Pronunciation Dictionary from Disjoint Phonemic Transcripts and Text}},
- booktitle = {Interspeech},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-133}
- }
Tachioka, Y., Narita, T., Miura, I., Uramoto, T., Monta, N., Uenohara, S., Furuya, K., Watanabe, S., Le Roux, J., "Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information", Interspeech, August 2017.
BibTeX TR2017-134 PDF
- @inproceedings{Tachioka2017aug,
- author = {Tachioka, Yuuki and Narita, Tomohiro and Miura, Iori and Uramoto, Takanobu and Monta, Natsuki and Uenohara, Shingo and Furuya, Kenichi and Watanabe, Shinji and {Le Roux}, Jonathan},
- title = {{Coupled initialization of multi-channel non-negative matrix factorization based on spatial and spectral information}},
- booktitle = {Interspeech},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-134}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
BibTeX TR2017-150 PDF
- @article{Hayashi2017aug,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title = {{Duration-Controlled LSTM for Polyphonic Sound Event Detection}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2017,
- volume = 25,
- number = 11,
- month = aug,
- doi = {10.1109/TASLP.2017.2740002},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2017-150}
- }
Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
BibTeX TR2017-107 PDF
- @inproceedings{Ochiai2017aug,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
- title = {{Multichannel End-to-end Speech Recognition}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-107}
- }
Hori, T., Watanabe, S., Hershey, J.R., "Joint CTC/attention decoding for end-to-end speech recognition", Association for Computational Linguistics (ACL), DOI: 10.18653/v1/P17-1048, July 2017, pp. 518-529.
BibTeX TR2017-103 PDF Video
- @inproceedings{Hori2017jul,
- author = {Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
- title = {{Joint CTC/attention decoding for end-to-end speech recognition}},
- booktitle = {Association for Computational Linguistics (ACL)},
- year = 2017,
- pages = {518--529},
- month = jul,
- doi = {10.18653/v1/P17-1048},
- url = {https://www.merl.com/publications/TR2017-103}
- }
Pajovic, M., Sahinoglu, Z., Wang, Y., Orlik, P.V., Wada, T., "Online Data-Driven Battery Voltage Prediction", IEEE International Conference on Industrial Informatics (INDIN), DOI: 10.1109/INDIN.2017.8104879, July 2017.
BibTeX TR2017-101 PDF
- @inproceedings{Pajovic2017jul,
- author = {Pajovic, Milutin and Sahinoglu, Zafer and Wang, Yebin and Orlik, Philip V. and Wada, Toshihiro},
- title = {{Online Data-Driven Battery Voltage Prediction}},
- booktitle = {IEEE International Conference on Industrial Informatics (INDIN)},
- year = 2017,
- month = jul,
- doi = {10.1109/INDIN.2017.8104879},
- url = {https://www.merl.com/publications/TR2017-101}
- }
Abbeloos, W., Caccamo, S., Ataer-Cansizoglu, E., Taguchi, Y., Feng, C., Lee, T.-Y., "Detecting and Grouping Identical Objects for Region Proposal and Classification", CVPR Workshop on Deep Learning for Robotic Vision, DOI: 10.1109/CVPRW.2017.76, July 2017.
BibTeX TR2017-099 PDF
- @inproceedings{Abbeloos2017jul,
- author = {Abbeloos, Wim and Caccamo, Sergio and Ataer-Cansizoglu, Esra and Taguchi, Yuichi and Feng, Chen and Lee, Teng-Yok},
- title = {{Detecting and Grouping Identical Objects for Region Proposal and Classification}},
- booktitle = {CVPR Workshop on Deep Learning for Robotic Vision},
- year = 2017,
- month = jul,
- doi = {10.1109/CVPRW.2017.76},
- url = {https://www.merl.com/publications/TR2017-099}
- }
Yu, Z., Feng, C., Liu, M.-Y., Ramalingam, S., "CASENet: Deep Category-Aware Semantic Edge Detection", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2017.191, July 2017.
BibTeX TR2017-100 PDF Video Data Software
- @inproceedings{Yu2017jul,
- author = {Yu, Zhiding and Feng, Chen and Liu, Ming-Yu and Ramalingam, Srikumar},
- title = {{CASENet: Deep Category-Aware Semantic Edge Detection}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2017,
- month = jul,
- doi = {10.1109/CVPR.2017.191},
- url = {https://www.merl.com/publications/TR2017-100}
- }
Feng, C., Liu, M.-Y., Kao, C.-C., Lee, T.-Y., "Deep Active Learning for Civil Infrastructure Defect Detection and Classification", International Workshop on Computing in Civil Engineering (IWCCE), June 2017.
BibTeX TR2017-034 PDF
- @inproceedings{Feng2017jun,
- author = {Feng, Chen and Liu, Ming-Yu and Kao, Chieh-Chi and Lee, Teng-Yok},
- title = {{Deep Active Learning for Civil Infrastructure Defect Detection and Classification}},
- booktitle = {International Workshop on Computing in Civil Engineering (IWCCE)},
- year = 2017,
- month = jun,
- url = {https://www.merl.com/publications/TR2017-034}
- }
Farahmand, A.-M., Benosman, M., "Towards Stability in Learning-based Control: A Bayesian Optimization-based Adaptive Controller", The Multi-disciplinary Conference on Reinforcement Learning and Decision Making, June 2017.
BibTeX TR2017-075 PDF
- @inproceedings{Farahmand2017jun,
- author = {Farahmand, Amir-massoud and Benosman, Mouhacine},
- title = {{Towards Stability in Learning-based Control: A Bayesian Optimization-based Adaptive Controller}},
- booktitle = {The Multi-disciplinary Conference on Reinforcement Learning and Decision Making},
- year = 2017,
- month = jun,
- url = {https://www.merl.com/publications/TR2017-075}
- }
Farahmand, A.-M., Nabi, S., Nikovski, D.N., "Deep Reinforcement Learning for Partial Differential Equation Control", American Control Conference (ACC), DOI: 10.23919/ACC.2017.7963427, May 2017.
BibTeX TR2017-063 PDF
- @inproceedings{Farahmand2017may,
- author = {Farahmand, Amir-massoud and Nabi, Saleh and Nikovski, Daniel N.},
- title = {{Deep Reinforcement Learning for Partial Differential Equation Control}},
- booktitle = {American Control Conference (ACC)},
- year = 2017,
- month = may,
- doi = {10.23919/ACC.2017.7963427},
- url = {https://www.merl.com/publications/TR2017-063}
- }
Farahmand, A.-M., Barreto, A.M.S., Nikovski, D.N., "Value-Aware Loss Function for Model-based Reinforcement Learning", Artificial Intelligence and Statistics (AISTATS), Vol. 54, April 2017.
BibTeX TR2017-049 PDF
- @inproceedings{Farahmand2017apr,
- author = {Farahmand, Amir-massoud and Barreto, Andre M.S. and Nikovski, Daniel N.},
- title = {{Value-Aware Loss Function for Model-based Reinforcement Learning}},
- booktitle = {Artificial Intelligence and Statistics (AISTATS)},
- year = 2017,
- volume = 54,
- month = apr,
- url = {https://www.merl.com/publications/TR2017-049}
- }
Watanabe, S., Hori, T., Hayashi, T., Kim, S., "End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model", Acoustical Society of Japan Spring Meeting (ASJ), March 2017.
BibTeX TR2017-021 PDF
- @inproceedings{Watanabe2017mar2,
- author = {Watanabe, Shinji and Hori, Takaaki and Hayashi, Tomoki and Kim, Suyoun},
- title = {{End-to-end ASR without using morphological analyzer, pronunciation dictionary and language model}},
- booktitle = {Acoustical Society of Japan Spring Meeting (ASJ)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-021}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-014 PDF
- @inproceedings{Hayashi2017mar,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title = {{BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-014}
- }
Kim, S., Hori, T., Watanabe, S., "Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-task Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-016 PDF Video
- @inproceedings{Kim2017mar,
- author = {Kim, Suyoun and Hori, Takaaki and Watanabe, Shinji},
- title = {{Joint CTC-Attention Based End-to-End Speech Recognition Using Multi-task Learning}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-016}
- }
Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-010 PDF
- @inproceedings{Luo2017mar,
- author = {Luo, Yi and Chen, Zhuo and Hershey, John R. and {Le Roux}, Jonathan and Mesgarani, Nima},
- title = {{Deep Clustering and Conventional Networks for Music Separation: Strong Together}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-010}
- }
Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-012 PDF
- @inproceedings{Meng2017mar,
- author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
- title = {{Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-012}
- }
Watanabe, S., Hori, T., Le Roux, J., Hershey, J.R., "Student-Teacher Network Learning with Enhanced Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-011 PDF
- @inproceedings{Watanabe2017mar,
- author = {Watanabe, Shinji and Hori, Takaaki and {Le Roux}, Jonathan and Hershey, John R.},
- title = {{Student-Teacher Network Learning with Enhanced Features}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-011}
- }
Hara, K., Liu, M.-Y., Tuzel, C.O., Farahmand, A.-M., "Attentional Network for Visual Object Detection", arXiv, January 2017.
BibTeX arXiv
- @article{Hara2017jan,
- author = {Hara, Kota and Liu, Ming-Yu and Tuzel, C. Oncel and Farahmand, Amir-massoud},
- title = {{Attentional Network for Visual Object Detection}},
- journal = {arXiv},
- year = 2017,
- month = jan,
- url = {https://arxiv.org/abs/1702.01478}
- }
Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2016.7820724, December 2016.
BibTeX TR2016-162 PDF
- @inproceedings{Xiao2016dec,
- author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
- title = {{Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition}},
- booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
- year = 2016,
- month = dec,
- doi = {10.1109/APSIPA.2016.7820724},
- url = {https://www.merl.com/publications/TR2016-162}
- }
Hori, T., Wang, H., Hori, C., Watanabe, S., Harsham, B.A., Le Roux, J., Hershey, J.R., Koji, Y., Jing, Y., Zhu, Z., Aikawa, T., "Dialog State Tracking with Attention-based Sequence-to-sequence Learning", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2016.7846317, December 2016, pp. 552-558.
BibTeX TR2016-163 PDF
- @inproceedings{Hori2016dec,
- author = {Hori, Takaaki and Wang, Hai and Hori, Chiori and Watanabe, Shinji and Harsham, Bret A. and {Le Roux}, Jonathan and Hershey, John R. and Koji, Yusuke and Jing, Yi and Zhu, Zhaocheng and Aikawa, Takeyuki},
- title = {{Dialog State Tracking with Attention-based Sequence-to-sequence Learning}},
- booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
- year = 2016,
- pages = {552--558},
- month = dec,
- doi = {10.1109/SLT.2016.7846317},
- url = {https://www.merl.com/publications/TR2016-163}
- }
Farahmand, A.-M., Nabi, S., Grover, P., Nikovski, D.N., "Learning to Control Partial Differential Equations: Regularized Fitted Q-Iteration Approach", IEEE Conference on Decision and Control (CDC), DOI: 10.1109/CDC.2016.7798966, December 2016, pp. 4578-4585.
BibTeX TR2016-145 PDF
- @inproceedings{Farahmand2016dec,
- author = {Farahmand, Amir-massoud and Nabi, Saleh and Grover, Piyush and Nikovski, Daniel N.},
- title = {{Learning to Control Partial Differential Equations: Regularized Fitted Q-Iteration Approach}},
- booktitle = {IEEE Conference on Decision and Control (CDC)},
- year = 2016,
- pages = {4578--4585},
- month = dec,
- doi = {10.1109/CDC.2016.7798966},
- url = {https://www.merl.com/publications/TR2016-145}
- }
Takano, T., Moriya, T., Shinozaki, T., Watanabe, S., Hori, T., Duh, K., "Automated Structure Discovery and Parameter Tuning of Neural Network Language Model Based on Evolution Strategy", IEEE Spoken Language Technology Workshop (SLT), DOI: 10.1109/SLT.2016.7846334, December 2016.
BibTeX TR2016-173 PDF
- @inproceedings{Takano2016dec,
- author = {Takano, Tomihiro and Moriya, Takafumi and Shinozaki, Takahiro and Watanabe, Shinji and Hori, Takaaki and Duh, Kevin},
- title = {{Automated Structure Discovery and Parameter Tuning of Neural Network Language Model Based on Evolution Strategy}},
- booktitle = {IEEE Spoken Language Technology Workshop (SLT)},
- year = 2016,
- month = dec,
- doi = {10.1109/SLT.2016.7846334},
- url = {https://www.merl.com/publications/TR2016-173}
- }