Publications

Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
BibTeX TR2017-190 PDF Video
- @article{Watanabe2017oct,
- author  = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
- title   = {{Hybrid CTC/Attention Architecture for End-to-End Speech Recognition}},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- volume  = 11,
- number  = 8,
- pages   = {1240--1253},
- month   = oct,
- year    = 2017,
- doi     = {10.1109/JSTSP.2017.2763455},
- issn    = {1941-0484},
- url     = {https://www.merl.com/publications/TR2017-190},
- }
Lee, T.-Y., Patil, S., Ramalingam, S., Taguchi, Y., Benes, B., "Barcode: Global Binary Patterns for Fast Visual Inference", International Conference on 3D Vision, October 2017.
BibTeX TR2017-145 PDF
- @inproceedings{Patil2017oct,
- author    = {Lee, Teng-Yok and Patil, Sonali and Ramalingam, Srikumar and Taguchi, Yuichi and Benes, Bedrich},
- title     = {{Barcode: Global Binary Patterns for Fast Visual Inference}},
- booktitle = {International Conference on 3D Vision},
- month     = oct,
- year      = 2017,
- url       = {https://www.merl.com/publications/TR2017-145},
- }
Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
BibTeX TR2017-132 PDF Video
- @inproceedings{Hori2017aug,
- author    = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
- title     = {{Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM}},
- booktitle = {Interspeech},
- month     = aug,
- year      = 2017,
- url       = {https://www.merl.com/publications/TR2017-132},
- }
Feng, C., Liu, M.-Y., Kao, C.-C., Lee, T.-Y., "Deep Active Learning for Civil Infrastructure Defect Detection and Classification", International Workshop on Computing in Civil Engineering (IWCCE), June 2017.
BibTeX TR2017-034 PDF
- @inproceedings{Feng2017jun,
- author    = {Feng, Chen and Liu, Ming-Yu and Kao, Chieh-Chi and Lee, Teng-Yok},
- title     = {{Deep Active Learning for Civil Infrastructure Defect Detection and Classification}},
- booktitle = {International Workshop on Computing in Civil Engineering (IWCCE)},
- month     = jun,
- year      = 2017,
- url       = {https://www.merl.com/publications/TR2017-034},
- }
Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-010 PDF
- @inproceedings{Luo2017mar,
- author    = {Luo, Yi and Chen, Zhuo and Hershey, John R. and {Le Roux}, Jonathan and Mesgarani, Nima},
- title     = {{Deep Clustering and Conventional Networks for Music Separation: Strong Together}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = mar,
- year      = 2017,
- url       = {https://www.merl.com/publications/TR2017-010},
- }
Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-012 PDF
- @inproceedings{Meng2017mar,
- author    = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
- title     = {{Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = mar,
- year      = 2017,
- url       = {https://www.merl.com/publications/TR2017-012},
- }
Hara, K., Liu, M.-Y., Tuzel, C.O., Farahmand, A.-M., "Attentional Network for Visual Object Detection", arXiv, January 2017.
BibTeX arXiv
- @article{Hara2017jan,
- author        = {Hara, Kota and Liu, Ming-Yu and Tuzel, C. Oncel and Farahmand, Amir-massoud},
- title         = {{Attentional Network for Visual Object Detection}},
- journal       = {arXiv},
- month         = jan,
- year          = 2017,
- eprint        = {1702.01478},
- archiveprefix = {arXiv},
- url           = {https://arxiv.org/abs/1702.01478},
- }
Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2016.7820724, December 2016.
BibTeX TR2016-162 PDF
- @inproceedings{Xiao2016dec,
- author    = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
- title     = {{Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition}},
- booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
- month     = dec,
- year      = 2016,
- doi       = {10.1109/APSIPA.2016.7820724},
- url       = {https://www.merl.com/publications/TR2016-162},
- }
Matsumoto, W., Hagiwara, M., Boufounos, P.T., Fukushima, K., Mariyama, T., Xiongxin, Z., "A Deep Neural Network Architecture Using Dimensionality Reduction with Sparse Matrices", International Conference on Neural Information Processing (ICONIP), DOI: 10.1007/978-3-319-46681-1_48, October 2016, vol. 9950, pp. 397-404.
BibTeX TR2016-134 PDF
- @inproceedings{Matsumoto2016oct,
- author    = {Matsumoto, Wataru and Hagiwara, Manabu and Boufounos, Petros T. and Fukushima, Kunihiko and Mariyama, Toshisada and Xiongxin, Zhao},
- title     = {{A Deep Neural Network Architecture Using Dimensionality Reduction with Sparse Matrices}},
- booktitle = {International Conference on Neural Information Processing (ICONIP)},
- series    = {Lecture Notes in Computer Science},
- volume    = 9950,
- pages     = {397--404},
- month     = oct,
- year      = 2016,
- doi       = {10.1007/978-3-319-46681-1_48},
- issn      = {0302-9743},
- isbn      = {978-3-319-46681-1},
- url       = {https://www.merl.com/publications/TR2016-134},
- }
Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
BibTeX TR2016-115 PDF
- @techreport{Delcroix2016sep,
- author      = {Delcroix, Marc and Watanabe, Shinji},
- title       = {{Recent Advances in Distant Speech Recognition}},
- institution = {Interspeech},
- number      = {TR2016-115},
- note        = {Interspeech Tutorials},
- month       = sep,
- year        = 2016,
- url         = {https://www.merl.com/publications/TR2016-115},
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/Interspeech.2016-552, September 2016, pp. 1981-1985.
BibTeX TR2016-072 PDF
- @inproceedings{Erdogan2016sep,
- author    = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and {Le Roux}, Jonathan},
- title     = {{Improved MVDR beamforming using single-channel mask prediction networks}},
- booktitle = {Interspeech},
- pages     = {1981--1985},
- month     = sep,
- year      = 2016,
- doi       = {10.21437/Interspeech.2016-552},
- url       = {https://www.merl.com/publications/TR2016-072},
- }
Kamilov, U., Mansour, H., "Learning MMSE Optimal Thresholds for FISTA", International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST), August 2016.
BibTeX TR2016-111 PDF
- @inproceedings{Kamilov2016aug,
- author    = {Kamilov, Ulugbek and Mansour, Hassan},
- title     = {{Learning MMSE Optimal Thresholds for FISTA}},
- booktitle = {International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST)},
- month     = aug,
- year      = 2016,
- url       = {https://www.merl.com/publications/TR2016-111},
- }
Vemulapalli, R., Tuzel, C.O., Liu, M.-Y., "Deep Gaussian Conditional Random Field Network: A Model-based Deep Network for Discriminative Denoising", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2016.351, June 2016, pp. 4801-4809.
BibTeX TR2016-079 PDF
- @inproceedings{Vemulapalli2016jun2,
- author    = {Vemulapalli, Raviteja and Tuzel, C. Oncel and Liu, Ming-Yu},
- title     = {{Deep Gaussian Conditional Random Field Network: A Model-based Deep Network for Discriminative Denoising}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- pages     = {4801--4809},
- month     = jun,
- year      = 2016,
- doi       = {10.1109/CVPR.2016.351},
- url       = {https://www.merl.com/publications/TR2016-079},
- }
Vemulapalli, R., Tuzel, C.O., Liu, M.-Y., Chellappa, R., "Gaussian Conditional Random Field Network for Semantic Segmentation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2016, pp. 3224-3233.
BibTeX TR2016-078 PDF
- @inproceedings{Vemulapalli2016jun,
- author    = {Vemulapalli, Raviteja and Tuzel, C. Oncel and Liu, Ming-Yu and Chellappa, Rama},
- title     = {{Gaussian Conditional Random Field Network for Semantic Segmentation}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- pages     = {3224--3233},
- month     = jun,
- year      = 2016,
- url       = {https://www.merl.com/publications/TR2016-078},
- }
Hershey, J.R., Chen, Z., Le Roux, J., Watanabe, S., "Deep Clustering: Discriminative Embeddings for Segmentation and Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471631, March 2016, pp. 31-35.
BibTeX TR2016-003 PDF
- @inproceedings{Hershey2016mar,
- author    = {Hershey, John R. and Chen, Zhuo and {Le Roux}, Jonathan and Watanabe, Shinji},
- title     = {{Deep Clustering: Discriminative Embeddings for Segmentation and Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {31--35},
- month     = mar,
- year      = 2016,
- doi       = {10.1109/ICASSP.2016.7471631},
- url       = {https://www.merl.com/publications/TR2016-003},
- }
Wisdom, S., Hershey, J.R., Le Roux, J., Watanabe, S., "Deep Unfolding for Multichannel Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7471649, March 2016, pp. 121-125.
BibTeX TR2016-008 PDF
- @inproceedings{Wisdom2016mar,
- author    = {Wisdom, Scott and Hershey, John R. and {Le Roux}, Jonathan and Watanabe, Shinji},
- title     = {{Deep Unfolding for Multichannel Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {121--125},
- month     = mar,
- year      = 2016,
- doi       = {10.1109/ICASSP.2016.7471649},
- url       = {https://www.merl.com/publications/TR2016-008},
- }
Xiao, X., Watanabe, S., Erdogan, H., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Mandel, M., Yu, D., "Deep Beamforming Networks for Multi-Channel Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2016.7472778, March 2016, pp. 5745-5749.
BibTeX TR2016-002 PDF
- @inproceedings{Xiao2016mar,
- author    = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Mandel, Michael and Yu, Dong},
- title     = {{Deep Beamforming Networks for Multi-Channel Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {5745--5749},
- month     = mar,
- year      = 2016,
- doi       = {10.1109/ICASSP.2016.7472778},
- url       = {https://www.merl.com/publications/TR2016-002},
- }
Liu, M.-Y., Mallya, A., Tuzel, C.O., Chen, X., "Unsupervised Network Pretraining via Encoding Human Design", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV.2016.7477698, March 2016, pp. 1-9.
BibTeX TR2016-022 PDF
- @inproceedings{Liu2016mar,
- author    = {Liu, Ming-Yu and Mallya, Arun and Tuzel, C. Oncel and Chen, Xi},
- title     = {{Unsupervised Network Pretraining via Encoding Human Design}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- pages     = {1--9},
- month     = mar,
- year      = 2016,
- doi       = {10.1109/WACV.2016.7477698},
- url       = {https://www.merl.com/publications/TR2016-022},
- }
Kanagawa, H., Tachioka, Y., Watanabe, S., Ishii, J., "Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/APSIPA.2015.7415425, December 2015, pp. 86-92.
BibTeX TR2015-150 PDF
- @inproceedings{Kanagawa2015dec,
- author    = {Kanagawa, H. and Tachioka, Y. and Watanabe, S. and Ishii, J.},
- title     = {{Feature-Space Structural MAPLR with Regression Tree-Based Multiple Transformation Matrices for DNN}},
- booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
- pages     = {86--92},
- month     = dec,
- year      = 2015,
- doi       = {10.1109/APSIPA.2015.7415425},
- url       = {https://www.merl.com/publications/TR2015-150},
- }
Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
- @inproceedings{Hori2015dec2,
- author    = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J. R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
- title     = {{The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- pages     = {475--481},
- month     = dec,
- year      = 2015,
- publisher = {IEEE},
- doi       = {10.1109/ASRU.2015.7404833},
- url       = {https://www.merl.com/publications/TR2015-135},
- }
Abdelaziz, A.H., Watanabe, S., Hershey, J.R., Vincent, E., Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, September 2015, vol. 1 or 5, pp. 3561.
BibTeX TR2015-098 PDF
- @inproceedings{Abdelaziz2015sep,
- author        = {Abdelaziz, A. H. and Watanabe, S. and Hershey, J. R. and Vincent, E. and Kolossa, D.},
- title         = {{Uncertainty Propagation Through Deep Neural Networks}},
- booktitle     = {Interspeech},
- pages         = {3561},
- month         = sep,
- year          = 2015,
- isbn          = {978-1-5108-1790-6},
- internal-note = {volume omitted; the source record listed it ambiguously as 1 or 5 -- confirm against the printed proceedings},
- url           = {https://www.merl.com/publications/TR2015-098},
- }
Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, vol. 1 or 5, pp. 3541.
BibTeX TR2015-099 PDF
- @inproceedings{Tachioka2015sep,
- author        = {Tachioka, Y. and Watanabe, S.},
- title         = {{Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features}},
- booktitle     = {Interspeech},
- pages         = {3541},
- month         = sep,
- year          = 2015,
- isbn          = {978-1-5108-1790-6},
- internal-note = {volume omitted; the source record listed it ambiguously as 1 or 5 -- confirm against the printed proceedings},
- url           = {https://www.merl.com/publications/TR2015-099},
- }
Liu, M.-Y., Lin, A., Ramalingam, S., Tuzel, C.O., "Layered Interpretation of Street View Images", Robotics: Science and Systems Conference (RSS), DOI: 10.15607/RSS.2015.XI.025, July 2015.
BibTeX TR2015-073 PDF
- @inproceedings{Liu2015jul,
- author    = {Liu, M.-Y. and Lin, A. and Ramalingam, S. and Tuzel, C. O.},
- title     = {{Layered Interpretation of Street View Images}},
- booktitle = {Robotics: Science and Systems Conference (RSS)},
- month     = jul,
- year      = 2015,
- doi       = {10.15607/RSS.2015.XI.025},
- url       = {https://www.merl.com/publications/TR2015-073},
- }
Tachioka, Y., Narita, T., Watanabe, S., "Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments", EURASIP Journal on Advances in Signal Processing, DOI: 10.1186/s13634-015-0241-y, June 2015.
BibTeX TR2015-152 PDF
- @article{Tachioka2015jun,
- author  = {Tachioka, Y. and Narita, T. and Watanabe, S.},
- title   = {{Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments}},
- journal = {EURASIP Journal on Advances in Signal Processing},
- month   = jun,
- year    = 2015,
- doi     = {10.1186/s13634-015-0241-y},
- url     = {https://www.merl.com/publications/TR2015-152},
- }
Ahmed, E., Jones, M.J., Marks, T.K., "An Improved Deep Learning Architecture for Person Re-Identification", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR.2015.7299016, June 2015, pp. 3908-3916.
BibTeX TR2015-076 PDF
- @inproceedings{Jones2015jun,
- author    = {Ahmed, E. and Jones, M. J. and Marks, T. K.},
- title     = {{An Improved Deep Learning Architecture for Person Re-Identification}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- pages     = {3908--3916},
- month     = jun,
- year      = 2015,
- doi       = {10.1109/CVPR.2015.7299016},
- url       = {https://www.merl.com/publications/TR2015-076},
- }