Publications

Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, DOI: 10.21437/Interspeech.2019-2355, September 2019, pp. 2220-2224.
BibTeX TR2019-103 PDF
- @inproceedings{Karafiat2019sep,
- author = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
- title = {{Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems}},
- booktitle = {Interspeech},
- year = 2019,
- pages = {2220--2224},
- month = sep,
- doi = {10.21437/Interspeech.2019-2355},
- url = {https://www.merl.com/publications/TR2019-103}
- }
Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2837, September 2019, pp. 76-80.
BibTeX TR2019-098 PDF
- @inproceedings{Moritz2019sep,
- author = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
- title = {{Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition}},
- booktitle = {Interspeech},
- year = 2019,
- pages = {76--80},
- month = sep,
- doi = {10.21437/Interspeech.2019-2837},
- url = {https://www.merl.com/publications/TR2019-098}
- }
Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2018.8462161, April 2018, pp. 6707-6711.
BibTeX TR2018-006 PDF
- @inproceedings{Ochiai2018apr,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
- title = {{Speaker Adaptation for Multichannel End-to-End Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2018,
- pages = {6707--6711},
- month = apr,
- doi = {10.1109/ICASSP.2018.8462161},
- url = {https://www.merl.com/publications/TR2018-006}
- }
Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
BibTeX TR2017-192 PDF
- @article{Ochiai2017oct2,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
- title = {{Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming}},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1274--1288},
- month = oct,
- doi = {10.1109/JSTSP.2017.2764276},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-192}
- }
Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
BibTeX TR2017-190 PDF Video
- @article{Watanabe2017oct,
- author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
- title = {{Hybrid CTC/Attention Architecture for End-to-End Speech Recognition}},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2017,
- volume = 11,
- number = 8,
- pages = {1240--1253},
- month = oct,
- doi = {10.1109/JSTSP.2017.2763455},
- issn = {1941-0484},
- url = {https://www.merl.com/publications/TR2017-190}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
BibTeX TR2017-150 PDF
- @article{Hayashi2017aug,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title = {{Duration-Controlled LSTM for Polyphonic Sound Event Detection}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2017,
- volume = 25,
- number = 11,
- month = aug,
- doi = {10.1109/TASLP.2017.2740002},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2017-150}
- }
Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
BibTeX TR2017-107 PDF
- @inproceedings{Ochiai2017aug,
- author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
- title = {{Multichannel End-to-end Speech Recognition}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2017,
- month = aug,
- url = {https://www.merl.com/publications/TR2017-107}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
BibTeX TR2017-014 PDF
- @inproceedings{Hayashi2017mar,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title = {{BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2017,
- month = mar,
- url = {https://www.merl.com/publications/TR2017-014}
- }
Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
BibTeX TR2016-114 PDF
- @inproceedings{Hayashi2016sep,
- author = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and {Le Roux}, Jonathan and Takeda, Kazuya},
- title = {{Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection}},
- booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
- year = 2016,
- pages = {35--39},
- month = sep,
- url = {https://www.merl.com/publications/TR2016-114}
- }
Watanabe, S., Nakamura, A., Juang, B.-H., "Structural Bayesian Linear Regression for Hidden Markov Models", Journal of Signal Processing Systems, DOI: 10.1007/s11265-013-0785-8, Vol. 74, No. 3, pp. 341-358, March 2014.
BibTeX TR2013-071 PDF
- @article{Watanabe2013aug,
- author = {Watanabe, S. and Nakamura, A. and Juang, B.-H.},
- title = {{Structural Bayesian Linear Regression for Hidden Markov Models}},
- journal = {Journal of Signal Processing Systems},
- year = 2014,
- volume = 74,
- number = 3,
- pages = {341--358},
- month = mar,
- doi = {10.1007/s11265-013-0785-8},
- issn = {1939-8018},
- url = {https://www.merl.com/publications/TR2013-071}
- }
Gales, M., Watanabe, S., Fosler-Lussier, E., "Structured Discriminative Models For Speech Recognition", IEEE Signal Processing Magazine, Vol. 29, No. 6, pp. 70-81, November 2012.
BibTeX TR2012-072 PDF
- @article{Gales2012nov,
- author = {Gales, M. and Watanabe, S. and Fosler-Lussier, E.},
- title = {{Structured Discriminative Models For Speech Recognition}},
- journal = {IEEE Signal Processing Magazine},
- year = 2012,
- volume = 29,
- number = 6,
- pages = {70--81},
- month = nov,
- url = {https://www.merl.com/publications/TR2012-072}
- }
Polatkan, G., Tuzel, O., "Compressed Inference for Probabilistic Sequential Models", Conference on Uncertainty in Artificial Intelligence (UAI), July 2011.
BibTeX TR2011-051 PDF
- @inproceedings{Polatkan2011jul,
- author = {Polatkan, G. and Tuzel, O.},
- title = {{Compressed Inference for Probabilistic Sequential Models}},
- booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)},
- year = 2011,
- month = jul,
- url = {https://www.merl.com/publications/TR2011-051}
- }
Pathak, M., Rane, S., Sun, W., Raj, B., "Privacy Preserving Probabilistic Inference with Hidden Markov Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2011.5947696, May 2011, pp. 5868-5871.
BibTeX TR2011-017 PDF
- @inproceedings{Pathak2011may,
- author = {Pathak, M. and Rane, S. and Sun, W. and Raj, B.},
- title = {{Privacy Preserving Probabilistic Inference with Hidden Markov Models}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2011,
- pages = {5868--5871},
- month = may,
- doi = {10.1109/ICASSP.2011.5947696},
- url = {https://www.merl.com/publications/TR2011-017}
- }
Mei, X., Porikli, F., "Joint Tracking and Video Registration by Factorial Hidden Markov Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2008, pp. 973-976.
BibTeX TR2008-010 PDF
- @inproceedings{Mei2008mar,
- author = {Mei, X. and Porikli, F.},
- title = {{Joint Tracking and Video Registration by Factorial Hidden Markov Models}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2008,
- pages = {973--976},
- month = mar,
- issn = {1520-6149},
- url = {https://www.merl.com/publications/TR2008-010}
- }
Wren, C., Ivanov, Y., Kaur, I., Leigh, D., Westhues, J., "SocialMotion: Measuring the Hidden Social Life of a Building", International Symposium on Location- and Context-Awareness (LoCA), September 2007, vol. 4718, pp. 85-102.
BibTeX TR2007-034 PDF
- @inproceedings{Wren2007sep,
- author = {Wren, C. and Ivanov, Y. and Kaur, I. and Leigh, D. and Westhues, J.},
- title = {{SocialMotion: Measuring the Hidden Social Life of a Building}},
- booktitle = {International Symposium on Location- and Context-Awareness (LoCA)},
- year = 2007,
- volume = 4718,
- series = {Lecture Notes in Computer Science},
- pages = {85--102},
- month = sep,
- isbn = {978-3-540-75159-5},
- url = {https://www.merl.com/publications/TR2007-034}
- }
Smaragdis, P., Boufounos, P., "Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 114-117.
BibTeX TR2005-138 PDF
- @inproceedings{Smaragdis2005oct,
- author = {Smaragdis, P. and Boufounos, P.},
- title = {{Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2005,
- pages = {114--117},
- month = oct,
- url = {https://www.merl.com/publications/TR2005-138}
- }
Xie, L., Kennedy, L., Chang, S.-F., Divakaran, A., Sun, H., Lin, C.-Y., "Layered Dynamic Mixture Model for Pattern Discovery in Asynchronous Multi-Modal Streams", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2005, vol. 2, pp. 1053-1056.
BibTeX TR2005-078 PDF
- @inproceedings{Xie2005mar,
- author = {Xie, L. and Kennedy, L. and Chang, S.-F. and Divakaran, A. and Sun, H. and Lin, C.-Y.},
- title = {{Layered Dynamic Mixture Model for Pattern Discovery in Asynchronous Multi-Modal Streams}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2005,
- volume = 2,
- pages = {1053--1056},
- month = mar,
- issn = {1520-6149},
- url = {https://www.merl.com/publications/TR2005-078}
- }
Li, X., Porikli, F.M., "A Hidden Markov Model Framework for Traffic Event Detection Using Video Features", IEEE International Conference on Image Processing (ICIP), October 2004, vol. 5, pp. 2901-2904.
BibTeX TR2004-130 PDF
- @inproceedings{Li2004oct,
- author = {Li, X. and Porikli, F. M.},
- title = {{A Hidden Markov Model Framework for Traffic Event Detection Using Video Features}},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2004,
- volume = 5,
- pages = {2901--2904},
- month = oct,
- issn = {1522-4880},
- url = {https://www.merl.com/publications/TR2004-130}
- }
Xie, L., Kennedy, L., Chang, S.-F., Divakaran, A., Sun, H., Lin, C.-Y., "Discovering Meaningful Multimedia Patterns with Audio-Visual Concepts and Associated Text", IEEE International Conference on Image Processing (ICIP), October 2004, vol. 4, pp. 2383-2386.
BibTeX TR2004-128 PDF
- @inproceedings{Xie2004oct,
- author = {Xie, L. and Kennedy, L. and Chang, S.-F. and Divakaran, A. and Sun, H. and Lin, C.-Y.},
- title = {{Discovering Meaningful Multimedia Patterns with Audio-Visual Concepts and Associated Text}},
- booktitle = {IEEE International Conference on Image Processing (ICIP)},
- year = 2004,
- volume = 4,
- pages = {2383--2386},
- month = oct,
- issn = {1522-4880},
- url = {https://www.merl.com/publications/TR2004-128}
- }
Radhakrishnan, R., Xiong, Z., Divakaran, A., Memon, N., "Time Series Analysis and Segmentation Using Eigenvectors for Mining Semantic Audio Label Sequences", IEEE International Conference on Multimedia and Expo (ICME), June 2004.
BibTeX TR2004-063 PDF
- @inproceedings{Radhakrishnan2004jun,
- author = {Radhakrishnan, R. and Xiong, Z. and Divakaran, A. and Memon, N.},
- title = {{Time Series Analysis and Segmentation Using Eigenvectors for Mining Semantic Audio Label Sequences}},
- booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
- year = 2004,
- month = jun,
- url = {https://www.merl.com/publications/TR2004-063}
- }
Porikli, F.M., Li, X., "Traffic Congestion Estimation Using HMM Models Without Vehicle Tracking", IEEE Intelligent Vehicles Symposium (IV), June 2004, pp. 188-193.
BibTeX TR2004-019 PDF
- @inproceedings{Porikli2004jun2,
- author = {Porikli, F. M. and Li, X.},
- title = {{Traffic Congestion Estimation Using HMM Models Without Vehicle Tracking}},
- booktitle = {IEEE Intelligent Vehicles Symposium (IV)},
- year = 2004,
- pages = {188--193},
- month = jun,
- url = {https://www.merl.com/publications/TR2004-019}
- }
Xie, L., Xu, P., Chang, S.-F., Divakaran, A., Sun, H., "Structure Analysis of Soccer Video with Domain Knowledge and Hidden Markov Models", Pattern Recognition Letters, Vol. 25, No. 7, pp. 767-775, May 2004.
BibTeX TR2004-081 PDF
- @article{Xie2004may,
- author = {Xie, L. and Xu, P. and Chang, S.-F. and Divakaran, A. and Sun, H.},
- title = {{Structure Analysis of Soccer Video with Domain Knowledge and Hidden Markov Models}},
- journal = {Pattern Recognition Letters},
- year = 2004,
- volume = 25,
- number = 7,
- pages = {767--775},
- month = may,
- publisher = {Elsevier},
- url = {https://www.merl.com/publications/TR2004-081}
- }
Porikli, F.M., "Trajectory Distance Metric Using Hidden Markov Model Based Representation", European Conference on Computer Vision (ECCV), May 2004.
BibTeX TR2004-030 PDF
- @inproceedings{Porikli2004may3,
- author = {Porikli, F. M.},
- title = {{Trajectory Distance Metric Using Hidden Markov Model Based Representation}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2004,
- month = may,
- url = {https://www.merl.com/publications/TR2004-030}
- }
Radhakrishnan, R., Xiong, Z., Divakaran, A., Ishikawa, Y., "Generation of Sports Highlights Using a Combination of Supervised & Unsupervised Learning in Audio Domain", IEEE Pacific-Rim Conference on Multimedia (PCM), December 2003, vol. 2, pp. 935-939.
BibTeX TR2003-144 PDF
- @inproceedings{Radhakrishan2003dec,
- author = {Radhakrishnan, R. and Xiong, Z. and Divakaran, A. and Ishikawa, Y.},
- title = {{Generation of Sports Highlights Using a Combination of Supervised \& Unsupervised Learning in Audio Domain}},
- booktitle = {IEEE Pacific-Rim Conference on Multimedia (PCM)},
- year = 2003,
- volume = 2,
- pages = {935--939},
- month = dec,
- url = {https://www.merl.com/publications/TR2003-144}
- }
Xie, L., Chang, S.-F., Divakaran, A., Sun, H., "Unsupervised Mining of Statistical Temporal Structures in Video" in Video Mining, DOI: 10.1007/978-1-4757-6928-9_10, Springer, October 2003.
BibTeX TR2003-132 PDF
- @incollection{Xie2003oct,
- author = {Xie, L. and Chang, S.-F. and Divakaran, A. and Sun, H.},
- title = {{Unsupervised Mining of Statistical Temporal Structures in Video}},
- booktitle = {Video Mining},
- year = 2003,
- month = oct,
- publisher = {Springer},
- doi = {10.1007/978-1-4757-6928-9_10},
- url = {https://www.merl.com/publications/TR2003-132}
- }