Publications

36 / 3,599 publications found.


  •  Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, DOI: 10.21437/Interspeech.2019-2355, September 2019.
    BibTeX TR2019-103 PDF
    • @inproceedings{Karafiat2019sep,
    •   author    = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
    •   title     = {Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems},
    •   booktitle = {Interspeech},
    •   month     = sep,
    •   year      = {2019},
    •   doi       = {10.21437/Interspeech.2019-2355},
    •   url       = {https://www.merl.com/publications/TR2019-103},
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/​Interspeech.2019-2837, September 2019, pp. 76-80.
    BibTeX TR2019-098 PDF
    • @inproceedings{Moritz2019sep,
    •   author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    •   title     = {Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition},
    •   booktitle = {Interspeech},
    •   pages     = {76--80},
    •   month     = sep,
    •   year      = {2019},
    •   doi       = {10.21437/Interspeech.2019-2837},
    •   url       = {https://www.merl.com/publications/TR2019-098},
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    •   author    = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    •   title     = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {6707--6711},
    •   month     = apr,
    •   year      = {2018},
    •   doi       = {10.1109/ICASSP.2018.8462161},
    •   url       = {https://www.merl.com/publications/TR2018-006},
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    •   author    = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    •   title     = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    •   journal   = {IEEE Journal of Selected Topics in Signal Processing},
    •   volume    = {11},
    •   number    = {8},
    •   pages     = {1274--1288},
    •   month     = oct,
    •   year      = {2017},
    •   doi       = {10.1109/JSTSP.2017.2764276},
    •   issn      = {1941-0484},
    •   url       = {https://www.merl.com/publications/TR2017-192},
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    •   author    = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    •   title     = {Hybrid {CTC}/Attention Architecture for End-to-End Speech Recognition},
    •   journal   = {IEEE Journal of Selected Topics in Signal Processing},
    •   volume    = {11},
    •   number    = {8},
    •   pages     = {1240--1253},
    •   month     = oct,
    •   year      = {2017},
    •   doi       = {10.1109/JSTSP.2017.2763455},
    •   issn      = {1941-0484},
    •   url       = {https://www.merl.com/publications/TR2017-190},
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Duration-Controlled LSTM for Polyphonic Sound Event Detection", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2017.2740002, Vol. 25, No. 11, August 2017.
    BibTeX TR2017-150 PDF
    • @article{Hayashi2017aug,
    •   author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    •   title     = {Duration-Controlled {LSTM} for Polyphonic Sound Event Detection},
    •   journal   = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    •   volume    = {25},
    •   number    = {11},
    •   month     = aug,
    •   year      = {2017},
    •   doi       = {10.1109/TASLP.2017.2740002},
    •   issn      = {2329-9304},
    •   url       = {https://www.merl.com/publications/TR2017-150},
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., "Multichannel End-to-end Speech Recognition", International Conference on Machine Learning (ICML), August 2017.
    BibTeX TR2017-107 PDF
    • @inproceedings{Ochiai2017aug,
    •   author    = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R.},
    •   title     = {Multichannel End-to-end Speech Recognition},
    •   booktitle = {International Conference on Machine Learning (ICML)},
    •   month     = aug,
    •   year      = {2017},
    •   url       = {https://www.merl.com/publications/TR2017-107},
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "BLSTM-HMM Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-014 PDF
    • @inproceedings{Hayashi2017mar,
    •   author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    •   title     = {{BLSTM-HMM} Hybrid System Combined with Sound Activity Detection Network for Polyphonic Sound Event Detection},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   month     = mar,
    •   year      = {2017},
    •   url       = {https://www.merl.com/publications/TR2017-014},
    • }
  •  Hayashi, T., Watanabe, S., Toda, T., Hori, T., Le Roux, J., Takeda, K., "Bidirectional LSTM-HMM Hybrid System for Polyphonic Sound Event Detection", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), September 2016, pp. 35-39.
    BibTeX TR2016-114 PDF
    • @inproceedings{Hayashi2016sep,
    •   author    = {Hayashi, Tomoki and Watanabe, Shinji and Toda, Tomoki and Hori, Takaaki and Le Roux, Jonathan and Takeda, Kazuya},
    •   title     = {Bidirectional {LSTM-HMM} Hybrid System for Polyphonic Sound Event Detection},
    •   booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
    •   pages     = {35--39},
    •   month     = sep,
    •   year      = {2016},
    •   url       = {https://www.merl.com/publications/TR2016-114},
    • }
  •  Watanabe, S., Nakamura, A., Juang, B.-H., "Structural Bayesian Linear Regression for Hidden Markov Models", Journal of Signal Processing Systems, DOI: 10.1007/​s11265-013-0785-8, Vol. 74, No. 3, pp. 341-358, March 2014.
    BibTeX TR2013-071 PDF
    • @article{Watanabe2013aug,
    •   author    = {Watanabe, S. and Nakamura, A. and Juang, B.-H.},
    •   title     = {Structural {Bayesian} Linear Regression for Hidden {Markov} Models},
    •   journal   = {Journal of Signal Processing Systems},
    •   volume    = {74},
    •   number    = {3},
    •   pages     = {341--358},
    •   month     = mar,
    •   year      = {2014},
    •   doi       = {10.1007/s11265-013-0785-8},
    •   issn      = {1939-8018},
    •   url       = {https://www.merl.com/publications/TR2013-071},
    • }
  •  Gales, M., Watanabe, S., Fosler-Lussier, E., "Structured Discriminative Models For Speech Recognition", IEEE Signal Processing Magazine, Vol. 29, No. 6, pp. 70-81, November 2012.
    BibTeX TR2012-072 PDF
    • @article{Gales2012nov,
    •   author    = {Gales, M. and Watanabe, S. and Fosler-Lussier, E.},
    •   title     = {Structured Discriminative Models For Speech Recognition},
    •   journal   = {IEEE Signal Processing Magazine},
    •   volume    = {29},
    •   number    = {6},
    •   pages     = {70--81},
    •   month     = nov,
    •   year      = {2012},
    •   url       = {https://www.merl.com/publications/TR2012-072},
    • }
  •  Polatkan, G., Tuzel, O., "Compressed Inference for Probabilistic Sequential Models", Conference on Uncertainty in Artificial Intelligence (UAI), July 2011.
    BibTeX TR2011-051 PDF
    • @inproceedings{Polatkan2011jul,
    •   author    = {Polatkan, G. and Tuzel, O.},
    •   title     = {Compressed Inference for Probabilistic Sequential Models},
    •   booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)},
    •   month     = jul,
    •   year      = {2011},
    •   url       = {https://www.merl.com/publications/TR2011-051},
    • }
  •  Pathak, M., Rane, S., Sun, W., Raj, B., "Privacy Preserving Probabilistic Inference with Hidden Markov Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2011.5947696, May 2011, pp. 5868-5871.
    BibTeX TR2011-017 PDF
    • @inproceedings{Pathak2011may,
    •   author    = {Pathak, M. and Rane, S. and Sun, W. and Raj, B.},
    •   title     = {Privacy Preserving Probabilistic Inference with Hidden {Markov} Models},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {5868--5871},
    •   month     = may,
    •   year      = {2011},
    •   doi       = {10.1109/ICASSP.2011.5947696},
    •   url       = {https://www.merl.com/publications/TR2011-017},
    • }
  •  Mei, X., Porikli, F., "Joint Tracking and Video Registration by Factorial Hidden Markov Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2008, pp. 973-976.
    BibTeX TR2008-010 PDF
    • @inproceedings{Mei2008mar,
    •   author    = {Mei, X. and Porikli, F.},
    •   title     = {Joint Tracking and Video Registration by Factorial Hidden {Markov} Models},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {973--976},
    •   month     = mar,
    •   year      = {2008},
    •   issn      = {1520-6149},
    •   url       = {https://www.merl.com/publications/TR2008-010},
    • }
  •  Wren, C., Ivanov, Y., Kaur, I., Leigh, D., Westhues, J., "SocialMotion: Measuring the Hidden Social Life of a Building", International Symposium on Location- and Context-Awareness (LoCA), September 2007, vol. 4718, pp. 85-102.
    BibTeX TR2007-034 PDF
    • @inproceedings{Wren2007sep,
    •   author    = {Wren, C. and Ivanov, Y. and Kaur, I. and Leigh, D. and Westhues, J.},
    •   title     = {{SocialMotion}: Measuring the Hidden Social Life of a Building},
    •   booktitle = {International Symposium on Location- and Context-Awareness (LoCA)},
    •   series    = {Lecture Notes in Computer Science},
    •   volume    = {4718},
    •   pages     = {85--102},
    •   month     = sep,
    •   year      = {2007},
    •   isbn      = {978-3-540-75159-5},
    •   url       = {https://www.merl.com/publications/TR2007-034},
    • }
  •  Smaragdis, P., Boufounos, P., "Learning Source Trajectories Using Wrapped-Phase Hidden Markov Models", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), October 2005, pp. 114-117.
    BibTeX TR2005-138 PDF
    • @inproceedings{Smaragdis2005oct,
    •   author    = {Smaragdis, P. and Boufounos, P.},
    •   title     = {Learning Source Trajectories Using Wrapped-Phase Hidden {Markov} Models},
    •   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    •   pages     = {114--117},
    •   month     = oct,
    •   year      = {2005},
    •   url       = {https://www.merl.com/publications/TR2005-138},
    • }
  •  Xie, L., Kennedy, L., Chang, S.-F., Divakaran, A., Sun, H., Lin, C.-Y., "Layered Dynamic Mixture Model for Pattern Discovery in Asynchronous Multi-Modal Streams", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2005, vol. 2, pp. 1053-1056.
    BibTeX TR2005-078 PDF
    • @inproceedings{Xie2005mar,
    •   author    = {Xie, L. and Kennedy, L. and Chang, S.-F. and Divakaran, A. and Sun, H. and Lin, C.-Y.},
    •   title     = {Layered Dynamic Mixture Model for Pattern Discovery in Asynchronous Multi-Modal Streams},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   volume    = {2},
    •   pages     = {1053--1056},
    •   month     = mar,
    •   year      = {2005},
    •   issn      = {1520-6149},
    •   url       = {https://www.merl.com/publications/TR2005-078},
    • }
  •  Li, X., Porikli, F.M., "A Hidden Markov Model Framework for Traffic Event Detection Using Video Features", IEEE International Conference on Image Processing (ICIP), October 2004, vol. 5, pp. 2901-2904.
    BibTeX TR2004-130 PDF
    • @inproceedings{Li2004oct,
    •   author    = {Li, X. and Porikli, F. M.},
    •   title     = {A Hidden {Markov} Model Framework for Traffic Event Detection Using Video Features},
    •   booktitle = {IEEE International Conference on Image Processing (ICIP)},
    •   volume    = {5},
    •   pages     = {2901--2904},
    •   month     = oct,
    •   year      = {2004},
    •   issn      = {1522-4880},
    •   url       = {https://www.merl.com/publications/TR2004-130},
    • }
  •  Xie, L., Kennedy, L., Chang, S.-F., Divakaran, A., Sun, H., Lin, C.-Y., "Discovering Meaningful Multimedia Patterns with Audio-Visual Concepts and Associated Text", IEEE International Conference on Image Processing (ICIP), October 2004, vol. 4, pp. 2383-2386.
    BibTeX TR2004-128 PDF
    • @inproceedings{Xie2004oct,
    •   author    = {Xie, L. and Kennedy, L. and Chang, S.-F. and Divakaran, A. and Sun, H. and Lin, C.-Y.},
    •   title     = {Discovering Meaningful Multimedia Patterns with Audio-Visual Concepts and Associated Text},
    •   booktitle = {IEEE International Conference on Image Processing (ICIP)},
    •   volume    = {4},
    •   pages     = {2383--2386},
    •   month     = oct,
    •   year      = {2004},
    •   issn      = {1522-4880},
    •   url       = {https://www.merl.com/publications/TR2004-128},
    • }
  •  Radhakrishnan, R., Xiong, Z., Divakaran, A., Memon, N., "Time Series Analysis and Segmentation Using Eigenvectors for Mining Semantic Audio Label Sequences", IEEE International Conference on Multimedia and Expo (ICME), June 2004.
    BibTeX TR2004-063 PDF
    • @inproceedings{Radhakrishnan2004jun,
    •   author    = {Radhakrishnan, R. and Xiong, Z. and Divakaran, A. and Memon, N.},
    •   title     = {Time Series Analysis and Segmentation Using Eigenvectors for Mining Semantic Audio Label Sequences},
    •   booktitle = {IEEE International Conference on Multimedia and Expo (ICME)},
    •   month     = jun,
    •   year      = {2004},
    •   url       = {https://www.merl.com/publications/TR2004-063},
    • }
  •  Porikli, F.M., Li, X., "Traffic Congestion Estimation Using HMM Models Without Vehicle Tracking", IEEE Intelligent Vehicles Symposium (IV), June 2004, pp. 188-193.
    BibTeX TR2004-019 PDF
    • @inproceedings{Porikli2004jun2,
    •   author    = {Porikli, F. M. and Li, X.},
    •   title     = {Traffic Congestion Estimation Using {HMM} Models Without Vehicle Tracking},
    •   booktitle = {IEEE Intelligent Vehicles Symposium (IV)},
    •   pages     = {188--193},
    •   month     = jun,
    •   year      = {2004},
    •   url       = {https://www.merl.com/publications/TR2004-019},
    • }
  •  Xie, L., Xu, P., Chang, S.-F., Divakaran, A., Sun, H., "Structure Analysis of Soccer Video with Domain Knowledge and Hidden Markov Models", Pattern Recognition Letters, Vol. 25, No. 7, pp. 767-775, May 2004.
    BibTeX TR2004-081 PDF
    • @article{Xie2004may,
    •   author    = {Xie, L. and Xu, P. and Chang, S.-F. and Divakaran, A. and Sun, H.},
    •   title     = {Structure Analysis of Soccer Video with Domain Knowledge and Hidden {Markov} Models},
    •   journal   = {Pattern Recognition Letters},
    •   volume    = {25},
    •   number    = {7},
    •   pages     = {767--775},
    •   month     = may,
    •   year      = {2004},
    •   publisher = {Elsevier},
    •   url       = {https://www.merl.com/publications/TR2004-081},
    • }
  •  Porikli, F.M., "Trajectory Distance Metric Using Hidden Markov Model Based Representation", European Conference on Computer Vision (ECCV), May 2004.
    BibTeX TR2004-030 PDF
    • @inproceedings{Porikli2004may3,
    •   author    = {Porikli, F. M.},
    •   title     = {Trajectory Distance Metric Using Hidden {Markov} Model Based Representation},
    •   booktitle = {European Conference on Computer Vision (ECCV)},
    •   month     = may,
    •   year      = {2004},
    •   url       = {https://www.merl.com/publications/TR2004-030},
    • }
  •  Radhakrishnan, R., Xiong, Z., Divakaran, A., Ishikawa, Y., "Generation of Sports Highlights Using a Combination of Supervised & Unsupervised Learning in Audio Domain", IEEE Pacific-Rim Conference on Multimedia (PCM), December 2003, vol. 2, pp. 935-939.
    BibTeX TR2003-144 PDF
    • @inproceedings{Radhakrishan2003dec,
    •   author    = {Radhakrishnan, R. and Xiong, Z. and Divakaran, A. and Ishikawa, Y.},
    •   title     = {Generation of Sports Highlights Using a Combination of Supervised \& Unsupervised Learning in Audio Domain},
    •   booktitle = {IEEE Pacific-Rim Conference on Multimedia (PCM)},
    •   volume    = {2},
    •   pages     = {935--939},
    •   month     = dec,
    •   year      = {2003},
    •   url       = {https://www.merl.com/publications/TR2003-144},
    • }
  •  Xie, L., Chang, S.-F., Divakaran, A., Sun, H., "Unsupervised Mining of Statistical Temporal Structures in Video" in Video Mining, DOI: 10.1007/​978-1-4757-6928-9_10, Springer, October 2003.
    BibTeX TR2003-132 PDF
    • @incollection{Xie2003oct,
    •   author    = {Xie, L. and Chang, S.-F. and Divakaran, A. and Sun, H.},
    •   title     = {Unsupervised Mining of Statistical Temporal Structures in Video},
    •   booktitle = {Video Mining},
    •   publisher = {Springer},
    •   month     = oct,
    •   year      = {2003},
    •   doi       = {10.1007/978-1-4757-6928-9_10},
    •   url       = {https://www.merl.com/publications/TR2003-132},
    • }