Publications

Xie, Y., Liu, D., Mansour, H., Boufounos, P.T., "Robust Parameter Estimation of Contaminated Damped Exponentials", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053507, April 2020, pp. 5500-5504.
BibTeX TR2020-052 PDF Video
- @inproceedings{Xie2020apr,
- author    = {Xie, Youye and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T.},
- title     = {{Robust Parameter Estimation of Contaminated Damped Exponentials}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {5500--5504},
- month     = apr,
- year      = {2020},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP40776.2020.9053507},
- isbn      = {978-1-5090-6631-5},
- issn      = {2379-190X},
- url       = {https://www.merl.com/publications/TR2020-052}
- }
Yu, L., Liu, D., Mansour, H., Boufounos, P.T., Ma, Y., "Blind Multi-Spectral Image Pan-Sharpening", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053554, April 2020, pp. 1429-1433.
BibTeX TR2020-047 PDF Video
- @inproceedings{Yu2020apr,
- author    = {Yu, Lantao and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T. and Ma, Yanting},
- title     = {{Blind Multi-Spectral Image Pan-Sharpening}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {1429--1433},
- month     = apr,
- year      = {2020},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP40776.2020.9053554},
- isbn      = {978-1-5090-6631-5},
- issn      = {2379-190X},
- url       = {https://www.merl.com/publications/TR2020-047}
- }
Li, R., Wang, X., Mallidi, H., Watanabe, S., Hori, T., Hermansky, H., "Multi-Stream End-to-End Speech Recognition", IEEE/ACM Transactions on Audio, Speech and Language Processing, DOI: 10.1109/TASLP.2019.2959721, Vol. 28, pp. 646-655, March 2020.
BibTeX TR2020-030 PDF
- @article{Li2020mar,
- author  = {Li, Ruizhi and Wang, Xiaofei and Mallidi, Harish and Watanabe, Shinji and Hori, Takaaki and Hermansky, Hynek},
- title   = {{Multi-Stream End-to-End Speech Recognition}},
- journal = {IEEE/ACM Transactions on Audio, Speech and Language Processing},
- volume  = {28},
- pages   = {646--655},
- month   = mar,
- year    = {2020},
- doi     = {10.1109/TASLP.2019.2959721},
- url     = {https://www.merl.com/publications/TR2020-030}
- }
D’Haro, L.F., Yoshino, K., Hori, C., Marks, T.K., Polymenakos, L., Kummerfeld, J.K., Galley, M., Gao, X., "Overview of the seventh Dialog System Technology Challenge: DSTC7", Computer Speech and Language, DOI: 10.1016/j.csl.2020.101068, Vol. 62, March 2020.
BibTeX TR2020-029 PDF
- @article{D’Haro2020mar,
- author = {D'Haro, Luis Fernando and Yoshino, Koichiro and Hori, Chiori and Marks, Tim K. and Polymenakos, Lazaros and Kummerfeld, Jonathan K. and Galley, Michel and Gao, Xiang},
- title = {{Overview of the seventh Dialog System Technology Challenge: DSTC7}},
- journal = {Computer Speech and Language},
- year = 2020,
- volume = 62,
- month = mar,
- doi = {10.1016/j.csl.2020.101068},
- url = {https://www.merl.com/publications/TR2020-029}
- }
Aihara, R., Wichern, G., Le Roux, J., "Deep Clustering-based Single Channel Speech Separation and Recent Advances", Acoustical Science and Technology, DOI: 10.1250/ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
BibTeX TR2021-020 PDF
- @article{Aihara2020jun,
- author  = {Aihara, Ryo and Wichern, Gordon and {Le Roux}, Jonathan},
- title   = {{Deep Clustering-based Single Channel Speech Separation and Recent Advances}},
- journal = {Acoustical Science and Technology},
- volume  = {41},
- number  = {2},
- pages   = {465--471},
- month   = mar,
- year    = {2020},
- doi     = {10.1250/ast.41.465},
- url     = {https://www.merl.com/publications/TR2021-020}
- }
Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
BibTeX TR2019-157 PDF
- @inproceedings{Chang2019dec,
- author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
- title = {{MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2019,
- pages = {237--244},
- month = dec,
- isbn = {978-1-7281-0305-1},
- url = {https://www.merl.com/publications/TR2019-157}
- }
Karita, S., Chen, N., Hayashi, T., Hori, T., Inaguma, H., Jiang, Z., Someki, M., Yalta Soplin, N.E., Yamamoto, R., Wang, X., Watanabe, S., Yoshimura, T., Zhang, W., "A Comparative Study on Transformer Vs RNN in Speech Applications", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU46091.2019.9003750, December 2019, pp. 449-456.
BibTeX TR2019-158 PDF
- @inproceedings{Karita2019dec,
- author = {Karita, Shigeki and Chen, Nanxin and Hayashi, Tomoki and Hori, Takaaki and Inaguma, Hirofumi and Jiang, Ziyan and Someki, Masao and Yalta Soplin, Nelson Enrique and Yamamoto, Ryuichi and Wang, Xiaofei and Watanabe, Shinji and Yoshimura, Takenori and Zhang, Wangyou},
- title = {{A Comparative Study on Transformer Vs RNN in Speech Applications}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2019,
- pages = {449--456},
- month = dec,
- doi = {10.1109/ASRU46091.2019.9003750},
- url = {https://www.merl.com/publications/TR2019-158}
- }
Moritz, N., Hori, T., Le Roux, J., "Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 936-943.
BibTeX TR2019-159 PDF
- @inproceedings{Moritz2019dec,
- author    = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
- title     = {{Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- pages     = {936--943},
- month     = dec,
- year      = {2019},
- isbn      = {978-1-7281-0305-1},
- url       = {https://www.merl.com/publications/TR2019-159}
- }
Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937253, October 2019, pp. 170-174.
BibTeX TR2019-123 PDF
- @inproceedings{Kavalerov2019oct,
- author    = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and {Le Roux}, Jonathan and Hershey, John},
- title     = {{Universal Sound Separation}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {170--174},
- month     = oct,
- year      = {2019},
- doi       = {10.1109/WASPAA.2019.8937253},
- isbn      = {978-1-7281-1123-0},
- issn      = {1947-1629},
- url       = {https://www.merl.com/publications/TR2019-123}
- }
Baskar, M.K., Watanabe, S., Astudillo, R., Hori, T., Burget, L., Cernocky, J.H., "Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text", Interspeech, DOI: 10.21437/Interspeech.2019-3167, September 2019, pp. 3790-3794.
BibTeX TR2019-100 PDF
- @inproceedings{Baskar2019sep,
- author = {Baskar, Murali Karthick and Watanabe, Shinji and Astudillo, Ramon and Hori, Takaaki and Burget, Lukas and Cernocky, Jan Honza},
- title = {{Semi-supervised Sequence-to-sequence ASR using Unpaired Speech and Text}},
- booktitle = {Interspeech},
- year = 2019,
- pages = {3790--3794},
- month = sep,
- doi = {10.21437/Interspeech.2019-3167},
- issn = {1990-9772},
- url = {https://www.merl.com/publications/TR2019-100}
- }
Karafiat, M., Baskar, M.K., Watanabe, S., Hori, T., Wiesner, M., Cernocky, J.H., "Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems", Interspeech, DOI: 10.21437/Interspeech.2019-2355, September 2019, pp. 2220-2224.
BibTeX TR2019-103 PDF
- @inproceedings{Karafiat2019sep,
- author = {Karafiat, Martin and Baskar, Murali Karthick and Watanabe, Shinji and Hori, Takaaki and Wiesner, Matthew and Cernocky, Jan Honza},
- title = {{Analysis of Multilingual Sequence-to-Sequence Speech Recognition Systems}},
- booktitle = {Interspeech},
- year = 2019,
- pages = {2220--2224},
- month = sep,
- doi = {10.21437/Interspeech.2019-2355},
- url = {https://www.merl.com/publications/TR2019-103}
- }
Moritz, N., Hori, T., Le Roux, J., "Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2837, September 2019, pp. 76-80.
BibTeX TR2019-098 PDF
- @inproceedings{Moritz2019sep,
- author    = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
- title     = {{Unidirectional Neural Network Architectures for End-to-End Automatic Speech Recognition}},
- booktitle = {Interspeech},
- pages     = {76--80},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-2837},
- url       = {https://www.merl.com/publications/TR2019-098}
- }
Seki, H., Hori, T., Watanabe, S., Le Roux, J., Hershey, J., "End-to-End Multilingual Multi-Speaker Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-3038, September 2019, pp. 3755-3759.
BibTeX TR2019-101 PDF
- @inproceedings{Seki2019sep,
- author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and {Le Roux}, Jonathan and Hershey, John},
- title     = {{End-to-End Multilingual Multi-Speaker Speech Recognition}},
- booktitle = {Interspeech},
- pages     = {3755--3759},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-3038},
- url       = {https://www.merl.com/publications/TR2019-101}
- }
Seki, H., Hori, T., Watanabe, S., Moritz, N., Le Roux, J., "Vectorized Beam Search for CTC-Attention-based Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2019-2860, September 2019, pp. 3825-3829.
BibTeX TR2019-102 PDF
- @inproceedings{Seki2019sep2,
- author    = {Seki, Hiroshi and Hori, Takaaki and Watanabe, Shinji and Moritz, Niko and {Le Roux}, Jonathan},
- title     = {{Vectorized Beam Search for CTC-Attention-based Speech Recognition}},
- booktitle = {Interspeech},
- pages     = {3825--3829},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-2860},
- url       = {https://www.merl.com/publications/TR2019-102}
- }
Wichern, G., McQuinn, E., Antognini, J., Flynn, M., Zhu, R., Crow, D., Manilow, E., Le Roux, J., "WHAM!: Extending Speech Separation to Noisy Environments", Interspeech, DOI: 10.21437/Interspeech.2019-2821, September 2019, pp. 1368-1372.
BibTeX TR2019-099 PDF
- @inproceedings{Wichern2019sep,
- author    = {Wichern, Gordon and McQuinn, Emmett and Antognini, Joe and Flynn, Michael and Zhu, Richard and Crow, Dwight and Manilow, Ethan and {Le Roux}, Jonathan},
- title     = {{WHAM!: Extending Speech Separation to Noisy Environments}},
- booktitle = {Interspeech},
- pages     = {1368--1372},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-2821},
- url       = {https://www.merl.com/publications/TR2019-099}
- }
Yalta, N., Watanabe, S., Hori, T., Nakadai, K., Ogata, T., "CNN-based Multichannel End-to-End Speech Recognition for Everyday Home Environments", European Signal Processing Conference (EUSIPCO), DOI: 10.23919/EUSIPCO.2019.8902524, September 2019, pp. 1-5.
BibTeX TR2019-094 PDF
- @inproceedings{Yalta2019sep,
- author    = {Yalta, Nelson and Watanabe, Shinji and Hori, Takaaki and Nakadai, Kazuhiro and Ogata, Tetsuya},
- title     = {{CNN-based Multichannel End-to-End Speech Recognition for Everyday Home Environments}},
- booktitle = {European Signal Processing Conference (EUSIPCO)},
- pages     = {1--5},
- month     = sep,
- year      = {2019},
- doi       = {10.23919/EUSIPCO.2019.8902524},
- url       = {https://www.merl.com/publications/TR2019-094}
- }
Duan, C., Chen, S., Tian, D., Moura, J., Kovacevic, J., "Deep Graph Topology Learning for 3D Point Cloud Reconstruction", Graph Signal Processing Workshop (GSP), June 2019.
BibTeX TR2019-046 PDF
- @inproceedings{Duan2019jun,
- author    = {Duan, Chaojing and Chen, Siheng and Tian, Dong and Moura, Jose and Kovacevic, Jelena},
- title     = {{Deep Graph Topology Learning for 3D Point Cloud Reconstruction}},
- booktitle = {Graph Signal Processing Workshop (GSP)},
- month     = jun,
- year      = {2019},
- url       = {https://www.merl.com/publications/TR2019-046}
- }
Aihara, R., Hanazawa, T., Okato, Y., Wichern, G., Le Roux, J., "Teacher-Student Deep Clustering For Low-Delay Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682695, May 2019.
BibTeX TR2019-003 PDF
- @inproceedings{Aihara2019may,
- author    = {Aihara, Ryo and Hanazawa, Toshiyuki and Okato, Yohei and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Teacher-Student Deep Clustering For Low-Delay Channel Speech Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682695},
- url       = {https://www.merl.com/publications/TR2019-003}
- }
Baskar, M.K., Burget, L., Watanabe, S., Karafiat, M., Hori, T., Cernocky, J.H., "Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682782, May 2019, pp. 5646-5650.
BibTeX TR2019-006 PDF
- @inproceedings{Baskar2019may,
- author = {Baskar, Murali Karthick and Burget, Lukas and Watanabe, Shinji and Karafiat, Martin and Hori, Takaaki and Cernocky, Jan Honza},
- title = {{Promising Accurate Prefix Boosting for Sequence-to-Sequence ASR}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- pages = {5646--5650},
- month = may,
- doi = {10.1109/ICASSP.2019.8682782},
- issn = {2379-190X},
- isbn = {978-1-4799-8131-1},
- url = {https://www.merl.com/publications/TR2019-006}
- }
Cho, J., Watanabe, S., Hori, T., Baskar, M.K., Inaguma, H., Villalba, J., Dehak, N., "Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683380, May 2019.
BibTeX TR2019-007 PDF
- @inproceedings{Cho2019may,
- author    = {Cho, Jaejin and Watanabe, Shinji and Hori, Takaaki and Baskar, Murali Karthick and Inaguma, Hirofumi and Villalba, Jesus and Dehak, Najim},
- title     = {{Language Model Integration Based on Memory Control for Sequence to Sequence Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683380},
- url       = {https://www.merl.com/publications/TR2019-007}
- }
Das, P., Kao, J.-Y., Ortega, A., Mansour, H., Vetro, A., Sawada, T., Minezawa, A., "Hand Graph Representations for Unsupervised Segmentation of Complex Activities", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683643, May 2019.
BibTeX TR2019-009 PDF
- @inproceedings{Das2019may,
- author = {Das, Pratyusha and Kao, Jiun-Yu and Ortega, Antonio and Mansour, Hassan and Vetro, Anthony and Sawada, Tomoya and Minezawa, Akira},
- title = {{Hand Graph Representations for Unsupervised Segmentation of Complex Activities}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8683643},
- url = {https://www.merl.com/publications/TR2019-009}
- }
Hori, C., Alamri, H., Wang, J., Wichern, G., Hori, T., Cherian, A., Marks, T.K., Cartillier, V., Lopes, R., Das, A., Essa, I., Batra, D., Parikh, D., "End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682583, May 2019.
BibTeX TR2019-016 PDF
- @inproceedings{Hori2019may2,
- author    = {Hori, Chiori and Alamri, Huda and Wang, Jue and Wichern, Gordon and Hori, Takaaki and Cherian, Anoop and Marks, Tim K. and Cartillier, Vincent and Lopes, Raphael and Das, Abhishek and Essa, Irfan and Batra, Dhruv and Parikh, Devi},
- title     = {{End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682583},
- url       = {https://www.merl.com/publications/TR2019-016}
- }
Hori, T., Astudillo, R., Hayashi, T., Zhang, Y., Watanabe, S., Le Roux, J., "Cycle-Consistency Training for End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683307, May 2019.
BibTeX TR2019-002 PDF
- @inproceedings{Hori2019may,
- author = {Hori, Takaaki and Astudillo, Ramon and Hayashi, Tomoki and Zhang, Yu and Watanabe, Shinji and {Le Roux}, Jonathan},
- title = {{Cycle-Consistency Training for End-to-End Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8683307},
- url = {https://www.merl.com/publications/TR2019-002}
- }
Kadu, A., Mansour, H., Boufounos, P.T., Liu, D., "Reflection Tomographic Imaging of Highly Scattering Objects Using Incremental Frequency Inversion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682393, May 2019.
BibTeX TR2019-012 PDF Video
- @inproceedings{Kadu2019may,
- author    = {Kadu, Ajinkya and Mansour, Hassan and Boufounos, Petros T. and Liu, Dehong},
- title     = {{Reflection Tomographic Imaging of Highly Scattering Objects Using Incremental Frequency Inversion}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682393},
- url       = {https://www.merl.com/publications/TR2019-012}
- }
Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "The Phasebook: Building Complex Masks via Discrete Representations for Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682587, May 2019.
BibTeX TR2019-008 PDF
- @inproceedings{LeRoux2019may2,
- author    = {{Le Roux}, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title     = {{The Phasebook: Building Complex Masks via Discrete Representations for Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682587},
- url       = {https://www.merl.com/publications/TR2019-008}
- }