Publications

394 / 3,985 publications found.


  •  Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPnet Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/​DSLW51110, June 2021, pp. 1-6.
    BibTeX TR2021-073 PDF
    • @inproceedings{Watanabe2021jun,
    • author = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S. and Zhang, Wangyou},
    • title = {{The 2020 ESPnet Update: New Features, Broadened Applications, Performance Improvements, and Future Plans}},
    • booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
    • year = 2021,
    • pages = {1--6},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/DSLW51110},
    • isbn = {978-1-6654-2826-2},
    • url = {https://www.merl.com/publications/TR2021-073}
    • }
  •  Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2021.3078368, May 2021.
    BibTeX TR2021-064 PDF
    • @article{Kim2021may,
    • author = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
    • title = {{Overview of the Eighth Dialog System Technology Challenge: DSTC8}},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2021,
    • month = may,
    • doi = {10.1109/TASLP.2021.3078368},
    • issn = {2329-9290},
    • url = {https://www.merl.com/publications/TR2021-064}
    • }
  •  Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
    BibTeX TR2021-042 PDF Video
    • @article{Hori2021may,
    • author = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and {Le Roux}, Jonathan and Sullivan, Alan and Vetro, Anthony},
    • title = {{マルチモーダルセンシング情報に基づくScene-aware Interaction 技術}},
    • journal = {Society of Automotive Engineers of Japan},
    • year = 2021,
    • volume = 75,
    • number = 5,
    • pages = {66--71},
    • month = may,
    • url = {https://www.merl.com/publications/TR2021-042}
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Interspeech, DOI: 10.21437/​Interspeech.2020-2928, October 2020, pp. 5011-5015.
    BibTeX TR2020-139 PDF Presentation
    • @inproceedings{Hori2020oct,
    • author = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and {Le Roux}, Jonathan},
    • title = {{Transformer-based Long-context End-to-end Speech Recognition}},
    • booktitle = {Interspeech},
    • year = 2020,
    • pages = {5011--5015},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2928},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-139}
    • }
  •  Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Interspeech, DOI: 10.21437/​Interspeech.2020-1846, October 2020, pp. 4671-4675.
    BibTeX TR2020-137 PDF Presentation
    • @inproceedings{Jayashankar2020oct,
    • author = {Jayashankar, Tejas and {Le Roux}, Jonathan and Moulin, Pierre},
    • title = {{Detecting Audio Attacks on ASR Systems with Dropout Uncertainty}},
    • booktitle = {Interspeech},
    • year = 2020,
    • pages = {4671--4675},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-1846},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-137}
    • }
  •  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Interspeech, DOI: 10.21437/​Interspeech.2020-2757, October 2020, pp. 3112-3116.
    BibTeX TR2020-138 PDF Presentation
    • @inproceedings{Moritz2020oct,
    • author = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection}},
    • booktitle = {Interspeech},
    • year = 2020,
    • pages = {3112--3116},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2757},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-138}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
    BibTeX TR2020-126 PDF
    • @article{Pishdadian2020sep,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision}},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {2386--2399},
    • month = sep,
    • doi = {10.1109/TASLP.2020.3013105},
    • url = {https://www.merl.com/publications/TR2020-126}
    • }
  •  Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
    BibTeX TR2020-111 PDF
    • @inproceedings{Seetharaman2020jul,
    • author = {Seetharaman, Prem and Wichern, Gordon and {Le Roux}, Jonathan and Pardo, Bryan},
    • title = {{Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles}},
    • booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
    • year = 2020,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2020-111}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video Presentation
    • @inproceedings{Chang2020apr,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
    • title = {{End-To-End Multi-Speaker Speech Recognition with Transformer}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6134--6138},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054029},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-043}
    • }
  •  Liu, J., Chen, B., Chen, S., Berges, M., Bielak, J., Noh, H.Y., "Damage-Sensitive and Domain-Invariant Feature Extraction for Vehicle-Vibration-Based Bridge Health Monitoring", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053450, April 2020, pp. 3007-3011.
    BibTeX TR2020-053 PDF Video
    • @inproceedings{Liu2020apr,
    • author = {Liu, Jingxiao and Chen, Bingqing and Chen, Siheng and Berges, Mario and Bielak, Jacobo and Noh, Hae Young},
    • title = {{Damage-Sensitive and Domain-Invariant Feature Extraction for Vehicle-Vibration-Based Bridge Health Monitoring}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {3007--3011},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053450},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-053}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Learning to Separate Sounds From Weakly Labeled Scenes", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053055, April 2020, pp. 91-95.
    BibTeX TR2020-038 PDF Video Presentation
    • @inproceedings{Pishdadian2020apr,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Learning to Separate Sounds From Weakly Labeled Scenes}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {91--95},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053055},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-038}
    • }
  •  Al-Shabili, A., Mansour, H., Boufounos, P.T., "Learning Plug-and-Play Proximal Quasi-Newton Denoisers", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054537, April 2020, pp. 8896-8900.
    BibTeX TR2020-045 PDF Video
    • @inproceedings{Al-Shabili2020apr,
    • author = {Al-Shabili, Abdullah and Mansour, Hassan and Boufounos, Petros T.},
    • title = {{Learning Plug-and-Play Proximal Quasi-Newton Denoisers}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {8896--8900},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054537},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-045}
    • }
  •  Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053327, April 2020, pp. 696-700.
    BibTeX TR2020-042 PDF Video Presentation
    • @inproceedings{Maciejewski2020apr,
    • author = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and {Le Roux}, Jonathan},
    • title = {{WHAMR!: Noisy and Reverberant Single-Channel Speech Separation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {696--700},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053327},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-042}
    • }
  •  Ma, Y., Lodhi, M.A., Mansour, H., Boufounos, P.T., Liu, D., "Inverse Multiple Scattering With Phaseless Measurements", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053430, April 2020, pp. 1519-1523.
    BibTeX TR2020-041 PDF Video
    • @inproceedings{Ma2020apr,
    • author = {Ma, Yanting and Lodhi, Muhammad Asad and Mansour, Hassan and Boufounos, Petros T. and Liu, Dehong},
    • title = {{Inverse Multiple Scattering With Phaseless Measurements}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {1519--1523},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053430},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-041}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming Automatic Speech Recognition With The Transformer Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054476, April 2020, pp. 6074-6078.
    BibTeX TR2020-040 PDF Video Presentation
    • @inproceedings{Moritz2020apr,
    • author = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Streaming Automatic Speech Recognition With The Transformer Model}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6074--6078},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054476},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-040}
    • }
  •  Sari, L., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054249, April 2020, pp. 7384-7388.
    BibTeX TR2020-037 PDF Video Presentation
    • @inproceedings{Sari2020apr,
    • author = {Sari, Leda and Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {7384--7388},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054249},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-037}
    • }
  •  Shi, L., Geng, S., Shuang, K., Hori, C., Liu, S., Gao, P., Su, S., "Multi-Layer Content Interaction Through Quaternion Product For Visual Question Answering", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053595, April 2020, pp. 4412-4416.
    BibTeX TR2020-046 PDF
    • @inproceedings{Shi2020apr,
    • author = {Shi, Lei and Geng, Shijie and Shuang, Kai and Hori, Chiori and Liu, Songxiang and Gao, Peng and Su, Sen},
    • title = {{Multi-Layer Content Interaction Through Quaternion Product For Visual Question Answering}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {4412--4416},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053595},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-046}
    • }
  •  Wang, P., Boufounos, P.T., Mansour, H., Orlik, P.V., "Slow-Time MIMO-FMCW Automotive Radar Detection with Imperfect Waveform Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053892, April 2020, pp. 8634-8638.
    BibTeX TR2020-039 PDF Video
    • @inproceedings{Wang2020apr,
    • author = {Wang, Pu and Boufounos, Petros T. and Mansour, Hassan and Orlik, Philip V.},
    • title = {{Slow-Time MIMO-FMCW Automotive Radar Detection with Imperfect Waveform Separation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {8634--8638},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053892},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-039}
    • }
  •  Xia, Y., Wang, P., Berntorp, K., Koike-Akino, T., Mansour, H., Pajovic, M., Boufounos, P.T., Orlik, P.V., "Extended Object Tracking Using Hierarchical Truncation Measurement Model with Automotive Radar", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054614, April 2020, pp. 4900-4904.
    BibTeX TR2020-044 PDF Video
    • @inproceedings{Xia2020apr,
    • author = {Xia, Yuxuan and Wang, Pu and Berntorp, Karl and Koike-Akino, Toshiaki and Mansour, Hassan and Pajovic, Milutin and Boufounos, Petros T. and Orlik, Philip V.},
    • title = {{Extended Object Tracking Using Hierarchical Truncation Measurement Model with Automotive Radar}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {4900--4904},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054614},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-044}
    • }
  •  Xie, Y., Liu, D., Mansour, H., Boufounos, P.T., "Robust Parameter Estimation of Contaminated Damped Exponentials", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053507, April 2020, pp. 5500-5504.
    BibTeX TR2020-052 PDF Video
    • @inproceedings{Xie2020apr,
    • author = {Xie, Youye and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T.},
    • title = {{Robust Parameter Estimation of Contaminated Damped Exponentials}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {5500--5504},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053507},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-052}
    • }
  •  Yu, L., Liu, D., Mansour, H., Boufounos, P.T., Ma, Y., "Blind Multi-Spectral Image Pan-Sharpening", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053554, April 2020, pp. 1429-1433.
    BibTeX TR2020-047 PDF Video
    • @inproceedings{Yu2020apr,
    • author = {Yu, Lantao and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T. and Ma, Yanting},
    • title = {{Blind Multi-Spectral Image Pan-Sharpening}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {1429--1433},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053554},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-047}
    • }
  •  Li, R., Wang, X., Mallidi, H., Watanabe, S., Hori, T., Hermansky, H., "Multi-Stream End-to-End Speech Recognition", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2019.2959721, Vol. 28, pp. 646-655, March 2020.
    BibTeX TR2020-030 PDF
    • @article{Li2020mar,
    • author = {Li, Ruizhi and Wang, Xiaofei and Mallidi, Harish and Watanabe, Shinji and Hori, Takaaki and Hermansky, Hynek},
    • title = {{Multi-Stream End-to-End Speech Recognition}},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {646--655},
    • month = mar,
    • doi = {10.1109/TASLP.2019.2959721},
    • url = {https://www.merl.com/publications/TR2020-030}
    • }
  •  D’Haro, L.F., Yoshino, K., Hori, C., Marks, T.K., Polymenakos, L., Kummerfeld, J.K., Galley, M., Gao, X., "Overview of the seventh Dialog System Technology Challenge: DSTC7", Computer Speech and Language, DOI: 10.1016/​j.csl.2020.101068, Vol. 62, March 2020.
    BibTeX TR2020-029 PDF
    • @article{DHaro2020mar,
    • author = {D’Haro, Luis Fernando and Yoshino, Koichiro and Hori, Chiori and Marks, Tim K. and Polymenakos, Lazaros and Kummerfeld, Jonathan K. and Galley, Michel and Gao, Xiang},
    • title = {{Overview of the seventh Dialog System Technology Challenge: DSTC7}},
    • journal = {Computer Speech and Language},
    • year = 2020,
    • volume = 62,
    • month = mar,
    • doi = {10.1016/j.csl.2020.101068},
    • url = {https://www.merl.com/publications/TR2020-029}
    • }
  •  Aihara, R., Wichern, G., Le Roux, J., "Deep Clustering-based Single Channel Speech Separation and Recent Advances", Acoustical Science and Technology, DOI: 10.1250/​ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
    BibTeX TR2021-020 PDF
    • @article{Aihara2020jun,
    • author = {Aihara, Ryo and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Deep Clustering-based Single Channel Speech Separation and Recent Advances}},
    • journal = {Acoustical Science and Technology},
    • year = 2020,
    • volume = 41,
    • number = 2,
    • pages = {465--471},
    • month = mar,
    • doi = {10.1250/ast.41.465},
    • url = {https://www.merl.com/publications/TR2021-020}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
    • title = {{MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition}},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }