Publications

169 / 3,803 publications found.


  •  Moritz, N., Hori, T., Le Roux, J., "Capturing Multi-Resolution Context by Dilated Self-Attention", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP39728.2021.9415001, June 2021, pp. 5869-5873.
    BibTeX TR2021-036 PDF
    • @inproceedings{Moritz2021jun,
    • author = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Capturing Multi-Resolution Context by Dilated Self-Attention}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • pages = {5869--5873},
    • month = jun,
    • doi = {10.1109/ICASSP39728.2021.9415001},
    • url = {https://www.merl.com/publications/TR2021-036}
    • }
  •  Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP39728.2021.9413358, June 2021, pp. 46-50.
    BibTeX TR2021-069 PDF
    • @inproceedings{Hung2021jun,
    • author = {Hung, Yun-Ning and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • pages = {46--50},
    • month = jun,
    • doi = {10.1109/ICASSP39728.2021.9413358},
    • issn = {2379-190X},
    • isbn = {978-1-7281-7605-5},
    • url = {https://www.merl.com/publications/TR2021-069}
    • }
  •  Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP39728.2021.9414299, June 2021, pp. 6553-6557.
    BibTeX TR2021-039 PDF
    • @inproceedings{Khurana2021jun,
    • author = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • pages = {6553--6557},
    • month = jun,
    • doi = {10.1109/ICASSP39728.2021.9414299},
    • url = {https://www.merl.com/publications/TR2021-039}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Semi-Supervised Speech Recognition via Graph-Based Temporal Classification", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP39728.2021.9414058, June 2021, pp. 6548-6552.
    BibTeX TR2021-037 PDF
    • @inproceedings{Moritz2021jun2,
    • author = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Semi-Supervised Speech Recognition via Graph-Based Temporal Classification}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • pages = {6548--6552},
    • month = jun,
    • doi = {10.1109/ICASSP39728.2021.9414058},
    • url = {https://www.merl.com/publications/TR2021-037}
    • }
  •  Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
    BibTeX TR2021-042 PDF Video
    • @article{Hori2021may,
    • author = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and {Le Roux}, Jonathan and Sullivan, Alan and Vetro, Anthony},
    • title = {{マルチモーダルセンシング情報に基づくScene-aware Interaction 技術}},
    • journal = {Society of Automotive Engineers of Japan},
    • year = 2021,
    • volume = 75,
    • number = 5,
    • pages = {66--71},
    • month = may,
    • url = {https://www.merl.com/publications/TR2021-042}
    • }
  •  Geng, S., Gao, P., Chatterjee, M., Hori, C., Le Roux, J., Zhang, Y., Li, H., Cherian, A., "Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers", AAAI Conference on Artificial Intelligence, February 2021, pp. 1415-1423.
    BibTeX TR2021-010 PDF
    • @inproceedings{Geng2021feb,
    • author = {Geng, Shijie and Gao, Peng and Chatterjee, Moitreya and Hori, Chiori and {Le Roux}, Jonathan and Zhang, Yongfeng and Li, Hongsheng and Cherian, Anoop},
    • title = {{Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers}},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2021,
    • pages = {1415--1423},
    • month = feb,
    • publisher = {AAAI Press, Palo Alto, California USA},
    • isbn = {978-1-57735-866-4},
    • url = {https://www.merl.com/publications/TR2021-010}
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Interspeech, DOI: 10.21437/​Interspeech.2020-2928, October 2020, pp. 5011-5015.
    BibTeX TR2020-139 PDF Presentation
    • @inproceedings{Hori2020oct,
    • author = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and {Le Roux}, Jonathan},
    • title = {{Transformer-based Long-context End-to-end Speech Recognition}},
    • booktitle = {Interspeech},
    • year = 2020,
    • pages = {5011--5015},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2928},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-139}
    • }
  •  Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Interspeech, DOI: 10.21437/​Interspeech.2020-1846, October 2020, pp. 4671-4675.
    BibTeX TR2020-137 PDF Presentation
    • @inproceedings{Jayashankar2020oct,
    • author = {Jayashankar, Tejas and {Le Roux}, Jonathan and Moulin, Pierre},
    • title = {{Detecting Audio Attacks on ASR Systems with Dropout Uncertainty}},
    • booktitle = {Interspeech},
    • year = 2020,
    • pages = {4671--4675},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-1846},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-137}
    • }
  •  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Interspeech, DOI: 10.21437/​Interspeech.2020-2757, October 2020, pp. 3112-3116.
    BibTeX TR2020-138 PDF Presentation
    • @inproceedings{Moritz2020oct,
    • author = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection}},
    • booktitle = {Interspeech},
    • year = 2020,
    • pages = {3112--3116},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2757},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-138}
    • }
  •  Manilow, E., Wichern, G., Le Roux, J., "Hierarchical Musical Instrument Separation", International Society for Music Information Retrieval (ISMIR) Conference, October 2020, pp. 376-383.
    BibTeX TR2020-136 PDF Software
    • @inproceedings{Manilow2020oct,
    • author = {Manilow, Ethan and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Hierarchical Musical Instrument Separation}},
    • booktitle = {International Society for Music Information Retrieval (ISMIR) Conference},
    • year = 2020,
    • pages = {376--383},
    • month = oct,
    • isbn = {978-0-9813537-0-8},
    • url = {https://www.merl.com/publications/TR2020-136}
    • }
  •  Seetharaman, P., Wichern, G., Pardo, B., Le Roux, J., "Autoclip: Adaptive Gradient Clipping For Source Separation Networks", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/​MLSP49062.2020.9231926, September 2020.
    BibTeX TR2020-132 PDF
    • @inproceedings{Seetharaman2020sep,
    • author = {Seetharaman, Prem and Wichern, Gordon and Pardo, Bryan and {Le Roux}, Jonathan},
    • title = {{Autoclip: Adaptive Gradient Clipping For Source Separation Networks}},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2020,
    • month = sep,
    • publisher = {IEEE},
    • doi = {10.1109/MLSP49062.2020.9231926},
    • url = {https://www.merl.com/publications/TR2020-132}
    • }
  •  Gao, P., Hori, C., Geng, S., Hori, T., Le Roux, J., "Multi-Pass Transformer for Machine Translation", arXiv, September 2020.
    BibTeX arXiv
    • @article{Gao2020sep,
    • author = {Gao, Peng and Hori, Chiori and Geng, Shijie and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Multi-Pass Transformer for Machine Translation}},
    • journal = {arXiv},
    • year = 2020,
    • month = sep,
    • url = {https://arxiv.org/abs/2009.11382}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
    BibTeX TR2020-126 PDF
    • @article{Pishdadian2020sep,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision}},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {2386--2399},
    • month = sep,
    • doi = {10.1109/TASLP.2020.3013105},
    • url = {https://www.merl.com/publications/TR2020-126}
    • }
  •  Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
    BibTeX TR2020-111 PDF
    • @inproceedings{Seetharaman2020jul,
    • author = {Seetharaman, Prem and Wichern, Gordon and {Le Roux}, Jonathan and Pardo, Bryan},
    • title = {{Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles}},
    • booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
    • year = 2020,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2020-111}
    • }
  •  Geng, S., Gao, P., Hori, C., Le Roux, J., Cherian, A., "Spatio-Temporal Scene Graphs for Video Dialog", arXiv, July 2020.
    BibTeX arXiv
    • @article{Geng2020jul,
    • author = {Geng, Shijie and Gao, Peng and Hori, Chiori and {Le Roux}, Jonathan and Cherian, Anoop},
    • title = {{Spatio-Temporal Scene Graphs for Video Dialog}},
    • journal = {arXiv},
    • year = 2020,
    • month = jul,
    • url = {https://arxiv.org/abs/2007.03848}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video Presentation
    • @inproceedings{Chang2020apr,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
    • title = {{End-To-End Multi-Speaker Speech Recognition with Transformer}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6134--6138},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054029},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-043}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Learning to Separate Sounds From Weakly Labeled Scenes", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053055, April 2020, pp. 91-95.
    BibTeX TR2020-038 PDF Video Presentation
    • @inproceedings{Pishdadian2020apr,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Learning to Separate Sounds From Weakly Labeled Scenes}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {91--95},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053055},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-038}
    • }
  •  Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9053327, April 2020, pp. 696-700.
    BibTeX TR2020-042 PDF Video Presentation
    • @inproceedings{Maciejewski2020apr,
    • author = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and {Le Roux}, Jonathan},
    • title = {{WHAMR!: Noisy and Reverberant Single-Channel Speech Separation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {696--700},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053327},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-042}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming Automatic Speech Recognition With The Transformer Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054476, April 2020, pp. 6074-6078.
    BibTeX TR2020-040 PDF Video Presentation
    • @inproceedings{Moritz2020apr,
    • author = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Streaming Automatic Speech Recognition With The Transformer Model}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6074--6078},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054476},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-040}
    • }
  •  Sari, L., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP40776.2020.9054249, April 2020, pp. 7384-7388.
    BibTeX TR2020-037 PDF Video Presentation
    • @inproceedings{Sari2020apr,
    • author = {Sari, Leda and Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {7384--7388},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054249},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-037}
    • }
  •  Aihara, R., Wichern, G., Le Roux, J., "Deep clusteringによる シングルチャネル音声分離とその発展", The Journal of the Acoustical Society of Japan, DOI: 10.20697/​jasj.76.2_101, Vol. 76, No. 2, pp. 101-108, April 2020.
    BibTeX J-STAGE
    • @article{Aihara2020apr,
    • author = {Aihara, Ryo and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Deep clusteringによる シングルチャネル音声分離とその発展}},
    • journal = {The Journal of the Acoustical Society of Japan},
    • year = 2020,
    • volume = 76,
    • number = 2,
    • pages = {101--108},
    • month = apr,
    • doi = {10.20697/jasj.76.2_101},
    • url = {https://www.jstage.jst.go.jp/article/jasj/76/2/76_101/_article/-char/en}
    • }
  •  Aihara, R., Wichern, G., Le Roux, J., "Deep Clustering-based Single Channel Speech Separation and Recent Advances", Acoustical Science and Technology, DOI: 10.1250/​ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
    BibTeX TR2021-020 PDF
    • @article{Aihara2020jun,
    • author = {Aihara, Ryo and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Deep Clustering-based Single Channel Speech Separation and Recent Advances}},
    • journal = {Acoustical Science and Technology},
    • year = 2020,
    • volume = 41,
    • number = 2,
    • pages = {465--471},
    • month = mar,
    • doi = {10.1250/ast.41.465},
    • url = {https://www.merl.com/publications/TR2021-020}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-144.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and {Le Roux}, Jonathan and Watanabe, Shinji},
    • title = {{MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition}},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--144},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 936-943.
    BibTeX TR2019-159 PDF
    • @inproceedings{Moritz2019dec,
    • author = {Moritz, Niko and Hori, Takaaki and {Le Roux}, Jonathan},
    • title = {{Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models}},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {936--943},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-159}
    • }
  •  Kavalerov, I., Wisdom, S., Erdogan, H., Patton, B., Wilson, K., Le Roux, J., Hershey, J., "Universal Sound Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/​WASPAA.2019.8937253, October 2019, pp. 170-174.
    BibTeX TR2019-123 PDF
    • @inproceedings{Kavalerov2019oct,
    • author = {Kavalerov, Ilya and Wisdom, Scott and Erdogan, Hakan and Patton, Brian and Wilson, Kevin and {Le Roux}, Jonathan and Hershey, John},
    • title = {{Universal Sound Separation}},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • year = 2019,
    • pages = {170--174},
    • month = oct,
    • doi = {10.1109/WASPAA.2019.8937253},
    • issn = {1947-1629},
    • isbn = {978-1-7281-1123-0},
    • url = {https://www.merl.com/publications/TR2019-123}
    • }