Publications

95 / 3,101 publications found.


  •  Moritz, N., Hori, T., Le Roux, J., "Capturing Multi-Resolution Context by Dilated Self-Attention", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), June 2021.
    BibTeX TR2021-036 PDF
    • @inproceedings{Moritz2021jun,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Capturing Multi-Resolution Context by Dilated Self-Attention},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2021-036}
    • }
  •  Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), June 2021.
    BibTeX TR2021-039 PDF
    • @inproceedings{Khurana2021jun,
    • author = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2021-039}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Semi-Supervised Speech Recognition via Graph-Based Temporal Classification", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), June 2021.
    BibTeX TR2021-037 PDF
    • @inproceedings{Moritz2021jun2,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Semi-Supervised Speech Recognition via Graph-Based Temporal Classification},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2021,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2021-037}
    • }
  •  Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, May 2021.
    BibTeX
    • @article{Hori2021may,
    • author = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and Le Roux, Jonathan and Sullivan, Alan and Vetro, Anthony},
    • title = {マルチモーダルセンシング情報に基づくScene-aware Interaction 技術},
    • journal = {Society of Automotive Engineers of Japan},
    • year = 2021,
    • month = may
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers", arXiv, April 2021.
    BibTeX arXiv
    • @article{Hori2021apr,
    • author = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
    • title = {Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers},
    • journal = {arXiv},
    • year = 2021,
    • month = apr,
    • url = {https://arxiv.org/abs/2104.09426}
    • }
  •  Geng, S., Gao, P., Chatterjee, M., Hori, C., Le Roux, J., Zhang, Y., Li, H., Cherian, A., "Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers", AAAI Conference on Artificial Intelligence, February 2021.
    BibTeX TR2021-010 PDF
    • @inproceedings{Geng2021feb,
    • author = {Geng, Shijie and Gao, Peng and Chatterjee, Moitreya and Hori, Chiori and Le Roux, Jonathan and Zhang, Yongfeng and Li, Hongsheng and Cherian, Anoop},
    • title = {Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2021,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2021-010}
    • }
  •  Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", arXiv, November 2020.
    BibTeX arXiv
    • @article{Hung2020nov,
    • author = {Hung, Yun-Ning and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Transcription is All You Need: Learning to Separate Musical Mixtures with Score as Supervision},
    • journal = {arXiv},
    • year = 2020,
    • month = nov,
    • url = {https://arxiv.org/abs/2010.11904}
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Annual Conference of the International Speech Communication Association (Interspeech), DOI: 10.21437/Interspeech.2020-2928, October 2020, pp. 5011-5015.
    BibTeX TR2020-139 PDF
    • @inproceedings{Hori2020oct,
    • author = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
    • title = {Transformer-based Long-context End-to-end Speech Recognition},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • pages = {5011--5015},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2928},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-139}
    • }
  •  Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Annual Conference of the International Speech Communication Association (Interspeech), DOI: 10.21437/Interspeech.2020-1846, October 2020, pp. 4671-4675.
    BibTeX TR2020-137 PDF
    • @inproceedings{Jayashankar2020oct,
    • author = {Jayashankar, Tejas and Le Roux, Jonathan and Moulin, Pierre},
    • title = {Detecting Audio Attacks on ASR Systems with Dropout Uncertainty},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • pages = {4671--4675},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-1846},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-137}
    • }
  •  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Annual Conference of the International Speech Communication Association (Interspeech), DOI: 10.21437/Interspeech.2020-2757, October 2020, pp. 3112-3116.
    BibTeX TR2020-138 PDF
    • @inproceedings{Moritz2020oct,
    • author = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and Le Roux, Jonathan},
    • title = {All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection},
    • booktitle = {Annual Conference of the International Speech Communication Association (Interspeech)},
    • year = 2020,
    • pages = {3112--3116},
    • month = oct,
    • doi = {10.21437/Interspeech.2020-2757},
    • issn = {1990-9772},
    • url = {https://www.merl.com/publications/TR2020-138}
    • }
  •  Manilow, E., Wichern, G., Le Roux, J., "Hierarchical Musical Instrument Separation", International Society for Music Information Retrieval (ISMIR) Conference, October 2020, pp. 376-383.
    BibTeX TR2020-136 PDF
    • @inproceedings{Manilow2020oct,
    • author = {Manilow, Ethan and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Hierarchical Musical Instrument Separation},
    • booktitle = {International Society for Music Information Retrieval (ISMIR) Conference},
    • year = 2020,
    • pages = {376--383},
    • month = oct,
    • isbn = {978-0-9813537-0-8},
    • url = {https://www.merl.com/publications/TR2020-136}
    • }
  •  Seetharaman, P., Wichern, G., Pardo, B., Le Roux, J., "Autoclip: Adaptive Gradient Clipping For Source Separation Networks", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/MLSP49062.2020.9231926, September 2020.
    BibTeX TR2020-132 PDF
    • @inproceedings{Seetharaman2020sep,
    • author = {Seetharaman, Prem and Wichern, Gordon and Pardo, Bryan and Le Roux, Jonathan},
    • title = {Autoclip: Adaptive Gradient Clipping For Source Separation Networks},
    • booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
    • year = 2020,
    • month = sep,
    • publisher = {IEEE},
    • doi = {10.1109/MLSP49062.2020.9231926},
    • url = {https://www.merl.com/publications/TR2020-132}
    • }
  •  Gao, P., Hori, C., Geng, S., Hori, T., Le Roux, J., "Multi-Pass Transformer for Machine Translation", arXiv, September 2020.
    BibTeX arXiv
    • @article{Gao2020sep,
    • author = {Gao, Peng and Hori, Chiori and Geng, Shijie and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Multi-Pass Transformer for Machine Translation},
    • journal = {arXiv},
    • year = 2020,
    • month = sep,
    • url = {https://arxiv.org/abs/2009.11382}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
    BibTeX TR2020-126 PDF
    • @article{Pishdadian2020sep,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2020,
    • volume = 28,
    • pages = {2386--2399},
    • month = sep,
    • doi = {10.1109/TASLP.2020.3013105},
    • url = {https://www.merl.com/publications/TR2020-126}
    • }
  •  Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
    BibTeX TR2020-111 PDF
    • @inproceedings{Seetharaman2020jul,
    • author = {Seetharaman, Prem and Wichern, Gordon and Le Roux, Jonathan and Pardo, Bryan},
    • title = {Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles},
    • booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
    • year = 2020,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2020-111}
    • }
  •  Geng, S., Gao, P., Hori, C., Le Roux, J., Cherian, A., "Spatio-Temporal Scene Graphs for Video Dialog", arXiv, July 2020.
    BibTeX arXiv
    • @article{Geng2020jul,
    • author = {Geng, Shijie and Gao, Peng and Hori, Chiori and Le Roux, Jonathan and Cherian, Anoop},
    • title = {Spatio-Temporal Scene Graphs for Video Dialog},
    • journal = {arXiv},
    • year = 2020,
    • month = jul,
    • url = {https://arxiv.org/abs/2007.03848}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video
    • @inproceedings{Chang2020apr,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {End-To-End Multi-Speaker Speech Recognition with Transformer},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6134--6138},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054029},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-043}
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Learning to Separate Sounds From Weakly Labeled Scenes", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053055, April 2020, pp. 91-95.
    BibTeX TR2020-038 PDF Video
    • @inproceedings{Pishdadian2020apr,
    • author = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Learning to Separate Sounds From Weakly Labeled Scenes},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {91--95},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053055},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-038}
    • }
  •  Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053327, April 2020, pp. 696-700.
    BibTeX TR2020-042 PDF Video
    • @inproceedings{Maciejewski2020apr,
    • author = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and Le Roux, Jonathan},
    • title = {WHAMR!: Noisy and Reverberant Single-Channel Speech Separation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {696--700},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9053327},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-042}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming Automatic Speech Recognition With The Transformer Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054476, April 2020, pp. 6074-6078.
    BibTeX TR2020-040 PDF Video
    • @inproceedings{Moritz2020apr,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming Automatic Speech Recognition With The Transformer Model},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {6074--6078},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054476},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-040}
    • }
  •  Sari, L., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054249, April 2020, pp. 7384-7388.
    BibTeX TR2020-037 PDF Video
    • @inproceedings{Sari2020apr,
    • author = {Sari, Leda and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Unsupervised Speaker Adaptation Using Attention-Based Speaker Memory For End-To-End ASR},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2020,
    • pages = {7384--7388},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP40776.2020.9054249},
    • issn = {2379-190X},
    • isbn = {978-1-5090-6631-5},
    • url = {https://www.merl.com/publications/TR2020-037}
    • }
  •  Aihara, R., Wichern, G., Le Roux, J., "Deep clusteringによる シングルチャネル音声分離とその発展", The Journal of the Acoustical Society of Japan, DOI: 10.20697/jasj.76.2_101, Vol. 76, No. 2, pp. 101-108, April 2020.
    BibTeX J-STAGE
    • @article{Aihara2020apr,
    • author = {Aihara, Ryo and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Deep clusteringによる シングルチャネル音声分離とその発展},
    • journal = {The Journal of the Acoustical Society of Japan},
    • year = 2020,
    • volume = 76,
    • number = 2,
    • pages = {101--108},
    • month = apr,
    • doi = {10.20697/jasj.76.2_101},
    • url = {https://www.jstage.jst.go.jp/article/jasj/76/2/76_101/_article/-char/en}
    • }
  •  Aihara, R., Wichern, G., Le Roux, J., "Deep Clustering-based Single Channel Speech Separation and Recent Advances", Acoustical Science and Technology, DOI: 10.1250/ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
    BibTeX TR2021-020 PDF
    • @article{Aihara2020jun,
    • author = {Aihara, Ryo and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Deep Clustering-based Single Channel Speech Separation and Recent Advances},
    • journal = {Acoustical Science and Technology},
    • year = 2020,
    • volume = 41,
    • number = 2,
    • pages = {465--471},
    • month = mar,
    • doi = {10.1250/ast.41.465},
    • url = {https://www.merl.com/publications/TR2021-020}
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 237-244.
    BibTeX TR2019-157 PDF
    • @inproceedings{Chang2019dec,
    • author = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {MIMO-Speech: End-to-End Multi-Channel Multi-Speaker Speech Recognition},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {237--244},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-157}
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2019, pp. 936-943.
    BibTeX TR2019-159 PDF
    • @inproceedings{Moritz2019dec,
    • author = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title = {Streaming End-to-End Speech Recognition with Joint CTC-Attention Based Models},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2019,
    • pages = {936--943},
    • month = dec,
    • isbn = {978-1-7281-0305-1},
    • url = {https://www.merl.com/publications/TR2019-159}
    • }