Publications

359 / 3,666 publications found.


  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
    BibTeX TR2021-127 PDF
    • @inproceedings{Wang2021oct4,
    • author    = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title     = {Convolutive Prediction for Reverberant Speech Separation},
    • booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    • month     = oct,
    • year      = {2021},
    • pages     = {56--60},
    • publisher = {IEEE},
    • doi       = {10.1109/WASPAA52581.2021.9632667},
    • url       = {https://www.merl.com/publications/TR2021-127},
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement", arXiv, October 2021.
    BibTeX arXiv
    • @article{Wang2021oct,
    • author        = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    • title         = {Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement},
    • journal       = {arXiv},
    • month         = oct,
    • year          = {2021},
    • eprint        = {2110.00570},
    • archiveprefix = {arXiv},
    • url           = {https://arxiv.org/abs/2110.00570},
    • }
  •  Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-571, September 2021, pp. 726-730.
    BibTeX TR2021-103 PDF
    • @inproceedings{Higuchi2021sep,
    • author    = {Higuchi, Yosuke and Moritz, Niko and Le Roux, Jonathan and Hori, Takaaki},
    • title     = {Momentum Pseudo-Labeling for Semi-Supervised Speech Recognition},
    • booktitle = {Interspeech},
    • month     = sep,
    • year      = {2021},
    • pages     = {726--730},
    • doi       = {10.21437/Interspeech.2021-571},
    • url       = {https://www.merl.com/publications/TR2021-103},
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers", Interspeech, DOI: 10.21437/Interspeech.2021-1643, August 2021, pp. 2097-2101.
    BibTeX TR2021-100 PDF
    • @inproceedings{Hori2021aug3,
    • author    = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
    • title     = {Advanced Long-context End-to-end Speech Recognition Using Context-expanded Transformers},
    • booktitle = {Interspeech},
    • month     = aug,
    • year      = {2021},
    • pages     = {2097--2101},
    • doi       = {10.21437/Interspeech.2021-1643},
    • url       = {https://www.merl.com/publications/TR2021-100},
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Dual Causal/Non-Causal Self-Attention for Streaming End-to-End Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2021-1693, August 2021, pp. 1822-1826.
    BibTeX TR2021-094 PDF
    • @inproceedings{Moritz2021aug,
    • author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title     = {Dual Causal/Non-Causal Self-Attention for Streaming End-to-End Speech Recognition},
    • booktitle = {Interspeech},
    • month     = aug,
    • year      = {2021},
    • pages     = {1822--1826},
    • doi       = {10.21437/Interspeech.2021-1693},
    • url       = {https://www.merl.com/publications/TR2021-094},
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Capturing Multi-Resolution Context by Dilated Self-Attention", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415001, June 2021, pp. 5869-5873.
    BibTeX TR2021-036 PDF
    • @inproceedings{Moritz2021jun,
    • author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title     = {Capturing Multi-Resolution Context by Dilated Self-Attention},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {5869--5873},
    • doi       = {10.1109/ICASSP39728.2021.9415001},
    • url       = {https://www.merl.com/publications/TR2021-036},
    • }
  •  Ma, Y., Boufounos, P.T., Mansour, H., Aeron, S., "Multiview Sensing with Unknown Permutations: An Optimal Transport Approach", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415075, June 2021, pp. 1440-1444.
    BibTeX TR2021-047 PDF
    • @inproceedings{Ma2021jun,
    • author    = {Ma, Yanting and Boufounos, Petros T. and Mansour, Hassan and Aeron, Shuchin},
    • title     = {Multiview Sensing with Unknown Permutations: An Optimal Transport Approach},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {1440--1444},
    • doi       = {10.1109/ICASSP39728.2021.9415075},
    • issn      = {1520-6149},
    • isbn      = {978-1-7281-7606-2},
    • url       = {https://www.merl.com/publications/TR2021-047},
    • }
  •  Chen, S., Eldar, Y., "Graph Signal Denoising via Unrolling Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415073, June 2021.
    BibTeX TR2021-071 PDF
    • @inproceedings{Chen2021jun3,
    • author    = {Chen, Siheng and Eldar, Yonina},
    • title     = {Graph Signal Denoising via Unrolling Networks},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • doi       = {10.1109/ICASSP39728.2021.9415073},
    • url       = {https://www.merl.com/publications/TR2021-071},
    • }
  •  Chen, S., Eldar, Y., "Time-Varying Graph Signal Inpainting via Unrolling Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413406, June 2021.
    BibTeX TR2021-070 PDF
    • @inproceedings{Chen2021jun,
    • author    = {Chen, Siheng and Eldar, Yonina},
    • title     = {Time-Varying Graph Signal Inpainting via Unrolling Networks},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • doi       = {10.1109/ICASSP39728.2021.9413406},
    • url       = {https://www.merl.com/publications/TR2021-070},
    • }
  •  Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413358, June 2021, pp. 46-50.
    BibTeX TR2021-069 PDF
    • @inproceedings{Hung2021jun,
    • author    = {Hung, Yun-Ning and Wichern, Gordon and Le Roux, Jonathan},
    • title     = {Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {46--50},
    • doi       = {10.1109/ICASSP39728.2021.9413358},
    • issn      = {2379-190X},
    • isbn      = {978-1-7281-7605-5},
    • url       = {https://www.merl.com/publications/TR2021-069},
    • }
  •  Hyder, R., Mansour, H., Ma, Y., Boufounos, P.T., Wang, P., "A Consensus Equilibrium Solution for Deep Image Prior Powered by RED", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414290, June 2021, pp. 1380-1384.
    BibTeX TR2021-046 PDF
    • @inproceedings{Hyder2021jun,
    • author    = {Hyder, Rakib and Mansour, Hassan and Ma, Yanting and Boufounos, Petros T. and Wang, Perry},
    • title     = {A Consensus Equilibrium Solution for Deep Image Prior Powered by {RED}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {1380--1384},
    • doi       = {10.1109/ICASSP39728.2021.9414290},
    • issn      = {2379-190X},
    • isbn      = {978-1-7281-7605-5},
    • url       = {https://www.merl.com/publications/TR2021-046},
    • }
  •  Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414299, June 2021, pp. 6553-6557.
    BibTeX TR2021-039 PDF
    • @inproceedings{Khurana2021jun,
    • author    = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title     = {Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {6553--6557},
    • doi       = {10.1109/ICASSP39728.2021.9414299},
    • url       = {https://www.merl.com/publications/TR2021-039},
    • }
  •  Moritz, N., Hori, T., Le Roux, J., "Semi-Supervised Speech Recognition via Graph-Based Temporal Classification", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414058, June 2021, pp. 6548-6552.
    BibTeX TR2021-037 PDF
    • @inproceedings{Moritz2021jun2,
    • author    = {Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    • title     = {Semi-Supervised Speech Recognition via Graph-Based Temporal Classification},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {6548--6552},
    • doi       = {10.1109/ICASSP39728.2021.9414058},
    • url       = {https://www.merl.com/publications/TR2021-037},
    • }
  •  Shi, L., Liu, D., Umeda, M., Hana, N., "Fusion-Based Image Correlations Framework For Strain Measurement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9414987, June 2021.
    BibTeX TR2021-012 PDF Video
    • @inproceedings{Shi2021feb,
    • author    = {Shi, Laixi and Liu, Dehong and Umeda, Masaki and Hana, Norihiko},
    • title     = {Fusion-Based Image Correlations Framework For Strain Measurement},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • doi       = {10.1109/ICASSP39728.2021.9414987},
    • issn      = {2379-190X},
    • isbn      = {978-1-7281-7606-2},
    • url       = {https://www.merl.com/publications/TR2021-012},
    • }
  •  Yao, G., Wang, P., Berntorp, K., Mansour, H., Boufounos, P.T., Orlik, P.V., "Extended Object Tracking with Automotive Radar Using B-Spline Chained Ellipses Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9415080, June 2021, pp. 8408-8412.
    BibTeX TR2021-048 PDF Video
    • @inproceedings{Yao2021jun,
    • author    = {Yao, Gang and Wang, Perry and Berntorp, Karl and Mansour, Hassan and Boufounos, Petros T. and Orlik, Philip V.},
    • title     = {Extended Object Tracking with Automotive Radar Using {B-Spline} Chained Ellipses Model},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = jun,
    • year      = {2021},
    • pages     = {8408--8412},
    • publisher = {IEEE},
    • doi       = {10.1109/ICASSP39728.2021.9415080},
    • issn      = {2379-190X},
    • isbn      = {978-1-7281-7605-5},
    • url       = {https://www.merl.com/publications/TR2021-048},
    • }
  •  Watanabe, S., Boyer, F., Chang, X., Guo, P., Hayashi, T., Higuchi, Y., Hori, T., Huang, W.-C., Inaguma, H., Kamo, N., Karita, S., Li, C., Shi, J., Subramanian, A.S., Zhang, W., "The 2020 ESPnet Update: New Features, Broadened Applications, Performance Improvements, and Future Plans", IEEE Data Science and Learning Workshop (DSLW), DOI: 10.1109/DSLW51110.2021.9523402, June 2021, pp. 1-6.
    BibTeX TR2021-073 PDF
    • @inproceedings{Watanabe2021jun,
    • author    = {Watanabe, Shinji and Boyer, Florian and Chang, Xuankai and Guo, Pengcheng and Hayashi, Tomoki and Higuchi, Yosuke and Hori, Takaaki and Huang, Wen-Chin and Inaguma, Hirofumi and Kamo, Naoyuki and Karita, Shigeki and Li, Chenda and Shi, Jing and Subramanian, Aswin S. and Zhang, Wangyou},
    • title     = {The 2020 {ESPnet} Update: New Features, Broadened Applications, Performance Improvements, and Future Plans},
    • booktitle = {IEEE Data Science and Learning Workshop (DSLW)},
    • month     = jun,
    • year      = {2021},
    • pages     = {1--6},
    • publisher = {IEEE},
    • doi       = {10.1109/DSLW51110.2021.9523402},
    • isbn      = {978-1-6654-2826-2},
    • url       = {https://www.merl.com/publications/TR2021-073},
    • }
  •  Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3078368, May 2021.
    BibTeX TR2021-064 PDF
    • @article{Kim2021may,
    • author  = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
    • title   = {Overview of the Eighth Dialog System Technology Challenge: {DSTC8}},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • month   = may,
    • year    = {2021},
    • doi     = {10.1109/TASLP.2021.3078368},
    • issn    = {2329-9290},
    • url     = {https://www.merl.com/publications/TR2021-064},
    • }
  •  Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
    BibTeX TR2021-042 PDF Video
    • @article{Hori2021may,
    • author  = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and Le Roux, Jonathan and Sullivan, Alan and Vetro, Anthony},
    • title   = {マルチモーダルセンシング情報に基づくScene-aware Interaction 技術},
    • journal = {Society of Automotive Engineers of Japan},
    • month   = may,
    • year    = {2021},
    • volume  = {75},
    • number  = {5},
    • pages   = {66--71},
    • url     = {https://www.merl.com/publications/TR2021-042},
    • }
  •  Hori, T., Moritz, N., Hori, C., Le Roux, J., "Transformer-based Long-context End-to-end Speech Recognition", Interspeech, DOI: 10.21437/Interspeech.2020-2928, October 2020, pp. 5011-5015.
    BibTeX TR2020-139 PDF Presentation
    • @inproceedings{Hori2020oct,
    • author    = {Hori, Takaaki and Moritz, Niko and Hori, Chiori and Le Roux, Jonathan},
    • title     = {Transformer-based Long-context End-to-end Speech Recognition},
    • booktitle = {Interspeech},
    • month     = oct,
    • year      = {2020},
    • pages     = {5011--5015},
    • doi       = {10.21437/Interspeech.2020-2928},
    • issn      = {1990-9772},
    • url       = {https://www.merl.com/publications/TR2020-139},
    • }
  •  Jayashankar, T., Le Roux, J., Moulin, P., "Detecting Audio Attacks on ASR Systems with Dropout Uncertainty", Interspeech, DOI: 10.21437/Interspeech.2020-1846, October 2020, pp. 4671-4675.
    BibTeX TR2020-137 PDF Presentation
    • @inproceedings{Jayashankar2020oct,
    • author    = {Jayashankar, Tejas and Le Roux, Jonathan and Moulin, Pierre},
    • title     = {Detecting Audio Attacks on {ASR} Systems with Dropout Uncertainty},
    • booktitle = {Interspeech},
    • month     = oct,
    • year      = {2020},
    • pages     = {4671--4675},
    • doi       = {10.21437/Interspeech.2020-1846},
    • issn      = {1990-9772},
    • url       = {https://www.merl.com/publications/TR2020-137},
    • }
  •  Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Interspeech, DOI: 10.21437/Interspeech.2020-2757, October 2020, pp. 3112-3116.
    BibTeX TR2020-138 PDF Presentation
    • @inproceedings{Moritz2020oct,
    • author    = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and Le Roux, Jonathan},
    • title     = {All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection},
    • booktitle = {Interspeech},
    • month     = oct,
    • year      = {2020},
    • pages     = {3112--3116},
    • doi       = {10.21437/Interspeech.2020-2757},
    • issn      = {1990-9772},
    • url       = {https://www.merl.com/publications/TR2020-138},
    • }
  •  Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
    BibTeX TR2020-126 PDF
    • @article{Pishdadian2020sep,
    • author  = {Pishdadian, Fatemeh and Wichern, Gordon and Le Roux, Jonathan},
    • title   = {Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • month   = sep,
    • year    = {2020},
    • volume  = {28},
    • pages   = {2386--2399},
    • doi     = {10.1109/TASLP.2020.3013105},
    • url     = {https://www.merl.com/publications/TR2020-126},
    • }
  •  Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
    BibTeX TR2020-111 PDF
    • @inproceedings{Seetharaman2020jul,
    • author    = {Seetharaman, Prem and Wichern, Gordon and Le Roux, Jonathan and Pardo, Bryan},
    • title     = {Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles},
    • booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
    • month     = jul,
    • year      = {2020},
    • url       = {https://www.merl.com/publications/TR2020-111},
    • }
  •  Chang, X., Zhang, W., Qian, Y., Le Roux, J., Watanabe, S., "End-To-End Multi-Speaker Speech Recognition with Transformer", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9054029, April 2020, pp. 6134-6138.
    BibTeX TR2020-043 PDF Video Presentation
    • @inproceedings{Chang2020apr,
    • author    = {Chang, Xuankai and Zhang, Wangyou and Qian, Yanmin and Le Roux, Jonathan and Watanabe, Shinji},
    • title     = {End-To-End Multi-Speaker Speech Recognition with Transformer},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = apr,
    • year      = {2020},
    • pages     = {6134--6138},
    • publisher = {IEEE},
    • doi       = {10.1109/ICASSP40776.2020.9054029},
    • issn      = {2379-190X},
    • isbn      = {978-1-5090-6631-5},
    • url       = {https://www.merl.com/publications/TR2020-043},
    • }
  •  Liu, J., Chen, B., Chen, S., Berges, M., Bielak, J., Noh, H.Y., "Damage-Sensitive and Domain-Invariant Feature Extraction for Vehicle-Vibration-Based Bridge Health Monitoring", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053450, April 2020, pp. 3007-3011.
    BibTeX TR2020-053 PDF Video
    • @inproceedings{Liu2020apr,
    • author    = {Liu, Jingxiao and Chen, Bingqing and Chen, Siheng and Berges, Mario and Bielak, Jacobo and Noh, Hae Young},
    • title     = {Damage-Sensitive and Domain-Invariant Feature Extraction for Vehicle-Vibration-Based Bridge Health Monitoring},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • month     = apr,
    • year      = {2020},
    • pages     = {3007--3011},
    • publisher = {IEEE},
    • doi       = {10.1109/ICASSP40776.2020.9053450},
    • issn      = {2379-190X},
    • isbn      = {978-1-5090-6631-5},
    • url       = {https://www.merl.com/publications/TR2020-053},
    • }