- Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
BibTeX TR2022-023 PDF- @inproceedings{Slizovskaia2022mar,
- author    = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title     = {{Locate This, Not That: Class-Conditioned Sound Event DOA Estimation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {711--715},
- month     = apr,
- year      = {2022},
- doi       = {10.1109/ICASSP43922.2022.9747604},
- url       = {https://www.merl.com/publications/TR2022-023},
- }
- Zhan, S., Wichern, G., Laughman, C.R., Chakrabarty, A., "Meta-Learned Bayesian Optimization for Building Model Calibration using Attentive Neural Processes", Advances in Neural Information Processing Systems (NeurIPS), December 2021.
BibTeX TR2021-149 PDF- @inproceedings{Zhan2021dec,
- author    = {Zhan, Sicheng and Wichern, Gordon and Laughman, Christopher R. and Chakrabarty, Ankush},
- title     = {{Meta-Learned Bayesian Optimization for Building Model Calibration using Attentive Neural Processes}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- month     = dec,
- year      = {2021},
- url       = {https://www.merl.com/publications/TR2021-149},
- }
- Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
BibTeX TR2021-144 PDF- @article{Wang2021dec,
- author  = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title   = {{Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- volume  = {29},
- pages   = {3476--3490},
- month   = dec,
- year    = {2021},
- doi     = {10.1109/TASLP.2021.3129363},
- url     = {https://www.merl.com/publications/TR2021-144},
- }
- Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
BibTeX TR2021-137 PDF- @article{Wang2021nov2,
- author  = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title   = {{On The Compensation Between Magnitude and Phase in Speech Separation}},
- journal = {IEEE Signal Processing Letters},
- volume  = {28},
- pages   = {2018--2022},
- month   = nov,
- year    = {2021},
- doi     = {10.1109/LSP.2021.3116502},
- url     = {https://www.merl.com/publications/TR2021-137},
- }
- Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Reverberant Speech Separation", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632667, October 2021, pp. 56-60.
BibTeX TR2021-127 PDF- @inproceedings{Wang2021oct4,
- author    = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Convolutive Prediction for Reverberant Speech Separation}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {56--60},
- month     = oct,
- year      = {2021},
- publisher = {IEEE},
- doi       = {10.1109/WASPAA52581.2021.9632667},
- url       = {https://www.merl.com/publications/TR2021-127},
- }
- Wichern, G., Chakrabarty, A., Wang, Z.-Q., Le Roux, J., "Anomalous sound detection using attentive neural processes", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA52581.2021.9632762, October 2021, pp. 186-190.
BibTeX TR2021-129 PDF- @inproceedings{Wichern2021oct,
- author    = {Wichern, Gordon and Chakrabarty, Ankush and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title     = {{Anomalous sound detection using attentive neural processes}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {186--190},
- month     = oct,
- year      = {2021},
- publisher = {IEEE},
- doi       = {10.1109/WASPAA52581.2021.9632762},
- url       = {https://www.merl.com/publications/TR2021-129},
- }
- Wang, Z.-Q., Wichern, G., Le Roux, J., "Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement", arXiv, October 2021.
BibTeX arXiv- @article{Wang2021oct,
- author = {Wang, Zhong-Qiu and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Leveraging Low-Distortion Target Estimates for Improved Speech Enhancement}},
- journal = {arXiv},
- year = 2021,
- month = oct,
- eprint = {2110.00570},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2110.00570}
- }
- Chakrabarty, A., Wichern, G., Laughman, C.R., "ANP-BBO: Attentive Neural Processes and Batch Bayesian Optimization for Scalable Calibration of Physics-Informed Digital Twins", International Conference on Machine Learning (ICML), July 2021.
BibTeX TR2021-086 PDF- @inproceedings{Chakrabarty2021jul,
- author    = {Chakrabarty, Ankush and Wichern, Gordon and Laughman, Christopher R.},
- title     = {{ANP-BBO: Attentive Neural Processes and Batch Bayesian Optimization for Scalable Calibration of Physics-Informed Digital Twins}},
- booktitle = {International Conference on Machine Learning (ICML)},
- month     = jul,
- year      = {2021},
- url       = {https://www.merl.com/publications/TR2021-086},
- }
- Hung, Y.-N., Wichern, G., Le Roux, J., "Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP39728.2021.9413358, June 2021, pp. 46-50.
BibTeX TR2021-069 PDF- @inproceedings{Hung2021jun,
- author    = {Hung, Yun-Ning and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Transcription Is All You Need: Learning to Separate Musical Mixtures with Score as Supervision}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {46--50},
- month     = jun,
- year      = {2021},
- doi       = {10.1109/ICASSP39728.2021.9413358},
- issn      = {2379-190X},
- isbn      = {978-1-7281-7605-5},
- url       = {https://www.merl.com/publications/TR2021-069},
- }
- Moritz, N., Wichern, G., Hori, T., Le Roux, J., "All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection", Interspeech, DOI: 10.21437/Interspeech.2020-2757, October 2020, pp. 3112-3116.
BibTeX TR2020-138 PDF Presentation- @inproceedings{Moritz2020oct,
- author    = {Moritz, Niko and Wichern, Gordon and Hori, Takaaki and {Le Roux}, Jonathan},
- title     = {{All-in-One Transformer: Unifying Speech Recognition, Audio Tagging, and Event Detection}},
- booktitle = {Interspeech},
- pages     = {3112--3116},
- month     = oct,
- year      = {2020},
- doi       = {10.21437/Interspeech.2020-2757},
- issn      = {1990-9772},
- url       = {https://www.merl.com/publications/TR2020-138},
- }
- Manilow, E., Wichern, G., Le Roux, J., "Hierarchical Musical Instrument Separation", International Society for Music Information Retrieval (ISMIR) Conference, October 2020, pp. 376-383.
BibTeX TR2020-136 PDF Software- @inproceedings{Manilow2020oct,
- author    = {Manilow, Ethan and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Hierarchical Musical Instrument Separation}},
- booktitle = {International Society for Music Information Retrieval (ISMIR) Conference},
- pages     = {376--383},
- month     = oct,
- year      = {2020},
- isbn      = {978-0-9813537-0-8},
- url       = {https://www.merl.com/publications/TR2020-136},
- }
- Seetharaman, P., Wichern, G., Pardo, B., Le Roux, J., "Autoclip: Adaptive Gradient Clipping For Source Separation Networks", IEEE International Workshop on Machine Learning for Signal Processing (MLSP), DOI: 10.1109/MLSP49062.2020.9231926, September 2020.
BibTeX TR2020-132 PDF- @inproceedings{Seetharaman2020sep,
- author    = {Seetharaman, Prem and Wichern, Gordon and Pardo, Bryan and {Le Roux}, Jonathan},
- title     = {{Autoclip: Adaptive Gradient Clipping For Source Separation Networks}},
- booktitle = {IEEE International Workshop on Machine Learning for Signal Processing (MLSP)},
- month     = sep,
- year      = {2020},
- publisher = {IEEE},
- doi       = {10.1109/MLSP49062.2020.9231926},
- url       = {https://www.merl.com/publications/TR2020-132},
- }
- Pishdadian, F., Wichern, G., Le Roux, J., "Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2020.3013105, Vol. 28, pp. 2386-2399, September 2020.
BibTeX TR2020-126 PDF- @article{Pishdadian2020sep,
- author  = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
- title   = {{Finding Strength in Weakness: Learning to Separate Sounds with Weak Supervision}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- volume  = {28},
- pages   = {2386--2399},
- month   = sep,
- year    = {2020},
- doi     = {10.1109/TASLP.2020.3013105},
- url     = {https://www.merl.com/publications/TR2020-126},
- }
- Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles", ICML 2020 Workshop on Self-supervision in Audio and Speech, July 2020.
BibTeX TR2020-111 PDF- @inproceedings{Seetharaman2020jul,
- author    = {Seetharaman, Prem and Wichern, Gordon and {Le Roux}, Jonathan and Pardo, Bryan},
- title     = {{Bootstrapping Unsupervised Deep Music Separation from Primitive Auditory Grouping Principles}},
- booktitle = {ICML 2020 Workshop on Self-supervision in Audio and Speech},
- month     = jul,
- year      = {2020},
- url       = {https://www.merl.com/publications/TR2020-111},
- }
- Pishdadian, F., Wichern, G., Le Roux, J., "Learning to Separate Sounds From Weakly Labeled Scenes", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053055, April 2020, pp. 91-95.
BibTeX TR2020-038 PDF Video Presentation- @inproceedings{Pishdadian2020apr,
- author    = {Pishdadian, Fatemeh and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Learning to Separate Sounds From Weakly Labeled Scenes}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {91--95},
- month     = apr,
- year      = {2020},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP40776.2020.9053055},
- issn      = {2379-190X},
- isbn      = {978-1-5090-6631-5},
- url       = {https://www.merl.com/publications/TR2020-038},
- }
- Maciejewski, M., Wichern, G., McQuinn, E., Le Roux, J., "WHAMR!: Noisy and Reverberant Single-Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP40776.2020.9053327, April 2020, pp. 696-700.
BibTeX TR2020-042 PDF Video Presentation- @inproceedings{Maciejewski2020apr,
- author    = {Maciejewski, Matthew and Wichern, Gordon and McQuinn, Emmett and {Le Roux}, Jonathan},
- title     = {{WHAMR!: Noisy and Reverberant Single-Channel Speech Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- pages     = {696--700},
- month     = apr,
- year      = {2020},
- publisher = {IEEE},
- doi       = {10.1109/ICASSP40776.2020.9053327},
- issn      = {2379-190X},
- isbn      = {978-1-5090-6631-5},
- url       = {https://www.merl.com/publications/TR2020-042},
- }
- Aihara, R., Wichern, G., Le Roux, J., "Deep clusteringによるシングルチャネル音声分離とその発展", The Journal of the Acoustical Society of Japan, DOI: 10.20697/jasj.76.2_101, Vol. 76, No. 2, pp. 101-108, April 2020.
BibTeX J-STAGE- @article{Aihara2020apr,
- author = {Aihara, Ryo and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Deep clusteringによるシングルチャネル音声分離とその発展}},
- journal = {The Journal of the Acoustical Society of Japan},
- year = 2020,
- volume = 76,
- number = 2,
- pages = {101--108},
- month = apr,
- doi = {10.20697/jasj.76.2_101},
- url = {https://www.jstage.jst.go.jp/article/jasj/76/2/76_101/_article/-char/en}
- }
- Aihara, R., Wichern, G., Le Roux, J., "Deep Clustering-based Single Channel Speech Separation and Recent Advances", Acoustical Science and Technology, DOI: 10.1250/ast.41.465, Vol. 41, No. 2, pp. 465-471, March 2020.
BibTeX TR2021-020 PDF- @article{Aihara2020jun,
- author  = {Aihara, Ryo and Wichern, Gordon and {Le Roux}, Jonathan},
- title   = {{Deep Clustering-based Single Channel Speech Separation and Recent Advances}},
- journal = {Acoustical Science and Technology},
- volume  = {41},
- number  = {2},
- pages   = {465--471},
- month   = mar,
- year    = {2020},
- doi     = {10.1250/ast.41.465},
- url     = {https://www.merl.com/publications/TR2021-020},
- }
- Manilow, E., Wichern, G., Seetharaman, P., Le Roux, J., "Cutting Music Source Separation Some Slakh: A Dataset to Study the Impact of Training Data Quality and Quantity", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937170, October 2019, pp. 45-49.
BibTeX TR2019-124 PDF- @inproceedings{Manilow2019oct,
- author    = {Manilow, Ethan and Wichern, Gordon and Seetharaman, Prem and {Le Roux}, Jonathan},
- title     = {{Cutting Music Source Separation Some Slakh: A Dataset to Study the Impact of Training Data Quality and Quantity}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- pages     = {45--49},
- month     = oct,
- year      = {2019},
- doi       = {10.1109/WASPAA.2019.8937170},
- issn      = {1947-1629},
- isbn      = {978-1-7281-1123-0},
- url       = {https://www.merl.com/publications/TR2019-124},
- }
- Wichern, G., McQuinn, E., Antognini, J., Flynn, M., Zhu, R., Crow, D., Manilow, E., Le Roux, J., "WHAM!: Extending Speech Separation to Noisy Environments", Interspeech, DOI: 10.21437/Interspeech.2019-2821, September 2019, pp. 1368-1372.
BibTeX TR2019-099 PDF- @inproceedings{Wichern2019sep,
- author    = {Wichern, Gordon and McQuinn, Emmett and Antognini, Joe and Flynn, Michael and Zhu, Richard and Crow, Dwight and Manilow, Ethan and {Le Roux}, Jonathan},
- title     = {{WHAM!: Extending Speech Separation to Noisy Environments}},
- booktitle = {Interspeech},
- pages     = {1368--1372},
- month     = sep,
- year      = {2019},
- doi       = {10.21437/Interspeech.2019-2821},
- url       = {https://www.merl.com/publications/TR2019-099},
- }
- Aihara, R., Hanazawa, T., Okato, Y., Wichern, G., Le Roux, J., "Teacher-Student Deep Clustering For Low-Delay Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682695, May 2019.
BibTeX TR2019-003 PDF- @inproceedings{Aihara2019may,
- author    = {Aihara, Ryo and Hanazawa, Toshiyuki and Okato, Yohei and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Teacher-Student Deep Clustering For Low-Delay Channel Speech Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682695},
- url       = {https://www.merl.com/publications/TR2019-003},
- }
- Hori, C., Alamri, H., Wang, J., Wichern, G., Hori, T., Cherian, A., Marks, T.K., Cartillier, V., Lopes, R., Das, A., Essa, I., Batra, D., Parikh, D., "End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682583, May 2019.
BibTeX TR2019-016 PDF- @inproceedings{Hori2019may2,
- author    = {Hori, Chiori and Alamri, Huda and Wang, Jue and Wichern, Gordon and Hori, Takaaki and Cherian, Anoop and Marks, Tim K. and Cartillier, Vincent and Lopes, Raphael and Das, Abhishek and Essa, Irfan and Batra, Dhruv and Parikh, Devi},
- title     = {{End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682583},
- url       = {https://www.merl.com/publications/TR2019-016},
- }
- Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "The Phasebook: Building Complex Masks via Discrete Representations for Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682587, May 2019.
BibTeX TR2019-008 PDF- @inproceedings{LeRoux2019may2,
- author    = {{Le Roux}, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title     = {{The Phasebook: Building Complex Masks via Discrete Representations for Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682587},
- url       = {https://www.merl.com/publications/TR2019-008},
- }
- Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Single-Channel Source Separation via Unsupervised Spatial Clustering on Stereo Mixtures", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683198, May 2019.
BibTeX TR2019-014 PDF- @inproceedings{Seetharaman2019may2,
- author    = {Seetharaman, Prem and Wichern, Gordon and {Le Roux}, Jonathan and Pardo, Bryan},
- title     = {{Bootstrapping Single-Channel Source Separation via Unsupervised Spatial Clustering on Stereo Mixtures}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683198},
- url       = {https://www.merl.com/publications/TR2019-014},
- }
- Seetharaman, P., Wichern, G., Venkataramani, S., Le Roux, J., "Class-Conditional Embeddings for Music Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683007, May 2019.
BibTeX TR2019-004 PDF- @inproceedings{Seetharaman2019may,
- author = {Seetharaman, Prem and Wichern, Gordon and Venkataramani, Shrikant and {Le Roux}, Jonathan},
- title = {{Class-Conditional Embeddings for Music Source Separation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8683007},
- url = {https://www.merl.com/publications/TR2019-004}
- }