- Chakrabarty, A., Deshpande, V.M., Wichern, G., Berntorp, K., "Physics-Constrained Meta-Learning for Online Adaptation and Estimation in Positioning Applications", IEEE Conference on Decision and Control (CDC), DOI: 10.1109/CDC56724.2024.10886249, December 2024.
BibTeX TR2024-180 PDF- @inproceedings{Chakrabarty2024dec,
- author = {Chakrabarty, Ankush and Deshpande, Vedang M. and Wichern, Gordon and Berntorp, Karl},
- title = {{Physics-Constrained Meta-Learning for Online Adaptation and Estimation in Positioning Applications}},
- booktitle = {IEEE Conference on Decision and Control (CDC)},
- year = 2024,
- month = dec,
- doi = {10.1109/CDC56724.2024.10886249},
- url = {https://www.merl.com/publications/TR2024-180}
- }
- Park, Y.-J., Germain, F.G., Liu, J., Wang, Y., Koike-Akino, T., Wichern, G., Laughman, C.R., Azizan, N., Chakrabarty, A., "Probabilistic Forecasting for Building Energy Systems: Are Time-Series Foundation Models The Answer?", Advances in Neural Information Processing Systems (NeurIPS), December 2024.
BibTeX TR2025-001 PDF- @inproceedings{Park2024dec,
- author = {Park, Young-Jin and Germain, François G and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki and Wichern, Gordon and Laughman, Christopher R. and Azizan, Navid and Chakrabarty, Ankush},
- title = {{Probabilistic Forecasting for Building Energy Systems: Are Time-Series Foundation Models The Answer?}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2024,
- month = dec,
- url = {https://www.merl.com/publications/TR2025-001}
- }
- Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Spatially-Aware Losses for Enhanced Neural Acoustic Fields", NeurIPS 2024 Audio Imagination Workshop, December 2024.
BibTeX TR2024-169 PDF- @inproceedings{Ick2024dec,
- author = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and {Le Roux}, Jonathan},
- title = {{Spatially-Aware Losses for Enhanced Neural Acoustic Fields}},
- booktitle = {NeurIPS 2024 Audio Imagination Workshop},
- year = 2024,
- month = dec,
- url = {https://www.merl.com/publications/TR2024-169}
- }
- Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/IWAENC61483.2024.10694313, September 2024, pp. 205-209.
BibTeX TR2024-126 PDF Software- @inproceedings{Saijo2024sep2,
- author = {Saijo, Kohei and Wichern, Gordon and Germain, François G and Pan, Zexu and {Le Roux}, Jonathan},
- title = {{TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement}},
- booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
- year = 2024,
- pages = {205--209},
- month = sep,
- doi = {10.1109/IWAENC61483.2024.10694313},
- issn = {2835-3439},
- isbn = {979-8-3503-6185-8},
- url = {https://www.merl.com/publications/TR2024-126}
- }
- Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Sound Event Bounding Boxes", Interspeech, DOI: 10.21437/Interspeech.2024-2075, September 2024, pp. 562-566.
BibTeX TR2024-118 PDF Software- @inproceedings{Ebbers2024sep,
- author = {Ebbers, Janek and Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Sound Event Bounding Boxes}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {562--566},
- month = sep,
- doi = {10.21437/Interspeech.2024-2075},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-118}
- }
- Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/Interspeech.2024-1088, September 2024, pp. 392-396.
BibTeX TR2024-122 PDF- @inproceedings{Khurana2024sep,
- author = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{ZeroST: Zero-Shot Speech Translation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {392--396},
- month = sep,
- doi = {10.21437/Interspeech.2024-1088},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-122}
- }
- Pan, Z., Wichern, G., Germain, F.G., Saijo, K., Le Roux, J., "PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1066, September 2024, pp. 582-586.
BibTeX TR2024-124 PDF- @inproceedings{Pan2024sep,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Saijo, Kohei and {Le Roux}, Jonathan},
- title = {{PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {582--586},
- month = sep,
- doi = {10.21437/Interspeech.2024-1066},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-124}
- }
- Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "Enhanced Reverberation as Supervision for Unsupervised Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1241, September 2024, pp. 607-611.
BibTeX TR2024-116 PDF Software- @inproceedings{Saijo2024sep,
- author = {Saijo, Kohei and Wichern, Gordon and Germain, François G and Pan, Zexu and {Le Roux}, Jonathan},
- title = {{Enhanced Reverberation as Supervision for Unsupervised Speech Separation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {607--611},
- month = sep,
- doi = {10.21437/Interspeech.2024-1241},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-116}
- }
- He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
BibTeX TR2024-072 PDF- @inproceedings{He2024jun,
- author = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
- title = {{Deep Neural Room Acoustics Primitive}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2024,
- pages = {17842--17857},
- month = jun,
- url = {https://www.merl.com/publications/TR2024-072}
- }
- Uhlich, S., Fabbro, G., Hirano, M., Takahashi, S., Wichern, G., Le Roux, J., Chakraborty, D., Mohanty, S., Li, K., Luo, Y., Yu, J., Gu, R., Solovyev, R., Stempkovskiy, A., Habruseva, T., Sukhovei, M., Mitsufuji, Y., "The Sound Demixing Challenge 2023 – Cinematic Demixing Track", Transactions of the International Society for Music Information Retrieval, DOI: 10.5334/tismir.172, Vol. 7, No. 1, pp. 44-62, May 2024.
BibTeX TR2024-047 PDF- @article{Uhlich2024may,
- author = {Uhlich, Stefan and Fabbro, Giorgio and Hirano, Masato and Takahashi, Shusuke and Wichern, Gordon and {Le Roux}, Jonathan and Chakraborty, Dipam and Mohanty, Sharada and Li, Kai and Luo, Yi and Yu, Jianwei and Gu, Rongzhi and Solovyev, Roman and Stempkovskiy, Alexander and Habruseva, Tatiana and Sukhovei, Mikhail and Mitsufuji, Yuki},
- title = {{The Sound Demixing Challenge 2023 -- Cinematic Demixing Track}},
- journal = {Transactions of the International Society for Music Information Retrieval},
- year = 2024,
- volume = 7,
- number = 1,
- pages = {44--62},
- month = may,
- doi = {10.5334/tismir.172},
- url = {https://www.merl.com/publications/TR2024-047}
- }
- Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), DOI: 10.1109/ICASSPW62465.2024.10626914, April 2024, pp. 174-178.
BibTeX TR2024-029 PDF- @inproceedings{Pan2024apr,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Subramanian, Aswin and {Le Roux}, Jonathan},
- title = {{Late Audio-Visual Fusion for In-The-Wild Speaker Diarization}},
- booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
- year = 2024,
- pages = {174--178},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSPW62465.2024.10626914},
- isbn = {979-8-3503-7451-3},
- url = {https://www.merl.com/publications/TR2024-029}
- }
- Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers", arXiv, April 2024.
BibTeX arXiv- @article{Koo2024apr2,
- author = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers}},
- journal = {arXiv},
- year = 2024,
- month = apr,
- eprint = {2404.02252},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2404.02252}
- }
- Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), April 2024.
BibTeX TR2024-032 PDF- @inproceedings{Koo2024apr,
- author = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads}},
- booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
- year = 2024,
- month = apr,
- url = {https://www.merl.com/publications/TR2024-032}
- }
- Jeon, C.-B., Wichern, G., Germain, F.G., Le Roux, J., "Why does music source separation benefit from cacophony?", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), DOI: 10.1109/ICASSPW62465.2024.10669899, March 2024, pp. 873-877.
BibTeX TR2024-030 PDF Video- @inproceedings{Jeon2024mar,
- author = {Jeon, Chang-Bin and Wichern, Gordon and Germain, François G and {Le Roux}, Jonathan},
- title = {{Why does music source separation benefit from cacophony?}},
- booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
- year = 2024,
- pages = {873--877},
- month = mar,
- publisher = {IEEE},
- doi = {10.1109/ICASSPW62465.2024.10669899},
- isbn = {979-8-3503-7451-3},
- url = {https://www.merl.com/publications/TR2024-030}
- }
- Bralios, D., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "Generation or Replication: Auscultating Audio Latent Diffusion Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447705, March 2024, pp. 1156-1160.
BibTeX TR2024-027 PDF- @inproceedings{Bralios2024mar,
- author = {Bralios, Dimitrios and Wichern, Gordon and Germain, François G and Pan, Zexu and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
- title = {{Generation or Replication: Auscultating Audio Latent Diffusion Models}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- pages = {1156--1160},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10447705},
- url = {https://www.merl.com/publications/TR2024-027}
- }
- Masuyama, Y., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10448477, March 2024, pp. 1016-1020.
BibTeX TR2024-026 PDF Software- @inproceedings{Masuyama2024mar,
- author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Pan, Zexu and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
- title = {{NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- pages = {1016--1020},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10448477},
- url = {https://www.merl.com/publications/TR2024-026}
- }
- Pan, Z., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "NeuroHeed+: Improving Neuro-steered Speaker Extraction with Joint Auditory Attention Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446333, March 2024, pp. 11456-11460.
BibTeX TR2024-025 PDF- @inproceedings{Pan2024mar,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Khurana, Sameer and {Le Roux}, Jonathan},
- title = {{NeuroHeed+: Improving Neuro-steered Speaker Extraction with Joint Auditory Attention Detection}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- pages = {11456--11460},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10446333},
- url = {https://www.merl.com/publications/TR2024-025}
- }
- Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447215, March 2024, pp. 316-320.
BibTeX TR2024-028 PDF- @inproceedings{Wu2024mar,
- author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G and {Le Roux}, Jonathan and Watanabe, Shinji},
- title = {{Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2024,
- pages = {316--320},
- month = mar,
- doi = {10.1109/ICASSP48485.2024.10447215},
- url = {https://www.merl.com/publications/TR2024-028}
- }
- Boeddeker, C., Subramanian, A.S., Wichern, G., Haeb-Umbach, R., Le Roux, J., "TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2024.3350887, Vol. 32, pp. 1185-1197, February 2024.
BibTeX TR2024-006 PDF Software- @article{Boeddeker2024feb,
- author = {Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and {Le Roux}, Jonathan},
- title = {{TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2024,
- volume = 32,
- pages = {1185--1197},
- month = feb,
- doi = {10.1109/TASLP.2024.3350887},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2024-006}
- }
- Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU57964.2023.10389618, December 2023.
BibTeX TR2023-152 PDF Video- @inproceedings{Pan2023dec2,
- author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
- title = {{Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2023,
- month = dec,
- doi = {10.1109/ASRU57964.2023.10389618},
- isbn = {979-8-3503-0689-7},
- url = {https://www.merl.com/publications/TR2023-152}
- }
- Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks", International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero), November 2023.
BibTeX TR2023-141 PDF- @inproceedings{Wu2023nov,
- author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G and {Le Roux}, Jonathan and Watanabe, Shinji},
- title = {{On the Use of Pretrained Deep Audio Encoders for Automated Audio Captioning Tasks}},
- booktitle = {International Symposium on Future Active Safety Technology toward zero traffic accidents (FAST-zero)},
- year = 2023,
- month = nov,
- url = {https://www.merl.com/publications/TR2023-141}
- }
- Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", arXiv, DOI: 10.48550/arXiv.2211.01299, September 2023.
BibTeX arXiv- @article{Pan2023sep,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Subramanian, Aswin and {Le Roux}, Jonathan},
- title = {{Late Audio-Visual Fusion for In-The-Wild Speaker Diarization}},
- journal = {arXiv},
- year = 2023,
- month = sep,
- doi = {10.48550/arXiv.2211.01299},
- eprint = {2211.01299},
- archivePrefix = {arXiv},
- url = {https://arxiv.org/abs/2211.01299}
- }
- Falcon Perez, R., Wichern, G., Germain, F.G., Le Roux, J., "Location as supervision for weakly supervised multi-channel source separation of machine sounds", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA58266.2023.10248128, September 2023.
BibTeX TR2023-119 PDF Presentation- @inproceedings{FalconPerez2023aug,
- author = {Falcon Perez, Ricardo and Wichern, Gordon and Germain, François G and {Le Roux}, Jonathan},
- title = {{Location as supervision for weakly supervised multi-channel source separation of machine sounds}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2023,
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/WASPAA58266.2023.10248128},
- issn = {1947-1629},
- isbn = {979-8-3503-2372-6},
- url = {https://www.merl.com/publications/TR2023-119}
- }
- Germain, F.G., Wichern, G., Le Roux, J., "Hyperbolic Unsupervised Anomalous Sound Detection", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA58266.2023.10248092, September 2023.
BibTeX TR2023-108 PDF Video Presentation- @inproceedings{Germain2023aug,
- author = {Germain, François G and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Hyperbolic Unsupervised Anomalous Sound Detection}},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2023,
- month = sep,
- publisher = {IEEE},
- doi = {10.1109/WASPAA58266.2023.10248092},
- issn = {1947-1629},
- isbn = {979-8-3503-2372-6},
- url = {https://www.merl.com/publications/TR2023-108}
- }
- Petermann, D., Wichern, G., Subramanian, A.S., Wang, Z.-Q., Le Roux, J., "Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2023.3290428, Vol. 31, pp. 2592-2605, September 2023.
BibTeX TR2023-113 PDF- @article{Petermann2023sep,
- author = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Wang, Zhong-Qiu and {Le Roux}, Jonathan},
- title = {{Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year = 2023,
- volume = 31,
- pages = {2592--2605},
- month = sep,
- doi = {10.1109/TASLP.2023.3290428},
- issn = {2329-9304},
- url = {https://www.merl.com/publications/TR2023-113}
- }