Publications

51 / 3,536 publications found.


  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", arXiv, October 2023.
    BibTeX arXiv
    • @article{Pan2023oct,
    • author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Scenario-Aware Audio-Visual {TF-GridNet} for Target Speech Extraction},
    • journal = {arXiv},
    • year = 2023,
    • month = oct,
    • eprint = {2310.19644},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2310.19644}
    • }
  •  Bralios, D., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "Generation or Replication: Auscultating Audio Latent Diffusion Models", arXiv, October 2023.
    BibTeX arXiv
    • @article{Bralios2023oct,
    • author = {Bralios, Dimitrios and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and Pan, Zexu and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Generation or Replication: Auscultating Audio Latent Diffusion Models},
    • journal = {arXiv},
    • year = 2023,
    • month = oct,
    • eprint = {2310.10604},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2310.10604}
    • }
  •  Pan, Z., Wichern, G., Germain, F., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", arXiv, DOI: 10.48550/​arXiv.2211.01299, September 2023.
    BibTeX arXiv
    • @article{Pan2023sep,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, Francois and Subramanian, Aswin and Le Roux, Jonathan},
    • title = {Late Audio-Visual Fusion for In-The-Wild Speaker Diarization},
    • journal = {arXiv},
    • year = 2023,
    • month = sep,
    • eprint = {2211.01299},
    • archiveprefix = {arXiv},
    • doi = {10.48550/arXiv.2211.01299},
    • url = {https://arxiv.org/abs/2211.01299}
    • }
  •  Falcon Perez, R., Wichern, G., Germain, F., Le Roux, J., "Location as supervision for weakly supervised multi-channel source separation of machine sounds", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/​WASPAA58266.2023.10248128, September 2023.
    BibTeX TR2023-119 PDF Presentation
    • @inproceedings{FalconPerez2023aug,
    •   author    = {Falcon Perez, Ricardo and Wichern, Gordon and Germain, Francois and Le Roux, Jonathan},
    •   title     = {Location as supervision for weakly supervised multi-channel source separation of machine sounds},
    •   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = sep,
    •   doi       = {10.1109/WASPAA58266.2023.10248128},
    •   issn      = {1947-1629},
    •   isbn      = {979-8-3503-2372-6},
    •   url       = {https://www.merl.com/publications/TR2023-119},
    • }
  •  Germain, F., Wichern, G., Le Roux, J., "Hyperbolic Unsupervised Anomalous Sound Detection", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/​WASPAA58266.2023.10248092, September 2023.
    BibTeX TR2023-108 PDF Video Presentation
    • @inproceedings{Germain2023aug,
    •   author    = {Germain, Francois and Wichern, Gordon and Le Roux, Jonathan},
    •   title     = {Hyperbolic Unsupervised Anomalous Sound Detection},
    •   booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = sep,
    •   doi       = {10.1109/WASPAA58266.2023.10248092},
    •   issn      = {1947-1629},
    •   isbn      = {979-8-3503-2372-6},
    •   url       = {https://www.merl.com/publications/TR2023-108},
    • }
  •  Petermann, D., Wichern, G., Subramanian, A.S., Wang, Z.-Q., Le Roux, J., "Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2023.3290428, Vol. 31, pp. 2592-2605, September 2023.
    BibTeX TR2023-113 PDF
    • @article{Petermann2023sep,
    •   author  = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Wang, Zhong-Qiu and Le Roux, Jonathan},
    •   title   = {Tackling the Cocktail Fork Problem for Separation and Transcription of Real-World Soundtracks},
    •   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    •   volume  = {31},
    •   pages   = {2592--2605},
    •   year    = {2023},
    •   month   = sep,
    •   doi     = {10.1109/TASLP.2023.3290428},
    •   issn    = {2329-9304},
    •   url     = {https://www.merl.com/publications/TR2023-113},
    • }
  •  Uhlich, S., Fabbro, G., Hirano, M., Takahashi, S., Wichern, G., Le Roux, J., Chakraborty, D., Mohanty, S., Li, K., Luo, Y., Yu, J., Gu, R., Solovyev, R., Stempkovskiy, A., Habruseva, T., Sukhovei, M., Mitsufuji, Y., "The Sound Demixing Challenge 2023 - Cinematic Demixing Track", arXiv, August 2023.
    BibTeX arXiv
    • @article{Uhlich2023aug,
    • author = {Uhlich, Stefan and Fabbro, Giorgio and Hirano, Masato and Takahashi, Shusuke and Wichern, Gordon and Le Roux, Jonathan and Chakraborty, Dipam and Mohanty, Sharada and Li, Kai and Luo, Yi and Yu, Jianwei and Gu, Rongzhi and Solovyev, Roman and Stempkovskiy, Alexander and Habruseva, Tatiana and Sukhovei, Mikhail and Mitsufuji, Yuki},
    • title = {The Sound Demixing Challenge 2023 -- Cinematic Demixing Track},
    • journal = {arXiv},
    • year = 2023,
    • month = aug,
    • eprint = {2308.06981},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2308.06981}
    • }
  •  Chakrabarty, A., Wichern, G., Laughman, C.R., "Meta-Learning of Neural State-Space Models Using Data From Similar Systems", World Congress of the International Federation of Automatic Control (IFAC), July 2023.
    BibTeX TR2023-087 PDF
    • @inproceedings{Chakrabarty2023jul,
    •   author    = {Chakrabarty, Ankush and Wichern, Gordon and Laughman, Christopher R.},
    •   title     = {Meta-Learning of Neural State-Space Models Using Data From Similar Systems},
    •   booktitle = {World Congress of the International Federation of Automatic Control (IFAC)},
    •   year      = {2023},
    •   month     = jul,
    •   url       = {https://www.merl.com/publications/TR2023-087},
    • }
  •  Salatiello, A., Wang, Y., Wichern, G., Koike-Akino, T., Ohta, Y., Kaneko, Y., Laughman, C.R., Chakrabarty, A., "Synthesizing Building Operation Data with Generative Models: VAEs, GANs, or Something In Between?", ACM e-Energy Conference, DOI: 10.1145/3599733.3600260, June 2023.
    BibTeX TR2023-072 PDF
    • @inproceedings{Salatiello2023jun,
    • author = {Salatiello, Alessandro and Wang, Ye and Wichern, Gordon and Koike-Akino, Toshiaki and Ohta, Yoshihiro and Kaneko, Yosuke and Laughman, Christopher R. and Chakrabarty, Ankush},
    • title = {Synthesizing Building Operation Data with Generative Models: {VAEs}, {GANs}, or Something In Between?},
    • booktitle = {ACM e-Energy Conference},
    • year = 2023,
    • month = jun,
    • doi = {10.1145/3599733.3600260},
    • url = {https://www.merl.com/publications/TR2023-072}
    • }
  •  Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F., Le Roux, J., Watanabe, S., "BEATs-based Audio Captioning Model with Instructor Embedding Supervision and ChatGPT Mix-up," Tech. Rep. TR2023-068, DCASE2023 Challenge, May 2023.
    BibTeX TR2023-068 PDF
    • @techreport{Wu2023may,
    • author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, Francois and Le Roux, Jonathan and Watanabe, Shinji},
    • title = {{BEATs}-based Audio Captioning Model with Instructor Embedding Supervision and {ChatGPT} Mix-up},
    • institution = {DCASE2023 Challenge},
    • number = {TR2023-068},
    • year = 2023,
    • month = may,
    • url = {https://www.merl.com/publications/TR2023-068}
    • }
  •  Chen, K., Wichern, G., Germain, F., Le Roux, J., "Pac-HuBERT: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit BERT", IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB), DOI: 10.1109/​ICASSPW59220.2023.10193575, May 2023.
    BibTeX TR2023-030 PDF
    • @inproceedings{Chen2023may,
    • author = {Chen, Ke and Wichern, Gordon and Germain, Francois and Le Roux, Jonathan},
    • title = {{Pac-HuBERT}: Self-Supervised Music Source Separation via Primitive Auditory Clustering and Hidden-Unit {BERT}},
    • booktitle = {IEEE ICASSP Satellite Workshop on Self-supervision in Audio, Speech and Beyond (SASB)},
    • year = 2023,
    • month = may,
    • doi = {10.1109/ICASSPW59220.2023.10193575},
    • isbn = {979-8-3503-0261-5},
    • url = {https://www.merl.com/publications/TR2023-030}
    • }
  •  Aralikatti, R., Boeddeker, C., Wichern, G., Subramanian, A.S., Le Roux, J., "Reverberation as Supervision for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10095022, May 2023, pp. 1-5.
    BibTeX TR2023-016 PDF
    • @inproceedings{Aralikatti2023may,
    •   author    = {Aralikatti, Rohith and Boeddeker, Christoph and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    •   title     = {Reverberation as Supervision for Speech Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1--5},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = may,
    •   doi       = {10.1109/ICASSP49357.2023.10095022},
    •   url       = {https://www.merl.com/publications/TR2023-016},
    • }
  •  Bralios, D., Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Latent Iterative Refinement for Modular Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10096897, May 2023, pp. 1-5.
    BibTeX TR2023-019 PDF
    • @inproceedings{Bralios2023may,
    •   author    = {Bralios, Dimitrios and Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and Le Roux, Jonathan},
    •   title     = {Latent Iterative Refinement for Modular Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1--5},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = may,
    •   doi       = {10.1109/ICASSP49357.2023.10096897},
    •   url       = {https://www.merl.com/publications/TR2023-019},
    • }
  •  Petermann, D., Wichern, G., Subramanian, A.S., Le Roux, J., "Hyperbolic Audio Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10094943, May 2023, pp. 1-5.
    BibTeX TR2023-017 PDF Software
    • @inproceedings{Petermann2023may,
    •   author    = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    •   title     = {Hyperbolic Audio Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1--5},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = may,
    •   doi       = {10.1109/ICASSP49357.2023.10094943},
    •   url       = {https://www.merl.com/publications/TR2023-017},
    • }
  •  Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Optimal Condition Training for Target Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10095128, May 2023, pp. 1-5.
    BibTeX TR2023-018 PDF
    • @inproceedings{Tzinis2023may,
    •   author    = {Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and Le Roux, Jonathan},
    •   title     = {Optimal Condition Training for Target Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1--5},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = may,
    •   doi       = {10.1109/ICASSP49357.2023.10095128},
    •   url       = {https://www.merl.com/publications/TR2023-018},
    • }
  •  Yen, H., Germain, F., Wichern, G., Le Roux, J., "Cold Diffusion for Speech Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP49357.2023.10096064, May 2023, pp. 1-5.
    BibTeX TR2023-020 PDF
    • @inproceedings{Yen2023may,
    •   author    = {Yen, Hao and Germain, Francois and Wichern, Gordon and Le Roux, Jonathan},
    •   title     = {Cold Diffusion for Speech Enhancement},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1--5},
    •   publisher = {IEEE},
    •   year      = {2023},
    •   month     = may,
    •   doi       = {10.1109/ICASSP49357.2023.10096064},
    •   url       = {https://www.merl.com/publications/TR2023-020},
    • }
  •  Boeddeker, C., Subramanian, A.S., Wichern, G., Haeb-Umbach, R., Le Roux, J., "TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings", arXiv, March 2023.
    BibTeX arXiv
    • @article{Boeddeker2023mar,
    • author = {Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and Le Roux, Jonathan},
    • title = {{TS-SEP}: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings},
    • journal = {arXiv},
    • year = 2023,
    • month = mar,
    • eprint = {2303.03849},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2303.03849}
    • }
  •  Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/​TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
    BibTeX TR2022-166 PDF
    • @article{Wang2022dec2,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {{STFT}-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2022,
    • volume = 31,
    • pages = {397--410},
    • month = dec,
    • doi = {10.1109/TASLP.2022.3224285},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2022-166}
    • }
  •  Venkatesh, S., Wichern, G., Subramanian, A.S., Le Roux, J., "Improved Domain Generalization via Disentangled Multi-Task Learning in Unsupervised Anomalous Sound Detection", DCASE Workshop, Lagrange, M. and Mesaros, A. and Pellegrini, T. and Richard, G. and Serizel, R. and Stowell, D., Eds., November 2022.
    BibTeX TR2022-146 PDF Presentation
    • @inproceedings{Venkatesh2022nov,
    •   author    = {Venkatesh, Satvik and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    •   title     = {Improved Domain Generalization via Disentangled Multi-Task Learning in Unsupervised Anomalous Sound Detection},
    •   booktitle = {DCASE Workshop},
    •   editor    = {Lagrange, M. and Mesaros, A. and Pellegrini, T. and Richard, G. and Serizel, R. and Stowell, D.},
    •   year      = {2022},
    •   month     = nov,
    •   isbn      = {978-952-03-2677-7},
    •   url       = {https://www.merl.com/publications/TR2022-146},
    • }
  •  Pan, Z., Wichern, G., Germain, F., Subramanian, A.S., Le Roux, J., "Towards End-to-end Speaker Diarization in the Wild", arXiv, November 2022.
    BibTeX arXiv
    • @article{Pan2022nov,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, Francois and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    • title = {Towards End-to-end Speaker Diarization in the Wild},
    • journal = {arXiv},
    • year = 2022,
    • month = nov,
    • eprint = {2211.01299},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2211.01299}
    • }
  •  Tzinis, E., Wichern, G., Subramanian, A.S., Smaragdis, P., Le Roux, J., "Heterogeneous Target Speech Separation", Interspeech, DOI: 10.21437/​Interspeech.2022-10717, September 2022, pp. 1796-1800.
    BibTeX TR2022-115 PDF Video Presentation
    • @inproceedings{Tzinis2022sep,
    •   author    = {Tzinis, Efthymios and Wichern, Gordon and Subramanian, Aswin Shanmugam and Smaragdis, Paris and Le Roux, Jonathan},
    •   title     = {Heterogeneous Target Speech Separation},
    •   booktitle = {Interspeech},
    •   pages     = {1796--1800},
    •   year      = {2022},
    •   month     = sep,
    •   doi       = {10.21437/Interspeech.2022-10717},
    •   url       = {https://www.merl.com/publications/TR2022-115},
    • }
  •  Zhan, S., Wichern, G., Laughman, C.R., Chong, A., Chakrabarty, A., "Calibrating building simulation models using multi-source datasets and meta-learned Bayesian optimization", Energy and Buildings, DOI: 10.1016/​j.enbuild.2022.112278, Vol. 270, pp. 112278, September 2022.
    BibTeX TR2022-072 PDF
    • @article{Zhan2023jan,
    • author = {Zhan, Sicheng and Wichern, Gordon and Laughman, Christopher R. and Chong, Adrian and Chakrabarty, Ankush},
    • title = {Calibrating building simulation models using multi-source datasets and meta-learned {Bayesian} optimization},
    • journal = {Energy and Buildings},
    • year = 2022,
    • volume = 270,
    • pages = {112278},
    • month = sep,
    • doi = {10.1016/j.enbuild.2022.112278},
    • url = {https://www.merl.com/publications/TR2022-072}
    • }
  •  Venkatesh, S., Wichern, G., Subramanian, A.S., Le Roux, J., "Disentangled Surrogate Task Learning for Improved Domain Generalization in Unsupervised Anomolous Sound Detection," Tech. Rep. TR2022-092, Detection and Classification of Acoustic Scenes and Events (DCASE) Challenge 2022, July 2022.
    BibTeX TR2022-092 PDF Presentation
    • @techreport{Venkatesh2022jul,
    • author = {Venkatesh, Satvik and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    • title = {Disentangled Surrogate Task Learning for Improved Domain Generalization in Unsupervised Anomolous Sound Detection},
    • institution = {DCASE2022 Challenge},
    • number = {TR2022-092},
    • year = 2022,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2022-092}
    • }
  •  Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP43922.2022.9746005, April 2022, pp. 526-530.
    BibTeX TR2022-022 PDF Data Software
    • @inproceedings{Petermann2022apr,
    •   author    = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
    •   title     = {The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {526--530},
    •   year      = {2022},
    •   month     = apr,
    •   doi       = {10.1109/ICASSP43922.2022.9746005},
    •   url       = {https://www.merl.com/publications/TR2022-022},
    • }
  •  Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP43922.2022.9747604, April 2022, pp. 711-715.
    BibTeX TR2022-023 PDF
    • @inproceedings{Slizovskaia2022mar,
    • author = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
    • title = {Locate This, Not That: Class-Conditioned Sound Event {DOA} Estimation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {711--715},
    • month = apr,
    • doi = {10.1109/ICASSP43922.2022.9747604},
    • url = {https://www.merl.com/publications/TR2022-023}
    • }