Publications

390 / 3,945 publications found.


  •  Tian, J., Shi, J., Chen, W., Arora, S., Masuyama, Y., Maekaku, T., Wu, Y., Peng, J., Bharadwaj, S., Zhao, Y., Cornell, S., Peng, Y., Yue, X., Yang, C.-H.H., Neubig, G., Watanabe, S., "ESPnet-SpeechLM: An Open Speech Language Model Toolkit", NAACL-HLT (the system demonstration track), Dziri, N. and Ren, S. X. and Diao, S., Eds., March 2025, pp. 116-124.
    BibTeX TR2025-038 PDF
    • @inproceedings{Tian2025mar,
    • author = {Tian, Jinchuan and Shi, Jiatong and Chen, William and Arora, Siddhant and Masuyama, Yoshiki and Maekaku, Takashi and Wu, Yihan and Peng, Junyi and Bharadwaj, Shikhar and Zhao, Yiwen and Cornell, Samuele and Peng, Yifan and Yue, Xiang and Yang, Chao-Han H. and Neubig, Graham and Watanabe, Shinji},
    • title = {{ESPnet-SpeechLM: An Open Speech Language Model Toolkit}},
    • booktitle = {NAACL-HLT (the system demonstration track)},
    • year = 2025,
    • editor = {Dziri, N. and Ren, S. X. and Diao, S.},
    • pages = {116--124},
    • month = mar,
    • publisher = {Association for Computational Linguistics},
    • url = {https://www.merl.com/publications/TR2025-038}
    • }
  •  Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/IWAENC61483.2024.10694313, September 2024, pp. 205-209.
    BibTeX TR2024-126 PDF Software
    • @inproceedings{Saijo2024sep2,
    • author = {Saijo, Kohei and Wichern, Gordon and Germain, François G. and Pan, Zexu and {Le Roux}, Jonathan},
    • title = {{TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement}},
    • booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
    • year = 2024,
    • pages = {205--209},
    • month = sep,
    • doi = {10.1109/IWAENC61483.2024.10694313},
    • issn = {2835-3439},
    • isbn = {979-8-3503-6185-8},
    • url = {https://www.merl.com/publications/TR2024-126}
    • }
  •  Bahrman, L., Fontaine, M., Le Roux, J., Richard, G., "Speech Dereverberation Constrained on Room Impulse Response Characteristics", Interspeech, DOI: 10.21437/Interspeech.2024-1173, September 2024, pp. 622-626.
    BibTeX TR2024-121 PDF
    • @inproceedings{Bahrman2024sep,
    • author = {Bahrman, Louis and Fontaine, Mathieu and {Le Roux}, Jonathan and Richard, Gaël},
    • title = {{Speech Dereverberation Constrained on Room Impulse Response Characteristics}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {622--626},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1173},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-121}
    • }
  •  Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/Interspeech.2024-1088, September 2024, pp. 392-396.
    BibTeX TR2024-122 PDF
    • @inproceedings{Khurana2024sep,
    • author = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{ZeroST: Zero-Shot Speech Translation}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {392--396},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1088},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-122}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Saijo, K., Le Roux, J., "PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1066, September 2024, pp. 582-586.
    BibTeX TR2024-124 PDF
    • @inproceedings{Pan2024sep,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Saijo, Kohei and {Le Roux}, Jonathan},
    • title = {{PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {582--586},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1066},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-124}
    • }
  •  Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "Enhanced Reverberation as Supervision for Unsupervised Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1241, September 2024, pp. 607-611.
    BibTeX TR2024-116 PDF Software
    • @inproceedings{Saijo2024sep,
    • author = {Saijo, Kohei and Wichern, Gordon and Germain, François G. and Pan, Zexu and {Le Roux}, Jonathan},
    • title = {{Enhanced Reverberation as Supervision for Unsupervised Speech Separation}},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {607--611},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1241},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-116}
    • }
  •  Mitsui, Y., Aihara, R., Hori, T., Le Roux, J., Taguchi, S., "Exploring Keyword Enrollment for Japanese End-to-End Automatic Speech Recognition using Contextual Biasing", OTOGAKU Symposium, June 2024.
    BibTeX TR2024-073 PDF
    • @inproceedings{Mitsui2024jun,
    • author = {Mitsui, Yoshiki and Aihara, Ryo and Hori, Takaaki and {Le Roux}, Jonathan and Taguchi, Shinya},
    • title = {{Exploring Keyword Enrollment for Japanese End-to-End Automatic Speech Recognition using Contextual Biasing}},
    • booktitle = {OTOGAKU Symposium},
    • year = 2024,
    • month = jun,
    • publisher = {Information Processing Society of Japan},
    • issn = {2188-8663},
    • url = {https://www.merl.com/publications/TR2024-073}
    • }
  •  Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA) Workshop, June 2024.
    BibTeX TR2024-066 PDF
    • @inproceedings{Kambara2024jun,
    • author = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and {Le Roux}, Jonathan},
    • title = {{Human Action Understanding-based Robot Planning using Multimodal LLM}},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-066}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), DOI: 10.1109/ICASSPW62465.2024.10626914, April 2024, pp. 174-178.
    BibTeX TR2024-029 PDF
    • @inproceedings{Pan2024apr,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Subramanian, Aswin and {Le Roux}, Jonathan},
    • title = {{Late Audio-Visual Fusion for In-The-Wild Speaker Diarization}},
    • booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
    • year = 2024,
    • pages = {174--178},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSPW62465.2024.10626914},
    • isbn = {979-8-3503-7451-3},
    • url = {https://www.merl.com/publications/TR2024-029}
    • }
  •  Fujihashi, T., Kato, S., Koike-Akino, T., "Implicit Neural Representation for Low-Overhead Graph-Based Holographic-Type Communications", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10445857, April 2024.
    BibTeX TR2024-022 PDF
    • @inproceedings{Fujihashi2024apr,
    • author = {Fujihashi, Takuya and Kato, Sorachi and Koike-Akino, Toshiaki},
    • title = {{Implicit Neural Representation for Low-Overhead Graph-Based Holographic-Type Communications}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10445857},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-022}
    • }
  •  Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), April 2024.
    BibTeX TR2024-032 PDF
    • @inproceedings{Koo2024apr,
    • author = {Koo, Junghyun and Wichern, Gordon and Germain, François G. and Khurana, Sameer and {Le Roux}, Jonathan},
    • title = {{Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads}},
    • booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-032}
    • }
  •  Jeon, C.-B., Wichern, G., Germain, F.G., Le Roux, J., "Why does music source separation benefit from cacophony?", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), DOI: 10.1109/ICASSPW62465.2024.10669899, March 2024, pp. 873-877.
    BibTeX TR2024-030 PDF Video
    • @inproceedings{Jeon2024mar,
    • author = {Jeon, Chang-Bin and Wichern, Gordon and Germain, François G. and {Le Roux}, Jonathan},
    • title = {{Why does music source separation benefit from cacophony?}},
    • booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
    • year = 2024,
    • pages = {873--877},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSPW62465.2024.10669899},
    • isbn = {979-8-3503-7451-3},
    • url = {https://www.merl.com/publications/TR2024-030}
    • }
  •  Bralios, D., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "Generation or Replication: Auscultating Audio Latent Diffusion Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447705, March 2024, pp. 1156-1160.
    BibTeX TR2024-027 PDF
    • @inproceedings{Bralios2024mar,
    • author = {Bralios, Dimitrios and Wichern, Gordon and Germain, François G. and Pan, Zexu and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
    • title = {{Generation or Replication: Auscultating Audio Latent Diffusion Models}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {1156--1160},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10447705},
    • url = {https://www.merl.com/publications/TR2024-027}
    • }
  •  Fernandez-Menduina, S., Rapp, J., Mansour, H., Greiff, M., Parsons, K., "Tracking Beyond the Unambiguous Range with Modulo Single-Photon Lidar", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446835, March 2024, pp. 6-10.
    BibTeX TR2024-021 PDF
    • @inproceedings{Fernandez-Menduina2024mar,
    • author = {Fernandez-Menduina, Samuel and Rapp, Joshua and Mansour, Hassan and Greiff, Marcus and Parsons, Kieran},
    • title = {{Tracking Beyond the Unambiguous Range with Modulo Single-Photon Lidar}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {6--10},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10446835},
    • url = {https://www.merl.com/publications/TR2024-021}
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10448477, March 2024, pp. 1016-1020.
    BibTeX TR2024-026 PDF Software
    • @inproceedings{Masuyama2024mar,
    • author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G. and Pan, Zexu and Khurana, Sameer and Hori, Chiori and {Le Roux}, Jonathan},
    • title = {{NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {1016--1020},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10448477},
    • url = {https://www.merl.com/publications/TR2024-026}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "NeuroHeed+: Improving Neuro-steered Speaker Extraction with Joint Auditory Attention Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446333, March 2024, pp. 11456-11460.
    BibTeX TR2024-025 PDF
    • @inproceedings{Pan2024mar,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Khurana, Sameer and {Le Roux}, Jonathan},
    • title = {{NeuroHeed+: Improving Neuro-steered Speaker Extraction with Joint Auditory Attention Detection}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {11456--11460},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10446333},
    • url = {https://www.merl.com/publications/TR2024-025}
    • }
  •  Sholokhov, A., Rapp, J., Nabi, S., Brunton, S., Kutz, N., Mansour, H., "Single-pixel imaging of dynamic flows using Neural ODE regularization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447584, March 2024, pp. 2530-2534.
    BibTeX TR2024-024 PDF
    • @inproceedings{Sholokhov2024mar,
    • author = {Sholokhov, Aleksei and Rapp, Joshua and Nabi, Saleh and Brunton, Steven and Kutz, Nathan and Mansour, Hassan},
    • title = {{Single-pixel imaging of dynamic flows using Neural ODE regularization}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {2530--2534},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447584},
    • url = {https://www.merl.com/publications/TR2024-024}
    • }
  •  Wu, S.-L., Chang, X., Wichern, G., Jung, J.-W., Germain, F.G., Le Roux, J., Watanabe, S., "Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447215, March 2024, pp. 316-320.
    BibTeX TR2024-028 PDF
    • @inproceedings{Wu2024mar,
    • author = {Wu, Shih-Lun and Chang, Xuankai and Wichern, Gordon and Jung, Jee-weon and Germain, François G. and {Le Roux}, Jonathan and Watanabe, Shinji},
    • title = {{Improving Audio Captioning Models with Fine-grained Audio Features, Text Embedding Supervision, and LLM Mix-up Augmentation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {316--320},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10447215},
    • url = {https://www.merl.com/publications/TR2024-028}
    • }
  •  Kato, S., Wang, P., Koike-Akino, T., Fujihashi, T., Mansour, H., Boufounos, P.T., "Object Trajectory Estimation with Multi-Band Wi-Fi Neural Dynamic Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10445972, March 2024, pp. 13261-13265.
    BibTeX TR2024-019 PDF
    • @inproceedings{Kato2024mar,
    • author = {Kato, Sorachi and Wang, Pu and Koike-Akino, Toshiaki and Fujihashi, Takuya and Mansour, Hassan and Boufounos, Petros T.},
    • title = {{Object Trajectory Estimation with Multi-Band Wi-Fi Neural Dynamic Fusion}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13261--13265},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10445972},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-019}
    • }
  •  Wang, P., Boufounos, P.T., "Monostatic DMG Passive Sensing with Hypothesis Testing", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447134, March 2024, pp. 13381-13385.
    BibTeX TR2024-020 PDF
    • @inproceedings{Wang2024mar,
    • author = {Wang, Pu and Boufounos, Petros T.},
    • title = {{Monostatic DMG Passive Sensing with Hypothesis Testing}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13381--13385},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447134},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-020}
    • }
  •  Yataka, R., Wang, P., Boufounos, P.T., Takahashi, R., "Radar Perception with Scalable Connective Temporal Relations for Autonomous Driving", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446449, March 2024, pp. 13266-13270.
    BibTeX TR2024-023 PDF
    • @inproceedings{Yataka2024mar,
    • author = {Yataka, Ryoma and Wang, Pu and Boufounos, Petros T. and Takahashi, Ryuhei},
    • title = {{Radar Perception with Scalable Connective Temporal Relations for Autonomous Driving}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13266--13270},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10446449},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-023}
    • }
  •  Baoueb, T., Liu, H., Fontaine, M., Le Roux, J., Richard, G., "SpecDiff-GAN: A Spectrally-Shaped Noise Diffusion GAN for Speech and Music Synthesis", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446830, March 2024, pp. 986-990.
    BibTeX TR2024-013 PDF
    • @inproceedings{Baoueb2024mar,
    • author = {Baoueb, Teysir and Liu, Haocheng and Fontaine, Mathieu and {Le Roux}, Jonathan and Richard, Gaël},
    • title = {{SpecDiff-GAN: A Spectrally-Shaped Noise Diffusion GAN for Speech and Music Synthesis}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {986--990},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10446830},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-013}
    • }
  •  Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
    BibTeX TR2024-012 PDF
    • @inproceedings{Hori2024mar,
    • author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and {Le Roux}, Jonathan},
    • title = {{Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13296--13300},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447600},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-012}
    • }
  •  Liu, H., Baoueb, T., Fontaine, M., Le Roux, J., Richard, G., "GLA-Grad: A Griffin-Lim Extended Waveform Generation Diffusion Model", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10446058, March 2024, pp. 11611-11615.
    BibTeX TR2024-014 PDF
    • @inproceedings{Liu2024mar,
    • author = {Liu, Haocheng and Baoueb, Teysir and Fontaine, Mathieu and {Le Roux}, Jonathan and Richard, Gaël},
    • title = {{GLA-Grad: A Griffin-Lim Extended Waveform Generation Diffusion Model}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {11611--11615},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10446058},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-014}
    • }
  •  Boeddeker, C., Subramanian, A.S., Wichern, G., Haeb-Umbach, R., Le Roux, J., "TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2024.3350887, Vol. 32, pp. 1185-1197, February 2024.
    BibTeX TR2024-006 PDF Software
    • @article{Boeddeker2024feb,
    • author = {Boeddeker, Christoph and Subramanian, Aswin Shanmugam and Wichern, Gordon and Haeb-Umbach, Reinhold and {Le Roux}, Jonathan},
    • title = {{TS-SEP: Joint Diarization and Separation Conditioned on Estimated Speaker Embeddings}},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2024,
    • volume = 32,
    • pages = {1185--1197},
    • month = feb,
    • doi = {10.1109/TASLP.2024.3350887},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2024-006}
    • }