Publications

11 / 3,736 publications found.


  •  Saijo, K., Ebbers, J., Germain, F.G., Khurana, S., Wichern, G., Le Roux, J., "Leveraging Audio-Only Data for Text-Queried Target Sound Extraction", arXiv, September 2024.
    BibTeX arXiv
    • @article{Saijo2024sep3,
    • author = {Saijo, Kohei and Ebbers, Janek and Germain, François G and Khurana, Sameer and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Leveraging Audio-Only Data for Text-Queried Target Sound Extraction},
    • journal = {arXiv},
    • year = 2024,
    • month = sep,
    • eprint = {2409.13152},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2409.13152v1}
    • }
  •  Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/​Interspeech.2024-1088, September 2024, pp. 392-396.
    BibTeX TR2024-122 PDF
    • @inproceedings{Khurana2024sep,
    • author = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and Le Roux, Jonathan},
    • title = {{ZeroST}: Zero-Shot Speech Translation},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {392--396},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1088},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-122}
    • }
  •  Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA) Workshop, June 2024.
    BibTeX TR2024-066 PDF
    • @inproceedings{Kambara2024jun,
    • author = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and Le Roux, Jonathan},
    • title = {Human Action Understanding-based Robot Planning using Multimodal {LLM}},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-066}
    • }
  •  Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "SMITIN: Self-Monitored Inference-Time INtervention for Generative Music Transformers", arXiv, April 2024.
    BibTeX arXiv
    • @article{Koo2024apr2,
    • author = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and Le Roux, Jonathan},
    • title = {{SMITIN}: Self-Monitored Inference-Time {INtervention} for Generative Music Transformers},
    • journal = {arXiv},
    • year = 2024,
    • month = apr,
    • eprint = {2404.02252},
    • archiveprefix = {arXiv},
    • url = {https://arxiv.org/abs/2404.02252}
    • }
  •  Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), April 2024.
    BibTeX TR2024-032 PDF
    • @inproceedings{Koo2024apr,
    •   author    = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and Le Roux, Jonathan},
    •   title     = {Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads},
    •   booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
    •   month     = apr,
    •   year      = {2024},
    •   url       = {https://www.merl.com/publications/TR2024-032},
    • }
  •  Bralios, D., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "Generation or Replication: Auscultating Audio Latent Diffusion Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10447705, March 2024, pp. 1156-1160.
    BibTeX TR2024-027 PDF
    • @inproceedings{Bralios2024mar,
    •   author    = {Bralios, Dimitrios and Wichern, Gordon and Germain, François G and Pan, Zexu and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    •   title     = {Generation or Replication: Auscultating Audio Latent Diffusion Models},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {1156--1160},
    •   month     = mar,
    •   year      = {2024},
    •   doi       = {10.1109/ICASSP48485.2024.10447705},
    •   url       = {https://www.merl.com/publications/TR2024-027},
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Pan, Z., Khurana, S., Hori, C., Le Roux, J., "NIIRF: Neural IIR Filter Field for HRTF Upsampling and Personalization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10448477, March 2024, pp. 1016-1020.
    BibTeX TR2024-026 PDF Software
    • @inproceedings{Masuyama2024mar,
    • author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G and Pan, Zexu and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {{NIIRF}: Neural {IIR} Filter Field for {HRTF} Upsampling and Personalization},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {1016--1020},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10448477},
    • url = {https://www.merl.com/publications/TR2024-026}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "NeuroHeed+: Improving Neuro-steered Speaker Extraction with Joint Auditory Attention Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10446333, March 2024, pp. 11456-11460.
    BibTeX TR2024-025 PDF
    • @inproceedings{Pan2024mar,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Khurana, Sameer and Le Roux, Jonathan},
    • title = {{NeuroHeed+}: Improving Neuro-steered Speaker Extraction with Joint Auditory Attention Detection},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {11456--11460},
    • month = mar,
    • doi = {10.1109/ICASSP48485.2024.10446333},
    • url = {https://www.merl.com/publications/TR2024-025}
    • }
  •  Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
    BibTeX TR2024-012 PDF
    • @inproceedings{Hori2024mar,
    • author = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and Le Roux, Jonathan},
    • title = {{Wi-Fi} based Indoor Monitoring Enhanced by Multimodal Fusion},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2024,
    • pages = {13296--13300},
    • month = mar,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP48485.2024.10447600},
    • issn = {2379-190X},
    • isbn = {979-8-3503-4485-1},
    • url = {https://www.merl.com/publications/TR2024-012}
    • }
  •  Pan, Z., Wichern, G., Masuyama, Y., Germain, F.G., Khurana, S., Hori, C., Le Roux, J., "Scenario-Aware Audio-Visual TF-GridNet for Target Speech Extraction", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/​ASRU57964.2023.10389618, December 2023.
    BibTeX TR2023-152 PDF Video
    • @inproceedings{Pan2023dec2,
    • author = {Pan, Zexu and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G and Khurana, Sameer and Hori, Chiori and Le Roux, Jonathan},
    • title = {Scenario-Aware Audio-Visual {TF-GridNet} for Target Speech Extraction},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2023,
    • month = dec,
    • doi = {10.1109/ASRU57964.2023.10389618},
    • isbn = {979-8-3503-0689-7},
    • url = {https://www.merl.com/publications/TR2023-152}
    • }
  •  Khurana, S., Moritz, N., Hori, T., Le Roux, J., "Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP39728.2021.9414299, June 2021, pp. 6553-6557.
    BibTeX TR2021-039 PDF
    • @inproceedings{Khurana2021jun,
    •   author    = {Khurana, Sameer and Moritz, Niko and Hori, Takaaki and Le Roux, Jonathan},
    •   title     = {Unsupervised Domain Adaptation For Speech Recognition via Uncertainty Driven Self-Training},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   pages     = {6553--6557},
    •   month     = jun,
    •   year      = {2021},
    •   doi       = {10.1109/ICASSP39728.2021.9414299},
    •   url       = {https://www.merl.com/publications/TR2021-039},
    • }