- Yang, C.-A., Peng, K.-C., Yeh, R., "Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts", IEEE International Conference on Computer Vision (ICCV), October 2025.
BibTeX TR2025-124 PDF Data- @inproceedings{Yang2025oct,
- author    = {Yang, Chiao-An and Peng, Kuan-Chuan and Yeh, Raymond},
- title     = {{Toward Long-Tailed Online Anomaly Detection through Class-Agnostic Concepts}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- month     = oct,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-124}
- }
- Hussein, A., Khurana, S., Wichern, G., Germain, F.G., Le Roux, J., "HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement", Interspeech, August 2025.
BibTeX TR2025-122 PDF- @inproceedings{Hussein2025aug,
- author    = {Hussein, Amir and Khurana, Sameer and Wichern, Gordon and Germain, François G. and {Le Roux}, Jonathan},
- title     = {{HASRD: Hierarchical Acoustic and Semantic Representation Disentanglement}},
- booktitle = {Interspeech},
- month     = aug,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-122}
- }
- Ick, C., Wichern, G., Masuyama, Y., Germain, F.G., Le Roux, J., "Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses", Interspeech, DOI: 10.21437/Interspeech.2025-1912, August 2025, pp. 933-937.
BibTeX TR2025-120 PDF- @inproceedings{Ick2025aug,
- author    = {Ick, Christopher and Wichern, Gordon and Masuyama, Yoshiki and Germain, François G. and {Le Roux}, Jonathan},
- title     = {{Direction-Aware Neural Acoustic Fields for Few-Shot Interpolation of Ambisonic Impulse Responses}},
- booktitle = {Interspeech},
- pages     = {933--937},
- month     = aug,
- year      = {2025},
- doi       = {10.21437/Interspeech.2025-1912},
- url       = {https://www.merl.com/publications/TR2025-120}
- }
- Khurana, S., Klement, D., Laurent, A., Bobos, D., Novosad, J., Gazdik, P., Zhang, E., Huang, Z., Hussein, A., Marxer, R., Masuyama, Y., Aihara, R., Hori, C., Germain, F.G., Wichern, G., Le Roux, J., "Factorized RVQ-GAN For Disentangled Speech Tokenization", Interspeech, August 2025.
BibTeX TR2025-123 PDF- @inproceedings{Khurana2025aug,
- author    = {Khurana, Sameer and Klement, Dominik and Laurent, Antoine and Bobos, Dominik and Novosad, Juraj and Gazdik, Peter and Zhang, Ellen and Huang, Zilli and Hussein, Amir and Marxer, Ricard and Masuyama, Yoshiki and Aihara, Ryo and Hori, Chiori and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title     = {{Factorized RVQ-GAN For Disentangled Speech Tokenization}},
- booktitle = {Interspeech},
- month     = aug,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-123}
- }
- Yang, H., Wichern, G., Aihara, R., Masuyama, Y., Khurana, S., Germain, F.G., Le Roux, J., "Investigating Continuous Autoregressive Generative Speech Enhancement", Interspeech, August 2025.
BibTeX TR2025-119 PDF- @inproceedings{Yang2025aug,
- author    = {Yang, Haici and Wichern, Gordon and Aihara, Ryo and Masuyama, Yoshiki and Khurana, Sameer and Germain, François G. and {Le Roux}, Jonathan},
- title     = {{Investigating Continuous Autoregressive Generative Speech Enhancement}},
- booktitle = {Interspeech},
- month     = aug,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-119}
- }
- Steinmetz, C., Uhle, C., Everardo, F., Mitcheltree, C., McElveen, J.K., Jot, J.-M., Wichern, G., "Audio Signal Processing in the Artificial Intelligence Era: Challenges and Directions", Journal of the Audio Engineering Society, August 2025.
BibTeX TR2025-116 PDF- @article{Steinmetz2025aug,
- author  = {Steinmetz, Christian and Uhle, Christian and Everardo, Flavio and Mitcheltree, Christopher and McElveen, J. Keith and Jot, Jean-Marc and Wichern, Gordon},
- title   = {{Audio Signal Processing in the Artificial Intelligence Era: Challenges and Directions}},
- journal = {Journal of the Audio Engineering Society},
- month   = aug,
- year    = {2025},
- url     = {https://www.merl.com/publications/TR2025-116}
- }
- Lewis, A., White, M., Liu, J., Koike-Akino, T., Parsons, K., Wang, Y., "Winning Big with Small Models: Knowledge Distillation vs. Self-Training for Reducing Hallucination in Product QA Agents", ACL 2025 workshop on Generation, Evaluation & Metrics (GEM), July 2025.
BibTeX TR2025-114 PDF- @inproceedings{Lewis2025jul2,
- author    = {Lewis, Ashley and White, Michael and Liu, Jing and Koike-Akino, Toshiaki and Parsons, Kieran and Wang, Ye},
- title     = {{Winning Big with Small Models: Knowledge Distillation vs. Self-Training for Reducing Hallucination in Product QA Agents}},
- booktitle = {ACL 2025 workshop on Generation, Evaluation \& Metrics (GEM)},
- month     = jul,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-114}
- }
- Almudévar, A., Hernández-Lobato, J.M., Khurana, S., Marxer, R., Ortega, A., "Aligning Multimodal Representations through an Information Bottleneck", International Conference on Machine Learning (ICML), July 2025.
BibTeX TR2025-109 PDF- @inproceedings{Almudévar2025jul,
- author    = {Almudévar, Antonio and Hernández-Lobato, José M. and Khurana, Sameer and Marxer, Ricard and Ortega, Alfonso},
- title     = {{Aligning Multimodal Representations through an Information Bottleneck}},
- booktitle = {International Conference on Machine Learning (ICML)},
- month     = jul,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-109}
- }
- Liu, J., Koike-Akino, T., Wang, Y., Mansour, H., Brand, M., "AWP: Activation-Aware Weight Pruning and Quantization with Projected Gradient Descent", International Conference on Machine Learning (ICML) workshop, July 2025.
BibTeX TR2025-111 PDF- @inproceedings{Liu2025jul,
- author    = {Liu, Jing and Koike-Akino, Toshiaki and Wang, Ye and Mansour, Hassan and Brand, Matthew},
- title     = {{AWP: Activation-Aware Weight Pruning and Quantization with Projected Gradient Descent}},
- booktitle = {International Conference on Machine Learning (ICML) workshop},
- month     = jul,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-111}
- }
- Koike-Akino, T., Liu, J., Wang, Y., "u-MoE: Test-Time Pruning as Micro-Grained Mixture-of-Experts", International Conference on Machine Learning (ICML) Workshop, July 2025.
BibTeX TR2025-112 PDF- @inproceedings{Koike-Akino2025jul,
- author    = {Koike-Akino, Toshiaki and Liu, Jing and Wang, Ye},
- title     = {{u-MoE: Test-Time Pruning as Micro-Grained Mixture-of-Experts}},
- booktitle = {International Conference on Machine Learning (ICML) Workshop},
- month     = jul,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-112}
- }
- Wang, R., Wang, Y., Liu, J., Koike-Akino, T., "Quantum Diffusion Models for Few-Shot Learning", ICAD, DOI: 10.1109/ICAD65464.2025.11114033, June 2025.
BibTeX TR2025-095 PDF- @inproceedings{Wang2025jun2,
- author    = {Wang, Ruhan and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
- title     = {{Quantum Diffusion Models for Few-Shot Learning}},
- booktitle = {ICAD},
- month     = jun,
- year      = {2025},
- publisher = {IEEE},
- doi       = {10.1109/ICAD65464.2025.11114033},
- isbn      = {979-8-3315-2472-2},
- url       = {https://www.merl.com/publications/TR2025-095}
- }
- Masuyama, Y., "Single- and Multi-Channel Speech Enhancement and Separation for Far-Field Conversation Recognition," Tech. Rep. TR2025-097, Jelinek Summer Workshop on Speech and Language Technology (JSALT), June 2025.
BibTeX TR2025-097 PDF- @techreport{Masuyama2025jun,
- author      = {Masuyama, Yoshiki},
- title       = {{Single- and Multi-Channel Speech Enhancement and Separation for Far-Field Conversation Recognition}},
- institution = {Jelinek Summer Workshop on Speech and Language Technology (JSALT)},
- number      = {TR2025-097},
- month       = jun,
- year        = {2025},
- url         = {https://www.merl.com/publications/TR2025-097}
- }
- Jung, Y.G., Park, J., Yoon, J., Peng, K.-C., Kim, W., Teoh, A.B.J., Camps, O., "TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Isola, P., Kjellström, H., Lepetit, V., Li, F., Su, H., Tang, S., Eds., DOI: 10.1109/CVPR52734.2025.02378, June 2025, pp. 25539-25548.
BibTeX TR2025-077 PDF Video Presentation- @inproceedings{Jung2025jun,
- author    = {Jung, Yoon G. and Park, Jaewoo and Yoon, Jaeho and Peng, Kuan-Chuan and Kim, Wonchul and Teoh, Andrew B. J. and Camps, Octavia},
- title     = {{TailedCore: Few-Shot Sampling for Unsupervised Long-Tail Noisy Anomaly Detection}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- editor    = {Isola, P. and Kjellström, H. and Lepetit, V. and Li, F. and Su, H. and Tang, S.},
- pages     = {25539--25548},
- month     = jun,
- year      = {2025},
- publisher = {IEEE},
- doi       = {10.1109/CVPR52734.2025.02378},
- issn      = {2575-7075},
- isbn      = {979-8-3315-4364-8},
- url       = {https://www.merl.com/publications/TR2025-077}
- }
- Lai, Y.-H., Ebbers, J., Wang, Y.-C.F., Germain, F.G., Jones, M.J., Chatterjee, M., "UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2025.
BibTeX TR2025-072 PDF- @inproceedings{Lai2025jun,
- author    = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, François G. and Jones, Michael J. and Chatterjee, Moitreya},
- title     = {{UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-072}
- }
- Hegde, D., Lohit, S., Peng, K.-C., Jones, M.J., Patel, V.M., "Multimodal 3D Object Detection on Unseen Domains", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025, pp. 2499-2509.
BibTeX TR2025-078 PDF- @inproceedings{Hegde2025jun,
- author    = {Hegde, Deepti and Lohit, Suhas and Peng, Kuan-Chuan and Jones, Michael J. and Patel, Vishal M.},
- title     = {{Multimodal 3D Object Detection on Unseen Domains}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- pages     = {2499--2509},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-078}
- }
- Koike-Akino, T., Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., "LatentLLM: Attention-Aware Joint Tensor Compression", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025.
BibTeX TR2025-075 PDF- @inproceedings{Koike-Akino2025jun,
- author    = {Koike-Akino, Toshiaki and Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew},
- title     = {{LatentLLM: Attention-Aware Joint Tensor Compression}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-075}
- }
- Li, K., Zhang, T., Peng, K.-C., Wang, G., "PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop, June 2025, pp. 3787-3796.
BibTeX TR2025-076 PDF Presentation- @inproceedings{Li2025jun,
- author    = {Li, Kaidong and Zhang, Tianxiao and Peng, Kuan-Chuan and Wang, Guanghui},
- title     = {{PF3Det: A Prompted Foundation Feature Assisted Visual LiDAR 3D Detector}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshop},
- pages     = {3787--3796},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-076}
- }
- Chen, X., Liu, J., Wang, Y., Brand, M., Wang, P., Koike-Akino, T., "TuneComp: Joint Fine-Tuning and Compression for Large Foundation Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) workshop on Efficient and On-Device Generation, June 2025.
BibTeX TR2025-079 PDF- @inproceedings{Chen2025jun,
- author    = {Chen, Xiangyu and Liu, Jing and Wang, Ye and Brand, Matthew and Wang, Pu and Koike-Akino, Toshiaki},
- title     = {{TuneComp: Joint Fine-Tuning and Compression for Large Foundation Models}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) workshop on Efficient and On-Device Generation},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-079}
- }
- Sawada, N., Miraldo, P., Lohit, S., Marks, T.K., Chatterjee, M., "FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations", IEEE Conference on Computer Vision and Pattern Recognition Workshop (CVPR), June 2025.
BibTeX TR2025-074 PDF- @inproceedings{Sawada2025jun,
- author    = {Sawada, Naoko and Miraldo, Pedro and Lohit, Suhas and Marks, Tim K. and Chatterjee, Moitreya},
- title     = {{FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshop (CVPR)},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-074}
- }
- Ni, Y., Wen, S., Koniusz, P., Cherian, A., "Noise Consistency Regularization for Improved Subject-Driven Image Synthesis", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPR), June 2025, pp. 3116-3126.
BibTeX TR2025-073 PDF- @inproceedings{Ni2025jun,
- author    = {Ni, Yao and Wen, Song and Koniusz, Piotr and Cherian, Anoop},
- title     = {{Noise Consistency Regularization for Improved Subject-Driven Image Synthesis}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPR)},
- pages     = {3116--3126},
- month     = jun,
- year      = {2025},
- publisher = {CVF},
- url       = {https://www.merl.com/publications/TR2025-073}
- }
- Hazarika, A., Guo, J., Parsons, K., Nagai, Y., Sumi, T., Orlik, P.V., Rahmati, M., "AgriNex: Next-Gen Smart Agriculture with LLM-Integrated UAV-IoT Solutions", IEEE International Conference on Communications Workshops (ICC), June 2025.
BibTeX TR2025-081 PDF- @inproceedings{Hazarika2025jun,
- author    = {Hazarika, Ananya and Guo, Jianlin and Parsons, Kieran and Nagai, Yukimasa and Sumi, Takenori and Orlik, Philip V. and Rahmati, Mehdi},
- title     = {{AgriNex: Next-Gen Smart Agriculture with LLM-Integrated UAV-IoT Solutions}},
- booktitle = {IEEE International Conference on Communications Workshops (ICC)},
- month     = jun,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-081}
- }
- Lu, K., Ma, C., Hori, C., Romeres, D., "KitchenVLA: Iterative Vision-Language Corrections for Robotic Execution of Human Tasks", IEEE International Conference on Robotics and Automation Workshop on Safely Leveraging Vision-Language Foundation Models in Robotics (SafeLVMs@ICRA), May 2025.
BibTeX TR2025-068 PDF- @inproceedings{Lu2025may,
- author    = {Lu, Kai and Ma, Chenyang and Hori, Chiori and Romeres, Diego},
- title     = {{KitchenVLA: Iterative Vision-Language Corrections for Robotic Execution of Human Tasks}},
- booktitle = {IEEE International Conference on Robotics and Automation Workshop on Safely Leveraging Vision-Language Foundation Models in Robotics (SafeLVMs@ICRA)},
- month     = may,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-068}
- }
- Singh, A., Jones, M.J., Peng, K.-C., Chatterjee, M., Cherian, A., Learned-Miller, E., "Improving Open-World Object Localization by Discovering Background", CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon, May 2025, pp. 6449-6458.
BibTeX TR2025-058 PDF- @inproceedings{Singh2025may,
- author    = {Singh, Ashish and Jones, Michael J. and Peng, Kuan-Chuan and Chatterjee, Moitreya and Cherian, Anoop and Learned-Miller, Erik},
- title     = {{Improving Open-World Object Localization by Discovering Background}},
- booktitle = {CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon},
- pages     = {6449--6458},
- month     = may,
- year      = {2025},
- url       = {https://www.merl.com/publications/TR2025-058}
- }
- Basu, S., Lohit, S., Brand, M., "G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups", Transactions on Machine Learning Research (TMLR), May 2025.
BibTeX TR2025-056 PDF Software- @article{Basu2025may,
- author  = {Basu, Sourya and Lohit, Suhas and Brand, Matthew},
- title   = {{G-RepsNet: A Lightweight Construction of Equivariant Networks for Arbitrary Matrix Groups}},
- journal = {Transactions on Machine Learning Research (TMLR)},
- month   = may,
- year    = {2025},
- url     = {https://www.merl.com/publications/TR2025-056}
- }
- Masuyama, Y., Chang, X., Zhang, W., Cornell, S., Wang, Z.-Q., Ono, N., Qian, Y., Watanabe, S., "An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101813, Vol. 95, pp. 101813, May 2025.
BibTeX TR2025-054 PDF- @article{Masuyama2025may,
- author  = {Masuyama, Yoshiki and Chang, Xuankai and Zhang, Wangyou and Cornell, Samuele and Wang, Zhong-Qiu and Ono, Nobutaka and Qian, Yanmin and Watanabe, Shinji},
- title   = {{An End-to-End Integration of Speech Separation and Recognition with Self-Supervised Learning Representation}},
- journal = {Computer Speech \& Language},
- volume  = {95},
- pages   = {101813},
- month   = may,
- year    = {2025},
- doi     = {10.1016/j.csl.2025.101813},
- issn    = {0885-2308},
- url     = {https://www.merl.com/publications/TR2025-054}
- }