Publications

719 / 3,993 publications found.


  •  Aihara, R., Masuyama, Y., Paissan, F., Germain, F.G., Wichern, G., Le Roux, J., "SUNAC: Source-aware Unified Neural Audio Codec", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-032 PDF
    • @inproceedings{Aihara2026may,
    • author = {Aihara, Ryo and Masuyama, Yoshiki and Paissan, Francesco and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{SUNAC: Source-aware Unified Neural Audio Codec}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-032}
    • }
  •  Kato, S., Wang, P., Fujihashi, T., Markham, A., "Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-040 PDF
    • @inproceedings{Kato2026may,
    • author = {Kato, Sorachi and Wang, Pu and Fujihashi, Takuya and Markham, Andrew},
    • title = {{Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-040}
    • }
  •  Masuyama, Y., Germain, F.G., Wichern, G., Hori, C., Le Roux, J., "Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-033 PDF
    • @inproceedings{Masuyama2026may,
    • author = {Masuyama, Yoshiki and Germain, François G. and Wichern, Gordon and Hori, Chiori and {Le Roux}, Jonathan},
    • title = {{Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-033}
    • }
  •  Masuyama, Y., Saijo, K., Paissan, F., Han, J., Delcroix, M., Aihara, R., Germain, F.G., Wichern, G., Le Roux, J., "FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), May 2026.
    BibTeX TR2026-034 PDF
    • @inproceedings{Masuyama2026may2,
    • author = {Masuyama, Yoshiki and Saijo, Kohei and Paissan, Francesco and Han, Jiangyu and Delcroix, Marc and Aihara, Ryo and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-034}
    • }
  •  Aihara, R., Masuyama, Y., Germain, F.G., Wichern, G., Le Roux, J., "Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations", IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), May 2026.
    BibTeX TR2026-035 PDF
    • @inproceedings{Aihara2026may2,
    • author = {Aihara, Ryo and Masuyama, Yoshiki and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)},
    • year = 2026,
    • month = may,
    • url = {https://www.merl.com/publications/TR2026-035}
    • }
  •  Ryo, H., Wang, Y., Koike-Akino, T., Liu, J., Parsons, K., Hato, J., "Evaluating Security Policy Compliance in Infrastructure as Code Generated by Large Language Models", International Symposium on Digital Forensics and Security, March 2026.
    BibTeX TR2026-036 PDF
    • @inproceedings{Ryo2026mar,
    • author = {Ryo, Hase and Wang, Ye and Koike-Akino, Toshiaki and Liu, Jing and Parsons, Kieran and Hato, Jumpei},
    • title = {{Evaluating Security Policy Compliance in Infrastructure as Code Generated by Large Language Models}},
    • booktitle = {International Symposium on Digital Forensics and Security},
    • year = 2026,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2026-036}
    • }
  •  Shenoy, V., Lohit, S., Mansour, H., Chellappa, R., Marks, T.K., "Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models", IEEE Transactions on Image Processing, March 2026.
    BibTeX TR2026-031 PDF
    • @article{Shenoy2026mar,
    • author = {Shenoy, Vineet and Lohit, Suhas and Mansour, Hassan and Chellappa, Rama and Marks, Tim K.},
    • title = {{Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models}},
    • journal = {IEEE Transactions on Image Processing},
    • year = 2026,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2026-031}
    • }
  •  Kogashi, K., Cherian, A., Kuo, M.-Y.J., "MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions", IEEE Winter Conference on Applications of Computer Vision (WACV), March 2026.
    BibTeX TR2026-029 PDF Video Data
    • @inproceedings{Kogashi2026mar,
    • author = {Kogashi, Kaen and Cherian, Anoop and Kuo, Meng-Yu Jennifer},
    • title = {{MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions}},
    • booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
    • year = 2026,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2026-029}
    • }
  •  Hu, H., Liu, C., Li, N., Wang, Y., "Training Task Reasoning LLM Agents for Multi-turn Task Planning via Single-turn Reinforcement Learning", IEEE Control Systems Letters, DOI: 10.1109/LCSYS.2025.3642767, Vol. 9, pp. 2879-2884, February 2026.
    BibTeX TR2026-026 PDF
    • @article{Hu2026feb,
    • author = {Hu, Hanjiang and Liu, Changliu and Li, Na and Wang, Yebin},
    • title = {{Training Task Reasoning LLM Agents for Multi-turn Task Planning via Single-turn Reinforcement Learning}},
    • journal = {IEEE Control Systems Letters},
    • year = 2026,
    • volume = 9,
    • pages = {2879--2884},
    • month = feb,
    • doi = {10.1109/LCSYS.2025.3642767},
    • url = {https://www.merl.com/publications/TR2026-026}
    • }
  •  Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection", Transactions on Machine Learning Research, February 2026.
    BibTeX TR2026-027 PDF
    • @article{Mumcu2026feb2,
    • author = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
    • title = {{Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection}},
    • journal = {Transactions on Machine Learning Research},
    • year = 2026,
    • month = feb,
    • url = {https://www.merl.com/publications/TR2026-027}
    • }
  •  Khattar, V., Choudhury, M., Rashid, M.R.U., Liu, J., Koike-Akino, T., Jin, M., Wang, Y., "Amplification Effects in Test-Time Reinforcement Learning: Safety and Reasoning Vulnerabilities", AAAI Workshop on Trust and Control in Agentic AI, January 2026.
    BibTeX TR2026-020 PDF
    • @inproceedings{Khattar2026jan,
    • author = {Khattar, Vanshaj and Choudhury, Moumita and Rashid, Md Rafi Ur and Liu, Jing and Koike-Akino, Toshiaki and Jin, Ming and Wang, Ye},
    • title = {{Amplification Effects in Test-Time Reinforcement Learning: Safety and Reasoning Vulnerabilities}},
    • booktitle = {AAAI Workshop on Trust and Control in Agentic AI},
    • year = 2026,
    • month = jan,
    • url = {https://www.merl.com/publications/TR2026-020}
    • }
  •  Fujihashi, T., Koike-Akino, T., "Quantum Implicit Neural Compression", Springer Nature, DOI: 10.1007/978-3-032-15931-1, Vol. 2813, pp. 60-69, January 2026.
    BibTeX TR2026-022 PDF
    • @article{Fujihashi2026jan,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki},
    • title = {{Quantum Implicit Neural Compression}},
    • journal = {Springer Nature},
    • year = 2026,
    • volume = 2813,
    • pages = {60--69},
    • month = jan,
    • doi = {10.1007/978-3-032-15931-1},
    • issn = {1865-0937},
    • isbn = {978-3-032-15931-1},
    • url = {https://www.merl.com/publications/TR2026-022}
    • }
  •  Koike-Akino, T., Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., "LatentLLM: Activation-Aware Transform to Multi-Head Latent Attention", AAAI Conference on Artificial Intelligence, January 2026.
    BibTeX TR2026-018 PDF Video Presentation
    • @inproceedings{Koike-Akino2026jan,
    • author = {Koike-Akino, Toshiaki and Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew},
    • title = {{LatentLLM: Activation-Aware Transform to Multi-Head Latent Attention}},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2026,
    • month = jan,
    • url = {https://www.merl.com/publications/TR2026-018}
    • }
  •  Rashid, M.R.U., Dasu, V.A., Wang, Y., Tan, G.G., Mehnaz, S., "Chain-of-Thought Driven Adversarial Scenario Extrapolation for Robust Language Models", AAAI Conference on Artificial Intelligence, January 2026.
    BibTeX TR2026-017 PDF
    • @inproceedings{Rashid2026jan,
    • author = {Rashid, Md Rafi Ur and Dasu, Vishnu Asutosh and Wang, Ye and Tan, G. Gary and Mehnaz, Shagufta},
    • title = {{Chain-of-Thought Driven Adversarial Scenario Extrapolation for Robust Language Models}},
    • booktitle = {AAAI Conference on Artificial Intelligence},
    • year = 2026,
    • month = jan,
    • url = {https://www.merl.com/publications/TR2026-017}
    • }
  •  Jeon, C.-B., Wichern, G., Germain, F.G., Le Roux, J., "Embracing Cacophony: Explaining and Improving Random Mixing in Music Source Separation", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3633567, Vol. 6, pp. 1179-1192, January 2026.
    BibTeX TR2026-012 PDF
    • @article{Jeon2026jan,
    • author = {Jeon, Chang-Bin and Wichern, Gordon and Germain, François G. and {Le Roux}, Jonathan},
    • title = {{Embracing Cacophony: Explaining and Improving Random Mixing in Music Source Separation}},
    • journal = {IEEE Open Journal of Signal Processing},
    • year = 2026,
    • volume = 6,
    • pages = {1179--1192},
    • month = jan,
    • doi = {10.1109/OJSP.2025.3633567},
    • url = {https://www.merl.com/publications/TR2026-012}
    • }
  •  Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Local Density-Based Anomaly Score Normalization for Domain Generalization", IEEE Transactions on Audio, Speech and Language Processing, DOI: 10.1109/TASLPRO.2025.3629236, Vol. 33, pp. 4642-4652, January 2026.
    BibTeX TR2026-010 PDF Software
    • @article{Wilkinghoff2026jan,
    • author = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
    • title = {{Local Density-Based Anomaly Score Normalization for Domain Generalization}},
    • journal = {IEEE Transactions on Audio, Speech and Language Processing},
    • year = 2026,
    • volume = 33,
    • pages = {4642--4652},
    • month = jan,
    • doi = {10.1109/TASLPRO.2025.3629236},
    • issn = {2998-4173},
    • url = {https://www.merl.com/publications/TR2026-010}
    • }
  •  Cornell, S., Boeddeker, C., Park, T., Huang, H., Raj, D., Wiesner, M., Masuyama, Y., Chang, X., Wang, Z.-Q., Squartini, S., Garcia, P., Watanabe, S., "Recent Trends in Distant Conversational Speech Recognition: A Review of CHiME-7 and 8 DASR Challenges", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101901, Vol. 97, pp. 101901, December 2025.
    BibTeX TR2026-008 PDF
    • @article{Cornell2025dec,
    • author = {Cornell, Samuele and Boeddeker, Christoph and Park, Taejin and Huang, He and Raj, Desh and Wiesner, Matthew and Masuyama, Yoshiki and Chang, Xuankai and Wang, Zhong-Qiu and Squartini, Stefano and Garcia, Paola and Watanabe, Shinji},
    • title = {{Recent Trends in Distant Conversational Speech Recognition: A Review of CHiME-7 and 8 DASR Challenges}},
    • journal = {Computer Speech \& Language},
    • year = 2025,
    • volume = 97,
    • pages = 101901,
    • month = dec,
    • doi = {10.1016/j.csl.2025.101901},
    • url = {https://www.merl.com/publications/TR2026-008}
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "RANF: Neural Field-Based HRTF Spatial Upsampling with Retrieval Augmentation and Parameter Efficient Fine-Tuning", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3640517, Vol. 7, pp. 32-41, December 2025.
    BibTeX TR2026-007 PDF Software
    • @article{Masuyama2025dec,
    • author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G. and Ick, Christopher and {Le Roux}, Jonathan},
    • title = {{RANF: Neural Field-Based HRTF Spatial Upsampling with Retrieval Augmentation and Parameter Efficient Fine-Tuning}},
    • journal = {IEEE Open Journal of Signal Processing},
    • year = 2025,
    • volume = 7,
    • pages = {32--41},
    • month = dec,
    • doi = {10.1109/OJSP.2025.3640517},
    • url = {https://www.merl.com/publications/TR2026-007}
    • }
  •  Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "SuDaField: Subject- and Dataset-Aware Neural Field for HRTF Modeling", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3627073, Vol. 6, pp. 1169-1178, December 2025.
    BibTeX TR2026-009 PDF
    • @article{Masuyama2025dec2,
    • author = {Masuyama, Yoshiki and Wichern, Gordon and Germain, François G. and Ick, Christopher and {Le Roux}, Jonathan},
    • title = {{SuDaField: Subject- and Dataset-Aware Neural Field for HRTF Modeling}},
    • journal = {IEEE Open Journal of Signal Processing},
    • year = 2025,
    • volume = 6,
    • pages = {1169--1178},
    • month = dec,
    • doi = {10.1109/OJSP.2025.3627073},
    • url = {https://www.merl.com/publications/TR2026-009}
    • }
  •  Hori, C., Masuyama, Y., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Robot Confirmation Generation and Action Planning Using Long-context Q-Former Integrated with Multimodal LLM", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), December 2025.
    BibTeX TR2025-167 PDF
    • @inproceedings{Hori2025dec,
    • author = {Hori, Chiori and Masuyama, Yoshiki and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
    • title = {{Robot Confirmation Generation and Action Planning Using Long-context Q-Former Integrated with Multimodal LLM}},
    • booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
    • year = 2025,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2025-167}
    • }
  •  Van der Merwe, M., Jha, D.K., "In-Context Policy Iteration for Dynamic Manipulation", Advances in Neural Information Processing Systems (NeurIPS) Workshop on Embodied World Models for Decision Making, December 2025.
    BibTeX TR2025-163 PDF Video
    • @inproceedings{VanderMerwe2025dec,
    • author = {Van der Merwe, Mark and Jha, Devesh K.},
    • title = {{In-Context Policy Iteration for Dynamic Manipulation}},
    • booktitle = {Advances in Neural Information Processing Systems (NeurIPS) Workshop on Embodied World Models for Decision Making},
    • year = 2025,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2025-163}
    • }
  •  Xiang, X., Peng, K.-C., Lohit, S., Jones, M.J., Zhang, J., "Towards Open-Vocabulary Multimodal 3D Object Detection with Attributes", British Machine Vision Conference (BMVC), November 2025.
    BibTeX TR2025-162 PDF Video Data Presentation
    • @inproceedings{Xiang2025nov,
    • author = {Xiang, Xinhao and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J. and Zhang, Jiawei},
    • title = {{Towards Open-Vocabulary Multimodal 3D Object Detection with Attributes}},
    • booktitle = {British Machine Vision Conference (BMVC)},
    • year = 2025,
    • month = nov,
    • url = {https://www.merl.com/publications/TR2025-162}
    • }
  •  Masuyama, Y., "Neural Fields for Spatial Audio Modeling," Tech. Rep. TR2025-171, Speech and Audio in the Northeast (SANE), November 2025.
    BibTeX TR2025-171 PDF
    • @techreport{Masuyama2025nov,
    • author = {Masuyama, Yoshiki},
    • title = {{Neural Fields for Spatial Audio Modeling}},
    • institution = {Speech and Audio in the Northeast (SANE)},
    • year = 2025,
    • month = nov,
    • url = {https://www.merl.com/publications/TR2025-171}
    • }
  •  Wilkinghoff, K., Fujimura, T., Imoto, K., Le Roux, J., Tan, Z.-H., Toda, T., "Handling Domain Shifts for Anomalous Sound Detection: A Review of DCASE-Related Work", Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE), DOI: 10.5281/zenodo.17251589, October 2025, pp. 20-24.
    BibTeX TR2025-157 PDF
    • @inproceedings{Wilkinghoff2025oct,
    • author = {Wilkinghoff, Kevin and Fujimura, Takuya and Imoto, Keisuke and {Le Roux}, Jonathan and Tan, Zheng-Hua and Toda, Tomoki},
    • title = {{Handling Domain Shifts for Anomalous Sound Detection: A Review of DCASE-Related Work}},
    • booktitle = {Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)},
    • year = 2025,
    • pages = {20--24},
    • month = oct,
    • doi = {10.5281/zenodo.17251589},
    • isbn = {978-84-09-77652-8},
    • url = {https://www.merl.com/publications/TR2025-157}
    • }
  •  Fujihashi, T., Kuwabara, A., Koike-Akino, T., "QKAN-GS: Quantum-Empowered 3D Gaussian Splatting", ACM Multimedia Workshop, DOI: 10.1145/3728486.375921, October 2025, pp. 51-55.
    BibTeX TR2025-156 PDF
    • @inproceedings{Fujihashi2025oct,
    • author = {Fujihashi, Takuya and Kuwabara, Akihiro and Koike-Akino, Toshiaki},
    • title = {{QKAN-GS: Quantum-Empowered 3D Gaussian Splatting}},
    • booktitle = {ACM Multimedia Workshop},
    • year = 2025,
    • pages = {51--55},
    • month = oct,
    • publisher = {ACM},
    • doi = {10.1145/3728486.375921},
    • url = {https://www.merl.com/publications/TR2025-156}
    • }