Publications

Kato, S., Wang, P., Fujihashi, T., Markham, A., "Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP55912.2026.11463668, May 2026.
BibTeX TR2026-040 PDF
- @inproceedings{Kato2026may,
-   author    = {Kato, Sorachi and Wang, Pu and Fujihashi, Takuya and Markham, Andrew},
-   title     = {{Heatmap-to-SMPL Multi-View Radar Transformer for Multi-Person 3D Pose Estimation}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   month     = may,
-   year      = {2026},
-   publisher = {IEEE},
-   doi       = {10.1109/ICASSP55912.2026.11463668},
-   issn      = {2379-190X},
-   isbn      = {979-8-3315-6701-9},
-   url       = {https://www.merl.com/publications/TR2026-040}
- }
Masuyama, Y., Germain, F.G., Wichern, G., Hori, C., Le Roux, J., "Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP55912.2026.11460631, May 2026, pp. 22582-22586.
BibTeX TR2026-033 PDF
- @inproceedings{Masuyama2026may,
-   author    = {Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and Hori, Chiori and {Le Roux}, Jonathan},
-   title     = {{Velocity Potential Neural Field for Efficient Ambisonics Impulse Response Modeling}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {22582--22586},
-   month     = may,
-   year      = {2026},
-   doi       = {10.1109/ICASSP55912.2026.11460631},
-   url       = {https://www.merl.com/publications/TR2026-033}
- }
Masuyama, Y., Saijo, K., Paissan, F., Han, J., Delcroix, M., Aihara, R., Germain, F.G., Wichern, G., Le Roux, J., "FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP55912.2026.11462393, May 2026, pp. 14417-14421.
BibTeX TR2026-034 PDF
- @inproceedings{Masuyama2026may2,
-   author    = {Masuyama, Yoshiki and Saijo, Kohei and Paissan, Francesco and Han, Jiangyu and Delcroix, Marc and Aihara, Ryo and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and {Le Roux}, Jonathan},
-   title     = {{FlexIO: Flexible Single- and Multi-Channel Speech Separation and Enhancement}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   pages     = {14417--14421},
-   month     = may,
-   year      = {2026},
-   doi       = {10.1109/ICASSP55912.2026.11462393},
-   url       = {https://www.merl.com/publications/TR2026-034}
- }
Aihara, R., Masuyama, Y., Germain, F.G., Wichern, G., Le Roux, J., "Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations", IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW), DOI: 10.1109/ICASSP55912.2026.11462776, May 2026, pp. 21992-21996.
BibTeX TR2026-035 PDF
- @inproceedings{Aihara2026may2,
-   author    = {Aihara, Ryo and Masuyama, Yoshiki and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and {Le Roux}, Jonathan},
-   title     = {{Exploring Disentangled Neural Speech Codecs from Self-Supervised Representations}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)},
-   pages     = {21992--21996},
-   month     = may,
-   year      = {2026},
-   doi       = {10.1109/ICASSP55912.2026.11462776},
-   url       = {https://www.merl.com/publications/TR2026-035}
- }
Bimbraw, K., Wang, Y., Liu, J., Koike-Akino, T., "GPT Sonography: Hand Gesture Decoding from Forearm Ultrasound Images via a Large Vision-Language Model", IEEE Access, DOI: 10.1109/ACCESS.2026.3687477, Vol. 14, pp. 70724-70736, April 2026.
BibTeX TR2026-054 PDF
- @article{Bimbraw2026may,
-   author  = {Bimbraw, Keshav and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
-   title   = {{GPT Sonography: Hand Gesture Decoding from Forearm Ultrasound Images via a Large Vision-Language Model}},
-   journal = {IEEE Access},
-   volume  = {14},
-   pages   = {70724--70736},
-   month   = may,
-   year    = {2026},
-   doi     = {10.1109/ACCESS.2026.3687477},
-   issn    = {2169-3536},
-   url     = {https://www.merl.com/publications/TR2026-054}
- }
Suzuki, K., Liu, J., Wang, Y., Hori, C., Brand, M., Romeres, D., Koike-Akino, T., "Embedding Morphology into Transformers for Cross-Robot Policy Learning", International Conference on Learning Representations (ICLR) Workshop, April 2026.
BibTeX TR2026-048 PDF
- @inproceedings{Suzuki2026apr,
-   author    = {Suzuki, Kei and Liu, Jing and Wang, Ye and Hori, Chiori and Brand, Matthew and Romeres, Diego and Koike-Akino, Toshiaki},
-   title     = {{Embedding Morphology into Transformers for Cross-Robot Policy Learning}},
-   booktitle = {International Conference on Learning Representations (ICLR) Workshop on Efficient Spatial Reasoning},
-   month     = apr,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-048}
- }
Koike-Akino, T., Liu, J., Wang, Y., "TTQ: Activation-Aware Test-Time Quantization to Accelerate LLM Inference on the Fly", International Conference on Learning Representations (ICLR) Workshop, April 2026.
BibTeX TR2026-044 PDF Presentation
- @inproceedings{Koike-Akino2026apr,
-   author    = {Koike-Akino, Toshiaki and Liu, Jing and Wang, Ye},
-   title     = {{TTQ: Activation-Aware Test-Time Quantization to Accelerate LLM Inference on the Fly}},
-   booktitle = {International Conference on Learning Representations (ICLR) Workshop on Test-Time Updates (TTU)},
-   month     = apr,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-044}
- }
Wang, Z., Hu, H., Deng, X., Mowlavi, S., Nakahira, Y., "OpInf-LLM: Parametric PDE Solving with LLMs via Operator Inference", International Conference on Learning Representations (ICLR) Workshop on AI and Partial Differential Equations (AI&PDE), April 2026.
BibTeX TR2026-043 PDF
- @inproceedings{Wang2026apr2,
-   author    = {Wang, Zhuoyuan and Hu, Hanjiang and Deng, Xiyu and Mowlavi, Saviz and Nakahira, Yorie},
-   title     = {{OpInf-LLM: Parametric PDE Solving with LLMs via Operator Inference}},
-   booktitle = {International Conference on Learning Representations (ICLR) Workshop on AI and Partial Differential Equations (AI\&PDE)},
-   month     = apr,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-043}
- }
Ryo, H., Wang, Y., Koike-Akino, T., Liu, J., Parsons, K., Hato, J., "Evaluating Security Policy Compliance in Infrastructure as Code Generated by Large Language Models", International Symposium on Digital Forensics and Security, DOI: 10.1109/ISDFS69419.2026.11458930, March 2026.
BibTeX TR2026-036 PDF
- @inproceedings{Ryo2026mar,
-   author    = {Ryo, Hase and Wang, Ye and Koike-Akino, Toshiaki and Liu, Jing and Parsons, Kieran and Hato, Jumpei},
-   title     = {{Evaluating Security Policy Compliance in Infrastructure as Code Generated by Large Language Models}},
-   booktitle = {International Symposium on Digital Forensics and Security},
-   month     = mar,
-   year      = {2026},
-   doi       = {10.1109/ISDFS69419.2026.11458930},
-   issn      = {2768-1831},
-   isbn      = {979-8-3315-7310-2},
-   url       = {https://www.merl.com/publications/TR2026-036}
- }
Koike-Akino, T., Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., "LatentLLM: Activation-Aware Transform to Multi-Head Latent Attention", AAAI Conference on Artificial Intelligence, Sven Koenig, Chad Jenkins, Matthew E. Taylor, Eds., DOI: 10.1609/aaai.v40i27.39425, March 2026, vol. 40, pp. 22644-22652.
BibTeX TR2026-018 PDF Video Presentation
- @inproceedings{Koike-Akino2026jan,
-   author    = {Koike-Akino, Toshiaki and Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew},
-   title     = {{LatentLLM: Activation-Aware Transform to Multi-Head Latent Attention}},
-   booktitle = {AAAI Conference on Artificial Intelligence},
-   editor    = {Koenig, Sven and Jenkins, Chad and Taylor, Matthew E.},
-   volume    = {40},
-   number    = {27},
-   pages     = {22644--22652},
-   month     = jan,
-   year      = {2026},
-   publisher = {AAAI Press},
-   doi       = {10.1609/aaai.v40i27.39425},
-   issn      = {2374-3468},
-   isbn      = {978-1-57735-906-7},
-   url       = {https://www.merl.com/publications/TR2026-018}
- }
Shenoy, V., Lohit, S., Mansour, H., Chellappa, R., Marks, T.K., "Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models", IEEE Transactions on Image Processing, DOI: 10.1109/TIP.2026.3671653, Vol. 35, pp. 2755-2770, March 2026.
BibTeX TR2026-031 PDF
- @article{Shenoy2026mar,
-   author  = {Shenoy, Vineet and Lohit, Suhas and Mansour, Hassan and Chellappa, Rama and Marks, Tim K.},
-   title   = {{Recovering Pulse Waves from Video Using Deep Unrolling and Deep Equilibrium Models}},
-   journal = {IEEE Transactions on Image Processing},
-   volume  = {35},
-   pages   = {2755--2770},
-   month   = mar,
-   year    = {2026},
-   doi     = {10.1109/TIP.2026.3671653},
-   issn    = {1941-0042},
-   url     = {https://www.merl.com/publications/TR2026-031}
- }
Kogashi, K., Cherian, A., Kuo, M.-Y.J., "MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions", IEEE Winter Conference on Applications of Computer Vision (WACV), March 2026, pp. 1512-1521.
BibTeX TR2026-029 PDF Video Data
- @inproceedings{Kogashi2026mar,
-   author    = {Kogashi, Kaen and Cherian, Anoop and Kuo, Meng-Yu Jennifer},
-   title     = {{MMHOI: Modeling Complex 3D Multi-Human Multi-Object Interactions}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   pages     = {1512--1521},
-   month     = mar,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-029}
- }
Hu, H., Liu, C., Li, N., Wang, Y., "Training Task Reasoning LLM Agents for Multi-turn Task Planning via Single-turn Reinforcement Learning", IEEE Control Systems Letters, DOI: 10.1109/LCSYS.2025.3642767, Vol. 9, pp. 2879-2884, February 2026.
BibTeX TR2026-026 PDF
- @article{Hu2026feb,
-   author  = {Hu, Hanjiang and Liu, Changliu and Li, Na and Wang, Yebin},
-   title   = {{Training Task Reasoning LLM Agents for Multi-turn Task Planning via Single-turn Reinforcement Learning}},
-   journal = {IEEE Control Systems Letters},
-   volume  = {9},
-   pages   = {2879--2884},
-   month   = feb,
-   year    = {2026},
-   doi     = {10.1109/LCSYS.2025.3642767},
-   url     = {https://www.merl.com/publications/TR2026-026}
- }
Mumcu, F., Jones, M.J., Yilmaz, Y., Cherian, A., "Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection", Transactions on Machine Learning Research, February 2026.
BibTeX TR2026-027 PDF
- @article{Mumcu2026feb2,
-   author  = {Mumcu, Furkan and Jones, Michael J. and Yilmaz, Yasin and Cherian, Anoop},
-   title   = {{Leveraging Multimodal LLM Descriptions of Activity for Explainable Semi-Supervised Video Anomaly Detection}},
-   journal = {Transactions on Machine Learning Research},
-   month   = feb,
-   year    = {2026},
-   url     = {https://www.merl.com/publications/TR2026-027}
- }
Wang, R., Wang, Y., Liu, J., Koike-Akino, T., "Quantum Diffusion Models for Few-Shot Learning", AAAI Conference on Artificial Intelligence, Shaukat Ali, Francisco Chicano, Alberto Moraglio, Eds., DOI: 10.1007/978-3-032-15931-1, January 2026, pp. 46-59.
BibTeX TR2025-025 PDF
- @inproceedings{Wang2025mar,
-   author    = {Wang, Ruhan and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
-   title     = {{Quantum Diffusion Models for Few-Shot Learning}},
-   booktitle = {Quantum Computing and Artificial Intelligence: First International Workshop, QC+AI 2025},
-   editor    = {Ali, Shaukat and Chicano, Francisco and Moraglio, Alberto},
-   pages     = {46--59},
-   month     = mar,
-   year      = {2025},
-   publisher = {Springer},
-   address   = {Cham},
-   doi       = {10.1007/978-3-032-15931-1},
-   issn      = {1865-0929},
-   isbn      = {978-3-032-15931-1},
-   url       = {https://www.merl.com/publications/TR2025-025}
- }
Wang, R., Wang, Y., Liu, J., Koike-Akino, T., "Quantum Diffusion Models for Few-Shot Learning", Springer Nature, DOI: 10.1007/978-3-032-15931-1, pp. 46-59, January 2026.
BibTeX TR2026-042 PDF
- @inproceedings{Wang2026apr,
-   author    = {Wang, Ruhan and Wang, Ye and Liu, Jing and Koike-Akino, Toshiaki},
-   title     = {{Quantum Diffusion Models for Few-Shot Learning}},
-   booktitle = {Quantum Computing and Artificial Intelligence: First International Workshop, QC+AI 2025},
-   pages     = {46--59},
-   month     = apr,
-   year      = {2026},
-   publisher = {Springer Nature},
-   doi       = {10.1007/978-3-032-15931-1},
-   issn      = {1865-0929},
-   isbn      = {978-3-032-15931-1},
-   url       = {https://www.merl.com/publications/TR2026-042}
- }
Khattar, V., Choudhury, M., Rashid, M.R.U., Liu, J., Koike-Akino, T., Jin, M., Wang, Y., "Amplification Effects in Test-Time Reinforcement Learning: Safety and Reasoning Vulnerabilities", AAAI Workshop on Trust and Control in Agentic AI, January 2026.
BibTeX TR2026-020 PDF
- @inproceedings{Khattar2026jan,
-   author    = {Khattar, Vanshaj and Choudhury, Moumita and Rashid, Md Rafi Ur and Liu, Jing and Koike-Akino, Toshiaki and Jin, Ming and Wang, Ye},
-   title     = {{Amplification Effects in Test-Time Reinforcement Learning: Safety and Reasoning Vulnerabilities}},
-   booktitle = {AAAI Workshop on Trust and Control in Agentic AI},
-   month     = jan,
-   year      = {2026},
-   url       = {https://www.merl.com/publications/TR2026-020}
- }
Fujihashi, T., Koike-Akino, T., "Quantum Implicit Neural Compression", Springer Nature, DOI: 10.1007/978-3-032-15931-1, Vol. 2813, pp. 60-69, January 2026.
BibTeX TR2026-022 PDF
- @inproceedings{Fujihashi2026jan,
-   author    = {Fujihashi, Takuya and Koike-Akino, Toshiaki},
-   title     = {{Quantum Implicit Neural Compression}},
-   booktitle = {Quantum Computing and Artificial Intelligence: First International Workshop, QC+AI 2025},
-   series    = {Communications in Computer and Information Science},
-   volume    = {2813},
-   pages     = {60--69},
-   month     = jan,
-   year      = {2026},
-   publisher = {Springer Nature},
-   doi       = {10.1007/978-3-032-15931-1},
-   issn      = {1865-0937},
-   isbn      = {978-3-032-15931-1},
-   url       = {https://www.merl.com/publications/TR2026-022}
- }
Rashid, M.R.U., Dasu, V.A., Wang, Y., Tan, G.G., Mehnaz, S., "Chain-of-Thought Driven Adversarial Scenario Extrapolation for Robust Language Models", AAAI Conference on Artificial Intelligence, Sven Koenig, Chad Jenkins, Matthew E. Taylor, Eds., DOI: 10.1609/aaai.v40i44.41122, January 2026, vol. 40, pp. 37858-37866.
BibTeX TR2026-017 PDF
- @inproceedings{Rashid2026jan,
-   author    = {Rashid, Md Rafi Ur and Dasu, Vishnu Asutosh and Wang, Ye and Tan, G. Gary and Mehnaz, Shagufta},
-   title     = {{Chain-of-Thought Driven Adversarial Scenario Extrapolation for Robust Language Models}},
-   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
-   editor    = {Koenig, Sven and Jenkins, Chad and Taylor, Matthew E.},
-   volume    = {40},
-   number    = {44},
-   pages     = {37858--37866},
-   month     = jan,
-   year      = {2026},
-   publisher = {AAAI Press},
-   doi       = {10.1609/aaai.v40i44.41122},
-   issn      = {2159-5399},
-   isbn      = {978-1-57735-906-7},
-   url       = {https://www.merl.com/publications/TR2026-017}
- }
Jeon, C.-B., Wichern, G., Germain, F.G., Le Roux, J., "Embracing Cacophony: Explaining and Improving Random Mixing in Music Source Separation", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3633567, Vol. 6, pp. 1179-1192, January 2026.
BibTeX TR2026-012 PDF Software
- @article{Jeon2026jan,
-   author  = {Jeon, Chang-Bin and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and {Le Roux}, Jonathan},
-   title   = {{Embracing Cacophony: Explaining and Improving Random Mixing in Music Source Separation}},
-   journal = {IEEE Open Journal of Signal Processing},
-   volume  = {6},
-   pages   = {1179--1192},
-   month   = jan,
-   year    = {2026},
-   doi     = {10.1109/OJSP.2025.3633567},
-   url     = {https://www.merl.com/publications/TR2026-012}
- }
Wilkinghoff, K., Yang, H., Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Local Density-Based Anomaly Score Normalization for Domain Generalization", IEEE Transactions on Audio, Speech and Language Processing, DOI: 10.1109/TASLPRO.2025.3629236, Vol. 33, pp. 4642-4652, January 2026.
BibTeX TR2026-010 PDF Software
- @article{Wilkinghoff2026jan,
-   author  = {Wilkinghoff, Kevin and Yang, Haici and Ebbers, Janek and Germain, Fran{\c{c}}ois G. and Wichern, Gordon and {Le Roux}, Jonathan},
-   title   = {{Local Density-Based Anomaly Score Normalization for Domain Generalization}},
-   journal = {IEEE Transactions on Audio, Speech and Language Processing},
-   volume  = {33},
-   pages   = {4642--4652},
-   month   = jan,
-   year    = {2026},
-   doi     = {10.1109/TASLPRO.2025.3629236},
-   issn    = {2998-4173},
-   url     = {https://www.merl.com/publications/TR2026-010}
- }
Cornell, S., Boeddeker, C., Park, T., Huang, H., Raj, D., Wiesner, M., Masuyama, Y., Chang, X., Wang, Z.-Q., Squartini, S., Garcia, P., Watanabe, S., "Recent Trends in Distant Conversational Speech Recognition: A Review of CHiME-7 and 8 DASR Challenges", Computer Speech & Language, DOI: 10.1016/j.csl.2025.101901, Vol. 97, pp. 101901, December 2025.
BibTeX TR2026-008 PDF
- @article{Cornell2025dec,
-   author  = {Cornell, Samuele and Boeddeker, Christoph and Park, Taejin and Huang, He and Raj, Desh and Wiesner, Matthew and Masuyama, Yoshiki and Chang, Xuankai and Wang, Zhong-Qiu and Squartini, Stefano and Garcia, Paola and Watanabe, Shinji},
-   title   = {{Recent Trends in Distant Conversational Speech Recognition: A Review of CHiME-7 and 8 DASR Challenges}},
-   journal = {Computer Speech \& Language},
-   volume  = {97},
-   pages   = {101901},
-   month   = dec,
-   year    = {2025},
-   doi     = {10.1016/j.csl.2025.101901},
-   url     = {https://www.merl.com/publications/TR2026-008}
- }
Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "SuDaField: Subject- and Dataset-Aware Neural Field for HRTF Modeling", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3627073, Vol. 6, pp. 1169-1178, December 2025.
BibTeX TR2026-009 PDF Software
- @article{Masuyama2025dec2,
-   author  = {Masuyama, Yoshiki and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and Ick, Christopher and {Le Roux}, Jonathan},
-   title   = {{SuDaField: Subject- and Dataset-Aware Neural Field for HRTF Modeling}},
-   journal = {IEEE Open Journal of Signal Processing},
-   volume  = {6},
-   pages   = {1169--1178},
-   month   = dec,
-   year    = {2025},
-   doi     = {10.1109/OJSP.2025.3627073},
-   url     = {https://www.merl.com/publications/TR2026-009}
- }
Masuyama, Y., Wichern, G., Germain, F.G., Ick, C., Le Roux, J., "RANF: Neural Field-Based HRTF Spatial Upsampling with Retrieval Augmentation and Parameter Efficient Fine-Tuning", IEEE Open Journal of Signal Processing, DOI: 10.1109/OJSP.2025.3640517, Vol. 7, pp. 32-41, December 2025.
BibTeX TR2026-007 PDF Software
- @article{Masuyama2025dec,
-   author  = {Masuyama, Yoshiki and Wichern, Gordon and Germain, Fran{\c{c}}ois G. and Ick, Christopher and {Le Roux}, Jonathan},
-   title   = {{RANF: Neural Field-Based HRTF Spatial Upsampling with Retrieval Augmentation and Parameter Efficient Fine-Tuning}},
-   journal = {IEEE Open Journal of Signal Processing},
-   volume  = {7},
-   pages   = {32--41},
-   month   = dec,
-   year    = {2025},
-   doi     = {10.1109/OJSP.2025.3640517},
-   url     = {https://www.merl.com/publications/TR2026-007}
- }
Hori, C., Masuyama, Y., Jain, S., Corcodel, R., Jha, D.K., Romeres, D., Le Roux, J., "Robot Confirmation Generation and Action Planning Using Long-context Q-Former Integrated with Multimodal LLM", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU65441.2025.11434641, December 2025.
BibTeX TR2025-167 PDF
- @inproceedings{Hori2025dec,
-   author    = {Hori, Chiori and Masuyama, Yoshiki and Jain, Siddarth and Corcodel, Radu and Jha, Devesh K. and Romeres, Diego and {Le Roux}, Jonathan},
-   title     = {{Robot Confirmation Generation and Action Planning Using Long-context Q-Former Integrated with Multimodal LLM}},
-   booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
-   month     = dec,
-   year      = {2025},
-   doi       = {10.1109/ASRU65441.2025.11434641},
-   issn      = {2997-6995},
-   isbn      = {979-8-3315-4426-3},
-   url       = {https://www.merl.com/publications/TR2025-167}
- }