Publications

Pais, G., Piedade, V., Chatterjee, M., Greiff, M., Miraldo, P., "A Probability-guided Sampler for Neural Implicit Surface Rendering", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/978-3-031-72913-3_10, September 2024, pp. 164-182.
BibTeX TR2024-129 PDF Video
- @inproceedings{Pais2024sep,
- author = {Pais, Goncalo and Piedade, Valter and Chatterjee, Moitreya and Greiff, Marcus and Miraldo, Pedro},
- title = {{A Probability-guided Sampler for Neural Implicit Surface Rendering}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2024,
- editor = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
- pages = {164--182},
- month = sep,
- publisher = {Springer},
- address = {Cham},
- doi = {10.1007/978-3-031-72913-3_10},
- isbn = {978-3-031-72913-3},
- url = {https://www.merl.com/publications/TR2024-129}
- }
Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/IWAENC61483.2024.10694313, September 2024, pp. 205-209.
BibTeX TR2024-126 PDF Software
- @inproceedings{Saijo2024sep2,
- author = {Saijo, Kohei and Wichern, Gordon and Germain, François G. and Pan, Zexu and {Le Roux}, Jonathan},
- title = {{TF-Locoformer: Transformer with Local Modeling by Convolution for Speech Separation and Enhancement}},
- booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
- year = 2024,
- pages = {205--209},
- month = sep,
- doi = {10.1109/IWAENC61483.2024.10694313},
- issn = {2835-3439},
- isbn = {979-8-3503-6185-8},
- url = {https://www.merl.com/publications/TR2024-126}
- }
Cherian, A., Jain, S., Marks, T.K., "Few-shot Transparent Instance Segmentation for Bin Picking", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 5009-5016.
BibTeX TR2024-127 PDF Video
- @inproceedings{Cherian2024sep,
- author = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K.},
- title = {{Few-shot Transparent Instance Segmentation for Bin Picking}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- pages = {5009--5016},
- month = sep,
- year = {2024},
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-127}
- }
Yin, J., Luo, A., Du, Y., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "Disentangled Acoustic Fields For Multimodal Physical Scene Understanding", IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS), September 2024, pp. 557-564.
BibTeX TR2024-125 PDF
- @inproceedings{Yin2024sep,
- author = {Yin, Jie and Luo, Andrew and Du, Yilun and Cherian, Anoop and Marks, Tim K. and {Le Roux}, Jonathan and Gan, Chuang},
- title = {{Disentangled Acoustic Fields For Multimodal Physical Scene Understanding}},
- booktitle = {2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
- pages = {557--564},
- month = sep,
- year = {2024},
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-125}
- }
Roque, P., Miraldo, P., Dimarogonas, D., "Multi-Agent Formation Control using Epipolar Constraints", IEEE Robotics and Automation Letters, DOI: 10.1109/LRA.2024.3444690, Vol. 9, No. 12, pp. 11002-11009, September 2024.
BibTeX TR2024-147 PDF
- @article{Roque2024sep,
- author = {Roque, Pedro and Miraldo, Pedro and Dimarogonas, Dimos},
- title = {{Multi-Agent Formation Control using Epipolar Constraints}},
- journal = {IEEE Robotics and Automation Letters},
- volume = {9},
- number = {12},
- pages = {11002--11009},
- month = sep,
- year = {2024},
- doi = {10.1109/LRA.2024.3444690},
- issn = {2377-3766},
- url = {https://www.merl.com/publications/TR2024-147}
- }
Bahrman, L., Fontaine, M., Le Roux, J., Richard, G., "Speech Dereverberation Constrained on Room Impulse Response Characteristics", Interspeech, DOI: 10.21437/Interspeech.2024-1173, September 2024, pp. 622-626.
BibTeX TR2024-121 PDF
- @inproceedings{Bahrman2024sep,
- author = {Bahrman, Louis and Fontaine, Mathieu and {Le Roux}, Jonathan and Richard, Gaël},
- title = {{Speech Dereverberation Constrained on Room Impulse Response Characteristics}},
- booktitle = {Interspeech},
- pages = {622--626},
- month = sep,
- year = {2024},
- doi = {10.21437/Interspeech.2024-1173},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-121}
- }
Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Sound Event Bounding Boxes", Interspeech, DOI: 10.21437/Interspeech.2024-2075, September 2024, pp. 562-566.
BibTeX TR2024-118 PDF Software
- @inproceedings{Ebbers2024sep,
- author = {Ebbers, Janek and Germain, François G. and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{Sound Event Bounding Boxes}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {562--566},
- month = sep,
- doi = {10.21437/Interspeech.2024-2075},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-118}
- }
Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/Interspeech.2024-1088, September 2024, pp. 392-396.
BibTeX TR2024-122 PDF
- @inproceedings{Khurana2024sep,
- author = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and {Le Roux}, Jonathan},
- title = {{ZeroST: Zero-Shot Speech Translation}},
- booktitle = {Interspeech},
- pages = {392--396},
- month = sep,
- year = {2024},
- doi = {10.21437/Interspeech.2024-1088},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-122}
- }
Pan, Z., Wichern, G., Germain, F.G., Saijo, K., Le Roux, J., "PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1066, September 2024, pp. 582-586.
BibTeX TR2024-124 PDF
- @inproceedings{Pan2024sep,
- author = {Pan, Zexu and Wichern, Gordon and Germain, François G. and Saijo, Kohei and {Le Roux}, Jonathan},
- title = {{PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {582--586},
- month = sep,
- doi = {10.21437/Interspeech.2024-1066},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-124}
- }
Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "Enhanced Reverberation as Supervision for Unsupervised Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1241, September 2024, pp. 607-611.
BibTeX TR2024-116 PDF Software
- @inproceedings{Saijo2024sep,
- author = {Saijo, Kohei and Wichern, Gordon and Germain, François G. and Pan, Zexu and {Le Roux}, Jonathan},
- title = {{Enhanced Reverberation as Supervision for Unsupervised Speech Separation}},
- booktitle = {Interspeech},
- year = 2024,
- pages = {607--611},
- month = sep,
- doi = {10.21437/Interspeech.2024-1241},
- issn = {2958-1796},
- url = {https://www.merl.com/publications/TR2024-116}
- }
Koike-Akino, T., Meng, C., Cevher, V., De Micheli, G., "Hardware-Efficient Quantization for Green Custom Foundation Models", International Conference on Machine Learning (ICML), July 2024.
BibTeX TR2024-105 PDF Presentation
- @inproceedings{Koike-Akino2024jul2,
- author = {Koike-Akino, Toshiaki and Meng, Chang and Cevher, Volkan and De Micheli, Giovanni},
- title = {{Hardware-Efficient Quantization for Green Custom Foundation Models}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2024,
- month = jul,
- url = {https://www.merl.com/publications/TR2024-105}
- }
Liu, J., Lowy, A., Koike-Akino, T., Parsons, K., Wang, Y., "Efficient Differentially Private Fine-Tuning of Diffusion Models", International Conference on Machine Learning (ICML) workshop (Next Generation of AI Safety), July 2024.
BibTeX TR2024-104 PDF Presentation
- @inproceedings{Liu2024jul,
- author = {Liu, Jing and Lowy, Andrew and Koike-Akino, Toshiaki and Parsons, Kieran and Wang, Ye},
- title = {{Efficient Differentially Private Fine-Tuning of Diffusion Models}},
- booktitle = {International Conference on Machine Learning (ICML) workshop (Next Generation of AI Safety)},
- month = jul,
- year = {2024},
- url = {https://www.merl.com/publications/TR2024-104}
- }
Bimbraw, K., Liu, J., Wang, Y., Koike-Akino, T., "Random Channel Ablation for Robust Hand Gesture Classification with Multimodal Biosignals", International Conference of the IEEE Engineering in Medicine and Biology Society, DOI: 10.1109/EMBC53108.2024.10782851, July 2024.
BibTeX TR2024-103 PDF
- @inproceedings{Bimbraw2024jul3,
- author = {Bimbraw, Keshav and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki},
- title = {{Random Channel Ablation for Robust Hand Gesture Classification with Multimodal Biosignals}},
- booktitle = {International Conference of the IEEE Engineering in Medicine and Biology Society},
- month = jul,
- year = {2024},
- publisher = {IEEE},
- doi = {10.1109/EMBC53108.2024.10782851},
- isbn = {979-8-3503-7149-9},
- issn = {2694-0604},
- url = {https://www.merl.com/publications/TR2024-103}
- }
Koike-Akino, T., Cevher, V., "Quantum-PEFT: Ultra Parameter-Efficient Fine-Tuning", International Conference on Machine Learning (ICML), July 2024.
BibTeX TR2024-101 PDF Presentation
- @inproceedings{Koike-Akino2024jul,
- author = {Koike-Akino, Toshiaki and Cevher, Volkan},
- title = {{Quantum-PEFT: Ultra Parameter-Efficient Fine-Tuning}},
- booktitle = {International Conference on Machine Learning (ICML)},
- month = jul,
- year = {2024},
- url = {https://www.merl.com/publications/TR2024-101}
- }
Zhang, X., Mao, W., Mowlavi, S., Benosman, M., Basar, T., "Controlgym: Large-Scale Control Environments for Benchmarking Reinforcement Learning Algorithms", Learning for Dynamics & Control Conference (L4DC), July 2024, pp. 181-196.
BibTeX TR2024-098 PDF
- @inproceedings{Zhang2024jul2,
- author = {Zhang, Xiangyuan and Mao, Weichao and Mowlavi, Saviz and Benosman, Mouhacine and Basar, Tamer},
- title = {{Controlgym: Large-Scale Control Environments for Benchmarking Reinforcement Learning Algorithms}},
- booktitle = {Learning for Dynamics \& Control Conference (L4DC)},
- pages = {181--196},
- month = jul,
- year = {2024},
- publisher = {PMLR},
- url = {https://www.merl.com/publications/TR2024-098}
- }
Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., Wang, G., Koike-Akino, T., "SuperLoRA: Parameter-Efficient Unified Adaptation for Large Vision Models", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), DOI: 10.1109/CVPRW63382.2024.00804, June 2024, pp. 8050-8055.
BibTeX TR2024-062 PDF Presentation
- @inproceedings{Chen2024jun,
- author = {Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew and Wang, Guanghui and Koike-Akino, Toshiaki},
- title = {{SuperLoRA: Parameter-Efficient Unified Adaptation for Large Vision Models}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2024,
- pages = {8050--8055},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPRW63382.2024.00804},
- url = {https://www.merl.com/publications/TR2024-062}
- }
Ho, C.-H., Peng, K.-C., Vasconcelos, N., "Long-Tailed Anomaly Detection with Learnable Class Names", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z., Eds., DOI: 10.1109/CVPR52733.2024.01182, June 2024, pp. 12435-12446.
BibTeX TR2024-040 PDF Video Data Presentation
- @inproceedings{Ho2024jun,
- author = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
- title = {{Long-Tailed Anomaly Detection with Learnable Class Names}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- editor = {Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z.},
- pages = {12435--12446},
- month = jun,
- year = {2024},
- publisher = {IEEE},
- doi = {10.1109/CVPR52733.2024.01182},
- isbn = {979-8-3503-5300-6},
- issn = {2575-7075},
- url = {https://www.merl.com/publications/TR2024-040}
- }
Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 9015-9025.
BibTeX TR2024-059 PDF Video Software Presentation
- @inproceedings{Ni2024jun,
- author = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
- title = {{TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- pages = {9015--9025},
- month = jun,
- year = {2024},
- url = {https://www.merl.com/publications/TR2024-059}
- }
Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), DOI: 10.1109/CVPRW63382.2024.00291, June 2024, pp. 2852-2861.
BibTeX TR2024-045 PDF
- @inproceedings{Dey2024apr,
- author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
- title = {{CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2024,
- pages = {2852--2861},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPRW63382.2024.00291},
- isbn = {979-8-3503-6547-4},
- url = {https://www.merl.com/publications/TR2024-045}
- }
Giammarino, V., Queeney, J., Paschalidis, I.C., "Adversarial Imitation Learning from Visual Observations using Latent Information", Transactions on Machine Learning Research (TMLR), June 2024.
BibTeX TR2024-068 PDF
- @article{Giammarino2024jun,
- author = {Giammarino, Vittorio and Queeney, James and Paschalidis, Ioannis Ch.},
- title = {{Adversarial Imitation Learning from Visual Observations using Latent Information}},
- journal = {Transactions on Machine Learning Research (TMLR)},
- month = jun,
- year = {2024},
- issn = {2835-8856},
- url = {https://www.merl.com/publications/TR2024-068}
- }
He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
BibTeX TR2024-072 PDF
- @inproceedings{He2024jun,
- author = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
- title = {{Deep Neural Room Acoustics Primitive}},
- booktitle = {International Conference on Machine Learning (ICML)},
- pages = {17842--17857},
- month = jun,
- year = {2024},
- url = {https://www.merl.com/publications/TR2024-072}
- }
Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA) Workshop, June 2024.
BibTeX TR2024-066 PDF
- @inproceedings{Kambara2024jun,
- author = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and {Le Roux}, Jonathan},
- title = {{Human Action Understanding-based Robot Planning using Multimodal LLM}},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
- month = jun,
- year = {2024},
- url = {https://www.merl.com/publications/TR2024-066}
- }
Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024, pp. 19667-19679.
BibTeX TR2024-042 PDF Videos Software
- @inproceedings{Liu2024may,
- author = {Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
- title = {{Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- pages = {19667--19679},
- month = may,
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-042}
- }
Singh, A., Jones, M.J., Learned-Miller, E., "Tracklet-based Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, May 2024, pp. 3992-4001.
BibTeX TR2024-057 PDF
- @inproceedings{Singh2024may,
- author = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
- title = {{Tracklet-based Explainable Video Anomaly Localization}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
- pages = {3992--4001},
- month = may,
- year = {2024},
- url = {https://www.merl.com/publications/TR2024-057}
- }
Schperberg, A., Tanaka, Y., Mowlavi, S., Xu, F., Balaji, B., Hong, D., "OptiState: State Estimation of Legged Robots using Gated Networks with Transformer-based Vision and Kalman Filtering", IEEE International Conference on Robotics and Automation (ICRA), May 2024.
BibTeX TR2024-054 PDF
- @inproceedings{Schperberg2024may,
- author = {Schperberg, Alexander and Tanaka, Yusuke and Mowlavi, Saviz and Xu, Feng and Balaji, Bharathan and Hong, Dennis},
- title = {{OptiState: State Estimation of Legged Robots using Gated Networks with Transformer-based Vision and Kalman Filtering}},
- booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
- month = may,
- year = {2024},
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-054}
- }