Publications

Li, D., Zhang, J., Egger, B., Chatterjee, M., Lohit, S., Marks, T.K., Cherian, A., "AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2026.
BibTeX TR2026-076 PDF Video Data Software
- @inproceedings{Li2026jun,
- author = {Li, Danrui and Zhang, Jiahao and Egger, Bernhard and Chatterjee, Moitreya and Lohit, Suhas and Marks, Tim K. and Cherian, Anoop},
- title = {{AssemblyBench: Physics-Aware Assembly of Complex Industrial Objects}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2026,
- month = jun,
- url = {https://www.merl.com/publications/TR2026-076}
- }
Liu, X., Miraldo, P., Lohit, S., Jiang, H., Sawada, N., Tai, Y.-W., Tang, C.-K., Chatterjee, M., "Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2026.
BibTeX TR2026-077 PDF
- @inproceedings{Liu2026jun,
- author = {Liu, Xinhang and Miraldo, Pedro and Lohit, Suhas and Jiang, Huaizu and Sawada, Naoko and Tai, Yu-Wing and Tang, Chi-Keung and Chatterjee, Moitreya},
- title = {{Point4Cast: Streaming Dynamic Scene Reconstruction and Forecasting}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2026,
- month = jun,
- url = {https://www.merl.com/publications/TR2026-077}
- }
Ding, T., Xie, Y., Liang, Y., Chatterjee, M., Miraldo, P., Jiang, H., "LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2026.
BibTeX TR2026-055 PDF
- @inproceedings{Ding2026may,
- author = {Ding, Tianye and Xie, Yiming and Liang, Yiqing and Chatterjee, Moitreya and Miraldo, Pedro and Jiang, Huaizu},
- title = {{LASER: Layer-wise Scale Alignment for Training-Free Streaming 4D Reconstruction}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2026,
- month = may,
- url = {https://www.merl.com/publications/TR2026-055}
- }
Moosa, I.M., Lohit, S., Wang, Y., Chatterjee, M., Yin, W., "Understanding Dynamic Compute Allocation in Recurrent Transformers", arXiv, February 2026.
BibTeX arXiv
- @article{Moosa2026feb,
- author = {Moosa, Ibraheem Muhammad and Lohit, Suhas and Wang, Ye and Chatterjee, Moitreya and Yin, Wenpeng},
- title = {{Understanding Dynamic Compute Allocation in Recurrent Transformers}},
- journal = {arXiv},
- year = 2026,
- month = feb,
- url = {https://arxiv.org/abs/2602.08864}
- }
Lai, Y.-H., Ebbers, J., Wang, Y.-C.F., Germain, F.G., Jones, M.J., Chatterjee, M., "UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR52734.2025.01266, June 2025, pp. 13561-13570.
BibTeX TR2025-072 PDF
- @inproceedings{Lai2025jun,
- author = {Lai, Yung-Hsuan and Ebbers, Janek and Wang, Yu-Chiang Frank and Germain, François G. and Jones, Michael J. and Chatterjee, Moitreya},
- title = {{UWAV: Uncertainty-weighted Weakly-supervised Audio-Visual Video Parsing}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2025,
- pages = {13561--13570},
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPR52734.2025.01266},
- url = {https://www.merl.com/publications/TR2025-072}
- }
Sawada, N., Miraldo, P., Lohit, S., Marks, T.K., Chatterjee, M., "FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), DOI: 10.1109/CVPRW67362.2025.00041, June 2025, pp. 369-379.
BibTeX TR2025-074 PDF
- @inproceedings{Sawada2025jun,
- author = {Sawada, Naoko and Miraldo, Pedro and Lohit, Suhas and Marks, Tim K. and Chatterjee, Moitreya},
- title = {{FreBIS: Frequency-Based Stratification for Neural Implicit Surface Representations}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2025,
- pages = {369--379},
- month = jun,
- doi = {10.1109/CVPRW67362.2025.00041},
- url = {https://www.merl.com/publications/TR2025-074}
- }
Singh, A., Jones, M.J., Peng, K.-C., Chatterjee, M., Cherian, A., Learned-Miller, E., "Improving Open-World Object Localization by Discovering Background", CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon, May 2025, pp. 6449-6458.
BibTeX TR2025-058 PDF
- @inproceedings{Singh2025may,
- author = {Singh, Ashish and Jones, Michael J. and Peng, Kuan-Chuan and Chatterjee, Moitreya and Cherian, Anoop and Learned-Miller, Erik},
- title = {{Improving Open-World Object Localization by Discovering Background}},
- booktitle = {CVPR Workshop on Domain Generalization: Evolution, Breakthroughs and Future Horizon},
- year = 2025,
- pages = {6449--6458},
- month = may,
- url = {https://www.merl.com/publications/TR2025-058}
- }
Tang, H., Ellis, K., Lohit, S., Jones, M.J., Chatterjee, M., "Programmatic Video Prediction Using Large Language Models", International Conference on Learning Representations Workshops (ICLRW), April 2025.
BibTeX TR2025-049 PDF
- @inproceedings{Tang2025apr,
- author = {Tang, Hao and Ellis, Kevin and Lohit, Suhas and Jones, Michael J. and Chatterjee, Moitreya},
- title = {{Programmatic Video Prediction Using Large Language Models}},
- booktitle = {International Conference on Learning Representations Workshops (ICLRW)},
- year = 2025,
- month = apr,
- url = {https://www.merl.com/publications/TR2025-049}
- }
Pais, G., Piedade, V., Chatterjee, M., Greiff, M., Miraldo, P., "A Probability-guided Sampler for Neural Implicit Surface Rendering", European Conference on Computer Vision (ECCV), Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G., Eds., DOI: 10.1007/978-3-031-72913-3_10, September 2024, pp. 164-182.
BibTeX TR2024-129 PDF Video
- @inproceedings{Pais2024sep,
- author = {Pais, Goncalo and Piedade, Valter and Chatterjee, Moitreya and Greiff, Marcus and Miraldo, Pedro},
- title = {{A Probability-guided Sampler for Neural Implicit Surface Rendering}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2024,
- editor = {Leonardis, A. and Ricci, E. and Roth, S. and Russakovsky, O. and Sattler, T. and Varol, G.},
- pages = {164--182},
- month = sep,
- publisher = {Springer},
- address = {Cham},
- doi = {10.1007/978-3-031-72913-3_10},
- isbn = {978-3-031-72913-3},
- url = {https://www.merl.com/publications/TR2024-129}
- }
Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024, pp. 19667-19679.
BibTeX TR2024-042 PDF Videos Software
- @inproceedings{Liu2024may,
- author = {Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
- title = {{Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2024,
- pages = {19667--19679},
- month = may,
- publisher = {IEEE},
- url = {https://www.merl.com/publications/TR2024-042}
- }
Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v38i4.28167, December 2023, pp. 3765-3773.
BibTeX TR2023-154 PDF
- @inproceedings{Liu2023dec2,
- author = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
- title = {{CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments}},
- booktitle = {Proceedings of the 38th AAAI Conference on Artificial Intelligence},
- year = 2023,
- pages = {3765--3773},
- month = dec,
- doi = {10.1609/aaai.v38i4.28167},
- url = {https://www.merl.com/publications/TR2023-154}
- }
Sharma, M., Chatterjee, M., Peng, K.-C., Lohit, S., Jones, M.J., "Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection", IEEE International Conference on Computer Vision Workshops (ICCVW), October 2023, pp. 924-932.
BibTeX TR2023-125 PDF Presentation
- @inproceedings{Sharma2023oct,
- author = {Sharma, Manish and Chatterjee, Moitreya and Peng, Kuan-Chuan and Lohit, Suhas and Jones, Michael J.},
- title = {{Tensor Factorization for Leveraging Cross-Modal Knowledge in Data-Constrained Infrared Object Detection}},
- booktitle = {IEEE International Conference on Computer Vision Workshops (ICCVW)},
- year = 2023,
- pages = {924--932},
- month = oct,
- url = {https://www.merl.com/publications/TR2023-125}
- }
Liu, X., Paul, S., Chatterjee, M., Cherian, A., "Active Sparse Conversations for Improved Audio-Visual Embodied Navigation", arXiv, June 2023.
BibTeX arXiv
- @article{Liu2023jun,
- author = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
- title = {{Active Sparse Conversations for Improved Audio-Visual Embodied Navigation}},
- journal = {arXiv},
- year = 2023,
- month = jun,
- url = {https://arxiv.org/abs/2306.04047}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation", Advances in Neural Information Processing Systems (NeurIPS), November 2022.
BibTeX TR2022-140 PDF Presentation
- @inproceedings{Chatterjee2022nov,
- author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title = {{Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2022,
- month = nov,
- url = {https://www.merl.com/publications/TR2022-140}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), June 2022.
BibTeX TR2022-082 PDF
- @inproceedings{Chatterjee2022jun,
- author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title = {{Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year = 2022,
- month = jun,
- url = {https://www.merl.com/publications/TR2022-082}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
BibTeX TR2021-096 PDF Video
- @inproceedings{Chatterjee2021oct2,
- author = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title = {{A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2021,
- pages = {9751--9761},
- month = oct,
- url = {https://www.merl.com/publications/TR2021-096}
- }
Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
BibTeX TR2021-095 PDF Video Software
- @inproceedings{Chatterjee2021oct,
- author = {Chatterjee, Moitreya and {Le Roux}, Jonathan and Ahuja, Narendra and Cherian, Anoop},
- title = {{Visual Scene Graphs for Audio Source Separation}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year = 2021,
- pages = {1204--1213},
- month = oct,
- publisher = {CVF},
- url = {https://www.merl.com/publications/TR2021-095}
- }
Geng, S., Gao, P., Chatterjee, M., Hori, C., Le Roux, J., Zhang, Y., Li, H., Cherian, A., "Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers", AAAI Conference on Artificial Intelligence, February 2021, pp. 1415-1423.
BibTeX TR2021-010 PDF
- @inproceedings{Geng2021feb,
- author = {Geng, Shijie and Gao, Peng and Chatterjee, Moitreya and Hori, Chiori and {Le Roux}, Jonathan and Zhang, Yongfeng and Li, Hongsheng and Cherian, Anoop},
- title = {{Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2021,
- pages = {1415--1423},
- month = feb,
- publisher = {AAAI Press},
- address = {Palo Alto, California, USA},
- isbn = {978-1-57735-866-4},
- url = {https://www.merl.com/publications/TR2021-010}
- }
Cherian, A., Chatterjee, M., Ahuja, N., "Sound2Sight: Generating Visual Dynamics from Sound and Context", European Conference on Computer Vision (ECCV), Vedaldi, A. and Bischof, H. and Brox, Th. and Frahm, J.-M., Eds., August 2020.
BibTeX TR2020-121 PDF Software
- @inproceedings{Cherian2020aug,
- author = {Cherian, Anoop and Chatterjee, Moitreya and Ahuja, Narendra},
- title = {{Sound2Sight: Generating Visual Dynamics from Sound and Context}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- year = 2020,
- editor = {Vedaldi, A. and Bischof, H. and Brox, Th. and Frahm, J.-M.},
- month = aug,
- publisher = {Springer},
- url = {https://www.merl.com/publications/TR2020-121}
- }