Publications

Zhu, X., Jha, D.K., Romeres, D., Sun, L., Tomizuka, M., Cherian, A., "Multi-level Reasoning for Robotic Assembly: From Sequence Inference to Contact Selection", IEEE International Conference on Robotics and Automation (ICRA), March 2024, pp. 816-823.
BibTeX TR2024-033 PDF Video
- @inproceedings{Zhu2024mar,
-   author    = {Zhu, Xinghao and Jha, Devesh K. and Romeres, Diego and Sun, Lingfeng and Tomizuka, Masayoshi and Cherian, Anoop},
-   title     = {{Multi-level Reasoning for Robotic Assembly: From Sequence Inference to Contact Selection}},
-   booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
-   year      = {2024},
-   month     = mar,
-   pages     = {816--823},
-   publisher = {IEEE},
-   url       = {https://www.merl.com/publications/TR2024-033},
- }
Hori, C., Wang, P., Rahman, M., Vaca-Rubio, C., Khurana, S., Cherian, A., Le Roux, J., "Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP48485.2024.10447600, March 2024, pp. 13296-13300.
BibTeX TR2024-012 PDF
- @inproceedings{Hori2024mar,
-   author    = {Hori, Chiori and Wang, Pu and Rahman, Mahbub and Vaca-Rubio, Cristian and Khurana, Sameer and Cherian, Anoop and {Le Roux}, Jonathan},
-   title     = {{Wi-Fi based Indoor Monitoring Enhanced by Multimodal Fusion}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2024},
-   month     = mar,
-   pages     = {13296--13300},
-   publisher = {IEEE},
-   doi       = {10.1109/ICASSP48485.2024.10447600},
-   issn      = {2379-190X},
-   isbn      = {979-8-3503-4485-1},
-   url       = {https://www.merl.com/publications/TR2024-012},
- }
Carmichael, Z., Jones, M.J., Lohit, S., Cherian, A., Scheirer, W., "Pixel-Grounded Prototypical Part Networks", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV57701.2024.00470, January 2024.
BibTeX TR2024-002 PDF Video Software Presentation
- @inproceedings{Carmichael2024jan,
-   author    = {Carmichael, Zachariah and Jones, Michael J. and Lohit, Suhas and Cherian, Anoop and Scheirer, Walter},
-   title     = {{Pixel-Grounded Prototypical Part Networks}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   year      = {2024},
-   month     = jan,
-   doi       = {10.1109/WACV57701.2024.00470},
-   url       = {https://www.merl.com/publications/TR2024-002},
- }
Liu, X., Paul, S., Chatterjee, M., Cherian, A., "CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v38i4.28167, December 2023, pp. 3765-3773.
BibTeX TR2023-154 PDF
- @inproceedings{Liu2023dec2,
-   author    = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
-   title     = {{CAVEN: An Embodied Conversational Agent for Efficient Audio-Visual Navigation in Noisy Environments}},
-   booktitle = {Proceedings of the 38th AAAI Conference on Artificial Intelligence},
-   year      = {2023},
-   month     = dec,
-   pages     = {3765--3773},
-   doi       = {10.1609/aaai.v38i4.28167},
-   url       = {https://www.merl.com/publications/TR2023-154},
- }
He, Y., Shin, S., Cherian, A., Markham, A., Trigoni, N., "Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images", IEEE Winter Conference on Applications of Computer Vision (WACV), December 2023, pp. 5496-5507.
BibTeX TR2023-144 PDF
- @inproceedings{He2023dec,
-   author    = {He, Yuhang and Shin, Sangyun and Cherian, Anoop and Markham, Andrew and Trigoni, Niki},
-   title     = {{Sound3DVDet: 3D Sound Source Detection using Multiview Microphone Array and RGB Images}},
-   booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
-   year      = {2023},
-   month     = dec,
-   pages     = {5496--5507},
-   url       = {https://www.merl.com/publications/TR2023-144},
- }
Nair, N.G., Cherian, A., Lohit, S., Wang, Y., Koike-Akino, T., Patel, V.M., Marks, T.K., "Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis", IEEE International Conference on Computer Vision (ICCV), October 2023, pp. 20850-20860.
BibTeX TR2023-126 PDF Software Presentation
- @inproceedings{Nair2023sep,
-   author    = {Nair, Nithin Gopalakrishnan and Cherian, Anoop and Lohit, Suhas and Wang, Ye and Koike-Akino, Toshiaki and Patel, Vishal M. and Marks, Tim K.},
-   title     = {{Steered Diffusion: A Generalized Framework for Plug-and-Play Conditional Image Synthesis}},
-   booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
-   year      = {2023},
-   month     = oct,
-   pages     = {20850--20860},
-   publisher = {IEEE/CVF},
-   url       = {https://www.merl.com/publications/TR2023-126},
- }
Liu, X., Paul, S., Chatterjee, M., Cherian, A., "Active Sparse Conversations for Improved Audio-Visual Embodied Navigation", arXiv, June 2023.
BibTeX arXiv
- @misc{Liu2023jun,
-   author        = {Liu, Xiulong and Paul, Sudipta and Chatterjee, Moitreya and Cherian, Anoop},
-   title         = {{Active Sparse Conversations for Improved Audio-Visual Embodied Navigation}},
-   howpublished  = {arXiv preprint},
-   year          = {2023},
-   month         = jun,
-   eprint        = {2306.04047},
-   archiveprefix = {arXiv},
-   url           = {https://arxiv.org/abs/2306.04047},
- }
Cherian, A., Jain, S., Marks, T.K., Sullivan, A., "Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160644, May 2023, pp. 9296-9302.
BibTeX TR2023-010 PDF Presentation
- @inproceedings{Cherian2023may,
-   author    = {Cherian, Anoop and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
-   title     = {{Discriminative 3D Shape Modeling for Few-Shot Instance Segmentation}},
-   booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
-   year      = {2023},
-   month     = may,
-   pages     = {9296--9302},
-   publisher = {IEEE},
-   doi       = {10.1109/ICRA48891.2023.10160644},
-   url       = {https://www.merl.com/publications/TR2023-010},
- }
Ota, K., Tung, H.-Y., Smith, K., Cherian, A., Marks, T.K., Sullivan, A., Kanezaki, A., Tenenbaum, J.B., "H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions", IEEE International Conference on Robotics and Automation (ICRA), DOI: 10.1109/ICRA48891.2023.10160575, May 2023, pp. 7272-7278.
BibTeX TR2023-009 PDF
- @inproceedings{Ota2023may,
-   author    = {Ota, Kei and Tung, Hsiao-Yu and Smith, Kevin and Cherian, Anoop and Marks, Tim K. and Sullivan, Alan and Kanezaki, Asako and Tenenbaum, Joshua B.},
-   title     = {{H-SAUR: Hypothesize, Simulate, Act, Update, and Repeat for Understanding Object Articulations from Interactions}},
-   booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
-   year      = {2023},
-   month     = may,
-   pages     = {7272--7278},
-   publisher = {IEEE},
-   doi       = {10.1109/ICRA48891.2023.10160575},
-   url       = {https://www.merl.com/publications/TR2023-009},
- }
Shah, A., Roy, A., Shah, K., Mishra, S.K., Jacobs, D., Cherian, A., Chellappa, R., "HaLP: Hallucinating Latent Positives for Skeleton-based Self-Supervised Learning of Actions", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2023, pp. 18846-18856.
BibTeX TR2023-035 PDF
- @inproceedings{Shah2023may,
-   author    = {Shah, Anshul and Roy, Aniket and Shah, Ketul and Mishra, Shlok Kumar and Jacobs, David and Cherian, Anoop and Chellappa, Rama},
-   title     = {{HaLP: Hallucinating Latent Positives for Skeleton-based Self-Supervised Learning of Actions}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   year      = {2023},
-   month     = may,
-   pages     = {18846--18856},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2023-035},
- }
Zhang, J., Cherian, A., Liu, Y., Shabat, I.B., Rodriguez, C., Gould, S., "Aligning Step-by-Step Instructional Diagrams to Video Demonstrations", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2023, pp. 2483-2492.
BibTeX TR2023-034 PDF
- @inproceedings{Zhang2023may,
-   author    = {Zhang, Jiahao and Cherian, Anoop and Liu, Yanbin and Shabat, Itzik Ben and Rodriguez, Cristian and Gould, Stephen},
-   title     = {{Aligning Step-by-Step Instructional Diagrams to Video Demonstrations}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   year      = {2023},
-   month     = may,
-   pages     = {2483--2492},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2023-034},
- }
Cherian, A., Peng, K.-C., Lohit, S., Smith, K., Tenenbaum, J.B., "Are Deep Neural Networks SMARTer than Second Graders?", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), March 2023, pp. 10834-10844.
BibTeX TR2023-014 PDF Video Data Software Presentation
- @inproceedings{Cherian2023mar,
-   author    = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Smith, Kevin and Tenenbaum, Joshua B.},
-   title     = {{Are Deep Neural Networks SMARTer than Second Graders?}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
-   year      = {2023},
-   month     = mar,
-   pages     = {10834--10844},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2023-014},
- }
Liu, T., Cherian, A., "Learning a Constrained Optimizer: A Primal Method", AAAI Bridge on Constraint Programming and Machine Learning, January 2023.
BibTeX TR2023-003 PDF
- @inproceedings{Liu2023jan,
-   author    = {Liu, Tao and Cherian, Anoop},
-   title     = {{Learning a Constrained Optimizer: A Primal Method}},
-   booktitle = {AAAI Bridge on Constraint Programming and Machine Learning},
-   year      = {2023},
-   month     = jan,
-   url       = {https://www.merl.com/publications/TR2023-003},
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation", Advances in Neural Information Processing Systems (NeurIPS), November 2022.
BibTeX TR2022-140 PDF Presentation
- @inproceedings{Chatterjee2022nov,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {{Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation}},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   year      = {2022},
-   month     = nov,
-   url       = {https://www.merl.com/publications/TR2022-140},
- }
Paul, S., Roy Chowdhury, A.K., Cherian, A., "AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments", Advances in Neural Information Processing Systems (NeurIPS), October 2022, pp. 6236-6249.
BibTeX TR2022-131 PDF Video Data Software
- @inproceedings{Paul2022oct2,
-   author    = {Paul, Sudipta and Roy Chowdhury, Amit K. and Cherian, Anoop},
-   title     = {{AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments}},
-   booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
-   year      = {2022},
-   month     = oct,
-   pages     = {6236--6249},
-   url       = {https://www.merl.com/publications/TR2022-131},
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), June 2022.
BibTeX TR2022-082 PDF
- @inproceedings{Chatterjee2022jun,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {{Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio}},
-   booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
-   year      = {2022},
-   month     = jun,
-   url       = {https://www.merl.com/publications/TR2022-082},
- }
Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
-   author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and {Le Roux}, Jonathan and Hori, Chiori},
-   title     = {{Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning}},
-   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
-   year      = {2022},
-   month     = apr,
-   pages     = {7732--7736},
-   publisher = {IEEE},
-   issn      = {1520-6149},
-   isbn      = {978-1-6654-0540-9},
-   url       = {https://www.merl.com/publications/TR2022-019},
- }
Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", Dialog System Technology Challenge Workshop at AAAI, February 2022.
BibTeX TR2022-016 PDF
- @inproceedings{Hori2022feb,
-   author    = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and {Le Roux}, Jonathan and Marks, Tim K.},
-   title     = {{Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10}},
-   booktitle = {Dialog System Technology Challenge Workshop at AAAI},
-   year      = {2022},
-   month     = feb,
-   url       = {https://www.merl.com/publications/TR2022-016},
- }
Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
BibTeX TR2022-014 PDF Video Presentation
- @inproceedings{Cherian2022feb,
-   author    = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and {Le Roux}, Jonathan},
-   title     = {{(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering}},
-   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
-   year      = {2022},
-   month     = feb,
-   pages     = {444--453},
-   doi       = {10.1609/aaai.v36i1.19922},
-   url       = {https://www.merl.com/publications/TR2022-014},
- }
Shah, A., Sra, S., Chellappa, R., Cherian, A., "Max-Margin Contrastive Learning", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i8.20796, February 2022, pp. 8220-8230.
BibTeX TR2022-013 PDF
- @inproceedings{Shah2022feb,
-   author    = {Shah, Anshul and Sra, Suvrit and Chellappa, Rama and Cherian, Anoop},
-   title     = {{Max-Margin Contrastive Learning}},
-   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
-   year      = {2022},
-   month     = feb,
-   pages     = {8220--8230},
-   doi       = {10.1609/aaai.v36i8.20796},
-   url       = {https://www.merl.com/publications/TR2022-013},
- }
Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
BibTeX TR2022-011 PDF Video Data Presentation
- @inproceedings{Medin2022feb,
-   author    = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
-   title     = {{MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation}},
-   booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
-   year      = {2022},
-   month     = feb,
-   pages     = {1962--1971},
-   doi       = {10.1609/aaai.v36i2.20091},
-   url       = {https://www.merl.com/publications/TR2022-011},
- }
Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
BibTeX TR2021-096 PDF Video
- @inproceedings{Chatterjee2021oct2,
-   author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
-   title     = {{A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction}},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   year      = {2021},
-   month     = oct,
-   pages     = {9751--9761},
-   url       = {https://www.merl.com/publications/TR2021-096},
- }
Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
BibTeX TR2021-095 PDF Video Software
- @inproceedings{Chatterjee2021oct,
-   author    = {Chatterjee, Moitreya and {Le Roux}, Jonathan and Ahuja, Narendra and Cherian, Anoop},
-   title     = {{Visual Scene Graphs for Audio Source Separation}},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   year      = {2021},
-   month     = oct,
-   pages     = {1204--1213},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2021-095},
- }
Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
BibTeX TR2021-097 PDF Video Data Software
- @inproceedings{Cherian2021oct,
-   author    = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
-   title     = {{InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images}},
-   booktitle = {IEEE International Conference on Computer Vision (ICCV)},
-   year      = {2021},
-   month     = oct,
-   pages     = {10023--10032},
-   publisher = {CVF},
-   url       = {https://www.merl.com/publications/TR2021-097},
- }
Cherian, A., Wang, J., "Generalized One-Class Learning Using Pairs of Complementary Classifiers", IEEE Transactions on Pattern Analysis and Machine Intelligence, DOI: 10.1109/TPAMI.2021.3092999, June 2021.
BibTeX TR2021-076 PDF Software
- @article{Cherian2021jun,
-   author  = {Cherian, Anoop and Wang, Jue},
-   title   = {{Generalized One-Class Learning Using Pairs of Complementary Classifiers}},
-   journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
-   year    = {2021},
-   month   = jun,
-   doi     = {10.1109/TPAMI.2021.3092999},
-   url     = {https://www.merl.com/publications/TR2021-076},
- }