Publications

Cherian, A., Peng, K.-C., Lohit, S., Smith, K., Tenenbaum, J.B., "Are Deep Neural Networks SMARTer than Second Graders?", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), March 2023, pp. 10834-10844.
BibTeX TR2023-014 PDF Video Data Software Presentation
- @inproceedings{Cherian2023mar,
- author    = {Cherian, Anoop and Peng, Kuan-Chuan and Lohit, Suhas and Smith, Kevin and Tenenbaum, Joshua B.},
- title     = {{Are Deep Neural Networks SMARTer than Second Graders?}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year      = {2023},
- month     = mar,
- pages     = {10834--10844},
- publisher = {CVF},
- url       = {https://www.merl.com/publications/TR2023-014}
- }
Liu, T., Cherian, A., "Learning a Constrained Optimizer: A Primal Method", AAAI Bridge on Constraint Programming and Machine Learning, January 2023.
BibTeX TR2023-003 PDF
- @inproceedings{Liu2023jan,
- author    = {Liu, Tao and Cherian, Anoop},
- title     = {{Learning a Constrained Optimizer: A Primal Method}},
- booktitle = {AAAI Bridge on Constraint Programming and Machine Learning},
- year      = {2023},
- month     = jan,
- url       = {https://www.merl.com/publications/TR2023-003}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation", Advances in Neural Information Processing Systems (NeurIPS), November 2022.
BibTeX TR2022-140 PDF Presentation
- @inproceedings{Chatterjee2022nov,
- author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title     = {{Learning Audio-Visual Dynamics Using Scene Graphs for Audio Source Separation}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year      = {2022},
- month     = nov,
- url       = {https://www.merl.com/publications/TR2022-140}
- }
Paul, S., Roy Chowdhury, A.K., Cherian, A., "AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments", Advances in Neural Information Processing Systems (NeurIPS), October 2022, pp. 6236-6249.
BibTeX TR2022-131 PDF Video Data Software
- @inproceedings{Paul2022oct2,
- author = {Paul, Sudipta and Roy Chowdhury, Amit K. and Cherian, Anoop},
- title = {{AVLEN: Audio-Visual-Language Embodied Navigation in 3D Environments}},
- booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
- year = 2022,
- pages = {6236--6249},
- month = oct,
- url = {https://www.merl.com/publications/TR2022-131}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), June 2022.
BibTeX TR2022-082 PDF
- @inproceedings{Chatterjee2022jun,
- author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title     = {{Quantifying Predictive Uncertainty for Stochastic Video Synthesis from Audio}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
- year      = {2022},
- month     = jun,
- url       = {https://www.merl.com/publications/TR2022-082}
- }
Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
BibTeX TR2022-019 PDF
- @inproceedings{Shah2022apr,
- author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and {Le Roux}, Jonathan and Hori, Chiori},
- title     = {{Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year      = {2022},
- month     = apr,
- pages     = {7732--7736},
- publisher = {IEEE},
- issn      = {1520-6149},
- isbn      = {978-1-6654-0540-9},
- url       = {https://www.merl.com/publications/TR2022-019}
- }
Hori, C., Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Le Roux, J., Marks, T.K., "Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10", The 10th Dialog System Technology Challenge Workshop at AAAI, February 2022.
BibTeX TR2022-016 PDF
- @inproceedings{Hori2022feb,
- author    = {Hori, Chiori and Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and {Le Roux}, Jonathan and Marks, Tim K.},
- title     = {{Overview of Audio Visual Scene-Aware Dialog with Reasoning Track for Natural Language Generation in DSTC10}},
- booktitle = {The 10th Dialog System Technology Challenge Workshop at AAAI},
- year      = {2022},
- month     = feb,
- url       = {https://www.merl.com/publications/TR2022-016}
- }
Cherian, A., Hori, C., Marks, T.K., Le Roux, J., "(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i1.19922, February 2022, pp. 444-453.
BibTeX TR2022-014 PDF Video Presentation
- @inproceedings{Cherian2022feb,
- author = {Cherian, Anoop and Hori, Chiori and Marks, Tim K. and {Le Roux}, Jonathan},
- title = {{(2.5+1)D Spatio-Temporal Scene Graphs for Video Question Answering}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {444--453},
- month = feb,
- doi = {10.1609/aaai.v36i1.19922},
- url = {https://www.merl.com/publications/TR2022-014}
- }
Shah, A., Sra, S., Chellappa, R., Cherian, A., "Max-Margin Contrastive Learning", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i8.20796, February 2022, pp. 8220-8230.
BibTeX TR2022-013 PDF
- @inproceedings{Shah2022feb,
- author = {Shah, Anshul and Sra, Suvrit and Chellappa, Rama and Cherian, Anoop},
- title = {{Max-Margin Contrastive Learning}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2022,
- pages = {8220--8230},
- month = feb,
- doi = {10.1609/aaai.v36i8.20796},
- url = {https://www.merl.com/publications/TR2022-013}
- }
Medin, S.C., Egger, B., Cherian, A., Wang, Y., Tenenbaum, J.B., Liu, X., Marks, T.K., "MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation", AAAI Conference on Artificial Intelligence, DOI: 10.1609/aaai.v36i2.20091, February 2022, pp. 1962-1971.
BibTeX TR2022-011 PDF Video Data Presentation
- @inproceedings{Medin2022feb,
- author    = {Medin, Safa C. and Egger, Bernhard and Cherian, Anoop and Wang, Ye and Tenenbaum, Joshua B. and Liu, Xiaoming and Marks, Tim K.},
- title     = {{MOST-GAN: 3D Morphable StyleGAN for Disentangled Face Image Manipulation}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year      = {2022},
- month     = feb,
- pages     = {1962--1971},
- doi       = {10.1609/aaai.v36i2.20091},
- url       = {https://www.merl.com/publications/TR2022-011}
- }
Chatterjee, M., Ahuja, N., Cherian, A., "A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 9751-9761.
BibTeX TR2021-096 PDF Video
- @inproceedings{Chatterjee2021oct2,
- author    = {Chatterjee, Moitreya and Ahuja, Narendra and Cherian, Anoop},
- title     = {{A Hierarchical Variational Neural Uncertainty Model for Stochastic Video Prediction}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year      = {2021},
- month     = oct,
- pages     = {9751--9761},
- url       = {https://www.merl.com/publications/TR2021-096}
- }
Chatterjee, M., Le Roux, J., Ahuja, N., Cherian, A., "Visual Scene Graphs for Audio Source Separation", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 1204-1213.
BibTeX TR2021-095 PDF Video Software
- @inproceedings{Chatterjee2021oct,
- author    = {Chatterjee, Moitreya and {Le Roux}, Jonathan and Ahuja, Narendra and Cherian, Anoop},
- title     = {{Visual Scene Graphs for Audio Source Separation}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year      = {2021},
- month     = oct,
- pages     = {1204--1213},
- publisher = {CVF},
- url       = {https://www.merl.com/publications/TR2021-095}
- }
Cherian, A., Pais, G., Jain, S., Marks, T.K., Sullivan, A., "InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images", IEEE International Conference on Computer Vision (ICCV), October 2021, pp. 10023-10032.
BibTeX TR2021-097 PDF Video Data Software Presentation
- @inproceedings{Cherian2021oct,
- author    = {Cherian, Anoop and Pais, Goncalo and Jain, Siddarth and Marks, Tim K. and Sullivan, Alan},
- title     = {{InSeGAN: A Generative Approach to Segmenting Identical Instances in Depth Images}},
- booktitle = {IEEE International Conference on Computer Vision (ICCV)},
- year      = {2021},
- month     = oct,
- pages     = {10023--10032},
- publisher = {CVF},
- url       = {https://www.merl.com/publications/TR2021-097}
- }
Cherian, A., Wang, J., "Generalized One-Class Learning Using Pairs of Complementary Classifiers", IEEE Transactions on Pattern Analysis and Machine Intelligence, DOI: 10.1109/TPAMI.2021.3092999, June 2021.
BibTeX TR2021-076 PDF Software
- @article{Cherian2021jun,
- author  = {Cherian, Anoop and Wang, Jue},
- title   = {{Generalized One-Class Learning Using Pairs of Complementary Classifiers}},
- journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
- year    = {2021},
- month   = jun,
- doi     = {10.1109/TPAMI.2021.3092999},
- url     = {https://www.merl.com/publications/TR2021-076}
- }
Kim, S., Galley, M., Gunasekara, C., Lee, S., Atkinson, A., Peng, B., Schulz, H., Gao, J., Li, J., Adada, M., Huang, M., Lastras, L., Kummerfeld, J.K., Lasecki, W.S., Hori, C., Cherian, A., Marks, T.K., Rastogi, A., Zang, X., Sunkara, S., Gupta, R., "Overview of the Eighth Dialog System Technology Challenge: DSTC8", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3078368, May 2021.
BibTeX TR2021-064 PDF
- @article{Kim2021may,
- author  = {Kim, Seokhwan and Galley, Michel and Gunasekara, Chulaka and Lee, Sungjin and Atkinson, Adam and Peng, Baolin and Schulz, Hannes and Gao, Jianfeng and Li, Jinchao and Adada, Mahmoud and Huang, Minlie and Lastras, Luis and Kummerfeld, Jonathan K. and Lasecki, Walter S. and Hori, Chiori and Cherian, Anoop and Marks, Tim K. and Rastogi, Abhinav and Zang, Xiaoxue and Sunkara, Srinivas and Gupta, Raghav},
- title   = {{Overview of the Eighth Dialog System Technology Challenge: DSTC8}},
- journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
- year    = {2021},
- month   = may,
- doi     = {10.1109/TASLP.2021.3078368},
- issn    = {2329-9290},
- url     = {https://www.merl.com/publications/TR2021-064}
- }
Hori, C., Tsuchiya, M., Chen, S., Cherian, A., Hori, T., Harsham, B.A., Marks, T.K., Le Roux, J., Sullivan, A., Vetro, A., "マルチモーダルセンシング情報に基づくScene-aware Interaction 技術", Society of Automotive Engineers of Japan, Vol. 75, No. 5, pp. 66-71, May 2021.
BibTeX TR2021-042 PDF Video
- @article{Hori2021may,
- author  = {Hori, Chiori and Tsuchiya, Masato and Chen, Siheng and Cherian, Anoop and Hori, Takaaki and Harsham, Bret A. and Marks, Tim K. and {Le Roux}, Jonathan and Sullivan, Alan and Vetro, Anthony},
- title   = {{マルチモーダルセンシング情報に基づくScene-aware Interaction 技術}},
- journal = {Society of Automotive Engineers of Japan},
- year    = {2021},
- month   = may,
- volume  = {75},
- number  = {5},
- pages   = {66--71},
- url     = {https://www.merl.com/publications/TR2021-042}
- }
Geng, S., Gao, P., Chatterjee, M., Hori, C., Le Roux, J., Zhang, Y., Li, H., Cherian, A., "Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers", AAAI Conference on Artificial Intelligence, February 2021, pp. 1415-1423.
BibTeX TR2021-010 PDF
- @inproceedings{Geng2021feb,
- author = {Geng, Shijie and Gao, Peng and Chatterjee, Moitreya and Hori, Chiori and {Le Roux}, Jonathan and Zhang, Yongfeng and Li, Hongsheng and Cherian, Anoop},
- title = {{Dynamic Graph Representation Learning for Video Dialog via Multi-Modal Shuffled Transformers}},
- booktitle = {AAAI Conference on Artificial Intelligence},
- year = 2021,
- pages = {1415--1423},
- month = feb,
- publisher = {AAAI Press},
- address = {Palo Alto, California, USA},
- isbn = {978-1-57735-866-4},
- url = {https://www.merl.com/publications/TR2021-010}
- }
Benosman, M., Cherian, A., Romero, O., "Optimizing Deep Neural Networks via Discretization of Finite-Time Convergent Flows", arXiv, October 2020.
BibTeX arXiv
- @article{Benosman2020oct,
- author = {Benosman, Mouhacine and Cherian, Anoop and Romero, Orlando},
- title = {{Optimizing Deep Neural Networks via Discretization of Finite-Time Convergent Flows}},
- journal = {arXiv},
- year = 2020,
- month = oct,
- eprint = {2010.02990},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2010.02990}
- }
Cherian, A., Chatterjee, M., Ahuja, N., "Sound2Sight: Generating Visual Dynamics from Sound and Context", European Conference on Computer Vision (ECCV), Vedaldi, A. and Bischof, H. and Brox, Th. and Frahm, J.-M., Eds., August 2020.
BibTeX TR2020-121 PDF Software
- @inproceedings{Cherian2020aug,
- author    = {Cherian, Anoop and Chatterjee, Moitreya and Ahuja, Narendra},
- title     = {{Sound2Sight: Generating Visual Dynamics from Sound and Context}},
- booktitle = {European Conference on Computer Vision (ECCV)},
- editor    = {Vedaldi, A. and Bischof, H. and Brox, Th. and Frahm, J.-M.},
- year      = {2020},
- month     = aug,
- publisher = {Springer},
- url       = {https://www.merl.com/publications/TR2020-121}
- }
Geng, S., Gao, P., Hori, C., Le Roux, J., Cherian, A., "Spatio-Temporal Scene Graphs for Video Dialog", arXiv, July 2020.
BibTeX arXiv
- @article{Geng2020jul,
- author = {Geng, Shijie and Gao, Peng and Hori, Chiori and {Le Roux}, Jonathan and Cherian, Anoop},
- title = {{Spatio-Temporal Scene Graphs for Video Dialog}},
- journal = {arXiv},
- year = 2020,
- month = jul,
- eprint = {2007.03848},
- archiveprefix = {arXiv},
- url = {https://arxiv.org/abs/2007.03848}
- }
Cherian, A., Aeron, S., "Representation Learning via Adversarially-Contrastive Optimal Transport", International Conference on Machine Learning (ICML), Daumé, H. and Singh, A., Eds., July 2020, pp. 10675-10685.
BibTeX TR2020-093 PDF Software
- @inproceedings{Cherian2020jul,
- author = {Cherian, Anoop and Aeron, Shuchin},
- title = {{Representation Learning via Adversarially-Contrastive Optimal Transport}},
- booktitle = {International Conference on Machine Learning (ICML)},
- year = 2020,
- editor = {Daum{\'e}, H. and Singh, A.},
- pages = {10675--10685},
- month = jul,
- url = {https://www.merl.com/publications/TR2020-093}
- }
Kumar, A., Marks, T.K., Mou, W., Wang, Y., Cherian, A., Jones, M.J., Liu, X., Koike-Akino, T., Feng, C., "LUVLi Face Alignment: Estimating Landmarks’ Location, Uncertainty, and Visibility Likelihood", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPR42600.2020.00826, June 2020.
BibTeX TR2020-067 PDF Video Data Software
- @inproceedings{Kumar2020jun,
- author = {Kumar, Abhinav and Marks, Tim K. and Mou, Wenxuan and Wang, Ye and Cherian, Anoop and Jones, Michael J. and Liu, Xiaoming and Koike-Akino, Toshiaki and Feng, Chen},
- title = {{LUVLi Face Alignment: Estimating Landmarks' Location, Uncertainty, and Visibility Likelihood}},
- booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
- year = 2020,
- month = jun,
- publisher = {IEEE},
- doi = {10.1109/CVPR42600.2020.00826},
- issn = {2575-7075},
- isbn = {978-1-7281-7168-5},
- url = {https://www.merl.com/publications/TR2020-067}
- }
Cherian, A., Wang, J., Hori, C., Marks, T.K., "Spatio-Temporal Ranked-Attention Networks for Video Captioning", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV45572.2020.9093291, February 2020, pp. 1606-1615.
BibTeX TR2020-016 PDF
- @inproceedings{Cherian2020feb,
- author    = {Cherian, Anoop and Wang, Jue and Hori, Chiori and Marks, Tim K.},
- title     = {{Spatio-Temporal Ranked-Attention Networks for Video Captioning}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year      = {2020},
- month     = feb,
- pages     = {1606--1615},
- publisher = {IEEE},
- doi       = {10.1109/WACV45572.2020.9093291},
- url       = {https://www.merl.com/publications/TR2020-016}
- }
Huang, R., Xu, W., Lee, T.-Y., Cherian, A., Wang, Y., Marks, T.K., "FX-GAN: Self-Supervised GAN Learning via Feature Exchange", IEEE Winter Conference on Applications of Computer Vision (WACV), DOI: 10.1109/WACV45572.2020.9093525, February 2020, pp. 3183-3191.
BibTeX TR2020-014 PDF
- @inproceedings{Huang2020feb,
- author    = {Huang, Rui and Xu, Wenju and Lee, Teng-Yok and Cherian, Anoop and Wang, Ye and Marks, Tim K.},
- title     = {{FX-GAN: Self-Supervised GAN Learning via Feature Exchange}},
- booktitle = {IEEE Winter Conference on Applications of Computer Vision (WACV)},
- year      = {2020},
- month     = feb,
- pages     = {3183--3191},
- publisher = {IEEE},
- doi       = {10.1109/WACV45572.2020.9093525},
- url       = {https://www.merl.com/publications/TR2020-014}
- }
Wu, Y., Marks, T.K., Cherian, A., Chen, S., Feng, C., Wang, G., Sullivan, A., "Unsupervised Joint 3D Object Model Learning and 6D Pose Estimation for Depth-Based Instance Segmentation", IEEE ICCV Workshop on Recovering 6D Object Pose, DOI: 10.1109/ICCVW.2019.00339, October 2019, pp. 2777-2786.
BibTeX TR2019-118 PDF
- @inproceedings{Wu2019oct,
- author    = {Wu, Yuanwei and Marks, Tim K. and Cherian, Anoop and Chen, Siheng and Feng, Chen and Wang, Guanghui and Sullivan, Alan},
- title     = {{Unsupervised Joint 3D Object Model Learning and 6D Pose Estimation for Depth-Based Instance Segmentation}},
- booktitle = {IEEE ICCV Workshop on Recovering 6D Object Pose},
- year      = {2019},
- month     = oct,
- pages     = {2777--2786},
- doi       = {10.1109/ICCVW.2019.00339},
- url       = {https://www.merl.com/publications/TR2019-118}
- }