Publications

642 / 3,769 publications found.


  •  Ebbers, J., Germain, F.G., Wichern, G., Le Roux, J., "Sound Event Bounding Boxes", Interspeech, DOI: 10.21437/Interspeech.2024-2075, September 2024, pp. 562-566.
    BibTeX TR2024-118 PDF Software
    • @inproceedings{Ebbers2024sep,
    • author = {Ebbers, Janek and Germain, François G and Wichern, Gordon and Le Roux, Jonathan},
    • title = {Sound Event Bounding Boxes},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {562--566},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-2075},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-118}
    • }
  •  Khurana, S., Hori, C., Laurent, A., Wichern, G., Le Roux, J., "ZeroST: Zero-Shot Speech Translation", Interspeech, DOI: 10.21437/Interspeech.2024-1088, September 2024, pp. 392-396.
    BibTeX TR2024-122 PDF
    • @inproceedings{Khurana2024sep,
    • author = {Khurana, Sameer and Hori, Chiori and Laurent, Antoine and Wichern, Gordon and Le Roux, Jonathan},
    • title = {ZeroST: Zero-Shot Speech Translation},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {392--396},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1088},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-122}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Saijo, K., Le Roux, J., "PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1066, September 2024, pp. 582-586.
    BibTeX TR2024-124 PDF
    • @inproceedings{Pan2024sep,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Saijo, Kohei and Le Roux, Jonathan},
    • title = {PARIS: Pseudo-AutoRegressIve Siamese Training for Online Speech Separation},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {582--586},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1066},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-124}
    • }
  •  Saijo, K., Wichern, G., Germain, F.G., Pan, Z., Le Roux, J., "Enhanced Reverberation as Supervision for Unsupervised Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2024-1241, September 2024, pp. 607-611.
    BibTeX TR2024-116 PDF Software
    • @inproceedings{Saijo2024sep,
    • author = {Saijo, Kohei and Wichern, Gordon and Germain, François G and Pan, Zexu and Le Roux, Jonathan},
    • title = {Enhanced Reverberation as Supervision for Unsupervised Speech Separation},
    • booktitle = {Interspeech},
    • year = 2024,
    • pages = {607--611},
    • month = sep,
    • doi = {10.21437/Interspeech.2024-1241},
    • issn = {2958-1796},
    • url = {https://www.merl.com/publications/TR2024-116}
    • }
  •  Koike-Akino, T., Meng Chang, Cevher, V., De Micheli, G., "Hardware-Efficient Quantization for Green Custom Foundation Models", International Conference on Machine Learning (ICML), July 2024.
    BibTeX TR2024-105 PDF Presentation
    • @inproceedings{Koike-Akino2024jul2,
    • author = {Koike-Akino, Toshiaki and Meng Chang and Cevher, Volkan and De Micheli, Giovanni},
    • title = {Hardware-Efficient Quantization for Green Custom Foundation Models},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2024,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2024-105}
    • }
  •  Liu, J., Lowy, A., Koike-Akino, T., Parsons, K., Wang, Y., "Efficient Differentially Private Fine-Tuning of Diffusion Models", International Conference on Machine Learning (ICML) workshop (Next Generation of AI Safety), July 2024.
    BibTeX TR2024-104 PDF Presentation
    • @inproceedings{Liu2024jul,
    • author = {Liu, Jing and Lowy, Andrew and Koike-Akino, Toshiaki and Parsons, Kieran and Wang, Ye},
    • title = {Efficient Differentially Private Fine-Tuning of Diffusion Models},
    • booktitle = {International Conference on Machine Learning (ICML) workshop (Next Generation of AI Safety)},
    • year = 2024,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2024-104}
    • }
  •  Bimbraw, K., Liu, J., Wang, Y., Koike-Akino, T., "Random Channel Ablation for Robust Hand Gesture Classification with Multimodal Biosignals", International Conference of the IEEE Engineering in Medicine and Biology Society, DOI: 10.1109/EMBC53108.2024.10782851, July 2024.
    BibTeX TR2024-103 PDF
    • @inproceedings{Bimbraw2024jul3,
    • author = {Bimbraw, Keshav and Liu, Jing and Wang, Ye and Koike-Akino, Toshiaki},
    • title = {Random Channel Ablation for Robust Hand Gesture Classification with Multimodal Biosignals},
    • booktitle = {International Conference of the IEEE Engineering in Medicine and Biology Society},
    • year = 2024,
    • month = jul,
    • publisher = {IEEE},
    • doi = {10.1109/EMBC53108.2024.10782851},
    • issn = {2694-0604},
    • isbn = {979-8-3503-7149-9},
    • url = {https://www.merl.com/publications/TR2024-103}
    • }
  •  Koike-Akino, T., Cevher, V., "Quantum-PEFT: Ultra Parameter-Efficient Fine-Tuning", International Conference on Machine Learning (ICML), July 2024.
    BibTeX TR2024-101 PDF Presentation
    • @inproceedings{Koike-Akino2024jul,
    • author = {Koike-Akino, Toshiaki and Cevher, Volkan},
    • title = {Quantum-PEFT: Ultra Parameter-Efficient Fine-Tuning},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2024,
    • month = jul,
    • url = {https://www.merl.com/publications/TR2024-101}
    • }
  •  Zhang, X., Mao, W., Mowlavi, S., Benosman, M., Basar, T., "Controlgym: Large-Scale Control Environments for Benchmarking Reinforcement Learning Algorithms", Learning for Dynamics & Control Conference (L4DC), July 2024, pp. 181-196.
    BibTeX TR2024-098 PDF
    • @inproceedings{Zhang2024jul2,
    • author = {Zhang, Xiangyuan and Mao, Weichao and Mowlavi, Saviz and Benosman, Mouhacine and Basar, Tamer},
    • title = {Controlgym: Large-Scale Control Environments for Benchmarking Reinforcement Learning Algorithms},
    • booktitle = {Learning for Dynamics \& Control Conference (L4DC)},
    • year = 2024,
    • pages = {181--196},
    • month = jul,
    • publisher = {PMLR},
    • url = {https://www.merl.com/publications/TR2024-098}
    • }
  •  Chen, X., Liu, J., Wang, Y., Wang, P., Brand, M., Wang, G., Koike-Akino, T., "SuperLoRA: Parameter-Efficient Unified Adaptation for Large Vision Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/CVPRW63382.2024.00804, June 2024, pp. 8050-8055.
    BibTeX TR2024-062 PDF Presentation
    • @inproceedings{Chen2024jun,
    • author = {Chen, Xiangyu and Liu, Jing and Wang, Ye and Wang, Pu and Brand, Matthew and Wang, Guanghui and Koike-Akino, Toshiaki},
    • title = {SuperLoRA: Parameter-Efficient Unified Adaptation for Large Vision Models},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {8050--8055},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/CVPRW63382.2024.00804},
    • url = {https://www.merl.com/publications/TR2024-062}
    • }
  •  Ho, C.-H., Peng, K.-C., Vasconcelos, N., "Long-Tailed Anomaly Detection with Learnable Class Names", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z., Eds., DOI: 10.1109/CVPR52733.2024.01182, June 2024, pp. 12435-12446.
    BibTeX TR2024-040 PDF Video Data Presentation
    • @inproceedings{Ho2024jun,
    • author = {Ho, Chih-Hui and Peng, Kuan-Chuan and Vasconcelos, Nuno},
    • title = {Long-Tailed Anomaly Detection with Learnable Class Names},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • editor = {Farhadi, A. and Crandall, D. and Sato, I. and Wu, J. and Pless, R. and Akata, Z.},
    • pages = {12435--12446},
    • month = jun,
    • publisher = {IEEE},
    • doi = {10.1109/CVPR52733.2024.01182},
    • issn = {2575-7075},
    • isbn = {979-8-3503-5300-6},
    • url = {https://www.merl.com/publications/TR2024-040}
    • }
  •  Ni, H., Egger, B., Lohit, S., Cherian, A., Wang, Y., Koike-Akino, T., Huang, S.X., Marks, T.K., "TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2024, pp. 9015-9025.
    BibTeX TR2024-059 PDF Video Software Presentation
    • @inproceedings{Ni2024jun,
    • author = {Ni, Haomiao and Egger, Bernhard and Lohit, Suhas and Cherian, Anoop and Wang, Ye and Koike-Akino, Toshiaki and Huang, Sharon X. and Marks, Tim K.},
    • title = {TI2V-Zero: Zero-Shot Image Conditioning for Text-to-Video Diffusion Models},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {9015--9025},
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-059}
    • }
  •  Giammarino, V., Queeney, J., Paschalidis, I.C., "Adversarial Imitation Learning from Visual Observations using Latent Information", Transactions on Machine Learning Research (TMLR), June 2024.
    BibTeX TR2024-068 PDF
    • @article{Giammarino2024jun,
    • author = {Giammarino, Vittorio and Queeney, James and Paschalidis, Ioannis Ch.},
    • title = {Adversarial Imitation Learning from Visual Observations using Latent Information},
    • journal = {Transactions on Machine Learning Research (TMLR)},
    • year = 2024,
    • month = jun,
    • issn = {2835-8856},
    • url = {https://www.merl.com/publications/TR2024-068}
    • }
  •  He, Y., Cherian, A., Wichern, G., Markham, A., "Deep Neural Room Acoustics Primitive", International Conference on Machine Learning (ICML), June 2024, pp. 17842-17857.
    BibTeX TR2024-072 PDF
    • @inproceedings{He2024jun,
    • author = {He, Yuhang and Cherian, Anoop and Wichern, Gordon and Markham, Andrew},
    • title = {Deep Neural Room Acoustics Primitive},
    • booktitle = {International Conference on Machine Learning (ICML)},
    • year = 2024,
    • pages = {17842--17857},
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-072}
    • }
  •  Kambara, M., Hori, C., Sugiura, K., Ota, K., Jha, D.K., Khurana, S., Jain, S., Corcodel, R., Romeres, D., Le Roux, J., "Human Action Understanding-based Robot Planning using Multimodal LLM", IEEE International Conference on Robotics and Automation (ICRA) Workshop, June 2024.
    BibTeX TR2024-066 PDF
    • @inproceedings{Kambara2024jun,
    • author = {Kambara, Motonari and Hori, Chiori and Sugiura, Komei and Ota, Kei and Jha, Devesh K. and Khurana, Sameer and Jain, Siddarth and Corcodel, Radu and Romeres, Diego and Le Roux, Jonathan},
    • title = {Human Action Understanding-based Robot Planning using Multimodal LLM},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA) Workshop},
    • year = 2024,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2024-066}
    • }
  •  Liu, X., Tai, Y.-W., Tang, C.-K., Miraldo, P., Lohit, S., Chatterjee, M., "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), May 2024, pp. 19667-19679.
    BibTeX TR2024-042 PDF Videos Software
    • @inproceedings{Liu2024may,
    • author = {Liu, Xinhang and Tai, Yu-wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
    • title = {Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {19667--19679},
    • month = may,
    • publisher = {IEEE},
    • url = {https://www.merl.com/publications/TR2024-042}
    • }
  •  Singh, A., Jones, M.J., Learned-Miller, E., "Tracklet-based Explainable Video Anomaly Localization", IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops, May 2024, pp. 3992-4001.
    BibTeX TR2024-057 PDF
    • @inproceedings{Singh2024may,
    • author = {Singh, Ashish and Jones, Michael J. and Learned-Miller, Erik},
    • title = {Tracklet-based Explainable Video Anomaly Localization},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR) Workshops},
    • year = 2024,
    • pages = {3992--4001},
    • month = may,
    • url = {https://www.merl.com/publications/TR2024-057}
    • }
  •  Schperberg, A., Tanaka, Y., Mowlavi, S., Xu, F., Balaji, B., Hong, D., "OptiState: State Estimation of Legged Robots using Gated Networks with Transformer-based Vision and Kalman Filtering", IEEE International Conference on Robotics and Automation (ICRA), May 2024.
    BibTeX TR2024-054 PDF
    • @inproceedings{Schperberg2024may,
    • author = {Schperberg, Alexander and Tanaka, Yusuke and Mowlavi, Saviz and Xu, Feng and Balaji, Bharathan and Hong, Dennis},
    • title = {OptiState: State Estimation of Legged Robots using Gated Networks with Transformer-based Vision and Kalman Filtering},
    • booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
    • year = 2024,
    • month = may,
    • publisher = {IEEE},
    • url = {https://www.merl.com/publications/TR2024-054}
    • }
  •  Uhlich, S., Fabbro, G., Hirano, M., Takahashi, S., Wichern, G., Le Roux, J., Chakraborty, D., Mohanty, S., Li, K., Luo, Y., Yu, J., Gu, R., Solovyev, R., Stempkovskiy, A., Habruseva, T., Sukhovei, M., Mitsufuji, Y., "The Sound Demixing Challenge 2023 – Cinematic Demixing Track", Transactions of the International Society for Music Information Retrieval, DOI: 10.5334/tismir.172, Vol. 7, No. 1, pp. 44-62, May 2024.
    BibTeX TR2024-047 PDF
    • @article{Uhlich2024may,
    • author = {Uhlich, Stefan and Fabbro, Giorgio and Hirano, Masato and Takahashi, Shusuke and Wichern, Gordon and Le Roux, Jonathan and Chakraborty, Dipam and Mohanty, Sharada and Li, Kai and Luo, Yi and Yu, Jianwei and Gu, Rongzhi and Solovyev, Roman and Stempkovskiy, Alexander and Habruseva, Tatiana and Sukhovei, Mikhail and Mitsufuji, Yuki},
    • title = {The Sound Demixing Challenge 2023 – Cinematic Demixing Track},
    • journal = {Transactions of the International Society for Music Information Retrieval},
    • year = 2024,
    • volume = 7,
    • number = 1,
    • pages = {44--62},
    • month = may,
    • doi = {10.5334/tismir.172},
    • url = {https://www.merl.com/publications/TR2024-047}
    • }
  •  Vaca-Rubio, C., Wang, P., Koike-Akino, T., Wang, Y., Boufounos, P.T., Popovski, P., "Object Trajectory Estimation with Continuous-Time Neural Dynamic Learning of Millimeter-Wave Wi-Fi", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2024.3388930, April 2024.
    BibTeX TR2024-044 PDF
    • @article{Vaca-Rubio2024apr,
    • author = {Vaca-Rubio, Cristian and Wang, Pu and Koike-Akino, Toshiaki and Wang, Ye and Boufounos, Petros T. and Popovski, Petar},
    • title = {Object Trajectory Estimation with Continuous-Time Neural Dynamic Learning of Millimeter-Wave Wi-Fi},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2024,
    • month = apr,
    • doi = {10.1109/JSTSP.2024.3388930},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2024-044}
    • }
  •  Dey, R., Egger, B., Boddeti, V., Wang, Y., Marks, T.K., "CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation", IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), April 2024.
    BibTeX TR2024-045 PDF
    • @inproceedings{Dey2024apr,
    • author = {Dey, Rahul and Egger, Bernhard and Boddeti, Vishnu and Wang, Ye and Marks, Tim K.},
    • title = {CoLa-SDF: Controllable Latent StyleSDF for Disentangled 3D Face Generation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-045}
    • }
  •  Pan, Z., Wichern, G., Germain, F.G., Subramanian, A., Le Roux, J., "Late Audio-Visual Fusion for In-The-Wild Speaker Diarization", Hands-free Speech Communication and Microphone Arrays (HSCMA), DOI: 10.1109/ICASSPW62465.2024.10626914, April 2024, pp. 174-178.
    BibTeX TR2024-029 PDF
    • @inproceedings{Pan2024apr,
    • author = {Pan, Zexu and Wichern, Gordon and Germain, François G and Subramanian, Aswin and Le Roux, Jonathan},
    • title = {Late Audio-Visual Fusion for In-The-Wild Speaker Diarization},
    • booktitle = {Hands-free Speech Communication and Microphone Arrays (HSCMA)},
    • year = 2024,
    • pages = {174--178},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSPW62465.2024.10626914},
    • isbn = {979-8-3503-7451-3},
    • url = {https://www.merl.com/publications/TR2024-029}
    • }
  •  Queeney, J., Ozcan, E.C., Paschalidis, I.C., Cassandras, C.G., "Optimal Transport Perturbations for Safe Reinforcement Learning with Robustness Guarantees", Transactions on Machine Learning Research (TMLR), April 2024.
    BibTeX TR2024-037 PDF
    • @article{Queeney2024apr,
    • author = {Queeney, James and Ozcan, Erhan Can and Paschalidis, Ioannis Ch. and Cassandras, Christos G.},
    • title = {Optimal Transport Perturbations for Safe Reinforcement Learning with Robustness Guarantees},
    • journal = {Transactions on Machine Learning Research (TMLR)},
    • year = 2024,
    • month = apr,
    • issn = {2835-8856},
    • url = {https://www.merl.com/publications/TR2024-037}
    • }
  •  Yang, Z., Liu, J., Chen, P., Cherian, A., Marks, T.K., Le Roux, J., Gan, C., "RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), April 2024, pp. 16251-16261.
    BibTeX TR2024-043 PDF
    • @inproceedings{Yang2024apr,
    • author = {Yang, Zeyuan and Liu, Jiageng and Chen, Peihao and Cherian, Anoop and Marks, Tim K. and Le Roux, Jonathan and Gan, Chuang},
    • title = {RILA: Reflective and Imaginative Language Agent for Zero-Shot Semantic Audio-Visual Navigation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2024,
    • pages = {16251--16261},
    • month = apr,
    • publisher = {CVF},
    • url = {https://www.merl.com/publications/TR2024-043}
    • }
  •  Koo, J., Wichern, G., Germain, F.G., Khurana, S., Le Roux, J., "Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads", IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA), April 2024.
    BibTeX TR2024-032 PDF
    • @inproceedings{Koo2024apr,
    • author = {Koo, Junghyun and Wichern, Gordon and Germain, François G and Khurana, Sameer and Le Roux, Jonathan},
    • title = {Understanding and Controlling Generative Music Transformers by Probing Individual Attention Heads},
    • booktitle = {IEEE ICASSP Satellite Workshop on Explainable Machine Learning for Speech and Audio (XAI-SA)},
    • year = 2024,
    • month = apr,
    • url = {https://www.merl.com/publications/TR2024-032}
    • }