Publications

359 / 3,666 publications found.


  •  Aralikatti, R., Boeddeker, C., Wichern, G., Subramanian, A.S., Le Roux, J., "Reverberation as Supervision for Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095022, May 2023, pp. 1-5.
    BibTeX TR2023-016 PDF
    • @inproceedings{Aralikatti2023may,
    •   author    = {Aralikatti, Rohith and Boeddeker, Christoph and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    •   title     = {Reverberation as Supervision for Speech Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   pages     = {1--5},
    •   doi       = {10.1109/ICASSP49357.2023.10095022},
    •   url       = {https://www.merl.com/publications/TR2023-016},
    • }
  •  Berk, A., Ma, Y., Boufounos, P.T., Wang, P., Mansour, H., "Deep Proximal Gradient Method for Learned Convex Regularizers", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094632, May 2023, pp. 1-5.
    BibTeX TR2023-032 PDF Video
    • @inproceedings{Berk2023may,
    •   author    = {Berk, Aaron and Ma, Yanting and Boufounos, Petros T. and Wang, Pu and Mansour, Hassan},
    •   title     = {Deep Proximal Gradient Method for Learned Convex Regularizers},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   pages     = {1--5},
    •   doi       = {10.1109/ICASSP49357.2023.10094632},
    •   isbn      = {978-1-7281-6327-7},
    •   url       = {https://www.merl.com/publications/TR2023-032},
    • }
  •  Bralios, D., Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Latent Iterative Refinement for Modular Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096897, May 2023, pp. 1-5.
    BibTeX TR2023-019 PDF
    • @inproceedings{Bralios2023may,
    •   author    = {Bralios, Dimitrios and Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and Le Roux, Jonathan},
    •   title     = {Latent Iterative Refinement for Modular Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   pages     = {1--5},
    •   doi       = {10.1109/ICASSP49357.2023.10096897},
    •   url       = {https://www.merl.com/publications/TR2023-019},
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., "Soft 2D-to-3D Delivery Using Deep Graph Neural Networks for Holographic-Type Communication", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095882, May 2023.
    BibTeX TR2023-031 PDF
    • @inproceedings{Fujihashi2023may,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi},
    • title = {Soft {2D-to-3D} Delivery Using Deep Graph Neural Networks for Holographic-Type Communication},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10095882},
    • isbn = {978-1-7281-6327-7},
    • url = {https://www.merl.com/publications/TR2023-031}
    • }
  •  Jeon, E.S., Lohit, S., Anirudh, R., Turaga, P., "Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096888, May 2023.
    BibTeX TR2023-021 PDF Presentation
    • @inproceedings{Jeon2023may,
    •   author    = {Jeon, Eun Som and Lohit, Suhas and Anirudh, Rushil and Turaga, Pavan},
    •   title     = {Robust Time Series Recovery and Classification Using Test-time Noise Simulator Networks},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   doi       = {10.1109/ICASSP49357.2023.10096888},
    •   url       = {https://www.merl.com/publications/TR2023-021},
    • }
  •  Jin, S., Wang, P., Boufounos, P.T., Takahashi, R., Roy, S., "Spatial-Domain Object Detection Under MIMO-FMCW Automotive Radar Interference", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095409, May 2023, pp. 1-5.
    BibTeX TR2023-027 PDF
    • @inproceedings{Jin2023may,
    • author = {Jin, Sian and Wang, Pu and Boufounos, Petros T. and Takahashi, Ryuhei and Roy, Sumit},
    • title = {Spatial-Domain Object Detection Under {MIMO-FMCW} Automotive Radar Interference},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • pages = {1--5},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10095409},
    • isbn = {978-1-7281-6327-7},
    • url = {https://www.merl.com/publications/TR2023-027}
    • }
  •  Kelkar, V.A., Liu, D., Inoue, H., Kanemaru, M., "Sparsity-driven Joint Blind Deconvolution-demodulation with Application to Motor Fault Detection", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094590, May 2023.
    BibTeX TR2023-026 PDF
    • @inproceedings{Kelkar2023may,
    • author = {Kelkar, Varun A. and Liu, Dehong and Inoue, Hiroshi and Kanemaru, Makoto},
    • title = {Sparsity-driven Joint Blind Deconvolution-demodulation with Application to Motor Fault Detection},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10094590},
    • isbn = {978-1-7281-6328-4},
    • url = {https://www.merl.com/publications/TR2023-026}
    • }
  •  Petermann, D., Wichern, G., Subramanian, A.S., Le Roux, J., "Hyperbolic Audio Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10094943, May 2023, pp. 1-5.
    BibTeX TR2023-017 PDF Video Software
    • @inproceedings{Petermann2023may,
    •   author    = {Petermann, Darius and Wichern, Gordon and Subramanian, Aswin Shanmugam and Le Roux, Jonathan},
    •   title     = {Hyperbolic Audio Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   pages     = {1--5},
    •   doi       = {10.1109/ICASSP49357.2023.10094943},
    •   url       = {https://www.merl.com/publications/TR2023-017},
    • }
  •  Tzinis, E., Wichern, G., Smaragdis, P., Le Roux, J., "Optimal Condition Training for Target Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095128, May 2023, pp. 1-5.
    BibTeX TR2023-018 PDF
    • @inproceedings{Tzinis2023may,
    •   author    = {Tzinis, Efthymios and Wichern, Gordon and Smaragdis, Paris and Le Roux, Jonathan},
    •   title     = {Optimal Condition Training for Target Source Separation},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   pages     = {1--5},
    •   doi       = {10.1109/ICASSP49357.2023.10095128},
    •   url       = {https://www.merl.com/publications/TR2023-018},
    • }
  •  Ulvog, A., Rapp, J., Koike-Akino, T., Mansour, H., Boufounos, P.T., Parsons, K., "Phase Unwrapping in Correlated Noise for FMCW LIDAR Depth Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095456, May 2023, pp. 1-5.
    BibTeX TR2023-028 PDF Video Presentation
    • @inproceedings{Ulvog2023may,
    • author = {Ulvog, Alfred and Rapp, Joshua and Koike-Akino, Toshiaki and Mansour, Hassan and Boufounos, Petros T. and Parsons, Kieran},
    • title = {Phase Unwrapping in Correlated Noise for {FMCW} {LIDAR} Depth Estimation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • pages = {1--5},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10095456},
    • isbn = {978-1-7281-6327-7},
    • url = {https://www.merl.com/publications/TR2023-028}
    • }
  •  Vaca-Rubio, C., Wang, P., Koike-Akino, T., Wang, Y., Boufounos, P.T., Popovski, P., "mmWave Wi-Fi Trajectory Estimation with Continuous-Time Neural Dynamic Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096474, May 2023, pp. 1-5.
    BibTeX TR2023-033 PDF Video
    • @inproceedings{Vaca-Rubio2023may,
    • author = {Vaca-Rubio, Cristian and Wang, Pu and Koike-Akino, Toshiaki and Wang, Ye and Boufounos, Petros T. and Popovski, Petar},
    • title = {{mmWave} {Wi-Fi} Trajectory Estimation with Continuous-Time Neural Dynamic Learning},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • pages = {1--5},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10096474},
    • isbn = {978-1-7281-6327-7},
    • url = {https://www.merl.com/publications/TR2023-033}
    • }
  •  Yen, H., Germain, F., Wichern, G., Le Roux, J., "Cold Diffusion for Speech Enhancement", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10096064, May 2023, pp. 1-5.
    BibTeX TR2023-020 PDF
    • @inproceedings{Yen2023may,
    •   author    = {Yen, Hao and Germain, Francois and Wichern, Gordon and Le Roux, Jonathan},
    •   title     = {Cold Diffusion for Speech Enhancement},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = may,
    •   year      = {2023},
    •   pages     = {1--5},
    •   doi       = {10.1109/ICASSP49357.2023.10096064},
    •   url       = {https://www.merl.com/publications/TR2023-020},
    • }
  •  Zhao, Q., Ma, Y., Boufounos, P.T., Nabi, S., Mansour, H., "Deep Born Operator Learning for Reflection Tomographic Imaging", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP49357.2023.10095494, May 2023, pp. 1-5.
    BibTeX TR2023-029 PDF Video
    • @inproceedings{Zhao2023may,
    • author = {Zhao, Qingqing and Ma, Yanting and Boufounos, Petros T. and Nabi, Saleh and Mansour, Hassan},
    • title = {Deep {Born} Operator Learning for Reflection Tomographic Imaging},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2023,
    • pages = {1--5},
    • month = may,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP49357.2023.10095494},
    • url = {https://www.merl.com/publications/TR2023-029}
    • }
  •  Wang, Z.-Q., Wichern, G., Watanabe, S., Le Roux, J., "STFT-Domain Neural Speech Enhancement with Very Low Algorithmic Latency", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2022.3224285, Vol. 31, pp. 397-410, December 2022.
    BibTeX TR2022-166 PDF
    • @article{Wang2022dec2,
    • author = {Wang, Zhong-Qiu and Wichern, Gordon and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {{STFT}-Domain Neural Speech Enhancement with Very Low Algorithmic Latency},
    • journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    • year = 2022,
    • volume = 31,
    • pages = {397--410},
    • month = dec,
    • doi = {10.1109/TASLP.2022.3224285},
    • issn = {2329-9304},
    • url = {https://www.merl.com/publications/TR2022-166}
    • }
  •  Tzinis, E., Wichern, G., Subramanian, A.S., Smaragdis, P., Le Roux, J., "Heterogeneous Target Speech Separation", Interspeech, DOI: 10.21437/Interspeech.2022-10717, September 2022, pp. 1796-1800.
    BibTeX TR2022-115 PDF Video Presentation
    • @inproceedings{Tzinis2022sep,
    •   author    = {Tzinis, Efthymios and Wichern, Gordon and Subramanian, Aswin Shanmugam and Smaragdis, Paris and Le Roux, Jonathan},
    •   title     = {Heterogeneous Target Speech Separation},
    •   booktitle = {Interspeech},
    •   month     = sep,
    •   year      = {2022},
    •   pages     = {1796--1800},
    •   doi       = {10.21437/Interspeech.2022-10717},
    •   url       = {https://www.merl.com/publications/TR2022-115},
    • }
  •  Chang, X., Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Extended Graph Temporal Classification for Multi-Speaker End-to-End ASR", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747375, April 2022, pp. 7322-7326.
    BibTeX TR2022-021 PDF
    • @inproceedings{Chang2022apr,
    • author = {Chang, Xuankai and Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
    • title = {Extended Graph Temporal Classification for Multi-Speaker End-to-End {ASR}},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {7322--7326},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9747375},
    • url = {https://www.merl.com/publications/TR2022-021}
    • }
  •  Higuchi, Y., Moritz, N., Le Roux, J., Hori, T., "Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746275, April 2022, pp. 7672-7676.
    BibTeX TR2022-026 PDF
    • @inproceedings{Higuchi2022apr,
    •   author    = {Higuchi, Yosuke and Moritz, Niko and Le Roux, Jonathan and Hori, Takaaki},
    •   title     = {Advancing Momentum Pseudo-Labeling with Conformer and Initialization Strategy},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = apr,
    •   year      = {2022},
    •   pages     = {7672--7676},
    •   doi       = {10.1109/ICASSP43922.2022.9746275},
    •   url       = {https://www.merl.com/publications/TR2022-026},
    • }
  •  Moritz, N., Hori, T., Watanabe, S., Le Roux, J., "Sequence Transduction with Graph-based Supervision", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747788, April 2022, pp. 7212-7216.
    BibTeX TR2022-024 PDF
    • @inproceedings{Moritz2022apr,
    •   author    = {Moritz, Niko and Hori, Takaaki and Watanabe, Shinji and Le Roux, Jonathan},
    •   title     = {Sequence Transduction with Graph-based Supervision},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = apr,
    •   year      = {2022},
    •   pages     = {7212--7216},
    •   doi       = {10.1109/ICASSP43922.2022.9747788},
    •   url       = {https://www.merl.com/publications/TR2022-024},
    • }
  •  Peng, K.-C., "Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And COVID Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), Gan, W.-S. and Ma, K. K., Eds., DOI: 10.1109/ICASSP43922.2022.9746470, April 2022, pp. 3139-3143.
    BibTeX TR2022-020 PDF Video Presentation
    • @inproceedings{Peng2022apr,
    • author = {Peng, Kuan-Chuan},
    • title = {Iterative Self Knowledge Distillation -- From Pothole Classification To Fine-Grained And {COVID} Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • editor = {Gan, W.-S. and Ma, K. K.},
    • pages = {3139--3143},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9746470},
    • issn = {1520-6149},
    • isbn = {978-1-6654-0541-6},
    • url = {https://www.merl.com/publications/TR2022-020}
    • }
  •  Petermann, D., Wichern, G., Wang, Z.-Q., Le Roux, J., "The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746005, April 2022, pp. 526-530.
    BibTeX TR2022-022 PDF Software
    • @inproceedings{Petermann2022apr,
    • author = {Petermann, Darius and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
    • title = {The Cocktail Fork Problem: Three-Stem Audio Separation for Real-World Soundtracks},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {526--530},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9746005},
    • url = {https://www.merl.com/publications/TR2022-022}
    • }
  •  Shah, A.P., Geng, S., Gao, P., Cherian, A., Hori, T., Marks, T.K., Le Roux, J., Hori, C., "Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), April 2022, pp. 7732-7736.
    BibTeX TR2022-019 PDF
    • @inproceedings{Shah2022apr,
    •   author    = {Shah, Ankit Parag and Geng, Shijie and Gao, Peng and Cherian, Anoop and Hori, Takaaki and Marks, Tim K. and Le Roux, Jonathan and Hori, Chiori},
    •   title     = {Audio-Visual Scene-Aware Dialog and Reasoning Using Audio-Visual Transformers with Joint Student-Teacher Learning},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = apr,
    •   year      = {2022},
    •   pages     = {7732--7736},
    •   issn      = {1520-6149},
    •   isbn      = {978-1-6654-0540-9},
    •   url       = {https://www.merl.com/publications/TR2022-019},
    • }
  •  Slizovskaia, O., Wichern, G., Wang, Z.-Q., Le Roux, J., "Locate This, Not That: Class-Conditioned Sound Event DOA Estimation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9747604, April 2022, pp. 711-715.
    BibTeX TR2022-023 PDF
    • @inproceedings{Slizovskaia2022mar,
    • author = {Slizovskaia, Olga and Wichern, Gordon and Wang, Zhong-Qiu and Le Roux, Jonathan},
    • title = {Locate This, Not That: Class-Conditioned Sound Event {DOA} Estimation},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2022,
    • pages = {711--715},
    • month = apr,
    • publisher = {IEEE},
    • doi = {10.1109/ICASSP43922.2022.9747604},
    • url = {https://www.merl.com/publications/TR2022-023}
    • }
  •  Yu, J., Wang, P., Koike-Akino, T., Orlik, P.V., "Multi-Modal Recurrent Fusion for Indoor Localization", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP43922.2022.9746071, April 2022.
    BibTeX TR2022-018 PDF
    • @inproceedings{Yu2022apr,
    •   author    = {Yu, Jianyuan and Wang, Pu and Koike-Akino, Toshiaki and Orlik, Philip V.},
    •   title     = {Multi-Modal Recurrent Fusion for Indoor Localization},
    •   booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    •   publisher = {IEEE},
    •   month     = apr,
    •   year      = {2022},
    •   doi       = {10.1109/ICASSP43922.2022.9746071},
    •   issn      = {2379-190X},
    •   isbn      = {978-1-6654-0540-9},
    •   url       = {https://www.merl.com/publications/TR2022-018},
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation", IEEE/ACM Transactions on Audio, Speech, and Language Processing, DOI: 10.1109/TASLP.2021.3129363, Vol. 29, pp. 3476-3490, December 2021.
    BibTeX TR2021-144 PDF
    • @article{Wang2021dec,
    •   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    •   title   = {Convolutive Prediction for Monaural Speech Dereverberation and Noisy-Reverberant Speaker Separation},
    •   journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
    •   volume  = {29},
    •   pages   = {3476--3490},
    •   month   = dec,
    •   year    = {2021},
    •   doi     = {10.1109/TASLP.2021.3129363},
    •   url     = {https://www.merl.com/publications/TR2021-144},
    • }
  •  Wang, Z.-Q., Wichern, G., Le Roux, J., "On The Compensation Between Magnitude and Phase in Speech Separation", IEEE Signal Processing Letters, DOI: 10.1109/LSP.2021.3116502, Vol. 28, pp. 2018-2022, November 2021.
    BibTeX TR2021-137 PDF
    • @article{Wang2021nov2,
    •   author  = {Wang, Zhong-Qiu and Wichern, Gordon and Le Roux, Jonathan},
    •   title   = {On The Compensation Between Magnitude and Phase in Speech Separation},
    •   journal = {IEEE Signal Processing Letters},
    •   volume  = {28},
    •   pages   = {2018--2022},
    •   month   = nov,
    •   year    = {2021},
    •   doi     = {10.1109/LSP.2021.3116502},
    •   url     = {https://www.merl.com/publications/TR2021-137},
    • }