Publications

Manilow, E., Wichern, G., Seetharaman, P., Le Roux, J., "Cutting Music Source Separation Some Slakh: A Dataset to Study the Impact of Training Data Quality and Quantity", IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA), DOI: 10.1109/WASPAA.2019.8937170, October 2019, pp. 45-49.
BibTeX TR2019-124 PDF
- @inproceedings{Manilow2019oct,
- author = {Manilow, Ethan and Wichern, Gordon and Seetharaman, Prem and Le Roux, Jonathan},
- title = {Cutting Music Source Separation Some {Slakh}: A Dataset to Study the Impact of Training Data Quality and Quantity},
- booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
- year = 2019,
- pages = {45--49},
- month = oct,
- doi = {10.1109/WASPAA.2019.8937170},
- issn = {1947-1629},
- isbn = {978-1-7281-1123-0},
- url = {https://www.merl.com/publications/TR2019-124}
- }
Wichern, G., McQuinn, E., Antognini, J., Flynn, M., Zhu, R., Crow, D., Manilow, E., Le Roux, J., "WHAM!: Extending Speech Separation to Noisy Environments", Interspeech, DOI: 10.21437/Interspeech.2019-2821, September 2019, pp. 1368-1372.
BibTeX TR2019-099 PDF
- @inproceedings{Wichern2019sep,
- author = {Wichern, Gordon and McQuinn, Emmett and Antognini, Joe and Flynn, Michael and Zhu, Richard and Crow, Dwight and Manilow, Ethan and Le Roux, Jonathan},
- title = {{WHAM!}: Extending Speech Separation to Noisy Environments},
- booktitle = {Interspeech},
- year = 2019,
- pages = {1368--1372},
- month = sep,
- doi = {10.21437/Interspeech.2019-2821},
- url = {https://www.merl.com/publications/TR2019-099}
- }
Aihara, R., Hanazawa, T., Okato, Y., Wichern, G., Le Roux, J., "Teacher-Student Deep Clustering for Low-Delay Single Channel Speech Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682695, May 2019.
BibTeX TR2019-003 PDF
- @inproceedings{Aihara2019may,
- author = {Aihara, Ryo and Hanazawa, Toshiyuki and Okato, Yohei and Wichern, Gordon and Le Roux, Jonathan},
- title = {Teacher-Student Deep Clustering for Low-Delay Single Channel Speech Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8682695},
- url = {https://www.merl.com/publications/TR2019-003}
- }
Hori, C., Alamri, H., Wang, J., Wichern, G., Hori, T., Cherian, A., Marks, T.K., Cartillier, V., Lopes, R., Das, A., Essa, I., Batra, D., Parikh, D., "End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682583, May 2019.
BibTeX TR2019-016 PDF
- @inproceedings{Hori2019may2,
- author    = {Hori, Chiori and Alamri, Huda and Wang, Jue and Wichern, Gordon and Hori, Takaaki and Cherian, Anoop and Marks, Tim K. and Cartillier, Vincent and Lopes, Raphael and Das, Abhishek and Essa, Irfan and Batra, Dhruv and Parikh, Devi},
- title     = {End-to-End Audio Visual Scene-Aware Dialog Using Multimodal Attention-Based Video Features},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8682583},
- url       = {https://www.merl.com/publications/TR2019-016}
- }
Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "The Phasebook: Building Complex Masks via Discrete Representations for Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8682587, May 2019.
BibTeX TR2019-008 PDF
- @inproceedings{LeRoux2019may2,
- author = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title = {The {Phasebook}: Building Complex Masks via Discrete Representations for Source Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2019,
- month = may,
- doi = {10.1109/ICASSP.2019.8682587},
- url = {https://www.merl.com/publications/TR2019-008}
- }
Seetharaman, P., Wichern, G., Le Roux, J., Pardo, B., "Bootstrapping Single-Channel Source Separation via Unsupervised Spatial Clustering on Stereo Mixtures", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683198, May 2019.
BibTeX TR2019-014 PDF
- @inproceedings{Seetharaman2019may2,
- author    = {Seetharaman, Prem and Wichern, Gordon and Le Roux, Jonathan and Pardo, Bryan},
- title     = {Bootstrapping Single-Channel Source Separation via Unsupervised Spatial Clustering on Stereo Mixtures},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683198},
- url       = {https://www.merl.com/publications/TR2019-014}
- }
Seetharaman, P., Wichern, G., Venkataramani, S., Le Roux, J., "Class-Conditional Embeddings for Music Source Separation", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2019.8683007, May 2019.
BibTeX TR2019-004 PDF
- @inproceedings{Seetharaman2019may,
- author    = {Seetharaman, Prem and Wichern, Gordon and Venkataramani, Shrikant and Le Roux, Jonathan},
- title     = {Class-Conditional Embeddings for Music Source Separation},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- month     = may,
- year      = {2019},
- doi       = {10.1109/ICASSP.2019.8683007},
- url       = {https://www.merl.com/publications/TR2019-004}
- }
Le Roux, J., Wichern, G., Watanabe, S., Sarroff, A., Hershey, J., "Phasebook and Friends: Leveraging Discrete Representations for Source Separation", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/JSTSP.2019.2904183, Vol. 13, No. 2, pp. 370-382, March 2019.
BibTeX TR2018-199 PDF
- @article{LeRoux2019mar,
- author = {Le Roux, Jonathan and Wichern, Gordon and Watanabe, Shinji and Sarroff, Andy and Hershey, John},
- title = {Phasebook and Friends: Leveraging Discrete Representations for Source Separation},
- journal = {IEEE Journal of Selected Topics in Signal Processing},
- year = 2019,
- volume = 13,
- number = 2,
- pages = {370--382},
- month = mar,
- doi = {10.1109/JSTSP.2019.2904183},
- url = {https://www.merl.com/publications/TR2018-199}
- }
Wichern, G., Le Roux, J., "Phase Reconstruction with Learned Time-Frequency Representations for Single-Channel Speech Separation", International Workshop on Acoustic Signal Enhancement (IWAENC), DOI: 10.1109/IWAENC.2018.8521243, September 2018.
BibTeX TR2018-146 PDF
- @inproceedings{Wichern2018sep,
- author    = {Wichern, Gordon and Le Roux, Jonathan},
- title     = {Phase Reconstruction with Learned Time-Frequency Representations for Single-Channel Speech Separation},
- booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
- month     = sep,
- year      = {2018},
- doi       = {10.1109/IWAENC.2018.8521243},
- url       = {https://www.merl.com/publications/TR2018-146}
- }