Publications

Hsiao, R., Ma, J., Hartmann, W., Karafiat, M., Grezl, F., Burget, L., Szoke, I., Cernocky, J., Watanabe, S., Chen, Z., Mallidi, S.H., Hermansky, H., Tsakalidis, S., Schwartz, R., "Robust Speech Recognition in Unknown Reverberant and Noisy Conditions", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404841, December 2015, pp. 533-538.
BibTeX TR2015-138 PDF
- @inproceedings{Hsiao2015dec,
- author = {Hsiao, R. and Ma, J. and Hartmann, W. and Karafiat, M. and Grezl, F. and Burget, L. and Szoke, I. and Cernocky, J. and Watanabe, S. and Chen, Z. and Mallidi, S.H. and Hermansky, H. and Tsakalidis, S. and Schwartz, R.},
- title = {{Robust Speech Recognition in Unknown Reverberant and Noisy Conditions}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {533--538},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404841},
- url = {https://www.merl.com/publications/TR2015-138}
- }
Moriya, T., Shinozaki, T., Watanabe, S., Duh, K., "Automation of System Building for State-of-the-Art Large Vocabulary Speech Recognition Using Evolution Strategy", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404852, December 2015, pp. 610-616.
BibTeX TR2015-137 PDF
- @inproceedings{Moriya2015dec,
- author = {Moriya, T. and Shinozaki, T. and Watanabe, S. and Duh, K.},
- title = {{Automation of System Building for State-of-the-Art Large Vocabulary Speech Recognition Using Evolution Strategy}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {610--616},
- month = dec,
- doi = {10.1109/ASRU.2015.7404852},
- url = {https://www.merl.com/publications/TR2015-137}
- }
Hori, T., Chen, Z., Erdogan, H., Hershey, J.R., Le Roux, J., Mitra, V., Watanabe, S., "The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition", IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU), DOI: 10.1109/ASRU.2015.7404833, December 2015, pp. 475-481.
BibTeX TR2015-135 PDF
- @inproceedings{Hori2015dec2,
- author = {Hori, T. and Chen, Z. and Erdogan, H. and Hershey, J.R. and {Le Roux}, J. and Mitra, V. and Watanabe, S.},
- title = {{The MERL/SRI System for the 3rd CHiME Challenge Using Beamforming, Robust Feature Extraction, and Advanced Speech Recognition}},
- booktitle = {IEEE Workshop on Automatic Speech Recognition and Understanding (ASRU)},
- year = 2015,
- pages = {475--481},
- month = dec,
- publisher = {IEEE},
- doi = {10.1109/ASRU.2015.7404833},
- url = {https://www.merl.com/publications/TR2015-135}
- }
Hori, C., Hori, T., Watanabe, S., Hershey, J.R., "Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers", NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction, December 2015.
BibTeX TR2015-134 PDF
- @inproceedings{Hori2015dec1,
- author = {Hori, C. and Hori, T. and Watanabe, S. and Hershey, J.R.},
- title = {{Context Sensitive Spoken Language Understanding Using Role Dependent LSTM Layers}},
- booktitle = {NIPS Workshop on Machine Learning for Spoken Language Understanding and Interaction},
- year = 2015,
- month = dec,
- url = {https://www.merl.com/publications/TR2015-134}
- }
Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "Compressive Sensing for Loss-Resilient Hybrid Wireless Video Transmission", IEEE Global Communications Conference (GLOBECOM), DOI: 10.1109/GLOCOM.2015.7417536, December 2015, pp. 1-7.
BibTeX TR2015-132 PDF
- @inproceedings{Fujihashi2015dec,
- author = {Fujihashi, T. and Koike-Akino, T. and Watanabe, T. and Orlik, P.V.},
- title = {{Compressive Sensing for Loss-Resilient Hybrid Wireless Video Transmission}},
- booktitle = {IEEE Global Communications Conference (GLOBECOM)},
- year = 2015,
- pages = {1--7},
- month = dec,
- doi = {10.1109/GLOCOM.2015.7417536},
- url = {https://www.merl.com/publications/TR2015-132}
- }
Tawara, N., Ogawa, T., Watanabe, S., Nakamura, A., Kobayashi, T., "A Sampling-Based Speaker Clustering Using Utterance-Oriented Dirichlet Process Mixture Model and Its Evaluation on Large Scale Data", APSIPA Transactions on Signal and Information Processing, DOI: 10.1017/ATSIP.2015.19, Vol. 4, October 2015.
BibTeX TR2015-153 PDF
- @article{Tawara2015oct,
- author = {Tawara, N. and Ogawa, T. and Watanabe, S. and Nakamura, A. and Kobayashi, T.},
- title = {{A Sampling-Based Speaker Clustering Using Utterance-Oriented Dirichlet Process Mixture Model and Its Evaluation on Large Scale Data}},
- journal = {APSIPA Transactions on Signal and Information Processing},
- year = 2015,
- volume = 4,
- month = oct,
- doi = {10.1017/ATSIP.2015.19},
- issn = {2048-7703},
- url = {https://www.merl.com/publications/TR2015-153}
- }
Harsham, B.A., Watanabe, S., Esenther, A., Hershey, J.R., Le Roux, J., Luan, Y., Nikovski, D.N., Potluru, V.K., "Driver Prediction to Improve Interaction with In-Vehicle HMI", Workshop on DSP for In-Vehicle Systems and Safety (DSP), October 2015.
BibTeX TR2015-120 PDF
- @inproceedings{Harsham2015oct,
- author = {Harsham, B.A. and Watanabe, S. and Esenther, A. and Hershey, J.R. and {Le Roux}, J. and Luan, Y. and Nikovski, D.N. and Potluru, V.K.},
- title = {{Driver Prediction to Improve Interaction with In-Vehicle HMI}},
- booktitle = {Workshop on DSP for In-Vehicle Systems and Safety (DSP)},
- year = 2015,
- month = oct,
- url = {https://www.merl.com/publications/TR2015-120}
- }
Abdelaziz, A.H., Watanabe, S., Hershey, J.R., Vincent, E., Kolossa, D., "Uncertainty Propagation Through Deep Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 3561.
BibTeX TR2015-098 PDF
- @inproceedings{Abdelaziz2015sep,
- author = {Abdelaziz, A.H. and Watanabe, S. and Hershey, J.R. and Vincent, E. and Kolossa, D.},
- title = {{Uncertainty Propagation Through Deep Neural Networks}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 3561,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-098}
- }
Chen, Z., Watanabe, S., Erdogan, H., Hershey, J.R., "Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks", Interspeech, September 2015, vol. 1 of 5, pp. 1278.
BibTeX TR2015-100 PDF
- @inproceedings{Chen2015sep,
- author = {Chen, Z. and Watanabe, S. and Erdogan, H. and Hershey, J.R.},
- title = {{Speech Enhancement and Recognition Using Multi-Task Learning of Long Short-Term Memory Recurrent Neural Networks}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 1278,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-100}
- }
Luan, Y., Watanabe, S., Harsham, B.A., "Efficient Learning for Spoken Language Understanding Tasks with Word Embedding Based Pre-Training", Interspeech, September 2015, vol. 1 of 5, pp. 1398-1402.
BibTeX TR2015-097 PDF
- @inproceedings{Luan2015sep,
- author = {Luan, Y. and Watanabe, S. and Harsham, B.A.},
- title = {{Efficient Learning for Spoken Language Understanding Tasks with Word Embedding Based Pre-Training}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = {1398--1402},
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-097}
- }
Tachioka, Y., Watanabe, S., "Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features", Interspeech, September 2015, vol. 1 of 5, pp. 3541.
BibTeX TR2015-099 PDF
- @inproceedings{Tachioka2015sep,
- author = {Tachioka, Y. and Watanabe, S.},
- title = {{Uncertainty Training and Decoding Methods of Deep Neural Networks Based on Stochastic Representation of Enhanced Features}},
- booktitle = {Interspeech},
- year = 2015,
- volume = {1 of 5},
- pages = 3541,
- month = sep,
- isbn = {978-1-5108-1790-6},
- url = {https://www.merl.com/publications/TR2015-099}
- }
Weninger, F.J., Erdogan, H., Watanabe, S., Vincent, E., Le Roux, J., Hershey, J.R., Schuller, B.W., "Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR", Latent Variable Analysis and Signal Separation Conference (LVA), DOI: 10.1007/978-3-319-22482-4_11, August 2015, vol. 9237, pp. 91-99.
BibTeX TR2015-094 PDF
- @inproceedings{Weninger2015aug,
- author = {Weninger, F.J. and Erdogan, H. and Watanabe, S. and Vincent, E. and {Le Roux}, J. and Hershey, J.R. and Schuller, B.W.},
- title = {{Speech Enhancement with LSTM Recurrent Neural Networks and Its Application to Noise-Robust ASR}},
- booktitle = {Latent Variable Analysis and Signal Separation Conference (LVA)},
- year = 2015,
- volume = 9237,
- pages = {91--99},
- month = aug,
- doi = {10.1007/978-3-319-22482-4_11},
- isbn = {978-3-319-22482-4},
- url = {https://www.merl.com/publications/TR2015-094}
- }
Tachioka, Y., Narita, T., Watanabe, S., "Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments", EURASIP Journal on Advances in Signal Processing, DOI: 10.1186/s13634-015-0241-y, June 2015.
BibTeX TR2015-152 PDF
- @article{Tachioka2015jun,
- author = {Tachioka, Y. and Narita, T. and Watanabe, S.},
- title = {{Effectiveness of Dereverberation, Feature Transformation, Discriminative Training Methods, and System Combination Approach for Various Reverberant Environments}},
- journal = {EURASIP Journal on Advances in Signal Processing},
- year = 2015,
- month = jun,
- doi = {10.1186/s13634-015-0241-y},
- url = {https://www.merl.com/publications/TR2015-152}
- }
Erdogan, H., Hershey, J.R., Watanabe, S., Le Roux, J., "Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178061, April 2015, pp. 708-712.
BibTeX TR2015-031 PDF
- @inproceedings{Erdogan2015apr,
- author = {Erdogan, H. and Hershey, J.R. and Watanabe, S. and {Le Roux}, J.},
- title = {{Phase-Sensitive and Recognition-Boosted Speech Separation Using Deep Recurrent Neural Networks}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {708--712},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7178061},
- url = {https://www.merl.com/publications/TR2015-031}
- }
Shinozaki, T., Watanabe, S., "Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7178918, April 2015, pp. 4979-4983.
BibTeX TR2015-032 PDF
- @inproceedings{Shinozaki2015apr,
- author = {Shinozaki, T. and Watanabe, S.},
- title = {{Structure Discovery of Deep Neural Network Based on Evolutionary Algorithms}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {4979--4983},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7178918},
- url = {https://www.merl.com/publications/TR2015-032}
- }
Tachioka, Y., Watanabe, S., "Discriminative Method for Recurrent Neural Network Language Models", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2015.7179000, April 2015, pp. 5386-5390.
BibTeX TR2015-033 PDF
- @inproceedings{Tachioka2015apr,
- author = {Tachioka, Y. and Watanabe, S.},
- title = {{Discriminative Method for Recurrent Neural Network Language Models}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2015,
- pages = {5386--5390},
- month = apr,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2015.7179000},
- url = {https://www.merl.com/publications/TR2015-033}
- }
Tachioka, Y., Watanabe, S., Le Roux, J., Hershey, J.R., "Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition", Interspeech, September 2014, vol. 15, pp. 2415-2419.
BibTeX TR2014-079 PDF
- @inproceedings{Tachioka2014sep,
- author = {Tachioka, Y. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R.},
- title = {{Sequential Maximum Mutual Information Linear Discriminant Analysis for Speech Recognition}},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {2415--2419},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-079}
- }
Watanabe, S., Hershey, J.R., Marks, T.K., Fujii, Y., Koji, Y., "Cost-level integration of statistical and rule-based dialog managers", Interspeech, September 2014, vol. 15, pp. 323-327.
BibTeX TR2014-082 PDF
- @inproceedings{Watanabe2014sep,
- author = {Watanabe, S. and Hershey, J.R. and Marks, T.K. and Fujii, Y. and Koji, Y.},
- title = {{Cost-level integration of statistical and rule-based dialog managers}},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {323--327},
- month = sep,
- publisher = {International Speech Communication Association},
- url = {https://www.merl.com/publications/TR2014-082}
- }
Weninger, F., Le Roux, J., Hershey, J.R., Watanabe, S., "Discriminative NMF and its application to single-channel source separation", Interspeech, September 2014, vol. 15, pp. 865-869.
BibTeX TR2014-081 PDF
- @inproceedings{Weninger2014sep,
- author = {Weninger, F. and {Le Roux}, J. and Hershey, J.R. and Watanabe, S.},
- title = {{Discriminative NMF and its application to single-channel source separation}},
- booktitle = {Interspeech},
- year = 2014,
- volume = 15,
- pages = {865--869},
- month = sep,
- publisher = {International Speech Communication Association},
- issn = {2308-457X},
- url = {https://www.merl.com/publications/TR2014-081}
- }
Tachioka, Y., Narita, T., Watanabe, S., Le Roux, J., "Ensemble Integration of Calibrated Speaker Localization and Statistical Speech Detection in Domestic Environments", Joint Workshop on Hands-free Speech Communication and Microphone Arrays (HSCMA), DOI: 10.1109/HSCMA.2014.6843272, May 2014, pp. 162-166.
BibTeX TR2014-034 PDF
- @inproceedings{Tachioka2014may2,
- author = {Tachioka, Y. and Narita, T. and Watanabe, S. and {Le Roux}, J.},
- title = {{Ensemble Integration of Calibrated Speaker Localization and Statistical Speech Detection in Domestic Environments}},
- booktitle = {Joint Workshop on Hands-free Speech Communication and Microphone Arrays (HSCMA)},
- year = 2014,
- pages = {162--166},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/HSCMA.2014.6843272},
- url = {https://www.merl.com/publications/TR2014-034}
- }
Tachioka, Y., Narita, T., Weninger, F., Watanabe, S., "Dual system combination approach for various reverberant environments with dereverberation techniques", IEEE REVERB Workshop, May 2014.
BibTeX TR2014-032 PDF
- @inproceedings{Tachioka2014may,
- author = {Tachioka, Y. and Narita, T. and Weninger, F. and Watanabe, S.},
- title = {{Dual system combination approach for various reverberant environments with dereverberation techniques}},
- booktitle = {IEEE REVERB Workshop},
- year = 2014,
- month = may,
- url = {https://www.merl.com/publications/TR2014-032}
- }
Weninger, F., Watanabe, S., Le Roux, J., Hershey, J.R., Tachioka, Y., Geiger, J., Schuller, B., Rigoll, G., "The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement", IEEE REVERB Workshop, May 2014.
BibTeX TR2014-033 PDF
- @inproceedings{Weninger2014may2,
- author = {Weninger, F. and Watanabe, S. and {Le Roux}, J. and Hershey, J.R. and Tachioka, Y. and Geiger, J. and Schuller, B. and Rigoll, G.},
- title = {{The MERL/MELCO/TUM System for the REVERB Challenge Using Deep Recurrent Neural Network Feature Enhancement}},
- booktitle = {IEEE REVERB Workshop},
- year = 2014,
- month = may,
- url = {https://www.merl.com/publications/TR2014-033}
- }
Tang, H., Watanabe, S., Marks, T.K., Hershey, J.R., "Log-linear Dialog Manager", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854371, May 2014, pp. 4092-4096.
BibTeX TR2014-024 PDF
- @inproceedings{Tang2014may,
- author = {Tang, H. and Watanabe, S. and Marks, T.K. and Hershey, J.R.},
- title = {{Log-linear Dialog Manager}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2014,
- pages = {4092--4096},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2014.6854371},
- url = {https://www.merl.com/publications/TR2014-024}
- }
Watanabe, S., Le Roux, J., "Black Box Optimization for Automatic Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854202, May 2014, pp. 3256-3260.
BibTeX TR2014-021 PDF
- @inproceedings{Watanabe2014may,
- author = {Watanabe, S. and {Le Roux}, J.},
- title = {{Black Box Optimization for Automatic Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2014,
- pages = {3256--3260},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2014.6854202},
- url = {https://www.merl.com/publications/TR2014-021}
- }
Weng, C., Yu, D., Watanabe, S., Juang, B.-H.F., "Recurrent Deep Neural Networks for Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/ICASSP.2014.6854661, May 2014, pp. 5532-5536.
BibTeX TR2014-023 PDF
- @inproceedings{Weng2014may,
- author = {Weng, C. and Yu, D. and Watanabe, S. and Juang, B.-H.F.},
- title = {{Recurrent Deep Neural Networks for Robust Speech Recognition}},
- booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
- year = 2014,
- pages = {5532--5536},
- month = may,
- publisher = {IEEE},
- doi = {10.1109/ICASSP.2014.6854661},
- url = {https://www.merl.com/publications/TR2014-023}
- }