Publications

175 / 3,734 publications found.


  •  Xiao, X., Watanabe, S., Erdogan, H., Mandel, M., Lu, L., Hershey, J., Seltzer, M., Chen, G., Zhang, Y., Yu, D., "Discriminative beamforming with phase aware neural networks for speech enhancement and recognition" in New Era for Robust Speech Recognition: Exploiting Deep Learning, Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R., Eds., chapter 4, Springer, July 9, 2018.
    BibTeX
    • @incollection{Xiao2018jul2,
    • author = {Xiao, Xiong and Watanabe, Shinji and Erdogan, Hakan and Mandel, Michael and Lu, Liang and Hershey, John and Seltzer, Mike and Chen, Guoguo and Zhang, Yu and Yu, Dong},
    • title = {Discriminative beamforming with phase aware neural networks for speech enhancement and recognition},
    • booktitle = {New Era for Robust Speech Recognition: Exploiting Deep Learning},
    • year = 2018,
    • editor = {Watanabe, S. and Delcroix, M. and Metze, F. and Hershey, J.R.},
    • chapter = 4,
    • month = jul,
    • publisher = {Springer}
    • }
  •  Liu, J., Guo, J., Orlik, P.V., Shibata, M., Nakahara, D., Mii, S., Takac, M., "Anomaly Detection in Manufacturing Systems Using Structured Neural Networks", IEEE World Congress on Intelligent Control and Automation, DOI: 10.1109/​WCICA.2018.8630692, July 2018, pp. 175-180.
    BibTeX TR2018-097 PDF
    • @inproceedings{Liu2018jul2,
    • author = {Liu, Jie and Guo, Jianlin and Orlik, Philip V. and Shibata, Masahiko and Nakahara, Daiki and Mii, Satoshi and Takac, Martin},
    • title = {Anomaly Detection in Manufacturing Systems Using Structured Neural Networks},
    • booktitle = {IEEE World Congress on Intelligent Control and Automation},
    • year = 2018,
    • pages = {175--180},
    • month = jul,
    • doi = {10.1109/WCICA.2018.8630692},
    • url = {https://www.merl.com/publications/TR2018-097}
    • }
  •  Koike-Akino, T., Millar, D.S., Parsons, K., Kojima, K., "Fiber Nonlinearity Equalization with Multi-Label Deep Learning Scalable to High-Order DP-QAM", Signal Processing in Photonic Communications (SPPCom), DOI: 10.1364/​SPPCOM.2018.SpM4G.1, July 2018.
    BibTeX TR2018-047 PDF
    • @inproceedings{Koike-Akino2018jul3,
    • author = {Koike-Akino, Toshiaki and Millar, David S. and Parsons, Kieran and Kojima, Keisuke},
    • title = {Fiber Nonlinearity Equalization with Multi-Label Deep Learning Scalable to High-Order DP-QAM},
    • booktitle = {Signal Processing in Photonic Communications (SPPCom)},
    • year = 2018,
    • month = jul,
    • doi = {10.1364/SPPCOM.2018.SpM4G.1},
    • url = {https://www.merl.com/publications/TR2018-047}
    • }
  •  Shen, Y., Feng, C., Yang, Y., Tian, D., "Mining Point Cloud Local Structures by Kernel Correlation and Graph Pooling", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), June 2018.
    BibTeX TR2018-041 PDF Software
    • @inproceedings{Shen2018jun,
    • author = {Shen, Yiru and Feng, Chen and Yang, Yaoqing and Tian, Dong},
    • title = {Mining Point Cloud Local Structures by Kernel Correlation and Graph Pooling},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2018,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2018-041}
    • }
  •  Yang, Y., Feng, C., Shen, Y., Tian, D., "FoldingNet: Point Cloud Auto-encoder via Deep Grid Deformation", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/​CVPR.2018.00029, June 2018.
    BibTeX TR2018-042 PDF Video Software
    • @inproceedings{Yang2018jun,
    • author = {Yang, Yaoqing and Feng, Chen and Shen, Yiru and Tian, Dong},
    • title = {FoldingNet: Point Cloud Auto-encoder via Deep Grid Deformation},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2018,
    • month = jun,
    • doi = {10.1109/CVPR.2018.00029},
    • url = {https://www.merl.com/publications/TR2018-042}
    • }
  •  Fujihashi, T., Koike-Akino, T., Watanabe, T., Orlik, P.V., "Nonlinear Equalization with Deep Learning for Multi-Purpose Visual MIMO Communications", IEEE International Conference on Communications (ICC), DOI: 10.1109/​ICC.2018.8422544, May 2018.
    BibTeX TR2018-039 PDF
    • @inproceedings{Fujihashi2018may,
    • author = {Fujihashi, Takuya and Koike-Akino, Toshiaki and Watanabe, Takashi and Orlik, Philip V.},
    • title = {Nonlinear Equalization with Deep Learning for Multi-Purpose Visual MIMO Communications},
    • booktitle = {IEEE International Conference on Communications (ICC)},
    • year = 2018,
    • month = may,
    • doi = {10.1109/ICC.2018.8422544},
    • url = {https://www.merl.com/publications/TR2018-039}
    • }
  •  Ochiai, T., Watanabe, S., Katagiri, S., Hori, T., Hershey, J.R., "Speaker Adaptation for Multichannel End-to-End Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8462161, April 2018, pp. 6707-6711.
    BibTeX TR2018-006 PDF
    • @inproceedings{Ochiai2018apr,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Katagiri, Shigeru and Hori, Takaaki and Hershey, John R.},
    • title = {Speaker Adaptation for Multichannel End-to-End Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6707--6711},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462161},
    • url = {https://www.merl.com/publications/TR2018-006}
    • }
  •  Settle, S., Le Roux, J., Hori, T., Watanabe, S., Hershey, J.R., "End-to-End Multi-Speaker Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8461893, April 2018, pp. 4819-4823.
    BibTeX TR2018-001 PDF Video
    • @inproceedings{Settle2018apr,
    • author = {Settle, Shane and Le Roux, Jonathan and Hori, Takaaki and Watanabe, Shinji and Hershey, John R.},
    • title = {End-to-End Multi-Speaker Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {4819--4823},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461893},
    • url = {https://www.merl.com/publications/TR2018-001}
    • }
  •  Wang, Z.-Q., Le Roux, J., Hershey, J.R., "Alternative Objective Functions for Deep Clustering", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8462507, April 2018, pp. 686-690.
    BibTeX TR2018-005 PDF
    • @inproceedings{Wang2018apr,
    • author = {Wang, Zhong-Qiu and Le Roux, Jonathan and Hershey, John R.},
    • title = {Alternative Objective Functions for Deep Clustering},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {686--690},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8462507},
    • url = {https://www.merl.com/publications/TR2018-005}
    • }
  •  Wen, B., Kamilov, U., Liu, D., Mansour, H., Boufounos, P.T., "DeepCASD: An End-to-End Approach for Multi-Spectral Image Super-Resolution", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), DOI: 10.1109/​ICASSP.2018.8461795, April 2018, pp. 6503-6507.
    BibTeX TR2018-009 PDF
    • @inproceedings{Wen2018apr,
    • author = {Wen, Bihan and Kamilov, Ulugbek and Liu, Dehong and Mansour, Hassan and Boufounos, Petros T.},
    • title = {DeepCASD: An End-to-End Approach for Multi-Spectral Image Super-Resolution},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2018,
    • pages = {6503--6507},
    • month = apr,
    • doi = {10.1109/ICASSP.2018.8461795},
    • url = {https://www.merl.com/publications/TR2018-009}
    • }
  •  Zhang, Z., Brand, M., "Convergent Block Coordinate Descent for Training Tikhonov Regularized Deep Neural Networks", Advances in Neural Information Processing Systems (NIPS), December 2017.
    BibTeX TR2017-140 PDF
    • @inproceedings{Ziming2017dec,
    • author = {Zhang, Ziming and Brand, Matthew},
    • title = {Convergent Block Coordinate Descent for Training Tikhonov Regularized Deep Neural Networks},
    • booktitle = {Advances in Neural Information Processing Systems (NIPS)},
    • year = 2017,
    • month = dec,
    • url = {https://www.merl.com/publications/TR2017-140}
    • }
  •  Ochiai, T., Watanabe, S., Hori, T., Hershey, J.R., Xiao, X., "Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2764276, Vol. 11, No. 8, pp. 1274-1288, October 2017.
    BibTeX TR2017-192 PDF
    • @article{Ochiai2017oct2,
    • author = {Ochiai, Tsubasa and Watanabe, Shinji and Hori, Takaaki and Hershey, John R. and Xiao, Xiong},
    • title = {Unified Architecture for Multichannel End-to-End Speech Recognition with Neural Beamforming},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1274--1288},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2764276},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-192}
    • }
  •  Watanabe, S., Hori, T., Kim, S., Hershey, J.R., Hayashi, T., "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition", IEEE Journal of Selected Topics in Signal Processing, DOI: 10.1109/​JSTSP.2017.2763455, Vol. 11, No. 8, pp. 1240-1253, October 2017.
    BibTeX TR2017-190 PDF Video
    • @article{Watanabe2017oct,
    • author = {Watanabe, Shinji and Hori, Takaaki and Kim, Suyoun and Hershey, John R. and Hayashi, Tomoki},
    • title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
    • journal = {IEEE Journal of Selected Topics in Signal Processing},
    • year = 2017,
    • volume = 11,
    • number = 8,
    • pages = {1240--1253},
    • month = oct,
    • doi = {10.1109/JSTSP.2017.2763455},
    • issn = {1941-0484},
    • url = {https://www.merl.com/publications/TR2017-190}
    • }
  •  Lee, T.-Y., Patil, S., Ramalingam, S., Taguchi, Y., Benes, B., "Barcode: Global Binary Patterns for Fast Visual Inference", International Conference on 3D Vision, October 2017.
    BibTeX TR2017-145 PDF
    • @inproceedings{Patil2017oct,
    • author = {Lee, Teng-Yok and Patil, Sonali and Ramalingam, Srikumar and Taguchi, Yuichi and Benes, Bedrich},
    • title = {Barcode: Global Binary Patterns for Fast Visual Inference},
    • booktitle = {International Conference on 3D Vision},
    • year = 2017,
    • month = oct,
    • url = {https://www.merl.com/publications/TR2017-145}
    • }
  •  Hori, T., Watanabe, S., Zhang, Y., Chan, W., "Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM", Interspeech, August 2017.
    BibTeX TR2017-132 PDF Video
    • @inproceedings{Hori2017aug,
    • author = {Hori, Takaaki and Watanabe, Shinji and Zhang, Yu and Chan, William},
    • title = {Advances in Joint CTC-Attention based End-to-End Speech Recognition with a Deep CNN Encoder and RNN-LM},
    • booktitle = {Interspeech},
    • year = 2017,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2017-132}
    • }
  •  Feng, C., Liu, M.-Y., Kao, C.-C., Lee, T.-Y., "Deep Active Learning for Civil Infrastructure Defect Detection and Classification", International Workshop on Computing in Civil Engineering (IWCCE), June 2017.
    BibTeX TR2017-034 PDF
    • @inproceedings{Feng2017jun,
    • author = {Feng, Chen and Liu, Ming-Yu and Kao, Chieh-Chi and Lee, Teng-Yok},
    • title = {Deep Active Learning for Civil Infrastructure Defect Detection and Classification},
    • booktitle = {International Workshop on Computing in Civil Engineering (IWCCE)},
    • year = 2017,
    • month = jun,
    • url = {https://www.merl.com/publications/TR2017-034}
    • }
  •  Luo, Y., Chen, Z., Hershey, J.R., Le Roux, J., Mesgarani, N., "Deep Clustering and Conventional Networks for Music Separation: Strong Together", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-010 PDF
    • @inproceedings{Luo2017mar,
    • author = {Luo, Yi and Chen, Zhuo and Hershey, John R. and Le Roux, Jonathan and Mesgarani, Nima},
    • title = {Deep Clustering and Conventional Networks for Music Separation: Strong Together},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-010}
    • }
  •  Meng, Z., Watanabe, S., Hershey, J.R., Erdogan, H., "Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition", IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), March 2017.
    BibTeX TR2017-012 PDF
    • @inproceedings{Meng2017mar,
    • author = {Meng, Zhong and Watanabe, Shinji and Hershey, John R. and Erdogan, Hakan},
    • title = {Deep Long Short-Term Memory Adaptive Beamforming Networks for Multichannel Robust Speech Recognition},
    • booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
    • year = 2017,
    • month = mar,
    • url = {https://www.merl.com/publications/TR2017-012}
    • }
  •  Hara, K., Liu, M.-Y., Tuzel, C.O., Farahmand, A.-M., "Attentional Network for Visual Object Detection", arXiv, January 2017.
    BibTeX arXiv
    • @article{Hara2017jan,
    • author = {Hara, Kota and Liu, Ming-Yu and Tuzel, C. Oncel and Farahmand, Amir-massoud},
    • title = {Attentional Network for Visual Object Detection},
    • journal = {arXiv},
    • year = 2017,
    • month = jan,
    • url = {https://arxiv.org/abs/1702.01478}
    • }
  •  Xiao, X., Watanabe, S., Chng, E.S., Li, H., "Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition", Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC), DOI: 10.1109/​APSIPA.2016.7820724, December 2016.
    BibTeX TR2016-162 PDF
    • @inproceedings{Xiao2016dec,
    • author = {Xiao, Xiong and Watanabe, Shinji and Chng, Eng Siong and Li, Haizhou},
    • title = {Beamforming Networks Using Spatial Covariance Features for Far-field Speech Recognition},
    • booktitle = {Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)},
    • year = 2016,
    • month = dec,
    • doi = {10.1109/APSIPA.2016.7820724},
    • url = {https://www.merl.com/publications/TR2016-162}
    • }
  •  Matsumoto, W., Hagiwara, M., Boufounos, P.T., Fukushima, K., Mariyama, T., Zhao, X., "A Deep Neural Network Architecture Using Dimensionality Reduction with Sparse Matrices", International Conference on Neural Information Processing (ICONIP), DOI: 10.1007/​978-3-319-46681-1_48, October 2016, vol. 9950, pp. 397-404.
    BibTeX TR2016-134 PDF
    • @inproceedings{Matsumoto2016oct,
    • author = {Matsumoto, Wataru and Hagiwara, Manabu and Boufounos, Petros T. and Fukushima, Kunihiko and Mariyama, Toshisada and Zhao, Xiongxin},
    • title = {A Deep Neural Network Architecture Using Dimensionality Reduction with Sparse Matrices},
    • booktitle = {International Conference on Neural Information Processing (ICONIP)},
    • year = 2016,
    • volume = 9950,
    • pages = {397--404},
    • month = oct,
    • doi = {10.1007/978-3-319-46681-1_48},
    • issn = {0302-9743},
    • isbn = {978-3-319-46681-1},
    • url = {https://www.merl.com/publications/TR2016-134}
    • }
  •  Delcroix, M., Watanabe, S., "Recent Advances in Distant Speech Recognition," Tech. Rep. TR2016-115, Interspeech Tutorials, September 2016.
    BibTeX TR2016-115 PDF
    • @techreport{Delcroix2016sep,
    • author = {Delcroix, Marc and Watanabe, Shinji},
    • title = {Recent Advances in Distant Speech Recognition},
    • booktitle = {Interspeech Tutorials},
    • institution = {Interspeech},
    • year = 2016,
    • month = sep,
    • url = {https://www.merl.com/publications/TR2016-115}
    • }
  •  Erdogan, H., Hershey, J.R., Watanabe, S., Mandel, M., Le Roux, J., "Improved MVDR beamforming using single-channel mask prediction networks", Interspeech, DOI: 10.21437/​Interspeech.2016-552, September 2016, pp. 1981-1985.
    BibTeX TR2016-072 PDF
    • @inproceedings{Erdogan2016sep,
    • author = {Erdogan, Hakan and Hershey, John R. and Watanabe, Shinji and Mandel, Michael and Le Roux, Jonathan},
    • title = {Improved MVDR beamforming using single-channel mask prediction networks},
    • booktitle = {Interspeech},
    • year = 2016,
    • pages = {1981--1985},
    • month = sep,
    • doi = {10.21437/Interspeech.2016-552},
    • url = {https://www.merl.com/publications/TR2016-072}
    • }
  •  Kamilov, U., Mansour, H., "Learning MMSE Optimal Thresholds for FISTA", International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST), August 2016.
    BibTeX TR2016-111 PDF
    • @inproceedings{Kamilov2016aug,
    • author = {Kamilov, Ulugbek and Mansour, Hassan},
    • title = {Learning MMSE Optimal Thresholds for FISTA},
    • booktitle = {International Traveling Workshop on Interactions Between Sparse Models and Technology (iTWIST)},
    • year = 2016,
    • month = aug,
    • url = {https://www.merl.com/publications/TR2016-111}
    • }
  •  Vemulapalli, R., Tuzel, C.O., Liu, M.-Y., "Deep Gaussian Conditional Random Field Network: A Model-based Deep Network for Discriminative Denoising", IEEE Conference on Computer Vision and Pattern Recognition (CVPR), DOI: 10.1109/​CVPR.2016.351, June 2016, pp. 4801-4809.
    BibTeX TR2016-079 PDF
    • @inproceedings{Vemulapalli2016jun2,
    • author = {Vemulapalli, Raviteja and Tuzel, C. Oncel and Liu, Ming-Yu},
    • title = {Deep Gaussian Conditional Random Field Network: A Model-based Deep Network for Discriminative Denoising},
    • booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    • year = 2016,
    • pages = {4801--4809},
    • month = jun,
    • doi = {10.1109/CVPR.2016.351},
    • url = {https://www.merl.com/publications/TR2016-079}
    • }