diff writeup/mlj_submission/ift6266_ml.bib @ 587:b1be957dd1be

Added mlj_submission to group every file needed for that.
author fsavard
date Thu, 30 Sep 2010 17:51:02 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/writeup/mlj_submission/ift6266_ml.bib	Thu Sep 30 17:51:02 2010 -0400
@@ -0,0 +1,25838 @@
+%%WARNING: READ THE README FILE BEFORE ANY MODIFICATION!!!
+
+
+%%submitted papers
+%%%
+
+@article{Bergstra+Bengio+Louradoj-2008sub,
+  author  = {J. Bergstra and Y. Bengio and J. Louradour},
+  title   = {Suitability of Complex Cell Models for Object Categorization},
+  journal = {Computational Neuroscience},
+  year    = {2008},
+  note    = {Rejected.},
+}
+@article{Bergstra+Bengio+Louradoj-2009sub,
+  author  = {J. Bergstra and Y. Bengio and J. Louradour},
+  title   = {Suitability of Complex Cell Models for Object Categorization},
+  journal = {Neural Computation},
+  year    = {2009},
+  note    = {Submitted.},
+}
+@article{Chapados+Bengio-2008sub,
+  author  = {N. Chapados and Y. Bengio},
+  title   = {Forecasting and Trading Commodity Contract Spreads with {G}aussian Processes},
+  journal = {International Journal of Forecasting},
+  year    = {2008},
+  note    = {Submitted.},
+}
+@article{Chapados+Bengio-2008sub2,
+  author  = {N. Chapados and Y. Bengio},
+  title   = {Training Graphs of Learning Modules for Sequential Data},
+  journal = {ACM Transactions on Knowledge Discovery from Data},
+  year    = {2008},
+  note    = {Submitted.},
+}
+
+%%%
+%%accepted or published papers
+%%%
+
+@TechReport{Grother,
+  author = "Grother, Patrick J.",
+  title = "{NIST} Special Database 19: Handprinted Forms and Characters Database",
+  institution = "National Institute of Standards and Technology",
+  year = "1995"
+}
+
+@InCollection{Trentin+al-2002,
+  author =       "E. Trentin and F. Brugnara and Y. Bengio and C. Furlanello and De Mori, R.",
+  editor =       "R. Daniloff",
+  booktitle =    "Connectionist Approaches to Clinical Problems in Speech
+and Language",
+  title =        "Statistical and Neural Network Models for Speech Recognition",
+  publisher =    "Lawrence Erlbaum",
+  pages =        "213--264",
+  year =         "2002",
+}
+
+@InCollection{Bengio+grandvalet-2004,
+  author =       "Y. Bengio and Y. Grandvalet",
+  editor =       "P. Duchesne and B. R{\'e}millard",
+  booktitle =    "Statistical Modeling and Analysis for Complex Data Problems",
+  title =        "Bias in Estimating the Variance of K-Fold Cross-Validation",
+  publisher =    "Kluwer Academic Publishers",
+  pages =        "75--95",
+  year =         "2004",
+}
+
+@InCollection{Dugas+al-2004,
+  author =       "C. Dugas and Y. Bengio and N. Chapados and P. Vincent and G. Denoncourt and C. Fournier",
+  editor =       "L. Jain and A. F. Shapiro",
+  booktitle =    "Intelligent and Other Computational Techniques in Insurance: Theory and
+Applications",
+  title =        "Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking",
+  publisher =    "World Scientific Publishing Company",
+  year =         "2004",
+}
+
+@incollection{Dugas+al-2004-short,
+  author    = {C. Dugas and Y. Bengio and N. Chapados and P. Vincent and G. Denoncourt and C. Fournier},
+  title     = {Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking},
+  booktitle = {Intelligent and Other Computational Techniques in Insurance: Theory and Applications},
+  publisher = {World Scientific Publishing Company},
+  year      = {2004},
+}
+
+@inproceedings{Collobert+Bengio+Bengio-2002b,
+    author = "R. Collobert and Y. Bengio and S. Bengio",
+    title = {Scaling Large Learning Problems with Hard Parallel Mixtures},
+    editor = "S. W. Lee and A. Verri",
+    year = 2002,
+    booktitle = SVM02,
+    series = "Lecture Notes in Computer Science",
+    volume = "2388",
+    publisher = "Springer-Verlag",
+    pages = "8--23",
+}
+
+@Article{Collobert+Bengio+Bengio-2003,
+  author =       "R. Collobert and Y. Bengio and S. Bengio",
+  title =        "Scaling Large Learning Problems with Hard Parallel Mixtures",
+  journal =      ijprai,
+  volume =       "17",
+  number =       "3",
+  pages =        "349--365",
+  year =         "2003",
+}
+
+@Article{Collobert+Bengio+Bengio-2003-small,
+  author =       "R. Collobert and Y. Bengio and S. Bengio",
+  title =        "Scaling Large Learning Problems with Hard Parallel Mixtures",
+  journal =      "Int. J. Pattern Recognition and Artificial Intelligence",
+  volume =       "17(3)",
+  pages =        "349--365",
+  year =         "2003",
+}
+
+@inproceedings{Bengio+Chapados-2002,
+  author    = {Y. Bengio and N. Chapados},
+  title     = {Metric-based Model Selection for Time-Series Forecasting},
+  editor    = NIPS12ed,
+  booktitle = NIPS12,
+  publisher = {IEEE Press},
+  pages     = {13--24},
+  year      = {2002},
+}
+
+@InProceedings{Bengio+Takeuchi+Kanamori-2002,
+  author =       "Y. Bengio and I. Takeuchi and K. Kanamori",
+  title =        "The Challenge of Non-Linear Regression on Large Datasets with Asymmetric Heavy Tails",
+  publisher =    "American Statistical Association publ.",
+  booktitle =    JSM02,
+  year =         "2002",
+  pages = "193--205"
+}
+
+@inproceedings{Bengio+Takeuchi+Kanamori-2002-short,
+  author    = {Y. Bengio and I. Takeuchi and K. Kanamori},
+  title     = {The Challenge of Non-Linear Regression on Large Datasets with Asymmetric Heavy Tails},
+  booktitle = JSM02,
+  year      = {2002},
+}
+
+@InProceedings{Collobert+Bengio+Bengio-2002,
+  author =       "R. Collobert and S. Bengio and Y. Bengio",
+  title =        "A Parallel Mixture of {SVM}s for Very Large Scale Problems",
+  booktitle =    NIPS14,
+  editor =       NIPS14ed,
+  pages =        "633--640",
+  year =         "2002",
+}
+
+@inproceedings{Bhattacharya+Getoor+Bengio-2004,
+  author    = {I. Bhattacharya and L. Getoor and Y. Bengio},
+  title     = {Unsupervised Sense Disambiguation Using Bilingual Probabilistic Models},
+  booktitle = {Conference of the Association for Computational Linguistics (ACL'04)},
+  year      = {2004},
+}
+@InProceedings{Boufaden+Bengio+Lapalme-2008,
+  author =       "N. Boufaden and Y. Bengio and G. Lapalme",
+  booktitle =    "{TALN'2004}, Traitement Automatique du Langage Naturel",
+  title =        "Approche statistique pour le rep{\'e}rage de mots informatifs dans les textes oraux",
+  year =         "2004",
+}
+@InProceedings{Chapados+Bengio-2006,
+  author =       "N. Chapados and Y. Bengio",
+  booktitle =    AI06,
+  title =        "The K Best-Paths Approach to Approximate Dynamic Programming with Application to Portfolio Optimization",
+  pages =        "491--502",
+  year =         "2006",
+}
+@InProceedings{Rivest+Bengio+Kalaska-2005,
+  author =       "F. Rivest and Y. Bengio and J. Kalaska",
+  editor =       NIPS17ed,
+  booktitle =    NIPS17,
+  title =        "Brain Inspired Reinforcement Learning",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "1129--1136",
+  year =         "2005",
+}
+
+@InProceedings{Bengio+Grandvalet-NIPS-2004,
+  author =       "Y. Bengio and Y. Grandvalet",
+  editor =       NIPS16ed,
+  booktitle =    NIPS16,
+  title =        "No Unbiased Estimator of the Variance of K-Fold Cross-Validation",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2004",
+}
+
+@InProceedings{Bengio+Grandvalet-NIPS-2004-short,
+  author =       "Y. Bengio and Y. Grandvalet",
+  booktitle =    NIPS16,
+  title =        "No Unbiased Estimator of the Variance of K-Fold Cross-Validation",
+  publisher =    "MIT Press, Cambridge",
+  year =         "2004",
+}
+
+@Article{Zaccaro-et-al-2005,
+  author  = {Maria Clara Zaccaro and Hong Boon Lee and Mookda Pattarawarapan and Zebin Xia and Antoine Caron and Pierre-Jean L'Heureux and Yoshua Bengio and Kevin Burgess and H. Uri Saragovi},
+  title   = {Selective Small Molecule Peptidomimetic Ligands of {TrkC} and {TrkA} Receptors Afford Discrete or Complete Neurotrophic Activities},
+  journal = {Chemistry \& Biology},
+  volume  = {12},
+  number  = {9},
+  pages   = {1015--1028},
+  year    = {2005},
+}
+
+@article{63a:man,
+  author  = {B. Mandelbrot},
+  title   = {The variation of certain speculative prices},
+  journal = {Journal of Business},
+  volume  = {36},
+  pages   = {394--419},
+  year    = {1963},
+  annote  = {Référence pour les distributions stables en finance},
+}
+
+@article{65a:fam,
+  author  = {E. F. Fama},
+  title   = {The behavior of stock market prices},
+  journal = {Journal of Business},
+  volume  = {38},
+  pages   = {34--105},
+  year    = {1965},
+  annote  = {Autre référence pour les distributions stables en finance},
+}
+
+@article{96a:cor:gon:har,
+  author  = {R. M. Corless and G. H. Gonnet and D. E. G. Hare and D. J. Jeffrey and D. E. Knuth},
+  title   = {On the {Lambert} {W} Function},
+  journal = {Advances in Computational Mathematics},
+  volume  = {5},
+  pages   = {329--359},
+  year    = {1996},
+  annote  = {Sert à résoudre les équations où une variable et son logarithme (ou exponentielle) apparaissent simultanément},
+}
+
+@Book{97b:emb:klu:mik,
+  author =       "P. Embrechts and C. Kl{\"u}ppelberg and T. Mikosch",
+  title =        "Modelling Extremal Events",
+  publisher =    "Springer",
+  year =         "1997",
+  series =       "Applications of Mathematics, Stochastic Modelling and
+                 Applied Probability",
+  annote =       "book on evt: theory, statistical methods for gev",
+}
+
+@article{99a:kan:ser,
+  author  = {S. Kang and R. F. Serfozo},
+  title   = {Extreme values of phase-type and mixed random variables with parallel-processing examples},
+  journal = {Journal of Applied Probability},
+  volume  = {36},
+  pages   = {194--210},
+  year    = {1999},
+  annote  = {limiting distribution of the maximum of r.v. i.i.d from a mixture is determined by the component of the mixture that has a dominant tail},
+}
+
+@techreport{Abdallah+Plumbley-06,
+  author      = {Samer Abdallah and Mark Plumbley},
+  title       = {Geometry Dependency Analysis},
+  institution = {Center for Digital Music, Queen Mary, University of London},
+  number      = {C4DM-TR06-05},
+  year        = {2006},
+}
+
+@article{Abe+Warmuth92,
+  author  = {N. Abe and M. K. Warmuth},
+  title   = {On the Computational Complexity of Approximating Distributions by Probabilistic Automata},
+  journal = {Machine Learning},
+  volume  = {9},
+  month   = jul,
+  year    = {1992},
+}
+
+@article{Abu-Mostafa-hints,
+  author  = {Y. S. Abu-Mostafa},
+  title   = {Learning from Hints in Neural Networks},
+  journal = jcomp,
+  volume  = {6},
+  pages   = {192--198},
+  year    = {1990},
+}
+
+@article{Abu-Mostafa87,
+  author  = {Y. S. Abu-Mostafa and D. Psaltis},
+  title   = {Optical Neural Computers},
+  journal = sciam,
+  volume  = {256},
+  pages   = {88--95},
+  month   = mar,
+  year    = {1987},
+}
+
+@article{Abu-Mostafa89,
+  author  = {Y. S. Abu-Mostafa},
+  title   = {The {Vapnik}-{Chervonenkis} Dimension: Information versus Complexity in Learning},
+  journal = nc,
+  volume  = {1},
+  pages   = {312--317},
+  year    = {1989},
+}
+
+@article{abumostafa95,
+  author  = {Yaser S. Abu-Mostafa},
+  title   = {Hints},
+  journal = {Neural Computation},
+  volume  = {7},
+  number  = {4},
+  pages   = {639--671},
+  month   = jul,
+  year    = {1995},
+}
+
+@misc{Ackerman+BenDavid-2008,
+    author = "Margareta Ackerman and Shai Ben-David",
+    title = "Clustering Quality Measures",
+    year = 2008,
+    howpublished = "Snowbird Learning Workshop",
+}
+
+@article{Ackley85,
+  author  = {D. H. Ackley and G. E. Hinton and T. J. Sejnowski},
+  title   = {A Learning Algorithm for {Boltzmann} Machines},
+  journal = cogsci,
+  volume  = {9},
+  pages   = {147--169},
+  year    = {1985},
+}
+
+@InProceedings{Ackley90,
+  author =       "D. H. Ackley and M. S. Littman",
+  editor =       NIPS2ed,
+  booktitle =    NIPS2,
+  title =        "Generalization and Scaling in Reinforcement Learning",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  venue =        "Denver, CO",
+  pages =        "550--557",
+  year =         "1990",
+}
+
+@Article{ACM:Rohwer94,
+  author =       "R. Rohwer",
+  title =        "The time dimension of neural network models",
+  journal =      "ACM SIGART Bulletin",
+  volume =       "5",
+  number =       "3",
+  pages =        "36--44",
+  month =        jul,
+  year =         "1994",
+}
+
+@article{AdelsonBergen1985,
+    author={E. H. Adelson and J. R. Bergen},
+    title={Spatiotemporal Energy Models for the Perception of Motion},
+    journal={Journal of the Optical Society of America A},
+    volume=2,
+    number=2,
+    year=1985,
+    pages={284--299},
+}
+
+@Article{Agrawala70,
+  author = 	 {Ashok Kumar Agrawala},
+  title = 	 {Learning with a Probabilistic Teacher},
+  journal = 	 {IEEE Transactions on Information Theory},
+  year = 	 1970,
+  volume =	 16,
+  pages =	 {373--379}
+}
+
+@article{Ahalt90,
+  author  = {S. C. Ahalt and A. K. Krishnamurthy and P. Chen and D. E. Melton},
+  title   = {Competitive Learning Algorithms for Vector Quantization},
+  journal = nn,
+  volume  = {3},
+  pages   = {277--290},
+  year    = {1990},
+}
+
+@InProceedings{Ahmad93,
+  author =       "S. Ahmad and V. Tresp",
+  editor =       NIPS5ed,
+  booktitle =    NIPS5,
+  title =        "Some Solutions to the Missing Feature Problem in
+                 Vision",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  year =         "1993",
+}
+
+@InProceedings{Ahmed2008,
+  author    = {Amr Ahmed and Kai Yu and Wei Xu and Yihong Gong and Eric P. Xing},
+  title     = {Training Hierarchical Feed-forward Visual Recognition Models Using Transfer Learning from Pseudo Tasks},
+  booktitle = {Proceedings of the 10th European Conference on Computer Vision (ECCV'08)},
+  pages     = {69--82},
+  year      = {2008},
+}
+
+@Article{AitchisonJ1976,
+  author  = {John Aitchison and Colin Aitken},
+  title   = {Multivariate binary discrimination by the kernel method},
+  journal = {Biometrika},
+  volume  = {63},
+  number  = {3},
+  pages   = {413--420},
+  year    = {1976},
+}
+
+@article{Aizerman64,
+  author  = {Mark A. Aizerman and Emmanuel M. Braverman and Lev I. Rozonoer},
+  title   = {Theoretical Foundations of the Potential Function Method in Pattern Recognition Learning},
+  journal = {Automation and Remote Control},
+  volume  = {25},
+  pages   = {821--837},
+  year    = {1964},
+}
+
+@Article{Ajtai83,
+  author =       "Miklos Ajtai",
+  title =        "{$\Sigma_1^1$}-formulae on finite structures",
+  journal =      "Annals of Pure and Applied Logic",
+  volume =       "24",
+  number =      "1",
+  pages =        "1--48",
+  year =         "1983",
+}
+
+@article{Akaike74,
+  author  = {H. Akaike},
+  title   = {A New Look at the Statistical Model Identification},
+  journal = ieeeac,
+  volume  = {AC-19},
+  number  = {6},
+  pages   = {716--728},
+  year    = {1974},
+}
+
+@article{Al-Mashouq-hints,
+  author  = {K. A. Al-Mashouq and I. S. Reed},
+  title   = {Including Hints in Training Neural Nets},
+  journal = nc,
+  volume  = {3},
+  number  = {4},
+  pages   = {418--430},
+  year    = {1991},
+}
+
+@book{Aleksander:90,
+  author    = {I. Aleksander and H. Morton},
+  title     = {An Introduction to Neural Computing},
+  publisher = {Chapman and Hall},
+  address   = {London},
+  year      = {1990},
+  keywords  = {},
+}
+
+@inproceedings{Aleksander:93,
+  author    = {I. Aleksander and H. Morton},
+  title     = {A Neural State Machine for Iconic Language Representation},
+  editor    = {J. Mira and J. Cabestany and A. Prieto},
+  booktitle = {New Trends in Neural Computation: Proc. of the International Workshop on Artificial Neural Networks IWANN'93},
+  publisher = {Springer},
+  address   = {Berlin, Heidelberg},
+  pages     = {84--89},
+  year      = {1993},
+  keywords  = {},
+}
+
+@InProceedings{Allender96,
+  author =       "Eric Allender",
+  booktitle =    "16th Annual Conference on Foundations of Software
+                 Technology and Theoretical Computer Science",
+  title =        "Circuit Complexity Before the Dawn of the New
+                 Millennium",
+  series =       "Lecture Notes in Computer Science",
+  volume =       "1180",
+  publisher =    "Springer-Verlag",
+  pages =        "1--18",
+  year =         "1996",
+}
+
+@inproceedings{Alleva93,
+  author    = {F. Alleva and X. Huang and M. Y. Hwang},
+  title     = {An improved search algorithm using incremental knowledge for continuous speech recognition},
+  booktitle = icassp,
+  address   = {Minneapolis, Minnesota},
+  pages     = {307--310},
+  year      = {1993},
+}
+
+@book{Allgower80,
+  author    = {E. L. Allgower and K. Georg},
+  title     = {Numerical Continuation Methods. {A}n Introduction},
+  series    = {Springer Series in Computational Mathematics},
+  number    = {13},
+  publisher = {Springer-Verlag},
+  year      = {1980},
+}
+
+@book{Allgower80-short,
+  author    = {E. L. Allgower and K. Georg},
+  title     = {Numerical Continuation Methods. {A}n Introduction},
+  publisher = {Springer-Verlag},
+  year      = {1980},
+}
+
+@InProceedings{Almeida87,
+  author =       "L. B. Almeida",
+  editor =       "M. Caudill and C. Butler",
+  booktitle =    icnn,
+  title =        "A Learning Rule for Asynchronous Perceptrons with
+                 Feedback in a Combinatorial Environment",
+  volume =       "2",
+  publisher =    "IEEE",
+  address =      "New York",
+  venue =        "San Diego",
+  pages =        "609--618",
+  year =         "1987",
+}
+
+@InProceedings{Almeida88,
+  author =       "L. B. Almeida",
+  editor =       "R. Eckmiller and Ch. von der Malsburg",
+  booktitle =    "Neural Computers",
+  title =        "Backpropagation in Perceptrons with Feedback",
+  publisher =    "Springer-Verlag",
+  address =      "Berlin",
+  venue =        "Neuss, 1987",
+  pages =        "199--208",
+  year =         "1988",
+}
+
+@InProceedings{Almuallim+Dietterich-1991,
+  author    = {Almuallim, H.  and Dietterich, T. G.},
+  title     = {Learning with many irrelevant features},
+  booktitle = {Proceedings of the Ninth National Conference on Artificial Intelligence},
+  volume    = {2},
+  pages     = {547--552},
+  publisher = {AAAI Press},
+  address   = {Anaheim, California},
+  year      = {1991},
+  url       = {http://citeseer.ist.psu.edu/almuallim91learning.html},
+}
+
+@article{Almuallim+Dietterich-1994,
+    author = "Hussein Almuallim and Thomas G. Dietterich",
+    title = "Learning Boolean Concepts in the Presence of Many Irrelevant Features",
+    journal = "Artificial Intelligence",
+    volume = "69",
+    number = "1--2",
+    pages = "279--305",
+    year = "1994",
+    url = "http://citeseer.ist.psu.edu/almuallim94learning.html"
+}
+
+
+@InProceedings{Alspector87,
+  author =       "J. Alspector and R. B. Allen",
+  editor =       "P. Losleben",
+  booktitle =    "Advanced Research in VLSI: Proceedings of the 1987
+                 Stanford Conference",
+  title =        "A Neuromorphic {VLSI} Learning System",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "313--349",
+  year =         "1987",
+}
+
+@InProceedings{Alspector88,
+  author =       "J. Alspector and R. B. Allen and V. Hu and S.
+                 Satyanarayana",
+  editor =       nips87ed,
+  booktitle =    nips87,
+  title =        "Stochastic Learning Networks and Their Electronic
+                 Implementation",
+  publisher =    "American Institute of Physics",
+  address =      "New York",
+  venue =        "Denver, CO",
+  pages =        "9--21",
+  year =         "1988",
+}
+
+@Article{Amari+Wu-99,
+  author =       "S. Amari and S. Wu",
+  title =        "Improving {Support} {Vector} {Machine} classifiers by
+                 modifying kernel functions",
+  journal =      "Neural Networks",
+  volume =       "12",
+  number =       "6",
+  pages =        "783--789",
+  year =         "1999",
+}
+
+@Article{amari00adaptive,
+  author =       "{Shun-ichi} Amari and Hyeyoung Park and Kenji Fukumizu",
+  title =        "Adaptive Method of Realizing Natural Gradient Learning
+                 for Multilayer Perceptrons",
+  journal =      "Neural Computation",
+  volume =       "12",
+  number =       "6",
+  pages =        "1399--1409",
+  year =         "2000",
+  URL =          "http://citeseer.ist.psu.edu/amari98adaptive.html",
+}
+
+@article{Amari77,
+  author  = {S. A. Amari},
+  title   = {Dynamics of Pattern Formation in Lateral-Inhibition Type Neural Fields},
+  journal = biocyb,
+  volume  = {27},
+  pages   = {77--87},
+  year    = {1977},
+}
+
+@article{Amari80,
+  author  = {S. A. Amari},
+  title   = {Topographic Organization of Nerve Fields},
+  journal = bmbiol,
+  volume  = {42},
+  pages   = {339--364},
+  year    = {1980},
+}
+
+@Article{amari98natural,
+  author =       "{Shun-ichi} Amari",
+  title =        "Natural Gradient Works Efficiently in Learning",
+  journal =      "Neural Computation",
+  volume =       "10",
+  number =       "2",
+  pages =        "251--276",
+  year =         "1998",
+  URL =          "http://citeseer.ist.psu.edu/article/amari98natural.html",
+}
+
+@article{Amari99,
+  author  = {S. Amari and S. Wu},
+  title   = {Improving Support Vector Machine Classifiers by Modifying Kernel Functions},
+  journal = {Neural Networks},
+  volume  = {12},
+  number  = {6},
+  pages   = {783--789},
+  year    = {1999},
+}
+
+@Article{AmariS1997,
+  author   = {{Shun-ichi} Amari and Noboru Murata and Klaus-Robert M{\"u}ller and Michael Finke  and Howard Hua Yang },
+  title    = {Asymptotic statistical theory of overtraining and cross-validation},
+  journal  = {IEEE Transactions on Neural Networks},
+  volume   = {8},
+  number   = {5},
+  pages    = {985--996},
+  year     = {1997},
+  keywords = {regularization},
+}
+
+@InProceedings{amaya01improvement,
+  author =       "Fredy A. Amaya and Jos{\'e}-Miguel Bened{\'\i}",
+  booktitle =    "Meeting of the Association for Computational
+                 Linguistics",
+  title =        "Improvement of a Whole Sentence Maximum Entropy
+                 Language Model Using Grammatical Features",
+  pages =        "10--17",
+  year =         "2001",
+  URL =          "http://citeseer.nj.nec.com/505752.html",
+}
+
+@InProceedings{BoufadenLapalmeBengio2001,
+  author =       "N. Boufaden and G. Lapalme and Y. Bengio",
+  booktitle =    "Proceedings of the Natural Language Pacific Rim Symposium, NLPRS-01",
+  title =        "Topic segmentation: First Stage of Dialogue-Based Information extraction Process",
+  year =         "2001",
+}
+
+@article{Amit85a,
+  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
+  title   = {Spin-Glass Models of Neural Networks},
+  journal = prA,
+  volume  = {32},
+  pages   = {1007--1018},
+  year    = {1985},
+}
+
+@article{Amit85b,
+  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
+  title   = {Storing Infinite Numbers of Patterns in a Spin-Glass Model of Neural Networks},
+  journal = prl,
+  volume  = {55},
+  pages   = {1530--1533},
+  year    = {1985},
+}
+
+@article{Amit87a,
+  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
+  title   = {Statistical Mechanics of Neural Networks Near Saturation},
+  journal = annphys,
+  volume  = {173},
+  pages   = {30--67},
+  year    = {1987},
+}
+
+@article{Amit87b,
+  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
+  title   = {Information Storage in Neural Networks with Low Levels of Activity},
+  journal = prA,
+  volume  = {35},
+  pages   = {2293--2303},
+  year    = {1987},
+}
+
+@article{Amit88,
+  author  = {D. Amit},
+  title   = {Neural Networks for Counting Chimes},
+  journal = PNAS,
+  volume  = {85},
+  pages   = {2141--2145},
+  year    = {1988},
+}
+
+@book{Amit89,
+  author    = {D. Amit},
+  title     = {Modelling Brain Function},
+  publisher = {Cambridge University Press},
+  address   = {Cambridge},
+  year      = {1989},
+}
+
+@article{Ammar+Miao-2000,
+  author  = {Hany H. Ammar and Zhouhui Miao},
+  title   = {Parallel Algorithms for the Training Process of a Neural Network-Based System},
+  journal = {International Journal of High Performance Computing Applications},
+  volume  = {14},
+  number  = {1},
+  pages   = {3--25},
+  year    = {2000},
+  doi     = {10.1177/109434200001400101},
+  url     = {http://hpc.sagepub.com/cgi/content/abstract/14/1/3},
+  eprint  = {http://hpc.sagepub.com/cgi/reprint/14/1/3.pdf},
+}
+
+@Book{Anderson,
+  author =       "T. Anderson",
+  title =        "An Introduction to Multivariate Statistical
+                 Analysis",
+  publisher =    "John Wiley and Sons",
+  address =      "New York",
+  year =         "1984",
+}
+
+@article{Anderson68,
+  author  = {J. A. Anderson},
+  title   = {A Memory Model Using Spatial Correlation Functions},
+  journal = kyb,
+  volume  = {5},
+  pages   = {113--119},
+  year    = {1968},
+}
+
+@article{Anderson70,
+  author  = {J. A. Anderson},
+  title   = {Two Models for Memory Organization},
+  journal = mbio,
+  volume  = {8},
+  pages   = {137--160},
+  year    = {1970},
+}
+
+@book{Hinton+Anderson-81,
+ editor = {G. E. Hinton and J. A. Anderson},
+ title = {Parallel Models of Associative Memory},
+ publisher = {Lawrence Erlbaum Assoc.},
+ address = {Hillsdale, NJ},
+ year = 1981,
+}
+
+@incollection{Anderson81,
+  author    = {J. A. Anderson and M. C. Mozer},
+  title     = {Categorization and Selective Neurons},
+  editor    = {G. E. Hinton and J. A. Anderson},
+  booktitle = {Parallel Models of Associative Memory},
+  publisher = {Lawrence Erlbaum},
+  address   = {Hillsdale},
+  pages     = {213--236},
+  year      = {1981},
+}
+
+@article{Anderson86,
+  author  = {D. Z. Anderson},
+  title   = {Coherent Optical Eigenstate Memory},
+  journal = optlett,
+  volume  = {11},
+  pages   = {56--58},
+  year    = {1986},
+}
+
+@article{Anderson87,
+  author  = {C. H. Anderson and D. C. Van Essen},
+  title   = {Shifter Circuits: {A} Computational Strategy for Dynamic Aspects of Visual Processing},
+  journal = PNAS,
+  volume  = {84},
+  pages   = {6297--6301},
+  year    = {1987},
+}
+
+@book{Anderson88,
+  editor    = {J. A. Anderson and E. Rosenfeld},
+  title     = {Neurocomputing: Foundations of Research},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1988},
+}
+
+@InProceedings{Anderson89,
+  author =       "S. Anderson and J. W. L. Merrill and R. Port",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  booktitle =    cmss88,
+  title =        "Dynamic Speech Categorization with Recurrent
+                 Networks",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  venue =        "Pittsburgh, 1988",
+  pages =        "398--406",
+  year =         "1989",
+}
+
+@article{Ando+Zhange-JMLR-2005,
+  author  = {Rie Kubota Ando and Tong Zhang},
+  title   = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
+  journal = jmlr,
+  volume  = {6},
+  pages   = {1817--1853},
+  year    = {2005},
+}
+
+@Article{Andrieu03,
+  author =       "Christophe Andrieu and Nando de Freitas and Arnaud
+                 Doucet and Michael I. Jordan",
+  title =        "An Introduction to {MCMC} for Machine Learning",
+  journal =      "Machine Learning",
+  volume =       "50",
+  number =       "1--2",
+  pages =        "5--43",
+  year =         "2003",
+}
+
+@article{Andrieu2003,
+  author  = {C. Andrieu and N. de Freitas and A. Doucet and M. Jordan},
+  title   = {An introduction to {MCMC} for machine learning},
+  journal = {Machine Learning},
+  volume  = {50},
+  pages   = {5--43},
+  year    = {2003},
+}
+
+@Article{Angeniol88,
+  author =       "B. Ang{\'e}niol and G. de La Croix Vaubois and
+                 Le Texier, J.-Y.",
+  title =        "Self-Organizing Feature Maps and the Travelling
+                 Salesman Problem",
+  journal =      nn,
+  volume =       "1",
+  pages =        "289--293",
+  year =         "1988",
+}
+
+@article{Angluin83,
+  author  = {D. Angluin and C. Smith},
+  title   = {Inductive Inference: Theory and Methods},
+  journal = {Computing Surveys},
+  volume  = {15},
+  number  = {3},
+  pages   = {237--269},
+  year    = {1983},
+}
+
+@book{Arbib87,
+  author    = {M. A. Arbib},
+  title     = {Brains, Machines, and Mathematics},
+  publisher = {Springer-Verlag},
+  address   = {Berlin},
+  year      = {1987},
+}
+
+@book{ARP94,
+  author    = {{Advanced Research Projects Agency}},
+  title     = {Proceedings of the 1994 {ARPA} Human Language Technology Workshop (Princeton, New Jersey, March 1994)},
+  publisher = {Morgan Kaufmann},
+  year      = {1994},
+}
+
+@misc{Asuncion+Newman:2007,
+  author      = {A. Asuncion and D. J. Newman},
+  title       = {{UCI} Machine Learning Repository},
+  institution = {University of California, Irvine, School of Information and Computer Sciences},
+  year        = {2007},
+  url         = {http://www.ics.uci.edu/$\sim$mlearn/MLRepository.html},
+}
+
+@article{ashetal04,
+author = "Ash, J. and Berg, M. and Coiera, E.",
+title = "Some unintended consequences of 
+information technology in health care: the nature of patient care 
+information system-related errors",
+journal = "J Am Med Inform Assoc",
+volume = "11",
+number = 2,
+pages = "104--112",
+year = 2004,
+}
+
+@article{ashetal07,
+author = "Ash, J. and Sittig, D. and Dykstra, R. and Guappone, K. and 
+Carpenter, J. and Seshadri, V.",
+title = "Categorizing the unintended sociotechnical consequences of 
+computerized provider order entry",
+journal = "Int J Med Inform",
+volume = 76,
+number = "Suppl1",
+pages = "21--27",
+year = 2007,
+}
+
+@inproceedings{Atal83,
+  author    = {B. S. Atal},
+  title     = {Efficient coding of {LPC} parameters by temporal decomposition},
+  booktitle = icassp,
+  address   = {Boston, MA},
+  pages     = {81--84},
+  year      = {1983},
+}
+
+@phdthesis{Athaide95,
+  author  = {C. R. Athaide},
+  title   = {Likelihood estimation and state estimation for nonlinear state space models},
+  school  = {Graduate Group in Managerial Science and Applied Economics, University of Pennsylvania},
+  address = {Philadelphia, PA},
+  year    = {1995},
+}
+
+@Book{Atherton-75,
+  author =       "D. P. Atherton",
+  title =        "Nonlinear Control Engineering",
+  publisher =    "Van Nostrand Reinhold",
+  address =      "Wokingham, England",
+  year =         "1975",
+}
+
+@article{atkeson96locally,
+  author  = {C. G. Atkeson and A. W. Moore and S. Schaal},
+  title   = {Locally Weighted Learning for Control},
+  journal = {Artificial Intelligence Review},
+  volume  = {11},
+  pages   = {75--113},
+  year    = {1997},
+}
+
+@InProceedings{Aubert94,
+  author =       "X. Aubert and C. Dugast and H. Ney and V. Steinbiss",
+  booktitle =    icassp,
+  title =        "Large vocabulary continuous speech recognition of
+                 {Wall Street Journal} data",
+  address =      "Adelaide, Australia",
+  pages =        "129--132",
+  year =         "1994",
+}
+
+@InProceedings{Auer-96,
+  author =       "Peter Auer and Mark Herbster and Manfred K. Warmuth",
+  editor =       NIPS8ed,
+  booktitle =    NIPS8,
+  title =        "Exponentially Many Local Minima for Single Neurons",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "315--322",
+  year =         "1996",
+}
+
+@inproceedings{auer97,
+  author    = {Peter Auer},
+  title     = {On learning from multi-instance examples: Empirical evaluation of a theoretical approach},
+  booktitle = {Proc. 14th International Conference on Machine Learning},
+  publisher = {Morgan Kaufmann},
+  pages     = {21--29},
+  year      = {1997},
+}
+
+@inproceedings{b-cdmvqfa-97,
+  author    = {Jonathan Baxter},
+  title     = {The canonical distortion measure for vector quantization and function approximation},
+  booktitle = {Proc. 14th International Conference on Machine Learning},
+  publisher = {Morgan Kaufmann},
+  pages     = {39--47},
+  year      = {1997},
+}
+
+@InCollection{Bach-2007,
+  author =       "Francis Bach",
+  editor =       NIPS19ed,
+  booktitle =    NIPS19,
+  title =        "Active learning for misspecified generalized linear
+                 models",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2007",
+}
+
+@article{Bachmann87,
+  author  = {C. M. Bachmann and L. N. Cooper and A. Dembo and O. Zeitouni},
+  title   = {A Relaxation Model for Memory with High Storage Density},
+  journal = PNAS,
+  volume  = {84},
+  pages   = {7529--7531},
+  year    = {1987},
+}
+
+@mastersthesis{Bachrach88,
+  author  = {J. Bachrach},
+  title   = {Learning to Represent State},
+  school  = {University of Massachusetts},
+  address = {Amherst},
+  year    = {1988},
+}
+
+@article{Back-nc91,
+  author  = {A. D. Back and A. C. Tsoi},
+  title   = {{FIR} and {IIR} Synapses: {A} New Neural Network Architecture for Time Series Modeling},
+  journal = nc,
+  volume  = {3},
+  number  = {3},
+  pages   = {375--385},
+  year    = {1991},
+}
+
+@InCollection{Bahadur61,
+  author =       "R. R. Bahadur",
+  editor =       "H. Solomon",
+  booktitle =    "Studies in Item Analysis and Prediction",
+  title =        "A representation of the joint distribution of
+                 responses to n dichotomous items",
+  publisher =    "Stanford University Press",
+  address =      "Stanford, California",
+  pages =        "158--168",
+  year =         "1961",
+}
+
+@inproceedings{bahl77,
+  author    = {L. R. Bahl and J. K. Baker and R. L. Mercer},
+  title     = {Perplexity: a measure of difficulty of speech recognition tasks},
+  booktitle = {94th Meeting of the Acoustical Society of America},
+  address   = {Miami},
+  month     = dec,
+  year      = {1977},
+}
+
+@article{Bahl83,
+  author  = {L. R. Bahl and F. Jelinek and R. L. Mercer},
+  title   = {A Maximum Likelihood Approach to Continuous Speech Recognition},
+  journal = ieeetpami,
+  volume  = {5},
+  number  = {2},
+  pages   = {179--190},
+  month   = mar,
+  year    = {1983},
+}
+
+@InProceedings{Bahl86,
+  author =       "Lalit Bahl and Peter Brown and Peter {deSouza} and Robert Mercer",
+  booktitle =    icassp,
+  title =        "Maximum mutual information estimation of hidden {Markov}
+                 parameters for speech recognition",
+  address =      "Tokyo, Japan",
+  pages =        "49--52",
+  year =         "1986",
+}
+
+@Article{Bahl87,
+  author =       "L. R. Bahl and P. Brown and P. V. {de Souza} and R. L.
+                 Mercer",
+  title =        "Speech recognition with continuous-parameter hidden
+                 {Markov} models",
+  journal =      "Computer Speech and Language",
+  volume =       "2",
+  pages =        "219--234",
+  year =         "1987",
+}
+
+@inproceedings{Bahl88,
+  author    = {L. R. Bahl and P. Brown and P. V. de Souza and R. L. Mercer},
+  title     = {Speech recognition with continuous-parameter hidden {Markov} models},
+  booktitle = icassp,
+  address   = {New York, NY},
+  pages     = {40--43},
+  year      = {1988},
+}
+
+@article{Bailey-Simon-60,
+  author  = {Robert A. Bailey and Leroy Simon},
+  title   = {Two Studies in Automobile Insurance Ratemaking},
+  journal = {ASTIN Bulletin},
+  volume  = {1},
+  number  = {4},
+  pages   = {192--217},
+  year    = {1960},
+}
+
+@incollection{Baker75,
+  author    = {J. K. Baker},
+  title     = {Stochastic modeling for automatic speech understanding},
+  editor    = {D. R. Reddy},
+  booktitle = {Speech Recognition},
+  publisher = {Academic Press},
+  address   = {New York},
+  pages     = {521--542},
+  year      = {1975},
+}
+
+@book{Baker77,
+  author    = {C. T. H. Baker},
+  title     = {The numerical treatment of integral equations},
+  publisher = {Clarendon Press},
+  address   = {Oxford},
+  year      = {1977},
+}
+
+@inproceedings{Baker98,
+  author    = {D. Baker and A. {McCallum}},
+  title     = {Distributional Clustering of Words for Text Classification},
+  booktitle = {SIGIR'98},
+  year      = {1998},
+}
+
+@inproceedings{baker98berkeley,
+  author    = {Collin F. Baker and Charles J. Fillmore and John B. Lowe},
+  title     = {The {Berkeley} {FrameNet} Project},
+  editor    = {Christian Boitet and Pete Whitelock},
+  booktitle = {Proceedings of the Thirty-Sixth Annual Meeting of the {Association} for {Computational} {Linguistics} and Seventeenth International Conference on Computational Linguistics},
+  publisher = {Morgan Kaufmann Publishers},
+  address   = {San Francisco, California},
+  pages     = {86--90},
+  year      = {1998},
+}
+
+@InProceedings{Bakis76,
+  author =       "R. Bakis",
+  booktitle =    "91st Meeting of the Acoustical Society of America",
+  title =        "Continuous Speech Recognition via Centisecond Acoustic
+                 States",
+  month =        apr,
+  year =         "1976",
+}
+
+@article{bakker03,
+  author    = {Bart Bakker and Tom Heskes},
+  title     = {Task clustering and gating for {B}ayesian multitask learning},
+  journal   = jmlr,
+  volume    = {4},
+  pages     = {83--99},
+  year      = {2003},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA, USA},
+  issn      = {1533-7928},
+}
+
+@Book{Baldi-Brunak-98,
+  author =       "Pierre Baldi and S{\o}ren Brunak",
+  title =        "Bioinformatics: The Machine Learning Approach",
+  publisher =    "MIT Press",
+  year =         "1998",
+}
+
+@article{Baldi89,
+  author  = {Pierre Baldi and Kurt Hornik},
+  title   = {Neural Networks and Principal Component Analysis: Learning from Examples Without Local Minima},
+  journal = nn,
+  volume  = {2},
+  pages   = {53--58},
+  year    = {1989},
+}
+
+@Article{Baldi94,
+  author =       "P. Baldi and Y. Chauvin and T. Hunkapiller and M.
+                 {McClure}",
+  title =        "Hidden {Markov} models of biological primary sequence
+                 information",
+  journal =      "Proc. Nat. Acad. Sci. (USA)",
+  volume =       "91",
+  number =       "3",
+  pages =        "1059--1063",
+  year =         "1994",
+}
+
+@article{Ballard81,
+  author  = {D. H. Ballard},
+  title   = {Generalizing the Hough Transform to Detect Arbitrary Shapes},
+  journal = {Pattern Recognition},
+  volume  = {13},
+  number  = {2},
+  pages   = {111--122},
+  year    = {1981},
+}
+
+@InProceedings{Baluja97,
+  author =       "S. Baluja",
+  editor =       NIPS9ed,
+  booktitle =    NIPS9,
+  title =        "Genetic Algorithms and Explicit Search Statistics",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "1997",
+}
+
+@article{Bar-Shalom78,
+  author  = {Y. Bar-Shalom},
+  title   = {Tracking methods in a multi-target environment},
+  journal = {IEEE Trans. on Aut. Control},
+  volume  = {23},
+  pages   = {618--626},
+  year    = {1978},
+}
+
+@book{Bar-Shalom93,
+  author    = {Y. Bar-Shalom and {X.-R.} Li},
+  title     = {Estimation and Tracking},
+  publisher = {Artech House},
+  address   = {Boston, MA},
+  year      = {1993},
+}
+
+@InProceedings{Barber+Williams-nips9,
+  author =       "D. Barber and C. K. I. Williams",
+  editor =       NIPS9ed,
+  booktitle =    NIPS9,
+  title =        "Gaussian Processes for {Bayesian} Classification via
+                 Hybrid {Monte Carlo}",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "340--346",
+  year =         "1997",
+}
+
+@inproceedings{Bareiss87,
+  author    = {E. R. Bareiss and B. Porter},
+  title     = {Protos: An Exemplar-Based Learning Apprentice},
+  booktitle = {Proceedings of the 4th International Workshop on Machine Learning},
+  publisher = {Morgan Kaufmann},
+  address   = {Irvine, CA},
+  pages     = {12--23},
+  year      = {1987},
+}
+
+@article{Barhen89,
+  author  = {J. Barhen and S. Gulati and M. Zak},
+  title   = {Neural Learning of Constrained Nonlinear Transformations},
+  journal = computer,
+  pages   = {67--76},
+  month   = jun,
+  year    = {1989},
+}
+
+@article{Nykamp+Ringach-2002,
+ author = {D. Q. Nykamp and D. L. Ringach},
+ title = {Full identification of a linear-nonlinear system via cross-correlation analysis},
+ journal = {Journal of Vision}, 
+ volume = 2,
+ number = 1, 
+ pages = {1--11},
+ year = 2002,
+}
+
+@article{Wilson+Cowan-72,
+ author = {Hugh R. Wilson and Jack D. Cowan},
+ title = {Excitatory and inhibitory interactions in localized populations of model neurons},
+ journal = {Biophysical Journal},
+ volume = 12,
+ pages = {1--24},
+ year = 1972,
+}
+
+@article{Barlow89,
+  author  = {H. B. Barlow},
+  title   = {Unsupervised Learning},
+  journal = nc,
+  volume  = {1},
+  pages   = {295--311},
+  year    = {1989},
+}
+
+@article{Barlow-2001,
+    address = {Cambridge, UK.},
+    author = {H. Barlow},
+    issn = {0954-898X},
+    journal = {Network: Computation in Neural Systems},
+    month = aug,
+    number = {3},
+    pages = {241--253},
+    title = {Redundancy reduction revisited},
+    url = {http://view.ncbi.nlm.nih.gov/pubmed/11563528},
+    volume = {12},
+    year = {2001},
+}
+
+@inproceedings{Barron+Barron88,
+  author    = {A. R. Barron and R. L. Barron},
+  title     = {Statistical learning networks: {A} unifying view},
+  editor    = {E. Wegman},
+  booktitle = {Computing Science and Statistics, Proc. 20th Symp. Interface},
+  publisher = {Amer. Statist. Assoc.},
+  address   = {Washington, DC},
+  pages     = {192--203},
+  year      = {1988},
+}
+
+@inproceedings{Barron89,
+  author    = {A. R. Barron},
+  title     = {Statistical properties of artificial neural networks},
+  booktitle = {Proc. of the 28th conf. on Decision and Control},
+  address   = {Tampa, Florida},
+  pages     = {280--285},
+  year      = {1989},
+}
+
+@incollection{Barron91,
+  author =       "Andrew R. Barron",
+  title =        "Complexity Regularization with Application to Artificial Neural Networks",
+  booktitle =      "Nonparametric Functional Estimation and Related Topics",
+  pages =        "561--576",
+  editor = "G. Roussas",
+  year =         "1991",
+  publisher = "Kluwer Academic Publishers",
+}
+
+
+@Article{Bartal95,
+  author =       "Yair Bartal and Jie Lin and Robert E. Uhrig",
+  title =        "Nuclear Power Plant Transient Diagnostics Using
+                 Artificial Neural Networks that Allow ``don't know''
+                 Classifications",
+  journal =      "Nuclear Technology",
+  volume =       "110",
+  pages =        "436--449",
+  month =        jun,
+  year =         "1995",
+}
+
+@article{Bartlett+Uhrig92,
+  author  = {E. B. Bartlett and R. E. Uhrig},
+  title   = {Nuclear Power Plant Status Diagnostics Using an Artificial Neural Network},
+  journal = {Nuclear Technology},
+  volume  = {97},
+  month   = mar,
+  year    = {1992},
+}
+
+@Article{Bartlett46,
+  author =       "M. S. Bartlett",
+  title =        "On the theoretical specification and sampling
+                 properties of autocorrelated time series",
+  journal =      "J. Royal Stat. Soc. B",
+  volume =       "8",
+  pages =        "27--41",
+  year =         "1946",
+}
+
+@article{Bartlett92,
+  author  = {P. L. Bartlett and T. Downs},
+  title   = {Using Random Weights to train Multilayer Networks of Hard-Limiting Units},
+  journal = ieeetrnn,
+  volume  = {3},
+  number  = {2},
+  pages   = {202--210},
+  year    = {1992},
+}
+
+@techreport{Barto-tr91,
+  author      = {A. G. Barto and S. Bradtke and S. P. Singh},
+  title       = {Real-Time Learning and {Control} Using Asynchronous Dynamic Programming},
+  institution = {Univ. of Massachusetts (Computer Science)},
+  number      = {91-57},
+  address     = {Amherst MA},
+  year        = {1991},
+}
+
+@article{Barto81,
+  author  = {A. G. Barto and R. S. Sutton and P. S. Brouwer},
+  title   = {Associative Search Network: Reinforcement Learning Associative Memory},
+  journal = {Biological Cybernetics},
+  volume  = {40},
+  year    = {1981},
+}
+
+@article{Barto83,
+  author  = {A. G. Barto and R. S. Sutton and C. W. Anderson},
+  title   = {Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problems},
+  journal = ieeesmc,
+  volume  = {13},
+  year    = {1983},
+}
+
+@article{Barto85,
+  author  = {A. G. Barto and P. Anandan},
+  title   = {Pattern Recognizing Stochastic Learning Automata},
+  journal = ieeesmc,
+  volume  = {15},
+  pages   = {360--375},
+  year    = {1985},
+}
+
+@inproceedings{Barto87,
+  author    = {A. G. Barto and M. I. Jordan},
+  title     = {Gradient Following Without Back-Propagation in Layered Networks},
+  editor    = {M. Caudill and C. Butler},
+  booktitle = icnn,
+  volume    = {2},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  pages     = {629--636},
+  year      = {1987},
+}
+
+@incollection{Barto91,
+  author    = {A. G. Barto and R. S. Sutton and C. J. C. H. Watkins},
+  title     = {Learning and Sequential Decision Making},
+  editor    = {M. Gabriel and J. W. Moore},
+  booktitle = {Learning and Computational Neuroscience},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1991},
+}
+
+@InCollection{Barto92,
+  author =       "A. G. Barto",
+  editor =       "W. T. Miller and R. S. Sutton and P. J. Werbos",
+  booktitle =    "Neural Networks for Control",
+  title =        "Connectionist learning for control: an overview",
+  publisher =    "MIT Press",
+  year =         "1992",
+}
+
+@TechReport{Barto_tr91,
+  author =       "A. G. Barto and S. Bradtke and S. P. Singh",
+  title =        "Real-Time Learning and {Control} Using Asynchronous
+                 Dynamic Programming",
+  number =       "91-57",
+  institution =  "Univ. of Massachusetts (Computer Science)",
+  address =      "Amherst MA",
+  year =         "1991",
+}
+
+@article{bassiouni95,
+  author  = {M. A. Bassiouni and A. Mukherjee},
+  title   = {Efficient Decoding of Compressed Data},
+  journal = {Journal of the American Society for Information Science},
+  volume  = {46},
+  number  = {1},
+  pages   = {1--8},
+  year    = {1995},
+}
+
+@article{Basu94,
+  author  = {A. Basu and E. B. Bartlett},
+  title   = {Detecting Faults in a Nuclear Power Plant by Using Dynamic Node Architecture Artificial Neural Networks},
+  journal = {Nuclear Science and Engineering},
+  volume  = {116},
+  month   = apr,
+  year    = {1994},
+}
+
+@article{battiti-89,
+  author  = {R. Battiti},
+  title   = {Accelerated Backpropagation Learning: Two Optimization Methods},
+  journal = {Complex Systems},
+  volume  = {3},
+  pages   = {331--342},
+  year    = {1989},
+}
+
+@InProceedings{battiti-masulli-90,
+  author =       "R. Battiti and F. Masulli",
+  booktitle =    "Proceedings of International Neural Network Conference
+                 (INNC 90, Paris)",
+  title =        "{BFGS} optimization for faster and automated
+                 supervised learning",
+  pages =        "757--760",
+  year =         "1990",
+}
+
+@Article{Battiti92,
+  author =       "R. Battiti",
+  title =        "First- and Second-Order Methods for Learning: Between
+                 Steepest Descent and {Newton}'s Method",
+  journal =      "Neural Computation",
+  volume =       "4",
+  type =         "Review",
+  number =       "2",
+  pages =        "141--166",
+  year =         "1992",
+}
+
+@Article{battiti:1994:ieeetnn,
+  author =       "R. Battiti",
+  title =        "Using Mutual Information for Selecting Features in
+                 Supervised Neural Net Learning",
+  journal =      "{IEEE} Transactions on Neural Networks",
+  volume =       "5",
+  number =       "4",
+  pages =        "537--550",
+  year =         "1994",
+}
+
+@article{Baudat+Anouar-2000,
+    author = {G. Baudat and F. Anouar},
+    title = {Generalized Discriminant Analysis Using a Kernel Approach},
+    journal = {Neural Computation},
+    volume = {12},
+    number = {10},
+    year = {2000},
+    issn = {0899-7667},
+    pages = {2385--2404},
+    doi = {10.1162/089976600300014980},
+    publisher = {MIT Press},
+    address = {Cambridge, MA, USA},
+}
+
+@article{Baum66,
+  author  = {L. E. Baum and T. Petrie},
+  title   = {Statistical Inference for Probabilistic Functions of Finite State {Markov} Chains},
+  journal = {Ann. Math. Stat.},
+  volume  = {37},
+  pages   = {1559--1563},
+  year    = {1966},
+}
+
+@article{Baum67,
+  author  = {L. E. Baum and J. Eagon},
+  title   = {An inequality with applications to statistical prediction for functions of {Markov} processes and to a model of ecology},
+  journal = {Bull. Amer. Math. Soc.},
+  volume  = {73},
+  pages   = {360--363},
+  year    = {1967},
+}
+
+@Article{Baum70,
+  author =       "L. E. Baum and T. Petrie and G. Soules and N. Weiss",
+  title =        "A maximization technique occurring in the statistical
+                 analysis of probabilistic functions of {Markov}
+                 chains",
+  journal =      "Ann. Math. Stat.",
+  volume =       "41",
+  pages =        "164--171",
+  year =         "1970",
+}
+
+@article{Baum72,
+  author  = {L. E. Baum},
+  title   = {An inequality and associated maximization technique in statistical estimation for probabilistic functions of a {Markov} process},
+  journal = {Inequalities},
+  volume  = {3},
+  pages   = {1--8},
+  year    = {1972},
+}
+
+@inproceedings{Baum86,
+  author    = {E. B. Baum},
+  title     = {Towards Practical ``Neural'' Computation for Combinatorial Optimization Problems},
+  editor    = {J. S. Denker},
+  booktitle = snowbird,
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  pages     = {53--58},
+  year      = {1986},
+}
+
+@inproceedings{Baum88,
+  author    = {E. B. Baum and F. Wilczek},
+  title     = {Supervised Learning of Probability Distributions by Neural Networks},
+  editor    = nips87ed,
+  booktitle = nips87,
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  pages     = {52--61},
+  year      = {1988},
+}
+
+@article{Baum89,
+  author  = {E. B. Baum and D. Haussler},
+  title   = {What Size Net Gives Valid Generalization?},
+  journal = nc,
+  volume  = {1},
+  pages   = {151--160},
+  year    = {1989},
+}
+
+@article{BaumNote,
+  author  = {E. B. Baum},
+  title   = {Review of {J}. {S}. {Judd}'s book {\em {Neural} {Network} {Design} and the {Complexity} of {Learning}}},
+  journal = ieeetrnn,
+  volume  = {2},
+  number  = {1},
+  pages   = {181--182},
+  year    = {1991},
+}
+
+@Article{baxter00,
+  author =       "Jonathan Baxter",
+  title =        "A Model of Inductive Bias Learning",
+  journal =      "J. Artif. Intell. Res. (JAIR)",
+  volume =       "12",
+  pages =        "149--198",
+  year =         "2000",
+}
+
+@inproceedings{baxter95a,
+  author    = {Jonathan Baxter},
+  title     = {Learning Internal Representations},
+  booktitle = colt95,
+  publisher = {ACM Press},
+  address   = {Santa Cruz, California},
+  pages     = {311--320},
+  year      = {1995},
+  url       = {http://citeseer.ist.psu.edu/baxter95learning.html},
+}
+
+@unpublished{baxter95b,
+  author = {Jonathan Baxter},
+  title  = {The Canonical Metric for Vector Quantization},
+  note   = {submitted to Information and Computation},
+  year   = {1995},
+}
+
+@inproceedings{baxter96,
+  author    = {Jonathan Baxter},
+  title     = {Learning Model Bias},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  volume    = {8},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {169--175},
+  year      = {1996},
+}
+
+@Article{baxter97,
+  author =       "Jonathan Baxter",
+  title =        "A {Bayesian}/information theoretic model of learning to
+                 learn via multiple task sampling",
+  journal =      "Machine Learning",
+  volume =       "28",
+  pages =        "7--40",
+  year =         "1997",
+}
+
+@article{baxter97a,
+  author  = {Jonathan Baxter},
+  title   = {A {Bayesian}/Information theoretic model of learning to learn via multiple task sampling},
+  journal = {Machine Learning},
+  volume  = {28},
+  pages   = {7--40},
+  year    = {1997},
+}
+
+@InProceedings{Becker89,
+  author =       "S. Becker and Y. {LeCun}",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  booktitle =    cmss88,
+  title =        "Improving the Convergence of Back-Propagation Learning
+                 with Second Order Methods",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Pittsburgh 1988",
+  pages =        "29--37",
+  year =         "1989",
+}
+
+@InProceedings{Belkin+al-2004,
+  author =       "Mikhail Belkin and Irina Matveeva and Partha Niyogi",
+  editor =       "John Shawe-Taylor and Yoram Singer",
+  booktitle =    colt04,
+  title =        "Regularization and Semi-supervised Learning on Large
+                 Graphs",
+  publisher =    "Springer",
+  pages =        "624--638",
+  year =         "2004",
+}
+
+@inproceedings{Belkin+Niyogi-2002,
+  author    = {Mikhail Belkin and Partha Niyogi},
+  title     = {Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering},
+  editor    = NIPS14ed,
+  booktitle = NIPS14,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2002},
+  original  = {orig/AA42.ps},
+}
+
+@techreport{Belkin+Niyogi-2002-01,
+  author      = {Mikhail Belkin and Partha Niyogi},
+  title       = {Laplacian Eigenmaps for Dimensionality Reduction and Data Representation},
+  institution = {University of Chicago, Computer Science},
+  number      = {TR-2002-01},
+  year        = {2002},
+}
+
+@TechReport{Belkin+Niyogi-2002-ss,
+  author =       "Mikhail Belkin and Partha Niyogi",
+  title =        "Semi-supervised learning on manifolds",
+  number =       "TR-2002-12",
+  institution =  "University of Chicago, Computer Science",
+  year =         "2002",
+}
+
+@article{Belkin+Niyogi-2003,
+  author  = {Mikhail Belkin and Partha Niyogi},
+  title   = {Laplacian Eigenmaps for Dimensionality Reduction and Data Representation},
+  journal = {Neural Computation},
+  volume  = {15},
+  number  = {6},
+  pages   = {1373--1396},
+  year    = {2003},
+}
+
+@inproceedings{Belkin+Niyogi-nips2003,
+  author    = {Mikhail Belkin and Partha Niyogi},
+  title     = {Using Manifold Structure for Partially Labeled Classification},
+  editor    = NIPS15ed,
+  booktitle = NIPS15,
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2003},
+}
+
+@Article{BelkinM2006,
+  author =       "Belkin, Mikhail and Niyogi, Partha and Sindhwani, Vikas",
+  title =        "Manifold Regularization: A Geometric Framework for
+                 Learning from Labeled and Unlabeled Examples",
+  journal =      jmlr,
+  volume =       "7",
+  pages =        "2399--2434",
+  year =         "2006",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA, USA",
+  issn =         "1533-7928",
+}
+
+@article{Bell-Sejnowski95,
+  author  = {Anthony J. Bell and Terrence J. Sejnowski},
+  title   = {An information maximisation approach to blind separation and blind deconvolution},
+  journal = {Neural Computation},
+  volume  = {7},
+  number  = {6},
+  pages   = {1129--1159},
+  year    = {1995},
+}
+
+@inproceedings{Bellagarda+Nahamoo89,
+  author    = {J. R. Bellegarda and D. Nahamoo},
+  title     = {Tied Mixture Continuous Parameter Models for Large Vocabulary Isolated Speech Recognition},
+  booktitle = icassp,
+  address   = {Glasgow, Scotland},
+  pages     = {13--16},
+  year      = {1989},
+}
+
+@InProceedings{Bellegarda97,
+  author =       "J. R. Bellegarda",
+  booktitle =    "Proceedings of Eurospeech 97",
+  title =        "A latent semantic analysis framework for large-span
+                 language modeling",
+  address =      "Rhodes, Greece",
+  pages =        "1451--1454",
+  year =         "1997",
+}
+
+@book{Bellman57,
+  author    = {R. E. Bellman},
+  title     = {Dynamic Programming},
+  publisher = {Princeton University Press},
+  address   = {NJ},
+  year      = {1957},
+}
+
+@book{Bellman61,
+  author    = {R. Bellman},
+  title     = {Adaptive Control Processes: {A} Guided Tour},
+  publisher = {Princeton University Press},
+  address   = {New Jersey},
+  year      = {1961},
+}
+
+@Book{Bellman74,
+  author =       "R. Bellman",
+  title =        "Introduction to Matrix Analysis",
+  publisher =    "McGraw-Hill",
+  address =      "New York, NY",
+  edition =      "Second",
+  year =         "1974",
+}
+
+@InProceedings{ben-david03,
+  author =       "Shai Ben-David and Reba Schuller",
+  booktitle =    colt03,
+  title =        "Exploiting Task Relatedness for Multiple Task
+                 Learning",
+  crossref =     "colt03",
+  pages =        "567--580",
+  year =         "2003",
+}
+
+@InProceedings{BenDucVin01,
+  author =       "Yoshua Bengio and R{\'e}jean Ducharme and Pascal
+                 Vincent",
+  editor =       NIPS13ed,
+  booktitle =    NIPS13,
+  title =        "A Neural Probabilistic Language Model",
+  publisher =    "MIT Press",
+  pages =        "932--938",
+  year =         "2001",
+}
+
+@InProceedings{BenDucVin01-small,
+  author =       "Yoshua Bengio and R{\'e}jean Ducharme and Pascal
+                 Vincent",
+  editor =       "Todd K. Leen and Thomas G. Dietterich and Volker
+                 Tresp",
+  booktitle =    "Advances in NIPS 13",
+  title =        "A Neural Probabilistic Language Model",
+  publisher =    "MIT Press",
+  pages =        "932--938",
+  year =         "2001",
+}
+
+@inproceedings{BenDucVin01-short,
+  author    = {Y. Bengio and R. Ducharme and P. Vincent},
+  title     = {A Neural Probabilistic Language Model},
+  booktitle = {Adv. Neural Inf. Proc. Sys. 13},
+  pages     = {932--938},
+  year      = {2001},
+}
+
+@techreport{Bengio+al-2004,
+  author      = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
+  title       = {Efficient Non-Parametric Function Induction in Semi-Supervised Learning},
+  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
+  number      = {1247},
+  year        = {2004},
+}
+
+@incollection{Bengio+al-2005,
+  author    = {Yoshua Bengio and Nicolas {Le Roux} and Pascal Vincent and Olivier Delalleau and Patrice Marcotte},
+  title     = {Convex Neural Networks},
+  editor    = NIPS18ed,
+  booktitle = NIPS18,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {123--130},
+  year      = {2006},
+}
+
+@incollection{Bengio+al-2005-small,
+  author    = {Yoshua Bengio and Nicolas {Le Roux} and Pascal Vincent and Olivier Delalleau and Patrice Marcotte},
+  title     = {Convex Neural Networks},
+  booktitle = {NIPS 18},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {123--130},
+  year      = {2006},
+}
+
+@InCollection{Bengio+al-spectral-2006-short,
+  author =       "Yoshua Bengio and Olivier Delalleau and Nicolas {Le
+                 Roux} and Jean-Fran{\c{c}}ois Paiement and Pascal Vincent
+                 and Marie Ouimet",
+  editor =       "Isabelle Guyon and Steve Gunn and Masoud Nikravesh and
+                 Lotfi Zadeh",
+  booktitle =    "Feature Extraction, Foundations and Applications",
+  title =        "Spectral Dimensionality Reduction",
+  publisher =    "Springer",
+  year =         "2006",
+}
+
+@inproceedings{Bengio+Bengio-NIPS99,
+  author    = {Yoshua Bengio and Samy Bengio},
+  title     = {Modeling High-Dimensional Discrete Data with Multi-Layer Neural Networks},
+  editor    = NIPS12ed,
+  booktitle = NIPS12,
+  publisher = {MIT Press},
+  pages     = {400--406},
+  year      = {2000},
+}
+
+@article{Bengio+Bengio-trnn2000,
+  author  = {S. Bengio and Y. Bengio},
+  title   = {Taking on the Curse of Dimensionality in Joint Distributions Using Neural Networks},
+  journal = {IEEE Transactions on Neural Networks, special issue on Data Mining and Knowledge Discovery},
+  volume  = {11},
+  number  = {3},
+  pages   = {550--557},
+  year    = {2000},
+  url     = {http://www.iro.umontreal.ca/~lisa/pointeurs/jdm.pdf},
+}
+
+% Compact variant of Bengio+Bengio-trnn2000 with an abbreviated journal name.
+@article{Bengio+Bengio-trnn2000-small,
+  author  = {S. Bengio and Y. Bengio},
+  title   = {Taking on the Curse of Dimensionality in Joint Distributions Using Neural Networks},
+  journal = {IEEE Trans. Neural Networks},
+  volume  = {11},
+  number  = {3},
+  pages   = {550--557},
+  year    = {2000},
+  url     = {http://www.iro.umontreal.ca/~lisa/pointeurs/jdm.pdf},
+}
+
+@article{Bengio+Chapados2003,
+  author  = {Yoshua Bengio and Nicolas Chapados},
+  title   = {Extensions to Metric-Based Model Selection},
+  journal = jmlr,
+  volume  = {3},
+  pages   = {1209--1227},
+  month   = mar,
+  year    = {2003},
+  note    = {Special Issue on Feature Selection},
+}
+
+% NOTE(review): the report number is the placeholder "---"; confirm the real number.
+@techreport{Bergstra-TR2008,
+  author      = {James Bergstra and Yoshua Bengio and Jerome Louradour},
+  title       = {Image Classification with Biologically Motivated Neuron Models},
+  institution = {Dept. IRO, Universit\'e de Montr\'eal},
+  number      = {---},
+  year        = {2008},
+}
+
+% Submission status lives in `note`: standard styles do not print `publisher`
+% for article entries, so the status was silently dropped before.
+% NOTE(review): the key says 2009 but the year field is 2008 -- confirm.
+@article{Bergstra-2009,
+  author =       "James Bergstra and Yoshua Bengio and Jerome Louradour",
+  title =        "Suitability of Complex Cell Models for Object Categorization",
+  journal = {Computational Neuroscience},
+  year = 2008,
+  note = "Submitted",
+}
+
+@techreport{Bengio+Frasconi94a,
+  author      = {Y. Bengio and P. Frasconi},
+  title       = {An {EM} Approach to Learning Sequential Behavior},
+  institution = {Universit\`a di Firenze},
+  number      = {Tech. Report. DSI 11/94},
+  year        = {1994},
+}
+
+% "Le Roux" is braced so BibTeX treats it as one surname (unbraced, the surname
+% parses as "Roux"); the cedilla uses a TeX escape because the rest of the file is ASCII.
+@article{Bengio-nc-2004,
+ author = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux} and Jean-Fran{\c{c}}ois Paiement and Pascal Vincent and Marie Ouimet},
+ title = {Learning eigenfunctions links spectral embedding and kernel {PCA}},
+ journal = {Neural Computation},
+ volume = 16,
+ number = 10,
+ year = 2004,
+ pages = {2197--2219},
+}
+
+% Compact variant of Bengio-nc-2004 (small-type title, issue folded into volume).
+% "Le Roux" is braced as one surname; the cedilla is written as a TeX escape.
+@article{Bengio-nc-2004-small,
+ author = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux} and Jean-Fran{\c{c}}ois Paiement and Pascal Vincent and Marie Ouimet},
+ title = {{\small{Learning eigenfunctions links spectral embedding and kernel {PCA}}}},
+ journal = {Neural Comp.},
+ volume = {16(10)},
+ year = 2004,
+ pages = {2197--2219},
+}
+
+@article{Bengio+Grandvalet-JMLR-2004,
+  author  = {Yoshua Bengio and Yves Grandvalet},
+  title   = {No Unbiased Estimator of the Variance of {K}-Fold Cross-Validation},
+  journal = jmlr,
+  volume  = {5},
+  pages   = {1089--1105},
+  year    = {2004},
+}
+
+% Institution spelled with its accents, as in the other Universit\'e de Montr\'eal entries.
+@TechReport{Bengio+Grandvalet-TR-2003,
+  author =       "Yoshua Bengio and Yves Grandvalet",
+  title =        "No Unbiased Estimator of the Variance of {K}-Fold
+                 Cross-Validation",
+  number =       "TR-2003-1234",
+  institution =  "Universit\'e de Montr\'eal, dept. IRO",
+  year =         "2003",
+}
+
+@incollection{Bengio+Lecun-chapter2007,
+  author    = {Yoshua Bengio and Yann {LeCun}},
+  title     = {Scaling Learning Algorithms towards {AI}},
+  booktitle = {Large Scale Kernel Machines},
+  editor    = {L. Bottou and O. Chapelle and D. DeCoste and J. Weston},
+  publisher = {MIT Press},
+  year      = {2007},
+}
+
+% Compact variant of Bengio+Lecun-chapter2007: initials only, no editor or publisher.
+@incollection{Bengio+Lecun-chapter2007-small,
+  author    = {Y. Bengio and Y. {LeCun}},
+  title     = {Scaling Learning Algorithms towards {AI}},
+  booktitle = {Large Scale Kernel Machines},
+  year      = {2007},
+}
+
+@inproceedings{Bengio+LeCun94b,
+  author    = {Yoshua Bengio and Yann {LeCun}},
+  title     = {Word Normalization For On-Line Handwritten Word Recognition},
+  booktitle = ICPR94,
+  pages     = {409--413},
+  year      = {1994},
+}
+
+@article{Bengio+Monperrus+Larochelle-2006,
+  author  = {Yoshua Bengio and Martin Monperrus and Hugo Larochelle},
+  title   = {Nonlocal Estimation of Manifold Structure},
+  journal = {Neural Computation},
+  volume  = {18},
+  number  = {10},
+  pages   = {2509--2528},
+  year    = {2006},
+}
+
+@inproceedings{Bengio+Monperrus-2005,
+  author    = {Yoshua Bengio and Martin Monperrus},
+  title     = {Non-Local Manifold Tangent Learning},
+  booktitle = NIPS17,
+  editor    = NIPS17ed,
+  publisher = {{MIT} Press},
+  pages     = {129--136},
+  year      = {2005},
+  url       = {http://www.iro.umontreal.ca/~lisa/pointeurs/tangent\_learner\_nips2004.pdf},
+}
+
+@inproceedings{Bengio+Senecal-2003-small,
+  author    = {Yoshua Bengio and Jean-S\'ebastien Sen\'ecal},
+  title     = {Quick Training of Probabilistic Neural Nets by Importance Sampling},
+  booktitle = {Proceedings of AISTATS 2003},
+  year      = {2003},
+}
+
+% The cedilla is written {\c{c}} (the former "\cc" is not a LaTeX command), and
+% the URL carries an explicit scheme like the other lisa/pointeurs links.
+@TechReport{Bengio+Vincent+Paiement-TR2003,
+  author =       "Yoshua Bengio and Pascal Vincent and Jean-Fran{\c{c}}ois
+                 Paiement",
+  title =        "Learning Eigenfunctions of Similarity: Linking
+                 Spectral Clustering and Kernel {PCA}",
+  number =       "1232",
+  institution =  "D\'epartement d'informatique et recherche
+                 op\'erationnelle, Universit\'e de Montr\'eal",
+  year =         "2003",
+  URL =          "http://www.iro.umontreal.ca/~lisa/pointeurs/TR1232.pdf",
+}
+
+% The empty `number` field was removed (it is optional for technical reports).
+% NOTE(review): authors and year match entry Bengio-decision-trees07 (TR 1304),
+% whose title starts with "Decision Trees" -- possibly the same report; confirm.
+@TechReport{Bengio-decision-trees-TR-2007,
+  author =       "Yoshua Bengio and Olivier Delalleau and Clarence
+                 Simard",
+  title =        "Trees do not Generalize to New Variations",
+  institution =  "D\'epartement d'informatique et recherche
+                 op\'erationnelle, Universit\'e de Montr\'eal",
+  year =         "2007",
+}
+
+% Institution spelled with its accents, as in the other Universit\'e de Montr\'eal entries.
+@TechReport{Bengio-decision-trees07,
+  author =       "Yoshua Bengio and Olivier Delalleau and Clarence
+                 Simard",
+  title =        "Decision Trees do not Generalize to New Variations",
+  number =       "1304",
+  institution =  "Universit\'e de Montr\'eal, Dept. IRO",
+  year =         "2007",
+  url =          "http://www.iro.umontreal.ca/~lisa/pointeurs/bengio+al-tr1304.pdf",
+}
+
+% Deprecated: the following entry is a duplicate of the preceding tech report.
+% There was only one .tex file that was using it; I modified that file.
+@techreport{Bengio-Trees-TR2007,
+  author      = {Yoshua Bengio and Olivier Delalleau and Clarence Simard},
+  title       = {Decision Trees do not Generalize to New Variations},
+  institution = {Dept. IRO, Universit\'e de Montr\'eal},
+  number      = {1304},
+  year        = {2007},
+  url         = {http://www.iro.umontreal.ca/~lisa/pointeurs/bengio+al-tr1304.pdf},
+}
+
+@article{Bengio-hmms99,
+  author  = {Yoshua Bengio},
+  title   = {Markovian Models for Sequential Data},
+  journal = {Neural Computing Surveys},
+  volume  = {2},
+  pages   = {129--162},
+  year    = {1999},
+}
+
+@article{bengio-hyper-NC00,
+  author  = {Yoshua Bengio},
+  title   = {Gradient-Based Optimization of Hyperparameters},
+  journal = {Neural Computation},
+  volume  = {12},
+  number  = {8},
+  pages   = {1889--1900},
+  year    = {2000},
+}
+
+@techreport{bengio-hyper-TR98,
+  author      = {Yoshua Bengio},
+  title       = {Continuous Optimization of Hyper-Parameters for Non-{IID} Data},
+  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
+  year        = {1998},
+  note        = {unpublished manuscript},
+}
+
+% The submission status is carried in the journal field.
+@article{Bengio-Hyper-Weight-Decay-nips,
+  author  = {Simon Latendresse and Yoshua Bengio},
+  title   = {Linear Regression and the Optimization of Hyper-Parameters},
+  journal = {submitted to NIPS'99},
+  year    = {1999},
+}
+
+@techreport{Bengio-Hyper-Weight-Decay-TR,
+  author      = {Yoshua Bengio and Simon Latendresse},
+  title       = {Soft Variable Selection with Numerical Optimization of Weight Decays},
+  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
+  year        = {1999},
+  note        = {in preparation},
+}
+
+% The URL carries an explicit http:// scheme so it is not treated as a relative link.
+@Article{Bengio-ijns97,
+  author =       "Yoshua Bengio",
+  title =        "Using a Financial Training Criterion Rather than a
+                 Prediction Criterion",
+  journal =      "International Journal of Neural Systems",
+  year =         "1997",
+  volume =       {8},
+  number =       {4},
+  note =         "Special issue on noisy time-series",
+  pages =        {433--443},
+  URL =          "http://www.iro.umontreal.ca/~lisa/pointeurs/profitcost.ps",
+}
+
+@article{Bengio-IEEETRNN-2001,
+  author  = {Yoshua Bengio and Vincent-Philippe Lauzon and R\'ejean Ducharme},
+  title   = {Experiments on the Application of {IOHMM}s to Model Financial Returns Series},
+  journal = ieeetrnn,
+  volume  = {12},
+  number  = {1},
+  pages   = {113--123},
+  year    = {2001},
+}
+
+@inproceedings{Bengio-Larochelle-NLMP-NIPS-2006,
+  author    = {Yoshua Bengio and Hugo Larochelle and Pascal Vincent},
+  title     = {Non-Local Manifold Parzen Windows},
+  booktitle = NIPS18,
+  editor    = NIPS18ed,
+  publisher = {MIT Press},
+  pages     = {115--122},
+  year      = {2006},
+}
+
+@techreport{Bengio-Larochelle-NLMP-TR-2005,
+  author      = {Yoshua Bengio and Hugo Larochelle},
+  title       = {Non-Local Manifold Parzen Windows},
+  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
+  number      = {1264},
+  year        = {2005},
+}
+
+% This submission was rejected and later accepted to NIPS as Bengio-localfailure-NIPS-2006.
+@inproceedings{Bengio-localfailure-icml-2005,
+  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
+  title     = {The Curse of Dimensionality for Local Kernel Machines},
+  booktitle = {submitted to ICML 2005},
+  year      = {2005},
+}
+
+@incollection{Bengio-localfailure-NIPS-2006,
+  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
+  title     = {The Curse of Highly Variable Functions for Local Kernel Machines},
+  booktitle = NIPS18,
+  editor    = NIPS18ed,
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  pages     = {107--114},
+  year      = {2006},
+}
+
+% Compact variant of Bengio-localfailure-NIPS-2006: literal short booktitle, no editor.
+@incollection{Bengio-localfailure-NIPS-2006-small,
+  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
+  title     = {The Curse of Highly Variable Functions for Local Kernel Machines},
+  booktitle = {NIPS 18},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  pages     = {107--114},
+  year      = {2006},
+}
+
+@inproceedings{Bengio-localfailure-snowbird-2005,
+  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
+  title     = {The Curse of Dimensionality for Local Kernel Machines},
+  booktitle = {The Learning Workshop},
+  address   = {Snowbird, Utah},
+  year      = {2005},
+}
+
+@inproceedings{HonglakLee-2007,
+  author    = {Honglak Lee and Alexis Battle and Rajat Raina and Andrew Ng},
+  title     = {Efficient sparse coding algorithms},
+  booktitle = NIPS19,
+  editor    = NIPS19ed,
+  publisher = {MIT Press},
+  pages     = {801--808},
+  year      = {2007},
+}
+
+% Compact variant (initials, short booktitle, no pages) of entry Bengio-NIPS2007.
+@inproceedings{Bengio-nips-2006-small,
+  author    = {Y. Bengio and P. Lamblin and D. Popovici and H. Larochelle},
+  title     = {Greedy Layer-Wise Training of Deep Networks},
+  booktitle = {Advances in NIPS 19},
+  year      = {2007},
+}
+
+% Short-form variant (initials, abbreviated booktitle) of entry Bengio-NIPS2007.
+@inproceedings{Bengio-nips-2006-short,
+  author    = {Y. Bengio and P. Lamblin and D. Popovici and H. Larochelle},
+  title     = {Greedy Layer-Wise Training of Deep Networks},
+  booktitle = {Adv. Neural Inf. Proc. Sys. 19},
+  pages     = {153--160},
+  year      = {2007},
+}
+
+% Cedilla fixed: "\c{cois}" applied the accent to the whole group "cois";
+% it is now the single braced accent {\c{c}}.
+% The bibliographic data is the same as in entry Bengio-nips2003.
+@InProceedings{Bengio-nips2004,
+  author =       "Yoshua Bengio and Jean-Fran{\c{c}}ois Paiement and Pascal
+                 Vincent and Olivier Delalleau and Nicolas {Le Roux} and
+                 Marie Ouimet",
+  editor =       NIPS16ed,
+  booktitle =    NIPS16,
+  title =        "Out-of-Sample Extensions for {LLE}, {Isomap}, {MDS},
+                 {Eigenmaps}, and {Spectral} {Clustering}",
+  publisher =    "MIT Press",
+  year =         "2004",
+}
+
+% Cedilla fixed: "\c{cois}" applied the accent to the whole group "cois";
+% it is now the single braced accent {\c{c}}.
+% The bibliographic data is the same as in entry Bengio-nips2004.
+@InProceedings{Bengio-nips2003,
+  author =       "Yoshua Bengio and Jean-Fran{\c{c}}ois Paiement and Pascal
+                 Vincent and Olivier Delalleau and Nicolas {Le Roux} and
+                 Marie Ouimet",
+  editor =       NIPS16ed,
+  booktitle =    NIPS16,
+  title =        "Out-of-Sample Extensions for {LLE}, {Isomap}, {MDS},
+                 {Eigenmaps}, and {Spectral} {Clustering}",
+  publisher =    "MIT Press",
+  year =         "2004",
+}
+
+@incollection{Bengio-NIPS2007,
+  author    = {Yoshua Bengio and Pascal Lamblin and Dan Popovici and Hugo Larochelle},
+  title     = {Greedy Layer-Wise Training of Deep Networks},
+  booktitle = NIPS19,
+  editor    = NIPS19ed,
+  publisher = {MIT Press},
+  pages     = {153--160},
+  year      = {2007},
+}
+
+@inproceedings{Bengio-nnlm2001,
+  author    = {Yoshua Bengio and R{\'e}jean Ducharme and Pascal Vincent},
+  title     = {A Neural Probabilistic Language Model},
+  booktitle = NIPS13,
+  editor    = NIPS13ed,
+  publisher = {{MIT} Press},
+  pages     = {933--938},
+  year      = {2001},
+  url       = {http://www.iro.umontreal.ca/~lisa/pointeurs/nips00-lm.ps},
+}
+
+@article{Bengio-nnlm2003,
+  author  = {Yoshua Bengio and R{\'e}jean Ducharme and Pascal Vincent and Christian Jauvin},
+  title   = {A Neural Probabilistic Language Model},
+  journal = jmlr,
+  volume  = {3},
+  pages   = {1137--1155},
+  year    = {2003},
+}
+
+% Compact variant of Bengio-nnlm2003: initials and a literal "JMLR" journal name.
+@article{Bengio-nnlm2003-small,
+  author  = {Y. Bengio and R. Ducharme and P. Vincent and C. Jauvin},
+  title   = {A Neural Probabilistic Language Model},
+  journal = {JMLR},
+  volume  = {3},
+  pages   = {1137--1155},
+  year    = {2003},
+}
+
+% The submission status is carried in the journal field.
+@article{Bengio-NonStat-Hyper-ML,
+  author  = {Yoshua Bengio and Charles Dugas},
+  title   = {Learning Simple Non-Stationarities with Hyper-Parameters},
+  journal = {submitted to Machine Learning},
+  year    = {1999},
+}
+
+% The compound surname is brace-protected ({De Mori}) rather than wrapped in
+% \mbox, so sorting and label generation see the real name (as in entry bengio91x).
+@Article{Bengio-prel92,
+  author =       "Y. Bengio and M. Gori and R. {De Mori}",
+  title =        "Learning the Dynamic Nature of Speech with
+                 Back-propagation for Sequences",
+  journal =      prel,
+  volume =       "13",
+  number =       "5",
+  pages =        "375--385",
+  year =         "1992",
+  note =         "(Special issue on Artificial Neural Networks)",
+}
+
+% The "to appear" placeholder is replaced by the final volume, issue and pages,
+% as recorded for the same article in entry Bengio-2009-short.
+@Article{Bengio-2008,
+  author =       "Yoshua Bengio",
+  title =        "Learning Deep Architectures for {AI}",
+  journal =  {Foundations and Trends in Machine Learning},
+  year =         "2009",
+  volume = 2,
+  number = 1,
+  pages = {1--127},
+}
+
+@article{Bengio-2009-short,
+  author  = {Y. Bengio},
+  title   = {Learning Deep Architectures for {AI}},
+  journal = {Foundations \& Trends in Mach. Learn.},
+  volume  = {2},
+  number  = {1},
+  pages   = {1--127},
+  year    = {2009},
+}
+
+@techreport{Bengio-TR1312-small,
+  author      = {Yoshua Bengio},
+  title       = {Learning Deep Architectures for {AI}},
+  institution = {U. Montr\'eal, dept. IRO},
+  number      = {1312},
+  year        = {2007},
+}
+
+@inproceedings{Bengio-transducers-98,
+  author    = {Y. Bengio and S. Bengio and J. F. Isabelle and Y. Singer},
+  title     = {Shared Context Probabilistic Transducers},
+  booktitle = NIPS10,
+  editor    = NIPS10ed,
+  publisher = {MIT Press},
+  pages     = {409--415},
+  year      = {1998},
+}
+
+% The compound surname is brace-protected ({De Mori}) rather than wrapped in
+% \mbox, so sorting and label generation see the real name (as in entry bengio91x).
+@Article{Bengio-trnn92,
+  author =       "Y. Bengio and R. {De Mori} and G. Flammia and R.
+                 Kompe",
+  title =        "Global Optimization of a Neural Network-Hidden
+                 {Markov} Model Hybrid",
+  journal =      ieeetrnn,
+  volume =       "3",
+  number =       "2",
+  pages =        "252--259",
+  year =         "1992",
+}
+
+% The OPTnote field name is not a standard field, so styles skip it.
+@article{Bengio-trnn93,
+  author  = {Y. Bengio and P. Simard and P. Frasconi},
+  title   = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
+  journal = ieeetrnn,
+  volume  = {5},
+  number  = {2},
+  pages   = {157--166},
+  year    = {1994},
+  OPTnote = {(Special Issue on Recurrent Neural Networks)},
+  url     = {http://www.iro.umontreal.ca/~lisa/pointeurs/ieeetrnn94.pdf},
+}
+
+@article{Bengio-trnn96,
+  author  = {Y. Bengio and P. Frasconi},
+  title   = {Input/{Output} {HMM}s for Sequence Processing},
+  journal = {IEEE Transactions on Neural Networks},
+  volume  = {7},
+  number  = {5},
+  pages   = {1231--1249},
+  year    = {1996},
+}
+
+% Accents are written as TeX escapes (not raw 8-bit characters), matching the
+% rest of the file and staying safe under classic BibTeX.
+@TechReport{Bengio2003,
+  author =       "Christopher Kermorvant and Yoshua Bengio",
+  title =        "Extracting Hidden Sense Probabilities from Bitexts",
+  number =       "1231",
+  institution =  "Universit\'e de Montr\'eal",
+  year =         "2003",
+}
+
+% "De Mori" is braced so it parses as one surname. Publisher and address follow
+% the form used by entry Blum+Rivest (publisher name, then publisher city).
+@InProceedings{Bengio89b,
+  author =       "Y. Bengio and P. Cosi and R. Cardin and R. {De Mori}",
+  editor =       NIPS1ed,
+  booktitle =    NIPS1,
+  title =        "Use of multi-layered networks for coding speech with
+                 phonetic features",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "224--231",
+  year =         "1989",
+}
+
+@phdthesis{Bengio91,
+  author  = {Yoshua Bengio},
+  title   = {Artificial Neural Networks and their Application to Sequence Recognition},
+  school  = {McGill University, (Computer Science)},
+  address = {Montreal, Qc., Canada},
+  year    = {1991},
+}
+
+% The OPTaddress field name is not a standard field, so styles skip it.
+@inproceedings{bengio91x,
+  author     = {Y. Bengio and R. {De Mori} and G. Flammia and R. Kompe},
+  title      = {Global Optimization of a Neural Network - Hidden Markov Model Hybrid},
+  booktitle  = ijcnn,
+  volume     = {2},
+  pages      = {789--794},
+  year       = {1991},
+  OPTaddress = {Seattle WA},
+}
+
+@article{Becker92,
+  author  = {Sue Becker and Geoffrey Hinton},
+  title   = {A self-organizing neural network that discovers surfaces in random-dot stereograms},
+  journal = {Nature},
+  volume  = {355},
+  pages   = {161--163},
+  year    = {1992},
+}
+ 
+@article{Bengio93,
+  author  = {Yoshua Bengio},
+  title   = {A Connectionist Approach to Speech Recognition},
+  journal = {International Journal on Pattern Recognition and Artificial Intelligence},
+  volume  = {7},
+  number  = {4},
+  pages   = {647--668},
+  year    = {1993},
+  note    = {special issue entitled Advances in Pattern Recognition Systems using Neural Networks},
+}
+
+% A single page is given as one number rather than the degenerate range "502--502".
+@InProceedings{Bengio93e,
+  author =       "S. Bengio and Y. Bengio and J. Cloutier and J.
+                 Gecsei",
+  editor =       "S. Gielen and B. Kappen",
+  booktitle =    "Proceedings of the International Conference on
+                 Artificial Neural Networks 1993",
+  title =        "Generalization of a Parametric Learning Rule",
+  publisher =    "Springer-Verlag",
+  address =      "Amsterdam, The Netherlands",
+  pages =        "502",
+  year =         "1993",
+}
+
+@article{bengio:1999:nc,
+  author  = {S. Bengio and Y. Bengio and J. Robert and G. B\'elanger},
+  title   = {Stochastic Learning of Strategic Equilibria for Auctions},
+  journal = {Neural Computation},
+  volume  = {11},
+  number  = {5},
+  pages   = {1199--1209},
+  year    = {1999},
+}
+
+% Initials are separated ("P. G.") so BibTeX parses them as two given-name tokens.
+% NOTE(review): the key says 1999 but the year field is 1998 -- confirm which is intended.
+@Article{bottou+al:1999,
+  author =       "L. Bottou and P. Haffner and P. G. Howard and P. Simard and Y. Bengio",
+  title =        "High quality document image compression with {DjVu}",
+  journal =      "Journal of Electronic Imaging",
+  volume =       "7",
+  number =       "3",
+  pages =        "410--425",
+  year =         "1998",
+}
+
+@article{bengio+al:1998,
+  author  = {Y. Bengio and F. Gingras and B. Goulard and J.-M. Lina},
+  title   = {Gaussian Mixture Densities for Classification of Nuclear Power Plant Data},
+  journal = {Computers and Artificial Intelligence, special issue on Intelligent Technologies for Electric and Nuclear Power Plants},
+  volume  = {17},
+  number  = {2--3},
+  pages   = {189--209},
+  year    = {1998},
+}
+
+@article{GingrasBengio:1998,
+  author  = {F. Gingras and Y. Bengio},
+  title   = {Handling Asynchronous or Missing Financial Data with Recurrent Networks},
+  journal = {International Journal of Computational Intelligence and Organizations},
+  volume  = {1},
+  number  = {3},
+  pages   = {154--163},
+  year    = {1998},
+}
+
+@article{BengioS95,
+  author  = {S. Bengio and Y. Bengio and J. Cloutier},
+  title   = {On the search for new learning rules for {ANN}s},
+  journal = {Neural Processing Letters},
+  volume  = {2},
+  number  = {4},
+  pages   = {26--30},
+  year    = {1995},
+}
+
+% "De Mori" is braced so BibTeX parses it as one surname (as in entry bengio91x);
+% unbraced, "De" is read as part of the given name.
+@Article{BengioMori89,
+  author =       "Y. Bengio and R. {De Mori}",
+  title =        "Use of multilayer networks for the recognition of phonetic features and phonemes",
+  journal =      "Computational Intelligence",
+  volume =       "5",
+  pages =        "134--141",
+  year =         "1989",
+}
+
+@techreport{BengioTR1178,
+  author      = {Yoshua Bengio and R\'ejean Ducharme and Pascal Vincent},
+  title       = {A Neural Probabilistic Language Model},
+  institution = {Dept. IRO, Universit\'e de Montr\'eal},
+  number      = {1178},
+  year        = {2002},
+}
+
+@techreport{BengioTR1215,
+  author      = {Yoshua Bengio},
+  title       = {New Distributed Probabilistic Language Models},
+  institution = {Dept. IRO, Universit\'e de Montr\'eal},
+  number      = {1215},
+  year        = {2002},
+}
+
+@book{Bengio_book96,
+  author    = {Yoshua Bengio},
+  title     = {Neural Networks for Speech and Sequence Processing},
+  publisher = {International Thomson Computer Press},
+  year      = {1996},
+}
+
+@inproceedings{Bengio_icnn93,
+  author    = {Y. Bengio and P. Frasconi and P. Simard},
+  title     = {The problem of learning long-term dependencies in recurrent networks},
+  booktitle = icnn,
+  publisher = {IEEE Press},
+  address   = {San Francisco},
+  pages     = {1183--1195},
+  year      = {1993},
+  note      = {(invited paper)},
+}
+
+% Same article as entry Bengio-trnn93; here the special-issue remark is a printed note.
+@article{Bengio_trnn94,
+  author  = {Y. Bengio and P. Simard and P. Frasconi},
+  title   = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
+  journal = ieeetrnn,
+  volume  = {5},
+  number  = {2},
+  pages   = {157--166},
+  year    = {1994},
+  note    = {Special Issue on Recurrent Neural Networks, March 94},
+}
+
+@book{Benveniste90,
+  author    = {A. Benveniste and M. Metivier and P. Priouret},
+  title     = {Adaptive Algorithms and Stochastic Approximations},
+  publisher = {Springer-Verlag},
+  address   = {Berlin, New York},
+  year      = {1990},
+}
+
+@book{Berger85,
+  author    = {J. Berger},
+  title     = {Statistical Decision Theory and {Bayesian} Analysis},
+  publisher = {Springer},
+  year      = {1985},
+}
+
+% The URL carries an explicit http:// scheme so it is not treated as a relative link.
+% The `text` field is not a standard field; it holds the original plain-text citation.
+@Misc{berger97improved,
+  author =       "A. Berger",
+  title =        "The improved iterative scaling algorithm: {A} gentle
+                 introduction",
+  year =         "1997",
+  URL =          "http://citeseer.ist.psu.edu/berger97improved.html",
+  text =         "Berger, A. (1997). The improved iterative scaling
+                 algorithm: A gentle introduction.
+                 http://www.cs.cmu.edu/afs/cs/user/aberger/www/ps/scaling.ps.",
+}
+
+% The page range uses the en-dash form "--" and the month uses the standard `jul`
+% macro so styles can format and localize it.
+@article{Berkes-Wiskott-2005,
+    author = {Berkes, Pietro and Wiskott, Laurenz},
+    title = {Slow Feature Analysis Yields a Rich Repertoire of Complex Cell Properties},
+    journal = {Journal of Vision},
+    ISSN = {1534-7362},
+    volume = {5},
+    number = {6},
+    pages = {579--602},
+    year = {2005},
+    month = jul,
+}
+
+@article{Beurle56,
+  author  = {R. L. Beurle},
+  title   = {Properties of a Mass of Cells Capable of Regenerating Pulses},
+  journal = PTRSL,
+  volume  = {240},
+  pages   = {55--94},
+  year    = {1956},
+}
+
+% Booktitle typo fixed ("Proceeding" -> "Proceedings").
+@InProceedings{Beyer+al-1999,
+  author =       "Kevin S. Beyer and Jonathan Goldstein and Raghu Ramakrishnan
+                 and Uri Shaft",
+  booktitle =    "Proceedings of the 7th International Conference on
+                 Database Theory",
+  title =        "When Is ``Nearest Neighbor'' Meaningful?",
+  publisher =    "Springer-Verlag",
+  pages =        "217--235",
+  year =         "1999",
+  ISBN =         "3-540-65452-6",
+}
+
+@techreport{Bianchini-rbf,
+  author      = {M. Bianchini and P. Frasconi and M. Gori},
+  title       = {Learning without Local Minima in Radial Basis Function Networks},
+  institution = {Universit\`a di Firenze},
+  year        = {1992},
+  OPTannote   = {},
+}
+
+@article{Bianchini-trnn94,
+  author  = {M. Bianchini and M. Gori and M. Maggini},
+  title   = {On the Problem of Local Minima in Recurrent Neural Networks},
+  journal = ieeetrnn,
+  volume  = {5},
+  number  = {2},
+  pages   = {167--177},
+  year    = {1994},
+  OPTnote = {(Special Issue on Recurrent Neural Networks)},
+}
+
+% `year` holds a plain four-digit year (styles sort and label on it); the
+% issue/revision history is kept in `note`. `number` holds only the report number,
+% since styles already print the words "Technical Report".
+@TechReport{bickel+ritov95,
+  author =       "P. J. Bickel and Y. Ritov",
+  title =        "Inference in hidden {Markov} models {I}: local
+                 asymptotic normality in the stationary case",
+  number =       "383",
+  institution =  "Statistics Department, University of California,
+                 Berkeley",
+  year =         "1995",
+  note =         "February 1994, revised April 1995",
+}
+
+% NOTE(review): no pages field is recorded for this article.
+@article{Bienenstock82,
+  author  = {E. L. Bienenstock and L. N. Cooper and P. W. Munro},
+  title   = {Theory for the Development of Neuron Selectivity: Orientation Specificity and Binocular Interaction in Visual Cortex},
+  journal = jneuro,
+  volume  = {2},
+  year    = {1982},
+}
+
+% Author surname corrected to "Biederman". The citation key keeps its original
+% spelling so existing citations still resolve.
+@Article{BierdermanI1987,
+  author =       "Irving Biederman",
+  title =        "Recognition-by-Components: {A} Theory of Human Image
+                 Understanding",
+  journal =      "Psychological Review",
+  volume =       "94",
+  number =       "2",
+  publisher =    "American Psychological Association, Inc.",
+  pages =        "115--147",
+  year =         "1987",
+  added-by =     "Daniel Acevedo",
+  date-added =   "Thu Oct 24 12:45:17 2002",
+  project =      "genetic",
+  theme =        "perception and vr and tech and natural and medicine
+                 and art",
+}
+
+% Publisher and address follow the form used by entry Blum+Rivest for the same
+% proceedings: publisher name in `publisher`, publisher city in `address`.
+@InProceedings{Bilbro89a,
+  author =       "G. Bilbro and R. Mann and T. K. Miller and W. E.
+                 Snyder and D. E. Van den Bout and M. White",
+  editor =       NIPS1ed,
+  booktitle =    NIPS1,
+  title =        "Optimization by Mean Field Annealing",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "91--98",
+  year =         "1989",
+}
+
+% Publisher and address follow the form used by entry Blum+Rivest for the same
+% proceedings: publisher name in `publisher`, publisher city in `address`.
+@InProceedings{Bilbro89b,
+  author =       "G. L. Bilbro and W. Snyder",
+  editor =       NIPS1ed,
+  booktitle =    NIPS1,
+  title =        "Range Image Restoration Using Mean Field Annealing",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "594--601",
+  year =         "1989",
+}
+
+@article{Binder86,
+  author  = {K. Binder and A. P. Young},
+  title   = {Spin Glasses: Experimental Facts, Theoretical Concepts, and Open Questions},
+  journal = rmp,
+  volume  = {58},
+  pages   = {801--976},
+  year    = {1986},
+}
+
+% Co-author surname corrected to "Heermann" and title to "... Statistical Physics".
+@Book{Binder88,
+  author =       "K. Binder and D. W. Heermann",
+  title =        "Monte Carlo Simulation in Statistical Physics",
+  publisher =    "Springer-Verlag",
+  address =      "Berlin",
+  year =         "1988",
+}
+
+@book{bishop-book2006,
+  author    = {Christopher M. Bishop},
+  title     = {Pattern Recognition and Machine Learning},
+  publisher = {Springer},
+  year      = {2006},
+}
+
+@book{bishop-book95,
+  author    = {Christopher Bishop},
+  title     = {Neural Networks for Pattern Recognition},
+  publisher = {Oxford University Press},
+  address   = {London, UK},
+  year      = {1995},
+}
+
+@article{bishop92,
+  author  = {Christopher Bishop},
+  title   = {Exact calculation of the {Hessian} matrix for the multi-layer perceptron},
+  journal = {Neural Computation},
+  volume  = {4},
+  number  = {4},
+  pages   = {494--501},
+  year    = {1992},
+}
+
+@article{bishop95training,
+  author  = {Christopher M. Bishop},
+  title   = {Training with Noise is Equivalent to {Tikhonov} Regularization},
+  journal = {Neural Computation},
+  volume  = {7},
+  number  = {1},
+  pages   = {108--116},
+  year    = {1995},
+}
+
+% 81 is the journal volume, so it is stored in `volume`; the issue within
+% that volume goes in `number`.
+@Article{Blackscholes73,
+  author =       "F. Black and M. Scholes",
+  title =        "The Pricing of Options and Corporate Liabilities",
+  journal =      "Journal of Political Economy",
+  volume =       "81",
+  number =       "3",
+  pages =        "637--654",
+  year =         "1973",
+}
+
+@article{Blakemore70,
+  author  = {C. Blakemore and G. F. Cooper},
+  title   = {Development of the Brain Depends on the Visual Environment},
+  journal = nature,
+  volume  = {228},
+  pages   = {477--478},
+  year    = {1970},
+}
+
+@incollection{Blitzer-nips17,
+  author    = {John Blitzer and Kilian Weinberger and Lawrence Saul and Fernando Pereira},
+  title     = {Hierarchical Distributed Representations for Statistical Language Modeling},
+  booktitle = NIPS17,
+  editor    = NIPS17ed,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2005},
+}
+
+% Volume macros corrected from NIPS18 to NIPS17: entries Blitzer-nips17 and
+% Blitzer2005 cite this same paper in NIPS17, and year 2005 matches the other
+% NIPS17 entries in this file (NIPS18 entries carry year 2006).
+@InProceedings{Blitzer05,
+  author =       "John Blitzer and Kilian Weinberger and Lawrence Saul
+                 and Fernando Pereira",
+  editor =       NIPS17ed,
+  booktitle =    NIPS17,
+  title =        "Hierarchical Distributed Representations for
+                 Statistical Language Modeling",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2005",
+}
+
+@inproceedings{Blitzer2005,
+  author    = {J. Blitzer and K. Q. Weinberger and L. K. Saul and F. C. N. Pereira},
+  title     = {Hierarchical distributed representations for statistical language models},
+  booktitle = NIPS17,
+  editor    = NIPS17ed,
+  publisher = {{MIT} Press},
+  year      = {2005},
+}
+
+@article{Block62,
+  author  = {H. D. Block},
+  title   = {The Perceptron: {A} Model for Brain Functioning},
+  journal = rmp,
+  volume  = {34},
+  year    = {1962},
+}
+
+@inproceedings{Blum+Rivest,
+  author    = {A. Blum and R. L. Rivest},
+  title     = {Training a 3-node Neural Net is {NP}-Complete},
+  editor    = NIPS1ed,
+  booktitle = NIPS1,
+  pages     = {494--501},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1989},
+}
+
+@InProceedings{blum01learning,
+  author =       "Avrim Blum and Shuchi Chawla",
+  booktitle =    "Proc. 18th International Conf. on Machine Learning",
+  title =        "Learning from Labeled and Unlabeled Data Using Graph
+                 Mincuts",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Francisco, CA",
+  pages =        "19--26",
+  year =         "2001",
+}
+  %URL =          "citeseer.ist.psu.edu/blum01learning.html",
+
+@inproceedings{blum98combining,
+  author    = {Avrim Blum and Tom Mitchell},
+  title     = {Combining Labeled and Unlabeled Data with
+               Co-training},
+  booktitle = colt98,
+  pages     = {92--100},
+  publisher = {Morgan Kaufmann Publishers},
+  year      = {1998},
+}
+  %URL =          "citeseer.ist.psu.edu/blum98combining.html",
+
+@inproceedings{blum98combining-small,
+  author    = {Avrim Blum and Tom Mitchell},
+  title     = {Combining Labeled and Unlabeled Data with
+               Co-training},
+  booktitle = {COLT'98},
+  pages     = {92--100},
+  year      = {1998},
+}
+  %URL =          "citeseer.ist.psu.edu/blum98combining.html",
+
+@InProceedings{blum99,
+  author =       "A. Blum and A. Kalai and J. Langford",
+  booktitle =    colt99,
+  title =        "Beating the hold-out: Bounds for k-fold and
+                 progressive cross-validation",
+  pages =        "203--208",
+  year =         "1999",
+}
+
+@inproceedings{Blumer86,
+  author    = {A. Blumer and A. Ehrenfeucht and D. Haussler and M.
+               Warmuth},
+  title     = {Classifying Learnable Geometric Concepts with the
+               Vapnik-Chervonenkis Dimension},
+  booktitle = {Proceedings of the Eighteenth Annual ACM Symposium on
+               Theory of Computing},
+  pages     = {273--282},
+  publisher = {ACM, Salem},
+  address   = {Berkeley 1986},
+  year      = {1986},
+}
+
+@article{Blumer87,
+  author    = {A. Blumer and A. Ehrenfeucht and D. Haussler and M.
+               Warmuth},
+  title     = {Occam's razor},
+  journal   = {Inf. Proc. Let.},
+  volume    = {24},
+  pages     = {377--380},
+  year      = {1987},
+}
+
+@article{Blumstein79,
+  author    = {S. E. Blumstein and K. N. Stevens},
+  title     = {Acoustic invariance in speech production: Evidence
+               from measurements of the spectral characteristics of
+               stop consonants},
+  journal   = {Journal of the Acoustical Society of America},
+  volume    = {66},
+  number    = {4},
+  pages     = {1001--1018},
+  year      = {1979},
+}
+
+@article{Bohm96,
+  author    = {G. Bohm},
+  title     = {New approaches in molecular structure prediction},
+  journal   = {Biophys. Chem.},
+  volume    = {59},
+  pages     = {1--32},
+  year      = {1996},
+}
+
+@article{Bohr88,
+  author    = {H. Bohr and J. Bohr and S. Brunak and R. M. J.
+               Cotterill and B. Lautrup and L. Norskov and O. H.
+               Olsen and S. B. Petersen},
+  title     = {Protein Secondary Structure and Homology by Neural
+               Networks: The $\alpha$-Helices in Rhodopsin},
+  journal   = febsl,
+  volume    = {241},
+  pages     = {223--228},
+  year      = {1988},
+}
+
+@inproceedings{bollacker98,
+  author    = {Kurt D. Bollacker and Joydeep Ghosh},
+  title     = {A Supra-Classifier Architecture for Scalable Knowledge
+               Reuse},
+  editor    = ICML98ed,
+  booktitle = ICML98,
+  pages     = {64--72},
+  publisher = ICML98publ,
+  address   = {San Francisco, CA, USA},
+  year      = {1998},
+}
+
+@inproceedings{BonillaE2007,
+  author    = {Edwin V. Bonilla and Felix V. Agakov and Christopher
+               K. I. Williams},
+  title     = {Kernel Multi-task Learning using Task-specific
+               Features},
+  booktitle = {Proceedings of AISTATS 2007},
+  year      = {2007},
+}
+
+@article{Bonomo94,
+  author    = {M. Bonomo and R. Garcia},
+  title     = {Can a well-fitted equilibrium asset-pricing model
+               produce mean reversion?},
+  journal   = {Journal of Applied Econometrics},
+  volume    = {9},
+  pages     = {19--29},
+  year      = {1994},
+}
+
+@Article{bordes-09,
+  author =  {Bordes, Antoine and Bottou, L{\'e}on and Gallinari, Patrick},
+  title =   {SGD-QN: Careful Quasi-Newton Stochastic Gradient Descent},
+  journal = {Journal of Machine Learning Research},
+  year =    {2009},
+  volume =  {10},
+  pages =   {1737--1754},
+  month =   jul,
+}
+
+@Book{Bornstein-critical-87,
+  editor =       {Bornstein, Marc H.},
+  title =        {Sensitive Periods in Development: Interdisciplinary
+                 Perspectives},
+  publisher =    {Lawrence Erlbaum Associates},
+  address =      {Hillsdale, NJ},
+  year =         {1987},
+}
+
+
+@article{boser-92,
+  author    = {B. Boser and E. Sackinger and J. Bromley and Y. {LeCun}
+               and L. Jackel},
+  title     = {An analog neural network processor with programmable
+               topology},
+  journal   = {IEEE Journal of Solid-State Circuits},
+  volume    = {26},
+  number    = {12},
+  pages     = {2017--2025},
+  month     = dec,
+  year      = {1991},
+}
+
+@InProceedings{Boser92,
+  author =       "Bernhard E. Boser and Isabelle M. Guyon and Vladimir N. Vapnik",
+  booktitle =    "Fifth Annual Workshop on Computational Learning
+                 Theory",
+  title =        "A training algorithm for optimal margin classifiers",
+  publisher =    "ACM",
+  address =      "Pittsburgh",
+  pages =        "144--152",
+  year =         "1992",
+  doi =          {10.1145/130385.130401},
+  isbn = {0-89791-497-X},
+}
+
+@incollection{bottou-bousquet-2008,
+  author    = {Bottou, L\'{e}on and Bousquet, Olivier},
+  title     = {The Tradeoffs of Large Scale Learning},
+  editor    = NIPS20ed,
+  booktitle = NIPS20,
+  volume    = {20},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2008},
+  url       = {http://leon.bottou.org/papers/bottou-bousquet-2008},
+}
+
+@techreport{Bottou+96,
+  author      = {L{\'e}on Bottou and Yoshua Bengio and Yann A. {Le Cun}},
+  title       = {Document Analysis with Generalized Transduction},
+  institution = {AT\&T Laboratories},
+  number      = {HA6156000-960701-01TM},
+  address     = {Holmdel, New-Jersey},
+  month       = jul,
+  year        = {1996},
+}
+
+@article{Bottou+LeCun05,
+  author    = {L{\'e}on Bottou and Yann {LeCun}},
+  title     = {Graph Transformer Networks for Image Recognition},
+  journal   = {Bulletin of the International Statistical Institute},
+  year      = {2005},
+}
+
+@techreport{bottou-1996a,
+  author      = {L{\'{e}}on Bottou and Yoshua Bengio and Yann {Le Cun}},
+  title       = {Document Analysis with Transducers},
+  institution = {AT\&T Labs Technical Memorandum},
+  number      = {{960701}-{01}-{TM}},
+  month       = jun,
+  year        = {1996},
+}
+
+@InProceedings{bottou-lecun-04b,
+  author =       "L{\'e}on Bottou and Yann {LeCun}",
+  editor =       NIPS16ed,
+  booktitle =    NIPS16,
+  title =        "Large-Scale On-Line Learning",
+  publisher =    "MIT Press",
+  year =         "2004",
+  original =     "orig/bottou-lecun-04b.ps.gz",
+}
+
+@InCollection{bottou-mlss-2004,
+  author =       "L{\'e}on Bottou",
+  editor =       "Olivier Bousquet and Ulrike von Luxburg",
+  booktitle =    "Advanced Lectures on Machine Learning",
+  title =        "Stochastic Learning",
+  volume =       "3176",
+  publisher =    "Springer Verlag",
+  address =      "Berlin",
+  pages =        "146--168",
+  year =         "2004",
+  series =       "Lecture Notes in Artificial Intelligence",
+  URL =          "http://leon.bottou.org/papers/bottou-mlss-2004",
+}
+
+@article{Bottou90,
+  author    = {L. Bottou and F. Fogelman-Souli\'e and P. Blanchet and
+               J. S. Lienard},
+  title     = {Speaker independent isolated digit recognition:
+               multilayer perceptrons vs dynamic time warping},
+  journal   = {Neural Networks},
+  volume    = {3},
+  pages     = {453--465},
+  year      = {1990},
+  key       = {bottou},
+}
+
+@InProceedings{Bottou91,
+  author =       "L. Bottou and P. Gallinari",
+  editor =       NIPS3ed,
+  booktitle =    NIPS3,
+  title =        "A Framework for the Cooperation of Learning
+                 Algorithms",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "781--788",
+  year =         "1991",
+}
+
+@article{Bottou92,
+  author    = {L. Bottou and V. Vapnik},
+  title     = {Local Learning Algorithms},
+  journal   = nc,
+  volume    = {4},
+  number    = {6},
+  pages     = {888--900},
+  year      = {1992},
+  key       = {Bottou92},
+}
+
+@inproceedings{Bottou94,
+  author    = {L. Bottou and C. Cortes and J. S. Denker and H.
+               Drucker and I. Guyon and L. D. Jackel and Y. {LeCun} and
+               U. A. Muller and E. Sackinger and P. Simard and V.
+               Vapnik},
+  title     = {Comparison of classifier methods: a case study in
+               handwritten digit recognition},
+  booktitle = {International Conference on Pattern Recognition},
+  address   = {Jerusalem, Israel},
+  year      = {1994},
+}
+
+@inproceedings{Bottou97,
+  author    = {L{\'e}on Bottou and Yoshua Bengio and Yann {LeCun}},
+  title     = {Global Training of Document Processing Systems using
+               Graph Transformer Networks},
+  booktitle = cvpr97,
+  pages     = {490--494},
+  publisher = {IEEE},
+  address   = {Puerto Rico},
+  year      = {1997},
+}
+
+@incollection{Bottou98,
+  author    = {L{\'e}on Bottou},
+  title     = {Online Algorithms and Stochastic Approximations},
+  editor    = {David Saad},
+  booktitle = {Online Learning in Neural Networks},
+  pages     = {},
+  publisher = {Cambridge University Press},
+  address   = {Cambridge, UK},
+  year      = {1998},
+}
+
+@phdthesis{Bottou_these91,
+  author    = {L\'eon Bottou},
+  title     = {Une approche th\'eorique de l'apprentissage
+               connexioniste; applications \`a la reconnaissance de la
+               parole},
+  school    = {Universit\'e de Paris XI},
+  year      = {1991},
+}
+
+@inproceedings{BouchardG2004,
+  author    = {Guillaume Bouchard and Bill Triggs},
+  title     = {The Tradeoff Between Generative and Discriminative
+               Classifiers},
+  booktitle = {IASC International Symposium on Computational
+               Statistics (COMPSTAT)},
+  pages     = {721--728},
+  address   = {Prague},
+  month     = aug,
+  year      = {2004},
+  keywords  = {LEAR, LAVA},
+}
+  %URL =          "http://lear.inrialpes.fr/pubs/2004/BT04",
+
+@inproceedings{BouchardG2007,
+ author = {Guillaume Bouchard},
+ title = {Bias-Variance Tradeoff in Hybrid Generative-Discriminative Models},
+ booktitle = {Proceedings of the Sixth International Conference on Machine Learning and Applications ({ICMLA} 2007)},
+ publisher = {IEEE Computer Society},
+ year = {2007},
+ isbn = {0-7695-3069-9},
+ doi = {10.1109/ICMLA.2007.23},
+ pages = {124--129},
+ address = {Washington, DC, USA},
+ }
+ %doi = {http://dx.doi.org/10.1109/ICMLA.2007.23},
+
+@article{Bourlard-cspla89,
+  author    = {H. Bourlard and C. Wellekens},
+  title     = {Speech Pattern Discrimination and Multi-Layered
+               Perceptrons},
+  journal   = cspla,
+  volume    = {3},
+  pages     = {1--19},
+  year      = {1989},
+}
+
+@article{Bourlard-pami90,
+  author    = {H. Bourlard and C. Wellekens},
+  title     = {Links between Hidden {Markov} Models and Multilayer
+               Perceptrons},
+  journal   = ieeetpami,
+  volume    = {12},
+  pages     = {1167--1178},
+  year      = {1990},
+}
+
+@article{Bourlard88,
+  author    = {H. Bourlard and Y. Kamp},
+  title     = {Auto-Association by Multilayer Perceptrons and
+               Singular Value Decomposition},
+  journal   = biocyb,
+  volume    = {59},
+  pages     = {291--294},
+  year      = {1988},
+}
+
+@book{Bourlard93,
+  author    = {H. Bourlard and N. Morgan},
+  title     = {Connectionist Speech Recognition. {A} Hybrid
+               Approach},
+  series    = {The Kluwer international series in engineering and
+               computer science},
+  volume    = {247},
+  publisher = {Kluwer Academic Publishers},
+  address   = {Boston},
+  year      = {1993},
+}
+
+@Article{Bourlard_cspla89,
+  author =       "H. Bourlard and C. Wellekens",
+  title =        "Speech Pattern Discrimination and Multi-Layered
+                 Perceptrons",
+  journal =      cspla,
+  volume =       "3",
+  pages =        "1--19",
+  year =         "1989",
+  OPTnote =      "",
+}
+
+@InCollection{Bourrely89,
+  author =       "J. Bourrely",
+  booktitle =    "Hypercube and distributed computers",
+  title =        "Parallelization of a Neural Learning Algorithm on a
+                 Hypercube",
+  publisher =    "Elsevier Science Publishing, North Holland",
+  pages =        "219--229",
+  year =         "1989",
+}
+
+@InProceedings{Bouveyron-Chipman-2007,
+  author    = {C. Bouveyron and H. Chipman},
+  title     = {Visualization and classification of graph-structured data: the case of the {E}nron dataset},
+  booktitle = ijcnn,
+  pages     = {1506--1511},
+  year      = {2007},
+}
+
+@book{Box73,
+  author    = {G. E. P. Box and G. C. Tiao},
+  title     = {Bayesian inference in statistical analysis},
+  publisher = {Addison-Wesley},
+  year      = {1973},
+}
+
+@book{BoxJenkins,
+  author    = {G. E. P. Box and G. M. Jenkins},
+  title     = {Time Series Analysis: Forecasting and Control.},
+  publisher = {Holden-Day},
+  address   = {San Francisco},
+  year      = {1970},
+}
+
+@book{Boyd04,
+  author    = {Stephen Boyd and Lieven Vandenberghe},
+  title     = {Convex Optimization},
+  publisher = {Cambridge University Press},
+  address   = {New York, NY, USA},
+  year      = {2004},
+  isbn      = {0-521-83378-7},
+}
+
+@incollection{Bradley+Bagnell-2009,
+ title = {Differentiable Sparse Coding},
+ author = {David M. Bradley and J. Andrew Bagnell},
+ editor =       NIPS21ed,
+ booktitle =    NIPS21,
+ pages = {113--120},
+ publisher = {NIPS Foundation},
+ year = {2009}
+}
+
+@phdthesis{Bradley-thesis,
+  author    = {David Bradley},
+  title     = {Learning in Modular Systems},
+  school    = {The Robotics Institute, Carnegie Mellon University},
+  year      = {2009},
+}
+
+@article{Brady-ieeecas89,
+  author    = {M. L. Brady and R. Raghavan and J. Slawny},
+  title     = {Back-Propagation Fails to Separate Where Perceptrons
+               Succeed},
+  journal   = ieeetcas,
+  volume    = {36},
+  pages     = {665--674},
+  year      = {1989},
+}
+
+@article{Brady89,
+  author    = {M. L. Brady and R. Raghavan and J. Slawny},
+  title     = {Back-Propagation fails to Separate Where Perceptrons
+               Succeed},
+  journal   = {IEEE Transactions on Circuits and Systems},
+  volume    = {36},
+  number    = {5},
+  pages     = {665--674},
+  year      = {1989},
+}
+
+@inproceedings{Bramson90,
+  author    = {M. J. Bramson and R. G. Hoptroff},
+  title     = {Forecasting the Economic Cycle: a Neural Network
+               Approach},
+  booktitle = {Workshop on Neural Networks for Statistical and
+               Economic Data},
+  address   = {Dublin},
+  year      = {1990},
+}
+
+@inproceedings{Brand2003,
+  author    = {M. Brand},
+  title     = {Charting a manifold},
+  editor    = NIPS15ed,
+  booktitle = NIPS15,
+  pages     = {961--968},
+  publisher = {{MIT} Press},
+  year      = {2003},
+}
+
+@article{Brand99,
+  author    = {Matthew Brand},
+  title     = {Structure Learning in Conditional Probability Models
+               via an Entropic Prior and Parameter Extinction},
+  journal   = {Neural Computation},
+  volume    = {11},
+  number    = {5},
+  pages     = {1155--1182},
+  year      = {1999},
+}
+
+@inproceedings{Brandt88,
+  author    = {R. D. Brandt and Y. Wang and A. J. Laub and S. K.
+               Mitra},
+  title     = {Alternative Networks for Solving the Travelling
+               Salesman Problem and the List-Matching Problem},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {333--340},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@InProceedings{BreglerC1994,
+  author    = {Christoph Bregler and Stephen M. Omohundro},
+  title     = {Surface Learning with Applications to Lipreading},
+  editor    = NIPS6ed,
+  booktitle = NIPS6,
+  pages     = {43--50},
+  publisher = {Morgan Kaufmann Publishers, Inc.},
+  year      = {1994},
+}
+
+
+@article{Breiman-96,
+  author    = {L. Breiman},
+  title     = {Heuristics of instability and stabilization in model
+               selection},
+  journal   = {The Annals of Statistics},
+  volume    = {24},
+  number    = {6},
+  pages     = {2350--2383},
+  year      = {1996},
+}
+
+@article{breiman-stability-96,
+  author    = {L. Breiman},
+  title     = {Heuristics of Instability and Stabilization in Model
+               Selection},
+  journal   = {Annals of Statistics},
+  volume    = {24},
+  number    = {6},
+  pages     = {2350--2383},
+  year      = {1996},
+}
+
+@article{Breiman01,
+  author    = {Leo Breiman},
+  title     = {Random Forests},
+  journal   = {Machine Learning},
+  volume    = {45},
+  number    = {1},
+  pages     = {5--32},
+  year      = {2001},
+}
+
+@book{Breiman84,
+  author    = {L. Breiman and J. H. Friedman and R. A. Olshen and C.
+               J. Stone},
+  title     = {Classification and Regression Trees},
+  publisher = {Wadsworth International Group},
+  address   = {Belmont, CA},
+  year      = {1984},
+}
+
+@TechReport{Breiman96,
+  author =       "L. Breiman",
+  title =        "Bias, Variance, and Arcing Classifiers",
+  number =       "460",
+  institution =  "Statistics Department, University of California",
+  address =      "Berkeley, CA 94720",
+  month =        apr,
+  year =         "1996",
+}
+
+@incollection{Bridle+Cox91,
+  author    = {J. S. Bridle and S. J. Cox},
+  title     = {{RECNORM}: simultaneous normalisation and
+               classification applied to speech recognition},
+  editor    = NIPS3ed,
+  booktitle = NIPS3,
+  pages     = {234--240},
+  publisher = {Morgan Kaufmann},
+  year      = {1991},
+}
+
+@incollection{Bridle89,
+  author    = {J. Bridle},
+  title     = {Probabilistic interpretation of feedforward
+               classification network outputs, with relationships to
+               statistical pattern recognition},
+  editor    = {F. Fogelman-Souli\'e and J. {H\'{e}rault}},
+  booktitle = {Neuro-computing: Algorithms, Architectures, and
+               Applications},
+  publisher = {Springer-Verlag},
+  address   = {New York},
+  year      = {1989},
+}
+
+@incollection{Bridle89-nips,
+  author    = {J. S. Bridle},
+  title     = {Training Stochastic Model Recognition Algorithms as
+               Networks can lead to Maximum Mutual Information
+               Estimation of Parameters},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  pages     = {211--217},
+  publisher = {Morgan Kaufmann},
+  year      = {1990},
+}
+
+@article{Bridle90,
+  author    = {J. S. Bridle},
+  title     = {Alphanets: a Recurrent `Neural' Network Architecture
+               with a Hidden {Markov} Model Interpretation},
+  journal   = spcomm,
+  volume    = {9},
+  number    = {1},
+  pages     = {83--92},
+  year      = {1990},
+}
+
+@incollection{Bridle90b,
+  author    = {J. S. Bridle},
+  title     = {Training Stochastic Model Recognition Algorithms as
+               Networks can lead to Maximum Mutual Information
+               Estimation of Parameters},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  pages     = {211--217},
+  publisher = {Morgan Kaufmann},
+  year      = {1990},
+}
+
+@incollection{Bromley-siamese93,
+  author    = {J. Bromley and J. Benz and L. Bottou and I. Guyon and
+               L. Jackel and Y. {LeCun} and C. Moore and E. Sackinger
+               and R. Shah},
+  title     = {Signature verification using a siamese time delay
+               neural network},
+  booktitle = {Advances in Pattern Recognition Systems using Neural
+               Network Technologies},
+  pages     = {669--687},
+  publisher = {World Scientific, Singapore},
+  year      = {1993},
+}
+
+@incollection{Bromley93,
+  author    = {J. Bromley and J. Benz and L. Bottou and I. Guyon and
+               L. Jackel and Y. {LeCun} and C. Moore and E. Sackinger
+               and R. Shah},
+  title     = {Signature verification using a siamese time delay
+               neural network},
+  booktitle = {Advances in Pattern Recognition Systems using Neural
+               Network Technologies},
+  pages     = {669--687},
+  publisher = {Series in Machine Perception and Artificial
+               Intelligence, World Scientific, Singapore},
+  year      = {1993},
+}
+
+@article{broomhead-lowe-88,
+  author    = {D. Broomhead and D. Lowe},
+  title     = {Multivariable functional interpolation and adaptive
+               networks},
+  journal   = {Complex Systems},
+  volume    = {2},
+  pages     = {321--355},
+  year      = {1988},
+  key       = {Broomhead},
+}
+
+@techreport{Brown-Hinton-PoHMM-2000,
+  author      = {Andrew Brown and Geoffrey Hinton},
+  title       = {Products of Hidden Markov Models},
+  institution = {Gatsby Unit, University College London},
+  number      = {GCNU TR 2000-004},
+  year        = {2000},
+}
+
+@Book{Brown86,
+  author =       "Lawrence D. Brown",
+  title =        "Fundamentals of Statistical Exponential Families",
+  series =       "Lecture Notes--Monograph Series",
+  volume =       "9",
+  publisher =    "Institute of Mathematical Statistics",
+  address =      "Hayward, CA",
+  year =         "1986",
+}
+
+@Article{Brown92,
+  author =       "P. F. Brown and V. J. {Della Pietra} and P. V. {deSouza}
+                 and J. C. Lai and R. L. Mercer",
+  title =        "Class-based {\it n}-gram models of natural language",
+  journal =      "Computational Linguistics",
+  volume =       "18",
+  number =       "4",
+  pages =        "467--479",
+  year =         "1992",
+}
+
+@phdthesis{BrownPhD,
+  author    = {P. Brown},
+  title     = {The Acoustic-Modeling problem in Automatic Speech
+               Recognition},
+  school    = {Dept. of Computer Science, Carnegie-Mellon
+               University},
+  year      = {1987},
+}
+
+@inproceedings{Bruce-94,
+  author    = {Rebecca Bruce and Janyce Wiebe},
+  title     = {A new approach to sense identification},
+  booktitle = {{ARPA} Workshop on Human Language Technology},
+  address   = {Plainsboro, {NJ}},
+  year      = {1994},
+}
+
+@InProceedings{Brugnara92,
+  author =       "F. Brugnara and R. {De Mori} and D. Giuliani and M.
+                 Omologo",
+  booktitle =    icassp,
+  title =        "A family of parallel hidden {Markov} models",
+  publisher =    "IEEE",
+  address =      "New York, NY, USA",
+  pages =        "377--380",
+  year =         "1992",
+}
+
+@Article{Brunak89,
+  author =       "S. Brunak and B. Lautrup",
+  title =        "Liniedeling med et Neuralt Netv{\ae}rk",
+  journal =      SAML,
+  volume =       "14",
+  pages =        "55--74",
+  year =         "1989",
+}
+
+@book{Brunak90,
+  author    = {S. Brunak and B. Lautrup},
+  title     = {Neural Networks: Computers with Intuition},
+  publisher = {World Scientific},
+  address   = {Singapore},
+  year      = {1990},
+}
+
+@article{Brunak91,
+  author    = {S. Brunak and J. Engelbrecht and S. Knudsen},
+  title     = {Prediction of human {mRNA} donor and acceptor sites
+               from the {DNA} sequence},
+  journal   = {J. Molec. Biol.},
+  volume    = {220},
+  pages     = {49--65},
+  year      = {1991},
+}
+
+@book{Bryson69,
+  author    = {A. E. Bryson and Y.-C. Ho},
+  title     = {Applied Optimal Control},
+  publisher = {Blaisdell},
+  address   = {New York},
+  year      = {1969},
+}
+
+@Article{BT-the-fitting-1974,
+  author =       "A. E. Beaton and J. W. Tukey",
+  title =        "The fitting of power series, meaning polynomials,
+                 illustrated on band-spectroscopic data",
+  journal =      "Technometrics",
+  volume =       "16",
+  pages =        "147--185",
+  year =         "1974",
+}
+
+@Article{Buia-Tiesinga-2006,
+  author    = {Calin Buia and Paul Tiesinga},
+  title     = {Attentional modulation of firing rate and synchrony in a model cortical network},
+  journal   = {J. Computational Neuroscience},
+  volume    = {20},
+  pages     = {247--264},
+  year      = {2006},
+}
+
+@TechReport{buhlmann97,
+  author =       "P. B{\"u}hlmann and A. J. Wyner",
+  title =        "Variable Length Markov Chains",
+  number =       "479",
+  institution =  "Statistics Department, University of California,
+                 Berkeley",
+  month =        jan,
+  year =         "1997",
+}
+
+@article{Buhmann87,
+  author    = {J. Buhmann and K. Schulten},
+  title     = {Noise-Driven Temporal Association in Neural Networks},
+  journal   = eul,
+  volume    = {4},
+  pages     = {1205--1209},
+  year      = {1987},
+}
+
+@inproceedings{Buhmann88,
+  author    = {J. Buhmann and K. Schulten},
+  title     = {Storing Sequences of Biased Patterns in Neural
+               Networks with Stochastic Dynamics},
+  editor    = {R. Eckmiller and Ch. von der Malsburg},
+  booktitle = {Neural Computers},
+  pages     = {231--242},
+  publisher = {Springer-Verlag, Berlin},
+  address   = {Neuss 1987},
+  year      = {1988},
+}
+
+@article{Buntine94,
+  author    = {W. Buntine},
+  title     = {Operations for Learning with Graphical Models},
+  journal   = {Journal of Artificial Intelligence Research},
+  volume    = {2},
+  pages     = {159--225},
+  year      = {1994},
+}
+
+@inproceedings{Burges92,
+  author    = {C. Burges and O. Matan and Y. {LeCun} and J. Denker and
+               L. Jackel and C. Stenard and C. Nohl and J. Ben},
+  title     = {Shortest Path Segmentation: {A} Method for Training a
+               Neural Network to Recognize character Strings},
+  booktitle = ijcnn,
+  volume    = {3},
+  pages     = {165--172},
+  address   = {Baltimore},
+  year      = {1992},
+}
+
+@Article{Burges93,
+  author =       "C. J. C. Burges and J. I. Ben and J. S. Denker and Y.
+                 {LeCun} and C. R. Nohl",
+  title =        "Off Line Recognition of Handwritten Postal Words Using
+                 Neural Networks",
+  journal =      "International Journal of Pattern Recognition and
+                 Artificial Intelligence",
+  volume =       "7",
+  number =       "4",
+  pages =        "689",
+  year =         "1993",
+}
+
+@Article{burges98,
+  author =       "C. J. C. Burges",
+  title =        "A Tutorial on {Support} {Vector} {Machines} for
+                 Pattern Recognition",
+  journal =      "Data Mining and Knowledge Discovery",
+  volume =       "2",
+  number =       "2",
+  pages =        "121--167",
+  year =         "1998",
+}
+
+@incollection{Burges99Geometry,
+  author    = {C. J. C. Burges},
+  title     = {Geometry and invariance in kernel based methods},
+  editor    = {B. {Sch\"olkopf} and C. J. C. Burges and A. J. Smola},
+  booktitle = {Advances in Kernel Methods --- Support Vector
+               Learning},
+  pages     = {89--116},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1999},
+}
+
+@article{Burr83,
+  author    = {D. J. Burr},
+  title     = {Designing a handwriting reader},
+  journal   = ieeetpami,
+  volume    = {5},
+  number    = {5},
+  pages     = {554--559},
+  month     = sep,
+  year      = {1983},
+}
+
+@inproceedings{Burr88,
+  author    = {D. J. Burr},
+  title     = {An Improved Elastic Net Method for the Travelling
+               Salesman Problem},
+  booktitle = icnn,
+  volume    = {1},
+  pages     = {69--76},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@article{Burrows94,
+  author    = {J. H. Burrows and J. Peck},
+  title     = {On-Line Condition Monitoring of Rotating Equipment
+               Using Neural Networks},
+  journal   = {ISA Transactions},
+  volume    = {33},
+  pages     = {159--164},
+  year      = {1994},
+}
+
+@inproceedings{Burrows95,
+  author    = {J. H. Burrows and R. Doucet},
+  title     = {Machine Condition Monitoring Using Artificial Neural
+               Networks to Process Vibration Data Obtained from
+               Maintenance Monitoring Equipment},
+  booktitle = {Proceedings of COMADEM'95},
+  address   = {Kingston, Ontario, Canada},
+  year      = {1995},
+}
+
+@Article{Byrne87,
+  author =       "J. H. Byrne",
+  title =        "Cellular analysis of associative learning",
+  journal =      "Physiological Reviews",
+  volume =       "67",
+  pages =        "329--439",
+  year =         "1987",
+}
+
+@InCollection{Byrne89,
+  author =       "J. H. Byrne and K. J. Gingrich and D. A. Baxter",
+  editor =       "R. D. Hawkins and G. H. Bower",
+  booktitle =    "Computational Models of Learning in Simple Neural
+                 Systems",
+  title =        "Computational capabilities of single neurons:
+                 relationship to simple forms of associative and
+                 nonassociative learning in {\it Aplysia}",
+  publisher =    "Academic Press",
+  pages =        "31--63",
+  year =         "1989",
+}
+
+@inproceedings{Cacciatore-nips94,
+  author    = {T. W. Cacciatore and Steven J. Nowlan},
+  title     = {Mixtures of Controllers for Jump Linear and Non-linear
+               Plants},
+  editor    = NIPS6ed,
+  booktitle = NIPS6,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1994},
+}
+
+@article{Cai94,
+  author    = {J. Cai},
+  title     = {A Markov model of unconditional variance in {ARCH}},
+  journal   = {Journal of Business and Economic Statistics},
+  year      = {1994},
+}
+
+@InProceedings{Cai+al-2007,
+  author    = {Cai, Deng   and He, Xiaofei   and Han, Jiawei  },
+  title     = {Semi-supervised Discriminant Analysis},
+  booktitle = ICCV07,
+  pages     = {1--7},
+  year      = {2007},
+}
+
+@article{Caianiello61,
+  author    = {E. R. Caianiello},
+  title     = {Outline of a Theory of Thought and Thinking Machines},
+  journal   = jtb,
+  volume    = {1},
+  pages     = {204--235},
+  year      = {1961},
+}
+
+@article{Campbell+Kulikowski-1966,
+    author = {F. W. Campbell and J. J. Kulikowski},
+    title = {Orientational selectivity of the human visual system},
+    journal = {Journal of Physiology},
+    volume = {187},
+    year = 1966,
+    pages = "437--445",
+    address = "London"
+}
+
+@article{Campbell+al-1969,
+    title = {The Spatial Selectivity of the Visual Cells of the Cat},
+    author = {F. W. Campbell and G. F. Cooper and C. Enroth-Cugell},
+    journal = {Journal of Physiology},
+    address = "London",
+    pages = {223--235},
+    volume = {203},
+    year = {1969},
+    biburl = {http://www.bibsonomy.org/bibtex/2cfcc4bc8437b72761251fb2b9e7eb106/schaul},
+    description = {idsia},
+}
+
+% Chapter in an edited volume: InCollection is used so that booktitle is printed.
+% The OPT prefix keeps the editor list as an ignored, informational field.
+@InCollection{CandelaJ2006,
+  author =       "J. Qui{\~n}onero-Candela and C. E. Rasmussen and F. Sinz
+                 and O. Bousquet and B. Sch{\"o}lkopf",
+  booktitle =    "Machine learning challenges: Evaluating predictive
+                 uncertainty, visual object classification, and
+                 recognising textual entailment",
+  title =        "Evaluating Predictive Uncertainty Challenge",
+  publisher =    "Springer",
+  address =      "Heidelberg, Germany",
+  pages =        "1--27",
+  month =        apr,
+  year =         "2006",
+  series =       "Lecture Notes in Computer Science",
+  volume =       "3944",
+  URL =          "http://www.springerlink.com/(yxluatzjo3gnpl45323wjs45)/app/home/contribution.asp?referrer=parent&backto=issue,1,25;journal,2,3638;linkingpublicationresults,1:105633,1",
+  abstract =     "This Chapter presents the PASCAL1 Evaluating
+                 Predictive Uncertainty Challenge, introduces the
+                 contributed Chapters by the participants who obtained
+                 outstanding results, and provides a discussion with
+                 some lessons to be learnt. The Challenge was set up to
+                 evaluate the ability of Machine Learning algorithms to
+                 provide good ``probabilistic predictions'', rather than
+                 just the usual ``point predictions'' with no measure of
+                 uncertainty, in regression and classification problems.
+                 Participants had to compete on a number of regression
+                 and classification tasks, and were evaluated by both
+                 traditional losses that only take into account point
+                 predictions and losses we proposed that evaluate the
+                 quality of the probabilistic predictions.",
+  OPTeditor =    "J. Qui{\~n}onero-Candela and I. Dagan and B. Magnini
+                 and F. d'Alch{\'e}-Buc",
+}
+
+@article{candeswakin08,
+author = "Cand{\`e}s, E. and Wakin, M.",
+title = "An introduction to compressive sampling",
+journal = "IEEE Signal Processing Magazine",
+volume = 25,
+number = 2,
+pages = "21--30",
+year = 2008,
+}
+
+@article{Candes+Tao-2005,
+ author = {E. J. Cand{\`e}s and T. Tao},
+ title = {Decoding by linear programming},
+ journal = {{IEEE} Transactions on Information Theory},
+ volume = 51,
+ number = 12,
+ pages = {4203--4215},
+ year = 2005,
+}
+
+@article{Canning88,
+  author  = {A. Canning and E. Gardner},
+  title   = {Partially Connected Models of Neural Networks},
+  journal = jpa,
+  year    = {1988},
+  volume  = {21},
+  pages   = {3275--3284},
+}
+
+@article{carandini:1994,
+    author = {Matteo Carandini and David J. Heeger},
+    title = {Summation and Division by Neurons in Primate Visual Cortex},
+    journal = {Science},
+    volume={264},
+    number={5163},
+    month = may,
+    year = {1994},
+    pages = {1333--1336},
+}
+
+@inproceedings{Cardie-1993,
+    author = "Claire Cardie",
+    title = "Using Decision Trees to Improve Case-Based Learning",
+    booktitle = "Proceedings of the Tenth International Conference on Machine Learning",
+    publisher = "Morgan Kaufmann",
+    pages = "25--32",
+    year = "1993",
+    url = "http://citeseer.ist.psu.edu/cardie93using.html"
+}
+
+@article{Carpenter87a,
+  author  = {G. A. Carpenter and S. Grossberg},
+  title   = {A Massively Parallel Architecture for a Self-Organizing Neural Pattern Recognition Machine},
+  journal = cvgip,
+  year    = {1987},
+  volume  = {37},
+  pages   = {54--115},
+}
+
+@article{Carpenter87b,
+  author  = {G. A. Carpenter and S. Grossberg},
+  title   = {{ART2}: Self-Organization of Stable Category Recognition Codes for Analog Input Patterns},
+  journal = applopt,
+  year    = {1987},
+  volume  = {26},
+  pages   = {4919--4930},
+}
+
+@article{Carpenter88,
+  author  = {G. A. Carpenter and S. Grossberg},
+  title   = {The {ART} of Adaptive Pattern Recognition by a Self-Organizing Neural Network},
+  journal = computer,
+  year    = {1988},
+  month   = mar,
+  pages   = {77--88},
+}
+
+@InProceedings{Carrasco94,
+  author =       "R. C. Carrasco and J. Oncina",
+  booktitle =    "Grammatical Inference and Applications Proc. of the
+                 2nd International Colloquium on Grammatical Inference
+                 ICGI94",
+  title =        "Learning regular grammars by means of a state merging
+                 method",
+  series =       "Lecture Notes in Artificial Intelligence",
+  volume =       "862",
+  publisher =    "Springer-Verlag",
+  address =      "Alicante (Spain)",
+  month =        sep,
+  year =         "1994",
+}
+
+@Article{Carter94,
+  author =       "C. K. Carter and R. Kohn",
+  title =        "On {Gibbs} sampling for state space models",
+  journal =      "Biometrika",
+  volume =       "81",
+  pages =        "541--553",
+  year =         "1994",
+}
+
+@inproceedings{Caruana-2001,
+  author    = {Rich Caruana},
+  title     = {A Non-Parametric {EM}-Style Algorithm for Imputing Missing Values},
+  booktitle = aistats01,
+  publisher = {Society for Artificial Intelligence and Statistics},
+  year      = {2001},
+}
+
+@inproceedings{caruana06:empirical,
+  author    = {R. Caruana and A. Niculescu-Mizil},
+  title     = {An Empirical Comparison of Supervised Learning Algorithms},
+  booktitle = ICML06,
+  editor    = ICML06ed,
+  publisher = ICML06publ,
+  year      = {2006},
+}
+
+@inproceedings{caruana93a,
+  author    = {Rich Caruana},
+  title     = {Multitask Connectionist Learning},
+  booktitle = {Proceedings of the 1993 Connectionist Models Summer School},
+  pages     = {372--379},
+  year      = {1993},
+}
+
+@inproceedings{caruana93a-small,
+  author    = {Rich Caruana},
+  title     = {Multitask Connectionist Learning},
+  booktitle = {Proceedings of the 1993 Connectionist Models Summer School},
+  pages     = {372--379},
+  year      = {1993},
+}
+  %url =          "http://citeseer.ist.psu.edu/32984.html",
+
+@inproceedings{caruana95,
+  author    = {Rich Caruana},
+  title     = {Learning Many Related Tasks at the Same Time With Backpropagation},
+  booktitle = NIPS7,
+  editor    = NIPS7ed,
+  pages     = {657--664},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1995},
+}
+
+@InProceedings{caruana96,
+  author =       "Rich Caruana and Shumeet Baluja and Tom Mitchell",
+  editor =       NIPS8ed,
+  booktitle =    NIPS8,
+  title =        "Using the Future to ``Sort Out'' the Present: Rankprop
+                 and Multitask Learning for Medical Risk Evaluation",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "959--965",
+  year =         "1996",
+}
+
+@inproceedings{caruana96c,
+  author    = {Rich Caruana},
+  title     = {Algorithms and Applications for Multitask Learning},
+  booktitle = {International Conference on Machine Learning},
+  pages     = {87--95},
+  year      = {1996},
+}
+
+@article{caruana97a,
+  author    = {Rich Caruana},
+  title     = {Multitask Learning},
+  journal   = {Machine Learning},
+  year      = {1997},
+  volume    = {28},
+  number    = {1},
+  pages     = {41--75},
+  publisher = {Kluwer Academic Publishers},
+  address   = {Hingham, MA, USA},
+}
+
+@article{Casdagli89,
+  author  = {M. Casdagli},
+  title   = {Nonlinear Prediction of Chaotic Time Series},
+  journal = physicaD,
+  year    = {1989},
+  volume  = {35},
+  pages   = {335--356},
+}
+
+@Book{Casella+Berger-2001,
+  author =       "George Casella and Roger Berger",
+  title =        "Statistical Inference",
+  publisher =    "Duxbury Press",
+  year =         "2001",
+}
+
+
+@article{Cashman+Pouliot90,
+  author  = {N. R. Cashman and Y. Pouliot},
+  title   = {{EBV} {Ig}-like domains},
+  journal = {Nature},
+  year    = {1990},
+  volume  = {343},
+  pages   = {319},
+}
+
+@ARTICLE{CataltepeZ1999,
+    author = {Zehra Cataltepe and Yaser S. Abu-Mostafa and Malik Magdon-Ismail},
+    title = {No free lunch for early stopping},
+    journal = {Neural Computation},
+    year = {1999},
+    volume = {11},
+    pages = {995--1009}
+}
+
+@inproceedings{Cater87,
+  author    = {J. P. Cater},
+  title     = {Successfully Using Peak Learning Rates of 10 (and Greater) in Back-Propagation Networks with the Heuristic Learning Algorithm},
+  booktitle = icnn,
+  editor    = {M. Caudill and C. Butler},
+  volume    = {2},
+  pages     = {645--651},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  year      = {1987},
+}
+
+@book{Caudill89,
+  author    = {M. Caudill},
+  title     = {Neural Networks Primer},
+  publisher = {Miller Freeman},
+  address   = {San Francisco},
+  year      = {1989},
+}
+
+@manual{CC01a,
+  author = {Chih-Chung Chang and Chih-Jen Lin},
+  title  = {{LIBSVM}: a library for support vector machines},
+  note   = {Software available at \verb+http://www.csie.ntu.edu.tw/~cjlin/libsvm+},
+  year   = {2001},
+}
+
+@article{cemgil+kappen+barber-2006,
+  author  = {A. T. Cemgil and H. J. Kappen and D. Barber},
+  title   = {A Generative Model for Music Transcription},
+  journal = {IEEE Transactions on Audio, Speech and Language Processing},
+  year    = {2006},
+  volume  = {14},
+  number  = {2},
+  pages   = {679--694},
+}
+
+@inproceedings{Cevikalp+al-2008,
+    title = {Semi-Supervised Dimensionality Reduction Using Pairwise Equivalence Constraints},
+    author = {Hakan Cevikalp and Jakob J. Verbeek and Fr{\'e}d{\'e}ric Jurie and Alexander Kl{\"a}ser},
+    booktitle = {VISAPP},
+    editor = {Alpesh Ranchordas and Helder Ara{\'u}jo},
+    pages = {489--496},
+    publisher = {INSTICC - Institute for Systems and Technologies of Information, Control and Communication},
+    url = {http://dblp.uni-trier.de/db/conf/visapp/visapp2008-1.html#CevikalpVJK08},
+    year = {2008},
+    biburl = {http://www.bibsonomy.org/bibtex/21afc498c02543e97ff5bd4f6b107e16e/dblp},
+    description = {dblp},
+    isbn = {978-989-8111-21-0},
+    date = {2008-04-07},
+    keywords = {dblp }
+}
+
+@inproceedings{CGY96,
+  author    = {Ingemar J. Cox and Joumana Ghosn and Peter N. Yianilos},
+  title     = {Feature-Based Face Recognition Using Mixture-Distance},
+  booktitle = cvpr96,
+  pages     = {209--216},
+  year      = {1996},
+}
+
+@Article{CHAID-BVS-91,
+  author =       "D. Biggs and B. de Ville and E. Suen",
+  title =        "A method of choosing multiway partitions for
+                 classification and decision trees",
+  journal =      "Journal of Applied Statistics",
+  volume =       "18",
+  number =       "1",
+  pages =        "49--62",
+  year =         "1991",
+}
+
+% Chapter in an edited volume: InCollection is used so that booktitle is printed.
+@InCollection{CHAID-HK-82,
+  author =       "D. M. Hawkins and G. V. Kass",
+  editor =       "D. M. Hawkins",
+  booktitle =    "Topics in Applied Multivariate Analysis",
+  title =        "Automatic Interaction Detection",
+  publisher =    "Cambridge University Press",
+  address =      "Cambridge",
+  pages =        "269--302",
+  year =         "1982",
+}
+
+@article{CHAID-original-80,
+  author  = {G. V. Kass},
+  title   = {An Exploratory Technique for Investigating Large Quantities of Categorical Data},
+  journal = {Applied Statistics},
+  year    = {1980},
+  volume  = {29},
+  number  = {2},
+  pages   = {119--127},
+}
+
+@inproceedings{Chapados2002,
+  author    = {N. Chapados and Y. Bengio and P. Vincent and J. Ghosn and C. Dugas and I. Takeuchi and L. Meng},
+  title     = {Estimating Car Insurance Premia: a Case Study in High-Dimensional Data Inference},
+  booktitle = NIPS14,
+  editor    = NIPS14ed,
+  pages     = {1369--1376},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2002},
+}
+
+@inproceedings{Chapados2002-short,
+  author    = {N. Chapados and Y. Bengio and P. Vincent and J. Ghosn and C. Dugas and I. Takeuchi and L. Meng},
+  title     = {Estimating Car Insurance Premia: a Case Study in High-Dimensional Data Inference},
+  booktitle = NIPS14,
+  publisher = {{MIT} Press},
+  year      = {2002},
+}
+
+@inproceedings{Chapelle+al-2003,
+  author    = {O. Chapelle and J. Weston and B. Sch{\"o}lkopf},
+  title     = {Cluster kernels for semi-supervised learning},
+  booktitle = NIPS15,
+  editor    = NIPS15ed,
+  pages     = {585--592},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2003},
+}
+
+@inproceedings{Chapelle-nips2003,
+  author    = {O. Chapelle and B. Sch{\"o}lkopf and J. Weston},
+  title     = {Semi-supervised learning through principal directions estimation},
+  booktitle = NIPS15,
+  editor    = NIPS15ed,
+  publisher = {{MIT} Press},
+  year      = {2003},
+}
+
+@inproceedings{Chapelle2001,
+  author    = {Olivier Chapelle and Jason Weston and L\'eon Bottou and Vladimir Vapnik},
+  title     = {Vicinal Risk Minimization},
+  booktitle = NIPS13,
+  editor    = NIPS13ed,
+  pages     = {416--422},
+  year      = {2001},
+}
+
+@InProceedings{chapelle2001iin,
+  author =       "O. Chapelle and B. Sch{\"o}lkopf",
+  title =        "Incorporating invariances in nonlinear support vector
+                 machines",
+  editor =       NIPS14ed,
+  booktitle =    NIPS14,
+  year =         "2001",
+}
+
+@article{Chapelle99,
+  author  = {O. Chapelle and P. Haffner and V. Vapnik},
+  title   = {{SVM}s for Histogram-Based Image Classification},
+  journal = {IEEE Transactions on Neural Networks},
+  year    = {1999},
+  note    = {accepted, special issue on Support Vectors},
+}
+
+@Article{ChapelleVapnikBengio2001,
+  author =       "O. Chapelle and V. Vapnik and Y. Bengio",
+  title =        "Model Selection for Small-Sample Regression",
+  journal =      "Machine Learning",
+  volume =       "48",
+  number =       "1",
+  pages =        "9--23",
+  year =         "2002",
+}
+
+% The citation key keeps its original spelling (the author is Willsky) so existing citations still resolve.
+@Article{Willski-2002,
+  author =       "A. S. Willsky",
+  title =        "Multiresolution {Markov} models for signal and image processing",
+  journal =      "Proceedings of the IEEE",
+  volume =       "90",
+  number =       "8",
+  pages =        "1396--1458",
+  year =         "2002",
+}
+
+@Article{Felzenszwalb+Huttenlocher-2004,
+  author =       "Pedro F. Felzenszwalb and Daniel P. Huttenlocher",
+  title =        "Efficient Graph-Based Image Segmentation",
+  journal =      "International Journal of Computer Vision",
+  volume =       "59",
+  number =       "2",
+  pages =        "167--181",
+  year =         "2004",
+}
+
+@inproceedings{Lombaert-2005,
+  author =       "Herve Lombaert and Yiyong Sun and Leo Grady and Chenyang Xu",
+  title =        "A Multilevel Banded Graph Cuts Method for Fast Image Segmentation",
+  booktitle =    ICCV05,
+  volume =       "1",
+  pages =        "259--265",
+  year =         "2005",
+}
+
+@Article{Boykov+Kolmogorov-2004,
+  author =       "Y. Boykov and V. Kolmogorov",
+  title =        "An experimental comparison of min-cut/max-flow algorithms for energy minimization in vision",
+  journal =      ieeetpami,
+  volume =       "26",
+  number =       "9",
+  pages =        "1124--1137",
+  year =         "2004",
+}
+
+@InProceedings{chapelleetal06,
+  author =       {Chapelle, O. and Chi, M. and Zien, A.},
+  title =        {A continuation method for semi-supervised {SVMs}},
+  editor =       ICML06ed,
+  booktitle =    ICML06,
+  publisher =    ICML06publ,
+  year =         {2006},
+}
+
+@inproceedings{ChapelleO2005,
+   author = {Olivier Chapelle and Alexander Zien},
+   title = {Semi-Supervised Classification by Low Density Separation},
+   year = {2005},
+   pages = {57--64},
+   month = jan,
+   editor = {Cowell, R. and Ghahramani, Z.},
+   booktitle = {Proceedings of the Tenth International Workshop on Artificial Intelligence and Statistics (AISTATS 2005)},
+   location = {Barbados},
+}
+   %URL = {http://www.gatsby.ucl.ac.uk/aistats/aistats2005_eproc.pdf}
+
+% Edited volume: the three names are its editors, not authors.
+@book{Chapelle-2006,
+ editor = {Olivier Chapelle and Bernhard Sch{\"o}lkopf and Alexander Zien},
+ title =    "Semi-Supervised Learning",
+ publisher =    "{MIT} Press",
+ address =      "Cambridge, MA",
+ year =         "2006",
+}
+
+@TechReport{Charniak99,
+  author =       "Eugene Charniak",
+  title =        "A Maximum-Entropy-Inspired Parser",
+  number =       "CS-99-12",
+  institution =  "Brown University",
+  year =         "1999",
+  URL =          "http://citeseer.nj.nec.com/charniak99maximumentropyinspired.html",
+}
+
+@Misc{Chatpatanasiri-2008,
+  author =       "Ratthachat Chatpatanasiri",
+  title =        "Spectral Methods for Linear and Non-Linear Semi-Supervised Dimensionality Reduction",
+  year =         "2008",
+  note =         "Submitted for publication",
+  url =          "http://www.citebase.org/abstract?id=oai:arXiv.org:0804.0924",
+}
+
+@inproceedings{Chauvin89,
+  author    = {Y. Chauvin},
+  title     = {A Back-Propagation Algorithm with Optimal Use of Hidden Units},
+  booktitle = NIPS1,
+  editor    = NIPS1ed,
+  pages     = {519--526},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1989},
+}
+
+@inproceedings{Chauvin90,
+  author    = {Y. Chauvin},
+  title     = {Dynamic behavior of constrained back-propagation networks},
+  booktitle = NIPS2,
+  editor    = NIPS2ed,
+  pages     = {642--649},
+  publisher = {Morgan Kaufmann},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@inproceedings{Cheeseman88,
+  author    = {P. Cheeseman and J. Kelly and M. Self and J. Stutz and W. Taylor and D. Freeman},
+  title     = {{AutoClass}: {A} {Bayesian} Classification System},
+  booktitle = {Proceedings of the Fifth International Conference on Machine Learning},
+  address   = {The University of Michigan, Ann Arbor},
+  month     = jun,
+  year      = {1988},
+}
+
+@Article{Chelba-Jelinek-2000,
+  author =       "Ciprian Chelba and Frederick Jelinek",
+  title =        "Structured Language Modeling",
+  journal =      "Computer Speech and Language",
+  volume =       "14",
+  number =       "4",
+  pages =        "283--332",
+  year =         "2000",
+}
+
+@Article{Chen+Goodman99,
+  author =       "Stanley F. Chen and Joshua T. Goodman",
+  title =        "An Empirical Study of Smoothing Techniques for
+                 Language Modeling",
+  journal =      "Computer Speech and Language",
+  volume =       "13",
+  number =       "4",
+  pages =        "359--393",
+  year =         "1999",
+}
+
+@article{Chen+Murray2003,
+  author  = {Hsin Chen and Alan F. Murray},
+  title   = {A Continuous Restricted {Boltzmann} Machine with an Implementable Training Algorithm},
+  journal = {IEE Proceedings of Vision, Image and Signal Processing},
+  year    = {2003},
+  volume  = {150},
+  number  = {3},
+  pages   = {153--158},
+}
+
+@phdthesis{chen95basispursuit,
+  author = {S. Chen},
+  title  = {Basis Pursuit},
+  school = {Department of Statistics, Stanford University},
+  year   = {1995},
+}
+
+@TechReport{Chen98,
+  author =       "Stanley F. Chen and Joshua T. Goodman",
+  title =        "An Empirical Study of Smoothing Techniques for
+                 Language Modeling",
+  number =       "TR-10-98",
+  institution =  "Computer Science Group, Harvard University",
+  year =         "1998",
+}
+
+@Article{ChenS2000,
+  author =       "Stanley F. Chen and Ronald Rosenfeld",
+  title =        "A Survey of Smoothing Techniques for {ME} Models",
+  journal =      "IEEE Transactions on Speech and Audio Processing",
+  volume =       "8",
+  number =       "1",
+  month =        jan,
+  year =         "2000",
+}
+
+@techreport{Chen+Kotani-2005,
+  author =      "Chen, Fan and Kotani, Kazunori",
+  title =       "Facial Expression Recognition by Supervised {ICA} with Selective Prior",
+  ISSN =        "09135685",
+  institution = "The Institute of Electronics, Information and Communication Engineers",
+  year =        "2005",
+  number =      "462",
+  pages =       "27--32",
+  URL =         "http://ci.nii.ac.jp/naid/110004064718/en/",
+}
+
+@Article{ChenX1989,
+  author={Chen, X. R. and Krishnaiah, P. R. and Liang, W. W.},
+  title={Estimation of multivariate binary density using orthogonal functions},
+  journal={Journal of Multivariate Analysis},
+  year=1989,
+  volume={31},
+  number={2},
+  pages={178--186},
+  month=nov,
+}
+
+@inproceedings{Chigier88,
+  author    = {B. Chigier and R. A. Brennan},
+  title     = {Broad Class Network Generation Using a Combination of Rules and Statistics for Speaker Independent Continuous Speech},
+  booktitle = icassp,
+  pages     = {449--452},
+  address   = {New York, NY},
+  year      = {1988},
+}
+
+@incollection{Chipman-NIPS2006,
+  author    = {H. A. Chipman and E. I. George and R. E. McCulloch},
+  title     = {Bayesian Ensemble Learning},
+  booktitle = NIPS19,
+  editor    = NIPS19ed,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2007},
+}
+
+@article{Chipman-2008,
+  author =       "H. A. Chipman and E. I. George and R. E. McCulloch",
+  title =        "Bayesian Ensemble Learning",
+  journal = "Annals of Applied Statistics",
+  year =         "2008",
+  note =         "Under revision",
+}
+
+@inproceedings{ChopraS2005,
+  author    = {Sumit Chopra and Raia Hadsell and Yann {LeCun}},
+  title     = {Learning a Similarity Metric Discriminatively, with Application to Face Verification},
+  booktitle = cvpr05,
+  publisher = {IEEE Press},
+  year      = {2005},
+  original  = {orig/chopra-05.ps.gz},
+}
+
+@inproceedings{Choueka-1998,
+  author    = {Y. Choueka},
+  title     = {Looking for needles in a haystack},
+  booktitle = {RIAO 88, User-oriented Content-based Text and Image Handling},
+  volume    = {1},
+  pages     = {609--623},
+  year      = {1988},
+}
+
+@article{Chow62,
+  author  = {C. K. Chow},
+  title   = {A recognition method using neighbor dependence},
+  journal = {IRE Trans. Elec. Comp.},
+  year    = {1962},
+  month   = oct,
+  volume  = {EC-11},
+  pages   = {683--690},
+}
+
+@inproceedings{Chrisman92AAAI,
+  author    = {Lonnie Chrisman},
+  title     = {Reinforcement Learning with Perceptual Aliasing: The Perceptual Distinctions Approach},
+  booktitle = AAAI-92,
+  pages     = {183--188},
+  year      = {1992},
+}
+
+@inproceedings{Chung+al-1998,
+  author    = {Yi-Ming Chung and William M. Pottenger and Bruce R. Schatz},
+  title     = {Automatic subject indexing using an associative neural network},
+  booktitle = {DL '98: Proceedings of the third ACM conference on Digital libraries},
+  pages     = {59--68},
+  publisher = {ACM Press},
+  address   = {New York, NY, USA},
+  year      = {1998},
+  ISBN      = {0-89791-965-3},
+  location  = {Pittsburgh, Pennsylvania, United States},
+}
+
+% Monograph published as volume 92 of the CBMS series, hence a Book entry with series and volume.
+@Book{Chung-97,
+  author =       "F. Chung",
+  title =        "Spectral graph theory",
+  series =       "{CBMS} Regional Conference Series in Mathematics",
+  volume =       "92",
+  publisher =    "American Mathematical Society",
+  year =         "1997",
+}
+
+@article{Churchill89,
+  author  = {G. A. Churchill},
+  title   = {A stochastic model for heterogeneous {DNA} sequences},
+  journal = {Bull. Mathematical Biology},
+  year    = {1989},
+  volume  = {51},
+  pages   = {79--94},
+}
+
+@Book{Chvatal83,
+  author =       "V. Chv{\'a}tal",
+  title =        "Linear Programming",
+  publisher =    "W. H. Freeman",
+  address =      "New York",
+  year =         "1983",
+}
+
+@article{Cleeremans89,
+  author  = {A. Cleeremans and D. Servan-Schreiber and J. L. McClelland},
+  title   = {Finite State Automata and Simple Recurrent Networks},
+  journal = nc,
+  year    = {1989},
+  volume  = {1},
+  pages   = {372--381},
+}
+
+@InCollection{Clifford-1990,
+  author = {Peter Clifford},
+  title = {{Markov} Random Fields in Statistics},
+  editor = {Geoffrey Grimmett and Dominic Welsh},
+  booktitle = {Disorder in Physical Systems: A Volume in Honour
+of John M. Hammersley},
+  pages = {19--32},
+  publisher = {Oxford University Press},
+  year = 1990,
+}
+
+@book{CLM,
+  author    = {J. Y. Campbell and A. W. Lo and A. C. MacKinlay},
+  title     = {The Econometrics of Financial Markets},
+  publisher = {Princeton University Press},
+  address   = {Princeton},
+  year      = {1997},
+}
+
+@Book{CND04,
+  author =       "{Congr\'egation de Notre-Dame}",
+  title =        "La cuisine raisonn{\'e}e",
+  publisher =    "Fides",
+  year =         "2004",
+  ISBN =         "2-7621-2083-7",
+}
+
+@InProceedings{Cloutier96,
+  author =       "J. Cloutier and E. Cosatto and S. Pigeon and F. R.
+                 Boyer and P. Y. Simard",
+  booktitle =    "Fifth International Conference on Microelectronics for
+                 Neural Networks and Fuzzy Systems",
+  title =        "{VIP}: an {FPGA}-based processor for image processing
+                 and neural networks",
+  year =         "1996",
+  note =         "submitted",
+}
+
+% No personal author: the key field supplies the label and sort text instead.
+@Manual{CMFortran,
+  author =       "",
+  key =          "TMC",
+  title =        "{CM} Fortran. Programming Guide",
+  organization = "Thinking Machines Corporation",
+  address =      "Cambridge, MA",
+  edition =      "1.1",
+  month =        jan,
+  year =         "1991",
+}
+
+@article{Cohen83,
+  author  = {M. A. Cohen and S. Grossberg},
+  title   = {Absolute Stability of Global Pattern Formation and Parallel Memory Storage by Competitive Neural Networks},
+  journal = ieeesmc,
+  year    = {1983},
+  volume  = {13},
+  pages   = {815--826},
+}
+
+@article{Cohen86,
+  author  = {M. S. Cohen},
+  title   = {Design of a New Medium for Volume Holographic Information Processing},
+  journal = applopt,
+  year    = {1986},
+  volume  = {25},
+  pages   = {2228--2294},
+}
+
+@article{Cohen89,
+  author  = {J. R. Cohen},
+  title   = {Application of an auditory model to speech recognition},
+  journal = {Journal of the Acoustical Society of America},
+  year    = {1989},
+  volume  = {85},
+  number  = {6},
+  pages   = {2623--2629},
+}
+
+@phdthesis{Cohn-PhD,
+  author = {D. Cohn},
+  title  = {Separating Formal Bounds from Practical Performance in Learning Systems},
+  school = {University of Washington},
+  year   = {1992},
+}
+
+@InProceedings{Cohn95,
+  author =       "David Cohn and Zoubin Ghahramani and Michael I.
+                 Jordan",
+  editor =       NIPS7ed,
+  booktitle =    NIPS7,
+  title =        "Active learning with statistical models",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "705--712",
+  year =         "1995",
+}
+
+@inproceedings{Cohn95-small,
+  author    = {David Cohn and Zoubin Ghahramani and Michael I. Jordan},
+  title     = {Active learning with statistical models},
+  booktitle = {Advances in NIPS 7},
+  editor    = NIPS7ed,
+  publisher = {Cambridge MA: MIT Press},
+  year      = {1995},
+}
+
+@InProceedings{Cohn95-short,
+  author =       "D. Cohn and Z. Ghahramani and M. I. Jordan",
+  booktitle =    "Adv. Neural Inf. Proc. Sys. 7",
+  title =        "Active learning with statistical models",
+  year =         "1995",
+  pages =        "705--712",
+}
+
+@InProceedings{Cole+Hou88,
+  author =       "R. A. Cole and L. Hou",
+  booktitle =    icassp,
+  title =        "Segmentation and Broad Classification of Continuous
+                 Speech",
+  address =      "New York, NY",
+  pages =        "453--456",
+  year =         "1988",
+}
+
+@Book{Cole96,
+  author =       "R. A. Cole and J. Mariani and H. Uszkoreit and A.
+                 Zaenen and V. Zue",
+  title =        "Survey of the State of the Art in Human Language
+                 Technology",
+  publisher =    "Cambridge University Press",
+  URL =          "http://www.cse.ogi.edu/CSLU/HLTsurvey/HLTsurvey.html",
+  year =         "1996",
+}
+
+@techreport{Coleman+Wu-1994,
+  author      = {Thomas F. Coleman and Zhijun Wu},
+  title       = {Parallel continuation-based global optimization for molecular conformation and protein folding},
+  institution = {Cornell University, Dept. of Computer Science},
+  year        = {1994},
+}
+
+@TechReport{Coleman+Wu-1994-short,
+  author =       "T. F. Coleman and Z. Wu",
+  title =        "Parallel continuation-based global optimization for
+                 molecular conformation and protein folding",
+  institution =  "Cornell University, Dept. of Computer Science",
+  year =         "1994",
+}
+
+@TechReport{Collins89,
+  author =       "E. Collins and S. Ghosh and C. Scofield",
+  title =        "An application of a multiple neural network learning
+                 system to emulation of mortgage underwriting
+                 judgements",
+  institution =  "Nestor Inc.",
+  address =      "Providence, RI",
+  year =         "1989",
+}
+
+@inproceedings{Collins96,
+  author    = {M. Collins},
+  title     = {A new statistical parser based on bigram lexical dependencies},
+  booktitle = {34th Annual Meeting of the {ACL}},
+  pages     = {184--191},
+  year      = {1996},
+}
+
+@inproceedings{Collins97,
+  author    = {M. Collins},
+  title     = {Three generative, lexicalized models for statistical parsing},
+  booktitle = {35th Annual Meeting of the {ACL}},
+  pages     = {16--23},
+  address   = {Madrid, Spain},
+  year      = {1997},
+}
+
+@phdthesis{Collins99,
+  author = {M. Collins},
+  title  = {Head-driven statistical models for natural language parsing},
+  school = {University of Pennsylvania},
+  year   = {1999},
+}
+
+@InProceedings{Collobert-2006,
+  author =       "R. Collobert and F. Sinz and J. Weston and L. Bottou",
+  booktitle =    ICML06,
+  editor =       ICML06ed,
+  publisher =    ICML06publ,
+  title =        "Trading Convexity for Scalability",
+  pages =        "201--208",
+  year =         "2006",
+}
+
+@phdthesis{Collobert04,
+  author = {R. Collobert},
+  title  = {Large Scale Machine Learning},
+  school = {Universit\'e de Paris VI, LIP6},
+  year   = {2004},
+}
+
+@article{Collobert2002,
+  author  = {R. Collobert and S. Bengio and Y. Bengio},
+  title   = {Parallel Mixture of {SVM}s for Very Large Scale Problems},
+  journal = {Neural Computation},
+  year    = {2002},
+  volume  = {14},
+  number  = {5},
+  pages   = {1105--1114},
+}
+
+@InProceedings{Collobert2004,
+  author =       "Ronan Collobert and Samy Bengio",
+  booktitle =    ICML04,
+  editor =       ICML04ed,
+  publisher =    ICML04publ,
+  title =        "Links between perceptrons, {MLP}s and {SVM}s",
+  address =      "New York, NY, USA",
+  year =         "2004",
+  location =     "Banff, Alberta, Canada",
+  isbn =         "1-58113-828-5",
+  pages =        "23",
+  doi =          "10.1145/1015330.1015415",
+}
+
+@InProceedings{CollobertR2008,
+  author =       "Ronan Collobert and Jason Weston",
+  booktitle =    ICML08,
+  editor =       ICML08ed,
+  publisher =    ICML08publ,
+  title =        "A Unified Architecture for Natural Language
+                 Processing: Deep Neural Networks with Multitask
+                 Learning",
+  year =         "2008",
+  pages =        "160--167",
+}
+  %url =          "http://www.kyb.tuebingen.mpg.de/bs/people/weston/papers/unified\-nlp.pdf",
+
+%% Compact variant of CollobertR2008 with an abbreviated venue.
+@InProceedings{CollobertR2008-small,
+  author    = {R. Collobert and J. Weston},
+  title     = {A Unified Architecture for Natural Language Processing: Deep Neural Networks with Multitask Learning},
+  booktitle = {ICML 2008},
+  year      = {2008},
+}
+
+@InProceedings{CollobertR2008-short,
+  author =       "R. Collobert and J. Weston",
+  booktitle =    "Int. Conf. Mach. Learn. 2008",
+  title =        "A Unified Architecture for Natural Language
+                 Processing: Deep Neural Networks with Multitask
+                 Learning",
+  pages =        "160--167",
+  year =         "2008",
+}
+
+@Article{Comon94,
+  author  = {Pierre Comon},
+  title   = {Independent component analysis - a new concept?},
+  journal = {Signal Processing},
+  volume  = {36},
+  pages   = {287--314},
+  year    = {1994},
+}
+
+@InProceedings{ConfAI:Grove:linprog,
+  author    = {Adam J. Grove and Dale Schuurmans},
+  title     = {Boosting in the limit: Maximizing the margin of learned ensembles},
+  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence},
+  year      = {1998},
+}
+
+@InProceedings{ConfAI:Maclin:adaboost,
+  author =       "Richard Maclin and David Opitz",
+  booktitle =    "Proceedings of the Fourteenth National Conference on
+                 Artificial Intelligence",
+  title =        "An empirical evaluation of Bagging and Boosting",
+  pages =        "546--551",
+  year =         "1997",
+}
+
+@InProceedings{ConfLT:Freund:gametheorie,
+  author    = {Yoav Freund and Robert E. Schapire},
+  title     = {Game theory, on-line prediction and Boosting},
+  booktitle = {Proceedings of the Ninth Annual Conference on Computational Learning Theory},
+  pages     = {325--332},
+  year      = {1996},
+}
+
+@InProceedings{ConfML:Dietterich:adaboost+prun,
+  author    = {D. Margineantu and Thomas G. Dietterich},
+  title     = {Pruning Adaptive Boosting},
+  booktitle = {Machine Learning: Proceedings of Fourteenth International Conference},
+  pages     = {211--218},
+  publisher = {ACM},
+  year      = {1997},
+}
+
+@InProceedings{ConfML:Freund:AdaBoostCompar,
+  author    = {Yoav Freund and Robert E. Schapire},
+  title     = {Experiments with a new Boosting algorithm},
+  booktitle = {Machine Learning: Proceedings of Thirteenth International Conference},
+  pages     = {148--156},
+  publisher = {ACM},
+  address   = {USA},
+  year      = {1996},
+}
+
+@InProceedings{ConfML:Freund:margins,
+  author    = {Robert E. Schapire and Yoav Freund and Peter Bartlett and Wee Sun Lee},
+  title     = {Boosting the margin: {A} new explanation for the effectiveness of voting methods},
+  booktitle = {Machine Learning: Proceedings of Fourteenth International Conference},
+  pages     = {322--330},
+  year      = {1997},
+}
+
+%% This paper appeared at AAAI-96, not at ICML; the "ConfML" key prefix is
+%% kept only so existing citations keep resolving.
+@InProceedings{ConfML:Quinlan:AdaBoost-C45,
+  author =       "J. Ross Quinlan",
+  booktitle =    "Proceedings of the Thirteenth National Conference on
+                 Artificial Intelligence",
+  title =        "Bagging, Boosting and {C4.5}",
+  pages =        "725--730",
+  year =         "1996",
+}
+
+@InProceedings{ConfML:Schapire:outputcodes,
+  author    = {Robert E. Schapire},
+  title     = {Using output codes to boost multiclass learning problems},
+  booktitle = {Machine Learning: Proceedings of the Fourteenth International Conference},
+  year      = {1997},
+}
+
+@Article{Coolen88,
+  author  = {A. C. C. Coolen and C. C. A. M. Gielen},
+  title   = {Delays in Neural Networks},
+  journal = eul,
+  volume  = {7},
+  pages   = {281--285},
+  year    = {1988},
+}
+
+%% keywords / origin / own are local annotation fields that standard styles ignore.
+@Book{cooper+meyer-1960,
+  author =       "Grosvenor Cooper and Leonard B. Meyer",
+  title =        "{The Rhythmic Structure of Music}",
+  publisher =    "The Univ. of Chicago Press",
+  address =      "Chicago",
+  year =         "1960",
+  keywords =     "describe, music",
+  origin =       "Kielian-Gilbert",
+  own =          "IU Library",
+}
+
+@InCollection{Cooper73,
+  author    = {L. N. Cooper},
+  title     = {A Possible Organization of Animal Memory and Learning},
+  editor    = {B. Lundqvist and S. Lundqvist},
+  booktitle = {Collective Properties of Physical Systems},
+  pages     = {252--264},
+  publisher = {Academic Press},
+  address   = {New York},
+  year      = {1973},
+}
+
+@InCollection{Cooper87,
+  author    = {C. L. Scofield and D. L. Reilly and C. Elbaum and L. N. Cooper},
+  title     = {Pattern class degeneracy in an unrestricted storage density memory},
+  booktitle = {Conference on Neural Information Processing Systems - Natural and Synthetic},
+  publisher = {IEEE},
+  year      = {1987},
+}
+
+%% Same article as Corana87a; volume 13, issue 3.
+@Article{Corana87,
+  author =       "A. Corana and M. Marchesi and C. Martini and S.
+                 Ridella",
+  title =        "Minimizing Multimodal Functions of Continuous
+                 Variables with the Simulated Annealing Algorithm",
+  journal =      acmtms,
+  volume =       "13",
+  number =       "3",
+  pages =        "262--280",
+  month =        sep,
+  year =         "1987",
+  OPTnote =      "",
+}
+
+%% Same article as Corana87; volume 13, issue 3.
+@Article{Corana87a,
+  author =       "A. Corana and M. Marchesi and C. Martini and S.
+                 Ridella",
+  title =        "Minimizing Multimodal Functions of Continuous
+                 Variables with the Simulated Annealing Algorithm",
+  journal =      acmtms,
+  volume =       "13",
+  number =       "3",
+  pages =        "262--280",
+  month =        sep,
+  year =         "1987",
+}
+
+@Article{Cortes04,
+  author    = {C. Cortes and P. Haffner and M. Mohri},
+  title     = {Rational Kernels: Theory and Algorithms},
+  journal   = jmlr,
+  volume    = {5},
+  pages     = {1035--1062},
+  year      = {2004},
+  OPTnumber = {},
+}
+
+@Article{Cortes87,
+  author  = {C. Cortes and A. Krogh and J. A. Hertz},
+  title   = {Hierarchical Associative Networks},
+  journal = jpa,
+  volume  = {20},
+  pages   = {4449--4455},
+  year    = {1987},
+}
+
+@InProceedings{Cortes89,
+  author    = {C. Cortes and J. A. Hertz},
+  title     = {A Network System for Image Segmentation},
+  booktitle = ijcnn,
+  volume    = {1},
+  pages     = {121--127},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@Article{Cortes95,
+  author  = {Corinna Cortes and Vladimir Vapnik},
+  title   = {Support Vector Networks},
+  journal = {Machine Learning},
+  volume  = {20},
+  pages   = {273--297},
+  year    = {1995},
+}
+
+@InProceedings{Cortesetal95a,
+  author    = {C. Cortes and H. Drucker and D. Hoover and V. Vapnik},
+  title     = {Capacity and Complexity Control in Predicting the Spread Between Borrowing and Lending Interest Rates},
+  booktitle = {Proc. 1st Intl. Conf. on Knowledge Discovery and Data Mining},
+  pages     = {51--56},
+  address   = {Montreal (Canada)},
+  year      = {1995},
+}
+
+@InProceedings{Cortesetal95b,
+  author    = {C. Cortes and L. D. Jackel and W. P. Chiang},
+  title     = {Limits on Learning Machine Accuracy Imposed by Data Quality},
+  booktitle = {Proc. 1st Intl. Conf. on Knowledge Discovery and Data Mining},
+  pages     = {57--62},
+  address   = {Montreal (Canada)},
+  year      = {1995},
+}
+
+@InProceedings{Cosi-92,
+  author    = {P. Cosi and P. Frasconi and M. Gori and N. Griggio},
+  title     = {Phonetic Recognition Experiments with Recurrent Neural Networks},
+  booktitle = {Proc. of the International Conference on Spoken Language},
+  pages     = {1335--1338},
+  address   = {Banff (Canada)},
+  month     = oct,
+  year      = {1992},
+}
+
+@InProceedings{Cosnard+al-1991,
+  author    = {M. Cosnard and J. C. Mignot and H. Paugam-Moisy},
+  title     = {Implementations of Multilayer Neural Networks on Parallel Architectures},
+  booktitle = {Proceedings of the Second International Specialist Seminar on the Design and Application of Parallel Digital Processors, 1991},
+  pages     = {43--47},
+  address   = {Lisbon},
+  month     = apr,
+  year      = {1991},
+}
+
+@Article{Cosslett85,
+  author  = {S. R. Cosslett and L-F. Lee},
+  title   = {Serial correlation in discrete variable models},
+  journal = {Journal of Econometrics},
+  volume  = {27},
+  pages   = {79--97},
+  year    = {1985},
+}
+
+@Article{Cottrell86,
+  author  = {M. Cottrell and J. C. Fort},
+  title   = {A Stochastic Model of Retinotopy: {A} Self Organizing Process},
+  journal = biocyb,
+  volume  = {53},
+  pages   = {405--411},
+  year    = {1986},
+}
+
+@InProceedings{Cottrell87,
+  author    = {Garrison W. Cottrell and Paul Munro and David Zipser},
+  title     = {Learning Internal Representations from Gray-Scale Images: An Example of Extensional Programming},
+  booktitle = {Ninth Annual Conference of the Cognitive Science Society},
+  pages     = {462--473},
+  publisher = {Lawrence Erlbaum, Hillsdale},
+  address   = {Seattle 1987},
+  year      = {1987},
+}
+
+@Book{Courant51,
+  author =       "R. Courant and D. Hilbert",
+  title =        "Methods of Mathematical Physics",
+  publisher =    "Wiley Interscience, New York",
+  year =         "1951",
+}
+
+@Article{Cover65,
+  author  = {T. M. Cover},
+  title   = {Geometrical and Statistical Properties of Systems of Linear Inequalities with Applications in Pattern Recognition},
+  journal = ieeetec,
+  volume  = {14},
+  pages   = {326--334},
+  year    = {1965},
+}
+
+@Article{CoverHart67,
+  author  = {T. M. Cover and P. E. Hart},
+  title   = {Nearest Neighbor Pattern Classification},
+  journal = {IEEE Transactions on Information Theory},
+  volume  = {13},
+  number  = {1},
+  pages   = {21--27},
+  year    = {1967},
+}
+
+@Article{Cowan88a,
+  author  = {J. D. Cowan and D. H. Sharp},
+  title   = {Neural Nets and Artificial Intelligence},
+  journal = daed,
+  volume  = {117},
+  pages   = {85--121},
+  year    = {1988},
+}
+
+@Article{Cowan88b,
+  author  = {J. D. Cowan and D. H. Sharp},
+  title   = {Neural Nets},
+  journal = qrb,
+  volume  = {21},
+  pages   = {365--427},
+  year    = {1988},
+}
+
+@InProceedings{Cox+Bridle89,
+  author       = {S. Cox and J. S. Bridle},
+  title        = {Unsupervised speaker adaptation by probabilistic spectrum fitting},
+  booktitle    = {Proc. IEEE Conf. on Acoustics, Speech and Signal Processing},
+  organization = {British Telecom and RSRE},
+  year         = {1989},
+}
+
+@InProceedings{Cox+Bridle90,
+  author       = {S. Cox and J. S. Bridle},
+  title        = {Simultaneous Speaker Normalisation and Utterance labelling Using {Bayesian}/Neural Net Techniques},
+  booktitle    = {Proc. IEEE Conf. on Acoustics, Speech and Signal Processing},
+  organization = {British Telecom and RSRE},
+  year         = {1990},
+}
+
+@Book{CoxCox94,
+  author =       "Trevor F. Cox and Michael A. A. Cox",
+  title =        "Multidimensional Scaling",
+  publisher =    "Chapman \& Hall",
+  address =      "London",
+  year =         "1994",
+}
+
+%% Classic BibTeX styles print the edition field verbatim before the word
+%% "edition", so it is stored as an ordinal word.
+@Book{Cox+Cox-2000,
+  author =       "T. Cox and M. Cox",
+  title =        "Multidimensional Scaling",
+  publisher =    "Chapman \& Hall",
+  edition =      "Second",
+  address =      "London",
+  year =         "2000",
+}
+
+@InProceedings{Cozman2003,
+  author    = {F. Cozman and I. Cohen and M. Cirelo},
+  title     = {Semi-Supervised Learning of Mixture Models},
+  editor    = ICML03ed,
+  booktitle = ICML03,
+  publisher = ICML03publ,
+  year      = {2003},
+}
+
+@Article{Cragg54,
+  author  = {B. G. Cragg and H. N. V. Temperley},
+  title   = {The Organization of Neurones: {A} Cooperative Analogy},
+  journal = EEGCN,
+  volume  = {6},
+  pages   = {85--92},
+  year    = {1954},
+}
+
+@Article{Cragg55,
+  author  = {B. G. Cragg and H. N. V. Temperley},
+  title   = {Memory: The Analogy with Ferromagnetic Hysteresis},
+  journal = brain,
+  volume  = {78 II},
+  pages   = {304--316},
+  year    = {1955},
+}
+
+@Article{Craven+Wahba79,
+  author =       "P. Craven and G. Wahba",
+  title =        "Smoothing noisy data with spline functions",
+  journal =      "Numerische Mathematik",
+  volume =       "31",
+  pages =        "377--403",
+  year =         "1979",
+}
+
+@Article{Crick89,
+  author  = {F. Crick},
+  title   = {The Recent Excitement About Neural Networks},
+  journal = nature,
+  volume  = {337},
+  pages   = {129--132},
+  year    = {1989},
+}
+
+@Article{Crisanti86,
+  author  = {A. Crisanti and D. J. Amit and H. Gutfreund},
+  title   = {Saturation Level of the Hopfield Model for Neural Network},
+  journal = eul,
+  volume  = {2},
+  pages   = {337--341},
+  year    = {1986},
+}
+
+@Article{Crisanti87,
+  author  = {A. Crisanti and H. Sompolinsky},
+  title   = {Dynamics of Spin Systems with Randomly Asymmetric Bonds: Langevin Dynamics and a Spherical Model},
+  journal = prA,
+  volume  = {36},
+  pages   = {4922--4939},
+  year    = {1987},
+}
+
+@Book{Cristianini+Shawe-Taylor-2000,
+  author    = {Nello Cristianini and John Shawe-Taylor},
+  title     = {An Introduction to Support Vector Machines and other kernel-based learning methods},
+  publisher = {Cambridge University Press},
+  address   = {Cambridge, UK},
+  year      = {2000},
+}
+
+%% Same paper as the Cristianini2002 entry; both keys are kept.
+@InProceedings{Cristianini-2002,
+  author    = {N. Cristianini and J. Shawe-Taylor and J. Kandola},
+  title     = {Spectral Kernel Methods for Clustering},
+  editor    = NIPS14ed,
+  booktitle = NIPS14,
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2002},
+}
+
+@InProceedings{Cristianini02,
+  author    = {N. Cristianini and J. Shawe-Taylor and A. Elisseeff and J. Kandola},
+  title     = {On Kernel-Target Alignment},
+  editor    = NIPS14ed,
+  booktitle = NIPS14,
+  volume    = {14},
+  pages     = {367--373},
+  year      = {2002},
+}
+
+%% Same paper as the Cristianini-2002 entry; both keys are kept.
+@InProceedings{Cristianini2002,
+  author    = {N. Cristianini and J. Shawe-Taylor and J. Kandola},
+  title     = {Spectral Kernel Methods for Clustering},
+  editor    = NIPS14ed,
+  booktitle = NIPS14,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2002},
+  original  = {orig/AA16.ps},
+}
+
+@Article{cucker+grigoriev99,
+  author  = {Felipe Cucker and Dima Grigoriev},
+  title   = {Complexity Lower Bounds for Approximation Algebraic Computation Trees},
+  journal = {Journal of Complexity},
+  volume  = {15},
+  number  = {4},
+  pages   = {499--512},
+  year    = {1999},
+}
+
+@TechReport{Cybenko88,
+  author      = {G. Cybenko},
+  title       = {Continuous Valued Neural Networks with Two Hidden Layers Are Sufficient},
+  institution = {Department of Computer Science, Tufts University},
+  address     = {Medford, MA},
+  year        = {1988},
+}
+
+@Article{Cybenko89,
+  author  = {G. Cybenko},
+  title   = {Approximation by Superpositions of a Sigmoidal Function},
+  journal = mcss,
+  volume  = {2},
+  pages   = {303--314},
+  year    = {1989},
+}
+
+@InProceedings{Dahmen2000,
+  author    = {J. Dahmen and D. Keysers and M. Pitz and H. Ney},
+  title     = {Structured covariance matrices for statistical image object recognition},
+  booktitle = {22nd Symposium of the German Association for Pattern Recognition},
+  address   = {Kiel, Germany},
+  year      = {2000},
+}
+
+@InProceedings{Dai95,
+  author =       "H. Dai and J. M. Lina and B. Goulard and J. W. Thomson
+                 and C. K. Scott",
+  booktitle =    "1995 Robotic and Knowledge Based Systems Workshop",
+  title =        "An Expert Diagnostic System Introducing Wavelets
+                 Analysis and Neural Network",
+  address =      "St. Hubert, Canada",
+  year =         "1995",
+}
+
+@InProceedings{darken-moody91,
+  author    = {Christian Darken and John Moody},
+  title     = {Note on learning rate schedules for stochastic optimization},
+  editor    = NIPS3ed,
+  booktitle = NIPS3,
+  pages     = {832--838},
+  publisher = {Morgan Kaufmann, Palo Alto},
+  address   = {Denver, CO},
+  year      = {1991},
+}
+
+%% 43 is the volume of the journal, not the issue; the issue is number 5.
+@Article{DarrochJ1972,
+  author =       "J. N. Darroch and D. Ratcliff",
+  title =        "Generalized iterative scaling for log-linear models",
+  journal =      "Annals of Mathematical Statistics",
+  volume =       "43",
+  number =       "5",
+  pages =        "1470--1480",
+  year =         "1972",
+}
+
+@InProceedings{Das-nips93,
+  author    = {S. Das and C. L. Giles and G. Z. Sun},
+  title     = {Using Prior Knowledge in an {NNPDA} to Learn Context-Free Languages},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo CA},
+  year      = {1993},
+}
+
+@InProceedings{Das-nips94,
+  author    = {S. Das and M. C. Mozer},
+  title     = {A Unified Gradient-Descent/Clustering Architecture for Finite State Machine Induction},
+  editor    = NIPS6ed,
+  booktitle = NIPS6,
+  publisher = {Morgan Kaufmann},
+  year      = {1994},
+}
+
+@Article{daubechies90,
+  author =       "Ingrid Daubechies",
+  title =        "The Wavelet Transform, Time-Frequency Localization and
+                 Signal Analysis",
+  journal =      "IEEE Transactions on Information Theory",
+  volume =       "36",
+  number =       "5",
+  pages =        "961--1005",
+  month =        sep,
+  year =         "1990",
+}
+
+%% Journal article: the venue goes in "journal", which this entry type requires.
+@article{daume09searn,
+  author =       {Hal {Daum\'e III} and John Langford and Daniel Marcu},
+  title =        {Search-based Structured Prediction},
+  journal =      {Machine Learning},
+  volume =       {75},
+  number =       {3},
+  pages =        {297--325},
+  year =         {2009},
+}
+
+@InProceedings{Davis89,
+  author    = {L. Davis},
+  title     = {Mapping neural networks into classifier systems},
+  editor    = {J. D. Schaffer},
+  booktitle = {Proceedings of the Third International Conference on Genetic Algorithms},
+  pages     = {375--378},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Arlington 1989},
+  year      = {1989},
+}
+
+@Article{davis94adaptive,
+  author  = {G. Davis and S. Mallat and Z. Zhang},
+  title   = {Adaptive time-frequency decompositions},
+  journal = {Optical Engineering},
+  volume  = {33},
+  number  = {7},
+  pages   = {2183--2191},
+  month   = jul,
+  year    = {1994},
+}
+
+@InProceedings{Dayan93,
+  author    = {P. Dayan and G. E. Hinton},
+  title     = {Feudal Reinforcement Learning},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@Article{Dayan95,
+  author  = {Peter Dayan and Geoffrey E. Hinton and Radford Neal and Rich Zemel},
+  title   = {The {Helmholtz} machine},
+  journal = {Neural Computation},
+  volume  = {7},
+  pages   = {889--904},
+  year    = {1995},
+}
+
+@inproceedings{debiecristianini03,
+  author    = {{de Bie}, T. and Cristianini, N.},
+  title     = {Convex methods for transduction},
+  editor    = NIPS16ed,
+  booktitle = NIPS16,
+  year      = {2003},
+}
+
+@article{debiecristianini06,
+  author  = {{de Bie}, T. and Cristianini, N.},
+  title   = {Fast {SDP} relaxations of graph cut clustering, transduction, and other combinatorial problems},
+  journal = jmlr,
+  volume  = {7},
+  year    = {2006},
+}
+
+
+@TechReport{deRidder+Duin-2002,
+  author      = {Dick {de Ridder} and Robert P. W. Duin},
+  title       = {Locally linear embedding for classification},
+  institution = {Pattern Recognition Group, Dept. of Imaging Science and Technology, Delft University of Technology},
+  number      = {PH-2002-01},
+  address     = {Delft, The Netherlands},
+  year        = {2002},
+}
+
+%% ee / bibsource are DBLP export fields that standard styles ignore.
+@inproceedings{deRidder+al-2003,
+    author    = {Dick {de Ridder} and Olga Kouropteva and Oleg Okun and Matti Pietik{\"a}inen and Robert P. W. Duin},
+    title     = {Supervised Locally Linear Embedding},
+    booktitle = {ICANN},
+    year      = {2003},
+    pages     = {333--341},
+    ee        = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=2714&spage=333},
+    bibsource = {DBLP, http://dblp.uni-trier.de}
+}
+
+@InProceedings{debollivier-gallinari-thiria-90,
+  author    = {M. deBollivier and P. Gallinari and S. Thiria},
+  title     = {Multi-module neural networks for classification},
+  booktitle = {Proc. of the International Neural Network Conference 90},
+  pages     = {777--780},
+  address   = {Paris},
+  year      = {1990},
+}
+
+@Article{Decoste-2002,
+  author  = {Dennis Decoste and Bernhard Sch{\"o}lkopf},
+  title   = {Training invariant support vector machines},
+  journal = {Machine Learning},
+  volume  = {46},
+  pages   = {161--190},
+  year    = {2002},
+}
+
+@Article{Deerwester90,
+  author  = {S. Deerwester and S. T. Dumais and G. W. Furnas and T. K. Landauer and R. Harshman},
+  title   = {Indexing by latent semantic analysis},
+  journal = {Journal of the American Society for Information Science},
+  volume  = {41},
+  number  = {6},
+  pages   = {391--407},
+  year    = {1990},
+}
+
+@Article{Dehaene87,
+  author  = {S. Dehaene and J.-P. Changeux and J.-P. Nadal},
+  title   = {Neural Networks That Learn Temporal Sequences by Selection},
+  journal = PNAS,
+  volume  = {84},
+  pages   = {2727--2731},
+  year    = {1987},
+}
+
+@InProceedings{Delalleau+al-2005-short,
+  author    = {Olivier Delalleau and Yoshua Bengio and Nicolas {Le Roux}},
+  title     = {Efficient Non-Parametric Function Induction in Semi-Supervised Learning},
+  editor    = aistats05ed,
+  booktitle = aistats05,
+  pages     = {96--103},
+  year      = {2005},
+}
+
+%% The compound surname is braced and given in "Last, First" form so BibTeX
+%% does not split it into given-name, particle and family-name parts.
+@InProceedings{DeLaTorreF2006,
+  author =       "{De la Torre Frade}, Fernando and Takeo Kanade",
+  booktitle =    "International Conference on Machine Learning",
+  title =        "Discriminative Cluster Analysis",
+  volume =       "148",
+  publisher =    "ACM Press",
+  address =      "New York, NY, USA",
+  pages =        "241--248",
+  month =        jun,
+  year =         "2006",
+}
+
+@Article{Delgutte+Kiang84,
+  author  = {B. Delgutte and N. Y. S. Kiang},
+  title   = {Speech coding in the auditory nerve},
+  journal = jasa,
+  volume  = {75},
+  number  = {3},
+  pages   = {866--907},
+  year    = {1984},
+}
+
+@Article{Delgutte80,
+  author  = {B. Delgutte},
+  title   = {Representation of speech-like sounds in the discharge patterns of auditory nerve fibers},
+  journal = jasa,
+  volume  = {68},
+  number  = {3},
+  pages   = {843--857},
+  year    = {1980},
+}
+
+@Misc{delve,
+  author = {C. Rasmussen and R. Neal and G. E. Hinton and D. van Camp and Z. Ghahramani and R. Kustra and R. Tibshirani},
+  title  = {The {DELVE} Manual},
+  note   = {{DELVE} can be found at http://www.cs.toronto.edu/\%7Edelve},
+  year   = {1996},
+}
+
+@InProceedings{DeMers+Cottrell93,
+  author    = {David DeMers and Garrison W. Cottrell},
+  title     = {Non-linear dimensionality reduction},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  pages     = {580--587},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo CA},
+  year      = {1993},
+}
+
+@InProceedings{Demichelis89,
+  author =       "P. DeMichelis and L. Fissore and P. Laface and G.
+                 Micca and E. Piccolo",
+  booktitle =    icassp,
+  title =        "On the Use of Neural Networks for Speaker Independent
+                 Isolated Word Recognition",
+  address =      "Glasgow (Scotland)",
+  year =         "1989",
+}
+
+%% "{De Mori}" is braced so BibTeX treats it as a single family name.
+@InProceedings{DeMori+Palakal85,
+  author =       "R. {De Mori} and M. Palakal",
+  booktitle =    "Proc. Ninth International Joint Conference on
+                 Artificial Intelligence",
+  title =        "On the use of taxonomy of time-frequency morphologies
+                 for automatic speech recognition",
+  address =      "Los Angeles, CA",
+  pages =        "877--879",
+  year =         "1985",
+}
+
+%% "{De Mori}" is braced so BibTeX treats it as a single family name.
+@Article{DeMori85,
+  author =       "R. {De Mori} and P. Laface and Y. Mong",
+  title =        "Parallel algorithms for syllable recognition in
+                 continuous speech",
+  journal =      ieeetpami,
+  volume =       "7",
+  pages =        "56--69",
+  year =         "1985",
+}
+
+%% "{De Mori}" is braced so BibTeX treats it as a single family name.
+@Article{DeMori87,
+  author =       "R. {De Mori} and L. Lam and M. Gilloux",
+  title =        "Learning and plan refinement in a knowledge-based
+                 system for automatic speech recognition",
+  journal =      ieeetpami,
+  volume =       "2",
+  pages =        "289--305",
+  year =         "1987",
+}
+
+%% The online location of the survey is kept in "url"; "address" is reserved
+%% for the publisher's city.
+@InCollection{DeMori96,
+  author =       "R. {De Mori} and F. Brugnara",
+  editor =       "R. A. Cole and J. Mariani and H. Uszkoreit and A.
+                 Zaenen and V. Zue",
+  booktitle =    "Survey of the State of the Art in Human Language
+                 Technology",
+  title =        "{HMM} Methods in Speech Recognition",
+  publisher =    "Cambridge University Press",
+  url =          "http://www.cse.ogi.edu/CSLU/HLTsurvey/HLTsurvey.html",
+  pages =        "24--34",
+  year =         "1996",
+}
+
+@Article{Dempster77,
+  author  = {A. P. Dempster and N. M. Laird and D. B. Rubin},
+  title   = {Maximum-likelihood from incomplete data via the {EM} algorithm},
+  journal = {Journal of Royal Statistical Society B},
+  volume  = {39},
+  pages   = {1--38},
+  year    = {1977},
+}
+
+@InProceedings{denker-lecun-93,
+  author    = {Yann {LeCun} and John S. Denker},
+  title     = {Natural versus Universal Probability Complexity, and Entropy},
+  booktitle = {IEEE Workshop on the Physics of Computation},
+  pages     = {122--127},
+  publisher = {IEEE},
+  year      = {1992},
+}
+
+@InProceedings{Denker86,
+  author    = {J. Denker},
+  title     = {Neural Network Refinements and Extensions},
+  editor    = {J. S. Denker},
+  booktitle = snowbird,
+  pages     = {121--128},
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  year      = {1986},
+}
+
+@Article{Denker87,
+  author  = {J. Denker and D. Schwartz and B. Wittner and S. Solla and R. Howard and L. Jackel and J. Hopfield},
+  title   = {Large Automatic Learning, Rule Extraction, and Generalization},
+  journal = cs,
+  volume  = {1},
+  pages   = {877--922},
+  year    = {1987},
+}
+
+@InProceedings{Denker91,
+  author    = {J. S. Denker and Y. {LeCun}},
+  title     = {Transforming neural-net output levels to probability distributions},
+  editor    = NIPS3ed,
+  booktitle = NIPS3,
+  pages     = {853--859},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo CA},
+  year      = {1991},
+}
+
+@InProceedings{Denker94,
+  author    = {J. Denker and C. J. C. Burges},
+  title     = {Image Segmentation and Recognition},
+  booktitle = {The Mathematics of Generalization: Proceedings of the SFI/CNLS Workshop on Formal Approaches to Supervised Learning},
+  publisher = {Addison Wesley, ISBN 0-201-40985-2},
+  year      = {1994},
+}
+
+@Article{Deprit89,
+  author  = {E. Deprit},
+  title   = {Implementing Recurrent Back-Propagation on the Connection Machine},
+  journal = {Neural Networks},
+  volume  = {2},
+  number  = {4},
+  pages   = {295--314},
+  year    = {1989},
+}
+
+%% adsurl / adsnote are ADS export fields that standard styles ignore.
+@ARTICLE{Derenyi94,
+   author = {Der{\'e}nyi, I. and Geszti, T. and Gy{\"o}rgyi, G.},
+    title = {Generalization in the programed teaching of a perceptron},
+  journal = {Physical Review {E}},
+     year = 1994,
+    month = oct,
+   volume = 50,
+    pages = {3192--3200},
+      doi = {10.1103/PhysRevE.50.3192},
+   adsurl = {http://adsabs.harvard.edu/abs/1994PhRvE..50.3192D},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
+
+@Article{Derrida87,
+  author  = {B. Derrida and E. Gardner and A. Zippelius},
+  title   = {An Exactly Soluble Asymmetric Neural Network Model},
+  journal = eul,
+  volume  = {4},
+  pages   = {167--173},
+  year    = {1987},
+}
+
+@TechReport{Derthick84,
+  author      = {M. Derthick},
+  title       = {Variations on the {Boltzmann} Machine},
+  institution = {Department of Computer Science, Carnegie Mellon University},
+  number      = {CMU--CS--84--120},
+  address     = {Pittsburgh, PA},
+  year        = {1984},
+}
+
+%% citeulike-article-id / keywords / posted-at / priority are CiteULike export
+%% fields that standard styles ignore.
+@inproceedings{deSaV93,
+  author               = {de Sa, Virginia  R. },
+  title                = {Learning Classification with Unlabeled Data},
+  editor               = NIPS5ed,
+  booktitle            = NIPS5,
+  pages                = {112--119},
+  publisher            = {Morgan Kaufmann Publishers},
+  address              = {San Francisco, CA},
+  year                 = {1993},
+  keywords             = {multiview, semisupervised},
+  citeulike-article-id = {350518},
+  posted-at            = {2008-08-12 16:46:39},
+  priority             = {2}
+}
+	%url = {http://citeseer.ist.psu.edu/desa94learning.html},
+
+@InProceedings{DeSieno88,
+  author    = {D. DeSieno},
+  title     = {Adding a Conscience to Competitive Learning},
+  booktitle = icnn,
+  volume    = {1},
+  pages     = {117--124},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@inproceedings{DeSilva+Tenenbaum-2003,
+  author    = {V. {de Silva} and J. B. Tenenbaum},
+  title     = {Global Versus Local Methods in Nonlinear Dimensionality Reduction},
+  editor    = NIPS15ed,
+  booktitle = NIPS15,
+  pages     = {705--712},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2003},
+}
+
+@book{Devaney89,
+  author    = {R. L. Devaney},
+  title     = {An Introduction to Chaotic Dynamical Systems},
+  publisher = {Addison-Wesley},
+  year      = {1989},
+}
+
+@article{Devereux84,
+  author  = {J. Devereux and P. Haeberli and O. Smithies},
+  title   = {A comprehensive set of sequence analysis programs for the {VAX}},
+  journal = {Nucleic Acids Research},
+  year    = {1984},
+  volume  = {12},
+  pages   = {387--395},
+}
+
+@book{Devijver82,
+  author    = {P. A. Devijver and J. Kittler},
+  title     = {Pattern Recognition: {A} Statistical Approach},
+  publisher = {Prentice-Hall},
+  address   = {London},
+  year      = {1982},
+}
+
+@article{Devijver87,
+  author  = {J. Voisin and P. A. Devijver},
+  title   = {An application of the multiedit-condensing technique to the reference selection problem in a print recognition system},
+  journal = {Pattern Recognition},
+  year    = {1987},
+  volume  = {20},
+  number  = {5},
+  pages   = {465--474},
+}
+
+% The surname is braced (as for "de Silva" and "de Freitas" elsewhere in this
+% file) so BibTeX keeps "de Vries" together without an \mbox in the name.
+@Article{deVries92,
+  author =       "B. {de Vries} and J. C. Principe",
+  title =        "The gamma model -- {A} new neural net model for
+                 temporal processing",
+  journal =      nn,
+  volume =       "5",
+  pages =        "565--576",
+  year =         "1992",
+}
+
+@Book{Devroye-book96,
+  author =       "L. Devroye and L. Gy{\"o}rfi and G. Lugosi",
+  title =        "A Probabilistic Theory of Pattern Recognition",
+  publisher =    "Springer-Verlag",
+  year =         "1996",
+}
+
+@article{Devroye88,
+  author  = {Luc Devroye},
+  title   = {Automatic Pattern Recognition: {A} Study of the Probability of Error},
+  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
+  year    = {1988},
+  month   = jul,
+  volume  = {10},
+  number  = {4},
+  pages   = {530--543},
+}
+
+% The citation key keeps its historical spelling so existing \cite commands
+% still resolve; only the author field carries the corrected name.
+@Book{Diamantras-96,
+  author =       "K. I. Diamantaras and S. Y. Kung",
+  title =        "Principal Component Neural Networks: Theory and Applications",
+  publisher =    "Wiley",
+  year =         "1996",
+}
+
+@article{Diebold+Mariano95,
+  author  = {F. X. Diebold and R. S. Mariano},
+  title   = {Comparing Predictive Accuracy},
+  journal = {Journal of Business and Economic Statistics},
+  year    = {1995},
+  volume  = {13},
+  number  = {3},
+  pages   = {253--263},
+}
+
+@incollection{Diebold93,
+  author    = {F. X. Diebold and J. H. Lee and G. C. Weinbach},
+  title     = {Regime switching with time-varying transition probabilities},
+  editor    = {C. Hargreaves},
+  booktitle = {Nonstationary Time Series Analysis and Cointegration},
+  publisher = {Oxford University Press},
+  address   = {Oxford},
+  year      = {1993},
+}
+
+@incollection{Diebold93b,
+  author    = {F. X. Diebold and G. Rudebusch and E. Sichel},
+  title     = {Further evidence on business-cycle duration dependence},
+  editor    = {J. H. Stock and M. W. Watson},
+  booktitle = {Business Cycles, Indicators, and Forecasting},
+  publisher = {University of Chicago Press},
+  address   = {Chicago},
+  year      = {1993},
+}
+
+% An NBER working paper, so it is a report rather than a journal article;
+% the old "volume" is the working-paper number.
+@TechReport{DieboldKilian,
+  author =       "F. X. Diebold and L. Kilian",
+  title =        "Measuring Predictability: Theory and Macroeconomic
+                 Applications",
+  type =         "Technical Working Paper",
+  number =       "213",
+  institution =  "National Bureau of Economic Research",
+  year =         "1997",
+}
+
+@incollection{DieboldLopez,
+  author    = {F. X. Diebold and J. A. Lopez},
+  title     = {Forecast Evaluation and Combination},
+  editor    = {G. S. Maddala and C. R. Rao},
+  booktitle = {Handbook of Statistics, Vol. 14},
+  pages     = {241--268},
+  publisher = {Elsevier Science},
+  year      = {1996},
+}
+
+@article{Diederich87,
+  author  = {S. Diederich and M. Opper},
+  title   = {Learning of Correlated Patterns in Spin-Glass Networks by Local Learning Rules},
+  journal = prl,
+  year    = {1987},
+  volume  = {58},
+  pages   = {949--952},
+}
+
+@inproceedings{Diegert90,
+  author    = {C. Diegert},
+  title     = {Out-of-core Backpropagation},
+  booktitle = {Proceedings of IEEE-IJCNN90},
+  volume    = {II},
+  pages     = {97--103},
+  address   = {San Diego, CA},
+  year      = {1990},
+}
+
+% Same article as Dietterich1998; both keys are kept because either may be cited.
+@Article{dietterich,
+  author =       "T. G. Dietterich",
+  title =        "Approximate Statistical Tests for Comparing Supervised
+                 Classification Learning Algorithms",
+  journal =      "Neural Computation",
+  volume =       "10",
+  number =       "7",
+  pages =        "1895--1923",
+  year =         "1998",
+}
+
+% Same article as the entry keyed "dietterich"; both keys are kept because
+% either may be cited.
+@Article{Dietterich1998,
+  author =       "Thomas G. Dietterich",
+  title =        "Approximate Statistical Tests for Comparing Supervised
+                 Classification Learning Algorithms",
+  journal =      "Neural Computation",
+  volume =       "10",
+  number =       "7",
+  pages =        "1895--1923",
+  year =         "1998",
+  URL =          "http://citeseer.ist.psu.edu/dietterich98approximate.html",
+}
+
+@Article{dietterich97,
+  author =       "Thomas G. Dietterich and Richard H. Lathrop and
+                 Tom{\'a}s Lozano-P{\'e}rez",
+  title =        "Solving the Multiple Instance Problem with
+                 Axis-Parallel Rectangles",
+  journal =      "Artificial Intelligence",
+  volume =       "89",
+  number =       "1--2",
+  pages =        "31--71",
+  year =         "1997",
+}
+
+
+@article{Diggle+Gratton-1984,
+  author    = {P. Diggle and R. Gratton},
+  title     = {Monte Carlo Methods of Inference for Implicit Statistical Models},
+  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
+  year      = {1984},
+  volume    = {46},
+  number    = {2},
+  pages     = {193--227},
+  publisher = {Blackwell Publishing for the Royal Statistical Society},
+}
+
+
+@incollection{Doi-2006,
+  author    = {Eizaburo Doi and Doru C. Balcan and Michael S. Lewicki},
+  title     = {A Theoretical Analysis of Robust Coding over Noisy Overcomplete Channels},
+  editor    = NIPS18ed,
+  booktitle = NIPS18,
+  pages     = {307--314},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2006},
+}
+
+@InProceedings{DoiE2007,
+  author =       "Eizaburo Doi and Michael S. Lewicki",
+  editor =       NIPS19ed,
+  booktitle =    NIPS19,
+  title =        "A Theory of Retinal Population Coding",
+  publisher =    "MIT Press",
+  pages =        "353--360",
+  year =         "2007",
+}
+
+@book{Doidge-2007,
+    author = {Doidge, Norman},
+    howpublished = {Paperback},
+    isbn = {0143113100},
+    month = dec,
+    publisher = {Penguin Group},
+    title = {The Brain That Changes Itself: Stories of Personal Triumph from the Frontiers of Brain Science},
+    year = {2007}
+}
+
+@InCollection{DollarP2007,
+  author =       "Piotr Doll{\'a}r and Serge Belongie and Vincent Rabaud",
+  editor =       NIPS19ed,
+  booktitle =    NIPS19,
+  title =        "Learning to Traverse Image Manifolds",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "361--368",
+  year =         "2007",
+}
+
+@inproceedings{DollarP2007b,
+       author = "P. Doll{\'a}r and V. Rabaud and S. Belongie",
+       title = "Non-Isometric Manifold Learning: Analysis and an Algorithm",
+       booktitle =    ICML07,
+       editor =       ICML07ed,
+       publisher =    ICML07publ,
+       month = jun,
+       year = "2007"
+}
+
+@techreport{Donoho+Carrie-03,
+  author      = {D. L. Donoho and C. Grimes},
+  title       = {Hessian Eigenmaps: new locally linear embedding techniques for high-dimensional data},
+  institution = {Dept. Statistics, Stanford University},
+  number      = {2003-08},
+  year        = {2003},
+}
+
+@article{Donoho-2006,
+  author  = {David Donoho},
+  title   = {Compressed sensing},
+  journal = {{IEEE} Transactions on Information Theory},
+  year    = 2006,
+  volume  = 52,
+  number  = 4,
+  pages   = {1289--1306},
+}
+
+@book{Dorigo98,
+  author    = {M. Dorigo and M. Colombetti},
+  title     = {Robot shaping: {An} experiment in behavior engineering},
+  publisher = {MIT Press/Bradford Books},
+  year      = {1998},
+}
+
+@book{Doucet+al-2001,
+  editor    = {A. Doucet and  N. {de Freitas} and N. Gordon},
+  title     = {Sequential Monte Carlo Methods in Practice},
+  publisher = {Springer-Verlag},
+  year      = {2001},
+}
+
+@techreport{Doya93bif,
+  author      = {K. Doya},
+  title       = {Bifurcations of Recurrent Neural Networks in Gradient Learning},
+  institution = {Department of Biology, University of California},
+  address     = {La Jolla, CA},
+  year        = {1993},
+  note        = {Submitted},
+}
+
+@techreport{Doya93un,
+  author      = {K. Doya},
+  title       = {Universality of Fully-Connected Recurrent Neural Networks},
+  institution = {Department of Biology, University of California},
+  address     = {La Jolla, CA},
+  year        = {1993},
+  note        = {Submitted},
+}
+
+% This is a monograph published by the MAA, not a journal article, so the
+% association is the publisher rather than a journal name.
+@Book{Doyle+Snell-1984,
+  author =       "Peter G. Doyle and J. Laurie Snell",
+  title =        "Random Walks and Electric Networks",
+  series =       "Carus Mathematical Monographs",
+  number =       "22",
+  publisher =    "Mathematical Association of America",
+  year =         "1984",
+}
+
+@book{Draper81,
+  author    = {N. R. Draper and H. Smith},
+  title     = {Applied Regression Analysis},
+  publisher = {John Wiley and Sons},
+  year      = {1981},
+}
+
+@inproceedings{Driancourt91,
+  author     = {X. Driancourt and L. Bottou and P. Gallinari},
+  title      = {Learning Vector Quantization, Multi-Layer Perceptron and Dynamic Programming: Comparison and Cooperation},
+  booktitle  = ijcnn,
+  volume     = {2},
+  pages      = {815--819},
+  year       = {1991},
+  OPTaddress = {Seattle WA},
+}
+
+@inproceedings{Drucker93,
+  author    = {H. Drucker and R. Schapire and R. Simard},
+  title     = {Improving performance in neural networks using a boosting algorithm},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  pages     = {42--49},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@article{Drucker93b,
+  author  = {H. Drucker and R. Schapire and R. Simard},
+  title   = {Boosting performance in neural networks},
+  journal = {International Journal of Pattern Recognition and Artificial Intelligence},
+  year    = {1993},
+  pages   = {61--76},
+  note    = {Special Issue on Applications of Neural Networks to Pattern Recognition (I. Guyon Ed.)},
+}
+
+@article{Duane-1987,
+ author = {S. Duane and A. D. Kennedy and B. Pendleton and D. Roweth},
+ title = {Hybrid {M}onte {C}arlo},
+ journal = {Phys. Lett. {B}},
+ volume = 195,
+ pages = {216--222},
+ year = 1987,
+}
+
+@book{Duda-Hart,
+  author    = {R. O. Duda and P. E. Hart},
+  title     = {Pattern Classification and Scene Analysis},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1973},
+}
+
+@Book{Duda-Hart-2000,
+  author =       "R. O. Duda and P. E. Hart and D. G. Stork",
+  title =        "Pattern Classification",
+  edition =      "Second",
+  publisher =    "Wiley and Sons",
+  address =      "New York",
+  year =         "2001",
+}
+
+@book{Duda73,
+  author    = {R. O. Duda and P. E. Hart},
+  title     = {Pattern Classification and Scene Analysis},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1973},
+}
+
+@article{Dugas+al-2003,
+  author  = {C. Dugas and Y. Bengio and N. Chapados and P. Vincent and G. Denoncourt and C. Fournier},
+  title   = {Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking},
+  journal = {CAS Forum},
+  year    = {2003},
+  month   = {Winter},
+  volume  = {1},
+  number  = {1},
+  pages   = {179--214},
+}
+
+@TechReport{Dugas00,
+  author =       "C. Dugas and O. Bardou and Y. Bengio",
+  title =        "Analyses Empiriques sur des Transactions d'options",
+  number =       "1176",
+  institution =  "D\'epartement d'informatique et de Recherche
+                 Op\'erationnelle, Universit\'e de Montr\'eal",
+  address =      "Montr\'eal, Qu\'ebec, Canada",
+  year =         "2000",
+}
+
+@InProceedings{Dugas01,
+  author =       "C. Dugas and Y. Bengio and F. B{\'e}lisle and C.
+                 Nadeau",
+  editor =       NIPS13ed,
+  booktitle =    NIPS13,
+  title =        "Incorporating Second-Order Functional Knowledge for Better Option Pricing",
+  publisher =    "{MIT} Press",
+  pages =        "472--478",
+  year =         "2001",
+}
+
+%%InProceedings{Bengio2000,
+%%  author =       "Y. Bengio",
+%%  booktitle =    icjnn
+%%  title =        "Incorporating Second-Order Functional Knowledge for Better Option Pricing",
+%%  volume =       "V",
+%%  pages =        "79--84",
+%%  year =         "2000",
+%%}
+
+@inproceedings{Bengio2000,
+  title={Probabilistic neural network models for sequential data},
+  author={Bengio, Y.},
+  booktitle=ijcnn,
+  year={2000},
+  volume={5},
+  pages={79--84},
+  abstract={Artificial neural networks (ANN) can be incorporated into probabilistic models. In this paper we review some of the approaches which have been proposed to incorporate them into probabilistic models of sequential data, such as hidden Markov models (HMM). We also discuss new developments and new ideas in this area, in particular how ANN can be used to model high-dimensional discrete and continuous data to deal with the curse of dimensionality and how the ideas proposed in these models could be applied to statistical language modeling to represent longer-term context than allowed by trigram models, while keeping word-order information},
+  keywords={computational linguistics, hidden Markov models, neural nets, probability, ANN, HMM, hidden Markov models, longer-term context, probabilistic models, probabilistic neural network models, sequential data, statistical language modeling, trigram models, word-order information},
+  doi={10.1109/IJCNN.2000.861438},
+}
+
+@inproceedings{Bengio-hyper-2000,
+  author    = {Yoshua Bengio},
+  title     = {Continuous Optimization of Hyper-Parameters},
+  booktitle = ijcnn,
+  volume    = {V},
+  pages     = {305--310},
+  year      = {2000},
+}
+
+@inproceedings{Ghosn2000,
+  author    = {J. Ghosn and Y. Bengio},
+  title     = {Bias Learning, Knowledge Sharing},
+  booktitle = ijcnn,
+  volume    = {I},
+  pages     = {9--14},
+  year      = {2000},
+}
+
+@article{Durbin87,
+  author  = {R. Durbin and D. Willshaw},
+  title   = {An Analogue Approach to the Travelling Salesman Problem Using an Elastic Net Method},
+  journal = nature,
+  year    = {1987},
+  volume  = {326},
+  pages   = {689--691},
+}
+
+@mastersthesis{Dzwonczyk91,
+  author = {M. Dzwonczyk},
+  title  = {Quantitative failure models of feed-forward neural networks},
+  school = {MIT},
+  year   = {1991},
+}
+
+@Book{econometric-G-97,
+  author =       "W. H. Greene",
+  title =        "Econometric Analysis",
+  edition =      "Third",
+  publisher =    "Prentice Hall, Inc.",
+  year =         "1997",
+}
+
+@Article{efficient-KW-82,
+  author =       "W. W. Krasker and R. R. Welsch",
+  title =        "Efficient Bounded-Influence Regression Estimation",
+  journal =      "Journal of the American Statistical Association",
+  volume =       "77",
+  pages =        "595--604",
+  year =         "1982",
+}
+
+@book{Efron+Tibs93,
+  author    = {Bradley Efron and Robert J. Tibshirani},
+  title     = {An introduction to the Bootstrap},
+  publisher = {Chapman and Hall},
+  address   = {New York},
+  year      = {1993},
+}
+
+@TechReport{eigen-TR2,
+  author =       "Yoshua Bengio and Pascal Vincent and Jean-Fran{\c{c}}ois
+                 Paiement and Olivier Delalleau and Marie Ouimet and
+                 Nicolas {Le Roux}",
+  title =        "Spectral Clustering and Kernel {PCA} are Learning
+                 Eigenfunctions",
+  number =       "1239",
+  institution =  "D\'epartement d'informatique et recherche
+                 op\'erationnelle, Universit\'e de Montr\'eal",
+  year =         "2003",
+}
+
+@inproceedings{Eisner96,
+  author    = {J. Eisner},
+  title     = {Three new probabilistic models for dependency parsing: an exploration},
+  booktitle = {COLING-96},
+  pages     = {340--345},
+  address   = {Copenhagen, Denmark},
+  year      = {1996},
+}
+
+@article{EladAharon2006,
+  author    = {Michael Elad and Michal Aharon},
+  title     = {Image Denoising Via Sparse and Redundant Representations Over Learned Dictionaries},
+  journal   = {IEEE Transactions on Image Processing},
+  year      = {2006},
+  month     = dec,
+  volume    = {15},
+  number    = {12},
+  pages     = {3736--3745},
+  bibsource = {http://www.visionbib.com/bibliography/image-proc131.html#TT8737},
+}
+
+@inproceedings{ElHihi+Bengio-nips8-small,
+  author    = {S. ElHihi and Y. Bengio},
+  title     = {Hierarchical Recurrent Neural Networks for Long-Term Dependencies},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  pages     = {493--499},
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+% Pages are not recorded here; the former empty editor/pages placeholders
+% were dropped rather than left as empty fields.
+@InProceedings{ellis+poliner-icassp2007,
+  author =       "D. Ellis and G. Poliner",
+  booktitle =    "{Proceedings of the 2007 International Conference on
+                 Acoustics, Speech and Signal Processing (ICASSP)}",
+  title =        {Identifying `Cover Songs' with Chroma Features and
+                 Dynamic Programming Beat Tracking},
+  publisher =    "IEEE Signal Processing Society",
+  year =         "2007",
+}
+
+@article{Elman88,
+  author  = {J. L. Elman and D. Zipser},
+  title   = {Learning the Hidden Structure of Speech},
+  journal = jasa,
+  year    = {1988},
+  volume  = {83},
+  pages   = {1615--1626},
+}
+
+% Same article as Elman88, with the journal name spelled out instead of the
+% jasa macro; keep the two entries in sync.
+@Article{Elman88Jasa88,
+  author =       "J. L. Elman and D. Zipser",
+  title =        "Learning the Hidden Structure of Speech",
+  journal =      "Journal of the Acoustical Society of America",
+  volume =       "83",
+  pages =        "1615--1626",
+  year =         "1988",
+}
+
+@article{Elman90,
+  author  = {J. L. Elman},
+  title   = {Finding Structure in Time},
+  journal = {Cognitive Science},
+  year    = {1990},
+  volume  = {14},
+  pages   = {179--211},
+}
+
+@Article{Elman93,
+  author =       "Jeffrey L. Elman",
+  title =        "Learning and development in neural networks: {The}
+                 importance of starting small",
+  journal =      "Cognition",
+  volume =       "48",
+  pages =        "71--99",
+  year =         "1993",
+  url =          "http://www3.isrl.uiuc.edu/~junwang4/langev/localcopy/pdf/elman93cognition.pdf"
+}
+
+@techreport{ElmanTR88,
+  author      = {J. L. Elman},
+  title       = {Finding Structure in Time},
+  institution = {Center for Research in Language, University of California at San Diego},
+  number      = {CRL TR 8801},
+  year        = {1988},
+}
+
+@techreport{EM-tech-rep,
+  author      = {Y. Bengio and P. Frasconi},
+  title       = {Learning Sequential Behavior: an {EM} Approach},
+  institution = {Universit\`a di Firenze},
+  year        = {1994},
+  note        = {(in preparation)},
+}
+
+@Article{Engel-Mannor-Meir-2003,
+  author =       "Y. Engel and S. Mannor and R. Meir",
+  title =        "The kernel recursive least squares algorithm",
+  journal =      "IEEE Transactions on Signal Processing",
+  volume =       "52",
+  number =       "8",
+  pages =        "2275--2285",
+  year =         "2004",
+}
+
+@Article{erhan06qsar,
+  author =       "Dumitru Erhan and Pierre-Jean L'Heureux and Shi Yi Yue
+                 and Yoshua Bengio",
+  title =        "Collaborative Filtering on a Family of Biological
+                 Targets",
+  journal =      "Journal of Chemical Information and Modeling",
+  volume =       "46",
+  number =       "2",
+  pages =        "626--635",
+  year =         "2006",
+}
+
+@techreport{Erhan-09-visualization-tr,
+  author      = {Dumitru Erhan and Yoshua Bengio and Aaron Courville and Pascal Vincent},
+  title       = {Visualizing Higher-Layer Features of a Deep Network},
+  institution = {Universit\'{e} de Montr\'{e}al},
+  number      = {1341},
+  year        = 2009,
+}
+
+@inproceedings{Erhan2009-small,
+  author    = {Dumitru Erhan and Pierre-Antoine Manzagol and Yoshua Bengio and Samy Bengio and Pascal Vincent},
+  title     = {The Difficulty of Training Deep Architectures and the Effect of Unsupervised Pre-Training},
+  booktitle = {Proceedings of AISTATS'2009},
+  year      = 2009,
+}
+
+@inproceedings{Erhan2009-short,
+  author    = {D. Erhan and P.-A. Manzagol and Y. Bengio and S. Bengio and P. Vincent},
+  title     = {The Difficulty of Training Deep Architectures and the Effect of Unsupervised Pre-Training},
+  booktitle = {AI \& Stat.'2009},
+  year      = 2009,
+}
+
+@book{EverittB1981,
+  author    = {B. S. Everitt and D. J. Hand},
+  title     = {Finite Mixture Distributions},
+  series    = {Monographs on Statistics and Applied Probability},
+  publisher = {Chapman and Hall},
+  address   = {London},
+  year      = {1981},
+}
+
+@InProceedings{evgeniou04,
+  author =       "Theodoros Evgeniou and Massimiliano Pontil",
+  booktitle =    "KDD '04: Proceedings of the 2004 ACM SIGKDD
+                 international conference on Knowledge discovery and
+                 data mining",
+  title =        "Regularized multi-task learning",
+  publisher =    "ACM Press",
+  address =      "New York, NY, USA",
+  pages =        "109--117",
+  year =         "2004",
+  location =     "Seattle, WA, USA",
+}
+
+@article{evgeniou05,
+  author  = {Theodoros Evgeniou and Charles A. Micchelli and Massimiliano Pontil},
+  title   = {Learning Multiple Tasks with Kernel Methods},
+  journal = jmlr,
+  year    = {2005},
+  month   = apr,
+  volume  = {6},
+  pages   = {615--637},
+}
+
+@inproceedings{Fahlman83,
+  author    = {S. E. Fahlman and G. E. Hinton and T. J. Sejnowski},
+  title     = {Massively parallel architectures for {AI}: {NETL}, Thistle, and {Boltzmann} machines},
+  booktitle = {Proceedings of the National Conference on Artificial Intelligence AAAI-83},
+  year      = {1983},
+}
+
+@InProceedings{Fahlman89,
+  author =       "S. E. Fahlman",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  booktitle =    cmss88,
+  title =        "Fast-Learning Variations on Back-Propagation: An
+                 Empirical Study",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Pittsburgh 1988",
+  pages =        "38--51",
+  year =         "1989",
+}
+
+@inproceedings{Fahlman90,
+  author    = {Scott E. Fahlman and Christian Lebiere},
+  title     = {The Cascade-Correlation Learning Architecture},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  pages     = {524--532},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@inproceedings{Fahlman90-small,
+  author    = {S. E. Fahlman and C. Lebiere},
+  title     = {The Cascade-Correlation Learning Architecture},
+  booktitle = {NIPS 2},
+  pages     = {524--532},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@article{Fama+French,
+  author  = {E. F. Fama and K. R. French},
+  title   = {Permanent and Temporary Components of Stock Prices},
+  journal = {Journal of Political Economy},
+  year    = {1988},
+  volume  = {96},
+  number  = {2},
+  pages   = {246--273},
+}
+
+@book{Fant60,
+  author    = {G. Fant},
+  title     = {Acoustic Theory of Speech Production},
+  publisher = {Mouton and Co.},
+  year      = {1960},
+}
+
+@book{Fant73,
+  author    = {G. Fant},
+  title     = {Speech Sounds and Features},
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1973},
+}
+
+@Article{Farhat85,
+  author =       "N. H. Farhat and D. Psaltis and A. Prata and E. Paek",
+  title =        "Optical Implementation of the Hopfield Model",
+  journal =      applopt,
+  volume =       "24",
+  pages =        "1469--1475",
+  year =         "1985",
+}
+
+@article{Farhat87,
+  author  = {N. H. Farhat},
+  title   = {Optoelectronic Analogs of Self-Programming Neural Nets: Architectures and Methods for Implementing Fast Stochastic Learning by Simulated Annealing},
+  journal = applopt,
+  year    = {1987},
+  volume  = {26},
+  pages   = {5093--5103},
+}
+
+@article{Farmer87,
+  author  = {D. Farmer and J. Sidorowich},
+  title   = {Predicting Chaotic Time Series},
+  journal = prl,
+  year    = {1987},
+  volume  = {59},
+  pages   = {845--848},
+}
+
+@incollection{Farmer88,
+  author    = {D. Farmer and J. Sidorowich},
+  title     = {Exploiting Chaos to Predict the Future and Reduce Noise},
+  editor    = {W. C. Lee},
+  booktitle = {Evolution, Learning, and Cognition},
+  pages     = {277--330},
+  publisher = {World Scientific},
+  address   = {Singapore},
+  year      = {1988},
+}
+
+% The venue goes in booktitle (required for this entry type); the exported
+% "journal" field was ignored by the standard styles.
+@inproceedings{Fei-Fei.2004,
+        author = {Fei-Fei, Li and Fergus, Rod and Perona, Pietro},
+        doi = {10.1109/CVPR.2004.109},
+        booktitle = {2004 Conference on Computer Vision and Pattern Recognition Workshop},
+        keywords = {categorization, computer-vision, generative-models},
+        pages = {178},
+        posted-at = {2007-08-10 12:20:22},
+        priority = {3},
+        title = {Learning Generative Visual Models from Few Training Examples: An Incremental Bayesian Approach Tested on 101 Object Categories},
+        url = {http://dx.doi.org/10.1109/CVPR.2004.109},
+        year = {2004}
+}
+
+@Article{Feldman82,
+  author =       "J. A. Feldman and D. H. Ballard",
+  title =        "Connectionist Models and Their Properties",
+  journal =      cogsci,
+  volume =       "6",
+  pages =        "205--254",
+  year =         "1982",
+}
+
+@Article{feldman96,
+  author =       "Jerome A. Feldman and George Lakoff and David Bailey
+                 and Srini Narayanan and Terry Regier and Andreas
+                 Stolcke",
+  title =        "{L0} --- The First Five Years of an Automated Language
+                 Acquisition Project",
+  journal =      "Artificial Intelligence Review",
+  volume =       "10",
+  number =       "1--2",
+  pages =        "103--129",
+  year =         "1996",
+  URL =          "http://citeseer.ist.psu.edu/feldman96first.html",
+}
+
+@Book{Fellbaum1996,
+  author =       "Christiane Fellbaum",
+  title =        "{WordNet}: An Electronic Lexical Database and Some of
+                 its Applications",
+  publisher =    "MIT Press",
+  year =         "1996",
+}
+
+% Fellbaum is the volume's editor: the role belongs in the editor field,
+% otherwise BibTeX takes the word "Editor" as her surname.
+@Book{Fellbaum1998,
+  editor =       "Christiane Fellbaum",
+  title =        "{WordNet}: An Electronic Lexical Database",
+  publisher =    "MIT Press",
+  year =         "1998",
+  URL =          "citeseer.nj.nec.com/fellbaum98wordnet.html",
+}
+
+@book{Feller68,
+  author    = {W. Feller},
+  title     = {An Introduction to Probability Theory and Its Applications},
+  volume    = {1},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1968},
+}
+
+@inproceedings{Feng-Statlog,
+  author    = {C. Feng and A. Sutherland and R. King and S. Muggleton and R. Henery},
+  title     = {Comparison of machine learning classifiers to statistics and neural networks},
+  booktitle = {Proceedings of the Fourth International Workshop on Artificial Intelligence and Statistics},
+  pages     = {41--52},
+  year      = {1993},
+}
+
+@article{Field-1994,
+    author = {David J. Field},
+    title = {What is the goal of sensory coding?},
+    journal = {Neural Computation},
+    volume = {6},
+    number = {4},
+    year = {1994},
+    issn = {0899-7667},
+    pages = {559--601},
+    doi = {10.1162/neco.1994.6.4.559},
+    publisher = {MIT Press},
+    address = {Cambridge, MA, USA},
+}
+
+@article{Fisher-1936,
+  author  = {Ronald  A. Fisher},
+  title   = {The use of multiple measurements in taxonomic problems},
+  journal = {Annals of Eugenics},
+  year    = {1936},
+  volume  = {7},
+  pages   = {179--188},
+}
+
+@book{Fischer90,
+  author    = {K. H. Fischer and J. A. Hertz},
+  title     = {Spin Glasses},
+  publisher = {Cambridge University Press},
+  address   = {Cambridge},
+  year      = {1990},
+}
+
+@techreport{Fix+Hodges-51,
+  author      = {E. Fix and J. L. Hodges},
+  title       = {Discriminatory analysis, non-parametric discrimination, consistency properties},
+  institution = {{USAF} School of Aviation Medicine, Randolph Field, Texas},
+  number      = {Report 21-49-004},
+  year        = {1951},
+}
+
+% The suffix uses the "Last, Jr., First" form so that Hodges, not "Jr.",
+% is parsed as the surname.
+@Article{FixHodges51,
+  author =       "Evelyn Fix and Hodges, Jr., Joseph L.",
+  title =        "Discriminatory Analysis: Nonparametric discrimination:
+                 Consistency properties",
+  journal =      "USAF School of Aviation Medicine",
+  volume =       "4",
+  pages =        "261--279",
+  year =         "1951",
+}
+
+% The suffix uses the "Last, Jr., First" form so that Hodges, not "Jr.",
+% is parsed as the surname.
+@Article{FixHodges52,
+  author =       "Evelyn Fix and Hodges, Jr., Joseph L.",
+  title =        "Discriminatory Analysis: Nonparametric discrimination:
+                 Small sample performance",
+  journal =      "USAF School of Aviation Medicine",
+  volume =       "11",
+  pages =        "280--322",
+  year =         "1952",
+}
+
+@mastersthesis{Flammia91,
+  author = {G. Flammia},
+  title  = {Speaker Independent Consonant Recognition in Continuous Speech with Distinctive Phonetic Features},
+  school = {McGill University, School of Computer Science},
+  year   = {1991},
+}
+
+@Book{Flanagan72,
+  author =       "J. L. Flanagan",
+  title =        "Speech Analysis, Synthesis, and Perception",
+  publisher =    "Springer-Verlag",
+  address =      "Berlin",
+  edition =      "Second",
+  year =         "1972",
+}
+
+@book{Fletcher87,
+  author    = {Roger Fletcher},
+  title     = {Practical Methods of Optimization},
+  edition   = {Second},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1987},
+}
+
+@InCollection{FleuretF2006,
+  author =       "Fran{\c{c}}ois Fleuret and Gilles Blanchard",
+  editor =       NIPS18ed,
+  booktitle =    NIPS18,
+  title =        "Pattern Recognition from One Example by Chopping",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "371--378",
+  year =         "2006",
+}
+
+@InProceedings{Foldiak89,
+  author =       "P. F{\"o}ldi{\'a}k",
+  booktitle =    ijcnn,
+  title =        "Adaptive Network for Optimal Linear Feature
+                 Extraction",
+  volume =       "1",
+  publisher =    "IEEE, New York",
+  address =      "Washington 1989",
+  pages =        "401--405",
+  year =         "1989",
+}
+
+@Article{Foldiak91,
+  author =       "P. F{\"o}ldi{\'a}k",
+  title =        "Learning Invariance from Transformation Sequences",
+  journal =      "Neural Computation",
+  volume =       "3",
+  number =       "2",
+  pages =        "194--200",
+  year =         "1991",
+}
+
+@techreport{Fontaine,
+  author      = {T. Fontaine},
+  title       = {{GRAD}-{CM2}: {A} Data-parallel Connectionist Network Simulator},
+  institution = {University of Pennsylvania},
+  number      = {MS-CIS-92-55/LINC LAB 232},
+  month       = jul,
+  year        = {1992},
+  OPTnote     = {},
+}
+
+@article{Foster+George94,
+  author  = {D. Foster and E. George},
+  title   = {The risk inflation criterion for multiple regression},
+  journal = {Annals of Statistics},
+  volume  = {22},
+  pages   = {1947--1975},
+  year    = {1994},
+}
+
+@PhdThesis{Foster2002,
+  author =       "George Foster",
+  title =        "Text Prediction for Translators",
+  school =       "Dept. IRO, Universit{\'e} de Montr{\'e}al",
+  year =         "2002",
+}
+
+@InCollection{Fox-2009,
+  author =       "Emily Fox and Erik Sudderth and Michael Jordan and Alan Willsky",
+  editor =       NIPS21ed,
+  booktitle =    NIPS21,
+  title =        "Nonparametric Bayesian Learning of Switching Linear Dynamical Systems",
+  pages =        "457--464",
+  year =         "2009",
+}
+
+@Article{Fralick67,
+  author =       {Stanley C. Fralick},
+  title =        {Learning to Recognize Patterns without a Teacher},
+  journal =      {IEEE Transactions on Information Theory},
+  year =         1967,
+  volume =       13,
+  pages =        {57--64}
+}
+
+@inproceedings{Franzini87,
+  author    = {M. A. Franzini},
+  title     = {Speech Recognition with Back Propagation},
+  booktitle = {Proceedings of the Ninth Annual Conference of the IEEE Engineering in Medicine and Biology Society},
+  pages     = {1702--1703},
+  publisher = {IEEE, New York},
+  address   = {Boston 1987},
+  year      = {1987},
+}
+
+@inproceedings{Franzini90,
+  author    = {M. A. Franzini and K. F. Lee and A. Waibel},
+  title     = {Connectionist {Viterbi} Training: a New Hybrid Method for Continuous Speech Recognition},
+  booktitle = icassp,
+  pages     = {425--428},
+  address   = {Albuquerque, NM},
+  year      = {1990},
+}
+
+@inproceedings{Frasconi-icnn93,
+  author    = {P. Frasconi and M. Gori and A. Tesi},
+  title     = {Backpropagation for Linearly Separable Patterns: a Detailed Analysis},
+  booktitle = icnn,
+  pages     = {1818--1822},
+  publisher = {IEEE Press},
+  address   = {S. Francisco CA},
+  year      = {1993},
+}
+
+@inproceedings{Frasconi-ijcnn91,
+  author     = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
+  title      = {A Unified Approach for Integrating Explicit Knowledge and Learning by Example in Recurrent Networks},
+  booktitle  = ijcnn,
+  pages      = {811--816},
+  year       = {1991},
+  OPTaddress = {Seattle WA},
+}
+
+@article{Frasconi-ijmpC93,
+  author  = {P. Frasconi and M. Gori and G. Soda},
+  title   = {Daphne: Data Parallelism Neural Network Simulator},
+  journal = {Int. Journal of Modern Physics C},
+  volume  = {4},
+  number  = {1},
+  pages   = {17--28},
+  year    = {1993},
+  note    = {Special Issue: ``Science on the Connection Machine''},
+}
+
+@inproceedings{Frasconi-milano,
+  author    = {P. Frasconi and M. Gori and G. Soda},
+  title     = {Recurrent Networks for Continuous Speech Recognition},
+  booktitle = {Computational Intelligence 90},
+  publisher = {Elsevier},
+  address   = {Milano (Italy)},
+  year      = {1990},
+}
+
+@mastersthesis{Frasconi-msthesis,
+  author = {P. Frasconi},
+  title  = {Progetto e realizzazione di un simulatore per reti neurali ricorrenti e implementazione di prototipi per il riconoscimento vocale in tempo reale},
+  school = {Universit\`a di Firenze},
+  year   = {1990},
+  note   = {(in Italian)},
+}
+
+@article{Frasconi-nc92,
+  author  = {P. Frasconi and M. Gori and G. Soda},
+  title   = {Local Feedback Multi-Layered Networks},
+  journal = nc,
+  volume  = {4},
+  number  = {1},
+  pages   = {120--130},
+  year    = {1992},
+}
+
+@phdthesis{Frasconi-PhD,
+  author  = {Paolo Frasconi},
+  title   = {Reti Ricorrenti ed Elaborazione Adattiva di Sequenze},
+  school  = {Universit\`a di Firenze},
+  address = {Italy},
+  year    = {1994},
+  note    = {(in Italian)},
+}
+
+@incollection{Frasconi-pinn93,
+  author    = {P. Frasconi and M. Gori and A. Tesi},
+  title     = {Successes and Failures of Backpropagation: a Theoretical Investigation},
+  booktitle = {Progress in Neural Networks},
+  editor    = {Omid Omidvar},
+  publisher = {Ablex Publishing},
+  year      = {1993},
+}
+
+@inproceedings{Frasconi-spie93,
+  author       = {Paolo Frasconi and Marco Gori},
+  title        = {Multilayered networks and the {C}-{G} uncertainty principle},
+  booktitle    = {Proc. Conf. Science of Artificial Neural Networks II},
+  editor       = {D. Ruck},
+  volume       = {SPIE-1966},
+  organization = {International Society for Optical Engineering (SPIE)},
+  address      = {Orlando, FL},
+  year         = {1993},
+}
+
+@techreport{Frasconi-TR92,
+  author      = {P. Frasconi and M. Gori and G. Soda},
+  title       = {Injecting Nondeterministic Finite State Automata into Recurrent Neural Networks},
+  institution = {Universit\`a di Firenze (Italy)},
+  number      = {DSI-RT15/92},
+  month       = aug,
+  year        = {1992},
+}
+
+@unpublished{Frasconi-unp94,
+  author    = {P. Frasconi and Y. Bengio},
+  title     = {An {EM} Approach to Grammatical Inference},
+  note      = {Submitted to the 12-th {\em International Conference on Pattern Recognition}},
+  year      = {1994},
+  OPTannote = {},
+}
+
+@inproceedings{Frasconi-v91,
+  author    = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
+  title     = {Learning Automata with Sigmoidal Networks},
+  booktitle = {Proc. of the 4th Italian Workshop on Parallel Architectures and Neural Networks},
+  editor    = {E. Caianiello},
+  pages     = {69--77},
+  publisher = {World Scientific Pub},
+  address   = {Vietri (Italy)},
+  year      = {1991},
+}
+
+@inproceedings{Frasconi90,
+  author    = {P. Frasconi and M. Gori and G. Soda},
+  title     = {Recurrent Networks with Activation Feedback},
+  booktitle = {Proc. of the 3rd Italian Workshop on Parallel Architectures and Neural Networks},
+  editor    = {E. Caianiello},
+  pages     = {329--335},
+  publisher = {World Scientific Pub},
+  address   = {Vietri (Italy)},
+  year      = {1990},
+}
+
+@inproceedings{Frasconi97,
+  author    = {P. Frasconi and M. Gori and A. Sperduti},
+  title     = {On the Efficient Classification of Data Structures by Neural Networks},
+  booktitle = {Proc. Int. Joint Conf. on Artificial Intelligence},
+  year      = {1997},
+}
+
+@article{Frasconi-kde93,
+  author  = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
+  title   = {Unified Integration of Explicit Rules and Learning by Example in Recurrent Networks},
+  journal = ieeetrkde,
+  year    = {1993},
+  note    = {(in press)},
+}
+
+@Article{Frean90,
+  author =       "M. Frean",
+  title =        "The Upstart Algorithm: {A} Method for Constructing and
+                 Training Feedforward Neural Networks",
+  journal =      nc,
+  volume =       "2",
+  pages =        "198--209",
+  year =         "1990",
+}
+
+@techreport{Freund+Haussler-94,
+  author      = {Yoav Freund and David Haussler},
+  title       = {Unsupervised learning of distributions on binary vectors using two layer networks},
+  institution = {University of California, Santa Cruz},
+  number      = {UCSC-CRL-94-25},
+  year        = {1994},
+}
+
+@inproceedings{Freund+Haussler92,
+  author    = {Yoav Freund and David Haussler},
+  title     = {A fast and exact learning rule for a restricted class of {Boltzmann} machines},
+  booktitle = NIPS4,
+  editor    = NIPS4ed,
+  pages     = {912--919},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1992},
+}
+
+@article{Freund-Schapire-98,
+  author  = {Yoav Freund and Robert E. Schapire},
+  title   = {Adaptive Game Playing using Multiplicative Weights},
+  journal = {Games and Economic Behavior},
+  year    = {1998},
+}
+
+@inproceedings{Freund1995,
+  author    = {Yoav Freund and Robert E. Schapire},
+  title     = {A decision-theoretic generalization of on-line learning and an application to boosting},
+  booktitle = {Proceedings of the Second European Conference on Computational Learning Theory},
+  pages     = {23--37},
+  publisher = {Springer-Verlag},
+  year      = {1995},
+  isbn      = {3-540-59119-2},
+}
+
+@techreport{freund94,
+  author      = {Y. Freund and D. Haussler},
+  title       = {Unsupervised learning of distributions of binary vectors using two layer networks},
+  institution = {UCSC},
+  number      = {CRL-94-25},
+  year        = {1994},
+}
+
+@unpublished{Freund97,
+  author = {Y. Freund and R. E. Schapire and P. Bartlett and W. S. Lee},
+  title  = {Boosting the margin: {A} new explanation for the effectiveness of voting methods},
+  note   = {Presented at the Machines that Learn Conference, Snowbird, Utah},
+  year   = {1997},
+}
+
+@inproceedings{Frey96,
+  author    = {Brendan J. Frey and Geoffrey E. Hinton and Peter Dayan},
+  title     = {Does the wake-sleep algorithm learn good density estimators?},
+  booktitle = NIPS8,
+  editor    = NIPS8ed,
+  pages     = {661--670},
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@InProceedings{Frey-Hinton96,
+  author =       "B. J. Frey and G. E. Hinton",
+  booktitle =    "Proceedings of the Data Compression Conference",
+  title =        "Free Energy Coding",
+  publisher =    "IEEE Computer Society Press",
+  address =      "Los Alamitos, CA",
+  year =         "1997",
+}
+
+@book{Frey98,
+  author    = {Brendan J. Frey},
+  title     = {Graphical models for machine learning and digital communication},
+  publisher = {{MIT} Press},
+  year      = {1998},
+}
+
+@inproceedings{frey99estimating,
+  author    = {B. J. Frey and N. Jojic},
+  title     = {Estimating Mixture Models of Images and Inferring Spatial Transformations Using the {EM} Algorithm},
+  booktitle = cvpr99,
+  pages     = {416--422},
+  year      = {1999},
+  url       = {citeseer.ist.psu.edu/frey99estimating.html},
+}
+
+@InProceedings{FreyUAI00,
+  author =       "Brendan Frey and Nebojsa Jojic",
+  booktitle =    UAI00,
+  title =        "Learning Graphical Models of Images, Videos and Their
+                 Spatial Transformations",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Francisco, CA",
+  pages =        "184--191",
+  year =         "2000",
+}
+
+@Article{Friedman+Fisher-99,
+  author =       "J. H. Friedman and N. I. Fisher",
+  title =        "Bump hunting in high-dimensional data",
+  journal =      "Statistics and Computing",
+  volume =       "9",
+  number =       "2",
+  pages =        "123--143",
+  year =         "1999",
+}
+
+@Article{Friedman+Hastie+Tibshirani:AdaBoost-theory,
+  author =       "J. Friedman and T. Hastie and R. Tibshirani",
+  title =        "Additive Logistic Regression: a Statistical View of
+                 Boosting",
+  journal =      "The Annals of Statistics",
+  volume =       "28",
+  number =       "2",
+  pages =        "337--407",
+  year =         "2000",
+}
+
+@Article{Friedman-2001,
+  author =       "J. Friedman",
+  title =        "Greedy function approximation: a gradient boosting
+                 machine",
+  journal =      "Annals of Statistics",
+  volume =       "29",
+  number =       "5",
+  pages =        "1189--1232",
+  year =         "2001",
+}
+
+@book{Friedman71,
+  author    = {A. Friedman},
+  title     = {Advanced Calculus},
+  publisher = {Holt, Rinehart and Winston},
+  address   = {New York, NY},
+  year      = {1971},
+}
+
+@article{Friedman+Tukey-1974,
+    author = {J. H. Friedman and J. W. Tukey},
+    title = {A Projection Pursuit Algorithm for Exploratory Data Analysis},
+    journal = {IEEE Transactions on Computers},
+    volume = {23},
+    number = {9},
+    year = {1974},
+    issn = {0018-9340},
+    pages = {881--890},
+    doi = {10.1109/T-C.1974.224051},
+    publisher = {IEEE Computer Society},
+    address = {Washington, DC, USA},
+}
+
+@Article{Friedman87,
+  author =       "J. H. Friedman",
+  title =        "Exploratory projection pursuit",
+  journal =      "Journal of the American Statistical Association",
+  volume =       "82",
+  number =       "397",
+  pages =        "249--266",
+  year =         "1987",
+}
+
+@article{Friedman91,
+  author  = {J. H. Friedman},
+  title   = {Multivariate adaptive regression splines},
+  journal = {The Annals of Statistics},
+  volume  = {19},
+  pages   = {1--141},
+  year    = {1991},
+}
+
+@techreport{friedman94flexible,
+  author      = {J. Friedman},
+  title       = {Flexible metric nearest neighbor classification},
+  institution = {Stanford University Statistics Department},
+  number      = {113},
+  year        = {1994},
+}
+
+@techreport{Friedman98,
+  author      = {J. Friedman and T. Hastie and R. Tibshirani},
+  title       = {Additive logistic regression: {A} statistical view of boosting},
+  institution = {Stanford University},
+  address     = {CA, USA},
+  year        = {1998},
+}
+
+@misc{friedman99greedy,
+  author = {J. Friedman},
+  title  = {Greedy Function Approximation: a Gradient Boosting Machine},
+  note   = {IMS 1999 Reitz Lecture, February 24, 1999, Dept. of Statistics, Stanford University},
+  year   = {1999},
+}
+
+@InProceedings{Friess98,
+  author =       "T. Friess and N. Cristianini and C. Campbell",
+  booktitle =    "Proceedings of the Fifteenth International Conference
+                 on Machine Learning",
+  title =        "The Kernel-Adatron: a Fast and Simple Learning
+                 Procedure for Support Vector Machines",
+  pages =        "188--196",
+  year =         "1998",
+}
+
+@inproceedings{Fritzke94,
+  author    = {B. Fritzke},
+  title     = {Supervised learning with growing cell structures},
+  booktitle = NIPS6,
+  editor    = NIPS6ed,
+  publisher = {Morgan Kaufmann},
+  year      = {1994},
+}
+
+@inproceedings{fs-lmcpa-98,
+  author    = {Yoav Freund and Robert E. Schapire},
+  title     = {Large margin classification using the perceptron algorithm},
+  booktitle = {Proc. 11th Annu. Conf. on Comput. Learning Theory},
+  pages     = {209--217},
+  publisher = {ACM Press, New York, NY},
+  year      = {1998},
+}
+
+@article{fs-ppr-81,
+  author  = {J. H. Friedman and W. Stuetzle},
+  title   = {Projection Pursuit Regression},
+  journal = {J. American Statistical Association},
+  volume  = {76},
+  number  = {376},
+  pages   = {817--823},
+  month   = dec,
+  year    = {1981},
+  comment = {Good description of projection pursuit},
+}
+
+@article{Fu86,
+  author  = {Y. Fu and P. W. Anderson},
+  title   = {Application of Statistical Mechanics to {NP}-Complete Problems in Combinatorial Optimization},
+  journal = jpa,
+  volume  = {19},
+  pages   = {1605--1620},
+  year    = {1986},
+}
+
+@inproceedings{Fukumizu96,
+  author    = {K. Fukumizu},
+  title     = {Active Learning in Multilayer Perceptrons},
+  booktitle = NIPS8,
+  editor    = NIPS8ed,
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@article{Fukumizu+Amari-2000,
+  author  = {Kenji Fukumizu and {Shun-ichi} Amari},
+  title   = {Local Minima and Plateaus in Hierarchical Structures of Multilayer Perceptrons},
+  journal = {Neural Networks},
+  volume  = {13},
+  number  = {3},
+  pages   = {317--327},
+  year    = {2000},
+}
+
+@article{Fukushima75,
+  author  = {K. Fukushima},
+  title   = {Cognitron: {A} Self-Organizing Multilayered Neural Network},
+  journal = biocyb,
+  volume  = {20},
+  pages   = {121--136},
+  year    = {1975},
+}
+
+@article{Fukushima80,
+  author  = {K. Fukushima},
+  title   = {Neocognitron: {A} Self-Organizing Neural Network Model for a Mechanism of Pattern Recognition Unaffected by Shift in Position},
+  journal = biocyb,
+  volume  = {36},
+  pages   = {193--202},
+  year    = {1980},
+}
+
+@article{Fukushima82,
+  author  = {K. Fukushima and S. Miyake},
+  title   = {Neocognitron: {A} new algorithm for pattern recognition tolerant of deformations and shifts in position},
+  journal = {Pattern Recognition},
+  volume  = {15},
+  pages   = {455--469},
+  year    = {1982},
+  key     = {Fukushima},
+}
+
+@Article{Fukushima83,
+  author =       "K. Fukushima and S. Miyake and T. Ito",
+  title =        "Neocognitron: {A} Neural Network Model for a Mechanism
+                 of Visual Pattern Recognition",
+  journal =      ieeesmc,
+  volume =       "13",
+  number =       "5",
+  pages =        "826--834",
+  year =         "1983",
+}
+
+@article{Funahashi89,
+  author  = {K. Funahashi},
+  title   = {On the approximate realization of continuous mappings by neural networks},
+  journal = {Neural Networks},
+  volume  = {2},
+  pages   = {183--192},
+  year    = {1989},
+}
+
+@article{Funahashi93,
+  author  = {Ken-Ichi Funahashi and Yuichi Nakamura},
+  title   = {Approximation of Dynamical Systems by Continuous Time Recurrent Neural Networks},
+  journal = nn,
+  volume  = {6},
+  pages   = {801--806},
+  year    = {1993},
+}
+
+@inproceedings{Fung-Crawford90,
+  author    = {R. M. Fung and S. L. Crawford},
+  title     = {A system for induction of probabilistic models},
+  booktitle = {Eighth National Conference on Artificial Intelligence, Boston, Massachusetts, American Association for Artificial Intelligence},
+  pages     = {762--779},
+  year      = {1990},
+}
+
+@techreport{Galland+Hinton89,
+  author      = {C. C. Galland and G. E. Hinton},
+  title       = {Deterministic learning in networks with asymmetric connectivity},
+  institution = {Department of Computer Science, University of Toronto},
+  number      = {CRG-TR-89-6},
+  address     = {Toronto, Ontario},
+  year        = {1989},
+}
+
+@inproceedings{Gallant86,
+  author    = {S. I. Gallant},
+  title     = {Optimal Linear Discriminants},
+  booktitle = {Eighth International Conference on Pattern Recognition},
+  pages     = {849--852},
+  publisher = {IEEE, New York},
+  address   = {Paris 1986},
+  year      = {1986},
+}
+
+@Article{gallant90perceptron-based,
+  author =       "S. Gallant",
+  title =        "Perceptron-based learning algorithms",
+  journal =      "{IEEE} Transactions on Neural Networks",
+  volume =       "1",
+  number =       "2",
+  pages =        "179--191",
+  year =         "1990",
+  text =         "S. Gallant, Perceptron-based learning algorithms, IEEE
+                 Trans. Neural Networks 1, 179 (1990).",
+}
+
+@InProceedings{Gallinari87,
+  author =       "Patrick Gallinari and Yann {LeCun} and Sylvie Thiria and
+                 Fran{\c{c}}oise Fogelman-Souli{\'e}",
+  booktitle =    "Proceedings of COGNITIVA 87",
+  title =        "M{\'e}moires associatives distribu{\'e}es",
+  address =      "Paris, La Villette",
+  year =         "1987",
+}
+
+@inproceedings{Gallinari88,
+  author    = {P. Gallinari and S. Thiria and F. Fogelman-Souli\'e},
+  title     = {Multilayer perceptrons and data analysis},
+  booktitle = {Proc. International Conference on Neural Networks '88},
+  pages     = {391--399},
+  publisher = {IEEE},
+  year      = {1988},
+}
+
+@Article{Gao-Goodman-Miao-2001,
+  author =       "J. Gao and J. Goodman and J. Miao",
+  title =        "The Use of Clustering Techniques for Asian Language
+                 Modeling",
+  journal =      "Computational Linguistics and Chinese Language
+                 Processing",
+  volume =       "6",
+  number =       "1",
+  pages =        "27--60",
+  year =         "2001",
+}
+
+@techreport{Garcia-Perron95,
+  author      = {R. Garcia and P. Perron},
+  title       = {An analysis of the real interest rate under regime shift},
+  institution = {CIRANO},
+  number      = {95s-5},
+  address     = {Montreal, Quebec, Canada},
+  year        = {1995},
+}
+
+@Article{Garcia-Perron96,
+  author =       "R. Garcia and P. Perron",
+  title =        "An analysis of the real interest rate under regime
+                 shift",
+  journal =      "The Review of Economics and Statistics",
+  volume =       "78",
+  number =       "1",
+  pages =        "111--125",
+  year =         "1996",
+}
+
+@techreport{Garcia-Schaller95,
+  author      = {R. Garcia and H. Schaller},
+  title       = {Are the effects of monetary policy asymmetric},
+  institution = {CIRANO},
+  number      = {95s-6},
+  address     = {Montreal, Quebec, Canada},
+  year        = {1995},
+}
+
+@techreport{Garcia95,
+  author      = {R. Garcia},
+  title       = {Asymptotic null distribution of the likelihood ratio test in Markov switching models},
+  institution = {CIRANO},
+  number      = {95s-7},
+  address     = {Montreal, Quebec, Canada},
+  year        = {1995},
+}
+
+@TechReport{Garcia98,
+  author =       "R. Garcia and R. Gen\c{c}ay",
+  title =        "Pricing and Hedging Derivative Securities with Neural
+                 Networks and a Homogeneity Hint",
+  number =       "98s-35",
+  institution =  "CIRANO",
+  address =      "Montr\'eal, Qu\'ebec, Canada",
+  year =         "1998",
+}
+
+@article{Gardner87,
+  author  = {E. Gardner},
+  title   = {Maximum Storage Capacity in Neural Networks},
+  journal = eul,
+  volume  = {4},
+  pages   = {481--485},
+  year    = {1987},
+}
+
+@article{Gardner88a,
+  author  = {E. Gardner},
+  title   = {The Space of Interactions in Neural Network Models},
+  journal = jpa,
+  volume  = {21},
+  pages   = {257--270},
+  year    = {1988},
+}
+
+@article{Gardner88b,
+  author  = {E. Gardner and B. Derrida},
+  title   = {Optimal Storage Properties of Neural Network Models},
+  journal = jpa,
+  volume  = {21},
+  pages   = {271--284},
+  year    = {1988},
+}
+
+@article{Gardner89a,
+  author  = {E. Gardner and B. Derrida},
+  title   = {Three Unfinished Works on the Optimal Storage Capacity of Networks},
+  journal = jpa,
+  volume  = {22},
+  pages   = {1983--1994},
+  year    = {1989},
+}
+
+@article{Gardner89b,
+  author  = {E. Gardner and H. Gutfreund and I. Yekutieli},
+  title   = {The Phase Space of Interactions in Neural Networks with Definite Symmetry},
+  journal = jpa,
+  volume  = {22},
+  pages   = {1995--2008},
+  year    = {1989},
+}
+
+@book{Garey79,
+  author    = {M. R. Garey and D. S. Johnson},
+  title     = {Computers and Intractability: {A} Guide to the Theory of {NP}-Completeness},
+  publisher = {Freeman},
+  address   = {New York},
+  year      = {1979},
+}
+
+@incollection{GarriguesP2008,
+  author    = {Pierre Garrigues and Bruno Olshausen},
+  title     = {Learning Horizontal Connections in a Sparse Coding Model of Natural Images},
+  booktitle = NIPS20,
+  editor    = NIPS20ed,
+  pages     = {505--512},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2008},
+}
+
+@incollection{GarriguesP2008-small,
+  author    = {Pierre Garrigues and Bruno Olshausen},
+  title     = {Learning Horizontal Connections in a Sparse Coding Model of Natural Images},
+  booktitle = {NIPS'20},
+  year      = {2008},
+}
+
+@article{Gartner03,
+  author  = {T. G{\"a}rtner},
+  title   = {A survey of kernels for structured data},
+  journal = {ACM SIGKDD Explorations Newsletter},
+  volume  = {5},
+  number  = {1},
+  pages   = {49--58},
+  year    = {2003},
+}
+
+@inproceedings{Gauvain:2003:icassp,
+  author    = {Jean-Luc Gauvain and L. Lamel and Holger Schwenk and G. Adda and L. Chen and F.\ Lef\`evre},
+  title     = {Conversational Telephone Speech Recognition},
+  booktitle = icassp,
+  volume    = {1},
+  pages     = {212--215},
+  year      = {2003},
+}
+
+@inproceedings{Gaynier93,
+  author    = {R. J. Gaynier and T. Downs},
+  title     = {A Method of Training Multi-layer Networks with Heaviside Characteristics Using Internal Representations},
+  booktitle = {IEEE International Conference on Neural Networks},
+  pages     = {1812--1817},
+  address   = {San Francisco, CA},
+  year      = {1993},
+}
+
+@InProceedings{GehlerP2006,
+  author =       "Peter V. Gehler and Alex D. Holub and Max Welling",
+  booktitle =    ICML06,
+  editor =       ICML06ed,
+  publisher =    ICML06publ,
+  title =        "The rate adapting {Poisson} model for information
+                 retrieval and object recognition",
+  address =      "New York, NY, USA",
+  pages =        "337--344",
+  year =         "2006",
+  ISBN =         "1-59593-383-2",
+  doi =          "10.1145/1143844.1143887",
+  location =     "Pittsburgh, Pennsylvania",
+}
+
+@Article{Geman84,
+  author =       {Geman, Stuart and Geman, Donald},
+  title =        "Stochastic Relaxation, Gibbs Distributions, and the
+                 {Bayesian} Restoration of Images",
+  doi =          {10.1109/TPAMI.1984.4767596},
+  journal =      ieeetpami,
+  volume =       "6",
+  number =       "6",
+  keywords =     {annealing, mrf, simulated},
+  month =        nov,
+  pages =        {721--741},
+  url =          {http://dx.doi.org/10.1109/TPAMI.1984.4767596},
+  year =         "1984",
+}
+
+@article{Geman92,
+  author  = {S. Geman and E. Bienenstock and R. Doursat},
+  title   = {Neural Networks and the Bias/Variance Dilemma},
+  journal = nc,
+  volume  = {4},
+  number  = {1},
+  pages   = {1--58},
+  year    = {1992},
+}
+
+@Article{Genest-Zideck-86,
+  author =       "C. Genest and J. V. Zidek",
+  title =        "Combining probability distributions: {A} critique and
+                 an annotated bibliography",
+  journal =      "Statistical Science",
+  volume =       "1",
+  pages =        "114--148",
+  year =         "1986",
+}
+
+@article{Geng+al-2005,
+    author    = {Xin Geng and De-Chuan Zhan and Zhi-Hua Zhou},
+    title     = {Supervised nonlinear dimensionality reduction for visualization and classification},
+    journal   = {IEEE Transactions on Systems, Man, and Cybernetics, Part B},
+    volume    = {35},
+    number    = {6},
+    year      = {2005},
+    pages     = {1098--1107},
+    ee        = {http://dx.doi.org/10.1109/TSMCB.2005.850151},
+    bibsource = {DBLP, http://dblp.uni-trier.de}
+}
+
+@article{Geszti87,
+  author  = {T. Geszti and F. P\'azm\'andi},
+  title   = {Learning Within Bounds and Dream Sleep},
+  journal = jpa,
+  volume  = {20},
+  pages   = {L1299--L1303},
+  year    = {1987},
+}
+
+@book{Geszti90,
+  author    = {T. Geszti},
+  title     = {Physical Models of Neural Networks},
+  publisher = {World Scientific},
+  address   = {Singapore},
+  year      = {1990},
+}
+
+@Article{Geweke1989,
+  author =       "J. Geweke",
+  title =        "Bayesian inference in econometric models using
+                 {Monte Carlo} integration",
+  journal =      "Econometrica",
+  volume =       "57",
+  pages =        "1317--1339",
+  year =         "1989",
+}
+
+@incollection{Gha94,
+  author    = {Z. Ghahramani},
+  title     = {Solving inverse problems using an {EM} approach to density estimation},
+  booktitle = {Proceedings of the 1993 Connectionist Models Summer School},
+  publisher = {Erlbaum},
+  address   = {Hillsdale, NJ},
+  year      = {1994},
+}
+
+@inproceedings{ghabea00,
+  author    = {Z. Ghahramani and M. J. Beal},
+  title     = {Variational inference for {Bayesian} mixtures of factor analysers},
+  booktitle = NIPS12,
+  editor    = NIPS12ed,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2000},
+  url       = {citeseer.nj.nec.com/article/ghahramani00variational.html},
+}
+
+@techreport{ghahramani96em,
+  author      = {Z. Ghahramani and G. E. Hinton},
+  title       = {The {EM} Algorithm for Mixtures of Factor Analyzers},
+  institution = {Dpt. of Comp. Sci., Univ. of Toronto},
+  number      = {CRG-TR-96-1},
+  month       = jan,
+  year        = {1996},
+  url         = {citeseer.nj.nec.com/ghahramani97em.html},
+}
+
+@techreport{GhaJor93,
+  author      = {Z. Ghahramani and M. I. Jordan},
+  title       = {Function approximation via density estimation},
+  institution = {MIT},
+  type        = {Computational Cognitive Science},
+  number      = {TR 9304},
+  address     = {Cambridge, MA},
+  year        = {1993},
+}
+
+@InProceedings{Gherrity89,
+  author =       "M. Gherrity",
+  booktitle =    ijcnn,
+  title =        "A Learning Algorithm for Analog, Fully Recurrent
+                 Neural Networks",
+  publisher =    "IEEE Press",
+  address =      "Washington D.C.",
+  pages =        "643--644",
+  month =        jun,
+  year =         "1989",
+}
+
+@article{Ghosh+Hwang-1989,
+  author    = {J. Ghosh and K. Hwang},
+  title     = {Mapping Neural Networks onto Message-Passing Multicomputers},
+  journal   = {Journal of Parallel and Distributed Computing},
+  volume    = {6},
+  number    = {2},
+  pages     = {291--330},
+  publisher = {Academic Press},
+  year      = {1989},
+}
+
+@Article{Ghosn2003,
+  author =       "J. Ghosn and Y. Bengio",
+  title =        "Bias Learning, Knowledge Sharing",
+  journal =      "{IEEE} Transactions on Neural Networks",
+  volume =       "14",
+  number =       "4",
+  pages =        "748--765",
+  month =        jul,
+  year =         "2003",
+}
+
+@TechReport{Ghysel93,
+  author =       "E. Ghysels",
+  title =        "A time series model with periodic stochastic regime
+                 switching",
+  number =       "C.R.D.E. Discussion paper 1093",
+  institution =  "C.R.D.E., Universit{\'e} de Montr{\'e}al",
+  address =      "Montreal, Quebec, Canada",
+  year =         "1993",
+}
+
+@book{Giarratano+Riley-2004,
+    author = {Giarratano, Joseph  C.  and Riley, Gary  D. },
+    howpublished = {Hardcover},
+    isbn = {0534384471},
+    month = oct,
+    posted-at = {2008-05-19 22:17:30},
+    priority = {2},
+    publisher = {{Course Technology}},
+    edition = {Fourth},
+    title = {Expert Systems: Principles and Programming},
+    url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0534384471},
+    year = {2004}
+}
+
+
+@Article{Giles86,
+  author =       "Y. C. Lee and G. Doolen and H. H. Chen and G. Z. Sun
+                 and T. Maxwell and H. Y. Lee and C. L. Giles",
+  title =        "Machine Learning Using a Higher Order Correlation
+                 Network",
+  journal =      "Physica D",
+  volume =       "22",
+  number =       "1--3",
+  pages =        "276--306",
+  year =         "1986",
+}
+
+@article{giles:1987,
+    author = {C. Lee Giles and Tom Maxwell},
+    title = {Learning, Invariance, and Generalization in High-Order Neural Networks},
+    journal = {Applied Optics},
+    volume = {26},
+    number = {23},
+    pages = {4972--4978},
+    publisher = {OSA},
+    year = {1987},
+    url = {http://ao.osa.org/abstract.cfm?URI=ao-26-23-4972},
+}
+
+@inproceedings{Giles90,
+  author    = {C. L. Giles and G. Z. Sun and H. H. Chen and Y. C. Lee and D. Chen},
+  title     = {Higher Order Recurrent Networks \& Grammatical Inference},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  publisher = {Morgan Kaufmann Publishers},
+  address   = {San Mateo, CA},
+  pages     = {380--387},
+  year      = {1990},
+}
+
+@InProceedings{Giles-nnsp92,
+  author =       "C. L. Giles and C. W. Omlin",
+  editor =       "Kung and Fallside and Sorenson and Kamm",
+  booktitle =    "Neural Networks for Signal Processing II, Proceedings
+                 of the 1992 IEEE workshop",
+  title =        "Inserting Rules into Recurrent Neural Networks",
+  publisher =    "IEEE Press",
+  pages =        "13--22",
+  year =         "1992",
+}
+
+@Article{Giles94,
+  author =       "C. L. Giles and C. W. Omlin",
+  title =        "Extraction, Insertion and Refinement of Symbolic Rules
+                 in Dynamically-Driven Recurrent Neural Networks",
+  journal =      "Connection Science",
+  year =         "1994",
+}
+
+@article{Giles-nc92,
+  author  = {C. L. Giles and C. B. Miller and D. Chen and G. Z. Sun and H. H. Chen and Y. C. Lee},
+  title   = {Learning and Extracting Finite State Automata with Second-Order Recurrent Neural Networks},
+  journal = nc,
+  volume  = {4},
+  number  = {3},
+  pages   = {393--405},
+  year    = {1992},
+}
+
+@book{Gill81,
+  author    = {P. E. Gill and W. Murray and M. H. Wright},
+  title     = {Practical Optimization},
+  publisher = {Academic Press},
+  year      = {1981},
+}
+
+@InProceedings{Gillman+Sipser94,
+  author =       "David Gillman and Michael Sipser",
+  booktitle =    colt94,
+  title =        "Inference and minimization of hidden {Markov} chains",
+  publisher =    "ACM",
+  pages =        "147--158",
+  year =         "1994",
+}
+
+@book{Gilmore-74,
+  author    = {R. Gilmore},
+  title     = {{Lie} groups, {Lie} algebras and some of their applications},
+  publisher = {Wiley},
+  address   = {New-York},
+  year      = {1974},
+}
+
+@InProceedings{Gingras-Bengio-Nadeau-2000,
+  author =       "F. Gingras and Y. Bengio and C. Nadeau",
+  booktitle =    "Computational Finance 2000",
+  title =        "On Out-of-Sample Statistics for Time-Series",
+  address =      "London, U.K.",
+  year =         "2000",
+}
+
+@InProceedings{chapados+bengio-2000,
+  author =       "N. Chapados and Y. Bengio",
+  booktitle =    "Computational Finance 2000",
+  title =        "{VaR}-based Asset Allocation using Neural Networks",
+  year =         "2000",
+}
+
+@InProceedings{Pigeon+Bengio-99,
+  author =       "S. Pigeon and Y. Bengio",
+  booktitle =    "Proceedings of the Data Compression Conference, DCC'1999",
+  title =        "Binary Pseudowavelets and Application to Bilevel Image Processing",
+  year =         "1999",
+}
+
+@InProceedings{Girard+Paugam-Moisy-1994,
+  author =       "D. Girard and H{\'e}l{\`e}ne Paugam-Moisy",
+  booktitle =    "Proceedings of the {IFIP} {WG10.3} Working Conference
+                 on Applications in Parallel and Distributed Computing",
+  title =        "Strategies of Weight Updating for Parallel
+                 Back-propagation",
+  publisher =    "North-Holland Publishing Co.",
+  address =      "Amsterdam, The Netherlands",
+  pages =        "335--336",
+  year =         "1994",
+  ISBN =         "0-444-81870-7",
+}
+
+@inproceedings{Girju+al-2003,
+  author    = {Roxana Girju and Adriana Badulescu and Dan Moldovan},
+  title     = {Learning semantic constraints for the automatic discovery of part-whole relations},
+  booktitle = {NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
+  publisher = {Association for Computational Linguistics},
+  address   = {Morristown, NJ, USA},
+  location  = {Edmonton, Canada},
+  pages     = {1--8},
+  year      = {2003},
+}
+
+% Key kept as Girolami-2001 for existing citations; volume 14 appeared in 2002.
+@Article{Girolami-2001,
+  author =       "M. Girolami",
+  title =        "Orthogonal series density estimation and the kernel
+                 eigenvalue problem",
+  journal =      "Neural Computation",
+  volume =       "14",
+  number =       "3",
+  pages =        "669--688",
+  year =         "2002",
+}
+
+% Retyped from @Misc: the memo number, institution and URL were only in the ignored `text' field.
+@TechReport{girosi97an,
+  author =       "F. Girosi",
+  title =        "An equivalence between sparse approximation and
+                 Support Vector Machines",
+  type =         "A.I. Memo",
+  number =       "1606",
+  institution =  "MIT Artificial Intelligence Laboratory",
+  year =         "1997",
+  URL =          "http://www.ai.mit.edu/people/girosi/svm.html",
+}
+
+@article{Glauber63,
+  author  = {R. J. Glauber},
+  title   = {Time-Dependent Statistics of the Ising Model},
+  journal = jmp,
+  volume  = {4},
+  pages   = {294--307},
+  year    = {1963},
+}
+
+@book{GLM-book-89,
+  author    = {P. McCullagh and J. Nelder},
+  title     = {Generalized Linear Models},
+  publisher = {Chapman and Hall},
+  address   = {London},
+  year      = {1989},
+}
+
+@incollection{GlobersonA2006,
+  author    = {Amir Globerson and Sam Roweis},
+  title     = {Metric Learning by Collapsing Classes},
+  editor    = NIPS18ed,
+  booktitle = NIPS18,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {451--458},
+  year      = {2006},
+}
+
+@Book{Gluck90,
+  author =       "M. A. Gluck and D. E. Rumelhart",
+  title =        "Neuroscience and connectionist theory",
+  publisher =    "Lawrence Erlbaum",
+  address =      "London",
+  year =         "1990",
+}
+
+@article{Godin89,
+  author  = {C. Godin and P. Lockwood},
+  title   = {{DTW} Schemes for Continuous Speech Recognition: {A} Unified view},
+  journal = cspla,
+  volume  = {3},
+  pages   = {169--198},
+  year    = {1989},
+}
+
+@book{Gold+Morgan-1999,
+    author = {Gold, Ben and Morgan, Nelson},
+    howpublished = {Hardcover},
+    isbn = {0471351547},
+    month = jul,
+    publisher = {Wiley},
+    title = {Speech and Audio Signal Processing: Processing and Perception of Speech and Music},
+    year = {1999}
+}
+
+@book{Goldberg89,
+  author    = {D. E. Goldberg},
+  title     = {Genetic Algorithms in Search, Optimization, and Machine Learning},
+  publisher = {Addison-Wesley},
+  address   = {Reading},
+  year      = {1989},
+}
+
+@Article{Goldfeld73,
+  author =       "S. M. Goldfeld and R. M. Quandt",
+  title =        "A {Markov} model for switching regressions",
+  journal =      "Journal of Econometrics",
+  volume =       "1",
+  pages =        "3--16",
+  year =         "1973",
+}
+
+@TechReport{Goldhor85,
+  author =       "R. S. Goldhor",
+  title =        "Representation of consonants in the peripheral
+                 auditory system: {A} modeling study of the
+                 correspondence between response properties and phonetic
+                 features",
+  number =       "505",
+  institution =  "RLE",
+  publisher =    "MIT Press, Cambridge, MA",
+  year =         "1985",
+}
+
+@article{Golomb90,
+  author  = {D. Golomb and N. Rubin and H. Sompolinsky},
+  title   = {Willshaw Model: Associative Memory with Sparse Coding and Low Firing Rates},
+  journal = prA,
+  volume  = {41},
+  pages   = {1843--1854},
+  year    = {1990},
+}
+
+@book{Golub+VanLoan-1996,
+  author       = {Gene H. Golub and Charles F. Van Loan},
+  title        = {Matrix Computations},
+  publisher    = {{The Johns Hopkins University Press}},
+  howpublished = {Paperback},
+  isbn         = {0-8018-5414-8},
+  month        = oct,
+  year         = {1996},
+}
+
+@techreport{Goodman-LM-2001,
+  author      = {Joshua Goodman},
+  title       = {A Bit of Progress in Language Modeling},
+  institution = {Microsoft Research},
+  address     = {Redmond, Washington},
+  number      = {MSR-TR-2001-72},
+  year        = {2001},
+}
+
+@inproceedings{Goodman2001,
+  author    = {J. Goodman},
+  title     = {Classes for Fast Maximum Entropy Training},
+  booktitle = icassp,
+  address   = {Utah},
+  year      = {2001},
+}
+
+@inproceedings{Gori-ijcnn89,
+  author    = {M. Gori and Y. Bengio and R. \mbox{De Mori}},
+  title     = {{BPS}: {A} Learning Algorithm for Capturing the Dynamical Nature of Speech},
+  booktitle = ijcnn,
+  publisher = {IEEE, New York},
+  address   = {Washington D.C.},
+  pages     = {643--644},
+  year      = {1989},
+}
+
+@InProceedings{Gori-nimes89,
+  author =       "M. Gori",
+  booktitle =    "Proceedings of Neuro-Nimes",
+  title =        "An Extension of {BPS}",
+  address =      "Nimes (France)",
+  pages =        "83--93",
+  year =         "1989",
+}
+
+@article{Gori-pami91,
+  author  = {M. Gori and A. Tesi},
+  title   = {On the problem of local minima in Backpropagation},
+  journal = ieeetpami,
+  volume  = {PAMI-14},
+  number  = {1},
+  pages   = {76--86},
+  year    = {1992},
+}
+
+@techreport{Gori-tr94,
+  author      = {M. Gori and M. Maggini and G. Soda},
+  title       = {Insertion of Finite State Automata into Recurrent Radial Basis Function Networks},
+  institution = {Universit\`a di Firenze (Italy)},
+  number      = {DSI-17/93},
+  year        = {1993},
+  note        = {(submitted)},
+  OPTannote   = {},
+}
+
+@inproceedings{GoriNimes,
+  author    = {M. Gori},
+  title     = {An Extension of {BPS}},
+  booktitle = {Proceedings of Neuro-Nimes},
+  address   = {Nimes (France)},
+  pages     = {83--93},
+  month     = nov,
+  year      = {1989},
+}
+
+@article{Gorman88a,
+  author  = {R. P. Gorman and T. J. Sejnowski},
+  title   = {Analysis of Hidden Units in a Layered Network Trained to Classify Sonar Targets},
+  journal = nn,
+  volume  = {1},
+  pages   = {75--89},
+  year    = {1988},
+}
+
+@article{Gorman88b,
+  author  = {R. P. Gorman and T. J. Sejnowski},
+  title   = {Learned Classification of Sonar Targets Using a Massively-Parallel Network},
+  journal = ieeetassp,
+  volume  = {36},
+  pages   = {1135--1140},
+  year    = {1988},
+}
+
+@unpublished{Gorse94,
+  author = {D. Gorse and J. G. Taylor and T. G. Clarkson},
+  title  = {A pulse-based reinforcement algorithm for learning continuous functions},
+  note   = {Submitted to WCNN '94 San Diego},
+  year   = {1994},
+}
+
+@article{Goudreau-trnn93,
+  author  = {M. W. Goudreau and C. L. Giles and S. T. Chakradhar and D. Chen},
+  title   = {First-order vs. second-order single layer recurrent neural networks},
+  journal = ieeetrnn,
+  note    = {(in press)},
+  year    = {1993},
+}
+
+@article{Goudreau93tb,
+  author  = {M. W. Goudreau and C. L. Giles and S. T. Chakradhar and D. Chen},
+  title   = {First-Order Vs. Second-Order Single Layer Recurrent Neural Networks},
+  journal = {IEEE Transactions on Neural Networks},
+  year    = {1993},
+}
+
+@InProceedings{Gould+al:NIPS09,
+  author =       "S. Gould and T. Gao and D. Koller",
+  booktitle =    "Advances in Neural Information Processing Systems (NIPS 2009)",
+  title =        "Region-based Segmentation and Object Detection",
+  year =         "2009",
+}
+
+@article{goutte97,
+  author  = {C. Goutte},
+  title   = {Note on free lunches and cross-validation},
+  journal = {Neural Computation},
+  volume  = {9},
+  number  = {6},
+  pages   = {1053--1059},
+  year    = {1997},
+}
+
+@article{Gower-68,
+  author  = {J. C. Gower},
+  title   = {Adding a point to vector diagrams in multivariate analysis},
+  journal = {Biometrika},
+  volume  = {55},
+  number  = {3},
+  pages   = {582--585},
+  year    = {1968},
+}
+
+@inproceedings{Graepel2000,
+  author    = {Thore Graepel and Ralf Herbrich and John Shawe-Taylor},
+  title     = {Generalization error bounds for sparse linear classifiers},
+  booktitle = {Thirteenth Annual Conference on Computational Learning Theory, 2000},
+  publisher = {Morgan Kaufmann},
+  note      = {in press},
+  year      = {2000},
+}
+
+@inproceedings{Graepel99,
+  author    = {T. Graepel and R. Herbrich and P. Bollmann-Sdorra and K. Obermayer},
+  title     = {Classification on Pairwise Proximity Data},
+  editor    = NIPS12ed,
+  booktitle = NIPS12,
+  year      = {1999},
+}
+
+@inproceedings{graf-90a,
+  author       = {H. P. Graf and D. Henderson},
+  title        = {A Reconfigurable {CMOS} Neural Network},
+  booktitle    = {ISSCC Digest},
+  organization = {ISSCC},
+  year         = {1990},
+}
+
+@inproceedings{Graf86,
+  author    = {H. P. Graf and L. D. Jackel and R. E. Howard and B. Straughn and J. S. Denker and W. Hubbard and D. M. Tennant and D. Schwartz},
+  title     = {{VLSI} Implementation of a Neural Network Memory with Several Hundreds of Neurons},
+  editor    = {J. S. Denker},
+  booktitle = snowbird,
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  pages     = {182--187},
+  year      = {1986},
+}
+
+@inproceedings{Graf88,
+  author    = {D. H. Graf and W. R. LaLonde},
+  title     = {A Neural Controller for Collision-Free Movement of General Robot Manipulators},
+  booktitle = icnn,
+  volume    = {1},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  pages     = {77--84},
+  year      = {1988},
+}
+
+@inproceedings{Graf92,
+  author    = {H. P. Graf and C. R. Nohl and J. Ben},
+  title     = {Image segmentation with networks of variable scales},
+  editor    = NIPS4ed,
+  booktitle = NIPS4,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo CA},
+  pages     = {480--487},
+  year      = {1992},
+}
+
+@InProceedings{Grandvalet98a,
+  author =       "Y. Grandvalet",
+  editor =       "L. Niklasson and M. Bod{\'e}n and T. Ziemke",
+  booktitle =    "ICANN'98",
+  title =        "Least absolute shrinkage is equivalent to quadratic
+                 penalization",
+  volume =       "1",
+  publisher =    "Springer",
+  pages =        "201--206",
+  year =         "1998",
+  series =       "Perspectives in Neural Computing",
+}
+
+@inproceedings{Grandvalet98a-short,
+  author    = {Y. Grandvalet},
+  title     = {Least absolute shrinkage is equivalent to quadratic penalization},
+  booktitle = {ICANN'98},
+  pages     = {201--206},
+  year      = {1998},
+}
+
+@inproceedings{GrandvaletY2005,
+  author    = {Yves Grandvalet and Yoshua Bengio},
+  title     = {{Semi-supervised Learning by Entropy Minimization}},
+  editor    = NIPS17ed,
+  booktitle = NIPS17,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  month     = dec,
+  year      = {2005},
+}
+% Deprecated: this key carries the conference date, but the key should carry the publication date. Use GrandvaletY2005 instead.
+@inproceedings{GrandvaletY2004,
+  author    = {Yves Grandvalet and Yoshua Bengio},
+  title     = {{Semi-supervised Learning by Entropy Minimization}},
+  editor    = NIPS17ed,
+  booktitle = NIPS17,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  month     = dec,
+  year      = {2005},
+}
+
+@InCollection{GrandvaletY2006,
+  author =       {Grandvalet, Yves and Bengio, Yoshua},
+  editor =       {Chapelle, Olivier and {Sch\"{o}lkopf}, Bernhard and Zien, Alexander},
+  booktitle =    {Semi-Supervised Learning},
+  title =        {Entropy Regularization},
+  publisher =    {{MIT} Press},
+  pages =        {151--168},
+  year =         {2006},
+}
+
+@article{GrangerNewbold76,
+  author  = {C. W. J. Granger and P. Newbold},
+  title   = {Forecasting transformed series},
+  journal = {J. Roy. Statist. Soc. B},
+  volume  = {38},
+  pages   = {189--203},
+  year    = {1976},
+}
+
+@InProceedings{Gray-Moore-2003,
+  author =       "Alexander Gray and Andrew Moore",
+  booktitle =    "Artificial Intelligence and Statistics",
+  title =        "Rapid Evaluation of Multiple Density Models",
+  year =         "2003",
+}
+
+@article{Gray84,
+  author  = {R. M. Gray},
+  title   = {Vector Quantization},
+  journal = ieeeassp,
+  pages   = {4--29},
+  month   = apr,
+  year    = {1984},
+}
+
+@Article{Greenwood+Durand60,
+  author =       "J. A. Greenwood and D. Durand",
+  title =        "Aids for Fitting the Gamma Distribution by Maximum
+                 Likelihood",
+  journal =      "Technometrics",
+  volume =       "2",
+  number =       "1",
+  pages =        "55--65",
+  year =         "1960",
+}
+
+@InProceedings{GregoryD2007,
+  author =       "Gregory Druck and Chris Pal and Andrew McCallum and
+                 Xiaojin Zhu",
+  booktitle =    "KDD '07: Proceedings of the 13th ACM SIGKDD
+                 international conference on Knowledge discovery and
+                 data mining",
+  title =        "Semi-supervised classification with hybrid
+                 generative/discriminative methods",
+  publisher =    "ACM",
+  address =      "New York, NY, USA",
+  pages =        "280--289",
+  year =         "2007",
+  OPTciteulike-article-id = "2304687",
+  OPTdoi =       "10.1145/1281192.1281225",
+  OPTisbn =      "9781595936097",
+  OPTkeywords =  "classification",
+  OPTpriority =  "2",
+}
+  %url =       "http://portal.acm.org/citation.cfm?id=1281192.1281225",
+
+@Article{Gribskov87,
+  author =       "M. Gribskov and A. D. McLachlan and D. Eisenberg",
+  title =        "Profile analysis: detection of distantly related
+                 proteins",
+  journal =      PNAS,
+  volume =       "84",
+  pages =        "4355--4358",
+  year =         "1987",
+}
+
+@TechReport{Griffin-Holub-Perona-07,
+  author =       "Gregory Griffin and Alex Holub and Pietro Perona",
+  title =        "Caltech-256 Object Category Dataset",
+  number =       "7694",
+  institution =  "California Institute of Technology",
+  year =         "2007",
+}
+
+@article{grigoriev95,
+  author  = {Dima Grigoriev and Marek Karpinski and Andrew Chi-Chih Yao},
+  title   = {An Exponential Lower Bound on the Size of Algebraic Decision Trees for {MAX}},
+  journal = {Electronic Colloquium on Computational Complexity (ECCC)},
+  volume  = {2},
+  number  = {057},
+  year    = {1995},
+}
+
+@article{Grimes-Rao-2005,
+  author  = {D. B. Grimes and R. P. N. Rao},
+  title   = {Bilinear Sparse Coding for Invariant Vision},
+  journal = {Neural Computation},
+  volume  = {17},
+  number  = {1},
+  pages   = {47--73},
+  year    = {2005},
+}
+
+@article{Grossberg67,
+  author  = {S. Grossberg},
+  title   = {Nonlinear Difference-Differential Equations in Prediction and Learning Theory},
+  journal = PNAS,
+  volume  = {58},
+  pages   = {1329--1334},
+  year    = {1967},
+}
+
+@article{Grossberg68a,
+  author  = {S. Grossberg},
+  title   = {Some Nonlinear Networks Capable of Learning a Spatial Pattern of Arbitrary Complexity},
+  journal = PNAS,
+  volume  = {59},
+  pages   = {368--372},
+  year    = {1968},
+}
+
+@article{Grossberg68b,
+  author  = {S. Grossberg},
+  title   = {Some Physiological and Biochemical Consequences of Psychological Postulates},
+  journal = PNAS,
+  volume  = {60},
+  pages   = {758--765},
+  year    = {1968},
+}
+
+@article{Grossberg69,
+  author  = {S. Grossberg},
+  title   = {Embedding Fields: {A} Theory of Learning with Physiological Implications},
+  journal = jmpsych,
+  volume  = {6},
+  pages   = {209--239},
+  year    = {1969},
+}
+
+@article{Grossberg72,
+  author  = {S. Grossberg},
+  title   = {Neural Expectation: Cerebellar and Retinal Analogs of Cells Fired by Learnable or Unlearned Pattern Classes},
+  journal = kyb,
+  volume  = {10},
+  pages   = {49--57},
+  year    = {1972},
+}
+
+@Article{Grossberg76a,
+  author =       "S. Grossberg",
+  title =        "Adaptive Pattern Classification and Universal
+                 Recoding: {I}. Parallel Development and Coding of
+                 Neural Feature Detectors",
+  journal =      biocyb,
+  volume =       "23",
+  pages =        "121--134",
+  year =         "1976",
+}
+
+@article{Grossberg76b,
+  author  = {S. Grossberg},
+  title   = {Adaptive Pattern Classification and Universal Recoding: {II}. Feedback, Expectation, Olfaction, Illusions},
+  journal = biocyb,
+  volume  = {23},
+  pages   = {187--202},
+  year    = {1976},
+}
+
+@Article{Grossberg80,
+  author =       "S. Grossberg",
+  title =        "How Does the Brain Build a Cognitive Code?",
+  journal =      psyrev,
+  volume =       "87",
+  pages =        "1--51",
+  year =         "1980",
+}
+
+@book{Grossberg87a,
+  author    = {S. Grossberg},
+  title     = {The Adaptive Brain},
+  volume    = {1--2},
+  publisher = {Elsevier},
+  address   = {Amsterdam},
+  year      = {1987},
+}
+
+@article{Grossberg87b,
+  author  = {S. Grossberg},
+  title   = {Competitive Learning: From Interactive Activation to Adaptive Resonance},
+  journal = cogsci,
+  volume  = {11},
+  pages   = {23--63},
+  year    = {1987},
+}
+
+@InProceedings{Grosse-2007,
+  author =       "Roger Grosse and Rajat Raina and Helen Kwong and Andrew Y. Ng",
+  booktitle =    UAI07,
+  title =        "Shift-Invariant Sparse Coding for Audio Classification",
+  year =         "2007",
+}
+
+@InProceedings{Grossman-nips89,
+  author =       "T. Grossman and R. Meir and E. Domany",
+  editor =       NIPS1ed,
+  booktitle =    NIPS1,
+  title =        "Learning by choice of internal representation",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "73--80",
+  year =         "1989",
+}
+
+@article{Grossman89,
+  author  = {T. Grossman and R. Meir and E. Domany},
+  title   = {Learning by Choice of Internal Representations},
+  journal = cs,
+  volume  = {2},
+  pages   = {555--575},
+  year    = {1989},
+}
+
+@inproceedings{Grossman90,
+  author    = {T. Grossman},
+  title     = {The {CHIR} Algorithm for Feed Forward Networks with Binary Weights},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  pages     = {516--523},
+  year      = {1990},
+}
+
+@Article{Guillery2005,
+  author =       "R. W. Guillery",
+  title =        "Is postnatal neocortical maturation hierarchical?",
+  journal =      "Trends in Neurosciences",
+  volume =       "28",
+  number =       "10",
+  pages =        "512--517",
+  month =        oct,
+  year =         "2005",
+}
+
+@incollection{Gull88,
+  author    = {S. F. Gull},
+  title     = {{Bayesian} inductive inference and maximum entropy},
+  editor    = {G. Erickson and C. Smith},
+  booktitle = {Maximum Entropy and {Bayesian} Methods in Science and Engineering},
+  volume    = {1},
+  publisher = {Kluwer},
+  address   = {Dordrecht},
+  pages     = {53--74},
+  year      = {1988},
+}
+
+@article{gullapalli:nn:1990,
+  author  = {V. Gullapalli},
+  title   = {A Stochastic Reinforcement Learning Algorithm for Learning Real-Valued Functions},
+  journal = nn,
+  volume  = {3},
+  pages   = {671--692},
+  year    = {1990},
+}
+
+@Article{Gunn+Kandola01,
+  author =       "S. R. Gunn and J. Kandola",
+  title =        "Structural Modelling with Sparse Kernels",
+  journal =      "Machine Learning",
+  year =         "2001",
+  note =         "Special issue on New Methods for Model Combination and
+                 Model Selection, to appear",
+}
+
+@InProceedings{Guo+Schuurmans-2007,
+  author =       {Guo, Y. and Schuurmans, D.},
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        {Convex relaxations of latent variable training},
+  year =         {2007},
+}
+
+@InProceedings{guoschuurmans07b,
+  author =       {Guo, Y. and Schuurmans, D.},
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        {Discriminative batch mode active learning},
+  year =         {2007},
+}
+
+@InProceedings{Guo+Schuurmans-2008,
+  author =       {Guo, Y. and Schuurmans, D.},
+  booktitle =    {Proceedings of the Forty-sixth Annual Allerton Conference on
+                 Communication, Control, and Computing (Allerton)},
+  title =        {Efficient global optimization for exponential family {PCA} and
+                 low-rank matrix factorization},
+  year =         {2008},
+}
+
+@article{Gutfreund88a,
+  author  = {H. Gutfreund},
+  title   = {Neural Networks with Hierarchically Correlated Patterns},
+  journal = prA,
+  volume  = {37},
+  pages   = {570--577},
+  year    = {1988},
+}
+
+@Article{Gutfreund88b,
+  author =       "H. Gutfreund and M. M{\'e}zard",
+  title =        "Processing of Temporal Sequences in Neural Networks",
+  journal =      prl,
+  volume =       "61",
+  pages =        "235--238",
+  year =         "1988",
+}
+
+@inproceedings{Gutzmann87,
+  author    = {K. Gutzmann},
+  title     = {Combinatorial Optimization Using a Continuous State {Boltzmann} Machine},
+  editor    = {M. Caudill and C. Butler},
+  booktitle = icnn,
+  volume    = {3},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  pages     = {721--734},
+  year      = {1987},
+}
+
+@Article{guyon-91,
+  author =       "I. Guyon and P. Albrecht and Y. {Le Cun} and J. S.
+                 Denker and W. Hubbard",
+  title =        "Design of a neural network character recognizer for a
+                 touch terminal",
+  journal =      "Pattern Recognition",
+  volume =       "24",
+  number =       "2",
+  pages =        "105--119",
+  year =         "1991",
+}
+
+@inproceedings{Guyon92,
+  author    = {I. Guyon and V. Vapnik and B. Boser and L. Bottou and S. A. Solla},
+  title     = {Structural Risk Minimization for Character Recognition},
+  editor    = NIPS4ed,
+  booktitle = NIPS4,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo CA},
+  pages     = {471--479},
+  year      = {1992},
+}
+
+@incollection{Guyon92b,
+  author    = {I. Guyon},
+  title     = {Writer independent and writer adaptive neural network for on-line character recognition},
+  editor    = {S. Impedovo},
+  booktitle = {From Pixels to Features III},
+  publisher = {Elsevier},
+  address   = {Amsterdam},
+  pages     = {493--506},
+  year      = {1992},
+}
+
+@inproceedings{Guyon93,
+  author    = {I. Guyon and B. Boser and V. Vapnik},
+  title     = {Automatic Capacity Tuning of Very Large {VC}-dimension Classifiers},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  publisher = {Morgan Kaufmann},
+  address   = {Denver, CO},
+  pages     = {147--155},
+  year      = {1993},
+}
+
+@inproceedings{Guyon95,
+  author    = {I. Guyon and F. Pereira},
+  title     = {Design of a linguistic postprocessor using variable memory length {Markov} models},
+  booktitle = ICDAR95,
+  publisher = {IEEE Computer Society Press},
+  address   = {Montreal, Canada},
+  pages     = {454--457},
+  month     = aug,
+  year      = {1995},
+}
+
+@incollection{Guyon96,
+  author    = {I. Guyon and M. Schenkel and J. Denker},
+  title     = {Overview and synthesis of on-line cursive handwriting recognition techniques},
+  editor    = {P. S. P. Wang and H. Bunke},
+  booktitle = {Handbook on Optical Character Recognition and Document Image Analysis},
+  publisher = {World Scientific},
+  year      = {1996},
+}
+
+@Article{Guyon+Elisseeff-2003,
+  author =       {Guyon, Isabelle and Elisseeff, Andre},
+  title =        {An introduction to variable and feature selection},
+  journal =      jmlr,
+  volume =       {3},
+  pages =        {1157--1182},
+  publisher =    {MIT Press},
+  address =      {Cambridge, MA},
+  issn =         {1533-7928},
+  year =         {2003},
+}
+    %url = {http://portal.acm.org/citation.cfm?id=944968},
+
+@book{Guyon+al-2006,
+        editor = "Isabelle Guyon and Steve Gunn and Masoud Nikravesh and Lotfi Zadeh",
+        title =    "Feature Extraction, Foundations and Applications",
+        publisher =    "Springer",
+        year =         "2006",
+}
+
+
+@article{Gyorgyi90a,
+  author  = {G. Gy{\"o}rgyi},
+  title   = {Inference of a Rule by a Neural Network with Thermal Noise},
+  journal = prl,
+  volume  = {64},
+  pages   = {2957--2960},
+  year    = {1990},
+}
+
+@InCollection{Gyorgyi90b,
+  author =       "G. Gy{\"o}rgyi and N. Tishby",
+  editor =       "W. K. Theumann and R. Koeberle",
+  booktitle =    "Neural Networks and Spin Glasses",
+  title =        "Statistical Theory of Learning a Rule",
+  publisher =    "World Scientific",
+  address =      "Singapore",
+  year =         "1990",
+}
+
+@inproceedings{ha93,
+  author    = {J. Y. Ha and S. C. Oh and J. H. Kim and Y. B. Kwon},
+  title     = {Unconstrained handwritten word recognition with interconnected hidden {Markov} models},
+  booktitle = {Third International Workshop on Frontiers in Handwriting Recognition},
+  publisher = {IAPR},
+  address   = {Buffalo},
+  pages     = {455--460},
+  month     = may,
+  year      = {1993},
+}
+
+% Conference paper: the ICPR proceedings title belongs in booktitle, not journal.
+@InProceedings{haasdonk2002tdk,
+  author =       "B. Haasdonk and D. Keysers",
+  title =        "{Tangent distance kernels for support vector
+                 machines}",
+  booktitle =    "Proc. of the 16th ICPR",
+  volume =       "2",
+  pages =        "864--868",
+  year =         "2002",
+}
+
+@inproceedings{hadsell-chopra-lecun-06,
+  author    = {Hadsell, Raia and Chopra, Sumit and {LeCun}, Yann},
+  title     = {Dimensionality Reduction by Learning an Invariant Mapping},
+  booktitle = cvpr06,
+  pages     = {1735--1742},
+  publisher = {IEEE Press},
+  year      = {2006},
+  original  = {orig/hadsell-chopra-lecun-06.pdf},
+}
+
+@inproceedings{hadsell-chopra-lecun-06-small,
+  author    = {Hadsell, Raia and Chopra, Sumit and {LeCun}, Yann},
+  title     = {Dimensionality Reduction by Learning an Invariant Mapping},
+  booktitle = {CVPR'2006},
+  publisher = {IEEE Press},
+  year      = {2006},
+  original  = {orig/hadsell-chopra-lecun-06.pdf},
+}
+
+@inproceedings{hadsell-iros-08,
+  author    = {Hadsell, Raia and Erkan, Ayse and Sermanet, Pierre and Scoffier, Marco and Muller, Urs and {LeCun}, Yann},
+  title     = {Deep Belief Net Learning in a Long-Range Vision System for Autonomous Off-Road Driving},
+  booktitle = {Proc. Intelligent Robots and Systems (IROS'08)},
+  pages     = {628--633},
+  year      = {2008},
+  original  = {orig/hadsell-iros-08.pdf},
+}
+ %url = "http://www.cs.nyu.edu/~raia/docs/iros08-farod.pdf",
+
+@TechReport{Haffner+96,
+  author =       "P. Haffner and L. Bottou and J. Bromley and C. J. C.
+                 Burges and T. Cauble and Y. {Le Cun} and C. Nohl and C.
+                 Stanton and C. Stenard and P. Vincent",
+  title =        "The {HCAR50} check amount reading system",
+  institution =  "Lucent Technologies, Bell Labs Innovation",
+  address =      "Holmdel, New-Jersey",
+  year =         "1996",
+  note =         "Forthcoming publication",
+}
+
+@inproceedings{Haffner89,
+  author    = {P. Haffner and A. Waibel and K. Shikano},
+  title     = {Fast back-propagation learning methods for large phonemic neural networks},
+  booktitle = {Proceedings of Eurospeech'89},
+  year      = {1989},
+}
+
+@inproceedings{Haffner91,
+  author    = {P. Haffner and M. Franzini and A. Waibel},
+  title     = {Integrating Time Alignment and Neural Networks for High Performance Continuous Speech Recognition},
+  booktitle = icassp,
+  pages     = {105--108},
+  address   = {Toronto},
+  year      = {1991},
+}
+
+@Book{HAJ90,
+  author =       "X. D. Huang and Y. Ariki and M. Jack",
+  title =        "Hidden {Markov} Models for Speech Recognition",
+  publisher =    "Edinburgh University Press",
+  address =      "Edinburgh",
+  year =         "1990",
+}
+
+@inproceedings{HagiwaraK2000,
+  author    = {Hagiwara, Katsuyuki and Kuno, Kazuhiro},
+  title     = {Regularization Learning and Early Stopping in Linear Networks},
+  booktitle = ijcnn,
+  pages     = {4511},
+  publisher = {IEEE Computer Society},
+  address   = {Washington, DC, USA},
+  year      = {2000},
+  isbn      = {0-7695-0619-4},
+}
+
+@techreport{Ham2003,
+  author      = {J. Ham and D. D. Lee and S. Mika and B. Sch{\"o}lkopf},
+  title       = {A kernel view of the dimensionality reduction of manifolds},
+  institution = {Max Planck Institute for Biological Cybernetics},
+  number      = {TR-110},
+  address     = {Germany},
+  year        = {2003},
+}
+
+@article{Hamilton88,
+  author  = {J. D. Hamilton},
+  title   = {Rational-Expectations Econometric Analysis of Changes in Regime},
+  journal = {Journal of Economic Dynamics and Control},
+  volume  = {12},
+  pages   = {385--423},
+  year    = {1988},
+}
+
+@article{hamilton89,
+  author  = {J. D. Hamilton},
+  title   = {A new approach to the economic analysis of non-stationary time series and the business cycle},
+  journal = {Econometrica},
+  volume  = {57},
+  number  = {2},
+  pages   = {357--384},
+  month   = mar,
+  year    = {1989},
+}
+
+@article{Hamilton90,
+  author  = {J. D. Hamilton},
+  title   = {Analysis of time series subject to changes in regime},
+  journal = {Journal of Econometrics},
+  volume  = {45},
+  pages   = {39--70},
+  year    = {1990},
+}
+
+@incollection{Hamilton93,
+  author    = {J. D. Hamilton},
+  title     = {State-Space Models},
+  editor    = {R. Engle and D. {McFadden}},
+  booktitle = {Handbook of Econometrics},
+  publisher = {North Holland, New York},
+  year      = {1993},
+}
+
+@Article{Hamilton94,
+  author =       "J. D. Hamilton and R. Susmel",
+  title =        "Autoregressive conditional heteroskedasticity and
+                 changes in regime",
+  journal =      "Journal of Econometrics",
+  volume =       "64",
+  number =       "1--2",
+  pages =        "307--333",
+  year =         "1994",
+}
+
+@Article{Hamilton96,
+  author =       "J. D. Hamilton",
+  title =        "Specification testing in {Markov}-switching time-series
+                 models",
+  journal =      "Journal of Econometrics",
+  volume =       "70",
+  pages =        "127--157",
+  year =         "1996",
+}
+
+@misc{Hammersley+Clifford-1971,
+ author = {John M. Hammersley and Peter Clifford},
+ title = {{Markov} fields on finite graphs and lattices},
+ howpublished = {Unpublished manuscript},
+ year = 1971
+}
+
+@inproceedings{HammondSimoncelli07,
+  author    = {David K. Hammond and Eero P. Simoncelli},
+  title     = {A Machine Learning Framework for Adaptive Combination of Signal Denoising Methods},
+  booktitle = ICIP07,
+  volume    = {6},
+  pages     = {29--32},
+  year      = {2007},
+}
+
+@Article{hampshire90,
+  author =       "John B. Hampshire and Alexander H. Waibel",
+  title =        "A Novel Objective Function for Improved Phoneme
+                 Recognition Using Time-Delay Neural Networks",
+  journal =      "IEEE Transactions on Neural Networks",
+  volume =       "1",
+  number =       "2",
+  pages =        "216--228",
+  month =        jun,
+  year =         "1990",
+}
+
+@inproceedings{HAMPSHIRE92A,
+  author    = {J. B. Hampshire and B. V. K. Vijaya Kumar},
+  title     = {Shooting Craps in Search of an Optimal Strategy for Training Connectionist Pattern Classifiers},
+  editor    = NIPS4ed,
+  booktitle = NIPS4,
+  pages     = {1125--1132},
+  publisher = {Morgan Kaufmann},
+  address   = {Denver, CO},
+  year      = {1992},
+}
+
+@inproceedings{Han96,
+  author    = {H-H. Han and H-C. Jung and Y-R. Lee and S-C. Jeong},
+  title     = {Application of Neural Network for {PWR} Steam Generator Water Level Control at Low Power Operation},
+  booktitle = nipc-hmit96,
+  volume    = {1},
+  pages     = {49--52},
+  publisher = ans,
+  year      = {1996},
+}
+
+@inproceedings{Hanson89,
+  author    = {S. J. Hanson and L. Pratt},
+  title     = {A Comparison of Different Biases for Minimal Network Construction with Back-Propagation},
+  editor    = NIPS1ed,
+  booktitle = NIPS1,
+  pages     = {177--185},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1989},
+}
+
+@Book{Hardle2004,
+  author =       "Wolfgang H{\"a}rdle and Marlene M{\"u}ller and Stefan Sperlich and Axel
+                 Werwatz",
+  title =        "Nonparametric and Semiparametric Models",
+  publisher =    "Springer",
+  year =         "2004",
+  url =          "http://www.xplore-stat.de/ebooks/ebooks.html",
+}
+
+@article{Hardoon+al-2004,
+    author = {Hardoon, David R. and Szedmak, Sandor and Shawe-Taylor, John},
+    title = {Canonical Correlation Analysis: An Overview with Application to Learning Methods},
+    journal = {Neural Computation},
+    volume = {16},
+    number = {12},
+    pages = {2639--2664},
+    month = dec,
+    year = {2004},
+    publisher = {MIT Press},
+    address = {Cambridge, MA, USA},
+    doi = {10.1162/0899766042321814},
+    issn = {0899-7667},
+    url = {http://portal.acm.org/citation.cfm?id=1119696.1119703}
+}
+
+@InProceedings{HardoonD2007,
+  author =       "David R. Hardoon and John Shawe-Taylor and Antti
+                 Ajanki and Kai Puolam{\"a}ki and Samuel Kaski",
+  booktitle =    "Proceedings of AISTATS 2007",
+  title =        "Information Retrieval by Inferring Implicit Queries
+                 from Eye Movements",
+  year =         "2007",
+}
+
+@InProceedings{Harmeling02,
+  author =       "S. Harmeling and A. Ziehe and M. Kawanabe and K.-R.
+                 M{\"u}ller",
+  editor =       NIPS14ed,
+  booktitle =    NIPS14,
+  title =        "Kernel Feature Spaces and Nonlinear Blind Source
+                 Separation",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2002",
+  original =     "orig/AA34.ps",
+}
+
+@inproceedings{Harp90,
+  author    = {S. A. Harp and T. Samad and A. Guha},
+  title     = {Designing Application-Specific Neural Networks Using the Genetic Algorithm},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  pages     = {447--454},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@article{Hartman90,
+  author  = {E. J. Hartman and J. D. Keeler and J. M. Kowalski},
+  title   = {Layered Neural Networks with {G}aussian Hidden Units As Universal Approximations},
+  journal = nc,
+  volume  = {2},
+  pages   = {210--215},
+  year    = {1990},
+}
+
+@Article{Haruno01,
+  author =       "M. Haruno and D. M. Wolpert and M. Kawato",
+  title =        "{MOSAIC} model for sensorimotor learning and control",
+  journal =      "Neural Computation",
+  volume =       "13",
+  number =       "10",
+  pages =        "2201--2220",
+  year =         "2001",
+}
+
+@inproceedings{Hassibi-nips93,
+  author    = {B. Hassibi and D. G. Stork},
+  title     = {Second Order Derivatives for Network Pruning: Optimal Brain Surgeon},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  pages     = {164--171},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@inproceedings{Hastad86,
+  author    = {Johan H{\aa}stad},
+  title     = {Almost optimal lower bounds for small depth circuits},
+  booktitle = {Proceedings of the 18th annual ACM Symposium on Theory of Computing},
+  pages     = {6--20},
+  publisher = {ACM Press},
+  address   = {Berkeley, California},
+  year      = {1986},
+}
+
+@book{Hastad87,
+  author    = {Johan T. H{\aa}stad},
+  title     = {Computational Limitations for Small Depth Circuits},
+  publisher = {{MIT} Press},
+  year      = {1987},
+}
+
+@article{Hastad91,
+  author  = {Johan H{\aa}stad and Mikael Goldmann},
+  title   = {On the power of small-depth threshold circuits},
+  journal = {Computational Complexity},
+  volume  = {1},
+  pages   = {113--129},
+  year    = {1991},
+}
+
+@article{Hastie-Stuetzle-1989,
+  author  = {T. Hastie and W. Stuetzle},
+  title   = {Principal Curves},
+  journal = {Journal of the American Statistical Association},
+  volume  = {84},
+  pages   = {502--516},
+  year    = {1989},
+}
+
+@book{Hastie2001,
+  author    = {T. Hastie and R. Tibshirani and J. Friedman},
+  title     = {The elements of statistical learning: data mining, inference and prediction},
+  series    = {Springer Series in Statistics},
+  publisher = {Springer Verlag},
+  year      = {2001},
+  annote    = {ISBN: 0387952845},
+}
+
+@article{Hastie2004,
+  author  = {Trevor Hastie and Saharon Rosset and Robert Tibshirani and Ji Zhu},
+  title   = {The entire regularization path for the support vector machine},
+  journal = jmlr,
+  volume  = {5},
+  pages   = {1391--1415},
+  year    = {2004},
+}
+
+@inproceedings{hastie96discriminant,
+  author    = {T. Hastie and R. Tibshirani},
+  title     = {Discriminant Adaptive Nearest Neighbor Classification and Regression},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  volume    = {8},
+  pages     = {409--415},
+  publisher = {{MIT} Press},
+  year      = {1996},
+  url       = {citeseer.nj.nec.com/hastie94discriminant.html},
+}
+
+@Article{Hathaway85,
+  author =       "R. J. Hathaway",
+  title =        "A constrained formulation of Maximum-Likelihood
+                 estimation for normal mixture distributions",
+  journal =      "The Annals of Statistics",
+  volume =       "13",
+  number =       "2",
+  pages =        "795--800",
+  year =         "1985",
+}
+
+@article{hausser:2003,
+    author = {Michael H{\"a}usser and Bartlett Mel},
+    title = {Dendrites: Bug or Feature?},
+    journal = {Current Opinion in Neurobiology},
+    volume = {13},
+    year = {2003},
+    pages = {372--383},
+}
+
+@inproceedings{Haussler89,
+  author    = {D. Haussler},
+  title     = {Generalizing the {PAC} model: sample size bounds from metric dimension-based uniform convergence results},
+  booktitle = {Proc. of the 30th Annual Symposium on the Foundations of Computer Science},
+  publisher = {IEEE},
+  year      = {1989},
+}
+
+@inproceedings{haussler95,
+  author    = {D. Haussler and J. Kivinen and M. K. Warmuth},
+  title     = {Sequential prediction of individual sequences under general loss functions},
+  booktitle = {Computational Learning Theory, 2nd European Conference, EuroCOLT'95},
+  pages     = {69--83},
+  publisher = {Springer},
+  year      = {1995},
+}
+
+@book{hay01nnn,
+    author = {Haykin, Simon},
+    title = {Neural Networks: A Comprehensive Foundation},
+    edition = {Second},
+    publisher = {Prentice Hall},
+    month = jul,
+    year = {1998},
+    isbn = {0132733501},
+    howpublished = {Hardcover},
+    keywords = {network, neural},
+    posted-at = {2009-07-04 21:37:33},
+    priority = {2},
+    url = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0132733501}
+}
+
+
+@techreport{He+Niyogi-2002,
+  author      = {X. He and P. Niyogi},
+  title       = {Locality Preserving Projections ({LPP})},
+  institution = {University of Chicago, Computer Science},
+  number      = {TR-2002-09},
+  year        = {2002},
+}
+
+@incollection{He+Niyogi-2004,
+  author    = {Xiaofei He and Partha Niyogi},
+  title     = {Locality Preserving Projections},
+  editor    = NIPS16ed,
+  booktitle = NIPS16,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2004},
+}
+
+@book{Hebb49,
+  author    = {D. O. Hebb},
+  title     = {The Organization of Behavior},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1949},
+}
+
+@inproceedings{Hecht-Nielsen87a,
+  author    = {R. Hecht-Nielsen},
+  title     = {Combinatorial Hypercompression},
+  editor    = {M. Caudill and C. Butler},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {455--461},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  year      = {1987},
+}
+
+@article{Hecht-Nielsen87b,
+  author  = {R. Hecht-Nielsen},
+  title   = {Counterpropagation Networks},
+  journal = applopt,
+  volume  = {26},
+  pages   = {4979--4984},
+  year    = {1987},
+}
+
+@article{Hecht-Nielsen88,
+  author  = {R. Hecht-Nielsen},
+  title   = {Applications of Counterpropagation Networks},
+  journal = nn,
+  volume  = {1},
+  pages   = {131--139},
+  year    = {1988},
+}
+
+@inproceedings{Hecht-Nielsen89,
+  author    = {R. Hecht-Nielsen},
+  title     = {Theory of the Backpropagation Neural Network},
+  booktitle = ijcnn,
+  volume    = {1},
+  pages     = {593--605},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@Article{Hecht-Nielsen-1995,
+  author =       "R. Hecht-Nielsen",
+  title =        "Replicator neural networks for universal optimal source coding",
+  journal =      "Science",
+  volume =       "269",
+  pages =        "1860--1863",
+  year =         "1995",
+}
+
+@TechReport{Heckerman96,
+  author =       "D. Heckerman",
+  title =        "A tutorial on learning with {Bayesian} networks",
+  number =       "TR-95-06",
+  institution =  "Microsoft Research",
+  month =        jan,
+  year =         "1996",
+  url =          "ftp://ftp.research.microsoft.com/pub/Tech-Reports/Winter94-95/TR-95-06.PS",
+}
+
+@article{HeckermanD2000,
+  author  = "David Heckerman and David Maxwell Chickering and Christopher Meek and Robert Rounthwaite and Carl Kadie",
+  title   = "Dependency networks for inference, collaborative filtering, and data visualization",
+  journal = jmlr,
+  volume  = "1",
+  pages   = "49--75",
+  year    = "2000",
+}
+
+@article{heeger:1992a,
+    author = {David J. Heeger},
+    title = {Normalization of Cell Responses in Cat Striate Cortex},
+    journal = {Visual Neuroscience},
+    volume = {9},
+    number = {2},
+    pages = {181--198},
+    year = {1992},
+}
+
+@inproceedings{Hegde88,
+  author    = {S. U. Hegde and J. L. Sweet and W. B. Levy},
+  title     = {Determination of Parameters in a Hopfield/Tank Computational Network},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {291--298},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@article{HedgeJ2000,
+	author = {Jay Hegd{\'e} and David C. {Van Essen}},
+	title = {Selectivity for complex shapes in primate visual area {V2}},
+	journal = {Journal of Neuroscience},
+	volume = {20},
+	number = {5},
+	month = mar,
+	year = {2000},
+	issn = {1529-2401},
+	affiliation = {Department of Anatomy and Neurobiology, Washington University School of Medicine, St. Louis, Missouri 63110, USA.},
+	citeulike-article-id = {465720},
+	keywords = {contour, v2},
+	posted-at = {2006-01-15 12:57:15},
+	priority = {0}
+}
+	%url = {http://view.ncbi.nlm.nih.gov/pubmed/10684908},
+
+@inproceedings{Heitz+al:NIPS08a,
+  author    = "G. Heitz and S. Gould and A. Saxena and D. Koller",
+  title     = "Cascaded Classification Models: {C}ombining Models for Holistic Scene Understanding",
+  booktitle = "Advances in Neural Information Processing Systems (NIPS 2008)",
+  year      = "2008",
+}
+
+@inproceedings{HeldM1998,
+  author    = {Marcus Held and Joachim M. Buhmann},
+  title     = {Unsupervised on-line learning of decision trees for hierarchical data analysis},
+  editor    = NIPS10ed,
+  booktitle = NIPS10,
+  pages     = {514--520},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA, USA},
+  year      = {1998},
+  isbn      = {0-262-10076-2},
+  location  = {Denver, Colorado, United States},
+}
+
+@inproceedings{herlocker99,
+  author    = {Jonathan L. Herlocker and Joseph A. Konstan and Al Borchers and John Riedl},
+  title     = {An algorithmic framework for performing collaborative filtering},
+  booktitle = {SIGIR '99: Proceedings of the 22nd annual international ACM SIGIR conference on Research and development in information retrieval},
+  pages     = {230--237},
+  publisher = {ACM Press},
+  address   = {New York, NY, USA},
+  year      = {1999},
+  location  = {Berkeley, California, United States},
+}
+
+@inproceedings{Hermansky-genova91,
+  author    = {Hynek Hermansky and Nelson Morgan and Aruna Bayya and Phil Kohn},
+  title     = {Compensation for the Effect of the Communication Channel in Auditory-like Analysis of Speech ({RASTA}-{PLP})},
+  booktitle = {Proc. of Eurospeech 91},
+  pages     = {1367--1371},
+  address   = {Genova (Italy)},
+  year      = {1991},
+}
+
+@techreport{Hermansky-icsi91,
+  author      = {Hynek Hermansky and Nelson Morgan and Aruna Bayya and Phil Kohn},
+  title       = {{RASTA}-{PLP} Speech Analysis},
+  institution = {International Computer Science Institute},
+  number      = {TR-91-069},
+  address     = {Berkeley, CA},
+  month       = dec,
+  year        = {1991},
+  OPTnote     = {Most speech parameter estimation techniques are easily influenced by
+                 the frequency response of the communication channel. We have developed
+                 a technique that is more robust to such steady-state spectral factors
+                 in speech. The approach is conceptually simple and computationally
+                 efficient. The new method is described, and experimental results are
+                 reported, showing a significant advantage for the proposed method.},
+}
+
+@Article{Hermansky-jasa90,
+  author =       "Hynek Hermansky",
+  title =        "Perceptual Linear Predictive ({PLP}) Analysis of
+                 Speech",
+  journal =      jasa,
+  volume =       "87",
+  number =       "4",
+  pages =        "1738--1752",
+  year =         "1990",
+}
+
+@Book{Hernandez-Lerma+Lasserre-2003,
+  author =       "On{\'e}simo Hern{\'a}ndez-Lerma and Jean Bernard
+                 Lasserre",
+  title =        "{Markov} Chains and Invariant Probabilities",
+  publisher =    "Birkh{\"a}user Verlag",
+  year =         "2003",
+}
+
+@inproceedings{Hertz86,
+  author    = {J. A. Hertz and G. Grinstein and S. Solla},
+  title     = {Memory Networks with Asymmetric Bonds},
+  editor    = {J. S. Denker},
+  booktitle = snowbird,
+  pages     = {212--218},
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  year      = {1986},
+}
+
+@inproceedings{Hertz87,
+  author    = {J. A. Hertz and G. Grinstein and S. Solla},
+  title     = {Irreversible Spin Glasses and Neural Networks},
+  editor    = {J. L. van Hemmen and I. Morgenstern},
+  booktitle = {Heidelberg Colloquium on Glassy Dynamics},
+  pages     = {538--546},
+  publisher = {Springer-Verlag, Berlin},
+  address   = {Heidelberg 1986},
+  year      = {1987},
+}
+
+@article{Hertz89a,
+  author  = {J. A. Hertz},
+  title   = {A Gauge Theory in Computational Vision: {A} Model for Outline Extraction},
+  journal = pscrip,
+  volume  = {39},
+  pages   = {161--167},
+  year    = {1989},
+}
+
+@article{Hertz89b,
+  author  = {J. A. Hertz and A. Krogh and G. I. Thorbergsson},
+  title   = {Phase Transitions in Simple Learning},
+  journal = jpa,
+  volume  = {22},
+  pages   = {2133--2150},
+  year    = {1989},
+}
+
+@techreport{Hertz90,
+  author      = {J. A. Hertz},
+  title       = {Statistical Dynamics of Learning},
+  institution = {Nordita},
+  type        = {Preprint},
+  number      = {90/34 S},
+  address     = {Copenhagen, Denmark},
+  year        = {1990},
+}
+
+@article{Herz89,
+  author  = {A. Herz and B. Sulzer and R. K{\"u}hn and J. L. van Hemmen},
+  title   = {Hebbian Learning Reconsidered: Representation of Static and Dynamic Objects in Associative Neural Nets},
+  journal = biocyb,
+  volume  = {60},
+  pages   = {457--467},
+  year    = {1989},
+}
+
+@article{Heskes-98,
+  author  = {T. Heskes},
+  title   = {Bias/variance decompositions for likelihood-based estimators},
+  journal = {Neural Computation},
+  volume  = {10},
+  pages   = {1425--1433},
+  year    = {1998},
+}
+
+@article{heskes00,
+  author  = {Tom Heskes},
+  title   = {On Natural Learning and Pruning in Multilayered Perceptrons},
+  journal = {Neural Computation},
+  volume  = {12},
+  number  = {4},
+  pages   = {881--901},
+  year    = {2000},
+}
+
+@inproceedings{heskes98,
+  author    = {Tom Heskes},
+  title     = {Solving a huge number of similar tasks: a combination of multi-task learning and a hierarchical {Bayesian} approach},
+  booktitle = {International Conference On Machine Learning},
+  year      = {1998},
+}
+
+@Article{Hestenes+Stiefel-1952,
+  author =       "Magnus R. Hestenes and Eduard Stiefel",
+  title =        "Methods of Conjugate Gradients for Solving Linear
+                 Systems",
+  journal =      "Journal of Research of the National Bureau of Standards",
+  volume =       "49",
+  number =       "6",
+  pages =        "409--436",
+  year =         "1952",
+}
+
+@article{Hettich-93,
+  author  = {R. Hettich and K. O. Kortanek},
+  title   = {Semi-infinite programming: theory, methods, and applications},
+  journal = {{SIAM} Review},
+  volume  = {35},
+  number  = {3},
+  pages   = {380--429},
+  year    = {1993},
+}
+
+@inproceedings{Hines96,
+  author    = {J. W. Hines},
+  title     = {A Logarithmic Neural Network Architecture for a {PRA} Approximation},
+  booktitle = nipc-hmit96,
+  volume    = {1},
+  pages     = {235--241},
+  publisher = ans,
+  year      = {1996},
+}
+
+@article{HinOsiWel2006,
+  author  = {Geoffrey E. Hinton and Simon Osindero and Max Welling and {Yee Whye} Teh},
+  title   = {Unsupervised Discovery of Non-Linear Structure using Contrastive Backpropagation},
+  journal = {Cognitive Science},
+  volume  = {30},
+  number  = {4},
+  year    = {2006},
+}
+
+@Article{Hinton+Ghahramani-97,
+  author =       "G. E. Hinton and Z. Ghahramani",
+  title =        "Generative models for discovering sparse distributed
+                 representations",
+  journal =      "Philosophical Transactions of the Royal Society of
+                 London B",
+  volume =       "352",
+  number =       "1358",
+  pages =        "1177--1190",
+  year =         "1997",
+}
+
+@incollection{Hinton-bo86,
+  author    = {G. E. Hinton and T. J. Sejnowski},
+  title     = {Learning and relearning in {Boltzmann} machines},
+  editor    = {D. E. Rumelhart and J. L. McClelland},
+  booktitle = {Parallel Distributed Processing: Explorations in the Microstructure of Cognition. Volume 1: Foundations},
+  pages     = {282--317},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1986},
+}
+
+@inproceedings{Hinton-ICA-2001,
+  author    = {G. E. Hinton and M. Welling and Y. W. Teh and S. Osindero},
+  title     = {A New View of {ICA}},
+  booktitle = {Proceedings of 3rd International Conference on Independent Component Analysis and Blind Signal Separation (ICA'01)},
+  pages     = {746--751},
+  address   = {San Diego, CA},
+  year      = {2001},
+}
+
+@inproceedings{Hinton-nips95,
+  author    = {G. E. Hinton and M. Revow and P. Dayan},
+  title     = {Recognizing handwritten digits using mixtures of linear models},
+  editor    = NIPS7ed,
+  booktitle = NIPS7,
+  pages     = {1015--1022},
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1995},
+}
+
+@techreport{Hinton-PoE-2000,
+  author      = {Geoffrey E. Hinton},
+  title       = {Training Products of Experts by Minimizing Contrastive Divergence},
+  institution = {Gatsby Unit, University College London},
+  number      = {GCNU TR 2000-004},
+  year        = {2000},
+}
+
+@article{Hinton-Science2006,
+  author  = {Geoffrey E. Hinton and Ruslan Salakhutdinov},
+  title   = {Reducing the dimensionality of data with neural networks},
+  journal = {Science},
+  volume  = {313},
+  number  = {5786},
+  pages   = {504--507},
+  month   = jul,
+  year    = {2006},
+}
+
+% DEPRECATED: the following entry duplicates the preceding one (Hinton-Science2006); cite that key instead.
+@Article{Hinton+Salakhutdinov-2006,
+  author =       "Geoffrey E. Hinton and Ruslan Salakhutdinov",
+  title =        "Reducing the Dimensionality of Data with Neural
+                 Networks",
+  journal =      "Science",
+  volume =       "313",
+  number =       "5786",
+  pages =        "504--507",
+  month =        jul,
+  year =         "2006",
+}
+
+
+@Article{Hinton06,
+  author =       "Geoffrey E. Hinton and Simon Osindero and {Yee Whye} Teh",
+  title =        "A fast learning algorithm for deep belief nets",
+  journal =      "Neural Computation",
+  volume =       "18",
+  pages =        "1527--1554",
+  year =         "2006",
+}
+
+@article{Hinton06-small,
+  author  = {G. E. Hinton and S. Osindero and Y.-W. Teh},
+  title   = {A fast learning algorithm for deep belief nets},
+  journal = {Neural Computation},
+  volume  = {18},
+  pages   = {1527--1554},
+  year    = {2006},
+}
+
+@InProceedings{hinton1994amd,
+  author =       "Geoffrey E. Hinton and R. S. Zemel",
+  title =        "Autoencoders, minimum description length, and
+                 {Helmholtz} free energy",
+  booktitle =    NIPS6,
+  editor =       NIPS6ed,
+  publisher =    "Morgan Kaufmann Publishers, Inc.",
+  pages =        "3--10",
+  year =         "1994",
+}
+
+@article{Hinton2002,
+  author  = {Geoffrey E. Hinton},
+  title   = {Training products of experts by minimizing contrastive divergence},
+  journal = {Neural Computation},
+  volume  = {14},
+  pages   = {1771--1800},
+  year    = {2002},
+}
+
+@inproceedings{Hinton83,
+  author    = {G. E. Hinton and T. J. Sejnowski},
+  title     = {Optimal Perceptual Inference},
+  booktitle = cvpr83,
+  pages     = {448--453},
+  publisher = {IEEE, New York},
+  address   = {Washington 1983},
+  year      = {1983},
+}
+
+@techreport{Hinton84,
+  author      = {G. E. Hinton and T. J. Sejnowski and D. H. Ackley},
+  title       = {{Boltzmann} machines: Constraint satisfaction networks that learn},
+  institution = {Carnegie-Mellon University, Dept. of Computer Science},
+  number      = {TR-CMU-CS-84-119},
+  year        = {1984},
+}
+
+@incollection{Hinton86a,
+  author    = {G. E. Hinton and T. J. Sejnowski},
+  title     = {Learning and Relearning in {Boltzmann} Machines},
+  editor    = {D. E. Rumelhart and J. L. McClelland},
+  booktitle = pdp,
+  volume    = {1},
+  chapter   = {7},
+  pages     = {282--317},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1986},
+}
+
+@inproceedings{Hinton86b,
+  author    = {Geoffrey E. Hinton},
+  title     = {Learning Distributed Representations of Concepts},
+  booktitle = {Proceedings of the Eighth Annual Conference of the Cognitive Science Society},
+  pages     = {1--12},
+  publisher = {Lawrence Erlbaum, Hillsdale},
+  address   = {Amherst 1986},
+  year      = {1986},
+}
+
+@inproceedings{Hinton86b-small,
+  author    = {Geoffrey E. Hinton},
+  title     = {Learning Distributed Representations of Concepts},
+  booktitle = {Proc. 8th Annual Conf. Cog. Sc. Society},
+  pages     = {1--12},
+  year      = {1986},
+}
+
+@InProceedings{Hinton87,
+  author =       "Geoffrey E. Hinton",
+  editor =       "J. W. {de Bakker} and A. J. Nijman and P. C.
+                 Treleaven",
+  booktitle =    "Proceedings of {PARLE} Conference on Parallel
+                 Architectures and Languages Europe",
+  title =        "Learning translation invariant recognition in massively
+                 parallel networks",
+  publisher =    "Springer-Verlag",
+  address =      "Berlin",
+  pages =        "1--13",
+  year =         "1987",
+}
+
+@article{Hinton89,
+  author  = {Geoffrey E. Hinton},
+  title   = {Deterministic {Boltzmann} Learning Performs Steepest Descent in Weight Space},
+  journal = nc,
+  volume  = {1},
+  pages   = {143--150},
+  year    = {1989},
+}
+
+@Article{Hinton89b,
+  author =       "Geoffrey E. Hinton",
+  title =        "Connectionist learning procedures",
+  journal =      "Artificial Intelligence",
+  volume =       "40",
+  pages =        "185--234",
+  year =         "1989",
+}
+
+@Article{Hinton90,
+  author =       "G. E. Hinton and S. J. Nowlan",
+  title =        "The bootstrap Widrow-Hoff rule as a cluster-formation
+                 algorithm",
+  journal =      nc,
+  volume =       "2",
+  pages =        "355--362",
+  year =         "1990",
+}
+
+@InProceedings{Hinton92,
+  author =       "G. E. Hinton and C. K. I. Williams and M. D. Revow",
+  editor =       NIPS4ed,
+  booktitle =    NIPS4,
+  title =        "Adaptive elastic models for hand-printed character
+                 recognition",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo CA",
+  pages =        "512--519",
+  year =         "1992",
+}
+
+@Misc{Hinton93,
+  author =       "Geoffrey E. Hinton",
+  title =        "Using the minimum description length principle to
+                 discover factorial codes",
+  howpublished = "Lecture given at the 1993 Connectionist Models Summer
+                 School",
+  year =         "1993",
+}
+
+@Article{Hinton95,
+  author =       "Geoffrey E. Hinton and Peter Dayan and Brendan J. Frey and Radford M.
+                 Neal",
+  title =        "The wake-sleep algorithm for unsupervised neural
+                 networks",
+  journal =      "Science",
+  volume =       "268",
+  pages =        "1158--1161",
+  year =         "1995",
+}
+
+@Article{hinton97modelling,
+  author =       "G. E. Hinton and P. Dayan and M. Revow",
+  title =        "Modelling the manifolds of images of handwritten
+                 digits",
+  journal =      "IEEE Transactions on Neural Networks",
+  volume =       "8",
+  pages =        "65--74",
+  year =         "1997",
+}
+
+@InProceedings{Hinton99,
+  author =       "Geoffrey E. Hinton",
+  booktitle =    "Proceedings of the Ninth International Conference on
+                 Artificial Neural Networks (ICANN)",
+  title =        "Products of Experts",
+  volume =       "1",
+  publisher =    "IEE",
+  address =      "Edinburgh, Scotland",
+  pages =        "1--6",
+  year =         "1999",
+}
+
+@InProceedings{HintonG2005,
+  author =       "Geoffrey E. Hinton and Simon Osindero and Kejie Bao",
+  editor =       aistats05ed,
+  booktitle =    aistats05,
+  title =        "Learning Causally Linked Markov Random Fields",
+  publisher =    "Society for Artificial Intelligence and Statistics",
+  pages =        "128--135",
+  year =         "2005",
+}
+
+@InProceedings{HintonG2005-small,
+  author =       "Geoffrey E. Hinton and Simon Osindero and Kejie Bao",
+  booktitle =    "Proceedings of AISTATS 2005",
+  title =        "Learning Causally Linked Markov Random Fields",
+  year =         "2005",
+}
+
+@TechReport{HintonG2006,
+  author =       "Geoffrey E. Hinton",
+  title =        "To recognize shapes, first learn to generate images",
+  number =       "UTML TR 2006-003",
+  institution =  "University of Toronto",
+  year =         "2006",
+}
+
+@InCollection{HintonG2007,
+  author =       "Geoffrey E. Hinton",
+  editor =       "Paul Cisek and Trevor Drew and John Kalaska",
+  booktitle =    "Computational Neuroscience: Theoretical Insights into
+                 Brain Function",
+  title =        "To recognize shapes, first learn to generate images",
+  publisher =    "Elsevier",
+  year =         "2007",
+}
+
+@TechReport{Hinton-Boltzmann,
+  author =       "G. E. Hinton and T. J. Sejnowski and D. H. Ackley",
+  title =        "{Boltzmann} machines: Constraint satisfaction networks
+                 that learn",
+  number =       "TR-CMU-CS-84-119",
+  institution =  "Carnegie-Mellon University, Dept. of Computer
+                 Science",
+  year =         "1984",
+  OPTnote =      "",
+}
+
+@InProceedings{Hirayama-nips92,
+  author =       "M. Hirayama and E. Vatikiotis-Bateson and M. Kawato
+                 and M. I. Jordan",
+  editor =       NIPS4ed,
+  booktitle =    NIPS4,
+  title =        "Forward Dynamics Modeling of Speech Motor Control
+                 Using Physiological Data",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Denver, CO",
+  pages =        "191--198",
+  year =         "1992",
+  OPTnote =      "",
+}
+
+@Article{Hjort96,
+  author =       "N. L. Hjort and M. C. Jones",
+  title =        "Locally parametric nonparametric density estimation",
+  journal =      "Annals of Statistics",
+  volume =       "24",
+  number =       "4",
+  pages =        "1619--1647",
+  year =         "1996",
+}
+
+@InProceedings{Ho95,
+  author =       "Tin Kam Ho",
+  booktitle =    ICDAR95,
+  title =        "Random Decision Forest",
+  address =      "Montreal, Canada",
+  pages =        "278--282",
+  year =         "1995",
+}
+
+@Misc{Hochreiter91,
+  author =       "S. Hochreiter",
+  title =        "{ Untersuchungen zu dynamischen neuronalen Netzen.
+                 Diploma thesis, Institut f\"{u}r Informatik, Lehrstuhl
+                 Prof. Brauer, Technische Universit\"{a}t M\"{u}nchen}",
+  year =         "1991",
+  url =         "http://www7.informatik.tu-muenchen.de/~hochreit",
+}
+
+@Article{Hoerl+Kennard70,
+  author =       "A. Hoerl and R. Kennard",
+  title =        "Ridge regression: biased estimation for non-orthogonal
+                 problems",
+  journal =      "Technometrics",
+  volume =       "12",
+  pages =        "55--67",
+  year =         "1970",
+}
+
+@inproceedings{Hoff-2008,
+ author = {P. D. Hoff},
+ title = {Modeling homophily and stochastic equivalence in symmetric relational data},
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "657--664",
+  year =         "2008",
+}
+
+@InProceedings{Holger-icpr96,
+  author =       "H. Schwenk and M. Milgram",
+  booktitle =    icpr,
+  title =        "Constraint Tangent Distance For On-Line Character
+                 Recognition",
+  pages =        "520--524",
+  year =         "1996",
+}
+
+@InProceedings{Holger-nips96,
+  author =       "H. Schwenk and M. Milgram",
+  editor =       NIPS7ed,
+  booktitle =    NIPS7,
+  title =        "Transformation invariant autoassociation with
+                 application to handwritten character recognition",
+  publisher =    "MIT Press",
+  pages =        "991--998",
+  year =         "1995",
+}
+
+@Book{Holland75,
+  author =       "J. H. Holland",
+  key =          "Holland",
+  title =        "Adaptation in Natural and Artificial Systems",
+  publisher =    "University of Michigan Press",
+  year =         "1975",
+}
+
+@Article{Holley+Karplus89,
+  author =       "L. H. Holley and M. Karplus",
+  title =        "Protein secondary structure prediction with a neural
+                 network",
+  journal =      PNAS,
+  volume =       "86",
+  pages =        "152--156",
+  year =         "1989",
+}
+
+@InCollection{HolTre93,
+  author =       "J. Hollatz and V. Tresp",
+  editor =       "I. Aleksander and J. Taylor",
+  booktitle =    "Artificial Neural Networks II",
+  title =        "A rule-based network architecture",
+  publisher =    "Elsevier",
+  address =      "Amsterdam",
+  year =         "1992",
+}
+
+@TechReport{HolTreAhm92,
+  author =       "J. Hollatz and V. Tresp and S. Ahmad",
+  title =        "Network structuring and training using rule-based
+                 knowledge",
+  type =         "Technical Report",
+  institution =  "Siemens AG",
+  address =      "M{\"u}nchen, Germany",
+  year =         "1992",
+}
+
+@InProceedings{HolubA2005,
+  author =       "Alex Holub and Pietro Perona",
+  booktitle =    cvpr05,
+  title =        "A Discriminative Framework for Modelling Object
+                 Classes",
+  publisher =    "IEEE Computer Society",
+  address =      "Washington, DC, USA",
+  pages =        "664--671",
+  year =         "2005",
+  ISBN =         "0-7695-2372-2",
+  doi =          "10.1109/CVPR.2005.25",
+}
+
+@InCollection{HonglakL2009,
+  author =       "Honglak Lee and Roger Grosse and Rajesh Ranganath and Andrew Y. Ng",
+  booktitle =    ICML09,
+  editor =       ICML09ed,
+  publisher =    ICML09publ,
+  title =        "Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations",
+  pages =        "609--616",
+  address =      "Montreal (Qc), Canada",
+  year =         "2009",
+}
+
+@InCollection{HonglakL2008,
+  author =       "Honglak Lee and Chaitanya Ekanadham and Andrew Ng",
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        "Sparse deep belief net model for visual area {V2}",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "873--880",
+  year =         "2008",
+}
+
+@incollection{HonglakLNIPS2009,
+ title = {Unsupervised feature learning for audio classification using convolutional deep belief networks},
+ author = {Honglak Lee and Peter Pham and Yan Largman and Andrew Ng},
+ booktitle = NIPS22,
+ editor = NIPS22ed,
+ pages = {1096--1104},
+ year = {2009}
+}
+
+@Book{Hopcroft79,
+  author =       "J. E. Hopcroft and J. D. Ullman",
+  title =        "Introduction to Automata Theory, Languages, and
+                 Computation",
+  publisher =    "Addison-Wesley Publishing Company, Inc.",
+  address =      "Reading, MA",
+  year =         "1979",
+}
+
+@Article{Hopfield82,
+  author =       "John J. Hopfield",
+  title =        "Neural Networks and Physical Systems with Emergent Collective Computational Abilities",
+  journal =      PNAS,
+  volume =       "79",
+  pages =        "2554--2558",
+  year =         "1982",
+}
+
+@Article{Hopfield83,
+  author =       "J. J. Hopfield and D. I. Feinstein and R. G. Palmer",
+  title =        "``Unlearning'' Has a Stabilizing Effect in Collective
+                 Memories",
+  journal =      nature,
+  volume =       "304",
+  pages =        "158--159",
+  year =         "1983",
+}
+
+@Article{Hopfield84,
+  author =       "J. J. Hopfield",
+  title =        "Neurons with Graded Responses Have Collective Computational
+                 Properties Like Those of Two-State Neurons",
+  journal =      PNAS,
+  volume =       "81",
+  pages =        "3088--3092",
+  year =         "1984",
+}
+
+@Article{Hopfield85,
+  author =       "J. J. Hopfield and D. W. Tank",
+  title =        "``Neural'' Computation of Decisions in Optimization
+                 Problems",
+  journal =      biocyb,
+  volume =       "52",
+  pages =        "141--152",
+  year =         "1985",
+}
+
+@Article{Hopfield86,
+  author =       "J. J. Hopfield and D. W. Tank",
+  title =        "Computing with Neural Circuits: {A} Model",
+  journal =      science,
+  volume =       "233",
+  pages =        "625--633",
+  year =         "1986",
+}
+
+@Article{Hopfield87,
+  author =       "J. J. Hopfield",
+  title =        "Learning Algorithms and Probability Distributions in
+                 Feed-Forward and Feed-Back Networks",
+  journal =      PNAS,
+  volume =       "84",
+  pages =        "8429--8433",
+  year =         "1987",
+}
+
+@InCollection{Hopfield89,
+  author =       "J. J. Hopfield and D. W. Tank",
+  editor =       "J. H. Byrne and W. O. Berry",
+  booktitle =    "Neural Models of Plasticity",
+  title =        "Neural Architecture and Biophysics for Sequence
+                 Recognition",
+  publisher =    "Academic Press",
+  address =      "San Diego",
+  pages =        "363--377",
+  year =         "1989",
+}
+
+@Article{Hornik89,
+  author =       "Kurt Hornik and Maxwell Stinchcombe and Halbert White",
+  title =        "Multilayer Feedforward Networks Are Universal
+                 Approximators",
+  journal =      nn,
+  volume =       "2",
+  pages =        "359--366",
+  year =         "1989",
+}
+
+@Article{Hotelling1933,
+  author =       "H. Hotelling",
+  title =        "Analysis of a Complex of Statistical Variables into
+                 Principal Components",
+  journal =      "Journal of Educational Psychology",
+  volume =       "24",
+  pages =        "417--441, 498--520",
+  year =         "1933",
+}
+
+@article{Hotelling-1936,
+    author = {H. Hotelling},
+    title = {Relations between two sets of variates},
+    journal = {Biometrika},
+    volume = 28,
+    pages = {321--377},
+    year = 1936,
+}
+
+@TechReport{Houde91,
+  author =       "J. F. Houde",
+  title =        "Recursive estimation of articulatory control",
+  type =         "Computational Cognitive Science",
+  number =       "TR",
+  institution =  "MIT",
+  address =      "Cambridge, MA",
+  year =         "1991",
+}
+
+@InProceedings{Howlett+Lawrence-1995a,
+  author =       "R. J. Howlett and D. H. Lawrence",
+  booktitle =    "World Transputer Congress~'95",
+  title =        "The Class-Distributed Neural Network",
+  address =      "Harrogate, UK",
+  year =         "1995",
+}
+
+@InProceedings{Howlett+Lawrence-1995b,
+  author =       "R. J. Howlett and D. H. Lawrence",
+  booktitle =    "Proceedings of the IEEE International Conference on
+                 Neural Networks",
+  title =        "A Multi-Computer Neural Network Applied to
+                 Machine-Vision",
+  volume =       "2",
+  address =      "Perth, Australia",
+  pages =        "1150--1153",
+  year =         "1995",
+}
+
+@InProceedings{Hsu88,
+  author =       "K. Hsu and D. Brady and D. Psaltis",
+  editor =       nips87ed,
+  booktitle =    nips87,
+  title =        "Experimental Demonstration of Optical Neural
+                 Computers",
+  publisher =    "American Institute of Physics, New York",
+  address =      "Denver, CO",
+  pages =        "377--386",
+  year =         "1988",
+}
+
+@Article{huang04dynamic,
+  author =       "X. Huang and F. Peng and A. An and D. Schuurmans",
+  title =        "Dynamic web log session identification with
+                 statistical language models",
+  journal =      "Journal of the American Society for Information
+                 Science and Technology",
+  volume =       "55",
+  number =       "14",
+  pages =        "1290--1303",
+  year =         "2004",
+}
+
+@Book{Huang87,
+  author =       "K. Huang",
+  title =        "Statistical Mechanics",
+  publisher =    "Wiley",
+  address =      "New York",
+  year =         "1987",
+}
+
+@InProceedings{Huang88,
+  author =       "W. Y. Huang and R. P. Lippmann",
+  editor =       nips87ed,
+  booktitle =    nips87,
+  title =        "Neural Net and Traditional Classifiers",
+  publisher =    "American Institute of Physics, New York",
+  address =      "Denver, CO",
+  pages =        "387--396",
+  year =         "1988",
+}
+
+@TechReport{Huang89,
+  author =       "X. D. Huang and H. W. Hon and K. F. Lee",
+  title =        "Multiple Codebook Semi-Continuous Hidden {Markov}
+                 Models for Speaker-Independent Continuous Speech
+                 Recognition",
+  number =       "CMU-CS-89-136",
+  institution =  "School of Computer Science Carnegie-Mellon
+                 University",
+  address =      "Pittsburgh, Pennsylvania",
+  month =        apr,
+  year =         "1989",
+}
+
+@InProceedings{Huang90,
+  author =       "Xuedong Huang and Kai-Fu Lee and Hsiao-Wuen Hon",
+  booktitle =    icassp,
+  title =        "On Semi-Continuous Hidden {Markov} Modeling",
+  pages =        "689--692",
+  year =         "1990",
+}
+
+@article{Hubel+Wiesel-1959,
+    title = {Receptive Fields of Single Neurons in the Cat's Striate Cortex},
+    author = {David H. Hubel and Torsten N. Wiesel},
+    journal = {Journal of Physiology},
+    pages = {574--591},
+    volume = {148},
+    year = {1959},
+    biburl = {http://www.bibsonomy.org/bibtex/202c5cf1ee910eadba5efa77b3cd043f6/idsia},
+}
+
+@Article{Hubel62,
+  author =       "D. H. Hubel and T. N. Wiesel",
+  title =        "Receptive Fields, Binocular Interaction, and Functional Architecture in the Cat's Visual Cortex",
+  journal =      jphysiol,
+  volume =       "160",
+  pages =        "106--154",
+  year =         "1962",
+}
+
+@article{Hubel+Wiesel-1968,
+ author = {D.H. Hubel and T.N. Wiesel},
+ title = {Receptive fields and functional architecture of monkey striate cortex},
+ journal = jphysiol,
+ volume = 195,
+ pages = {215--243},
+ year = 1968,
+}
+
+@article{Huber-1985,
+    author = {Huber, Peter  J. },
+    comment = {Projection Pursuit},
+    journal = {The Annals of Statistics},
+    number = {2},
+    pages = {435--475},
+    title = {Projection Pursuit},
+    url = {http://www.jstor.org/stable/2241175},
+    volume = {13},
+    year = {1985}
+}
+
+@InProceedings{Hueter88,
+  author =       "G. J. Hueter",
+  booktitle =    icnn,
+  title =        "Solution of the Travelling Salesman Problem with an
+                 Adaptive Ring",
+  volume =       "1",
+  publisher =    "IEEE, New York",
+  address =      "San Diego 1988",
+  pages =        "85--92",
+  year =         "1988",
+}
+
+@InProceedings{Hush88,
+  author =       "D. R. Hush and J. M. Salas",
+  booktitle =    icnn,
+  title =        "Improving the Learning Rate of Back-Propagation with
+                 the Gradient Reuse Algorithm",
+  volume =       "1",
+  publisher =    "IEEE, New York",
+  address =      "San Diego 1988",
+  pages =        "441--447",
+  year =         "1988",
+}
+
+@Article{Hush92,
+  author =       "D. R. Hush and B. Horne and J. M. Salas",
+  title =        "Error Surfaces for Multilayer Perceptrons",
+  journal =      ieeesmc,
+  volume =       "22",
+  number =       "5",
+  pages =        "1152--1161",
+  month =        sep,
+  year =         "1992",
+}
+
+@InCollection{Hutchins+Hazlehurst-02,
+  author =       "Edwin Hutchins and Brian Hazlehurst",
+  editor =       "A. Cangelosi and D. Parisi",
+  booktitle =    "Simulating the Evolution of Language",
+  title =        "Auto-organization and Emergence of Shared Language
+                 Structure",
+  publisher =    "London: Springer-Verlag",
+  pages =        "279--305",
+  year =         "2002",
+}
+
+@InCollection{Hutchins+Hazlehurst-95,
+  author =       "Edwin Hutchins and Brian Hazlehurst",
+  editor =       "N. Gilbert and R. Conte",
+  booktitle =    "Artificial Societies: the computer simulation of
+                 social life",
+  title =        "How to invent a lexicon: the development of shared
+                 symbols in interaction",
+  publisher =    "London: UCL Press",
+  pages =        "157--189",
+  year =         "1995",
+}
+
+@Article{Hutchinson94,
+  author =       "J. M. Hutchinson and A. W. Lo and T. Poggio",
+  title =        "{A Nonparametric Approach to Pricing and Hedging
+                 Derivative Securities Via Learning Networks}",
+  journal =      "Journal of Finance",
+  volume =       "49",
+  number =       "3",
+  pages =        "851--889",
+  year =         "1994",
+}
+
+@Book{Hutter2005,
+  author =       "Marcus Hutter",
+  title =        "Universal Artificial Intelligence: Sequential
+                 Decisions based on Algorithmic Probability",
+  publisher =    "Springer, Berlin",
+  year =         "2005",
+}
+
+@Book{Hwang+al-1992,
+  author =       "Frank K. Hwang and Dana Richards and Pawel Winter",
+  title =        "The {Steiner} Tree Problem",
+  series =       "Annals of Discrete Mathematics",
+  volume =       "53",
+  publisher =    "Elsevier",
+  address =      "Amsterdam",
+  year =         "1992",
+}
+
+@article{Hyvarinen-1999,
+    author = {Hyv{\"a}rinen, A.},
+    journal = {Neural Computing Surveys},
+    keywords = {ica, separation, waspaa07bib},
+    pages = {94--128},
+    title = {Survey on Independent Component Analysis},
+    url = {http://citeseer.ist.psu.edu/223687.html},
+    volume = {2},
+    year = {1999}
+}
+
+@book{Hyvarinen-2001,
+    author = {Hyv{\"{a}}rinen, Aapo   and Karhunen, Juha   and Oja, Erkki  },
+    howpublished = {Hardcover},
+    isbn = {047140540X},
+    month = may,
+    posted-at = {2008-07-02 02:13:00},
+    priority = {2},
+    publisher = {Wiley-Interscience},
+    title = {Independent Component Analysis},
+    url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/047140540X},
+    year = {2001}
+}
+
+@Article{Hyvarinen+al-01,
+  author =       "Aapo Hyv{\"{a}}rinen and Patrik O. Hoyer and Mika
+                 Inki",
+  title =        "Topographic Independent Component Analysis",
+  journal =      "Neural Computation",
+  volume =       "13",
+  number =       "7",
+  pages =        "1527--1558",
+  year =         "2001",
+}
+
+@Article{HyvarinenA2001,
+  author =       "Aapo Hyv{\"{a}}rinen and Patrik O. Hoyer and Mika O.
+                 Inki",
+  title =        "Topographic Independent Component Analysis",
+  journal =      "Neural Computation",
+  volume =       "13",
+  number =       "7",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA, USA",
+  pages =        "1527--1558",
+  year =         "2001",
+  ISSN =         "0899-7667",
+}
+
+@Article{HyvarinenA2001-small,
+  author =       "Aapo Hyv{\"{a}}rinen and Patrik O. Hoyer and Mika O. Inki",
+  title =        "Topographic Independent Component Analysis",
+  journal =      "Neural Computation",
+  volume =       "13",
+  number =       "7",
+  pages =        "1527--1558",
+  year =         "2001",
+}
+
+@Article{Hyvarinen-2005,
+  author =       "Aapo Hyv{\"{a}}rinen ",
+  title =        "Estimation of non-normalized statistical models using score matching",
+  journal =      jmlr,
+  volume =       "6",
+  pages =        "695--709",
+  year =         "2005",
+}
+
+@Article{Hyvarinen-2007,
+  author =       "Aapo Hyv{\"{a}}rinen ",
+  title =        "Some extensions of score matching",
+  journal =      "Computational Statistics and Data Analysis",
+  volume =       "51",
+  pages =        "2499--2512",
+  year =         "2007",
+}
+
+@Article{Hyvarinen-2007b,
+  author =       "Aapo Hyv{\"{a}}rinen ",
+  title =        "Connections between score matching, contrastive divergence, and pseudolikelihood
+                  for continuous-valued variables",
+  journal =      "{IEEE} Transactions on Neural Networks",
+  volume =       "18",
+  pages =        "1529--1531",
+  year =         "2007",
+}
+
+@article{HyvarinenA2008,
+ author = {Hyv{\"a}rinen, Aapo},
+ title = {Optimal approximation of signal priors},
+ journal = {Neural Computation},
+ volume = {20},
+ number = {12},
+ year = {2008},
+ pages = {3087--3110},
+ publisher = {MIT Press},
+ address = {Cambridge, MA, USA},
+ }
+
+@article{kording2004,
+author={Konrad P. K{\"o}rding and Christoph Kayser and Wolfgang
+Einh{\"a}user and Peter K{\"o}nig},
+title = "How Are Complex Cell Properties Adapted to the Statistics of
+Natural Stimuli?",
+year = 2004,
+journal = "Journal of Neurophysiology",
+volume = 91,
+pages = {206--212},
+url="http://jn.physiology.org/cgi/reprint/91/1/206.pdf"
+}
+
+@inproceedings{Koster-Hyvarinen-2007,
+  author = {Urs K{\"{o}}ster and Aapo Hyv{\"{a}}rinen},
+ title = {A two-layer {ICA}-like model estimated by {S}core {M}atching},
+ booktitle = {Int. Conf. Artificial Neural Networks (ICANN'2007)},
+ pages = {798--807},
+ year = 2007,
+}
+
+@article{Iba-2001,
+  author =       "Yukito Iba",
+  title =        "Extended Ensemble Monte Carlo",
+  journal =      "International Journal of Modern Physics",
+  volume =       "C12",
+  pages =        "623--656",
+  year =         "2001",
+}
+
+@InProceedings{icml2009_093,
+  author =    {Hossein Mobahi and Ronan Collobert and Jason Weston},
+  title =     {Deep Learning from Temporal Coherence in Video},
+  booktitle = {Proceedings of the 26th International Conference on Machine Learning},
+  pages =     {737--744},
+  year =      2009,
+  editor =    {L\'{e}on Bottou and Michael Littman},
+  address =   {Montreal},
+  month =     jun,
+  publisher = {Omnipress}
+}
+
+@InProceedings{icann:Holger+Yoshua:1997,
+  author =       "Holger Schwenk and Yoshua Bengio",
+  booktitle =    "International Conference on Artificial Neural
+                 Networks",
+  title =        "{AdaBoosting} Neural Networks: Application to on-line
+                 Character Recognition",
+  publisher =    "Springer Verlag",
+  pages =        "967--972",
+  year =         "1997",
+}
+
+@Article{Ide1998,
+  author =       "Nancy Ide and Jean Veronis",
+  title =        "Introduction to the Special Issue on Word Sense
+                 Disambiguation: The State of the Art",
+  journal =      "Computational Linguistics",
+  volume =       "24",
+  number =       "1",
+  pages =        "1--40",
+  year =         "1998",
+}
+
+@Article{IEEE-KDE:Frasconi95,
+  author =       "P. Frasconi and M. Gori and M. Maggini and G. Soda",
+  title =        "Unified Integration of Explicit Rules and Learning by
+                 Example in Recurrent Networks",
+  journal =      "IEEE Transactions on Knowledge and Data Engineering",
+  volume =       "7",
+  number =       "2",
+  pages =        "340--346",
+  year =         "1995",
+  OPTmonth =     "",
+}
+
+@Article{igel05,
+  author =       "C. Igel and M. Toussaint and W. Weishui",
+  title =        "Rprop using the natural gradient compared to
+                 Levenberg-Marquardt optimization",
+  journal =      "Trends and Applications in Constructive Approximation.
+                 International Series of Numerical Mathematics.",
+  volume =       "151",
+  publisher =    "Birkh{\"a}user Verlag",
+  pages =        "259--272",
+  year =         "2005",
+}
+
+@Article{intrator,
+  author =       "Nathan Intrator and Shimon Edelman",
+  title =        "How to make a low-dimensional representation suitable
+                 for diverse tasks",
+  journal =      "Connection Science, Special issue on Transfer in
+                 Neural Networks",
+  volume =       "8",
+  pages =        "205--224",
+  year =         "1996",
+}
+
+@Article{intrator96,
+  author =       "Nathan Intrator and Shimon Edelman",
+  title =        "How to make a low-dimensional representation suitable
+                 for diverse tasks",
+  journal =      "Connection Science, Special issue on Transfer in
+                 Neural Networks",
+  volume =       "8",
+  pages =        "205--224",
+  year =         "1996",
+}
+
+@Article{Inzenman-91,
+  author =       "A. J. Izenman",
+  title =        "Recent developments in nonparametric density
+                 estimation",
+  journal =      "Journal of the American Statistical Association",
+  volume =       "86",
+  number =       "413",
+  pages =        "205--224",
+  year =         "1991",
+}
+
+@TechReport{IOHMM-TR,
+  author =       "Y. Bengio and P. Frasconi",
+  title =        "An {EM} Approach to Learning Sequential Behavior",
+  number =       "RT-DSI-11/94",
+  institution =  "University of Florence",
+  year =         "1994",
+}
+
+@InProceedings{Irie88,
+  author =       "B. Irie and S. Miyake",
+  booktitle =    "IEEE Second International Conference on Neural
+                 Networks, San Diego",
+  title =        "Capabilities of three layer perceptrons",
+  year =         "1988",
+}
+
+@Article{Irino+Kawahara90,
+  author =       "T. Irino and H. Kawahara",
+  title =        "A Method for Designing Neural Networks Using Nonlinear
+                 Multivariate Analysis: Application to
+                 Speaker-Independent Vowel Recognition",
+  journal =      "Neural Computation",
+  volume =       "2",
+  type =         "Letter",
+  number =       "3",
+  pages =        "386--397",
+  year =         "1990",
+}
+
+@article{ItoM2004,
+	author = {Ito, Minami   and Komatsu, Hidehiko  },
+	citeulike-article-id = {451606},
+	doi = {10.1523/JNEUROSCI.4364-03.2004},
+	journal = {Journal of Neuroscience},
+	keywords = {cnv, v2},
+	month = mar,
+	number = {13},
+	pages = {3313--3324},
+	posted-at = {2007-03-30 11:19:11},
+	priority = {0},
+	title = {Representation of Angles Embedded within Contour Stimuli in Area V2 of Macaque Monkeys},
+	volume = {24},
+	year = {2004}
+}
+	%url = {http://dx.doi.org/10.1523/JNEUROSCI.4364},
+
+@Article{Jaakkola+Jordan99,
+  author =       "T. Jaakkola and M. I. Jordan",
+  title =        "Variational methods and the {QMR}-{DT} database",
+  journal =      "Journal of Artificial Intelligence Research",
+  volume =       "10",
+  pages =        "291--322",
+  year =         "1999",
+}
+
+% Deprecated: the year in this key is wrong; prefer Jaakkola99 (same paper).
+@InProceedings{Jaakkola98,
+  author =       "Tommi S. Jaakkola and David Haussler",
+  editor =       NIPS11ed,
+  booktitle =    NIPS11,
+  title =        "Exploiting generative models in discriminative
+                 classifiers",
+  publisher =    "MIT Press, Cambridge, MA",
+  pages =        "487--493",
+  year =         "1999",
+}
+
+@InProceedings{Jaakkola99,
+  author =       "Tommi S. Jaakkola and David Haussler",
+  editor =       NIPS11ed,
+  booktitle =    NIPS11,
+  title =        "Exploiting generative models in discriminative
+                 classifiers",
+  publisher =    "MIT Press, Cambridge, MA",
+  pages =        "487--493",
+  year =         "1999",
+}
+
+@Misc{jaakkola98exploiting,
+  author =       "T. Jaakkola and D. Haussler",
+  title =        "Exploiting generative models in discriminative
+                 classifiers",
+  year =         "1998",
+  note =         "Preprint, Dept. of Computer Science, Univ. of California.
+                  A shorter version is in Advances in Neural
+                  Information Processing Systems 11",
+  howpublished = "Available from http://www.cse.ucsc.edu/~haussler/pubs.html",
+}
+
+@Article{Jacobs-nc91,
+  author =       "R. A. Jacobs and M. I. Jordan and S. J. Nowlan and G.
+                 E. Hinton",
+  title =        "Adaptive mixtures of local experts",
+  journal =      "Neural Computation",
+  volume =       "3",
+  pages =        "79--87",
+  year =         "1991",
+}
+
+@InCollection{Jacobs-nips91,
+  author =       "R. A. Jacobs and M. I. Jordan",
+  editor =       NIPS3ed,
+  booktitle =    NIPS3,
+  title =        "A competitive modular connectionist architecture",
+  publisher =    "Morgan Kaufmann Publishers",
+  address =      "San Mateo, CA",
+  year =         "1991",
+}
+
+@TechReport{Jacobs-tr90,
+  author =       "R. A. Jacobs and M. I. Jordan and A. G. Barto",
+  title =        "Task Decomposition Through Competition in a Modular
+                 Connectionist Architecture: The {What} and {Where}
+                 Vision Tasks",
+  number =       "COINS 90-27",
+  institution =  "Department of Computer and Information Science, University of Massachusetts",
+  address =      "Amherst, MA",
+  year =         "1990",
+}
+
+@Article{Jacobs88,
+  author =       "R. A. Jacobs",
+  title =        "Increased Rates of Convergence Through Learning Rate
+                 Adaptation",
+  journal =      nn,
+  volume =       "1",
+  pages =        "295--307",
+  year =         "1988",
+}
+
+@Article{Jacobs91a,
+  author =       "Robert A. Jacobs and Michael I. Jordan and Steven J.
+                 Nowlan and Geoffrey E. Hinton",
+  title =        "Adaptive Mixtures of Local Experts",
+  journal =      nc,
+  volume =       "3",
+  pages =        "79--87",
+  year =         "1991",
+}
+
+@Article{Jacobs91b,
+  author =       "R. A. Jacobs and M. I. Jordan and A. G. Barto",
+  title =        "Task Decomposition Through Competition in a Modular
+                 Connectionist Architecture: The What and Where Vision
+                 Task",
+  journal =      "Cognitive Science",
+  volume =       "15",
+  pages =        "219--250",
+  year =         "1991",
+}
+
+@article{Jacobs94,
+  author  = {R. A. Jacobs and S. M. Kosslyn},
+  title   = {Encoding Shape and Spatial Relations: The Role of Receptive Fields in Coordinating Complementary Representations},
+  journal = {Cognitive Science},
+  year    = {1994},
+}
+
+@article{Jaeger-2007,
+  author  = {Herbert Jaeger},
+  title   = {Echo state network},
+  journal = {Scholarpedia},
+  volume  = {2},
+  number  = {9},
+  pages   = {2330},
+  year    = {2007},
+}
+
+@article{Japkowicz2000,
+  author  = {Nathalie Japkowicz and Stephen J. Hanson and Mark A. Gluck},
+  title   = {Nonlinear Autoassociation is not Equivalent to {PCA}},
+  journal = {Neural Computation},
+  volume  = {12},
+  number  = {3},
+  pages   = {531--545},
+  year    = {2000},
+}
+
+@article{Japkowicz2002,
+  author  = {N. Japkowicz and S. Stephen},
+  title   = {The Class Imbalance Problem: {A} Systematic Study},
+  journal = {Intelligent Data Analysis},
+  volume  = {6},
+  number  = {5},
+  year    = {2002},
+}
+
+@inproceedings{Jarrett-ICCV2009,
+  author    = {Jarrett, Kevin and Kavukcuoglu, Koray and Ranzato, {Marc'Aurelio} and {LeCun}, Yann},
+  title     = {What is the Best Multi-Stage Architecture for Object Recognition?},
+  booktitle = {Proc. International Conference on Computer Vision (ICCV'09)},
+  publisher = {IEEE},
+  year      = {2009},
+  original  = {orig/jarrett-iccv-09.pdf},
+}
+
+@techreport{Jauvin+Bengio-TR2003,
+  author      = {Christian Jauvin and Yoshua Bengio},
+  title       = {A Sense-Smoothed Bigram Language Model},
+  institution = {Dept. IRO, Universit\'e de Montr\'eal},
+  number      = {1233},
+  year        = {2003},
+}
+
+@book{Jaynes03,
+  author    = {E. T. Jaynes},
+  title     = {{Probability} {Theory}: {The} {Logic} of {Science}},
+  publisher = {Cambridge University Press},
+  year      = {2003},
+}
+
+% The editor was previously stored under the unknown field name "editors",
+% which BibTeX silently ignores, so it never appeared in the output.
+@InCollection{Jaynes83,
+  author =       "E. T. Jaynes",
+  editor =       "R. D. Rosenkrantz",
+  booktitle =    "Papers on Probability, Statistics and Statistical
+                 Physics",
+  title =        "{Bayesian} intervals versus confidence intervals",
+  publisher =    "Kluwer",
+  year =         "1983",
+}
+
+@Article{JCB:Baldi95t,
+  author =       "Y. Chauvin and P. Baldi",
+  title =        "Hidden {Markov} models of the {G}-Protein-Coupled
+                 receptor family",
+  journal =      "Journal of Computational Biology",
+  year =         "1995",
+}
+
+@InProceedings{jebara03,
+  author =       "Tony Jebara and Risi Kondor",
+  booktitle =    colt03,
+  title =        "{Bhattacharyya} and Expected Likelihood Kernels",
+  year =         "2003",
+}
+
+@InProceedings{Jebara03Convex,
+  author =       "T. Jebara",
+  booktitle =    "Proceedings of AISTATS 2003",
+  title =        "Convex Invariance Learning",
+  year =         "2003",
+}
+
+@InProceedings{jebara04,
+  author =       "Tony Jebara",
+  booktitle =    ICML04,
+  editor =       ICML04ed,
+  publisher =    ICML04publ,
+  title =        "Multi-task feature and kernel selection for {SVMs}",
+  address =      "New York, NY, USA",
+  year =         "2004",
+  location =     "Banff, Alberta, Canada",
+}
+
+@Book{JebaraT2003,
+  author =       "Tony Jebara",
+  title =        "Machine Learning: Discriminative and Generative",
+  series =       "The Kluwer International Series in Engineering and
+                 Computer Science",
+  howpublished = "Hardcover",
+  publisher =    "Springer",
+  month =        dec,
+  year =         "2003",
+  citeulike-article-id = "134203",
+  comment =      "- maximum entropy discriminative as unification of
+                 discriminative and generative approaches",
+  keywords =     "book, generative-discriminative, svm",
+  priority =     "2",
+}
+  %ISBN =         "1402076479",
+  %URL =          "http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20&path=ASIN/1402076479",
+
+% Same work as the entry keyed Jelinek80; fields kept consistent with it.
+@InCollection{Jelinek+Mercer80,
+  author =       "F. Jelinek and R. L. Mercer",
+  editor =       "E. S. Gelsema and L. N. Kanal",
+  booktitle =    "Pattern Recognition in Practice",
+  title =        "Interpolated estimation of {Markov} source parameters
+                 from sparse data",
+  publisher =    "North-Holland",
+  address =      "Amsterdam",
+  year =         "1980",
+}
+
+@inproceedings{Jelinek-Chelba-99,
+  author    = {Frederick Jelinek and Ciprian Chelba},
+  title     = {Putting language into language modeling},
+  booktitle = {European Conference on Speech Communication and Technology},
+  volume    = {1},
+  pages     = {KN1--KN5},
+  address   = {Budapest},
+  year      = {1999},
+}
+
+@article{Jelinek76,
+  author  = {F. Jelinek},
+  title   = {Continuous speech recognition by statistical methods},
+  journal = {Proceedings of the IEEE},
+  volume  = {64},
+  pages   = {532--556},
+  year    = {1976},
+}
+
+% "copy" is a local bookkeeping field; its value is quoted because a bare
+% yes would be read as an (undefined) @string macro.
+@InCollection{Jelinek80,
+  author =       "F. Jelinek and R. L. Mercer",
+  editor =       "E. S. Gelsema and L. N. Kanal",
+  booktitle =    "Pattern Recognition in Practice",
+  title =        "Interpolated Estimation of {Markov} Source Parameters
+                 from Sparse Data",
+  publisher =    "North-Holland",
+  address =      "Amsterdam",
+  year =         "1980",
+  copy =         "yes",
+}
+
+@Book{Jelinek98,
+  author =       "F. Jelinek",
+  title =        "Statistical Methods for Speech Recognition",
+  publisher =    "MIT Press",
+  address =      "Cambridge, Massachusetts",
+  year =         "1998",
+}
+
+@inproceedings{JensenRiis2000,
+  author    = {K. J. Jensen and S. Riis},
+  title     = {Self-organizing letter code-book for text-to-phoneme neural network model},
+  booktitle = {International Conference on Spoken Language Processing},
+  volume    = {3},
+  pages     = {318--321},
+  year      = {2000},
+}
+
+@inproceedings{Jeong96,
+  author    = {E. Jeong and K. Furuta and S. Kondo},
+  title     = {Identification of Transient in Nuclear Power Plant using Adaptive Template Matching with Neural Network},
+  booktitle = nipc-hmit96,
+  volume    = {1},
+  pages     = {243--250},
+  publisher = ans,
+  year      = {1996},
+}
+
+@incollection{joachims99largescaleSVM,
+  author    = {T. Joachims},
+  title     = {Making large-Scale {SVM} Learning Practical},
+  booktitle = {Advances in Kernel Methods --- Support Vector Learning},
+  editor    = {B. {Sch\"olkopf} and C. J. C. Burges and A. J. Smola},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1999},
+}
+
+@inproceedings{joachims99transductive,
+  author    = {Thorsten Joachims},
+  title     = {Transductive Inference for Text Classification using Support Vector Machines},
+  booktitle = ICML99,
+  editor    = ICML99ed,
+  publisher = ICML99publ,
+  address   = {Bled, SL},
+  pages     = {200--209},
+  year      = {1999},
+}
+  %URL =          "citeseer.ist.psu.edu/joachims99transductive.html",
+
+@techreport{Johansson90,
+  author      = {E. M. Johansson and F. U. Dowla and D. M. Goodman},
+  title       = {Backpropagation learning for multi-layer feed-forward neural networks using the conjugate gradient method},
+  institution = {Lawrence Livermore National Laboratory},
+  number      = {UCRL-JC-104850},
+  month       = sep,
+  year        = {1990},
+}
+
+@inproceedings{John+al-1994,
+  author    = {John, George  H.  and Kohavi, Ron  and Pfleger, Karl},
+  title     = {Irrelevant Features and the Subset Selection Problem},
+  booktitle = {Proceedings of the Eleventh International Conference on Machine Learning},
+  pages     = {121--129},
+  publisher = {Morgan Kaufmann},
+  year      = {1994},
+  url       = {http://citeseer.ist.psu.edu/john94irrelevant.html},
+}
+
+@article{Johnson89,
+  author  = {D. S. Johnson and C. R. Aragon and L. A. McGeoch and C. Schevon},
+  title   = {Optimization by Simulated Annealing: An Experimental Evaluation; Part {I}, Graph Partitioning},
+  journal = opres,
+  volume  = {37},
+  pages   = {865--891},
+  year    = {1989},
+}
+
+@inproceedings{Joines92QQ23,
+  author    = {J. A. Joines and M. W. White},
+  title     = {Improved Generalization Using Robust Cost Functions},
+  booktitle = {IJCNN},
+  address   = {Baltimore, Maryland},
+  pages     = {911--918},
+  month     = jun,
+  year      = {1992},
+  ref       = {QQ23},
+}
+
+@book{Jolliffe86,
+  author    = {Ian T. Jolliffe},
+  title     = {Principal Component Analysis},
+  publisher = {Springer-Verlag},
+  address   = {New York},
+  year      = {1986},
+}
+
+@book{Jolliffe-2002,
+    author = {Ian T. Jolliffe},
+    citeulike-article-id = {1154147},
+    howpublished = {Hardcover},
+    isbn = {0387954422},
+    month = oct,
+    posted-at = {2007-03-11 15:04:57},
+    priority = {2},
+    publisher = {Springer},
+    title = {Principal Component Analysis},
+    url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0387954422},
+    year = {2002}
+}
+
+@Article{Jordan+Jacobs94,
+  author =       "M. I. Jordan and R. A. Jacobs",
+  title =        "Hierarchical mixtures of experts and the {EM}
+                 algorithm",
+  journal =      nc,
+  volume =       "6",
+  pages =        "181--214",
+  year =         "1994",
+}
+
+@techreport{Jordan+Xu93,
+  author      = {Michael I. Jordan and L. Xu},
+  title       = {Convergence results for the {EM} approach to mixtures of experts architecture},
+  institution = {MIT Computational Cognitive Science},
+  number      = {9303},
+  month       = sep,
+  year        = {1993},
+}
+
+@article{Jordan-cs92,
+  author  = {M. I. Jordan and D. E. Rumelhart},
+  title   = {Forward models: Supervised learning with a distal teacher},
+  journal = {Cognitive Science},
+  volume  = {16},
+  pages   = {307--354},
+  year    = {1992},
+}
+
+@InProceedings{Jordan-HMDT97,
+  author =       "M. Jordan and Z. Ghahramani and L. Saul",
+  editor =       NIPS9ed,
+  booktitle =    NIPS9,
+  title =        "Hidden {Markov} decision trees",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "1997",
+}
+
+@inproceedings{Jordan-nips92,
+  author    = {M. I. Jordan and R. A. Jacobs},
+  title     = {Hierarchies of adaptive experts},
+  booktitle = NIPS4,
+  editor    = NIPS4ed,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  pages     = {985--992},
+  year      = {1992},
+}
+
+@techreport{Jordan-tr86,
+  author      = {M. I. Jordan},
+  title       = {Serial Order: a Parallel Distributed Processing Approach},
+  institution = {ICS (Institute for Cognitive Science, University of California)},
+  number      = {8604},
+  year        = {1986},
+}
+
+% "address" holds the publisher's city; the conference venue is kept in the
+% non-standard "location" field, as in the entry keyed jebara04.
+@InProceedings{Jordan86,
+  author =       "M. I. Jordan",
+  booktitle =    "Proceedings of the Eighth Annual Conference of the
+                 Cognitive Science Society",
+  title =        "Attractor Dynamics and Parallelism in a Connectionist
+                 Sequential Machine",
+  publisher =    "Lawrence Erlbaum",
+  address =      "Hillsdale, NJ",
+  pages =        "531--546",
+  year =         "1986",
+  location =     "Amherst, MA",
+}
+
+% The report number omits the words "Technical Report" because standard
+% styles already print that label for @TechReport entries.
+% NOTE(review): "COINS" report numbers are usually attributed to the
+% University of Massachusetts, Amherst -- confirm institution/address.
+@TechReport{Jordan88,
+  author =       "M. I. Jordan",
+  title =        "Supervised Learning and Systems with Excess Degrees of
+                 Freedom",
+  number =       "COINS 88-27",
+  institution =  "MIT",
+  address =      "Cambridge MA",
+  year =         "1988",
+}
+
+@incollection{Jordan89,
+  author    = {M. I. Jordan},
+  title     = {Serial Order: {A} Parallel, Distributed Processing Approach},
+  booktitle = {Advances in Connectionist Theory: Speech},
+  editor    = {J. L. Elman and D. E. Rumelhart},
+  publisher = {Lawrence Erlbaum},
+  address   = {Hillsdale},
+  year      = {1989},
+}
+
+@inproceedings{Jordan89b,
+  author    = {M. I. Jordan},
+  title     = {Supervised learning and systems with excess degrees of freedom},
+  booktitle = {Proceedings of the 1988 Connectionist Models Summer School},
+  editor    = {G. Hinton and D. S. Touretzky},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1989},
+}
+
+@InCollection{Jordan90,
+  author =       "M. I. Jordan",
+  editor =       "M. Jeannerod",
+  booktitle =    "Attention and Performance XIII",
+  title =        "Motor learning and the degrees of freedom problem",
+  publisher =    "Lawrence Erlbaum",
+  address =      "Hillsdale, NJ",
+  year =         "1990",
+}
+
+% Edited volume: the name is given as editor rather than author.
+@Book{Jordan98,
+  editor =       "M. I. Jordan",
+  title =        "Learning in Graphical Models",
+  publisher =    "Kluwer",
+  address =      "Dordrecht, Netherlands",
+  year =         "1998",
+}
+
+% Same work as the entry keyed Jour-Freund-AdaBoostDetailed.
+@Article{Jour:Freund:AdaBoostDetailed,
+  author =       "Yoav Freund and Robert E. Schapire",
+  title =        "A decision theoretic generalization of on-line
+                 learning and an application to Boosting",
+  journal =      "Journal of Computer and System Sciences",
+  volume =       "55",
+  number =       "1",
+  pages =        "119--139",
+  year =         "1997",
+}
+
+@article{Jour:Freund:boost,
+  author  = {Yoav Freund},
+  title   = {Boosting a weak learning algorithm by majority},
+  journal = {Information and Computation},
+  volume  = {121},
+  number  = {2},
+  pages   = {256--285},
+  year    = {1995},
+}
+
+% Same work as the entry keyed Jour:Freund:AdaBoostDetailed.
+@Article{Jour-Freund-AdaBoostDetailed,
+  author =       "Yoav Freund and Robert E. Schapire",
+  title =        "A decision theoretic generalization of on-line
+                 learning and an application to Boosting",
+  journal =      "Journal of Computer and System Sciences",
+  volume =       "55",
+  number =       "1",
+  pages =        "119--139",
+  year =         "1997",
+}
+
+@PhdThesis{Jouvet88,
+  author =       "D. Jouvet",
+  title =        "Reconnaissance de Mots Connect{\'e}s Ind{\'e}pendamment du
+                 Locuteur par des M{\'e}thodes Statistiques",
+  number =       "NST-88E006",
+  school =       "{\'E}cole Nationale Sup{\'e}rieure des
+                 T{\'e}l{\'e}communications",
+  year =         "1988",
+}
+
+@inproceedings{JuanA2001,
+ author = {Alfons Juan and Enrique Vidal},
+ title = {On the use of {Bernoulli} Mixture Models for Text Classification},
+ booktitle = {PRIS '01: Proceedings of the 1st International Workshop on Pattern Recognition in Information Systems},
+ year = {2001},
+ pages = {118--126},
+ publisher = {ICEIS Press},
+ }
+
+@inproceedings{JuanA2004,
+ author = {Alfons Juan and Enrique Vidal},
+ title = {{Bernoulli} Mixture Models for Binary Images},
+ booktitle = {Proceedings of the 17th International Conference on Pattern Recognition (ICPR'04)},
+ volume = {3},
+ year = {2004},
+ pages = {367--370},
+ publisher = {IEEE Computer Society},
+ address = {Washington, DC, USA},
+ }
+
+@article{Juang92,
+  author  = {B. H. Juang and S. Katagiri},
+  title   = {Discriminative learning for minimum error classification},
+  journal = {IEEE Transactions on Signal Processing},
+  volume  = {40},
+  number  = {12},
+  pages   = {3043--3054},
+  year    = {1992},
+}
+
+@article{Judd88,
+  author  = {S. Judd},
+  title   = {On the complexity of loading shallow neural networks},
+  journal = {Journal of Complexity},
+  volume  = {4},
+  pages   = {177--192},
+  year    = {1988},
+}
+
+@Book{JuddBook,
+  author =       "J. S. Judd",
+  title =        "Neural Network Design and the Complexity of Learning",
+  publisher =    "MIT Press",
+  year =         "1989",
+}
+
+@book{Jurafsky+Martin-2008,
+    author = {Jurafsky, Daniel and Martin, James  H.},
+    howpublished = {Hardcover},
+    month = jan,
+    publisher = {Prentice Hall},
+    edition = {Second},
+    title = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition},
+    year = {2008}
+}
+
+@article{Jutten+Herault-91,
+  author  = {Christian Jutten and Jeanny Herault},
+  title   = {Blind separation of sources, part {I}: an adaptive algorithm based on neuromimetic architecture},
+  journal = {Signal Processing},
+  volume  = {24},
+  pages   = {1--10},
+  year    = {1991},
+}
+
+% "address" holds the publisher's city; the conference venue is kept in the
+% non-standard "location" field, as in the entry keyed jebara04.
+@InProceedings{Kahng89,
+  author =       "A. B. Kahng",
+  booktitle =    ijcnn,
+  title =        "Travelling Salesman Heuristics and Embedding Dimension
+                 in the {Hopfield} Model",
+  volume =       "1",
+  publisher =    "IEEE",
+  address =      "New York",
+  pages =        "513--520",
+  year =         "1989",
+  location =     "Washington",
+}
+
+% Names use the "Last, First" form: without the comma BibTeX would treat
+% the family names (Yu, Schwaighofer, ...) as given names.
+@InProceedings{kai03,
+  author =       "Yu, Kai and Schwaighofer, Anton and Tresp, Volker and
+                 Ma, Wei-Ying and Zhang, HongJiang",
+  booktitle =    UAI03,
+  title =        "Collaborative Ensemble Learning: Combining
+                 Collaborative and Content-Based Information Filtering
+                 via Hierarchical {Bayes}",
+  publisher =    "Morgan Kaufmann Publishers",
+  address =      "San Francisco, CA",
+  pages =        "616--623",
+  year =         "2003",
+}
+
+@article{Kalman61,
+  author  = {R. Kalman and R. S. Bucy},
+  title   = {New results in linear filtering and prediction},
+  journal = {Journal of Basic Engineering (ASME)},
+  volume  = {83D},
+  pages   = {95--108},
+  year    = {1961},
+}
+
+@article{Kambhatla+Leen-1997,
+  author  = {Kambhatla, N.  and Leen, T. K. },
+  title   = {Dimension Reduction by Local Principal Component Analysis},
+  journal = {Neural Computation},
+  volume  = {9},
+  pages   = {1493--1516},
+  year    = {1997},
+}
+
+@article{Kammen88,
+  author  = {D. M. Kammen and A. L. Yuille},
+  title   = {Spontaneous Symmetry-Breaking Energy Functions and the Emergence of Orientation Selective Cortical Cells},
+  journal = biocyb,
+  volume  = {59},
+  pages   = {23--31},
+  year    = {1988},
+}
+
+@inproceedings{Kammerer89,
+  author    = {B. K. Kammerer and W. A. Kupper},
+  title     = {Design of Hierarchical Perceptron Structures and their Application to the Task of Isolated Word Recognition},
+  booktitle = ijcnn,
+  address   = {Washington D.C.},
+  year      = {1989},
+}
+
+@Book{Kandel85,
+  author =       "E. R. Kandel and J. H. Schwartz",
+  title =        "Principles of Neural Science",
+  publisher =    "Elsevier",
+  address =      "New York",
+  edition =      "Second",
+  year =         "1985",
+}
+
+@article{Kanter87,
+  author  = {I. Kanter and H. Sompolinsky},
+  title   = {Associative Recall of Memory Without Errors},
+  journal = prA,
+  volume  = {35},
+  pages   = {380--392},
+  year    = {1987},
+}
+
+@inproceedings{KarklinY2003,
+  author    = {Yan Karklin and
+               Michael S. Lewicki},
+  title     = {A Model for Learning Variance Components of Natural Images},
+  year      = {2003},
+  pages     = {1367--1374},
+  editor =       NIPS15ed,
+  booktitle =    NIPS15,
+  publisher =    "{MIT} Press",
+}
+
+% The citation key keeps its historical spelling so existing \cite
+% commands continue to resolve; the author's surname is Karnin.
+@Article{Karmin90,
+  author =       "E. D. Karnin",
+  title =        "A simple procedure for pruning back-propagation
+                 trained neural networks",
+  journal =      ieeetrnn,
+  volume =       "1",
+  number =       "2",
+  pages =        "239--242",
+  year =         "1990",
+}
+
+@Article{Karplus97,
+  author =       "K. Karplus and K. Sj{\"o}lander and C. Barrett and M.
+                 Cline and D. Haussler and R. Hughey and L. Holm and C.
+                 Sander",
+  title =        "Predicting protein structure using hidden {Markov}
+                 models",
+  journal =      "Proteins: Structure, Function and Genetics",
+  volume =       "S 1",
+  number =       "1",
+  pages =        "134--139",
+  year =         "1997",
+}
+
+@phdthesis{KasselR1995,
+  author = "Robert Kassel",
+  title  = "A Comparison of Approaches to On-line Handwritten Character Recognition",
+  school = "MIT Spoken Language Systems Group",
+  year   = "1995",
+}
+
+@article{Katz87,
+  author  = {Slava M. Katz},
+  title   = {Estimation of Probabilities from Sparse Data for the Language Model Component of a Speech Recognizer},
+  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
+  volume  = {ASSP-35},
+  number  = {3},
+  pages   = {400--401},
+  month   = mar,
+  year    = {1987},
+}
+
+@incollection{Kaul,
+  author    = {G. Kaul},
+  title     = {Predictable Components in Stock Returns},
+  booktitle = {Handbook of Statistics, Vol. 14},
+  editor    = {G. S. Maddala and C. R. Rao},
+  publisher = {Elsevier Science},
+  pages     = {269--296},
+  year      = {1996},
+}
+
+@inproceedings{kbnn-craven.mlc93,
+  author    = {Mark W. Craven and Jude W. Shavlik},
+  title     = {Learning Symbolic Rules Using Artificial Neural Networks},
+  booktitle = {Proceedings of the Tenth International Conference on Machine Learning},
+  publisher = {Morgan Kaufmann},
+  address   = {Amherst, MA},
+  pages     = {73--80},
+  year      = {1993},
+}
+
+@inproceedings{kbnn-maclin.aaai92,
+  author    = {R. Maclin and J. Shavlik},
+  title     = {Using Knowledge-Based Neural Networks to Improve Algorithms: Refining the Chou-Fasman Algorithm for Protein Folding},
+  booktitle = {Proceedings of the Tenth National Conference on Artificial Intelligence},
+  address   = {San Jose, CA},
+  pages     = {165--170},
+  year      = {1992},
+}
+
+@techreport{kbnn-maclin.mlrgwp91,
+  author      = {R. Maclin and J. W. Shavlik},
+  title       = {Refining Algorithms with Knowledge-Based Neural Networks: Improving the Chou-Fasman Algorithm for Protein Folding},
+  institution = {Department of Computer Sciences, University of Wisconsin},
+  number      = {Machine Learning Research Group Working Paper 91-2},
+  year        = {1991},
+  note        = {also in Computational Learning Theory and Natural Learning Systems, volume 1, S. Hanson, G. Drastal, and R. Rivest, (eds.), MIT Press},
+}
+
+% "address" is the publisher's city, matching the other NIPS3 entries in
+% this file (e.g. Jacobs-nips91).
+@InProceedings{kbnn-noordewier.nips3,
+  author =       "Michiel O. Noordewier and Geoffrey G. Towell and Jude
+                 W. Shavlik",
+  editor =       NIPS3ed,
+  booktitle =    NIPS3,
+  title =        "Training Knowledge-Based Neural Networks to Recognize
+                 Genes in {DNA} Sequences",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "530--536",
+  year =         "1991",
+}
+
+@inproceedings{kbnn-opitz.ijcai93,
+  author    = {D. W. Opitz and J. W. Shavlik},
+  title     = {Heuristically Expanding Knowledge-Based Neural Networks},
+  booktitle = {Proceedings of the Thirteenth International Joint Conference on Artificial Intelligence},
+  address   = {Chambery, France},
+  month     = sep,
+  year      = {1993},
+}
+
+@techreport{kbnn-opitz.mlrgwp92,
+  author      = {D. W. Opitz and J. W. Shavlik},
+  title       = {Using Heuristic Search to Expand Knowledge-Based Neural Networks},
+  institution = {Department of Computer Sciences, University of Wisconsin},
+  number      = {Machine Learning Research Group Working Paper 92-1},
+  year        = {1992},
+  note        = {(also in Computational Learning Theory and Natural Learning Systems, volume 3, T. Petsche, S. Judd, and S. Hanson, (eds.), MIT Press)},
+}
+
+@techreport{kbnn-shavlik.tr92,
+  author      = {J. W. Shavlik},
+  title       = {A Framework for Combining Symbolic and Neural Learning},
+  institution = {Department of Computer Sciences, University of Wisconsin},
+  number      = {UW TR 1123},
+  year        = {1992},
+  note        = {(a shorter version will appear in Machine Learning)},
+}
+
+@inproceedings{kbnn-towell.aaai90,
+  author    = {G. G. Towell and J. W. Shavlik and M. O. Noordewier},
+  title     = {Refinement of Approximate Domain Theories by Knowledge-Based Neural Networks},
+  booktitle = {Proceedings of the Eighth National Conference on Artificial Intelligence},
+  address   = {Boston, MA},
+  pages     = {861--866},
+  year      = {1990},
+}
+
+@inproceedings{kbnn-towell.aaai92,
+  author    = {G. Towell and J. Shavlik},
+  title     = {Using Symbolic Learning to Improve Knowledge-Based Neural Networks},
+  booktitle = {Proceedings of the Tenth National Conference on Artificial Intelligence},
+  address   = {San Jose, CA},
+  pages     = {177--182},
+  year      = {1992},
+}
+
+@article{kbnn-towell.aij94,
+  author  = {Geoffrey G. Towell and Jude W. Shavlik},
+  title   = {Knowledge-Based Neural Networks},
+  journal = {Artificial Intelligence},
+  year    = {1994},
+  note    = {undergoing 2nd review},
+}
+
+@incollection{kbnn-towell.ml493,
+  author    = {Geoffrey G. Towell and Jude W. Shavlik},
+  title     = {Refining Symbolic Knowledge Using Neural Networks},
+  booktitle = {Machine Learning: An Integrated Approach},
+  editor    = {R. S. Michalski and G. Tecuci},
+  volume    = {4},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@inproceedings{kbnn-towell.mlc91,
+  author    = {Geoffrey G. Towell and Mark W. Craven and Jude W. Shavlik},
+  title     = {Constructive Induction in Knowledge-Based Neural Networks},
+  booktitle = {Proceedings of the Eighth International Machine Learning Workshop},
+  publisher = {Morgan Kaufmann},
+  address   = {Evanston, IL},
+  pages     = {213--217},
+  year      = {1991},
+}
+
+@article{kbnn-towell.mlj93,
+  author  = {Geoffrey G. Towell and Jude W. Shavlik},
+  title   = {The Extraction of Refined Rules from Knowledge-Based Neural Networks},
+  journal = {Machine Learning},
+  volume  = {13},
+  number  = {1},
+  pages   = {71--101},
+  year    = {1993},
+}
+
+% "address" is the publisher's city, matching the other NIPS4 entry in
+% this file (Jordan-nips92).
+@InProceedings{kbnn-towell.nips4,
+  author =       "Geoffrey G. Towell and Jude W. Shavlik",
+  editor =       NIPS4ed,
+  booktitle =    NIPS4,
+  title =        "Interpretation of Artificial Neural Networks: Mapping
+                 knowledge-based Neural Networks into Rules",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  year =         "1992",
+}
+
+@phdthesis{kbnn-towell.thesis,
+  author = {Geoffrey G. Towell},
+  title  = {Symbolic Knowledge and Neural Networks: Insertion, Refinement and Extraction},
+  school = {University of Wisconsin -- Madison},
+  year   = {1991},
+  note   = {(Also appears as UW Technical Report 1072 [out of print].)},
+}
+
+@InProceedings{Kearns+Ron97,
+  author =       "Michael Kearns and Dana Ron",
+  booktitle =    "Tenth Annual Conference on Computational Learning
+                 Theory",
+  title =        "Algorithmic Stability and Sanity-Check Bounds for
+                 Leave-One-Out Cross-Validation",
+  publisher =    "Morgan Kaufmann",
+  pages =        "152--162",
+  year =         "1997",
+}
+
+% Three authors, separated by " and " so BibTeX parses each name
+% individually.
+@InCollection{keeler-rumelhart-91,
+  author =       "J. Keeler and D. Rumelhart and W.-K. Leow",
+  editor =       NIPS3ed,
+  booktitle =    NIPS3,
+  title =        "Integrated segmentation and recognition of
+                 hand-printed numerals",
+  publisher =    "Morgan Kaufmann Publishers",
+  address =      "San Mateo, CA",
+  pages =        "557--563",
+  year =         "1991",
+}
+
+@article{Keerthi+Lin-2003,
+  author  = {S. Sathiya Keerthi and Chih-Jen Lin},
+  title   = {Asymptotic Behaviors of Support Vector Machines with {Gaussian} Kernel},
+  journal = {Neural Computation},
+  volume  = {15},
+  number  = {7},
+  pages   = {1667--1689},
+  year    = {2003},
+}
+
+% Same work as the entry keyed Kegl2003. Accents are written as {\'x} so
+% BibTeX treats each as a single letter when sorting and building labels.
+@InCollection{Kegl-2003,
+  author =       "Bal{\'a}zs K{\'e}gl",
+  editor =       NIPS15ed,
+  booktitle =    NIPS15,
+  title =        "Intrinsic Dimension Estimation Using Packing Numbers",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "681--688",
+  year =         "2003",
+}
+
+@Article{Kegl-Krzyzak-2002,
+  author =       "B. K{\'e}gl and A. Krzy{\.z}ak",
+  title =        "Piecewise linear skeletonization using principal
+                 curves",
+  journal =      "{IEEE} Transactions on Pattern Analysis and Machine
+                 Intelligence",
+  volume =       "24",
+  number =       "1",
+  pages =        "59--74",
+  year =         "2002",
+}
+
+% Same work as the entry keyed Kegl-2003; address and pages copied from it.
+@InProceedings{Kegl2003,
+  author =       "B. K{\'e}gl",
+  editor =       NIPS15ed,
+  booktitle =    NIPS15,
+  title =        "Intrinsic dimension estimation using packing numbers",
+  publisher =    "The {MIT} Press",
+  address =      "Cambridge, MA",
+  pages =        "681--688",
+  year =         "2003",
+}
+
+@InCollection{kegl2005,
+  author =       "Bal{\'a}zs K{\'e}gl and Ligen Wang",
+  editor =       NIPS17ed,
+  booktitle =    NIPS17,
+  title =        "Boosting on Manifolds: Adaptive Regularization of Base
+                 Classifiers",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2005",
+}
+
+@techreport{Kehagias89,
+  author      = {A. Kehagias},
+  title       = {Stochastic Recurrent Networks: Prediction and Classification of Time Series},
+  institution = {Brown University. Division of Applied Mathematics},
+  address     = {Providence, RI 02912},
+  year        = {1991},
+}
+
+% Springer-Verlag was previously stored under the unknown field name
+% "teditor" and therefore never printed; it is the publisher. The LNCS
+% series name and number are split into "series" and "volume".
+@InProceedings{KellerM2005,
+  author =       "M. Keller and S. Bengio",
+  booktitle =    "Proceedings of the 15th International Conference on
+                 Artificial Neural Networks: Biological Inspirations
+                 (ICANN)",
+  title =        "A neural network for text representation",
+  series =       "Lecture Notes in Computer Science",
+  volume =       "3697",
+  publisher =    "Springer-Verlag",
+  pages =        "667--672",
+  year =         "2005",
+}
+
+% NOTE(review): the key says "Keller" but the first author is Heller; the
+% key is left unchanged so existing citations keep working.
+@inproceedings{Keller2007,
+ author = {Katherine A. Heller and Zoubin Ghahramani},
+ booktitle =    aistats07,
+ year = 2007,
+ title = {A Nonparametric {Bayesian} Approach to Modeling Overlapping Clusters},
+ publisher =    "Omnipress",
+ date =         "March 21-24, 2007",
+ address =      "San Juan, Puerto Rico",
+ pages =        "187--194",
+}
+
+@inproceedings{Keller2008,
+  author    = {Katherine A. Heller and Sinead Williamson and Zoubin Ghahramani},
+  title     = {Statistical models for partial membership},
+  booktitle = ICML08,
+  editor    = ICML08ed,
+  publisher = ICML08publ,
+  pages     = {392--399},
+  year      = {2008},
+  location  = {Helsinki, Finland},
+}
+
+@Book{Kelly1975,
+  author =       "Edward Kelly and Philip Stone",
+  title =        "Computer recognition of {English} word senses",
+  publisher =    "North-Holland Linguistics Series",
+  year =         "1975",
+}
+
+@InProceedings{Kemp+al-2004,
+  author =       "C. Kemp and T. L. Griffiths and S. Stromsten and J. B.
+                 Tenenbaum",
+  editor =       NIPS16ed,
+  booktitle =    NIPS16,
+  title =        "Semi-supervised learning with trees",
+  publisher =    "{MIT} Press",
+  address =      "Cambridge, MA",
+  year =         "2004",
+}
+
+@inproceedings{Kerr2007,
+  author    = "Wesley Kerr and Shane Hoversten and Daniel Hewlett and Paul R. Cohen and Yu-Han Chang",
+  title     = "Learning in Wubble World",
+  booktitle = "Proceedings of the IEEE Int. Conference on Development and Learning",
+  year      = "2007",
+}
+
+@article{Kerszberg90,
+  author  = {M. Kerszberg and A. Zippelius},
+  title   = {Synchronization in Neural Assemblies},
+  journal = pscrip,
+  volume  = {T33},
+  pages   = {54--64},
+  year    = {1990},
+}
+
+@inproceedings{Keysers2000,
+  author    = {D. Keysers and J. Dahmen and H. Ney},
+  title     = {A probabilistic view on tangent distance},
+  booktitle = {22nd Symposium of the German Association for Pattern Recognition},
+  address   = {Kiel, Germany},
+  year      = {2000},
+}
+
+@book{Khalil92,
+  author    = {Hassan K. Khalil},
+  title     = {Nonlinear Systems},
+  publisher = {Macmillan Publishing Company},
+  address   = {New York},
+  year      = {1992},
+}
+
+@Book{Kiang65,
+  author    = {N. Y. S. Kiang and T. Watanabe and E. C. Thomas and L. F. Clark},
+  title     = {Discharge patterns of single fibers in the cat's auditory nerve fiber},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {1965},
+}
+
+@article{Kiefer80,
+  author  = {N. M. Kiefer},
+  title   = {A note on switching regressions and logistic discrimination},
+  journal = {Econometrica},
+  volume  = {48},
+  pages   = {1065--1069},
+  year    = {1980},
+}
+
+@misc{Kilgarriff2000,
+  author = {Adam Kilgarriff and Joseph Rosenzweig},
+  title  = {English {SENSEVAL}: Report and Results},
+  year   = {2000},
+  url    = {citeseer.nj.nec.com/335615.html},
+  text   = {A. Kilgarriff and J. Rosenzweig. English SENSEVAL:
+            Report and Results. In Proceedings of the 2nd
+            International Conference on Language Resources and
+            Evaluation, LREC, Athens, Greece.},
+}
+
+@inproceedings{Kilgarriff2002,
+  author       = {Adam Kilgarriff},
+  title        = {English lexical sample task description},
+  booktitle    = {Proceedings of Senseval-2},
+  organization = {ACL workshop},
+  year         = {2002},
+}
+
+@Article{Kim94,
+  author  = {C. J. Kim},
+  title   = {Dynamic linear models with {Markov}-switching},
+  journal = {Journal of Econometrics},
+  volume  = {60},
+  pages   = {1--22},
+  year    = {1994},
+}
+
+@Article{Kimeldorf-Wahba-71,
+  author  = {G. Kimeldorf and G. Wahba},
+  title   = {Some results on {Tchebycheffian} spline functions},
+  journal = {Journal of Mathematical Analysis and Applications},
+  volume  = {33},
+  pages   = {82--95},
+  year    = {1971},
+}
+
+@incollection{Kinzel90,
+  author    = {W. Kinzel and M. Opper},
+  editor    = {E. Domany and J. L. van Hemmen and K. Schulten},
+  title     = {Dynamics of Learning},
+  booktitle = {Physics of Neural Networks},
+  volume    = {1},
+  publisher = {Springer-Verlag},
+  address   = {Berlin},
+  year      = {1990},
+}
+
+@inproceedings{Kira+Rendell-1992,
+  author    = {Kenji Kira and Larry A. Rendell},
+  title     = {The Feature Selection Problem: Traditional Methods and a New Algorithm},
+  booktitle = {Proceedings of the Tenth National Conference on Artificial Intelligence},
+  pages     = {129--134},
+  year      = {1992},
+  bibsource = {DBLP, http://dblp.uni-trier.de},
+}
+
+@inproceedings{Kira+Rendell-1992b,
+  author    = {Kenji Kira and Larry A. Rendell},
+  title     = {A practical approach to feature selection},
+  booktitle = {Proceedings of the Ninth International Conference on Machine Learning},
+  pages     = {249--256},
+  publisher = {Morgan Kaufmann},
+  address   = {San Francisco, CA, USA},
+  year      = {1992},
+  isbn      = {1-55860-247-X},
+  url       = {http://portal.acm.org/citation.cfm?id=142034},
+  posted-at = {2007-02-07 04:40:40},
+}
+
+% Carries the same bibliographic data as the entry keyed Kirk70a.
+@book{Kirk70,
+  author    = {D. E. Kirk},
+  title     = {Optimal Control Theory: an Introduction},
+  publisher = {Prentice Hall},
+  address   = {Englewood Cliffs NJ},
+  year      = {1970},
+}
+
+% Carries the same bibliographic data as the entry keyed Kirk70.
+@book{Kirk70a,
+  author    = {D. E. Kirk},
+  title     = {Optimal Control Theory: an Introduction},
+  publisher = {Prentice Hall},
+  address   = {Englewood Cliffs NJ},
+  year      = {1970},
+}
+
+@Article{Kirkpatrick83,
+  author  = {S. Kirkpatrick and Gelatt, Jr., C. D. and M. P. Vecchi},
+  title   = {Optimization by Simulated Annealing},
+  journal = science,
+  volume  = {220},
+  pages   = {671--680},
+  year    = {1983},
+}
+
+@article{Kirkpatrick85,
+  author  = {S. Kirkpatrick and G. Toulouse},
+  title   = {Configuration Space Analysis of Travelling Salesman Problems},
+  journal = jpp,
+  volume  = {46},
+  pages   = {1277--1292},
+  year    = {1985},
+}
+
+% Monograph in the Lecture Notes in Statistics series; the series editors are
+% kept in a field that standard styles ignore.
+@Book{kitagawa+gersch96,
+  author         = {G. Kitagawa and W. Gersch},
+  title          = {Smoothness priors analysis of time series},
+  series         = {Lecture Notes in Statistics},
+  volume         = {116},
+  publisher      = {Springer-Verlag},
+  year           = {1996},
+  series-editors = {P. Bickel and P. Diggle and S. Fienberg and K. Krickeberg and I. Olkin and W. Wermuth and S. Zeger},
+}
+
+@Article{kitagawa87,
+  author  = {G. Kitagawa},
+  title   = {Non-{Gaussian} State-Space Modeling of Nonstationary Time Series},
+  journal = {Journal of the American Statistical Association},
+  volume  = {82},
+  number  = {400},
+  pages   = {1032--1063},
+  year    = {1987},
+}
+
+@Article{kitagawa96,
+  author  = {G. Kitagawa},
+  title   = {{Monte} {Carlo} Filter and Smoother for Non-{Gaussian} Nonlinear State Space Models},
+  journal = {Journal of Computational and Graphical Statistics},
+  volume  = {5},
+  number  = {1},
+  pages   = {1--25},
+  year    = {1996},
+}
+
+% Conference paper (NIPS 14); venue, publisher and pages are taken from the
+% free-form "text" field below.
+@InProceedings{Kivinen02,
+  author    = {J. Kivinen and A. Smola and R. Williamson},
+  title     = {Online Learning with kernels},
+  booktitle = {Advances in Neural Information Processing Systems 14},
+  pages     = {785--793},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2002},
+  URL       = {citeseer.csail.mit.edu/kivinen02online.html},
+  text      = {J. Kivinen, A. Smola, and R. C. Williamson, (2002)
+               Online Learning with kernels. Advances in Neural
+               Information Processing Systems 14, Cambridge, MA: MIT
+               Press (pp. 785-793).},
+}
+
+@inproceedings{Klatt82,
+  author    = {D. Klatt},
+  title     = {Prediction of perceived phonetic distance from critical-band spectra: a first step},
+  booktitle = icassp,
+  pages     = {1278--1281},
+  year      = {1982},
+}
+
+@InProceedings{Kleinberg-2003,
+  author    = {J. Kleinberg},
+  editor    = NIPS15ed,
+  title     = {An impossibility theorem for clustering},
+  booktitle = NIPS15,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2003},
+}
+
+@article{Kleinfeld86,
+  author  = {D. Kleinfeld},
+  title   = {Sequential State Generation by Model Neural Networks},
+  journal = PNAS,
+  volume  = {83},
+  pages   = {9469--9473},
+  year    = {1986},
+}
+
+@incollection{Kleinfeld89,
+  author    = {D. Kleinfeld and H. Sompolinsky},
+  editor    = {C. Koch and I. Segev},
+  title     = {Associative Network Models for Central Pattern Generators},
+  booktitle = {Methods in Neuronal Modeling: From Synapses to Networks},
+  pages     = {195--246},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1989},
+}
+
+@book{Klopf82,
+  author    = {A. H. Klopf},
+  title     = {The Hedonistic Neuron: {A} Theory of Memory, Learning, and Intelligence},
+  publisher = {Hemisphere},
+  address   = {Washington},
+  year      = {1982},
+}
+
+@inproceedings{Kneser95,
+  author    = {Reinhard Kneser and Hermann Ney},
+  title     = {Improved Backing-Off for {M}-Gram Language Modeling},
+  booktitle = icassp,
+  pages     = {181--184},
+  year      = {1995},
+}
+
+@article{Koch86,
+  author  = {C. Koch and J. Marroquin and A. Yuille},
+  title   = {Analog ``Neuronal'' Networks in Early Vision},
+  journal = PNAS,
+  volume  = {83},
+  pages   = {4263--4267},
+  year    = {1986},
+}
+
+@inproceedings{Koch88,
+  author    = {C. Koch and J. Luo and C. Mead and J. Hutchinson},
+  editor    = nips87ed,
+  title     = {Computing Motion Using Resistive Networks},
+  booktitle = nips87,
+  pages     = {422--431},
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  year      = {1988},
+}
+
+@InProceedings{Kohavi95,
+  author    = {Ron Kohavi},
+  title     = {A Study of Cross-Validation and Bootstrap for Accuracy Estimation and Model Selection},
+  booktitle = {Proceedings of the Fourteenth International Joint Conference on Artificial Intelligence},
+  pages     = {1137--1143},
+  publisher = {Morgan Kaufmann},
+  year      = {1995},
+}
+
+@Article{Kohavi+John-1997,
+  author    = {Kohavi, Ron and John, George H.},
+  title     = {Wrappers for feature subset selection},
+  journal   = {Artificial Intelligence},
+  volume    = {97},
+  number    = {1-2},
+  pages     = {273--324},
+  publisher = {Elsevier Science Publishers Ltd.},
+  address   = {Essex, UK},
+  year      = {1997},
+  issn      = {0004-3702},
+  doi       = {10.1016/S0004-3702(97)00043-X},
+  url       = {http://portal.acm.org/citation.cfm?id=270627},
+}
+
+@article{Kohonen-ieee90,
+  author  = {T. Kohonen},
+  title   = {The Self-Organizing Map},
+  journal = ieeeproc,
+  volume  = {78},
+  number  = {9},
+  pages   = {1464--1480},
+  year    = {1990},
+  OPTnote = {Special Issue on Neural Networks},
+}
+
+@article{Kohonen74,
+  author  = {T. Kohonen},
+  title   = {An Adaptive Associative Memory Principle},
+  journal = ieeetc,
+  volume  = {C-23},
+  pages   = {444--445},
+  year    = {1974},
+}
+
+@Article{Kohonen82,
+  author  = {T. Kohonen},
+  title   = {Self-Organized Formation of Topologically Correct Feature Maps},
+  journal = biocyb,
+  volume  = {43},
+  pages   = {59--69},
+  year    = {1982},
+}
+
+@inproceedings{Kohonen84,
+  author    = {T. Kohonen and K. M{\"a}kisara and T. Saram{\"a}ki},
+  title     = {Phonotopic Maps --- Insightful Representation of Phonological Features for Speech Recognition},
+  booktitle = {Proceedings of the Seventh International Conference on Pattern Recognition},
+  pages     = {182--185},
+  publisher = {IEEE, New York},
+  address   = {Montreal 1984},
+  year      = {1984},
+}
+
+@techreport{Kohonen86lvq,
+  author      = {Teuvo Kohonen},
+  title       = {Learning Vector Quantization for Pattern Recognition},
+  type        = {Report},
+  number      = {TKK-F-A601},
+  institution = {Helsinki University of Technology},
+  address     = {Espoo, Finland},
+  year        = {1986},
+}
+
+@inproceedings{Kohonen88,
+  author    = {T. Kohonen and G. Barna and R. Chrisley},
+  title     = {Statistical Pattern Recognition with Neural Networks: Benchmarking Studies},
+  booktitle = icnn,
+  volume    = {1},
+  pages     = {61--68},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+% Classic BibTeX styles print the edition verbatim followed by "edition", so an
+% ordinal word is used rather than a bare number.
+@Book{Kohonen89,
+  author    = {T. Kohonen},
+  title     = {Self-Organization and Associative Memory},
+  publisher = {Springer-Verlag},
+  address   = {Berlin},
+  edition   = {Third},
+  year      = {1989},
+}
+
+% Classic BibTeX styles print the edition verbatim followed by "edition", so an
+% ordinal word is used rather than a bare number.
+@Book{Kohonen-2001,
+  author    = {T. Kohonen},
+  title     = {Self-Organizing Maps},
+  publisher = {Springer},
+  edition   = {Third},
+  year      = {2001},
+}
+
+@Article{Kolchinskii2000,
+  author  = {V. Koltchinskii and E. Gin{\'e}},
+  title   = {Random matrix approximation of spectra of integral operators},
+  journal = {Bernoulli},
+  volume  = {6},
+  number  = {1},
+  pages   = {113--167},
+  year    = {2000},
+}
+
+@techreport{Kolen+Pollack90,
+  author      = {J. F. Kolen and J. B. Pollack},
+  title       = {Back propagation is sensitive to initial conditions},
+  type        = {Technical Report},
+  number      = {TR 90-{JK}-{BPSIC}},
+  institution = {The Ohio State University},
+  year        = {1990},
+  key         = {kolen},
+}
+
+@inproceedings{Kolen-nips94,
+  author    = {John F. Kolen},
+  editor    = NIPS6ed,
+  title     = {Fool's Gold: Extracting Finite State Machines From Recurrent Network Dynamics},
+  booktitle = NIPS6,
+  publisher = {Morgan Kaufmann},
+  year      = {1994},
+}
+
+@Article{Kolmogorov33,
+  author  = {A. N. Kolmogorov},
+  title   = {Sulla determinazione empirica di una legge di distribuzione},
+  journal = {Giornale dell'Istituto Italiano degli Attuari},
+  volume  = {4},
+  pages   = {83--91},
+  year    = {1933},
+  note    = {translated in English in {\em Breakthroughs in
+             Statistics}, by Kotz and Johnson (editors),
+             Springer-Verlag, 1992},
+}
+
+% The pointer to the English translation lives in "note" because article
+% styles do not print a "publisher" field.
+@Article{Kolmogorov57,
+  author  = {A. N. Kolmogorov},
+  title   = {On the representation of continuous functions of many variables by superposition of continuous functions of one variable and addition},
+  journal = {Dokl. Akad. Nauk SSSR},
+  volume  = {114},
+  pages   = {953--956},
+  year    = {1957},
+  note    = {Translated in: American Mathematical Society Translations 28 (1963) 55--59},
+}
+
+@Article{Kolmogorov65,
+  author  = {A. N. Kolmogorov},
+  title   = {Three approaches to the quantitative definition of information},
+  journal = {Problems of Information Transmission},
+  volume  = {1},
+  number  = {1},
+  pages   = {1--7},
+  year    = {1965},
+}
+
+@InProceedings{Koltchinskii-1998,
+  author    = {V. Koltchinskii},
+  editor    = {Eberlein and Hahn and Talagrand},
+  title     = {Asymptotics of Spectral Projections of Some Random Matrices Approximating Integral Operators},
+  booktitle = {Progress in Probability},
+  volume    = {43},
+  pages     = {191--227},
+  publisher = {Birkh{\"a}user},
+  address   = {Basel},
+  year      = {1998},
+}
+
+@inproceedings{Kong95,
+  author    = {Eun Bae Kong and Thomas G. Dietterich},
+  title     = {Error-Correcting Output Coding Corrects Bias and Variance},
+  booktitle = {International Conference on Machine Learning},
+  pages     = {313--321},
+  year      = {1995},
+}
+
+@inproceedings{Konig96,
+  author    = {Y. Konig and H. Bourlard and N. Morgan},
+  editor    = NIPS8ed,
+  title     = {{REMAP}: Recursive Estimation and Maximization of {A} Posteriori Probabilities -- Application to transition-based connectionist speech recognition},
+  booktitle = NIPS8,
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@InProceedings{Koray-08,
+  author    = {Kavukcuoglu, Koray and Ranzato, {Marc'Aurelio} and Fergus, Rob and {LeCun}, Yann},
+  title     = {Learning Invariant Features through Topographic Filter Maps},
+  booktitle = cvpr09,
+  publisher = {IEEE},
+  year      = {2009},
+}
+
+@techreport{koray-psd-08,
+  author      = {Kavukcuoglu, Koray and Ranzato, {Marc'Aurelio} and {LeCun}, Yann},
+  title       = {Fast Inference in Sparse Coding Algorithms with Applications to Object Recognition},
+  number      = {CBLL-TR-2008-12-01},
+  institution = {Computational and Biological Learning Lab, Courant Institute, NYU},
+  year        = {2008},
+  original    = {orig/koray-psd-08.pdf},
+}
+
+@Article{Kouh-Poggio-2008,
+  author  = {Minjoon M. Kouh and Tomaso T. Poggio},
+  title   = {A Canonical Neural Circuit for Cortical Nonlinear Operations},
+  journal = {Neural Computation},
+  volume  = {20},
+  number  = {6},
+  pages   = {1427--1451},
+  year    = {2008},
+}
+
+@techreport{Kouropteva+al-2002,
+  author      = {O. Kouropteva and O. Okun and A. Hadid and M. Soriano and S. Marcos and M. Pietik{\"a}inen},
+  title       = {Beyond locally linear embedding algorithm},
+  number      = {MVG-01-2002},
+  institution = {Department of Electrical and Information Engineering, University of Oulu},
+  address     = {Oulu, Finland},
+  year        = {2002},
+}
+
+@inproceedings{Kononenko-1994,
+  author    = {Kononenko, Igor},
+  editor    = {Bergadano, F. and De Raedt, L.},
+  title     = {Estimating Attributes: Analysis and Extensions of {RELIEF}},
+  booktitle = ECML94,
+  pages     = {171--182},
+  year      = {1994},
+  url       = {http://citeseer.ist.psu.edu/kononenko94estimating.html},
+}
+
+@inproceedings{Kozma96,
+  author    = {R. Kozma and M. Kitamura and S. Sato},
+  title     = {Monitoring of {NPP} State using Structural Adaptation in a Neural Signal Processing System},
+  booktitle = nipc-hmit96,
+  volume    = {1},
+  pages     = {273--278},
+  publisher = ans,
+  year      = {1996},
+}
+
+@Article{Kramer1991,
+  author  = {Mark Kramer},
+  title   = {Nonlinear Principal Component Analysis Using Autoassociative Neural Networks},
+  journal = {AIChE Journal},
+  volume  = {37},
+  number  = {2},
+  pages   = {233--243},
+  year    = {1991},
+}
+
+@inproceedings{Kramer89,
+  author    = {A. H. Kramer and A. Sangiovanni-Vincentelli},
+  editor    = NIPS1ed,
+  title     = {Efficient Parallel Learning Algorithms for Neural Networks},
+  booktitle = NIPS1,
+  pages     = {40--48},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1989},
+}
+
+@Article{Krauth89,
+  author  = {W. Krauth and M. M{\'e}zard},
+  title   = {The Cavity Method and the Travelling-Salesman Problem},
+  journal = eul,
+  volume  = {8},
+  pages   = {213--218},
+  year    = {1989},
+}
+
+@book{Kreyszig90,
+  author    = {E. Kreyszig},
+  title     = {Introductory Functional Analysis with Applications},
+  publisher = {John Wiley \& Sons, Inc.},
+  address   = {New York, NY},
+  year      = {1990},
+}
+
+@book{Krishnaiah82,
+  editor    = {P. R. Krishnaiah and L. N. Kanal},
+  title     = {Classification, Pattern Recognition, and Reduction of Dimensionality},
+  series    = {Handbook of Statistics},
+  volume    = {2},
+  publisher = {North Holland},
+  address   = {Amsterdam},
+  year      = {1982},
+}
+
+@TechReport{KrizhevskyHinton2009,
+  author      = {Alex Krizhevsky and Geoffrey Hinton},
+  title       = {Learning Multiple Layers of Features from Tiny Images},
+  institution = {University of Toronto},
+  chapter     = {3},
+  year        = {2009},
+}
+
+@inproceedings{Krogh-nips8,
+  author    = {A. Krogh and S. K. Riis},
+  editor    = NIPS8ed,
+  title     = {Prediction of beta sheets in proteins},
+  booktitle = NIPS8,
+  pages     = {917--923},
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@article{Krogh88,
+  author  = {A. Krogh and J. A. Hertz},
+  title   = {Mean Field Analysis of Hierarchical Associative Networks with Magnetization},
+  journal = jpa,
+  volume  = {21},
+  pages   = {2211--2224},
+  year    = {1988},
+}
+
+@inproceedings{Krogh90a,
+  author    = {A. Krogh and G. I. Thorbergsson and J. A. Hertz},
+  editor    = NIPS2ed,
+  title     = {A Cost Function for Internal Representations},
+  booktitle = NIPS2,
+  pages     = {733--740},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@inproceedings{Krogh90b,
+  author    = {A. Krogh and J. A. Hertz},
+  editor    = {R. Eckmiller and G. Hartmann and G. Hauske},
+  title     = {Hebbian Learning of Principal Components},
+  booktitle = {Parallel Processing in Neural Systems and Computers},
+  pages     = {183--186},
+  publisher = {Elsevier, Amsterdam},
+  address   = {D{\"u}sseldorf 1990},
+  year      = {1990},
+}
+
+@Article{Krogh94,
+  author  = {A. Krogh and M. Brown and I. S. Mian and K. Sj{\"o}lander and D. Haussler},
+  title   = {Hidden {Markov} models in computational biology: Applications to protein modeling},
+  journal = {Journal of Molecular Biology},
+  volume  = {235},
+  pages   = {1501--1531},
+  year    = {1994},
+}
+
+@inproceedings{Krogh95,
+  author    = {A. Krogh and J. Vedelsby},
+  editor    = NIPS7ed,
+  title     = {Neural network ensembles, cross validation and active learning},
+  booktitle = NIPS7,
+  pages     = {231--238},
+  publisher = {Cambridge MA: MIT Press},
+  year      = {1995},
+}
+
+@book{Krolzig97,
+  author    = {H.-M. Krolzig},
+  title     = {Markov-Switching Vector Autoregressions},
+  publisher = {Springer},
+  year      = {1997},
+}
+
+@Article{Krueger+Dayan-2009,
+  author  = {Kai A. Krueger and Peter Dayan},
+  title   = {Flexible shaping: how learning in small steps helps},
+  journal = {Cognition},
+  volume  = {110},
+  pages   = {380--394},
+  year    = {2009},
+}
+
+@Article{Ku92,
+  author  = {C. C. Ku and K. Y. Lee and R. M. Edwards},
+  title   = {Improved Nuclear Reactor Temperature Control Using Diagonal Recurrent Neural Networks},
+  journal = {IEEE Transactions on Nuclear Science},
+  volume  = {39},
+  pages   = {2292--2308},
+  year    = {1992},
+}
+
+@inproceedings{Kubala94,
+  author    = {F. Kubala and A. Anastasakos and J. Makhoul and L. Nguyen and R. Schwartz and G. Zavaliagkos},
+  title     = {Comparative experiments on large vocabulary speech recognition},
+  booktitle = icassp,
+  pages     = {561--564},
+  address   = {Adelaide, Australia},
+  year      = {1994},
+}
+
+@inproceedings{Kuhn+Herzberg90,
+  author       = {G. Kuhn and N. Herzberg},
+  title        = {Variations on training of recurrent networks},
+  booktitle    = {Proc. 24th Conference on Information Sciences and Systems},
+  organization = {Princeton University},
+  address      = {NJ},
+  year         = {1990},
+}
+
+@unpublished{Kuhn87,
+  author = {G. Kuhn},
+  title  = {A first look at phonetic discrimination using connectionist models with recurrent links},
+  note   = {CCRP -- IDA SCIMP working paper No.4/87, Institute for Defense Analysis, Princeton, NJ},
+  year   = {1987},
+}
+
+@article{Kuhn-et-al-90,
+  author  = {G. Kuhn and R. L. Watrous and B. Ladendorf},
+  title   = {Connected recognition with a recurrent network},
+  journal = spcomm,
+  volume  = {9},
+  pages   = {41--49},
+  year    = {1990},
+  OPTnote = {},
+}
+
+@book{Kullback59,
+  author    = {S. Kullback},
+  title     = {Information Theory and Statistics},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1959},
+}
+
+@book{Kumar+al-1994,
+  author    = {V. Kumar and A. Grama and A. Gupta and G. Karypis},
+  title     = {Introduction to Parallel Computing: Design and Analysis of Algorithms},
+  publisher = {Benjamin Cummings},
+  address   = {Redwood City, CA},
+  year      = {1994},
+}
+
+@article{Kumar+al-1994b,
+  author  = {Vipin Kumar and Shashi Shekhar and Minesh B. Amin},
+  title   = {A Scalable Parallel Formulation of the Backpropagation Algorithm for Hypercubes and Related Architectures},
+  journal = {IEEE Transactions on Parallel and Distributed Systems},
+  volume  = {5},
+  number  = {10},
+  pages   = {1073--1090},
+  year    = {1994},
+}
+
+@inproceedings{Kundu88,
+  author    = {A. Kundu and L. R. Bahl},
+  title     = {Recognition of handwritten script: a hidden {Markov} model based approach},
+  booktitle = icassp,
+  pages     = {928--931},
+  address   = {New York, NY},
+  year      = {1988},
+}
+
+@article{Kuperstein88,
+  author  = {M. Kuperstein},
+  title   = {Neural model of adaptive hand-eye coordination for single postures},
+  journal = {Science},
+  volume  = {239},
+  pages   = {1308--1311},
+  year    = {1988},
+}
+
+@Article{Kurkova95,
+  author  = {V. Kurkov{\'a}},
+  title   = {Approximation of functions by perceptron networks with bounded number of hidden units},
+  journal = {Neural Networks},
+  volume  = {8},
+  pages   = {745--750},
+  year    = {1995},
+}
+
+@book{Kushner78,
+  author    = {H. J. Kushner and D. S. Clark},
+  title     = {Stochastic Approximation Methods for Constrained and Unconstrained Systems},
+  publisher = {Springer-Verlag},
+  address   = {New York},
+  year      = {1978},
+}
+
+@inproceedings{Kwok-Tsang-2003,
+  author    = {J. T. Kwok and I. W. Tsang},
+  editor    = ICML03ed,
+  title     = {Learning with idealized kernels},
+  booktitle = ICML03,
+  pages     = {400--407},
+  publisher = ICML03publ,
+  year      = {2003},
+}
+
+@InProceedings{Laaksonen97,
+  author    = {Jorma Laaksonen},
+  title     = {Local Subspace Classifier},
+  booktitle = {Proceedings of the International Conference on Artificial Neural Networks ICANN'97},
+  pages     = {637--642},
+  year      = {1997},
+  URL       = {http://www.cis.hut.fi/jorma/papers/abstracts.html#icann97},
+}
+
+@InProceedings{Lafferty-icml2001,
+  author    = {John Lafferty and Andrew McCallum and Fernando C. N. Pereira},
+  editor    = ICML01ed,
+  title     = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
+  booktitle = ICML01,
+  pages     = {282--289},
+  publisher = ICML01publ,
+  year      = {2001},
+}
+
+@Article{Lai+Fyfe-2000,
+  author  = {P. L. Lai and C. Fyfe},
+  title   = {Kernel and Nonlinear Canonical Correlation Analysis},
+  journal = {International Journal of Neural Systems},
+  volume  = {10},
+  number  = {5},
+  pages   = {365--377},
+  year    = {2000},
+}
+
+@inproceedings{Laj92,
+  author    = {E. Laj and A. Paoloni},
+  editor    = {M. Gori},
+  title     = {{AIDA}: The Italian Corpora},
+  booktitle = {Proc. of the Second Workshop on Neural Networks for Speech Processing},
+  pages     = {179--183},
+  publisher = {LINT},
+  address   = {Firenze (Italy)},
+  year      = {1992},
+}
+
+@InProceedings{Lanckriet-2002,
+  author    = {G. Lanckriet and N. Cristianini and P. Bartlett and L. {El Ghaoui} and M. Jordan},
+  editor    = ICML02ed,
+  title     = {Learning the kernel matrix with semi-definite programming},
+  booktitle = ICML02,
+  pages     = {323--330},
+  publisher = ICML02publ,
+  year      = {2002},
+}
+
+% "El Ghaoui" is a two-word surname; it is braced so the name parser does not
+% treat "El" as a given name.
+@Article{Lanckriet2004,
+  author  = {Gert R. G. Lanckriet and Nello Cristianini and Peter Bartlett and Laurent {El Ghaoui} and Michael I. Jordan},
+  title   = {Learning the Kernel Matrix with Semidefinite Programming},
+  journal = jmlr,
+  volume  = {5},
+  pages   = {27--72},
+  year    = {2004},
+}
+
+@techreport{Lang+Hinton88,
+  author      = {K. J. Lang and G. E. Hinton},
+  title       = {The development of the Time-Delay Neural Network architecture for speech recognition},
+  number      = {CMU-CS-88-152},
+  institution = {Carnegie-Mellon University},
+  year        = {1988},
+}
+
+% A submitted manuscript, not a journal article: the submission status goes in
+% "note" rather than "journal".
+@Unpublished{Langdell-00-nips,
+  author = {S. Langdell and Y. Bengio},
+  title  = {Approximate {SVM} Solutions: a Datamining Tool},
+  note   = {Submitted to NIPS'2000},
+  year   = {2000},
+}
+
+@inproceedings{Langford+Zadrozny-2005,
+  author    = {John Langford and Bianca Zadrozny},
+  editor    = aistats05ed,
+  title     = {Estimating Class Membership Probabilities using Classifier Learners},
+  booktitle = aistats05,
+  pages     = {198--205},
+  publisher = {Society for Artificial Intelligence and Statistics},
+  year      = {2005},
+}
+
+@article{Lapedes86a,
+  author  = {A. Lapedes and R. Farber},
+  title   = {A Self-Optimizing, Nonsymmetrical Neural Net for Content Addressable Memory and Pattern Recognition},
+  journal = physicaD,
+  volume  = {22},
+  pages   = {247--259},
+  year    = {1986},
+}
+
+@inproceedings{Lapedes86b,
+  author    = {A. Lapedes and R. Farber},
+  editor    = {J. S. Denker},
+  title     = {Programming a Massively Parallel, Computation Universal System: Static Behavior},
+  booktitle = snowbird,
+  pages     = {283--298},
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  year      = {1986},
+}
+
+@techreport{Lapedes87,
+  author      = {A. Lapedes and R. Farber},
+  title       = {Nonlinear Signal Processing Using Neural Networks: Prediction and System Modelling},
+  number      = {LA--UR--87--2662},
+  institution = {Los Alamos National Laboratory},
+  address     = {Los Alamos, NM},
+  year        = {1987},
+}
+
+@inproceedings{Lapedes88,
+  author    = {A. Lapedes and R. Farber},
+  editor    = nips87ed,
+  title     = {How Neural Nets Work},
+  booktitle = nips87,
+  pages     = {442--456},
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  year      = {1988},
+}
+
+@article{Lari90,
+  author  = {K. Lari and S. J. Young},
+  title   = {The estimation of stochastic context-free grammars using the Inside-Outside algorithm},
+  journal = cspla,
+  volume  = {4},
+  pages   = {35--56},
+  year    = {1990},
+}
+
+@InProceedings{Tieleman08,
+  author    = {Tijmen Tieleman},
+  editor    = ICML08ed,
+  title     = {Training restricted Boltzmann machines using approximations to the likelihood gradient},
+  booktitle = ICML08,
+  pages     = {1064--1071},
+  publisher = ICML08publ,
+  location  = {Helsinki, Finland},
+  year      = {2008},
+}
+
+@InProceedings{TielemanT2009,
+  author    = {Tijmen Tieleman and Geoffrey Hinton},
+  editor    = ICML09ed,
+  title     = {Using Fast Weights to Improve Persistent Contrastive Divergence},
+  booktitle = ICML09,
+  pages     = {1033--1040},
+  publisher = ICML09publ,
+  location  = icml09loc,
+  year      = {2009},
+  isbn      = {978-1-60558-516-1},
+  doi       = {10.1145/1553374.1553506},
+}
+
+@Article{Larochelle-jmlr-toappear-2008,
+  author  = {Hugo Larochelle and Yoshua Bengio and Jerome Louradour and Pascal Lamblin},
+  title   = {Exploring Strategies for Training Deep Neural Networks},
+  journal = jmlr,
+  volume  = {10},
+  pages   = {1--40},
+  year    = {2009},
+}
+
+@inproceedings{LarochelleH2007-small,
+  author    = {H. Larochelle and D. Erhan and A. Courville and J. Bergstra and Y. Bengio},
+  title     = {An Empirical Evaluation of Deep Architectures on Problems with Many Factors of Variation},
+  booktitle = {ICML 2007},
+  year      = {2007},
+}
+
+@inproceedings{LarochelleH2007-short,
+  author    = {H. Larochelle and D. Erhan and A. Courville and J. Bergstra and Y. Bengio},
+  title     = {An Empirical Evaluation of Deep Architectures on Problems with Many Factors of Variation},
+  booktitle = {Int. Conf. Mach. Learn.},
+  pages     = {473--480},
+  year      = {2007},
+}
+
+% Deprecated: the following entry is a duplicate of LarochelleH2007.
+@inproceedings{larochelle-icml-2007,
+  author    = {Hugo Larochelle and Dumitru Erhan and Aaron Courville and James Bergstra and Yoshua Bengio},
+  title     = {An Empirical Evaluation of Deep Architectures on Problems with Many Factors of Variation},
+  editor    = ICML07ed,
+  booktitle = ICML07,
+  pages     = {473--480},
+  publisher = ICML07publ,
+  location  = {Corvallis, OR},
+  year      = {2007},
+}
+  %url =          "http://www.machinelearning.org/proceedings/icml2007/papers/331.pdf",
+
+% Deprecated: the following entry is a duplicate of LarochelleH2007.
+@InProceedings{larochelle:icml07,
+  author =       "Hugo Larochelle and Dumitru Erhan and Aaron Courville and
+                 James Bergstra and Yoshua Bengio",
+  title =        "An empirical evaluation of deep architectures on
+                 problems with many factors of variation",
+  editor =       ICML07ed,
+  booktitle =    ICML07,
+  pages =        "473--480",
+  publisher =    ICML07publ,
+  location =     "Corvallis, OR",
+  year =         "2007",
+  url =          "http://www.machinelearning.org/proceedings/icml2007/papers/331.pdf",
+}
+
+@InProceedings{Larochelle+Bengio-2008-small,
+  author    = {Hugo Larochelle and Yoshua Bengio},
+  title     = "Classification using Discriminative Restricted {Boltzmann} Machines",
+  booktitle = "Proceedings of ICML 2008",
+  pages     = "536--543",
+  year      = "2008",
+}
+
+@InCollection{Larsen98,
+  author =       "Jan Larsen and Claus Svarer and Lars Nonboe Andersen
+                 and Lars Kai Hansen",
+  editor =       "G. B. Orr and K.-R. M{\"u}ller",
+  booktitle =    "Neural Networks: Tricks of the Trade",
+  title =        "Adaptive Regularization in Neural Networks Modeling",
+  publisher =    "Springer",
+  pages =        "113--132",
+  year =         "1998",
+}
+
+ 
+@inproceedings{LasserreJ2006,
+  author    = {Julia A. Lasserre and Christopher M. Bishop and Thomas P. Minka},
+  title     = {Principled Hybrids of Generative and Discriminative Models},
+  booktitle = cvpr06,
+  pages     = {87--94},
+  publisher = {IEEE Computer Society},
+  address   = {Washington, DC, USA},
+  year      = {2006},
+  ISBN      = {0-7695-2597-0},
+  doi       = {http://dx.doi.org/10.1109/CVPR.2006.227},
+}
+
+
+@techreport{Laub2003,
+  author      = {J. Laub and K.-R. M{\"u}ller},
+  title       = {Feature discovery: unraveling hidden structure in non-metric pairwise data},
+  institution = {Fraunhofer FIRST.IDA},
+  address     = {Germany},
+  year        = {2003},
+}
+
+@article{Lauritzen95,
+  author  = {Steffen L. Lauritzen},
+  title   = {The {EM} algorithm for graphical association models with missing data},
+  journal = {Computational Statistics and Data Analysis},
+  volume  = {19},
+  pages   = {191--201},
+  year    = {1995},
+}
+
+@book{Lauritzen96,
+  author    = {Steffen L. Lauritzen},
+  title     = {Graphical Models},
+  publisher = {Clarendon Press},
+  address   = {Oxford},
+  year      = {1996},
+  ISBN      = {0-19-852219-3},
+}
+
+@book{Lawler76,
+  author    = {E. L. Lawler},
+  title     = {Combinatorial Optimization: Networks and Matroids},
+  publisher = {Holt-Rinehart-Winston},
+  address   = {New York},
+  year      = {1976},
+}
+
+@book{Lawler85,
+  editor    = {E. L. Lawler and J. K. Lenstra and A. H. G. Rinnooy Kan and D. B. Shmoys},
+  title     = {The Travelling Salesman Problem},
+  publisher = {Wiley},
+  address   = {Chichester},
+  year      = {1985},
+}
+
+@inproceedings{Lawrence-Seeger-Herbrich-2003,
+  author    = {Neil Lawrence and Matthias Seeger and Ralf Herbrich},
+  title     = {Fast Sparse {G}aussian Process Methods: The Informative Vector Machine},
+  booktitle = NIPS15,
+  editor    = NIPS15ed,
+  pages     = {609--616},
+  publisher = {{MIT} Press},
+  year      = {2003},
+}
+
+@Article{Lawrence00,
+  author =       "S. Lawrence and S. Fong and C. L. Giles",
+  title =        "Natural Language Grammatical Inference with Recurrent
+                 Neural Networks",
+  journal =      "IEEE Trans. on Knowledge and Data Engineering",
+  year =         "2000",
+}
+
+@incollection{Lawrence96,
+  author    = {S. Lawrence and S. Fong and C. L. Giles},
+  title     = {Natural Language Grammatical Inference: {A} Comparison of Recurrent Neural Networks and Machine Learning Methods},
+  booktitle = {Lecture Notes on Artificial Intelligence, Connectionist, Statistical and Symbolic Approaches to Learning for Natural Language Processing},
+  editor    = {S. Wermter and E. Riloff and G. Scheler},
+  publisher = {Springer-Verlag, NY},
+  year      = {1996},
+}
+
+@incollection{LawrenceN2005,
+  author    = {Neil D. {Lawrence} and Michael I. {Jordan}},
+  title     = {Semi-supervised Learning via {G}aussian Processes},
+  booktitle = NIPS17,
+  editor    = NIPS17ed,
+  pages     = {753--760},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2005},
+  original  = {0753-257.PDF},
+}
+
+@techreport{LeBaron95,
+  author      = {B. LeBaron and A. S. Weigend},
+  title       = {Evaluating Neural Network Predictors by Bootstrapping},
+  institution = {University of Colorado, Boulder},
+  number      = {CU-CS-725-94},
+  year        = {1995},
+}
+
+@Article{LeCun+98,
+  author =       "Yann {LeCun} and L{\'e}on Bottou and Yoshua Bengio and
+                 Patrick Haffner",
+  title =        "Gradient-Based Learning Applied to Document
+                 Recognition",
+  journal =      "Proceedings of the {IEEE}",
+  volume =       "86",
+  number =       "11",
+  pages =        "2278--2324",
+  month =        nov,
+  year =         "1998",
+}
+
+@incollection{LeCun+98backprop,
+  author    = {Yann {LeCun} and L\'{e}on Bottou and Genevieve B. Orr and Klaus-Robert M{\"{u}}ller},
+  title     = {Efficient Backprop},
+  booktitle = {Neural Networks, Tricks of the Trade},
+  series    = {Lecture Notes in Computer Science LNCS~1524},
+  publisher = {Springer Verlag},
+  year      = {1998},
+}
+  %URL =          "http://leon.bottou.org/papers/lecun-98x",
+
+
+@incollection{LeCun+98backprop-small,
+  author    = {Y. {LeCun} and L. Bottou and G. B. Orr and K. M{\"{u}}ller},
+  title     = {Efficient Backprop},
+  booktitle = {Neural Networks, Tricks of the Trade},
+  year      = {1998},
+}
+
+
+@InProceedings{lecun-04,
+  author =       "Yann {LeCun} and Fu-Jie Huang and L{\'e}on Bottou",
+  title =        "Learning Methods for Generic Object Recognition with
+                 Invariance to Pose and Lighting",
+  booktitle =    cvpr04,
+  volume =       "2",
+  pages =        "97--104",
+  publisher =    "IEEE Computer Society",
+  address =      "Los Alamitos, CA, USA",
+  year =         "2004",
+  issn =         "1063-6919",
+  doi =          "http://doi.ieeecomputersociety.org/10.1109/CVPR.2004.144",
+}
+
+@inproceedings{LeCun-cp89,
+  author    = {Yann {LeCun}},
+  title     = {Generalization and Network Design Strategies},
+  booktitle = {Connectionism in Perspective},
+  publisher = {Elsevier Publishers},
+  year      = {1989},
+}
+
+@incollection{LeCun-dsbo86,
+  author    = {Yann {LeCun}},
+  title     = {Learning Processes in an Asymmetric Threshold Network},
+  booktitle = {Disordered Systems and Biological Organization},
+  editor    = {F. Fogelman-Souli\'e and E. Bienenstock and G. Weisbuch},
+  pages     = {233--240},
+  publisher = {Springer-Verlag},
+  address   = {Les Houches, France},
+  year      = {1986},
+}
+
+@inproceedings{lecun-huang-05,
+  author    = {Yann {LeCun} and {Fu Jie} Huang},
+  title     = {Loss Functions for Discriminative Training of Energy-Based Models},
+  booktitle = aistats05,
+  editor    = aistats05ed,
+  location  = {Savannah Hotel, Barbados},
+  date      = {Jan 6-8, 2005},
+  year      = {2005},
+}
+
+@misc{LeCun-nips93-tutorial,
+  author = {Yann {LeCun}},
+  title  = {Efficient learning and second-order methods},
+  note   = {Tutorial presented at NIPS'93, Denver, CO},
+  year   = {1993},
+}
+
+@PhdThesis{Lecun-these87,
+  author =       "Yann {LeCun}",
+  title =        "Mod{\`e}les connexionnistes de l'apprentissage",
+  school =       "Universit{\'e} de Paris VI",
+  year =         "1987",
+}
+
+@InCollection{lecun2006,
+  author =       "Yann {LeCun} and Sumit Chopra and Raia Hadsell and
+                 Marc-Aurelio Ranzato and Fu-Jie Huang",
+  editor =       "G. Bakir and T. Hofmann and B. Sch{\"o}lkopf and A. Smola
+                 and B. Taskar",
+  booktitle =    "Predicting Structured Data",
+  title =        "A Tutorial on Energy-Based Learning",
+  publisher =    "MIT Press",
+  pages =        "191--246",
+  year =         "2006",
+}
+
+@inproceedings{LeCun85,
+  author    = {Yann {LeCun}},
+  title     = {Une Proc\'edure d'Apprentissage pour {R}\'eseau \`a Seuil Assym\'etrique},
+  booktitle = {Cognitiva 85: A la Fronti\`ere de l'Intelligence Artificielle, des Sciences de la Connaissance et des Neurosciences},
+  pages     = {599--604},
+  publisher = {CESTA, Paris},
+  address   = {Paris 1985},
+  year      = {1985},
+}
+
+@incollection{LeCun86,
+  author    = {Yann {LeCun}},
+  title     = {Learning Processes in an Asymmetric Threshold Network},
+  booktitle = {Disordered Systems and Biological Organization},
+  editor    = {E. Bienenstock and F. Fogelman-Souli\'e and G. Weisbuch},
+  pages     = {233--240},
+  publisher = {Springer-Verlag, Berlin},
+  address   = {Les Houches 1985},
+  year      = {1986},
+}
+
+@article{LeCun89,
+  author  = {Yann {LeCun} and Bernhard Boser and John S. Denker and Donnie Henderson and Richard E. Howard and Wayne Hubbard and Lawrence D. Jackel},
+  title   = {Backpropagation Applied to Handwritten Zip Code Recognition},
+  journal = nc,
+  volume  = {1},
+  number  = {4},
+  pages   = {541--551},
+  year    = {1989},
+}
+
+@techreport{LeCun89a,
+  author      = {Yann {LeCun}},
+  title       = {Generalization and Network Design Strategies},
+  institution = {University of Toronto},
+  type        = {Technical Report},
+  number      = {CRG-TR-89-4},
+  year        = {1989},
+  key         = {LeCun},
+}
+
+@article{LeCun89d,
+  author  = {Yann {LeCun} and Lawrence D. Jackel and B. Boser and J. S. Denker and Hans P. Graf and I. Guyon and D. Henderson and R. E. Howard and W. Hubbard},
+  title   = {Handwritten Digit recognition: Applications of Neural Network Chips and Automatic Learning},
+  journal = {IEEE Communications Magazine},
+  volume  = {27},
+  number  = {11},
+  pages   = {41--46},
+  month   = nov,
+  year    = {1989},
+}
+
+@inproceedings{LeCun90a,
+  author    = {Y. {LeCun} and B. Boser and J. S. Denker and D. Henderson and R. E. Howard and W. Hubbard and L. D. Jackel},
+  title     = {Handwritten Digit Recognition with a Back-Propagation Network},
+  booktitle = NIPS2,
+  editor    = NIPS2ed,
+  pages     = {396--404},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@inproceedings{LeCun90b,
+  author    = {Y. {LeCun} and J. S. Denker and S. A. Solla},
+  title     = {Optimal Brain Damage},
+  booktitle = NIPS2,
+  editor    = NIPS2ed,
+  pages     = {598--605},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1990},
+}
+
+@InProceedings{LeCun90c,
+  author =       "Y. {LeCun} and Y. Matan and B. Boser and J. S. Denker
+                 and D. Henderson and R. E. Howard and W. Hubbard and L.
+                 D. Jackel and H. S. Baird",
+  editor =       "IAPR",
+  booktitle =    "International Conference on Pattern Recognition",
+  title =        "Handwritten Zip Code Recognition with Multilayer
+                 Networks",
+  publisher =    "IEEE",
+  address =      "Atlantic City",
+  year =         "1990",
+}
+
+@inproceedings{LeCun91,
+  author    = {Y. {LeCun} and I. Kanter and S. Solla},
+  title     = {Second order properties of error surfaces: learning time, generalization},
+  booktitle = NIPS3,
+  editor    = NIPS3ed,
+  pages     = {918--924},
+  publisher = {Morgan Kaufmann},
+  address   = {Denver, CO},
+  year      = {1991},
+}
+
+@incollection{LeCun93,
+  author    = {Y. {LeCun} and P. Simard and B. Pearlmutter},
+  title     = {Automatic learning rate maximization by on-line estimation of the {Hessian}'s eigenvectors},
+  booktitle = NIPS5,
+  editor    = NIPS5ed,
+  pages     = {156--163},
+  publisher = {Morgan Kaufmann Publishers, San Mateo, CA},
+  year      = {1993},
+}
+
+@InProceedings{LeCun94b,
+  author =       "Yann {LeCun} and Yoshua Bengio",
+  editor =       "IEEE",
+  booktitle =    ICPR94,
+  title =        "Word-Level Training of a Handwritten Word Recognizer
+                 based on Convolutional Neural Networks",
+  address =      "Jerusalem 1994",
+  year =         "1994",
+}
+
+@Article{LeCun98-small,
+  author =       "Y. {LeCun} and L. Bottou and Y. Bengio and
+                 P. Haffner",
+  title =        "Gradient Based Learning Applied to Document
+                 Recognition",
+  journal =      "Proc. {IEEE}",
+  volume =       "86",
+  number =       "11",
+  pages =        "2278--2324",
+  month =        nov,
+  year =         "1998",
+}
+
+@incollection{LeCun98-tricks,
+  author    = {Y. {LeCun} and L. Bottou and G. B. Orr and K.-R. M{\"u}ller},
+  title     = {Efficient {BackProp}},
+  booktitle = {Neural Networks: Tricks of the Trade},
+  editor    = {G. B. Orr and K.-R. M{\"u}ller},
+  pages     = {9--50},
+  publisher = {Springer},
+  year      = {1998},
+}
+
+@techreport{LeCun-TR,
+  author      = {Yann {LeCun}},
+  title       = {Generalization and Network Design Strategies},
+  institution = {Department of Computer Science, University of Toronto},
+  number      = {CRG-TR-89-4},
+  year        = {1989},
+  key         = {Lecun},
+}
+
+@article{Lee+Hon89,
+  author  = {Kai-Fu Lee and Hsiao-Wuen Hon},
+  title   = {Speaker-independent phone recognition using hidden {Markov} models},
+  journal = {IEEE Trans. on Acoustics, Speech and Signal Processing},
+  volume  = {37},
+  number  = {11},
+  pages   = {1641--1648},
+  month   = nov,
+  year    = {1989},
+}
+
+@article{Lee+Lewicki-2002,
+  author  = {T-W. Lee and M. S. Lewicki},
+  title   = {Unsupervised classification segmentation and enhancement of images using {ICA} mixture models},
+  journal = {IEEE Trans. Image Proc.},
+  volume  = {11},
+  number  = {3},
+  pages   = {270--279},
+  year    = {2002},
+}
+
+@incollection{Lee-2008,
+  author    = {Honglak Lee and Chaitanya Ekanadham and Andrew Ng},
+  title     = {Sparse deep belief net model for visual area {V}2},
+  booktitle = NIPS20,
+  editor    = NIPS20ed,
+  pages     = {873--880},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2008},
+}
+
+@book{Lee91,
+  author    = {Kai-Fu Lee},
+  title     = {Automatic Speech Recognition: the development of the {SPHINX} system},
+  publisher = {Kluwer Academic Publ.},
+  year      = {1989},
+}
+
+@article{Lee-1996,
+    author = "Tai Sing Lee",
+    title = "Image Representation Using {2D} {Gabor} Wavelets",
+    journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence",
+    volume = "18",
+    number = "10",
+    pages = "959--971",
+    year = "1996",
+}
+
+@InProceedings{Lee99a,
+  author =       "Lillian Lee",
+  booktitle =    "ACL99",
+  title =        "Measures of Distributional Similarity",
+  pages =        "25--32",
+  year =         "1999",
+}
+
+@InProceedings{Lee99b,
+  author =       "Lillian Lee and Fernando Pereira",
+  title =        "Distributional Similarity Models: Clustering vs.
+                 Nearest Neighbours",
+  booktitle =    "ACL99",
+  pages =        "33--40",
+  year =         "1999",
+}
+
+@Article{Lee+Mumford-2003,
+  author  = "Tai-Sing Lee and David Mumford",
+  title   = "Hierarchical Bayesian inference in the visual cortex",
+  journal = "Journal of Optical Society of America, A",
+  volume  = "20",
+  number  = "7",
+  pages   = "1434--1448",
+  year    = "2003",
+}
+
+
+@article{Leitch91,
+  author  = {G. Leitch and J. E. Tanner},
+  title   = {Economic Forecast Evaluation: Profits Versus The Conventional Error Measures},
+  journal = {The American Economic Review},
+  pages   = {580--590},
+  year    = {1991},
+}
+
+@article{Lengelle+Denoeux96,
+  author  = {R{\'e}gis Lengell{\'e} and Thierry Denoeux},
+  title   = {Training {MLP}s layer by layer using an objective function for internal representations},
+  journal = {Neural Networks},
+  volume  = {9},
+  pages   = {83--97},
+  year    = {1996},
+}
+
+@inproceedings{Leprieur95,
+  author    = {H. Leprieur and P. Haffner},
+  title     = {Discriminant learning with minimum memory loss for improved non-vocabulary rejection},
+  booktitle = {EUROSPEECH'95},
+  address   = {Madrid, Spain},
+  year      = {1995},
+}
+
+@book{lerdahl+jackendoff-1983,
+  author    = {F. Lerdahl and R. Jackendoff},
+  title     = {A {Generative} {Theory} of {Tonal} {Music}},
+  publisher = {MIT Press},
+  address   = {Cambridge, Mass.},
+  year      = {1983},
+}
+
+@incollection{LeRoux+al-tonga-2008,
+  author    = {Nicolas {Le Roux} and Pierre-Antoine Manzagol and Yoshua Bengio},
+  title     = {Topmoumoute online natural gradient algorithm},
+  booktitle = NIPS20,
+  editor    = NIPS20ed,
+  pages     = {849--856},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2008},
+}
+
+@incollection{LeRoux+al-tonga-2008-small,
+  author    = {Nicolas {Le Roux} and Pierre-Antoine Manzagol and Yoshua Bengio},
+  title     = {Topmoumoute online natural gradient algorithm},
+  booktitle = {NIPS 20},
+  pages     = {849--856},
+  year      = {2008},
+}
+
+@techreport{LeRoux-comb-dens-2005,
+  author      = {Nicolas {Le Roux} and Yoshua Bengio and R\'ejean Ducharme},
+  title       = {Combining density estimators to improve classification accuracy},
+  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
+  number      = {1261},
+  year        = {2005},
+}
+
+@InProceedings{LeRoux-continuous-short,
+  author =       "Nicolas {Le Roux} and Yoshua Bengio",
+  booktitle =    aistats07,
+  title =        "Continuous Neural Networks",
+  year =         "2007",
+  date =         "March 21-24, 2007",
+}
+
+@InProceedings{Lesk1986,
+  author =       "Michael E. Lesk",
+  booktitle =    "SIGDOC Conference",
+  title =        "Automatic sense disambiguation using machine readable
+                 dictionaries: How to tell a pine cone from an ice cream
+                 cone.",
+  address =      "Toronto, Canada",
+  year =         "1986",
+}
+
+@inproceedings{Leung92,
+  author      = {H. C. Leung and I. L. Hetherington and V. W. Zue},
+  title       = {Speech recognition using stochastic segment neural networks},
+  booktitle   = icassp,
+  volume      = {1},
+  pages       = {613--16},
+  institution = {Lab. for Comput. Sci., MIT, Cambridge, MA, USA},
+  publisher   = {IEEE},
+  address     = {New York, NY, USA},
+  year        = {1992},
+}
+
+@article{Levenberg44,
+  author  = {K. Levenberg},
+  title   = {A method for the solution of certain non-linear problems in least squares},
+  journal = {Quarterly Journal of Applied Mathematics},
+  volume  = {II},
+  number  = {2},
+  pages   = {164--168},
+  year    = {1944},
+}
+
+@inproceedings{Levin90,
+  author    = {E. Levin},
+  title     = {Word Recognition using Hidden Control Neural Architecture},
+  booktitle = icassp,
+  pages     = {433--436},
+  address   = {Albuquerque, NM},
+  year      = {1990},
+}
+
+@inproceedings{Levin92,
+  author    = {E. Levin and R. Pieraccini and E. Bocchieri},
+  title     = {Time-Warping Network: a Hybrid Framework for Speech Recognition},
+  booktitle = NIPS4,
+  editor    = NIPS4ed,
+  pages     = {151--158},
+  address   = {Denver, CO},
+  year      = {1992},
+}
+
+@Article{Levinson83,
+  author =       "S. E. Levinson and L. R. Rabiner and M. M. Sondhi",
+  title =        "An Introduction to the Application of the Theory of
+                 Probabilistic Functions of a {Markov} Process to
+                 Automatic Speech Recognition",
+  journal =      "Bell System Technical Journal",
+  volume =       "62",
+  number =       "4",
+  pages =        "1035--1074",
+  year =         "1983",
+}
+
+@InCollection{Levinson96,
+  author =       "S. E. Levinson",
+  editor =       "R. A. Cole and J. Mariani and H. Uszkoreit and A.
+                 Zaenen and V. Zue",
+  booktitle =    "Survey of the State of the Art in Human Language
+                 Technology",
+  title =        "Statistical Modeling and Classification",
+  publisher =    "Cambridge University Press",
+  pages =        "395--401",
+  year =         "1996",
+  url =          "http://www.cse.ogi.edu/CSLU/HLTsurvey/HLTsurvey.html",
+}
+
+@PhdThesis{Levner2008,
+  author = "Ilya Levner",
+  title  = "Data Driven Object Segmentation",
+  school = "Department of Computer Science, University of Alberta",
+  year   = "2008",
+}
+
+@inproceedings{Lewicki+Sejnowski-97,
+  author    = {Michael Lewicki and Terry Sejnowski},
+  title     = {Learning nonlinear overcomplete representations for efficient coding},
+  booktitle = NIPS10,
+  editor    = NIPS10ed,
+  pages     = {556--562},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA, USA},
+  location  = {Denver, Colorado, United States},
+  year      = {1998},
+  isbn      = {0-262-10076-2},
+}
+
+@Article{Lewicki+Sejnowski-2000,
+  author    = "Michael S. Lewicki and Terrence J. Sejnowski",
+  title     = "Learning Overcomplete Representations",
+  journal   = "Neural Computation",
+  volume    = "12",
+  number    = "2",
+  pages     = "337--365",
+  publisher = "MIT Press",
+  address   = "Cambridge, MA, USA",
+  year      = "2000",
+  issn      = "0899-7667",
+  doi       = "http://dx.doi.org/10.1162/089976600300015826",
+}
+
+@inproceedings{LewisC62,
+  author   = {P. M. {Lewis II} and C. L. Coates},
+  title    = {A realization procedure for threshold gate networks},
+  pages    = {159--168},
+  crossref = {FOCS3},
+  url      = {http://theory.lcs.mit.edu/~dmjones/FOCS/focs.bib},
+}
+
+@Article{lheureux-04-small,
+  author =       "P.-J. {L'Heureux} and J. Carreau and Y. Bengio and O.
+                 Delalleau and S. Y. Yue",
+  title =        "Locally Linear Embedding for dimensionality reduction
+                 in {QSAR}",
+  journal =      "J. Computer-Aided Molecular Design",
+  volume =       "18",
+  pages =        "475--482",
+  year =         "2004",
+}
+
+@Book{Li93,
+  author =       "Ming Li and Paul Vitanyi",
+  title =        "An Introduction to Kolmogorov Complexity and Its
+                 Applications",
+  edition =      "Second",
+  publisher =    "Springer",
+  address =      "New York, NY",
+  year =         "1997",
+}
+
+@article{li99face,
+  author  = {S. Z. Li and J. W. Lu},
+  title   = {Face recognition using the nearest feature line method},
+  journal = {IEEE Transactions on Neural Networks},
+  volume  = {10},
+  number  = {2},
+  pages   = {439--443},
+  year    = {1999},
+  URL     = {citeseer.nj.nec.com/li99face.html},
+}
+
+@inproceedings{Li+al-2005,
+    author    = {Hongyu Li and Wenbin Chen and I-Fan Shen},
+    title     = {Supervised Local Tangent Space Alignment for Classification},
+    booktitle = {IJCAI},
+    year      = {2005},
+    pages     = {1620--1621},
+    ee        = {http://www.ijcai.org/papers/post-0505.pdf},
+    bibsource = {DBLP, http://dblp.uni-trier.de}
+}
+
+@inproceedings{Li+Guo-2006,
+    author = {Chun-Guang Li and Jun Guo},
+    title = {Supervised Isomap with Explicit Mapping},
+    booktitle = {First International Conference on Innovative Computing, Information and Control},
+    volume = {3},
+    year = {2006},
+    isbn = {0-7695-2616-0},
+    pages = {345--348},
+    doi = {http://doi.ieeecomputersociety.org/10.1109/ICICIC.2006.530},
+    publisher = {IEEE Computer Society},
+    address = {Los Alamitos, CA, USA},
+}
+
+% The CV-style remark is kept in annote, which standard styles do not print.
+@inproceedings{lischuurmans08a,
+  author    = "Li, Y. and Schuurmans, D.",
+  title     = "Policy iteration for learning an exercise policy for {American} options",
+  booktitle = "Proceedings of the European Workshop on Reinforcement Learning (EWRL)",
+  year      = 2008,
+  annote    = "Acceptance rate 33\%; all authors from my research group"
+}
+
+% The CV-style remark is kept in annote, which standard styles do not print.
+@inproceedings{lischuurmans08b,
+  author    = "Li, Y. and Schuurmans, D.",
+  title     = "Learning an exercise policy for {American} options on real data",
+  booktitle = "Proceedings of the International Symposium on Financial Engineering and Risk Management (FERM)",
+  year      = 2008,
+  annote    = "All authors from my research group; unrefereed publication"
+}
+
+@inproceedings{Li+al-2007,
+    author    = {Jun-Bao Li and Shu-Chuan Chu and Jeng-Shyang Pan},
+    title     = {Locally Discriminant Projection with Kernels for Feature Extraction},
+    booktitle = {Proceedings of the Third International Conference on Advanced Data Mining and Applications},
+    editor    = {Reda Alhajj and Hong Gao and Xue Li and Jianzhong Li and Osmar R. Za{\"\i}ane},
+    publisher = {Springer},
+    year      = {2007},
+    pages     = {586--593},
+    ee        = {http://dx.doi.org/10.1007/978-3-540-73871-8_56},
+    bibsource = {DBLP, http://dblp.uni-trier.de}
+}
+
+@InCollection{Liang83,
+  author =       "F. M. Liang",
+  editor =       "D. E. Knuth",
+  booktitle =    "The {\TeX}book",
+  title =        "Ph.{D}.\ Thesis",
+  publisher =    "Addison-Wesley",
+  address =      "Reading",
+  year =         "1986",
+}
+
+@InProceedings{LiangP2008,
+  author    = "Percy Liang and Michael I. Jordan",
+  title     = "An asymptotic analysis of generative, discriminative, and pseudolikelihood estimators",
+  editor    = ICML08ed,
+  booktitle = ICML08,
+  pages     = "584--591",
+  publisher = ICML08publ,
+  address   = "New York, NY, USA",
+  location  = "Helsinki, Finland",
+  year      = "2008",
+  isbn      = "978-1-60558-205-4",
+  doi       = "http://doi.acm.org/10.1145/1390156.1390230",
+}
+
+@article{Liberman67,
+  author  = {A. M. Liberman and F. S. Cooper and D. P. Shankweiler and M. Studdert-Kennedy},
+  title   = {Perception of the speech code},
+  journal = {Psychological Review},
+  volume  = {74},
+  pages   = {431--461},
+  year    = {1967},
+}
+
+@article{Lin+al-1991,
+  author    = {W.-M. Lin and V. K. Prasanna and K. W. Przytula},
+  title     = {Algorithmic mapping of neural network Models onto Parallel {SIMD} Machines},
+  journal   = {IEEE Transactions on Computers},
+  volume    = {40},
+  number    = {12},
+  pages     = {1390--1401},
+  publisher = {IEEE Computer Society},
+  address   = {Los Alamitos, CA, USA},
+  year      = {1991},
+  ISSN      = {0018-9340},
+  doi       = {http://doi.ieeecomputersociety.org/10.1109/12.106224},
+}
+
+@Article{Lin-2000,
+  author =       "Dekang Lin",
+  title =        "Word sense disambiguation with a similarity based
+                 smoothed library",
+  journal =      "Computers and the Humanities: special issue on
+                 {SENSEVAL}",
+  volume =       "34",
+  pages =        "147--152",
+  year =         "2000",
+}
+
+@inproceedings{Lin-99,
+  author    = {Dekang Lin},
+  title     = {A case-based algorithm for word sense disambiguation},
+  booktitle = {Proceedings of the Conference of the Pacific Association for Computational Linguistics},
+  address   = {Waterloo, Canada},
+  year      = {1999},
+}
+
+@article{Lin73,
+  author  = {S. Lin and B. W. Kernighan},
+  title   = {An Effective Heuristic Algorithm for the Travelling Salesman Problem},
+  journal = opres,
+  volume  = {21},
+  pages   = {498--516},
+  year    = {1973},
+}
+
+@TechReport{Lin95,
+  author =       "T. Lin and B. G. Horne and P. Tino and C. L. Giles",
+  title =        "Learning long-term dependencies is not as difficult
+                 with {NARX} recurrent neural networks",
+  number =       "UMIACS-TR-95-78",
+  institution =  "Institute for Advanced Computer Studies, University of
+                 Maryland",
+  year =         "1995",
+}
+
+@inproceedings{Lin96,
+  author    = {C. Lin and S-C. Chang and K-J. Lin},
+  title     = {Simulation of the Balance of Plant of a Nuclear Power Plant by Neural Networks},
+  booktitle = nipc-hmit96,
+  volume    = {1},
+  pages     = {251--255},
+  publisher = ans,
+  year      = {1996},
+}
+
+@Article{Linde80,
+  author =       "Y. Linde and A. Buzo and R. M. Gray",
+  title =        "An algorithm for vector quantizer design",
+  journal =      "IEEE Transactions on Communications",
+  volume =       "COM-28",
+  number =       "1",
+  pages =        "84--95",
+  month =        jan,
+  year =         "1980",
+}
+
+@Article{Lindgren78,
+  author =       "G. Lindgren",
+  title =        "{Markov} Regime Models for Mixed Distributions and
+                 Switching Regressions",
+  journal =      "Scand. J. Statist.",
+  volume =       "5",
+  pages =        "81--91",
+  year =         "1978",
+}
+
+@article{Linial93,
+  author    = {Nathan Linial and Yishay Mansour and Noam Nisan},
+  title     = {Constant depth circuits, {Fourier} transform, and learnability},
+  journal   = {J. ACM},
+  volume    = {40},
+  number    = {3},
+  pages     = {607--620},
+  publisher = {ACM Press},
+  address   = {New York, NY, USA},
+  year      = {1993},
+}
+
+@article{Linsker86,
+  author  = {R. Linsker},
+  title   = {From Basic Network Principles to Neural Architecture},
+  journal = PNAS,
+  volume  = {83},
+  pages   = {7508--7512, 8390--8394, 8779--8783},
+  year    = {1986},
+}
+
+@article{Linsker88,
+  author  = {R. Linsker},
+  title   = {Self-Organization in a Perceptual Network},
+  journal = computer,
+  pages   = {105--117},
+  month   = mar,
+  year    = {1988},
+}
+
+@TechReport{liporace-76,
+  author      = {L. A. Liporace},
+  title       = {{PTAH} on Continuous Multivariate Functions of {Markov}
+                 Chains},
+  institution = {Institute for Defense Analysis, Communication Research
+                 Department},
+  number      = {80193},
+  month       = feb,
+  year        = {1976},
+}
+
+@Article{Lippmann87,
+  author  = {R. P. Lippmann},
+  title   = {An Introduction to Computing with Neural Nets},
+  journal = ieeeassp,
+  pages   = {4--22},
+  month   = apr,
+  year    = {1987},
+}
+
+@InProceedings{Lippmann87b,
+  author    = {R. P. Lippmann and B. Gold},
+  title     = {Neural Classifiers Useful for Speech Recognition},
+  booktitle = {IEEE Proc. First Intl. Conf. on Neural Networks},
+  volume    = {IV},
+  pages     = {417--422},
+  address   = {San Diego, CA},
+  year      = {1987},
+}
+
+@Article{Lippmann89,
+  author  = {R. P. Lippmann},
+  title   = {Review of Neural Networks for Speech Recognition},
+  journal = nc,
+  volume  = {1},
+  pages   = {1--38},
+  year    = {1989},
+}
+
+@InProceedings{Lister90,
+  author    = {R. Lister},
+  title     = {Segment Reversal and the {TSP}},
+  booktitle = ijcnn,
+  volume    = {1},
+  pages     = {424--427},
+  publisher = {Lawrence Erlbaum, Hillsdale},
+  address   = {Washington 1990},
+  year      = {1990},
+}
+
+@Article{Litkowski-2000,
+  author  = {K. Litkowski},
+  title   = {{SENSEVAL}: The {CL}-research experience},
+  journal = {Computers and the Humanities: special issue on SENSEVAL},
+  volume  = {34},
+  pages   = {153--158},
+  year    = {2000},
+}
+
+@Book{Little+Rubin-2002,
+  author =       "R. J. A. Little and D. B. Rubin",
+  title =        "Statistical Analysis with Missing Data",
+  publisher =    "Wiley",
+  address =      "New York",
+  edition =      "Second",
+  year =         "2002",
+}
+
+@Book{Little-Rubin,
+  author    = {R. J. A. Little and D. B. Rubin},
+  title     = {Statistical Analysis with Missing Data},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1987},
+}
+
+@Article{Little74,
+  author  = {W. A. Little},
+  title   = {The Existence of Persistent States in the Brain},
+  journal = mbio,
+  volume  = {19},
+  pages   = {101--120},
+  year    = {1974},
+}
+
+@Article{Little75,
+  author  = {W. A. Little and G. L. Shaw},
+  title   = {A Statistical Theory of Short and Long Term Memory},
+  journal = behbio,
+  volume  = {14},
+  year    = {1975},
+}
+
+@Article{Little78,
+  author  = {W. A. Little and G. L. Shaw},
+  title   = {Analytic Study of the Memory Storage Capacity of a Neural
+             Network},
+  journal = mbio,
+  volume  = {39},
+  pages   = {281--290},
+  year    = {1978},
+}
+
+@Article{littlestone-warmuth94,
+  author  = {N. Littlestone and M. K. Warmuth},
+  title   = {The weighted majority algorithm},
+  journal = {Information and Computation},
+  volume  = {108},
+  number  = {2},
+  pages   = {212--261},
+  year    = {1994},
+}
+
+@Misc{Littlestone86,
+  author = {N. Littlestone and M. Warmuth},
+  title  = {Relating data compression and learnability},
+  note   = {Unpublished manuscript. University of California Santa Cruz.
+            An extended version can be found in (Floyd and Warmuth 95)},
+  year   = {1986},
+}
+
+@InCollection{Liu2001,
+  author =       "J. S. Liu and R. Chen and T. Logvinenko",
+  editor =       "A. Doucet and N. de Freitas and N. Gordon",
+  booktitle =    "Sequential Monte Carlo Methods in Practice",
+  title =        "A theoretical framework for sequential importance
+                 sampling and resampling",
+  publisher =    "Springer-Verlag",
+  year =         "2001",
+}
+
+@Book{Ljung+Soderstrom83,
+  author =       "L. Ljung and T. S{\"o}derstr{\"o}m",
+  title =        "Theory and Practice of Recursive Identification",
+  publisher =    "MIT Press",
+  year =         "1983",
+}
+
+@Book{Ljung-86,
+  author =       "L. Ljung and T. S{\"o}derstr{\"o}m",
+  title =        "Theory and Practice of Recursive Identification",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "1986",
+}
+
+@article{LloydS1982,
+	author = {Stuart P. Lloyd},
+	journal = {IEEE Transactions on Information Theory},
+	month = mar,
+	number = {2},
+	pages = {129--137},
+	title = {Least squares quantization in {PCM}},
+	volume = {28},
+	year = {1982}
+}
+
+@Article{Loader96,
+  author =       "C. R. Loader",
+  title =        "Local likelihood density estimation",
+  journal =      "Annals of Statistics",
+  volume =       "24",
+  number =       "4",
+  pages =        "1602--1618",
+  year =         "1996",
+}
+
+@Article{Loftsgaarden+Quesenberry-65,
+  author  = {D. O. Loftsgaarden and C. P. Quesenberry},
+  title   = {A nonparametric estimate of a multivariate density function},
+  journal = {Annals of Mathematical Statistics},
+  volume  = {36},
+  pages   = {1049--1051},
+  year    = {1965},
+}
+
+@InCollection{lognormal-A-85,
+  author =       "C. E. Antle",
+  booktitle =    "Encyclopedia of Statistical Sciences",
+  title =        "Lognormal Distribution",
+  volume =       "5",
+  publisher =    "John Wiley \& Sons",
+  pages =        "134--136",
+  year =         "1985",
+}
+
+@Article{Loh-Shih97,
+  author  = {Wei-Yin Loh and Yu-Shan Shih},
+  title   = {Split selection methods for classification trees},
+  journal = {Statistica Sinica},
+  volume  = {7},
+  pages   = {815--840},
+  year    = {1997},
+}
+
+@incollection{loosli-canu-bottou-2006,
+  author = {Loosli, Ga{\"e}lle and Canu, St{\'e}phane and Bottou, L{\'e}on},
+  title = {Training Invariant Support Vector Machines using Selective Sampling},
+  pages = {301--320},
+  editor = {Bottou, L{\'e}on and Chapelle, Olivier and {DeCoste}, Dennis and Weston, Jason},
+  booktitle = {Large Scale Kernel Machines},
+  publisher = {MIT Press},
+  address = {Cambridge, MA},
+  year = {2007},
+  url = {http://leon.bottou.org/papers/loosli-canu-bottou-2006},
+}
+
+@Article{Lowe04,
+  author  = {D. G. Lowe},
+  title   = {Distinctive Image Features from Scale-Invariant Keypoints},
+  journal = {International Journal of Computer Vision},
+  volume  = {60},
+  number  = {2},
+  pages   = {91--110},
+  year    = {2004},
+}
+
+@Article{Lowe95,
+  author  = {D. G. Lowe},
+  title   = {Similarity metric learning for a variable-kernel classifier},
+  journal = {Neural Computation},
+  volume  = {7},
+  number  = {1},
+  pages   = {72--85},
+  year    = {1995},
+}
+
+@InProceedings{lu04,
+  author    = {Wen-Cong Lu and Nian-Yi Chen and Guo-Zheng Li and Jie Yang},
+  title     = {Multitask learning using partial least square method},
+  editor    = {Per Svensson and Johan Schubert},
+  booktitle = {Proceedings of the Seventh International Conference on
+               Information Fusion},
+  volume    = {I},
+  pages     = {79--84},
+  publisher = {International Society of Information Fusion},
+  address   = {Mountain View, CA},
+  month     = jun,
+  year      = {2004},
+  location  = {Stockholm, Sweden},
+}
+
+@Book{Lue84,
+  author    = {D. G. Luenberger},
+  title     = {Linear and Nonlinear Programming},
+  publisher = {Addison Wesley},
+  year      = {1984},
+}
+
+@Book{Luenberger86,
+  author    = {D. G. Luenberger},
+  title     = {Linear and Nonlinear Programming},
+  publisher = {Addison-Wesley},
+  address   = {Reading},
+  year      = {1986},
+}
+
+@InProceedings{Lyu09,
+  author    = {Siwei Lyu},
+  title     = {Interpretation and Generalization of Score Matching},
+  booktitle = {The proceedings of the 25th Conference on Uncertainty in Artificial Intelligence},
+  year      = {2009},
+}
+
+@Book{Ma85,
+  author    = {S.-K. Ma},
+  title     = {Statistical Mechanics},
+  publisher = {World Scientific},
+  address   = {Philadelphia},
+  year      = {1985},
+}
+
+@InProceedings{Ma09,
+ author = {Justin Ma and Lawrence K. Saul and Stefan Savage and Geoffrey M. Voelker},
+ title = {Identifying Suspicious {URLs}: An Application of Large-Scale Online Learning},
+ booktitle = {Proceedings of the International Conference on Machine Learning},
+ year = {2009},
+ pages = {681--688},
+ location = {Montreal, Canada},
+}
+
+@Misc{MacKay+Neal94,
+  author =       "D. MacKay and R. Neal",
+  title =        "Automatic Relevance Determination",
+  year =         "1994",
+  note =         "Unpublished report. See also MacKay D., 1995, Probable
+                 Networks and Plausible Predictions -- A Review of
+                 Practical {Bayesian} Methods for Supervised Neural
+                 Networks, in {\em Network: Computation in Neural
+                 Systems}, v. 6, pp. 469--505",
+}
+
+@Book{MacKay03,
+  author    = {David MacKay},
+  title     = {Information Theory, Inference and Learning Algorithms},
+  publisher = {Cambridge University Press},
+  year      = {2003},
+}
+
+@Misc{MacKay2001,
+  author = {David MacKay},
+  title  = {Failures of the One-Step Learning Algorithm},
+  note   = {Unpublished report},
+  year   = {2001},
+}
+
+@Article{MacKay90,
+  author =       "D. J. C. MacKay and K. D. Miller",
+  title =        "Analysis of {Linsker's} Simulation of {Hebbian} Rules",
+  journal =      nc,
+  volume =       "2",
+  pages =        "173--187",
+  year =         "1990",
+}
+
+@PhdThesis{MacKay91,
+  author = {D. J. C. MacKay},
+  title  = {Bayesian methods for adaptive models},
+  school = {California Institute of Technology},
+  year   = {1991},
+}
+
+@Article{MacKay92a,
+  author =       "David J. C. MacKay",
+  title =        "Bayesian interpolation",
+  journal =      "Neural Computation",
+  volume =       "4",
+  number =       "3",
+  pages =        "415--447",
+  year =         "1992",
+}
+
+@Article{MacKay92b,
+  author =       "D. J. C. MacKay",
+  title =        "The evidence framework applied to classification
+                 networks",
+  journal =      "Neural Computation",
+  volume =       "4",
+  number =       "5",
+  pages =        "720--736",
+  year =         "1992",
+}
+
+@Article{MacKay92c,
+  author =       "David J. C. MacKay",
+  title =        "A practical {Bayesian} framework for backpropagation
+                 networks",
+  journal =      "Neural Computation",
+  volume =       "4",
+  number =       "3",
+  pages =        "448--472",
+  year =         "1992",
+}
+
+@Article{MacKay98,
+  author =       "D. J. C. MacKay and R. J. McEliece and J-F. Cheng",
+  title =        "Turbo-decoding as an instance of {Pearl's} belief
+                 propagation algorithm",
+  journal =      "IEEE Journal on Selected Areas in Communications",
+  year =         "1998",
+  note =         "In press",
+}
+
+@TechReport{MacKay98b,
+  author      = {D. J. C. MacKay},
+  title       = {Introduction to {G}aussian Processes},
+  institution = {Cambridge University},
+  year        = {1998},
+  URL         = {http://wol.ra.phy.cam.ac.uk/mackay/gpB.pdf},
+}
+
+@Article{Mackey77,
+  author  = {M. C. Mackey and L. Glass},
+  title   = {Oscillation and Chaos in Physiological Control Systems},
+  journal = science,
+  volume  = {197},
+  pages   = {287},
+  year    = {1977},
+}
+
+@InProceedings{Maclin-iwml91,
+  author =       "R. Maclin and J. W. Shavlik",
+  editor =       "L. Birnbaum and G. Collins",
+  booktitle =    "Machine Learning: Proceedings of the Eighth
+                 International Workshop",
+  title =        "Refining Domain Theories Expressed as Finite-State
+                 Automata",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo CA",
+  year =         "1991",
+}
+
+@Article{Maclin-ml,
+  author =       "R. Maclin and J. W. Shavlik",
+  title =        "Using Knowledge-Based Neural Networks to Improve
+                 Algorithms: Refining the {Chou-Fasman} Algorithm for
+                 Protein Folding",
+  journal =      mlearn,
+  year =         "1993",
+}
+
+@InProceedings{MacQueen67,
+  author =       "James B. MacQueen",
+  booktitle =    "Proceedings of the Fifth Berkeley Symposium on
+                 Mathematical Statistics and Probability",
+  title =        "Some Methods for Classification and Analysis of
+                 Multivariate Observations",
+  volume =       "1",
+  pages =        "281--296",
+  year =         "1967",
+}
+
+@Article{Mahapatra+al-1997,
+  author =       "S. Mahapatra and R. N. Mahapatra and B. N. Chatterji",
+  title =        "A parallel formulation of back-propagation learning on
+                 distributed memory multiprocessors",
+  journal =      "Parallel Computing",
+  volume =       "22",
+  number =       "12",
+  publisher =    "Elsevier Science Publishers",
+  address =      "Amsterdam, The Netherlands",
+  pages =        "1661--1675",
+  year =         "1997",
+  ISSN =         "0167-8191",
+  doi =          "10.1016/S0167-8191(96)00051-8",
+}
+
+@incollection{Mairal-2009,
+  author =       "Julien Mairal and Francis Bach and Jean Ponce and
+                 Guillermo Sapiro and Andrew Zisserman",
+  title =        "Supervised Dictionary Learning",
+  editor =       NIPS21ed,
+  booktitle =    NIPS21,
+  pages =        "1033--1040",
+  publisher =    "NIPS Foundation",
+  year =         "2009",
+}
+@book{Maimon+Rokach-2005,
+    author = {Maimon, O. and Rokach, L.},
+    howpublished = {Hardcover},
+    isbn = {0387244352},
+    month = sep,
+    publisher = {Springer},
+    title = {Data Mining and Knowledge Discovery Handbook},
+    year = {2005}
+}
+
+@InProceedings{Makram-Ebeid89,
+  author    = {S. Makram-Ebeid and J.-A. Sirat and J.-R. Viala},
+  title     = {A Rationalized Back-Propagation Learning Algorithm},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {373--380},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@Article{mallat93matching,
+  author  = {S. Mallat and Z. Zhang},
+  title   = {Matching pursuit with time-frequency dictionaries},
+  journal = {IEEE Trans. Signal Proc.},
+  volume  = {41},
+  number  = {12},
+  pages   = {3397--3415},
+  month   = dec,
+  year    = {1993},
+}
+
+@InProceedings{malouf2002conll,
+  author =       "Robert Malouf",
+  editor =       "Dan Roth and Antal van den Bosch",
+  booktitle =    "Proceedings of CoNLL-2002",
+  title =        "A comparison of algorithms for maximum entropy
+                 parameter estimation",
+  address =      "Taipei, Taiwan",
+  pages =        "49--55",
+  year =         "2002",
+}
+
+@Book{Mandelbrot82,
+  author    = {B. B. Mandelbrot},
+  title     = {The Fractal Geometry of Nature},
+  publisher = {Freeman},
+  address   = {San Francisco},
+  year      = {1982},
+}
+
+@Book{Manning+Schutze99,
+  author =       "Christopher Manning and Hinrich Sch{\"u}tze",
+  title =        "Foundations of Statistical Natural Language
+                 Processing",
+  publisher =    "MIT Press",
+  year =         "1999",
+}
+
+@InProceedings{Mantysalo92firenze,
+  author    = {Jyri M{\"{a}}ntysalo and Kari Torkkola and Teuvo Kohonen},
+  title     = {Experiments on the use of {LVQ} in phoneme-level
+               segmentation of speech},
+  booktitle = {Proc. of the Second Workshop on Neural Networks for Speech
+               Processing},
+  publisher = {LINT},
+  address   = {Firenze (Italy)},
+  year      = {1992},
+}
+
+@article{Marcelja-1980,
+    author = {Marcelja, S.},
+    journal = {Journal of the Optical Society of America},
+    month = nov,
+    number = {11},
+    pages = {1297--1300},
+    title = {Mathematical description of the responses of simple cortical cells},
+    url = {http://view.ncbi.nlm.nih.gov/pubmed/7463179},
+    volume = {70},
+    year = {1980}
+}
+
+@Article{Marchand90,
+  author =       "M. Marchand and M. Golea and P. Ruj{\'a}n",
+  title =        "A Convergence Theorem for Sequential Learning in
+                 Two-Layer Perceptrons",
+  journal =      eul,
+  volume =       "11",
+  pages =        "487--492",
+  year =         "1990",
+}
+
+@Article{Marcotte-92,
+  author  = {P. Marcotte and G. Savard},
+  title   = {Novel approaches to the discrimination problem},
+  journal = {Zeitschrift f{\"u}r Operations Research (Theory)},
+  volume  = {36},
+  pages   = {517--545},
+  year    = {1992},
+}
+
+@Article{Marcus91,
+  author  = {C. M. Marcus and F. R. Waugh and R. M. Westervelt},
+  title   = {Nonlinear Dynamics and Stability of Analog Neural Networks},
+  journal = {Physica D},
+  volume  = {51},
+  pages   = {234--247},
+  note    = {(special issue)},
+  year    = {1991},
+}
+
+@Article{Marcus-et-al91,
+  author =       "C. M. Marcus and F. R. Waugh and R. M. Westervelt",
+  title =        "Nonlinear Dynamics and Stability of Analog Neural
+                 Networks",
+  journal =      physicaD,
+  volume =       "51",
+  pages =        "234--247",
+  year =         "1991",
+  note =         "(special issue)",
+}
+
+@Article{Markov13,
+  author =       "A. A. Markov",
+  title =        "An example of statistical investigation in the text of
+                 `{Eugene Onyegin}' illustrating coupling of `tests' in
+                 chains",
+  journal =      "Proceedings of the Academy of Science, St.
+                 Petersburg",
+  volume =       "7",
+  pages =        "153--162",
+  year =         "1913",
+}
+
+@Article{Markovitz-52,
+  author =       "H. M. Markowitz",
+  title =        "Portfolio Selection",
+  journal =      "Journal of Finance",
+  volume =       "7",
+  number =       "1",
+  pages =        "77--91",
+  year =         "1952",
+}
+
+@InProceedings{maron98,
+  author =       "Oded Maron and Tom{\'a}s Lozano-P{\'e}rez",
+  editor =       NIPS10ed,
+  booktitle =    NIPS10,
+  title =        "A Framework for Multiple-Instance Learning",
+  volume =       "10",
+  publisher =    "{MIT} Press",
+  year =         "1998",
+}
+
+@Article{Marquardt63,
+  author =       "D. W. Marquardt",
+  title =        "An algorithm for least-squares estimation of
+                 non-linear parameters",
+  journal =      "Journal of the Society for Industrial and Applied
+                 Mathematics",
+  volume =       "11",
+  number =       "2",
+  pages =        "431--441",
+  year =         "1963",
+}
+
+@Article{Marr69,
+  author  = {D. Marr},
+  title   = {A Theory of Cerebellar Cortex},
+  journal = jphysiol,
+  volume  = {202},
+  pages   = {437--470},
+  year    = {1969},
+}
+
+@Article{Marr70,
+  author  = {D. Marr},
+  title   = {A Theory for Cerebral Neocortex},
+  journal = PRSLB,
+  volume  = {176},
+  pages   = {161--234},
+  year    = {1970},
+}
+
+@Article{Marr71,
+  author  = {D. Marr},
+  title   = {Simple Memory: {A} Theory for Archicortex},
+  journal = PTRSL,
+  volume  = {262},
+  pages   = {23--81},
+  year    = {1971},
+}
+
+@Article{Marr76,
+  author  = {D. Marr and T. Poggio},
+  title   = {Cooperative Computation of Stereo Disparity},
+  journal = science,
+  volume  = {194},
+  year    = {1976},
+}
+
+@Book{Marr82,
+  author    = {D. Marr},
+  title     = {Vision},
+  publisher = {Freeman},
+  address   = {San Francisco},
+  year      = {1982},
+}
+
+@Article{Martin91,
+  author  = {G. L. Martin and J. A. Pittman},
+  title   = {Recognizing hand-printed letters and digits using
+             backpropagation learning},
+  journal = nc,
+  volume  = {3},
+  number  = {2},
+  pages   = {258--267},
+  year    = {1991},
+}
+
+@Article{Mashouk+Reed91,
+  author  = {K. A. Al-Mashouq and I. S. Reed},
+  title   = {Including Hints in Training Neural Nets},
+  journal = nc,
+  volume  = {3},
+  number  = {4},
+  pages   = {418},
+  year    = {1991},
+}
+
+@InProceedings{Mason98,
+  author =       "L. Mason and P. L. Bartlett and J. Baxter",
+  editor =       NIPS12ed,
+  booktitle =    NIPS12,
+  title =        "Direct Optimization of Margins Improves Generalization
+                 in Combined Classifiers",
+  year =         "1999",
+}
+
+@InProceedings{Mason99,
+  author    = {L. Mason and J. Baxter and P. Bartlett and M. Frean},
+  title     = {Boosting Algorithms as Gradient Descent},
+  editor    = NIPS12ed,
+  booktitle = NIPS12,
+  pages     = {512--518},
+  publisher = {MIT Press},
+  year      = {2000},
+}
+
+@InProceedings{Matan92,
+  author    = {O. Matan and C. J. C. Burges and Y. {LeCun} and J. S.
+               Denker},
+  title     = {Multi-Digit Recognition Using a Space Displacement Neural
+               Network},
+  editor    = NIPS4ed,
+  booktitle = NIPS4,
+  pages     = {488--495},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo CA},
+  year      = {1992},
+}
+
+@InProceedings{matic-92a,
+  author =       "N. Mati{\'c} and I. Guyon and L. Bottou and J. Denker
+                 and V. Vapnik",
+  booktitle =    "11th International Conference on Pattern Recognition",
+  title =        "Computer Aided Cleaning of Large Databases for
+                 Character Recognition",
+  volume =       "II",
+  pages =        "330--333",
+  year =         "1992",
+}
+
+@Misc{matrix-cookbook,
+  author =       "K. B. Petersen and M. S. Pedersen",
+  title =        "The Matrix Cookbook",
+  howpublished = "Technical University of Denmark",
+  month =        feb,
+  year =         "2006",
+  note =         "Version 20051003",
+  abstract =     "Matrix identities, relations and approximations. A
+                 desktop reference for quick overview of mathematics of
+                 matrices.",
+  keywords =     "Matrix identity, matrix relations, inverse, matrix
+                 derivative",
+}
+
+@Article{Mattis76,
+  author  = {D. Mattis},
+  title   = {Solvable Spin Systems with Random Interactions},
+  journal = plettA,
+  volume  = {56},
+  pages   = {421--422},
+  year    = {1976},
+}
+
+@Article{MaxEnt96,
+  author  = {Adam L. Berger and Vincent J. {Della Pietra} and
+             Stephen A. {Della Pietra}},
+  title   = {A maximum entropy approach to natural language processing},
+  journal = {Computational Linguistics},
+  volume  = {22},
+  pages   = {39--71},
+  year    = {1996},
+}
+
+@Article{Mayraz+Hinton-2002,
+  author  = {G. Mayraz and G. E. Hinton},
+  title   = {Recognizing handwritten digits using hierarchical products
+             of experts},
+  journal = {IEEE Transactions on Pattern Analysis and Machine
+             Intelligence},
+  volume  = {24},
+  pages   = {189--197},
+  year    = {2002},
+}
+
+@InProceedings{Mazaika87,
+  author    = {P. K. Mazaika},
+  title     = {A Mathematical Model of the {Boltzmann} Machine},
+  editor    = {M. Caudill and C. Butler},
+  booktitle = icnn,
+  volume    = {3},
+  pages     = {157--163},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  year      = {1987},
+}
+
+@InProceedings{mbbf-bagd-00,
+  author    = {L. Mason and J. Baxter and P. L. Bartlett and M. Frean},
+  title     = {Boosting algorithms as gradient descent},
+  editor    = NIPS12ed,
+  booktitle = NIPS12,
+  pages     = {512--518},
+  year      = {2000},
+}
+
+@InProceedings{McCallum+Nigam-1998,
+  author =       "A. {McCallum} and K. Nigam",
+  booktitle =    "Proceedings of the Fifteenth International Conference on
+                 Machine Learning (ICML'98)",
+  editor =       "Jude W. Shavlik",
+  publisher =    "Morgan Kaufmann",
+  title =        "Employing {EM} and pool-based active learning for text
+                 classification",
+  year =         "1998",
+}
+
+@InProceedings{McCallumA2006,
+  author       = {Andrew McCallum and Chris Pal and Gregory Druck and
+                  Xuerui Wang},
+  title        = {Multi-Conditional Learning: Generative/Discriminative
+                  Training for Clustering and Classification},
+  booktitle    = {Twenty-first National Conference on Artificial
+                  Intelligence (AAAI-06)},
+  publisher    = {AAAI Press},
+  year         = {2006},
+  OPTbibsource = {DBLP, http://dblp.uni-trier.de},
+  OPTcrossref  = {DBLP:conf/aaai/2006},
+}
+
+@article{McClelland+Rumelhart-81,
+  author =       "James L. {McClelland} and David E. Rumelhart",
+  title =        "An interactive activation model of context effects in
+                 letter perception",
+  journal =      "Psychological Review",
+  volume =       "88",
+  pages =        "375--407",
+  year =         "1981",
+}
+
+@Book{McClelland86a,
+  author =       "James L. McClelland and David E. Rumelhart and
+                 {the PDP Research Group}",
+  title =        "Parallel Distributed Processing: Explorations in the
+                 Microstructure of Cognition",
+  volume =       "2",
+  publisher =    "MIT Press",
+  address =      "Cambridge",
+  year =         "1986",
+}
+
+@InCollection{McClelland86b,
+  author    = {J. L. McClelland and J. L. Elman},
+  title     = {Interactive Processes in Speech Perception: The {TRACE}
+               Model},
+  editor    = {J. L. McClelland and D. E. Rumelhart},
+  booktitle = pdp,
+  chapter   = {15},
+  volume    = {2},
+  pages     = {58--121},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1986},
+}
+
+@Book{McClelland88,
+  author    = {J. L. McClelland and D. E. Rumelhart},
+  title     = {Explorations in Parallel Distributed Processing},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1988},
+}
+
+@Article{McCulloch43,
+  author =       "W. S. McCulloch and W. Pitts",
+  title =        "A Logical Calculus of the Ideas Immanent in Nervous
+                 Activity",
+  journal =      bmbiophys,
+  volume =       "5",
+  pages =        "115--133",
+  year =         "1943",
+}
+
+@InProceedings{Mcdermott89,
+  author       = {E. McDermott and S. Katagiri},
+  title        = {Shift-Invariant, Multi-Category Phoneme Recognition using
+                  {Kohonen's} {LVQ2}},
+  booktitle    = icassp,
+  volume       = {1},
+  pages        = {81--84},
+  organization = {IEEE},
+  address      = {Glasgow, Scotland},
+  year         = {1989},
+}
+
+@Article{Mcdermott91,
+  author   = {E. McDermott and S. Katagiri},
+  title    = {{LVQ}-based shift-tolerant phoneme recognition},
+  journal  = {IEEE Transactions on Signal Processing},
+  volume   = {39},
+  number   = {6},
+  pages    = {1398--1411},
+  OPTmonth = {June},
+  year     = {1991},
+}
+
+@Article{McEliece87,
+  author =       "R. J. McEliece and E. C. Posner and E. R. Rodemich and
+                 S. S. Venkatesh",
+  title =        "The Capacity of the {Hopfield} Associative Memory",
+  journal =      ieeeit,
+  volume =       "33",
+  pages =        "461--482",
+  year =         "1987",
+}
+
+@InProceedings{McInerny89,
+  author    = {J. M. McInerny and K. G. Haines and S. Biafore and R.
+               Hecht-Nielsen},
+  title     = {Back Propagation Error Surfaces Can Have Local Minima},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {627},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@Book{McLachlan2000,
+  author    = {G. J. McLachlan and D. Peel},
+  title     = {Finite Mixture Models},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {2000},
+}
+
+@Book{McLachlan88,
+  author =       "G. J. McLachlan and K. E. Basford",
+  title =        "Mixture models: Inference and applications to
+                 clustering",
+  publisher =    "Marcel Dekker",
+  year =         "1988",
+}
+
+@book{Mclachlan-2004,
+    author = {Geoffrey J. McLachlan},
+    howpublished = {Paperback},
+    isbn = {0471691151},
+    month = aug,
+    publisher = {Wiley-Interscience},
+    title = {Discriminant Analysis and Statistical Pattern Recognition},
+    year = {2004}
+}
+
+@Article{McLoone+Irwin-1997,
+  author  = {S. McLoone and G. W. Irwin},
+  title   = {Fast Parallel Off-Line Training of Multilayer Perceptrons},
+  journal = {IEEE Transactions on Neural Networks},
+  volume  = {8},
+  number  = {3},
+  pages   = {646--653},
+  year    = {1997},
+}
+
+@Book{Mead89,
+  author    = {C. Mead},
+  title     = {Analog {VLSI} and Neural Systems},
+  publisher = {Addison Wesley},
+  address   = {Reading},
+  year      = {1989},
+}
+
+@InProceedings{Meila96,
+  author =       "M. Meila and M. I. Jordan",
+  editor =       NIPS8ed,
+  booktitle =    NIPS8,
+  title =        "Learning fine motion by {Markov} mixtures of experts",
+  publisher =    "MIT Press, Cambridge, MA",
+  year =         "1996",
+}
+
+@InProceedings{Mel+Koch90,
+  author    = {Bartlett W. Mel and Christof Koch},
+  title     = {{Sigma}-{Pi} Learning: On Radial Basis Functions and
+               Cortical Associative Learning},
+  editor    = NIPS2ed,
+  booktitle = NIPS2,
+  pages     = {474--481},
+  publisher = {Morgan Kaufmann},
+  year      = {1990},
+}
+
+@InProceedings{Melvilleetal,
+  author    = {P. Melville and R. J. Mooney and R. Nagarajan},
+  title     = {Content-boosted collaborative filtering},
+  booktitle = {Proceedings of the ACM SIGIR Workshop on Recommender
+               Systems},
+  month     = sep,
+  year      = {2001},
+  location  = {New Orleans, LA},
+  keywords  = {boosted collaborative filtering content},
+}
+
+@InProceedings{Memisevic+Hinton-2007,
+  author    = {Roland Memisevic and Geoffrey E. Hinton},
+  title     = {Unsupervised learning of image transformations},
+  booktitle = cvpr07,
+  year      = {2007},
+}
+
+@PhdThesis{Memisevic-thesis,
+  author =       "Roland Memisevic",
+  title =        "Non-linear latent factor models for revealing
+                 structure in high-dimensional data",
+  school =       "Department of Computer Science, University of
+                 Toronto",
+  address =      "Toronto, Ontario, Canada",
+  year =         "2007",
+}
+
+@Book{Mendelson97,
+  author =       "E. Mendelson",
+  title =        "Introduction to Mathematical Logic",
+  edition =      "Fourth",
+  publisher =    "Chapman \& Hall",
+  year =         "1997",
+}
+
+@InProceedings{Merkel-1994,
+  author    = {Magnus Merkel and Bernt Nilsson and Lars Ahrenberg},
+  title     = {A Phrase-Retrieval System Based on Recurrence},
+  booktitle = {Proceedings of the 4th Workshop on Very Large Corpora},
+  address   = {Tokyo, Japan},
+  year      = {1994},
+}
+
+@InProceedings{Merkel-2000,
+  author    = {Magnus Merkel and Mikael Andersson},
+  title     = {Knowledge-lite extraction of multi-word units with language
+               filters and entropy thresholds},
+  booktitle = {Proceedings of RIAO'2000},
+  volume    = {1},
+  pages     = {737--746},
+  year      = {2000},
+}
+
+@InProceedings{Merlo86,
+  author    = {E. Merlo and R. De Mori and G. Mercier and M. Palakal},
+  title     = {A continuous parameter and frequency domain based {Markov}
+               model},
+  booktitle = icassp,
+  pages     = {1597--1600},
+  year      = {1986},
+}
+
+@Article{Merzenich-2000,
+  author =       "M. Merzenich",
+  title =        "Seeing in the Sound Zone",
+  journal =      "Nature",
+  volume =       "404",
+  pages =        "820--821",
+  year =         "2000",
+}
+
+@Article{Metropolis53,
+  author =       "N. Metropolis and A. W. Rosenbluth and M. N.
+                 Rosenbluth and A. H. Teller and E. Teller",
+  title =        "Equation of State Calculations by Fast Computing
+                 Machines",
+  journal =      jcp,
+  volume =       "21",
+  pages =        "1087--1092",
+  year =         "1953",
+}
+
+@Article{Mezard85,
+  author =       "M. M{\'e}zard and G. Parisi",
+  title =        "Replicas and Optimization",
+  journal =      jppl,
+  volume =       "46",
+  pages =        "771--778",
+  year =         "1985",
+}
+
+@Article{Mezard86,
+  author =       "M. M{\'e}zard and G. Parisi",
+  title =        "A Replica Analysis of the Travelling Salesman
+                 Problem",
+  journal =      jpp,
+  volume =       "47",
+  pages =        "1285--1296",
+  year =         "1986",
+}
+
+@Book{Mezard87,
+  author =       "M. M{\'e}zard and G. Parisi and M. A. Virasoro",
+  title =        "Spin Glass Theory and Beyond",
+  publisher =    "World Scientific",
+  address =      "Singapore",
+  year =         "1987",
+}
+
+@Article{Mezard88,
+  author =       "M. M{\'e}zard and G. Parisi",
+  title =        "The Euclidean Matching Problem",
+  journal =      jpp,
+  volume =       "49",
+  pages =        "2019--2025",
+  year =         "1988",
+}
+
+@Article{Mezard89,
+  author =       "M. M{\'e}zard and J.-P. Nadal",
+  title =        "Learning in Feedforward Layered Networks: The Tiling
+                 Algorithm",
+  journal =      jpa,
+  volume =       "22",
+  pages =        "2191--2204",
+  year =         "1989",
+}
+
+@article{Micchelli-1986,
+  author  = {C. A. Micchelli},
+  title   = {Interpolation of scattered data: distance matrices and
+             conditionally positive definite functions},
+  journal = {Constructive Approximation},
+  volume  = {2},
+  pages   = {11--22},
+  year    = {1986},
+}
+
+@InProceedings{micchelli05,
+  author =       "Charles A. Micchelli and Massimiliano Pontil",
+  editor =       NIPS17ed,
+  booktitle =    NIPS17,
+  title =        "Kernels for Multi-task Learning",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "921--928",
+  year =         "2005",
+}
+
+@InProceedings{Mihalcea2002,
+  author =       "Rada Mihalcea",
+  booktitle =    "Proceedings of the 6th Conference on Natural Language
+                 Learning",
+  title =        "Instance Based Learning with Automatic Feature
+                 Selection Applied to Word Sense Disambiguation",
+  year =         "2002",
+  URL =          "http://citeseer.nj.nec.com/587173.html",
+}
+
+@article{Miikkulainen91,
+  author  = {R. Miikkulainen and M. G. Dyer},
+  title   = {Natural language processing with modular {PDP}
+             networks and distributed lexicon},
+  journal = {Cognitive Science},
+  volume  = {15},
+  pages   = {343--399},
+  year    = {1991},
+}
+
+@article{Miller+Sachs83,
+  author  = {M. M. Miller and M. B. Sachs},
+  title   = {Representation of stop consonants in the discharge
+             patterns of auditory nerve fibers},
+  journal = jasa,
+  volume  = {74},
+  number  = {2},
+  pages   = {502--517},
+  year    = {1983},
+}
+
+@phdthesis{miller02,
+  author = {Erik G. Miller},
+  title  = {Learning from one example in machine vision by sharing
+            probability densities},
+  school = {Massachusetts Institute of Technology},
+  year   = {2002},
+}
+
+@phdthesis{miller02one,
+  author = {Erik Miller},
+  title  = {Learning from one example in machine vision by sharing
+            probability densities},
+  school = {Massachusetts Institute of Technology, Department of
+            Electrical Engineering and Computer Science},
+  year   = {2002},
+}
+
+@InProceedings{Miller89,
+  author =       "G. F. Miller and P. M. Todd and S. U. Hegde",
+  editor =       "J. D. Schaffer",
+  booktitle =    "Proceedings of the Third International Conference on
+                 Genetic Algorithms",
+  title =        "Designing Neural Networks Using Genetic Algorithms",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Arlington",
+  pages =        "379--384",
+  year =         "1989",
+}
+
+@article{MillerD1996,
+  author    = {David Miller and Kenneth Rose},
+  title     = {Hierarchical, unsupervised learning with growing via
+               phase transitions},
+  journal   = {Neural Computation},
+  volume    = {8},
+  number    = {2},
+  pages     = {425--450},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA, USA},
+  year      = {1996},
+  ISSN      = {0899-7667},
+}
+
+@article{Miller-ijprai93,
+  author  = {C. B. Miller and C. L. Giles},
+  title   = {Experimental Comparison of the Effect of Order in
+             Recurrent Neural Networks},
+  journal = {Int. Journal of Pattern Recognition and Artificial
+             Intelligence},
+  pages   = {205--228},
+  year    = {1993},
+  note    = {Special Issue on Applications of Neural Networks to
+             Pattern Recognition (I. Guyon Ed.)},
+}
+
+@book{Minc-88,
+  author    = {H. Minc},
+  title     = {Nonnegative Matrices},
+  publisher = {John Wiley \& Sons},
+  address   = {New York},
+  year      = {1988},
+}
+
+@book{Minsky67,
+  author    = {M. L. Minsky},
+  title     = {Computation: Finite and Infinite Machines},
+  publisher = {Prentice-Hall},
+  address   = {Englewood Cliffs},
+  year      = {1967},
+}
+
+@book{Minsky69,
+  author    = {M. L. Minsky and S. A. Papert},
+  title     = {Perceptrons},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1969},
+}
+
+@article{Misra-1997,
+  author  = {Manavendra Misra},
+  title   = {Parallel Environments for Implementing Neural
+             Networks},
+  journal = {Neural Computing Surveys},
+  volume  = {1},
+  pages   = {48--60},
+  year    = {1997},
+}
+
+@article{Mitchison89,
+  author  = {G. J. Mitchison and R. M. Durbin},
+  title   = {Bounds on the Learning Capacity of Some Multi-Layer
+             Networks},
+  journal = biocyb,
+  volume  = {60},
+  pages   = {345--356},
+  year    = {1989},
+}
+
+@article{ML:Bauer:boost,
+  author  = {Eric Bauer and Ron Kohavi},
+  title   = {An empirical comparison of voting classification
+             algorithms: Bagging, Boosting, and variants},
+  journal = {Machine Learning},
+  year    = {1998},
+}
+
+@Article{ML:Breiman:bagging,
+  author =       "Leo Breiman",
+  title =        "Bagging Predictors",
+  journal =      "Machine Learning",
+  volume =       "24",
+  number =       "2",
+  pages =        "123--140",
+  year =         "1996",
+}
+
+@Article{ML:Dietterich:adaboost+noise,
+  author =       "Thomas G. Dietterich",
+  title =        "An experimental comparison of three methods for
+                 constructing ensembles of decision trees: Bagging,
+                 Boosting, and randomization",
+  journal =      "submitted to Machine Learning",
+  year =         "1998",
+  note =         "Available at {\tt
+                 ftp://ftp.cs.orst.edu/pub/tgd/papers/tr-randomized-c4.ps.gz}",
+}
+
+@article{ML:Schapire:weaklearn,
+  author  = {Robert E. Schapire},
+  title   = {The strength of weak learnability},
+  journal = {Machine Learning},
+  volume  = {5},
+  number  = {2},
+  pages   = {197--227},
+  year    = {1990},
+}
+
+@misc{MLJ-model-selection-combination-2001,
+  author = {Y. Bengio and D. Schuurmans},
+  title  = {Special Issue on New methods for model selection and
+            model combination},
+  note   = {{\em Machine Learning}, 48(1)},
+  year   = {2002},
+}
+
+@inproceedings{Mnih+Hinton-2007,
+  author    = {Andriy Mnih and Geoffrey E. Hinton},
+  title     = {Three New Graphical Models for Statistical Language
+               Modelling},
+  booktitle = ICML07,
+  editor    = ICML07ed,
+  publisher = ICML07publ,
+  pages     = {641--648},
+  year      = {2007},
+}
+
+@inproceedings{Mnih+Hinton-2007-small,
+  author    = {Andriy Mnih and Geoffrey E. Hinton},
+  title     = {Three New Graphical Models for Statistical Language
+               Modelling},
+  booktitle = {ICML 2007},
+  year      = {2007},
+}
+
+@inproceedings{Mnih+Hinton-2009,
+  author    = {Andriy Mnih and Geoffrey E. Hinton},
+  title     = {A Scalable Hierarchical Distributed Language Model},
+  booktitle = NIPS21,
+  editor    = NIPS21ed,
+  pages     = {1081--1088},
+  year      = {2009},
+}
+
+@inproceedings{mohri-pereira-riley96,
+  author    = {M. Mohri and F. C. N. Pereira and M. D. Riley},
+  title     = {Weighted automata in text and speech processing},
+  booktitle = {ECAI 96, 12th European Conference on Artificial
+               Intelligence},
+  pages     = {},
+  year      = {1996},
+}
+
+@Article{Mohri96,
+  author =       "M. Mohri",
+  title =        "Finite-State Transducers in Language and Speech
+                 Processing",
+  journal =      "Computational Linguistics",
+  volume =       "23",
+  number =       "2",
+  pages =        "269--311",
+  year =         "1997",
+}
+
+@InProceedings{Molina02,
+  author =       "A. Molina and F. Pla and E. Segarra and L. Moreno",
+  booktitle =    "Proceedings of 3rd International Conference on
+                 Language Resources and Evaluation, {LREC2002}",
+  title =        "Word Sense Disambiguation using Statistical Models
+                 and {WordNet}",
+  address =      "Las Palmas de Gran Canaria, Spain",
+  year =         "2002",
+}
+
+@PhdThesis{moller,
+  author =       "M. M{\o}ller",
+  title =        "Efficient Training of Feed-Forward Neural Networks",
+  school =       "Aarhus University",
+  address =      "Aarhus, Denmark",
+  year =         "1993",
+}
+
+@InProceedings{moller-92,
+  author =       "M. M{\o}ller",
+  booktitle =    "Neural Networks for Signal Processing 2",
+  title =        "Supervised learning on large redundant training sets",
+  publisher =    "IEEE press",
+  year =         "1992",
+}
+
+@inproceedings{Momma2003,
+  author    = {M. Momma and K. P. Bennett},
+  title     = {Sparse Kernel Partial Least Squares Regression},
+  booktitle = colt03,
+  year      = {2003},
+}
+
+@InProceedings{Montana89,
+  author =       "D. J. Montana and L. Davis",
+  editor =       "N. S. Sridharan",
+  booktitle =    "Eleventh International Joint Conference on Artificial
+                 Intelligence",
+  title =        "Training Feedforward Networks Using Genetic
+                 Algorithms",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Detroit",
+  pages =        "762--767",
+  year =         "1989",
+}
+
+@InProceedings{Moody88,
+  author =       "J. Moody and C. Darken",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  booktitle =    cmss88,
+  title =        "Learning with Localized Receptive Fields",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Pittsburgh",
+  pages =        "133--143",
+  year =         "1988",
+}
+
+@article{Moody89,
+  author  = {J. Moody and C. Darken},
+  title   = {Fast Learning in Networks of Locally-Tuned Processing
+             Units},
+  journal = nc,
+  volume  = {1},
+  pages   = {281--294},
+  year    = {1989},
+}
+
+@inproceedings{Moody92,
+  author    = {J. E. Moody},
+  title     = {The Effective Number of Parameters: An Analysis of
+               Generalization and Regularization in Nonlinear Learning
+               Systems},
+  booktitle = NIPS4,
+  editor    = NIPS4ed,
+  pages     = {847--854},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1992},
+}
+
+@inproceedings{Moody92b,
+  author    = {J. Moody and J. Utans},
+  title     = {Principled architecture selection for neural networks:
+               application to corporate bond rating prediction},
+  booktitle = NIPS4,
+  editor    = NIPS4ed,
+  pages     = {683--690},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1992},
+}
+
+@article{moody93,
+  author  = {J. Moody and U. Levin and S. Rehfuss},
+  title   = {Predicting the {U.S.} Index of Industrial Production},
+  journal = {Neural Network World},
+  volume  = {3},
+  number  = {6},
+  pages   = {791--794},
+  year    = {1993},
+}
+
+@incollection{Moody94,
+  author    = {J. Moody},
+  title     = {Prediction Risk and Architecture Selection for Neural
+               Networks},
+  booktitle = {From Statistics to Neural Networks: Theory and Pattern
+               Recognition Applications},
+  publisher = {Springer},
+  year      = {1994},
+}
+
+@InCollection{Moody98,
+  author =       "J. Moody",
+  editor =       "G. B. Orr and K.-R. M{\"u}ller",
+  booktitle =    "Neural Networks: Tricks of the Trade",
+  title =        "Forecasting the economy with neural nets: a survey of
+                 challenges",
+  publisher =    "Springer",
+  pages =        "347--372",
+  year =         "1998",
+}
+
+@InProceedings{Moore88,
+  author =       "B. Moore",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  booktitle =    cmss88,
+  title =        "{ART}1 and Pattern Clustering",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Pittsburgh",
+  pages =        "174--185",
+  year =         "1988",
+}
+
+@InProceedings{MoosmannF2007,
+  author =       "Frank Moosmann and Bill Triggs and Frederic Jurie",
+  editor =       NIPS19ed,
+  booktitle =    NIPS19,
+  title =        "Fast Discriminative Visual Codebooks using Randomized
+                 Clustering Forests",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "985--992",
+  year =         "2007",
+}
+
+@InCollection{More+Wu-1996,
+  author =       "Jorge Mor{\'e} and Zhijun Wu",
+  editor =       "G. Di Pillo and F. Giannessi",
+  booktitle =    "Nonlinear Optimization and Applications",
+  title =        "Smoothing techniques for macromolecular global
+                 optimization",
+  publisher =    "Plenum Press",
+  year =         "1996",
+}
+
+@InProceedings{Morgan+Bourlard90b,
+  author =       "N. Morgan and H. Bourlard",
+  editor =       NIPS2ed,
+  booktitle =    NIPS2,
+  title =        "Generalization and parameter estimation in feedforward
+                 nets: some experiments",
+  publisher =    "Morgan Kaufmann",
+  address =      "Denver, CO",
+  pages =        "630--637",
+  year =         "1990",
+}
+
+@inproceedings{Morgan90,
+  author    = {N. Morgan and H. Bourlard},
+  title     = {Continuous Speech Recognition Using Multilayer
+               Perceptrons with Hidden {Markov} Models},
+  booktitle = icassp,
+  pages     = {413--416},
+  address   = {Albuquerque, NM},
+  year      = {1990},
+}
+
+@InProceedings{Morgan93,
+  author =       "M. Cohen and H. Franco and N. Morgan and D. Rumelhart
+                 and V. Abrash",
+  editor =       NIPS5ed,
+  booktitle =    NIPS5,
+  title =        "Context-Dependent Multiple Distribution Phonetic
+                 Modeling with {MLP}s",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "649--657",
+  year =         "1993",
+}
+
+@inproceedings{Morgan95,
+  author    = {N. Morgan and Y. Konig and S. L. Wu and H. Bourlard},
+  title     = {Transition-based Statistical Training for {ASR}},
+  booktitle = {Proceedings of IEEE Automatic Speech Recognition
+               Workshop (Snowbird)},
+  pages     = {133--134},
+  year      = {1995},
+}
+
+@InProceedings{Morin+Bengio-2005,
+  author =       "Fr{\'e}d{\'e}ric Morin and Yoshua Bengio",
+  editor =       aistats05ed,
+  booktitle =    aistats05,
+  title =        "Hierarchical Probabilistic Neural Network Language
+                 Model",
+  publisher =    "",
+  date =         "Jan 6-8, 2005",
+  location =     "Savannah Hotel, Barbados",
+  pages =        "246--252",
+  year =         "2005",
+}
+
+@article{Mosesova-2006,
+  author  = {S. A. Mosesova and H. A. Chipman and R. J. MacKay and
+             S. H. Steiner},
+  title   = {Profile monitoring using mixed effects models},
+  journal = {Submitted to Technometrics},
+  year    = {2006},
+}
+
+@article{MosesY1996,
+  author    = {Y. Moses and S. Ullman and S. Edelman},
+  title     = {Generalization to novel images in upright and inverted
+               faces},
+  journal   = {Perception},
+  volume    = {25},
+  number    = {4},
+  pages     = {443--461},
+  year      = {1996},
+  OPTannote = {},
+  OPTkey    = {},
+  OPTmonth  = {},
+  OPTnote   = {},
+}
+
+@article{Movellan-2002,
+  author  = {Javier R. Movellan and Paul Mineiro and R. J. Williams},
+  title   = {A Monte-Carlo {EM} approach for partially observable
+             diffusion processes: theory and applications to neural
+             networks},
+  journal = {Neural Computation},
+  volume  = {14},
+  pages   = {1501--1544},
+  year    = {2002},
+}
+
+@techreport{Movelland+McClelland91,
+  author      = {Javier R. Movellan and James L. McClelland},
+  title       = {Learning Continuous Probability Distributions with the
+                 Contrastive {Hebbian} Algorithm},
+  institution = {Carnegie Mellon University, Dept. of Psychology},
+  number      = {PDP.CNS.91.2},
+  address     = {Pittsburgh, PA},
+  year        = {1991},
+}
+
+@InCollection{Mozer+Smolensky89,
+  author =       "M. C. Mozer and P. Smolensky",
+  editor =       NIPS1ed,
+  booktitle =    NIPS1,
+  title =        "Skeletonization: {A} technique for trimming the fat
+                 from a network via relevance assessment",
+  publisher =    "Morgan Kaufmann",
+  pages =        "107--115",
+  year =         "1989",
+}
+
+@inproceedings{Mozer-nips92,
+  author    = {M. C. Mozer},
+  title     = {The induction of Multiscale Temporal Structure},
+  booktitle = NIPS4,
+  editor    = NIPS4ed,
+  pages     = {275--282},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1992},
+}
+
+@article{mozer-smolensky-89,
+  author  = {M. C. Mozer and P. Smolensky},
+  title   = {Using relevance to reduce network size automatically},
+  journal = {Connection Science},
+  volume  = {1},
+  number  = {1},
+  pages   = {3--16},
+  year    = {1989},
+  key     = {Mozer},
+}
+
+@article{Mozer-trnn2000,
+  author  = {M. C. Mozer and R. Wolniewicz and D. B. Grimes and E.
+             Johnson and H. Kaushansky},
+  title   = {Predicting Subscriber Dissatisfaction and Improving
+             Retention in the Wireless Telecommunications Industry},
+  journal = {IEEE Transactions on Neural Networks, special issue on
+             Data Mining and Knowledge Discovery},
+  volume  = {11},
+  number  = {3},
+  year    = {2000},
+}
+
+@article{Mozer89,
+  author  = {M. C. Mozer},
+  title   = {A Focused Back-Propagation Algorithm for Temporal
+             Pattern Recognition},
+  journal = cs,
+  volume  = {3},
+  pages   = {349--381},
+  year    = {1989},
+}
+
+@incollection{Mozer93,
+  author    = {M. C. Mozer},
+  title     = {Neural net architectures for temporal sequence
+               processing},
+  booktitle = {Predicting the Future and Understanding the Past},
+  editor    = {A. Weigend and N. Gershenfeld},
+  pages     = {243--264},
+  publisher = {Addison-Wesley},
+  address   = {Redwood City, CA},
+  year      = {1993},
+}
+
+@TechReport{MPIforum,
+  author =       "Jack Dongarra and David Walker and {The Message
+                 Passing Interface Forum}",
+  title =        "{MPI}: {A} Message Passing Interface Standard",
+  institution =  "University of Tennessee",
+  year =         "1995",
+  note =         "http://www-unix.mcs.anl.gov/mpi",
+}
+
+@Article{multidimensional-FGS-83,
+  author =       "J. H. Friedman and E. Grosse and W. Stuetzle",
+  title =        "Multidimensional additive spline approximation",
+  journal =      "SIAM Journal on Scientific and Statistical Computing",
+  volume =       "4",
+  number =       "2",
+  pages =        "291--301",
+  year =         "1983",
+}
+
+@InProceedings{Munro87,
+  author =       "P. Munro",
+  booktitle =    "The Ninth Annual Conference of the Cognitive Science
+                 Society",
+  title =        "A Dual Back-Propagation Scheme for Scalar Reward
+                 Learning",
+  publisher =    "Lawrence Erlbaum, Hillsdale",
+  address =      "Seattle",
+  pages =        "165--176",
+  year =         "1987",
+}
+
+@InProceedings{MurraySal09,
+  author =       "Iain Murray and Ruslan Salakhutdinov",
+  title =        "Evaluating probabilities under high-dimensional latent variable models",
+  booktitle =    NIPS21,
+  editor =       NIPS21ed,
+  volume =       "21",
+  pages =        "1137--1144",
+  year =         "2009",
+}
+
+@InProceedings{Murveit93,
+  author =       "H. Murveit and J. Butzberger and V. Digalakis and M.
+                 Weintraub",
+  booktitle =    icassp,
+  title =        "Large-vocabulary dictation using {SRI}'s {DECIPHER}
+                 speech recognition system: Progressive search
+                 techniques",
+  volume =       "2",
+  address =      "Minneapolis, Minnesota",
+  pages =        "319--322",
+  year =         "1993",
+}
+
+@article{Muselli97,
+  author  = {M. Muselli},
+  title   = {On convergence properties of pocket algorithm},
+  journal = {IEEE Transactions on Neural Networks},
+  volume  = {8},
+  pages   = {623--629},
+  year    = {1997},
+}
+
+@Article{Mutch-Lowe-2008,
+  author =       "Jim Mutch and David G. Lowe",
+  title =        "Object class recognition and localization using sparse features with limited receptive fields",
+  journal =      "International Journal of Computer Vision",
+  volume =       "80",
+  number =       "1",
+  pages =        "45--57",
+  year =         "2008",
+}
+
+@article{myles90multiclass,
+  author  = {J. Myles and D. Hand},
+  title   = {The Multi-Class Measure Problem in Nearest Neighbour
+             Discrimination Rules},
+  journal = {Pattern Recognition},
+  volume  = {23},
+  pages   = {1291--1297},
+  year    = {1990},
+}
+
+@Article{Nadal86,
+  author =       "J.-P. Nadal and J.-P. Changeux and G. Toulouse and S.
+                 Dehaene",
+  title =        "Networks of Formal Neurons and Memory Palimpsests",
+  journal =      eul,
+  volume =       "1",
+  pages =        "535--542",
+  year =         "1986",
+}
+
+@article{Nadaraya64,
+  author  = {E. A. Nadaraya},
+  title   = {On estimating regression},
+  journal = {Theory of Probability and its Applications},
+  volume  = {9},
+  pages   = {141--142},
+  year    = {1964},
+}
+
+@Article{Nadaraya65,
+  author =       "E. A. Nadaraya",
+  title =        "On nonparametric estimates of density functions and
+                 regression curves",
+  journal =      "Theory of Probability and its Applications",
+  volume =       "10",
+  pages =        "186--190",
+  year =         "1965",
+}
+
+@Article{Nadas85,
+  author =       "Arthur N{\'a}das",
+  title =        "On {Turing's} Formula for Word Probabilities",
+  journal =      "IEEE Transactions on Acoustics, Speech, and Signal
+                 Processing",
+  volume =       "33",
+  number =       "6",
+  pages =        "1415--1417",
+  month =        dec,
+  year =         "1985",
+  copy =         "yes",
+}
+
+@Article{Nadas85-small,
+  author =       "Arthur N{\'a}das",
+  title =        "On {Turing's} Formula for Word Probabilities",
+  journal =      "ASSP",
+  volume =       "33",
+  number =       "6",
+  pages =        "1415--1417",
+  month =        dec,
+  year =         "1985",
+  copy =         "yes",
+}
+
+@Article{Nadas88,
+  author =       "A. N{\'a}das and D. Nahamoo and M. A. Picheny",
+  title =        "On a model-robust training method for speech
+                 recognition",
+  journal =      "IEEE Transactions on Acoustics, Speech, and Signal
+                 Processing",
+  volume =       "ASSP-36",
+  number =       "9",
+  pages =        "1432--1436",
+  year =         "1988",
+}
+
+@article{Nadeau-Bengio-2003,
+  author  = {Claude Nadeau and Yoshua Bengio},
+  title   = {Inference for the Generalization Error},
+  journal = {Machine Learning},
+  volume  = {52},
+  number  = {3},
+  pages   = {239--281},
+  year    = {2003},
+}
+
+@article{Nadeau-Bengio-2003-small,
+  author  = {Claude Nadeau and Yoshua Bengio},
+  title   = {Inference for the Generalization Error},
+  journal = {Machine Learning},
+  volume  = {52(3)},
+  pages   = {239--281},
+  year    = {2003},
+}
+
+@inproceedings{Nadeau00-nips,
+  author    = {Claude Nadeau and Yoshua Bengio},
+  title     = {Inference for the Generalization Error},
+  booktitle = NIPS12,
+  editor    = NIPS12ed,
+  pages     = {307--313},
+  publisher = {MIT Press},
+  year      = {2000},
+}
+
+@InProceedings{Bonneville+al-1998,
+  author =       "M. Bonneville and J. Meunier and Y. Bengio and J. P. Soucy",
+  booktitle =    "SPIE Medical Imaging 1998",
+  title =        "Support Vector Machines for Improving the classification of Brain {PET} Images",
+  address =      "San Diego",
+  year =         "1998",
+}
+
+@techreport{Nadeau99-TR,
+  author      = {Claude Nadeau and Yoshua Bengio},
+  title       = {Inference for the Generalization Error},
+  institution = {CIRANO},
+  address     = {Montreal, Quebec, Canada},
+  year        = {1999},
+}
+
+@inproceedings{nag86,
+  author    = {R. Nag and K. H. Wong and F. Fallside},
+  title     = {Script recognition using hidden {Markov} models},
+  booktitle = icassp,
+  pages     = {2071--2074},
+  address   = {Tokyo},
+  year      = {1986},
+}
+
+@MastersThesis{Nahm-2005,
+  author =       "E. Nahm",
+  title =        "Classification models for transactional graph data",
+  school =       "Department of Mathematics and Statistics, Acadia University",
+  year =         "2005",
+}
+
+@article{Naka-Rushton-1966a,
+ author = {K.I. Naka and W.A.H. Rushton},
+ year = 1966,
+ title = {{S}-potentials from colour units in the retina of fish (Cyprinidae)},
+ journal = {J. Physiol.}, 
+ volume = 185, 
+ pages = {536--555},
+}
+
+@article{Naka-Rushton-1966b,
+ author = {K.I. Naka and W.A.H. Rushton},
+ year = 1966,
+ title = {An attempt to analyse colour perception by electrophysiology},
+ journal = {J. Physiol.}, 
+ volume = 185, 
+ pages = {556--586},
+}
+
+
+@inproceedings{NakagawaT04,
+  author    = {Tetsuji Nakagawa and Taku Kudoh and Yuji Matsumoto},
+  title     = {Unknown Word Guessing and Part-of-Speech Tagging Using
+               Support Vector Machines},
+  booktitle = {Proceedings of the Sixth Natural Language Processing
+               Pacific Rim Symposium},
+  pages     = {325--331},
+  address   = {Tokyo, Japan},
+  year      = {2001},
+}
+
+@article{Naradraya70,
+  author  = {E. A. Nadaraya},
+  title   = {Remarks on nonparametric estimates for density
+             functions and regression curves},
+  journal = {Theory of Probability and its Applications},
+  volume  = {15},
+  pages   = {134--137},
+  year    = {1970},
+}
+
+@book{Narendra89,
+  author    = {K. Narendra and M. A. L. Thathachar},
+  title     = {Learning Automata: An Introduction},
+  publisher = {Prentice-Hall},
+  address   = {Englewood Cliffs},
+  year      = {1989},
+}
+
+@book{narendra:1989,
+  author    = {K. S. Narendra and M. A. L. Thathachar},
+  title     = {Learning Automata: an introduction},
+  publisher = {Prentice Hall},
+  year      = {1989},
+}
+
+@article{Nasrabadi88a,
+  author  = {N. M. Nasrabadi and R. A. King},
+  title   = {Image Coding Using Vector Quantization: {A} Review},
+  journal = ieeetcomm,
+  volume  = {36},
+  pages   = {957--971},
+  year    = {1988},
+}
+
+@InProceedings{Nasrabadi88b,
+  author =       "N. M. Nasrabadi and Y. Feng",
+  booktitle =    icnn,
+  title =        "Vector Quantization of Images Based upon the Kohonen
+                 Self-Organizing Feature Maps",
+  volume =       "1",
+  publisher =    "IEEE, New York",
+  address =      "San Diego",
+  pages =        "101--108",
+  year =         "1988",
+}
+
+@article{Nass75,
+  author  = {M. M. Nass and L. N. Cooper},
+  title   = {A Theory for the Development of Feature Detecting
+             Cells in Visual Cortex},
+  journal = biocyb,
+  volume  = {19},
+  pages   = {1--18},
+  year    = {1975},
+}
+
+@article{Naylor88,
+  author  = {J. Naylor and K. P. Li},
+  title   = {Analysis of a Neural Network Algorithm for Vector
+             Quantization of Speech Parameters},
+  journal = nnsupp,
+  volume  = {1},
+  pages   = {310},
+  year    = {1988},
+}
+
+@article{NC:Baldi93,
+  author  = {P. Baldi and Y. Chauvin},
+  title   = {Neural Networks for Fingerprint Recognition},
+  journal = {Neural Computation},
+  type    = {Letter},
+  volume  = {5},
+  number  = {3},
+  pages   = {402--418},
+  year    = {1993},
+}
+
+@article{nc:Geman+Bienenstock+Doursat:1992,
+  author  = {S. Geman and E. Bienenstock and R. Doursat},
+  title   = {Neural Networks and the Bias/Variance Dilemma},
+  journal = {Neural Computation},
+  type    = {View},
+  volume  = {4},
+  number  = {1},
+  pages   = {1--58},
+  year    = {1992},
+}
+
+@Article{nc:Poggio+Girosi:1998,
+  author =       "Tomaso Poggio and Federico Girosi",
+  title =        "A Sparse Representation for Function Approximation",
+  journal =      "Neural Computation",
+  volume =       "10",
+  number =       "6",
+  pages =        "1445--1454",
+  year =         "1998",
+}
+
+@techreport{Neal-GP97,
+  author      = {Radford M. Neal},
+  title       = {Monte Carlo implementation of {G}aussian process models
+                 for {Bayesian} regression and classification},
+  institution = {University of Toronto, Department of Statistics},
+  number      = {9702},
+  year        = {1997},
+}
+
+@article{Neal92,
+  author  = {Radford M. Neal},
+  title   = {Connectionist learning of belief networks},
+  journal = {Artificial Intelligence},
+  volume  = {56},
+  pages   = {71--113},
+  year    = {1992},
+}
+
+@inproceedings{Neal93a,
+  author    = {Radford M. Neal},
+  title     = {Bayesian learning via stochastic dynamics},
+  booktitle = NIPS5,
+  editor    = NIPS5ed,
+  pages     = {475--482},
+  address   = {Denver, CO},
+  year      = {1993},
+}
+
+@techreport{Neal93b,
+  author      = {Radford M. Neal},
+  title       = {Probabilistic inference using {Markov} chain
+                 {Monte-Carlo} methods},
+  institution = {Dept. of Computer Science, University of Toronto},
+  number      = {{CRG-TR}-93-1},
+  year        = {1993},
+}
+
+@phdthesis{Neal94,
+  author      = {Radford M. Neal},
+  title       = {Bayesian Learning for Neural Networks},
+  school      = {Dept. of Computer Science, University of Toronto},
+  year        = {1994},
+}
+
+@techreport{Neal94b,
+  author      = {Radford M. Neal},
+  title       = {Sampling from Multimodal Distributions Using Tempered Transitions},
+  institution = {Dept. of Statistics, University of Toronto},
+  number      = {9421},
+  year        = {1994},
+}
+
+@incollection{Neal98,
+  author      = {Radford M. Neal},
+  title       = {Assessing relevance determination methods using
+                 {DELVE}},
+  booktitle   = {Neural Networks and Machine Learning},
+  editor      = {C. M. Bishop},
+  publisher   = {Springer-Verlag},
+  pages       = {97--129},
+  year        = {1998},
+}
+
+@Misc{neal98assessing,
+  author =       "Radford M. Neal",
+  title =        "Assessing Relevance Determination Methods Using {DELVE}",
+  howpublished = "In C. M. Bishop (editor), Neural Networks and Machine
+                 Learning, Springer-Verlag",
+  year =         "1998",
+  text =         "Neal, R. N. (1998). Assessing Relevance Determination
+                 Methods Using DELVE Generalization in Neural Networks
+                 and Machine Learning, C. M. Bishop (editor),
+                 SpringerVerlag.",
+}
+
+@article{Neal-2001,
+  author =      "Radford M. Neal",
+  journal =     "Statistics and Computing",
+  month =       apr,
+  number =      "2",
+  pages =       "125--139",
+  title =       "Annealed importance sampling",
+  url =         "http://dx.doi.org/10.1023/A:1008923215028",
+  volume =      "11",
+  year =        "2001"
+}
+
+@article{Needleman+Wunsch70,
+  author      = {S. B. Needleman and C. D. Wunsch},
+  title       = {A general method applicable to the search of
+                 similarities in the amino acid sequence of two
+                 proteins},
+  journal     = {Journal of Molecular Biology},
+  volume      = {48},
+  pages       = {443--453},
+  year        = {1970},
+}
+
+@article{NeweyWest1987,
+  author      = {W. Newey and K. West},
+  title       = {A Simple, Positive Semi-Definite, Heteroscedasticity
+                 and Autocorrelation Consistent Covariance Matrix},
+  journal     = {Econometrica},
+  volume      = {55},
+  pages       = {703--708},
+  year        = {1987},
+}
+
+@inproceedings{Ney+Kneser93,
+  author      = {Hermann Ney and Reinhard Kneser},
+  title       = {Improved clustering techniques for class-based
+                 statistical language modelling},
+  booktitle   = {European Conference on Speech Communication and
+                 Technology (Eurospeech)},
+  address     = {Berlin},
+  pages       = {973--976},
+  year        = {1993},
+}
+
+@article{Ney92,
+  author      = {H. Ney and D. Mergel and A. Noll and A. Paesler},
+  title       = {Data driven search organization for continuous speech
+                 recognition},
+  journal     = {IEEE Transactions on Signal Processing},
+  volume      = {40},
+  number      = {2},
+  pages       = {272--281},
+  month       = feb,
+  year        = {1992},
+}
+
+@InProceedings{Ng1996,
+  author =       "Hwee Tou Ng and Hian Beng Lee",
+  editor =       "Aravind Joshi and Martha Palmer",
+  booktitle =    "Proceedings of the Thirty-Fourth Annual Meeting of the
+                 Association for Computational Linguistics",
+  title =        "Integrating Multiple Knowledge Sources to Disambiguate
+                 Word Sense: An Exemplar-Based Approach",
+  publisher =    "Morgan Kaufmann Publishers",
+  address =      "San Francisco",
+  pages =        "40--47",
+  year =         "1996",
+  URL =          "http://citeseer.nj.nec.com/ng96integrating.html",
+}
+
+@inproceedings{Ng1997,
+  author      = {Hwee Tou Ng},
+  title       = {Getting Serious about Word Sense Disambiguation},
+  booktitle   = SIGLEX97,
+  address     = {Washington},
+  pages       = {1--7},
+  year        = {1997},
+}
+
+@inproceedings{Ng2002,
+  author      = {Andrew Y. Ng and Michael I. Jordan and Yair Weiss},
+  title       = {On Spectral Clustering: analysis and an algorithm},
+  booktitle   = NIPS14,
+  editor      = NIPS14ed,
+  publisher   = {MIT Press},
+  address     = {Cambridge, MA},
+  year        = {2002},
+  original    = {orig/AA35.ps},
+}
+
+@InProceedings{Ng2008,
+  author =       "Honglak Lee and Chaitanya Ekanadham and Andrew Y. Ng",
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        "Sparse deep belief net model for visual area {V2}",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2008",
+}
+
+@InProceedings{NgJ02,
+  author =       "Andrew Y. Ng and Michael I. Jordan",
+  booktitle =    NIPS14,
+  editor =       NIPS14ed,
+  title =        {On Discriminative vs. Generative Classifiers: A
+                 comparison of logistic regression and naive {Bayes}},
+  pages =        "841--848",
+  year =         "2002",
+}
+
+%%Fred: I deprecate the following entry because its tag name carries the year of the conference rather than the year of the paper!
+@InProceedings{NgJ01,
+  author =       "Andrew Y. Ng and Michael I. Jordan",
+  booktitle =    NIPS14,
+  editor =       NIPS14ed,
+  title =        {On Discriminative vs. Generative Classifiers: A
+                 comparison of logistic regression and naive {Bayes}},
+  pages =        "841--848",
+  year =         "2002",
+}
+
+@inproceedings{Nie99,
+  author      = {J. Y. Nie and M. Simard and P. Isabelle and R.
+                 Durand},
+  title       = {Cross-Language Information Retrieval based on Parallel
+                 Texts and Automatic Mining of Parallel Texts in the
+                 Web},
+  booktitle   = {22nd ACM-SIGIR},
+  address     = {Berkeley},
+  pages       = {74--81},
+  year        = {1999},
+}
+
+@INPROCEEDINGS{Niebles+Fei-Fei-2007,
+  AUTHOR =       "Niebles, J. C. and Fei-Fei, L.",
+  TITLE =        "A hierarchical model of shape and appearance for human action classification",
+  BOOKTITLE =    cvpr07,
+  YEAR =         "2007",
+}
+
+@Article{Nielsen96,
+  author =       "H. Nielsen and J. Engelbrecht and G. {von Heijne} and
+                 S. Brunak",
+  title =        "Defining a similarity threshold for a functional
+                 protein sequence pattern: the signal peptide cleavage
+                 site",
+  journal =      "Proteins",
+  volume =       "24",
+  pages =        "316--320",
+  year =         "1996",
+}
+
+@Article{Nielsen97,
+  author =       "H. Nielsen and J. Engelbrecht and S. Brunak and G.
+                 {von Heijne}",
+  title =        "Identification of prokaryotic and eukaryotic signal
+                 peptides and prediction of their cleavage sites",
+  journal =      "Prot. Eng.",
+  volume =       "10",
+  pages =        "1--6",
+  year =         "1997",
+}
+
+@inproceedings{Niesler98,
+  author      = {T. R. Niesler and E. W. D. Whittaker and P. C.
+                 Woodland},
+  title       = {Comparison of part-of-speech and automatically derived
+                 category-based language models for speech recognition},
+  booktitle   = icassp,
+  pages       = {177--180},
+  year        = {1998},
+}
+
+@inproceedings{Niles90,
+  author      = {L. T. Niles and H. F. Silverman},
+  title       = {Combining Hidden {Markov} Models and Neural Network
+                 Classifiers},
+  booktitle   = icassp,
+  address     = {Albuquerque, NM},
+  pages       = {417--420},
+  year        = {1990},
+}
+
+@book{Nilsson-65,
+  author      = {N. J. Nilsson},
+  title       = {Learning Machines},
+  publisher   = {McGraw-Hill},
+  address     = {New York},
+  year        = {1965},
+}
+
+@book{Nilsson-71,
+  author      = {N. J. Nilsson},
+  title       = {Problem-Solving Methods in Artificial Intelligence},
+  publisher   = {McGraw-Hill},
+  address     = {New York},
+  year        = {1971},
+}
+
+@inproceedings{nips-10:Baxter+Bartlett:1998,
+  author      = {Jonathan Baxter and Peter Bartlett},
+  title       = {The Canonical Distortion Measure in Feature Space and
+                 1-{NN} Classification},
+  booktitle   = NIPS10,
+  editor      = NIPS10ed,
+  publisher   = {MIT Press},
+  year        = {1998},
+}
+
+@inproceedings{nips-10:Holger+Yoshua:1998,
+  author      = {Holger Schwenk and Yoshua Bengio},
+  title       = {Training Methods for Adaptive Boosting of Neural
+                 Networks},
+  booktitle   = NIPS10,
+  editor      = NIPS10ed,
+  publisher   = {MIT Press},
+  pages       = {647--653},
+  year        = {1998},
+}
+
+@inproceedings{nips-6:Perrone:1994,
+  author      = {Michael P. Perrone},
+  title       = {Putting It All Together: Methods for Combining Neural
+                 Networks},
+  booktitle   = NIPS6,
+  editor      = NIPS6ed,
+  publisher   = {Morgan Kaufmann Publishers, Inc.},
+  pages       = {1188--1189},
+  year        = {1994},
+}
+
+@inproceedings{nips-9:Burges+Schoelkopf:1997,
+  author      = {Chris J. C. Burges and B. Sch{\"o}lkopf},
+  title       = {Improving the Accuracy and Speed of Support Vector
+                 Machines},
+  booktitle   = NIPS9,
+  editor      = NIPS9ed,
+  publisher   = {MIT Press},
+  pages       = {375},
+  year        = {1997},
+}
+
+@inproceedings{nips02-LT09,
+  author      = {G. Lebanon and J. Lafferty},
+  title       = {Boosting and Maximum Likelihood for Exponential
+                 Models},
+  booktitle   = NIPS14,
+  editor      = NIPS14ed,
+  publisher   = {MIT Press},
+  address     = {Cambridge, MA},
+  year        = {2002},
+  original    = {orig/LT09.ps},
+}
+
+@incollection{NIPS2005-207,
+  author      = {Jian Zhang and Zoubin Ghahramani and Yiming Yang},
+  title       = {Learning Multiple Related Tasks using Latent
+                 Independent Component Analysis},
+  booktitle   = NIPS18,
+  editor      = NIPS18ed,
+  publisher   = {MIT Press},
+  address     = {Cambridge, MA},
+  pages       = {1587--1594},
+  year        = {2006},
+}
+
+@incollection{NIPS2007-812-small,
+  author      = {Nicolas Chapados and Yoshua Bengio},
+  title       = {Augmented Functional Time Series Representation and
+                 Forecasting with {G}aussian Processes},
+  booktitle   = {NIPS 20},
+  pages       = {265--272},
+  year        = {2008},
+}
+
+@incollection{NIPS2007-925-small,
+  author      = {Nicolas {Le Roux} and Yoshua Bengio and Pascal Lamblin
+                 and Marc Joliveau and Balazs Kegl},
+  title       = {Learning the 2-{D} Topology of Images},
+  booktitle   = {NIPS 20},
+  pages       = {841--848},
+  year        = {2008},
+}
+
+@inproceedings{NIPS8:Drucker:AdaBoost-Trees,
+  author      = {Harris Drucker and Corinna Cortes},
+  title       = {Boosting decision trees},
+  booktitle   = NIPS8,
+  editor      = NIPS8ed,
+  publisher   = {MIT Press},
+  pages       = {479--485},
+  year        = {1996},
+}
+
+@inproceedings{NIPS8:Hofmann-Tresp,
+  author      = {Reimar Hofmann and Volker Tresp},
+  title       = {Discovering structure in continuous variables using
+                 {Bayesian} networks},
+  booktitle   = NIPS8,
+  editor      = NIPS8ed,
+  publisher   = {MIT Press},
+  pages       = {500--506},
+  year        = {1996},
+}
+
+@inproceedings{NIPS9:Monti-Cooper,
+  author      = {Stefano Monti and Gregory F. Cooper},
+  title       = {Learning {Bayesian} belief networks with neural
+                 network estimators},
+  booktitle   = NIPS9,
+  editor      = NIPS9ed,
+  publisher   = {MIT Press},
+  pages       = {578--584},
+  year        = {1997},
+}
+
+@article{Niranjan90,
+  author      = {M. Niranjan and F. Fallside},
+  title       = {Neural Networks and Radial Basis Functions in
+                 Classifying Static Speech Patterns},
+  journal     = cspla,
+  volume      = {4},
+  pages       = {275--289},
+  year        = {1990},
+}
+
+@article{Nishimori90,
+  author      = {H. Nishimori and T. Nakamura and M. Shiino},
+  title       = {Retrieval of Spatio-Temporal Sequence in Asynchronous
+                 Neural Network},
+  journal     = prA,
+  volume      = {41},
+  pages       = {3346--3354},
+  year        = {1990},
+}
+
+@book{Nixon+Aguado+2007,
+    author = {Nixon, M. S. and Aguado, A. S.},
+    publisher = {Academic Press},
+    edition = {Second},
+    title = {Feature Extraction and Image Processing},
+    year = {2007}
+}
+
+@Article{nonparametric-LZ-95,
+  author =       "G. Lugosi and K. Zeger",
+  title =        "Nonparametric Estimation via Empirical Risk
+                 Minimization",
+  journal =      "IEEE Trans. on Information Theory",
+  volume =       "41",
+  number =       "3",
+  pages =        "677--687",
+  year =         "1995",
+}
+
+@Article{nonparametric-SK-96,
+  author =       "M. Smith and R. Kohn",
+  title =        "Nonparametric regression using {Bayesian} variable
+                 selection",
+  journal =      "Journal of Econometrics",
+  volume =       "75",
+  pages =        "317--344",
+  year =         "1996",
+}
+
+@InProceedings{nonparametric-W-91,
+  author =       "H. White",
+  booktitle =    "Proceedings of 23rd Symposium on the Interface,
+                 Computer Science and Statistics",
+  title =        "Nonparametric Estimation of Conditional Quantiles Using Neural Networks",
+  publisher =    "Springer-Verlag",
+  address =      "New York",
+  pages =        "190--199",
+  year =         "1992",
+}
+
+@Article{NordStrom,
+  author =       "T. Nordstrom and B. Svensson",
+  title =        "Using and Designing Massively Parallel Computers for
+                 Artificial Neural Networks",
+  journal =      "Journal of Parallel and Distributed Computing",
+  volume =       "14",
+  number =       "3",
+  pages =        "260--285",
+  year =         "1992",
+  OPTnote =      "",
+}
+
+@article{Normandin94,
+  author      = {Y. Normandin and R. Cardin and R. {DeMori}},
+  title       = {High-performance connected digit recognition using
+                 maximum mutual information estimation},
+  journal     = {Transactions on Speech and Audio Processing},
+  volume      = {2},
+  number      = {2},
+  pages       = {299--311},
+  year        = {1994},
+}
+
+@InProceedings{Nowlan-nips90,
+  author =       "S. J. Nowlan",
+  editor =       NIPS2ed,
+  booktitle =    NIPS2,
+  title =        "Maximum Likelihood Competitive Learning",
+  publisher =    "Morgan Kaufmann Publishers",
+  address =      "San Mateo, CA",
+  pages =        "574--582",
+  year =         "1990",
+}
+
+@inproceedings{Nowlan-nips92,
+  author      = {S. J. Nowlan and G. E. Hinton},
+  title       = {Adaptive Soft Weight Tying using {G}aussian Mixtures},
+  booktitle   = NIPS4,
+  editor      = NIPS4ed,
+  publisher   = {Morgan Kaufmann},
+  address     = {San Mateo, CA},
+  pages       = {993--1000},
+  year        = {1992},
+}
+
+@phdthesis{Nowlan-PhD,
+  author      = {S. J. Nowlan},
+  title       = {Soft Competitive Adaptation: Neural Network Learning
+                 Algorithms based on Fitting Statistical Mixtures},
+  type        = {{C}{M}{U}-{C}{S}-91-126},
+  school      = {School of Computer Science, Carnegie Mellon
+                 University},
+  address     = {Pittsburgh, PA},
+  month       = apr # " 14",
+  year        = {1991},
+}
+
+@article{Nowlan88,
+  author      = {S. J. Nowlan},
+  title       = {Gain Variation in Recurrent Error Propagation
+                 Networks},
+  journal     = cs,
+  volume      = {2},
+  pages       = {305--320},
+  year        = {1988},
+}
+
+@techreport{Nowlan90,
+  author      = {S. J. Nowlan},
+  key         = {Nowlan},
+  title       = {Competing Experts: {An} experimental investigation of
+                 associative mixture models},
+  type        = {Technical Report},
+  institution = {University of Toronto},
+  number      = {CRG-TR-90-5},
+  year        = {1990},
+  annote      = {In CRG Library},
+}
+
+@article{Nowlan92,
+  author      = {S. J. Nowlan and G. E. Hinton},
+  title       = {Simplifying Neural Networks by Soft Weight-Sharing},
+  journal     = {Neural Computation},
+  type        = {Letter},
+  volume      = {4},
+  number      = {4},
+  pages       = {473--493},
+  year        = {1992},
+}
+
+@inproceedings{nsvnijcnn,
+  author      = {Pascal Vincent and Yoshua Bengio},
+  title       = {A Neural Support Vector Network Architecture with
+                 Adaptive Kernels},
+  booktitle   = ijcnn,
+  volume      = {5},
+  pages       = {5187--5192},
+  year        = {2000},
+}
+
+@book{NumOptBook,
+  author      = {J. Nocedal and S. Wright},
+  title       = {Numerical Optimization},
+  publisher   = {Springer},
+  year        = {2006},
+}
+
+@article{Nystrom-1928,
+  author      = {E. J. Nystr{\"o}m},
+  title       = {{\"{U}}ber die praktische aufl{\"o}sung von linearen
+                 integralgleichungen mit anwendungen auf
+                 randwertaufgaben der potentialtheorie},
+  journal     = {Commentationes Physico-Mathematicae},
+  volume      = {4},
+  number      = {15},
+  pages       = {1--52},
+  year        = {1928},
+}
+
+@book{O'Shaughnessy87,
+  author      = {D. O'Shaughnessy},
+  title       = {Speech Communication --- Human and Machine},
+  publisher   = {Addison-Wesley},
+  year        = {1987},
+}
+
+@article{Oja82,
+  author      = {E. Oja},
+  title       = {A Simplified Neuron Model As a Principal Component
+                 Analyzer},
+  journal     = jmathb,
+  volume      = {15},
+  pages       = {267--273},
+  year        = {1982},
+}
+
+@article{Oja85,
+  author      = {E. Oja and J. Karhunen},
+  title       = {On Stochastic Approximation of the Eigenvectors and
+                 Eigenvalues of the Expectation of a Random Matrix},
+  journal     = jama,
+  volume      = {106},
+  pages       = {69--84},
+  year        = {1985},
+}
+
+@article{Oja89,
+  author      = {E. Oja},
+  title       = {Neural Networks, Principal Components, and Subspaces},
+  journal     = {International Journal of Neural Systems},
+  volume      = {1},
+  pages       = {61--68},
+  year        = {1989},
+}
+
+@article{Olshausen+Field-1996,
+  author      = {Bruno A. Olshausen and David J. Field},
+  title       = {Emergence of simple-cell receptive field properties by learning a sparse code for natural images},
+  journal     = {Nature},
+  volume      = {381},
+  pages       = {607--609},
+  year        = {1996},
+}
+
+@Article{Olshausen-97,
+  author =       "B. A. Olshausen and D. J. Field",
+  title =        "Sparse coding with an overcomplete basis set: a
+                 strategy employed by {V}1?",
+  journal =      "Vision Research",
+  volume =       "37",
+  pages =        "3311--3325",
+  year =         "1997",
+  url =          {http://view.ncbi.nlm.nih.gov/pubmed/9425546},
+  keywords = {sparse-coding, v1, vision},
+  month = dec,
+}
+
+@article{olshausen:2005,
+    author = {Bruno Olshausen and David J. Field},
+    title = {How Close are We to Understanding {V1}?},
+    journal = {Neural Computation},
+    volume = {17},
+    pages = {1665--1699},
+    year = {2005},
+}
+
+
+@InProceedings{Omlin-ml92,
+  author =       "C. W. Omlin and C. L. Giles",
+  editor =       "D. Sleeman and P. Edwards",
+  booktitle =    "Machine Learning: Proc. of the Ninth Int. Conference",
+  title =        "Training Second-Order Recurrent Neural Networks using
+                 Hints",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  year =         "1992",
+}
+
+@inproceedings{Omohundro96,
+  author      = {S. Omohundro},
+  title       = {Family Discovery},
+  booktitle   = NIPS8,
+  editor      = NIPS8ed,
+  publisher   = {MIT Press, Cambridge, MA},
+  year        = {1996},
+}
+
+@inproceedings{Ong-Smola-2003,
+  author      = {C. S. Ong and A. J. Smola},
+  title       = {Machine learning using hyperkernels},
+  booktitle   = ICML03,
+  editor      = ICML03ed,
+  publisher   = ICML03publ,
+  year        = {2003},
+}
+
+@article{Opper90,
+  author      = {M. Opper and W. Kinzel and J. Kleinz and R. Nehl},
+  title       = {On the Ability of the Optimal Perceptron to
+                 Generalize},
+  journal     = jpa,
+  volume      = {23},
+  pages       = {L581--L586},
+  year        = {1990},
+}
+
+@article{Orland85,
+  author      = {H. Orland},
+  title       = {Mean-Field Theory for Optimization Problems},
+  journal     = jppl,
+  volume      = {46},
+  pages       = {763--770},
+  year        = {1985},
+}
+
+@inproceedings{ormo-nips99,
+  author      = {D. Ormoneit and T. Hastie},
+  title       = {Optimal Kernel Shapes for Local Linear Regression},
+  booktitle   = NIPS12,
+  editor      = NIPS12ed,
+  publisher   = {MIT Press},
+  year        = {2000},
+}
+
+@Article{Orponen94,
+  author =       "Pekka Orponen",
+  title =        "Computational complexity of neural networks: a
+                 survey",
+  journal =      "Nordic Journal of Computing",
+  volume =       "1",
+  number =       "1",
+  pages =        "94--110",
+  month =        "Spring",
+  year =         "1994",
+  URL =          "http://citeseer.ist.psu.edu/article/orponen95computational.html",
+}
+
+@book{Ortega70,
+  author      = {J. M. Ortega and W. C. Rheinboldt},
+  title       = {Iterative Solution of Non-linear Equations in Several
+                 Variables and Systems},
+  publisher   = {Academic Press},
+  address     = {New York},
+  year        = {1970},
+  OPTnote     = {},
+}
+
+@book{Ortega70a,
+  author      = {J. M. Ortega and W. C. Rheinboldt},
+  title       = {Iterative Solution of Non-linear Equations in Several
+                 Variables and Systems},
+  publisher   = {Academic Press},
+  address     = {New York},
+  year        = {1970},
+}
+
+@InProceedings{Osindero+Hinton-2008,
+  author =       "Simon Osindero and Geoffrey E. Hinton",
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        {Modeling image patches with a directed hierarchy of
+                 {Markov} random fields},
+  publisher =    {MIT Press},
+  address =      {Cambridge, MA},
+  pages =        {1121--1128},
+  year =         "2008",
+}
+
+@InProceedings{Osindero+Hinton-2008-small,
+  author =       "S. Osindero and G. Hinton",
+  booktitle =    "NIPS 20",
+  title =        {Modeling image patches with a directed hierarchy of
+                 {Markov} random fields},
+  year =         "2008",
+}
+
+@Article{Osindero+Welling+Hinton-05,
+  author =       "Simon Osindero and Max Welling and Geoffrey E. Hinton",
+  title =        "Topographic Product Models Applied To Natural Scene
+                 Statistics",
+  journal =      "Neural Computation",
+  volume =       "18",
+  pages =        "381--414",
+  year =         "2006",
+}
+
+@article{OsinderoS2006,
+  author      = {Simon Osindero and Max Welling and Geoffrey E.
+                 Hinton},
+  title       = {Topographic Product Models Applied to Natural Scene
+                 Statistics},
+  journal     = {Neural Computation},
+  volume      = {18},
+  number      = {2},
+  pages       = {381--414},
+  publisher   = {MIT Press},
+  address     = {Cambridge, MA, USA},
+  year        = {2006},
+  ISSN        = {0899-7667},
+}
+
+@article{OsinderoS2006-small,
+  author      = {Simon Osindero and Max Welling and Geoffrey E. Hinton},
+  title       = {Topographic Product Models Applied to Natural Scene
+                 Statistics},
+  journal     = {Neural Computation},
+  volume      = {18},
+  number      = {2},
+  pages       = {381--414},
+  year        = {2006},
+}
+
+@inproceedings{Ott76,
+  author      = {R. Ott},
+  title       = {Construction of quadratic polynomial classifiers},
+  booktitle   = {Third International Joint Conference on Pattern
+                 Recognition},
+  publisher   = {IEEE, CA},
+  address     = {Coronado, CA},
+  pages       = {161--165},
+  year        = {1976},
+}
+
+@article{OttJ1976b,
+     title = {Some Classification Procedures for Multivariate Binary Data Using Orthogonal Functions},
+     author = {Ott, Jurg and Kronmal, Richard A.},
+     journal = {Journal of the American Statistical Association},
+     volume = {71},
+     number = {354},
+     pages = {391--399},
+     year = {1976},
+     publisher = {American Statistical Association},
+     copyright = {Copyright {\copyright} 1976 American Statistical Association},
+}
+
+
+@inproceedings{Ouimet+Bengio-2005,
+  author      = {Marie Ouimet and Yoshua Bengio},
+  title       = {Greedy Spectral Embedding},
+  booktitle   = aistats05,
+  editor      = aistats05ed,
+  publisher   = {},
+  date        = {Jan 6-8, 2005},
+  location    = {Savannah Hotel, Barbados},
+  pages       = {253--260},
+  year        = {2005},
+}
+
+@InProceedings{Owens89,
+  author =       "A. J. Owens and D. L. Filkin",
+  booktitle =    ijcnn,
+  title =        "Efficient Training of the Back Propagation Network by
+                 Solving a System of Stiff Ordinary Differential
+                 Equations",
+  volume =       "2",
+  publisher =    "IEEE, New York",
+  address =      "Washington, DC",
+  pages =        "381--386",
+  year =         "1989",
+}
+
+@inproceedings{Paccanaro2000,
+  author      = {A. Paccanaro and G. E. Hinton},
+  title       = {Extracting Distributed Representations of Concepts and
+                 Relations from Positive and Negative Propositions},
+  booktitle   = ijcnn,
+  publisher   = {IEEE, New York},
+  address     = {Como, Italy},
+  year        = {2000},
+}
+
+@Article{Packard80,
+  author =       "N. H. Packard and J. P. Crutchfield and J. D. Farmer
+                 and R. S. Shaw",
+  title =        "Geometry from a Time Series",
+  journal =      prl,
+  volume =       "45",
+  pages =        "712--716",
+  year =         "1980",
+}
+
+@misc{Pal+al-2006,
+  author      = {Chris Pal and Michael Kelm and Xuerui Wang and Greg Druck and Andrew McCallum},
+  title       = {On Discriminative and Semi-Supervised Dimensionality Reduction},
+  note        = {Workshop on Novel Applications of Dimensionality Reduction, NIPS'06},
+  year        = {2006},
+}
+
+@incollection{Palmer88,
+  author      = {R. G. Palmer},
+  title       = {Statistical Mechanics Approaches to Complex
+                 Optimization Problems},
+  booktitle   = {The Economy As an Evolving Complex System},
+  editor      = {P. W. Anderson and K. J. Arrow and D. Pines},
+  series      = {SFI Studies in the Sciences of Complexity:
+                 Proceedings},
+  volume      = {5},
+  publisher   = {Addison-Wesley},
+  address     = {Redwood City},
+  pages       = {177--193},
+  year        = {1988},
+}
+
+@incollection{Palmer89,
+  author      = {R. G. Palmer},
+  title       = {Neural Nets},
+  booktitle   = {Lectures in the Sciences of Complexity},
+  editor      = {D. L. Stein},
+  series      = {SFI Studies in the Sciences of Complexity: Lectures},
+  volume      = {1},
+  publisher   = {Addison-Wesley},
+  address     = {Redwood City},
+  pages       = {439--461},
+  year        = {1989},
+}
+
+@Book{Papadimitriou,
+  author =       "C. H. Papadimitriou and K. Steiglitz",
+  title =        "Combinatorial Optimization: Algorithms and
+                 Complexity",
+  publisher =    "Prentice-Hall",
+  address =      "Englewood Cliffs, NJ",
+  year =         "1982",
+}
+
+@book{Papadimitriou82,
+  author      = {C. H. Papadimitriou and K. Steiglitz},
+  title       = {Combinatorial Optimization: Algorithms and
+                 Complexity},
+  publisher   = {Prentice-Hall},
+  address     = {Englewood Cliffs},
+  year        = {1982},
+}
+
+@article{Parga86,
+  author      = {N. Parga and M. A. Virasoro},
+  title       = {The Ultrametric Organization of Memories in a Neural
+                 Network},
+  journal     = jpp,
+  volume      = {47},
+  pages       = {1857--1864},
+  year        = {1986},
+}
+
+@article{Parisi86,
+  author      = {G. Parisi},
+  title       = {Asymmetric Neural Networks and the Process of
+                 Learning},
+  journal     = jpa,
+  volume      = {19},
+  pages       = {L675--L680},
+  year        = {1986},
+}
+
+@book{Parisi88,
+  author      = {G. Parisi},
+  title       = {Statistical Field Theory},
+  publisher   = {Addison-Wesley},
+  address     = {Redwood City, CA},
+  year        = {1988},
+}
+
+@article{Park-nc91,
+  author      = {J. Park and I. W. Sandberg},
+  title       = {Universal Approximation Using Radial-Basis-Function
+                 Networks},
+  journal     = nc,
+  volume      = {3},
+  number      = {2},
+  pages       = {246--257},
+  year        = {1991},
+}
+
+@techreport{Parker85,
+  author      = {D. B. Parker},
+  title       = {Learning Logic},
+  institution = {Center for Computational Research in Economics and
+                 Management Science, Massachusetts Institute of
+                 Technology},
+  number      = {TR--47},
+  address     = {Cambridge, MA},
+  year        = {1985},
+}
+
+@InProceedings{Parker87,
+  author =       "D. B. Parker",
+  editor =       "M. Caudill and C. Butler",
+  booktitle =    icnn,
+  title =        "Optimal Algorithms for Adaptive Networks: Second Order
+                 Back Propagation, Second Order Direct Propagation, and
+                 Second Order Hebbian Learning",
+  volume =       "2",
+  publisher =    "IEEE, New York",
+  address =      "San Diego, CA",
+  pages =        "593--600",
+  year =         "1987",
+}
+
+@InProceedings{Parks87,
+  author =       "M. Parks",
+  editor =       "M. Caudill and C. Butler",
+  booktitle =    icnn,
+  title =        "Characterization of the {Boltzmann} Machine Learning
+                 Rate",
+  volume =       "3",
+  publisher =    "IEEE, New York",
+  address =      "San Diego, CA",
+  pages =        "715--719",
+  year =         "1987",
+}
+
+@article{Parlos94,
+  author      = {A. G. Parlos and J. Muthusami and A. F. Atiya},
+  title       = {Incipient Fault Detection and Identification in
+                 Process Systems using Accelerated Neural Network
+                 Learning},
+  journal     = {Nuclear Technology},
+  volume      = {105},
+  pages       = {145},
+  year        = {1994},
+}
+
+@article{Parzen62,
+  author      = {Emanuel Parzen},
+  title       = {On the estimation of a probability density function
+                 and mode},
+  journal     = {Annals of Mathematical Statistics},
+  volume      = {33},
+  pages       = {1064--1076},
+  year        = {1962},
+}
+
+@InProceedings{pati93orthogonal,
+  author =       "Y. Pati and R. Rezaiifar and P. Krishnaprasad",
+  booktitle =    "Proceedings of the 27th Annual Asilomar Conference on
+                 Signals, Systems, and Computers",
+  title =        "Orthogonal Matching Pursuit: Recursive Function
+                 Approximation with Applications to Wavelet
+                 Decomposition",
+  pages =        "40--44",
+  month =        nov,
+  year =         "1993",
+}
+
+@InProceedings{Paugam-Moisy-1992,
+  author =       "H{\'e}l{\`e}ne {Paugam-Moisy}",
+  booktitle =    ijcnn,
+  title =        "On the Convergence of a Block-Gradient Algorithm for
+                 Back-Propagation Learning",
+  volume =       "3",
+  publisher =    "IEEE",
+  address =      "New York",
+  pages =        "919--924",
+  year =         "1992",
+}
+
+@InProceedings{Paugam-Moisy-1992b,
+  author =       "H{\'e}l{\`e}ne {Paugam-Moisy}",
+  booktitle =    "CONPAR '92/ VAPP V: Proceedings of the Second Joint
+                 International Conference on Vector and Parallel
+                 Processing",
+  title =        "Optimal Speedup Conditions for a Parallel
+                 Back-Propagation Algorithm",
+  publisher =    "Springer-Verlag",
+  address =      "London, UK",
+  pages =        "719--724",
+  year =         "1992",
+  ISBN =         "3-540-55895-0",
+}
+
+@InCollection{Paugam-Moisy-1993,
+  author =       "H{\'e}l{\`e}ne {Paugam-Moisy}",
+  editor =       "I. Pitas",
+  title =        "Parallel Neural Computing Based on Network
+                 Duplicating",
+  booktitle =    "Parallel Algorithms for Digital Image Processing,
+                 Computer Vision and Neural Networks",
+  pages =        "305--340",
+  publisher =    "John Wiley",
+  year =         "1993",
+}
+
+@inproceedings{Pavlovic-2001,
+  author    = "Vladimir Pavlovic and James M. Rehg and John MacCormick",
+  editor    = NIPS13ed,
+  title     = "Learning Switching Linear Models of Human Motion",
+  booktitle = NIPS13,
+  pages     = "981--987",
+  publisher = "{MIT} Press",
+  year      = "2001",
+}
+ 
+
+@Book{PdpManual,
+  author =       "D. E. Rumelhart and J. L. McClelland",
+  title =        "Explorations in Parallel Distributed Processing",
+  volume =       "3",
+  publisher =    "MIT Press",
+  year =         "1988",
+}
+
+@InProceedings{Pearl-Verma91,
+  author =       "J. Pearl and T. S. Verma",
+  editor =       "J. A. Allen and R. Fikes and E. Sandewall",
+  title =        "A theory of inferred causation",
+  booktitle =    "Principles of Knowledge Representation and Reasoning:
+                 Proceedings of the Second International Conference",
+  pages =        "441--452",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  year =         "1991",
+}
+
+@Book{Pearl88,
+  author    = {Judea Pearl},
+  title     = {Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference},
+  publisher = {Morgan Kaufmann},
+  year      = {1988},
+}
+
+@InProceedings{Pearlmutter+Parra-96,
+  author    = {Barak Pearlmutter and L. C. Parra},
+  editor    = {L. Xu},
+  title     = {A context-sensitive generalization of {ICA}},
+  booktitle = {International Conference On Neural Information Processing},
+  pages     = {151--157},
+  address   = {Hong-Kong},
+  year      = {1996},
+}
+
+@InProceedings{Pearlmutter86,
+  author    = {B. A. Pearlmutter and G. E. Hinton},
+  editor    = {J. S. Denker},
+  title     = {{G}-Maximization: An Unsupervised Learning Procedure for Discovering Regularities},
+  booktitle = snowbird,
+  pages     = {333--338},
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  year      = {1986},
+}
+
+@InProceedings{Pearlmutter89a,
+  author    = {B. A. Pearlmutter},
+  title     = {Learning State Space Trajectories in Recurrent Neural Networks},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {365--372},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@Article{Pearlmutter89b,
+  author  = {B. A. Pearlmutter},
+  title   = {Learning State Space Trajectories in Recurrent Neural Networks},
+  journal = nc,
+  volume  = {1},
+  pages   = {263--269},
+  year    = {1989},
+}
+
+@article{Pearson-1901,
+  author  = "Pearson, K.",
+  title   = "On lines and planes of closest fit to systems of points in space",
+  journal = "Philosophical Magazine",
+  volume  = "2",
+  number  = "6",
+  pages   = "559--572",
+  year    = "1901",
+  keywords = "pca",
+  citeulike-article-id = "2013414",
+  posted-at = "2007-11-29 10:41:36",
+  priority = "2",
+}
+
+@InProceedings{Pedersen2001,
+  author =       "Ted Pedersen",
+  title =        "A decision tree of bigrams is an accurate predictor of
+                 word sense",
+  booktitle =    "Proceedings of the Second Annual Meeting of the North
+                 American Chapter of the Association for Computational
+                 Linguistics",
+  pages =        "79--86",
+  year =         "2001",
+  URL =          "http://citeseer.nj.nec.com/pedersen01decision.html",
+}
+
+@InProceedings{Peeling86,
+  author    = {S. M. Peeling and R. K. Moore and M. J. Tomlinson},
+  title     = {The Multi-Layer Perceptron as a Tool for Speech Pattern Processing Research},
+  booktitle = {Proceedings of the 10th Autumn Conference on Speech and Hearing},
+  year      = {1986},
+}
+
+@InProceedings{peng04accurate,
+  author    = {F. Peng and A. McCallum},
+  title     = {Accurate information extraction from research papers using conditional random fields},
+  booktitle = {Proceedings of Human Language Technology Conference / North American Chapter of the Association for Computational Linguistics annual meeting},
+  pages     = {329--336},
+  year      = {2004},
+}
+
+@InProceedings{Pennacchiotti+Pantel-2006,
+  author    = {Marco Pennacchiotti and Patrick Pantel},
+  title     = {Ontologizing Semantic Relations},
+  booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the ACL},
+  pages     = {793--800},
+  address   = {Sydney},
+  year      = {2006},
+}
+
+@Article{Penrose55,
+  author =       "R. Penrose",
+  title =        "A generalized inverse for matrices",
+  journal =      "Proc. Cambridge Philos. Soc.",
+  volume =       "51",
+  pages =        "406--413",
+  year =         "1955",
+}
+
+@InProceedings{Pereira93,
+  author    = {F. Pereira and N. Tishby and L. Lee},
+  title     = {Distributional Clustering of English Words},
+  booktitle = {30th Annual Meeting of the Association for Computational Linguistics},
+  pages     = {183--190},
+  address   = {Columbus, Ohio},
+  year      = {1993},
+}
+
+@InProceedings{Pereira94,
+  author    = {F. Pereira and M. Riley and R. Sproat},
+  title     = {Weighted rational transductions and their application to human language processing},
+  booktitle = {ARPA Natural Language Processing Workshop},
+  year      = {1994},
+}
+
+@InCollection{Pereira97,
+  author =       "F. C. N. Pereira and M. D. Riley",
+  editor =       "Emmanuel Roche and Yves Schabes",
+  title =        "Speech recognition by composition of weighted finite
+                 automata",
+  booktitle =    "Finite-State Language Processing",
+  pages =        "431--453",
+  publisher =    "MIT Press",
+  address =      "Cambridge, Massachusetts",
+  year =         "1997",
+}
+
+@Article{Peretto84,
+  author  = {P. Peretto},
+  title   = {Collective Properties of Neural Networks: {A} Statistical Physics Approach},
+  journal = biocyb,
+  volume  = {50},
+  pages   = {51--62},
+  year    = {1984},
+}
+
+@InProceedings{Peretto86,
+  author =       "P. Peretto and J. J. Niez",
+  editor =       "E. Bienenstock and F. Fogelman-Souli{\'e} and G.
+                 Weisbuch",
+  title =        "Collective Properties of Neural Networks",
+  booktitle =    "Disordered Systems and Biological Organization",
+  pages =        "171--185",
+  publisher =    "Springer-Verlag, Berlin",
+  address =      "Les Houches 1985",
+  year =         "1986",
+}
+
+@Article{Peretto88,
+  author  = {P. Peretto},
+  title   = {On Learning Rules and Memory Storage Abilities of Asymmetrical Neural Networks},
+  journal = jpp,
+  volume  = {49},
+  pages   = {711--726},
+  year    = {1988},
+}
+
+@InProceedings{Perez+Rendell-1996,
+  author =       "Eduardo P{\'e}rez and Larry A. Rendell",
+  editor =       ICML96ed,
+  title =        "Learning Despite Concept Variation by Finding
+                 Structure in Attribute-based Data",
+  booktitle =    ICML96,
+  pages =        "391--399",
+  publisher =    ICML96publ,
+  year =         "1996",
+}
+
+@Article{Perez75,
+  author =       "R. P{\'e}rez and L. Glass and R. Shlaer",
+  title =        "Development of Specificity in the Cat Visual Cortex",
+  journal =      jmathb,
+  volume =       "1",
+  pages =        "275--288",
+  year =         "1975",
+}
+
+@MISC{Perez98markovrandom,
+  author = "Patrick Perez",
+  title  = "Markov Random Fields and Images",
+  year   = "1998",
+}
+
+@article{PerpinanM2000,
+ author = {Miguel {\'A}. Carreira-Perpi{\~n}{\'a}n and Steve Renals},
+ title = {Practical Identifiability of Finite Mixtures of Multivariate Bernoulli Distributions},
+ journal = {Neural Computation},
+ volume = {12},
+ number = {1},
+ pages = {141--152},
+ year = {2000},
+ publisher = {MIT Press},
+ address = {Cambridge, MA, USA},
+ }
+
+@InProceedings{Perpinan+Hinton-2005,
+  author =       "Miguel {\'A}. Carreira-Perpi{\~n}{\'a}n and Geoffrey E. Hinton",
+  editor =       aistats05ed,
+  title =        "On Contrastive Divergence Learning",
+  booktitle =    aistats05,
+  pages =        "33--40",
+  publisher =    "Society for Artificial Intelligence and Statistics",
+  year =         "2005",
+  date =         "Jan 6-8, 2005",
+  location =     "Savannah Hotel, Barbados",
+}
+
+@Article{Personnaz85,
+  author  = {L. Personnaz and I. Guyon and G. Dreyfus},
+  title   = {Information Storage and Retrieval in Spin-Glass-Like Neural Networks},
+  journal = jppl,
+  volume  = {46},
+  pages   = {359--365},
+  year    = {1985},
+}
+
+@Article{Personnaz86,
+  author  = {L. Personnaz and I. Guyon and G. Dreyfus},
+  title   = {Collective Computational Properties of Neural Networks: New Learning Mechanisms},
+  journal = prA,
+  volume  = {34},
+  pages   = {4217--4228},
+  year    = {1986},
+}
+
+@Article{Peterson2004,
+  author  = {Gail B. Peterson},
+  title   = {A day of great illumination: {B. F.} {Skinner}'s discovery of shaping},
+  journal = {Journal of the Experimental Analysis of Behavior},
+  volume  = {82},
+  number  = {3},
+  pages   = {317--328},
+  year    = {2004},
+}
+
+@Article{Peterson87,
+  author  = {C. Peterson and J. R. Anderson},
+  title   = {A Mean Field Theory Learning Algorithm for Neural Networks},
+  journal = cs,
+  volume  = {1},
+  pages   = {995--1019},
+  year    = {1987},
+}
+
+@Article{Peterson89,
+  author  = {C. Peterson and B. S{\"o}derberg},
+  title   = {A New Method for Mapping Optimization Problems onto Neural Networks},
+  journal = ijns,
+  volume  = {1},
+  pages   = {3--22},
+  year    = {1989},
+}
+
+@Article{Peterson90,
+  author  = {C. Peterson and S. Redfield and J. D. Keeler and E. Hartman},
+  title   = {An Optoelectronic Architecture for Multilayer Learning in a Single Photorefractive Crystal},
+  journal = nc,
+  volume  = {2},
+  pages   = {25--34},
+  year    = {1990},
+}
+
+@PhdThesis{PhD:Perrone,
+  author =       "Michael P. Perrone",
+  title =        "Improving Regression Estimation: Averaging Methods for
+                 Variance Reduction with Extensions to General Convex
+                 Measure Optimization",
+  school =       "Brown University, Institute for Brain and Neural
+                 Systems",
+  month =        may,
+  year =         "1993",
+}
+
+@Book{Piaget1952,
+  author    = {J.-P. Piaget},
+  title     = {The origins of intelligence in children},
+  publisher = {International Universities Press},
+  address   = {New York},
+  year      = {1952},
+}
+
+@Article{Pineda87,
+  author  = {F. J. Pineda},
+  title   = {Generalization of Back-Propagation to Recurrent Neural Networks},
+  journal = prl,
+  volume  = {59},
+  pages   = {2229--2232},
+  year    = {1987},
+}
+
+@Article{Pineda88,
+  author  = {F. J. Pineda},
+  title   = {Dynamics and Architecture for Neural Computation},
+  journal = jcomp,
+  volume  = {4},
+  pages   = {216--245},
+  year    = {1988},
+}
+
+@InProceedings{Pineda88-nips,
+  author       = {F. Pineda},
+  editor       = nips87ed,
+  title        = {Generalization of Backpropagation to Recurrent and Higher Order Neural Networks},
+  booktitle    = nips87,
+  pages        = {602--611},
+  organization = {American Institute of Physics},
+  address      = {New York, NY},
+  year         = {1988},
+}
+
+@Article{Pineda89,
+  author  = {F. J. Pineda},
+  title   = {Recurrent Back-Propagation and the Dynamical Approach to Adaptive Neural Computation},
+  journal = nc,
+  volume  = {1},
+  pages   = {161--172},
+  year    = {1989},
+}
+
+@InCollection{PINN,
+  author    = {P. Frasconi and M. Gori and A. Tesi},
+  editor    = {O. Omidvar},
+  title     = {Successes and Failures of Backpropagation: {A} Theoretical Investigation},
+  booktitle = {Progress in Neural Networks},
+  volume    = {5},
+  publisher = {Ablex Publishing},
+  year      = {1993},
+}
+
+@article{Pinto08,
+  author = {Pinto, Nicolas AND Cox, David D AND DiCarlo, James J},
+  title = {Why is Real-World Visual Object Recognition Hard?},
+  journal = {PLoS Comput Biol},
+  publisher = {Public Library of Science},
+  volume = {4},
+  number = {1},
+  pages = {e27},
+  month = jan,
+  year = {2008},
+}
+
+@inproceedings{Pinto-DiCarlo-2008,
+ author = {Nicolas Pinto and James {DiCarlo} and David Cox},
+ title = {Establishing Good Benchmarks and Baselines for Face Recognition},
+ booktitle = {ECCV 2008 Faces in 'Real-Life' Images Workshop},
+ editor = {Erik Learned-Miller and Andras Ferencz and Fr{\'e}d{\'e}ric Jurie},
+ address = {Marseille, France},
+ year = 2008,
+ audience = {internationale},
+ URL = {http://hal.inria.fr/inria-00326732/en/},
+}
+
+@article{Pinto-2009,
+  author = {Pinto, Nicolas AND Doukhan, David AND DiCarlo, James J. AND Cox, David D.},
+  title = {A High-Throughput Screening Approach to Discovering Good Forms of Biologically Inspired Visual Representation},
+  journal = {PLoS Comput Biol},
+  publisher = {Public Library of Science},
+  volume = {5},
+  number = {11},
+  pages = {e1000579},
+  month = nov,
+  year = {2009},
+}
+
+@InCollection{Platt2000,
+  author =       "J. Platt",
+  editor =       "A. Smola and P. Bartlett and B. Sch{\"o}lkopf and D.
+                 Schuurmans",
+  title =        "Probabilities for support vector machines",
+  booktitle =    "Advances in Large Margin Classifiers",
+  publisher =    "MIT Press",
+  year =         "2000",
+}
+
+@Article{Platt91,
+  author  = {J. Platt},
+  title   = {A Resource-Allocating Network for Function Interpolation},
+  journal = {Neural Computation},
+  volume  = {3},
+  number  = {2},
+  pages   = {213--225},
+  year    = {1991},
+  type    = {Letter},
+}
+
+@InProceedings{Platt94,
+  author    = {R. Wolf and J. Platt},
+  editor    = NIPS6ed,
+  title     = {Postal address block location using a convolutional locator network},
+  booktitle = NIPS6,
+  pages     = {745--752},
+  year      = {1994},
+}
+
+@Article{Plaut-csl87,
+  author =       "D. C. Plaut and G. E. Hinton",
+  title =        "Learning Sets of Filters Using Back-propagation",
+  journal =      cspla,
+  volume =       "2",
+  pages =        "35--61",
+  year =         "1987",
+}
+
+@TechReport{Plaut86,
+  author      = {D. Plaut and S. Nowlan and G. Hinton},
+  title       = {Experiments on Learning by Back-Propagation},
+  institution = {Department of Computer Science, Carnegie Mellon University},
+  number      = {CMU--CS--86--126},
+  address     = {Pittsburgh, PA},
+  year        = {1986},
+}
+
+@Article{PLS-Frank-Friedman,
+  author  = {Ildiko E. Frank and Jerome H. Friedman},
+  title   = {A statistical view of some chemometrics regression tools},
+  journal = {Technometrics},
+  volume  = {35},
+  number  = {2},
+  pages   = {109--148},
+  year    = {1993},
+}
+
+@Article{Podder-2006,
+  author =       "M. Podder and W. J. Welch and R. H. Zamar and S. J.
+                 Tebbutt",
+  title =        "Dynamic Variable Selection in {SNP} Genotype
+                 Autocalling from {APEX} Microarray Data",
+  journal =      "BMC Bioinformatics",
+  year =         "2006",
+  note =         "In revision",
+}
+
+@Article{Poggio-ieee90,
+  author  = {T. Poggio and F. Girosi},
+  title   = {Networks for Approximation and Learning},
+  journal = ieeeproc,
+  volume  = {78},
+  number  = {9},
+  pages   = {1481--1497},
+  year    = {1990},
+}
+
+@Article{Poggio75,
+  author  = {T. Poggio},
+  title   = {On Optimal NonLinear Associative Recall},
+  journal = biocyb,
+  volume  = {19},
+  pages   = {201},
+  year    = {1975},
+}
+
+@Article{Poggio85,
+  author =       "T. Poggio and V. Torre and C. Koch",
+  title =        "Computational Vision and Regularization Theory",
+  journal =      "Nature",
+  volume =       "317",
+  number =       "6035",
+  pages =        "314--319",
+  year =         "1985",
+}
+
+@TechReport{Poggio89,
+  author      = {T. Poggio and F. Girosi},
+  title       = {A theory of networks for approximation and learning},
+  institution = {MIT AI Laboratory},
+  number      = {1140},
+  address     = {Cambridge, MA},
+  year        = {1989},
+}
+
+@Article{Poggio90,
+  author  = {T. Poggio and F. Girosi},
+  title   = {Regularization Algorithms for Learning That Are Equivalent to Multilayer Networks},
+  journal = science,
+  volume  = {247},
+  pages   = {978--982},
+  year    = {1990},
+}
+
+@Article{Pollack90,
+  author  = {Jordan B. Pollack},
+  title   = {Recursive Distributed Representations},
+  journal = {Artificial Intelligence},
+  volume  = {46},
+  number  = {1},
+  pages   = {77--105},
+  year    = {1990},
+}
+
+@Article{Pollack91,
+  author =       "Jordan B. Pollack",
+  title =        "The Induction of Dynamical Recognizers",
+  journal =      mlearn,
+  volume =       "7",
+  number =       "2",
+  pages =        "227--252",
+  year =         "1991",
+}
+
+@Book{Pollard84,
+  author    = {D. Pollard},
+  title     = {Convergence of stochastic processes},
+  publisher = {Springer-Verlag},
+  address   = {New York, NY},
+  year      = {1984},
+}
+
+@InProceedings{Pollit91,
+  author    = {M. D. Pollit and J. Peck},
+  title     = {Recent advances in lithological recognition based on rotary blasthole drill responses},
+  booktitle = {Proc. 2nd Canadian Conf. on Computer Applications in the Mineral Industry},
+  address   = {Vancouver, Canada},
+  year      = {1991},
+}
+
+@InProceedings{Pomerleau89,
+  author    = {D. A. Pomerleau},
+  editor    = NIPS1ed,
+  title     = {{ALVINN}: An Autonomous Land Vehicle in a Neural Network},
+  booktitle = NIPS1,
+  pages     = {305--313},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1989},
+}
+
+@TechReport{Pontil98,
+  author      = {M. Pontil and A. Verri},
+  title       = {Properties of Support Vector Machines},
+  institution = {MIT},
+  number      = {AI Memo 1612},
+  year        = {1998},
+}
+
+@InProceedings{Poritz88,
+  author    = {A. B. Poritz},
+  title     = {Hidden {Markov} models: a guided tour},
+  booktitle = {Proc. Int. Conf. Acoustics, Speech, and Signal Processing},
+  pages     = {7--13},
+  year      = {1988},
+}
+
+# NOTE(review): same authors, title, pages and year as Poston-ijcnn91 -- looks like a duplicate; confirm before citing both
+@InProceedings{Poston,
+  author    = {T. Poston and C. Lee and Y. Choie and Y. Kwon},
+  title     = {Local minima and Backpropagation},
+  booktitle = {Proc. of the IEEE-IJCNN91},
+  pages     = {173--176},
+  address   = {Seattle, WA},
+  year      = {1991},
+}
+
+@InProceedings{Poston-ijcnn91,
+  author    = {T. Poston and C. Lee and Y. Choie and Y. Kwon},
+  title     = {Local Minima and Backpropagation},
+  booktitle = ijcnn,
+  pages     = {173--176},
+  publisher = {IEEE Press},
+  address   = {Seattle WA},
+  year      = {1991},
+}
+
+@Article{Poterba+Summers,
+  author  = {J. M. Poterba and L. H. Summers},
+  title   = {Mean Reversion in Stock Prices},
+  journal = {Journal of Financial Economics},
+  volume  = {22},
+  pages   = {27--59},
+  year    = {1988},
+}
+
+@Article{potvin:1995:orsajc,
+  author  = {J.-Y. Potvin and S. Bengio},
+  title   = {The Vehicle Routing Problem with Time Windows - Part {II}: Genetic Search},
+  journal = {{ORSA} Journal on Computing},
+  year    = {1995},
+}
+
+@InCollection{powell87radial,
+  author =       "M. J. D. Powell",
+  editor =       "J. C. Mason and M. G. Cox",
+  title =        "Radial basis functions for multivariable
+                 interpolation: {A} review",
+  booktitle =    "Algorithms for Approximation of Functions and Data",
+  pages =        "143--167",
+  publisher =    "Oxford University Press",
+  year =         "1987",
+}
+
+@InProceedings{Pratt+Kamm91,
+  author    = {L. Y. Pratt and C. A. Kamm},
+  title     = {Improving a phoneme classification neural network through problem decomposition},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {821--826},
+  publisher = {IEEE Press},
+  address   = {Seattle WA},
+  year      = {1991},
+}
+
+@InProceedings{pratt93,
+  author    = {Lorien Y. Pratt},
+  editor    = NIPS5ed,
+  title     = {Discriminability-Based Transfer between Neural Networks},
+  booktitle = NIPS5,
+  pages     = {204--211},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@Article{Presnell93,
+  author  = {S. R. Presnell and F. E. Cohen},
+  title   = {Artificial neural networks for pattern recognition in biochemical sequences},
+  journal = {Ann. Rev. Biophys. Biomol. Struct.},
+  volume  = {22},
+  pages   = {283--298},
+  year    = {1993},
+}
+
+@Book{Press86,
+  author    = {W. H. Press and B. P. Flannery and S. A. Teukolsky and W. T. Vetterling},
+  title     = {Numerical Recipes},
+  publisher = {Cambridge University Press},
+  address   = {Cambridge},
+  year      = {1986},
+}
+
+@Book{Press92,
+  author =       "W. H. Press and S. A. Teukolsky and W. T. Vetterling
+                 and B. P. Flannery",
+  title =        "Numerical Recipes in {C}: The art of scientific
+                 computing",
+  edition =      "Second",
+  publisher =    "Cambridge University Press",
+  address =      "Cambridge",
+  year =         "1992",
+}
+
+@article{Priebe2005,
+ author = {C. E. Priebe and J. M. Conroy and D. J. Marchette and Y. Park},
+ title = {Scan Statistics on {Enron} Graphs},
+ journal = {Computational and Mathematical Organization Theory},
+ volume = 11,
+ number = 3,
+ pages = {229--247},
+ month = oct,
+ year = 2005,
+ publisher = {Springer},
+}
+
+@Book{Priestley81,
+  author    = {M. B. Priestley},
+  title     = {Spectral Analysis and Time Series, Vol.1: Univariate Series},
+  publisher = {Academic Press},
+  year      = {1981},
+}
+
+@Article{Principe92,
+  author  = {B. {de Vries} and J. C. Principe},
+  title   = {The gamma model -- {A} new neural net model for temporal processing},
+  journal = nn,
+  volume  = {5},
+  pages   = {565--576},
+  year    = {1992},
+  OPTnote = {},
+}
+
+@Article{Psa88a,
+  author  = {D. Psaltis and C. H. Park and J. Hong},
+  title   = {Higher Order Associative Memories and Their Optical Implementations},
+  journal = {Neural Networks},
+  volume  = {1},
+  number  = {2},
+  pages   = {149--163},
+  year    = {1988},
+}
+
+@InProceedings{Psaltis89,
+  author    = {D. Psaltis and D. Brady and K. Hsu},
+  title     = {Learning in optical neural computers},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {72--75},
+  address   = {Washington D.C.},
+  year      = {1989},
+}
+
+% NOTE(review): institution and year are missing (both required for TechReport) -- TODO fill in from the report itself
+@TechReport{publication-an,
+  author =       "Tomaso Poggio and Federico Girosi",
+  title =        "An Equivalence Between Sparse Approximation and
+                 Support Vector Machines",
+}
+
+% NOTE(review): institution and year are missing (both required for TechReport) -- TODO fill in from the report itself
+@TechReport{publication-notes,
+  author =       "Tomaso Poggio and Federico Girosi",
+  title =        "Notes on {PCA}, Regularization, Sparsity and Support
+                 Vector Machines",
+}
+
+# NOTE(review): same authors, title, volume, pages and year as Qian88a -- looks like a duplicate; confirm before citing both
+@Article{Qian+Sejnowski88,
+  author  = {N. Qian and T. J. Sejnowski},
+  title   = {Predicting the secondary structure of globular proteins using neural network models},
+  journal = {Journal of Molecular Biology},
+  volume  = {202},
+  pages   = {865--884},
+  year    = {1988},
+}
+
+@Article{Qian88a,
+  author  = {N. Qian and T. J. Sejnowski},
+  title   = {Predicting the Secondary Structure of Globular Proteins Using Neural Network Models},
+  journal = jmolecb,
+  volume  = {202},
+  pages   = {865--884},
+  year    = {1988},
+}
+
+@InProceedings{Qian88b,
+  author =       "N. Qian and T. J. Sejnowski",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  title =        "Learning to Solve Random-Dot Stereograms of Dense
+                 Transparent Surfaces with Recurrent Back-Propagation",
+  booktitle =    cmss88,
+  pages =        "435--443",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Pittsburgh 1988",
+  year =         "1988",
+}
+
+@Article{quantiles-nc-2002,
+  author  = {Ichiro Takeuchi and Yoshua Bengio and Takafumi Kanamori},
+  title   = {Robust Regression with Asymmetric Heavy-Tail Noise Distributions},
+  journal = {Neural Computation},
+  volume  = {14},
+  number  = {10},
+  pages   = {2469--2496},
+  year    = {2002},
+}
+
+@TechReport{quantiles-TR,
+  author      = {Ichiro Takeuchi and Yoshua Bengio and Takafumi Kanamori},
+  title       = {Robust Regression with Asymmetric Heavy-Tail Noise},
+  institution = {Dept. IRO, Universit\'e de Montr\'eal},
+  number      = {1198},
+  year        = {2001},
+}
+
+@Article{Quinlan+Rivest89,
+  author  = {J. Ross Quinlan and Ronald L. Rivest},
+  title   = {Inferring Decision Trees Using the Minimum Description Length Principle},
+  journal = {Information and Computation},
+  volume  = {80},
+  pages   = {227--248},
+  year    = {1989},
+}
+
+@Article{Quinlan86,
+  author  = {J. Ross Quinlan},
+  title   = {Induction of Decision Trees},
+  journal = {Machine Learning},
+  volume  = {1},
+  number  = {1},
+  pages   = {81--106},
+  year    = {1986},
+}
+
+@Book{Quinlan93,
+  author    = {J. Ross Quinlan},
+  title     = {{C4}.5: Programs for Machine Learning},
+  publisher = {Morgan Kaufmann},
+  year      = {1993},
+}
+
+@Book{Rabiner+Gold75,
+  author    = {L. R. Rabiner and B. Gold},
+  title     = {Theory and application of digital signal processing},
+  publisher = {Prentice-Hall},
+  year      = {1975},
+}
+
+@Article{Rabiner85,
+  author  = {L. R. Rabiner and S. E. Levinson},
+  title   = {A speaker-independent, syntax-directed, connected word recognition system based on hidden {Markov} models and level building},
+  journal = ieeetassp,
+  volume  = {33},
+  number  = {3},
+  pages   = {561--573},
+  year    = {1985},
+}
+
+@Article{Rabiner86,
+  author =       "L. R. Rabiner and B. H. Juang",
+  title =        "An Introduction to Hidden {Markov} Models",
+  journal =      ieeeassp,
+  volume =       "3",
+  number =       "1",
+  pages =        "4--16",
+  month =        jan,
+  year =         "1986",
+}
+
+@Article{Rabiner89,
+  author =       "L. R. Rabiner",
+  title =        "A Tutorial on Hidden {Markov} Models and Selected
+                 Applications in Speech Recognition",
+  journal =      "Proceedings of the IEEE",
+  volume =       "77",
+  number =       "2",
+  pages =        "257--286",
+  year =         "1989",
+  OPTannote =    "",
+}
+
+@Article{Raetsch-2002,
+  author    = {Gunnar R{\"a}tsch and Ayhan Demiriz and Kristin P. Bennett},
+  title     = {Sparse Regression Ensembles in Infinite and Finite Hypothesis Spaces},
+  journal   = {Machine Learning},
+  publisher = {Kluwer Academic Publishers},
+  year      = {2002},
+}
+
+@InCollection{Raftery1996,
+  author =       "A. Raftery",
+  editor =       "W. R. Gilks and S. Richardson and D. J. Spiegelhalter",
+  title =        "Hypothesis Testing and Model Selection",
+  booktitle =    "MCMC in Practice",
+  pages =        "163--188",
+  publisher =    "Chapman and Hall",
+  year =         "1996",
+}
+
+
+@inproceedings{RaginskyM2008,
+  author    = {Maxim Raginsky and
+               Svetlana Lazebnik and
+               Rebecca Willett and
+               Jorge Silva},
+  title     = {Near-minimax recursive density estimation on the binary
+               hypercube},
+  editor    = NIPS20ed,
+  booktitle = NIPS20,
+  pages     = {1305--1312},
+  year      = {2008},
+}
+
+@INPROCEEDINGS{RainaR2003,
+  author    = "Rajat Raina and Yirong Shen and Andrew Y. Ng and Andrew McCallum",
+  editor    = NIPS16ed,
+  title     = "Classification with hybrid generative/discriminative models",
+  booktitle = NIPS16,
+  publisher = "MIT Press",
+  year      = "2003",
+}
+
+@Misc{raina+ng+koller-workshop-2005,
+  author       = {Rajat Raina and Andrew Y. Ng and Daphne Koller},
+  title        = {Transfer Learning by Constructing Informative Priors},
+  howpublished = {'Inductive Transfer: 10 Years Later' NIPS Workshop},
+  year         = {2005},
+  OPTkey       = {},
+}
+
+@InProceedings{RainaR2007,
+  author    = {Rajat Raina and Alexis Battle and Honglak Lee and Benjamin Packer and Andrew Y. Ng},
+  editor    = ICML07ed,
+  title     = {Self-taught learning: transfer learning from unlabeled data},
+  booktitle = ICML07,
+  pages     = {759--766},
+  publisher = ICML07publ,
+  year      = {2007},
+  bibsource = {DBLP, http://dblp.uni-trier.de},
+  ee        = {http://doi.acm.org/10.1145/1273496.1273592},
+}
+
+@InProceedings{RainaR2007-small,
+  author    = {R. Raina and A. Battle and H. Lee and B. Packer and A. Y. Ng},
+  title     = {Self-taught learning: transfer learning from unlabeled data},
+  booktitle = {ICML 2007},
+  year      = {2007},
+}
+
+@inproceedings{RainaICML09,
+  author = {Raina, Rajat and Madhavan, Anand and Ng, Andrew Y.},
+  title = {Large-scale deep unsupervised learning using graphics processors},
+  booktitle = ICML09,
+  editor =  ICML09ed,
+  publisher = ICML09publ,
+  address = {New York, NY, USA},
+  pages = {873--880},
+  year = {2009},
+  isbn = {978-1-60558-516-1},
+  location = {Montreal, Quebec, Canada},
+  doi = {10.1145/1553374.1553486},
+}
+
+@InProceedings{Ramanujam88,
+  author    = {J. Ramanujam and P. Sadayappan},
+  title     = {Optimization by Neural Networks},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {325--332},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@InProceedings{ranzato-07,
+  author    = {{Marc'Aurelio} Ranzato and Christopher Poultney and Sumit Chopra and Yann {LeCun}},
+  editor    = NIPS19ed,
+  title     = {Efficient Learning of Sparse Representations with an Energy-Based Model},
+  booktitle = NIPS19,
+  pages     = {1137--1144},
+  publisher = {MIT Press},
+  year      = {2007},
+}
+
+@InProceedings{ranzato-07-small,
+  author =       "M. Ranzato and C. Poultney and
+                 S. Chopra and Y. {LeCun}",
+  booktitle =    "NIPS'06",
+  title =        "Efficient Learning of Sparse Representations with an
+                 Energy-Based Model",
+  year =         "2007",
+}
+
+@InProceedings{ranzato-07-short,
+  author =       "M. Ranzato and C. Poultney and
+                 S. Chopra and Y. {LeCun}",
+  booktitle =    "Adv. Neural Inf. Proc. Sys. 19",
+  title =        "Efficient Learning of Sparse Representations with an
+                 Energy-Based Model",
+  pages = {1137--1144},
+  year =         "2007",
+}
+
+# Please do NOT use this citation as it is a duplicate of ranzato-07
+@InCollection{ranzato-06,
+  author =       "{Marc'Aurelio} Ranzato and Christopher Poultney and
+                 Sumit Chopra and Yann {LeCun}",
+  editor =       NIPS19ed,
+  booktitle =    NIPS19,
+  title =        "Efficient Learning of Sparse Representations with an
+                 Energy-Based Model",
+  publisher =    "{MIT} Press",
+  pages =        "1137--1144",
+  year =         "2007",
+}
+
+# Please do NOT use this citation as it is a duplicate of ranzato-07-small
+@InCollection{ranzato-06-small,
+  author =       "M. Ranzato and C. Poultney and
+                 S. Chopra and Y. {LeCun}",
+  booktitle =    "NIPS 19",
+  title =        "Efficient Learning of Sparse Representations with an
+                 Energy-Based Model",
+  year =         "2007",
+}
+
+
+@InProceedings{ranzato-08,
+  author =       "{Marc'Aurelio} Ranzato and Y-Lan Boureau and Yann
+                 {LeCun}",
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        "Sparse feature learning for deep belief networks",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "1185--1192",
+  year =         "2008",
+}
+  %url =          "http://www.cs.nyu.edu/~ranzato/publications/ranzato-nips07.pdf",
+
+@InProceedings{ranzato-08-small,
+  author =       "M. Ranzato and Y. Boureau and Y. {LeCun}",
+  booktitle =    "NIPS 20",
+  title =        "Sparse feature learning for deep belief networks",
+  year =         "2008",
+}
+
+@InProceedings{ranzato-08-short,
+  author =       "M. Ranzato and Y. Boureau and Y. {LeCun}",
+  booktitle =    "Adv. Neural Inf. Proc. Sys. 20",
+  title =        "Sparse feature learning for deep belief networks",
+  year =         "2008",
+  pages = {1185--1192},
+}
+
+@InProceedings{ranzato-cvpr-07,
+  author =       "{Marc'Aurelio} Ranzato and {Fu-Jie} Huang and {Y-Lan}
+                 Boureau and Yann {LeCun}",
+  booktitle =    cvpr07,
+  title =        "Unsupervised Learning of Invariant Feature Hierarchies
+                 with Applications to Object Recognition",
+  publisher =    "IEEE Press",
+  year =         "2007",
+  original =     "orig/ranzato-cvpr-07.pdf",
+}
+
+@InProceedings{ranzato-cvpr-07-small,
+  author =       "{Marc'Aurelio} Ranzato and {Fu-Jie} Huang and {Y-Lan}
+                 Boureau and Yann {LeCun}",
+  booktitle =    "CVPR'07",
+  title =        "Unsupervised Learning of Invariant Feature Hierarchies
+                 with Applications to Object Recognition",
+  year =         "2007",
+  original =     "orig/ranzato-cvpr-07.pdf",
+}
+
+@InProceedings{Ranzato-icdar07,
+  author =       "{Marc'Aurelio} Ranzato and Yann {LeCun}",
+  booktitle =    ICDAR07,
+  title =        "A Sparse and Locally Shift Invariant Feature Extractor
+                 Applied to Document Images",
+  year =         "2007",
+  isbn =         {0-7695-2822-8},
+  pages =        {1213--1217},
+  publisher =    {IEEE Computer Society},
+  address =      {Washington, DC, USA},
+
+}
+
+@InProceedings{ranzato-unsup-07,
+  author =       "{Marc'Aurelio} Ranzato and {Y-Lan} Boureau and Sumit
+                 Chopra and Yann {LeCun}",
+  booktitle =    aistats07,
+  title =        "A Unified Energy-Based Framework for Unsupervised
+                 Learning",
+  publisher =    "Omnipress",
+  date =         "March 21-24, 2007",
+  address =      "San Juan, Puerto Rico",
+  year =         "2007",
+}
+
+@InProceedings{Rao+Ruderman-99,
+  author =       "R. P. N. Rao and D. L. Ruderman",
+  editor =       NIPS11ed,
+  booktitle =    NIPS11,
+  title =        "Learning {Lie} Groups for Invariant Visual
+                 Perception",
+  publisher =    "MIT Press, Cambridge, MA",
+  pages =        "810--816",
+  year =         "1999",
+}
+
+@Book{Rao71,
+  author =       "C. R. Rao and S. K. Mitra",
+  title =        "Generalized Inverse of Matrices and Its Applications",
+  publisher =    "Wiley",
+  address =      "New York",
+  year =         "1971",
+}
+
+@Book{Rashevsky38,
+  author =       "N. Rashevsky",
+  title =        "Mathematical Biophysics",
+  publisher =    "University of Chicago Press",
+  address =      "Chicago",
+  year =         "1938",
+}
+
+@InProceedings{RasmussenC2000,
+  author =       "Carl Rasmussen",
+  editor =       NIPS12ed,
+  booktitle =    NIPS12,
+  title =        "The Infinite {G}aussian Mixture Model",
+  year =         "2000",
+}
+
+@Misc{Rasmussen2001,
+  author =       "Carl Edward Rasmussen",
+  title =        "Conjugate gradient for Matlab",
+  year =         "2001",
+  note =         "http://www.kyb.tuebingen.mpg.de/bs/people/carl/code/minimize/",
+}
+
+% NOTE(review): volume was entered as "341" (a garbled "34(1"); set to
+% volume 34, issues 1--3 -- verify pages against the journal record.
+@Article{Ratnaparkhi99,
+  author =       "A. Ratnaparkhi",
+  title =        "Learning to parse natural language with maximum
+                 entropy models",
+  journal =      "Machine Learning",
+  volume =       "34",
+  number =       "1--3",
+  pages =        "151--175",
+  year =         "1999",
+}
+
+@Article{Rauch63,
+  author =       "H. E. Rauch",
+  title =        "Solutions to the linear smoothing problem",
+  journal =      "IEEE Transactions on Automatic Control",
+  volume =       "8",
+  pages =        "371--372",
+  year =         "1963",
+}
+
+@Article{Refenes-94,
+  author =       "A. N. Refenes",
+  title =        "Stock Performance Modeling Using Neural Networks: a
+                 Comparative Study with Regression Models",
+  journal =      "Neural Networks",
+  volume =       "7",
+  number =       "2",
+  pages =        "375--388",
+  year =         "1994",
+}
+
+@Article{regression-KB-78,
+  author =       "Koenker, R. and Bassett, Jr., G.",
+  title =        "Regression Quantiles",
+  journal =      "Econometrica",
+  volume =       "46",
+  number =       "1",
+  pages =        "33--50",
+  year =         "1978",
+}
+@inproceedings{reid:1989,
+    title = {Rapid Training of Higher-Order Neural Networks for Invariant Pattern
+        Recognition},
+    author = {Reid, M. B. and Spirkovska, L. and Ochoa, E.},
+    booktitle = ijcnn,
+    month   = jun,
+    year    = {1989},
+    address = {Washington, DC, USA},
+}
+
+@InCollection{Rescorla72,
+  author =       "R. A. Rescorla and A. R. Wagner",
+  editor =       "A. H. Black and W. F. Prokasy",
+  booktitle =    "Classical Conditioning II: Current Research and
+                 Theory",
+  title =        "A Theory of Pavlovian Conditioning: The Effectiveness
+                 of Reinforcement and Nonreinforcement",
+  publisher =    "Appleton-Century-Crofts",
+  address =      "New York",
+  pages =        "64--99",
+  year =         "1972",
+}
+
+@InProceedings{Resnik-2002,
+  author =       "Mona Diab and Philip Resnik",
+  booktitle =    "40th Annual Meeting of the {ACL}",
+  title =        "An unsupervised method for word sense tagging using
+                 parallel corpora",
+  year =         "2002",
+}
+
+@Article{Resnik-99,
+  author =       "Philip Resnik",
+  title =        "Semantic similarity in a taxonomy: an
+                 information-based measure and its application to
+                 problems of ambiguity in natural language",
+  journal =      "Journal of Artificial Intelligence Research",
+  volume =       "11",
+  pages =        "95--130",
+  year =         "1999",
+}
+
+@InProceedings{Resnik-99-web,
+  author =       "P. Resnik",
+  booktitle =    "37th Annual Meeting of the Association for
+                 Computational Linguistics (ACL'99)",
+  title =        "Mining the Web for Bilingual Text",
+  address =      "College Park, Maryland",
+  month =        jun,
+  year =         "1999",
+}
+
+@article{Rhodes-2008,
+ author = {Paul Rhodes},
+ title = {Recoding Patterns of Sensory Input: Higher-Order Features and the Function of Nonlinear Dendritic Trees},
+ journal = {Neural Computation},
+ volume = 20,
+ number=8,
+ pages = {2000--2036},
+ year = 2008,
+}
+
+@Article{RicLip91,
+  author =       "Michael D. Richard and Richard P. Lippmann",
+  title =        "Neural Network Classifiers Estimate {Bayesian}
+                 a-posteriori Probabilities",
+  journal =      "Neural Computation",
+  volume =       "3",
+  pages =        "461--483",
+  year =         "1991",
+  abstract =     "Theoretical argumentation under which circumstances
+                 nets can estimate correctly and what this means for
+                 network engineering methodology. Experimental
+                 evaluations with different cost functions (mean squared
+                 error, cross entropy, normalized likelihood) and
+                 network types (multi layer perceptron, radial basis
+                 function, high order polynomial) show how accuracy
+                 degrades with insufficient data or inadequate network
+                 size. Discusses practical consequences. Contains
+                 references to work on other cost functions (e.g.
+                 information measures)",
+  class =        "nn, learning, theory",
+}
+
+@InProceedings{Ricotti88,
+  author =       "L. P. Ricotti and S. Ragazzini and G. Martinelli",
+  booktitle =    icnn,
+  title =        "Learning of Word Stress in a Sub-Optimal Second Order
+                 Back-Propagation Neural Network",
+  volume =       "1",
+  publisher =    "IEEE, New York",
+  address =      "San Diego 1988",
+  pages =        "355--361",
+  year =         "1988",
+}
+
+@Article{Riedel88,
+  author =       "U. Riedel and R. K{\"u}hn and J. L. van Hemmen",
+  title =        "Temporal Sequences and Chaos in Neural Nets",
+  journal =      prA,
+  volume =       "38",
+  pages =        "1105--1108",
+  year =         "1988",
+}
+
+@Article{Riis96,
+  author =       "S. K. Riis and A. Krogh",
+  title =        "Improving prediction of protein secondary structure
+                 using structured neural networks and multiple sequence
+                 alignments",
+  journal =      "J. Comput. Biol.",
+  volume =       "3",
+  pages =        "163--183",
+  year =         "1996",
+}
+
+% NOTE(review): appears to be the same article as Riis96 (same authors,
+% journal, pages and year; the title wording differs) -- confirm and
+% prefer a single key.
+@Article{RiisKrogh1996,
+  author =       "S. Riis and A. Krogh",
+  title =        "Improving protein secondary structure prediction using
+                 structured neural networks and multiple sequence
+                 profiles",
+  journal =      "Journal of Computational Biology",
+  volume =       "3",
+  pages =        "163--183",
+  year =         "1996",
+}
+
+@TechReport{Riley94,
+  author =       "M. D. Riley and F. C. N. Pereira",
+  title =        "Weighted-finite-automata tools with applications to
+                 speech and language processing",
+  number =       "Technical Memorandum 11222-931130-28TM",
+  institution =  "AT\&T Bell Laboratories",
+  year =         "1994",
+}
+
+@article{Rissanen79,
+ author = {Rissanen, J. J. and Langdon, Jr., G. G.},
+ title = {Arithmetic coding},
+ journal = {IBM Journal of Research and Development},
+ volume = 23,
+ number = 2,
+ pages = {149--162},
+ year = 1979,
+}
+
+@Article{rissanen83,
+  author =       "J.J. Rissanen",
+  title =        "A universal data compression system",
+  journal =      "IEEE Transactions on Information Theory",
+  volume =       "29",
+  pages =        "656--664",
+  year =         "1983",
+}
+
+@Article{Rissanen86,
+  author =       "J. Rissanen",
+  title =        "Stochastic complexity and modeling",
+  journal =      "Annals of Statistics",
+  volume =       "14",
+  pages =        "1080--1100",
+  year =         "1986",
+}
+
+@Book{RissanenBook,
+  author =       "J. Rissanen",
+  title =        "Stochastic Complexity in Statistical Inquiry",
+  publisher =    "World Scientific",
+  address =      "Singapore",
+  year =         "1990",
+}
+
+@Article{Ritter86,
+  author =       "H. Ritter and K. Schulten",
+  title =        "On the Stationary State of Kohonen's Self-Organizing
+                 Sensory Mapping",
+  journal =      biocyb,
+  volume =       "54",
+  pages =        "99--106",
+  year =         "1986",
+}
+
+@InProceedings{Ritter88a,
+  author =       "H. Ritter and K. Schulten",
+  editor =       "R. Eckmiller and Ch. von der Malsburg",
+  booktitle =    "Neural Computers",
+  title =        "Extending Kohonen's Self-Organizing Mapping Algorithm
+                 to Learn Ballistic Movements",
+  publisher =    "Springer-Verlag, Berlin",
+  address =      "Neuss 1987",
+  pages =        "393--406",
+  year =         "1988",
+}
+
+@Article{Ritter88b,
+  author =       "H. Ritter and K. Schulten",
+  title =        "Convergence Properties of Kohonen's Topology
+                 Conserving Maps: Fluctuations, Stability, and Dimension
+                 Selection",
+  journal =      biocyb,
+  volume =       "60",
+  pages =        "59--71",
+  year =         "1988",
+}
+
+@InProceedings{Ritter88c,
+  author =       "H. Ritter and K. Schulten",
+  booktitle =    icnn,
+  title =        "Kohonen's Self-Organizing Maps: Exploring Their
+                 Computational Capabilities",
+  volume =       "1",
+  publisher =    "IEEE, New York",
+  address =      "San Diego 1988",
+  pages =        "109--116",
+  year =         "1988",
+}
+
+@Book{Robert-1999,
+  author =       "Christian P. Robert and George Casella",
+  title =        "Monte Carlo Statistical Methods",
+  publisher =    "Springer",
+  year =         "1999",
+}
+
+@TechReport{Robinson+Fallside90,
+  author =       "A. J. Robinson and F. Fallside",
+  key =          "Robinson",
+  title =        "Phoneme recognition from the {TIMIT} database using
+                 recurrent error propagation networks",
+  type =         "Technical Report",
+  number =       "{CUED/F-INFENG/TR.42}",
+  institution =  "Cambridge University Engineering Department",
+  year =         "1990",
+}
+
+@Article{Robinson+Fallside91,
+  author =       "A. J. Robinson and F. Fallside",
+  title =        "A recurrent error propagation network speech
+                 recognition system",
+  journal =      "Computer Speech and Language",
+  volume =       "5",
+  number =       "3",
+  pages =        "259--274",
+  month =        jul,
+  year =         "1991",
+}
+
+@InProceedings{Robinson88,
+  author =       "A. J. Robinson and F. Fallside",
+  editor =       nips87ed,
+  booktitle =    nips87,
+  title =        "Static and Dynamic Error Propagation Networks with
+                 Application to Speech Coding",
+  publisher =    "American Institute of Physics, New York",
+  address =      "Denver, CO",
+  pages =        "632--641",
+  year =         "1988",
+}
+
+@Article{Robinson91,
+  author =       "T. Robinson and F. Fallside",
+  title =        "Recurrent Error Propagation Network Speech Recognition
+                 System",
+  journal =      cspla,
+  volume =       "5",
+  number =       "3",
+  pages =        "259--274",
+  month =        jul,
+  year =         "1991",
+}
+
+@InProceedings{Robinson92-icassp,
+  author =       "T. Robinson",
+  booktitle =    icassp,
+  title =        "A Real-Time Recurrent Error Propagation Network Word
+                 Recognition System",
+  volume =       "I",
+  pages =        "617--620",
+  year =         "1992",
+}
+
+@Article{robust-H-73,
+  author =       "P. J. Huber",
+  title =        "Robust regression: Asymptotics, Conjectures and
+                 {Monte} {Carlo}",
+  journal =      "Ann. Stat.",
+  volume =       "1",
+  pages =        "799--821",
+  year =         "1973",
+}
+
+@Book{robust-H-82,
+  author =       "P. J. Huber",
+  title =        "Robust Statistics",
+  publisher =    "John Wiley \& Sons Inc.",
+  year =         "1982",
+}
+
+@Book{robust-HRRS-86,
+  author =       "F. R. Hampel and E. M. Ronchetti and P. J. Rousseeuw
+                 and W. A. Stahel",
+  title =        "Robust Statistics, The Approach based on Influence
+                 Functions",
+  publisher =    "John Wiley \& Sons",
+  year =         "1986",
+}
+
+@TechReport{robust-RAD-00,
+  author =       "Rousseeuw, P. J. and Van Aelst, S. and Van Driessen, K.",
+  title =        "Robust Multivariate Regression",
+  institution =  "University of Antwerp",
+  year =         "2000",
+}
+
+@Book{robust-RL-87,
+  author =       "P. J. Rousseeuw and A. M. Leroy",
+  title =        "Robust Regression and Outlier Detection",
+  publisher =    "John Wiley \& Sons Inc.",
+  year =         "1987",
+}
+
+@InProceedings{Rohwer-nips90,
+  author =       "R. Rohwer",
+  editor =       NIPS2ed,
+  booktitle =    NIPS2,
+  title =        "The `Moving Targets' Training Algorithm",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "558--565",
+  year =         "1990",
+}
+
+@InProceedings{Rohwer87,
+  author =       "R. Rohwer and B. Forrest",
+  editor =       "M. Caudill and C. Butler",
+  booktitle =    icnn,
+  title =        "Training Time-Dependence in Neural Networks",
+  volume =       "2",
+  publisher =    "IEEE, New York",
+  address =      "San Diego 1987",
+  pages =        "701--708",
+  year =         "1987",
+}
+
+@InProceedings{Rohwer90,
+  author =       "R. Rohwer",
+  editor =       NIPS2ed,
+  booktitle =    NIPS2,
+  title =        "The ``Moving Targets'' Training Algorithm",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Denver, CO",
+  pages =        "558--565",
+  year =         "1990",
+}
+
+@article{Rohde+Plaut-99,
+ author = {D.L.T. Rohde and D.C. Plaut},
+ title = {Language acquisition in the absence of explicit negative evidence: {H}ow important is starting small?},
+ journal = {Cognition}, 
+ volume = 72,
+ pages = {67--109},
+ year = 1999
+}
+
+@PhdThesis{Romeo89,
+  author =       "F. I. Romeo",
+  title =        "Simulated Annealing: Theory and Applications to Layout
+                 Problems",
+  school =       "University of California at Berkeley",
+  year =         "1989",
+  note =         "Memorandum UCB/ERL--M89/29",
+}
+
+@InProceedings{Romer+Frey2003,
+  author =       "R. Rosales and B. Frey",
+  booktitle =    UAI03,
+  title =        "Learning Generative Models of Affinity Matrices",
+  publisher =    "Morgan Kaufmann Publishers",
+  address =      "San Francisco, CA",
+  pages =        "485--492",
+  year =         "2003",
+}
+
+@InProceedings{Ron94,
+  author =       "D. Ron and Y. Singer and N. Tishby",
+  editor =       NIPS6ed,
+  booktitle =    NIPS6,
+  title =        "The power of amnesia",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "176--183",
+  year =         "1994",
+}
+
+@Article{Ron96,
+  author =       "D. Ron and Y. Singer and N. Tishby",
+  title =        "The power of amnesia: Learning Probabilistic Automata
+                 with Variable Memory Length",
+  journal =      "Machine Learning",
+  volume =       "25",
+  year =         "1996",
+}
+
+@Article{Ron98,
+  author =       "Dana Ron and Yoram Singer and Naftali Tishby",
+  title =        "On the Learnability and Usage of Acyclic Probabilistic
+                 Finite Automata",
+  journal =      "Journal of Computer and System Sciences",
+  volume =       "56",
+  number =       "2",
+  pages =        "133--152",
+  year =         "1998",
+}
+
+@InProceedings{Roscheisen-nips92,
+  author =       "M. R{\"o}scheisen and R. Hofmann and V. Tresp",
+  editor =       NIPS4ed,
+  booktitle =    NIPS4,
+  title =        "Neural Control for Rolling Mills: Incorporating Domain
+                 Theories to Overcome Data Deficiency",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "659--666",
+  year =         "1992",
+}
+
+@Book{Rose85,
+  editor =       "D. Rose and V. G. Dobson",
+  title =        "Models of the Visual Cortex",
+  publisher =    "Wiley",
+  address =      "Chichester",
+  year =         "1985",
+}
+
+@Book{Rosenberg-1997,
+  author =       "S. Rosenberg",
+  title =        "The Laplacian on a Riemannian Manifold",
+  publisher =    "Cambridge University Press",
+  address =      "Cambridge, UK",
+  year =         "1997",
+}
+
+@InCollection{Rosenberg88,
+  author =       "C. R. Rosenberg and G. Blelloch",
+  editor =       "D. Waltz and J. Feldman",
+  booktitle =    "Connectionist Models and their Implications",
+  title =        "An Implementation of Network Learning on the
+                 Connection Machine",
+  publisher =    "Ablex Pub. Corp",
+  address =      "Norwood, NJ",
+  year =         "1988",
+}
+
+@TechReport{Rosenblatt57,
+  author =       "Frank Rosenblatt",
+  title =        "The Perceptron --- a perceiving and recognizing
+                 automaton",
+  number =       "85-460-1",
+  institution =  "Cornell Aeronautical Laboratory",
+  address =      "Ithaca, N.Y.",
+  year =         "1957",
+}
+
+@article{Rosenblatt-1958,
+    author = {Frank Rosenblatt},
+    title = {The perceptron: A probabilistic model for information storage and organization in the brain},
+    journal = {Psychological Review},
+    year = {1958},
+    volume = {65},
+    number = {6},
+    pages = {386--408},
+}
+
+@Book{Rosenblatt62,
+  author =       "Frank Rosenblatt",
+  title =        "Principles of Neurodynamics",
+  publisher =    "Spartan",
+  address =      "New York",
+  year =         "1962",
+}
+
+@Article{rosenfeld02whole,
+  author =       "Ronald Rosenfeld and Stanley F. Chen and Xiaojin Zhu",
+  title =        "Whole-Sentence Exponential Language Models: {A}
+                 Vehicle For Linguistic-Statistical Integration",
+  journal =      CSL,
+  volume =       "15",
+  number =       "1",
+  year =         "2001",
+  URL =          "citeseer.nj.nec.com/448532.html",
+}
+
+@Article{Rosenfeld2000,
+  author =       "Ronald Rosenfeld",
+  title =        "Two decades of Statistical Language Modeling: Where Do
+                 We Go From Here?",
+  journal =      "Proceedings of the {IEEE}",
+  volume =       "88",
+  number =       "8",
+  pages =        "1270--1278",
+  year =         "2000",
+}
+
+@InProceedings{Rosipal2003,
+  author =       "R. Rosipal and L. J. Trejo and B. Matthews",
+  booktitle =    ICML03,
+  editor =       ICML03ed,
+  publisher =    ICML03publ,
+  title =        "Kernel {PLS}-{SVC} for Linear and Nonlinear
+                 Classification",
+  year =         "2003",
+}
+
+@PhdThesis{Rossen89,
+  author =       "M. L. Rossen",
+  title =        "Speech Syllable Recognition with a Neural Network",
+  school =       "Brown University",
+  year =         "1989",
+}
+
+@Article{Rost93,
+  author =       "B. Rost and C. Sander",
+  title =        "Improved prediction of protein secondary structure by
+                 use of sequence profiles and neural networks",
+  journal =      "Proc. Nat. Ac. Sci. USA",
+  volume =       "90",
+  pages =        "7558--7562",
+  year =         "1993",
+}
+
+@Article{Rost94,
+  author =       "B. Rost and C. Sander",
+  title =        "Combining evolutionary information and neural networks
+                 to predict protein secondary structure",
+  journal =      "Proteins",
+  volume =       "19",
+  pages =        "55--72",
+  year =         "1994",
+}
+
+@InProceedings{RothBlack2005,
+  author =       "Stefan Roth and Michael J. Black",
+  booktitle =    cvpr05,
+  title =        "Fields of Experts: a framework for learning image
+                 priors",
+  volume =       "2",
+  pages =        "860--867",
+  year =         "2005",
+}
+
+@InProceedings{Roweis+Saul+Hinton-2002,
+  author =       "S. Roweis and L. Saul and G. Hinton",
+  editor =       NIPS14ed,
+  booktitle =    NIPS14,
+  title =        "Global coordination of local linear models",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2002",
+}
+
+% DEPRECATED, USE THE ONE BELOW
+@Article{roweis00lle,
+  author =       "Sam Roweis and Lawrence K. Saul",
+  title =        "Nonlinear dimensionality reduction by locally linear
+                 embedding",
+  journal =      "Science",
+  volume =       "290",
+  number =       "5500",
+  pages =        "2323--2326",
+  month =        dec,
+  year =         "2000",
+}
+
+@Article{Roweis2000-lle,
+  author =       "Sam Roweis and Lawrence K. Saul",
+  title =        "Nonlinear dimensionality reduction by locally linear
+                 embedding",
+  journal =      "Science",
+  volume =       "290",
+  number =       "5500",
+  pages =        "2323--2326",
+  month =        dec,
+  year =         "2000",
+}
+
+% NOTE(review): institution was missing (required for a tech report); the
+% value below is inferred from the street address -- confirm.
+@TechReport{roweis97unifying,
+  author =       "Sam Roweis and Zoubin Ghahramani",
+  title =        "A Unifying Review of Linear {G}aussian Models",
+  institution =  "Department of Computer Science, University of Toronto",
+  address =      "6 King's College Road, Toronto M5S 3H5, Canada",
+  year =         "1997",
+  URL =          "citeseer.nj.nec.com/article/roweis97unifying.html",
+}
+
+@InProceedings{roweis98em,
+  author =       "Sam Roweis",
+  editor =       NIPS10ed,
+  booktitle =    NIPS10,
+  title =        "{EM} Algorithms for {PCA} and {SPCA}",
+  volume =       "10",
+  publisher =    "{MIT} Press",
+  year =         "1998",
+  URL =          "citeseer.nj.nec.com/roweis98em.html",
+}
+
+@InProceedings{RoweisNCA2005,
+  author =       "Jacob Goldberger and Sam Roweis and Geoffrey E. Hinton and Ruslan
+                 Salakhutdinov",
+  editor =       NIPS17ed,
+  booktitle =    NIPS17,
+  title =        "Neighbourhood Components Analysis",
+  publisher =    "{MIT} Press",
+  year =         "2005",
+}
+
+@Book{Rubinstein1981,
+  author =       "Reuven Y. Rubinstein",
+  title =        "Simulation and the Monte Carlo Method",
+  publisher =    "John Wiley \& Sons",
+  year =         "1981",
+}
+
+@Article{Rubner89,
+  author =       "J. Rubner and P. Tavan",
+  title =        "A Self-Organizing Network for Principal-Component
+                 Analysis",
+  journal =      eul,
+  volume =       "10",
+  pages =        "693--698",
+  year =         "1989",
+}
+
+
+@Article{Rubner90,
+  author =       "J. Rubner and K. Schulten",
+  title =        "Development of Feature Detectors by
+                 Self-Organization",
+  journal =      biocyb,
+  volume =       "62",
+  pages =        "193--199",
+  year =         "1990",
+}
+
+@Article{Rumelhart85,
+  author =       "D. E. Rumelhart and D. Zipser",
+  title =        "Feature Discovery by Competitive Learning",
+  journal =      cogsci,
+  volume =       "9",
+  pages =        "75--112",
+  year =         "1985",
+  note =         "Reprinted in \cite[chapter 5]{Rumelhart86a}",
+}
+
+@Book{Rumelhart86a,
+  author =       "D. E. Rumelhart and J. L. McClelland and the PDP
+                 Research Group",
+  title =        "Parallel Distributed Processing: Explorations in the
+                 Microstructure of Cognition",
+  volume =       "1",
+  publisher =    "MIT Press",
+  address =      "Cambridge",
+  year =         "1986",
+}
+
+@Article{Rumelhart86b,
+  author =       "David E. Rumelhart and Geoffrey E. Hinton and Ronald J. Williams",
+  title =        "Learning Representations by Back-Propagating Errors",
+  journal =      "Nature",
+  volume =       "323",
+  pages =        "533--536",
+  year =         "1986",
+}
+
+@InCollection{Rumelhart86c,
+  author =       "D. E. Rumelhart and G. E. Hinton and R. J. Williams",
+  editor =       "D. E. Rumelhart and J. L. McClelland",
+  booktitle =    pdp,
+  title =        "Learning Internal Representations by Error
+                 Propagation",
+  chapter =      "8",
+  volume =       "1",
+  publisher =    "MIT Press",
+  address =      "Cambridge",
+  pages =        "318--362",
+  year =         "1986",
+}
+
+@InProceedings{Russ+Geoff-nips-2007,
+  author =       "Ruslan Salakhutdinov and Geoffrey E. Hinton",
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        "Using Deep Belief Nets to Learn Covariance Kernels for
+                 {Gaussian} Processes",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "1249--1256",
+  year =         "2008",
+}
+  %url =          "http://www.csri.utoronto.ca/~hinton/absps/dbngp.pdf",
+
+@InProceedings{Russ+Geoff-nips-2007-small,
+  author =       "Ruslan Salakhutdinov and Geoffrey E. Hinton",
+  booktitle =    "NIPS 20",
+  title =        "Using {D}eep {B}elief {N}ets to Learn Covariance Kernels for
+                 {G}aussian Processes",
+  year =         "2008",
+}
+
+@InProceedings{Russ+Geoff-nips-2007-short,
+  author =       "R. Salakhutdinov and G.E. Hinton",
+  booktitle =    "Adv. Neural Inf. Proc. Sys. 20",
+  title =        "Using {D}eep {B}elief {N}ets to Learn Covariance Kernels for
+                 {G}aussian Processes",
+  pages = {1249--1256},
+  year =         "2008",
+}
+
+@article{rust:2005,
+    author      = {Nicole Rust and Odelia Schwartz and J. Anthony Movshon and Eero Simoncelli},
+    title       = {Spatiotemporal Elements of Macaque {V1} Receptive Fields},
+    journal     = {Neuron},
+    volume      = {46},
+    number      = {6},
+    pages       = {945--956},
+    year        = {2005}
+}
+@article{rust:2006,
+    author = {Nicole C. Rust and Valerio Mante and Eero P. Simoncelli and J.
+        Anthony Movshon},
+    year = {2006},
+    title = {How {MT} Cells Analyze the Motion of Visual Patterns},
+    journal = {Nature Neuroscience},
+    volume = {9},
+    number = {11},
+    pages = {1421--1431},
+}
+
+% NOTE(review): journal name corrected ("Pattern Analysis", not "Pattern
+% Recognition"); volume/number/pages added -- verify against the journal.
+@Article{RYsed98,
+  author =       "Eric Sven Ristad and Peter N. Yianilos",
+  title =        "Learning String Edit Distance",
+  journal =      "IEEE Transactions on Pattern Analysis and Machine
+                 Intelligence",
+  volume =       "20",
+  number =       "5",
+  pages =        "522--532",
+  month =        may,
+  year =         "1998",
+}
+
+@Book{Saad-1996,
+  author =       "Y. Saad",
+  title =        "Iterative Methods for Sparse Linear Systems",
+  publisher =    "{PWS} Publishing Company",
+  address =      "Boston, MA",
+  year =         "1996",
+}
+
+@TechReport{Saad90a,
+  author =       "D. Saad and E. Marom",
+  title =        "Learning by Choice of Internal Representations --- An
+                 Energy Minimization Approach",
+  type =         "Preprint",
+  institution =  "Faculty of Engineering, Tel Aviv University",
+  address =      "Ramat-Aviv, Israel",
+  year =         "1990",
+}
+
+@TechReport{Saad90b,
+  author =       "D. Saad and E. Marom",
+  title =        "Training Feed Forward Nets with Binary Weights via a
+                 Modified {CHIR} Algorithm",
+  type =         "Preprint",
+  institution =  "Faculty of Engineering, Tel Aviv University",
+  address =      "Ramat-Aviv, Israel",
+  year =         "1990",
+}
+
+@Book{SaadOnlineLearning1999,
+  editor =       "David Saad",
+  title =        "On-Line Learning in Neural Networks",
+  publisher =    "Cambridge University Press",
+  year =         "1999",
+}
+
+@Article{Sachs+Young80,
+  author =       "M. B. Sachs and E. D. Young",
+  title =        "Effects of nonlinearities on speech encoding in the
+                 auditory nerve",
+  journal =      jasa,
+  volume =       "68",
+  number =       "3",
+  pages =        "858--875",
+  year =         "1980",
+}
+
+@Article{Sakoe78,
+  author =       "H. Sakoe and S. Chiba",
+  title =        "Dynamic Programming Algorithm Optimization for Spoken
+                 Word Recognition",
+  journal =      ieeetassp,
+  volume =       "26",
+  number =       "1",
+  pages =        "43--49",
+  month =        feb,
+  year =         "1978",
+}
+
+@InProceedings{Salakhutdinov-2010,
+  author   = "Ruslan Salakhutdinov",
+  title    = "Learning in {M}arkov Random Fields using Tempered Transitions",
+  crossref = "NIPS22",
+  year     = "2010",
+}
+
+@InProceedings{Salakhutdinov+Hinton2007,
+  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
+  title     = {Semantic Hashing},
+  booktitle = {Proceedings of the 2007 Workshop on Information Retrieval and applications of Graphical Models (SIGIR 2007)},
+  year      = {2007},
+  publisher = {Elsevier},
+  address   = {Amsterdam},
+}
+
+@InProceedings{Salakhutdinov+Hinton2007-small,
+  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
+  title     = {Semantic Hashing},
+  booktitle = {SIGIR},
+  year      = {2007},
+}
+
+@InProceedings{SalakhutdinovR2007,
+  author =       "Ruslan Salakhutdinov and Geoffrey E. Hinton",
+  booktitle =    aistats07,
+  title =        "Learning a Nonlinear Embedding by Preserving Class
+                 Neighbourhood Structure",
+  publisher =    "Omnipress",
+  date =         "March 21-24, 2007",
+  address =      "San Juan, Puerto Rico",
+  year =         "2007",
+}
+
+@InProceedings{SalakhutdinovR2007-small,
+  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
+  title     = {Learning a Nonlinear Embedding by Preserving Class Neighbourhood Structure},
+  booktitle = aistats07-small,
+  year      = {2007},
+}
+
+@InProceedings{SalakhutdinovR2007-short,
+  author =       "R. Salakhutdinov and G. E. Hinton",
+  booktitle =    {AI \& Stat.'2007},
+  title =        "Learning a Nonlinear Embedding by Preserving Class
+                 Neighbourhood Structure",
+  year =         "2007",
+}
+
+@InProceedings{SalakhutdinovR2007b,
+  author =       "Ruslan Salakhutdinov and Andriy Mnih and Geoffrey E.
+                 Hinton",
+  booktitle =    ICML07,
+  editor =       ICML07ed,
+  publisher =    ICML07publ,
+  title =        "Restricted {Boltzmann} machines for collaborative
+                 filtering",
+  address =      "New York, NY, USA",
+  pages =        "791--798",
+  year =         "2007",
+  location =     "Corvallis, Oregon",
+}
+
+@InProceedings{SalakhutdinovR2007b-small,
+  author    = {Ruslan Salakhutdinov and Andriy Mnih and Geoffrey E. Hinton},
+  title     = {Restricted {Boltzmann} machines for collaborative filtering},
+  booktitle = {ICML 2007},
+  year      = {2007},
+}
+
+@InProceedings{SalakhutdinovR2007b-short,
+  author =       "R. Salakhutdinov and A. Mnih and G. E. Hinton",
+  booktitle =    "Int. Conf. Mach. Learn. 2007",
+  title =        "Restricted {Boltzmann} machines for collaborative
+                 filtering",
+  pages =        "791--798",
+  year =         "2007",
+}
+
+
+@InProceedings{Salakhutdinov+Murray-2008,
+  author    = {Ruslan Salakhutdinov and Iain Murray},
+  title     = {On the Quantitative Analysis of Deep Belief Networks},
+  booktitle = ICML08,
+  editor    = ICML08ed,
+  publisher = ICML08publ,
+  volume    = {25},
+  pages     = {872--879},
+  year      = {2008},
+}
+
+@InProceedings{Salakhutdinov+Hinton-2009,
+  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
+  title     = {Deep {Boltzmann} Machines},
+  booktitle = aistats09,
+  volume    = {5},
+  pages     = {448--455},
+  year      = {2009},
+  date      = {April 16-18, 2009},
+  location  = {Clearwater (Florida), USA},
+}
+
+@Article{Salamon88,
+  author  = {P. Salamon and J. D. Nulton and J. Robinson and J. Petersen and G. Ruppeiner and L. Liao},
+  title   = {Simulated Annealing with Constant Thermodynamic Speed},
+  journal = cpc,
+  year    = {1988},
+  volume  = {49},
+  pages   = {423--428},
+}
+
+@Article{Salton+Buckley88,
+  author  = {G. Salton and C. Buckley},
+  title   = {Term weighting approaches in automatic text retrieval},
+  journal = {Information Processing and Management},
+  year    = {1988},
+  volume  = {24},
+  number  = {5},
+  pages   = {513--523},
+}
+
+@Article{Sanger89a,
+  author  = {T. D. Sanger},
+  title   = {Optimal Unsupervised Learning in a Single-Layer Linear Feedforward Neural Network},
+  journal = nn,
+  year    = {1989},
+  volume  = {2},
+  pages   = {459--473},
+}
+
+%% address holds the publisher's city; the conference itself took place in Denver, CO.
+@InProceedings{Sanger89b,
+  author =       "T. D. Sanger",
+  editor =       NIPS1ed,
+  booktitle =    NIPS1,
+  title =        "An Optimality Principle for Unsupervised Learning",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "11--19",
+  year =         "1989",
+}
+
+@article{Sanger-1994,
+  author  = "Terence D. Sanger",
+  title   = "Neural network learning control of robot manipulators using gradually increasing task difficulty",
+  journal = "{IEEE} Transactions on Robotics and Automation",
+  year    = "1994",
+  volume  = "10",
+  number  = "3",
+}
+
+@article{Sanger-1994-small,
+  author  = "Terence D. Sanger",
+  title   = "Neural network learning control of robot manipulators using gradually increasing task difficulty",
+  journal = "{IEEE} Trans. on Robotics and Automation",
+  year    = "1994",
+  volume  = "10",
+  number  = "3",
+}
+
+@InProceedings{sarawagi03,
+  author    = {Sunita Sarawagi and Soumen Chakrabarti and Shantanu Godbole},
+  title     = {Cross-training: learning probabilistic mappings between topics},
+  booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining},
+  pages     = {177--186},
+  year      = {2003},
+  publisher = {ACM Press},
+  address   = {New York, NY, USA},
+  location  = {Washington, D.C.},
+}
+
+@article{Sarkar-Moore-2005,
+  author  = "P. Sarkar and A. Moore",
+  title   = "Dynamic social network analysis using latent space models",
+  journal = "{SIGKDD} Explorations",
+  year    = "2005",
+  volume  = "7",
+  number  = "2",
+  pages   = "31--40",
+}
+
+@Article{Sato90,
+  author  = {M. Sato},
+  title   = {A Real Time Learning Algorithm for Recurrent Analog Neural Networks},
+  journal = biocyb,
+  year    = {1990},
+  volume  = {62},
+  pages   = {237--241},
+}
+
+@Article{Saul+96,
+  author  = {Lawrence K. Saul and Tommi Jaakkola and Michael I. Jordan},
+  title   = {Mean field theory for sigmoid belief networks},
+  journal = {Journal of Artificial Intelligence Research},
+  year    = {1996},
+  volume  = {4},
+  pages   = {61--76},
+}
+
+%% NOTE: the citation key says 2002 (kept for existing citations); JMLR volume 4 appeared in 2003.
+@Article{Saul+Roweis-2002,
+  author =       "L. Saul and S. Roweis",
+  title =        "Think globally, fit locally: unsupervised learning of
+                 low dimensional manifolds",
+  journal =      jmlr,
+  volume =       "4",
+  pages =        "119--155",
+  year =         "2003",
+}
+
+@InProceedings{Saul95,
+  author    = {Lawrence K. Saul and Michael I. Jordan},
+  title     = {Boltzmann Chains and Hidden Markov Models},
+  booktitle = NIPS7,
+  editor    = NIPS7ed,
+  pages     = {435--442},
+  year      = {1995},
+  publisher = {MIT Press, Cambridge, MA},
+}
+
+@InProceedings{Saul96,
+  author    = {Lawrence K. Saul and Michael I. Jordan},
+  title     = {Exploiting tractable substructures in intractable networks},
+  booktitle = NIPS8,
+  editor    = NIPS8ed,
+  year      = {1996},
+  publisher = {MIT Press, Cambridge, MA},
+}
+
+@InProceedings{SaulJordan97,
+  author    = {Lawrence K. Saul and Michael I. Jordan},
+  title     = {A variational model for model-based interpolation},
+  booktitle = NIPS9,
+  editor    = NIPS9ed,
+  pages     = {375},
+  year      = {1997},
+  publisher = {MIT Press},
+}
+
+@Article{Saund-1989,
+  author  = {Eric Saund},
+  title   = {Dimensionality-reduction using connectionist networks},
+  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
+  year    = {1989},
+  volume  = {11},
+  number  = {3},
+  pages   = {304--314},
+}
+
+@InCollection{Scalettar88,
+  author    = {R. Scalettar and A. Zee},
+  title     = {Emergence of Grandmother Memory in Feed Forward Networks: Learning with Noise and Forgetfulness},
+  booktitle = {Connectionist Models and Their Implications: Readings from Cognitive Science},
+  editor    = {D. Waltz and J. A. Feldman},
+  pages     = {309--332},
+  year      = {1988},
+  publisher = {Ablex},
+  address   = {Norwood},
+}
+
+@Article{schapire-90,
+  author  = {Robert E. Schapire},
+  title   = {The strength of weak learnability},
+  journal = {Machine Learning},
+  year    = {1990},
+  volume  = {5},
+  number  = {2},
+  pages   = {197--227},
+}
+
+@Article{Schapire-margin98,
+  author  = {Robert E. Schapire and Yoav Freund and Peter Bartlett and Wee Sun Lee},
+  title   = {Boosting the margin: {A} new explanation for the effectiveness of voting methods},
+  journal = {The Annals of Statistics},
+  year    = {1998},
+  volume  = {26},
+  number  = {5},
+  pages   = {1651--1686},
+}
+
+@InProceedings{schapire99theoretical,
+  author =       "Robert E. Schapire",
+  booktitle =    "Algorithmic Learning Theory, 10th International
+                 Conference, {ALT} '99, Tokyo, Japan, December 1999,
+                 Proceedings",
+  title =        "Theoretical Views of Boosting and Applications",
+  volume =       "1720",
+  publisher =    "Springer",
+  pages =        "13--25",
+  year =         "1999",
+  URL =          "http://citeseer.ist.psu.edu/article/schapire99theoretical.html",
+}
+
+@InProceedings{SchapireSinger98,
+  author    = {R. E. Schapire and Y. Singer},
+  title     = {Improved Boosting Algorithms Using Confidence Rated Predictions},
+  booktitle = {Proceedings of the 11th Annual Conference on Computational Learning Theory},
+  year      = {1998},
+}
+
+@Book{SchBurSmo99,
+  author =       "B. Sch{\"o}lkopf and C. J. C. Burges and A. J. Smola",
+  title =        "Advances in Kernel Methods --- Support Vector
+                 Learning",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "1999",
+}
+
+@InProceedings{ScheinA2001,
+  author    = {Andrew I. Schein and Alexandrin Popescul and Lyle H. Ungar and David M. Pennock},
+  title     = {Generative Models for Cold-Start Recommendations},
+  booktitle = {Workshop on Recommender Systems at SIGIR},
+  year      = {2001},
+}
+
+@InProceedings{ScheinA2002,
+  author    = {Andrew I. Schein and Alexandrin Popescul and Lyle H. Ungar and David M. Pennock},
+  title     = {Methods and metrics for cold-start recommendations},
+  booktitle = {SIGIR '02},
+  pages     = {253--260},
+  year      = {2002},
+  publisher = {ACM Press},
+  address   = {New York, NY, USA},
+}
+
+@InCollection{Scheines94,
+  author    = {R. Scheines},
+  title     = {Inferring causal structure among unmeasured variables},
+  booktitle = {Selecting Models from Data: Artificial Intelligence and Statistics {IV}},
+  editor    = {P. Cheeseman and R. W. Oldford},
+  pages     = {197--204},
+  year      = {1994},
+  publisher = {Springer-Verlag},
+}
+
+@InProceedings{Schenkel93,
+  author    = {M. Schenkel and H. Weissman and I. Guyon and C. Nohl and D. Henderson},
+  title     = {Recognition-Based Segmentation of On-Line Hand-Printed Words},
+  booktitle = NIPS5,
+  editor    = NIPS5ed,
+  pages     = {723--730},
+  year      = {1993},
+  address   = {Denver, CO},
+}
+
+@Article{schenkel95,
+  author =       "M. Schenkel and I. Guyon and D. Henderson",
+  title =        "On-line Cursive Script Recognition using Time Delay
+                 Neural Networks and Hidden {Markov} Models",
+  journal =      "{Machine} {Vision} and {Applications}",
+  volume =       "8",
+  number =       "4",
+  publisher =    "Springer Verlag",
+  pages =        "215--223",
+  year =         "1995",
+}
+
+@InProceedings{SchGra03,
+  author    = {Nicol N. Schraudolph and Thore Graepel},
+  title     = {Combining Conjugate Direction Methods with Stochastic Approximation of Gradients},
+  booktitle = {Proc.\ 9th Intl.\ Workshop Artificial Intelligence and Statistics (AIstats)},
+  editor    = {Christopher M. Bishop and Brendan J. Frey},
+  pages     = {7--13},
+  year      = {2003},
+  publisher = {Society for Artificial Intelligence and Statistics},
+  address   = {Key West, Florida},
+  isbn      = {0-9727358-0-1},
+  abstract  = {The method of conjugate directions provides a very effective way
+               to optimize large, deterministic systems by gradient descent. In
+               its standard form, however, it is not amenable to stochastic
+               approximation of the gradient. Here we explore ideas from
+               conjugate gradient in the stochastic (online) setting, using fast
+               Hessian-gradient products to set up low-dimensional Krylov
+               subspaces within individual mini-batches. In our benchmark
+               experiments the resulting online learning algorithms converge
+               orders of magnitude faster than ordinary stochastic gradient
+               descent.},
+}
+
+@Article{Schmidhuber92,
+  author  = {J{\"u}rgen Schmidhuber},
+  title   = {Learning Complex, Extended Sequences using the Principle of History Compression},
+  journal = nc,
+  year    = {1992},
+  volume  = {4},
+  number  = {2},
+  pages   = {234--242},
+}
+
+@Article{Schmidhuber96,
+  author =       "J{\"u}rgen Schmidhuber and S. Heil",
+  title =        "Sequential Neural Text Compression",
+  journal =      "{IEEE} Transactions on Neural Networks",
+  volume =       "7",
+  number =       "1",
+  pages =        "142--146",
+  year =         "1996",
+}
+
+@InCollection{Schmidt-2006,
+  author    = {Volker Schmidt},
+  title     = {Markov Chains and Monte-Carlo Simulation},
+  booktitle = {Lecture Notes, Summer 2006},
+  year      = {2006},
+  address   = {Ulm University, Department of Stochastics},
+  url       = {http://www.mathematik.uni-ulm.de/stochastik/lehre/ss06/markov/skript-engl/skript-engl.htm},
+}
+
+@Article{Schmitt-2002,
+  author  = {M. Schmitt},
+  title   = {Descartes' Rule of Signs for Radial Basis Function Neural Networks},
+  journal = {Neural Computation},
+  year    = {2002},
+  volume  = {14},
+  number  = {12},
+  pages   = {2997--3011},
+}
+
+@Article{Schneider-2001,
+  author  = {Tapio Schneider},
+  title   = {Analysis of Incomplete Climate Data: Estimation of Mean Values and Covariance Matrices and Imputation of Missing Values},
+  journal = {Journal of Climate},
+  year    = {2001},
+  volume  = {14},
+  pages   = {853--871},
+}
+
+@article{Schneidman+al-2003,
+    address = {Department of Molecular Biology, Princeton University, Princeton, New Jersey 08544, USA.},
+    author = {Schneidman, E.  and Bialek, W.  and Berry, M. J. },
+    issn = {1529-2401},
+    journal = {Journal of Neuroscience},
+    month = dec,
+    number = {37},
+    pages = {11539--11553},
+    title = {Synergy, redundancy, and independence in population codes},
+    url = {http://www.jneurosci.org/cgi/content/abstract/23/37/11539},
+    volume = {23},
+    year = {2003}
+}
+    
+
+@Article{schoelkopf97comparing,
+  author  = {B. Sch{\"o}lkopf and K. Sung and C. Burges and F. Girosi and P. Niyogi and T. Poggio and V. Vapnik},
+  title   = {Comparing support vector machines with {G}aussian kernels to radial basis function classifiers},
+  journal = {IEEE Transactions on Signal Processing},
+  year    = {1997},
+  volume  = {45},
+  pages   = {2758--2765},
+  text    = {Sch{\"o}lkopf, B., Sung, K., Burges, C., Girosi, F., Niyogi, P.,
+             Poggio, T., and Vapnik, V.: Comparing support vector machines with
+             {G}aussian kernels to radial basis function classifiers. IEEE
+             Transactions on Signal Processing, 45 (1997) 2758-2765.},
+}
+
+@Book{Scholkopf02-book,
+  author    = {B. Sch{\"o}lkopf and A. J. Smola},
+  title     = {Learning with Kernels: Support Vector Machines, Regularization, Optimization and Beyond},
+  year      = {2002},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+}
+
+@TechReport{Scholkopf96,
+  author =       "B. Sch{\"o}lkopf and A. Smola and K.-R. M{\"u}ller",
+  title =        "Nonlinear Component Analysis as a Kernel Eigenvalue
+                 Problem",
+  number =       "44",
+  institution =  "Max Planck Institute for Biological Cybernetics,
+                 T{\"u}bingen, Germany",
+  year =         "1996",
+}
+
+@Article{Scholkopf98,
+  author  = {B. Sch{\"o}lkopf and A. Smola and K.-R. M{\"u}ller},
+  title   = {Nonlinear component analysis as a kernel eigenvalue problem},
+  journal = {Neural Computation},
+  year    = {1998},
+  volume  = {10},
+  pages   = {1299--1319},
+}
+
+@Book{Scholkopf98-book,
+  author    = {B. Sch{\"o}lkopf and C. J. C. Burges and A. J. Smola},
+  title     = {Advances in kernel methods: support vector learning},
+  year      = {1998},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+}
+
+@Article{Scholkopf99,
+  author  = {B. Sch{\"o}lkopf and S. Mika and C. Burges and P. Knirsch and K.-R. M{\"u}ller and G. R{\"a}tsch and A. Smola},
+  title   = {Input Space Versus Feature Space in Kernel-Based Methods},
+  journal = {IEEE Trans. Neural Networks},
+  year    = {1999},
+  volume  = {10},
+  number  = {5},
+  pages   = {1000--1017},
+}
+
+@Article{Schraudolph02,
+  author  = {Nicol N. Schraudolph},
+  title   = {Fast Curvature Matrix-Vector Products for Second-Order Gradient Descent},
+  journal = {Neural Computation},
+  year    = {2002},
+  volume  = {14},
+  number  = {7},
+  pages   = {1723--1738},
+}
+
+@InProceedings{Schraudolph99,
+  author    = {Nicol N. Schraudolph},
+  title     = {Local gain adaptation in stochastic gradient descent},
+  booktitle = {Proceedings of the 9th International Conference on Artificial Neural Networks},
+  pages     = {569--574},
+  year      = {1999},
+}
+
+@InProceedings{Schutze92,
+  author    = {Hinrich Sch{\"u}tze},
+  title     = {Dimensions of Meaning},
+  booktitle = {Supercomputing'92},
+  pages     = {787--796},
+  year      = {1992},
+  address   = {Minneapolis MN},
+}
+
+@InProceedings{Schutze93,
+  author =       "H. Sch{\"u}tze",
+  editor =       NIPS5ed,
+  booktitle =    NIPS5,
+  title =        "Word space",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo CA",
+  pages =        "895--902",
+  year =         "1993",
+}
+
+@Misc{Schuurmans1999,
+  author =       "Dale Schuurmans",
+  title =        "Greedy importance sampling: {A} new Monte Carlo
+                 inference method",
+  year =         "1999",
+  URL =          "http://citeseer.nj.nec.com/25013.html",
+}
+
+@InProceedings{Schuurmans2000,
+  author =       "Dale Schuurmans and Finnegan Southey",
+  title =        "Monte Carlo inference via greedy importance sampling",
+  booktitle =    "Proceedings of the 16th Conference on Uncertainty in
+                 Artificial Intelligence ({UAI} 2000)",
+  pages =        "523--532",
+  year =         "2000",
+  URL =          "http://citeseer.nj.nec.com/281712.html",
+}
+
+@Article{Schuurmans2001,
+  author  = {D. Schuurmans and F. Southey},
+  title   = {Metric-based methods for adaptive model selection and regularization},
+  journal = {Machine Learning},
+  year    = {2002},
+  volume  = {48},
+  number  = {1},
+  pages   = {51--84},
+}
+
+@InProceedings{Schuurmans97,
+  author    = {D. Schuurmans},
+  title     = {A new metric-based approach to model selection},
+  booktitle = {Proceedings of the National Conference on Artificial Intelligence (AAAI-97)},
+  pages     = {552--558},
+  year      = {1997},
+}
+
+@Article{Schwartz90,
+  author  = {D. B. Schwartz and V. K. Samalam and S. A. Solla and J. S. Denker},
+  title   = {Exhaustive Learning},
+  journal = nc,
+  year    = {1990},
+  volume  = {2},
+  pages   = {371--382},
+}
+
+@Article{Schwenk+Bengio00,
+  author  = {Holger Schwenk and Yoshua Bengio},
+  title   = {Boosting Neural Networks},
+  journal = {Neural Computation},
+  year    = {2000},
+  volume  = {12},
+  number  = {8},
+  pages   = {1869--1887},
+}
+
+@InProceedings{Schwenk+Gauvain-2005,
+  author    = {Holger Schwenk and Jean-Luc Gauvain},
+  title     = {Building continuous space language models for transcribing European languages},
+  booktitle = {Interspeech},
+  pages     = {737--740},
+  year      = {2005},
+}
+
+@InProceedings{Schwenk+Gauvain2002,
+  author    = {H. Schwenk and J-L. Gauvain},
+  title     = {Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition},
+  booktitle = icassp,
+  pages     = {765--768},
+  year      = {2002},
+  address   = {Orlando, Florida},
+}
+
+@InProceedings{Schwenk+Gauvain2002-short,
+  author    = {H. Schwenk and J-L. Gauvain},
+  title     = {Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition},
+  booktitle = {Int. Conf. Acoust. Speech \& Sig. Proc.},
+  pages     = {765--768},
+  year      = {2002},
+  address   = {Orlando, Florida},
+}
+
+@InProceedings{Schwenk05C,
+  author    = {Holger Schwenk and Jean-Luc Gauvain},
+  title     = {Training Neural Network Language Models On Very Large Corpora},
+  booktitle = {Joint Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing (EMNLP)},
+  pages     = {201--208},
+  month     = oct,
+  year      = {2005},
+  address   = {Vancouver},
+  url       = {ftp://tlp.limsi.fr/public/emnlp05.pdf},
+}
+
+@InProceedings{Schwenk05C-small,
+  author    = {Holger Schwenk and Jean-Luc Gauvain},
+  title     = {Training Neural Network Language Models On Very Large Corpora},
+  booktitle = {EMNLP'2005},
+  pages     = {201--208},
+  year      = {2005},
+}
+
+@TechReport{Schwenk:2001:tr,
+  author =       "Holger Schwenk",
+  title =        "Language Modeling in the Continuous Domain",
+  number =       "2001-20",
+  institution =  "LIMSI-CNRS, Orsay, France",
+  month =        dec,
+  year =         "2001",
+}
+
+@InProceedings{Schwenk:2002:icassp,
+  author    = {Holger Schwenk and Jean-Luc Gauvain},
+  title     = {Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition},
+  booktitle = icassp,
+  volume    = {1},
+  pages     = {765--768},
+  year      = {2002},
+}
+
+@InProceedings{Schwenk:2003:sspr,
+  author =       "Holger Schwenk and Jean-Luc Gauvain",
+  booktitle =    "ISCA \& IEEE Workshop on Spontaneous Speech Processing
+                 and Recognition",
+  title =        "Using Continuous Space Language Models for
+                 Conversational Speech Recognition",
+  address =      "Tokyo",
+  month =        apr,
+  year =         "2003",
+}
+
+@InProceedings{Schwenk:2004:icslp,
+  author    = {Holger Schwenk and Jean-Luc Gauvain},
+  title     = {Using a Continuous Space Language Model for Conversational Speech Recognition},
+  booktitle = icslp,
+  year      = {2004},
+  note      = {submitted},
+}
+
+@InProceedings{Schwenk:2004:ijcnn,
+  author    = {Holger Schwenk},
+  title     = {Efficient Training of Large Neural Networks for Language Modeling},
+  booktitle = ijcnn,
+  volume    = {4},
+  pages     = {3050--3064},
+  year      = {2004},
+}
+
+@InProceedings{SchYuGue07,
+  author    = {Nicol N. Schraudolph and Jin Yu and Simon G{\"u}nter},
+  title     = {A Stochastic Quasi-{Newton} Method for Online Convex Optimization},
+  booktitle = {Proc.\ 11th Intl.\ Conf.\ Artificial Intelligence and Statistics (AIstats)},
+  pages     = {433--440},
+  year      = {2007},
+  publisher = {Society for Artificial Intelligence and Statistics},
+  address   = {San Juan, Puerto Rico},
+  isbn      = {0-9727358-2-8},
+}
+
+@InProceedings{Scofield88,
+  author    = {C. L. Scofield},
+  title     = {Learning Internal Representations in the Coulomb Energy Network},
+  booktitle = icnn,
+  volume    = {1},
+  pages     = {271--276},
+  year      = {1988},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+}
+
+@InProceedings{Scott+al-2003,
+  author    = {Scott S. L. Piao and Paul Rayson and Dawn Archer and Andrew Wilson and Tony McEnery},
+  title     = {Extracting multiword expressions with a semantic tagger},
+  booktitle = {Proceedings of the ACL 2003 workshop on Multiword expressions},
+  pages     = {49--56},
+  year      = {2003},
+  publisher = {Association for Computational Linguistics},
+  address   = {Morristown, NJ, USA},
+}
+
+@Book{Scott92,
+  author    = {D. W. Scott},
+  title     = {Multivariate Density Estimation: Theory, Practice, and Visualization},
+  year      = {1992},
+  publisher = {Wiley},
+  address   = {New York},
+}
+
+@Article{ScST95,
+  author =       "A. Schaerf and Y. Shoham and M. Tennenholtz",
+  title =        "Adaptive load balancing: a study in multi-agent
+                 learning",
+  journal =      "Journal of Artificial Intelligence Research",
+  volume =       "2",
+  pages =        "475--500",
+  year =         "1995",
+}
+
+@Article{Scudder65,
+  author =       "Scudder, III, Henry J.",
+  title =        {Probability of Error of Some Adaptive Pattern-Recognition Machines},
+  journal =      {{IEEE} Transactions on Information Theory},
+  year =         1965,
+  volume =       11,
+  number =       3,
+  pages =        {363--371},
+}
+
+@TechReport{Seeger-2005,
+  author      = {Matthias Seeger},
+  title       = {Low Rank Updates for the {Cholesky} Decomposition},
+  institution = {Department of EECS, University of California at Berkeley},
+  year        = {2005},
+}
+
+@InProceedings{Seeger-Williams-Lawrence-2003,
+  author    = {M. Seeger and C. Williams and N. Lawrence},
+  title     = {Fast Forward Selection to Speed Up Sparse {G}aussian Process Regression},
+  booktitle = {Workshop on AI and Statistics},
+  volume    = {9},
+  year      = {2003},
+}
+
+@TechReport{Seeger2001,
+  author      = {M. Seeger},
+  title       = {Learning with labeled and unlabeled data},
+  institution = {Edinburgh University},
+  year        = {2001},
+}
+
+@InProceedings{seidl91p1,
+  author    = {D. R. Seidl and D. Lorenz},
+  title     = {A structure by which a recurrent neural network can approximate a nonlinear dynamic system},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {709--714},
+  month     = jul,
+  year      = {1991},
+}
+
+@TechReport{Sejnowski+Rosenberg86,
+  author =       "T. J. Sejnowski and C. R. Rosenberg",
+  key =          "Sejnowski",
+  title =        "{NETtalk}: A parallel network that learns to read
+                 aloud",
+  type =         "Technical Report",
+  number =       "86-01",
+  institution =  "Department of Electrical Engineering and Computer
+                 Science, Johns Hopkins University",
+  address =      "Baltimore, MD",
+  year =         "1986",
+}
+
+@Article{Sejnowski86,
+  author  = {T. J. Sejnowski and P. K. Kienker and G. Hinton},
+  title   = {Learning Symmetry Groups with Hidden Units: Beyond the Perceptron},
+  journal = physicaD,
+  year    = {1986},
+  volume  = {22},
+  pages   = {260--275},
+}
+
+@Article{Sejnowski87,
+  author  = {T. J. Sejnowski and C. R. Rosenberg},
+  title   = {Parallel Networks that Learn to Pronounce English Text},
+  journal = cs,
+  year    = {1987},
+  volume  = {1},
+  pages   = {145--168},
+}
+
+@InProceedings{Seneff84,
+  author =       "S. Seneff",
+  booktitle =    icassp,
+  title =        "Pitch and spectral estimation of speech based on an
+                 auditory synchrony model",
+  year =         "1984",
+}
+
+@TechReport{Seneff85,
+  author =       "S. Seneff",
+  title =        "Pitch and spectral estimation of speech based on an
+                 auditory synchrony model",
+  type =         "{RLE} Technical Report",
+  number =       "504",
+  institution =  "Research Laboratory of Electronics, Massachusetts
+                 Institute of Technology",
+  address =      "Cambridge, MA",
+  year =         "1985",
+}
+
+@InProceedings{Seneff86,
+  author    = {S. Seneff},
+  title     = {A computational model for the peripheral auditory system: application to speech recognition research},
+  booktitle = icassp,
+  pages     = {1983--1986},
+  year      = {1986},
+}
+
+@Article{Seneff88,
+  author  = {S. Seneff},
+  title   = {A joint synchrony/mean-rate model of auditory speech processing},
+  journal = {Journal of Phonetics},
+  year    = {1988},
+  volume  = {16},
+  pages   = {55--76},
+}
+
+@Book{Seneta-81,
+  author    = {E. Seneta},
+  title     = {Nonnegative Matrices and {Markov} Chains},
+  year      = {1981},
+  publisher = {Springer},
+  address   = {New York},
+}
+
+@Article{senseval-2000,
+  author =       "Adam Kilgarriff and Joseph Rosenzweig",
+  title =        "Framework and results for {English} {SENSEVAL}",
+  journal =      "Computers and the Humanities: special issue on
+                 {SENSEVAL}",
+  volume =       "34",
+  pages =        "15--48",
+  year =         "2000",
+}
+
+@Article{Serbedzija-1996,
+  author =       "Nikola B. {\v{S}}erbed{\v{z}}ija",
+  title =        "Simulating Artificial Neural Networks on Parallel
+                 Architectures",
+  journal =      "Computer",
+  volume =       "29",
+  number =       "3",
+  publisher =    "IEEE Computer Society Press",
+  address =      "Los Alamitos, CA, USA",
+  pages =        "56--63",
+  year =         "1996",
+  ISSN =         "0018-9162",
+  doi =          "10.1109/2.485893",
+}
+
+@Article{Serre2007,
+  author  = {T. Serre and G. Kreiman and M. Kouh and C. Cadieu and U. Knoblich and T. Poggio},
+  title   = {A quantitative theory of immediate visual recognition},
+  journal = {Progress in Brain Research, Computational Neuroscience: Theoretical Insights into Brain Function},
+  year    = {2007},
+  volume  = {165},
+  pages   = {33--56},
+}
+
+%% Abbreviated-journal variant of the Serre2007 entry.
+@article{Serre2007-small,
+  author  = {T. Serre and G. Kreiman and M. Kouh and C. Cadieu and U. Knoblich and T. Poggio},
+  title   = {A quantitative theory of immediate visual recognition},
+  journal = {Progress in Brain Res., Comput. Neurosc.},
+  volume  = {165},
+  pages   = {33--56},
+  year    = {2007},
+}
+
+%% Tomaso Poggio is listed as an author; the export had him only in a note field.
+@article{Serre-Wolf-2007,
+  author = {Thomas Serre and Lior Wolf and Stanley Bileschi and Maximilian Riesenhuber and Tomaso Poggio},
+  title = {Robust Object Recognition with Cortex-Like Mechanisms},
+  journal = {IEEE Trans. Pattern Anal. Mach. Intell.},
+  volume = {29},
+  number = {3},
+  year = {2007},
+  issn = {0162-8828},
+  pages = {411--426},
+  doi = {10.1109/TPAMI.2007.56},
+  publisher = {IEEE Computer Society},
+  address = {Washington, DC, USA},
+}
+
+
+%% Author is H. Sebastian Seung (surname Seung).
+@INPROCEEDINGS{SeungS1998,
+    author = {H. Sebastian Seung},
+    title = {Learning continuous attractors in recurrent networks},
+    editor =       NIPS10ed,
+    booktitle =    NIPS10,
+    year = {1998},
+    pages = {654--660},
+    publisher = {MIT Press}
+}
+
+%% Second author is H. Sebastian Seung (surname Seung).
+@INPROCEEDINGS{Jain-Seung-08,
+    author = {Viren Jain and H. Sebastian Seung},
+    title = {Natural Image Denoising with Convolutional Networks},
+    editor =       NIPS21ed,
+    booktitle =    NIPS21,
+    year = {2008},
+}
+
+%% The doi field holds the bare DOI; styles add the resolver prefix themselves.
+@inproceedings{Sha+Saul-2005,
+    author = {Fei Sha and Lawrence K. Saul},
+    title = {Analysis and extension of spectral methods for nonlinear dimensionality reduction},
+    booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
+    year = {2005},
+    isbn = {1-59593-180-5},
+    pages = {784--791},
+    location = {Bonn, Germany},
+    doi = {10.1145/1102351.1102450},
+    publisher = {ACM},
+    address = {New York, NY},
+}
+
+@article{Shannon-1949,
+  author  = {C. E. Shannon},
+  title   = {Communication in the presence of noise},
+  journal = {{Proceedings of the Institute of Radio Engineers}},
+  volume  = {37},
+  number  = {1},
+  pages   = {10--21},
+  year    = {1949},
+}
+
+@article{shapiro00lift,
+  author    = {Gregory Piatetsky-Shapiro and Sam Steingold},
+  title     = {Measuring lift quality in database marketing},
+  journal   = {SIGKDD Explor. Newsl.},
+  volume    = {2},
+  number    = {2},
+  pages     = {76--80},
+  year      = {2000},
+  publisher = {ACM Press},
+  address   = {New York, NY, USA},
+  issn      = {1931-0145},
+}
+
+%% Title is no longer wrapped in an extra brace pair, so the style controls its casing.
+@InProceedings{shardanand95,
+  author =       "Upendra Shardanand and Pattie Maes",
+  booktitle =    "CHI '95: Proceedings of the SIGCHI conference on Human
+                 factors in computing systems",
+  title =        "Social information filtering: algorithms for
+                 automating ``word of mouth''",
+  publisher =    "ACM Press/Addison-Wesley Publishing Co.",
+  pages =        "210--217",
+  year =         "1995",
+  location =     "Denver, Colorado, United States",
+}
+
+@article{Sharma-2000,
+  author  = {J. Sharma and A. Angelucci and M. Sur},
+  title   = {Induction of Visual Orientation Modules in Auditory Cortex},
+  journal = {Nature},
+  volume  = {404},
+  pages   = {841--847},
+  year    = {2000},
+}
+
+@article{Sharpe-64,
+  author  = {W. F. Sharpe},
+  title   = {Capital Asset Prices: {A} Theory of Market Equilibrium under Conditions of Risk},
+  journal = {Journal of Finance},
+  volume  = {19},
+  pages   = {425--442},
+  year    = {1964},
+}
+
+@article{Sharpe-66,
+  author  = {W. F. Sharpe},
+  title   = {Mutual Fund Performance},
+  journal = {Journal of Business},
+  volume  = {39},
+  number  = {1},
+  pages   = {119--138},
+  year    = {1966},
+}
+
+%% The address field carries the conference location (San Juan, Puerto Rico).
+@InProceedings{Shaw+Jebara-2007,
+  author =       "Blake Shaw and Tony Jebara",
+  booktitle =    aistats07,
+  title =        "Minimum Volume Embedding",
+  publisher =    "Omnipress",
+  date =         "March 21-24, 2007",
+  address =      "San Juan, Puerto Rico",
+  year =         "2007",
+}
+
+@inproceedings{Shawe-Taylor+Cristianini+Kandola-2002,
+  author    = {J. Shawe-Taylor and N. Cristianini and J. Kandola},
+  title     = {On the concentration of spectral properties},
+  editor    = NIPS14ed,
+  booktitle = NIPS14,
+  publisher = {{MIT} Press},
+  year      = {2002},
+}
+
+@inproceedings{Shawe-Taylor+Williams-2003,
+  author    = {J. Shawe-Taylor and C. K. I. Williams},
+  title     = {The Stability of Kernel Principal Components Analysis and its Relation to the Process Eigenspectrum},
+  editor    = NIPS15ed,
+  booktitle = NIPS15,
+  publisher = {{MIT} Press},
+  year      = {2003},
+}
+
+@article{Shawe-Taylor98,
+  author  = {John Shawe-Taylor and Peter Bartlett and Robert Williamson and Martin Anthony},
+  title   = {Structural Risk Minimization over Data-Dependent Hierarchies},
+  journal = {IEEE Transactions on Information Theory},
+  volume  = {44},
+  number  = {5},
+  pages   = {1926--1940},
+  year    = {1998},
+}
+
+@article{Sherrington75,
+  author  = {D. Sherrington and S. Kirkpatrick},
+  title   = {Solvable Model of a Spin Glass},
+  journal = prl,
+  volume  = {35},
+  pages   = {1792--1796},
+  year    = {1975},
+}
+
+%% Journal version of the normalized-cuts paper, with volume, issue and page range.
+@Article{Shi+Malik-2000,
+  author =       "Jianbo Shi and Jitendra Malik",
+  title =        "Normalized Cuts and Image Segmentation",
+  journal =      "IEEE Transactions on Pattern Analysis and Machine
+                 Intelligence (PAMI)",
+  volume =       "22",
+  number =       "8",
+  pages =        "888--905",
+  year =         "2000",
+}
+
+@inproceedings{Shi+Malik-97,
+  author    = {J. Shi and J. Malik},
+  title     = {Normalized cuts and image segmentation},
+  booktitle = cvpr97,
+  pages     = {731--737},
+  year      = {1997},
+}
+
+@inproceedings{Shimohara88,
+  author    = {K. Shimohara and T. Uchiyama and Y. Tokunaga},
+  title     = {Back-Propagation Networks for Event-Driven Temporal Sequence Processing},
+  booktitle = icnn,
+  volume    = {1},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  pages     = {665--672},
+  year      = {1988},
+}
+
+@inproceedings{Shimohata+al-1997,
+  author    = {Sayori Shimohata and Toshiyuki Sugio and Junji Nagata},
+  title     = {Retrieving Collocations by Co-occurrences and Word Order Constraints},
+  booktitle = {Proceedings of the 35th Conference of the Association for Computational Linguistics},
+  address   = {Madrid},
+  pages     = {476--481},
+  year      = {1997},
+}
+
+%% Venue details come from the IJCNN:1991 proceedings entry via crossref.
+@inproceedings{shin:1991,
+  author   = {Yoan Shin and Joydeep Ghosh},
+  title    = {The Pi-Sigma Network: An Efficient Higher-Order Neural Network for Pattern Classification and Function Approximation},
+  crossref = {IJCNN:1991},
+}
+%% Cross-reference parent; classic BibTeX needs it to follow the entries that crossref it.
+@proceedings{IJCNN:1991,
+  title     = {International Joint Conference on Neural Networks ({IJCNN})},
+  booktitle = ijcnn,
+  address   = {Seattle, Washington, USA},
+  year      = {1991},
+}
+
+@article{ShmulevichI2002,
+  author  = {Ilya Shmulevich and Wei Zhang},
+  title   = {Binary analysis and optimization-based normalization of gene expression data},
+  journal = {Bioinformatics},
+  volume  = {18},
+  number  = {4},
+  pages   = {555--565},
+  year    = {2002},
+}
+
+@article{short81optimal,
+  author  = {R. D. Short and K. Fukunaga},
+  title   = {The optimal distance measure for nearest neighbor classification},
+  journal = {IEEE Transactions on Information Theory},
+  volume  = {27},
+  pages   = {622--627},
+  year    = {1981},
+}
+
+@inproceedings{ShrikiO2001,
+  author    = {Oren Shriki and Haim Sompolinsky and Daniel D. Lee},
+  title     = {An Information Maximization Approach to Overcomplete and Recurrent Representations},
+  editor    = NIPS13ed,
+  booktitle = NIPS13,
+  publisher = {{MIT} Press},
+  pages     = {933--938},
+  year      = {2001},
+}
+
+%% Short-venue variant of the ShrikiO2001 entry (literal booktitle, no editor/publisher/pages).
+@inproceedings{ShrikiO2001-small,
+  author    = {Oren Shriki and Haim Sompolinsky and Daniel D. Lee},
+  title     = {An Information Maximization Approach to Overcomplete and Recurrent Representations},
+  booktitle = {NIPS 13},
+  year      = {2001},
+}
+
+@article{Shumway82,
+  author  = {R. H. Shumway and D. S. Stoffer},
+  title   = {An approach to time series smoothing and forecasting using the {EM} algorithm},
+  journal = {Journal of Time Series Analysis},
+  volume  = {3},
+  number  = {4},
+  pages   = {253--264},
+  year    = {1982},
+}
+
+@article{Shumway91,
+  author  = {R. H. Shumway and D. S. Stoffer},
+  title   = {Dynamic linear models with switching},
+  journal = {J. Amer. Stat. Assoc.},
+  volume  = {86},
+  pages   = {763--769},
+  year    = {1991},
+}
+
+%% NOTE(review): volume and pages corrected from memory of the published record
+%% (the 1991 volume of this journal is 73) -- confirm against the journal site.
+@Article{Sichel91,
+  author =       "D. E. Sichel",
+  title =        "Business cycle duration dependence: a parametric
+                 approach",
+  journal =      "Review of Economics and Statistics",
+  volume =       "73",
+  pages =        "254--260",
+  year =         "1991",
+}
+
+%% Rutgers SYCON technical report on analog computation with real-weight networks.
+@TechReport{Siegelmann92,
+  author =       "H. T. Siegelmann and E. D. Sontag",
+  title =        "Neural Networks with Real Weights: Analog Computational
+                 Complexity",
+  number =       "SYCON-92-05",
+  institution =  "Rutgers Center for Systems and Control",
+  address =      "New Brunswick, NJ",
+  month =        sep,
+  year =         "1992",
+}
+
+@inproceedings{Sietsma88,
+  author    = {J. Sietsma and R. J. F. Dow},
+  title     = {Neural Net Pruning---Why and How},
+  booktitle = icnn,
+  volume    = {1},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  pages     = {325--333},
+  year      = {1988},
+}
+
+@inproceedings{silver95,
+  author    = {Daniel L. Silver and Robert E. Mercer},
+  title     = {Toward a Model of Consolidation: The Retention and Transfer of Neural Net Task Knowledge},
+  booktitle = {Proceedings of the INNS World Congress on Neural Networks},
+  volume    = {3},
+  address   = {Washington, DC},
+  pages     = {164--169},
+  month     = jul,
+  year      = {1995},
+}
+
+@article{silver96,
+  author  = {Daniel L. Silver and Robert E. Mercer},
+  title   = {The Parallel Transfer of Task Knowledge Using Dynamic Learning Rates Based on a Measure of Relatedness},
+  journal = {Connection Science, Special issue on Transfer in Inductive Systems},
+  volume  = {8},
+  number  = {2},
+  pages   = {277--294},
+  year    = {1996},
+}
+
+@techreport{silver97,
+  author      = {Daniel L. Silver and Robert E. Mercer and Gilbert A. Hurwitz},
+  title       = {The Functional Transfer of Knowledge for Coronary Artery Disease Diagnosis},
+  number      = {513},
+  institution = {Department of Computer Science, University of Western Ontario},
+  month       = jan,
+  year        = {1997},
+}
+
+@incollection{Silverman-encyc86,
+  author    = {B. W. Silverman},
+  title     = {Penalized Likelihood},
+  editor    = {N. L. Johnson and S. Kotz},
+  booktitle = {Encyclopaedia of Statistical Sciences},
+  volume    = {6},
+  publisher = {Wiley, New York},
+  pages     = {664--667},
+  year      = {1986},
+}
+
+@book{Silverman86,
+  author    = {Bernard W. Silverman},
+  title     = {Density Estimation for Statistics and Data Analysis},
+  publisher = {Chapman and Hall},
+  address   = {London},
+  year      = {1986},
+}
+
+@inproceedings{Silverman88,
+  author    = {R. H. Silverman and A. S. Noetzel},
+  title     = {Time-Sequential Self-Organization of Hierarchical Neural Networks},
+  editor    = nips87ed,
+  booktitle = nips87,
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  pages     = {709--714},
+  year      = {1988},
+}
+
+%% Authors are Patrice Y. Simard, Dave Steinkraus and John C. Platt; full paper title given.
+%% The pages field holds only the first page, as in the original record.
+@InProceedings{simard-03,
+  author =       "P. Y. Simard and D. Steinkraus and J. C. Platt",
+  booktitle =    ICDAR03,
+  title =        "Best Practices for Convolutional Neural Networks
+                 Applied to Visual Document Analysis",
+  year =         "2003",
+  isbn =         {0-7695-1960-1},
+  pages =        {958},
+  publisher =    {IEEE Computer Society},
+  address =      {Washington, DC, USA},
+  doi =          "10.1109/ICDAR.2003.1227801",
+}
+
+%% The address field follows this file's "city year" convention for the meeting location.
+@InProceedings{Simard89,
+  author =       "P. Y. Simard and M. B. Ottaway and D. H. Ballard",
+  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
+  booktitle =    cmss88,
+  title =        "Analysis of Recurrent Backpropagation",
+  publisher =    "Morgan Kaufmann, San Mateo",
+  address =      "Pittsburgh 1988",
+  pages =        "103--112",
+  year =         "1989",
+}
+
+@inproceedings{Simard92,
+  author    = {Patrice Simard and Bernard Victorri and Yann LeCun and John Denker},
+  title     = {Tangent Prop - {A} formalism for specifying selected invariances in an adaptive network},
+  editor    = NIPS4ed,
+  booktitle = NIPS4,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  pages     = {895--903},
+  year      = {1992},
+}
+
+@inproceedings{Simard93,
+  author    = {P. Y. Simard and Y. {LeCun} and J. Denker},
+  title     = {Efficient pattern recognition using a new transformation distance},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  publisher = {Morgan Kaufmann, San Mateo},
+  pages     = {50--58},
+  year      = {1993},
+}
+
+@article{Simard98,
+  author          = {P. Y. Simard and Y. A. {LeCun} and J. S. Denker and B. Victorri},
+  title           = {Transformation Invariance in Pattern Recognition --- Tangent Distance and Tangent Propagation},
+  journal         = {Lecture Notes in Computer Science},
+  volume          = {1524},
+  year            = {1998},
+  coden           = {LNCSD9},
+  issn            = {0302-9743},
+  bibdate         = {Tue Jan 5 08:21:58 1999},
+  acknowledgement = ack-nhfb,
+  OPTpages        = {239--??},
+}
+
+@inproceedings{Simard-nips92,
+  author    = {P. Simard and Y. {LeCun}},
+  title     = {Reverse {TDNN}: An Architecture for Trajectory Generation},
+  editor    = NIPS4ed,
+  booktitle = NIPS4,
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  pages     = {579--588},
+  year      = {1992},
+}
+
+@phdthesis{Simard-PhD,
+  author  = {P. Y. Simard},
+  title   = {Learning State Space Dynamics in Recurrent Networks},
+  school  = {University of Rochester},
+  address = {Rochester, NY},
+  year    = {1991},
+  note    = {Tech. Rep. 383},
+}
+
+@article{Simic90,
+  author  = {P. D. Simic},
+  title   = {Statistical Mechanics As the Underlying Theory of ``Elastic'' and ``Neural'' Optimizations},
+  journal = network,
+  volume  = {1},
+  pages   = {89--103},
+  year    = {1990},
+}
+
+%% Journal name is "IEEE Transactions on Information Theory"; page range added.
+@article{Simoncelli+al-1992,
+    author = "Eero P. Simoncelli and William T. Freeman and Edward H. Adelson and David J. Heeger",
+    title = "Shiftable Multi-scale Transforms",
+    journal = "IEEE Transactions on Information Theory",
+    volume = "38",
+    number = "2",
+    pages = "587--607",
+    year = "1992",
+    publisher = "The IEEE Computer Society",
+}
+
+@inproceedings{Simoncelli97,
+  author    = {E. P. Simoncelli},
+  title     = {Statistical Models for Images: Compression, Restoration and Synthesis},
+  booktitle = {Proc. 31st Asilomar Conference on Signals, Systems and Computers},
+  publisher = {IEEE},
+  year      = {1997},
+}
+
+%% SPIE proceedings paper; the volume field is the SPIE proceedings volume number.
+@InProceedings{Simoncelli99,
+  author =       "E. P. Simoncelli",
+  booktitle =    "Proc. SPIE, 44th annual meeting",
+  title =        "Modeling the Joint Statistics of Images in the Wavelet
+                 Domain",
+  volume =       "3813",
+  publisher =    "SPIE",
+  year =         "1999",
+}
+
+@article{Sinex+Geisler83,
+  author  = {D. G. Sinex and C. D. Geisler},
+  title   = {Response of auditory nerve fibers to consonant-vowel syllables},
+  journal = jasa,
+  volume  = {73},
+  number  = {2},
+  pages   = {602--615},
+  year    = {1983},
+}
+
+@article{Singer,
+  author  = {A. Singer},
+  title   = {Implementations of Artificial Neural Networks on the Connection Machine},
+  journal = {Parallel Computing},
+  volume  = {14},
+  pages   = {305--315},
+  year    = {1990},
+  OPTnote = {},
+}
+
+@inproceedings{Singer-1990,
+  author    = {Alexander Singer},
+  title     = {Exploiting the Inherent Parallelism of Artificial Neural Networks to Achieve 1300 Million Interconnects per Second},
+  booktitle = {Proceedings of the International Neural Networks Conference},
+  pages     = {656--660},
+  year      = {1990},
+}
+
+@inproceedings{singer00leveraged,
+  author    = {Y. Singer},
+  title     = {Leveraged vector machines},
+  editor    = NIPS12ed,
+  booktitle = NIPS12,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {610--616},
+  year      = {2000},
+}
+
+%% Same bibliographic record as the singer:1996:nips entry; only the citation key differs.
+@inproceedings{Singer96,
+  author    = {Y. Singer},
+  title     = {Adaptive Mixtures of Probabilistic Transducers},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@article{Singer97,
+  author  = {Y. Singer},
+  title   = {Adaptive Mixtures of Probabilistic Transducers},
+  journal = {Neural Computation},
+  volume  = {9},
+  number  = {8},
+  year    = {1997},
+}
+
+%% Same bibliographic record as the Singer96 entry; only the citation key differs.
+@inproceedings{singer:1996:nips,
+  author    = {Y. Singer},
+  title     = {Adaptive Mixtures of Probabilistic Transducers},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@inproceedings{Singh92,
+  author    = {S. P. Singh},
+  title     = {Reinforcement learning with a hierarchy of abstract models},
+  booktitle = {Proceedings of the 10th National Conference on Artificial Intelligence},
+  publisher = {MIT/AAAI Press},
+  pages     = {202--207},
+  year      = {1992},
+}
+
+@inproceedings{SinkkonenJ2002,
+  author    = {Janne Sinkkonen and Samuel Kaski and Janne Nikkil{\"{a}}},
+  title     = {Discriminative Clustering: Optimal Contingency Tables by Learning Metrics},
+  booktitle = ECML02,
+  publisher = {Springer-Verlag},
+  address   = {London, UK},
+  pages     = {418--430},
+  year      = {2002},
+  isbn      = {3-540-44036-4},
+}
+
+%% The acute accent belongs on the "e" of Limeil-Brevannes, written as a braced special character.
+@TechReport{Sirat90,
+  author =       "J.-A. Sirat and J.-P. Nadal",
+  title =        "Neural Trees: {A} New Tool for Classification",
+  type =         "Preprint",
+  institution =  "Laboratoires d'Electronique Philips",
+  address =      "Limeil-Br{\'e}vannes, France",
+  year =         "1990",
+}
+
+@inproceedings{SiroshJ1994,
+  author    = {Joseph Sirosh and Risto Miikkulainen},
+  title     = {Ocular Dominance and Patterned Lateral Connections in a Self-Organizing Model of the Primary Visual Cortex},
+  editor    = NIPS6ed,
+  booktitle = NIPS6,
+  publisher = {Morgan Kaufmann},
+  pages     = {109--116},
+  year      = {1994},
+}
+
+%% Short-venue variant of the SiroshJ1994 entry (initials only, literal booktitle).
+@inproceedings{SiroshJ1994-small,
+  author    = {J. Sirosh and R. Miikkulainen},
+  title     = {Ocular Dominance and Patterned Lateral Connections in a Self-Organizing Model of the Primary Visual Cortex},
+  booktitle = {NIPS 6},
+  year      = {1994},
+}
+
+@inproceedings{Sivilotti87,
+  author    = {M. A. Sivilotti and M. A. Mahowald and C. A. Mead},
+  title     = {Real-Time Visual Computations Using Analog {CMOS} Processing Arrays},
+  editor    = {P. Losleben},
+  booktitle = {Advanced Research in VLSI: Proceedings of the 1987 Stanford Conference},
+  publisher = {MIT Press, Cambridge},
+  pages     = {295--312},
+  year      = {1987},
+}
+
+%% Technical-report precursor of the Sjoberg95 journal article; first author is Sj{\"o}berg.
+@TechReport{Sjoberg92,
+  author =       "Jonas Sj{\"o}berg and Lennart Ljung",
+  title =        "Overtraining, Regularization, and Searching for
+                 Minimum in Neural Networks",
+  institution =  "Link{\"o}ping University",
+  address =      "S-581 83 Link{\"o}ping, Sweden",
+  year =         "1992",
+}
+
+@article{Sjoberg95,
+  author    = {Sj{\"o}berg, J. and Ljung, L.},
+  title     = {{Overtraining, regularization and searching for a minimum, with application to neural networks}},
+  journal   = {International Journal of Control},
+  volume    = {62},
+  number    = {6},
+  pages     = {1391--1407},
+  year      = {1995},
+  publisher = {Taylor \& Francis},
+}
+
+@article{Skinner1958,
+  author  = {Burrhus F. Skinner},
+  title   = {Reinforcement Today},
+  journal = {American Psychologist},
+  volume  = {13},
+  pages   = {94--99},
+  year    = {1958},
+}
+
+@phdthesis{Small1980,
+  author = {Steven L. Small},
+  title  = {Word Expert Parsing: {A} Theory of Distributed Word-Based Natural Language Understanding},
+  school = {University of Maryland},
+  year   = {1980},
+}
+
+@article{smilde97,
+  author  = {A. K. Smilde},
+  title   = {Comments on multilinear {PLS}},
+  journal = {Journal of Chemometrics},
+  volume  = {11},
+  pages   = {367--377},
+  year    = {1997},
+}
+
+%% Smith-Waterman local alignment paper; second author is Michael S. Waterman.
+@Article{Smith+Waterman81,
+  author =       "T. F. Smith and M. S. Waterman",
+  title =        "Identification of common molecular subsequences",
+  journal =      "Journal of Molecular Biology",
+  volume =       "147",
+  pages =        "195--197",
+  year =         "1981",
+}
+
+@article{Smith95,
+  author  = {S. P. Smith},
+  title   = {Differentiation of the Cholesky algorithm},
+  journal = {Journal of Computational and Graphical Statistics},
+  volume  = {4},
+  pages   = {134--147},
+  year    = {1995},
+}
+
+@inproceedings{smola00sparsegreedy,
+  author    = {A. J. Smola and B. Sch{\"o}lkopf},
+  title     = {Sparse greedy matrix approximation for machine learning},
+  editor    = {P. Langley},
+  booktitle = {International Conference on Machine Learning},
+  publisher = {Morgan Kaufmann},
+  address   = {San Francisco},
+  pages     = {911--918},
+  year      = {2000},
+}
+
+@inproceedings{Smola2000sparsegreedy,
+  author    = {A. J. Smola and P. Bartlett},
+  title     = {Sparse Greedy {G}aussian Process Regression},
+  editor    = NIPS13ed,
+  booktitle = NIPS13,
+  year      = {2001},
+}
+
+%% Sch{\"o}lkopf is written with a braced accent so BibTeX treats it as one letter,
+%% matching the spelling used in the smola00sparsegreedy entry.
+@InProceedings{Smola99semiparametricSVM,
+  author =       "A. J. Smola and T. Friess and B. Sch{\"o}lkopf",
+  editor =       NIPS11ed,
+  booktitle =    NIPS11,
+  title =        "Semiparametric Support Vector and Linear Programming
+                 Machines",
+  publisher =    "MIT Press",
+  pages =        "585--591",
+  year =         "1999",
+  OPTaddress =   "Cambridge, MA",
+}
+
+@incollection{Smolensky86,
+  author    = {Paul Smolensky},
+  title     = {Information Processing in Dynamical Systems: Foundations of Harmony Theory},
+  editor    = {D. E. Rumelhart and J. L. McClelland},
+  booktitle = pdp,
+  chapter   = {6},
+  volume    = {1},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  pages     = {194--281},
+  year      = {1986},
+}
+
+%% "Markov" is brace-protected so sentence-casing styles keep the capital.
+@Article{Smyth94,
+  author =       "P. Smyth",
+  title =        {Hidden {Markov} models for fault detection in dynamic
+                 systems},
+  journal =      "Pattern Recognition",
+  volume =       "27",
+  number =       "1",
+  pages =        "149--164",
+  year =         "1994",
+}
+
+%% "Markov" is brace-protected so sentence-casing styles keep the capital.
+@Article{Smyth97,
+  author =       "P. Smyth and D. Heckerman and M. I. Jordan",
+  title =        {Probabilistic independence networks for hidden {Markov}
+                 probability models},
+  journal =      "Neural Computation",
+  volume =       "9",
+  number =       "2",
+  pages =        "227--269",
+  year =         "1997",
+}
+
+%% "Markov" is brace-protected so sentence-casing styles keep the capital.
+@InProceedings{Smyth97-nips,
+  author =       "P. Smyth",
+  editor =       NIPS9ed,
+  booktitle =    NIPS9,
+  title =        {Clustering sequences with hidden {Markov} models},
+  publisher =    "MIT Press",
+  year =         "1997",
+}
+
+%% "Markov" is brace-protected so sentence-casing styles keep the capital.
+@Article{Smyth98,
+  author =       "P. Smyth",
+  title =        {Belief Networks, Hidden {Markov} Models, and {Markov}
+                 Random Fields: a Unifying View},
+  journal =      "Pattern Recognition Letters",
+  year =         "1998",
+}
+
+@techreport{Snapp+Venkatesh-1998,
+  author      = {Robert R. Snapp and Santosh S. Venkatesh},
+  title       = {Asymptotic derivation of the finite-sample risk of the k nearest neighbor classifier},
+  number      = {UVM-CS-1998-0101},
+  institution = {Department of Computer Science, University of Vermont},
+  year        = {1998},
+}
+
+@incollection{SNE-nips15,
+  author    = {G. E. Hinton and S. Roweis},
+  title     = {Stochastic Neighbor Embedding},
+  editor    = NIPS15ed,
+  booktitle = NIPS15,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2003},
+}
+
+@inproceedings{Snow+al-2006,
+  author    = {Rion Snow and Daniel Jurafsky and Andrew Y. Ng},
+  title     = {Semantic taxonomy induction from heterogenous evidence},
+  booktitle = {Proceedings of COLING/ACL 2006},
+  year      = {2006},
+}
+
+%% Corporate author: the extra braces make the whole organization name a single surname.
+@book{SocietyNeuro-2006,
+  author  = {{Society for Neuroscience}},
+  title   = {Brain Facts: A Primer on the Brain and Nervous System},
+  edition = {Fifth},
+  year    = {2006},
+  note    = {{http://sfn.org}},
+}
+
+@article{Soffer86,
+  author  = {B. H. Soffer and G. J. Dunning and Y. Owechko and E. Marom},
+  title   = {Associative Holographic Memory with Feedback Using Phase-Conjugate Mirrors},
+  journal = optlett,
+  volume  = {11},
+  pages   = {118--120},
+  year    = {1986},
+}
+
+@article{Sola94,
+  author  = {M. Sola and J. Driffill},
+  title   = {Testing the term structure of interest rates using a stationary vector autoregression with regime switching},
+  journal = {Journal of Economic Dynamics and Control},
+  volume  = {18},
+  pages   = {601--628},
+  year    = {1994},
+}
+
+@article{Solla88,
+  author  = {S. A. Solla and E. Levin and M. Fleisher},
+  title   = {Accelerated Learning in Layered Neural Networks},
+  journal = cs,
+  volume  = {2},
+  pages   = {625--639},
+  year    = {1988},
+}
+
+@inproceedings{Solla89,
+  author    = {S. A. Solla},
+  title     = {Learning and Generalization in Layered Neural Networks: The Contiguity Problem},
+  editor    = {L. Personnaz and G. Dreyfus},
+  booktitle = {Neural Networks from Models to Applications},
+  publisher = {I.D.S.E.T., Paris},
+  address   = {Paris 1988},
+  pages     = {168--177},
+  year      = {1989},
+}
+
+@article{Solomonoff64,
+  author  = {Ray J. Solomonoff},
+  title   = {A formal theory of inductive inference},
+  journal = {Information and Control},
+  volume  = {7},
+  pages   = {1--22, 224--254},
+  year    = {1964},
+}
+
+@article{Sompolinsky86,
+  author  = {H. Sompolinsky and I. Kanter},
+  title   = {Temporal Association in Asymmetric Neural Networks},
+  journal = prl,
+  volume  = {57},
+  pages   = {2861--2864},
+  year    = {1986},
+}
+
+@inproceedings{Sompolinsky87,
+  author    = {H. Sompolinsky},
+  title     = {The Theory of Neural Networks: The Hebb Rules and Beyond},
+  editor    = {J. L. van Hemmen and I. Morgenstern},
+  booktitle = {Heidelberg Colloquium on Glassy Dynamics},
+  publisher = {Springer-Verlag, Berlin},
+  address   = {Heidelberg 1986},
+  pages     = {485--527},
+  year      = {1987},
+}
+
+@article{Sompolinsky88,
+  author  = {H. Sompolinsky and A. Crisanti and H. J. Sommers},
+  title   = {Chaos in Random Neural Networks},
+  journal = prl,
+  volume  = {61},
+  pages   = {259--262},
+  year    = {1988},
+}
+
+%% NOTE(review): the journal's 1973 volume is 21 (it is 26 in 1978, per Sondik78).
+%% First author Smallwood added from memory of the published record -- confirm.
+@Article{Sondik73,
+  author =       "R. D. Smallwood and E. J. Sondik",
+  title =        "The optimal control of partially observable {Markov}
+                 processes over the finite horizon",
+  journal =      "Operations Research",
+  volume =       "21",
+  pages =        "1071--1088",
+  year =         "1973",
+}
+
+%% "Markov" is brace-protected so sentence-casing styles keep the capital.
+%% NOTE(review): subtitle changed to "discounted costs" from memory -- confirm.
+@Article{Sondik78,
+  author =       "E. J. Sondik",
+  title =        "The optimal control of partially observable {Markov}
+                 processes over the infinite horizon: discounted costs",
+  journal =      "Operations Research",
+  volume =       "26",
+  pages =        "282--304",
+  year =         "1978",
+}
+
+%% Journal article in Pattern Recognition Letters.
+%% NOTE(review): volume and pages are not recorded here -- fill in from the publisher record.
+@article{Song+al-2008a,
+    author = {Yangqiu Song and Feiping Nie and Changshui Zhang},
+    title = {Semi-Supervised Sub-Manifold Discriminant Analysis},
+    journal = {Pattern Recognition Letters},
+    year = 2008,
+}
+
+@article{Song+al-2008b,
+  author  = {Yangqiu Song and Feiping Nie and Changshui Zhang and Shiming Xiang},
+  title   = {A Unified Framework for Semi-Supervised Dimensionality Reduction},
+  journal = {Pattern Recognition},
+  volume  = {41},
+  number  = {9},
+  pages   = {2789--2799},
+  year    = {2008},
+}
+
+@incollection{Song+al-2008c,
+  author    = {Le Song and Alex Smola and Karsten Borgwardt and Arthur Gretton},
+  title     = {Colored Maximum Variance Unfolding},
+  editor    = NIPS20ed,
+  booktitle = NIPS20,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {1385--1392},
+  year      = {2008},
+}
+
+%% Second author's surname is Sussmann (double "n").
+@Article{Sontag-cs89,
+  author =       "E. D. Sontag and H. J. Sussmann",
+  title =        "Backpropagation Can Give Rise to Spurious Local Minima
+                 Even for Networks without Hidden Layers",
+  journal =      "Complex Systems",
+  volume =       "3",
+  pages =        "91--106",
+  year =         "1989",
+}
+
+%% Second author's surname is Sussmann (double "n").
+@InProceedings{Sontag-ijcnn89,
+  author =       "E. D. Sontag and H. J. Sussmann",
+  booktitle =    ijcnn,
+  title =        "Backpropagation Separates when Perceptrons Do",
+  publisher =    "IEEE Press",
+  address =      "Washington DC",
+  year =         "1989",
+  OPTpages =     "639--642",
+}
+
+@techreport{sontag92t1,
+  author      = {E. D. Sontag},
+  title       = {Systems Combining Linearity and Saturations and Relations to Neural Networks},
+  number      = {SYCON--92--01},
+  institution = {Rutgers Center for Systems and Control},
+  year        = {1992},
+}
+
+@article{Soukoulis83,
+  author  = {C. M. Soukoulis and K. Levin and G. S. Grest},
+  title   = {Irreversibility and Metastability in Spin-Glasses. {I}. Ising Model},
+  journal = prB,
+  volume  = {28},
+  pages   = {1495--1509},
+  year    = {1983},
+}
+
+@article{Specht90,
+  author  = {D. F. Specht},
+  title   = {Probabilistic Neural Networks},
+  journal = nn,
+  volume  = {3},
+  pages   = {109--118},
+  year    = {1990},
+}
+
+@Article{Specht91,
+  author =       "D. F. Specht",
+  title =        "A General Regression Neural Network",
+  journal =      "IEEE Transactions on Neural Networks",
+  volume =       "2",
+  number =       "6",
+  pages =        "568--576",
+  month =        nov,
+  year =         "1991",
+}
+
+@article{Spiegelhalter93,
+  author  = {D. J. Spiegelhalter and A. P. Dawid and S. L.
+             Lauritzen and R. G. Cowell},
+  title   = {Bayesian Analysis in Expert Systems},
+  journal = {Statistical Science},
+  year    = {1993},
+  volume  = {8},
+  pages   = {219--283},
+}
+
+@inproceedings{Spielman-96,
+  author    = {D. Spielman and S. Teng},
+  title     = {Spectral partitioning works: planar graphs and finite
+               element meshes},
+  booktitle = {Proceedings of the 37th Annual Symposium on
+               Foundations of Computer Science},
+  year      = {1996},
+}
+
+@techreport{Spielman-96b,
+  author      = {Daniel A. Spielman and Shang-Hua Teng},
+  title       = {Spectral Partitioning Works: Planar Graphs and Finite
+                 Element Meshes},
+  institution = {U.C. Berkeley},
+  number      = {UCB CSD-96-898},
+  year        = {1996},
+}
+
+@InProceedings{spirkovska:1990,
+    author={Spirkovska, L. and Reid, M. B.},
+    title={Connectivity Strategies for Higher-Order Neural Networks Applied to
+        Pattern Recognition},
+    booktitle=ijcnn,
+    year={1990},
+    month=jun,
+    volume={1},
+    pages={21--26},
+    keywords={computerised pattern recognition, neural nets, connection
+        strategies, higher-order neural networks, interconnections, pattern
+            recognition, pattern-recognition, regional connectivity},
+    doi={10.1109/IJCNN.1990.137538},
+}
+
+
+@Book{Spirtes-book93,
+  author =       "P. Spirtes and C. Glymour and R. Scheines",
+  title =        "Causation, Prediction, and Search",
+  publisher =    "Springer-Verlag",
+  address =      "New York",
+  year =         "1993",
+}
+
+@Article{Spirtes-Glymour91,
+  author =       "P. Spirtes and C. Glymour",
+  title =        "An algorithm for fast recovery of sparse causal
+                 graphs",
+  journal =      "Social Science Computer Review",
+  volume =       "9",
+  number =       "1",
+  pages =        "62--72",
+  year =         "1991",
+}
+
+@inproceedings{Srebro-Jaakkola,
+  author    = {N. Srebro and T. Jaakkola},
+  title     = {Weighted Low-Rank Approximations},
+  booktitle = ICML03,
+  editor    = ICML03ed,
+  pages     = {720--727},
+  publisher = ICML03publ,
+  address   = {Washington, D.C.},
+  year      = {2003},
+}
+
+@Book{SSL-Book-2006,
+  editor =       "Olivier Chapelle and Bernhard Sch{\"o}lkopf and Alexander Zien",
+  title =        "Semi-Supervised Learning",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "2006",
+}
+
+@Article{Steels2003,
+  author =       "L. Steels",
+  title =        "Evolving grounded communication for robots",
+  journal =      "Trends in Cognitive Sciences",
+  volume =       "7",
+  number =       "7",
+  pages =        "308--312",
+  month =        jul,
+  year =         "2003",
+  URL =          "http://www.csl.sony.fr/downloads/papers/2003/steels-03c.pdf",
+}
+
+@article{Steinbuch61,
+  author  = {K. Steinbuch},
+  title   = {Die Lernmatrix},
+  journal = kyb,
+  year    = {1961},
+  volume  = {1},
+  pages   = {36--45},
+}
+
+@Article{SteinhausH1956,
+  author =       {Hugo Steinhaus},
+  title =        {Sur la division des corps mat{\'e}riels en parties},
+  journal =      {Bulletin de l'Acad{\'e}mie Polonaise des Sciences},
+  year =         {1956},
+  volume =       {4},
+  pages =        {801--804},
+}
+
+@InCollection{Stevens+Blumstein81,
+  author =       "K. N. Stevens and S. E. Blumstein",
+  editor =       "P. D. Eimas and J. L. Miller",
+  booktitle =    "Perspectives on the study of speech",
+  title =        "The search for invariant acoustic correlates of
+                 phonetic features",
+  publisher =    "Lawrence Erlbaum Associates",
+  pages =        "1--38",
+  year =         "1981",
+}
+
+@InCollection{Stevens75,
+  author =       "K. N. Stevens",
+  editor =       "G. Fant and M. A. Tatham",
+  booktitle =    "Auditory analysis and perception of speech",
+  title =        "The potential role of properties detectors in the
+                 perception of consonants",
+  publisher =    "Academic Press",
+  address =      "London",
+  pages =        "303--330",
+  year =         "1975",
+}
+
+@Article{Stevenson90,
+  author =       "M. Stevenson and R. Winter and B. Widrow",
+  title =        "Sensitivity of Feedforward Neural Networks to Weight
+                 Errors",
+  journal =      "IEEE Transactions on Neural Networks",
+  volume =       "1",
+  number =       "1",
+  pages =        "71--80",
+  month =        mar,
+  year =         "1990",
+  keywords =     "neural network fault tolerance robustness reliability
+                 adaline weight errors",
+}
+
+@book{Stewart-1998,
+  author    = {G. W. Stewart},
+  title     = {Matrix Algorithms, Volume {I}: Basic Decompositions},
+  year      = {1998},
+  publisher = {SIAM},
+  address   = {Philadelphia},
+}
+
+@book{Stewart73,
+  author    = {G. W. Stewart},
+  title     = {Introduction to matrix computations},
+  year      = {1973},
+  publisher = {Academic Press},
+}
+
+@inproceedings{Stinchcombe+White89,
+  author    = {M. Stinchcombe and H. White},
+  title     = {Universal approximation using feedforward networks
+               with non-sigmoid hidden layer activation function},
+  booktitle = ijcnn,
+  pages     = {613--617},
+  publisher = {IEEE},
+  address   = {Washington DC},
+  year      = {1989},
+}
+
+@techreport{Stokbro90,
+  author      = {K. Stokbro and D. K. Umberger and J. A. Hertz},
+  title       = {Exploiting Neurons with Localized Receptive Fields to
+                 Learn Chaos},
+  institution = {Nordita},
+  address     = {Copenhagen, Denmark},
+  type        = {Preprint},
+  number      = {90/28 S},
+  year        = {1990},
+}
+
+@InProceedings{Stolcke-ICSLP02,
+  author =       "A. Stolcke",
+  booktitle =    "Proceedings of the International Conference on
+                 Spoken Language Processing",
+  title =        "{SRILM} -- An extensible language modeling toolkit",
+  address =      "Denver, Colorado",
+  year =         "2002",
+}
+
+@inproceedings{Stolcke93,
+  author    = {A. Stolcke and S. Omohundro},
+  title     = {Hidden {Markov} model induction by {Bayesian} model
+               merging},
+  booktitle = NIPS5,
+  editor    = NIPS5ed,
+  pages     = {11--18},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@techreport{Stolcke94a,
+  author      = {A. Stolcke and S. M. Omohundro},
+  title       = {Best-first Model Merging for Hidden {Markov} Model
+                 Induction},
+  institution = {International Computer Science Institute},
+  address     = {Berkeley, CA},
+  number      = {TR-94-003},
+  year        = {1994},
+  month       = jan,
+}
+
+@techreport{Stolcke94b,
+  author      = {A. Stolcke and J. Segal},
+  title       = {Precise n-gram Probabilities from Stochastic
+                 Context-free Grammars},
+  institution = {International Computer Science Institute},
+  address     = {Berkeley, CA},
+  number      = {TR-94-007},
+  year        = {1994},
+  month       = jan,
+}
+
+@article{Stone-80,
+  author  = {C. J. Stone},
+  title   = {Optimal rates of convergence for nonparametric
+             estimators},
+  journal = {Annals of Statistics},
+  year    = {1980},
+  volume  = {8},
+  number  = {6},
+  pages   = {1348--1360},
+}
+
+@Article{Stormo82,
+  author =       "G. D. Stormo and T. D. Schneider and L. Gold and A.
+                 Ehrenfeucht",
+  title =        "Use of the perceptron algorithm to distinguish
+                 translational initiation sites in {\it {E}. {Coli}}",
+  journal =      "Nucleic Acids Research",
+  volume =       "10",
+  pages =        "2997--3010",
+  year =         "1982",
+}
+
+@inproceedings{Stornetta88,
+  author    = {W. S. Stornetta and T. Hogg and B. A. Huberman},
+  title     = {A Dynamical Approach to Temporal Pattern Processing},
+  booktitle = nips87,
+  editor    = nips87ed,
+  pages     = {750--759},
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  year      = {1988},
+}
+
+@book{Strang80,
+  author    = {G. Strang},
+  title     = {Linear Algebra and Its Applications},
+  year      = {1980},
+  publisher = {Academic Press},
+  address   = {New York},
+}
+
+@phdthesis{Suaudeau94,
+  author  = {N. Suaudeau},
+  title   = {Un mod\`ele probabiliste pour int\'egrer la dimension
+             temporelle dans un syst\`eme de reconnaissance
+             automatique de la parole},
+  year    = {1994},
+  school  = {Universit\'e de Rennes I},
+  address = {France},
+}
+
+@article{suddarth91,
+  author    = {Steven C. Suddarth and Alistair D. C. Holden},
+  title     = {Symbolic-neural systems and the use of hints for
+               developing complex systems},
+  journal   = {Int. J. Man-Mach. Stud.},
+  year      = {1991},
+  volume    = {35},
+  number    = {3},
+  pages     = {291--311},
+  publisher = {Academic Press Ltd.},
+  address   = {London, UK},
+}
+
+@Article{Sudderth-2007,
+  author =       "Erik B. Sudderth and Antonio Torralba and William T. Freeman and Alan S. Willsky",
+  title =        "Describing visual scenes using transformed objects and parts",
+  journal =      "Int. Journal of Computer Vision",
+  year =         "2007",
+  volume =       "77",
+  pages =        "291--330",
+  publisher =    "Springer",
+}
+
+@Article{Sugiyama-2007,
+  author =       "Masashi Sugiyama",
+  title =        "Dimensionality reduction of multimodal labeled data by local {F}isher discriminant analysis",
+  journal =      jmlr,
+  volume =       "8",
+  pages =        "1027--1061",
+  year =         "2007",
+}
+
+@InProceedings{Sun-ijcnn90,
+  author =       "G. Z. Sun and H. H. Chen and Y. C. Lee and C. L.
+                 Giles",
+  booktitle =    ijcnn,
+  title =        "Recurrent Neural Networks, Hidden {Markov} Models and
+                 Stochastic Grammars",
+  volume =       "I",
+  address =      "San Diego CA",
+  pages =        "729--734",
+  year =         "1990",
+}
+
+@book{Sundararajan+Saratchandran-1998,
+  author    = {N. Sundararajan and P. Saratchandran},
+  title     = {Parallel Architectures for Artificial Neural Networks:
+               Paradigms and Implementations},
+  year      = {1998},
+  publisher = {IEEE Computer Society Press},
+  address   = {Los Alamitos, CA},
+  isbn      = {0-8186-8399-6},
+}
+
+@InProceedings{Sutskever+Hinton-2007,
+  author =       "Ilya Sutskever and Geoffrey E. Hinton",
+  booktitle =    aistats07,
+  title =        "Learning Multilevel Distributed Representations for
+                 High-Dimensional Sequences",
+  publisher =    "Omnipress",
+  date =         "March 21-24, 2007",
+  address =      "San Juan, Puerto Rico",
+  year =         "2007",
+}
+
+@Article{Sutskever+Hinton-2008,
+  author =       "Ilya Sutskever and Geoffrey E. Hinton",
+  title =        "Deep Narrow Sigmoid Belief Networks are Universal
+                 Approximators",
+  journal =      "Neural Computation",
+  volume =       "20",
+  number =       "11",
+  pages =        "2629--2636",
+  year =         "2008",
+}
+
+@book{Sutton+Barto-98,
+  author    = {Richard Sutton and Andrew Barto},
+  title     = {Reinforcement Learning: An Introduction},
+  year      = {1998},
+  publisher = {MIT Press},
+}
+
+@InCollection{sutton06introduction,
+  author =       "Charles Sutton and Andrew McCallum",
+  editor =       "Lise Getoor and Ben Taskar",
+  booktitle =    "Introduction to Statistical Relational Learning",
+  title =        "An Introduction to Conditional Random Fields for
+                 Relational Learning",
+  publisher =    "MIT Press",
+  year =         "2006",
+  URL =          "publications/crf-tutorial.pdf",
+  tags =         "recent",
+}
+
+@phdthesis{Sutton84,
+  author  = {R. S. Sutton},
+  title   = {Temporal Credit Assignment in Reinforcement Learning},
+  year    = {1984},
+  school  = {University of Massachusetts},
+  address = {Amherst},
+}
+
+@article{Sutton88,
+  author  = {R. S. Sutton},
+  title   = {Learning to Predict by the Methods of Temporal
+             Differences},
+  journal = mlearn,
+  year    = {1988},
+  volume  = {3},
+  pages   = {9--44},
+}
+
+@incollection{Sutton91,
+  author    = {R. S. Sutton and A. G. Barto},
+  title     = {Time Derivative Models of Pavlovian Reinforcement},
+  booktitle = {Learning and Computational Neuroscience},
+  editor    = {M. Gabriel and J. W. Moore},
+  year      = {1991},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+}
+
+@inproceedings{Sutton95,
+  author    = {R. S. Sutton},
+  title     = {{TD} models: modeling the world at a mixture of time
+               scales},
+  booktitle = {Proceedings of the 12th International Conference on
+               Machine Learning},
+  year      = {1995},
+  publisher = {Morgan Kaufmann},
+}
+
+@inproceedings{Szu86,
+  author    = {H. Szu},
+  title     = {Fast Simulated Annealing},
+  booktitle = snowbird,
+  editor    = {J. S. Denker},
+  pages     = {420--425},
+  publisher = {American Institute of Physics, New York},
+  address   = {Snowbird 1986},
+  year      = {1986},
+}
+
+@InProceedings{Szummer+Jaakkola-2002,
+  author =       "M. Szummer and T. Jaakkola",
+  editor =       NIPS14ed,
+  booktitle =    NIPS14,
+  title =        "Partially labeled classification with {Markov} random
+                 walks",
+  publisher =    "{MIT} Press",
+  address =      "Cambridge, MA",
+  year =         "2002",
+}
+
+
+@Article{Takabatake+al-2007,
+  author =       "Hiroki Takabatake and Manabu Kotani and Seiichi Ozawa",
+  title =        "Feature extraction by supervised independent component analysis based on category information",
+  journal =      "Electrical Engineering in Japan",
+  year =         "2007",
+  volume =       "161",
+  number =       "2",
+  pages =        "25--32",
+}
+
+@InProceedings{TakahashiN2001,
+  author =       "Naoto Takahashi and Minoru Motoki and Yoshio Shimazu
+                 and Yoichi Tomiura and Toru Hitaka",
+  booktitle =    "Proceedings of the Second Workshop on Natural Language
+                 Processing and Neural Networks",
+  title =        "{PP}-attachment Ambiguity Resolution Using a Neural
+                 Network with Modified {FGREP} Method",
+  address =      "Tokyo",
+  year =         "2001",
+}
+
+@InProceedings{Takens81,
+  author =       "F. Takens",
+  editor =       "D. A. Rand and L.-S. Young",
+  booktitle =    "Dynamical Systems and Turbulence",
+  title =        "Detecting Strange Attractors In Turbulence",
+  volume =       "898",
+  publisher =    "Springer-Verlag, Berlin",
+  address =      "Warwick 1980",
+  pages =        "366--381",
+  year =         "1981",
+  series =       "Lecture Notes in Mathematics",
+}
+
+@article{Takeuchi79,
+  author  = {A. Takeuchi and S. Amari},
+  title   = {Formation of Topographic Maps and Columnar
+             Microstructures in Nerve Fields},
+  journal = biocyb,
+  year    = {1979},
+  volume  = {35},
+  pages   = {63--72},
+}
+
+@InCollection{Tam+Perkel89,
+  author =       "D. C. Tam and D. H. Perkel",
+  editor =       "R. D. Hawkins and G. H. Bower",
+  booktitle =    "Computational Models of Learning in Simple Neural
+                 Systems",
+  title =        "Quantitative modeling of synaptic plasticity",
+  publisher =    "Academic Press",
+  pages =        "1--30",
+  year =         "1989",
+}
+
+@article{Tank86,
+  author  = {D. W. Tank and J. J. Hopfield},
+  title   = {Simple ``Neural'' Optimization Networks: An {A}/{D}
+             Converter, Signal Decision Circuit, and a Linear
+             Programming Circuit},
+  journal = ieeetcas,
+  year    = {1986},
+  volume  = {33},
+  pages   = {533--541},
+}
+
+@article{Tank87a,
+  author  = {D. W. Tank and J. J. Hopfield},
+  title   = {Neural Computation by Time Compression},
+  journal = PNAS,
+  year    = {1987},
+  volume  = {84},
+  pages   = {1896--1900},
+}
+
+@inproceedings{Tank87b,
+  author    = {D. W. Tank and J. J. Hopfield},
+  title     = {Concentrating Information in Time: Analog Neural
+               Networks with Applications to Speech Recognition
+               Problems},
+  booktitle = icnn,
+  editor    = {M. Caudill and C. Butler},
+  volume    = {2},
+  pages     = {455--468},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  year      = {1987},
+}
+
+@book{Tanner1993,
+  author    = {M. Tanner},
+  title     = {Tools for statistical inference: Methods for
+               exploration of posterior distributions and likelihood
+               functions},
+  year      = {1993},
+  publisher = {Springer},
+  address   = {New York},
+}
+
+@Article{Tappert90,
+  author =       "C. Tappert and C. Suen and T. Wakahara",
+  title =        "The state of the art in on-line handwriting
+                 recognition",
+  journal =      ieeetpami,
+  volume =       "12",
+  number =       "8",
+  pages =        "787--808",
+  year =         "1990",
+}
+
+@incollection{Taylor+2007,
+  author    = {Graham Taylor and Geoffrey E. Hinton and Sam Roweis},
+  title     = {Modeling Human Motion Using Binary Latent Variables},
+  booktitle = NIPS19,
+  editor    = NIPS19ed,
+  pages     = {1345--1352},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2007},
+}
+
+%%FRED: I deprecate this one because the year in its key is the conference year, not the publication year!
+@inproceedings{Taylor2006,
+  author    = {Graham Taylor and Geoffrey E. Hinton and Sam Roweis},
+  title     = {Modeling Human Motion Using Binary Latent Variables},
+  booktitle = NIPS19,
+  editor    = NIPS19ed,
+  pages     = {1345--1352},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2007},
+}
+
+@InProceedings{Taylor2006-small,
+  author =       "Graham Taylor and Geoffrey E. Hinton and Sam Roweis",
+  booktitle =    "NIPS 19",
+  title =        "Modeling Human Motion Using Binary Latent Variables",
+  year =         "2006",
+}
+
+@InProceedings{TaylorHintonICML2009,
+  author =    {Graham Taylor and Geoffrey Hinton},
+  title =     {Factored Conditional Restricted {Boltzmann} Machines for Modeling Motion Style},
+  booktitle = {Proceedings of the 26th International Conference on Machine Learning (ICML'09)},
+  pages =     {1025--1032},
+  year =      2009,
+  editor =    {L{\'e}on Bottou and Michael Littman},
+  address =   {Montreal},
+  month =     jun,
+  publisher = {Omnipress}
+}
+
+@InProceedings{Taylor56,
+  author =       "W. K. Taylor",
+  editor =       "C. Cherry",
+  booktitle =    "Information Theory",
+  title =        "Electrical Simulation of Some Nervous System
+                 Functional Activities",
+  publisher =    "Butterworths, London",
+  address =      "London 1955",
+  pages =        "314--328",
+  year =         "1956",
+}
+
+@inproceedings{Tebelskis91,
+  author    = {J. Tebelskis and A. Waibel and B. Petek and O.
+               Schmidbauer},
+  title     = {Continuous Speech Recognition Using Linked Predictive
+               Networks},
+  booktitle = NIPS3,
+  editor    = NIPS3ed,
+  pages     = {199--205},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Denver, CO},
+  year      = {1991},
+}
+
+@Article{Teh-2003,
+  author =       "{Yee Whye} Teh and Max Welling and Simon Osindero and
+                 Geoffrey E. Hinton",
+  title =        "Energy-Based Models for Sparse Overcomplete
+                 Representations",
+  journal =      jmlr,
+  volume =       "4",
+  pages =        "1235--1260",
+  year =         "2003",
+}
+
+@inproceedings{Teh-Roweis-2003,
+  author    = {Y. Whye Teh and S. Roweis},
+  title     = {Automatic Alignment of Local Representations},
+  booktitle = NIPS15,
+  editor    = NIPS15ed,
+  year      = {2003},
+  publisher = {{MIT} Press},
+}
+
+@article{TehY2006,
+title=          "Hierarchical {D}irichlet Processes",
+author=         "Y. W. Teh and M. I. Jordan and M. J. Beal and D. M. Blei",
+journal=        "Journal of the American Statistical Association",
+volume=         "101",
+number=         "476",
+pages=          "1566--1581",
+year=           "2006"
+}
+
+@article{tenenbaum00separating,
+  author  = {Joshua B. Tenenbaum and William T. Freeman},
+  title   = {Separating Style and Content with Bilinear Models},
+  journal = {Neural Computation},
+  year    = {2000},
+  volume  = {12},
+  number  = {6},
+  pages   = {1247--1283},
+}
+
+@article{Tenenbaum2000-isomap,
+  author  = {Joshua Tenenbaum and Vin {de Silva} and John C. Langford},
+  title   = {A Global Geometric Framework for Nonlinear
+             Dimensionality Reduction},
+  journal = {Science},
+  year    = {2000},
+  month   = dec,
+  volume  = {290},
+  number  = {5500},
+  pages   = {2319--2323},
+}
+
+@article{Terrell+Scott-1992,
+  author  = {G. R. Terrell and D. W. Scott},
+  title   = {Variable Kernel Density Estimation},
+  journal = {Annals of Statistics},
+  year    = {1992},
+  volume  = {20},
+  pages   = {1236--1265},
+}
+
+@article{Tesauro86,
+  author  = {G. Tesauro},
+  title   = {Simple Neural Models of Classical Conditioning},
+  journal = biocyb,
+  year    = {1986},
+  volume  = {55},
+  pages   = {187--200},
+}
+
+@article{Tesauro88a,
+  author  = {G. Tesauro and B. Janssens},
+  title   = {Scaling Relationships in Back-Propagation Learning},
+  journal = cs,
+  year    = {1988},
+  volume  = {2},
+  pages   = {39--44},
+}
+
+@inproceedings{Tesauro88b,
+  author    = {G. Tesauro and T. J. Sejnowski},
+  title     = {A ``Neural'' Network That Learns to Play Backgammon},
+  booktitle = nips87,
+  editor    = nips87ed,
+  pages     = {442--456},
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  year      = {1988},
+}
+
+@article{Tesauro90,
+  author  = {G. Tesauro},
+  title   = {Neurogammon Wins Computer Olympiad},
+  journal = nc,
+  year    = {1990},
+  volume  = {1},
+  pages   = {321--323},
+}
+
+@article{Tesauro92,
+  author  = {G. Tesauro},
+  title   = {Practical issues in temporal difference learning},
+  journal = {Machine Learning},
+  year    = {1992},
+  volume  = {8},
+  pages   = {257--277},
+}
+
+@article{tesauro:1994:nc,
+  author  = {G. Tesauro},
+  title   = {{TD-Gammon}, a Self-Teaching Backgammon Program,
+             Achieves Master-Level Play},
+  journal = nc,
+  year    = {1994},
+  volume  = {6},
+  number  = {2},
+  pages   = {215--219},
+}
+
+@article{Thakoor87,
+  author  = {A. P. Thakoor and A. Moopenn and J. Lambe and S. K.
+             Khanna},
+  title   = {Electronic Hardware Implementations of Neural
+             Networks},
+  journal = applopt,
+  year    = {1987},
+  volume  = {26},
+  pages   = {5085--5092},
+}
+
+@InProceedings{THastie95,
+  author =       "Trevor Hastie and Patrice Simard and Eduard
+                 S{\"a}ckinger",
+  editor =       NIPS7ed,
+  booktitle =    NIPS7,
+  title =        "Learning Prototype Models for Tangent Distance",
+  publisher =    "MIT Press",
+  pages =        "999--1006",
+  year =         "1995",
+}
+
+@article{THastie98,
+  author  = {T. Hastie and P. Simard},
+  title   = {Metrics and Models for Handwritten Character
+             Recognition},
+  journal = {Statistical Science},
+  year    = {1998},
+  month   = jan,
+  volume  = {13},
+  number  = {1},
+  pages   = {54--65},
+  url     = {citeseer.ist.psu.edu/hastie97metrics.html},
+}
+
+@book{thrun+pratt-book-1998,
+  editor    = {Sebastian Thrun and Lorien Y. Pratt},
+  title     = {Learning to Learn},
+  year      = {1998},
+  publisher = {Kluwer Academic},
+}
+
+@InProceedings{Thrun1995,
+  author =       "S. Thrun and T. Mitchell",
+  booktitle =    "Proceedings of the 14th International Joint Conference
+                 on Artificial Intelligence (IJCAI)",
+  title =        "Learning One More Thing",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  month =        aug,
+  year =         "1995",
+}
+
+@TechReport{thrun95,
+  author =       "S. Thrun and J. O'Sullivan",
+  title =        "Clustering learning tasks and the selective cross-task
+                 transfer of knowledge",
+  number =       "CMU-CS-95-209",
+  institution =  "Carnegie Mellon University, School of Computer Science",
+  year =         "1995",
+}
+
+@techreport{thrun95a,
+  author      = {Sebastian Thrun},
+  title       = {Lifelong Learning: {A} Case Study},
+  institution = {School of Computer Science, Carnegie Mellon
+                 University},
+  address     = {Pittsburgh, PA 15213},
+  number      = {CMU-CS-95-208},
+  year        = {1995},
+  month       = nov,
+}
+
+@inproceedings{thrun95b,
+  author       = {Sebastian Thrun and Tom M. Mitchell},
+  title        = {Learning One More Thing},
+  booktitle    = {Proceedings of IJCAI-95},
+  year         = {1995},
+  organization = {IJCAI},
+  address      = {Montreal, Canada},
+}
+
+@inproceedings{Thrun96a,
+  author    = {S. Thrun},
+  title     = {Is Learning the $n$-th Thing Any Easier Than Learning
+               the First?},
+  booktitle = NIPS8,
+  editor    = NIPS8ed,
+  pages     = {640--646},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1996},
+}
+
+@Book{Thrun96b,
+  author =       "S. Thrun",
+  title =        "Explanation-Based Neural Network Learning: A Lifelong
+                 Learning Approach",
+  publisher =    "Kluwer Academic Publishers",
+  address =      "Boston, MA",
+  year =         "1996",
+}
+
+@Article{Tibshirani95,
+  author =       "Robert J. Tibshirani",
+  title =        "Regression shrinkage and selection via the lasso",
+  journal =      "Journal of the Royal Statistical Society B",
+  volume =       "58",
+  number =       "1",
+  pages =        "267--288",
+  year =         "1996",
+}
+
+@article{Ticknor87,
+  author  = {A. J. Ticknor and H. Barrett},
+  title   = {Optical Implementations of {Boltzmann} Machines},
+  journal = opteng,
+  year    = {1987},
+  volume  = {26},
+  pages   = {16--21},
+}
+
+@book{Tikhonov+Arsenin77,
+  author    = {A. N. Tikhonov and V. Y. Arsenin},
+  title     = {Solutions of Ill-posed Problems},
+  year      = {1977},
+  publisher = {W. H. Winston},
+  address   = {Washington D.C.},
+}
+
+@inproceedings{tipping00relevance,
+  author     = {M. E. Tipping},
+  title      = {The Relevance Vector Machine},
+  booktitle  = NIPS12,
+  editor     = NIPS12ed,
+  pages      = {652--658},
+  publisher  = {MIT Press},
+  OPTaddress = {Cambridge, MA},
+  year       = {2000},
+}
+
+@article{tipping99mixtures,
+  author  = {M. E. Tipping and C. M. Bishop},
+  title   = {Mixtures of Probabilistic Principal Component
+             Analysers},
+  journal = {Neural Computation},
+  year    = {1999},
+  volume  = {11},
+  number  = {2},
+  pages   = {443--482},
+  url     = {citeseer.nj.nec.com/tipping98mixtures.html},
+}
+
+@inproceedings{Tishby89,
+  author    = {N. Tishby and E. Levin and S. A. Solla},
+  title     = {Consistent Inference of Probabilities in Layered
+               Networks: Predictions and Generalization},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {403--410},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@inproceedings{Titov+Henderson-2007,
+  author    = {Ivan Titov and James Henderson},
+  title     = {Constituent Parsing with Incremental Sigmoid Belief
+               Networks},
+  booktitle = {Proc. 45th Meeting of Association for Computational
+               Linguistics (ACL'07)},
+  pages     = {632--639},
+  address   = {Prague, Czech Republic},
+  year      = {2007},
+  url       = {http://aclweb.org/anthology-new/P/P07/P07-1080.pdf},
+}
+
+@inproceedings{ToMa00,
+  author    = {Kristina Toutanova and Christopher D. Manning},
+  title     = {Enriching the Knowledge Sources Used in a Maximum
+               Entropy Part-of-Speech Tagger},
+  booktitle = {EMNLP/VLC 2000},
+  year      = {2000},
+  pages     = {63--70},
+}
+
+@inproceedings{Tomita82,
+  author    = {M. Tomita},
+  title     = {Dynamic Construction of Finite-state Automata from
+               Examples Using Hill-Climbing},
+  booktitle = {Proceedings of the Fourth Annual Cognitive Science
+               Conference},
+  pages     = {105--108},
+  address   = {Ann Arbor, MI},
+  year      = {1982},
+}
+
+@book{Tong83,
+  author    = {H. Tong},
+  title     = {Threshold Models in Nonlinear Time Series Analysis},
+  year      = {1983},
+  publisher = {Springer-Verlag},
+  address   = {Berlin},
+}
+
+@inproceedings{TongKoller2000,
+  author    = {S. Tong and D. Koller},
+  title     = {Restricted Bayes Optimal Classifiers},
+  booktitle = {Proceedings of the 17th National Conference on
+               Artificial Intelligence (AAAI)},
+  pages     = {658--664},
+  address   = {Austin, Texas},
+  year      = {2000},
+}
+
+@article{Torgerson52,
+  author  = {W. Torgerson},
+  title   = {Multidimensional scaling, 1: Theory and method},
+  journal = {Psychometrika},
+  year    = {1952},
+  volume  = {17},
+  pages   = {401--419},
+}
+
+@inproceedings{Torralba+Fergus+Weiss-2008,
+ author = {Antonio Torralba and Robert Fergus and Yair Weiss},
+ title = {Small codes and large databases for recognition},
+ booktitle = cvpr08,
+ pages = "1--8",
+ year = 2008,
+}
+
+@InCollection{Torresani+Lee-2007,
+  author =       "Lorenzo Torresani and Kuang-Chih Lee",
+  editor =       NIPS19ed,
+  booktitle =    NIPS19,
+  title =        "Large Margin Component Analysis",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  pages =        "1385--1392",
+  year =         "2007",
+}
+
+@inproceedings{Torresen+al-1995,
+  author    = {J. Torresen and S. Mori and H. Nakashima and S. Tomita
+               and O. Landsverk},
+  title     = {Exploiting multiple degrees of {BP} parallelism on the
+               highly parallel computer {AP1000}},
+  booktitle = {Proceedings of the Fourth International Conference on
+               Artificial Neural Networks},
+  pages     = {483--488},
+  address   = {Cambridge, UK},
+  year      = {1995},
+}
+
+@inproceedings{Torresen+al-1995b,
+  author    = {J. Torresen and S. Tomita and O. Landsverk},
+  title     = {The relation of Weight Update Frequency to Convergence
+               of {BP}},
+  booktitle = {World Congress on Neural Networks},
+  year      = {1995},
+  address   = {Washington D.C., USA},
+}
+
+@article{Torresen-1997,
+  author  = {Jim Torresen},
+  title   = {The Convergence of Backpropagation Trained Neural Networks for Various Weight Update Frequencies},
+  journal = {International Journal of Neural Systems},
+  year    = {1997},
+  volume  = {8},
+  number  = {3},
+}
+
+@article{Toulouse86,
+  author  = {G. Toulouse and S. Dehaene and J.-P. Changeux},
+  title   = {Spin Glass Model of Learning by Selection},
+  journal = PNAS,
+  year    = {1986},
+  volume  = {83},
+  pages   = {1695--1698},
+}
+
+@article{Touretzky89,
+  author  = {D. S. Touretzky and D. A. Pomerleau},
+  title   = {What's Hidden in the Hidden Layers?},
+  journal = BYTE,
+  year    = {1989},
+  month   = aug,
+  pages   = {227--233},
+}
+
+@InProceedings{ToutanovaKMS03,
+  author =       "Kristina Toutanova and Dan Klein and Christopher D.
+                 Manning and Yoram Singer",
+  booktitle =    "HLT-NAACL",
+  title =        "Feature-Rich Part-of-Speech Tagging with a Cyclic
+                 Dependency Network",
+  year =         "2003",
+  bibsource =    "DBLP, http://dblp.uni-trier.de",
+  ee =           "http://acl.ldc.upenn.edu/N/N03/N03-1033.pdf",
+}
+
+% Describes the same paper as the towell93 entry (same title, venue and year).
+@InProceedings{Towell-nips92,
+  author =       "G. G. Towell and J. W. Shavlik",
+  editor =       NIPS4ed,
+  booktitle =    NIPS4,
+  title =        "Interpretation of Artificial Neural Networks: Mapping
+                 Knowledge-Based Neural Networks into Rules",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "977--984",
+  year =         "1992",
+}
+
+@InProceedings{towell93,
+  author =       "G. G. Towell and J. W. Shavlik",
+  editor =       NIPS4ed,
+  booktitle =    NIPS4,
+  title =        "Interpretation of Artificial Neural Networks: Mapping
+                 Knowledge-Based Neural Networks into rules",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "977--984",
+  year =         "1992",
+}
+
+@InProceedings{Towell-aaai90,
+  author =       "G. G. Towell and J. W. Shavlik and M. O. Noordewier",
+  booktitle =    "Proceedings of the Eighth National Conference on
+                 Artificial Intelligence (AAAI-90)",
+  title =        "Refinement of Approximate Domain Theories by
+                 Knowledge-Based Neural Networks",
+  pages =        "861--866",
+  year =         "1990",
+  OPTnote =      "",
+}
+
+@techreport{TR:Breiman.arcing,
+  author      = {Leo Breiman},
+  title       = {Bias, variance, and Arcing classifiers},
+  institution = {Statistics Department, University of California at Berkeley},
+  number      = {460},
+  year        = {1996},
+}
+
+@techreport{TR:Breiman:edge,
+  author      = {Leo Breiman},
+  title       = {Arcing the edge},
+  institution = {Statistics Department, University of California at Berkeley},
+  number      = {486},
+  year        = {1997},
+}
+
+@techreport{TR:Breiman:gametheorie,
+  author      = {Leo Breiman},
+  title       = {Prediction games and arcing classifiers},
+  institution = {Statistics Department, University of California at Berkeley},
+  number      = {504},
+  year        = {1997},
+}
+
+@TechReport{TR:Friedman+Hastie+Tibshirani:AdaBoost-theory,
+  author =       "J. Friedman and T. Hastie and R. Tibshirani",
+  title =        "Additive Logistic Regression: a Statistical View of
+                 Boosting",
+  institution =  "Department of Statistics, Stanford University",
+  month =        aug,
+  year =         "1998",
+}
+
+@TechReport{TR:Tibshirani:bias+var,
+  author =       "R. Tibshirani",
+  title =        "Bias, Variance and Prediction Error for Classification
+                 Rules",
+  institution =  "Department of Statistics, University of Toronto",
+  year =         "1996",
+}
+
+@article{Traven91,
+  author  = {H. G. C. Traven},
+  title   = {A neural network approach to statistical pattern classification by semiparametric estimation of probability density functions},
+  journal = ieeetrnn,
+  year    = {1991},
+  volume  = {2},
+  number  = {3},
+  pages   = {366--377},
+}
+
+% Describes the same paper as the Tresp-nips93 entry (same authors, title, venue and year).
+@InCollection{TreHolAhm93,
+  author =       "V. Tresp and J. Hollatz and S. Ahmad",
+  editor =       NIPS5ed,
+  booktitle =    NIPS5,
+  title =        "Network structuring and training using rule-based
+                 knowledge",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  year =         "1993",
+}
+
+@inproceedings{Tresp-nips93,
+  author    = {V. Tresp and J. Hollatz and S. Ahmad},
+  title     = {Network Structuring and Training Using Rule-based Knowledge},
+  editor    = NIPS5ed,
+  booktitle = NIPS5,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1993},
+}
+
+@article{tresp2001,
+  author  = {V. Tresp},
+  title   = {Scaling Kernel-Based Systems to Large Data Sets},
+  journal = {Data Mining and Knowledge Discovery},
+  year    = {2001},
+  volume  = {5},
+  number  = {3},
+  pages   = {197--211},
+}
+
+@InCollection{Tresp94,
+  author =       "V. Tresp and S. Ahmad and R. Neuneier",
+  editor =       NIPS6ed,
+  booktitle =    NIPS6,
+  title =        "Training neural networks with deficient data",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "128--135",
+  year =         "1994",
+}
+
+@article{TRNN:Tsoi94,
+  author  = {A. C. Tsoi and A. Back},
+  title   = {Locally Recurrent Globally Feedforward Networks, {A} Critical Review of Architectures},
+  journal = {IEEE Transactions on Neural Networks},
+  year    = {1994},
+  volume  = {5},
+  number  = {2},
+  pages   = {229--239},
+}
+
+@inproceedings{Tseng-1998,
+  author    = {Yuen-Hsien Tseng},
+  title     = {Multilingual Keyword Extraction for Term Suggestion},
+  booktitle = {SIGIR '98: Proceedings of the 21st Annual International ACM SIGIR Conference on Research and Development in Information Retrieval, August 24-28 1998, Melbourne, Australia},
+  publisher = {ACM},
+  pages     = {377--378},
+  year      = {1998},
+}
+
+@article{TsochantaridisI2005,
+  author    = {Ioannis Tsochantaridis and Thorsten Joachims and Thomas Hofmann and Yasemin Altun},
+  title     = {Large Margin Methods for Structured and Interdependent Output Variables},
+  journal   = {J. Mach. Learn. Res.},
+  year      = {2005},
+  volume    = {6},
+  pages     = {1453--1484},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA, USA},
+  issn      = {1533-7928},
+}
+
+@article{Tsodyks88,
+  author  = {M. V. Tsodyks and M. V. Feigel'man},
+  title   = {The Enhanced Storage Capacity in Neural Networks with Low Activity Level},
+  journal = eul,
+  year    = {1988},
+  volume  = {6},
+  pages   = {101--105},
+}
+
+@InProceedings{Tsoi+Pearson91,
+  author =       "A. C. Tsoi and R. A. Pearson",
+  editor =       NIPS3ed,
+  booktitle =    NIPS3,
+  title =        "Comparison of three classification techniques: {CART},
+                 {C4}.5, and multi-layer perceptron",
+  publisher =    "Morgan Kaufmann",
+  address =      "Denver, CO",
+  pages =        "963--969",
+  year =         "1991",
+}
+
+@book{TSP93,
+  editor    = {A. Weigend and N. Gershenfeld},
+  title     = {Time Series Prediction: Forecasting the future and understanding the past},
+  publisher = {Addison-Wesley},
+  year      = {1993},
+}
+
+@inproceedings{Tsuda99,
+  author    = {K. Tsuda},
+  title     = {Optimal Hyperplane Classifier based on Entropy Number Bound},
+  booktitle = {ICANN'99},
+  pages     = {419--424},
+  year      = {1999},
+}
+
+@phdthesis{Turian07thesis,
+  author = {Joseph Turian},
+  title  = {Constituent Parsing by Classification},
+  school = {New York University},
+  year   = {2007},
+}
+
+@article{tzanetakis+cook:2002,
+  author  = {George Tzanetakis and Perry Cook},
+  title   = {Musical Genre Classification of Audio Signals},
+  journal = {IEEE Transactions on Speech and Audio Processing},
+  year    = {2002},
+  month   = jul,
+  volume  = {10},
+  number  = {5},
+  pages   = {293--302},
+}
+
+@article{Uberbacher91,
+  author  = {E. C. Uberbacher and R. J. Mural},
+  title   = {Locating protein-coding regions in human {DNA} sequences by a multiple sensor-neural network approach},
+  journal = {Proc. Natl. Acad. Sci. USA},
+  year    = {1991},
+  volume  = {88},
+  pages   = {11261--11265},
+}
+
+@article{Uhrig91,
+  author  = {R. E. Uhrig},
+  title   = {Potential Applications of Neural Networks to the Operation of a Nuclear Power Plant},
+  journal = {Nuclear Safety},
+  year    = {1991},
+  volume  = {32},
+  number  = {1},
+}
+
+@article{Uhrig94,
+  author  = {R. E. Uhrig},
+  title   = {Artificial Neural Networks in Nuclear Power Plants},
+  journal = {Nuclear News},
+  year    = {1994},
+  volume  = {37},
+  number  = {9},
+  pages   = {38},
+}
+
+@article{Utgoff-2002,
+  author  = {Paul E. Utgoff and David J. Stracuzzi},
+  title   = {Many-Layered Learning},
+  journal = {Neural Computation},
+  year    = {2002},
+  volume  = {14},
+  pages   = {2497--2539},
+}
+
+@article{Valiant84,
+  author  = {L. G. Valiant},
+  title   = {A Theory of the Learnable},
+  journal = {Communications of the ACM},
+  year    = {1984},
+  volume  = {27},
+  number  = {11},
+  pages   = {1134--1142},
+}
+
+@inproceedings{VandenBout88,
+  author    = {D. E. Van den Bout and T. K. Miller},
+  title     = {A Travelling Salesman Objective Function That Works},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {299--303},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@article{VandenBout89,
+  author  = {D. E. Van den Bout and T. K. Miller},
+  title   = {Improving the Performance of the Hopfield-Tank Neural Network Through Normalization and Annealing},
+  journal = biocyb,
+  year    = {1989},
+  volume  = {62},
+  pages   = {129--139},
+}
+
+@Article{VanDerMaaten08,
+  author =       "Laurens {van der Maaten} and Geoffrey E. Hinton",
+  title =        {Visualizing Data using {t-SNE}},
+  journal =      jmlr,
+  year =         "2008",
+  keywords =     {dimension-reduction, locality, nearest-neighbors, spectral, visualization},
+  month =        nov,
+  pages =        {2579--2605},
+  url =          {http://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf},
+  volume =       {9},
+}
+
+@book{VanDerVaart+Wellner-1996,
+  author    = {A. W. {van der Vaart} and J. Wellner},
+  title     = {Weak Convergence and Empirical Processes with applications to Statistics},
+  publisher = {Springer},
+  address   = {New York},
+  year      = {1996},
+}
+
+@article{vanHemmen79,
+  author  = {J. L. van Hemmen and R. G. Palmer},
+  title   = {The Replica Method and a Solvable Spin Glass Model},
+  journal = jpa,
+  year    = {1979},
+  volume  = {12},
+  pages   = {563--580},
+}
+
+@article{vanHemmen86,
+  author  = {J. L. van Hemmen and R. K{\"u}hn},
+  title   = {Nonlinear Neural Networks},
+  journal = prl,
+  year    = {1986},
+  volume  = {57},
+  pages   = {913--916},
+}
+
+@article{vanHemmen90,
+  author  = {J. L. van Hemmen and L. B. Ioffe and R. K{\"u}hn and M. Vaas},
+  title   = {Increasing the Efficiency of a Neural Network through Unlearning},
+  journal = physicaA,
+  year    = {1990},
+  volume  = {163},
+  pages   = {386--392},
+}
+
+% HUGO: Haven't found what A. stands for...
+@article{VapnikV63,
+  author  = {Vladimir Vapnik and A. Lerner},
+  title   = {Pattern Recognition using Generalized Portrait Method},
+  journal = {Automation and Remote Control},
+  year    = {1963},
+  volume  = {24},
+}
+
+@article{Vapnik71,
+  author  = {V. N. Vapnik and A. Y. Chervonenkis},
+  title   = {On the Uniform Convergence of Relative Frequencies of Events to Their Probabilities},
+  journal = tprobapp,
+  year    = {1971},
+  volume  = {16},
+  pages   = {264--280},
+}
+
+@book{Vapnik82,
+  author    = {V. N. Vapnik},
+  title     = {Estimation of Dependences Based on Empirical Data},
+  publisher = {Springer-Verlag},
+  address   = {Berlin},
+  year      = {1982},
+}
+
+@article{Vapnik93,
+  author  = {V. Vapnik and L. Bottou},
+  title   = {Local algorithms for pattern recognition and dependencies estimation},
+  journal = nc,
+  year    = {1993},
+  volume  = {5},
+  number  = {6},
+  pages   = {893--909},
+}
+
+@book{Vapnik95,
+  author    = {V. N. Vapnik},
+  title     = {The Nature of Statistical Learning Theory},
+  publisher = {Springer},
+  address   = {New York},
+  year      = {1995},
+}
+
+% NOTE(review): series and volume were previously embedded in the publisher
+% string; they are kept verbatim here but should be checked against the book.
+@Book{Vapnik98,
+  author =       "Vladimir Vapnik",
+  title =        "Statistical Learning Theory",
+  publisher =    "Wiley",
+  series =       "Lecture Notes in Economics and Mathematical Systems",
+  volume =       "454",
+  year =         "1998",
+}
+
+@incollection{variational99,
+  author    = {M. I. Jordan and Z. Ghahramani and T. Jaakkola and L. Saul},
+  title     = {An introduction to variational methods in graphical models},
+  editor    = {M. I. Jordan},
+  booktitle = {Learning in Graphical Models},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1999},
+}
+
+@inproceedings{Venka+PC-2004,
+  author    = {Shailaja Venkatsubramanyan and Jose Perez-Carballo},
+  title     = {Multiword Expression Filtering for Building Knowledge Maps},
+  booktitle = {Second ACL Workshop on Multiword Expressions},
+  pages     = {40--47},
+  year      = {2004},
+}
+
+@inproceedings{Verbeek-2004,
+  author    = {Jakob J. Verbeek and Sam T. Roweis and Nikos Vlassis},
+  title     = {Non-linear {CCA} and {PCA} by Alignment of Local Models},
+  editor    = NIPS16ed,
+  booktitle = NIPS16,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2004},
+  keywords  = {dimensionality reduction, spectral methods, mixture density, CCA, PCA},
+}
+
+@inproceedings{Veronis1990,
+  author    = {Jean Veronis and Nancy Ide},
+  title     = {Word Sense Disambiguation with Very Large Neural Networks Extracted from Machine Readable Dictionaries},
+  booktitle = {COLING'90},
+  year      = {1990},
+}
+
+@misc{Veronis98,
+  author = {Jean Veronis},
+  title  = {A study of polysemy judgements and inter-annotator agreement},
+  year   = {1998},
+  url    = {citeseer.nj.nec.com/veronis98study.html},
+  text   = {Veronis, J., 1998. A study of polysemy judgements and inter-annotator agreement. In Programme and advanced papers of the Senseval workshop. Herstmonceux Castle, England.},
+}
+
+@inproceedings{Vilalta+al-1997,
+  author    = {Ricardo Vilalta and Gunnar Blix and Larry Rendell},
+  title     = {Global Data Analysis and the Fragmentation Problem in Decision Tree Induction},
+  booktitle = ECML97,
+  publisher = {Springer-Verlag},
+  pages     = {312--327},
+  year      = {1997},
+}
+
+@inproceedings{Vincent-Bengio-2003-short,
+  author    = {Pascal Vincent and Yoshua Bengio},
+  title     = {Manifold Parzen Windows},
+  booktitle = NIPS15,
+  publisher = {MIT Press},
+  year      = {2003},
+}
+
+@techreport{Vincent-TR1316-small,
+  author      = {P. Vincent and H. Larochelle and Y. Bengio and P.-A. Manzagol},
+  title       = {Extracting and Composing Robust Features with Denoising Autoencoders},
+  institution = {Universit\'e de Montr\'eal, dept. IRO},
+  number      = {1316},
+  year        = {2008},
+}
+
+@Article{Vincent2001,
+  author =       "P. Vincent and Y. Bengio",
+  title =        "Kernel Matching Pursuit",
+  journal =      "Machine Learning",
+  volume =       "48",
+  number =       "1--3",
+  pages =        "165--187",
+  year =         "2002",
+}
+
+@inproceedings{Vincent2002,
+  author    = {P. Vincent and Y. Bengio},
+  title     = {{K}-Local Hyperplane and Convex Distance Nearest Neighbor Algorithms},
+  editor    = NIPS14ed,
+  booktitle = NIPS14,
+  pages     = {985--992},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2002},
+}
+
+@inproceedings{VincentPLarochelleH2008-small,
+  author    = {Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol},
+  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
+  booktitle = {ICML 2008},
+  year      = {2008},
+}
+
+@inproceedings{VincentPLarochelleH2008-short,
+  author    = {Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol},
+  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
+  booktitle = {Int. Conf. Mach. Learn.},
+  pages     = {1096--1103},
+  year      = {2008},
+}
+
+
+@InProceedings{vincent:icml08,
+   author =     "Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol",
+   title =      "Extracting and composing robust features with denoising autoencoders",
+   booktitle =  "Proceedings of the 25th Annual International Conference on Machine Learning (ICML 2008)",
+   address =    "Helsinki, Finland",
+   editor =     "Andrew McCallum and Sam Roweis",
+   publisher =  "Omnipress",
+   year =       "2008",
+   pages =      "1096--1103",
+}
+   %url =        "http://icml2008.cs.helsinki.fi/papers/592.pdf",
+
+@inproceedings{VincentPLarochelleH2008-very-small,
+  author    = {P. Vincent and H. Larochelle and Y. Bengio and P.-A. Manzagol},
+  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
+  booktitle = {ICML 2008},
+  year      = {2008},
+}
+
+@article{Viterbi67,
+  author  = {A. Viterbi},
+  title   = {Error bounds for convolutional codes and an asymptotically optimum decoding algorithm},
+  journal = ieeeit,
+  year    = {1967},
+  pages   = {260--269},
+}
+
+@inproceedings{Vlachos-2002,
+  author    = {Michail Vlachos and Carlotta Domeniconi and Dimitrios Gunopulos and George Kollios and Nick Koudas},
+  title     = {Non-Linear Dimensionality Reduction Techniques for Classification and Visualization},
+  booktitle = {Proc. of 8th SIGKDD},
+  address   = {Edmonton, Canada},
+  year      = {2002},
+  url       = {citeseer.ist.psu.edu/573153.html},
+}
+
+% Describes the same article as the Vogl88 entry (same journal, volume, pages, year);
+% author initials and title are aligned with that entry.
+@Article{vogl-88,
+  author =       "T. P. Vogl and J. K. Mangis and A. K. Rigler and W. T.
+                 Zink and D. L. Alkon",
+  title =        "Accelerating the Convergence of the Back-Propagation
+                 Method",
+  journal =      "Biological Cybernetics",
+  volume =       "59",
+  pages =        "257--263",
+  year =         "1988",
+}
+
+@article{Vogl88,
+  author  = {T. P. Vogl and J. K. Mangis and A. K. Rigler and W. T. Zink and D. L. Alkon},
+  title   = {Accelerating the Convergence of the Back-Propagation Method},
+  journal = biocyb,
+  year    = {1988},
+  volume  = {59},
+  pages   = {257--263},
+}
+
+@book{Volterra,
+  author    = {V. Volterra},
+  title     = {Theory of Functionals and of Integrals and Integro-Differential Equations},
+  publisher = {Dover},
+  address   = {New York},
+  year      = {1959},
+}
+
+@article{vonderMalsburg73,
+  author  = {Ch. von der Malsburg},
+  title   = {Self-Organization of Orientation Sensitive Cells in the Striate Cortex},
+  journal = kyb,
+  year    = {1973},
+  volume  = {14},
+}
+
+@article{vonderMalsburg82,
+  author  = {Ch. von der Malsburg and J. D. Cowan},
+  title   = {Outline of a Theory for the Ontogenesis of Iso-Orientation Domains in Visual Cortex},
+  journal = biocyb,
+  year    = {1982},
+  volume  = {45},
+  pages   = {49--56},
+}
+
+@inproceedings{vonLehman88,
+  author    = {A. von Lehman and E. G. Paek and P. F. Liao and A. Marrakchi and J. S. Patel},
+  title     = {Factors Influencing Learning by Back-Propagation},
+  booktitle = icnn,
+  volume    = {1},
+  pages     = {335--341},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@Article{vonLuxurg07,
+  author =       "U. von Luxburg",
+  title =        "A Tutorial on Spectral Clustering",
+  journal =      "Statistics and Computing",
+  volume =       "17",
+  number =       "4",
+  pages =        "395--416",
+  year =         "2007",
+}
+
+@incollection{vonNeumann56,
+  author    = {J. von Neumann},
+  title     = {Probabilistic Logics and the Synthesis of Reliable Organisms from Unreliable Components},
+  editor    = {C. E. Shannon and J. McCarthy},
+  booktitle = {Automata Studies},
+  pages     = {43--98},
+  publisher = {Princeton University Press},
+  address   = {Princeton},
+  year      = {1956},
+}
+
+@article{Wagner87,
+  author  = {K. Wagner and D. Psaltis},
+  title   = {Multilayer Optical Learning Networks},
+  journal = applopt,
+  year    = {1987},
+  volume  = {26},
+  pages   = {5061--5076},
+}
+
+@InCollection{Wahba82,
+  author =       "G. Wahba",
+  editor =       "Gupta and Berger",
+  booktitle =    "Statistical Decision Theory and Related Topics III",
+  title =        "Constrained regularization for ill-posed linear
+                 operator equations, with applications in meteorology
+                 and medicine",
+  publisher =    "Academic Press",
+  year =         "1982",
+}
+
+@inproceedings{Wahba90,
+  author    = {G. Wahba},
+  title     = {Spline models for observational data},
+  booktitle = {CBMS-NSF Regional Conference Series in Applied Mathematics},
+  volume    = {59},
+  publisher = {Society for Industrial and Applied Mathematics (SIAM)},
+  address   = {Philadelphia, PA},
+  year      = {1990},
+}
+
+@article{Waibel89a,
+  author  = {A. Waibel},
+  title   = {Modular Construction of Time-Delay Neural Networks for Speech Recognition},
+  journal = nc,
+  year    = {1989},
+  volume  = {1},
+  pages   = {39--46},
+}
+
+@article{Waibel89b,
+  author  = {A. Waibel and T. Hanazawa and G. E. Hinton and K. Shikano and K. Lang},
+  title   = {Phoneme Recognition Using Time-Delay Neural Networks},
+  journal = ieeetassp,
+  year    = {1989},
+  volume  = {37},
+  pages   = {328--339},
+}
+
+@Article{Waibel89c,
+  author =       "A. Waibel and H. Sawai and K. Shikano",
+  title =        "Modularity and Scaling in Large Phonemic Neural
+                 Networks",
+  journal =      ieeetassp,
+  volume =       "37",
+  pages =        "1888--1898",
+  year =         "1989",
+}
+
+@article{Wallace+Boulton-1968,
+  author  = {C. S. Wallace and D. M. Boulton},
+  title   = {An information measure for classification},
+  journal = {Computer Journal},
+  year    = {1968},
+  volume  = {11},
+  number  = {2},
+  pages   = {185--194},
+}
+
+@InCollection{Wan93,
+  author =       "E. A. Wan",
+  editor =       "A. S. Weigend and N. A. Gershenfeld",
+  booktitle =    "Time Series Prediction: Forecasting the Future and
+                 Understanding the Past",
+  title =        "Time series prediction by using a connectionist
+                 network with internal delay lines",
+  publisher =    "Addison-Wesley",
+  pages =        "195--217",
+  year =         "1993",
+}
+
+@incollection{Wan93a,
+  author    = {E. A. Wan},
+  title     = {Time Series Prediction by Using a Connectionist Network with Internal Delay Lines},
+  editor    = {A. Weigend and N. Gershenfeld},
+  booktitle = {Predicting the future and understanding the past},
+  pages     = {175--193},
+  publisher = {Addison-Wesley},
+  address   = {Redwood City, CA},
+  year      = {1993},
+}
+
+@inproceedings{Wang-ijcnn91,
+  author    = {S. D. Wang and C. H. Hsu},
+  title     = {Terminal Attractor Learning Algorithms for Backpropagation Neural Networks},
+  booktitle = ijcnn,
+  pages     = {183--189},
+  publisher = {IEEE Press},
+  address   = {Singapore},
+  month     = nov,
+  year      = {1991},
+}
+
+@InProceedings{WangC1994,
+  author =       "Changfeng Wang and Santosh S. Venkatesh and J. Stephen Judd",
+  editor =       NIPS6ed,
+  booktitle =    NIPS6,
+  title =        "Optimal stopping and effective machine complexity in learning",
+  publisher =    "Morgan Kaufmann",
+  pages =        "303--310",
+  year =         "1994",
+}
+
+@InProceedings{wangetal08,
+  author =       {Wang, Q. and Lin, D. and Schuurmans, D.},
+  booktitle =    {Proceedings of the Forty-sixth Annual Conference of the Association for Computational Linguistics: Human Language Technologies (ACL)},
+  title =        {Semi-supervised convex training for dependency parsing},
+  year =         {2008},
+  note =         {Acceptance rate 25\%; Wang a trainee},
+}
+
+@InProceedings{wangetal07,
+  author =       {Wang, T. and Lizotte, D. and Bowling, M. and Schuurmans, D.},
+  editor =       NIPS20ed,
+  booktitle =    NIPS20,
+  title =        {Stable dual dynamic programming},
+  year =         {2007},
+  note =         {Acceptance rate 22\%; Wang and Lizotte trainees},
+}
+
+
+@misc{Wang02,
+  author       = {L. Wang and K. Luk Chan},
+  title        = {Learning Kernel Parameters by using Class Separability Measure},
+  howpublished = {6th kernel machines workshop, in conjunction with Neural Information Processing Systems (NIPS)},
+  year         = {2002},
+  url          = {http://users.rsise.anu.edu.au/~wanglei/#Publication},
+}
+
+@article{Wang89,
+  author  = {H. Wang and J. Wu and P. Tang},
+  title   = {Superfamily expands},
+  journal = {Nature},
+  year    = {1989},
+  volume  = {337},
+  pages   = {514},
+}
+
+@inproceedings{WangHarper2002,
+  author    = {Wen Wang and Mary P. Harper},
+  title     = {The Super{ARV} language model: investigating the effectiveness of tightly integrating multiple knowledge sources},
+  booktitle = {EMNLP '02: Proceedings of the ACL-02 conference on Empirical methods in natural language processing},
+  pages     = {238--247},
+  publisher = {Association for Computational Linguistics},
+  address   = {Morristown, NJ, USA},
+  year      = {2002},
+}
+
+@article{Warmuth95,
+  author  = {Sally Floyd and Manfred Warmuth},
+  title   = {Sample Compression, Learnability, and the Vapnik-Chervonenkis Dimension},
+  journal = {Machine Learning},
+  year    = {1995},
+  volume  = {21},
+  number  = {3},
+  pages   = {269--304},
+}
+
+@book{Wasserman-2004,
+  author    = {Larry Wasserman},
+  title     = {All of Statistics - A Concise Course in Statistical Inference},
+  publisher = {Springer},
+  year      = {2004},
+}
+
+@phdthesis{Watkins-PhD,
+  author  = {C. J. C. H. Watkins},
+  title   = {Learning from Delayed Rewards},
+  school  = {Cambridge University},
+  address = {Cambridge, England},
+  year    = {1989},
+}
+
+@inproceedings{Watrous87,
+  author    = {R. L. Watrous},
+  title     = {Learning Algorithms for Connectionist Networks: Applied Gradient Methods of Nonlinear Optimization},
+  editor    = {M. Caudill and C. Butler},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {619--627},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  year      = {1987},
+}
+
+@techreport{Watrous89,
+  author      = {R. L. Watrous},
+  title       = {Context-modulated discrimination of similar vowels using second-order connectionist networks},
+  institution = {University of Toronto},
+  number      = {{CRG-TR}-89-5},
+  year        = {1989},
+}
+
+@article{Watrous-nc92,
+  author  = {R. L. Watrous and G. M. Kuhn},
+  title   = {Induction of Finite-State Languages Using Second-Order Recurrent Networks},
+  journal = nc,
+  year    = {1992},
+  volume  = {4},
+  number  = {3},
+  pages   = {406--414},
+}
+
+@article{Watson64,
+  author  = {G. S. Watson},
+  title   = {Smooth regression analysis},
+  journal = {Sankhya - The Indian Journal of Statistics},
+  year    = {1964},
+  volume  = {26},
+  pages   = {359--372},
+}
+
+@inproceedings{Weber-2000,
+ author = {Markus Weber and Max Welling and Pietro Perona},
+ title = {Unsupervised Learning of Models for Recognition},
+ booktitle = {Proc. 6th Europ. Conf. Comp. Vis., ECCV2000},
+ address = {Dublin},
+ year = 2000,
+ pages     = {18--32},
+ url       = {http://link.springer.de/link/service/series/0558/bibs/1842/18420018.htm},
+}
+
+@book{Webster88,
+  editor    = {Webster},
+  title     = {Webster's Ninth New Collegiate Dictionary},
+  publisher = {Merriam-Webster},
+  address   = {Springfield},
+  year      = {1988},
+}
+
+@book{Wegener87,
+  author    = {Ingo Wegener},
+  title     = {The Complexity of Boolean Functions},
+  publisher = {John Wiley \& Sons},
+  year      = {1987},
+}
+
+@incollection{Weigend93,
+  author    = {N. A. Gershenfeld and A. S. Weigend},
+  title     = {The Future of Time Series: Learning and Understanding},
+  editor    = {A. Weigend and N. Gershenfeld},
+  booktitle = {Predicting the future and understanding the past},
+  pages     = {1--70},
+  publisher = {Addison-Wesley},
+  address   = {Redwood City, CA},
+  year      = {1993},
+}
+
+@article{Weigend95,
+  author  = {A. S. Weigend and A. N. Srivastava},
+  title   = {Predicting Conditional Probability Distributions: {A} Connectionist Approach},
+  journal = {International Journal of Neural Systems},
+  year    = {1995},
+  volume  = {6},
+}
+
+@inproceedings{Weinberger+Saul-06,
+  author    = {K. Q. Weinberger and L. K. Saul},
+  title     = {An Introduction to Nonlinear Dimensionality Reduction by Maximum Variance Unfolding},
+  booktitle = {Proceedings of the National Conference on Artificial Intelligence (AAAI)},
+  address   = {Boston, MA},
+  year      = {2006},
+}
+
+@inproceedings{weinberger-learningkernel-04,
+  author    = {Kilian Q. Weinberger and Fei Sha and Lawrence K. Saul},
+  title     = {Learning a kernel matrix for nonlinear dimensionality reduction},
+  editor    = ICML04ed,
+  booktitle = ICML04,
+  pages     = {839--846},
+  publisher = ICML04publ,
+  address   = {Banff, Canada},
+  year      = {2004},
+}
+
+@inproceedings{Weinberger04a,
+  author    = {K. Q. Weinberger and L. K. Saul},
+  title     = {Unsupervised Learning of Image Manifolds by Semidefinite Programming},
+  booktitle = cvpr04,
+  volume    = {2},
+  pages     = {988--995},
+  address   = {Washington D.C.},
+  year      = {2004},
+}
+
+%% NOTE(review): the previous year/pages (1983, 656--664) appear to belong to a
+%% different paper; the values below are the reviewer's recollection of the 1995
+%% article -- confirm against the publisher record.
+@Article{weinberger95,
+  author =       "M. J. Weinberger and J. Rissanen and M. Feder",
+  title =        "A universal finite memory source",
+  journal =      "IEEE Transactions on Information Theory",
+  volume =       "41",
+  number =       "3",
+  pages =        "643--652",
+  year =         "1995",
+}
+
+@incollection{WeinbergerK2006,
+  author    = {Kilian Q. Weinberger and John Blitzer and Lawrence K. Saul},
+  title     = {Distance Metric Learning for Large Margin Nearest Neighbor Classification},
+  editor    = NIPS18ed,
+  booktitle = NIPS18,
+  pages     = {1473--1480},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2006},
+}
+
+@inproceedings{WeinbergerK2007,
+  author    = {Kilian Q. Weinberger and Gerald Tesauro},
+  title     = {Metric Learning for Kernel Regression},
+  booktitle = {Proc. of the 11th International Conference on Artificial Intelligence and Statistics},
+  year      = {2007},
+}
+  %url = {http://www.stat.umn.edu/~aistat/proceedings/data/papers/077.pdf}
+
+@article{Weingartner,
+  author    = {H. M. Weingartner and D. N. Ness},
+  title     = {Methods for the Solution of the Multi-Dimensional 0/1 Knapsack Problem},
+  journal   = {Operations Research},
+  volume    = {15},
+  pages     = {83--103},
+  year      = {1967},
+}
+
+@Article{Weisbuch85,
+  author =       "G. Weisbuch and F. Fogelman-Souli{\'e}",
+  title =        "Scaling Laws for the Attractors of Hopfield Networks",
+  journal =      jppl,
+  volume =       "46",
+  pages =        "623--630",
+  year =         "1985",
+}
+
+@inproceedings{Weiss-99,
+  author    = {Yair Weiss},
+  title     = {Segmentation using eigenvectors: a unifying view},
+  booktitle = ICCV99,
+  pages     = {975--982},
+  year      = {1999},
+}
+
+@article{Weiss2000,
+  author    = {Yair Weiss},
+  title     = {Correctness of local probability propagation in graphical models with loops},
+  journal   = {Neural Computation},
+  volume    = {12},
+  pages     = {1--41},
+  year      = {2000},
+}
+
+@book{Weiss90,
+  author    = {S. M. Weiss and C. A. Kulikowski},
+  title     = {Computer Systems That Learn},
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1990},
+}
+
+@inproceedings{Welling05,
+  author    = {Max Welling and Michal Rosen-Zvi and Geoffrey E. Hinton},
+  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
+  editor    = NIPS17ed,
+  booktitle = NIPS17,
+  volume    = {17},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2005},
+}
+
+@inproceedings{Welling05-small,
+  author    = {M. Welling and M. Rosen-Zvi and G. E. Hinton},
+  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
+  booktitle = {NIPS 17},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2005},
+}
+
+@inproceedings{Welling2003,
+  author    = {Max Welling and Richard Zemel and Geoffrey E. Hinton},
+  title     = {Self-Supervised Boosting},
+  editor    = NIPS15ed,
+  booktitle = NIPS15,
+  pages     = {665--672},
+  publisher = {{MIT} Press},
+  year      = {2003},
+}
+
+@inproceedings{WellingM2002,
+  author    = {Max Welling and Geoffrey E. Hinton},
+  title     = {A New Learning Algorithm for Mean Field {Boltzmann} Machines},
+  booktitle = {ICANN '02: Proceedings of the International Conference on Artificial Neural Networks},
+  pages     = {351--357},
+  publisher = {Springer-Verlag},
+  address   = {London, UK},
+  year      = {2002},
+  isbn      = {3-540-44074-7},
+}
+
+@inproceedings{WellingNIPS17,
+  author    = {Max Welling and Michal Rosen-Zvi and Geoffrey E. Hinton},
+  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
+  editor    = NIPS17ed,
+  booktitle = NIPS17,
+  pages     = {1481--1488},
+  publisher = {{MIT} Press},
+  address   = {Cambridge, MA},
+  year      = {2005},
+}
+
+@inproceedings{WellingNIPS17-small,
+  author    = {M. Welling and M. Rosen-Zvi and G. E. Hinton},
+  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
+  booktitle = {NIPS 17},
+  publisher = {{MIT} Press},
+  year      = {2005},
+}
+
+
+@inproceedings{WellingUAI2009,
+  author    = {Max Welling},
+  title     = {Herding Dynamic Weights for Partially Observed Random Field Models},
+  booktitle = UAI09,
+  publisher = {Morgan Kaufmann},
+  year      = {2009},
+}
+
+@inproceedings{WellingICML2009,
+  author    = {Max Welling},
+  title     = {Herding Dynamic Weights to Learn},
+  editor    = ICML09ed,
+  booktitle = ICML09,
+  publisher = ICML09publ,
+  year      = {2009},
+}
+
+@inproceedings{Werbos-icnn88,
+  author    = {P. J. Werbos},
+  title     = {Back-Propagation: Past and Future},
+  booktitle = icnn,
+  publisher = {IEEE Press},
+  address   = {New York, NY},
+  year      = {1988},
+  OPTpages  = {343--353},
+}
+
+@phdthesis{Werbos74,
+  author    = {P. Werbos},
+  title     = {Beyond Regression: New Tools for Prediction and Analysis in the Behavioral Sciences},
+  school    = {Harvard University},
+  year      = {1974},
+}
+
+@article{Werbos87,
+  author    = {P. J. Werbos},
+  title     = {Building and Understanding Adaptive Systems: {A} Statistical/Numerical Approach to Factory Automation and Brain Research},
+  journal   = ieeesmc,
+  volume    = {17},
+  pages     = {7--20},
+  year      = {1987},
+}
+
+@article{Werbos88,
+  author    = {P. J. Werbos},
+  title     = {Generalization of Backpropagation with Application to a Recurrent Gas Market Model},
+  journal   = nn,
+  volume    = {1},
+  pages     = {339--356},
+  year      = {1988},
+}
+
+%% NOTE(review): the old address "Physica, Heidelberg" mixed publisher and city;
+%% split here on the assumption that the publisher is Physica-Verlag -- confirm.
+@InProceedings{wermuth+cox92,
+  author =       "N. Wermuth and D. R. Cox",
+  booktitle =    "Proceedings of the 10th Symposium on Computational
+                 Statistics",
+  title =        "Graphical models for dependencies and associations",
+  volume =       "1",
+  publisher =    "Physica-Verlag",
+  address =      "Heidelberg",
+  pages =        "235--249",
+  year =         "1992",
+}
+
+@article{wermuth+lauritzen90,
+  author    = {N. Wermuth and S. L. Lauritzen},
+  title     = {On substantive research hypotheses, conditional independence graphs and graphical chain models},
+  journal   = {J. Roy. Statist. Soc. Ser. B},
+  volume    = {52},
+  pages     = {21--72},
+  year      = {1990},
+}
+
+@Article{Wessels-trnn92,
+  author =       "L. F. A. Wessels and E. Barnard",
+  title =        "Avoiding False Local Minima by Proper Initialization
+                 of Connections",
+  journal =      ieeetrnn,
+  volume =       "3",
+  number =       "6",
+  pages =        "899--905",
+  year =         "1992",
+}
+
+@Article{weston03zeronorm,
+  author =       "Jason Weston and Andr{\'e} Elisseeff and Bernhard
+                 Sch{\"o}lkopf and Mike Tipping",
+  title =        "Use of the zero norm with linear models and kernel
+                 methods",
+  journal =      jmlr,
+  volume =       "3",
+  publisher =    "MIT Press",
+  pages =        "1439--1461",
+  year =         "2003",
+  ISSN =         "1533-7928",
+}
+
+@inproceedings{weston99density,
+  author    = {J. Weston and A. Gammerman and M. Stitson and V. Vapnik and V. Vovk and C. Watkins},
+  title     = {Density estimation using support vector machines},
+  editor    = {B. {Sch\"olkopf} and C. J. C. Burges and A. J. Smola},
+  booktitle = {Advances in Kernel Methods --- Support Vector Learning},
+  pages     = {293--306},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {1999},
+}
+
+@InProceedings{WestonJ2008,
+  author =       "Jason Weston and {Fr\'ed\'eric} Ratle and Ronan
+                 Collobert",
+  booktitle =    ICML08,
+  editor =       ICML08ed,
+  publisher =    ICML08publ,
+  title =        "Deep Learning via Semi-Supervised Embedding",
+  year =         "2008",
+  isbn =         {978-1-60558-205-4},
+  pages =        {1168--1175},
+  location =     {Helsinki, Finland},
+  doi =          {10.1145/1390156.1390303},
+  address =      {New York, NY, USA},
+}
+  %url =          "http://www.kyb.tuebingen.mpg.de/bs/people/weston/papers/deep-embed.pdf",
+
+@inproceedings{WestonJ2008-small,
+  author    = {J. Weston and F. Ratle and R. Collobert},
+  title     = {Deep Learning via Semi-Supervised Embedding},
+  booktitle = {ICML 2008},
+  year      = {2008},
+}
+
+@inproceedings{WestonJ2008-short,
+  author    = {J. Weston and F. Ratle and R. Collobert},
+  title     = {Deep Learning via Semi-Supervised Embedding},
+  booktitle = {Int. Conf. Mach. Learn. 2008},
+  pages     = {1168--1175},
+  year      = {2008},
+}
+
+@InProceedings{MobahiCollobertWestonICML2009,
+  author =    {Hossein Mobahi and Ronan Collobert and Jason Weston},
+  title =     {Deep Learning from Temporal Coherence in Video},
+  booktitle = {Proceedings of the 26th International Conference on Machine Learning},
+  pages =     {737--744},
+  year =      2009,
+  editor =    {L\'{e}on Bottou and Michael Littman},
+  address =   {Montreal},
+  month =     jun,
+  publisher = {Omnipress}
+}
+
+@article{White89,
+  author    = {H. White},
+  title     = {Learning in Artificial Neural Networks: {A} Statistical Perspective},
+  journal   = {Neural Computation},
+  volume    = {1},
+  number    = {4},
+  pages     = {425--464},
+  year      = {1989},
+  type      = {Review},
+}
+
+@article{White90,
+  author    = {H. White},
+  title     = {Connectionist nonparametric regression: {Multilayer} feedforward networks can learn arbitrary mappings},
+  journal   = {Neural Networks},
+  volume    = {3},
+  number    = {5},
+  pages     = {535--549},
+  publisher = {Pergamon Press Ltd., Inc.},
+  year      = {1990},
+}
+
+%% NOTE(review): booktitle is still the placeholder "?" -- the real venue must be
+%% looked up before this entry is cited. The empty pages field was dropped.
+@InProceedings{White91,
+  author =       "H. White",
+  booktitle =    "?",
+  title =        "An overview of representation and convergence results
+                 for multilayer feedforward networks",
+  year =         "1991",
+}
+
+@inproceedings{Whitley89,
+  author    = {D. Whitley and T. Hanson},
+  title     = {Optimizing Neural Networks Using Faster, More Accurate Genetic Search},
+  editor    = {J. D. Schaffer},
+  booktitle = {Proceedings of the Third International Conference on Genetic Algorithms},
+  pages     = {391--396},
+  publisher = {Morgan Kaufmann, San Mateo},
+  address   = {Arlington 1989},
+  year      = {1989},
+}
+
+@book{whittaker90,
+  author    = {J. Whittaker},
+  title     = {Graphical Models in Applied Multivariate Statistics},
+  publisher = {Wiley, Chichester},
+  year      = {1990},
+}
+
+@incollection{Widrow60,
+  author    = {B. Widrow and M. E. Hoff},
+  title     = {Adaptive Switching Circuits},
+  booktitle = {1960 IRE WESCON Convention Record},
+  volume    = {4},
+  pages     = {96--104},
+  publisher = {IRE},
+  address   = {New York},
+  year      = {1960},
+}
+
+@inproceedings{Widrow62,
+  author    = {B. Widrow},
+  title     = {Generalization and Information Storage in Networks of Adaline ``Neurons''},
+  editor    = {M. C. Yovits and G. T. Jacobi and G. D. Goldstein},
+  booktitle = {Self-Organizing Systems 1962},
+  pages     = {435--461},
+  publisher = {Spartan, Washington},
+  address   = {Chicago 1962},
+  year      = {1962},
+}
+
+@article{Widrow73,
+  author    = {B. Widrow and N. K. Gupta and S. Maitra},
+  title     = {Punish/Reward: Learning with a Critic in Adaptive Threshold Systems},
+  journal   = ieeesmc,
+  volume    = {3},
+  pages     = {455--465},
+  year      = {1973},
+}
+
+@book{Wiener48,
+  author    = {N. Wiener},
+  title     = {Cybernetics, or Control and Communication in the Animal and the Machine},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1948},
+}
+
+@book{Wiener49,
+  author    = {N. Wiener},
+  title     = {The Extrapolation, Interpolation and Smoothing of Stationary Time Series with Engineering Applications},
+  publisher = {Wiley},
+  address   = {New York},
+  year      = {1949},
+}
+
+@article{Wilbur+Lipman83,
+  author    = {W. J. Wilbur and D. J. Lipman},
+  title     = {Rapid similarity searches of nucleic acids and protein data banks},
+  journal   = {Proc. Natl. Acad. Sci. USA},
+  volume    = {80},
+  pages     = {726--730},
+  year      = {1983},
+}
+
+@techreport{Wilks1996,
+  author      = {Yorick Wilks and Mark Stevenson},
+  title       = {The grammar of sense: Is word sense tagging much more than part-of-speech tagging?},
+  institution = {University of Sheffield},
+  year        = {1996},
+}
+
+@article{Williams+Barclay88,
+  author    = {A. F. Williams and A. N. Barclay},
+  title     = {The immunoglobulin superfamily domains for cell surface recognition},
+  journal   = {Annual Review of Immunology},
+  volume    = {6},
+  pages     = {381--405},
+  year      = {1988},
+}
+
+@inproceedings{Williams+Rasmussen-nips8,
+  author    = {C. K. I. Williams and C. E. Rasmussen},
+  title     = {Gaussian Processes for Regression},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  pages     = {514--520},
+  publisher = {MIT Press, Cambridge, MA},
+  year      = {1996},
+}
+
+@inproceedings{Williams+Seeger-2000,
+  author    = {C. K. I. Williams and M. Seeger},
+  title     = {The Effect of the Input Density Distribution on Kernel-based Classifiers},
+  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
+  publisher = {Morgan Kaufmann},
+  year      = {2000},
+}
+
+@inproceedings{Williams+Seeger-2001,
+  author    = {Christopher K. I. Williams and Matthias Seeger},
+  title     = {Using the {Nystr{\"o}m} Method to Speed Up Kernel Machines},
+  editor    = NIPS13ed,
+  booktitle = NIPS13,
+  pages     = {682--688},
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  year      = {2001},
+}
+
+@inproceedings{Williams2001,
+  author    = {C. K. I. Williams},
+  title     = {On a Connection between Kernel {PCA} and Metric Multidimensional Scaling},
+  editor    = NIPS13ed,
+  booktitle = NIPS13,
+  pages     = {675--681},
+  publisher = {{MIT} Press},
+  year      = {2001},
+}
+
+@inproceedings{Williams87,
+  author    = {R. J. Williams},
+  title     = {A Class of Gradient-Estimating Algorithms for Reinforcement Learning in Neural Networks},
+  editor    = {M. Caudill and C. Butler},
+  booktitle = icnn,
+  volume    = {2},
+  pages     = {601--608},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1987},
+  year      = {1987},
+}
+
+@inproceedings{Williams88a,
+  author    = {R. J. Williams},
+  title     = {On the Use of Back-Propagation in Associative Reinforcement Learning},
+  booktitle = icnn,
+  volume    = {1},
+  pages     = {263--270},
+  publisher = {IEEE, New York},
+  address   = {San Diego 1988},
+  year      = {1988},
+}
+
+@techreport{Williams88b,
+  author      = {R. J. Williams},
+  title       = {Towards a Theory of Reinforcement-Learning Connectionist Systems},
+  number      = {NU--CCS--88--3},
+  institution = {College of Computer Science, Northeastern University},
+  address     = {Boston, MA},
+  year        = {1988},
+}
+
+@inproceedings{Williams89a,
+  author    = {R. J. Williams and J. Peng},
+  title     = {Reinforcement Learning Algorithms As Function Optimizers},
+  booktitle = ijcnn,
+  volume    = {2},
+  pages     = {89--95},
+  publisher = {IEEE, New York},
+  address   = {Washington 1989},
+  year      = {1989},
+}
+
+@article{Williams89b,
+  author    = {R. J. Williams and D. Zipser},
+  title     = {A Learning Algorithm for Continually Running Fully Recurrent Neural Networks},
+  journal   = nc,
+  volume    = {1},
+  pages     = {270--280},
+  year      = {1989},
+}
+
+@article{Williams89c,
+  author    = {R. J. Williams and D. Zipser},
+  title     = {Experimental Analysis of the Real-Time Recurrent Learning Algorithm},
+  journal   = connsci,
+  volume    = {1},
+  pages     = {87--111},
+  year      = {1989},
+}
+
+@inproceedings{Williams93,
+  author    = {William Evans and Sridhar Rajagopalan and Umesh Vazirani},
+  title     = {Choosing a Reliable Hypothesis},
+  booktitle = {Proceedings of the 6th Annual Conference on Computational Learning Theory},
+  pages     = {269--276},
+  publisher = {ACM Press},
+  address   = {Santa Cruz, CA, USA},
+  month     = jul,
+  year      = {1993},
+  isbn      = {0-89791-611-5},
+}
+
+@inproceedings{williams95gaussian,
+  author    = {Christopher K. I. Williams and Carl Edward Rasmussen},
+  title     = {{Gaussian} Processes for Regression},
+  editor    = NIPS8ed,
+  booktitle = NIPS8,
+  volume    = {8},
+  publisher = {{MIT} Press},
+  year      = {1995},
+  isbn      = {0-262-20107-0},
+}
+
+@inproceedings{Williams96-nips,
+  author    = {C. K. I. Williams},
+  title     = {Computing with infinite networks},
+  editor    = NIPS9ed,
+  booktitle = NIPS9,
+  publisher = {MIT Press},
+  year      = {1997},
+}
+
+@inproceedings{WilliamsC1990,
+  author    = {Christopher K. I. Williams and Geoffrey E. Hinton},
+  title     = {Mean field networks that learn to discriminate temporally distorted strings},
+  booktitle = {Connectionist Models: Proceedings of the 1990 Connectionist Summer School},
+  address   = {San Mateo, CA},
+  year      = {1990},
+}
+
+%% NOTE(review): pages added from the reviewer's recollection of the Nature
+%% record -- confirm before relying on them.
+@Article{Willshaw69,
+  author =       "D. J. Willshaw and O. P. Buneman and H. C.
+                 Longuet-Higgins",
+  title =        "Non-Holographic Associative Memory",
+  journal =      nature,
+  volume =       "222",
+  pages =        "960--962",
+  year =         "1969",
+}
+
+@article{Willshaw76,
+  author    = {D. J. Willshaw and C. von der Malsburg},
+  title     = {How Patterned Neural Connections Can Be Set Up by Self-Organization},
+  journal   = PRSLB,
+  volume    = {194},
+  pages     = {431--445},
+  year      = {1976},
+}
+
+@article{Wilson-2003,
+  author    = {D. Randall Wilson and Tony R. Martinez},
+  title     = {The general inefficiency of batch training for gradient descent learning},
+  journal   = {Neural Networks},
+  volume    = {16},
+  number    = {10},
+  pages     = {1429--1451},
+  publisher = {Elsevier Science Ltd.},
+  address   = {Oxford, UK},
+  year      = {2003},
+  issn      = {0893-6080},
+}
+
+@inproceedings{Wilson2007,
+  author    = {D. Keith Wilson},
+  title     = {Weather effects and outdoor noise exposure: Where, when, and how often to measure?},
+  booktitle = {Proceedings of NOISE-CON 2007},
+  address   = {Reno, Nevada},
+  year      = {2007},
+}
+
+@article{Wilson73,
+  author    = {H. R. Wilson and J. D. Cowan},
+  title     = {A Mathematical Theory of the Functional Dynamics of Cortical and Thalamic Nervous Tissue},
+  journal   = kyb,
+  volume    = {13},
+  pages     = {55--80},
+  year      = {1973},
+}
+
+@article{Wilson88,
+  author    = {G. V. Wilson and G. S. Pawley},
+  title     = {On the Stability of the Travelling Salesman Problem Algorithm of Hopfield and Tank},
+  journal   = biocyb,
+  volume    = {58},
+  pages     = {63--70},
+  year      = {1988},
+}
+
+@inproceedings{wilson97instance,
+  author    = {D. Randall Wilson and Tony R. Martinez},
+  title     = {Instance pruning techniques},
+  booktitle = {Proc. 14th International Conference on Machine Learning},
+  pages     = {403--411},
+  publisher = {Morgan Kaufmann},
+  year      = {1997},
+  url       = {citeseer.nj.nec.com/wilson97instance.html},
+}
+
+@book{Winograd63,
+  author    = {S. Winograd and J. D. Cowan},
+  title     = {Reliable Computation in the Presence of Noise},
+  publisher = {MIT Press},
+  address   = {Cambridge},
+  year      = {1963},
+}
+
+@article{Winters89,
+  author    = {J. H. Winters and C. Rose},
+  title     = {Minimum Distance Automata in Parallel Networks for Optimum Classification},
+  journal   = nn,
+  volume    = {2},
+  pages     = {127--132},
+  year      = {1989},
+}
+
+@Article{WisSej2002,
+  author =       "L. Wiskott and T. J. Sejnowski",
+  title =        "Slow Feature Analysis: Unsupervised Learning of
+                 Invariances",
+  journal =      "Neural Computation",
+  volume =       "14",
+  number =       "4",
+  pages =        "715--770",
+  year =         "2002",
+  urlabstract =  "{http://itb.biologie.hu-berlin.de/~wiskott/Abstracts/WisSej2002.html}",
+  urlpaper =     "{http://itb.biologie.hu-berlin.de/~wiskott/Publications/WisSej2002-LearningInvariances-NC.ps.gz}",
+}
+
+@techreport{Witbrock+Zagha-1989,
+  author      = {Michael Witbrock and Marco Zagha},
+  title       = {An Implementation of Back-Propagation Learning on {GF11}, a Large {SIMD} Parallel Computer},
+  number      = {CMU-CS-89-208},
+  institution = {Carnegie Mellon University},
+  year        = {1989},
+}
+
+@book{Wittgenstein58,
+  author    = {L. Wittgenstein},
+  title     = {Philosophical Investigations},
+  publisher = {Blackwell},
+  address   = {Oxford},
+  year      = {1958},
+}
+
+@inproceedings{Wittner88,
+  author    = {B. S. Wittner and J. S. Denker},
+  title     = {Strategies for Teaching Layered Networks Classification Tasks},
+  editor    = nips87ed,
+  booktitle = nips87,
+  pages     = {850--859},
+  publisher = {American Institute of Physics, New York},
+  address   = {Denver, CO},
+  year      = {1988},
+}
+
+@book{WL90,
+  author    = {A. Waibel and K. F. Lee},
+  title     = {Readings in Speech Recognition},
+  publisher = {Morgan Kaufmann},
+  year      = {1990},
+}
+
+@article{Wolpert-1996,
+  author    = {D. H. Wolpert},
+  title     = {The lack of a priori distinction between learning algorithms},
+  journal   = {Neural Computation},
+  volume    = {8},
+  number    = {7},
+  pages     = {1341--1390},
+  year      = {1996},
+}
+
+%% NOTE(review): end page corrected (was 249) and issue number added from the
+%% reviewer's recollection of the journal record -- confirm.
+@Article{Wolpert92,
+  author =       "D. H. Wolpert",
+  title =        "Stacked Generalization",
+  journal =      "Neural Networks",
+  volume =       "5",
+  number =       "2",
+  pages =        "241--259",
+  year =         "1992",
+}
+
+@techreport{wolpert95,
+  author      = {D. Wolpert and W. Macready},
+  title       = {No free lunch theorems for search},
+  number      = {SFI-TR-95-02-010},
+  institution = {The Santa Fe Institute},
+  year        = {1995},
+}
+
+@article{wolpert96no,
+  author =       "D. Wolpert and W. Macready",
+  title =        "No free lunch theorems for optimization",
+  year =         "1997",
+  journal =      "IEEE Transactions on Evolutionary Computation",
+  volume =       1,
+  number =       1,
+  pages =       {67--82},
+}
+
+@book{wordnet-book98,
+  author    = {Christiane Fellbaum},
+  title     = {{WordNet}: An Electronic Lexical Database},
+  publisher = {MIT Press},
+  year      = {1998},
+}
+
+@techreport{wrong-delve-citation,
+  author      = {G. Hinton and R. Neal and R. Tibshirani},
+  title       = {Assessing learning procedures using {DELVE}},
+  institution = {University of Toronto, Department of Computer Science, http://www.cs.utoronto.ca/neuron/delve/delve.html.},
+  year        = {1995},
+}
+
+@Article{Wu-97,
+  author =       "Zhijun Wu",
+  title =        "Global continuation for distance geometry problems",
+  journal =      "{SIAM} Journal on Optimization",
+  volume =       "7",
+  number =       "3",
+  pages =        "814--836",
+  year =         "1997",
+}
+
+@article{Wu-97-short,
+  author    = {Z. Wu},
+  title     = {Global continuation for distance geometry problems},
+  journal   = {{SIAM} J. Optimization},
+  volume    = {7},
+  pages     = {814--836},
+  year      = {1997},
+}
+
+@article{Wu97,
+  author    = {C. H. Wu},
+  title     = {Artificial neural networks for molecular sequence analysis},
+  journal   = {Comp. Chem.},
+  volume    = {21},
+  pages     = {237--256},
+  year      = {1997},
+}
+
+@InProceedings{XingE2005,
+  author =       "Eric P. Xing and Rong Yan and Alexander G. Hauptmann",
+  booktitle =    UAI05,
+  title =        "Mining Associated Text and Images with Dual-Wing
+                 Harmoniums",
+  publisher =    "AUAI Press",
+  pages =        "633--641",
+  year =         "2005",
+  ISBN =         "0-9749039-1-4",
+  date =         "2007-07-26",
+  OPTcrossref =  "conf/uai/2005",
+  OPTdescription = "dblp",
+  OPTee =        "http://uai.sis.pitt.edu/displayArticleDetails.jsp?mmnu=1&smnu=2&article-id=1184&proceeding-id=21",
+  OPTkeywords =  "dblp",
+}
+  %url =       "http://dblp.uni-trier.de/db/conf/uai/uai2005.html#XingYH05",
+
+%% NOTE(review): trailing "?" restored from the reviewer's recollection of the
+%% published title -- confirm.
+@InProceedings{Xu+Rudnicky-2000,
+  author =       "Wei Xu and Alex Rudnicky",
+  booktitle =    "International Conference on Statistical Language
+                 Processing",
+  title =        "Can Artificial Neural Networks Learn Language Models?",
+  address =      "Beijing, China",
+  pages =        "M1--13",
+  year =         "2000",
+}
+
+@inproceedings{Xu-Emami-Jelinek-2003,
+  author    = {P. Xu and A. Emami and F. Jelinek},
+  title     = {Training Connectionist Models for the Structured Language Model},
+  booktitle = {Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing (EMNLP'2003)},
+  volume    = {10},
+  pages     = {160--167},
+  year      = {2003},
+}
+
+@misc{xu-jordan-94,
+  author    = {L. Xu and M. I. Jordan},
+  title     = {Theoretical and experimental studies of convergence properties of the {EM} algorithm for unsupervised learning based on finite mixtures},
+  pages     = {},
+  address   = {Snowbird, UTAH},
+  year      = {1994},
+  note      = {Presented at the Neural Networks for Computing Conference},
+}
+
+%% NOTE(review): year aligned with the other NIPS17 entries in this file (2005);
+%% pages added from the reviewer's recollection -- confirm.
+@inproceedings{xuetal04,
+  author    = "Xu, L. and Neufeld, J. and Larson, B. and Schuurmans, D.",
+  title     = "Maximum margin clustering",
+  editor    = NIPS17ed,
+  booktitle = NIPS17,
+  pages     = "1537--1544",
+  year      = 2005,
+}
+
+@inproceedings{Xu-ICML-2006,
+  author    = {Xu, L. and Wilkinson, D. and Southey, F. and Schuurmans, D.},
+  title     = {Discriminative unsupervised learning of structured predictors},
+  editor    = ICML06ed,
+  booktitle = ICML06,
+  publisher = ICML06publ,
+  year      = 2006,
+}
+
+@inproceedings{Xu-AAAI-2006,
+  author    = {L. Xu and K. Crammer and D. Schuurmans},
+  title     = {Robust support vector machine training via convex outlier ablation},
+  booktitle = {Twenty-first National Conference on Artificial Intelligence (AAAI-06)},
+  year      = {2006},
+}
+
+
+
+@misc{YA97a,
+  author    = {Howard Hua Yang and {Shun-ichi} Amari},
+  title     = {Natural Gradient Descent for Training Multi-Layer Perceptrons},
+  year      = {1997},
+  url       = {citeseer.ist.psu.edu/hua96natural.html},
+}
+
+@article{yang98complexity,
+  author    = {Howard Hua Yang and {Shun-ichi} Amari},
+  title     = {Complexity Issues in Natural Gradient Descent Method for Training Multi-Layer Perceptrons},
+  journal   = {Neural Computation},
+  volume    = {10},
+  number    = {8},
+  pages     = {2137--2157},
+  year      = {1998},
+  url       = {citeseer.ist.psu.edu/91462.html},
+}
+
+@inproceedings{Yang+al-2006,
+    author = {Xin Yang and Haoying Fu and Hongyuan Zha and Jesse Barlow},
+    title = {Semi-supervised nonlinear dimensionality reduction},
+    booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
+    year = {2006},
+    isbn = {1-59593-383-2},
+    pages = {1065--1072},
+    location = {Pittsburgh, Pennsylvania},
+    doi = {10.1145/1143844.1143978},
+    publisher = {ACM},
+    address = {New York, NY, USA},
+}
+
+%% The note relies on \url, so the citing document must load the url (or hyperref) package.
+@misc{Yang+Jin-2006,
+    author = {Liu Yang and Rong Jin},
+    title = {Distance Metric Learning: A Comprehensive Survey},
+    year = 2006,
+    note = {\url{http://www.cse.msu.edu/~yangliu1/frame_survey_v2.pdf}},
+}
+
+%% The note relies on \url, so the citing document must load the url (or hyperref) package.
+@misc{Yang-2007,
+    author = {Liu Yang},
+    title = {An Overview of Distance Metric Learning},
+    year = 2007,
+    note = {\url{http://www.cse.msu.edu/~yangliu1/dist_overview.pdf}},
+}
+
+@inproceedings{YangL2007,
+  author    = {Liu Yang and Rong Jin and Caroline Pantofaru and Rahul Sukthankar},
+  title     = {Discriminative Cluster Refinement: Improving Object Category Recognition Given Limited Training Data},
+  booktitle = cvpr07,
+  month     = jun,
+  year      = {2007},
+}
+
+@inproceedings{Yao85,
+  author    = {Andrew Yao},
+  title     = {Separating the polynomial-time hierarchy by oracles},
+  booktitle = {Proceedings of the 26th Annual {IEEE} Symposium on Foundations of Computer Science},
+  pages     = {1--10},
+  year      = {1985},
+}
+
+@inproceedings{Yarowsky-92,
+  author    = {David Yarowsky},
+  title     = {Word-sense disambiguation using statistical models of {Roget}'s categories trained on large corpora},
+  booktitle = {Proceedings of the 14th International Conference on Computational Linguistics (COLING-92)},
+  pages     = {454--460},
+  address   = {Nantes, France},
+  year      = {1992},
+}
+
+@inproceedings{Yarowsky-93,
+  author    = {David Yarowsky},
+  title     = {One sense per collocation},
+  booktitle = {{ARPA} Workshop on Human Language Technology},
+  address   = {Princeton, {NJ}},
+  year      = {1993},
+}
+
+@inproceedings{Yarowsky-95,
+  author    = {David Yarowsky},
+  title     = {Unsupervised word sense disambiguation rivaling supervised methods},
+  booktitle = {33rd Annual Meeting of the {ACL}},
+  pages     = {189--196},
+  address   = {Cambridge, {MA}},
+  year      = {1995},
+}
+
+@inproceedings{Yarowsky1994,
+  author    = {David Yarowsky},
+  title     = {Decision Lists for Lexical Ambiguity Resolution: Application to Accent Restoration in Spanish and French},
+  booktitle = {Meeting of the Association for Computational Linguistics},
+  pages     = {88--95},
+  year      = {1994},
+  URL       = {citeseer.nj.nec.com/yarowsky94decision.html},
+}
+
+@inproceedings{Yarowsky1995,
+  author    = {David Yarowsky},
+  title     = {Unsupervised Word Sense Disambiguation Rivaling Supervised Methods},
+  booktitle = {Meeting of the Association for Computational Linguistics},
+  pages     = {189--196},
+  year      = {1995},
+  URL       = {citeseer.nj.nec.com/yarowsky95unsupervised.html},
+}
+
+@techreport{Yianilos95,
+  author      = {Peter N. Yianilos},
+  title       = {Metric Learning via Normal Mixtures},
+  institution = {NEC Research Institute},
+  address     = {Princeton, NJ},
+  month       = oct,
+  year        = {1995},
+}
+
+@inproceedings{Younes98onthe,
+  author    = "Laurent Younes",
+  title     = "On The Convergence Of Markovian Stochastic Algorithms With Rapidly Decreasing Ergodicity Rates",
+  booktitle = "Stochastics and Stochastics Models",
+  pages     = "177--228",
+  year      = "1998",
+}
+
+@article{Young+Sachs79,
+  author  = {E. D. Young and M. B. Sachs},
+  title   = {Representation of steady-state vowels in the temporal aspects of the discharge pattern of population of auditory nerve fibers},
+  journal = jasa,
+  volume  = {66},
+  number  = {5},
+  pages   = {1381--1403},
+  year    = {1979},
+}
+
+@inproceedings{Yu+Simmons90,
+  author    = {Y. H. Yu and R. F. Simmons},
+  title     = {Extra output biased learning},
+  booktitle = ijcnn,
+  publisher = {Lawrence Erlbaum, Hillsdale},
+  address   = {Washington 1990},
+  year      = {1990},
+}
+
+@article{Yu-trnn92,
+  author  = {X. H. Yu},
+  title   = {Can Backpropagation Error Surface Not Have Local Minima?},
+  journal = ieeetrnn,
+  volume  = {3},
+  number  = {6},
+  pages   = {1019--1020},
+  year    = {1992},
+}
+
+@article{Yu92,
+  author  = {X. H. Yu},
+  title   = {Can Backpropagation Error Surface Not Have Local Minima?},
+  journal = ieeetrnn,
+  volume  = {3},
+  number  = {6},
+  pages   = {1019--1020},
+  year    = {1992},
+}
+
+@inproceedings{Yuille2005,
+  author    = {Alan L. Yuille},
+  title     = {The Convergence of Contrastive Divergences},
+  booktitle = NIPS17,
+  editor    = NIPS17ed,
+  publisher = {{MIT} Press},
+  pages     = {1593--1600},
+  year      = {2005},
+}
+
+@article{Yuille89,
+  author  = {Alan L. Yuille and D. M. Kammen and D. S. Cohen},
+  title   = {Quadrature and the Development of Orientation Selective Cortical Cells by Hebb Rules},
+  journal = biocyb,
+  volume  = {61},
+  pages   = {183--194},
+  year    = {1989},
+}
+
+@article{Yuille90,
+  author  = {Alan L. Yuille},
+  title   = {Generalized Deformable Models, Statistical Physics, and Matching Problems},
+  journal = {Neural Computation},
+  volume  = {2},
+  number  = {1},
+  pages   = {1--24},
+  year    = {1990},
+}
+
+@article{Zak-nn92,
+  author  = {M. Zak},
+  title   = {Terminal Attractors in Neural Networks},
+  journal = nn,
+  volume  = {2},
+  pages   = {259--274},
+  year    = {1989},
+}
+
+@article{Zak88,
+  author  = {M. Zak},
+  title   = {Terminal Attractors for Addressable Memory in Neural Networks},
+  journal = plettA,
+  volume  = {133},
+  pages   = {18--22},
+  year    = {1988},
+}
+
+@article{Zak89,
+  author  = {M. Zak},
+  title   = {Terminal Attractors in Neural Networks},
+  journal = nn,
+  volume  = {2},
+  pages   = {259--274},
+  year    = {1989},
+}
+
+@article{Zavaliagkos93,
+  author  = {G. Zavaliagkos and S. Austin and J. Makhoul and R. Schwartz},
+  title   = {A Hybrid Continuous Speech Recognition System Using Segmental Neural Nets with Hidden {Markov} Models},
+  journal = {Int. Journal of Pattern Recognition and Artificial Intelligence},
+  pages   = {305--319},
+  year    = {1993},
+  note    = {Special Issue on Applications of Neural Networks to Pattern Recognition (I. Guyon Ed.)},
+}
+
+@inproceedings{Zell+al-1993,
+  author    = {Andreas Zell and Niels Mache and Michael Vogt and Markus H{\"u}ttel},
+  title     = {Problems of Massive Parallelism in Neural Network Simulation},
+  booktitle = {Proceedings of the IEEE International Conference on Neural Networks},
+  volume    = {3},
+  address   = {San Francisco, CA},
+  pages     = {1890--1895},
+  year      = {1993},
+}
+
+@inproceedings{Zemel90,
+  author    = {R. S. Zemel and M. C. Mozer and G. E. Hinton},
+  title     = {Recognizing objects using hierarchical reference frame transformations},
+  booktitle = NIPS2,
+  editor    = NIPS2ed,
+  address   = {San Mateo, CA},
+  year      = {1990},
+}
+
+@phdthesis{Zemel93-thesis,
+  author = {Richard S. Zemel},
+  title  = {A Minimum Description Length Framework for Unsupervised Learning},
+  school = {University of Toronto},
+  year   = {1993},
+}
+
+@inproceedings{Zha2002,
+  author    = {H. Zha and C. Ding and M. Gu and X. He and H. Simon},
+  title     = {Spectral relaxation for {K}-means clustering},
+  booktitle = NIPS14,
+  editor    = NIPS14ed,
+  publisher = {{MIT} Press},
+  year      = {2002},
+}
+
+@InProceedings{Zhang-nips90,
+  author =       "X. Zhang and others",
+  editor =       NIPS2ed,
+  booktitle =    NIPS2,
+  title =        "An Efficient Implementation of the Backpropagation
+                 Algorithm on the Connection Machine {CM}-2",
+  publisher =    "Morgan Kaufmann",
+  address =      "San Mateo, CA",
+  pages =        "801--809",
+  year =         "1990",
+}
+
+@misc{zhang-workshop-2005,
+  author       = {Jian Zhang},
+  title        = {Sparsity Models for Multi-task Learning},
+  howpublished = {'Inductive Transfer: 10 Years Later' NIPS Workshop},
+  year         = {2005},
+  OPTkey       = {},
+}
+
+@TechReport{Zhang2001,
+  author =       "Bin Zhang",
+  title =        "Is the Maximal Margin Hyperplane Special in a Feature
+                 Space?",
+  number =       "HPL-2001-89",
+  institution =  "Hewlett-Packard Labs",
+  year =         "2001",
+}
+
+@Article{Zhang+Zha-2005,
+  author    = "Zhang, Zhenyue and Zha, Hongyuan",
+  title     = "Principal Manifolds and Nonlinear Dimensionality Reduction via Tangent Space Alignment",
+  journal   = "SIAM Journal on Scientific Computing",
+  volume    = "26",
+  number    = "1",
+  pages     = "313--338",
+  year      = "2005",
+  publisher = "Society for Industrial and Applied Mathematics",
+  address   = "Philadelphia, PA",
+  issn      = "1064-8275",
+  doi       = "10.1137/S1064827502419154",
+  url       = "http://portal.acm.org/citation.cfm?id=1024004.1039898",
+}
+
+@inproceedings{Zhang+al-2007,
+  author    = "D. Zhang and Z. H. Zhou and S. Chen",
+  title     = "Semi-supervised dimensionality reduction",
+  booktitle = "Proceedings of the 7th SIAM International Conference on Data Mining",
+  address   = "Minneapolis, MN",
+  year      = "2007",
+}
+
+@article{Zhao+al-2006,
+    author = {Haitao Zhao and Shaoyuan Sun and Zhongliang Jing and Jingyu Yang},
+    title = {Local structure based supervised feature extraction},
+    journal = {Pattern Recognition},
+    volume = {39},
+    number = {8},
+    year = {2006},
+    issn = {0031-3203},
+    pages = {1546--1550},
+    doi = {10.1016/j.patcog.2006.02.023},
+    publisher = {Elsevier Science Inc.},
+    address = {New York, NY, USA},
+}
+
+@inproceedings{Zhou+al-2004,
+  author    = {D. Zhou and O. Bousquet and T. {Navin Lal} and J. Weston and B. Sch{\"o}lkopf},
+  title     = {Learning with local and global consistency},
+  booktitle = NIPS16,
+  editor    = NIPS16ed,
+  publisher = {MIT Press},
+  address   = {Cambridge, MA},
+  pages     = {321--328},
+  year      = {2004},
+  keywords  = {semi-supervised learning, manifold, kernel methods},
+}
+
+@inproceedings{Zhou+Dapkus-1995,
+  author    = {J. Zhou and P. Dapkus},
+  title     = {Automatic Suggestion of Significant Terms for a Predefined Topic},
+  booktitle = {Proceedings of the Third Workshop on Very Large Corpora},
+  address   = {Cambridge},
+  pages     = {131--147},
+  year      = {1995},
+}
+
+@inproceedings{Zhou+Tanner-1997,
+  author    = {Joe Zhou and Troy Tanner},
+  title     = {Construction and visualization of key term hierarchies},
+  booktitle = {Proceedings of the fifth conference on Applied natural language processing},
+  publisher = {Morgan Kaufmann Publishers Inc.},
+  address   = {San Francisco, CA, USA},
+  pages     = {307--311},
+  year      = {1997},
+  location  = {Washington, DC},
+}
+
+@InProceedings{zhou2002,
+  author =       "Z.-H. Zhou and M.-L. Zhang",
+  booktitle =    "Proceedings of the International Conference on
+                 Intelligent Information Technology",
+  title =        "Neural Networks for Multi-Instance Learning",
+  address =      "Beijing, China",
+  pages =        "455--459",
+  year =         "2002",
+}
+
+@inproceedings{ZhouX2007,
+  author      = {Xiaojin Zhu and Timothy J. Rogers and Ruichen Qian and Chuck Kalish},
+  title       = {Humans Perform Semi-Supervised Classification Too.},
+  booktitle   = {AAAI},
+  publisher   = {AAAI Press},
+  pages       = {864},
+  year        = {2007},
+  ISBN        = {978-1-57735-323-2},
+  URL         = {http://dblp.uni-trier.de/db/conf/aaai/aaai2007.html#ZhuRQK07},
+  date        = {2007-09-05},
+  description = {dblp},
+  keywords    = {dblp},
+}
+
+@Article{Zhu2009,
+  author  = "Long Zhu and Yuanhao Chen and Alan Yuille",
+  title   = "Unsupervised Learning of Probabilistic Grammar-Markov Models for Object Categories",
+  journal = "{IEEE} Transactions on Pattern Analysis and Machine Intelligence",
+  volume  = "31",
+  number  = "1",
+  pages   = "114--128",
+  year    = "2009",
+}
+
+@inproceedings{Zhu+al-2003,
+  author    = {Xiaojin Zhu and Zoubin Ghahramani and John Lafferty},
+  title     = {Semi-supervised learning using {Gaussian} fields and harmonic functions},
+  booktitle = ICML03,
+  editor    = ICML03ed,
+  publisher = ICML03publ,
+  pages     = {912--919},
+  year      = {2003},
+}
+
+@techreport{Zhu+al-TR2003,
+  author      = {Xiaojin Zhu and John Lafferty and Zoubin Ghahramani},
+  title       = {Semi-Supervised Learning: From {G}aussian Fields to {G}aussian Processes},
+  institution = {CMU},
+  number      = {CMU-CS-03-175},
+  year        = {2003},
+}
+
+@article{Zhu-2006,
+  author  = {M. Zhu and W. Su and H. A. Chipman},
+  title   = {{LAGO}: {A} computationally efficient approach for statistical detection},
+  journal = {Technometrics},
+  volume  = {48},
+  number  = {2},
+  pages   = {193--205},
+  year    = {2006},
+}
+
+@inproceedings{Zhu-ijcai-2005,
+  author    = {Tingshao Zhu and Russ Greiner and Gerald Haeubl and Kevin Jewell and Bob Price},
+  title     = {Using Learned Browsing Behavior Models to Recommend Relevant Web Pages},
+  booktitle = {Nineteenth International Joint Conference on Artificial Intelligence (IJCAI-05)},
+  address   = {Edinburgh, U.K.},
+  pages     = {1589--1591},
+  year      = {2005},
+}
+
+@techreport{Zhu-Lafferty-Ghahramani-2003,
+  author      = {Xiaojin Zhu and John Lafferty and Zoubin Ghahramani},
+  title       = {Semi-supervised learning: from {G}aussian fields to {G}aussian processes},
+  institution = {School of Computer Science, Carnegie Mellon University},
+  number      = {CMU-CS-03-175},
+  year        = {2003},
+}
+
+@article{zhu-rohwer96,
+  author  = {H. Zhu and R. Rohwer},
+  title   = {No free lunch for cross validation},
+  journal = {Neural Computation},
+  volume  = {8},
+  number  = {7},
+  pages   = {1421--1426},
+  year    = {1996},
+}
+
+@TechReport{zhu05survey,
+  author =       "Xiaojin Zhu",
+  title =        "Semi-Supervised Learning Literature Survey",
+  number =       "1530",
+  institution =  "Computer Science, University of Wisconsin-Madison",
+  year =         "2005",
+  note =         "http://www.cs.wisc.edu/$\sim$jerryzhu/pub/ssl\_survey.pdf",
+}
+
+@techreport{ZhuX2002,
+  author      = {Xiaojin Zhu and Zoubin Ghahramani},
+  title       = {Towards semisupervised classification with Markov random fields},
+  institution = {Carnegie Mellon University},
+  year        = {2002},
+}
+
+@InProceedings{Zinkevich-2003,
+  author    = "Martin Zinkevich",
+  title     = "Online convex programming and generalized infinitesimal gradient ascent",
+  booktitle = ICML03,
+  editor    = ICML03ed,
+  publisher = ICML03publ,
+  pages     = {928--936},
+  year      = {2003},
+}
+
+@InProceedings{Zoubin-nips8,
+  author =       "Z. Ghahramani and M. I. Jordan",
+  editor =       NIPS8ed,
+  booktitle =    NIPS8,
+  title =        "Factorial Hidden Markov Models",
+  publisher =    "MIT Press",
+  address =      "Cambridge, MA",
+  year =         "1996",
+}
+
+@inproceedings{Zoubin-nips94,
+  author    = {Z. Ghahramani and M. I. Jordan},
+  title     = {Supervised learning from incomplete data via an {EM} approach},
+  booktitle = NIPS6,
+  editor    = NIPS6ed,
+  publisher = {Morgan Kaufmann},
+  address   = {San Mateo, CA},
+  year      = {1994},
+}
+
+@techreport{Zoubin-tr93,
+  author      = {Z. Ghahramani and M. I. Jordan},
+  title       = {Function approximation via density estimation using the {E}{M} approach},
+  institution = {MIT},
+  type        = {Computational Cognitive Science},
+  number      = {TR 9304},
+  year        = {1993},
+}
+
+@TechReport{Zoubin96,
+  author =       "Z. Ghahramani and G. E. Hinton",
+  title =        "Parameter estimation for linear dynamical systems",
+  number =       "CRG-TR-91-1",
+  institution =  "University of Toronto",
+  year =         "1996",
+}
+
+@TechReport{Zoubin96b,
+  author =       "Z. Ghahramani and G. E. Hinton",
+  title =        "Switching state-space models",
+  number =       "CRG-TR-91-3",
+  institution =  "University of Toronto",
+  year =         "1996",
+}
+
+@article{Zue90a,
+  author  = {V. Zue and S. Seneff and J. Glass},
+  title   = {Speech database development: {TIMIT} and beyond},
+  journal = spcomm,
+  volume  = {9},
+  number  = {4},
+  pages   = {351--356},
+  month   = aug,
+  year    = {1990},
+}
+
+@InProceedings{Zue90b,
+  author =       "V. Zue and J. Glass and D. Goddeau and D. Goodine and
+                 H. Leung and M. McCandless and M. Phillips and J.
+                 Polifroni and S. Seneff and D. Whitney",
+  booktitle =    "Proc. Int. Conf. Spoken Language Processing",
+  title =        "Recent progress on the {MIT} {VOYAGER} spoken language
+                 system",
+  address =      "Kobe, Japan",
+  pages =        "29.6.1",
+  year =         "1990",
+}
+
+@inproceedings{Zwald+al-2004,
+  author    = {Laurent Zwald and Olivier Bousquet and Gilles Blanchard},
+  title     = {Statistical Properties of Kernel Principal Component Analysis},
+  booktitle = colt04,
+  editor    = {John Shawe-Taylor and Yoram Singer},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {3120},
+  publisher = {Springer-Verlag},
+  pages     = {594--608},
+  year      = {2004},
+}
+
+@InProceedings{Zweig+Russel-AAAI98,
+  author =       "G. Zweig and S. Russell",
+  booktitle =    "Proceedings of the AAAI Conference",
+  title =        "Speech Recognition with Dynamic {Bayesian} Networks",
+  publisher =    "AAAI Press",
+  address =      "Madison, Wisconsin",
+  year =         "1998",
+}
+
+@InProceedings{Zweig+Russel-ICSLP98,
+  author =       "G. Zweig and S. Russell",
+  booktitle =    "Proceedings of the International Conference on
+                 Spoken Language Processing",
+  title =        "Probabilistic Modeling with {Bayesian} Networks for
+                 {ASR}",
+  address =      "Sydney, Australia",
+  year =         "1998",
+}
+
+@article{Zwicker+Terhardt80,
+  author  = {E. Zwicker and E. Terhardt},
+  title   = {Analytical expressions for critical band rate and critical bandwidths as a function of frequency},
+  journal = jasa,
+  volume  = {68},
+  number  = {5},
+  pages   = {1523--1525},
+  year    = {1980},
+}
+
+@proceedings{colt03,
+  title     = {Computational Learning Theory and Kernel Machines, 16th Annual Conference on Computational Learning Theory and 7th Kernel Workshop, {COLT}/Kernel 2003, Washington, {DC}, {USA}, August 24-27, 2003, Proceedings},
+  booktitle = colt03,
+  editor    = {Bernhard Sch{\"o}lkopf and Manfred K. Warmuth},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {2777},
+  publisher = {Springer},
+  year      = {2003},
+}
+
+@proceedings{FOCS3,
+  title        = {Proceedings of the Third Annual Symposium on Switching Circuit Theory and Logical Design},
+  booktitle    = {Proceedings of the Third Annual Symposium on Switching Circuit Theory and Logical Design},
+  organization = {American Institute of Electrical Engineers},
+  address      = {Chicago, Illinois},
+  month        = "7--12" # oct,
+  year         = {1962},
+  crossrefonly = {1},
+  url          = {http://theory.lcs.mit.edu/~dmjones/FOCS/focs.bib},
+}
+
+@Book{TricksOfTheTrade,
+  editor =       "Genevieve Orr and Klaus-Robert M{\"u}ller",
+  booktitle =    "Neural networks: tricks of the trade",
+  title =        "Neural networks: tricks of the trade",
+  volume =       "1524",
+  publisher =    "Springer-Verlag Inc.",
+  address =      "New York, NY, USA",
+  pages =        "vi + 432",
+  year =         "1998",
+  ISBN =         "3-540-65311-2 (paperback)",
+  ISSN =         "0302-9743",
+  LCCN =         "QA76.87.N4913 1998",
+  bibdate =      "Sat Jan 9 14:35:31 1999",
+  series =       "Lecture Notes in Computer Science",
+  acknowledgement = ack-nhfb,
+  keywords =     "Neural networks (Computer science)",
+}
+
+@article{Besag75pseudolikelihood,
+  author  = {Julian Besag},
+  title   = {Statistical analysis of non-lattice data},
+  journal = {The Statistician},
+  volume  = {24},
+  number  = {3},
+  pages   = {179--195},
+  year    = {1975},
+}
+
+@INPROCEEDINGS{Marlin05unsupervisedlearning,
+    author = {Benjamin Marlin and Richard S. Zemel and Sam T. Roweis},
+    title = {Unsupervised learning with non-ignorable missing data},
+    booktitle = {Proceedings of the Tenth International Workshop on Artificial Intelligence and Statistics (AISTATS 2005)},
+    year = {2005},
+    pages = {222--229}
+}
+
+@phdthesis{MarlinThesis08,
+  author = {Benjamin M. Marlin},
+  title  = {Missing Data Problems in Machine Learning},
+  school = {Dept. of Computer Science, University of Toronto},
+  year   = {2008},
+}
+
+@inproceedings{odonnellservedio08,
+author = "{O'Donnell}, R. and Servedio, R.",
+title = "The {Chow} parameters problem",
+booktitle = "Proceedings of the Fortieth Annual Symposium on Theory of
+Computing (STOC)",
+year = 2008,
+pages = "517--526",
+}
+
+@article{bendaviddichterman98,
+author = "{Ben-David}, S. and Dichterman, E.",
+title = "Learning with restricted focus of attention",
+journal = "Journal of Computer and System Sciences",
+volume = 56,
+number = 3,
+year = 1998,
+pages = "277--298",
+}
+
+@techreport{cma07,
+author = "{Canadian Medical Association}",
+title = "Information technology and health care in Canada: 2007 status report",
+institution = "Canadian Medical Association",
+year = 2007,
+}
+
+@article{hanetal05,
+author = "Y. Han and J. Carcillo and S. Venkataraman and R. Clark and
+R. Watson and T. Nguyen and H. Bayir and R. Orr",
+title = "Unexpected increased mortality after implementation
+of a commercially sold computerized physician order entry system",
+journal = "Pediatrics",
+volume = "116",
+number = 6,
+pages = "1506--1512",
+year = 2005,
+}
+
+@inproceedings{conf/uai/McCallum03,
+  author    = {Andrew McCallum},
+  title     = {Efficiently Inducing Features of Conditional Random Fields},
+  booktitle = UAI03,
+  editor    = {Christopher Meek and Uffe Kj{\ae}rulff},
+  publisher = {Morgan Kaufmann},
+  pages     = {403--410},
+  year      = {2003},
+  date      = {August 7-10},
+  location  = {Acapulco, Mexico},
+  ISBN      = {0-127-05664-5},
+}
+
+
+@inproceedings{conf/uai/McCallum03-small,
+  author    = {A. McCallum},
+  title     = {Efficiently Inducing Features of Conditional Random Fields},
+  booktitle = {UAI},
+  year      = {2003},
+}
+
+
+@inproceedings{conf/icml/RanzatoS08,
+  author    = {Marc'Aurelio Ranzato and Martin Szummer},
+  title     = {Semi-supervised learning of compact document representations with deep networks},
+  booktitle = ICML08,
+  editor    = ICML08ed,
+  publisher = ICML08publ,
+  series    = {ACM International Conference Proceeding Series},
+  volume    = {307},
+  pages     = {792--799},
+  year      = {2008},
+  date      = {June 5-9, 2008},
+  location  = {Helsinki, Finland},
+  ISBN      = {978-1-60558-205-4},
+  URL       = {http://doi.acm.org/10.1145/1390156.1390256},
+}
+
+@inproceedings{conf/icml/RanzatoS08-small,
+  author    = {M. Ranzato and M. Szummer},
+  title     = {Semi-supervised learning of compact document representations with deep networks},
+  booktitle = {ICML},
+  year      = {2008},
+}
+
+@PhdThesis{Cosatto02sample-basedtalking-head,
+    author = {Eric Cosatto},
+    title = {Sample-Based Talking-Head Synthesis},
+    school = {Signal Processing Lab, Swiss Federal Institute of Technology},
+    advisor = {Murat Kunt},
+    year = {2002}
+}
+
+@InCollection{SutskeverHintonTaylor2009,
+  author    = "Ilya Sutskever and Geoffrey E Hinton and Graham Taylor",
+  title     = "The Recurrent Temporal Restricted Boltzmann Machine",
+  booktitle = NIPS21,
+  editor    = NIPS21ed,
+  pages     = "1601--1608",
+  year      = "2009",
+}
+
+@techreport{Bergstra+2009-small,
+  author      = {J. Bergstra and G. Desjardins and P. Lamblin and Y. Bengio},
+  title       = {Quadratic Polynomials Learn Better Image Features},
+  institution = {DIRO, Universit\'e de Montr\'eal},
+  number      = {1337},
+  year        = {2009},
+}
+
+@inproceedings{Haffner+al-1998,
+ author = {Haffner, P. and Bottou, L. and Howard, P. G. and Simard, P. and Bengio, Y. and {LeCun}, Y.},
+ title = {Browsing through High Quality Document Images with {DjVu}},
+ booktitle = {Proceedings of the Advances in Digital Libraries Conference (ADL'98)},
+ year = {1998},
+ isbn = {0-8186-8464-X},
+ pages = {309},
+ publisher = {IEEE Computer Society},
+ address = {Washington, DC, USA},
+ }
+
+@InProceedings{Bottou+Howard+Bengio-1998,
+  author    = "Bottou, L. and Howard, P. G. and Bengio, Y.",
+  title     = "The {Z}-Coder Adaptive Binary Coder",
+  booktitle = "Proceedings of the Conference on Data Compression (DCC'98)",
+  publisher = "IEEE Computer Society",
+  address   = "Washington, DC, USA",
+  pages     = "13",
+  year      = "1998",
+}
+
+@InProceedings{Pigeon+Bengio-1998,
+  author    = "Steven Pigeon and Yoshua Bengio",
+  title     = "A Memory-Efficient Adaptive Huffman Coding Algorithm for Very Large Sets of Symbols",
+  booktitle = "Proceedings of the Conference on Data Compression (DCC'98)",
+  pages     = "568",
+  year      = "1998",
+  ee        = "http://dlib.computer.org/conferen/dcc/8406/pdf/84060568.pdf",
+  bibsource = "DBLP, http://dblp.uni-trier.de",
+}
+
+@INPROCEEDINGS{LeCun+Bottou+Bengio-1997,
+title={Reading checks with multilayer graph transformer networks},
+author={Yann LeCun and Bottou, L. and Bengio, Y.},
+booktitle={IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP'97)},
+year={1997},
+month=apr,
+volume={1},
+pages={151--154},
+keywords={backpropagation, banking, cheque processing, document image processing, image segmentation, optical character recognition, business checks, business cheques, check reading system, cheque reading system, convolutional neural network character recognizers, gradient-based learning algorithms, graph-based stochastic models, machine learning paradigm, multilayer graph transformer networks, personal checks, personal cheques},
+doi={10.1109/ICASSP.1997.599580},
+ }
+
+@INPROCEEDINGS{Rahim97discriminativefeature,
+    author = {Mazin Rahim and Yoshua Bengio and Yann {LeCun}},
+    title = {Discriminative Feature And Model Design For Automatic Speech Recognition},
+    booktitle = {Proc. of Eurospeech},
+    year = {1997},
+    pages = {75--78}
+}
+
+@InProceedings{Bengio-nncm-1996,
+author = {Yoshua Bengio},
+title = {Training A Neural Network with a Financial Criterion Rather than a Prediction Criterion},
+booktitle = {Proceedings of the Fourth International Conference on Neural Networks in the Capital Markets (NNCM-96)},
+editor = {A. S. Weigend and Y. S. Abu-Mostafa and A.-P. N. Refenes},
+publisher = {World Scientific},
+pages = {433--443},
+year = "1997",
+}
+
+@INPROCEEDINGS{Bengio+Bengio+Cloutier-1994,
+title={Use of genetic programming for the search of a new learning rule for neural networks},
+author={Bengio, S. and Bengio, Y. and Cloutier, J.},
+booktitle={Proceedings of the First IEEE Conference on Evolutionary Computation},
+year={1994},
+month=jun,
+volume={1},
+pages={324--327},
+keywords={backpropagation, genetic algorithms, learning (artificial intelligence), neural nets, optimisation, search problems, backpropagation algorithm, classification tasks, genetic algorithms, genetic programming, gradient descent, learning rule, neural networks, optimization, parametric function, rule parameters, search, simulated annealing, standard optimization methods},
+doi={10.1109/ICEC.1994.349932},
+}
+
+@article{Chakraborty+al-2002,
+ author = {Chakraborty, Basabi and Chakraborty, Goutam},
+ title = {A new feature extraction technique for on-line recognition of handwritten alphanumeric characters},
+ journal = {Inf. Sci. Appl.},
+ volume = {148},
+ number = {1-4},
+ year = {2002},
+ issn = {0020-0255},
+ pages = {55--70},
+ doi = {10.1016/S0020-0255(02)00276-1},
+ publisher = {Elsevier Science Inc.},
+ address = {New York, NY, USA},
+ }
+
+
+@INPROCEEDINGS{LeCun+al-1993,
+title={On-Line handwriting recognition with neural networks: spatial representation versus temporal representation},
+author={{LeCun}, Y. and Bengio, Y. and Henderson, D. and Weisbuch, A.},
+booktitle={Proceedings of the International Conference on Handwriting and Drawing},
+year={1993},
+location= {Ecole Nationale Superieure des Telecommunications},
+}
+
+@INPROCEEDINGS{Bengio+al-92,
+    author = {Yoshua Bengio and Samy Bengio and Jocelyn Cloutier and Jan Gecsei},
+    title = {On the Optimization of a Synaptic Learning Rule},
+    booktitle = {Conference on Optimality in Biological and Artificial Networks},
+    year = {1992}
+}
+
+@inproceedings{Bengio+al-91,
+  author    = "Yoshua Bengio and Samy Bengio and Jocelyn Cloutier and Jan Gecsei",
+  title     = "Learning a Synaptic Learning Rule",
+  booktitle = ijcnn,
+  pages     = {II-A969},
+  year      = "1991",
+  location  = {Seattle, WA},
+}
+
+@INPROCEEDINGS{Bengio91acomparative,
+    author = {Yoshua Bengio and Renato De Mori and Giovanni Flammia and Ralf Kompe},
+    title = {A Comparative Study On Hybrid Acoustic Phonetic Decoders Based On Artificial Neural Networks},
+    booktitle = {Proceedings of EuroSpeech},
+    location = {Genova, Italy},
+    year = {1991}
+}
+
+@incollection{lecun-01a,
+original =      "orig/lecun-01a.ps.gz",
+author =        "{LeCun}, Y. and Bottou, L. and Bengio, Y. and Haffner, P.",
+title =         "Gradient-Based Learning Applied to Document Recognition",
+booktitle =     "Intelligent Signal Processing",
+editor =        "Haykin, S. and Kosko, B.",
+pages =         "306--351",
+publisher =     "IEEE Press",
+note =          "chap. 9",
+year =          2001,
+}
+
+@InCollection{Hochreiter+al-2000,
+    abstract = {Introduction Recurrent networks (crossreference Chapter 12) can, in principle, use their feedback connections to store representations of recent input events in the form of activations. The most widely used algorithms for learning what to put in short-term memory, however, take too much time to be feasible or do not work well at all, especially when minimal time lags between inputs and corresponding teacher signals are long. Although theoretically fascinating, they do not provide clear practical advantages over, say, backprop in feedforward networks with limited time windows (see crossreference Chapters 11 and 12). With conventional ``algorithms based on the computation of the complete gradient'', such as ``Back-Propagation Through Time'' (BPTT, e.g., [22, 27, 26]) or ``Real-Time Recurrent Learning'' (RTRL, e.g., [21]) error signals ``flowing backwards in time'' tend to either (1) blow up or (2) vanish: the temporal evolution of the backpropagated error ex},
+    author = {Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo and Schmidhuber, J{\"u}rgen},
+    citeulike-article-id = {4450697},
+    citeulike-linkout-0 = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.7321},
+    keywords = {gradient-descent, long-term-dependencies, rnn},
+    posted-at = {2009-05-02 00:58:01},
+    priority = {2},
+    title = {Gradient Flow in Recurrent Nets: the Difficulty of Learning Long-Term Dependencies},
+    url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.7321},
+    booktitle =    "Field Guide to Dynamical Recurrent Networks",
+    editor = "J. Kolen and S. Kremer",
+    publisher = "IEEE Press",
+    year = "2000",
+}
+
+@INPROCEEDINGS{Lecun99objectrecognition,
+    author = {Yann {LeCun} and Patrick Haffner and L{\'e}on Bottou and Yoshua Bengio},
+    title = {Object Recognition with Gradient-Based Learning},
+    booktitle = {Shape, Contour and Grouping in Computer Vision},
+    year = {1999},
+    publisher = {Springer},
+    pages = {319--345},
+}
+
+
+% non-ref conference
+@misc{snowbird_learn_conf,
+  author       = {many authors},
+  title        = {Snowbirds papers},
+  howpublished = {Learning Conference, Snowbird},
+  year         = {many},
+  location     = {Utah},
+}
+
+@MISC{Collobert+Bengio-2001,
+title = "Magic Mix",
+author = "Collobert, R. and Bengio, Y.",
+year = "2002",
+howpublished = "Learning Conference, Snowbird",
+location = "Utah",
+}
+
+@MISC{Bengio+al-2001,
+title = "Learning a Distributed Representation for Statistical Language Modeling and Information Retrieval",
+author = "Yoshua Bengio and Pascal Vincent and Florence d'Alch{\'e}-Buc",
+year = "2001",
+howpublished = "Learning Conference, Snowbird",
+location = "Utah",
+}
+
+@misc{Bengio+Nadeau-2000,
+  author       = {Yoshua Bengio and C. Nadeau},
+  title        = {About Realistic Comparisons Between Learning Algorithms},
+  howpublished = {Learning Conference, Snowbird},
+  location     = {Utah},
+  year         = {2000},
+}
+@misc{Bengio-1999,
+  author       = {Yoshua Bengio},
+  title        = {Learning from Structured High-Dimensional Data},
+  howpublished = {Meeting of the Mathematical Society of Canada},
+  location     = {Montreal, Canada},
+  year         = {1999},
+}
+
+@misc{Bengio+al-1999,
+  author       = {Yoshua Bengio and S. Latendresse and Charles Dugas},
+  title        = {Gradient-Based Learning of Hyper-Parameters},
+  howpublished = {Learning Conference, Snowbird},
+  location     = {Utah},
+  year         = {1999},
+}
+
+@misc{Bengio+al-1999b,
+  author       = {Yoshua Bengio and J-J. Brault and F. Major and R. Neal and S. Pigeon},
+  title        = {Learning Algorithms for Sorting Compounds from Titration Curves},
+  howpublished = {Symposium on New Perspectives for Computer-Aided Drug Design},
+  location     = {Montreal, Canada},
+  year         = {1999},
+}
+
+@misc{Bengio+al-1998,
+  author       = {Yoshua Bengio and S. Latendresse and Charles Dugas},
+  title        = {Stochastic learning of strategic equilibria for auctions},
+  howpublished = {Machines That Learn Conference, Snowbird},
+  location     = {Utah},
+  year         = {1998},
+}
+
+@misc{Bengio+al-1997,
+  author       = {Bengio, Y. and Bengio, S. and Singer, Y. and Isabelle, J-F.},
+  title        = {On the Clusterization of Probabilistic Transducers},
+  howpublished = {1997 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1997},
+}
+
+% NOTE(review): the author list is identical to Bengio+al-1997 while the key suggests a
+% single author -- possibly a copy-paste leftover; confirm against the original abstract.
+@misc{Bengio-1995,
+  author       = {Bengio, Y. and Bengio, S. and Singer, Y. and Isabelle, J-F.},
+  title        = {Fast High Capacity Classifiers},
+  howpublished = {1995 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1995},
+}
+
+@misc{Bengio+Frasconi-1994,
+  author       = {Bengio, Y. and Frasconi, P.},
+  title        = {R{\'e}seaux de neurones Markoviens pour l'inf{\'e}rence grammaticale},
+  howpublished = {1994 ACFAS Conference, neural networks colloquium},
+  location     = {Montr{\'e}al, Qu{\'e}bec},
+  year         = {1994},
+}
+
+@misc{Bengio+LeCun-1994,
+  author       = {Bengio, Y. and {LeCun}, Y.},
+  title        = {Reconnaissance de mots manuscrits avec r{\'e}seaux de neurones et mod{\`e}les de {Markov}},
+  howpublished = {1994 ACFAS Conference, neural networks colloquium},
+  location     = {Montr{\'e}al, Qu{\'e}bec},
+  year         = {1994},
+}
+
+@misc{Bengio+al-1994,
+  author       = {Bengio, S. and Bengio, Y. and Cloutier, J. and Gecsei, J.},
+  title        = {Optimisation d'une r{\`e}gle d'apprentissage pour r{\'e}seaux de neurones artificiels},
+  howpublished = {1994 ACFAS Conference, neural networks colloquium},
+  location     = {Montr{\'e}al, Qu{\'e}bec},
+  year         = {1994},
+}
+
+@misc{Bengio+Frasconi-1994b,
+  author       = {Bengio, Y. and Frasconi, P.},
+  title        = {An {EM} Algorithm for Target Propagation},
+  howpublished = {1994 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1994},
+}
+
+@misc{Bengio+al-1993,
+  author       = {Bengio, Y. and Simard, P. and Frasconi, P.},
+  title        = {The Problem of Learning Long-Term Dependencies in Recurrent Networks},
+  howpublished = {1993 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1993},
+}
+@misc{Bengio-1992,
+  author       = {Bengio, Y.},
+  title        = {Representations Based on Articulatory Dynamics for Speech Recognition},
+  howpublished = {1992 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1992},
+}
+
+@misc{Bengio+al-1991,
+  author       = {Bengio, Y. and Bengio, S. and Cloutier, J.},
+  title        = {Learning a Synaptic Learning Rule},
+  howpublished = {1991 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1991},
+}
+
+@misc{Bengio+DeMori-1990,
+  author       = {Bengio, Y. and De Mori, R.},
+  title        = {Recurrent networks with Radial Basis Functions for speech recognition},
+  howpublished = {1990 Neural Networks for Computing Conference, Snowbird},
+  location     = {Utah},
+  year         = {1990},
+}
+
+
+%%tech reports
+@techreport{Bardou+Bengio-TR2002,
+  author      = {O. Bardou and Yoshua Bengio},
+  title       = {R{\'e}gularisation du prix des options : Stacking},
+  institution = {Cahier Scientifique Cirano 2002s-44},
+  year        = {2002},
+}
+
+@techreport{Dugas+Bengio-TR2002,
+  author      = {C. Dugas and Yoshua Bengio},
+  title       = {{\'E}tude du biais dans le prix des options},
+  institution = {Cahier Scientifique Cirano 2002s-45},
+  year        = {2002},
+}
+
+@techreport{Dugas+al-TR2002,
+  author      = {C. Dugas and Y. Bengio and F. B{\'e}lisle and C. Nadeau and R. Garcia},
+  title       = {Incorporating Second-Order Functional Knowledge for Better Option Pricing},
+  institution = {Cahier Scientifique Cirano 2002s-46},
+  year        = {2002},
+}
+
+@techreport{Bengio+al-TR2002,
+  author      = {Y. Bengio and V.-P. Lauzon and R. Ducharme},
+  title       = {Experiments on the Application of {IOHMMs} to Model Financial Returns Series},
+  institution = {Cahier Scientifique Cirano 2002s-47},
+  year        = {2002},
+}
+
+@techreport{Bengio+al-TR2002b,
+  author      = {Y. Bengio and R. Ducharme and O. Bardou and N. Chapados},
+  title       = {Valorisation d'options par optimisation du {Sharpe} Ratio},
+  institution = {Cahier Scientifique Cirano 2002s-48},
+  year        = {2002},
+}
+
+@techreport{Chapados+Bengio-TR2002,
+  author      = {N. Chapados and Y. Bengio},
+  title       = {Cost Functions and Model Combination for {VaR}-based Asset Allocation using Neural Networks},
+  institution = {Cahier Scientifique Cirano 2002s-49},
+  year        = {2002},
+}
+
+@techreport{Bengio+Dugas-TR2002,
+  author      = {Y. Bengio and C. Dugas},
+  title       = {Forecasting Non-Stationary Volatility with Hyper-Parameters},
+  institution = {Cahier Scientifique Cirano 2002s-50},
+  year        = {2002},
+}
+
+@techreport{Gingras+al-TR2002,
+  author      = {F. Gingras and Y. Bengio and C. Nadeau},
+  title       = {On Out-of-Sample Statistics for Time-Series},
+  institution = {Cahier Scientifique Cirano 2002s-51},
+  year        = {2002},
+}
+
+@techreport{Chapados+Bengio-TR2002b,
+  author      = {N. Chapados and Y. Bengio},
+  title       = {Input Decay : Simple and Effective Soft Variable Selection},
+  institution = {Cahier Scientifique Cirano 2002s-52},
+  year        = {2002},
+}
+
+@techreport{Ghosn+Bengio-TR2002,
+  author      = {J. Ghosn and Y. Bengio},
+  title       = {Multi-Task Learning For Option Pricing},
+  institution = {Cahier Scientifique Cirano 2002s-53},
+  year        = {2002},
+}
+
+@techreport{Collobert+al-TR2001,
+  author      = {R. Collobert and S. Bengio and Y. Bengio},
+  title       = {A Parallel Mixture of {SVM}s for Very Large Scale Problems},
+  institution = {IDIAP},
+  location    = {Switzerland},
+  number      = {IDIAP-RR-01-12},
+  year        = {2001},
+}
+
+@techreport{Vincent+Bengio-TR2001,
+  author      = {Vincent, P. and Bengio, Y.},
+  title       = {K-Local Hyperplane and Convex Distance Nearest Neighbor Algorithms},
+  institution = DIRO,
+  location    = {Montreal, Canada},
+  number      = {1197},
+  year        = {2001},
+}
+
+@techreport{Chapados+al-TR2001,
+  author      = {Chapados, N. and Bengio, Y. and Vincent, P. and Ghosn, J. and Dugas, C. and Takeuchi, I. and Meng, L.},
+  title       = {Estimating Car Insurance Premia : a Case Study in High-Dimensional Data Inference},
+  institution = DIRO,
+  number      = {1199},
+  year        = {2001},
+}
+
+@techreport{Bengio+Chapados-TR2001,
+  author      = {Bengio, Y. and Chapados, N.},
+  title       = {Extending Metric-Based Model Selection and Regularization in the Absence of Unlabeled Data},
+  institution = DIRO,
+  number      = {1200},
+  year        = {2001},
+}
+
+@techreport{Nadeau+Bengio-TR1999,
+  author      = {Nadeau, C. and Bengio, Y.},
+  title       = {Inference and the Generalization Error},
+  institution = {Cahier Scientifique Cirano 99s-25},
+  year        = {1999},
+}
+
+@techreport{Gingras+al-TR1999,
+  author      = {Gingras, F. and Bengio, Y. and Nadeau, C.},
+  title       = {On Out-of-Sample Statistics for Financial Time-Series},
+  institution = {Centre de Recherches Math{\'e}matiques, Universit{\'e} de Montreal},
+  number      = {2585},
+  year        = {1999},
+}
+
+@techreport{Bengio-1998-TR,
+  author      = {Bengio, Y.},
+  title       = {Using a financial training criterion rather than a prediction criterion},
+  institution = {Cahier Scientifique Cirano 98s-21},
+  year        = {1998},
+}
+
+@techreport{Bengio+DeMori-1990-TR,
+  author      = {Bengio, Y. and De Mori, R.},
+  title       = {Some connectionist models and their application to speech recognition},
+  institution = {School of Computer Science, McGill University},
+  number      = {TR-SOCS-90-12},
+  year        = {1990},
+}
+
+@article{becker+hinton:1993,
+  author  = {Becker, S. and Hinton, G. E.},
+  title   = {Learning Mixture Models of Spatial Coherence},
+  journal = {Neural Computation},
+  year    = {1993},
+  volume  = {5},
+  pages   = {267--277},
+}
+@article{berkes:2005,
+  author  = {Berkes, Pietro and Wiskott, Laurenz},
+  title   = {Slow Feature Analysis Yields a Rich Repertoire of Complex Cell Properties},
+  journal = {Journal of Vision},
+  year    = {2005},
+  month   = jul,
+  volume  = {5},
+  number  = {6},
+  pages   = {579--602},
+  ISSN    = {1534-7362},
+  URL     = {http://journalofvision.org/5/6/9/},
+  eprint  = {http://journalofvision.org/5/6/9/Berkes-2005-jov-5-6-9.pdf},
+}
+@inproceedings{hurri+hyvarinen:2003,
+  author    = {Hurri, J. and Hyv{\"a}rinen, A.},
+  title     = {Temporal Coherence, Natural Image Sequences, and the Visual Cortex.},
+  booktitle = {Advances in Neural Information Processing Systems 15 ({NIPS*02})},
+  pages     = {141--148},
+  year      = {2003},
+}
+@article{wiskott:2002,
+  author  = {Laurenz Wiskott and Terrence Sejnowski},
+  title   = {Slow Feature Analysis: Unsupervised Learning of Invariances},
+  journal = {Neural Computation},
+  volume  = {14},
+  number  = {4},
+  pages   = {715--770},
+  year    = {2002},
+  url     = {http://itb.biologie.hu-berlin.de/~wiskott/Publications/WisSej2002-LearningInvariances-NC.ps.gz},
+}
+
+@article{KouhPoggio2008,
+  author  = {Minjoon M. Kouh and Tomaso T. Poggio},
+  title   = {A Canonical Neural Circuit for Cortical Nonlinear Operations},
+  journal = {Neural Computation},
+  year    = {2008},
+  volume  = {20},
+  number  = {6},
+  pages   = {1427--1451},
+}
+@article{NykampRingach2002,
+  author  = {D. Q. Nykamp and D. L. Ringach},
+  title   = {Full Identification of a Linear-Nonlinear System via Cross-Correlation Analysis},
+  journal = {Journal of Vision},
+  year    = {2002},
+  volume  = {2},
+  pages   = {1--11},
+}
+@incollection{cadieu+olshausen:2009,
+  author    = {Charles Cadieu and Bruno Olshausen},
+  title     = {Learning Transformational Invariants from Natural Movies},
+  booktitle = {Advances in Neural Information Processing Systems 21},
+  editor    = {D. Koller and D. Schuurmans and Y. Bengio and L. Bottou},
+  publisher = {MIT Press},
+  year      = {2009},
+  pages     = {209--216},
+}
+@book{DayanAbbott2001,
+  author    = {Peter Dayan and L. F. Abbott},
+  title     = {Theoretical Neuroscience},
+  publisher = {The {MIT} Press},
+  year      = {2001},
+}
+
+@inproceedings{Chechik-MIR2008,
+  author    = {G. Chechik and E. Ie and M. Rehn and S. Bengio and D. Lyon},
+  title     = {Large-scale content-based audio retrieval from text queries},
+  booktitle = {ACM International Conference on Multimedia Information Retrieval (MIR'08)},
+  year      = {2008},
+}
+
+@inproceedings{Bai-ECIR2009,
+  author    = {B. Bai and J. Weston and R. Collobert and D. Grangier},
+  title     = {Supervised Semantic Indexing},
+  booktitle = {European Conference on Information Retrieval (ECIR'09)},
+  year      = {2009},
+}
+
+@article{Attwell+Laughlin-2001,
+  author  = {David Attwell and Simon B. Laughlin},
+  title   = {An energy budget for signaling in the grey matter of the brain},
+  journal = {Journal of Cerebral Blood Flow And Metabolism},
+  volume  = {21},
+  pages   = {1133--1145},
+  year    = {2001},
+}
+
+@article{Lennie-2003,
+  author  = {Peter Lennie},
+  title   = {The cost of cortical computation},
+  journal = {Current Biology},
+  year    = {2003},
+  month   = mar,
+  day     = {18},
+  volume  = {13},
+  number  = {6},
+  pages   = {493--497},
+}
+
+@inproceedings{LowdD2005,
+  author    = {Lowd, Daniel and Domingos, Pedro},
+  title     = {Naive {Bayes} models for probability estimation},
+  booktitle = ICML05,
+  editor    = ICML05ed,
+  publisher = ICML05publ,
+  address   = {New York, NY, USA},
+  location  = {Bonn, Germany},
+  year      = {2005},
+  pages     = {529--536},
+}
+
+@incollection{NairV2009,
+  author    = {Vinod Nair and Geoffrey E Hinton},
+  title     = {Implicit Mixtures of Restricted {Boltzmann} Machines},
+  booktitle = NIPS21,
+  editor    = NIPS21ed,
+  publisher = NIPS21publ,
+  year      = {2009},
+  pages     = {1145--1152},
+}
+
+@incollection{Goodfellow2009,
+  author    = {Ian Goodfellow and Quoc Le and Andrew Saxe and Andrew Ng},
+  title     = {Measuring Invariances in Deep Networks},
+  booktitle = NIPS22,
+  editor    = NIPS22ed,
+  year      = {2009},
+  pages     = {646--654},
+}
+
+@incollection{Xiao2009,
+  author    = {Lin Xiao},
+  title     = {Dual Averaging Method for Regularized Stochastic Learning and Online Optimization},
+  booktitle = {Advances in Neural Information Processing Systems 22},
+  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
+  year      = {2009},
+  pages     = {2116--2124},
+}
+
+@incollection{Kwok2009,
+  author    = {Chonghai Hu and James Kwok and Weike Pan},
+  title     = {Accelerated Gradient Methods for Stochastic Optimization and Online Learning},
+  booktitle = {Advances in Neural Information Processing Systems 22},
+  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
+  year      = {2009},
+  pages     = {781--789},
+}
+
+@article{Nesterov83,
+  author  = {Yu Nesterov},
+  title   = {A method for unconstrained convex minimization problem with the rate of convergence {$O(1/k^2)$}},
+  journal = {Doklady AN SSSR (translated as Soviet Math. Dokl.)},
+  volume  = {269},
+  pages   = {543--547},
+  year    = {1983},
+}
+
+@incollection{Bai2009,
+  author    = {Bing Bai and Jason Weston and David Grangier and Ronan Collobert and Kunihiko Sadamasa and Yanjun Qi and Corinna Cortes and Mehryar Mohri},
+  title     = {Polynomial Semantic Indexing},
+  booktitle = {Advances in Neural Information Processing Systems 22},
+  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
+  year      = {2009},
+  pages     = {64--72},
+}
+
+@incollection{Chechik2009,
+  author    = {Gal Chechik and Uri Shalit and Varun Sharma and Samy Bengio},
+  title     = {An Online Algorithm for Large Scale Image Similarity Learning},
+  booktitle = {Advances in Neural Information Processing Systems 22},
+  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
+  year      = {2009},
+  pages     = {306--314},
+}
+
+@incollection{Klampfl+Maass-2009,
+  author    = {Stefan Klampfl and Wolfgang Maass},
+  title     = {Replacing supervised classification learning by Slow Feature Analysis in spiking neural networks},
+  booktitle = NIPS22,
+  editor    = NIPS22ed,
+  year      = {2009},
+  pages     = {988--996},
+}
+
+
+
+@article{GrandvaletCanuBoucheron97,
+  author  = {Yves Grandvalet and St{\'e}phane Canu and St{\'e}phane Boucheron},
+  title   = {Noise Injection: Theoretical Prospects},
+  journal = {Neural Computation},
+  year    = {1997},
+  volume  = {9},
+  number  = {5},
+  pages   = {1093--1108},
+}
+
+@article{SietsmaDow91,
+  author  = {J. Sietsma and R. Dow},
+  title   = {Creating artificial neural networks that generalize},
+  journal = {Neural Networks},
+  year    = {1991},
+  volume  = {4},
+  number  = {1},
+  pages   = {67--79},
+}
+
+@article{HolmstromKoistinen92,
+  author  = {Lasse Holmstr{\"o}m and Petri Koistinen},
+  title   = {Using additive noise in back-propagation training},
+  journal = {{IEEE} Transactions on Neural Networks},
+  year    = {1992},
+  volume  = {3},
+  number  = {1},
+  pages   = {24--38},
+}
+
+@inproceedings{Baird90,
+  author    = {H. Baird},
+  title     = {Document image defect models},
+  booktitle = {IAPR Workshop on Syntactic and Structural Pattern Recognition},
+  address   = {Murray Hill, NJ.},
+  pages     = {38--46},
+  year      = {1990},
+}
+
+@techreport{Poggio+Vetter92,
+  author      = {T. Poggio and T. Vetter},
+  title       = {Recognition and structure from one {2D} model view: Observations on prototypes, object classes and symmetries},
+  type        = {A.I. Memo},
+  number      = {1347},
+  institution = {Artificial Intelligence Laboratory, Massachusetts Institute of Technology},
+  year        = {1992},
+}
+
+@inproceedings{Scholkopf96invariances,
+  author    = {Bernhard Sch{\"o}lkopf and Chris Burges and Vladimir Vapnik},
+  title     = {Incorporating Invariances in Support Vector Learning Machines},
+  booktitle = {Lecture Notes in Computer Science (Vol 1112), Artificial Neural Networks ICANN'96},
+  editor    = {C. von der Malsburg and W. von Seelen and J. C. Vorbr{\"u}ggen and B. Sendhoff},
+  publisher = {Springer},
+  year      = {1996},
+  pages     = {47--52},
+}
+
+@inproceedings{Cho+Saul09,
+  author    = {Youngmin Cho and Lawrence Saul},
+  title     = {Kernel Methods for Deep Learning},
+  booktitle = NIPS22,
+  editor    = NIPS22ed,
+  publisher = {NIPS Foundation},
+  year      = {2010},
+  pages     = {342--350},
+}
+
+
+@inproceedings{Linsker89,
+  author    = {R. Linsker},
+  title     = {An application of the principle of maximum information preservation to linear systems},
+  booktitle = NIPS1,
+  editor    = NIPS1ed,
+  publisher = NIPS1publ,
+  year      = {1989},
+}
+
+@article{An96AddingNoise,
+  author  = {Guozhong An},
+  title   = {The effects of adding noise during backpropagation training on a generalization performance},
+  journal = {Neural Computation},
+  year    = {1996},
+  volume  = {8},
+  number  = {3},
+  pages   = {643--674},
+}
+
+@article{DruckerLeCun92,
+  author  = {Harris Drucker and Yann LeCun},
+  title   = {Improving generalisation performance using double back-propagation.},
+  journal = {IEEE Transactions on Neural Networks},
+  year    = {1992},
+  volume  = {3},
+  number  = {6},
+  pages   = {991--997},
+}
+
+@article{BellSejnowski-97,
+  author  = {A. Bell and T. J. Sejnowski},
+  title   = {The independent components of natural scenes are edge filters},
+  journal = {Vision Research},
+  year    = {1997},
+  volume  = {37},
+  pages   = {3327--3338},
+}
+
+
+@article{Dokur1997,
+  author  = {Z{\"u}mray Dokur and Tamer {\"O}lmez and Ertugrul Yazgan and Okan K. Ersoy},
+  title   = {Detection of {ECG} waveforms by neural networks},
+  journal = {Medical Engineering \& Physics},
+  year    = {1997},
+  month   = oct,
+  volume  = {19},
+  number  = {8},
+  pages   = {738--741},
+}
+
+@article{Hu1993,
+  author  = {Y. H. Hu and W. J. Tompkins and J. L. Urrusti and V. X. Afonso},
+  title   = {Applications of artificial neural networks for {ECG} signal detection and classification},
+  journal = JEC,
+  volume  = {26s},
+  pages   = {66--73},
+  year    = {1993},
+}
+
+@article{Unser1996,
+  author  = {M. Unser and A. Aldroubi},
+  title   = {A Review of Wavelets in Biomedical Applications},
+  journal = {Proceedings of the {IEEE}},
+  year    = {1996},
+  month   = apr,
+  volume  = {84},
+  number  = {4},
+  pages   = {626--638},
+}
+
+@inproceedings{Povey+Woodland-2002,
+  author    = {D. Povey and P. C. Woodland},
+  title     = {Minimum Phone Error and {I}-smoothing for improved discriminative training},
+  booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP'2002)},
+  publisher = {IEEE},
+  address   = {Orlando, Florida, USA},
+  year      = {2002},
+  volume    = {1},
+  pages     = {I-105--I-108},
+}
+
+@incollection{Susskind2008,
+  author    = {Joshua M. Susskind and Geoffrey E. Hinton and Javier R. Movellan and Adam K. Anderson},
+  title     = {Generating Facial Expressions with Deep Belief Nets},
+  booktitle = {Affective Computing, Emotion Modelling, Synthesis and Recognition},
+  editor    = {V. Kordic},
+  publisher = {ARS Publishers},
+  year      = {2008},
+  pages     = {421--440},
+}
+
+@incollection{Li2005,
+  author    = {Peng Li and Kap Luk Chan and Sheng Fu and S. M. Krishnan},
+  title     = {An Abnormal {ECG} Beat Detection Approach for Long-Term Monitoring of Heart Patients Based on Hybrid Kernel Machine Ensemble},
+  booktitle = {Multiple Classifier Systems},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {3541},
+  publisher = {Springer},
+  address   = {Berlin / Heidelberg},
+  year      = {2005},
+  pages     = {346--355},
+}
+
+@incollection{Hughes_NIPS2003,
+  author    = {Nicholas P. Hughes and Lionel Tarassenko and Stephen J. Roberts},
+  title     = {{Markov} Models for Automated {ECG} Interval Analysis},
+  booktitle = NIPS16,
+  editor    = NIPS16ed,
+  publisher = NIPS16publ,
+  address   = NIPS16addr,
+  year      = {2004},
+  keywords  = {hidden Markov models, Markov models, wavelets, segmentation, probabilistic models, biomedical signal processing, time series},
+}
+
+@inproceedings{Salem2009,
+  author    = {Abdel-Badeeh M. Salem and Kenneth Revett and El-Sayed A. El-Dahshan},
+  title     = {Machine Learning in Electrocardiogram Diagnosis},
+  booktitle = {Proceedings of the International Multiconference on Computer Science and Information Technology},
+  publisher = {IEEE},
+  year      = {2009},
+  volume    = {4},
+  pages     = {429--433},
+}
+
+@book{Clifford2006,
+  author    = {G. D. Clifford and F. Azuaje and P. E. McSharry},
+  title     = {Advanced Methods and Tools for {ECG} Analysis},
+  publisher = {Artech House Publishing},
+  year      = {2006},
+}
+
+% NOTE(review): same work as entry Lin2010; both keys are kept because either may be cited.
+@inproceedings{Lin2009,
+  author    = {Lin, Jessica and Li, Yuan},
+  title     = {Finding Structural Similarity in Time Series Data Using Bag-of-Patterns Representation},
+  booktitle = {SSDBM 2009: Proceedings of the 21st International Conference on Scientific and Statistical Database Management},
+  publisher = {Springer-Verlag},
+  address   = {Berlin, Heidelberg},
+  location  = {New Orleans, LA, USA},
+  year      = {2009},
+  pages     = {461--477},
+  isbn      = {978-3-642-02278-4},
+  doi       = {10.1007/978-3-642-02279-1_33},
+}
+
+@article{Froese2006,
+  author    = {Froese, Tom and Hadjiloucas, Sillas and Galv{\~a}o, Roberto K. H. and Becerra, Victor M. and Coelho, Clarimar Jos{\'e}},
+  title     = {Comparison of extrasystolic {ECG} signal classifiers using discrete wavelet transforms},
+  journal   = {Pattern Recogn. Lett.},
+  year      = {2006},
+  volume    = {27},
+  number    = {5},
+  pages     = {393--407},
+  issn      = {0167-8655},
+  doi       = {10.1016/j.patrec.2005.09.002},
+  publisher = {Elsevier Science Inc.},
+  address   = {New York, NY, USA},
+}
+
+@article{Crowe1992,
+  author  = {J. A. Crowe and N. M. Gibson and M. S. Woolfson and M. G. Somekh},
+  title   = {Wavelet transform as a potential tool for {ECG} analysis and compression},
+  journal = {Journal of Biomedical Engineering},
+  year    = {1992},
+  month   = may,
+  volume  = {14},
+  number  = {3},
+  pages   = {268--272},
+}
+
+@article{Hilton1997,
+  author  = {Michael Hilton},
+  title   = {Wavelet and Wavelet Packet Compression of Electrocardiograms},
+  journal = IEEE_trans_biomed,
+  volume  = {44},
+  pages   = {394--402},
+  year    = {1997},
+}
+
+@article{Li1995,
+  author  = {C. Li and C. Zheng and C. Tai},
+  title   = {Detection of {ECG} characteristic points using wavelet transforms},
+  journal = IEEE_trans_biomed,
+  year    = {1995},
+  month   = jan,
+  volume  = {42},
+  number  = {1},
+  pages   = {21--28},
+}
+
+@article{Polat2007,
+  author   = {Kemal Polat and Salih G{\"u}nes},
+  title    = {Detection of {ECG} Arrhythmia using a differential expert system approach based on principal component analysis and least square support vector machine},
+  journal  = {Applied Mathematics and Computation},
+  year     = {2007},
+  volume   = {186},
+  number   = {1},
+  pages    = {898--906},
+  issn     = {0096-3003},
+  doi      = {10.1016/j.amc.2006.08.020},
+  url      = {http://www.sciencedirect.com/science/article/B6TY8-4KXDWBF-5/2/a9e1d7e2dfc4c88935386ea04ca9cb94},
+  keywords = {ECG Arrhythmia, Principal component analysis (PCA), Least square support vector machine (LSSVM), ROC curves},
+}
+
+@article{Song2005,
+  author  = {Mi Hye Song and Jeon Lee and Sung Pil Cho and Kyoung Joung Lee and Sun Kook Yoo},
+  title   = {Support Vector Machine Based Arrhythmia Classification Using Reduced Features},
+  journal = IJCAS,
+  year    = {2005},
+  month   = dec,
+  volume  = {3},
+  number  = {4},
+  pages   = {571--579},
+}
+
+@article{Ubeyli2009,
+  author    = {Elif Derya {\"U}beyli},
+  title     = {Combining recurrent neural networks with eigenvector methods for classification of {ECG} beats},
+  journal   = DSP,
+  year      = {2009},
+  volume    = {19},
+  number    = {2},
+  pages     = {320--329},
+  issn      = {1051-2004},
+  doi       = {10.1016/j.dsp.2008.09.002},
+  publisher = {Academic Press, Inc.},
+  address   = {Orlando, FL, USA},
+}
+
+@article{Ubeyli2007,
+  author  = {Elif Derya {\"U}beyli},
+  title   = {{ECG} beats classification using multiclass support vector machines with error correcting output codes},
+  journal = DSP,
+  year    = {2007},
+  volume  = {17},
+  pages   = {675--684},
+}
+
+@article{Soman2005,
+  author  = {T. Soman and P. O. Bobbie},
+  title   = {Classification of Arrhythmia Using Machine Learning Techniques},
+  journal = {WSEAS Transactions on Computers},
+  year    = {2005},
+  month   = jun,
+  volume  = {4},
+  number  = {6},
+  pages   = {548--552},
+}
+
+@inproceedings{Chengwei2006,
+  author    = {Li Chengwei and Wang Shoubin and Xu Aijun and Peng Hui},
+  title     = {Clinical Diagnosis of Cardiac Disease Based on Support Vector Machine},
+  booktitle = {World Congress on Medical Physics and Biomedical Engineering},
+  editor    = {R. Magjarevic and J. H. Nagel},
+  series    = {IFMBE Proceedings},
+  volume    = {14},
+  publisher = {Springer Berlin Heidelberg},
+  year      = {2006},
+  pages     = {1273--1276},
+}
+
+@article{Chiu2005,
+  author  = {Chuang-Chien Chiu and Tong-Hong Lin and Ben-Yi Liau},
+  title   = {Using correlation coefficient in {ECG} waveform for arrhythmia detection},
+  journal = BME,
+  year    = {2005},
+  month   = jun,
+  volume  = {17},
+  number  = {3},
+  pages   = {147--152},
+}
+
+@article{Silipo1998,
+  author  = {Rosaria Silipo and Carlo Marchesi},
+  title   = {Artificial Neural Networks for Automatic {ECG} Analysis},
+  journal = IEEE_trans_SP,
+  year    = {1998},
+  month   = may,
+  volume  = {46},
+  number  = {5},
+  pages   = {1417--1425},
+}
+
+@article{Osowski2004,
+  author  = {Stanislaw Osowski and Linh Tran Hoai and Tomasz Markiewicz},
+  title   = {Support Vector Machine-Based Expert System for Reliable Heartbeat Recognition},
+  journal = IEEE_trans_biomed,
+  year    = {2004},
+  month   = apr,
+  volume  = {51},
+  number  = {4},
+  pages   = {582--589},
+}
+
+@article{PhysioNet,
+  author  = PhysioNetAuthors,
+  title   = {{PhysioBank, PhysioToolkit, and PhysioNet}: Components of a New Research Resource for Complex Physiologic Signals},
+  journal = {Circulation},
+  volume  = {101},
+  number  = {23},
+  pages   = {e215--e220},
+  year    = PhysioNetYear,
+  note    = PhysioNetNote,
+}
+
+@article{Lin2007,
+  author               = {Lin, Jessica and Keogh, Eamonn and Wei, Li and Lonardi, Stefano},
+  title                = {Experiencing {SAX}: a novel symbolic representation of time series},
+  journal              = DMKD,
+  year                 = {2007},
+  month                = oct,
+  day                  = {18},
+  volume               = {15},
+  number               = {2},
+  pages                = {107--144},
+  doi                  = {10.1007/s10618-007-0064-z},
+  url                  = {http://dx.doi.org/10.1007/s10618-007-0064-z},
+  keywords             = {simulation},
+  citeulike-article-id = {2821475},
+  citeulike-linkout-0  = {http://dblp.uni-trier.de/rec/bibtex/journals/datamine/LinKWL07},
+  citeulike-linkout-1  = {http://dx.doi.org/10.1007/s10618-007-0064-z},
+  citeulike-linkout-2  = {http://www.springerlink.com/content/g69808822l82t325},
+  posted-at            = {2008-05-21 23:56:04},
+  priority             = {2},
+}
+
+% NOTE(review): same work as entry Lin2009 (published 2009 despite the key); both keys
+% are kept because either may be cited.
+@inproceedings{Lin2010,
+  author    = {Lin, Jessica and Li, Yuan},
+  title     = {Finding Structural Similarity in Time Series Data Using Bag-of-Patterns Representation},
+  booktitle = SSDBM2009,
+  publisher = {Springer-Verlag},
+  address   = {Berlin, Heidelberg},
+  location  = {New Orleans, LA, USA},
+  year      = {2009},
+  pages     = {461--477},
+  isbn      = {978-3-642-02278-4},
+  doi       = {10.1007/978-3-642-02279-1_33},
+}
+
+@article{Ham1996,
+  author  = {F. M. Ham and Soowhan Han},
+  title   = {Classification of cardiac arrhythmias using fuzzy {ARTMAP}},
+  journal = IEEE_trans_biomed,
+  year    = {1996},
+  month   = apr,
+  volume  = {43},
+  number  = {4},
+  pages   = {425--429},
+}
+@article{Engin2004,
+  author   = {Mehmet Engin},
+  title    = {{ECG} beat classification using neuro-fuzzy network},
+  journal  = PRL,
+  year     = {2004},
+  volume   = {25},
+  number   = {15},
+  pages    = {1715--1722},
+  issn     = {0167-8655},
+  doi      = {10.1016/j.patrec.2004.06.014},
+  url      = {http://www.sciencedirect.com/science/article/B6V15-4D0Y5TH-2/2/b83f364f61d79f96abeb1bc1b1898ab9},
+  keywords = {ECG beat classification, MIT/BIH database, Neuro-fuzzy networks, Higher-order statistics, Wavelet transform, AR modelling, Pattern recognition},
+}
+
+@article{Turaga2010,
+  author  = {S. C. Turaga and J. F. Murray and V. Jain and F. Roth and M. Helmstaedter and K. Briggman and W. Denk and H. S. Seung},
+  title   = {Convolutional networks can learn to generate affinity graphs for image segmentation},
+  journal = {Neural Computation},
+  year    = {2010},
+  volume  = {22},
+  pages   = {511--538},
+}
+
+@article{Hahnloser-2003,
+  author  = {Richard H. R. Hahnloser and H. Sebastian Seung and J. J. Slotine},
+  title   = {Permitted and forbidden sets in symmetric threshold-linear networks},
+  journal = {Neural Computation},
+  year    = {2003},
+  volume  = {15},
+  pages   = {621--638},
+}
+
+@techreport{Jenatton-2009,
+  author      = {Jenatton, R. and Audibert, J.-Y. and Bach, F.},
+  title       = {Structured Variable Selection with Sparsity-Inducing Norms},
+  institution = {arXiv:0904.3523},
+  year        = {2009},
+}
+
+@article{Erhan2010,
+  author   = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Manzagol, Pierre-Antoine and Vincent, Pascal and Bengio, Samy},
+  title    = {Why Does Unsupervised Pre-training Help Deep Learning?},
+  journal  = jmlr,
+  year     = {2010},
+  month    = feb,
+  volume   = {11},
+  pages    = {625--660},
+  abstract = {Much recent research has been devoted to learning algorithms for deep architectures such as Deep Belief Networks and stacks of auto-encoder variants, with impressive results obtained in several areas, mostly on vision and language datasets. The best results obtained on supervised learning tasks involve an unsupervised learning component, usually in an unsupervised pre-training phase. Even though these new algorithms have enabled training deep models, many questions remain as to the nature of this difficult learning problem. The main question investigated here is the following: why does unsupervised pre-training work and why does it work so well? Answering these questions is important if learning in deep architectures is to be further improved. We propose several explanatory hypotheses and test them through extensive simulations. We empirically show the influence of pre-training with respect to architecture depth, model capacity, and number of training examples. The experiments confirm and clarify the advantage of unsupervised pre-training. The results suggest that unsupervised pre-training guides the learning towards basins of attraction of minima that are better in terms of the underlying data distribution; the evidence from these results supports a regularization explanation for the effect of pre-training.},
+}
+
+% Article entry; the journal and note values come from the FTML and
+% Bengio2009FTML_note string macros, which this entry expects to be defined
+% elsewhere in the file.
+% NOTE(review): the abstract uses the \insist macro; it has to be defined in
+% the document if a style ever typesets abstracts -- confirm.
+@article{Bengio2009FTML,
+  author   = {Bengio, Yoshua},
+  title    = {Learning deep architectures for {AI}},
+  journal  = FTML,
+  year     = {2009},
+  volume   = {2},
+  number   = {1},
+  pages    = {1--127},
+  note     = Bengio2009FTML_note,
+  abstract = {Theoretical results suggest that in order to learn the kind of
+complicated functions that can represent high-level abstractions (e.g. in
+vision, language, and other AI-level tasks), one may need {\insist deep
+architectures}. Deep architectures are composed of multiple levels of non-linear
+operations, such as in neural nets with many hidden layers or in complicated
+propositional formulae re-using many sub-formulae. Searching the
+parameter space of deep architectures is a difficult task, but
+learning algorithms such as those for Deep Belief Networks have recently been proposed
+to tackle this problem with notable success, beating the state-of-the-art
+in certain areas. This paper discusses the motivations and principles regarding 
+learning algorithms for deep architectures,  in particular those exploiting as
+building blocks unsupervised learning of single-layer models such as Restricted {Boltzmann} Machines,
+used to construct deeper models such as Deep Belief Networks.},
+}
+
+% Journal article; the journal name comes from the IEEE_trans_NN string
+% macro, which this entry expects to be defined elsewhere in the file.
+% optnote, topics and cat are local annotation fields that standard styles
+% ignore.
+@article{Bengio1994ITNN,
+  author   = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
+  title    = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
+  journal  = IEEE_trans_NN,
+  volume   = {5},
+  number   = {2},
+  year     = {1994},
+  pages    = {157--166},
+  abstract = {Recurrent neural networks can be used to map input sequences to output sequences, such as for recognition, production or prediction problems. However, practical difficulties have been reported in training recurrent neural networks to perform tasks in which the temporal contingencies present in the input/output sequences span long intervals. We show why gradient based learning algorithms face an increasingly difficult problem as the duration of the dependencies to be captured increases. These results expose a trade-off between efficient learning by gradient descent and latching on information for long periods. Based on an understanding of this problem, alternatives to standard gradient descent are considered.},
+  optnote  = {(Special Issue on Recurrent Neural Networks)},
+  topics   = {LongTerm},
+  cat      = {J},
+}
+
+% Review article on software QRS detection; the journal name comes from the
+% eng_med_bio string macro, which this entry expects to be defined elsewhere.
+% The citation key says 1992 while the year field is 2002; the key is kept
+% unchanged so that existing citations keep resolving.
+@article{Kohler1992,
+  author               = {Kohler, B. U. and Hennig, C. and Orglmeister, R.},
+  title                = {The principles of software {QRS} detection},
+  journal              = eng_med_bio,
+  volume               = {21},
+  number               = {1},
+  pages                = {42--57},
+  year                 = {2002},
+  url                  = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=993193},
+  keywords             = {detector, ecg\_processing, qrs, qt\_interval, review\_article, rr\_interval},
+  abstract             = {The QRS complex is the most striking waveform within the electrocardiogram (ECG). Since it reflects the electrical activity within the heart during the ventricular contraction, the time of its occurrence as well as its shape provide much information about the current state of the heart. Due to its characteristic shape it serves as the basis for the automated determination of the heart rate, as an entry point for classification schemes of the cardiac cycle, and often it is also used in ECG data compression algorithms. In that sense, QRS detection provides the fundamentals for almost all automated ECG analysis algorithms. Software QRS detection has been a research topic for more than 30 years. The evolution of these algorithms clearly reflects the great advances in computer technology. Within the last decade many new approaches to QRS detection have been proposed; for example, algorithms from the field of artificial neural networks, genetic algorithms, wavelet transforms, filter banks as well as heuristic methods mostly based on nonlinear transforms. The authors provide an overview of these recent developments as well as of formerly proposed algorithms.},
+  citeulike-article-id = {546409},
+  citeulike-linkout-0  = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=993193},
+  posted-at            = {2007-11-25 20:38:19},
+  priority             = {2},
+}
+
+@article{Thomas2006,
+author = {Julien Thomas and Cedric Rose and Francois Charpillet},
+title = {A Multi-HMM Approach to ECG Segmentation},
+journal = ICTAI06, 
+volume = {0},
+issn = {1082-3409},
+year = {2006},
+pages = {609-616},
+doi = {http://doi.ieeecomputersociety.org/10.1109/ICTAI.2006.17},
+publisher = {IEEE Computer Society},
+address = {Los Alamitos, CA, USA},
+}
+
+% Conference paper; the proceedings title comes from the iapr string macro,
+% which this entry expects to be defined elsewhere in the file.
+% The accent is written as {\'e} so BibTeX treats it as a single special
+% character when sorting and building labels.
+@inproceedings{Cortes+al-2000,
+  author    = {P{\'e}rez-Cortes, Juan Carlos and Llobet, Rafael and Arlandis, Joaquim},
+  title     = {Fast and Accurate Handwritten Character Recognition Using Approximate Nearest Neighbours Search on Large Databases},
+  booktitle = iapr,
+  year      = {2000},
+  pages     = {767--776},
+  isbn      = {3-540-67946-4},
+  publisher = {Springer-Verlag},
+  address   = {London, UK},
+}
+
+
+% Journal article (full variant); the journal name comes from the ieeetpami
+% string macro, which this entry expects to be defined elsewhere in the file.
+% Initials are space-separated so abbreviating styles keep both of them, and
+% the page range uses an en-dash.
+@article{Oliveira+al-2002,
+  author  = {Oliveira, L. S. and Sabourin, R. and Bortolozzi, F. and Suen, C. Y.},
+  title   = {Automatic recognition of handwritten numerical strings: a recognition and verification strategy},
+  journal = ieeetpami,
+  volume  = {24},
+  number  = {11},
+  pages   = {1438--1454},
+  month   = nov,
+  year    = {2002},
+  doi     = {10.1109/TPAMI.2002.1046154},
+  issn    = {0162-8828},
+}
+
+% Short variant of the Oliveira et al. 2002 entry (no month, DOI or ISSN).
+% The journal name comes from the ieeetpami string macro, which this entry
+% expects to be defined elsewhere in the file. Initials are space-separated
+% and the page range uses an en-dash.
+@article{Oliveira+al-2002-short,
+  author  = {Oliveira, L. S. and Sabourin, R. and Bortolozzi, F. and Suen, C. Y.},
+  title   = {Automatic recognition of handwritten numerical strings: a recognition and verification strategy},
+  journal = ieeetpami,
+  volume  = {24},
+  number  = {11},
+  pages   = {1438--1454},
+  year    = {2002},
+}
+
+% Conference paper (full DBLP variant). It cross-references the proceedings
+% entry DBLP:conf/icdar/2003, which classic BibTeX requires to appear later
+% in the file than this entry. The page range uses an en-dash.
+@inproceedings{SimardSP03,
+  author    = {Patrice Simard and
+               David Steinkraus and
+               John C. Platt},
+  title     = {Best Practices for Convolutional Neural Networks Applied
+               to Visual Document Analysis},
+  booktitle = {ICDAR},
+  year      = {2003},
+  pages     = {958--962},
+  ee        = {http://csdl.computer.org/comp/proceedings/icdar/2003/1960/02/196020958abs.htm},
+  crossref  = {DBLP:conf/icdar/2003},
+  bibsource = {DBLP, http://dblp.uni-trier.de}
+}
+
+% Short variant of the Simard et al. 2003 entry (no cross-reference or DBLP
+% metadata). The page range uses an en-dash.
+@inproceedings{SimardSP03-short,
+  author    = {Patrice Simard and
+               David Steinkraus and
+               John C. Platt},
+  title     = {Best Practices for Convolutional Neural Networks Applied
+               to Visual Document Analysis},
+  booktitle = {ICDAR},
+  year      = {2003},
+  pages     = {958--962},
+}
+
+% Conference paper. The recorded range 906--1911 would span a thousand pages,
+% so the start page had lost its leading digit; corrected to 1906--1911.
+% NOTE(review): confirm the page numbers against the published proceedings.
+@inproceedings{Milgram+al-2005,
+  author    = {Milgram, J. and Cheriet, M. and Sabourin, R.},
+  title     = {Estimating accurate multi-class probabilities with support vector machines},
+  booktitle = {Int. Joint Conf. on Neural Networks},
+  year      = {2005},
+  pages     = {1906--1911},
+  location  = {Montreal, Canada},
+}
+
+% Proceedings volume used as the crossref parent of SimardSP03; classic
+% BibTeX needs this parent to come after the entries that reference it.
+@proceedings{DBLP:conf/icdar/2003,
+  title     = {7th International Conference on Document Analysis and Recognition
+               (ICDAR 2003), 2-Volume Set, 3-6 August 2003, Edinburgh,
+               Scotland, UK},
+  booktitle = {ICDAR},
+  year      = {2003},
+  publisher = {IEEE Computer Society},
+  isbn      = {0-7695-1960-1},
+  bibsource = {DBLP, http://dblp.uni-trier.de},
+}
+
+
+% Journal article; the journal name comes from the jprr string macro, which
+% this entry expects to be defined elsewhere in the file.
+% The former fourth "author", Catolica Parana, was an affiliation picked up
+% by an automatic export and has been removed from the author list.
+% NOTE(review): the published author list may also include Philippe
+% Henniges -- verify against the journal before relying on this entry.
+@article{Granger+al-2007,
+  author  = {Eric Granger and Robert Sabourin and Luiz S. Oliveira},
+  title   = {Supervised Learning of Fuzzy {ARTMAP} Neural Networks Through Particle Swarm Optimization},
+  journal = jprr,
+  year    = {2007},
+  volume  = {2},
+  number  = {1},
+  pages   = {27--60},
+}
+
+% Conference paper on evaluating non-expert annotations for natural language
+% tasks.
+@inproceedings{SnowEtAl2008,
+  author    = {Snow, R. and O'Connor, B. and Jurafsky, D. and Ng, A.},
+  title     = {Cheap and Fast -- But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks},
+  booktitle = {Proc. Empirical Methods in NLP},
+  year      = {2008},
+  pages     = {254--263},
+}
+
+% NIST technical report. The truncated duplicate author "Gerald T. C" (an
+% export artefact following Gerald T. Candela) is removed, the report number
+% is split out of the institution field, and the CiteSeerX identifier is
+% moved out of the doi field because it is not a DOI.
+@techreport{Garris94+al-1994,
+  author      = {Michael D. Garris and James L. Blue and Gerald T. Candela and Darrin L. Dimmick and Jon Geist and Patrick J. Grother and Stanley A. Janet and Charles L. Wilson},
+  title       = {{NIST} Form-Based Handprint Recognition System},
+  number      = {NISTIR 5469},
+  institution = {National Institute of Standards and Technology},
+  year        = {1994},
+  note        = {Report and CD-ROM},
+  citeseerx   = {10.1.1.45.1560},
+}
+
+% Workshop paper. The product name in the title is brace-protected so that
+% sentence-casing styles do not lowercase it.
+@inproceedings{SorokinAndForsyth2008,
+  author    = {Sorokin, A. and Forsyth, D.},
+  title     = {Utility data annotation with {Amazon} {Mechanical} {Turk}},
+  booktitle = {CVPR Workshops},
+  year      = {2008},
+  pages     = {1--8},
+}
+
+% Reference for NIST Special Database 19. The entry used to be closed on the
+% bibsource line and then followed by a second, unmatched closing brace; it
+% now has exactly one closing brace.
+@inproceedings{Grother-1995,
+  author    = {Grother, P. J.},
+  title     = {Handprinted Forms and Character Database, {NIST} Special Database 19},
+  booktitle = {National Institute of Standards and Technology (NIST) Intelligent Systems Division (NISTIR)},
+  year      = {1995},
+  bibsource = {http://www.visionbib.com/bibliography/char1015.html#TT105853},
+}
+
+% Conference paper on combining labels from labelers of unknown expertise.
+@inproceedings{whitehill09,
+  author    = {J. Whitehill and P. Ruvolo and T. Wu and J. Bergsma and J. Movellan},
+  title     = {Whose Vote Should Count More: Optimal Integration of Labels from Labelers of Unknown Expertise},
+  booktitle = {NIPS 22},
+  year      = 2009,
+  pages     = {2035--2043},
+}
+
+% Placeholder technical report with anonymised author, title and institution.
+% The author is double-braced so BibTeX treats it as one indivisible name
+% instead of parsing it as first name "Anonymous", last name "authors".
+@techreport{ift6266-tr-anonymous,
+  author      = {{Anonymous authors}},
+  title       = {Anonymous title},
+  institution = {University X.},
+  year        = 2010,
+}