view writeup/mlj_submission/ift6266_ml.bib @ 632:5541056d3fb0

merge
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Sat, 19 Mar 2011 22:49:33 -0400
parents b1be957dd1be
children
line wrap: on
line source

%%WARNING: READ THE README FILE BEFORE ANY MODIFICATION!!!


%%submitted papers
%%%

@article{Bergstra+Bengio+Louradoj-2008sub,
  author  = {J. Bergstra and Y. Bengio and J. Louradour},
  title   = {Suitability of Complex Cell Models for Object Categorization},
  journal = {Computational Neuroscience},
  year    = {2008},
  note    = {Rejected.},
}
@article{Bergstra+Bengio+Louradoj-2009sub,
  author  = {J. Bergstra and Y. Bengio and J. Louradour},
  title   = {Suitability of Complex Cell Models for Object Categorization},
  journal = {Neural Computation},
  year    = {2009},
  note    = {Submitted.},
}
@article{Chapados+Bengio-2008sub,
  author  = {N. Chapados and Y. Bengio},
  title   = {Forecasting and Trading Commodity Contract Spreads with {G}aussian Processes},
  journal = {International Journal of Forecasting},
  year    = {2008},
  note    = {Submitted.},
}
@article{Chapados+Bengio-2008sub2,
  author  = {N. Chapados and Y. Bengio},
  title   = {Training Graphs of Learning Modules for Sequential Data},
  journal = {ACM Transactions on Knowledge Discovery from Data},
  year    = {2008},
  note    = {Submitted.},
}

%%%
%%accepted or published papers
%%%

% Database documentation with an issuing institution and no journal, so it is
% recorded as a technical report. The author is in "Last, First" form so that
% "Grother" is parsed as the surname.
@techreport{Grother,
  author      = {Grother, Patrick J.},
  title       = {{NIST} special database. Handprinted forms and characters database},
  institution = {National Institute of Standards and Technology},
  year        = {1995},
}

% "De Mori" is given in comma form: with a capitalised particle, the
% "First Last" form would make "Mori" alone the surname.
@incollection{Trentin+al-2002,
  author    = {E. Trentin and F. Brugnara and Y. Bengio and C. Furlanello and De Mori, R.},
  editor    = {R. Daniloff},
  title     = {Statistical and Neural Network Models for Speech Recognition},
  booktitle = {Connectionist Approaches to Clinical Problems in Speech and Language},
  pages     = {213--264},
  publisher = {Lawrence Erlbaum},
  year      = {2002},
}

% NOTE(review): the original carried two publisher names, "Lawrence Erlbaum"
% in publisher and "Kluwer" in address. Kluwer is kept as the publisher on the
% assumption that the other was copied from a neighbouring entry -- confirm
% against the printed volume.
@incollection{Bengio+grandvalet-2004,
  author    = {Y. Bengio and Y. Grandvalet},
  editor    = {P. Duchesne and B. Remillard},
  title     = {Bias in Estimating the Variance of K-Fold Cross-Validation},
  booktitle = {Statistical Modeling and Analysis for Complex Data Problem},
  pages     = {75--95},
  publisher = {Kluwer},
  year      = {2004},
}

% Full variant (with editors) of the entry keyed Dugas+al-2004-short.
% Editor initials are space-separated so styles abbreviate both of them.
@incollection{Dugas+al-2004,
  author    = {C. Dugas and Y. Bengio and N. Chapados and P. Vincent and G. Denoncourt and C. Fournier},
  editor    = {L. Jain and A. F. Shapiro},
  title     = {Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking},
  booktitle = {Intelligent and Other Computational Techniques in Insurance: Theory and Applications},
  publisher = {World Scientific Publishing Company},
  year      = {2004},
}

% Compact variant (no editors) of the entry keyed Dugas+al-2004.
@incollection{Dugas+al-2004-short,
  author    = {C. Dugas and Y. Bengio and N. Chapados and P. Vincent and G. Denoncourt and C. Fournier},
  title     = {Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking},
  booktitle = {Intelligent and Other Computational Techniques in Insurance: Theory and Applications},
  publisher = {World Scientific Publishing Company},
  year      = {2004},
}

% The series name lives in its own field; volume holds only the number, so
% styles can format "volume N of SERIES" themselves.
@inproceedings{Collobert+Bengio+Bengio-2002b,
  author    = {R. Collobert and Y. Bengio and S. Bengio},
  editor    = {S. W. Lee and A. Verri},
  title     = {Scaling Large Learning Problems with Hard Parallel Mixtures},
  booktitle = SVM02,
  series    = {Lecture Notes in Computer Science},
  volume    = {2388},
  pages     = {8--23},
  publisher = {Springer-Verlag},
  year      = {2002},
}

% Journal version; the entry keyed Collobert+Bengio+Bengio-2003-small is its
% compact variant.
@article{Collobert+Bengio+Bengio-2003,
  author  = {R. Collobert and Y. Bengio and S. Bengio},
  title   = {Scaling Large Learning Problems with Hard Parallel Mixtures},
  journal = ijprai,
  volume  = {17},
  number  = {3},
  pages   = {349--365},
  year    = {2003},
}

% Compact variant: abbreviated journal name, issue folded into the volume field.
@article{Collobert+Bengio+Bengio-2003-small,
  author  = {R. Collobert and Y. Bengio and S. Bengio},
  title   = {Scaling Large Learning Problems with Hard Parallel Mixtures},
  journal = {Int. J. Pattern Recognition and Artificial Intelligence},
  volume  = {17(3)},
  pages   = {349--365},
  year    = {2003},
}

% NOTE(review): an IEEE Press publisher next to the NIPS12 macros looks
% inconsistent -- confirm the venue of this paper.
@inproceedings{Bengio+Chapados-2002,
  author    = {Y. Bengio and N. Chapados},
  editor    = NIPS12ed,
  title     = {Metric-based Model Selection for Time-Series Forecasting},
  booktitle = NIPS12,
  pages     = {13--24},
  publisher = {IEEE Press},
  year      = {2002},
}

% Full variant of the entry keyed Bengio+Takeuchi+Kanamori-2002-short.
@inproceedings{Bengio+Takeuchi+Kanamori-2002,
  author    = {Y. Bengio and I. Takeuchi and K. Kanamori},
  title     = {The Challenge of Non-Linear Regression on Large Datasets with Asymmetric Heavy Tails},
  booktitle = JSM02,
  pages     = {193--205},
  publisher = {American Statistical Association publ.},
  year      = {2002},
}

% Compact variant (no publisher or pages) of Bengio+Takeuchi+Kanamori-2002.
@inproceedings{Bengio+Takeuchi+Kanamori-2002-short,
  author    = {Y. Bengio and I. Takeuchi and K. Kanamori},
  title     = {The Challenge of Non-Linear Regression on Large Datasets with Asymmetric Heavy Tails},
  booktitle = JSM02,
  year      = {2002},
}

% Three authors, each separated by the keyword "and".
@inproceedings{Collobert+Bengio+Bengio-2002,
  author    = {R. Collobert and S. Bengio and Y. Bengio},
  editor    = NIPS14ed,
  title     = {A Parallel Mixture of {SVM}s for Very Large Scale Problems},
  booktitle = NIPS14,
  pages     = {633--640},
  year      = {2002},
}

@inproceedings{Bhattacharya+Getoor+Bengio-2004,
  author    = {I. Bhattacharya and L. Getoor and Y. Bengio},
  title     = {Unsupervised Sense Disambiguation Using Bilingual Probabilistic Models},
  booktitle = {Conference of the Association for Computational Linguistics (ACL'04)},
  year      = {2004},
}
% Emphasis and final punctuation are left to the bibliography style; the
% accented letter is written as a LaTeX escape for 8-bit BibTeX.
% NOTE(review): the key says 2008 while the year field says 2004; the key is
% kept because documents may cite it.
@inproceedings{Boufaden+Bengio+Lapalme-2008,
  author    = {N. Boufaden and Y. Bengio and G. Lapalme},
  title     = {Approche statistique pour le rep{\'e}rage de mots informatifs dans les textes oraux},
  booktitle = {TALN'2004, Traitement Automatique du Langage Naturel},
  year      = {2004},
}
% Page range uses the double hyphen so it typesets as an en dash.
@inproceedings{Chapados+Bengio-2006,
  author    = {N. Chapados and Y. Bengio},
  title     = {The K Best-Paths Approach to Approximate Dynamic Programming with Application to Portfolio Optimization},
  booktitle = AI06,
  pages     = {491--502},
  year      = {2006},
}
% The publisher's city is carried by the address field only.
@inproceedings{Rivest+Bengio+Kalaska-2005,
  author    = {F. Rivest and Y. Bengio and J. Kalaska},
  editor    = NIPS17ed,
  title     = {Brain Inspired Reinforcement Learning},
  booktitle = NIPS17,
  pages     = {1129--1136},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2005},
}

% Full variant of the entry keyed Bengio+Grandvalet-NIPS-2004-short.
% The publisher's city is carried by the address field only.
@inproceedings{Bengio+Grandvalet-NIPS-2004,
  author    = {Y. Bengio and Y. Grandvalet},
  editor    = NIPS16ed,
  title     = {No Unbiased Estimator of the Variance of K-Fold Cross-Validation},
  booktitle = NIPS16,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2004},
}

% Compact variant (no editor or address) of Bengio+Grandvalet-NIPS-2004.
@inproceedings{Bengio+Grandvalet-NIPS-2004-short,
  author    = {Y. Bengio and Y. Grandvalet},
  title     = {No Unbiased Estimator of the Variance of K-Fold Cross-Validation},
  booktitle = NIPS16,
  publisher = {MIT Press, Cambridge},
  year      = {2004},
}

@article{Zaccaro-et-al-2005,
  author  = {Maria Clara Zaccaro and Hong Boon Lee and Mookda Pattarawarapan
             and Zebin Xia and Antoine Caron and Pierre-Jean L'Heureux
             and Yoshua Bengio and Kevin Burgess and H. Uri Saragovi},
  title   = {Selective Small Molecule Peptidomimetic Ligands of {TrkC} and {TrkA} Receptors Afford Discrete or Complete Neurotrophic Activities},
  journal = {Chemistry \& Biology},
  volume  = {12},
  number  = {9},
  pages   = {1015--1028},
  year    = {2005},
}

@article{63a:man,
  author  = {B. Mandelbrot},
  title   = {The variation of certain speculative prices},
  journal = {Journal of Business},
  volume  = {36},
  pages   = {394--419},
  year    = {1963},
  annote  = {Référence pour les distributions stables en finance},
}

@article{65a:fam,
  author  = {E. F. Fama},
  title   = {The behavior of stock market prices},
  journal = {Journal of Business},
  volume  = {38},
  pages   = {34--105},
  year    = {1965},
  annote  = {Autre référence pour les distributions stables en finance},
}

@article{96a:cor:gon:har,
  author  = {R. M. Corless and G. H. Gonnet and D. E. G. Hare and D. J. Jeffrey and D. E. Knuth},
  title   = {On the {Lambert} {W} Function},
  journal = {Advances in Computational Mathematics},
  volume  = {5},
  pages   = {329--359},
  year    = {1996},
  annote  = {Sert à résoudre les équations où une variable et son logarithme (ou exponentielle) apparaissent simultanément},
}

% The umlaut is written as a brace-wrapped LaTeX escape so BibTeX treats it as
% one letter when sorting and labelling.
@book{97b:emb:klu:mik,
  author    = {P. Embrechts and C. Kl{\"u}ppelberg and T. Mikosch},
  title     = {Modelling Extremal Events},
  series    = {Applications of Mathematics, Stochastic Modelling and Applied Probability},
  publisher = {Springer},
  year      = {1997},
  annote    = {book on evt: theory, statistical methods for gev},
}

@article{99a:kan:ser,
  author  = {S. Kang and R. F. Serfozo},
  title   = {Extreme values of phase-type and mixed random variables with parallel-processing examples},
  journal = {Journal of Applied Probability},
  volume  = {36},
  pages   = {194--210},
  year    = {1999},
  annote  = {limiting distribution of the maximum of r.v. i.i.d from a mixture is determined by the component of the mixture that has a dominant tail},
}

% Institution spelled as the group names itself (British "Centre").
@techreport{Abdallah+Plumbley-06,
  author      = {Samer Abdallah and Mark Plumbley},
  title       = {Geometry Dependency Analysis},
  institution = {Centre for Digital Music, Queen Mary, University of London},
  number      = {C4DM-TR06-05},
  year        = {2006},
}

@article{Abe+Warmuth92,
  author  = {N. Abe and M. K. Warmuth},
  title   = {On the Computational Complexity of Approximating Distributions by Probabilistic Automata},
  journal = {Machine Learning},
  volume  = {9},
  month   = jul,
  year    = {1992},
}

@article{Abu-Mostafa-hints,
  author  = {Y. S. Abu-Mostafa},
  title   = {Learning from Hints in Neural Networks},
  journal = jcomp,
  volume  = {6},
  pages   = {192--198},
  year    = {1990},
}

@article{Abu-Mostafa87,
  author  = {Y. S. Abu-Mostafa and D. Psaltis},
  title   = {Optical Neural Computers},
  journal = sciam,
  volume  = {256},
  pages   = {88--95},
  month   = mar,
  year    = {1987},
}

@article{Abu-Mostafa89,
  author  = {Y. S. Abu-Mostafa},
  title   = {The {Vapnik}-{Chervonenkis} Dimension: Information versus Complexity in Learning},
  journal = nc,
  volume  = {1},
  pages   = {312--317},
  year    = {1989},
}

@article{abumostafa95,
  author  = {Yaser S. Abu-Mostafa},
  title   = {Hints},
  journal = {Neural Computation},
  volume  = {7},
  number  = {4},
  pages   = {639--671},
  month   = jul,
  year    = {1995},
}

@misc{Ackerman+BenDavid-2008,
  author = {Margareta Ackerman and Shai Ben-David},
  title  = {Clustering Quality Measures},
  note   = {{\em Snowbird Learning Workshop}},
  year   = {2008},
}

@article{Ackley85,
  author  = {D. H. Ackley and G. E. Hinton and T. J. Sejnowski},
  title   = {A Learning Algorithm for {Boltzmann} Machines},
  journal = cogsci,
  volume  = {9},
  pages   = {147--169},
  year    = {1985},
}

@inproceedings{Ackley90,
  author    = {D. H. Ackley and M. S. Littman},
  editor    = NIPS2ed,
  title     = {Generalization and Scaling in Reinforcement Learning},
  booktitle = NIPS2,
  pages     = {550--557},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  year      = {1990},
}

% Journal name spelled out with the SIGART acronym in capitals.
@article{ACM:Rohwer94,
  author  = {R. Rohwer},
  title   = {The time dimension of neural network models},
  journal = {ACM SIGART Bulletin},
  volume  = {5},
  number  = {3},
  pages   = {36--44},
  month   = jul,
  year    = {1994},
}

% Page range written in full with a double hyphen.
@article{AdelsonBergen1985,
  author  = {E. H. Adelson and J. R. Bergen},
  title   = {Spatiotemporal Energy Models for the Perception of Motion},
  journal = {Journal of the Optical Society of America},
  volume  = {2},
  number  = {2},
  pages   = {284--299},
  year    = {1985},
}

% Page range uses the double hyphen so it typesets as an en dash.
@article{Agrawala70,
  author  = {Ashok Kumar Agrawala},
  title   = {Learning with a Probabilistic Teacher},
  journal = {IEEE Transactions on Information Theory},
  volume  = {16},
  pages   = {373--379},
  year    = {1970},
}

@article{Ahalt90,
  author  = {S. C. Ahalt and A. K. Krishnamurthy and P. Chen and D. E. Melton},
  title   = {Competitive Learning Algorithms for Vector Quantization},
  journal = nn,
  volume  = {3},
  pages   = {277--290},
  year    = {1990},
}

% Publisher spelled "Kaufmann", as in the other Morgan Kaufmann entries of
% this file.
@inproceedings{Ahmad93,
  author    = {S. Ahmad and V. Tresp},
  editor    = NIPS5ed,
  title     = {Some Solutions to the Missing Feature Problem in Vision},
  booktitle = NIPS5,
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Mateo, CA},
  year      = {1993},
}

@inproceedings{Ahmed2008,
  author    = {Amr Ahmed and Kai Yu and Wei Xu and Yihong Gong and Eric P. Xing},
  title     = {Training Hierarchical Feed-forward Visual Recognition Models Using Transfer Learning from Pseudo Tasks},
  booktitle = {Proceedings of the 10th European Conference on Computer Vision (ECCV'08)},
  pages     = {69--82},
  year      = {2008},
}

@article{AitchisonJ1976,
  author  = {John Aitchison and Colin Aitken},
  title   = {Multivariate binary discrimination by the kernel method},
  journal = {Biometrika},
  volume  = {63},
  number  = {3},
  pages   = {413--420},
  year    = {1976},
}

@article{Aizerman64,
  author  = {Mark A. Aizerman and Emmanuel M. Braverman and Lev I. Rozonoer},
  title   = {Theoretical Foundations of the Potential Function Method in Pattern Recognition Learning},
  journal = {Automation and Remote Control},
  volume  = {25},
  pages   = {821--837},
  year    = {1964},
}

% The title names the class Sigma-1-1, so the capital Greek letter is used
% rather than the summation operator; the math is braced so that
% sentence-casing styles cannot lowercase the macro.
@article{Ajtai83,
  author  = {Miklos Ajtai},
  title   = {{$\Sigma_1^1$}-formulae on finite structures},
  journal = {Annals of Pure and Applied Logic},
  volume  = {24},
  number  = {1},
  pages   = {1--48},
  year    = {1983},
}

@article{Akaike74,
  author  = {H. Akaike},
  title   = {A New Look at the Statistical Model Identification},
  journal = ieeeac,
  volume  = {AC-19},
  number  = {6},
  pages   = {716--728},
  year    = {1974},
}

@article{Al-Mashouq-hints,
  author  = {K. A. Al-Mashouq and I. S. Reed},
  title   = {Including Hints in Training Neural Nets},
  journal = nc,
  volume  = {3},
  number  = {4},
  pages   = {418--430},
  year    = {1991},
}

@book{Aleksander:90,
  author    = {I. Aleksander and H. Morton},
  title     = {An Introduction to Neural Computing},
  publisher = {Chapman and Hall},
  address   = {London},
  year      = {1990},
  keywords  = {},
}

@inproceedings{Aleksander:93,
  author    = {I. Aleksander and H. Morton},
  editor    = {J. Mira and J. Cabestany and A. Prieto},
  title     = {A Neural State Machine for Iconic Language Representation},
  booktitle = {New Trends in Neural Computation: Proc. of the International Workshop on Artificial Neural Networks IWANN'93},
  pages     = {84--89},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {1993},
  keywords  = {},
}

% Series name and volume number have their own fields; publisher holds only
% the publisher.
@inproceedings{Allender96,
  author    = {Eric Allender},
  title     = {Circuit Complexity Before the Dawn of the New Millennium},
  booktitle = {16th Annual Conference on Foundations of Software Technology and Theoretical Computer Science},
  series    = {Lecture Notes in Computer Science},
  volume    = {1180},
  pages     = {1--18},
  publisher = {Springer Verlag},
  year      = {1996},
}

@inproceedings{Alleva93,
  author    = {F. Alleva and X. Huang and M. Y. Hwang},
  title     = {An improved search algorithm using incremental knowledge for continuous speech recognition},
  booktitle = icassp,
  pages     = {307--310},
  address   = {Minneapolis, Minnesota},
  year      = {1993},
}

% Full variant (with series and number) of the entry keyed Allgower80-short.
@book{Allgower80,
  author    = {E. L. Allgower and K. Georg},
  title     = {Numerical Continuation Methods. {A}n Introduction},
  series    = {Springer Series in Computational Mathematics},
  number    = {13},
  publisher = {Springer-Verlag},
  year      = {1980},
}

% Compact variant (no series information) of the entry keyed Allgower80.
@book{Allgower80-short,
  author    = {E. L. Allgower and K. Georg},
  title     = {Numerical Continuation Methods. {A}n Introduction},
  publisher = {Springer-Verlag},
  year      = {1980},
}

@inproceedings{Almeida87,
  author    = {L. B. Almeida},
  editor    = {M. Caudill and C. Butler},
  title     = {A Learning Rule for Asynchronous Perceptrons with Feedback in a Combinatorial Environment},
  booktitle = icnn,
  volume    = {2},
  pages     = {609--618},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  year      = {1987},
}

@inproceedings{Almeida88,
  author    = {L. B. Almeida},
  editor    = {R. Eckmiller and Ch. von der Malsburg},
  title     = {Backpropagation in Perceptrons with Feedback},
  booktitle = {Neural Computers},
  pages     = {199--208},
  publisher = {Springer-Verlag, Berlin},
  address   = {Neuss 1987},
  year      = {1988},
}

@inproceedings{Almuallim+Dietterich-1991,
  author    = {Almuallim, H. and Dietterich, T. G.},
  title     = {Learning with many irrelevant features},
  booktitle = {Proceedings of the Ninth National Conference on Artificial Intelligence},
  volume    = {2},
  pages     = {547--552},
  publisher = {AAAI Press},
  address   = {Anaheim, California},
  year      = {1991},
  url       = {http://citeseer.ist.psu.edu/almuallim91learning.html},
}

% Page range uses the double hyphen so it typesets as an en dash.
@article{Almuallim+Dietterich-1994,
  author  = {Hussein Almuallim and Thomas G. Dietterich},
  title   = {Learning Boolean Concepts in the Presence of Many Irrelevant Features},
  journal = {Artificial Intelligence},
  volume  = {69},
  number  = {1-2},
  pages   = {279--305},
  year    = {1994},
  url     = {citeseer.ist.psu.edu/almuallim94learning.html},
}


@inproceedings{Alspector87,
  author    = {J. Alspector and R. B. Allen},
  editor    = {P. Losleben},
  title     = {A Neuromorphic {VLSI} Learning System},
  booktitle = {Advanced Research in VLSI: Proceedings of the 1987 Stanford Conference},
  pages     = {313--349},
  publisher = {MIT Press, Cambridge},
  year      = {1987},
}

@inproceedings{Alspector88,
  author    = {J. Alspector and R. B. Allen and V. Hu and S. Satyanarayana},
  editor    = nips87ed,
  title     = {Stochastic Learning Networks and Their Electronic Implementation},
  booktitle = nips87,
  pages     = {9--21},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  year      = {1988},
}

% Same article as the entry keyed Amari99 (which also records the issue
% number); both keys are kept because either may be cited.
@article{Amari+Wu-99,
  author  = {S. Amari and S. Wu},
  title   = {Improving {Support} {Vector} {Machine} classifiers by modifying kernel functions},
  journal = {Neural Networks},
  volume  = {12},
  pages   = {783--789},
  year    = {1999},
}

@article{amari00adaptive,
  author  = {{Shun-ichi} Amari and Hyeyoung Park and Kenji Fukumizu},
  title   = {Adaptive Method of Realizing Natural Gradient Learning for Multilayer Perceptrons},
  journal = {Neural Computation},
  volume  = {12},
  number  = {6},
  pages   = {1399--1409},
  year    = {2000},
  url     = {citeseer.ist.psu.edu/amari98adaptive.html},
}

@article{Amari77,
  author  = {S. A. Amari},
  title   = {Dynamics of Pattern Formation in Lateral-Inhibition Type Neural Fields},
  journal = biocyb,
  volume  = {27},
  pages   = {77--87},
  year    = {1977},
}

@article{Amari80,
  author  = {S. A. Amari},
  title   = {Topographic Organization of Nerve Fields},
  journal = bmbiol,
  volume  = {42},
  pages   = {339--364},
  year    = {1980},
}

@article{amari98natural,
  author  = {{Shun-ichi} Amari},
  title   = {Natural Gradient Works Efficiently in Learning},
  journal = {Neural Computation},
  volume  = {10},
  number  = {2},
  pages   = {251--276},
  year    = {1998},
  url     = {citeseer.ist.psu.edu/article/amari98natural.html},
}

% Same article as the entry keyed Amari+Wu-99; both keys are kept because
% either may be cited.
@article{Amari99,
  author  = {S. Amari and S. Wu},
  title   = {Improving Support Vector Machine Classifiers by Modifying Kernel Functions},
  journal = {Neural Networks},
  volume  = {12},
  number  = {6},
  pages   = {783--789},
  year    = {1999},
}

@article{AmariS1997,
  author   = {{Shun-ichi} Amari and Noboru Murata and Klaus-Robert M{\"u}ller and Michael Finke and Howard Hua Yang},
  title    = {Asymptotic statistical theory of overtraining and cross-validation},
  journal  = {IEEE Transactions on Neural Networks},
  volume   = {8},
  number   = {5},
  pages    = {985--996},
  year     = {1997},
  keywords = {regularization},
}

% The surname carries an acute accent on a dotless i, written as a
% brace-wrapped escape so BibTeX treats it as a single letter.
@inproceedings{amaya01improvement,
  author    = {Fredy A. Amaya and Jose-Miguel Bened{\'\i}},
  title     = {Improvement of a Whole Sentence Maximum Entropy Language Model Using Grammatical Features},
  booktitle = {Meeting of the Association for Computational Linguistics},
  pages     = {10--17},
  year      = {2001},
  url       = {citeseer.nj.nec.com/505752.html},
}

% All names are in "First Last" order; a surname-first form without a comma
% would make BibTeX take the initial as the surname.
@inproceedings{BoufadenLapalmeBengio2001,
  author    = {N. Boufaden and G. Lapalme and Y. Bengio},
  title     = {Topic segmentation: First Stage of Dialogue-Based Information extraction Process},
  booktitle = {Proceedings of the Natural Language Pacific Rim Symposium, NLPRS-01},
  year      = {2001},
}

@article{Amit85a,
  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
  title   = {Spin-Glass Models of Neural Networks},
  journal = prA,
  volume  = {32},
  pages   = {1007--1018},
  year    = {1985},
}

@article{Amit85b,
  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
  title   = {Storing Infinite Numbers of Patterns in a Spin-Glass Model of Neural Networks},
  journal = prl,
  volume  = {55},
  pages   = {1530--1533},
  year    = {1985},
}

@article{Amit87a,
  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
  title   = {Statistical Mechanics of Neural Networks Near Saturation},
  journal = annphys,
  volume  = {173},
  pages   = {30--67},
  year    = {1987},
}

@article{Amit87b,
  author  = {D. Amit and H. Gutfreund and H. Sompolinsky},
  title   = {Information Storage in Neural Networks with Low Levels of Activity},
  journal = prA,
  volume  = {35},
  pages   = {2293--2303},
  year    = {1987},
}

@article{Amit88,
  author  = {D. Amit},
  title   = {Neural Networks for Counting Chimes},
  journal = PNAS,
  volume  = {85},
  pages   = {2141--2145},
  year    = {1988},
}

@book{Amit89,
  author    = {D. Amit},
  title     = {Modelling Brain Function},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {1989},
}

@article{Ammar+Miao-2000,
  author  = {Hany H. Ammar and Zhouhui Miao},
  title   = {Parallel Algorithms for the Training Process of a Neural Network-Based System},
  journal = {International Journal of High Performance Computing Applications},
  volume  = {14},
  number  = {1},
  pages   = {3--25},
  year    = {2000},
  doi     = {10.1177/109434200001400101},
  url     = {http://hpc.sagepub.com/cgi/content/abstract/14/1/3},
  eprint  = {http://hpc.sagepub.com/cgi/reprint/14/1/3.pdf},
}

% Final punctuation of the title is supplied by the bibliography style.
@book{Anderson,
  author    = {T. Anderson},
  title     = {An Introduction to Multivariate Statistical Analysis},
  publisher = {John Wiley and Sons},
  address   = {New York},
  year      = {1984},
}

@article{Anderson68,
  author  = {J. A. Anderson},
  title   = {A Memory Model Using Spatial Correlation Functions},
  journal = kyb,
  volume  = {5},
  pages   = {113--119},
  year    = {1968},
}

@article{Anderson70,
  author  = {J. A. Anderson},
  title   = {Two Models for Memory Organization},
  journal = mbio,
  volume  = {8},
  pages   = {137--160},
  year    = {1970},
}

% Initials are space-separated so styles abbreviate both given names.
% NOTE(review): the entry keyed Anderson81 lists the same two people as the
% editors of this book -- confirm whether this should use the editor field.
@book{Hinton+Anderson-81,
  author    = {G. E. Hinton and J. A. Anderson},
  title     = {Parallel models of associative memory},
  publisher = {Lawrence Erlbaum Assoc.},
  address   = {Hillsdale, NJ},
  year      = {1981},
}

@incollection{Anderson81,
  author    = {J. A. Anderson and M. C. Mozer},
  editor    = {G. E. Hinton and J. A. Anderson},
  title     = {Categorization and Selective Neurons},
  booktitle = {Parallel Models of Associative Memory},
  pages     = {213--236},
  publisher = {Lawrence Erlbaum},
  address   = {Hillsdale},
  year      = {1981},
}

@article{Anderson86,
  author  = {D. Z. Anderson},
  title   = {Coherent Optical Eigenstate Memory},
  journal = optlett,
  volume  = {11},
  pages   = {56--58},
  year    = {1986},
}

@article{Anderson87,
  author  = {C. H. Anderson and D. C. Van Essen},
  title   = {Shifter Circuits: {A} Computational Strategy for Dynamic Aspects of Visual Processing},
  journal = PNAS,
  volume  = {84},
  pages   = {6297--6301},
  year    = {1987},
}

@book{Anderson88,
  editor    = {J. A. Anderson and E. Rosenfeld},
  title     = {Neurocomputing: Foundations of Research},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1988},
}

% The address follows the file's older "city year" pattern for the meeting.
@inproceedings{Anderson89,
  author    = {S. Anderson and J. W. L. Merrill and R. Port},
  editor    = {D. Touretzky and G. Hinton and T. Sejnowski},
  title     = {Dynamic Speech Categorization with Recurrent Networks},
  booktitle = cmss88,
  pages     = {398--406},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Pittsburgh 1988},
  year      = {1989},
}

@article{Ando+Zhange-JMLR-2005,
  author  = {Rie Kubota Ando and Tong Zhang},
  title   = {A Framework for Learning Predictive Structures from Multiple Tasks and Unlabeled Data},
  journal = jmlr,
  volume  = {6},
  pages   = {1817--1853},
  year    = {2005},
}

% Same article as the entry keyed Andrieu2003; both keys are kept because
% either may be cited.
@article{Andrieu03,
  author  = {Christophe Andrieu and Nando de Freitas and Arnaud Doucet and Michael I. Jordan},
  title   = {An Introduction to {MCMC} for Machine Learning},
  journal = {Machine Learning},
  volume  = {50},
  number  = {1-2},
  pages   = {5--43},
  year    = {2003},
}

% Same article as the entry keyed Andrieu03 (which has full given names and
% the issue number); both keys are kept because either may be cited.
@article{Andrieu2003,
  author  = {C. Andrieu and N. de Freitas and A. Doucet and M. Jordan},
  title   = {An introduction to {MCMC} for machine learning},
  journal = {Machine Learning},
  volume  = {50},
  pages   = {5--43},
  year    = {2003},
}

% The accent is brace-wrapped so BibTeX treats it as one letter, and
% "Le Texier" is in comma form so the capitalised "Le" stays in the surname.
@article{Angeniol88,
  author  = {B. Ang{\'e}niol and G. de La Croix Vaubois and Le Texier, J.-Y.},
  title   = {Self-Organizing Feature Maps and the Travelling Salesman Problem},
  journal = nn,
  volume  = {1},
  pages   = {289--293},
  year    = {1988},
}

@article{Angluin83,
  author  = {D. Angluin and C. Smith},
  title   = {Inductive Inference: Theory and Methods},
  journal = {Computing Surveys},
  volume  = {15},
  number  = {3},
  pages   = {237--269},
  year    = {1983},
}

@book{Arbib87,
  author    = {M. A. Arbib},
  title     = {Brains, Machines, and Mathematics},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = {1987},
}

% Corporate author: the inner braces keep the agency name as one indivisible
% surname.
@book{ARP94,
  author    = {{Advanced Research Projects Agency}},
  title     = {Proceedings of the 1994 {ARPA} Human Language Technology Workshop (Princeton, New Jersey, March 1994)},
  publisher = {Morgan Kaufmann},
  year      = {1994},
}

@misc{Asuncion+Newman:2007,
  author      = {A. Asuncion and D. J. Newman},
  title       = {{UCI} Machine Learning Repository},
  institution = {University of California, Irvine, School of Information and Computer Sciences},
  year        = {2007},
  url         = {http://www.ics.uci.edu/$\sim$mlearn/MLRepository.html},
}

% Page range uses the double hyphen so it typesets as an en dash.
@article{ashetal04,
  author  = {Ash, J. and Berg, M. and Coiera, E.},
  title   = {Some unintended consequences of information technology in health care: the nature of patient care information system-related errors},
  journal = {J Am Med Inform Assoc},
  volume  = {11},
  number  = {2},
  pages   = {104--112},
  year    = {2004},
}

% Page range uses the double hyphen so it typesets as an en dash.
@article{ashetal07,
  author  = {Ash, J. and Sittig, D. and Dykstra, R. and Guappone, K. and Carpenter, J. and Seshadri, V.},
  title   = {Categorizing the unintended sociotechnical consequences of computerized provider order entry},
  journal = {Int J Med Inform},
  volume  = {76},
  number  = {Suppl1},
  pages   = {21--27},
  year    = {2007},
}

@inproceedings{Atal83,
  author    = {B. S. Atal},
  title     = {Efficient coding of {LPC} parameters by temporal decomposition},
  booktitle = icassp,
  pages     = {81--84},
  address   = {Boston, MA},
  year      = {1983},
}

@phdthesis{Athaide95,
  author  = {C. R. Athaide},
  title   = {Likelihood estimation and state estimation for nonlinear state space models},
  school  = {Graduate Group in Managerial Science and Applied Economics, University of Pennsylvania},
  address = {Philadelphia, PA},
  year    = {1995},
}

% The address is the publisher's town, Wokingham.
@book{Atherton-75,
  author    = {D. P. Atherton},
  title     = {Nonlinear Control Engineering},
  publisher = {Van Nostrand Reinhold},
  address   = {Wokingham (England)},
  year      = {1975},
}

@article{atkeson96locally,
  author  = {C. G. Atkeson and A. W. Moore and S. Schaal},
  title   = {Locally Weighted Learning for Control},
  journal = {Artificial Intelligence Review},
  volume  = {11},
  pages   = {75--113},
  year    = {1997},
}

% All three words of the newspaper name are capitalised and brace-protected
% against sentence-casing styles.
@inproceedings{Aubert94,
  author    = {X. Aubert and C. Dugast and H. Ney and V. Steinbiss},
  title     = {Large vocabulary continuous speech recognition of {Wall} {Street} {Journal} data},
  booktitle = icassp,
  pages     = {129--132},
  address   = {Adelaide, Australia},
  year      = {1994},
}

@InProceedings{Auer-96,
  author =       "Peter Auer and Mark Herbster and Manfred K. Warmuth",
  editor =       NIPS8ed,
  booktitle =    NIPS8,
  title =        "Exponentially Many Local Minima for Single Neurons",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "315--322",
  year =         "1996",
}

@inproceedings{auer97,
  author    = {Peter Auer},
  title     = {On learning from multi-instance examples: Empirical evaluation of a theoretical approach},
  booktitle = {Proc. 14th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  pages     = {21--29},
  year      = {1997},
}

@inproceedings{b-cdmvqfa-97,
  author    = {Jonathan Baxter},
  title     = {The canonical distortion measure for vector quantization and function approximation},
  booktitle = {Proc. 14th International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  pages     = {39--47},
  year      = {1997},
}

%% NOTE(review): page range unknown; the empty pages field was dropped
%% rather than guessed. Fill it in once verified.
@InCollection{Bach-2007,
  author =       "Francis Bach",
  editor =       NIPS19ed,
  booktitle =    NIPS19,
  title =        "Active learning for misspecified generalized linear
                 models",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2007",
}

@article{Bachmann87,
  author  = {C. M. Bachmann and L. N. Cooper and A. Dembo and O. Zeitouni},
  title   = {A Relaxation Model for Memory with High Storage Density},
  journal = PNAS,
  volume  = {84},
  pages   = {7529--7531},
  year    = {1987},
}

@mastersthesis{Bachrach88,
  author  = {J. Bachrach},
  title   = {Learning to Represent State},
  school  = {University of Massachusetts},
  address = {Amherst},
  year    = {1988},
}

@article{Back-nc91,
  author  = {A. D. Back and A. C. Tsoi},
  title   = {{FIR} and {IIR} Synapses: {A} New Neural Network Architecture for Time Series Modeling},
  journal = nc,
  volume  = {3},
  number  = {3},
  pages   = {375--385},
  year    = {1991},
}

@InCollection{Bahadur61,
  author =       "R. R. Bahadur",
  editor =       "H. Solomon",
  booktitle =    "Studies in Item Analysis and Prediction",
  title =        "A representation of the joint distribution of
                 responses to n dichotomous items",
  publisher =    "Stanford University Press, California",
  pages =        "158--168",
  year =         "1961",
}

@inproceedings{bahl77,
  author    = {L. R. Bahl and J. K. Baker and R. L. Mercer},
  title     = {Perplexity: a measure of difficulty of speech recognition tasks},
  booktitle = {94th Meeting of the Acoustical Society of America},
  address   = {Miami},
  month     = dec,
  year      = {1977},
}

@article{Bahl83,
  author  = {L. R. Bahl and F. Jelinek and R. L. Mercer},
  title   = {A Maximum Likelihood Approach to Continuous Speech Recognition},
  journal = ieeetpami,
  volume  = {5},
  number  = {2},
  pages   = {179--190},
  month   = mar,
  year    = {1983},
}

@InProceedings{Bahl86,
  author =       "Lalit Bahl and Peter Brown and Peter {de Souza} and Robert Mercer",
  booktitle =    icassp,
  title =        "Maximum mutual information estimation of hidden {Markov}
                 parameters for speech recognition",
  address =      "Tokyo, Japan",
  pages =        "49--52",
  year =         "1986",
}

@Article{Bahl87,
  author =       "L. R. Bahl and P. Brown and P. V. {de Souza} and R. L.
                 Mercer",
  title =        "Speech recognition with continuous-parameter hidden
                 {Markov} models",
  journal =      "Computer Speech and Language",
  volume =       "2",
  pages =        "219--234",
  year =         "1987",
}

@inproceedings{Bahl88,
  author    = {L. R. Bahl and P. Brown and P. V. de Souza and R. L. Mercer},
  title     = {Speech recognition with continuous-parameter hidden {Markov} models},
  booktitle = icassp,
  address   = {New York, NY},
  pages     = {40--43},
  year      = {1988},
}

@article{Bailey-Simon-60,
  author  = {Robert A. Bailey and Leroy Simon},
  title   = {Two Studies in Automobile Insurance Ratemaking},
  journal = {ASTIN Bulletin},
  volume  = {1},
  number  = {4},
  pages   = {192--217},
  year    = {1960},
}

@incollection{Baker75,
  author    = {J. K. Baker},
  editor    = {D. R. Reddy},
  title     = {Stochastic modeling for automatic speech understanding},
  booktitle = {Speech Recognition},
  publisher = {Academic Press},
  address   = {New York},
  pages     = {521--542},
  year      = {1975},
}

@book{Baker77,
  author    = {C. T. H. Baker},
  title     = {The numerical treatment of integral equations},
  publisher = {Clarendon Press},
  address   = {Oxford},
  year      = {1977},
}

@inproceedings{Baker98,
  author    = {D. Baker and A. {McCallum}},
  title     = {Distributional Clustering of Words for Text Classification},
  booktitle = {SIGIR'98},
  year      = {1998},
}

@inproceedings{baker98berkeley,
  author    = {Collin F. Baker and Charles J. Fillmore and John B. Lowe},
  editor    = {Christian Boitet and Pete Whitelock},
  title     = {The {Berkeley} {FrameNet} Project},
  booktitle = {Proceedings of the Thirty-Sixth Annual Meeting of the
               {Association} for {Computational} {Linguistics} and
               Seventeenth International Conference on Computational
               Linguistics},
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Francisco, California},
  pages     = {86--90},
  year      = {1998},
}

@InProceedings{Bakis76,
  author =       "R. Bakis",
  booktitle =    "91st Meeting of the Acoustical Society of America",
  title =        "Continuous Speech Recognition via Centisecond Acoustic
                 States",
  month =        apr,
  year =         "1976",
}

@article{bakker03,
  author    = {Bart Bakker and Tom Heskes},
  title     = {Task clustering and gating for {B}ayesian multitask learning},
  journal   = jmlr,
  volume    = {4},
  pages     = {83--99},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {2003},
  issn      = {1533-7928},
}

@Book{Baldi-Brunak-98,
  author =       "Pierre Baldi and S{\o}ren Brunak",
  title =        "Bioinformatics, the Machine Learning Approach",
  publisher =    "MIT Press",
  year =         "1998",
}

@article{Baldi89,
  author  = {Pierre Baldi and Kurt Hornik},
  title   = {Neural Networks and Principal Component Analysis: Learning from Examples Without Local Minima},
  journal = nn,
  volume  = {2},
  pages   = {53--58},
  year    = {1989},
}

@Article{Baldi94,
  author =       "P. Baldi and Y. Chauvin and T. Hunkapiller and M.
                 {McClure}",
  title =        "Hidden {Markov} models of biological primary sequence
                 information",
  journal =      "Proc. Nat. Acad. Sci. (USA)",
  volume =       "91",
  number =       "3",
  pages =        "1059--1063",
  year =         "1994",
}

@Article{Ballard81,
  author =       "D. H. Ballard",
  title =        "Generalizing the {Hough} Transform to Detect Arbitrary
                 Shapes",
  journal =      "Pattern Recognition",
  volume =       "13",
  number =       "2",
  pages =        "111--122",
  year =         "1981",
}

%% NOTE(review): page range unknown; the empty pages field was dropped
%% rather than guessed. Fill it in once verified.
@InProceedings{Baluja97,
  author =       "S. Baluja",
  editor =       NIPS9ed,
  booktitle =    NIPS9,
  title =        "Genetic Algorithms and Explicit Search Statistics",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1997",
}

@article{Bar-Shalom78,
  author  = {Y. Bar-Shalom},
  title   = {Tracking methods in a multi-target environment},
  journal = {IEEE Trans. on Aut. Control},
  volume  = {23},
  pages   = {618--626},
  year    = {1978},
}

@book{Bar-Shalom93,
  author    = {Y. Bar-Shalom and {X.-R.} Li},
  title     = {Estimation and Tracking},
  publisher = {Artech House},
  address   = {Boston, MA},
  year      = {1993},
}

@InProceedings{Barber+Williams-nips9,
  author =       "D. Barber and C. K. I. Williams",
  editor =       NIPS9ed,
  booktitle =    NIPS9,
  title =        "{Gaussian} Processes for {Bayesian} Classification via
                 Hybrid {Monte} {Carlo}",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "340--346",
  year =         "1997",
}

@inproceedings{Bareiss87,
  author    = {E. R. Bareiss and B. Porter},
  title     = {Protos: An Exemplar-Based Learning Apprentice},
  booktitle = {Proceedings of the 4th International Workshop on Machine Learning},
  publisher = {Morgan Kaufmann},
  address   = {Irvine, CA},
  pages     = {12--23},
  year      = {1987},
}

@article{Barhen89,
  author  = {J. Barhen and S. Gulati and M. Zak},
  title   = {Neural Learning of Constrained Nonlinear Transformations},
  journal = computer,
  pages   = {67--76},
  month   = jun,
  year    = {1989},
}

@article{Nykamp+Ringach-2002,
 author = {D. Q. Nykamp and D. L. Ringach},
 title = {Full identification of a linear-nonlinear system via cross-correlation analysis},
 journal = {Journal of Vision},
 volume = 2,
 number = 1,
 pages = {1--11},
 year = 2002,
}

@article{Wilson+Cowan-72,
 author = {Hugh R. Wilson and Jack D. Cowan},
 title = {Excitatory and inhibitory interactions in localized populations of model neurons},
 journal = {Biophysical Journal},
 volume = 12,
 pages = {1--24},
 year = 1972,
}

@article{Barlow89,
  author  = {H. B. Barlow},
  title   = {Unsupervised Learning},
  journal = nc,
  volume  = {1},
  pages   = {295--311},
  year    = {1989},
}

@article{Barlow-2001,
    address = {Cambridge, UK.},
    author = {H. Barlow},
    issn = {0954-898X},
    journal = {Network: Computation in Neural Systems},
    month = aug,
    number = {3},
    pages = {241--253},
    title = {Redundancy reduction revisited},
    url = {http://view.ncbi.nlm.nih.gov/pubmed/11563528},
    volume = {12},
    year = {2001},
}

@inproceedings{Barron+Barron88,
  author    = {A. R. Barron and R. L. Barron},
  editor    = {E. Wegman},
  title     = {Statistical learning networks: {A} unifying view},
  booktitle = {Computing Science and Statistics, Proc. 20th Symp. Interface},
  publisher = {Amer. Statist. Assoc.},
  address   = {Washington, DC},
  pages     = {192--203},
  year      = {1988},
}

@inproceedings{Barron89,
  author    = {A. R. Barron},
  title     = {Statistical properties of artificial neural networks},
  booktitle = {Proc. of the 28th conf. on Decision and Control},
  address   = {Tampa, Florida},
  pages     = {280--285},
  year      = {1989},
}

@incollection{Barron91,
  author =       "Andrew R. Barron",
  title =        "Complexity Regularization with Application to Artificial Neural Networks",
  booktitle =      "Nonparametric Functional Estimation and Related Topics",
  pages =        "561--576",
  editor = "G. Roussas",
  year =         "1991",
  publisher = "Kluwer Academic Publishers"
}


%% NOTE(review): author order changed to put Bartal first, matching the
%% citation key; confirm against the published paper.
@Article{Bartal95,
  author =       {Yair Bartal and Jie Lin and Robert E. Uhrig},
  title =        {Nuclear Power Plant Transient Diagnostics Using
                 Artificial Neural Networks that Allow ``don't know''
                 Classifications},
  journal =      "Nuclear Technology",
  volume =       "110",
  pages =        "436--449",
  month =        jun,
  year =         "1995",
}

@article{Bartlett+Uhrig92,
  author  = {E. B. Bartlett and R. E. Uhrig},
  title   = {Nuclear Power Plant Status Diagnostics Using an Artificial Neural Network},
  journal = {Nuclear Technology},
  volume  = {97},
  month   = mar,
  year    = {1992},
}

@Article{Bartlett46,
  author =       "M. S. Bartlett",
  title =        "On the theoretical specification of sampling
                 properties of autocorrelated time series",
  journal =      "J. Royal Stat. Soc. B",
  volume =       "8",
  pages =        "27--41",
  year =         "1946",
}

@article{Bartlett92,
  author  = {P. L. Bartlett and T. Downs},
  title   = {Using Random Weights to train Multilayer Networks of Hard-Limiting Units},
  journal = ieeetrnn,
  volume  = {3},
  number  = {2},
  pages   = {202--210},
  year    = {1992},
}

%% Same report as the entry keyed Barto_tr91 (underscore instead of hyphen).
@techreport{Barto-tr91,
  author      = {A. G. Barto and S. Bradtke and S. P. Singh},
  title       = {Real-Time Learning and {Control} Using Asynchronous Dynamic Programming},
  institution = {Univ. of Massachusetts (Computer Science)},
  number      = {91-57},
  address     = {Amherst MA},
  year        = {1991},
}

@article{Barto81,
  author  = {A. G. Barto and R. S. Sutton and P. S. Brouwer},
  title   = {Associative Search Network: Reinforcement Learning Associative Memory},
  journal = {Biological Cybernetics},
  volume  = {40},
  year    = {1981},
}

@article{Barto83,
  author  = {A. G. Barto and R. S. Sutton and C. W. Anderson},
  title   = {Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problems},
  journal = ieeesmc,
  volume  = {13},
  year    = {1983},
}

@article{Barto85,
  author  = {A. G. Barto and P. Anandan},
  title   = {Pattern Recognizing Stochastic Learning Automata},
  journal = ieeesmc,
  volume  = {15},
  pages   = {360--375},
  year    = {1985},
}

@inproceedings{Barto87,
  author    = {A. G. Barto and M. I. Jordan},
  editor    = {M. Caudill and C. Butler},
  title     = {Gradient Following Without Back-Propagation in Layered Networks},
  booktitle = icnn,
  volume    = {2},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  pages     = {629--636},
  year      = {1987},
}

@incollection{Barto91,
  author    = {A. G. Barto and R. S. Sutton and C. J. C. H. Watkins},
  editor    = {M. Gabriel and J. W. Moore},
  title     = {Learning and Sequential Decision Making},
  booktitle = {Learning and Computational Neuroscience},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1991},
}

@InCollection{Barto92,
  author =       "A. G. Barto",
  editor =       "W. T. Miller and R. S. Sutton and P. J. Werbos",
  booktitle =    "Neural Networks for Control",
  title =        "Connectionist learning for control: an overview",
  publisher =    "MIT Press",
  year =         "1992",
}

%% Same report as the entry keyed Barto-tr91 (hyphen instead of underscore);
%% both keys are kept so existing citations keep resolving.
@TechReport{Barto_tr91,
  author =       "A. G. Barto and S. Bradtke and S. P. Singh",
  title =        "Real-Time Learning and {Control} Using Asynchronous
                 Dynamic Programming",
  number =       "91-57",
  institution =  "Univ. of Massachusetts (Computer Science)",
  address =      "Amherst MA",
  year =         "1991",
}

@article{bassiouni95,
  author  = {M. A. Bassiouni and A. Mukherjee},
  title   = {Efficient Decoding of Compressed Data},
  journal = {Journal of the American Society for Information Science},
  volume  = {46},
  number  = {1},
  pages   = {1--8},
  year    = {1995},
}

@article{Basu94,
  author  = {A. Basu and E. B. Bartlett},
  title   = {Detecting Faults in a Nuclear Power Plant by Using Dynamic Node Architecture Artificial Neural Networks},
  journal = {Nuclear Science and Engineering},
  volume  = {116},
  month   = apr,
  year    = {1994},
}

@article{battiti-89,
  author  = {R. Battiti},
  title   = {Accelerated Backpropagation Learning: Two Optimization Methods},
  journal = {Complex Systems},
  volume  = {3},
  pages   = {331--342},
  year    = {1989},
}

@InProceedings{battiti-masulli-90,
  author =       "R. Battiti and F. Masulli",
  booktitle =    "Proceedings of International Neural Network Conference
                 (INNC 90, Paris)",
  title =        "{BFGS} optimization for faster and automated
                 supervised learning",
  pages =        "757--760",
  year =         "1990",
}

@Article{Battiti92,
  author =       "R. Battiti",
  title =        "First- and Second-Order Methods for Learning: Between
                 Steepest Descent and {Newton}'s Method",
  journal =      "Neural Computation",
  volume =       "4",
  type =         "Review",
  number =       "2",
  pages =        "141--166",
  year =         "1992",
}

@Article{battiti:1994:ieeetnn,
  author =       "R. Battiti",
  title =        "Using Mutual Information for Selecting Features in
                 Supervised Neural Net Learning",
  journal =      "{IEEE} Transactions on Neural Networks",
  volume =       "5",
  number =       "4",
  pages =        "537--550",
  year =         "1994",
}

@article{Baudat+Anouar-2000,
    author = {G. Baudat and F. Anouar},
    title = {Generalized Discriminant Analysis Using a Kernel Approach},
    journal = {Neural Computation},
    volume = {12},
    number = {10},
    year = {2000},
    issn = {0899-7667},
    pages = {2385--2404},
    doi = {10.1162/089976600300014980},
    publisher = {MIT Press},
    address = {Cambridge, MA, USA},
}

@Article{Baum66,
  author =       "L. E. Baum and T. Petrie",
  title =        "Statistical Inference for Probabilistic Functions of
                 Finite State {Markov} Chains",
  journal =      "Ann. Math. Stat.",
  volume =       "37",
  pages =        "1554--1563",
  year =         "1966",
}

@article{Baum67,
  author  = {L. E. Baum and J. Eagon},
  title   = {An inequality with applications to statistical prediction
             for functions of {Markov} processes and to a model of ecology},
  journal = {Bull. Amer. Math. Soc.},
  volume  = {73},
  pages   = {360--363},
  year    = {1967},
}

@Article{Baum70,
  author =       "L. E. Baum and T. Petrie and G. Soules and N. Weiss",
  title =        "A maximization technique occurring in the statistical
                 analysis of probabilistic functions of {Markov}
                 chains",
  journal =      "Ann. Math. Stat.",
  volume =       "41",
  pages =        "164--171",
  year =         "1970",
}

@article{Baum72,
  author  = {L. E. Baum},
  title   = {An inequality and associated maximization technique in
             statistical estimation for probabilistic functions of a
             {Markov} process},
  journal = {Inequalities},
  volume  = {3},
  pages   = {1--8},
  year    = {1972},
}

@inproceedings{Baum86,
  author    = {E. B. Baum},
  editor    = {J. S. Denker},
  title     = {Towards Practical ``Neural'' Computation for Combinatorial Optimization Problems},
  booktitle = snowbird,
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
  pages     = {53--58},
  year      = {1986},
}

@inproceedings{Baum88,
  author    = {E. B. Baum and F. Wilczek},
  editor    = nips87ed,
  title     = {Supervised Learning of Probability Distributions by Neural Networks},
  booktitle = nips87,
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  pages     = {52--61},
  year      = {1988},
}

@article{Baum89,
  author  = {E. B. Baum and D. Haussler},
  title   = {What Size Net Gives Valid Generalization?},
  journal = nc,
  volume  = {1},
  pages   = {151--160},
  year    = {1989},
}

@article{BaumNote,
  author  = {E. B. Baum},
  title   = {Review of {J}. {S}. {Judd}'s book {\em {Neural}
             {Network} {Design} and the {Complexity} of
             {Learning}}},
  journal = ieeetrnn,
  volume  = {2},
  number  = {1},
  pages   = {181--182},
  year    = {1991},
}

@Article{baxter00,
  author =       "Jonathan Baxter",
  title =        "A Model of Inductive Bias Learning",
  journal =      "J. Artif. Intell. Res. (JAIR)",
  volume =       "12",
  pages =        "149--198",
  year =         "2000",
}

@inproceedings{baxter95a,
  author    = {Jonathan Baxter},
  title     = {Learning Internal Representations},
  booktitle = colt95,
  publisher = {ACM Press},
  address   = {Santa Cruz, California},
  pages     = {311--320},
  year      = {1995},
  url       = {http://citeseer.ist.psu.edu/baxter95learning.html},
}

@unpublished{baxter95b,
  author = {Jonathan Baxter},
  title  = {The Canonical Metric for Vector Quantization},
  note   = {submitted to Information and Computation},
  year   = {1995},
}

@inproceedings{baxter96,
  author    = {Jonathan Baxter},
  editor    = NIPS8ed,
  title     = {Learning Model Bias},
  booktitle = NIPS8,
  volume    = {8},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {169--175},
  year      = {1996},
}

%% Same article as the entry keyed baxter97a; both keys are kept so existing
%% citations keep resolving.
@Article{baxter97,
  author =       "Jonathan Baxter",
  title =        "A {Bayesian}/information theoretic model of learning to
                 learn via multiple task sampling",
  journal =      "Machine Learning",
  volume =       "28",
  pages =        "7--40",
  year =         "1997",
}

@article{baxter97a,
  author  = {Jonathan Baxter},
  title   = {A {Bayesian}/Information theoretic model of learning to learn via multiple task sampling},
  journal = {Machine Learning},
  volume  = {28},
  pages   = {7--40},
  year    = {1997},
}

@InProceedings{Becker89,
  author =       "S. Becker and Y. {LeCun}",
  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
  booktitle =    cmss88,
  title =        "Improving the Convergence of Back-Propagation Learning
                 with Second Order Methods",
  publisher =    "Morgan Kaufmann, San Mateo",
  address =      "Pittsburgh 1988",
  pages =        "29--37",
  year =         "1989",
}

@InProceedings{Belkin+al-2004,
  author =       "Mikhail Belkin and Irina Matveeva and Partha Niyogi",
  editor =       "John Shawe-Taylor and Yoram Singer",
  booktitle =    colt04,
  title =        "Regularization and Semi-supervised Learning on Large
                 Graphs",
  publisher =    "Springer",
  pages =        "624--638",
  year =         "2004",
}

@inproceedings{Belkin+Niyogi-2002,
  author    = {Mikhail Belkin and Partha Niyogi},
  editor    = NIPS14ed,
  title     = {Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering},
  booktitle = NIPS14,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002},
  original  = {orig/AA42.ps},
}

@techreport{Belkin+Niyogi-2002-01,
  author      = {Mikhail Belkin and Partha Niyogi},
  title       = {Laplacian Eigenmaps for Dimensionality Reduction and Data Representation},
  institution = {University of Chicago, Computer Science},
  number      = {TR-2002-01},
  year        = {2002},
}

@TechReport{Belkin+Niyogi-2002-ss,
  author =       "Mikhail Belkin and Partha Niyogi",
  title =        "Semi-supervised learning on manifolds",
  number =       "TR-2002-12",
  institution =  "University of Chicago, Computer Science",
  year =         "2002",
}

@article{Belkin+Niyogi-2003,
  author  = {Mikhail Belkin and Partha Niyogi},
  title   = {Laplacian Eigenmaps for Dimensionality Reduction and Data Representation},
  journal = {Neural Computation},
  volume  = {15},
  number  = {6},
  pages   = {1373--1396},
  year    = {2003},
}

@inproceedings{Belkin+Niyogi-nips2003,
  author    = {Mikhail Belkin and Partha Niyogi},
  editor    = NIPS15ed,
  title     = {Using Manifold Structure for Partially Labeled Classification},
  booktitle = NIPS15,
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2003},
}

@article{BelkinM2006,
  author    = "Belkin, Mikhail and Niyogi, Partha and Sindhwani, Vikas",
  title     = "Manifold Regularization: A Geometric Framework for Learning from Labeled and Unlabeled Examples",
  journal   = jmlr,
  volume    = "7",
  pages     = "2399--2434",
  year      = "2006",
  publisher = "MIT Press",
  address   = "Cambridge, MA, USA",
  issn      = "1533-7928"
}

@article{Bell-Sejnowski95,
  author  = {Anthony J. Bell and Terrence J. Sejnowski},
  title   = {An information maximisation approach to blind separation and blind deconvolution},
  journal = {Neural Computation},
  volume  = {7},
  number  = {6},
  pages   = {1129--1159},
  year    = {1995},
}

@inproceedings{Bellagarda+Nahamoo89,
  author    = {J. R. Bellegarda and D. Nahamoo},
  title     = {Tied Mixture Continuous Parameter Models for Large Vocabulary Isolated Speech Recognition},
  booktitle = icassp,
  address   = {Glasgow, Scotland},
  pages     = {13--16},
  year      = {1989},
}

@InProceedings{Bellegarda97,
  author =       "J. R. Bellegarda",
  booktitle =    "Proceedings of Eurospeech 97",
  title =        "A latent semantic analysis framework for large-span
                 language modeling",
  address =      "Rhodes, Greece",
  pages =        "1451--1454",
  year =         "1997",
}

@book{Bellman57,
  author    = {R. E. Bellman},
  title     = {Dynamic Programming},
  publisher = {Princeton University Press},
  address   = {NJ},
  year      = {1957},
}

@book{Bellman61,
  author    = {R. Bellman},
  title     = {Adaptive Control Processes: {A} Guided Tour},
  publisher = {Princeton University Press},
  address   = {New Jersey},
  year      = {1961},
}

@Book{Bellman74,
  author =       "R. Bellman",
  title =        "Introduction to Matrix Analysis",
  publisher =    "McGraw-Hill",
  address =      "New York, NY",
  edition =      "Second",
  year =         "1974",
}

@InProceedings{ben-david03,
  author =       "Shai Ben-David and Reba Schuller",
  booktitle =    colt03,
  title =        "Exploiting Task Relatedness for Multiple Task
                 Learning",
  crossref =     "colt03",
  pages =        "567--580",
  year =         "2003",
}

@inproceedings{BenDucVin01,
  author    = {Yoshua Bengio and R\'ejean Ducharme and Pascal Vincent},
  editor    = NIPS13ed,
  title     = {A Neural Probabilistic Language Model},
  booktitle = NIPS13,
  publisher = {MIT Press},
  pages     = {932--938},
  year      = {2001},
}

%% Variant of BenDucVin01 with the editors and an abbreviated booktitle
%% spelled out literally instead of via string macros.
@inproceedings{BenDucVin01-small,
  author    = {Yoshua Bengio and R\'ejean Ducharme and Pascal Vincent},
  editor    = {Todd K. Leen and Thomas G. Dietterich and Volker Tresp},
  title     = {A Neural Probabilistic Language Model},
  booktitle = {Advances in NIPS 13},
  publisher = {MIT Press},
  pages     = {932--938},
  year      = {2001},
}

%% Shortest variant of BenDucVin01: initials only, abbreviated booktitle,
%% no editor or publisher.
@inproceedings{BenDucVin01-short,
  author    = {Y. Bengio and R. Ducharme and P. Vincent},
  title     = {A Neural Probabilistic Language Model},
  booktitle = {Adv. Neural Inf. Proc. Sys. 13},
  pages     = {932--938},
  year      = {2001},
}

@techreport{Bengio+al-2004,
  author      = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
  title       = {Efficient Non-Parametric Function Induction in Semi-Supervised Learning},
  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
  number      = {1247},
  year        = {2004},
}

@incollection{Bengio+al-2005,
  author    = {Yoshua Bengio and Nicolas {Le Roux} and Pascal Vincent and Olivier Delalleau and Patrice Marcotte},
  editor    = NIPS18ed,
  title     = {Convex Neural Networks},
  booktitle = NIPS18,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {123--130},
  year      = {2006},
}

%% Variant of Bengio+al-2005 with a literal short booktitle and no editor.
@incollection{Bengio+al-2005-small,
  author    = {Yoshua Bengio and Nicolas {Le Roux} and Pascal Vincent and Olivier Delalleau and Patrice Marcotte},
  title     = {Convex Neural Networks},
  booktitle = {NIPS 18},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {123--130},
  year      = {2006},
}

@InCollection{Bengio+al-spectral-2006-short,
  author =       "Yoshua Bengio and Olivier Delalleau and Nicolas {Le
                 Roux} and Jean-Fran{\c{c}}ois Paiement and Pascal Vincent
                 and Marie Ouimet",
  editor =       "Isabelle Guyon and Steve Gunn and Masoud Nikravesh and
                 Lotfi Zadeh",
  booktitle =    "Feature Extraction, Foundations and Applications",
  title =        "Spectral Dimensionality Reduction",
  publisher =    "Springer",
  year =         "2006",
}

@inproceedings{Bengio+Bengio-NIPS99,
  author    = {Yoshua Bengio and Samy Bengio},
  editor    = NIPS12ed,
  title     = {Modeling High-Dimensional Discrete Data with Multi-Layer Neural Networks},
  booktitle = NIPS12,
  publisher = {MIT Press},
  pages     = {400--406},
  year      = {2000},
}

@article{Bengio+Bengio-trnn2000,
  author  = {S. Bengio and Y. Bengio},
  title   = {Taking on the Curse of Dimensionality in Joint Distributions Using Neural Networks},
  journal = {IEEE Transactions on Neural Networks, special issue on Data Mining and Knowledge Discovery},
  volume  = {11},
  number  = {3},
  pages   = {550--557},
  year    = {2000},
  url     = {http://www.iro.umontreal.ca/~lisa/pointeurs/jdm.pdf},
}

%% Variant of Bengio+Bengio-trnn2000 with an abbreviated journal name.
@article{Bengio+Bengio-trnn2000-small,
  author  = {S. Bengio and Y. Bengio},
  title   = {Taking on the Curse of Dimensionality in Joint Distributions Using Neural Networks},
  journal = {IEEE Trans. Neural Networks},
  volume  = {11},
  number  = {3},
  pages   = {550--557},
  year    = {2000},
  url     = {http://www.iro.umontreal.ca/~lisa/pointeurs/jdm.pdf},
}

@article{Bengio+Chapados2003,
  author  = {Yoshua Bengio and Nicolas Chapados},
  title   = {Extensions to Metric-Based Model Selection},
  journal = jmlr,
  volume  = {3},
  pages   = {1209--1227},
  month   = mar,
  year    = {2003},
  note    = {Special Issue on Feature Selection},
}

@techreport{Bergstra-TR2008,
  author      = {James Bergstra and Yoshua Bengio and Jerome Louradour},
  title       = {Image Classification with Biologically Motivated Neuron Models},
  institution = {Dept. IRO, Universit\'e de Montr\'eal},
  number      = {---},
  year        = {2008},
}

%% NOTE(review): the key says 2009 but the year field says 2008; left as
%% found. The submission status now lives in note, because standard styles
%% ignore publisher on article entries.
@article{Bergstra-2009,
  author =       "James Bergstra and Yoshua Bengio and Jerome Louradour",
  title =        "Suitability of Complex Cell Models for Object Categorization",
  journal = {Computational Neuroscience},
  year = 2008,
  note = "Submitted",
}

@TechReport{Bengio+Frasconi94a,
  author =       "Y. Bengio and P. Frasconi",
  title =        "An {EM} Approach to Learning Sequential Behavior",
  number =       "DSI 11/94",
  institution =  "Universit\`a di Firenze",
  year =         "1994",
}

% Author names: the cedilla is written as a LaTeX escape (classic BibTeX is 8-bit),
% and the compound surname Le Roux is braced so it is parsed as one last name.
@article{Bengio-nc-2004,
 author = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux} and Jean-Fran{\c{c}}ois Paiement and Pascal Vincent and Marie Ouimet},
 title = {Learning eigenfunctions links spectral embedding and kernel {PCA}},
 journal = {Neural Computation},
 volume = 16,
 number = 10,
 year = 2004,
 pages = {2197--2219},
}

% Compact variant: small-font title, abbreviated journal, issue folded into volume.
% Author names use a LaTeX-escaped cedilla and a braced compound surname (Le Roux).
@article{Bengio-nc-2004-small,
 author = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux} and Jean-Fran{\c{c}}ois Paiement and Pascal Vincent and Marie Ouimet},
 title = {{\small{Learning eigenfunctions links spectral embedding and kernel {PCA}}}},
 journal = {Neural Comp.},
 volume = {16(10)},
 year = 2004,
 pages = {2197--2219},
}

@article{Bengio+Grandvalet-JMLR-2004,
  author  = {Yoshua Bengio and Yves Grandvalet},
  title   = {No Unbiased Estimator of the Variance of {K}-Fold Cross-Validation},
  journal = jmlr,
  volume  = {5},
  pages   = {1089--1105},
  year    = {2004},
}

% Institution name carries its accents as LaTeX escapes.
@TechReport{Bengio+Grandvalet-TR-2003,
  author =       "Yoshua Bengio and Yves Grandvalet",
  title =        "No Unbiased Estimator of the Variance of {K}-Fold
                 Cross-Validation",
  number =       "TR-2003-1234",
  institution =  "Universit\'e de Montr\'eal, dept. IRO",
  year =         "2003",
}

@incollection{Bengio+Lecun-chapter2007,
  author    = {Yoshua Bengio and Yann {LeCun}},
  editor    = {L. Bottou and O. Chapelle and D. DeCoste and J. Weston},
  title     = {Scaling Learning Algorithms towards {AI}},
  booktitle = {Large Scale Kernel Machines},
  publisher = {MIT Press},
  year      = {2007},
}

% Compact variant: initials only, no editor or publisher fields.
@incollection{Bengio+Lecun-chapter2007-small,
  author    = {Y. Bengio and Y. {LeCun}},
  title     = {Scaling Learning Algorithms towards {AI}},
  booktitle = {Large Scale Kernel Machines},
  year      = {2007},
}

% booktitle is the string macro ICPR94.
@inproceedings{Bengio+LeCun94b,
  author    = {Yoshua Bengio and Yann {LeCun}},
  title     = {Word Normalization For On-Line Handwritten Word Recognition},
  booktitle = ICPR94,
  pages     = {409--413},
  year      = {1994},
}

@article{Bengio+Monperrus+Larochelle-2006,
  author  = {Yoshua Bengio and Martin Monperrus and Hugo Larochelle},
  title   = {Nonlocal Estimation of Manifold Structure},
  journal = {Neural Computation},
  volume  = {18},
  number  = {10},
  pages   = {2509--2528},
  year    = {2006},
}

% editor and booktitle are the string macros NIPS17ed and NIPS17.
@inproceedings{Bengio+Monperrus-2005,
  author    = {Yoshua Bengio and Martin Monperrus},
  editor    = NIPS17ed,
  title     = {Non-Local Manifold Tangent Learning},
  booktitle = NIPS17,
  pages     = {129--136},
  publisher = {{MIT} Press},
  year      = {2005},
  url       = {http://www.iro.umontreal.ca/~lisa/pointeurs/tangent\_learner\_nips2004.pdf},
}

@inproceedings{Bengio+Senecal-2003-small,
  author    = {Yoshua Bengio and Jean-S\'ebastien Sen\'ecal},
  title     = {Quick Training of Probabilistic Neural Nets by Importance Sampling},
  booktitle = {Proceedings of AISTATS 2003},
  year      = {2003},
}

% The cedilla is written {\c{c}}; the URL carries an explicit scheme so it resolves as a link.
@TechReport{Bengio+Vincent+Paiement-TR2003,
  author =       "Yoshua Bengio and Pascal Vincent and Jean-Fran{\c{c}}ois
                 Paiement",
  title =        "Learning Eigenfunctions of Similarity: Linking
                 Spectral Clustering and Kernel {PCA}",
  number =       "1232",
  institution =  "D\'epartement d'informatique et recherche
                 op\'erationnelle, Universit\'e de Montr\'eal",
  year =         "2003",
  URL =          "http://www.iro.umontreal.ca/~lisa/pointeurs/TR1232.pdf",
}

% The number field is present but empty.
@techreport{Bengio-decision-trees-TR-2007,
  author      = {Yoshua Bengio and Olivier Delalleau and Clarence Simard},
  title       = {Trees do not Generalize to New Variations},
  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
  number      = {},
  year        = {2007},
}

% Institution name carries its accents as LaTeX escapes.
@TechReport{Bengio-decision-trees07,
  author =       "Yoshua Bengio and Olivier Delalleau and Clarence
                 Simard",
  title =        "Decision Trees do not Generalize to New Variations",
  number =       "1304",
  institution =  "Universit\'e de Montr\'eal, Dept. IRO",
  year =         "2007",
  url =          "http://www.iro.umontreal.ca/~lisa/pointeurs/bengio+al-tr1304.pdf",
}

%The following entry is deprecated: it is a duplicate of the preceding tech report!
%There was only one .tex file that was using it. I modified it.
@techreport{Bengio-Trees-TR2007,
  author      = {Yoshua Bengio and Olivier Delalleau and Clarence Simard},
  title       = {Decision Trees do not Generalize to New Variations},
  institution = {Dept. IRO, Universit\'e de Montr\'eal},
  number      = {1304},
  year        = {2007},
  url         = {http://www.iro.umontreal.ca/~lisa/pointeurs/bengio+al-tr1304.pdf},
}

@article{Bengio-hmms99,
  author  = {Yoshua Bengio},
  title   = {Markovian Models for Sequential Data},
  journal = {Neural Computing Surveys},
  volume  = {2},
  pages   = {129--162},
  year    = {1999},
}

@article{bengio-hyper-NC00,
  author  = {Yoshua Bengio},
  title   = {Gradient-Based Optimization of Hyperparameters},
  journal = {Neural Computation},
  volume  = {12},
  number  = {8},
  pages   = {1889--1900},
  year    = {2000},
}

@techreport{bengio-hyper-TR98,
  author      = {Yoshua Bengio},
  title       = {Continuous Optimization of Hyper-Parameters for Non-{IID} Data},
  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
  year        = {1998},
  note        = {unpublished manuscript},
}

% The journal field holds the submission status text rather than a journal name.
@article{Bengio-Hyper-Weight-Decay-nips,
  author  = {Simon Latendresse and Yoshua Bengio},
  title   = {Linear Regression and the Optimization of Hyper-Parameters},
  journal = {submitted to NIPS'99},
  year    = {1999},
}

@techreport{Bengio-Hyper-Weight-Decay-TR,
  author      = {Yoshua Bengio and Simon Latendresse},
  title       = {Soft Variable Selection with Numerical Optimization of Weight Decays},
  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
  year        = {1999},
  note        = {in preparation},
}

% The URL carries an explicit scheme so url-aware styles produce a working link.
@Article{Bengio-ijns97,
  author =       "Yoshua Bengio",
  title =        "Using a Financial Training Criterion Rather than a
                 Prediction Criterion",
  journal =      "International Journal of Neural Systems",
  year =         "1997",
  volume =       {8},
  number =       {4},
  note =         "Special issue on noisy time-series",
  pages =        {433--443},
  URL =          "http://www.iro.umontreal.ca/~lisa/pointeurs/profitcost.ps",
}

% journal is the string macro ieeetrnn.
@article{Bengio-IEEETRNN-2001,
  author  = {Yoshua Bengio and Vincent-Philippe Lauzon and R\'ejean Ducharme},
  title   = {Experiments on the Application of {IOHMM}s to Model Financial Returns Series},
  journal = ieeetrnn,
  volume  = {12},
  number  = {1},
  pages   = {113--123},
  year    = {2001},
}

@inproceedings{Bengio-Larochelle-NLMP-NIPS-2006,
  author    = {Yoshua Bengio and Hugo Larochelle and Pascal Vincent},
  editor    = NIPS18ed,
  title     = {Non-Local Manifold Parzen Windows},
  booktitle = NIPS18,
  pages     = {115--122},
  publisher = {MIT Press},
  year      = {2006},
}

@techreport{Bengio-Larochelle-NLMP-TR-2005,
  author      = {Yoshua Bengio and Hugo Larochelle},
  title       = {Non-Local Manifold Parzen Windows},
  institution = {D\'epartement d'informatique et recherche op\'erationnelle, Universit\'e de Montr\'eal},
  number      = {1264},
  year        = {2005},
}

%This submission was rejected and later accepted to NIPS; see Bengio-localfailure-NIPS-2006
@inproceedings{Bengio-localfailure-icml-2005,
  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
  title     = {The Curse of Dimensionality for Local Kernel Machines},
  booktitle = {submitted to ICML 2005},
  year      = {2005},
}

@incollection{Bengio-localfailure-NIPS-2006,
  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
  editor    = NIPS18ed,
  title     = {The Curse of Highly Variable Functions for Local Kernel Machines},
  booktitle = NIPS18,
  pages     = {107--114},
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2006},
}

% Compact variant: literal short booktitle and no editor field.
@incollection{Bengio-localfailure-NIPS-2006-small,
  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
  title     = {The Curse of Highly Variable Functions for Local Kernel Machines},
  booktitle = {NIPS 18},
  pages     = {107--114},
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2006},
}

@inproceedings{Bengio-localfailure-snowbird-2005,
  author    = {Yoshua Bengio and Olivier Delalleau and Nicolas {Le Roux}},
  title     = {The Curse of Dimensionality for Local Kernel Machines},
  booktitle = {The Learning Workshop},
  address   = {Snowbird, Utah},
  year      = {2005},
}

@inproceedings{HonglakLee-2007,
  author    = {Honglak Lee and Alexis Battle and Rajat Raina and Andrew Ng},
  editor    = NIPS19ed,
  title     = {Efficient sparse coding algorithms},
  booktitle = NIPS19,
  pages     = {801--808},
  publisher = {MIT Press},
  year      = {2007},
}

% Compact variant: initials, short literal booktitle, no pages.
@inproceedings{Bengio-nips-2006-small,
  author    = {Y. Bengio and P. Lamblin and D. Popovici and H. Larochelle},
  title     = {Greedy Layer-Wise Training of Deep Networks},
  booktitle = {Advances in NIPS 19},
  year      = {2007},
}

% Short variant: initials and abbreviated literal booktitle.
@inproceedings{Bengio-nips-2006-short,
  author    = {Y. Bengio and P. Lamblin and D. Popovici and H. Larochelle},
  title     = {Greedy Layer-Wise Training of Deep Networks},
  booktitle = {Adv. Neural Inf. Proc. Sys. 19},
  pages     = {153--160},
  year      = {2007},
}

% The cedilla applies to the single letter c, written as the special character {\c{c}}.
@InProceedings{Bengio-nips2004,
  author =       "Yoshua Bengio and Jean-Fran{\c{c}}ois Paiement and Pascal
                 Vincent and Olivier Delalleau and Nicolas {Le Roux} and
                 Marie Ouimet",
  editor =       NIPS16ed,
  booktitle =    NIPS16,
  title =        "Out-of-Sample Extensions for {LLE}, {Isomap}, {MDS},
                 {Eigenmaps}, and {Spectral} {Clustering}",
  publisher =    "MIT Press",
  year =         "2004",
}

% Same fields as the Bengio-nips2004 entry under a second key.
% The cedilla applies to the single letter c, written as the special character {\c{c}}.
@InProceedings{Bengio-nips2003,
  author =       "Yoshua Bengio and Jean-Fran{\c{c}}ois Paiement and Pascal
                 Vincent and Olivier Delalleau and Nicolas {Le Roux} and
                 Marie Ouimet",
  editor =       NIPS16ed,
  booktitle =    NIPS16,
  title =        "Out-of-Sample Extensions for {LLE}, {Isomap}, {MDS},
                 {Eigenmaps}, and {Spectral} {Clustering}",
  publisher =    "MIT Press",
  year =         "2004",
}

@incollection{Bengio-NIPS2007,
  author    = {Yoshua Bengio and Pascal Lamblin and Dan Popovici and Hugo Larochelle},
  editor    = NIPS19ed,
  title     = {Greedy Layer-Wise Training of Deep Networks},
  booktitle = NIPS19,
  pages     = {153--160},
  publisher = {MIT Press},
  year      = {2007},
}

@inproceedings{Bengio-nnlm2001,
  author    = {Yoshua Bengio and R{\'e}jean Ducharme and Pascal Vincent},
  editor    = NIPS13ed,
  title     = {A Neural Probabilistic Language Model},
  booktitle = NIPS13,
  pages     = {933--938},
  publisher = {{MIT} Press},
  year      = {2001},
  url       = {http://www.iro.umontreal.ca/~lisa/pointeurs/nips00-lm.ps},
}

@article{Bengio-nnlm2003,
  author  = {Yoshua Bengio and R{\'e}jean Ducharme and Pascal Vincent and Christian Jauvin},
  title   = {A Neural Probabilistic Language Model},
  journal = jmlr,
  volume  = {3},
  pages   = {1137--1155},
  year    = {2003},
}

% Compact variant: initials and a literal journal acronym.
@article{Bengio-nnlm2003-small,
  author  = {Y. Bengio and R. Ducharme and P. Vincent and C. Jauvin},
  title   = {A Neural Probabilistic Language Model},
  journal = {JMLR},
  volume  = {3},
  pages   = {1137--1155},
  year    = {2003},
}

% The journal field holds the submission status text.
@article{Bengio-NonStat-Hyper-ML,
  author  = {Yoshua Bengio and Charles Dugas},
  title   = {Learning Simple Non-Stationarities with Hyper-Parameters},
  journal = {submitted to Machine Learning},
  year    = {1999},
}

% journal is the string macro prel.
@article{Bengio-prel92,
  author  = {Y. Bengio and M. Gori and R. \mbox{De Mori}},
  title   = {Learning the Dynamic Nature of Speech with Back-propagation for Sequences},
  journal = prel,
  volume  = {13},
  number  = {5},
  pages   = {375--385},
  year    = {1992},
  note    = {(Special issue on Artificial Neural Networks)},
}

% Final publication data (volume, issue, pages) replaces the earlier to-appear placeholder.
@Article{Bengio-2008,
  author =       "Yoshua Bengio",
  title =        "Learning Deep Architectures for {AI}",
  journal =      "Foundations and Trends in Machine Learning",
  volume =       "2",
  number =       "1",
  pages =        "1--127",
  year =         "2009",
}

% Short variant: initial only and abbreviated journal name.
@article{Bengio-2009-short,
  author  = {Y. Bengio},
  title   = {Learning Deep Architectures for {AI}},
  journal = {Foundations \& Trends in Mach. Learn.},
  volume  = {2},
  number  = {1},
  pages   = {1--127},
  year    = {2009},
}

@techreport{Bengio-TR1312-small,
  author      = {Yoshua Bengio},
  title       = {Learning Deep Architectures for {AI}},
  institution = {U. Montr\'eal, dept. IRO},
  number      = {1312},
  year        = {2007},
}

@inproceedings{Bengio-transducers-98,
  author    = {Y. Bengio and S. Bengio and J. F. Isabelle and Y. Singer},
  editor    = NIPS10ed,
  title     = {Shared Context Probabilistic Transducers},
  booktitle = NIPS10,
  pages     = {409--415},
  publisher = {MIT Press},
  year      = {1998},
}

@article{Bengio-trnn92,
  author  = {Y. Bengio and R. \mbox{De Mori} and G. Flammia and R. Kompe},
  title   = {Global Optimization of a Neural Network-Hidden {Markov} Model Hybrid},
  journal = ieeetrnn,
  volume  = {3},
  number  = {2},
  pages   = {252--259},
  year    = {1992},
}

% OPTnote is not a standard field name, so standard styles ignore it.
@article{Bengio-trnn93,
  author  = {Y. Bengio and P. Simard and P. Frasconi},
  title   = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
  journal = ieeetrnn,
  volume  = {5},
  number  = {2},
  pages   = {157--166},
  year    = {1994},
  url     = {http://www.iro.umontreal.ca/~lisa/pointeurs/ieeetrnn94.pdf},
  OPTnote = {(Special Issue on Recurrent Neural Networks)},
}

@article{Bengio-trnn96,
  author  = {Y. Bengio and P. Frasconi},
  title   = {Input/{Output} {HMM}s for Sequence Processing},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {7},
  number  = {5},
  pages   = {1231--1249},
  year    = {1996},
}

% Accents are written as LaTeX escapes: classic BibTeX is 8-bit and raw non-ASCII can garble output.
@TechReport{Bengio2003,
  author =       "Christopher Kermorvant and Yoshua Bengio",
  title =        "Extracting Hidden Sense Probabilities from Bitexts",
  number =       "1231",
  institution =  "Universit\'e de Montr\'eal",
  year =         "2003",
}

% The surname De Mori is braced so BibTeX parses it as one last name.
% publisher holds the publisher name only; address holds the publisher's city.
@InProceedings{Bengio89b,
  author =       "Y. Bengio and P. Cosi and R. Cardin and R. {De Mori}",
  editor =       NIPS1ed,
  booktitle =    NIPS1,
  title =        "Use of multi-layered networks for coding speech with
                 phonetic features",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "224--231",
  year =         "1989",
}

@phdthesis{Bengio91,
  author  = {Yoshua Bengio},
  title   = {Artificial Neural Networks and their Application to Sequence Recognition},
  school  = {McGill University, (Computer Science)},
  address = {Montreal, Qc., Canada},
  year    = {1991},
}

% booktitle is the string macro ijcnn; OPTaddress is not a standard field name.
@inproceedings{bengio91x,
  author     = {Y. Bengio and R. {De Mori} and G. Flammia and R. Kompe},
  title      = {Global Optimization of a Neural Network - Hidden Markov Model Hybrid},
  booktitle  = ijcnn,
  volume     = {2},
  pages      = {789--794},
  year       = {1991},
  OPTaddress = {Seattle WA},
}

@article{Becker92,
  author  = "Sue Becker and Geoffrey Hinton",
  title   = "A self-organizing neural network that discovers surfaces in random-dot stereograms",
  journal = "Nature",
  volume  = "355",
  pages   = "161--163",
  year    = "1992",
}
 
@article{Bengio93,
  author  = {Yoshua Bengio},
  title   = {A Connectionist Approach to Speech Recognition},
  journal = {International Journal on Pattern Recognition and Artificial Intelligence},
  volume  = {7},
  number  = {4},
  pages   = {647--668},
  year    = {1993},
  note    = {special issue entitled Advances in Pattern Recognition Systems using Neural Networks},
}

% Single-page contribution: pages is a single number, not a degenerate range.
@InProceedings{Bengio93e,
  author =       "S. Bengio and Y. Bengio and J. Cloutier and J.
                 Gecsei",
  editor =       "S. Gielen and B. Kappen",
  booktitle =    "Proceedings of the International Conference on
                 Artificial Neural Networks 1993",
  title =        "Generalization of a Parametric Learning Rule",
  publisher =    "Springer-Verlag",
  address =      "Amsterdam, The Netherlands",
  pages =        "502",
  year =         "1993",
}

@article{bengio:1999:nc,
  author  = {S. Bengio and Y. Bengio and J. Robert and G. B\'elanger},
  title   = {Stochastic Learning of Strategic Equilibria for Auctions},
  journal = {Neural Computation},
  volume  = {11},
  number  = {5},
  pages   = {1199--1209},
  year    = {1999},
}

% NOTE(review): the key says 1999 while the year field is 1998 -- confirm which is intended.
@article{bottou+al:1999,
  author  = {L. Bottou and P. Haffner and P.G. Howard and P. Simard and Y. Bengio},
  title   = {High quality document image compression with {DjVu}},
  journal = {Journal of Electronic Imaging},
  volume  = {7},
  number  = {3},
  pages   = {410--425},
  year    = {1998},
}

@article{bengio+al:1998,
  author  = {Y. Bengio and F. Gingras and B. Goulard and J.-M. Lina},
  title   = {Gaussian Mixture Densities for Classification of Nuclear Power Plant Data},
  journal = {Computers and Artificial Intelligence, special issue on Intelligent Technologies for Electric and Nuclear Power Plants},
  volume  = {17},
  number  = {2--3},
  pages   = {189--209},
  year    = {1998},
}

@article{GingrasBengio:1998,
  author  = {F. Gingras and Y. Bengio},
  title   = {Handling Asynchronous or Missing Financial Data with Recurrent Networks},
  journal = {International Journal of Computational Intelligence and Organizations},
  volume  = {1},
  number  = {3},
  pages   = {154--163},
  year    = {1998},
}

@article{BengioS95,
  author  = {S. Bengio and Y. Bengio and J. Cloutier},
  title   = {On the search for new learning rules for {ANN}s},
  journal = {Neural Processing Letters},
  volume  = {2},
  number  = {4},
  pages   = {26--30},
  year    = {1995},
}

% The surname De Mori is braced so BibTeX parses it as one last name.
@Article{BengioMori89,
  author =       "Y. Bengio and R. {De Mori}",
  title =        "Use of multilayer networks for the recognition of phonetic features and phonemes",
  journal =      "Computational Intelligence",
  volume =       "5",
  pages =        "134--141",
  year =         "1989",
}

@techreport{BengioTR1178,
  author      = {Yoshua Bengio and R\'ejean Ducharme and Pascal Vincent},
  title       = {A Neural Probabilistic Language Model},
  institution = {Dept. IRO, Universit\'e de Montr\'eal},
  number      = {1178},
  year        = {2002},
}

@techreport{BengioTR1215,
  author      = {Yoshua Bengio},
  title       = {New Distributed Probabilistic Language Models},
  institution = {Dept. IRO, Universit\'e de Montr\'eal},
  number      = {1215},
  year        = {2002},
}

@book{Bengio_book96,
  author    = {Yoshua Bengio},
  title     = {Neural Networks for Speech and Sequence Processing},
  publisher = {International Thomson Computer Press},
  year      = {1996},
}

% booktitle is the string macro icnn.
@inproceedings{Bengio_icnn93,
  author    = {Y. Bengio and P. Frasconi and P. Simard},
  title     = {The problem of learning long-term dependencies in recurrent networks},
  booktitle = icnn,
  pages     = {1183--1195},
  publisher = {IEEE Press},
  address   = {San Francisco},
  year      = {1993},
  note      = {(invited paper)},
}

@article{Bengio_trnn94,
  author  = {Y. Bengio and P. Simard and P. Frasconi},
  title   = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
  journal = ieeetrnn,
  volume  = {5},
  number  = {2},
  pages   = {157--166},
  year    = {1994},
  note    = {Special Issue on Recurrent Neural Networks, March 94},
}

@book{Benveniste90,
  author    = {A. Benveniste and M. Metivier and P. Priouret},
  title     = {Adaptive Algorithms and Stochastic Approximations},
  publisher = {Springer-Verlag},
  address   = {Berlin, New York},
  year      = {1990},
}

@book{Berger85,
  author    = {J. Berger},
  title     = {Statistical Decision Theory and {Bayesian} Analysis},
  publisher = {Springer},
  year      = {1985},
}

% The URL carries an explicit scheme so url-aware styles produce a working link.
% text is not a standard field name, so standard styles ignore it.
@Misc{berger97improved,
  author =       "A. Berger",
  title =        "The improved iterative scaling algorithm: {A} gentle
                 introduction",
  year =         "1997",
  URL =          "http://citeseer.ist.psu.edu/berger97improved.html",
  text =         "Berger, A. (1997). The improved iterative scaling
                 algorithm: A gentle introduction.
                 http://www.cs.cmu.edu/afs/cs/user/aberger/www/ps/scaling.ps.",
}

% Page range uses the double hyphen (en-dash); month uses the predefined jul macro.
@article{Berkes-Wiskott-2005,
    author = {Berkes, Pietro and Wiskott, Laurenz},
    title = {Slow Feature Analysis Yields a Rich Repertoire of Complex Cell Properties},
    journal = {Journal of Vision},
    ISSN = {1534-7362},
    volume = {5},
    number = {6},
    pages = {579--602},
    year = {2005},
    month = jul
}

% journal is the string macro PTRSL.
@article{Beurle56,
  author  = {R. L. Beurle},
  title   = {Properties of a Mass of Cells Capable of Regenerating Pulses},
  journal = PTRSL,
  volume  = {240},
  pages   = {55--94},
  year    = {1956},
}

% booktitle spelling corrected from Proceeding to Proceedings.
@InProceedings{Beyer+al-1999,
  author =       "Kevin S. Beyer and Jonathan Goldstein and Raghu Ramakrishnan
                 and Uri Shaft",
  booktitle =    "Proceedings of the 7th International Conference on
                 Database Theory",
  title =        "When Is ``Nearest Neighbor'' Meaningful?",
  publisher =    "Springer-Verlag",
  pages =        "217--235",
  year =         "1999",
  ISBN =         "3-540-65452-6",
}

% OPTannote is not a standard field name and is empty here.
@techreport{Bianchini-rbf,
  author      = {M. Bianchini and P. Frasconi and M. Gori},
  title       = {Learning without Local Minima in Radial Basis Function Networks},
  institution = {Universit\`a di Firenze},
  year        = {1992},
  OPTannote   = {},
}

@article{Bianchini-trnn94,
  author  = {M. Bianchini and M. Gori and M. Maggini},
  title   = {On the Problem of Local Minima in Recurrent Neural Networks},
  journal = ieeetrnn,
  volume  = {5},
  number  = {2},
  pages   = {167--177},
  year    = {1994},
  OPTnote = {(Special Issue on Recurrent Neural Networks)},
}

% number holds only the report number (the style supplies the report-type label);
% year is the four-digit revision year, and the full date history is kept in note.
@TechReport{bickel+ritov95,
  author =       "P. J. Bickel and Y. Ritov",
  title =        "Inference in hidden {Markov} models {I}: local
                 asymptotic normality in the stationary case",
  number =       "383",
  institution =  "Statistics Department, University of California,
                 Berkeley",
  year =         "1995",
  note =         "February 1994, revised April 1995",
}

% journal is the string macro jneuro; no pages field is recorded.
@article{Bienenstock82,
  author  = {E. L. Bienenstock and L. N. Cooper and P. W. Munro},
  title   = {Theory for the Development of Neuron Selectivity: Orientation Specificity and Binocular Interaction in Visual Cortex},
  journal = jneuro,
  volume  = {2},
  year    = {1982},
}

% Author surname spelled Biederman; the citation key keeps its old spelling so
% existing citations still resolve.
% added-by, date-added, project and theme are not standard field names.
@Article{BierdermanI1987,
  author =       "Irving Biederman",
  title =        "Recognition-by-Components: {A} Theory of Human Image
                 Understanding",
  journal =      "Psychological Review",
  volume =       "94",
  number =       "2",
  publisher =    "American Psychological Association, Inc.",
  pages =        "115--147",
  year =         "1987",
  added-by =     "Daniel Acevedo",
  date-added =   "Thu Oct 24 12:45:17 2002",
  project =      "genetic",
  theme =        "perception and vr and tech and natural and medicine
                 and art",
}

% The surname Van den Bout is braced so BibTeX does not split it into first/von/last parts.
% publisher holds the publisher name only; address holds the publisher's city.
@InProceedings{Bilbro89a,
  author =       "G. Bilbro and R. Mann and T. K. Miller and W. E.
                 Snyder and D. E. {Van den Bout} and M. White",
  editor =       NIPS1ed,
  booktitle =    NIPS1,
  title =        "Optimization by Mean Field Annealing",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "91--98",
  year =         "1989",
}

% publisher holds the publisher name only; address holds the publisher's city.
@InProceedings{Bilbro89b,
  author =       "G. L. Bilbro and W. Snyder",
  editor =       NIPS1ed,
  booktitle =    NIPS1,
  title =        "Range Image Restoration Using Mean Field Annealing",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "594--601",
  year =         "1989",
}

% journal is the string macro rmp.
@article{Binder86,
  author  = {K. Binder and A. P. Young},
  title   = {Spin Glasses: Experimental Facts, Theoretical Concepts, and Open Questions},
  journal = rmp,
  volume  = {58},
  pages   = {801--976},
  year    = {1986},
}

% Co-author surname and book title corrected (Heermann; Statistical Physics).
@Book{Binder88,
  author =       "K. Binder and D. W. Heermann",
  title =        "Monte Carlo Simulation in Statistical Physics: An Introduction",
  publisher =    "Springer-Verlag",
  address =      "Berlin",
  year =         "1988",
}

@book{bishop-book2006,
  author    = {Christopher M. Bishop},
  title     = {Pattern Recognition and Machine Learning},
  publisher = {Springer},
  year      = {2006},
}

@book{bishop-book95,
  author    = {Christopher Bishop},
  title     = {Neural Networks for Pattern Recognition},
  publisher = {Oxford University Press},
  address   = {London, UK},
  year      = {1995},
}

@article{bishop92,
  author  = {Christopher Bishop},
  title   = {Exact calculation of the {Hessian} matrix for the multi-layer perceptron},
  journal = {Neural Computation},
  volume  = {4},
  number  = {4},
  pages   = {494--501},
  year    = {1992},
}

@article{bishop95training,
  author  = {Christopher M. Bishop},
  title   = {Training with Noise is Equivalent to {Tikhonov} Regularization},
  journal = {Neural Computation},
  volume  = {7},
  number  = {1},
  pages   = {108--116},
  year    = {1995},
}

% 81 is the journal volume (it was stored in number); the issue within the volume is 3.
@Article{Blackscholes73,
  author =       "F. Black and M. Scholes",
  title =        "The Pricing of Options and Corporate Liabilities",
  journal =      "Journal of Political Economy",
  volume =       "81",
  number =       "3",
  pages =        "637--654",
  year =         "1973",
}

% journal is the string macro nature.
@article{Blakemore70,
  author  = {C. Blakemore and G. F. Cooper},
  title   = {Development of the Brain Depends on the Visual Environment},
  journal = nature,
  volume  = {228},
  pages   = {477--478},
  year    = {1970},
}

@incollection{Blitzer-nips17,
  author    = {John Blitzer and Kilian Weinberger and Lawrence Saul and Fernando Pereira},
  editor    = NIPS17ed,
  title     = {Hierarchical Distributed Representations for Statistical Language Modeling},
  booktitle = NIPS17,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2005},
}

% Volume macros set to NIPS17/NIPS17ed, matching the 2005 year and the other
% entries in this file for the same paper.
@InProceedings{Blitzer05,
  author =       "John Blitzer and Kilian Weinberger and Lawrence Saul
                 and Fernando Pereira",
  editor =       NIPS17ed,
  booktitle =    NIPS17,
  title =        "Hierarchical Distributed Representations for
                 Statistical Language Modeling",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2005",
}

@inproceedings{Blitzer2005,
  author    = {J. Blitzer and K. Q. Weinberger and L. K. Saul and F. C. N. Pereira},
  editor    = NIPS17ed,
  title     = {Hierarchical distributed representations for statistical language models},
  booktitle = NIPS17,
  publisher = {{MIT} Press},
  year      = {2005},
}

% journal is the string macro rmp; no pages field is recorded.
@article{Block62,
  author  = {H. D. Block},
  title   = {The Perceptron: {A} Model for Brain Functioning},
  journal = rmp,
  volume  = {34},
  year    = {1962},
}

@inproceedings{Blum+Rivest,
  author    = {A. Blum and R. L. Rivest},
  editor    = NIPS1ed,
  title     = {Training a 3-node Neural Net is {NP}-Complete},
  booktitle = NIPS1,
  pages     = {494--501},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1989},
}

@inproceedings{blum01learning,
  author    = {Avrim Blum and Shuchi Chawla},
  title     = {Learning from Labeled and Unlabeled Data Using Graph Mincuts},
  booktitle = {Proc. 18th International Conf. on Machine Learning},
  pages     = {19--26},
  publisher = {Morgan Kaufmann, San Francisco, CA},
  year      = {2001},
}
  %URL =          "citeseer.ist.psu.edu/blum01learning.html",

% booktitle is the string macro colt98.
@inproceedings{blum98combining,
  author    = {Avrim Blum and Tom Mitchell},
  title     = {Combining Labeled and Unlabeled Data with Co-training},
  booktitle = colt98,
  pages     = {92--100},
  publisher = {Morgan Kaufmann Publishers},
  year      = {1998},
}
  %URL =          "citeseer.ist.psu.edu/blum98combining.html",

% Compact variant: literal short booktitle, no publisher.
@inproceedings{blum98combining-small,
  author    = {Avrim Blum and Tom Mitchell},
  title     = {Combining Labeled and Unlabeled Data with Co-training},
  booktitle = {COLT'98},
  pages     = {92--100},
  year      = {1998},
}
  %URL =          "citeseer.ist.psu.edu/blum98combining.html",

% booktitle is the string macro colt99; the pages field is present but empty.
@inproceedings{blum99,
  author    = {A. Blum and A. Kalai and J. Langford},
  title     = {Beating the hold-out: Bounds for k-fold and progressive cross-validation},
  booktitle = colt99,
  pages     = {},
  year      = {1999},
}

@inproceedings{Blumer86,
  author    = {A. Blumer and A. Ehrenfeucht and D. Haussler and M. Warmuth},
  title     = {Classifying Learnable Geometric Concepts with the Vapnik-Chervonenkis Dimension},
  booktitle = {Proceedings of the Eighteenth Annual ACM Symposium on Theory of Computing},
  pages     = {273--282},
  publisher = {ACM, Salem},
  address   = {Berkeley 1986},
  year      = {1986},
}

@article{Blumer87,
  author  = {A. Blumer and A. Ehrenfeucht and D. Haussler and M. Warmuth},
  title   = {Occam's razor},
  journal = {Inf. Proc. Let.},
  volume  = {24},
  pages   = {377--380},
  year    = {1987},
}

@article{Blumstein79,
  author  = {S. E. Blumstein and K. N. Stevens},
  title   = {Acoustic invariance in speech production: Evidence
             from measurements of the spectral characteristics of
             stop consonants},
  journal = {Journal of the Acoustical Society of America},
  volume  = {66},
  number  = {4},
  pages   = {1001--1018},
  year    = {1979},
}

@Article{Bohm96,
  author =       "G. B{\"o}hm",
  title =        "New approaches in molecular structure prediction",
  journal =      "Biophysical Chemistry",
  volume =       "59",
  pages =        "1--32",
  year =         "1996",
}

@article{Bohr88,
  author  = {H. Bohr and J. Bohr and S. Brunak and R. M. J. Cotterill
             and B. Lautrup and L. Norskov and O. H. Olsen and
             S. B. Petersen},
  title   = {Protein Secondary Structure and Homology by Neural
             Networks: The $\alpha$-Helices in Rhodopsin},
  journal = febsl,
  volume  = {241},
  pages   = {223--228},
  year    = {1988},
}

@inproceedings{bollacker98,
  author    = {Kurt D. Bollacker and Joydeep Ghosh},
  title     = {A Supra-Classifier Architecture for Scalable Knowledge Reuse},
  booktitle = ICML98,
  editor    = ICML98ed,
  pages     = {64--72},
  publisher = ICML98publ,
  address   = {San Francisco, CA, USA},
  year      = {1998},
}

@inproceedings{BonillaE2007,
  author    = {Edwin V. Bonilla and Felix V. Agakov and Christopher K. I. Williams},
  title     = {Kernel Multi-task Learning using Task-specific Features},
  booktitle = {Proceedings of AISTATS 2007},
  year      = {2007},
}

@article{Bonomo94,
  author  = {M. Bonomo and R. Garcia},
  title   = {Can a well-fitted equilibrium asset-pricing model
             produce mean reversion?},
  journal = {Journal of Applied Econometrics},
  volume  = {9},
  pages   = {19--29},
  year    = {1994},
}

@Article{bordes-09,
  author =  {Bordes, Antoine and Bottou, L{\'e}on and Gallinari, Patrick},
  title =   {{SGD-QN}: Careful Quasi-{Newton} Stochastic Gradient Descent},
  journal = {Journal of Machine Learning Research},
  year =    {2009},
  volume =  {10},
  pages =   {1737--1754},
  month =   jul,
}

% Edited volume: Bornstein is the volume editor, so the name is stored in
% the editor field and the title holds only the title proper.
@Book{Bornstein-critical-87,
  editor    = {Bornstein, Marc H.},
  title     = {Sensitive Periods in Development: Interdisciplinary Perspectives},
  publisher = {Lawrence Erlbaum Associates},
  address   = {Hillsdale, NJ},
  year      = {1987},
}


@article{boser-92,
  author  = {B. Boser and E. Sackinger and J. Bromley and Y. {LeCun}
             and L. Jackel},
  title   = {An analog neural network processor with programmable topology},
  journal = {IEEE Journal of Solid-State Circuits},
  volume  = {26},
  number  = {12},
  pages   = {2017--2025},
  month   = dec,
  year    = {1991},
}

@InProceedings{Boser92,
  author =       "Bernhard E. Boser and Isabelle M. Guyon and Vladimir N. Vapnik",
  booktitle =    "Fifth Annual Workshop on Computational Learning
                 Theory",
  title =        "A training algorithm for optimal margin classifiers",
  publisher =    "ACM",
  address =      "Pittsburgh",
  pages =        "144--152",
  year =         "1992",
  doi =          {10.1145/130385.130401},
  isbn =         {0-89791-497-X},
}

@incollection{bottou-bousquet-2008,
  author = {Bottou, L{\'e}on and Bousquet, Olivier},
  title = {The Tradeoffs of Large Scale Learning},
  editor = NIPS20ed,
  booktitle = NIPS20,
  publisher = {MIT Press},
  year = {2008},
  volume = {20},
  address = {Cambridge, MA},
  url = {http://leon.bottou.org/papers/bottou-bousquet-2008},
}

@techreport{Bottou+96,
  author      = {L{\'e}on Bottou and Yoshua Bengio and Yann A. {Le Cun}},
  title       = {Document Analysis with Generalized Transduction},
  institution = {AT\&T Laboratories},
  number      = {HA6156000-960701-01TM},
  address     = {Holmdel, New-Jersey},
  month       = jul,
  year        = {1996},
}

@article{Bottou+LeCun05,
  author  = {L{\'e}on Bottou and Yann {LeCun}},
  title   = {Graph Transformer Networks for Image Recognition},
  journal = {Bulletin of the International Statistical Institute},
  year    = {2005},
}

@techreport{bottou-1996a,
  author      = {L{\'{e}}on Bottou and Yoshua Bengio and Yann {Le Cun}},
  title       = {Document Analysis with Transducers},
  institution = {AT\&T Labs Technical Memorandum},
  number      = {{960701}-{01}-{TM}},
  month       = jun,
  year        = {1996},
}

% NOTE(review): year 2004 suggests the NIPS 16 volume rather than NIPS15;
% confirm which proceedings macro is intended before changing it.
@InProceedings{bottou-lecun-04b,
  author =       "L{\'e}on Bottou and Yann {LeCun}",
  editor =       NIPS15ed,
  booktitle =    NIPS15,
  title =        "Large-Scale On-Line Learning",
  publisher =    "MIT Press",
  year =         "2004",
  original =     "orig/bottou-lecun-04b.ps.gz",
}

@InCollection{bottou-mlss-2004,
  author =       "L{\'e}on Bottou",
  editor =       "Olivier Bousquet and Ulrike von Luxburg",
  booktitle =    "Advanced Lectures on Machine Learning",
  title =        "Stochastic Learning",
  volume =       "3176",
  publisher =    "Springer Verlag",
  address =      "Berlin",
  pages =        "146--168",
  year =         "2004",
  series =       "Lecture Notes in Artificial Intelligence",
  URL =          "http://leon.bottou.org/papers/bottou-mlss-2004",
}

@Article{Bottou90,
  author =       "L. Bottou and F. Fogelman-Souli{\'e} and P. Blanchet and
                 J. S. Lienard",
  key =          "bottou",
  title =        "Speaker independent isolated digit recognition:
                 multilayer perceptrons vs dynamic time warping",
  journal =      "Neural Networks",
  volume =       "3",
  pages =        "453--465",
  year =         "1990",
}

@inproceedings{Bottou91,
  author    = {L. Bottou and P. Gallinari},
  title     = {A Framework for the Cooperation of Learning Algorithms},
  booktitle = NIPS3,
  editor    = NIPS3ed,
  pages     = {781--788},
  address   = {Denver, CO},
  year      = {1991},
}

@article{Bottou92,
  author  = {L. Bottou and V. Vapnik},
  title   = {Local Learning Algorithms},
  journal = nc,
  volume  = {4},
  number  = {6},
  pages   = {888--900},
  year    = {1992},
  key     = {Bottou92},
}

@inproceedings{Bottou94,
  author    = {L. Bottou and C. Cortes and J. S. Denker and H. Drucker
               and I. Guyon and L. D. Jackel and Y. {LeCun} and
               U. A. Muller and E. Sackinger and P. Simard and V. Vapnik},
  title     = {Comparison of classifier methods: a case study in
               handwritten digit recognition},
  booktitle = {International Conference on Pattern Recognition},
  address   = {Jerusalem, Israel},
  year      = {1994},
}

@inproceedings{Bottou97,
  author    = {L{\'e}on Bottou and Yoshua Bengio and Yann {LeCun}},
  title     = {Global Training of Document Processing Systems using
               Graph Transformer Networks},
  booktitle = cvpr97,
  pages     = {490--494},
  publisher = {IEEE},
  address   = {Puerto Rico},
  year      = {1997},
}

@InCollection{Bottou98,
  author =       "L{\'e}on Bottou",
  editor =       "David Saad",
  booktitle =    "Online Learning in Neural Networks",
  title =        "Online Algorithms and Stochastic Approximations",
  publisher =    "Cambridge University Press",
  address =      "Cambridge, UK",
  year =         "1998",
}

@PhdThesis{Bottou_these91,
  author =       "L{\'e}on Bottou",
  title =        "Une approche th{\'e}orique de l'apprentissage
                 connexionniste; applications {\`a} la reconnaissance de la
                 parole",
  school =       "Universit{\'e} de Paris XI",
  year =         "1991",
}

@inproceedings{BouchardG2004,
  author    = {Guillaume Bouchard and Bill Triggs},
  title     = {The Tradeoff Between Generative and Discriminative Classifiers},
  booktitle = {IASC International Symposium on Computational
               Statistics (COMPSTAT)},
  pages     = {721--728},
  address   = {Prague},
  month     = aug,
  year      = {2004},
  keywords  = {LEAR, LAVA},
}
  %URL =          "http://lear.inrialpes.fr/pubs/2004/BT04",

% Published at ICMLA 2007 (IEEE Computer Society), not ICML 2007: the ISBN and
% the ICMLA DOI kept in the comment below this entry both identify that venue.
@inproceedings{BouchardG2007,
 author = {Guillaume Bouchard},
 title = {Bias-Variance Tradeoff in Hybrid Generative-Discriminative Models},
 booktitle = {Proceedings of the Sixth International Conference on Machine Learning and Applications ({ICMLA} 2007)},
 publisher = {IEEE Computer Society},
 year = {2007},
 isbn = {0-7695-3069-9},
 pages = {124--129},
 address = {Washington, DC, USA},
 }
 %doi = {http://dx.doi.org/10.1109/ICMLA.2007.23},

@article{Bourlard-cspla89,
  author  = {H. Bourlard and C. Wellekens},
  title   = {Speech Pattern Discrimination and Multi-Layered Perceptrons},
  journal = cspla,
  volume  = {3},
  pages   = {1--19},
  year    = {1989},
}

@article{Bourlard-pami90,
  author  = {H. Bourlard and C. Wellekens},
  title   = {Links between Hidden {Markov} Models and Multilayer Perceptrons},
  journal = ieeetpami,
  volume  = {12},
  pages   = {1167--1178},
  year    = {1990},
}

@article{Bourlard88,
  author  = {H. Bourlard and Y. Kamp},
  title   = {Auto-Association by Multilayer Perceptrons and
             Singular Value Decomposition},
  journal = biocyb,
  volume  = {59},
  pages   = {291--294},
  year    = {1988},
}

@book{Bourlard93,
  author    = {H. Bourlard and N. Morgan},
  title     = {Connectionist Speech Recognition. {A} Hybrid Approach},
  series    = {The Kluwer international series in engineering and
               computer science},
  volume    = {247},
  publisher = {Kluwer Academic Publishers},
  address   = {Boston},
  year      = {1993},
}

@Article{Bourlard_cspla89,
  author =       "H. Bourlard and C. Wellekens",
  title =        "Speech Pattern Discrimination and Multi-Layered
                 Perceptrons",
  journal =      cspla,
  volume =       "3",
  pages =        "1--19",
  year =         "1989",
}

@InCollection{Bourrely89,
  author =       "J. Bourrely",
  booktitle =    "Hypercube and distributed computers",
  title =        "Parallelization of a Neural Learning Algorithm on a
                 Hypercube",
  publisher =    "Elsevier Science Publishing, North Holland",
  pages =        "219--229",
  year =         "1989",
}

@InProceedings{Bouveyron-Chipman-2007,
  author =       "C. Bouveyron and H. Chipman",
  booktitle =    ijcnn,
  title =        "Visualization and classification of graph-structured
                 data: the case of the {E}nron dataset",
  pages =        "1506--1511",
  year =         "2007",
}

@book{Box73,
  author    = {G. E. P. Box and G. C. Tiao},
  title     = {Bayesian inference in statistical analysis},
  publisher = {Addison-Wesley},
  year      = {1973},
}

@Book{BoxJenkins,
  author =       "G. E. P. Box and G. M. Jenkins",
  title =        "Time Series Analysis: Forecasting and Control",
  publisher =    "Holden-Day",
  address =      "San Francisco",
  year =         "1970",
}

@book{Boyd04,
  author    = {Stephen Boyd and Lieven Vandenberghe},
  title     = {Convex Optimization},
  publisher = {Cambridge University Press},
  address   = {New York, NY, USA},
  year      = {2004},
  isbn      = {0-521-83378-7},
}

@incollection{Bradley+Bagnell-2009,
 title = {Differentiable Sparse Coding},
 author = {David M. Bradley and J. Andrew Bagnell},
 editor =       NIPS21ed,
 booktitle =    NIPS21,
 publisher = {NIPS Foundation},
 year = {2009}
}

@phdthesis{Bradley-thesis,
  author = {David Bradley},
  title  = {Learning in Modular Systems},
  school = {The Robotics Institute, Carnegie Mellon University},
  year   = {2009},
}

@article{Brady-ieeecas89,
  author  = {M. L. Brady and R. Raghavan and J. Slawny},
  title   = {Back-Propagation Fails to Separate Where Perceptrons Succeed},
  journal = ieeetcas,
  volume  = {36},
  pages   = {665--674},
  year    = {1989},
}

@article{Brady89,
  author  = {M. L. Brady and R. Raghavan and J. Slawny},
  title   = {Back-Propagation fails to Separate Where Perceptrons Succeed},
  journal = {IEEE Transactions on Circuits and Systems},
  volume  = {36},
  number  = {5},
  pages   = {665--674},
  year    = {1989},
}

@inproceedings{Bramson90,
  author    = {M. J. Bramson and R. G. Hoptroff},
  title     = {Forecasting the Economic Cycle: a Neural Network Approach},
  booktitle = {Workshop on Neural Networks for Statistical and
               Economic Data},
  address   = {Dublin},
  year      = {1990},
}

@inproceedings{Brand2003,
  author    = {M. Brand},
  title     = {Charting a manifold},
  booktitle = NIPS15,
  editor    = NIPS15ed,
  pages     = {961--968},
  publisher = {{MIT} Press},
  year      = {2003},
}

@article{Brand99,
  author  = {Matthew Brand},
  title   = {Structure Learning in Conditional Probability Models
             via an Entropic Prior and Parameter Extinction},
  journal = {Neural Computation},
  volume  = {11},
  number  = {5},
  pages   = {1155--1182},
  year    = {1999},
}

@inproceedings{Brandt88,
  author    = {R. D. Brandt and Y. Wang and A. J. Laub and S. K. Mitra},
  title     = {Alternative Networks for Solving the Travelling
               Salesman Problem and the List-Matching Problem},
  booktitle = icnn,
  volume    = {2},
  pages     = {333--340},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@InProceedings{BreglerC1994,
  author =       {Christoph Bregler and Stephen M. Omohundro},
  title =        {Surface Learning with Applications to Lipreading},
  booktitle =    NIPS6,
  editor =       NIPS6ed,
  pages =        {43--50},
  publisher =    {Morgan Kaufmann Publishers, Inc.},
  year =         {1994},
}


@article{Breiman-96,
  author  = {L. Breiman},
  title   = {Heuristics of instability and stabilization in model selection},
  journal = {The Annals of Statistics},
  volume  = {24},
  number  = {6},
  pages   = {2350--2383},
  year    = {1996},
}

@Article{breiman-stability-96,
  author =       "L. Breiman",
  title =        "Heuristics of Instability and Stabilization in Model
                 Selection",
  journal =      "The Annals of Statistics",
  volume =       "24",
  number =       "6",
  pages =        "2350--2383",
  year =         "1996",
}

@article{Breiman01,
  author  = {Leo Breiman},
  title   = {Random Forests},
  journal = {Machine Learning},
  volume  = {45},
  number  = {1},
  pages   = {5--32},
  year    = {2001},
}

@book{Breiman84,
  author    = {L. Breiman and J. H. Friedman and R. A. Olshen and C. J. Stone},
  title     = {Classification and Regression Trees},
  publisher = {Wadsworth International Group},
  address   = {Belmont, CA},
  year      = {1984},
}

@TechReport{Breiman96,
  author =       "L. Breiman",
  title =        "Bias, Variance, and Arcing Classifiers",
  number =       "460",
  institution =  "Statistics Department, University of California",
  address =      "Berkeley, CA 94720",
  month =        apr,
  year =         "1996",
}

@incollection{Bridle+Cox91,
  author    = {J. S. Bridle and S. J. Cox},
  title     = {{RECNORM}: simultaneous normalisation and
               classification applied to speech recognition},
  booktitle = NIPS3,
  editor    = NIPS3ed,
  pages     = {234--240},
  publisher = {Morgan Kaufmann},
  year      = {1991},
}

@InCollection{Bridle89,
  author =       "J. Bridle",
  editor =       "F. Fogelman-Souli{\'e} and J. H{\'e}rault",
  booktitle =    "Neuro-computing: Algorithms, Architectures, and
                 Applications",
  title =        "Probabilistic interpretation of feedforward
                 classification network outputs, with relationships to
                 statistical pattern recognition",
  publisher =    "Springer-Verlag",
  address =      "New York",
  year =         "1989",
}

@incollection{Bridle89-nips,
  author    = {J. S. Bridle},
  title     = {Training Stochastic Model Recognition Algorithms as
               Networks can lead to Maximum Mutual Information
               Estimation of Parameters},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  pages     = {211--217},
  publisher = {Morgan Kaufmann},
  year      = {1990},
}

@article{Bridle90,
  author  = {J. S. Bridle},
  title   = {Alphanets: a Recurrent `Neural' Network Architecture
             with a Hidden {Markov} Model Interpretation},
  journal = spcomm,
  volume  = {9},
  number  = {1},
  pages   = {83--92},
  year    = {1990},
}

@incollection{Bridle90b,
  author    = {J. S. Bridle},
  title     = {Training Stochastic Model Recognition Algorithms as
               Networks can lead to Maximum Mutual Information
               Estimation of Parameters},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  pages     = {211--217},
  publisher = {Morgan Kaufmann},
  year      = {1990},
}

@incollection{Bromley-siamese93,
  author    = {J. Bromley and J. Benz and L. Bottou and I. Guyon and
               L. Jackel and Y. {LeCun} and C. Moore and E. Sackinger
               and R. Shah},
  title     = {Signature verification using a siamese time delay
               neural network},
  booktitle = {Advances in Pattern Recognition Systems using Neural
               Network Technologies},
  pages     = {669--687},
  publisher = {World Scientific, Singapore},
  year      = {1993},
}

@InCollection{Bromley93,
  author =       "J. Bromley and J. Benz and L. Bottou and I. Guyon and
                 L. Jackel and Y. {LeCun} and C. Moore and E. Sackinger
                 and R. Shah",
  booktitle =    "Advances in Pattern Recognition Systems using Neural
                 Network Technologies",
  title =        "Signature verification using a siamese time delay
                 neural network",
  series =       "Series in Machine Perception and Artificial
                 Intelligence",
  publisher =    "World Scientific",
  address =      "Singapore",
  pages =        "669--687",
  year =         "1993",
}

@article{broomhead-lowe-88,
  author  = {D. Broomhead and D. Lowe},
  title   = {Multivariable functional interpolation and adaptive networks},
  journal = {Complex Systems},
  volume  = {2},
  pages   = {321--355},
  year    = {1988},
  key     = {Broomhead},
}

@TechReport{Brown-Hinton-PoHMM-2000,
  author =       "Andrew Brown and Geoffrey Hinton",
  title =        "Products of Hidden {Markov} Models",
  number =       "GCNU TR 2000-004",
  institution =  "Gatsby Unit, University College London",
  year =         "2000",
}

@Book{Brown86,
  author =       "Lawrence D. Brown",
  title =        "Fundamentals of Statistical Exponential Families",
  series =       "Institute of Mathematical Statistics Lecture
                 Notes--Monograph Series",
  volume =       "9",
  publisher =    "Institute of Mathematical Statistics",
  year =         "1986",
}

@article{Brown92,
  author  = {P. F. Brown and V. J. Della Pietra and P. V. DeSouza
             and J. C. Lai and R. L. Mercer},
  title   = {Class-based {\it n}-gram models of natural language},
  journal = {Computational Linguistics},
  volume  = {18},
  pages   = {467--479},
  year    = {1992},
}

@phdthesis{BrownPhD,
  author = {P. Brown},
  title  = {The Acoustic-Modeling problem in Automatic Speech Recognition},
  school = {Dept. of Computer Science, Carnegie-Mellon University},
  year   = {1987},
}

@inproceedings{Bruce-94,
  author    = {Rebecca Bruce and Janyce Wiebe},
  title     = {A new approach to sense identification},
  booktitle = {{ARPA} Workshop on Human Language Technology},
  address   = {Plainsboro, {NJ}},
  year      = {1994},
}

@InProceedings{Brugnara92,
  author =       "F. Brugnara and R. DeMori and D. Giuliani and M.
                 Omologo",
  booktitle =    icassp,
  title =        "A family of parallel hidden {Markov} models",
  publisher =    "IEEE",
  address =      "New York, NY, USA",
  pages =        "377--380",
  year =         "1992",
}

@Article{Brunak89,
  author =       "S. Brunak and B. Lautrup",
  title =        "Liniedeling med et Neuralt Netv{\ae}rk",
  journal =      SAML,
  volume =       "14",
  pages =        "55--74",
  year =         "1989",
}

@book{Brunak90,
  author    = {S. Brunak and B. Lautrup},
  title     = {Neural Networks: Computers with Intuition},
  publisher = {World Scientific},
  address   = {Singapore},
  year      = {1990},
}

@Article{Brunak91,
  author =       "S. Brunak and J. Engelbrecht and S. Knudsen",
  title =        "Prediction of human {mRNA} donor and acceptor sites
                 from the {DNA} sequence",
  journal =      "Journal of Molecular Biology",
  volume =       "220",
  pages =        "49--65",
  year =         "1991",
}

@book{Bryson69,
  author    = {A. E. Bryson and Y.-C. Ho},
  title     = {Applied Optimal Control},
  publisher = {Blaisdell},
  address   = {New York},
  year      = {1969},
}

@Article{BT-the-fitting-1974,
  author =       "A. E. Beaton and J. W. Tukey",
  title =        "The fitting of power series, meaning polynomials,
                 illustrated on band-spectroscopic data",
  journal =      "Technometrics",
  volume =       "16",
  pages =        "147--185",
  year =         "1974",
}

@article{Buia-Tiesinga-2006,
 author = {Calin Buia and Paul Tiesinga},
 title = {Attentional modulation of firing rate and synchrony in a model cortical network},
 journal = {Journal of Computational Neuroscience},
 volume = 20,
 pages = {247--264},
 year = 2006,
}

@TechReport{buhlmann97,
  author =       "P. Buhlmann and A. J. Wyner",
  title =        "Variable Length {Markov} Chains",
  number =       "479",
  institution =  "Statistics Department, University of California,
                 Berkeley",
  month =        jan,
  year =         "1997",
}

@article{Buhmann87,
  author  = {J. Buhmann and K. Schulten},
  title   = {Noise-Driven Temporal Association in Neural Networks},
  journal = eul,
  volume  = {4},
  pages   = {1205--1209},
  year    = {1987},
}

@inproceedings{Buhmann88,
  author    = {J. Buhmann and K. Schulten},
  title     = {Storing Sequences of Biased Patterns in Neural
               Networks with Stochastic Dynamics},
  booktitle = {Neural Computers},
  editor    = {R. Eckmiller and Ch. von der Malsburg},
  pages     = {231--242},
  publisher = {Springer-Verlag, Berlin},
  address   = {Neuss 1987},
  year      = {1988},
}

@article{Buntine94,
  author  = {W. Buntine},
  title   = {Operations for Learning with Graphical Models},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {2},
  pages   = {159--225},
  year    = {1994},
}

@inproceedings{Burges92,
  author    = {C. Burges and O. Matan and Y. {LeCun} and J. Denker and
               L. Jackel and C. Stenard and C. Nohl and J. Ben},
  title     = {Shortest Path Segmentation: {A} Method for Training a
               Neural Network to Recognize character Strings},
  booktitle = ijcnn,
  volume    = {3},
  pages     = {165--172},
  address   = {Baltimore},
  year      = {1992},
}

@Article{Burges93,
  author =       "C. J. C. Burges and J. I. Ben and J. S. Denker and Y.
                 {LeCun} and C. R. Nohl",
  title =        "Off Line Recognition of Handwritten Postal Words Using
                 Neural Networks",
  journal =      "International Journal of Pattern Recognition and
                 Artificial Intelligence",
  volume =       "7",
  number =       "4",
  pages =        "689",
  year =         "1993",
}

@Article{burges98,
  author =       "C. J. C. Burges",
  title =        "A Tutorial on {Support} {Vector} {Machines} for
                 Pattern Recognition",
  journal =      "Data Mining and Knowledge Discovery",
  volume =       "2",
  number =       "2",
  pages =        "121--167",
  year =         "1998",
}

@incollection{Burges99Geometry,
  author    = {C. J. C. Burges},
  title     = {Geometry and invariance in kernel based methods},
  booktitle = {Advances in Kernel Methods --- Support Vector Learning},
  editor    = {B. {Sch\"olkopf} and C. J. C. Burges and A. J. Smola},
  pages     = {89--116},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1999},
}

@article{Burr83,
  author  = {D. J. Burr},
  title   = {Designing a handwriting reader},
  journal = ieeetpami,
  volume  = {5},
  number  = {5},
  pages   = {554--559},
  month   = sep,
  year    = {1983},
}

@inproceedings{Burr88,
  author    = {D. J. Burr},
  title     = {An Improved Elastic Net Method for the Travelling
               Salesman Problem},
  booktitle = icnn,
  volume    = {1},
  pages     = {69--76},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@article{Burrows94,
  author  = {J. H. Burrows and J. Peck},
  title   = {On-Line Condition Monitoring of Rotating Equipment
             Using Neural Networks},
  journal = {ISA Transactions},
  volume  = {33},
  pages   = {159--164},
  year    = {1994},
}

@inproceedings{Burrows95,
  author    = {J. H. Burrows and R. Doucet},
  title     = {Machine Condition Monitoring Using Artificial Neural
               Networks to Process Vibration Data Obtained from
               Maintenance Monitoring Equipment},
  booktitle = {Proceedings of COMADEM'95},
  address   = {Kingston, Ontario, Canada},
  year      = {1995},
}

@Article{Byrne87,
  author =       "J. H. Byrne",
  title =        "Cellular analysis of associative learning",
  journal =      "Physiological Reviews",
  volume =       "67",
  pages =        "329--439",
  year =         "1987",
}

@InCollection{Byrne89,
  author =       "J. H. Byrne and K. J. Gingrich and D. A. Baxter",
  editor =       "R. D. Hawkins and G. H. Bower",
  booktitle =    "Computational Models of Learning in Simple Neural
                 Systems",
  title =        "Computational capabilities of single neurons:
                 relationship to simple forms of associative and
                 nonassociative learning in {\it Aplysia}",
  publisher =    "Academic Press",
  pages =        "31--63",
  year =         "1989",
}

@inproceedings{Cacciatore-nips94,
  author    = {T. W. Cacciatore and Steven J. Nowlan},
  title     = {Mixtures of Controllers for Jump Linear and Non-linear Plants},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1994},
}

@Article{Cai94,
  author =       "J. Cai",
  title =        "A {Markov} model of unconditional variance in {ARCH}",
  journal =      "Journal of Business and Economic Statistics",
  year =         "1994",
}

@InProceedings{Cai+al-2007,
  author =       "Cai, Deng and He, Xiaofei and Han, Jiawei",
  title =        "Semi-supervised Discriminant Analysis",
  booktitle =    ICCV07,
  pages =        "1--7",
  year =         "2007",
}

@article{Caianiello61,
  author  = {E. R. Caianiello},
  title   = {Outline of a Theory of Thought and Thinking Machines},
  journal = jtb,
  volume  = {1},
  pages   = {204--235},
  year    = {1961},
}

@Article{Campbell+Kulikowski-1966,
  author =       "F. W. Campbell and J. J. Kulikowski",
  title =        "Orientational selectivity of the human visual system",
  journal =      "Journal of Physiology",
  pages =        "437--445",
  address =      "London",
  year =         "1966",
}

@article{Campbell+al-1969,
    title = {The Spatial Selectivity of the Visual Cells of the Cat},
    author = {F. W. Campbell and G. F. Cooper and C. Enroth-Cugell},
    journal = {Journal of Physiology},
    address = "London",
    pages = {223--235},
    volume = {203},
    year = {1969},
    biburl = {http://www.bibsonomy.org/bibtex/2cfcc4bc8437b72761251fb2b9e7eb106/schaul},
    description = {idsia},
}

% Chapter in an edited LNCS volume: the chapter title goes in title, the book
% title in booktitle, and the LNCS number in volume. Non-ASCII characters are
% written as LaTeX escapes so classic BibTeX sorts and prints them correctly.
@InCollection{CandelaJ2006,
  author =       {Qui{\~n}onero-Candela, J. and C. E. Rasmussen and F. Sinz
                 and O. Bousquet and B. Sch{\"o}lkopf},
  editor =       {Qui{\~n}onero-Candela, J. and I. Dagan and B. Magnini
                 and d'Alch{\'e}-Buc, F.},
  booktitle =    {Machine learning challenges: Evaluating predictive
                 uncertainty, visual object classification, and
                 recognising textual entailment},
  title =        {Evaluating Predictive Uncertainty Challenge},
  publisher =    {Springer},
  address =      {Heidelberg, Germany},
  pages =        {1--27},
  month =        apr,
  year =         {2006},
  series =       {Lecture Notes in Computer Science},
  volume =       {3944},
  URL =          {http://www.springerlink.com/(yxluatzjo3gnpl45323wjs45)/app/home/contribution.asp?referrer=parent&backto=issue,1,25;journal,2,3638;linkingpublicationresults,1:105633,1},
  abstract =     {This Chapter presents the PASCAL Evaluating
                 Predictive Uncertainty Challenge, introduces the
                 contributed Chapters by the participants who obtained
                 outstanding results, and provides a discussion with
                 some lessons to be learnt. The Challenge was set up to
                 evaluate the ability of Machine Learning algorithms to
                 provide good ``probabilistic predictions'', rather than
                 just the usual ``point predictions'' with no measure of
                 uncertainty, in regression and classification problems.
                 Participants had to compete on a number of regression
                 and classification tasks, and were evaluated by both
                 traditional losses that only take into account point
                 predictions and losses we proposed that evaluate the
                 quality of the probabilistic predictions.},
}

@article{candeswakin08,
author = "Candes, E. and Wakin, M.",
title = "An introduction to compressive sampling",
journal = "IEEE Signal Processing Magazine",
volume = 25,
number = 2,
pages = "21--30",
year = 2008,
}

@article{Candes+Tao-2005,
 author = {E. J. Candes and T. Tao},
 title = {Decoding by linear programming},
 journal = {{IEEE} Transactions on Information Theory},
 volume = 51,
 number = 12,
 pages = {4203--4215},
 year = 2005,
}

@article{Canning88,
  author  = {A. Canning and E. Gardner},
  title   = {Partially Connected Models of Neural Networks},
  journal = jpa,
  volume  = {21},
  pages   = {3275--3284},
  year    = {1988},
}

@article{carandini:1994,
    author = {Matteo Carandini and David J. Heeger},
    title = {Summation and Division by Neurons in Primate Visual Cortex},
    journal = {Science},
    volume={264},
    number={5163},
    month = may,
    year = {1994},
    pages = {1333--1336},
}

@inproceedings{Cardie-1993,
    author = "Claire Cardie",
    title = "Using Decision Trees to Improve Case-Based Learning",
    booktitle = "Proceedings of the Tenth International Conference on Machine Learning",
    publisher = "Morgan Kaufmann",
    pages = "25--32",
    year = "1993",
    url = "citeseer.ist.psu.edu/cardie93using.html"
}

@article{Carpenter87a,
  author  = {G. A. Carpenter and S. Grossberg},
  title   = {A Massively Parallel Architecture for a Self-Organizing Neural Pattern Recognition Machine},
  journal = cvgip,
  volume  = {37},
  pages   = {54--115},
  year    = {1987},
}

@article{Carpenter87b,
  author  = {G. A. Carpenter and S. Grossberg},
  title   = {{ART2}: Self-Organization of Stable Category Recognition Codes for Analog Input Patterns},
  journal = applopt,
  volume  = {26},
  pages   = {4919--4930},
  year    = {1987},
}

@article{Carpenter88,
  author  = {G. A. Carpenter and S. Grossberg},
  title   = {The {ART} of Adaptive Pattern Recognition by a Self-Organizing Neural Network},
  journal = computer,
  pages   = {77--88},
  month   = mar,
  year    = {1988},
}

@InProceedings{Carrasco94,
  author =       "R. C. Carrasco and J. Oncina",
  title =        "Learning stochastic regular grammars by means of a
                 state merging method",
  booktitle =    "Grammatical Inference and Applications: Proc. of the
                 2nd International Colloquium on Grammatical Inference
                 ({ICGI}-94), Alicante, Spain",
  series =       "Lecture Notes in Artificial Intelligence",
  volume =       "862",
  publisher =    "Springer-Verlag",
  pages =        "139--152",
  month =        sep,
  year =         "1994",
}

@article{Carter94,
  author  = {C. K. Carter and R. Kohn},
  title   = {On Gibbs sampling for state space models},
  journal = {Biometrika},
  volume  = {81},
  pages   = {541--553},
  year    = {1994},
}

@inproceedings{Caruana-2001,
  author    = {Rich Caruana},
  title     = {A Non-Parametric {EM}-Style Algorithm for Imputing Missing Values},
  booktitle = aistats01,
  publisher = {Society for Artificial Intelligence and Statistics},
  year      = {2001},
}

@inproceedings{caruana06:empirical,
  author    = {R. Caruana and A. Niculescu-Mizil},
  title     = {An Empirical Comparison of Supervised Learning Algorithms},
  booktitle = ICML06,
  editor    = ICML06ed,
  publisher = ICML06publ,
  year      = {2006},
}

@inproceedings{caruana93a,
  author    = {Rich Caruana},
  title     = {Multitask Connectionist Learning},
  booktitle = {Proceedings of the 1993 Connectionist Models Summer School},
  pages     = {372--379},
  year      = {1993},
}

@inproceedings{caruana93a-small,
  author    = {Rich Caruana},
  title     = {Multitask Connectionist Learning},
  booktitle = {Proceedings of the 1993 Connectionist Models Summer School},
  pages     = {372--379},
  year      = {1993},
}
  %url =          "http://citeseer.ist.psu.edu/32984.html",

@inproceedings{caruana95,
  author    = {Rich Caruana},
  title     = {Learning Many Related Tasks at the Same Time With Backpropagation},
  booktitle = NIPS7,
  editor    = NIPS7ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {657--664},
  year      = {1995},
}

@InProceedings{caruana96,
  author =       "Rich Caruana and Shumeet Baluja and Tom Mitchell",
  editor =       NIPS8ed,
  booktitle =    NIPS8,
  title =        "Using the Future to ``Sort Out'' the Present: Rankprop
                 and Multitask Learning for Medical Risk Evaluation",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "959--965",
  year =         "1996",
}

@inproceedings{caruana96c,
  author    = {Rich Caruana},
  title     = {Algorithms and Applications for Multitask Learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {87--95},
  year      = {1996},
}

@article{caruana97a,
  author    = {Rich Caruana},
  title     = {Multitask Learning},
  journal   = {Machine Learning},
  volume    = {28},
  number    = {1},
  pages     = {41--75},
  publisher = {Kluwer Academic Publishers},
  address   = {Hingham, MA, USA},
  year      = {1997},
}

@article{Casdagli89,
  author  = {M. Casdagli},
  title   = {Nonlinear Prediction of Chaotic Time Series},
  journal = physicaD,
  volume  = {35},
  pages   = {335--356},
  year    = {1989},
}

@Book{Casella+Berger-2001,
  author =       "George Casella and Roger Berger",
  title =        "Statistical Inference",
  publisher =    "Duxbury Press",
  year =         "2001",
}


@article{Cashman+Pouliot90,
  author  = {N. R. Cashman and Y. Pouliot},
  title   = {{EBV} {Ig}-like domains},
  journal = {Nature},
  volume  = {343},
  pages   = {319},
  year    = {1990},
}

@ARTICLE{CataltepeZ1999,
    author  = {Zehra Cataltepe and Yaser S. Abu-Mostafa and Malik Magdon-Ismail},
    title   = {No free lunch for early stopping},
    journal = {Neural Computation},
    year    = {1999},
    volume  = {11},
    number  = {4},
    pages   = {995--1009},
}

@inproceedings{Cater87,
  author    = {J. P. Cater},
  title     = {Successfully Using Peak Learning Rates of 10 (and Greater) in Back-Propagation Networks with the Heuristic Learning Algorithm},
  booktitle = icnn,
  editor    = {M. Caudill and C. Butler},
  volume    = {2},
  pages     = {645--651},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  year      = {1987},
}

@book{Caudill89,
  author    = {M. Caudill},
  title     = {Neural Networks Primer},
  publisher = {Miller Freeman},
  address   = {San Francisco},
  year      = {1989},
}

@manual{CC01a,
  author = {Chih-Chung Chang and Chih-Jen Lin},
  title  = {{LIBSVM}: a library for support vector machines},
  note   = {Software available at \verb+http://www.csie.ntu.edu.tw/~cjlin/libsvm+},
  year   = {2001},
}

@article{cemgil+kappen+barber-2006,
  author  = {A. T. Cemgil and H. J. Kappen and D. Barber},
  title   = {A Generative Model for Music Transcription},
  journal = {IEEE Transactions on Audio, Speech and Language Processing},
  volume  = {14},
  number  = {2},
  pages   = {679--694},
  year    = {2006},
}

@inproceedings{Cevikalp+al-2008,
    title = {Semi-Supervised Dimensionality Reduction Using Pairwise Equivalence Constraints},
    author = {Hakan Cevikalp and Jakob J. Verbeek and Fr{\'e}d{\'e}ric Jurie and Alexander Kl{\"a}ser},
    booktitle = {VISAPP},
    editor = {Alpesh Ranchordas and Helder Ara{\'u}jo},
    pages = {489--496},
    publisher = {INSTICC - Institute for Systems and Technologies of Information, Control and Communication},
    url = {http://dblp.uni-trier.de/db/conf/visapp/visapp2008-1.html#CevikalpVJK08},
    year = {2008},
    biburl = {http://www.bibsonomy.org/bibtex/21afc498c02543e97ff5bd4f6b107e16e/dblp},
    description = {dblp},
    isbn = {978-989-8111-21-0},
    date = {2008-04-07},
    keywords = {dblp }
}

@inproceedings{CGY96,
  author    = {Ingemar J. Cox and Joumana Ghosn and Peter N. Yianilos},
  title     = {Feature-Based Face Recognition Using Mixture-Distance},
  booktitle = cvpr96,
  pages     = {209--216},
  year      = {1996},
}

@Article{CHAID-BVS-91,
  author =       "D. Biggs and B. de Ville and E. Suen",
  title =        "A method of choosing multiway partitions for
                 classification and decision trees",
  journal =      "Journal of Applied Statistics",
  volume =       "18",
  number =       "1",
  pages =        "49--62",
  year =         "1991",
}

@InCollection{CHAID-HK-82,
  author =       "D. M. Hawkins and G. V. Kass",
  editor =       "D. M. Hawkins",
  booktitle =    "Topics in Applied Multivariate Analysis",
  title =        "Automatic Interaction Detection",
  publisher =    "Cambridge University Press",
  address =      "Cambridge",
  pages =        "269--302",
  year =         "1982",
}

@article{CHAID-original-80,
  author  = {G. V. Kass},
  title   = {An Exploratory Technique for Investigating Large Quantities of Categorical Data},
  journal = {Applied Statistics},
  volume  = {29},
  number  = {2},
  pages   = {119--127},
  year    = {1980},
}

@inproceedings{Chapados2002,
  author    = {N. Chapados and Y. Bengio and P. Vincent and J. Ghosn and C. Dugas and I. Takeuchi and L. Meng},
  title     = {Estimating Car Insurance Premia: a Case Study in High-Dimensional Data Inference},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  pages     = {1369--1376},
  year      = {2002},
}

@inproceedings{Chapados2002-short,
  author    = {N. Chapados and Y. Bengio and P. Vincent and J. Ghosn and C. Dugas and I. Takeuchi and L. Meng},
  title     = {Estimating Car Insurance Premia: a Case Study in High-Dimensional Data Inference},
  booktitle = NIPS14,
  publisher = {{MIT} Press},
  year      = {2002},
}

@inproceedings{Chapelle+al-2003,
  author    = {O. Chapelle and J. Weston and B. Sch{\"o}lkopf},
  title     = {Cluster kernels for semi-supervised learning},
  booktitle = NIPS15,
  editor    = NIPS15ed,
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  pages     = {585--592},
  year      = {2003},
}

@inproceedings{Chapelle-nips2003,
  author    = {O. Chapelle and B. Sch{\"o}lkopf and J. Weston},
  title     = {Semi-supervised learning through principal directions estimation},
  booktitle = NIPS15,
  editor    = NIPS15ed,
  publisher = {{MIT} Press},
  year      = {2003},
}

@inproceedings{Chapelle2001,
  author    = {Olivier Chapelle and Jason Weston and L\'eon Bottou and Vladimir Vapnik},
  title     = {Vicinal Risk Minimization},
  booktitle = NIPS13,
  editor    = NIPS13ed,
  pages     = {416--422},
  year      = {2001},
}

@InProceedings{chapelle2001iin,
  author =       "O. Chapelle and B. Sch{\"o}lkopf",
  title =        "Incorporating Invariances in Nonlinear Support Vector
                 Machines",
  editor =       NIPS14ed,
  booktitle =    NIPS14,
  volume =       "14",
  year =         "2002",
}

@Article{Chapelle99,
  author =       "O. Chapelle and P. Haffner and V. Vapnik",
  title =        "{SVM}s for Histogram-Based Image Classification",
  journal =      "IEEE Transactions on Neural Networks",
  volume =       "10",
  number =       "5",
  pages =        "1055--1064",
  year =         "1999",
  note =         "Special issue on Support Vectors",
}

@Article{ChapelleVapnikBengio2001,
  author =       "O. Chapelle and V. Vapnik and Y. Bengio",
  title =        "Model Selection for Small-Sample Regression",
  journal =      "Machine Learning",
  volume =       "48",
  number =       "1",
  pages =        "9--23",
  year =         "2002",
}

@article{Willski-2002,
  author =       "A. S. Willsky",
  title =        "Multiresolution {Markov} models for signal and image processing",
  journal =      "Proceedings of the {IEEE}",
  volume =       "90",
  number =       "8",
  pages =        "1396--1458",
  year =         "2002",
}

@Article{Felzenszwalb+Huttenlocher-2004,
  author =       "Pedro F. Felzenszwalb and Daniel P. Huttenlocher",
  title =        "Efficient Graph-Based Image Segmentation",
  journal =      "International Journal of Computer Vision",
  volume =       "59",
  number =       "2",
  pages =        "167--181",
  year =         "2004",
}

@inproceedings{Lombaert-2005,
  author =       "Herve Lombaert and Yiyong Sun and Leo Grady and Chenyang Xu",
  title =        "A Multilevel Banded Graph Cuts Method for Fast Image Segmentation",
  booktitle =    ICCV05,
  volume =       "1",
  pages =        "259--265",
  year =         "2005",
}

@Article{Boykov+Kolmogorov-2004,
  author =       "Y. Boykov and V. Kolmogorov",
  title =        "An experimental comparison of min-cut/max-flow algorithms for energy minimization in vision",
  journal =      ieeetpami,
  volume =       "26",
  number =       "9",
  pages =        "1124--1137",
  year =         "2004",
}

@InProceedings{chapelleetal06,
  author    = {Chapelle, O. and Chi, M. and Zien, A.},
  title     = {A continuation method for semi-supervised {SVMs}},
  booktitle = ICML06,
  editor    = ICML06ed,
  publisher = ICML06publ,
  year      = {2006},
}

@inproceedings{ChapelleO2005,
   author = {Olivier Chapelle and Alexander Zien},
   title = {Semi-Supervised Classification by Low Density Separation},
   editor = {Cowell, R. and Ghahramani, Z.},
   booktitle = {Proceedings of the Tenth International Workshop on Artificial Intelligence and Statistics ({AISTATS} 2005)},
   pages = {57--64},
   month = jan,
   year = {2005},
   location = {Barbados},
}
   %URL = {http://www.gatsby.ucl.ac.uk/aistats/aistats2005_eproc.pdf}

@book{Chapelle-2006,
 editor =       "Olivier Chapelle and Bernhard Sch{\"o}lkopf and Alexander Zien",
 title =        "Semi-Supervised Learning",
 publisher =    "{MIT} Press",
 address =      "Cambridge, MA",
 year =         "2006",
}

@TechReport{Charniak99,
  author =       "Eugene Charniak",
  title =        "A Maximum-Entropy-Inspired Parser",
  number =       "CS-99-12",
  institution =  "Brown University",
  year =         "1999",
  URL =          "http://citeseer.nj.nec.com/charniak99maximumentropyinspired.html",
}

@Misc{Chatpatanasiri-2008,
  author =       "Ratthachat Chatpatanasiri",
  title =        "Spectral Methods for Linear and Non-Linear Semi-Supervised Dimensionality Reduction",
  year =         "2008",
  note =         "Submitted for publication",
  url =          "http://www.citebase.org/abstract?id=oai:arXiv.org:0804.0924",
}

@inproceedings{Chauvin89,
  author    = {Y. Chauvin},
  title     = {A Back-Propagation Algorithm with Optimal Use of Hidden Units},
  booktitle = NIPS1,
  editor    = NIPS1ed,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {519--526},
  year      = {1989},
}

@inproceedings{Chauvin90,
  author    = {Y. Chauvin},
  title     = {Dynamic behavior of constrained back-propagation networks},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  publisher = {Morgan Kaufmann},
  address   = {Denver, CO},
  pages     = {642--649},
  year      = {1990},
}

@inproceedings{Cheeseman88,
  author    = {P. Cheeseman and J. Kelly and M. Self and J. Stutz and W. Taylor and D. Freeman},
  title     = {{AutoClass}: {A} {Bayesian} Classification System},
  booktitle = {Proceedings of the Fifth International Conference on Machine Learning},
  address   = {The University of Michigan, Ann Arbor},
  month     = jun,
  year      = {1988},
}

@Article{Chelba-Jelinek-2000,
  author =       "Ciprian Chelba and Frederick Jelinek",
  title =        "Structured Language Modeling",
  journal =      "Computer Speech and Language",
  volume =       "14",
  number =       "4",
  pages =        "283--332",
  year =         "2000",
}

@Article{Chen+Goodman99,
  author =       "Stanley F. Chen and Joshua T. Goodman",
  title =        "An Empirical Study of Smoothing Techniques for
                 Language Modeling",
  journal =      "Computer Speech and Language",
  volume =       "13",
  number =       "4",
  pages =        "359--393",
  year =         "1999",
}

@article{Chen+Murray2003,
  author  = {Hsin Chen and Alan F. Murray},
  title   = {A Continuous Restricted {Boltzmann} Machine with an Implementable Training Algorithm},
  journal = {IEE Proceedings of Vision, Image and Signal Processing},
  volume  = {150},
  number  = {3},
  pages   = {153--158},
  year    = {2003},
}

@phdthesis{chen95basispursuit,
  author = {S. Chen},
  title  = {Basis Pursuit},
  school = {Department of Statistics, Stanford University},
  year   = {1995},
}

@TechReport{Chen98,
  author =       "Stanley F. Chen and Joshua T. Goodman",
  title =        "An Empirical Study of Smoothing Techniques for
                 Language Modeling",
  number =       "TR-10-98",
  institution =  "Computer Science Group, Harvard University",
  year =         "1998",
}

@Article{ChenS2000,
  author =       "Stanley F. Chen and Ronald Rosenfeld",
  title =        "A Survey of Smoothing Techniques for {ME} Models",
  journal =      "IEEE Transactions on Speech and Audio Processing",
  volume =       "8",
  number =       "1",
  pages =        "37--50",
  month =        jan,
  year =         "2000",
}

@techreport{Chen+Kotani-2005,
  author =      "Chen, Fan and Kotani, Kazunori",
  title =       "Facial Expression Recognition by Supervised {ICA} with Selective Prior",
  ISSN =        "09135685",
  institution = "The Institute of Electronics, Information and Communication Engineers",
  year =        "2005",
  number =      "462",
  pages =       "27--32",
  URL =         "http://ci.nii.ac.jp/naid/110004064718/en/",
}

@Article{ChenX1989,
  author  = {Chen, X. R. and Krishnaiah, P. R. and Liang, W. W.},
  title   = {Estimation of multivariate binary density using orthogonal functions},
  journal = {Journal of Multivariate Analysis},
  year    = 1989,
  volume  = {31},
  number  = {2},
  pages   = {178--186},
  month   = nov,
}

@inproceedings{Chigier88,
  author    = {B. Chigier and R. A. Brennan},
  title     = {Broad Class Network Generation Using a Combination of Rules and Statistics for Speaker Independent Continuous Speech},
  booktitle = icassp,
  address   = {New York, NY},
  pages     = {449--452},
  year      = {1988},
}

@incollection{Chipman-NIPS2006,
  author    = {H. A. Chipman and E. I. George and R. E. McCulloch},
  title     = {Bayesian Ensemble Learning},
  booktitle = NIPS19,
  editor    = NIPS19ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2007},
}

@article{Chipman-2008,
  author =       "H. A. Chipman and E. I. George and R. E. McCulloch",
  title =        "Bayesian Ensemble Learning",
  journal =      "Annals of Applied Statistics",
  year =         "2008",
  note =         "Under revision",
}

@inproceedings{ChopraS2005,
  author    = {Sumit Chopra and Raia Hadsell and Yann {LeCun}},
  title     = {Learning a Similarity Metric Discriminatively, with Application to Face Verification},
  booktitle = cvpr05,
  publisher = {IEEE Press},
  year      = {2005},
  original  = {orig/chopra-05.ps.gz},
}

@inproceedings{Choueka-1998,
  author    = {Y. Choueka},
  title     = {Looking for needles in a haystack},
  booktitle = {RIAO 88, User-oriented Content-based Text and Image Handling},
  volume    = {1},
  pages     = {609--623},
  year      = {1988},
}

@article{Chow62,
  author  = {C. K. Chow},
  title   = {A recognition method using neighbor dependence},
  journal = {IRE Trans. Elec. Comp.},
  volume  = {EC-11},
  pages   = {683--690},
  month   = oct,
  year    = {1962},
}

@inproceedings{Chrisman92AAAI,
  author    = {Lonnie Chrisman},
  title     = {Reinforcement Learning with Perceptual Aliasing: The Perceptual Distinctions Approach},
  booktitle = AAAI-92,
  pages     = {183--188},
  year      = {1992},
}

@inproceedings{Chung+al-1998,
  author    = {Yi-Ming Chung and William M. Pottenger and Bruce R. Schatz},
  title     = {Automatic subject indexing using an associative neural network},
  booktitle = {DL '98: Proceedings of the third ACM conference on Digital libraries},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {59--68},
  year      = {1998},
  ISBN      = {0-89791-965-3},
  location  = {Pittsburgh, Pennsylvania, United States},
}

@Book{Chung-97,
  author =       "F. Chung",
  title =        "Spectral Graph Theory",
  series =       "{CBMS} Regional Conference Series in Mathematics",
  volume =       "92",
  publisher =    "American Mathematical Society",
  year =         "1997",
}

@article{Churchill89,
  author  = {G. A. Churchill},
  title   = {A stochastic model for heterogeneous {DNA} sequences},
  journal = {Bull. Mathematical Biology},
  volume  = {51},
  pages   = {79--94},
  year    = {1989},
}

@Book{Chvatal83,
  author =       "V. Chv{\'a}tal",
  title =        "Linear Programming",
  publisher =    "W. H. Freeman",
  year =         "1983",
}

@article{Cleeremans89,
  author  = {A. Cleeremans and D. Servan-Schreiber and J. L. McClelland},
  title   = {Finite State Automata and Simple Recurrent Networks},
  journal = nc,
  volume  = {1},
  pages   = {372--381},
  year    = {1989},
}

@InCollection{Clifford-1990,
  author = {Peter Clifford},
  title = {{Markov} Random Fields in Statistics},
  editor = {Geoffrey Grimmett and Dominic Welsh},
  booktitle = {Disorder in Physical Systems: A Volume in Honour of John M. Hammersley},
  pages = {19--32},
  publisher = {Oxford University Press},
  year = 1990,
}

@book{CLM,
  author    = {J. Y. Campbell and A. W. Lo and A. C. MacKinlay},
  title     = {The Econometrics of Financial Markets},
  publisher = {Princeton University Press},
  address   = {Princeton},
  year      = {1997},
}

@Book{CND04,
  author =       "{Congr\'egation de Notre-Dame}",
  title =        "La cuisine raisonn{\'e}e",
  publisher =    "Fides",
  year =         "2004",
  ISBN =         "2-7621-2083-7",
}

@InProceedings{Cloutier96,
  author =       "J. Cloutier and E. Cosatto and S. Pigeon and F. R.
                 Boyer and P. Y. Simard",
  booktitle =    "Fifth International Conference on Microelectronics for
                 Neural Networks and Fuzzy Systems",
  title =        "{VIP}: an {FPGA}-based processor for image processing
                 and neural networks",
  year =         "1996",
  note =         "submitted",
}

@Manual{CMFortran,
  key =          "TMC",
  title =        "{CM} Fortran. Programming Guide",
  organization = "Thinking Machines Corporation",
  address =      "Cambridge, MA",
  edition =      "1.1",
  month =        jan,
  year =         "1991",
}

@article{Cohen83,
  author  = {M. A. Cohen and S. Grossberg},
  title   = {Absolute Stability of Global Pattern Formation and Parallel Memory Storage by Competitive Neural Networks},
  journal = ieeesmc,
  volume  = {13},
  pages   = {815--826},
  year    = {1983},
}

@Article{Cohen86,
  author =       "M. S. Cohen",
  title =        "Design of a New Medium for Volume Holographic
                 Information Processing",
  journal =      applopt,
  volume =       "25",
  pages =        "2288--2294",
  year =         "1986",
}

@article{Cohen89,
  author  = {J. R. Cohen},
  title   = {Application of an auditory model to speech recognition},
  journal = {Journal of the Acoustical Society of America},
  volume  = {85},
  number  = {6},
  pages   = {2623--2629},
  year    = {1989},
}

@phdthesis{Cohn-PhD,
  author = {D. Cohn},
  title  = {Separating Formal Bounds from Practical Performance in Learning Systems},
  school = {University of Washington},
  year   = {1992},
}

@InProceedings{Cohn95,
  author =       "David Cohn and Zoubin Ghahramani and Michael I.
                 Jordan",
  editor =       NIPS7ed,
  booktitle =    NIPS7,
  title =        "Active learning with statistical models",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "705--712",
  year =         "1995",
}

@InProceedings{Cohn95-small,
  author =       "David Cohn and Zoubin Ghahramani and Michael I.
                 Jordan",
  editor =       NIPS7ed,
  booktitle =    "Advances in NIPS 7",
  title =        "Active learning with statistical models",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1995",
}

@inproceedings{Cohn95-short,
  author    = {D. Cohn and Z. Ghahramani and M.I. Jordan},
  title     = {Active learning with statistical models},
  booktitle = {Adv. Neural Inf. Proc. Sys. 7},
  pages     = {705--712},
  year      = {1995},
}

@InProceedings{Cole+Hou88,
  author =       "R. A. Cole and L. Hou",
  booktitle =    icassp,
  title =        "Segmentation and Broad Classification of Continuous
                 Speech",
  address =      "New York, NY",
  pages =        "453--456",
  year =         "1988",
}

@Book{Cole96,
  author =       "R. A. Cole and J. Mariani and H. Uszkoreit and A.
                 Zaenen and V. Zue",
  title =        "Survey of the State of the Art in Human Language
                 Technology",
  publisher =    "Cambridge University Press",
  year =         "1996",
  url =          "http://www.cse.ogi.edu/CSLU/HLTsurvey/HLTsurvey.html",
}

@techreport{Coleman+Wu-1994,
  author      = {Thomas F. Coleman and Zhijun Wu},
  title       = {Parallel continuation-based global optimization for molecular conformation and protein folding},
  institution = {Cornell University, Dept. of Computer Science},
  year        = {1994},
}

@techreport{Coleman+Wu-1994-short,
  author      = {T.F. Coleman and Z. Wu},
  title       = {Parallel continuation-based global optimization for molecular conformation and protein folding},
  institution = {Cornell University, Dept. of Computer Science},
  year        = {1994},
}

@TechReport{Collins89,
  author =       "E. Collins and S. Ghosh and C. Scofield",
  title =        "An application of a multiple neural network learning
                 system to emulation of mortgage underwriting
                 judgements",
  institution =  "Nestor Inc.",
  address =      "Providence, RI",
  year =         "1989",
}

@inproceedings{Collins96,
  author    = {M. Collins},
  title     = {A new statistical parser based on bigram lexical dependencies},
  booktitle = {34th Annual Meeting of the {ACL}},
  pages     = {184--191},
  year      = {1996},
}

@inproceedings{Collins97,
  author    = {M. Collins},
  title     = {Three generative, lexicalized models for statistical parsing},
  booktitle = {35th Annual Meeting of the {ACL}},
  address   = {Madrid, Spain},
  pages     = {16--23},
  year      = {1997},
}

@phdthesis{Collins99,
  author = {M. Collins},
  title  = {Head-driven statistical models for natural language parsing},
  school = {University of Pennsylvania},
  year   = {1999},
}

@InProceedings{Collobert-2006,
  author =       "R. Collobert and F. Sinz and J. Weston and L. Bottou",
  booktitle =    "Proceedings of the 23rd International Conference on
                 Machine Learning",
  title =        "Trading Convexity for Scalability",
  pages =        "201--208",
  year =         "2006",
}

@phdthesis{Collobert04,
  author = {R. Collobert},
  title  = {Large Scale Machine Learning},
  school = {Universit\'e de Paris VI, LIP6},
  year   = {2004},
}

@article{Collobert2002,
  author  = {R. Collobert and S. Bengio and Y. Bengio},
  title   = {Parallel Mixture of {SVM}s for Very Large Scale Problems},
  journal = {Neural Computation},
  volume  = {14},
  number  = {5},
  pages   = {1105--1114},
  year    = {2002},
}

@InProceedings{Collobert2004,
  author =       "Ronan Collobert and Samy Bengio",
  booktitle =    ICML04,
  editor =       ICML04ed,
  publisher =    ICML04publ,
  title =        "Links between perceptrons, {MLP}s and {SVM}s",
  address =      "New York, NY, USA",
  year =         "2004",
  isbn =         "1-58113-828-5",
  pages =        "23",
  location =     "Banff, Alberta, Canada",
  doi =          "10.1145/1015330.1015415",
}

@InProceedings{CollobertR2008,
  author =       "Ronan Collobert and Jason Weston",
  booktitle =    ICML08,
  editor =       ICML08ed,
  publisher =    ICML08publ,
  title =        "A Unified Architecture for Natural Language
                 Processing: Deep Neural Networks with Multitask
                 Learning",
  year =         "2008",
  pages =        "160--167",
}
  %url =          "http://www.kyb.tuebingen.mpg.de/bs/people/weston/papers/unified\-nlp.pdf",

@inproceedings{CollobertR2008-small,
  author    = {R. Collobert and J. Weston},
  title     = {A Unified Architecture for Natural Language Processing: Deep Neural Networks with Multitask Learning},
  booktitle = {ICML 2008},
  year      = {2008},
}

@InProceedings{CollobertR2008-short,
  author =       "R. Collobert and J. Weston",
  booktitle =    "Int. Conf. Mach. Learn. 2008",
  title =        "A Unified Architecture for Natural Language
                 Processing: Deep Neural Networks with Multitask
                 Learning",
  pages =        "160--167",
  year =         "2008",
}

@article{Comon94,
  author  = {Pierre Comon},
  title   = {Independent component analysis - a new concept?},
  journal = {Signal Processing},
  volume  = {36},
  pages   = {287--314},
  year    = {1994},
}

@inproceedings{ConfAI:Grove:linprog,
  author    = {Adam J. Grove and Dale Schuurmans},
  title     = {Boosting in the limit: Maximizing the margin of learned ensembles},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence},
  year      = {1998},
}

@InProceedings{ConfAI:Maclin:adaboost,
  author =       "Richard Maclin and David Opitz",
  booktitle =    "Proceedings of the Fourteenth National Conference on
                 Artificial Intelligence",
  title =        "An empirical evaluation of Bagging and Boosting",
  pages =        "546--551",
  year =         "1997",
}

@inproceedings{ConfLT:Freund:gametheorie,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {Game theory, on-line prediction and Boosting},
  booktitle = {Proceedings of the Ninth Annual Conference on Computational Learning Theory},
  pages     = {325--332},
  year      = {1996},
}

@inproceedings{ConfML:Dietterich:adaboost+prun,
  author    = {D. Margineantu and Thomas G. Dietterich},
  title     = {Pruning Adaptive Boosting},
  booktitle = {Machine Learning: Proceedings of Fourteenth International Conference},
  publisher = {ACM},
  pages     = {211--218},
  year      = {1997},
}

@inproceedings{ConfML:Freund:AdaBoostCompar,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {Experiments with a new Boosting algorithm},
  booktitle = {Machine Learning: Proceedings of Thirteenth International Conference},
  publisher = {ACM},
  address   = {USA},
  pages     = {148--156},
  year      = {1996},
}

@inproceedings{ConfML:Freund:margins,
  author    = {Robert E. Schapire and Yoav Freund and Peter Bartlett and Wee Sun Lee},
  title     = {Boosting the margin: {A} new explanation for the effectiveness of voting methods},
  booktitle = {Machine Learning: Proceedings of Fourteenth International Conference},
  pages     = {322--330},
  year      = {1997},
}

@InProceedings{ConfML:Quinlan:AdaBoost-C45,
  author =       "J. Ross Quinlan",
  booktitle =    "Proceedings of the Thirteenth National Conference on
                 Artificial Intelligence",
  title =        "Bagging, Boosting and {C4.5}",
  pages =        "725--730",
  year =         "1996",
}

@inproceedings{ConfML:Schapire:outputcodes,
  author    = {Robert E. Schapire},
  title     = {Using output codes to boost multiclass learning problems},
  booktitle = {Machine Learning: Proceedings of the Fourteenth International Conference},
  year      = {1997},
}

@article{Coolen88,
  author  = {A. C. C. Coolen and C. C. A. M. Gielen},
  title   = {Delays in Neural Networks},
  journal = eul,
  volume  = {7},
  pages   = {281--285},
  year    = {1988},
}

@Book{cooper+meyer-1960,
  author =       "Grosvenor Cooper and Leonard B. Meyer",
  title =        "{The Rhythmic Structure of Music}",
  publisher =    "The Univ. of Chicago Press",
  address =      "Chicago",
  year =         "1960",
  keywords =     "describe, music",
  origin =       "Kielian-Gilbert",
  own =          "IU Library",
}

@incollection{Cooper73,
  author    = {L. N. Cooper},
  title     = {A Possible Organization of Animal Memory and Learning},
  editor    = {B. Lundqvist and S. Lundqvist},
  booktitle = {Collective Properties of Physical Systems},
  pages     = {252--264},
  publisher = {Academic Press},
  address   = {New York},
  year      = {1973},
}

@incollection{Cooper87,
  author    = {C. L. Scofield and D. L. Reilly and C. Elbaum and L. N. Cooper},
  title     = {Pattern class degeneracy in an unrestricted storage density memory},
  booktitle = {Conference on Neural Information Processing Systems - Natural and Synthetic},
  publisher = {IEEE},
  year      = {1987},
}

%% Same work as Corana87a; both keys are kept so existing citations resolve.
@Article{Corana87,
  author =       "A. Corana and M. Marchesi and C. Martini and S.
                 Ridella",
  title =        "Minimizing Multimodal Functions of Continuous
                 Variables with the Simulated Annealing Algorithm",
  journal =      acmtms,
  volume =       "13",
  number =       "3",
  pages =        "262--280",
  month =        sep,
  year =         "1987",
}

%% Same work as Corana87; both keys are kept so existing citations resolve.
@Article{Corana87a,
  author =       "A. Corana and M. Marchesi and C. Martini and S.
                 Ridella",
  title =        "Minimizing Multimodal Functions of Continuous
                 Variables with the Simulated Annealing Algorithm",
  journal =      acmtms,
  volume =       "13",
  number =       "3",
  pages =        "262--280",
  month =        sep,
  year =         "1987",
}

@article{Cortes04,
  author    = {C. Cortes and P. Haffner and M. Mohri},
  title     = {Rational Kernels: Theory and Algorithms},
  journal   = jmlr,
  volume    = {5},
  pages     = {1035--1062},
  year      = {2004},
  OPTnumber = {},
}

@article{Cortes87,
  author  = {C. Cortes and A. Krogh and J. A. Hertz},
  title   = {Hierarchical Associative Networks},
  journal = jpa,
  volume  = {20},
  pages   = {4449--4455},
  year    = {1987},
}

@inproceedings{Cortes89,
  author    = {C. Cortes and J. A. Hertz},
  title     = {A Network System for Image Segmentation},
  booktitle = ijcnn,
  volume    = {1},
  pages     = {121--127},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

@article{Cortes95,
  author  = {Corinna Cortes and Vladimir Vapnik},
  title   = {Support Vector Networks},
  journal = {Machine Learning},
  volume  = {20},
  pages   = {273--297},
  year    = {1995},
}

@inproceedings{Cortesetal95a,
  author    = {C. Cortes and H. Drucker and D. Hoover and V. Vapnik},
  title     = {Capacity and Complexity Control in Predicting the Spread Between Borrowing and Lending Interest Rates},
  booktitle = {Proc. 1st Intl. Conf. on Knowledge Discovery and Data Mining},
  pages     = {51--56},
  address   = {Montreal (Canada)},
  year      = {1995},
}

@inproceedings{Cortesetal95b,
  author    = {C. Cortes and L. D. Jackel and W. P. Chiang},
  title     = {Limits on Learning Machine Accuracy Imposed by Data Quality},
  booktitle = {Proc. 1st Intl. Conf. on Knowledge Discovery and Data Mining},
  pages     = {57--62},
  address   = {Montreal (Canada)},
  year      = {1995},
}

@inproceedings{Cosi-92,
  author    = {P. Cosi and P. Frasconi and M. Gori and N. Griggio},
  title     = {Phonetic Recognition Experiments with Recurrent Neural Networks},
  booktitle = {Proc. of the International Conference on Spoken Language},
  pages     = {1335--1338},
  address   = {Banff (Canada)},
  month     = oct,
  year      = {1992},
}

@inproceedings{Cosnard+al-1991,
  author    = {M. Cosnard and J. C. Mignot and H. Paugam-Moisy},
  title     = {Implementations of Multilayer Neural Networks on Parallel Architectures},
  booktitle = {Proceedings of the Second International Specialist Seminar on the Design and Application of Parallel Digital Processors, 1991},
  pages     = {43--47},
  address   = {Lisbon},
  month     = apr,
  year      = {1991},
}

@article{Cosslett85,
  author  = {S. R. Cosslett and L-F. Lee},
  title   = {Serial correlation in discrete variable models},
  journal = {Journal of Econometrics},
  volume  = {27},
  pages   = {79--97},
  year    = {1985},
}

@article{Cottrell86,
  author  = {M. Cottrell and J. C. Fort},
  title   = {A Stochastic Model of Retinotopy: {A} Self Organizing Process},
  journal = biocyb,
  volume  = {53},
  pages   = {405--411},
  year    = {1986},
}

@inproceedings{Cottrell87,
  author    = {Garrison W. Cottrell and Paul Munro and David Zipser},
  title     = {Learning Internal Representations from Gray-Scale Images: An Example of Extensional Programming},
  booktitle = {Ninth Annual Conference of the Cognitive Science Society},
  pages     = {462--473},
  publisher = {Lawrence Erlbaum, Hillsdale},
  address   = {Seattle 1987},
  year      = {1987},
}

@Book{Courant51,
  author =       "R. Courant and D. Hilbert",
  title =        "Methods of Mathematical Physics",
  publisher =    "Wiley Interscience, New York",
  year =         "1951",
}

@article{Cover65,
  author  = {T. M. Cover},
  title   = {Geometrical and Statistical Properties of Systems of Linear Inequalities with Applications in Pattern Recognition},
  journal = ieeetec,
  volume  = {14},
  pages   = {326--334},
  year    = {1965},
}

@article{CoverHart67,
  author  = {T. M. Cover and P. E. Hart},
  title   = {Nearest Neighbor Pattern Classification},
  journal = {IEEE Transactions on Information Theory},
  volume  = {13},
  number  = {1},
  pages   = {21--27},
  year    = {1967},
}

@article{Cowan88a,
  author  = {J. D. Cowan and D. H. Sharp},
  title   = {Neural Nets and Artificial Intelligence},
  journal = daed,
  volume  = {117},
  pages   = {85--121},
  year    = {1988},
}

@article{Cowan88b,
  author  = {J. D. Cowan and D. H. Sharp},
  title   = {Neural Nets},
  journal = qrb,
  volume  = {21},
  pages   = {365--427},
  year    = {1988},
}

@inproceedings{Cox+Bridle89,
  author       = {S. Cox and J. S. Bridle},
  title        = {Unsupervised speaker adaptation by probabilistic spectrum fitting},
  booktitle    = {Proc. IEEE Conf. on Acoustics, Speech and Signal Processing},
  organization = {British Telecom and RSRE},
  year         = {1989},
}

@inproceedings{Cox+Bridle90,
  author       = {S. Cox and J. S. Bridle},
  title        = {Simultaneous Speaker Normalisation and Utterance labelling Using {Bayesian}/Neural Net Techniques},
  booktitle    = {Proc. IEEE Conf. on Acoustics, Speech and Signal Processing},
  organization = {British Telecom and RSRE},
  year         = {1990},
}

@Book{CoxCox94,
  author =       "Trevor F. Cox and Michael A. A. Cox",
  title =        "Multidimensional Scaling",
  publisher =    "Chapman \& Hall",
  address =      "London",
  year =         "1994",
}

@Book{Cox+Cox-2000,
  author =       "T. Cox and M. Cox",
  title =        "Multidimensional Scaling",
  publisher =    "Chapman \& Hall",
  edition =      "Second",
  address =      "London",
  year =         "2000",
}

@inproceedings{Cozman2003,
  author    = {F. Cozman and I. Cohen and M. Cirelo},
  title     = {Semi-Supervised Learning of Mixture Models},
  editor    = ICML03ed,
  booktitle = ICML03,
  publisher = ICML03publ,
  year      = {2003},
}

@article{Cragg54,
  author  = {B. G. Cragg and H. N. V. Temperley},
  title   = {The Organization of Neurones: {A} Cooperative Analogy},
  journal = EEGCN,
  volume  = {6},
  pages   = {85--92},
  year    = {1954},
}

%% NOTE(review): the original volume read "78 II"; interpreted here as
%% volume 78, part 2 -- confirm against the journal.
@Article{Cragg55,
  author =       "B. G. Cragg and H. N. V. Temperley",
  title =        "Memory: The Analogy with Ferromagnetic Hysteresis",
  journal =      brain,
  volume =       "78",
  number =       "2",
  pages =        "304--316",
  year =         "1955",
}

@Article{Craven+Wahba79,
  author =       "P. Craven and G. Wahba",
  title =        "Smoothing noisy data with spline functions",
  journal =      "Numerische Mathematik",
  volume =       "31",
  pages =        "377--403",
  year =         "1979",
}

@article{Crick89,
  author  = {F. Crick},
  title   = {The Recent Excitement About Neural Networks},
  journal = nature,
  volume  = {337},
  pages   = {129--132},
  year    = {1989},
}

@article{Crisanti86,
  author  = {A. Crisanti and D. J. Amit and H. Gutfreund},
  title   = {Saturation Level of the Hopfield Model for Neural Network},
  journal = eul,
  volume  = {2},
  pages   = {337--341},
  year    = {1986},
}

@article{Crisanti87,
  author  = {A. Crisanti and H. Sompolinsky},
  title   = {Dynamics of Spin Systems with Randomly Asymmetric Bonds: Langevin Dynamics and a Spherical Model},
  journal = prA,
  volume  = {36},
  pages   = {4922--4939},
  year    = {1987},
}

@book{Cristianini+Shawe-Taylor-2000,
  author    = {Nello Cristianini and John Shawe-Taylor},
  title     = {An Introduction to Support Vector Machines and other kernel-based learning methods},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {2000},
}

%% Same work as Cristianini2002; both keys are kept so existing citations resolve.
@inproceedings{Cristianini-2002,
  author    = {N. Cristianini and J. Shawe-Taylor and J. Kandola},
  title     = {Spectral Kernel Methods for Clustering},
  editor    = NIPS14ed,
  booktitle = NIPS14,
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2002},
}

@inproceedings{Cristianini02,
  author    = {N. Cristianini and J. Shawe-Taylor and A. Elisseeff and J. Kandola},
  title     = {On Kernel-Target Alignment},
  editor    = NIPS14ed,
  booktitle = NIPS14,
  volume    = {14},
  pages     = {367--373},
  year      = {2002},
}

%% Same work as Cristianini-2002; both keys are kept so existing citations resolve.
@inproceedings{Cristianini2002,
  author    = {N. Cristianini and J. Shawe-Taylor and J. Kandola},
  title     = {Spectral Kernel Methods for Clustering},
  editor    = NIPS14ed,
  booktitle = NIPS14,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002},
  original  = {orig/AA16.ps},
}

@article{cucker+grigoriev99,
  author  = {Felipe Cucker and Dima Grigoriev},
  title   = {Complexity Lower Bounds for Approximation Algebraic Computation Trees},
  journal = {Journal of Complexity},
  volume  = {15},
  number  = {4},
  pages   = {499--512},
  year    = {1999},
}

@techreport{Cybenko88,
  author      = {G. Cybenko},
  title       = {Continuous Valued Neural Networks with Two Hidden Layers Are Sufficient},
  institution = {Department of Computer Science, Tufts University},
  address     = {Medford, MA},
  year        = {1988},
}

@article{Cybenko89,
  author  = {G. Cybenko},
  title   = {Approximation by Superpositions of a Sigmoidal Function},
  journal = mcss,
  volume  = {2},
  pages   = {303--314},
  year    = {1989},
}

@inproceedings{Dahmen2000,
  author    = {J. Dahmen and D. Keysers and M. Pitz and H. Ney},
  title     = {Structured covariance matrices for statistical image object recognition},
  booktitle = {22nd Symposium of the German Association for Pattern Recognition},
  address   = {Kiel, Germany},
  year      = {2000},
}

@InProceedings{Dai95,
  author =       "H. Dai and J. M. Lina and B. Goulard and J. W. Thomson
                 and C. K. Scott",
  booktitle =    "1995 Robotic and Knowledge Based Systems Workshop",
  title =        "An Expert Diagnostic System Introducing Wavelets
                 Analysis and Neural Network",
  address =      "St. Hubert, Canada",
  year =         "1995",
}

@inproceedings{darken-moody91,
  author    = {Christian Darken and John Moody},
  title     = {Note on learning rate schedules for stochastic optimization},
  editor    = NIPS3ed,
  booktitle = NIPS3,
  pages     = {832--838},
  publisher = {Morgan Kaufmann, Palo Alto},
  address   = {Denver, CO},
  year      = {1991},
}

@Article{DarrochJ1972,
  author =       "J. N. Darroch and D. Ratcliff",
  title =        "Generalized iterative scaling for log-linear models",
  journal =      "Annals of Mathematical Statistics",
  volume =       "43",
  number =       "5",
  pages =        "1470--1480",
  year =         "1972",
}

@inproceedings{Das-nips93,
  author    = {S. Das and C. L. Giles and G. Z. Sun},
  title     = {Using Prior Knowledge in an {NNPDA} to Learn Context-Free Languages},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  year      = {1993},
}

@inproceedings{Das-nips94,
  author    = {S. Das and M. C. Mozer},
  title     = {A Unified Gradient-Descent/Clustering Architecture for Finite State Machine Induction},
  editor    = NIPS6ed,
  booktitle = NIPS6,
  publisher = {Morgan Kaufmann},
  year      = {1994},
}

@Article{daubechies90,
  author =       "Ingrid Daubechies",
  title =        "The Wavelet Transform, Time-Frequency Localization and
                 Signal Analysis",
  journal =      "IEEE Transactions on Information Theory",
  volume =       "36",
  number =       "5",
  pages =        "961--1005",
  month =        sep,
  year =         "1990",
}

%% Journal article: the venue belongs in `journal` (required for this entry
%% type), not `booktitle`, which article styles ignore.
@article{daume09searn,
  author =       {Hal {Daum\'e III} and John Langford and Daniel Marcu},
  title =        {Search-based Structured Prediction},
  journal =      {Machine Learning},
  volume =       {75},
  number =       {3},
  pages =        {297--325},
  year =         {2009},
}

@inproceedings{Davis89,
  author    = {L. Davis},
  title     = {Mapping neural networks into classifier systems},
  editor    = {J. D. Schaffer},
  booktitle = {Proceedings of the Third International Conference on Genetic Algorithms},
  pages     = {375--378},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Arlington 1989},
  year      = {1989},
}

@article{davis94adaptive,
  author  = {G. Davis and S. Mallat and Z. Zhang},
  title   = {Adaptive time-frequency decompositions},
  journal = {Optical Engineering},
  volume  = {33},
  number  = {7},
  pages   = {2183--2191},
  month   = jul,
  year    = {1994},
}

@inproceedings{Dayan93,
  author    = {P. Dayan and G. E. Hinton},
  title     = {Feudal Reinforcement Learning},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1993},
}

@article{Dayan95,
  author  = {Peter Dayan and Geoffrey E. Hinton and Radford Neal and Rich Zemel},
  title   = {The {Helmholtz} machine},
  journal = {Neural Computation},
  volume  = {7},
  pages   = {889--904},
  year    = {1995},
}

@inproceedings{debiecristianini03,
  author    = {{de Bie}, T. and Cristianini, N.},
  title     = {Convex methods for transduction},
  editor    = NIPS16ed,
  booktitle = NIPS16,
  year      = {2003},
}

@article{debiecristianini06,
  author  = {{de Bie}, T. and Cristianini, N.},
  title   = {Fast {SDP} relaxations of graph cut clustering, transduction, and other combinatorial problems},
  journal = jmlr,
  volume  = {7},
  year    = {2006},
}


@techreport{deRidder+Duin-2002,
  author      = "Dick {de Ridder} and Robert P. W. Duin",
  title       = "Locally linear embedding for classification",
  institution = "Pattern Recognition Group, Dept. of Imaging Science and Technology, Delft University of Technology",
  number      = "PH-2002-01",
  address     = "Delft, The Netherlands",
  year        = "2002",
}

@inproceedings{deRidder+al-2003,
    author    = {Dick {de Ridder} and Olga Kouropteva and Oleg Okun and Matti Pietik{\"a}inen and Robert P. W. Duin},
    title     = {Supervised Locally Linear Embedding},
    booktitle = {ICANN},
    year      = {2003},
    pages     = {333--341},
    ee        = {http://springerlink.metapress.com/openurl.asp?genre=article&issn=0302-9743&volume=2714&spage=333},
    bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{debollivier-gallinari-thiria-90,
  author    = {M. deBollivier and P. Gallinari and S. Thiria},
  title     = {Multi-module neural networks for classification},
  booktitle = {Proc. of the International Neural Network Conference 90},
  pages     = {777--780},
  address   = {Paris},
  year      = {1990},
}

@article{Decoste-2002,
  author  = {Dennis Decoste and Bernhard Sch{\"o}lkopf},
  title   = {Training invariant support vector machines},
  journal = {Machine Learning},
  volume  = {46},
  pages   = {161--190},
  year    = {2002},
}

@article{Deerwester90,
  author  = {S. Deerwester and S. T. Dumais and G. W. Furnas and T. K. Landauer and R. Harshman},
  title   = {Indexing by latent semantic analysis},
  journal = {Journal of the American Society for Information Science},
  volume  = {41},
  number  = {6},
  pages   = {391--407},
  year    = {1990},
}

@article{Dehaene87,
  author  = {S. Dehaene and J.-P. Changeux and J.-P. Nadal},
  title   = {Neural Networks That Learn Temporal Sequences by Selection},
  journal = PNAS,
  volume  = {84},
  pages   = {2727--2731},
  year    = {1987},
}

@inproceedings{Delalleau+al-2005-short,
  author    = {Olivier Delalleau and Yoshua Bengio and Nicolas {Le Roux}},
  title     = {Efficient Non-Parametric Function Induction in Semi-Supervised Learning},
  editor    = aistats05ed,
  booktitle = aistats05,
  pages     = {96--103},
  year      = {2005},
}

%% The compound surname is braced and given in "Last, First" form; unbraced,
%% BibTeX would parse "De" as a given name and "la" as a von particle.
@InProceedings{DeLaTorreF2006,
  author =       "{De la Torre Frade}, Fernando and Takeo Kanade",
  booktitle =    "International Conference on Machine Learning",
  title =        "Discriminative Cluster Analysis",
  volume =       "148",
  publisher =    "ACM Press",
  address =      "New York, NY, USA",
  pages =        "241--248",
  month =        jun,
  year =         "2006",
}

@article{Delgutte+Kiang84,
  author  = {B. Delgutte and N. Y. S. Kiang},
  title   = {Speech coding in the auditory nerve},
  journal = jasa,
  volume  = {75},
  number  = {3},
  pages   = {866--907},
  year    = {1984},
}

@article{Delgutte80,
  author  = {B. Delgutte},
  title   = {Representation of speech-like sounds in the discharge patterns of auditory nerve fibers},
  journal = jasa,
  volume  = {68},
  number  = {3},
  pages   = {843--857},
  year    = {1980},
}

@misc{delve,
  author = {C. Rasmussen and R. Neal and G. E. Hinton and D. van Camp and Z. Ghahramani and R. Kustra and R. Tibshirani},
  title  = {The {DELVE} Manual},
  year   = {1996},
  note   = {{DELVE} can be found at http://www.cs.toronto.edu/\%7Edelve},
}

@inproceedings{DeMers+Cottrell93,
  author    = {David DeMers and Garrison W. Cottrell},
  title     = {Non-linear dimensionality reduction},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  pages     = {580--587},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  year      = {1993},
}

@InProceedings{Demichelis89,
  author =       "P. DeMichelis and L. Fissore and P. Laface and G.
                 Micca and E. Piccolo",
  booktitle =    icassp,
  title =        "On the Use of Neural Networks for Speaker Independent
                 Isolated Word Recognition",
  address =      "Glasgow (Scotland)",
  year =         "1989",
}

%% "De Mori" is braced so BibTeX treats it as one surname, as in DeMori96.
@InProceedings{DeMori+Palakal85,
  author =       "R. {De Mori} and M. Palakal",
  booktitle =    "Proc. Ninth International Joint Conference on
                 Artificial Intelligence",
  title =        "On the use of taxonomy of time-frequency morphologies
                 for automatic speech recognition",
  address =      "Los Angeles, CA",
  pages =        "877--879",
  year =         "1985",
}

%% "De Mori" is braced so BibTeX treats it as one surname, as in DeMori96.
@Article{DeMori85,
  author =       "R. {De Mori} and P. Laface and Y. Mong",
  title =        "Parallel algorithms for syllable recognition in
                 continuous speech",
  journal =      ieeetpami,
  volume =       "7",
  pages =        "56--69",
  year =         "1985",
}

%% "De Mori" is braced so BibTeX treats it as one surname, as in DeMori96.
%% NOTE(review): the original had volume 2; the 1987 volume of this journal
%% is 9 and "2" is taken to be the issue number -- confirm.
@Article{DeMori87,
  author =       "R. {De Mori} and L. Lam and M. Gilloux",
  title =        "Learning and plan refinement in a knowledge-based
                 system for automatic speech recognition",
  journal =      ieeetpami,
  volume =       "9",
  number =       "2",
  pages =        "289--305",
  year =         "1987",
}

%% The survey's web location is carried in `note`; `address` is reserved for
%% the publisher's city.
@InCollection{DeMori96,
  author =       "R. {De Mori} and F. Brugnara",
  editor =       "R. A. Cole and J. Mariani and H. Uszkoreit and A.
                 Zaenen and V. Zue",
  booktitle =    "Survey of the State of the Art in Human Language
                 Technology",
  title =        "{HMM} Methods in Speech Recognition",
  publisher =    "Cambridge University Press",
  pages =        "24--34",
  year =         "1996",
  note =         "http://www.cse.ogi.edu/CSLU/HLTsurvey/HLTsurvey.html",
}

@Article{Dempster77,
  author =       "A. P. Dempster and N. M. Laird and D. B. Rubin",
  title =        "Maximum likelihood from incomplete data via the {EM}
                 algorithm",
  journal =      "Journal of the Royal Statistical Society. Series B (Methodological)",
  volume =       "39",
  number =       "1",
  pages =        "1--38",
  year =         "1977",
}

@InProceedings{denker-lecun-93,
  author =       "Yann {LeCun} and John S. Denker",
  booktitle =    "IEEE Workshop on the Physics of Computation",
  title =        "Natural versus Universal Probability, Complexity, and
                 Entropy",
  publisher =    "IEEE",
  pages =        "122--127",
  year =         "1992",
}

@inproceedings{Denker86,
  author    = {J. Denker},
  title     = {Neural Network Refinements and Extensions},
  editor    = {J. S. Denker},
  booktitle = snowbird,
  pages     = {121--128},
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
  year      = {1986},
}

@article{Denker87,
  author  = {J. Denker and D. Schwartz and B. Wittner and S. Solla and R. Howard and L. Jackel and J. Hopfield},
  title   = {Large Automatic Learning, Rule Extraction, and Generalization},
  journal = cs,
  volume  = {1},
  pages   = {877--922},
  year    = {1987},
}

@inproceedings{Denker91,
  author    = {J. S. Denker and Y. {LeCun}},
  title     = {Transforming neural-net output levels to probability distributions},
  editor    = NIPS3ed,
  booktitle = NIPS3,
  pages     = {853--859},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  year      = {1991},
}

%% The ISBN has its own field so that `publisher` holds only the publisher name.
@InProceedings{Denker94,
  author =       "J. Denker and C. J. C. Burges",
  booktitle =    "The Mathematics of Generalization: Proceedings of the
                 SFI/CNLS Workshop on Formal Approaches to Supervised
                 Learning",
  title =        "Image Segmentation and Recognition",
  publisher =    "Addison-Wesley",
  isbn =         "0-201-40985-2",
  year =         "1994",
}

@article{Deprit89,
  author  = {E. Deprit},
  title   = {Implementing Recurrent Back-Propagation on the Connection Machine},
  journal = {Neural Networks},
  volume  = {2},
  number  = {4},
  pages   = {295--314},
  year    = {1989},
}

@ARTICLE{Derenyi94,
   author = {Der{\'e}nyi, I. and Geszti, T. and Gy{\"o}rgyi, G.},
    title = {Generalization in the programed teaching of a perceptron},
  journal = {Physical Review {E}},
     year = 1994,
    month = oct,
   volume = 50,
    pages = {3192--3200},
      doi = {10.1103/PhysRevE.50.3192},
   adsurl = {http://adsabs.harvard.edu/abs/1994PhRvE..50.3192D},
  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}

@article{Derrida87,
  author  = {B. Derrida and E. Gardner and A. Zippelius},
  title   = {An Exactly Soluble Asymmetric Neural Network Model},
  journal = eul,
  volume  = {4},
  pages   = {167--173},
  year    = {1987},
}

@techreport{Derthick84,
  author      = {M. Derthick},
  title       = {Variations on the {Boltzmann} Machine},
  institution = {Department of Computer Science, Carnegie Mellon University},
  number      = {CMU--CS--84--120},
  address     = {Pittsburgh, PA},
  year        = {1984},
}

@InProceedings{deSaV93,
  author =       "de Sa, Virginia R.",
  editor =       NIPS5ed,
  booktitle =    NIPS5,
  title =        "Learning Classification with Unlabeled Data",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "San Francisco, CA",
  pages =        "112--119",
  year =         "1993",
  keywords =     "multiview, semisupervised",
  citeulike-article-id = "350518",
  posted-at =    "2008-08-12 16:46:39",
  priority =     "2",
}
	%url = {http://citeseer.ist.psu.edu/desa94learning.html},

@inproceedings{DeSieno88,
  author    = {D. DeSieno},
  title     = {Adding a Conscience to Competitive Learning},
  booktitle = icnn,
  volume    = {1},
  pages     = {117--124},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@inproceedings{DeSilva+Tenenbaum-2003,
  author    = {V. {de Silva} and J. B. Tenenbaum},
  title     = {Global Versus Local Methods in Nonlinear Dimensionality Reduction},
  editor    = NIPS15ed,
  booktitle = NIPS15,
  pages     = {705--712},
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2003},
}

@book{Devaney89,
  author    = {R. L. Devaney},
  title     = {An Introduction to Chaotic Dynamical Systems},
  publisher = {Addison-Wesley},
  year      = {1989},
}

@article{Devereux84,
  author  = {J. Devereux and P. Haeberli and O. Smithies},
  title   = {A comprehensive set of sequence analysis programs for the {VAX}},
  journal = {Nucleic Acids Research},
  volume  = {12},
  pages   = {387--395},
  year    = {1984},
}

@book{Devijver82,
  author    = {P. A. Devijver and J. Kittler},
  title     = {Pattern Recognition: {A} Statistical Approach},
  publisher = {Prentice-Hall},
  address   = {London},
  year      = {1982},
}

@article{Devijver87,
  author  = {J. Voisin and P. A. Devijver},
  title   = {An application of the multiedit-condensing technique to the reference selection problem in a print recognition system},
  journal = {Pattern Recognition},
  volume  = {20},
  number  = {5},
  pages   = {465--474},
  year    = {1987},
}

%% The surname is braced (as for "{de Silva}" elsewhere in this file) rather
%% than wrapped in \mbox, which would leak the macro name into sort keys and labels.
@Article{deVries92,
  author =       "B. {de Vries} and J. C. Principe",
  title =        "The gamma model -- {A} new neural net model for
                 temporal processing",
  journal =      nn,
  volume =       "5",
  pages =        "565--576",
  year =         "1992",
}

@Book{Devroye-book96,
  author =       "L. Devroye and L. Gy{\"o}rfi and G. Lugosi",
  title =        "A Probabilistic Theory of Pattern Recognition",
  publisher =    "Springer-Verlag",
  year =         "1996",
}

@article{Devroye88,
  author  = {Luc Devroye},
  title   = {Automatic Pattern Recognition: {A} Study of the Probability of Error},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {10},
  number  = {4},
  pages   = {530--543},
  month   = jul,
  year    = {1988},
}

%% The citation key keeps its historical spelling so existing citations resolve.
@Book{Diamantras-96,
  author =       "K. I. Diamantaras and S. Y. Kung",
  title =        "Principal Component Neural Networks: theory and applications",
  publisher =    "Wiley",
  year =         "1996",
}

@article{Diebold+Mariano95,
  author  = {F. X. Diebold and R. S. Mariano},
  title   = {Comparing Predictive Accuracy},
  journal = {Journal of Business and Economic Statistics},
  volume  = {13},
  number  = {3},
  pages   = {253--263},
  year    = {1995},
}

@incollection{Diebold93,
  author    = {F. X. Diebold and J. H. Lee and G. C. Weinbach},
  title     = {Regime switching with time-varying transition probabilities},
  editor    = {C. Hargreaves},
  booktitle = {Nonstationary Time Series Analysis and Cointegration},
  publisher = {Oxford University Press},
  address   = {Oxford},
  year      = {1993},
}

@incollection{Diebold93b,
  author    = {F. X. Diebold and G. Rudebusch and E. Sichel},
  title     = {Further evidence on business-cycle duration dependence},
  editor    = {J. H. Stock and M. W. Watson},
  booktitle = {Business Cycles, Indicators, and Forecasting},
  publisher = {University of Chicago Press},
  address   = {Chicago},
  year      = {1993},
}

@Article{DieboldKilian,
  author =       "F. X. Diebold and L. Kilian",
  title =        "Measuring Predictability: Theory and Macroeconomic
                 Applications",
  journal =      "NBER technical working paper",
  volume =       "213",
  year =         "1997",
}

@incollection{DieboldLopez,
  author    = {F. X. Diebold and J. A. Lopez},
  title     = {Forecast Evaluation and Combination},
  editor    = {G. S. Maddala and C. R. Rao},
  booktitle = {Handbook of Statistics, Vol. 14},
  pages     = {241--268},
  publisher = {Elsevier Science},
  year      = {1996},
}

@article{Diederich87,
  author  = {S. Diederich and M. Opper},
  title   = {Learning of Correlated Patterns in Spin-Glass Networks by Local Learning Rules},
  journal = prl,
  volume  = {58},
  pages   = {949--952},
  year    = {1987},
}

@inproceedings{Diegert90,
  author    = {C. Diegert},
  title     = {Out-of-core Backpropagation},
  booktitle = {Proceedings of IEEE-IJCNN90},
  volume    = {II},
  pages     = {97--103},
  address   = {San Diego, CA},
  year      = {1990},
}

%% Same work as Dietterich1998; both keys are kept so existing citations resolve.
@Article{dietterich,
  author =       "T. G. Dietterich",
  title =        "Approximate Statistical Tests for Comparing Supervised
                 Classification Learning Algorithms",
  journal =      "Neural Computation",
  volume =       "10",
  number =       "7",
  pages =        "1895--1923",
  year =         "1998",
}

%% Same paper as the entry keyed dietterich; title corrected to the
%% published wording ("Tests for") and the URL given an explicit scheme.
@Article{Dietterich1998,
  author =       "Thomas G. Dietterich",
  title =        "Approximate Statistical Tests for Comparing Supervised
                 Classification Learning Algorithms",
  journal =      "Neural Computation",
  volume =       "10",
  number =       "7",
  pages =        "1895--1923",
  year =         "1998",
  URL =          "http://citeseer.ist.psu.edu/dietterich98approximate.html",
}

@Article{dietterich97,
  author =       "Thomas G. Dietterich and Richard H. Lathrop and Tom{\'a}s
                 Lozano-P{\'e}rez",
  title =        "Solving the Multiple Instance Problem with
                 Axis-Parallel Rectangles",
  journal =      "Artificial Intelligence",
  volume =       "89",
  number =       "1--2",
  pages =        "31--71",
  year =         "1997",
}


@article{Diggle+Gratton-1984,
  author    = {P. Diggle and R. Gratton},
  title     = {Monte Carlo Methods of Inference for Implicit Statistical Models},
  journal   = {Journal of the Royal Statistical Society. Series B (Methodological)},
  publisher = {Blackwell Publishing for the Royal Statistical Society},
  year      = {1984},
  volume    = {46},
  number    = {2},
  pages     = {193--227},
}


@incollection{Doi-2006,
  author    = {Eizaburo Doi and Doru C. Balcan and Michael S. Lewicki},
  title     = {A Theoretical Analysis of Robust Coding over Noisy Overcomplete Channels},
  booktitle = NIPS18,
  editor    = NIPS18ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2006},
  pages     = {307--314},
}

@InProceedings{DoiE2007,
  author =       "Eizaburo Doi and Michael S. Lewicki",
  editor =       NIPS19ed,
  booktitle =    NIPS19,
  title =        "A Theory of Retinal Population Coding",
  publisher =    "MIT Press",
  pages =        "353--360",
  year =         "2007",
}

@book{Doidge-2007,
    author = {Doidge, Norman},
    title = {The Brain That Changes Itself: Stories of Personal Triumph from the Frontiers of Brain Science},
    publisher = {Penguin Group},
    howpublished = {Paperback},
    isbn = {0143113100},
    month = dec,
    year = {2007},
}

@InCollection{DollarP2007,
  author =       "Piotr Doll{\'a}r and Serge Belongie and Vincent Rabaud",
  editor =       NIPS19ed,
  booktitle =    NIPS19,
  title =        "Learning to Traverse Image Manifolds",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "361--368",
  year =         "2007",
}

@inproceedings{DollarP2007b,
       author = "P. Doll{\'a}r and V. Rabaud and S. Belongie",
       title = "Non-Isometric Manifold Learning: Analysis and an Algorithm",
       booktitle =    ICML07,
       editor =       ICML07ed,
       publisher =    ICML07publ,
       month = jun,
       year = "2007",
}

@techreport{Donoho+Carrie-03,
  author      = {D. L. Donoho and C. Grimes},
  title       = {Hessian Eigenmaps: new locally linear embedding techniques for high-dimensional data},
  institution = {Dept. Statistics, Stanford University},
  number      = {2003-08},
  year        = {2003},
}

@article{Donoho-2006,
  author  = {David Donoho},
  title   = {Compressed sensing},
  journal = {{IEEE} Transactions on Information Theory},
  year    = {2006},
  volume  = {52},
  number  = {4},
  pages   = {1289--1306},
}

@book{Dorigo98,
  author    = {M. Dorigo and M. Colombetti},
  title     = {Robot shaping: {An} experiment in behavior engineering},
  publisher = {MIT Press/Bradford Books},
  year      = {1998},
}

@book{Doucet+al-2001,
  editor    = {A. Doucet and N. {de Freitas} and N. Gordon},
  title     = {Sequential Monte Carlo Methods in Practice},
  publisher = {Springer-Verlag},
  year      = {2001},
}

@techreport{Doya93bif,
  author      = {K. Doya},
  title       = {Bifurcations of Recurrent Neural Networks in Gradient Learning},
  institution = {Department of Biology, University of California},
  address     = {La Jolla, CA},
  year        = {1993},
  note        = {Submitted},
}

@techreport{Doya93un,
  author      = {K. Doya},
  title       = {Universality of Fully-Connected Recurrent Neural Networks},
  institution = {Department of Biology, University of California},
  address     = {La Jolla, CA},
  year        = {1993},
  note        = {Submitted},
}

%% This is a monograph published by the MAA, not a journal article.
@Book{Doyle+Snell-1984,
  author =       "Peter G. Doyle and J. Laurie Snell",
  title =        "Random Walks and Electric Networks",
  series =       "Carus Mathematical Monographs",
  volume =       "22",
  publisher =    "Mathematical Association of America",
  year =         "1984",
}

@book{Draper81,
  author    = {N. R. Draper and H. Smith},
  title     = {Applied Regression Analysis},
  publisher = {John Wiley and Sons},
  year      = {1981},
}

@inproceedings{Driancourt91,
  author     = {X. Driancourt and L. Bottou and P. Gallinari},
  title      = {Learning Vector Quantization, Multi-Layer Perceptron and Dynamic Programming: Comparison and Cooperation},
  booktitle  = ijcnn,
  year       = {1991},
  volume     = {2},
  pages      = {815--819},
  OPTaddress = {Seattle WA},
}

@inproceedings{Drucker93,
  author    = {H. Drucker and R. Schapire and R. Simard},
  title     = {Improving performance in neural networks using a boosting algorithm},
  booktitle = NIPS5,
  editor    = NIPS5ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1993},
  pages     = {42--49},
}

@article{Drucker93b,
  author  = {H. Drucker and R. Schapire and R. Simard},
  title   = {Boosting performance in neural networks},
  journal = {International Journal of Pattern Recognition and Artificial Intelligence},
  year    = {1993},
  pages   = {61--76},
  note    = {Special Issue on Applications of Neural Networks to Pattern Recognition (I. Guyon Ed.)},
}

@article{Duane-1987,
 author = {S. Duane and A. D. Kennedy and B. Pendleton and D. Roweth},
 title = {Hybrid {M}onte {C}arlo},
 journal = {Phys. Lett. {B}},
 volume = 195,
 pages = {216--222},
 year = 1987,
}

@book{Duda-Hart,
  author    = {R. O. Duda and P. E. Hart},
  title     = {Pattern Classification and Scene Analysis},
  publisher = {Wiley},
  address   = {New York},
  year      = {1973},
}

@Book{Duda-Hart-2000,
  author =       "R. O. Duda and P. E. Hart and D. G. Stork",
  title =        "Pattern Classification",
  edition =      "Second",
  publisher =    "Wiley and Sons",
  address =      "New York",
  year =         "2001",
}

@book{Duda73,
  author    = {R. O. Duda and P. E. Hart},
  title     = {Pattern Classification and Scene Analysis},
  publisher = {Wiley},
  address   = {New York},
  year      = {1973},
}

@article{Dugas+al-2003,
  author  = {C. Dugas and Y. Bengio and N. Chapados and P. Vincent and G. Denoncourt and C. Fournier},
  title   = {Statistical Learning Algorithms Applied to Automobile Insurance Ratemaking},
  journal = {CAS Forum},
  month   = {Winter},
  year    = {2003},
  volume  = {1},
  number  = {1},
  pages   = {179--214},
}

@TechReport{Dugas00,
  author =       "C. Dugas and O. Bardou and Y. Bengio",
  title =        "Analyses Empiriques sur des Transactions d'options",
  number =       "1176",
  institution =  "D{\'e}partement d'informatique et de Recherche
                 Op{\'e}rationnelle, Universit{\'e} de Montr{\'e}al",
  address =      "Montr{\'e}al, Qu{\'e}bec, Canada",
  year =         "2000",
}

@InProceedings{Dugas01,
  author =       "C. Dugas and Y. Bengio and F. B{\'e}lisle and C.
                 Nadeau",
  editor =       NIPS13ed,
  booktitle =    NIPS13,
  title =        "Incorporating Second-Order Functional Knowledge for Better Option Pricing",
  publisher =    "{MIT} Press",
  pages =        "472--478",
  year =         "2001",
}

%%InProceedings{Bengio2000,
%%  author =       "Y. Bengio",
%%  booktitle =    icjnn
%%  title =        "Incorporating Second-Order Functional Knowledge for Better Option Pricing",
%%  volume =       "V",
%%  pages =        "79--84",
%%  year =         "2000",
%%}

@inproceedings{Bengio2000,
  title={Probabilistic neural network models for sequential data},
  author={Bengio, Y.},
  booktitle=ijcnn,
  year={2000},
  volume={5},
  pages={79--84},
  abstract={Artificial neural networks (ANN) can be incorporated into probabilistic models. In this paper we review some of the approaches which have been proposed to incorporate them into probabilistic models of sequential data, such as hidden Markov models (HMM). We also discuss new developments and new ideas in this area, in particular how ANN can be used to model high-dimensional discrete and continuous data to deal with the curse of dimensionality and how the ideas proposed in these models could be applied to statistical language modeling to represent longer-term context than allowed by trigram models, while keeping word-order information},
  keywords={computational linguistics, hidden Markov models, neural nets, probability, ANN, HMM, hidden Markov models, longer-term context, probabilistic models, probabilistic neural network models, sequential data, statistical language modeling, trigram models, word-order information},
  doi={10.1109/IJCNN.2000.861438},
}

@inproceedings{Bengio-hyper-2000,
  author    = {Yoshua Bengio},
  title     = {Continuous Optimization of Hyper-Parameters},
  booktitle = ijcnn,
  year      = {2000},
  volume    = {V},
  pages     = {305--310},
}

@inproceedings{Ghosn2000,
  author    = {J. Ghosn and Y. Bengio},
  title     = {Bias Learning, Knowledge Sharing},
  booktitle = ijcnn,
  year      = {2000},
  volume    = {I},
  pages     = {9--14},
}

@article{Durbin87,
  author  = {R. Durbin and D. Willshaw},
  title   = {An Analogue Approach to the Travelling Salesman Problem Using an Elastic Net Method},
  journal = nature,
  year    = {1987},
  volume  = {326},
  pages   = {689--691},
}

@mastersthesis{Dzwonczyk91,
  author = {M. Dzwonczyk},
  title  = {Quantitative failure models of feed-forward neural networks},
  school = {MIT},
  year   = {1991},
}

@Book{econometric-G-97,
  author =       "W. H. Greene",
  title =        "Econometric Analysis",
  edition =      "Third",
  publisher =    "Prentice Hall, Inc.",
  year =         "1997",
}

@Article{efficient-KW-82,
  author =       "W. W. Krasker and R. R. Welsch",
  title =        "Efficient Bounded-Influence Regression Estimation",
  journal =      "Journal of the American Statistical Association",
  volume =       "77",
  pages =        "595--604",
  year =         "1982",
}

@book{Efron+Tibs93,
  author    = {Bradley Efron and Robert J. Tibshirani},
  title     = {An introduction to the Bootstrap},
  publisher = {Chapman and Hall},
  address   = {New York},
  year      = {1993},
}

@TechReport{eigen-TR2,
  author =       "Yoshua Bengio and Pascal Vincent and Jean-Fran{\c{c}}ois
                 Paiement and Olivier Delalleau and Marie Ouimet and
                 Nicolas {Le Roux}",
  title =        "Spectral Clustering and Kernel {PCA} are Learning
                 Eigenfunctions",
  number =       "1239",
  institution =  "D{\'e}partement d'informatique et recherche
                 op{\'e}rationnelle, Universit{\'e} de Montr{\'e}al",
  year =         "2003",
}

@inproceedings{Eisner96,
  author    = {J. Eisner},
  title     = {Three new probabilistic models for dependency parsing: an exploration},
  booktitle = {COLING-96},
  address   = {Copenhagen, Denmark},
  year      = {1996},
  pages     = {340--345},
}

@article{EladAharon2006,
  author    = {Michael Elad and Michal Aharon},
  title     = {Image Denoising Via Sparse and Redundant Representations Over Learned Dictionaries},
  journal   = {IEEE Transactions on Image Processing},
  month     = dec,
  year      = {2006},
  volume    = {15},
  number    = {12},
  pages     = {3736--3745},
  bibsource = {http://www.visionbib.com/bibliography/image-proc131.html#TT8737},
}

@inproceedings{ElHihi+Bengio-nips8-small,
  author    = {S. ElHihi and Y. Bengio},
  title     = {Hierarchical Recurrent Neural Networks for Long-Term Dependencies},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  publisher = {MIT Press, Cambridge, MA},
  year      = {1996},
  pages     = {493--499},
}

@InProceedings{ellis+poliner-icassp2007,
  author =       "D. Ellis and G. Poliner",
  booktitle =    "Proceedings of the 2007 International Conference on
                 Acoustics, Speech and Signal Processing ({ICASSP})",
  title =        "Identifying Cover Songs with Chroma Features and
                 Dynamic Programming",
  publisher =    "IEEE Signal Processing Society",
  year =         "2007",
}

@article{Elman88,
  author  = {J. L. Elman and D. Zipser},
  title   = {Learning the Hidden Structure of Speech},
  journal = jasa,
  year    = {1988},
  volume  = {83},
  pages   = {1615--1626},
}

%% Same paper as the entry keyed Elman88; page range added to match it.
@Article{Elman88Jasa88,
  author =       "J. L. Elman and D. Zipser",
  title =        "Learning the Hidden Structure of Speech",
  journal =      "Journal of the Acoustical Society of America",
  volume =       "83",
  pages =        "1615--1626",
  year =         "1988",
}

@article{Elman90,
  author  = {J. L. Elman},
  title   = {Finding Structure in Time},
  journal = {Cognitive Science},
  year    = {1990},
  volume  = {14},
  pages   = {179--211},
}

@Article{Elman93,
  author =       "Jeffrey L. Elman",
  title =        "Learning and development in neural networks: {The}
                 importance of starting small",
  journal =      "Cognition",
  volume =       "48",
  pages =        "71--99",
  year =         "1993",
  url =          "http://www3.isrl.uiuc.edu/~junwang4/langev/localcopy/pdf/elman93cognition.pdf",
}

@techreport{ElmanTR88,
  author      = {J. L. Elman},
  title       = {Finding Structure in Time},
  institution = {Center for Research in Language, University of California at San Diego},
  number      = {CRL TR 8801},
  year        = {1988},
}

@techreport{EM-tech-rep,
  author      = {Y. Bengio and P. Frasconi},
  title       = {Learning Sequential Behavior: an {EM} Approach},
  institution = {Universit\`a di Firenze},
  year        = {1994},
  note        = {(in preparation)},
}

@article{Engel-Mannor-Meir-2003,
  author  = {Y. Engel and S. Mannor and R. Meir},
  title   = {The kernel recursive least squares algorithm},
  journal = {IEEE Trans. Sig. Proc.},
  year    = {2004},
  volume  = {52},
  number  = {8},
  pages   = {2275--2285},
}

@Article{erhan06qsar,
  author =       "Dumitru Erhan and Pierre-Jean L'Heureux and Shi Yi Yue
                 and Yoshua Bengio",
  title =        "Collaborative Filtering on a Family of Biological
                 Targets",
  journal =      "Journal of Chemical Information and Modeling",
  volume =       "46",
  number =       "2",
  pages =        "626--635",
  year =         "2006",
}

@techreport{Erhan-09-visualization-tr,
  author      = {Dumitru Erhan and Yoshua Bengio and Aaron Courville and Pascal Vincent},
  title       = {Visualizing Higher-Layer Features of a Deep Network},
  institution = {Universit\'{e} de Montr\'{e}al},
  number      = {1341},
  year        = {2009},
}

@inproceedings{Erhan2009-small,
  author    = {Dumitru Erhan and Pierre-Antoine Manzagol and Yoshua Bengio and Samy Bengio and Pascal Vincent},
  title     = {The Difficulty of Training Deep Architectures and the Effect of Unsupervised Pre-Training},
  booktitle = {Proceedings of AISTATS'2009},
  year      = {2009},
}

@inproceedings{Erhan2009-short,
  author    = {D. Erhan and P.-A. Manzagol and Y. Bengio and S. Bengio and P. Vincent},
  title     = {The Difficulty of Training Deep Architectures and the Effect of Unsupervised Pre-Training},
  booktitle = {AI \& Stat.'2009},
  year      = {2009},
}

@book{EverittB1981,
  author    = {B. S. Everitt and D. J. Hand},
  title     = {Finite Mixture Distributions},
  series    = {Monographs on Statistics and Applied Probability},
  publisher = {Chapman and Hall},
  address   = {London},
  year      = {1981},
}

@InProceedings{evgeniou04,
  author =       "Theodoros Evgeniou and Massimiliano Pontil",
  booktitle =    "KDD '04: Proceedings of the 2004 ACM SIGKDD
                 international conference on Knowledge discovery and
                 data mining",
  title =        "Regularized multi-task learning",
  publisher =    "ACM Press",
  address =      "New York, NY, USA",
  pages =        "109--117",
  year =         "2004",
  location =     "Seattle, WA, USA",
}

@article{evgeniou05,
  author  = {Theodoros Evgeniou and Charles A. Micchelli and Massimiliano Pontil},
  title   = {Learning Multiple Tasks with Kernel Methods},
  journal = jmlr,
  month   = apr,
  year    = {2005},
  volume  = {6},
  pages   = {615--637},
}

@inproceedings{Fahlman83,
  author    = {S. E. Fahlman and G. E. Hinton and T. J. Sejnowski},
  title     = {Massively parallel architectures for {AI}: {NETL}, Thistle, and {Boltzmann} machines},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence AAAI-83},
  year      = {1983},
}

@InProceedings{Fahlman89,
  author =       "S. E. Fahlman",
  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
  booktitle =    cmss88,
  title =        "Fast-Learning Variations on Back-Propagation: An
                 Empirical Study",
  publisher =    "Morgan Kaufmann, San Mateo",
  address =      "Pittsburgh 1988",
  pages =        "38--51",
  year =         "1989",
}

@inproceedings{Fahlman90,
  author    = {Scott E. Fahlman and Christian Lebiere},
  title     = {The Cascade-Correlation Learning Architecture},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  year      = {1990},
  pages     = {524--532},
}

@inproceedings{Fahlman90-small,
  author    = {S. E. Fahlman and C. Lebiere},
  title     = {The Cascade-Correlation Learning Architecture},
  booktitle = {NIPS 2},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  year      = {1990},
  pages     = {524--532},
}

@article{Fama+French,
  author  = {E. F. Fama and K. R. French},
  title   = {Permanent and Temporary Components of Stock Prices},
  journal = {Journal of Political Economy},
  year    = {1988},
  volume  = {96},
  number  = {2},
  pages   = {246--273},
}

@book{Fant60,
  author    = {G. Fant},
  title     = {Acoustic Theory of Speech Production},
  publisher = {Mouton and Co.},
  year      = {1960},
}

@book{Fant73,
  author    = {G. Fant},
  title     = {Speech Sounds and Features},
  publisher = {MIT Press, Cambridge, MA},
  year      = {1973},
}

@article{Farhat85,
  author  = {N. H. Farhat and D. Psaltis and A. Prata and E. Paek},
  title   = {Optical Implementation of the Hopfield Model},
  journal = applopt,
  year    = {1985},
  volume  = {24},
}

@article{Farhat87,
  author  = {N. H. Farhat},
  title   = {Optoelectronic Analogs of Self-Programming Neural Nets: Architectures and Methods for Implementing Fast Stochastic Learning by Simulated Annealing},
  journal = applopt,
  year    = {1987},
  volume  = {26},
  pages   = {5093--5103},
}

@article{Farmer87,
  author  = {D. Farmer and J. Sidorowich},
  title   = {Predicting Chaotic Time Series},
  journal = prl,
  year    = {1987},
  volume  = {59},
  pages   = {845--848},
}

@incollection{Farmer88,
  author    = {D. Farmer and J. Sidorowich},
  title     = {Exploiting Chaos to Predict the Future and Reduce Noise},
  editor    = {W. C. Lee},
  booktitle = {Evolution, Learning, and Cognition},
  publisher = {World Scientific},
  address   = {Singapore},
  year      = {1988},
  pages     = {277--330},
}

@inproceedings{Fei-Fei.2004,
        author = {Fei-Fei, Li and Fergus, Rod and Perona, Pietro},
        title = {Learning Generative Visual Models from Few Training Examples: An Incremental {Bayesian} Approach Tested on 101 Object Categories},
        booktitle = {Computer Vision and Pattern Recognition Workshop, 2004 Conference on},
        pages = {178},
        year = {2004},
        doi = {10.1109/CVPR.2004.109},
        url = {http://dx.doi.org/10.1109/CVPR.2004.109},
        keywords = {categorization, computer-vision, generative-models},
        posted-at = {2007-08-10 12:20:22},
        priority = {3},
}

@article{Feldman82,
  author  = {J. A. Feldman and D. H. Ballard},
  title   = {Connectionist Models and Their Properties},
  journal = cogsci,
  year    = {1982},
  volume  = {6},
}

@Article{feldman96,
  author =       "Jerome A. Feldman and George Lakoff and David Bailey
                 and Srini Narayanan and Terry Regier and Andreas
                 Stolcke",
  title =        "{L0} - The First Five Years of an Automated Language
                 Acquisition Project",
  journal =      "Artificial Intelligence Review",
  volume =       "10",
  number =       "1--2",
  pages =        "103--129",
  year =         "1996",
  URL =          "http://citeseer.ist.psu.edu/feldman96first.html",
}

@Book{Fellbaum1996,
  author =       "Christiane Fellbaum",
  title =        "{WordNet}: An Electronic Lexical Database and Some of
                 its Application",
  publisher =    "MIT Press",
  year =         "1996",
}

%% Edited volume: the word "Editor" in the author field was being parsed
%% as the surname, so the name now lives in the editor field of a book entry.
@Book{Fellbaum1998,
  editor =       "Christiane Fellbaum",
  title =        "{WordNet}: An Electronic Lexical Database",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1998",
  URL =          "citeseer.nj.nec.com/fellbaum98wordnet.html",
}

@book{Feller68,
  author    = {W. Feller},
  title     = {An Introduction to Probability Theory and Its Applications},
  publisher = {Wiley},
  address   = {New York},
  year      = {1968},
  volume    = {1},
}

@inproceedings{Feng-Statlog,
  author    = {C. Feng and A. Sutherland and R. King and S. Muggleton and R. Henery},
  title     = {Comparison of machine learning classifiers to statistics and neural networks},
  booktitle = {Proceedings of the Fourth International Workshop on Artificial Intelligence and Statistics},
  year      = {1993},
  pages     = {41--52},
}

@article{Field-1994,
    author = {David J. Field},
    title = {What is the goal of sensory coding?},
    journal = {Neural Computation},
    volume = {6},
    number = {4},
    year = {1994},
    issn = {0899-7667},
    pages = {559--601},
    doi = {10.1162/neco.1994.6.4.559},
    publisher = {MIT Press},
    address = {Cambridge, MA, USA},
}

@article{Fisher-1936,
  author  = {Ronald A. Fisher},
  title   = {The use of multiple measurements in taxonomic problems},
  journal = {Annals of Eugenics},
  year    = {1936},
  volume  = {7},
  pages   = {179--188},
}

@book{Fischer90,
  author    = {K. H. Fischer and J. A. Hertz},
  title     = {Spin Glasses},
  publisher = {Cambridge University Press},
  address   = {Cambridge},
  year      = {1990},
}

@techreport{Fix+Hodges-51,
  author      = {E. Fix and J. L. Hodges},
  title       = {Discriminatory analysis, non-parametric discrimination, consistency properties},
  institution = {{USAF} School of Aviation Medicine, Randolph Field, Texas},
  number      = {Report 21-49-004},
  year        = {1951},
}

@Article{FixHodges51,
  author =       "Evelyn Fix and Joseph L. Hodges Jr.",
  title =        "Discriminatory Analysis: Nonparametric discrimination:
                 Consistency properties",
  journal =      "USAF School of Aviation Medicine",
  volume =       "4",
  pages =        "261--279",
  year =         "1951",
}

@Article{FixHodges52,
  author =       "Evelyn Fix and Joseph L. Hodges Jr.",
  title =        "Discriminatory Analysis: Nonparametric discrimination:
                 Small sample performance",
  journal =      "USAF School of Aviation Medicine",
  volume =       "11",
  pages =        "280--322",
  year =         "1952",
}

@mastersthesis{Flammia91,
  author = {G. Flammia},
  title  = {Speaker Independent Consonant Recognition in Continuous Speech with Distinctive Phonetic Features},
  school = {McGill University, School of Computer Science},
  year   = {1991},
}

@Book{Flanagan72,
  author =       "J. L. Flanagan",
  title =        "Speech Analysis, Synthesis, and Perception",
  publisher =    "Springer-Verlag",
  address =      "Berlin",
  edition =      "Second",
  year =         "1972",
}

@book{Fletcher87,
  author    = {Roger Fletcher},
  title     = {Practical Methods of Optimization},
  edition   = {Second},
  publisher = {Wiley},
  address   = {New York},
  year      = {1987},
}

@InCollection{FleuretF2006,
  author =       "Fran{\c{c}}ois Fleuret and Gilles Blanchard",
  editor =       NIPS18ed,
  booktitle =    NIPS18,
  title =        "Pattern Recognition from One Example by Chopping",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "371--378",
  year =         "2006",
}

@InProceedings{Foldiak89,
  author =       "P. F{\"o}ldi{\'a}k",
  booktitle =    ijcnn,
  title =        "Adaptive Network for Optimal Linear Feature
                 Extraction",
  volume =       "1",
  publisher =    "IEEE, New York",
  address =      "Washington 1989",
  pages =        "401--405",
  year =         "1989",
}

@Article{Foldiak91,
  author =       "P. F{\"o}ldi{\'a}k",
  title =        "Learning Invariance from Transformation Sequences",
  journal =      "Neural Computation",
  volume =       "3",
  number =       "2",
  pages =        "194--200",
  year =         "1991",
}

@techreport{Fontaine,
  author      = {T. Fontaine},
  title       = {{GRAD}-{CM2}: {A} Data-parallel Connectionist Network Simulator},
  institution = {University of Pennsylvania},
  number      = {MS-CIS-92-55/LINC LAB 232},
  month       = jul,
  year        = {1992},
  OPTnote     = {},
}

@article{Foster+George94,
  author  = {D. Foster and E. George},
  title   = {The risk inflation criterion for multiple regression},
  journal = {Annals of Statistics},
  year    = {1994},
  volume  = {22},
  pages   = {1947--1975},
}

@PhdThesis{Foster2002,
  author =       "George Foster",
  title =        "Text Prediction for Translators",
  school =       "Dept. IRO, Universit{\'e} de Montr{\'e}al",
  year =         "2002",
}

@incollection{Fox-2009,
  author    = {Emily Fox and Erik Sudderth and Michael Jordan and Alan Willsky},
  title     = {Nonparametric Bayesian Learning of Switching Linear Dynamical Systems},
  editor    = NIPS21ed,
  booktitle = NIPS21,
  year      = {2009},
  pages     = {457--464},
}

@Article{Fralick67,
  author = 	 {Stanley C. Fralick},
  title = 	 {Learning to Recognize Patterns without a Teacher},
  journal = 	 {IEEE Transactions on Information Theory},
  year = 	 1967,
  volume =	 13,
  pages =	 {57--64},
}

@inproceedings{Franzini87,
  author    = {M. A. Franzini},
  title     = {Speech Recognition with Back Propagation},
  booktitle = {Proceedings of the Ninth Annual Conference of the IEEE Engineering in Medicine and Biology Society},
  publisher = {IEEE, New York},
  address   = {Boston 1987},
  year      = {1987},
  pages     = {1702--1703},
}

@inproceedings{Franzini90,
  author    = {M. A. Franzini and K. F. Lee and A. Waibel},
  title     = {Connectionist {Viterbi} Training: a New Hybrid Method for Continuous Speech Recognition},
  booktitle = icassp,
  address   = {Albuquerque, NM},
  year      = {1990},
  pages     = {425--428},
}

@inproceedings{Frasconi-icnn93,
  author    = {P. Frasconi and M. Gori and A. Tesi},
  title     = {Backpropagation for Linearly Separable Patterns: a Detailed Analysis},
  booktitle = icnn,
  publisher = {IEEE Press},
  address   = {S. Francisco CA},
  year      = {1993},
  pages     = {1818--1822},
}

@inproceedings{Frasconi-ijcnn91,
  author     = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
  title      = {A Unified Approach for Integrating Explicit Knowledge and Learning by Example in Recurrent Networks},
  booktitle  = ijcnn,
  year       = {1991},
  pages      = {811--816},
  OPTaddress = {Seattle WA},
}

@article{Frasconi-ijmpC93,
  author  = {P. Frasconi and M. Gori and G. Soda},
  title   = {Daphne: Data Parallelism Neural Network Simulator},
  journal = {Int. Journal of Modern Physics C},
  year    = {1993},
  volume  = {4},
  number  = {1},
  pages   = {17--28},
  note    = {Special Issue: ``Science on the Connection Machine''},
}

@inproceedings{Frasconi-milano,
  author    = {P. Frasconi and M. Gori and G. Soda},
  title     = {Recurrent Networks for Continuous Speech Recognition},
  booktitle = {Computational Intelligence 90},
  publisher = {Elsevier},
  address   = {Milano (Italy)},
  year      = {1990},
}

@mastersthesis{Frasconi-msthesis,
  author = {P. Frasconi},
  title  = {Progetto e realizzazione di un simulatore per reti neurali ricorrenti e implementazione di prototipi per il riconoscimento vocale in tempo reale},
  school = {Universit\`a di Firenze},
  year   = {1990},
  note   = {(in Italian)},
}

@article{Frasconi-nc92,
  author  = {P. Frasconi and M. Gori and G. Soda},
  title   = {Local Feedback Multi-Layered Networks},
  journal = nc,
  year    = {1992},
  volume  = {4},
  number  = {1},
  pages   = {120--130},
}

@phdthesis{Frasconi-PhD,
  author  = {Paolo Frasconi},
  title   = {Reti Ricorrenti ed Elaborazione Adattiva di Sequenze},
  school  = {Universit\`a di Firenze},
  address = {Italy},
  year    = {1994},
  note    = {(in Italian)},
}

@incollection{Frasconi-pinn93,
  author    = {P. Frasconi and M. Gori and A. Tesi},
  title     = {Successes and Failures of Backpropagation: a Theoretical Investigation},
  editor    = {Omid Omidvar},
  booktitle = {Progress in Neural Networks},
  publisher = {Ablex Publishing},
  year      = {1993},
}

@inproceedings{Frasconi-spie93,
  author       = {Paolo Frasconi and Marco Gori},
  title        = {Multilayered networks and the {C}-{G} uncertainty principle},
  editor       = {D. Ruck},
  booktitle    = {Proc. Conf. Science of Artificial Neural Networks II},
  organization = {International Society for Optical Engineering (SPIE)},
  address      = {Orlando, FL},
  year         = {1993},
  volume       = {SPIE-1966},
}

@techreport{Frasconi-TR92,
  author      = {P. Frasconi and M. Gori and G. Soda},
  title       = {Injecting Nondeterministic Finite State Automata into Recurrent Neural Networks},
  institution = {Universit\`a di Firenze (Italy)},
  number      = {DSI-RT15/92},
  month       = aug,
  year        = {1992},
}

@unpublished{Frasconi-unp94,
  author    = {P. Frasconi and Y. Bengio},
  title     = {An {EM} Approach to Grammatical Inference},
  note      = {Submitted to the 12-th {\em International Conference on Pattern Recognition}},
  year      = {1994},
  OPTannote = {},
}

@inproceedings{Frasconi-v91,
  author    = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
  title     = {Learning Automata with Sigmoidal Networks},
  editor    = {E. Caianiello},
  booktitle = {Proc. of the 4th Italian Workshop on Parallel Architectures and Neural Networks},
  publisher = {World Scientific Pub},
  address   = {Vietri (Italy)},
  pages     = {69--77},
  year      = {1991},
}

@inproceedings{Frasconi90,
  author    = {P. Frasconi and M. Gori and G. Soda},
  title     = {Recurrent Networks with Activation Feedback},
  editor    = {E. Caianiello},
  booktitle = {Proc. of the 3rd Italian Workshop on Parallel Architectures and Neural Networks},
  publisher = {World Scientific Pub},
  address   = {Vietri (Italy)},
  pages     = {329--335},
  year      = {1990},
}

@inproceedings{Frasconi97,
  author    = {P. Frasconi and M. Gori and A. Sperduti},
  title     = {On the Efficient Classification of Data Structures by Neural Networks},
  booktitle = {Proc. Int. Joint Conf. on Artificial Intelligence},
  year      = {1997},
}

@article{Frasconi-kde93,
  author  = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
  title   = {Unified Integration of Explicit Rules and Learning by Example in Recurrent Networks},
  journal = ieeetrkde,
  year    = {1993},
  note    = {(in press)},
}

@Article{Frean90,
  author =       "M. Frean",
  title =        "The Upstart Algorithm: {A} Method for Constructing and
                 Training Feedforward Neural Networks",
  journal =      nc,
  volume =       "2",
  pages =        "198--209",
  year =         "1990",
}

@techreport{Freund+Haussler-94,
  author      = {Yoav Freund and David Haussler},
  title       = {Unsupervised learning of distributions on binary vectors using two layer networks},
  institution = {University of California, Santa Cruz},
  number      = {UCSC-CRL-94-25},
  year        = {1994},
}

@inproceedings{Freund+Haussler92,
  author    = {Yoav Freund and David Haussler},
  title     = {A fast and exact learning rule for a restricted class of {Boltzmann} machines},
  editor    = NIPS4ed,
  booktitle = NIPS4,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {912--919},
  year      = {1992},
}

@article{Freund-Schapire-98,
  author  = {Yoav Freund and Robert E. Schapire},
  title   = {Adaptive Game Playing using Multiplicative Weights},
  journal = {Games and Economic Behavior},
  year    = {1998},
}

@inproceedings{Freund1995,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {A decision-theoretic generalization of on-line learning and an application to boosting},
  booktitle = {Proceedings of the Second European Conference on Computational Learning Theory},
  publisher = {Springer-Verlag},
  pages     = {23--37},
  year      = {1995},
  isbn      = {3-540-59119-2},
}

@TechReport{freund94,
  author =       "Y. Freund and D. Haussler",
  title =        "Unsupervised learning of distributions on binary
                 vectors using two layer networks",
  number =       "UCSC-CRL-94-25",
  institution =  "UCSC",
  year =         "1994",
}

@unpublished{Freund97,
  author = {Y. Freund and R. E. Schapire and P. Bartlett and W. S. Lee},
  title  = {Boosting the margin: {A} new explanation for the effectiveness of voting methods},
  note   = {Presented at the Machines that Learn Conference, Snowbird, Utah},
  year   = {1997},
}

@inproceedings{Frey96,
  author    = {Brendan J. Frey and Geoffrey E. Hinton and Peter Dayan},
  title     = {Does the wake-sleep algorithm learn good density estimators?},
  editor    = NIPS8ed,
  booktitle = NIPS8,
  publisher = {MIT Press, Cambridge, MA},
  pages     = {661--670},
  year      = {1996},
}

@InProceedings{Frey-Hinton96,
  author =       "B. J. Frey and G. E. Hinton",
  booktitle =    "Proceedings of the Data Compression Conference",
  title =        "Free Energy Coding",
  publisher =    "IEEE Computer Society Press",
  address =      "Los Alamitos, CA",
  year =         "1997",
}

@book{Frey98,
  author    = {Brendan J. Frey},
  title     = {Graphical models for machine learning and digital communication},
  publisher = {{MIT} Press},
  year      = {1998},
}

@InProceedings{frey99estimating,
  author =       "B. J. Frey and N. Jojic",
  booktitle =    cvpr99,
  title =        "Estimating Mixture Models of Images and Inferring
                 Spatial Transformations Using the {EM} Algorithm",
  pages =        "416--422",
  year =         "1999",
  URL =          "http://citeseer.ist.psu.edu/frey99estimating.html",
}

@InProceedings{FreyUAI00,
  author =       "Brendan Frey and Nebojsa Jojic",
  booktitle =    UAI00,
  title =        "Learning Graphical Models of Images, Videos and Their
                 Spatial Transformations",
  publisher =    "Morgan Kaufmann",
  address =      "San Francisco, CA",
  pages =        "184--191",
  year =         "2000",
}

@Article{Friedman+Fisher-99,
  author =       "J. H. Friedman and N. I. Fisher",
  title =        "Bump hunting in high-dimensional data",
  journal =      "Statistics and Computing",
  volume =       "9",
  number =       "2",
  pages =        "123--143",
  year =         "1999",
}

@Article{Friedman+Hastie+Tibshirani:AdaBoost-theory,
  author =       "J. Friedman and T. Hastie and R. Tibshirani",
  title =        "Additive Logistic Regression: a Statistical View of
                 Boosting",
  journal =      "The Annals of Statistics",
  volume =       "28",
  number =       "2",
  pages =        "337--407",
  year =         "2000",
}

@Article{Friedman-2001,
  author =       "J. Friedman",
  title =        "Greedy function approximation: a gradient boosting
                 machine",
  journal =      "The Annals of Statistics",
  volume =       "29",
  number =       "5",
  pages =        "1189--1232",
  year =         "2001",
}

@book{Friedman71,
  author    = {A. Friedman},
  title     = {Advanced Calculus},
  publisher = {Holt, Rinehart and Winston},
  address   = {New York, NY},
  year      = {1971},
}

@article{Friedman+Tukey-1974,
    author = {J. H. Friedman and J. W. Tukey},
    title = {A Projection Pursuit Algorithm for Exploratory Data Analysis},
    journal = {IEEE Transactions on Computers},
    volume = {23},
    number = {9},
    year = {1974},
    issn = {0018-9340},
    pages = {881--890},
    doi = {10.1109/T-C.1974.224051},
    publisher = {IEEE Computer Society},
    address = {Washington, DC, USA},
}

@Article{Friedman87,
  author =       "J. H. Friedman",
  title =        "Exploratory projection pursuit",
  journal =      "Journal of the American Statistical Association",
  volume =       "82",
  pages =        "249--266",
  year =         "1987",
}

@article{Friedman91,
  author  = {J. H. Friedman},
  title   = {Multivariate adaptive regression splines},
  journal = {The Annals of Statistics},
  volume  = {19},
  pages   = {1--141},
  year    = {1991},
}

@techreport{friedman94flexible,
  author      = {J. Friedman},
  title       = {Flexible metric nearest neighbor classification},
  institution = {Stanford University Statistics Department},
  number      = {113},
  year        = {1994},
}

@techreport{Friedman98,
  author      = {J. Friedman and T. Hastie and R. Tibshirani},
  title       = {Additive logistic regression: {A} statistical view of boosting},
  institution = {Stanford University},
  address     = {CA, USA},
  year        = {1998},
}

@misc{friedman99greedy,
  author = {J. Friedman},
  title  = {Greedy Function Approximation: a Gradient Boosting Machine},
  note   = {IMS 1999 Reitz Lecture, February 24, 1999, Dept. of Statistics, Stanford University},
  year   = {1999},
}

@InProceedings{Friess98,
  author =       "T. Friess and N. Cristianini and C. Campbell",
  booktitle =    "Proceedings of the Fifteenth International Conference
                 on Machine Learning",
  title =        "The Kernel-Adatron: a Fast and Simple Learning
                 Procedure for Support Vector Machines",
  pages =        "188--196",
  year =         "1998",
}

@inproceedings{Fritzke94,
  author    = {B. Fritzke},
  title     = {Supervised learning with growing cell structures},
  editor    = NIPS6ed,
  booktitle = NIPS6,
  publisher = {Morgan Kaufmann},
  year      = {1994},
}

@inproceedings{fs-lmcpa-98,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {Large margin classification using the perceptron algorithm},
  booktitle = {Proc. 11th Annu. Conf. on Comput. Learning Theory},
  publisher = {ACM Press, New York, NY},
  pages     = {209--217},
  year      = {1998},
}

@article{fs-ppr-81,
  author  = {J. H. Friedman and W. Stuetzle},
  title   = {Projection Pursuit Regression},
  journal = {J. American Statistical Association},
  volume  = {76},
  number  = {376},
  pages   = {817--823},
  month   = dec,
  year    = {1981},
  comment = {Good description of projection pursuit},
}

@article{Fu86,
  author  = {Y. Fu and P. W. Anderson},
  title   = {Application of Statistical Mechanics to {NP}-Complete Problems in Combinatorial Optimization},
  journal = jpa,
  volume  = {19},
  pages   = {1605--1620},
  year    = {1986},
}

@inproceedings{Fukumizu96,
  author    = {K. Fukumizu},
  title     = {Active Learning in Multilayer Perceptrons},
  editor    = NIPS8ed,
  booktitle = NIPS8,
  publisher = {MIT Press, Cambridge, MA},
  year      = {1996},
}

@article{Fukumizu+Amari-2000,
  author  = {Kenji Fukumizu and {Shun-ichi} Amari},
  title   = {Local Minima and Plateaus in Hierarchical Structures of Multilayer Perceptrons},
  journal = {Neural Networks},
  volume  = {13},
  number  = {3},
  pages   = {317--327},
  year    = {2000},
}

@article{Fukushima75,
  author  = {K. Fukushima},
  title   = {Cognitron: {A} Self-Organizing Multilayered Neural Network},
  journal = biocyb,
  volume  = {20},
  pages   = {121--136},
  year    = {1975},
}

@article{Fukushima80,
  author  = {K. Fukushima},
  title   = {Neocognitron: {A} Self-Organizing Neural Network Model for a Mechanism of Pattern Recognition Unaffected by Shift in Position},
  journal = biocyb,
  volume  = {36},
  pages   = {193--202},
  year    = {1980},
}

@article{Fukushima82,
  author  = {K. Fukushima and S. Miyake},
  key     = {Fukushima},
  title   = {Neocognitron: {A} new algorithm for pattern recognition tolerant of deformations and shifts in position},
  journal = {Pattern Recognition},
  volume  = {15},
  pages   = {455--469},
  year    = {1982},
}

@article{Fukushima83,
  author  = {K. Fukushima and S. Miyake and T. Ito},
  title   = {Neocognitron: {A} Neural Network Model for a Mechanism of Visual Pattern Recognition},
  journal = ieeesmc,
  volume  = {13},
  year    = {1983},
}

@article{Funahashi89,
  author  = {K. Funahashi},
  title   = {On the approximate realization of continuous mappings by neural networks},
  journal = {Neural Networks},
  volume  = {2},
  pages   = {183--192},
  year    = {1989},
}

@article{Funahashi93,
  author  = {Ken-Ichi Funahashi and Yuichi Nakamura},
  title   = {Approximation of Dynamical Systems by Continuous Time Recurrent Neural Networks},
  journal = nn,
  volume  = {6},
  pages   = {801--806},
  year    = {1993},
}

@inproceedings{Fung-Crawford90,
  author    = {R. M. Fung and S. L. Crawford},
  title     = {A system for induction of probabilistic models},
  booktitle = {Eighth National Conference on Artificial Intelligence, Boston, Massachusetts, American Association for Artificial Intelligence},
  pages     = {762--779},
  year      = {1990},
}

@techreport{Galland+Hinton89,
  author      = {C. C. Galland and G. E. Hinton},
  title       = {Deterministic learning in networks with asymmetric connectivity},
  institution = {Department of Computer Science, University of Toronto},
  number      = {CRG-TR-89-6},
  address     = {Toronto, Ontario},
  year        = {1989},
}

@inproceedings{Gallant86,
  author    = {S. I. Gallant},
  title     = {Optimal Linear Discriminants},
  booktitle = {Eighth International Conference on Pattern Recognition},
  publisher = {IEEE, New York},
  address   = {Paris 1986},
  pages     = {849--852},
  year      = {1986},
}

@Article{gallant90perceptron-based,
  author =       "S. Gallant",
  title =        "Perceptron-based learning algorithms",
  journal =      "IEEE Transactions on Neural Networks",
  volume =       "1",
  number =       "2",
  pages =        "179--191",
  year =         "1990",
  text =         "S. Gallant, Perceptron-based learning algorithms, IEEE
                 Trans. Neural Networks 1, 179 (1990).",
}

@InProceedings{Gallinari87,
  author =       "Patrick Gallinari and Yann {LeCun} and Sylvie Thiria and
                 Fran{\c{c}}oise Fogelman-Souli{\'e}",
  booktitle =    "Proceedings of COGNITIVA 87",
  title =        "M{\'e}moires associatives distribu{\'e}es",
  address =      "Paris, La Villette",
  year =         "1987",
}

@inproceedings{Gallinari88,
  author    = {P. Gallinari and S. Thiria and F. Fogelman-Souli\'e},
  title     = {Multilayer perceptrons and data analysis},
  booktitle = {Proc. International Conference on Neural Networks '88},
  publisher = {IEEE},
  pages     = {391--399},
  year      = {1988},
}

@Article{Gao-Goodman-Miao-2001,
  author =       "J. Gao and J. Goodman and J. Miao",
  title =        "The Use of Clustering Techniques for Asian Language
                 Modeling",
  journal =      "Computational Linguistics and Chinese Language
                 Processing",
  volume =       "6",
  number =       "1",
  pages =        "27--60",
  year =         "2001",
}

@techreport{Garcia-Perron95,
  author      = {R. Garcia and P. Perron},
  title       = {An analysis of the real interest rate under regime shift},
  institution = {CIRANO},
  number      = {95s-5},
  address     = {Montreal, Quebec, Canada},
  year        = {1995},
}

@article{Garcia-Perron96,
  author  = {R. Garcia and P. Perron},
  title   = {An analysis of the real interest rate under regime shift},
  journal = {The Review of Economics and Statistics},
  year    = {1996},
}

@techreport{Garcia-Schaller95,
  author      = {R. Garcia and H. Schaller},
  title       = {Are the effects of monetary policy asymmetric},
  institution = {CIRANO},
  number      = {95s-6},
  address     = {Montreal, Quebec, Canada},
  year        = {1995},
}

@TechReport{Garcia95,
  author =       "R. Garcia",
  title =        "Asymptotic null distribution of the likelihood ratio
                 test in {Markov} switching models",
  number =       "95s-7",
  institution =  "CIRANO",
  address =      "Montreal, Quebec, Canada",
  year =         "1995",
}

@techreport{Garcia98,
  author      = {R. Garcia and R. Gen\c{c}ay},
  title       = {{Pricing and Hedging Derivative Securities with Neural Networks and a Homogeneity Hint}},
  institution = {CIRANO},
  number      = {98s-35},
  address     = {Montr\'eal, Qu\'ebec, Canada},
  year        = {1998},
}

@article{Gardner87,
  author  = {E. Gardner},
  title   = {Maximum Storage Capacity in Neural Networks},
  journal = eul,
  volume  = {4},
  pages   = {481--485},
  year    = {1987},
}

@article{Gardner88a,
  author  = {E. Gardner},
  title   = {The Space of Interactions in Neural Network Models},
  journal = jpa,
  volume  = {21},
  pages   = {257--270},
  year    = {1988},
}

@article{Gardner88b,
  author  = {E. Gardner and B. Derrida},
  title   = {Optimal Storage Properties of Neural Network Models},
  journal = jpa,
  volume  = {21},
  pages   = {271--284},
  year    = {1988},
}

@article{Gardner89a,
  author  = {E. Gardner and B. Derrida},
  title   = {Three Unfinished Works on the Optimal Storage Capacity of Networks},
  journal = jpa,
  volume  = {22},
  pages   = {1983--1994},
  year    = {1989},
}

@article{Gardner89b,
  author  = {E. Gardner and H. Gutfreund and I. Yekutieli},
  title   = {The Phase Space of Interactions in Neural Networks with Definite Symmetry},
  journal = jpa,
  volume  = {22},
  pages   = {1995--2008},
  year    = {1989},
}

@book{Garey79,
  author    = {M. R. Garey and D. S. Johnson},
  title     = {Computers and Intractability: {A} Guide to the Theory of {NP}-Completeness},
  publisher = {Freeman},
  address   = {New York},
  year      = {1979},
}

@incollection{GarriguesP2008,
  author    = {Pierre Garrigues and Bruno Olshausen},
  title     = {Learning Horizontal Connections in a Sparse Coding Model of Natural Images},
  editor    = NIPS20ed,
  booktitle = NIPS20,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {505--512},
  year      = {2008},
}

@incollection{GarriguesP2008-small,
  author    = {Pierre Garrigues and Bruno Olshausen},
  title     = {Learning Horizontal Connections in a Sparse Coding Model of Natural Images},
  booktitle = {NIPS'20},
  year      = {2008},
}

@article{Gartner03,
  author  = {T. G{\"a}rtner},
  title   = {A survey of kernels for structured data},
  journal = {ACM SIGKDD Explorations Newsletter},
  volume  = {5},
  number  = {1},
  pages   = {49--58},
  year    = {2003},
}

@inproceedings{Gauvain:2003:icassp,
  author    = {Jean-Luc Gauvain and L. Lamel and Holger Schwenk and G. Adda and L. Chen and F.\ Lef\`evre},
  title     = {Conversational Telephone Speech Recognition},
  booktitle = icassp,
  volume    = {1},
  pages     = {212--215},
  year      = {2003},
}

@inproceedings{Gaynier93,
  author    = {R. J. Gaynier and T. Downs},
  title     = {A Method of Training Multi-layer Networks with Heaviside Characteristics Using Internal Representations},
  booktitle = {IEEE International Conference on Neural Networks},
  address   = {San Francisco, CA},
  pages     = {1812--1817},
  year      = {1993},
}

@InProceedings{GehlerP2006,
  author =       "Peter V. Gehler and Alex D. Holub and Max Welling",
  booktitle =    ICML06,
  editor =       ICML06ed,
  publisher =    ICML06publ,
  title =        "The rate adapting {Poisson} model for information
                 retrieval and object recognition",
  address =      "New York, NY, USA",
  pages =        "337--344",
  year =         "2006",
  ISBN =         "1-59593-383-2",
  doi =          "10.1145/1143844.1143887",
  location =     "Pittsburgh, Pennsylvania",
}

@Article{Geman84,
  author =       {Geman, Stuart and Geman, Donald},
  title =        "Stochastic Relaxation, Gibbs Distributions, and the
                 {Bayesian} Restoration of Images",
  doi =          {10.1109/TPAMI.1984.4767596},
  journal =      ieeetpami,
  volume =       "6",
  number =       "6",
  keywords =     {annealing, mrf, simulated},
  month =        nov,
  pages =        {721--741},
  url =          {http://dx.doi.org/10.1109/TPAMI.1984.4767596},
  year =         "1984",
}

@article{Geman92,
  author  = {S. Geman and E. Bienenstock and R. Doursat},
  title   = {Neural Networks and the Bias/Variance Dilemma},
  journal = nc,
  volume  = {4},
  number  = {1},
  pages   = {1--58},
  year    = {1992},
}

@Article{Genest-Zideck-86,
  author =       "C. Genest and J. V. Zidek",
  title =        "Combining probability distributions: {A} critique and
                 an annotated bibliography",
  journal =      "Statistical Science",
  volume =       "1",
  pages =        "114--148",
  year =         "1986",
}

@article{Geng+al-2005,
    author    = {Xin Geng and De-Chuan Zhan and Zhi-Hua Zhou},
    title     = {Supervised nonlinear dimensionality reduction for visualization and classification},
    journal   = {IEEE Transactions on Systems, Man, and Cybernetics, Part B},
    volume    = {35},
    number    = {6},
    year      = {2005},
    pages     = {1098--1107},
    ee        = {http://dx.doi.org/10.1109/TSMCB.2005.850151},
    bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{Geszti87,
  author  = {T. Geszti and F. P\'azm\'andi},
  title   = {Learning Within Bounds and Dream Sleep},
  journal = jpa,
  volume  = {20},
  pages   = {L1299--L1303},
  year    = {1987},
}

@book{Geszti90,
  author    = {T. Geszti},
  title     = {Physical Models of Neural Networks},
  publisher = {World Scientific},
  address   = {Singapore},
  year      = {1990},
}

@Article{Geweke1989,
  author =       "J. Geweke",
  title =        "{Bayesian} inference in econometric models using
                 {Monte Carlo} integration",
  journal =      "Econometrica",
  volume =       "57",
  pages =        "1317--1339",
  year =         "1989",
}

@incollection{Gha94,
  author    = {Z. Ghahramani},
  title     = {Solving inverse problems using an {EM} approach to density estimation},
  booktitle = {Proceedings of the 1993 Connectionist Models Summer School},
  publisher = {Erlbaum},
  address   = {Hillsdale, NJ},
  year      = {1994},
}

@InProceedings{ghabea00,
  author =       "Z. Ghahramani and M. J. Beal",
  editor =       NIPS12ed,
  booktitle =    NIPS12,
  title =        "Variational inference for {Bayesian} mixtures of
                 factor analysers",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2000",
  URL =          "http://citeseer.nj.nec.com/article/ghahramani00variational.html",
}

@TechReport{ghahramani96em,
  author =       "Z. Ghahramani and G. E. Hinton",
  title =        "The {EM} Algorithm for Mixtures of Factor Analyzers",
  number =       "CRG-TR-96-1",
  institution =  "Dpt. of Comp. Sci., Univ. of Toronto",
  month =        jan,
  year =         "1996",
  URL =          "http://citeseer.nj.nec.com/ghahramani97em.html",
}

@techreport{GhaJor93,
  author      = {Z. Ghahramani and M. I. Jordan},
  title       = {Function approximation via density estimation},
  institution = {MIT},
  type        = {Computational Cognitive Science},
  number      = {TR 9304},
  address     = {Cambridge, MA},
  year        = {1993},
}

@InProceedings{Gherrity89,
  author =       "M. Gherrity",
  booktitle =    ijcnn,
  title =        "A Learning Algorithm for Analog, Fully Recurrent
                 Neural Networks",
  publisher =    "IEEE Press",
  address =      "Washington D.C.",
  pages =        "643--644",
  month =        jun,
  year =         "1989",
}

@article{Ghosh+Hwang-1989,
  author    = {J. Ghosh and K. Hwang},
  title     = {Mapping Neural Networks onto Message-Passing Multicomputers},
  journal   = {Journal of Parallel and Distributed Computing},
  volume    = {6},
  number    = {2},
  pages     = {291--330},
  publisher = {Academic Press},
  year      = {1989},
}

@Article{Ghosn2003,
  author =       "J. Ghosn and Y. Bengio",
  title =        "Bias Learning, Knowledge Sharing",
  journal =      "{IEEE} Transactions on Neural Networks",
  volume =       "14",
  number =       "4",
  pages =        "748--765",
  month =        jul,
  year =         "2003",
}

@techreport{Ghysel93,
  author      = {E. Ghysel},
  title       = {A time series model with periodic stochastic regime switching},
  institution = {C.R.D.E., Universite de Montreal},
  number      = {C.R.D.E. Discussion paper 1093},
  address     = {Montreal, Quebec, Canada},
  year        = {1993},
}

@book{Giarratano+Riley-2004,
    author = {Giarratano, Joseph  C.  and Riley, Gary  D. },
    howpublished = {Hardcover},
    isbn = {0534384471},
    month = oct,
    posted-at = {2008-05-19 22:17:30},
    priority = {2},
    publisher = {{Course Technology}},
    edition = {Fourth},
    title = {Expert Systems: Principles and Programming},
    url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0534384471},
    year = {2004}
}


@Article{Giles86,
  author =       "Y. C. Lee and G. Doolen and H. H. Chen and G. Z. Sun
                 and T. Maxwell and H. Y. Lee and C. L. Giles",
  title =        "Machine Learning Using a Higher Order Correlation
                 Network",
  journal =      "Physica D",
  volume =       "22",
  number =       "1--3",
  pages =        "276--306",
  year =         "1986",
}

@article{giles:1987,
    author = {C. Lee Giles and Tom Maxwell},
    journal = {Applied Optics},
    number = {23},
    pages = {4972--4978},
    publisher = {OSA},
    title = {Learning, Invariance, and Generalization in High-Order Neural Networks},
    volume = {26},
    year = {1987},
    url = {http://ao.osa.org/abstract.cfm?URI=ao-26-23-4972},
}

@inproceedings{Giles90,
  author    = {C. L. Giles and G. Z. Sun and H. H. Chen and Y. C. Lee and D. Chen},
  title     = {Higher Order Recurrent Networks \& Grammatical Inference},
  editor    = NIPS2ed,
  booktitle = NIPS2,
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Mateo, CA},
  pages     = {380--387},
  year      = {1990},
}

@InProceedings{Giles-nnsp92,
  author =       "C. L. Giles and C. W. Omlin",
  editor =       "Kung and Fallside and Sorenson and Kamm",
  booktitle =    "Neural Networks for Signal Processing II, Proceedings
                 of the 1992 IEEE workshop",
  title =        "Inserting Rules into Recurrent Neural Networks",
  publisher =    "IEEE Press",
  pages =        "13--22",
  year =         "1992",
}

@Article{Giles94,
  author =       "C. L. Giles and C. W. Omlin",
  title =        "Extraction, Insertion and Refinement of Symbolic Rules
                 in Dynamically-Driven Recurrent Neural Networks",
  journal =      "Connection Science",
  year =         "1994",
}

@article{Giles-nc92,
  author  = {C. L. Giles and C. B. Miller and D. Chen and G. Z. Sun and H. H. Chen and Y. C. Lee},
  title   = {Learning and Extracting Finite State Automata with Second-Order Recurrent Neural Networks},
  journal = nc,
  volume  = {4},
  number  = {3},
  pages   = {393--405},
  year    = {1992},
}

@book{Gill81,
  author    = {P. E. Gill and W. Murray and M. H. Wright},
  title     = {Practical Optimization},
  publisher = {Academic Press},
  year      = {1981},
}

@InProceedings{Gillman+Sipser94,
  author =       "David Gillman and Michael Sipser",
  booktitle =    colt94,
  title =        "Inference and minimization of hidden {Markov} chains",
  publisher =    "ACM",
  pages =        "147--158",
  year =         "1994",
}

@Book{Gilmore-74,
  author =       "R. Gilmore",
  title =        "{Lie} groups, {Lie} algebras and some of their
                 applications",
  publisher =    "Wiley",
  address =      "New York",
  year =         "1974",
}

@InProceedings{Gingras-Bengio-Nadeau-2000,
  author =       "F. Gingras and Y. Bengio and C. Nadeau",
  booktitle =    "Computational Finance 2000",
  title =        "On Out-of-Sample Statistics for Time-Series",
  location =     "London, U.K.",
  year =         "2000",
}

@InProceedings{chapados+bengio-2000,
  author =       "N. Chapados and Y. Bengio",
  booktitle =    "Computational Finance 2000",
  title =        "{VaR}-based Asset Allocation using Neural Networks",
  year =         "2000",
}

@InProceedings{Pigeon+Bengio-99,
  author =       "S. Pigeon and Y. Bengio",
  booktitle =    "Proceedings of the Data Compression Conference, DCC'1999",
  title =        "Binary Pseudowavelets and Application to Bilevel Image Processing",
  year =         "1999",
}

@inproceedings{Girard+Paugam-Moisy-1994,
  author    = {D. Girard and H\'{e}l\`{e}ne Paugam-Moisy},
  title     = {Strategies of Weight Updating for Parallel Back-propagation},
  booktitle = {Proceedings of the {IFIP} {WG10.3} Working Conference on Applications in Parallel and Distributed Computing},
  publisher = {North-Holland Publishing Co.},
  address   = {Amsterdam, The Netherlands},
  pages     = {335--336},
  year      = {1994},
  isbn      = {0-444-81870-7},
}

@inproceedings{Girju+al-2003,
  author    = {Roxana Girju and Adriana Badulescu and Dan Moldovan},
  title     = {Learning semantic constraints for the automatic discovery of part-whole relations},
  booktitle = {NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
  publisher = {Association for Computational Linguistics},
  address   = {Morristown, NJ, USA},
  pages     = {1--8},
  year      = {2003},
  location  = {Edmonton, Canada},
}

@Article{Girolami-2001,
  author =       "M. Girolami",
  title =        "Orthogonal series density estimation and the kernel
                 eigenvalue problem",
  journal =      "Neural Computation",
  volume =       "14",
  number =       "3",
  pages =        "669--688",
  year =         "2002",
}

@TechReport{girosi97an,
  author =       "F. Girosi",
  title =        "An equivalence between sparse approximation and
                 Support Vector Machines",
  type =         "{A.I.} Memo",
  number =       "1606",
  institution =  "MIT Artificial Intelligence Laboratory",
  year =         "1997",
  URL =          "http://www.ai.mit.edu/people/girosi/svm.html",
  text =         "F. Girosi. An equivalence between sparse approximation
                 and Support Vector Machines. A.I. Memo 1606, MIT
                 Artificial Intelligence Laboratory, 1997. (available at
                 the URL:
                 http://www.ai.mit.edu/people/girosi/svm.html).",
}

@Article{Glauber63,
  author =       "R. J. Glauber",
  title =        "Time-Dependent Statistics of the {Ising} Model",
  journal =      jmp,
  volume =       "4",
  pages =        "294--307",
  year =         "1963",
}

@book{GLM-book-89,
  author    = {P. McCullagh and J. Nelder},
  title     = {Generalized Linear Models},
  publisher = {Chapman and Hall},
  address   = {London},
  year      = {1989},
}

@incollection{GlobersonA2006,
  author    = {Amir Globerson and Sam Roweis},
  title     = {Metric Learning by Collapsing Classes},
  editor    = NIPS18ed,
  booktitle = NIPS18,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {451--458},
  year      = {2006},
}

@book{Gluck90,
  author    = {M. A. Gluck and D. E. Rumelhart},
  title     = {Neuroscience and connectionist theory},
  publisher = {Lawrence Erlbaum, London},
  year      = {1990},
}

@article{Godin89,
  author  = {Godin, C. and Lockwood, P.},
  title   = {{DTW} Schemes for Continuous Speech Recognition: {A} Unified view},
  journal = cspla,
  volume  = {3},
  pages   = {169--198},
  year    = {1989},
}

@book{Gold+Morgan-1999,
    author = {Gold, Ben and Morgan, Nelson},
    title = {Speech and Audio Signal Processing: Processing and Perception of Speech and Music},
    publisher = {Wiley},
    month = jul,
    year = {1999},
    isbn = {0471351547}
}

@book{Goldberg89,
  author    = {Goldberg, D. E.},
  title     = {Genetic Algorithms in Search, Optimization, and Machine Learning},
  publisher = {Addison-Wesley},
  address   = {Reading},
  year      = {1989},
}

@Article{Goldfeld73,
  author =       "S. M. Goldfeld and R. M. Quandt",
  title =        "A {Markov} model for switching regressions",
  journal =      "Journal of Econometrics",
  volume =       "1",
  pages =        "3--16",
  year =         "1973",
}

@TechReport{Goldhor85,
  author =       "R. S. Goldhor",
  title =        "Representation of consonants in the peripheral
                 auditory system: {A} modeling study of the
                 correspondence between response properties and phonetic
                 features",
  number =       "505",
  institution =  "Research Laboratory of Electronics, Massachusetts
                 Institute of Technology",
  address =      "Cambridge, MA",
  year =         "1985",
}

@article{Golomb90,
  author  = {Golomb, D. and Rubin, N. and Sompolinsky, H.},
  title   = {Willshaw Model: Associative Memory with Sparse Coding and Low Firing Rates},
  journal = prA,
  volume  = {41},
  pages   = {1843--1854},
  year    = {1990},
}

% "Van Loan" is braced so BibTeX treats it as one surname rather than
% parsing "Van" as part of the given names.
@Book{Golub+VanLoan-1996,
  author =       "Gene H. Golub and Charles F. {Van Loan}",
  title =        "Matrix Computations",
  edition =      "Third",
  publisher =    "The Johns Hopkins University Press",
  month =        oct,
  year =         "1996",
  ISBN =         "0-8018-5414-8",
}

@techreport{Goodman-LM-2001,
  author      = {Goodman, Joshua},
  title       = {A Bit of Progress in Language Modeling},
  institution = {Microsoft Research},
  number      = {MSR-TR-2001-72},
  address     = {Redmond, Washington},
  year        = {2001},
}

@inproceedings{Goodman2001,
  author    = {Goodman, J.},
  title     = {Classes for Fast Maximum Entropy Training},
  booktitle = icassp,
  address   = {Utah},
  year      = {2001},
}

@inproceedings{Gori-ijcnn89,
  author    = {M. Gori and Y. Bengio and R. \mbox{De Mori}},
  title     = {{BPS}: {A} Learning Algorithm for Capturing the Dynamical Nature of Speech},
  booktitle = ijcnn,
  publisher = {IEEE, New York},
  address   = {Washington D.C.},
  pages     = {643--644},
  year      = {1989},
}

% Same paper as the GoriNimes entry; both keys are kept for existing
% citations, and the fields are aligned so the two render identically.
@InProceedings{Gori-nimes89,
  author =       "M. Gori",
  booktitle =    "Proceedings of Neuro-Nimes",
  title =        "An Extension of {BPS}",
  address =      "Nimes (France)",
  pages =        "83--93",
  month =        nov,
  year =         "1989",
}

@Article{Gori-pami91,
  author =       "M. Gori and A. Tesi",
  title =        "On the problem of local minima in Backpropagation",
  journal =      ieeetpami,
  volume =       "14",
  number =       "1",
  pages =        "76--86",
  year =         "1992",
}

@techreport{Gori-tr94,
  author      = {Gori, M. and Maggini, M. and Soda, G.},
  title       = {Insertion of Finite State Automata into Recurrent Radial Basis Function Networks},
  institution = {Universit\`a di Firenze (Italy)},
  number      = {DSI-17/93},
  year        = {1993},
  note        = {(submitted)},
  OPTannote   = {},
}

@inproceedings{GoriNimes,
  author    = {Gori, M.},
  title     = {An Extension of {BPS}},
  booktitle = {Proceedings of Neuro-Nimes},
  address   = {Nimes (France)},
  pages     = {83--93},
  month     = nov,
  year      = {1989},
}

@article{Gorman88a,
  author  = {Gorman, R. P. and Sejnowski, T. J.},
  title   = {Analysis of Hidden Units in a Layered Network Trained to Classify Sonar Targets},
  journal = nn,
  volume  = {1},
  pages   = {75--89},
  year    = {1988},
}

@article{Gorman88b,
  author  = {Gorman, R. P. and Sejnowski, T. J.},
  title   = {Learned Classification of Sonar Targets Using a Massively-Parallel Network},
  journal = ieeetassp,
  volume  = {36},
  pages   = {1135--1140},
  year    = {1988},
}

@unpublished{Gorse94,
  author = {Gorse, D. and Taylor, J. G. and Clarkson, T. G.},
  title  = {A pulse-based reinforcement algorithm for learning continuous functions},
  note   = {Submitted to WCNN '94 San Diego},
  year   = {1994},
}

% Key kept for existing citations. The stale "(in press)" note is replaced by
% the published volume/pages -- NOTE(review): confirm against the journal.
@Article{Goudreau-trnn93,
  author =       "M. W. Goudreau and C. L. Giles and S. T. Chakradhar
                 and D. Chen",
  title =        "First-order vs. second-order single layer recurrent
                 neural networks",
  journal =      ieeetrnn,
  volume =       "5",
  number =       "3",
  pages =        "511--513",
  year =         "1994",
}

@article{Goudreau93tb,
  author  = {Goudreau, M. W. and Giles, C. L. and Chakradhar, S. T. and Chen, D.},
  title   = {First-Order Vs. Second-Order Single Layer Recurrent Neural Networks},
  journal = {IEEE Transactions on Neural Networks},
  year    = {1993},
}

@inproceedings{Gould+al:NIPS09,
  author    = {Gould, S. and Gao, T. and Koller, D.},
  title     = {Region-based Segmentation and Object Detection},
  booktitle = {Advances in Neural Information Processing Systems (NIPS 2009)},
  year      = {2009},
}

@article{goutte97,
  author  = {Goutte, C.},
  title   = {Note on free lunches and cross-validation},
  journal = {Neural Computation},
  volume  = {9},
  number  = {6},
  pages   = {1053--1059},
  year    = {1997},
}

@article{Gower-68,
  author  = {Gower, J. C.},
  title   = {Adding a point to vector diagrams in multivariate analysis},
  journal = {Biometrika},
  volume  = {55},
  number  = {3},
  pages   = {582--585},
  year    = {1968},
}

% The stale "in press" note is dropped and the year is no longer repeated
% inside booktitle. NOTE(review): confirm the page range against the proceedings.
@InProceedings{Graepel2000,
  author =       "Thore Graepel and Ralf Herbrich and John
                 Shawe-Taylor",
  booktitle =    "Thirteenth Annual Conference on Computational Learning
                 Theory",
  title =        "Generalization error bounds for sparse linear
                 classifiers",
  publisher =    "Morgan Kaufmann",
  pages =        "298--303",
  year =         "2000",
}

@inproceedings{Graepel99,
  author    = {Graepel, T. and Herbrich, R. and Bollmann-Sdorra, P. and Obermayer, K.},
  title     = {Classification on Pairwise Proximity Data},
  booktitle = NIPS12,
  editor    = NIPS12ed,
  year      = {1999},
}

@inproceedings{graf-90a,
  author       = {Graf, H. P. and Henderson, D.},
  title        = {A Reconfigurable {CMOS} Neural Network},
  booktitle    = {ISSCC Digest},
  organization = {ISSCC},
  year         = {1990},
}

@inproceedings{Graf86,
  author    = {Graf, H. P. and Jackel, L. D. and Howard, R. E. and Straughn, B. and Denker, J. S. and Hubbard, W. and Tennant, D. M. and Schwartz, D.},
  title     = {{VLSI} Implementation of a Neural Network Memory with Several Hundreds of Neurons},
  booktitle = snowbird,
  editor    = {Denker, J. S.},
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
  pages     = {182--187},
  year      = {1986},
}

@inproceedings{Graf88,
  author    = {Graf, D. H. and LaLonde, W. R.},
  title     = {A Neural Controller for Collision-Free Movement of General Robot Manipulators},
  booktitle = icnn,
  volume    = {1},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  pages     = {77--84},
  year      = {1988},
}

@inproceedings{Graf92,
  author    = {Graf, H. P. and Nohl, C. R. and Ben, J.},
  title     = {Image segmentation with networks of variable scales},
  booktitle = NIPS4,
  editor    = NIPS4ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  pages     = {480--487},
  year      = {1992},
}

@InProceedings{Grandvalet98a,
  author =       "Y. Grandvalet",
  editor =       "L. Niklasson and M. Bod{\'e}n and T. Ziemke",
  booktitle =    "ICANN'98",
  title =        "Least absolute shrinkage is equivalent to quadratic
                 penalization",
  volume =       "1",
  publisher =    "Springer",
  pages =        "201--206",
  year =         "1998",
  series =       "Perspectives in Neural Computing",
}

@inproceedings{Grandvalet98a-short,
  author    = {Grandvalet, Y.},
  title     = {Least absolute shrinkage is equivalent to quadratic penalization},
  booktitle = {ICANN'98},
  pages     = {201--206},
  year      = {1998},
}

% The title is no longer wrapped in an extra brace pair, so bibliography
% styles can apply their own casing.
@InProceedings{GrandvaletY2005,
  author =       "Yves Grandvalet and Yoshua Bengio",
  editor =       NIPS17ed,
  booktitle =    NIPS17,
  title =        "Semi-supervised Learning by Entropy Minimization",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  month =        dec,
  year =         "2005",
}
% Deprecated: this key is superseded by GrandvaletY2005, because entries must carry the date of publication rather than the date of the conference. Use GrandvaletY2005 instead.
@InProceedings{GrandvaletY2004,
  author =       "Yves Grandvalet and Yoshua Bengio",
  editor =       NIPS17ed,
  booktitle =    NIPS17,
  title =        "Semi-supervised Learning by Entropy Minimization",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  month =        dec,
  year =         "2005",
}

@INCOLLECTION {GrandvaletY2006,
title = {Entropy Regularization},
author = {Grandvalet, Yves and Bengio, Yoshua},
editor = {Chapelle, Olivier and Sch{\"o}lkopf, Bernhard and Zien, Alexander},
booktitle = {Semi-Supervised Learning},
year = {2006},
pages = {151--168},
publisher = {{MIT} Press},
address = {Cambridge, MA},
}

@Article{GrangerNewbold76,
  author =       "C. W. J. Granger and P. Newbold",
  title =        "Forecasting transformed series",
  journal =      "Journal of the Royal Statistical Society, Series B",
  volume =       "38",
  pages =        "189--203",
  year =         "1976",
}

@InProceedings{Gray-Moore-2003,
  author =       "Alexander Gray and Andrew Moore",
  booktitle =    "Artificial Intelligence and Statistics",
  title =        "Rapid Evaluation of Multiple Density Models",
  year =         "2003",
}

@article{Gray84,
  author  = {Gray, R. M.},
  title   = {Vector Quantization},
  journal = ieeeassp,
  pages   = {4--29},
  month   = apr,
  year    = {1984},
}

% The title was empty. NOTE(review): title, first-author initials and page
% range were filled in from the published Technometrics article -- confirm.
@Article{Greenwood+Durand60,
  author =       "J. A. Greenwood and D. Durand",
  title =        "Aids for Fitting the Gamma Distribution by Maximum
                 Likelihood",
  journal =      "Technometrics",
  volume =       "2",
  number =       "1",
  pages =        "55--65",
  year =         "1960",
}

@InProceedings{GregoryD2007,
  author =       "Gregory Druck and Chris Pal and Andrew McCallum and
                 Xiaojin Zhu",
  booktitle =    "KDD '07: Proceedings of the 13th ACM SIGKDD
                 international conference on Knowledge discovery and
                 data mining",
  title =        "Semi-supervised classification with hybrid
                 generative/discriminative methods",
  publisher =    "ACM",
  address =      "New York, NY, USA",
  pages =        "280--289",
  year =         "2007",
  OPTciteulike-article-id = "2304687",
  OPTdoi =       "10.1145/1281192.1281225",
  OPTisbn =      "9781595936097",
  OPTkeywords =  "classification",
  OPTpriority =  "2",
}
  %url =       "http://portal.acm.org/citation.cfm?id=1281192.1281225",

@Article{Gribskov87,
  author =       "M. Gribskov and A. D. McLachlan and D. Eisenberg",
  title =        "Profile analysis: detection of distantly related
                 proteins",
  journal =      PNAS,
  volume =       "84",
  pages =        "4355--4358",
  year =         "1987",
}

% The number field holds only the report number; styles supply the
% "Technical Report" label themselves.
@TechReport{Griffin-Holub-Perona-07,
  author =       "Gregory Griffin and Alex Holub and Pietro Perona",
  title =        "Caltech-256 Object Category Dataset",
  number =       "7694",
  institution =  "California Institute of Technology",
  year =         "2007",
}

@article{grigoriev95,
  author  = {Grigoriev, Dima and Karpinski, Marek and Yao, Andrew Chi-Chih},
  title   = {An Exponential Lower Bound on the Size of Algebraic Decision Trees for {MAX}},
  journal = {Electronic Colloquium on Computational Complexity (ECCC)},
  volume  = {2},
  number  = {057},
  year    = {1995},
}

@article{Grimes-Rao-2005,
  author  = {Grimes, D. B. and Rao, R. P. N.},
  title   = {Bilinear Sparse Coding for Invariant Vision},
  journal = {Neural Computation},
  volume  = {17},
  number  = {1},
  pages   = {47--73},
  year    = {2005},
}

@article{Grossberg67,
  author  = {Grossberg, S.},
  title   = {Nonlinear Difference-Differential Equations in Prediction and Learning Theory},
  journal = PNAS,
  volume  = {58},
  pages   = {1329--1334},
  year    = {1967},
}

@article{Grossberg68a,
  author  = {Grossberg, S.},
  title   = {Some Nonlinear Networks Capable of Learning a Spatial Pattern of Arbitrary Complexity},
  journal = PNAS,
  volume  = {59},
  pages   = {368--372},
  year    = {1968},
}

@article{Grossberg68b,
  author  = {Grossberg, S.},
  title   = {Some Physiological and Biochemical Consequences of Psychological Postulates},
  journal = PNAS,
  volume  = {60},
  pages   = {758--765},
  year    = {1968},
}

@article{Grossberg69,
  author  = {Grossberg, S.},
  title   = {Embedding Fields: {A} Theory of Learning with Physiological Implications},
  journal = jmpsych,
  volume  = {6},
  pages   = {209--239},
  year    = {1969},
}

@article{Grossberg72,
  author  = {Grossberg, S.},
  title   = {Neural Expectation: Cerebellar and Retinal Analogs of Cells Fired by Learnable or Unlearned Pattern Classes},
  journal = kyb,
  volume  = {10},
  pages   = {49--57},
  year    = {1972},
}

% NOTE(review): page range added from the published article -- confirm.
@Article{Grossberg76a,
  author =       "S. Grossberg",
  title =        "Adaptive Pattern Classification and Universal
                 Recoding: {I}. Parallel Development and Coding of
                 Neural Feature Detectors",
  journal =      biocyb,
  volume =       "23",
  pages =        "121--134",
  year =         "1976",
}

@article{Grossberg76b,
  author  = {Grossberg, S.},
  title   = {Adaptive Pattern Classification and Universal Recoding: {II}. Feedback, Expectation, Olfaction, Illusions},
  journal = biocyb,
  volume  = {23},
  pages   = {187--202},
  year    = {1976},
}

% NOTE(review): page range added from the published article -- confirm.
@Article{Grossberg80,
  author =       "S. Grossberg",
  title =        "How Does the Brain Build a Cognitive Code?",
  journal =      psyrev,
  volume =       "87",
  pages =        "1--51",
  year =         "1980",
}

@book{Grossberg87a,
  author    = {Grossberg, S.},
  title     = {The Adaptive Brain},
  volume    = {1--2},
  publisher = {Elsevier},
  address   = {Amsterdam},
  year      = {1987},
}

@article{Grossberg87b,
  author  = {Grossberg, S.},
  title   = {Competitive Learning: From Interactive Activation to Adaptive Resonance},
  journal = cogsci,
  volume  = {11},
  pages   = {23--63},
  year    = {1987},
}

@inproceedings{Grosse-2007,
  author    = {Grosse, Roger and Raina, Rajat and Kwong, Helen and Ng, Andrew Y.},
  title     = {Shift-Invariant Sparse Coding for Audio Classification},
  booktitle = UAI07,
  year      = {2007},
}

@InProceedings{Grossman-nips89,
  author =       "T. Grossman and R. Meir and E. Domany",
  editor =       NIPS1ed,
  booktitle =    NIPS1,
  title =        "Learning by choice of internal representation",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "73--80",
  year =         "1989",
}

@article{Grossman89,
  author  = {Grossman, T. and Meir, R. and Domany, E.},
  title   = {Learning by Choice of Internal Representations},
  journal = cs,
  volume  = {2},
  pages   = {555--575},
  year    = {1989},
}

@inproceedings{Grossman90,
  author    = {Grossman, T.},
  title     = {The {CHIR} Algorithm for Feed Forward Networks with Binary Weights},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {516--523},
  year      = {1990},
}

@Article{Guillery2005,
  author =       "R. W. Guillery",
  title =        "Is postnatal neocortical maturation hierarchical?",
  journal =      "Trends in Neurosciences",
  volume =       "28",
  number =       "10",
  pages =        "512--517",
  month =        oct,
  year =         "2005",
}

@incollection{Gull88,
  author    = {Gull, S. F.},
  title     = {{Bayesian} inductive inference and maximum entropy},
  booktitle = {Maximum Entropy and {Bayesian} Methods in Science and Engineering},
  editor    = {Erickson, G. and Smith, C.},
  volume    = {1},
  publisher = {Kluwer},
  address   = {Dordrecht},
  pages     = {53--74},
  year      = {1988},
}

@article{gullapalli:nn:1990,
  author  = {Gullapalli, V.},
  title   = {A Stochastic Reinforcement Learning Algorithm for Learning Real-Valued Functions},
  journal = nn,
  volume  = {3},
  pages   = {671--692},
  year    = {1990},
}

% Key kept for existing citations. The special-issue text moves from volume
% to note, and the stale "to appear" is replaced by the published
% volume/pages -- NOTE(review): confirm against the journal.
@Article{Gunn+Kandola01,
  author =       "S. R. Gunn and J. Kandola",
  title =        "Structural Modelling with Sparse Kernels",
  journal =      "Machine Learning",
  volume =       "48",
  number =       "1--3",
  pages =        "137--163",
  year =         "2002",
  note =         "Special issue on New Methods for Model Combination and
                 Model Selection",
}

@inproceedings{Guo+Schuurmans-2007,
  author    = {Guo, Y. and Schuurmans, D.},
  title     = {Convex relaxations of latent variable training},
  booktitle = NIPS20,
  editor    = NIPS20ed,
  year      = {2007},
}

@inproceedings{guoschuurmans07b,
  author    = {Guo, Y. and Schuurmans, D.},
  title     = {Discriminative batch mode active learning},
  booktitle = NIPS20,
  editor    = NIPS20ed,
  year      = {2007},
}

@inproceedings{Guo+Schuurmans-2008,
  author    = {Guo, Y. and Schuurmans, D.},
  title     = {Efficient global optimization for exponential family {PCA} and low-rank matrix factorization},
  booktitle = {Proceedings of the Forty-sixth Annual Allerton Conference on Communication, Control, and Computing (Allerton)},
  year      = {2008},
}

@article{Gutfreund88a,
  author  = {Gutfreund, H.},
  title   = {Neural Networks with Hierarchically Correlated Patterns},
  journal = prA,
  volume  = {37},
  pages   = {570--577},
  year    = {1988},
}

@article{Gutfreund88b,
  author  = {Gutfreund, H. and M\'ezard, M.},
  title   = {Processing of Temporal Sequences in Neural Networks},
  journal = prl,
  volume  = {61},
  pages   = {235--238},
  year    = {1988},
}

@inproceedings{Gutzmann87,
  author    = {Gutzmann, K.},
  title     = {Combinatorial Optimization Using a Continuous State {Boltzmann} Machine},
  booktitle = icnn,
  editor    = {Caudill, M. and Butler, C.},
  volume    = {3},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  pages     = {721--734},
  year      = {1987},
}

@Article{guyon-91,
  author =       "I. Guyon and P. Albrecht and Y. {Le Cun} and J. S.
                 Denker and W. Hubbard",
  title =        "Design of a neural network character recognizer for a
                 touch terminal",
  journal =      "Pattern Recognition",
  volume =       "24",
  number =       "2",
  pages =        "105--119",
  year =         "1991",
}

@inproceedings{Guyon92,
  author    = {Guyon, I. and Vapnik, V. and Boser, B. and Bottou, L. and Solla, S. A.},
  title     = {Structural Risk Minimization for Character Recognition},
  booktitle = NIPS4,
  editor    = NIPS4ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  pages     = {471--479},
  year      = {1992},
}

@incollection{Guyon92b,
  author    = {Guyon, I.},
  title     = {Writer independent and writer adaptive neural network for on-line character recognition},
  booktitle = {From Pixels to Features III},
  editor    = {Impedovo, S.},
  publisher = {Elsevier},
  address   = {Amsterdam},
  pages     = {493--506},
  year      = {1992},
}

@inproceedings{Guyon93,
  author    = {Guyon, I. and Boser, B. and Vapnik, V.},
  title     = {Automatic Capacity Tuning of Very Large {VC}-dimension Classifiers},
  booktitle = NIPS5,
  editor    = NIPS5ed,
  publisher = {Morgan Kaufmann},
  address   = {Denver, CO},
  pages     = {147--155},
  year      = {1993},
}

@inproceedings{Guyon95,
  author    = {Guyon, I. and Pereira, F.},
  title     = {Design of a linguistic postprocessor using variable memory length {Markov} models},
  booktitle = ICDAR95,
  publisher = {IEEE Computer Society Press},
  address   = {Montreal, Canada},
  pages     = {454--457},
  month     = aug,
  year      = {1995},
}

@incollection{Guyon96,
  author    = {Guyon, I. and Schenkel, M. and Denker, J.},
  title     = {Overview and synthesis of on-line cursive handwriting recognition techniques},
  booktitle = {Handbook on Optical Character Recognition and Document Image Analysis},
  editor    = {Wang, P. S. P. and Bunke, H.},
  publisher = {World Scientific},
  year      = {1996},
}

@article{Guyon+Elisseeff-2003,
  author    = {Guyon, Isabelle and Elisseeff, Andre},
  title     = {An introduction to variable and feature selection},
  journal   = jmlr,
  volume    = {3},
  pages     = {1157--1182},
  year      = {2003},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  issn      = {1533-7928},
}
    %url = {http://portal.acm.org/citation.cfm?id=944968},

@book{Guyon+al-2006,
        editor = "Isabelle Guyon and Steve Gunn and Masoud Nikravesh and Lotfi Zadeh",
        title =    "Feature Extraction, Foundations and Applications",
        publisher =    "Springer",
        year =         "2006",
}


@article{Gyorgyi90a,
  author  = {Gy{\"o}rgyi, G.},
  title   = {Inference of a Rule by a Neural Network with Thermal Noise},
  journal = prl,
  volume  = {64},
  pages   = {2957--2960},
  year    = {1990},
}

% Author spelled with the umlaut, matching the Gyorgyi90a entry, so both
% works sort and abbreviate under the same name.
@InCollection{Gyorgyi90b,
  author =       "G. Gy{\"o}rgyi and N. Tishby",
  editor =       "W. K. Theumann and R. Koeberle",
  booktitle =    "Neural Networks and Spin Glasses",
  title =        "Statistical Theory of Learning a Rule",
  publisher =    "World Scientific",
  address =      "Singapore",
  year =         "1990",
}

@inproceedings{ha93,
  author    = {Ha, J. Y. and Oh, S. C. and Kim, J. H. and Kwon, Y. B.},
  title     = {Unconstrained handwritten word recognition with interconnected hidden {Markov} models},
  booktitle = {Third International Workshop on Frontiers in Handwriting Recognition},
  publisher = {IAPR},
  address   = {Buffalo},
  pages     = {455--460},
  month     = may,
  year      = {1993},
}

% A conference paper: the proceedings title belongs in booktitle, not journal.
% The extra braces around the whole title are removed so styles can apply
% their own casing.
@InProceedings{haasdonk2002tdk,
  author =       "B. Haasdonk and D. Keysers",
  title =        "Tangent distance kernels for support vector
                 machines",
  booktitle =    "Proc. of the 16th ICPR",
  volume =       "2",
  pages =        "864--868",
  year =         "2002",
}

@inproceedings{hadsell-chopra-lecun-06,
  author    = {Hadsell, Raia and Chopra, Sumit and {LeCun}, Yann},
  title     = {Dimensionality Reduction by Learning an Invariant Mapping},
  booktitle = cvpr06,
  publisher = {IEEE Press},
  pages     = {1735--1742},
  year      = {2006},
  original  = {orig/hadsell-chopra-lecun-06.pdf},
}

@inproceedings{hadsell-chopra-lecun-06-small,
  author    = {Hadsell, Raia and Chopra, Sumit and {LeCun}, Yann},
  title     = {Dimensionality Reduction by Learning an Invariant Mapping},
  booktitle = {CVPR'2006},
  publisher = {IEEE Press},
  year      = {2006},
  original  = {orig/hadsell-chopra-lecun-06.pdf},
}

@inproceedings{hadsell-iros-08,
  author    = {Hadsell, Raia and Erkan, Ayse and Sermanet, Pierre and Scoffier, Marco and Muller, Urs and {LeCun}, Yann},
  title     = {Deep Belief Net Learning in a Long-Range Vision System for Autonomous Off-Road Driving},
  booktitle = {Proc. Intelligent Robots and Systems (IROS'08)},
  pages     = {628--633},
  year      = {2008},
  original  = {orig/hadsell-iros-08.pdf},
}
 %url = "http://www.cs.nyu.edu/~raia/docs/iros08-farod.pdf",

% "Forthcoming publication" is a status, not a report number, so it is
% carried in note.
@TechReport{Haffner+96,
  author =       "P. Haffner and L. Bottou and J. Bromley and C. J. C.
                 Burges and T. Cauble and Y. {Le Cun} and C. Nohl and C.
                 Stanton and C. Stenard and P. Vincent",
  title =        "The {HCAR50} check amount reading system",
  institution =  "Lucent Technologies, Bell Labs Innovation",
  address =      "Holmdel, New-Jersey",
  year =         "1996",
  note =         "Forthcoming publication",
}

@inproceedings{Haffner89,
  author    = {Haffner, P. and Waibel, A. and Shikano, K.},
  title     = {Fast back-propagation learning methods for large phonemic neural networks},
  booktitle = {Proceedings of Eurospeech'89},
  year      = {1989},
}

@inproceedings{Haffner91,
  author    = {Haffner, P. and Franzini, M. and Waibel, A.},
  title     = {Integrating Time Alignment and Neural Networks for High Performance Continuous Speech Recognition},
  booktitle = icassp,
  address   = {Toronto},
  pages     = {105--108},
  year      = {1991},
}

@Book{HAJ90,
  author =       "X. D. Huang and Y. Ariki and M. Jack",
  title =        "Hidden {Markov} Models for Speech Recognition",
  publisher =    "Edinburgh University Press",
  address =      "Edinburgh",
  year =         "1990",
}

@inproceedings{HagiwaraK2000,
  author    = {Hagiwara, Katsuyuki and Kuno, Kazuhiro},
  title     = {Regularization Learning and Early Stopping in Linear Networks},
  booktitle = ijcnn,
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  pages     = {4511},
  year      = {2000},
  isbn      = {0-7695-0619-4},
}

@techreport{Ham2003,
  author      = {Ham, J. and Lee, D. D. and Mika, S. and Sch{\"o}lkopf, B.},
  title       = {A kernel view of the dimensionality reduction of manifolds},
  institution = {Max Planck Institute for Biological Cybernetics},
  number      = {TR-110},
  address     = {Germany},
  year        = {2003},
}

@article{Hamilton88,
  author  = {Hamilton, J. D.},
  title   = {Rational-Expectations Econometric Analysis of Changes in Regime},
  journal = {Journal of Economic Dynamics and Control},
  volume  = {12},
  pages   = {385--423},
  year    = {1988},
}

@article{hamilton89,
  author  = {Hamilton, J. D.},
  title   = {A new approach to the economic analysis of non-stationary time series and the business cycle},
  journal = {Econometrica},
  volume  = {57},
  number  = {2},
  pages   = {357--384},
  month   = mar,
  year    = {1989},
}

@article{Hamilton90,
  author  = {Hamilton, J. D.},
  title   = {Analysis of time series subject to changes in regime},
  journal = {Journal of Econometrics},
  volume  = {45},
  pages   = {39--70},
  year    = {1990},
}

@incollection{Hamilton93,
  author    = {Hamilton, J. D.},
  title     = {State-Space Models},
  booktitle = {Handbook of Econometrics},
  editor    = {Engle, R. and {McFadden}, D.},
  publisher = {North Holland, New York},
  year      = {1993},
}

@Article{Hamilton94,
  author =       "J. D. Hamilton and R. Susmel",
  title =        "Autoregressive conditional heteroskedasticity and
                 changes in regime",
  journal =      "Journal of Econometrics",
  volume =       "64",
  number =       "1--2",
  pages =        "307--333",
  year =         "1994",
}

@article{Hamilton96,
  author  = {Hamilton, J. D.},
  title   = {Specification testing in Markov-switching time-series models},
  journal = {Journal of Econometrics},
  volume  = {70},
  pages   = {127--157},
  year    = {1996},
}

@misc{Hammersley+Clifford-1971,
 author = {John M. Hammersley and Peter Clifford},
 year = 1971,
 title = {{Markov} fields on finite graphs and lattices},
 howpublished = {Unpublished manuscript}
}

@inproceedings{HammondSimoncelli07,
  author    = {Hammond, David K. and Simoncelli, Eero P.},
  title     = {A Machine Learning Framework for Adaptive Combination of Signal Denoising Methods},
  booktitle = ICIP07,
  volume    = {6},
  pages     = {29--32},
  year      = {2007},
}

@Article{hampshire90,
  author =       "John B. Hampshire and Alexander H. Waibel",
  title =        "A Novel Objective Function for Improved Phoneme
                 Recognition Using Time-Delay Neural Networks",
  journal =      "IEEE Transactions on Neural Networks",
  volume =       "1",
  number =       "2",
  pages =        "216--228",
  month =        jun,
  year =         "1990",
}

@inproceedings{HAMPSHIRE92A,
  author    = {Hampshire, J. B. and Kumar, B. V. K. Vijaya},
  title     = {Shooting Craps in Search of an Optimal Strategy for Training Connectionist Pattern Classifiers},
  booktitle = NIPS4,
  editor    = NIPS4ed,
  publisher = {Morgan Kaufmann},
  address   = {Denver, CO},
  pages     = {1125--1132},
  year      = {1992},
}

@inproceedings{Han96,
  author    = {Han, H-H. and Jung, H-C. and Lee, Y-R. and Jeong, S-C.},
  title     = {Application of Neural Network for {PWR} Steam Generator Water Level Control at Low Power Operation},
  booktitle = nipc-hmit96,
  volume    = {1},
  publisher = ans,
  pages     = {49--52},
  year      = {1996},
}

@inproceedings{Hanson89,
  author    = {Hanson, S. J. and Pratt, L.},
  title     = {A Comparison of Different Biases for Minimal Network Construction with Back-Propagation},
  booktitle = NIPS1,
  editor    = NIPS1ed,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {177--185},
  year      = {1989},
}

% The e-book URL moves from address to url, since address is the publisher's
% city. NOTE(review): confirm the city for this edition.
@Book{Hardle2004,
  author =       "Wolfgang H{\"a}rdle and Marlene M{\"u}ller and Stefan Sperlich and Axel
                 Werwatz",
  title =        "Nonparametric and Semiparametric Models",
  publisher =    "Springer",
  address =      "Berlin",
  year =         "2004",
  url =          "http://www.xplore-stat.de/ebooks/ebooks.html",
}

@article{Hardoon+al-2004,
    author = {Hardoon, David R. and Szedmak, Sandor R. and Shawe-Taylor, John R.},
    title = {Canonical Correlation Analysis: An Overview with Application to Learning Methods},
    journal = {Neural Computation},
    volume = {16},
    number = {12},
    pages = {2639--2664},
    month = dec,
    year = {2004},
    publisher = {MIT Press},
    address = {Cambridge, MA, USA},
    doi = {10.1162/0899766042321814},
    issn = {0899-7667},
    url = {http://portal.acm.org/citation.cfm?id=1119696.1119703}
}

% The non-ASCII letter is written as a LaTeX escape, as in the other entries
% of this file, so classic 8-bit BibTeX handles the name correctly.
@InProceedings{HardoonD2007,
  author =       "David R. Hardoon and John Shawe-Taylor and Antti
                 Ajanki and Kai Puolam{\"a}ki and Samuel Kaski",
  booktitle =    "Proceedings of AISTATS 2007",
  title =        "Information Retrieval by Inferring Implicit Queries
                 from Eye Movements",
  year =         "2007",
}

@InProceedings{Harmeling02,
  author =       "S. Harmeling and A. Ziehe and M. Kawanabe and K.-R.
                 M{\"u}ller",
  editor =       NIPS14ed,
  booktitle =    NIPS14,
  title =        "Kernel Feature Spaces and Nonlinear Blind Source
                 Separation",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2002",
  original =     "orig/AA34.ps",
}

@inproceedings{Harp90,
  author    = {S. A. Harp and T. Samad and A. Guha},
  title     = {Designing Application-Specific Neural Networks Using the Genetic Algorithm},
  editor    = NIPS2ed,
  booktitle = NIPS2,
  pages     = {447--454},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  year      = {1990},
}

@article{Hartman90,
  author  = {E. J. Hartman and J. D. Keeler and J. M. Kowalski},
  title   = {Layered Neural Networks with {G}aussian Hidden Units As Universal Approximations},
  journal = nc,
  volume  = {2},
  pages   = {210--215},
  year    = {1990},
}

@Article{Haruno01,
  author =       "M. Haruno and D. M. Wolpert and M. Kawato",
  title =        "{MOSAIC} model for sensorimotor learning and control",
  journal =      "Neural Computation",
  volume =       "13",
  number =       "10",
  pages =        "2201--2220",
  year =         "2001",
}

@inproceedings{Hassibi-nips93,
  author    = {B. Hassibi and D. G. Stork},
  title     = {Second Order Derivatives for Network Pruning: Optimal Brain Surgeon},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  pages     = {164--171},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1993},
}

@inproceedings{Hastad86,
  author    = {Johan H{\aa}stad},
  title     = {Almost optimal lower bounds for small depth circuits},
  booktitle = {Proceedings of the 18th annual ACM Symposium on Theory of Computing},
  pages     = {6--20},
  publisher = {ACM Press},
  address   = {Berkeley, California},
  year      = {1986},
}

@book{Hastad87,
  author    = {Johan T. H{\aa}stad},
  title     = {Computational Limitations for Small Depth Circuits},
  publisher = {{MIT} Press},
  year      = {1987},
}

@article{Hastad91,
  author  = {Johan H{\aa}stad and Mikael Goldmann},
  title   = {On the power of small-depth threshold circuits},
  journal = {Computational Complexity},
  volume  = {1},
  pages   = {113--129},
  year    = {1991},
}

@article{Hastie-Stuetzle-1989,
  author  = {T. Hastie and W. Stuetzle},
  title   = {Principal Curves},
  journal = {Journal of the American Statistical Association},
  volume  = {84},
  pages   = {502--516},
  year    = {1989},
}

@book{Hastie2001,
  author    = {T. Hastie and R. Tibshirani and J. Friedman},
  title     = {The elements of statistical learning: data mining, inference and prediction},
  series    = {Springer Series in Statistics},
  publisher = {Springer Verlag},
  year      = {2001},
  annote    = {ISBN: 0387952845},
}

@article{Hastie2004,
  author  = {Trevor Hastie and Saharon Rosset and Robert Tibshirani and Ji Zhu},
  title   = {The entire regularization path for the support vector machine},
  journal = jmlr,
  volume  = {5},
  pages   = {1391--1415},
  year    = {2004},
}

@InProceedings{hastie96discriminant,
  author =       "T. Hastie and R. Tibshirani",
  editor =       NIPS8ed,
  booktitle =    NIPS8,
  title =        "Discriminant Adaptive Nearest Neighbor Classification
                 and Regression",
  volume =       "8",
  publisher =    "{MIT} Press",
  pages =        "409--415",
  year =         "1996",
  URL =          "http://citeseer.nj.nec.com/hastie94discriminant.html",
}

@Article{Hathaway85,
  author =       "R. J. Hathaway",
  title =        "A constrained formulation of Maximum-Likelihood
                 estimation for normal mixture distributions",
  journal =      "The Annals of Statistics",
  volume =       "13",
  number =       "2",
  pages =        "795--800",
  year =         "1985",
}

@article{hausser:2003,
    author = {Michael H{\"a}usser and Bartlett Mel},
    title = {Dendrites: Bug or Feature?},
    journal = {Current Opinion in Neurobiology},
    volume = {13},
    year = {2003},
    pages = {372--383},
}

@inproceedings{Haussler89,
  author    = {D. Haussler},
  title     = {Generalizing the {PAC} model: sample size bounds from metric dimension-based uniform convergence results},
  booktitle = {Proc. of the 30th Annual Symposium on the Foundations of Computer Science},
  publisher = {IEEE},
  year      = {1989},
}

@inproceedings{haussler95,
  author    = {D. Haussler and J. Kivinen and M. K. Warmuth},
  title     = {Sequential prediction of individual sequences under general loss functions},
  booktitle = {Computational Learning Theory, 2nd European Conference, EuroCOLT'95},
  pages     = {69--83},
  publisher = {Springer},
  year      = {1995},
}

@book{hay01nnn,
    author = {Haykin, Simon},
    edition = {Second},
    howpublished = {Hardcover},
    isbn = {0132733501},
    keywords = {network, neural},
    month = jul,
    posted-at = {2009-07-04 21:37:33},
    priority = {2},
    publisher = {Prentice Hall},
    title = {Neural Networks: A Comprehensive Foundation},
    url = {http://www.amazon.com/exec/obidos/redirect?tag=citeulike07-20&path=ASIN/0132733501},
    year = {1998}
}


@techreport{He+Niyogi-2002,
  author      = {X. He and P. Niyogi},
  title       = {Locality Preserving Projections ({LPP})},
  institution = {University of Chicago, Computer Science},
  number      = {TR-2002-09},
  year        = {2002},
}

@InCollection{He+Niyogi-2004,
  author    = {Xiaofei He and Partha Niyogi},
  title     = {Locality Preserving Projections},
  editor    = NIPS16ed,
  booktitle = NIPS16,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2004},
}

@book{Hebb49,
  author    = {D. O. Hebb},
  title     = {The Organization of Behavior},
  publisher = {Wiley},
  address   = {New York},
  year      = {1949},
}

@inproceedings{Hecht-Nielsen87a,
  author    = {R. Hecht-Nielsen},
  title     = {Combinatorial Hypercompression},
  editor    = {M. Caudill and C. Butler},
  booktitle = icnn,
  volume    = {2},
  pages     = {455--461},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  year      = {1987},
}

@article{Hecht-Nielsen87b,
  author  = {R. Hecht-Nielsen},
  title   = {Counterpropagation Networks},
  journal = applopt,
  volume  = {26},
  pages   = {4979--4984},
  year    = {1987},
}

@article{Hecht-Nielsen88,
  author  = {R. Hecht-Nielsen},
  title   = {Applications of Counterpropagation Networks},
  journal = nn,
  volume  = {1},
  pages   = {131--139},
  year    = {1988},
}

@inproceedings{Hecht-Nielsen89,
  author    = {R. Hecht-Nielsen},
  title     = {Theory of the Backpropagation Neural Network},
  booktitle = ijcnn,
  volume    = {1},
  pages     = {593--605},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

@Article{Hecht-Nielsen-1995,
  author =       "R. Hecht-Nielsen",
  title =        "Replicator neural networks for universal optimal source coding",
  journal =      "Science",
  volume =       "269",
  pages =        "1860--1863",
  year =         "1995",
}

@TechReport{Heckerman96,
  author =       "D. Heckerman",
  title =        "A tutorial on learning with {Bayesian} networks",
  number =       "TR-95-06",
  institution =  "Microsoft Research",
  month =        jan,
  year =         "1996",
  url =          "ftp://ftp.research.microsoft.com/pub/Tech-Reports/Winter94-95/TR-95-06.PS",
}

@Article{HeckermanD2000,
  author  = "David Heckerman and David Maxwell Chickering and Christopher Meek and Robert Rounthwaite and Carl Kadie",
  title   = "Dependency networks for inference, collaborative filtering, and data visualization",
  journal = jmlr,
  volume  = "1",
  pages   = "49--75",
  year    = "2000",
}

@article{heeger:1992a,
    author={David J. Heeger},
    title ={Normalization of Cell Responses in Cat Striate Cortex},
    journal ={Visual Neuroscience},
    volume={9},
    number={2},
    pages={181--198},
    year={1992},
}

@inproceedings{Hegde88,
  author    = {S. U. Hegde and J. L. Sweet and W. B. Levy},
  title     = {Determination of Parameters in a Hopfield/Tank Computational Network},
  booktitle = icnn,
  volume    = {2},
  pages     = {291--298},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@article{HedgeJ2000,
	affiliation = {Department of Anatomy and Neurobiology, Washington University School of Medicine, St. Louis, Missouri 63110, USA.},
	author = {Jay Hegd{\'e} and David C. {Van Essen}},
	citeulike-article-id = {465720},
	issn = {1529-2401},
	journal = {Journal of Neuroscience},
	keywords = {contour, v2},
	month = mar,
	number = {5},
	posted-at = {2006-01-15 12:57:15},
	priority = {0},
	title = {Selectivity for complex shapes in primate visual area V2},
	volume = {20},
	year = {2000}
}
	%url = {http://view.ncbi.nlm.nih.gov/pubmed/10684908},

@InProceedings{Heitz+al:NIPS08a,
  author    = "G. Heitz and S. Gould and A. Saxena and D. Koller",
  title     = "Cascaded Classification Models: {C}ombining Models for Holistic Scene Understanding",
  booktitle = "Advances in Neural Information Processing Systems (NIPS 2008)",
  year      = "2008",
}

@inproceedings{HeldM1998,
  author    = {Marcus Held and Joachim M. Buhmann},
  title     = {Unsupervised on-line learning of decision trees for hierarchical data analysis},
  editor    = NIPS10ed,
  booktitle = NIPS10,
  pages     = {514--520},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1998},
  isbn      = {0-262-10076-2},
  location  = {Denver, Colorado, United States},
}

@inproceedings{herlocker99,
  author    = {Jonathan L. Herlocker and Joseph A. Konstan and Al Borchers and John Riedl},
  title     = {An algorithmic framework for performing collaborative filtering},
  booktitle = {SIGIR '99: Proceedings of the 22nd annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {230--237},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  year      = {1999},
  location  = {Berkeley, California, United States},
}

@inproceedings{Hermansky-genova91,
  author    = {Hynek Hermansky and Nelson Morgan and Aruna Bayya and Phil Kohn},
  title     = {Compensation for the Effect of the Communication Channel in Auditory-like Analysis of Speech ({RASTA}-{PLP})},
  booktitle = {Proc. of Eurospeech 91},
  pages     = {1367--1371},
  address   = {Genova (Italy)},
  year      = {1991},
}

@techreport{Hermansky-icsi91,
  author      = {Hynek Hermansky and Nelson Morgan and Aruna Bayya and Phil Kohn},
  title       = {{RASTA}-{PLP} Speech Analysis},
  institution = {International Computer Science Institute},
  number      = {TR-91-069},
  address     = {Berkeley, CA},
  month       = dec,
  year        = {1991},
  OPTnote     = {Most speech parameter estimation techniques are easily
                 influenced by the frequency response of the communication
                 channel. We have developed a technique that is more robust
                 to such steady-state spectral factors in speech. The
                 approach is conceptually simple and computationally
                 efficient. The new method is described, and experimental
                 results are reported, showing a significant advantage for
                 the proposed method.},
}

@Article{Hermansky-jasa90,
  author =       "Hynek Hermansky",
  title =        "Perceptual Linear Predictive ({PLP}) Analysis of
                 Speech",
  journal =      jasa,
  volume =       "87",
  number =       "4",
  pages =        "1738--1752",
  year =         "1990",
}

@Book{Hernandez-Lerma+Lasserre-2003,
  author =       "On{\'e}simo Hern{\'a}ndez-Lerma and Jean Bernard
                 Lasserre",
  title =        "Markov Chains and Invariant Probabilities",
  publisher =    "Birkh{\"a}user Verlag",
  year =         "2003",
}

@inproceedings{Hertz86,
  author    = {J. A. Hertz and G. Grinstein and S. Solla},
  title     = {Memory Networks with Asymmetric Bonds},
  editor    = {J. S. Denker},
  booktitle = snowbird,
  pages     = {212--218},
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
  year      = {1986},
}

@inproceedings{Hertz87,
  author    = {J. A. Hertz and G. Grinstein and S. Solla},
  title     = {Irreversible Spin Glasses and Neural Networks},
  editor    = {J. L. van Hemmen and I. Morgenstern},
  booktitle = {Heidelberg Colloquium on Glassy Dynamics},
  pages     = {538--546},
  publisher = {Springer-Verlag, Berlin},
  address   = {Heidelberg 1986},
  year      = {1987},
}

@article{Hertz89a,
  author  = {J. A. Hertz},
  title   = {A Gauge Theory in Computational Vision: {A} Model for Outline Extraction},
  journal = pscrip,
  volume  = {39},
  pages   = {161--167},
  year    = {1989},
}

@article{Hertz89b,
  author  = {J. A. Hertz and A. Krogh and G. I. Thorbergsson},
  title   = {Phase Transitions in Simple Learning},
  journal = jpa,
  volume  = {22},
  pages   = {2133--2150},
  year    = {1989},
}

@techreport{Hertz90,
  author      = {J. A. Hertz},
  title       = {Statistical Dynamics of Learning},
  institution = {Nordita},
  type        = {Preprint},
  number      = {90/34 S},
  address     = {Copenhagen, Denmark},
  year        = {1990},
}

@article{Herz89,
  author  = {A. Herz and B. Sulzer and R. K{\"u}hn and J. L. van Hemmen},
  title   = {Hebbian Learning Reconsidered: Representation of Static and Dynamic Objects in Associative Neural Nets},
  journal = biocyb,
  volume  = {60},
  pages   = {457--467},
  year    = {1989},
}

@article{Heskes-98,
  author  = {T. Heskes},
  title   = {Bias/variance decompositions for likelihood-based estimators},
  journal = {Neural Computation},
  volume  = {10},
  pages   = {1425--1433},
  year    = {1998},
}

@article{heskes00,
  author  = {Tom Heskes},
  title   = {On Natural Learning and Pruning in Multilayered Perceptrons},
  journal = {Neural Computation},
  volume  = {12},
  number  = {4},
  pages   = {881--901},
  year    = {2000},
}

@inproceedings{heskes98,
  author    = {Tom Heskes},
  title     = {Solving a huge number of similar tasks: a combination of multi-task learning and a hierarchical {Bayesian} approach},
  booktitle = {International Conference On Machine Learning},
  year      = {1998},
}

@Article{Hestenes+Stiefel-1952,
  author =       "Magnus R. Hestenes and Eduard Stiefel",
  title =        "Methods of Conjugate Gradients for Solving Linear
                 Systems",
  journal =      "Journal of Research of the National Bureau of Standards",
  volume =       "49",
  number =       "6",
  pages =        "409--436",
  year =         "1952",
}

@article{Hettich-93,
  author  = {R. Hettich and K. O. Kortanek},
  title   = {Semi-infinite programming: theory, methods, and applications},
  journal = {{SIAM} Review},
  volume  = {35},
  number  = {3},
  pages   = {380--429},
  year    = {1993},
}

@inproceedings{Hines96,
  author    = {J. W. Hines},
  title     = {A Logarithmic Neural Network Architecture for a {PRA} Approximation},
  booktitle = nipc-hmit96,
  volume    = {1},
  pages     = {235--241},
  publisher = ans,
  year      = {1996},
}

@Article{HinOsiWel2006,
  author =       "Geoffrey E. Hinton and Simon Osindero and Max Welling
                 and {Yee Whye} Teh",
  title =        "Unsupervised Discovery of Non-Linear Structure using
                 Contrastive Backpropagation",
  journal =      "Cognitive Science",
  volume =       "30",
  number =       "4",
  pages =        "725--731",
  year =         "2006",
}

@Article{Hinton+Ghahramani-97,
  author =       "G. E. Hinton and Z. Ghahramani",
  title =        "Generative models for discovering sparse distributed
                 representations",
  journal =      "Philosophical Transactions of the Royal Society of
                 London, Series B",
  volume =       "352",
  pages =        "1177--1190",
  year =         "1997",
}

% Note: same authors, title, pages and year as entry Hinton86a.
@incollection{Hinton-bo86,
  author    = {G. E. Hinton and T. J. Sejnowski},
  title     = {Learning and relearning in {Boltzmann} machines},
  editor    = {D. E. Rumelhart and J. L. McClelland},
  booktitle = {Parallel Distributed Processing: Explorations in the Microstructure of Cognition. Volume 1: Foundations},
  pages     = {282--317},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1986},
}

@inproceedings{Hinton-ICA-2001,
  author    = {G. E. Hinton and M. Welling and Y. W. Teh and S. Osindero},
  title     = {A New View of {ICA}},
  booktitle = {Proceedings of 3rd International Conference on Independent Component Analysis and Blind Signal Separation (ICA'01)},
  pages     = {746--751},
  address   = {San Diego, CA},
  year      = {2001},
}

@InProceedings{Hinton-nips95,
  author =       "G. E. Hinton and M. Revow and P. Dayan",
  editor =       NIPS7ed,
  booktitle =    NIPS7,
  title =        "Recognizing handwritten digits using mixtures of
                 linear models",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "1015--1022",
  year =         "1995",
}

@techreport{Hinton-PoE-2000,
  author      = {Geoffrey E. Hinton},
  title       = {Training Products of Experts by Minimizing Contrastive Divergence},
  institution = {Gatsby Unit, University College London},
  number      = {GCNU TR 2000-004},
  year        = {2000},
}

@article{Hinton-Science2006,
  author  = {Geoffrey E. Hinton and Ruslan Salakhutdinov},
  title   = {Reducing the dimensionality of data with neural networks},
  journal = {Science},
  volume  = {313},
  number  = {5786},
  pages   = {504--507},
  month   = jul,
  year    = {2006},
}

% DEPRECATED: the following entry duplicates the preceding one (Hinton-Science2006); cite that key instead.
@Article{Hinton+Salakhutdinov-2006,
  author =       "Geoffrey E. Hinton and Ruslan Salakhutdinov",
  title =        "Reducing the Dimensionality of Data with Neural
                 Networks",
  journal =      "Science",
  volume =       "313",
  number =       "5786",
  pages =        "504--507",
  month =        jul,
  year =         "2006",
}


@Article{Hinton06,
  author =       "Geoffrey E. Hinton and Simon Osindero and {Yee Whye} Teh",
  title =        "A fast learning algorithm for deep belief nets",
  journal =      "Neural Computation",
  volume =       "18",
  pages =        "1527--1554",
  year =         "2006",
}

@article{Hinton06-small,
  author  = {G. E. Hinton and S. Osindero and Y.-W. Teh},
  title   = {A fast learning algorithm for deep belief nets},
  journal = {Neural Computation},
  volume  = {18},
  pages   = {1527--1554},
  year    = {2006},
}

@inproceedings{hinton1994amd,
  author    = {Geoffrey E. Hinton and R. S. Zemel},
  title     = {Autoencoders, minimum description length, and Helmholtz free energy},
  editor    = NIPS6ed,
  booktitle = NIPS6,
  pages     = {3--10},
  publisher = {Morgan Kaufmann Publishers, Inc.},
  year      = {1994},
}

@article{Hinton2002,
  author  = {Geoffrey E. Hinton},
  title   = {Training products of experts by minimizing contrastive divergence},
  journal = {Neural Computation},
  volume  = {14},
  pages   = {1771--1800},
  year    = {2002},
}

@inproceedings{Hinton83,
  author    = {G. E. Hinton and T. J. Sejnowski},
  title     = {Optimal Perceptual Inference},
  booktitle = cvpr83,
  pages     = {448--453},
  publisher = {IEEE, New York},
  address   = {Washington 1983},
  year      = {1983},
}

% Note: same report (authors, title, number, institution, year) as entry Hinton-Boltzmann.
@techreport{Hinton84,
  author      = {G. E. Hinton and T. J. Sejnowski and D. H. Ackley},
  title       = {{Boltzmann} machines: Constraint satisfaction networks that learn},
  institution = {Carnegie-Mellon University, Dept. of Computer Science},
  number      = {TR-CMU-CS-84-119},
  year        = {1984},
}

@incollection{Hinton86a,
  author    = {G. E. Hinton and T. J. Sejnowski},
  title     = {Learning and Relearning in {Boltzmann} Machines},
  editor    = {D. E. Rumelhart and J. L. McClelland},
  booktitle = pdp,
  volume    = {1},
  chapter   = {7},
  pages     = {282--317},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1986},
}

@inproceedings{Hinton86b,
  author    = {Geoffrey E. Hinton},
  title     = {Learning Distributed Representations of Concepts},
  booktitle = {Proceedings of the Eighth Annual Conference of the Cognitive Science Society},
  pages     = {1--12},
  publisher = {Lawrence Erlbaum, Hillsdale},
  address   = {Amherst 1986},
  year      = {1986},
}

@inproceedings{Hinton86b-small,
  author    = {Geoffrey E. Hinton},
  title     = {Learning Distributed Representations of Concepts},
  booktitle = {Proc. 8th Annual Conf. Cog. Sc. Society},
  pages     = {1--12},
  year      = {1986},
}

@InProceedings{Hinton87,
  author =       "Geoffrey E. Hinton",
  editor =       "J. W. {de Bakker} and A. J. Nijman and P. C.
                 Treleaven",
  booktitle =    "Proceedings of {PARLE} Conference on Parallel
                 Architectures and Languages Europe",
  title =        "Learning translation invariant recognition in
                 massively parallel networks",
  publisher =    "Springer-Verlag",
  address =      "Berlin",
  pages =        "1--13",
  year =         "1987",
}

@article{Hinton89,
  author  = {Geoffrey E. Hinton},
  title   = {Deterministic {Boltzmann} Learning Performs Steepest Descent in Weight Space},
  journal = nc,
  volume  = {1},
  pages   = {143--150},
  year    = {1989},
}

@article{Hinton89b,
  author  = {Geoffrey E. Hinton},
  title   = {Connectionist learning procedures},
  journal = {Artificial Intelligence},
  volume  = {40},
  pages   = {185--234},
  year    = {1989},
}

@article{Hinton90,
  author  = {G. E. Hinton and S. J. Nowlan},
  title   = {The bootstrap Widrow-Hoff rule as a cluster-formation algorithm},
  journal = nc,
  volume  = {2},
  pages   = {355--362},
  year    = {1990},
}

@InProceedings{Hinton92,
  author =       "G. E. Hinton and C. K. I. Williams and M. D. Revow",
  editor =       NIPS4ed,
  booktitle =    NIPS4,
  title =        "Adaptive elastic models for hand-printed character
                 recognition",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "512--519",
  year =         "1992",
}

@misc{Hinton93,
  author       = {Geoffrey E. Hinton},
  title        = {Using the minimum description length principle to discover factorial codes},
  howpublished = {Lecture given at the 1993 Connectionist Models Summer School},
  year         = {1993},
}

@Article{Hinton95,
  author =       "Geoffrey E. Hinton and Peter Dayan and Brendan J. Frey and Radford M.
                 Neal",
  title =        "The wake-sleep algorithm for unsupervised neural
                 networks",
  journal =      "Science",
  volume =       "268",
  pages =        "1158--1161",
  year =         "1995",
}

@article{hinton97modelling,
  author  = {G. E. Hinton and P. Dayan and M. Revow},
  title   = {Modelling the manifolds of images of handwritten digits},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {8},
  pages   = {65--74},
  year    = {1997},
}

@inproceedings{Hinton99,
  author    = {Geoffrey E. Hinton},
  title     = {Products of Experts},
  booktitle = {Proceedings of the Ninth International Conference on Artificial Neural Networks (ICANN)},
  volume    = {1},
  pages     = {1--6},
  publisher = {IEE},
  address   = {Edinburgh, Scotland},
  year      = {1999},
}

@inproceedings{HintonG2005,
  author    = {Geoffrey E. Hinton and Simon Osindero and Kejie Bao},
  title     = {Learning Causally Linked Markov Random Fields},
  editor    = aistats05ed,
  booktitle = aistats05,
  pages     = {128--135},
  publisher = {Society for Artificial Intelligence and Statistics},
  year      = {2005},
}

@inproceedings{HintonG2005-small,
  author    = {Geoffrey E. Hinton and Simon Osindero and Kejie Bao},
  title     = {Learning Causally Linked Markov Random Fields},
  booktitle = {Proceedings of AISTATS 2005},
  year      = {2005},
}

@techreport{HintonG2006,
  author      = {Geoffrey E. Hinton},
  title       = {To recognize shapes, first learn to generate images},
  institution = {University of Toronto},
  number      = {UTML TR 2006-003},
  year        = {2006},
}

@incollection{HintonG2007,
  author    = {Geoffrey E. Hinton},
  title     = {To recognize shapes, first learn to generate images},
  editor    = {Paul Cisek and Trevor Drew and John Kalaska},
  booktitle = {Computational Neuroscience: Theoretical Insights into Brain Function},
  publisher = {Elsevier},
  year      = {2007},
}

% Note: same report (authors, title, number, institution, year) as entry Hinton84.
@techreport{Hinton-Boltzmann,
  author      = {G. E. Hinton and T. J. Sejnowski and D. H. Ackley},
  title       = {{Boltzmann} machines: Constraint satisfaction networks that learn},
  institution = {Carnegie-Mellon University, Dept. of Computer Science},
  number      = {TR-CMU-CS-84-119},
  year        = {1984},
  OPTnote     = {},
}

@InProceedings{Hirayama-nips92,
  author =       "M. Hirayama and E. Vatikiotis-Bateson and M. Kawato
                 and M. I. Jordan",
  editor =       NIPS4ed,
  booktitle =    NIPS4,
  title =        "Forward Dynamics Modeling of Speech Motor Control
                 Using Physiological Data",
  publisher =    "Morgan Kaufmann, San Mateo",
  address =      "Denver, CO",
  pages =        "191--198",
  year =         "1992",
  OPTnote =      "",
}

@article{Hjort96,
  author  = {N. L. Hjort and M. C. Jones},
  title   = {Locally parametric nonparametric density estimation},
  journal = {Annals of Statistics},
  volume  = {24},
  number  = {4},
  pages   = {1619--1647},
  year    = {1996},
}

@InProceedings{Ho95,
  author =       "Tin Kam Ho",
  booktitle =    ICDAR95,
  title =        "Random Decision Forests",
  address =      "Montreal, Canada",
  pages =        "278--282",
  year =         "1995",
}

@MastersThesis{Hochreiter91,
  author =       "S. Hochreiter",
  title =        "Untersuchungen zu dynamischen neuronalen {Netzen}",
  type =         "Diploma thesis",
  school =       "Institut f{\"u}r Informatik, Lehrstuhl Prof. Brauer,
                 Technische Universit{\"a}t M{\"u}nchen",
  year =         "1991",
  url =          "http://www7.informatik.tu-muenchen.de/~hochreit",
}

@article{Hoerl+Kennard70,
  author  = {A. Hoerl and R. Kennard},
  title   = {Ridge regression: biased estimation for non-orthogonal problems},
  journal = {Technometrics},
  volume  = {12},
  pages   = {55--67},
  year    = {1970},
}

@inproceedings{Hoff-2008,
  author =       "P. D. Hoff",
  title =        "Modeling homophily and stochastic equivalence in symmetric relational data",
  editor =       NIPS20ed,
  booktitle =    NIPS20,
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "657--664",
  year =         "2008",
}

@inproceedings{Holger-icpr96,
  author    = {H. Schwenk and M. Milgram},
  title     = {Constraint Tangent Distance For On-Line Character Recognition},
  booktitle = icpr,
  pages     = {520--524},
  year      = {1996},
}

@inproceedings{Holger-nips96,
  author    = {H. Schwenk and M. Milgram},
  title     = {Transformation invariant autoassociation with application to handwritten character recognition},
  editor    = NIPS7ed,
  booktitle = NIPS7,
  pages     = {991--998},
  publisher = {MIT Press},
  year      = {1995},
}

@book{Holland75,
  author    = {J. H. Holland},
  title     = {Adaptation in Natural and Artificial Systems},
  publisher = {University of Michigan Press},
  year      = {1975},
  key       = {Holland},
}

@article{Holley+Karplus89,
  author  = {L. H. Holley and M. Karplus},
  title   = {Protein secondary structure prediction with a neural network},
  journal = PNAS,
  volume  = {86},
  pages   = {152--156},
  year    = {1989},
}

@incollection{HolTre93,
  author    = {J. Hollatz and V. Tresp},
  title     = {A rule-based network architecture},
  editor    = {I. Aleksander and J. Taylor},
  booktitle = {Artificial Neural Networks II},
  publisher = {Elsevier},
  address   = {Amsterdam},
  year      = {1992},
}

@techreport{HolTreAhm92,
  author      = {J. Hollatz and V. Tresp and S. Ahmad},
  title       = {Network structuring and training using rule-based knowledge},
  institution = {Siemens AG},
  type        = {Technical Report},
  address     = {M{\"u}nchen, Germany},
  year        = {1992},
}

@InProceedings{HolubA2005,
  author =       "Alex Holub and Pietro Perona",
  booktitle =    cvpr05,
  title =        "A Discriminative Framework for Modelling Object
                 Classes",
  publisher =    "IEEE Computer Society",
  address =      "Washington, DC, USA",
  pages =        "664--671",
  year =         "2005",
  ISBN =         "0-7695-2372-2",
  doi =          "10.1109/CVPR.2005.25",
}

@incollection{HonglakL2009,
  author    = {Honglak Lee and Roger Grosse and Rajesh Ranganath and Andrew Y. Ng},
  title     = {Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations},
  editor    = ICML09ed,
  booktitle = ICML09,
  publisher = ICML09publ,
  address   = {Montreal (Qc), Canada},
  year      = {2009},
}

@incollection{HonglakL2008,
  author    = {Honglak Lee and Chaitanya Ekanadham and Andrew Ng},
  title     = {Sparse deep belief net model for visual area {V2}},
  editor    = NIPS20ed,
  booktitle = NIPS20,
  pages     = {873--880},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2008},
}

@InCollection{HonglakLNIPS2009,
  author    = "Honglak Lee and Peter Pham and Yan Largman and Andrew Ng",
  title     = "Unsupervised feature learning for audio classification using convolutional deep belief networks",
  editor    = NIPS22ed,
  booktitle = NIPS22,
  pages     = "1096--1104",
  year      = "2009",
}

@book{Hopcroft79,
  author    = {J. E. Hopcroft and J. D. Ullman},
  title     = {Introduction to Automata Theory, Languages, and Computation},
  publisher = {Addison-Wesley Publishing Company, Inc.},
  address   = {Reading, MA},
  year      = {1979},
}

@Article{Hopfield82,
  author =       "John J. Hopfield",
  title =        "Neural Networks and Physical Systems with Emergent
                 Collective Computational Abilities",
  journal =      PNAS,
  volume =       "79",
  pages =        "2554--2558",
  year =         "1982",
}

@article{Hopfield83,
  author  = {J. J. Hopfield and D. I. Feinstein and R. G. Palmer},
  title   = {``Unlearning'' Has a Stabilizing Effect in Collective Memories},
  journal = nature,
  year    = {1983},
  volume  = {304},
  pages   = {158--159},
}

@Article{Hopfield84,
  author =       "J. J. Hopfield",
  title =        "Neurons with Graded Responses Have Collective
                 Computational Properties Like Those of Two-State
                 Neurons",
  journal =      PNAS,
  volume =       "81",
  pages =        "3088--3092",
  year =         "1984",
}

@article{Hopfield85,
  author  = {J. J. Hopfield and D. W. Tank},
  title   = {``Neural'' Computation of Decisions in Optimization Problems},
  journal = biocyb,
  year    = {1985},
  volume  = {52},
  pages   = {141--152},
}

@article{Hopfield86,
  author  = {J. J. Hopfield and D. W. Tank},
  title   = {Computing with Neural Circuits: {A} Model},
  journal = science,
  year    = {1986},
  volume  = {233},
  pages   = {625--633},
}

@article{Hopfield87,
  author  = {J. J. Hopfield},
  title   = {Learning Algorithms and Probability Distributions in Feed-Forward and Feed-Back Networks},
  journal = PNAS,
  year    = {1987},
  volume  = {84},
  pages   = {8429--8433},
}

@incollection{Hopfield89,
  author    = {J. J. Hopfield and D. W. Tank},
  title     = {Neural Architecture and Biophysics for Sequence Recognition},
  booktitle = {Neural Models of Plasticity},
  editor    = {J. H. Byrne and W. O. Berry},
  pages     = {363--377},
  publisher = {Academic Press},
  address   = {San Diego},
  year      = {1989},
}

@article{Hornik89,
  author  = {Kurt Hornik and Maxwell Stinchcombe and Halbert White},
  title   = {Multilayer Feedforward Networks Are Universal Approximators},
  journal = nn,
  year    = {1989},
  volume  = {2},
  pages   = {359--366},
}

@article{Hotelling1933,
  author  = {H. Hotelling},
  title   = {Analysis of a Complex of Statistical Variables into Principal Components},
  journal = {Journal of Educational Psychology},
  year    = {1933},
  volume  = {24},
  pages   = {417--441, 498--520},
}

@article{Hotelling-1936,
  author  = "H. Hotelling",
  title   = "Relations between two sets of variates",
  journal = "Biometrika",
  year    = "1936",
  volume  = "28",
  pages   = "321--377",
}

@techreport{Houde91,
  author      = {J. F. Houde},
  title       = {Recursive estimation of articulatory control},
  institution = {MIT},
  type        = {Computational Cognitive Science},
  number      = {TR},
  address     = {Cambridge, MA},
  year        = {1991},
}

@inproceedings{Howlett+Lawrence-1995a,
  author    = {R. J. Howlett and D. H. Lawrence},
  title     = {The Class-Distributed Neural Network},
  booktitle = {World Transputer Congress~'95},
  address   = {Harrogate, UK},
  year      = {1995},
}

@inproceedings{Howlett+Lawrence-1995b,
  author    = {R. J. Howlett and D. H. Lawrence},
  title     = {A Multi-Computer Neural Network Applied to Machine-Vision},
  booktitle = {Proceedings of the IEEE International Conference on Neural Networks},
  volume    = {2},
  pages     = {1150--1153},
  address   = {Perth, Australia},
  year      = {1995},
}

@inproceedings{Hsu88,
  author    = {K. Hsu and D. Brady and D. Psaltis},
  title     = {Experimental Demonstration of Optical Neural Computers},
  booktitle = nips87,
  editor    = nips87ed,
  pages     = {377--386},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  year      = {1988},
}

@article{huang04dynamic,
  author  = {X. Huang and F. Peng and A. An and D. Schuurmans},
  title   = {Dynamic web log session identification with statistical language models},
  journal = {Journal of the American Society for Information Science and Technology},
  year    = {2004},
  volume  = {55},
  number  = {14},
  pages   = {1290--1303},
}

@book{Huang87,
  author    = {K. Huang},
  title     = {Statistical Mechanics},
  publisher = {Wiley},
  address   = {New York},
  year      = {1987},
}

@inproceedings{Huang88,
  author    = {W. Y. Huang and R. P. Lippmann},
  title     = {Neural Net and Traditional Classifiers},
  booktitle = nips87,
  editor    = nips87ed,
  pages     = {387--396},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  year      = {1988},
}

@TechReport{Huang89,
  author =       "X. D. Huang and H. W. Hon and K. F. Lee",
  title =        "Multiple Codebook Semi-Continuous Hidden {Markov}
                 Models for Speaker-Independent Continuous Speech
                 Recognition",
  number =       "CMU-CS-89-136",
  institution =  "School of Computer Science Carnegie-Mellon
                 University",
  address =      "Pittsburgh, Pennsylvania",
  month =        apr,
  year =         "1989",
}

@inproceedings{Huang90,
  author    = {Xuedong Huang and Kai-Fu Lee and Hsiao-Wuen Hon},
  title     = {On Semi-Continuous Hidden {Markov} Modeling},
  booktitle = icassp,
  year      = {1990},
  pages     = {689--692},
}

@article{Hubel+Wiesel-1959,
  author  = "David H. Hubel and Torsten N. Wiesel",
  title   = "Receptive Fields of Single Neurons in the Cat's Striate Cortex",
  journal = "Journal of Physiology",
  year    = "1959",
  volume  = "148",
  pages   = "574--591",
  biburl  = "http://www.bibsonomy.org/bibtex/202c5cf1ee910eadba5efa77b3cd043f6/idsia",
}

@article{Hubel62,
  author  = {D. H. Hubel and T. N. Wiesel},
  title   = {Receptive Fields, Binocular Interaction, and Functional Architecture in the Cat's Visual Cortex},
  journal = jphysiol,
  year    = {1962},
  volume  = {160},
  pages   = {106--154},
}

@article{Hubel+Wiesel-1968,
 author = {D. H. Hubel and T. N. Wiesel},
 title = {Receptive fields and functional architecture of monkey striate cortex},
 journal = jphysiol,
 volume = 195,
 pages = {215--243},
 year = 1968,
}

@article{Huber-1985,
  author  = "Huber, Peter J.",
  title   = "Projection Pursuit",
  journal = "The Annals of Statistics",
  year    = "1985",
  volume  = "13",
  number  = "2",
  pages   = "435--475",
  url     = "http://www.jstor.org/stable/2241175",
  comment = "Projection Pursuit",
}

@inproceedings{Hueter88,
  author    = {G. J. Hueter},
  title     = {Solution of the Travelling Salesman Problem with an Adaptive Ring},
  booktitle = icnn,
  volume    = {1},
  pages     = {85--92},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@inproceedings{Hush88,
  author    = {D. R. Hush and J. M. Salas},
  title     = {Improving the Learning Rate of Back-Propagation with the Gradient Reuse Algorithm},
  booktitle = icnn,
  volume    = {1},
  pages     = {441--447},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@Article{Hush92,
  author =       "D. R. Hush and B. Horne and J. M. Salas",
  title =        "Error Surfaces for Multilayer Perceptrons",
  journal =      ieeesmc,
  volume =       "22",
  number =       "5",
  pages =        "1152--1161",
  month =        sep,
  year =         "1992",
}

@InCollection{Hutchins+Hazlehurst-02,
  author =       "Edwin Hutchins and Brian Hazlehurst",
  editor =       "A. Cangelosi and D. Parisi",
  booktitle =    "Simulating the Evolution of Language",
  title =        "Auto-organization and Emergence of Shared Language
                 Structure",
  publisher =    "Springer-Verlag",
  address =      "London",
  pages =        "279--305",
  year =         "2002",
}

@InCollection{Hutchins+Hazlehurst-95,
  author =       "Edwin Hutchins and Brian Hazlehurst",
  editor =       "N. Gilbert and R. Conte",
  booktitle =    "Artificial Societies: the computer simulation of
                 social life",
  title =        "How to invent a lexicon: the development of shared
                 symbols in interaction",
  publisher =    "UCL Press",
  address =      "London",
  pages =        "157--189",
  year =         "1995",
}

@Article{Hutchinson94,
  author =       "J. M. Hutchinson and A. W. Lo and T. Poggio",
  title =        "A Nonparametric Approach to Pricing and Hedging
                 Derivative Securities Via Learning Networks",
  journal =      "Journal of Finance",
  volume =       "49",
  number =       "3",
  pages =        "851--889",
  year =         "1994",
}

@Book{Hutter2005,
  author =       "Marcus Hutter",
  title =        "Universal Artificial Intelligence: Sequential
                 Decisions based on Algorithmic Probability",
  publisher =    "Springer",
  address =      "Berlin",
  year =         "2005",
}

@Book{Hwang+al-1992,
  author =       "Frank K. Hwang and Dana Richards and Pawel Winter",
  title =        "The {Steiner} Tree Problem",
  series =       "Annals of Discrete Mathematics",
  volume =       "53",
  publisher =    "Elsevier",
  address =      "Amsterdam",
  year =         "1992",
}

@article{Hyvarinen-1999,
    author = {Hyv{\"a}rinen, A.},
    journal = {Neural Computing Surveys},
    keywords = {ica, separation, waspaa07bib},
    pages = {94--128},
    title = {Survey on Independent Component Analysis},
    url = {http://citeseer.ist.psu.edu/223687.html},
    volume = {2},
    year = {1999}
}

@book{Hyvarinen-2001,
    author = {Hyv{\"{a}}rinen, Aapo and Karhunen, Juha and Oja, Erkki},
    howpublished = {Hardcover},
    isbn = {047140540X},
    month = may,
    posted-at = {2008-07-02 02:13:00},
    priority = {2},
    publisher = {Wiley-Interscience},
    title = {Independent Component Analysis},
    url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/047140540X},
    year = {2001}
}

@article{Hyvarinen+al-01,
  author  = {Aapo Hyv{\"{a}}rinen and Patrik O. Hoyer and Mika Inki},
  title   = {Topographic Independent Component Analysis},
  journal = {Neural Computation},
  year    = {2001},
  volume  = {13},
  number  = {7},
  pages   = {1527--1558},
}

@article{HyvarinenA2001,
  author    = {Aapo Hyv{\"{a}}rinen and Patrik O. Hoyer and Mika O. Inki},
  title     = {Topographic Independent Component Analysis},
  journal   = {Neural Computation},
  year      = {2001},
  volume    = {13},
  number    = {7},
  pages     = {1527--1558},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  issn      = {0899-7667},
}

@Article{HyvarinenA2001-small,
  author =       "Aapo Hyv{\"{a}}rinen and Patrik O. Hoyer and Mika O. Inki",
  title =        "Topographic Independent Component Analysis",
  journal =      "Neural Computation",
  volume =       "13",
  number =       "7",
  pages =        "1527--1558",
  year =         "2001",
}

@article{Hyvarinen-2005,
  author  = {Aapo Hyv{\"{a}}rinen},
  title   = {Estimation of non-normalized statistical models using score matching},
  journal = jmlr,
  year    = {2005},
  volume  = {6},
  pages   = {695--709},
}

@article{Hyvarinen-2007,
  author  = {Aapo Hyv{\"{a}}rinen},
  title   = {Some extensions of score matching},
  journal = {Computational Statistics and Data Analysis},
  year    = {2007},
  volume  = {51},
  pages   = {2499--2512},
}

@article{Hyvarinen-2007b,
  author  = {Aapo Hyv{\"{a}}rinen},
  title   = {Connections between score matching, contrastive divergence, and pseudolikelihood for continuous-valued variables},
  journal = {{IEEE} Transactions on Neural Networks},
  year    = {2007},
  volume  = {18},
  pages   = {1529--1531},
}

@article{HyvarinenA2008,
 author = {Hyv{\"a}rinen, Aapo},
 title = {Optimal approximation of signal priors},
 journal = {Neural Computation},
 volume = {20},
 number = {12},
 year = {2008},
 pages = {3087--3110},
 publisher = {MIT Press},
 address = {Cambridge, MA, USA},
 }

@article{kording2004,
author={Konrad P. K{\"o}rding and Christoph Kayser and Wolfgang
Einh{\"a}user and Peter K{\"o}nig},
title = "How Are Complex Cell Properties Adapted to the Statistics of
Natural Stimuli?",
year = 2004,
journal = "Journal of Neurophysiology",
volume = 91,
pages = {206--212},
url="http://jn.physiology.org/cgi/reprint/91/1/206.pdf"
}

@inproceedings{Koster-Hyvarinen-2007,
  author    = "Urs K{\"{o}}ster and Aapo Hyv{\"{a}}rinen",
  title     = "A two-layer {ICA}-like model estimated by {S}core {M}atching",
  booktitle = "Int. Conf. Artificial Neural Networks (ICANN'2007)",
  year      = "2007",
  pages     = "798--807",
}

@article{Iba-2001,
  author =       "Yukito Iba",
  title =        "Extended Ensemble {Monte Carlo}",
  journal =      "International Journal of Modern Physics C",
  volume =       "12",
  pages =        "623--656",
  year =         "2001",
}

@InProceedings{icml2009_093,
  author =    {Hossein Mobahi and Ronan Collobert and Jason Weston},
  title =     {Deep Learning from Temporal Coherence in Video},
  booktitle = {Proceedings of the 26th International Conference on Machine Learning},
  pages =     {737--744},
  year =      2009,
  editor =    {L{\'e}on Bottou and Michael Littman},
  address =   {Montreal},
  month =     jun,
  publisher = {Omnipress}
}

@inproceedings{icann:Holger+Yoshua:1997,
  author    = {Holger Schwenk and Yoshua Bengio},
  title     = {{AdaBoosting} Neural Networks: Application to on-line Character Recognition},
  booktitle = {International Conference on Artificial Neural Networks},
  pages     = {967--972},
  publisher = {Springer Verlag},
  year      = {1997},
}

@Article{Ide1998,
  author =       "Nancy Ide and Jean V{\'e}ronis",
  title =        "Introduction to the Special Issue on Word Sense
                 Disambiguation: The State of the Art",
  journal =      "Computational Linguistics",
  volume =       "24",
  number =       "1",
  pages =        "1--40",
  year =         "1998",
}

@article{IEEE-KDE:Frasconi95,
  author   = {P. Frasconi and M. Gori and M. Maggini and G. Soda},
  title    = {Unified Integration of Explicit Rules and Learning by Example in Recurrent Networks},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  year     = {1995},
  volume   = {7},
  number   = {2},
  pages    = {340--346},
  OPTmonth = {},
}

@Article{igel05,
  author =       "C. Igel and M. Toussaint and W. Weishui",
  title =        "Rprop using the natural gradient compared to
                 Levenberg-Marquardt optimization",
  journal =      "Trends and Applications in Constructive Approximation.
                 International Series of Numerical Mathematics.",
  volume =       "151",
  publisher =    "Birkh{\"a}user Verlag",
  pages =        "259--272",
  year =         "2005",
}

@article{intrator,
  author  = {Nathan Intrator and Shimon Edelman},
  title   = {How to make a low-dimensional representation suitable for diverse tasks},
  journal = {Connection Science, Special issue on Transfer in Neural Networks},
  year    = {1996},
  volume  = {8},
  pages   = {205--224},
}

@article{intrator96,
  author  = {Nathan Intrator and Shimon Edelman},
  title   = {How to make a low-dimensional representation suitable for diverse tasks},
  journal = {Connection Science, Special issue on Transfer in Neural Networks},
  year    = {1996},
  volume  = {8},
  pages   = {205--224},
}

@Article{Inzenman-91,
  author =       "A. J. Izenman",
  title =        "Recent developments in nonparametric density
                 estimation",
  journal =      "Journal of the American Statistical Association",
  volume =       "86",
  number =       "413",
  pages =        "205--224",
  year =         "1991",
}

@techreport{IOHMM-TR,
  author      = {Y. Bengio and P. Frasconi},
  title       = {An {EM} Approach to Learning Sequential Behavior},
  institution = {University of Florence},
  number      = {RT-DSI-11/94},
  year        = {1994},
}

@inproceedings{Irie88,
  author    = {B. Irie and S. Miyake},
  title     = {Capabilities of three layer perceptrons},
  booktitle = {IEEE Second International Conference on Neural Networks, San Diego},
  year      = {1988},
}

@article{Irino+Kawahara90,
  author  = {T. Irino and H. Kawahara},
  title   = {A Method for Designing Neural Networks Using Nonlinear Multivariate Analysis: Application to Speaker-Independent Vowel Recognition},
  journal = {Neural Computation},
  year    = {1990},
  volume  = {2},
  number  = {3},
  pages   = {386--397},
  type    = {Letter},
}

@article{ItoM2004,
	author = {Ito, Minami and Komatsu, Hidehiko},
	citeulike-article-id = {451606},
	doi = {10.1523/JNEUROSCI.4364-03.2004},
	journal = {Journal of Neuroscience},
	keywords = {cnv, v2},
	month = mar,
	number = {13},
	pages = {3313--3324},
	posted-at = {2007-03-30 11:19:11},
	priority = {0},
	title = {Representation of Angles Embedded within Contour Stimuli in Area {V2} of Macaque Monkeys},
	volume = {24},
	year = {2004}
}
	%url = {http://dx.doi.org/10.1523/JNEUROSCI.4364},

@Article{Jaakkola+Jordan99,
  author =       "T. Jaakkola and M. I. Jordan",
  title =        "Variational methods and the {QMR}-{DT} database",
  journal =      "Journal of Artificial Intelligence Research",
  volume =       "10",
  pages =        "291--322",
  year =         "1999",
}

% Deprecated key: the year in the key (98) is wrong, the entry's year is 1999. Prefer Jaakkola99.
@InProceedings{Jaakkola98,
  author =       "Tommi S. Jaakkola and David Haussler",
  editor =       NIPS11ed,
  booktitle =    NIPS11,
  title =        "Exploiting generative models in discriminative
                 classifiers",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "487--493",
  year =         "1999",
}

@InProceedings{Jaakkola99,
  author =       "Tommi S. Jaakkola and David Haussler",
  editor =       NIPS11ed,
  booktitle =    NIPS11,
  title =        "Exploiting generative models in discriminative
                 classifiers",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "487--493",
  year =         "1999",
}

@Misc{jaakkola98exploiting,
  author =       "T. Jaakkola and D. Haussler",
  title =        "Exploiting generative models in discriminative
                 classifiers",
  year =         "1998",
  note =         "Preprint, Dept. of Computer Science, Univ. of California.
                  A shorter version is in Advances in Neural
                  Information Processing Systems 11",
  howpublished = "Available from http://www.cse.ucsc.edu/{\textasciitilde}haussler/pubs.html",
}

@Article{Jacobs-nc91,
  author =       "R. A. Jacobs and M. I. Jordan and S. J. Nowlan and G.
                 E. Hinton",
  title =        "Adaptive mixtures of local experts",
  journal =      "Neural Computation",
  volume =       "3",
  pages =        "79--87",
  year =         "1991",
}

@InCollection{Jacobs-nips91,
  author =       "R. A. Jacobs and M. I. Jordan",
  editor =       NIPS3ed,
  booktitle =    NIPS3,
  title =        "A competitive modular connectionist architecture",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "San Mateo, CA",
  year =         "1991",
}

@TechReport{Jacobs-tr90,
  author =       "R. A. Jacobs and M. I. Jordan and A. G. Barto",
  title =        "Task Decomposition Through Competition in a Modular
                 Connectionist Architecture: The {What} and {Where}
                 Vision Tasks",
  number =       "COINS 90-27",
  institution =  "Department of Computer and Information Science,
                 University of Massachusetts",
  address =      "Amherst, MA",
  year =         "1990",
}

@article{Jacobs88,
  author  = {R. A. Jacobs},
  title   = {Increased Rates of Convergence Through Learning Rate Adaptation},
  journal = nn,
  year    = {1988},
  volume  = {1},
  pages   = {295--307},
}

@article{Jacobs91a,
  author  = {Robert A. Jacobs and Michael I. Jordan and Steven J. Nowlan and Geoffrey E. Hinton},
  title   = {Adaptive Mixtures of Local Experts},
  journal = nc,
  year    = {1991},
  volume  = {3},
  pages   = {79--87},
}

@article{Jacobs91b,
  author  = {R. A. Jacobs and M. I. Jordan and A. G. Barto},
  title   = {Task Decomposition Through Competition in a Modular Connectionist Architecture: The What and Where Vision Task},
  journal = {Cognitive Science},
  year    = {1991},
  volume  = {15},
  pages   = {219--250},
}

@article{Jacobs94,
  author  = {R. A. Jacobs and S. M. Kosslyn},
  title   = {Encoding Shape and Spatial Relations: The Role of Receptive Fields in Coordinating Complementary Representations},
  journal = {Cognitive Science},
  year    = {1994},
}

@article{Jaeger-2007,
  author  = "Herbert Jaeger",
  title   = "Echo state network",
  journal = "Scholarpedia",
  year    = "2007",
  volume  = "2",
  number  = "9",
  pages   = "2330",
}

@article{Japkowicz2000,
  author  = {Nathalie Japkowicz and Stephen J. Hanson and Mark A. Gluck},
  title   = {Nonlinear Autoassociation is not Equivalent to {PCA}},
  journal = {Neural Computation},
  year    = {2000},
  volume  = {12},
  number  = {3},
  pages   = {531--545},
}

@article{Japkowicz2002,
  author  = {N. Japkowicz and S. Stephen},
  title   = {The Class Imbalance Problem: {A} Systematic Study},
  journal = {Intelligent Data Analysis},
  year    = {2002},
  volume  = {6},
  number  = {5},
}

@inproceedings{Jarrett-ICCV2009,
  author    = {Jarrett, Kevin and Kavukcuoglu, Koray and Ranzato, {Marc'Aurelio} and {LeCun}, Yann},
  title     = {What is the Best Multi-Stage Architecture for Object Recognition?},
  booktitle = {Proc. International Conference on Computer Vision (ICCV'09)},
  publisher = {IEEE},
  year      = {2009},
  original  = {orig/jarrett-iccv-09.pdf},
}

@techreport{Jauvin+Bengio-TR2003,
  author      = {Christian Jauvin and Yoshua Bengio},
  title       = {A Sense-Smoothed Bigram Language Model},
  institution = {Dept. IRO, Universit\'e de Montr\'eal},
  number      = {1233},
  year        = {2003},
}

@book{Jaynes03,
  author    = {E. T. Jaynes},
  title     = {{Probability} {Theory}: {The} {Logic} of {Science}},
  publisher = {Cambridge University Press},
  year      = {2003},
}

@InCollection{Jaynes83,
  author =       "E. T. Jaynes",
  booktitle =    "Papers on Probability, Statistics and Statistical
                 Physics",
  title =        "{Bayesian} intervals versus confidence intervals",
  publisher =    "Kluwer",
  year =         "1983",
  editor =       "R. D. Rosenkrantz",
}

@article{JCB:Baldi95t,
  author  = {Y. Chauvin and P. Baldi},
  title   = {Hidden Markov models of the {G}-Protein-Coupled receptor family},
  journal = {Journal of Computational Biology},
  year    = {1995},
}

@InProceedings{jebara03,
  author =       "Tony Jebara and Risi Kondor",
  booktitle =    colt03,
  title =        "{Bhattacharyya} and Expected Likelihood Kernels",
  year =         "2003",
}

@InProceedings{Jebara03Convex,
  author =       "T. Jebara",
  booktitle =    "Proceedings of AISTATS 2003",
  title =        "Convex Invariance Learning",
  year =         "2003",
}

@InProceedings{jebara04,
  author =       "Tony Jebara",
  booktitle =    ICML04,
  editor =       ICML04ed,
  publisher =    ICML04publ,
  title =        "Multi-task feature and kernel selection for {SVMs}",
  address =      "New York, NY, USA",
  year =         "2004",
  location =     "Banff, Alberta, Canada",
}

@book{JebaraT2003,
  author               = {Tony Jebara},
  title                = {Machine Learning: Discriminative and Generative (The Kluwer International Series in Engineering and Computer Science)},
  publisher            = {Springer},
  howpublished         = {Hardcover},
  month                = dec,
  year                 = {2003},
  keywords             = {book, generative-discriminative, svm},
  comment              = {- maximum entropy discriminative as unification of discriminative and generative approaches},
  citeulike-article-id = {134203},
  priority             = {2},
}
  %ISBN =         "1402076479",
  %URL =          "http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20&path=ASIN/1402076479",

@InCollection{Jelinek+Mercer80,
  author =       "F. Jelinek and R. L. Mercer",
  editor =       "E. S. Gelsema and L. N. Kanal",
  booktitle =    "Pattern Recognition in Practice",
  title =        "Interpolated estimation of {Markov} source parameters
                 from sparse data",
  publisher =    "North-Holland",
  address =      "Amsterdam",
  year =         "1980",
}

@inproceedings{Jelinek-Chelba-99,
  author    = {Frederick Jelinek and Ciprian Chelba},
  title     = {Putting language into language modeling},
  booktitle = {European Conference on Speech Communication and Technology},
  volume    = {1},
  pages     = {KN1--KN5},
  address   = {Budapest},
  year      = {1999},
}

@article{Jelinek76,
  author  = {F. Jelinek},
  title   = {Continuous speech recognition by statistical methods},
  journal = {Proceedings of the IEEE},
  year    = {1976},
  volume  = {64},
  pages   = {532--556},
}

@InCollection{Jelinek80,
  author =       "F. Jelinek and R. L. Mercer",
  editor =       "E. S. Gelsema and L. N. Kanal",
  booktitle =    "Pattern Recognition in Practice",
  title =        "Interpolated Estimation of {Markov} Source Parameters
                 from Sparse Data",
  publisher =    "North-Holland",
  address =      "Amsterdam",
  year =         "1980",
  copy =         "yes",
}

@Book{Jelinek98,
  author =       "F. Jelinek",
  title =        "Statistical Methods for Speech Recognition",
  publisher =    "MIT Press",
  address =      "Cambridge, Massachusetts",
  year =         "1998",
}

@inproceedings{JensenRiis2000,
  author    = {K. J. Jensen and S. Riis},
  title     = {Self-organizing letter code-book for text-to-phoneme neural network model},
  booktitle = {International Conference on Spoken Language Processing},
  volume    = {3},
  pages     = {318--321},
  year      = {2000},
}

@inproceedings{Jeong96,
  author    = {E. Jeong and K. Furuta and S. Kondo},
  title     = {Identification of Transient in Nuclear Power Plant using Adaptive Template Matching with Neural Network},
  booktitle = nipc-hmit96,
  volume    = {1},
  pages     = {243--250},
  publisher = ans,
  year      = {1996},
}

@InCollection{joachims99largescaleSVM,
  author =       "T. Joachims",
  editor =       "B. Sch{\"o}lkopf and C. J. C. Burges and A. J. Smola",
  booktitle =    "Advances in Kernel Methods --- Support Vector
                 Learning",
  title =        "Making large-Scale {SVM} Learning Practical",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1999",
}

@inproceedings{joachims99transductive,
  author    = {Thorsten Joachims},
  title     = {Transductive Inference for Text Classification using Support Vector Machines},
  booktitle = ICML99,
  editor    = ICML99ed,
  pages     = {200--209},
  publisher = ICML99publ,
  address   = {Bled, SL},
  year      = {1999},
}
  %URL =          "citeseer.ist.psu.edu/joachims99transductive.html",

@techreport{Johansson90,
  author      = {E. M. Johansson and F. U. Dowla and D. M. Goodman},
  title       = {Backpropagation learning for multi-layer feed-forward neural networks using the conjugate gradient method},
  institution = {Lawrence Livermore National Laboratory},
  number      = {UCRL-JC-104850},
  month       = sep,
  year        = {1990},
}

@inproceedings{John+al-1994,
  author    = "John, George H. and Kohavi, Ron and Pfleger, Karl",
  title     = "Irrelevant Features and the Subset Selection Problem",
  booktitle = "Proceedings of the Eleventh International Conference on Machine Learning",
  pages     = "121--129",
  publisher = "Morgan Kaufmann",
  year      = "1994",
  url       = "http://citeseer.ist.psu.edu/john94irrelevant.html",
}

@article{Johnson89,
  author  = {D. S. Johnson and C. R. Aragon and L. A. McGeoch and C. Schevon},
  title   = {Optimization by Simulated Annealing: An Experimental Evaluation; Part {I}, Graph Partitioning},
  journal = opres,
  year    = {1989},
  volume  = {37},
  pages   = {865--891},
}

@inproceedings{Joines92QQ23,
  author    = {J. A. Joines and M. W. White},
  title     = {Improved Generalization Using Robust Cost Functions},
  booktitle = {IJCNN},
  pages     = {911--918},
  address   = {Baltimore, Maryland},
  month     = jun,
  year      = {1992},
  ref       = {QQ23},
}

@book{Jolliffe86,
  author    = {Ian T. Jolliffe},
  title     = {Principal Component Analysis},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {1986},
}

@book{Jolliffe-2002,
    author = {Ian T. Jolliffe},
    citeulike-article-id = {1154147},
    howpublished = {Hardcover},
    isbn = {0387954422},
    month = oct,
    posted-at = {2007-03-11 15:04:57},
    priority = {2},
    publisher = {Springer},
    title = {Principal Component Analysis},
    url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20\&path=ASIN/0387954422},
    year = {2002}
}

@Article{Jordan+Jacobs94,
  author =       "M. I. Jordan and R. A. Jacobs",
  title =        "Hierarchical mixtures of experts and the {EM}
                 algorithm",
  journal =      nc,
  volume =       "6",
  pages =        "181--214",
  year =         "1994",
}

@techreport{Jordan+Xu93,
  author      = {Michael I. Jordan and L. Xu},
  title       = {Convergence results for the {EM} approach to mixtures of experts architecture},
  institution = {MIT Computational Cognitive Science},
  number      = {9303},
  month       = sep,
  year        = {1993},
}

@article{Jordan-cs92,
  author  = {M. I. Jordan and D. E. Rumelhart},
  title   = {Forward models: Supervised learning with a distal teacher},
  journal = {Cognitive Science},
  year    = {1992},
  volume  = {16},
  pages   = {307--354},
}

@InProceedings{Jordan-HMDT97,
  author =       "M. Jordan and Z. Ghahramani and L. Saul",
  editor =       NIPS9ed,
  booktitle =    NIPS9,
  title =        "Hidden {Markov} decision trees",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1997",
}

@inproceedings{Jordan-nips92,
  author    = {M. I. Jordan and R. A. Jacobs},
  title     = {Hierarchies of adaptive experts},
  editor    = NIPS4ed,
  booktitle = NIPS4,
  pages     = {985--992},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1992},
}

@techreport{Jordan-tr86,
  author      = {M. I. Jordan},
  title       = {Serial Order: a Parallel Distributed Processing Approach},
  institution = {ICS (Institute for Cognitive Science, University of California)},
  number      = {8604},
  year        = {1986},
}

@inproceedings{Jordan86,
  author    = {M. I. Jordan},
  title     = {Attractor Dynamics and Parallelism in a Connectionist Sequential Machine},
  booktitle = {Proceedings of the Eighth Annual Conference of the Cognitive Science Society},
  pages     = {531--546},
  publisher = {Lawrence Erlbaum, Hillsdale},
  address   = {Amherst 1986},
  year      = {1986},
}

% NOTE(review): COINS is a University of Massachusetts (Amherst) report series;
% confirm whether the institution "MIT" is intended here.
@TechReport{Jordan88,
  author =       "M. I. Jordan",
  title =        "Supervised Learning and Systems with Excess Degrees of
                 Freedom",
  type =         "{COINS} Technical Report",
  number =       "88-27",
  institution =  "MIT",
  address =      "Cambridge MA",
  year =         "1988",
}

@incollection{Jordan89,
  author    = {M. I. Jordan},
  title     = {Serial Order: {A} Parallel, Distributed Processing Approach},
  editor    = {J. L. Elman and D. E. Rumelhart},
  booktitle = {Advances in Connectionist Theory: Speech},
  publisher = {Lawrence Erlbaum},
  address   = {Hillsdale},
  year      = {1989},
}

@inproceedings{Jordan89b,
  author    = {M. I. Jordan},
  title     = {Supervised learning and systems with excess degrees of freedom},
  editor    = {G. Hinton and D. S. Touretzky},
  booktitle = {Proceedings of the 1988 Connectionist Models Summer School},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1989},
}

@InCollection{Jordan90,
  author =       "M. I. Jordan",
  editor =       "M. Jeannerod",
  booktitle =    "Attention and Performance XIII",
  title =        "Motor learning and the degrees of freedom problem",
  publisher =    "Erlbaum",
  address =      "Hillsdale, NJ",
  year =         "1990",
}

% Edited volume: Jordan is the editor, not the author.
@Book{Jordan98,
  editor =       "M. I. Jordan",
  title =        "Learning in Graphical Models",
  publisher =    "Kluwer",
  address =      "Dordrecht, Netherlands",
  year =         "1998",
}

% Same work as Jour-Freund-AdaBoostDetailed; both keys are kept for existing citations.
@Article{Jour:Freund:AdaBoostDetailed,
  author =       "Yoav Freund and Robert E. Schapire",
  title =        "A decision theoretic generalization of on-line
                 learning and an application to Boosting",
  journal =      "Journal of Computer and System Sciences",
  volume =       "55",
  number =       "1",
  pages =        "119--139",
  year =         "1997",
}

@article{Jour:Freund:boost,
  author  = {Yoav Freund},
  title   = {Boosting a weak learning algorithm by majority},
  journal = {Information and Computation},
  volume  = {121},
  number  = {2},
  pages   = {256--285},
  year    = {1995},
}

% Same work as Jour:Freund:AdaBoostDetailed; both keys are kept for existing citations.
@Article{Jour-Freund-AdaBoostDetailed,
  author =       "Yoav Freund and Robert E. Schapire",
  title =        "A decision theoretic generalization of on-line
                 learning and an application to Boosting",
  journal =      "Journal of Computer and System Sciences",
  volume =       "55",
  number =       "1",
  pages =        "119--139",
  year =         "1997",
}

@PhdThesis{Jouvet88,
  author =       "D. Jouvet",
  title =        "Reconnaissance de Mots Connect{\'e}s Ind{\'e}pendamment du
                 Locuteur par des M{\'e}thodes Statistiques",
  number =       "NST-88E006",
  school =       "{\'E}cole Nationale Sup{\'e}rieure des T{\'e}l{\'e}communications",
  year =         "1988",
}

@inproceedings{JuanA2001,
 author = {Alfons Juan and Enrique Vidal},
 title = {On the use of {Bernoulli} Mixture Models for Text Classification},
 booktitle = {PRIS '01: Proceedings of the 1st International Workshop on Pattern Recognition in Information Systems},
 year = {2001},
 pages = {118--126},
 publisher = {ICEIS Press},
 }

@inproceedings{JuanA2004,
 author = {Alfons Juan and Enrique Vidal},
 title = {{Bernoulli} Mixture Models for Binary Images},
 booktitle = {Proceedings of the 17th International Conference on Pattern Recognition (ICPR'04)},
 volume = {3},
 year = {2004},
 pages = {367--370},
 publisher = {IEEE Computer Society},
 address = {Washington, DC, USA},
 }

@article{Juang92,
  author  = {B. H. Juang and S. Katagiri},
  title   = {Discriminative learning for minimum error classification},
  journal = {IEEE Transactions on Signal Processing},
  volume  = {40},
  number  = {12},
  pages   = {3043--3054},
  year    = {1992},
}

@article{Judd88,
  author  = {S. Judd},
  title   = {On the complexity of loading shallow neural networks},
  journal = {Journal of Complexity},
  volume  = {4},
  pages   = {177--192},
  year    = {1988},
}

@Book{JuddBook,
  author =       "J. S. Judd",
  title =        "Neural Network Design and the Complexity of Learning",
  publisher =    "MIT Press",
  year =         "1989",
}

@book{Jurafsky+Martin-2008,
    author = {Jurafsky, Daniel and Martin, James H.},
    title = {Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition},
    edition = {Second},
    publisher = {Prentice Hall},
    month = jan,
    year = {2008},
    howpublished = {Hardcover}
}

@article{Jutten+Herault-91,
  author  = {Christian Jutten and Jeanny Herault},
  title   = {Blind separation of sources, part {I}: an adaptive algorithm based on neuromimetic architecture},
  journal = {Signal Processing},
  volume  = {24},
  pages   = {1--10},
  year    = {1991},
}

@inproceedings{Kahng89,
  author    = {A. B. Kahng},
  title     = {Travelling Salesman Heuristics and Embedding Dimension in the Hopfield Model},
  booktitle = ijcnn,
  volume    = {1},
  pages     = {513--520},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

% Names use the "Last, First" form; the previous "Last First" order made
% BibTeX treat the given names as surnames.
@InProceedings{kai03,
  author =       "Yu, Kai and Schwaighofer, Anton and Tresp, Volker and
                 Ma, Wei-Ying and Zhang, HongJiang",
  booktitle =    UAI03,
  title =        "Collaborative Ensemble Learning: Combining
                 Collaborative and Content-Based Information Filtering
                 via Hierarchical {Bayes}",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "San Francisco, CA",
  pages =        "616--623",
  year =         "2003",
}

@article{Kalman61,
  author  = {R. Kalman and R. S. Bucy},
  title   = {New results in linear filtering and prediction},
  journal = {Journal of Basic Engineering (ASME)},
  volume  = {83D},
  pages   = {95--108},
  year    = {1961},
}

@Article{Kambhatla+Leen-1997,
  author =       "Kambhatla, N. and Leen, T. K.",
  title =        "Dimension Reduction by Local Principal Component Analysis",
  journal =      "Neural Computation",
  volume =       "9",
  pages =        "1493--1516",
  year =         "1997",
}

@article{Kammen88,
  author  = {D. M. Kammen and A. L. Yuille},
  title   = {Spontaneous Symmetry-Breaking Energy Functions and the Emergence of Orientation Selective Cortical Cells},
  journal = biocyb,
  volume  = {59},
  pages   = {23--31},
  year    = {1988},
}

@inproceedings{Kammerer89,
  author    = {B. K. Kammerer and W. A. Kupper},
  title     = {Design of Hierarchical Perceptron Structures and their Application to the Task of Isolated Word Recognition},
  booktitle = ijcnn,
  address   = {Washington D.C.},
  year      = {1989},
}

@Book{Kandel85,
  author =       "E. R. Kandel and J. H. Schwartz",
  title =        "Principles of Neural Science",
  publisher =    "Elsevier",
  address =      "New York",
  edition =      "Second",
  year =         "1985",
}

@article{Kanter87,
  author  = {I. Kanter and H. Sompolinsky},
  title   = {Associative Recall of Memory Without Errors},
  journal = prA,
  volume  = {35},
  pages   = {380--392},
  year    = {1987},
}

@inproceedings{KarklinY2003,
  author    = {Yan Karklin and
               Michael S. Lewicki},
  title     = {A Model for Learning Variance Components of Natural Images},
  year      = {2003},
  pages     = {1367--1374},
  editor =       NIPS15ed,
  booktitle =    NIPS15,
  publisher =    "{MIT} Press",
}

% The citation key keeps the historical misspelling so existing citations still resolve.
@Article{Karmin90,
  author =       "E. D. Karnin",
  title =        "A simple procedure for pruning back-propagation
                 trained neural networks",
  journal =      ieeetrnn,
  volume =       "1",
  number =       "2",
  pages =        "239--242",
  year =         "1990",
}

@Article{Karplus97,
  author =       "K. Karplus and K. Sjolander and C. Barrett and M.
                 Cline and D. Haussler and R. Hughey and L. Holm and C.
                 Sander",
  title =        "Predicting protein structure using hidden {Markov}
                 models",
  journal =      "Proteins: Structure, Function and Genetics",
  volume =       "S 1",
  number =       "1",
  pages =        "134--139",
  year =         "1997",
}

@phdthesis{KasselR1995,
  author =       "Robert Kassel",
  title =        "A Comparison of Approaches to On-line Handwritten Character Recognition",
  school =       "MIT Spoken Language Systems Group",
  year =         "1995",
}

@article{Katz87,
  author  = {Slava M. Katz},
  title   = {Estimation of Probabilities from Sparse Data for the Language Model Component of a Speech Recognizer},
  journal = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
  volume  = {ASSP-35},
  number  = {3},
  pages   = {400--401},
  month   = mar,
  year    = {1987},
}

@incollection{Kaul,
  author    = {G. Kaul},
  title     = {Predictable Components in Stock Returns},
  editor    = {G. S. Maddala and C. R. Rao},
  booktitle = {Handbook of Statistics, Vol. 14},
  pages     = {269--296},
  publisher = {Elsevier Science},
  year      = {1996},
}

@inproceedings{kbnn-craven.mlc93,
  author    = {Mark W. Craven and Jude W. Shavlik},
  title     = {Learning Symbolic Rules Using Artificial Neural Networks},
  booktitle = {Proceedings of the Tenth International Conference on Machine Learning},
  pages     = {73--80},
  publisher = {Morgan Kaufmann},
  address   = {Amherst, MA},
  year      = {1993},
}

@InProceedings{kbnn-maclin.aaai92,
  author =       "R. Maclin and J. Shavlik",
  booktitle =    "Proceedings of the Tenth National Conference on
                 Artificial Intelligence",
  title =        "Using Knowledge-Based Neural Networks to Improve
                 Algorithms: Refining the {Chou-Fasman} Algorithm for
                 Protein Folding",
  address =      "San Jose, CA",
  pages =        "165--170",
  year =         "1992",
}

% The report label lives in "type" so styles do not also print "Technical Report".
@TechReport{kbnn-maclin.mlrgwp91,
  author =       "R. Maclin and J. W. Shavlik",
  title =        "Refining Algorithms with Knowledge-Based Neural
                 Networks: Improving the {Chou-Fasman} Algorithm for
                 Protein Folding",
  type =         "Machine Learning Research Group Working Paper",
  number =       "91-2",
  institution =  "Department of Computer Sciences, University of
                 Wisconsin",
  year =         "1991",
  note =         "also in Computational Learning Theory and Natural
                 Learning Systems, volume 1, S. Hanson, G. Drastal, and
                 R. Rivest, (eds.), MIT Press",
}

@inproceedings{kbnn-noordewier.nips3,
  author    = {Michiel O. Noordewier and Geoffrey G. Towell and Jude W. Shavlik},
  title     = {Training Knowledge-Based Neural Networks to Recognize Genes in {DNA} Sequences},
  editor    = NIPS3ed,
  booktitle = NIPS3,
  pages     = {530--536},
  publisher = {Morgan Kaufmann},
  address   = {Denver, CO},
  year      = {1991},
}

@inproceedings{kbnn-opitz.ijcai93,
  author    = {D. W. Opitz and J. W. Shavlik},
  title     = {Heuristically Expanding Knowledge-Based Neural Networks},
  booktitle = {Proceedings of the Thirteenth International Joint Conference on Artificial Intelligence},
  address   = {Chambery, France},
  month     = sep,
  year      = {1993},
}

% The report label lives in "type" so styles do not also print "Technical Report".
@TechReport{kbnn-opitz.mlrgwp92,
  author =       "D. W. Opitz and J. W. Shavlik",
  title =        "Using Heuristic Search to Expand Knowledge-Based
                 Neural Networks",
  type =         "Machine Learning Research Group Working Paper",
  number =       "92-1",
  institution =  "Department of Computer Sciences, University of
                 Wisconsin",
  year =         "1992",
  note =         "(also in Computational Learning Theory and Natural
                 Learning Systems, volume 3, T. Petsche, S. Judd, and S.
                 Hanson, (eds.), MIT Press)",
}

% "number" holds only the report number; the style supplies "Technical Report"
% and the institution field already names the University of Wisconsin.
@TechReport{kbnn-shavlik.tr92,
  author =       "J. W. Shavlik",
  title =        "A Framework for Combining Symbolic and Neural
                 Learning",
  number =       "1123",
  institution =  "Department of Computer Sciences, University of
                 Wisconsin",
  year =         "1992",
  note =         "(a shorter version will appear in Machine Learning)",
}

@inproceedings{kbnn-towell.aaai90,
  author    = {G. G. Towell and J. W. Shavlik and M. O. Noordewier},
  title     = {Refinement of Approximate Domain Theories by Knowledge-Based Neural Networks},
  booktitle = {Proceedings of the Eighth National Conference on Artificial Intelligence},
  pages     = {861--866},
  address   = {Boston, MA},
  year      = {1990},
}

@inproceedings{kbnn-towell.aaai92,
  author    = {G. Towell and J. Shavlik},
  title     = {Using Symbolic Learning to Improve Knowledge-Based Neural Networks},
  booktitle = {Proceedings of the Tenth National Conference on Artificial Intelligence},
  pages     = {177--182},
  address   = {San Jose, CA},
  year      = {1992},
}

@article{kbnn-towell.aij94,
  author  = {Geoffrey G. Towell and Jude W. Shavlik},
  title   = {Knowledge-Based Neural Networks},
  journal = {Artificial Intelligence},
  year    = {1994},
  note    = {undergoing 2nd review},
}

@incollection{kbnn-towell.ml493,
  author    = {Geoffrey G. Towell and Jude W. Shavlik},
  title     = {Refining Symbolic Knowledge Using Neural Networks},
  editor    = {R. S. Michalski and G. Tecuci},
  booktitle = {Machine Learning: An Integrated Approach},
  volume    = {4},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1993},
}

@inproceedings{kbnn-towell.mlc91,
  author    = {Geoffrey G. Towell and Mark W. Craven and Jude W. Shavlik},
  title     = {Constructive Induction in Knowledge-Based Neural Networks},
  booktitle = {Proceedings of the Eighth International Machine Learning Workshop},
  pages     = {213--217},
  publisher = {Morgan Kaufmann},
  address   = {Evanston, IL},
  year      = {1991},
}

@article{kbnn-towell.mlj93,
  author  = {Geoffrey G. Towell and Jude W. Shavlik},
  title   = {The Extraction of Refined Rules from Knowledge-Based Neural Networks},
  journal = {Machine Learning},
  volume  = {13},
  number  = {1},
  pages   = {71--101},
  year    = {1993},
}

@inproceedings{kbnn-towell.nips4,
  author    = {Geoffrey G. Towell and Jude W. Shavlik},
  title     = {Interpretation of Artificial Neural Networks: Mapping knowledge-based Neural Networks into Rules},
  editor    = NIPS4ed,
  booktitle = NIPS4,
  publisher = {Morgan Kaufmann},
  address   = {Denver, CO},
  year      = {1992},
}

@phdthesis{kbnn-towell.thesis,
  author = {Geoffrey G. Towell},
  title  = {Symbolic Knowledge and Neural Networks: Insertion, Refinement and Extraction},
  school = {University of Wisconsin -- Madison},
  year   = {1991},
  note   = {(Also appears as UW Technical Report 1072 [out of print].)},
}

@InProceedings{Kearns+Ron97,
  author =       "Michael Kearns and Dana Ron",
  booktitle =    "Tenth Annual Conference on Computational Learning
                 Theory",
  title =        "Algorithmic Stability and Sanity-Check Bounds for
                 Leave-One-Out Cross-Validation",
  publisher =    "Morgan Kaufmann",
  pages =        "152--162",
  year =         "1997",
}

% Author list rebuilt from a garbled import: three authors, Keeler, Rumelhart and Leow.
@InCollection{keeler-rumelhart-91,
  author =       "J. Keeler and D. Rumelhart and W.-K. Leow",
  editor =       NIPS3ed,
  booktitle =    NIPS3,
  title =        "Integrated segmentation and recognition of
                 hand-printed numerals",
  publisher =    "Morgan Kaufmann Publishers, San Mateo, CA",
  pages =        "557--563",
  year =         "1991",
}

@article{Keerthi+Lin-2003,
  author  = {S. Sathiya Keerthi and Chih-Jen Lin},
  title   = {Asymptotic Behaviors of Support Vector Machines with {Gaussian} Kernel},
  journal = {Neural Computation},
  volume  = {15},
  number  = {7},
  pages   = {1667--1689},
  year    = {2003},
}

% Same paper as Kegl2003; both keys are kept for existing citations.
@InCollection{Kegl-2003,
  author =       "Bal{\'a}zs K{\'e}gl",
  editor =       NIPS15ed,
  booktitle =    NIPS15,
  title =        "Intrinsic Dimension Estimation Using Packing Numbers",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "681--688",
  year =         "2003",
}

@article{Kegl-Krzyzak-2002,
  author  = {B. Kegl and A. Krzyzak},
  title   = {Piecewise linear skeletonization using principal curves},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {24},
  number  = {1},
  pages   = {59--74},
  year    = {2002},
}

@inproceedings{Kegl2003,
  author    = {B. Kegl},
  title     = {Intrinsic dimension estimation using packing numbers},
  editor    = NIPS15ed,
  booktitle = NIPS15,
  publisher = {The {MIT} Press},
  year      = {2003},
}

@InCollection{kegl2005,
  author =       "Bal{\'a}zs K{\'e}gl and Ligen Wang",
  editor =       NIPS17ed,
  booktitle =    NIPS17,
  title =        "Boosting on Manifolds: Adaptive Regularization of Base
                 Classifiers",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2005",
}

@techreport{Kehagias89,
  author      = {A. Kehagias},
  title       = {Stochastic Recurrent Networks: Prediction and Classification of Time Series},
  institution = {Brown University. Division of Applied Mathematics},
  address     = {Providence, RI 02912},
  year        = {1991},
}

% Publisher was stored in a misspelled, ignored field; the LNCS series name and
% volume number are now in their own fields.
@InProceedings{KellerM2005,
  author =       "M. Keller and S. Bengio",
  booktitle =    "Proceedings of the 15th International Conference on
                 Artificial Neural Networks: Biological Inspirations,
                 ICANN",
  title =        "A neural network for text representation",
  series =       "Lecture Notes in Computer Science",
  volume =       "3697",
  publisher =    "Springer-Verlag",
  pages =        "667--672",
  year =         "2005",
}

% First author is Heller; the key is kept as-is so existing citations still resolve.
@inproceedings{Keller2007,
 author = {Katherine A. Heller and Zoubin Ghahramani},
 booktitle =    aistats07,
 year = 2007,
 title = {A Nonparametric {Bayesian} Approach to Modeling Overlapping Clusters},
 publisher =    "Omnipress",
 date =         "March 21-24, 2007",
 address =      "San Juan, Puerto Rico",
 pages =        "187--194",
}

@InProceedings{Keller2008,
  author =       "Katherine A. Heller and Sinead Williamson and Zoubin Ghahramani",
  title =        "Statistical models for partial membership",
  editor =       ICML08ed,
  booktitle =    ICML08,
  publisher =    ICML08publ,
  location =     "Helsinki, Finland",
  pages =        "392--399",
  year =         2008,
}

@Book{Kelly1975,
  author =       "Edward Kelly and Philip Stone",
  title =        "Computer recognition of {English} word senses",
  series =       "North-Holland Linguistics Series",
  publisher =    "North-Holland",
  year =         "1975",
}

@InProceedings{Kemp+al-2004,
  author =       "C. Kemp and T. L. Griffiths and S. Stromsten and J. B.
                 Tenenbaum",
  editor =       NIPS16ed,
  booktitle =    NIPS16,
  title =        "Semi-supervised learning with trees",
  publisher =    "{MIT} Press",
  address =      "Cambridge, MA",
  year =         "2004",
}

@inproceedings{Kerr2007,
 author = {Wesley Kerr and Shane Hoversten and Daniel Hewlett and Paul R. Cohen and Yu-Han Chang},
 title = {Learning in {Wubble World}},
 booktitle = {Proceedings of the {IEEE} Int. Conference on Development and Learning},
 year = 2007,
}

@article{Kerszberg90,
  author  = {M. Kerszberg and A. Zippelius},
  title   = {Synchronization in Neural Assemblies},
  journal = pscrip,
  volume  = {T33},
  pages   = {54--64},
  year    = {1990},
}

@inproceedings{Keysers2000,
  author    = {D. Keysers and J. Dahmen and H. Ney},
  title     = {A probabilistic view on tangent distance},
  booktitle = {22nd Symposium of the German Association for Pattern Recognition},
  address   = {Kiel, Germany},
  year      = {2000},
}

@book{Khalil92,
  author    = {Hassan K. Khalil},
  title     = {Nonlinear Systems},
  publisher = {Macmillan Publishing Company},
  address   = {New York},
  year      = {1992},
}

@Book{Kiang65,
  author =       "N. Y. S. Kiang and T. Watanabe and E. C. Thomas and L.
                 F. Clark",
  title =        "Discharge patterns of single fibers in the cat's
                 auditory nerve fiber",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1965",
}

@article{Kiefer80,
  author  = {N. M. Kiefer},
  title   = {A note on switching regressions and logistic discrimination},
  journal = {Econometrica},
  volume  = {48},
  pages   = {1065--1069},
  year    = {1980},
}

@misc{Kilgarriff2000,
  author = {Adam Kilgarriff and Joseph Rosenzweig},
  title  = {English {SENSEVAL}: Report and Results},
  year   = {2000},
  url    = {citeseer.nj.nec.com/335615.html},
  text   = {A. Kilgarriff and J. Rosenzweig. English SENSEVAL: Report and Results. In Proceedings of the 2nd International Conference on Language Resources and Evaluation, LREC, Athens, Greece.},
}

@inproceedings{Kilgarriff2002,
  author       = {Adam Kilgarriff},
  title        = {English lexical sample task description},
  booktitle    = {Proceedings of Senseval-2},
  organization = {ACL workshop},
  year         = {2002},
}

@Article{Kim94,
  author =       "C. J. Kim",
  title =        "Dynamical linear models with {Markov}-switching",
  journal =      "Journal of Econometrics",
  volume =       "60",
  pages =        "1--22",
  year =         "1994",
}

@Article{Kimeldorf-Wahba-71,
  author =       "G. Kimeldorf and G. Wahba",
  title =        "Some results on {Tchebycheffian} spline functions",
  journal =      "Journal of Mathematical Analysis and Applications",
  volume =       "33",
  pages =        "82--95",
  year =         "1971",
}

@incollection{Kinzel90,
  author    = {W. Kinzel and M. Opper},
  title     = {Dynamics of Learning},
  editor    = {E. Domany and J. L. van Hemmen and K. Schulten},
  booktitle = {Physics of Neural Networks},
  volume    = {1},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = {1990},
}

@inproceedings{Kira+Rendell-1992,
    author    = {Kenji Kira and Larry A. Rendell},
    title     = {The Feature Selection Problem: Traditional Methods and a New Algorithm},
    booktitle = {Proceedings of the Tenth National Conference on Artificial Intelligence},
    year      = {1992},
    pages     = {129--134},
    bibsource = {DBLP, http://dblp.uni-trier.de}
}

@InProceedings{Kira+Rendell-1992b,
  author =       "Kenji Kira and Larry A. Rendell",
  title =        "A practical approach to feature selection",
  booktitle =    "Proceedings of the Ninth International Conference on Machine learning",
  pages =        "249--256",
  publisher =    "Morgan Kaufmann",
  address =      "San Francisco, CA, USA",
  year =         "1992",
  isbn =         "15586247X",
  url =          "http://portal.acm.org/citation.cfm?id=142034",
  posted-at =    "2007-02-07 04:40:40",
}

@book{Kirk70,
  author    = {D. E. Kirk},
  title     = {Optimal Control Theory: an Introduction},
  publisher = {Prentice Hall},
  address   = {Englewood Cliffs NJ},
  year      = {1970},
}

@book{Kirk70a,
  author    = {D. E. Kirk},
  title     = {Optimal Control Theory: an Introduction},
  publisher = {Prentice Hall},
  address   = {Englewood Cliffs NJ},
  year      = {1970},
}

% "Jr." uses the "Last, Jr., First" form so it is parsed as a suffix, and the
% doubled "and" separator is removed.
@Article{Kirkpatrick83,
  author =       "S. Kirkpatrick and Gelatt, Jr., C. D. and M. P.
                 Vecchi",
  title =        "Optimization by Simulated Annealing",
  journal =      science,
  volume =       "220",
  pages =        "671--680",
  year =         "1983",
}

@article{Kirkpatrick85,
  author  = {S. Kirkpatrick and G. Toulouse},
  title   = {Configuration Space Analysis of Travelling Salesman Problems},
  journal = jpp,
  volume  = {46},
  pages   = {1277--1292},
  year    = {1985},
}

% Series name and volume moved out of "publisher"; the series editors are kept
% in a non-printing field. NOTE(review): publisher supplied from the series
% (Lecture Notes in Statistics is a Springer series) -- confirm.
@Book{kitagawa+gersch96,
  author =       "G. Kitagawa and W. Gersch",
  title =        "Smoothness priors analysis of time series",
  series =       "Lecture Notes in Statistics",
  volume =       "116",
  publisher =    "Springer-Verlag",
  year =         "1996",
  serieseditors = "P. Bickel and P. Diggle and S. Fienberg and K.
                 Krickeberg and I. Olkin and W. Wermuth and S. Zeger",
}

@article{kitagawa87,
  author  = {G. Kitagawa},
  title   = {Non-{Gaussian} State-Space Modeling on Nonstationary Time Series},
  journal = {Journal of the American Statistical Association},
  volume  = {82},
  number  = {400},
  pages   = {1032--1063},
  year    = {1987},
}

@Article{kitagawa96,
  author =       "G. Kitagawa",
  title =        "{Monte} {Carlo} Filter and Smoother for Non-{Gaussian}
                 Nonlinear State Space Models",
  journal =      "Journal of Computational and Graphical Statistics",
  volume =       "5",
  number =       "1",
  pages =        "1--25",
  year =         "1996",
}

% Proceedings paper (see the "text" field); it was typed as an article with no
% journal, which leaves a required field empty.
@InProceedings{Kivinen02,
  author =       "J. Kivinen and A. Smola and R. Williamson",
  title =        "Online Learning with kernels",
  booktitle =    "Advances in Neural Information Processing Systems 14",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "785--793",
  year =         "2002",
  URL =          "citeseer.csail.mit.edu/kivinen02online.html",
  text =         "J. Kivinen, A. Smola, and R. C. Williamson, (2002)
                 Online Learning with kernels. Advances in Neural
                 Information Processing Systems 14, Cambridge, MA: MIT
                 Press (pp. 785-793).",
}

@inproceedings{Klatt82,
  author    = {D. Klatt},
  title     = {Prediction of perceived phonetic distance from critical-band spectra: a first step},
  booktitle = icassp,
  pages     = {1278--1281},
  year      = {1982},
}

@InProceedings{Kleinberg-2003,
  author =       {J. Kleinberg},
  title =        {An impossibility theorem for clustering},
  editor =       NIPS15ed,
  booktitle =    NIPS15,
  publisher =    {MIT Press},
  address =      {Cambridge, MA},
  year =         {2003},
}

@article{Kleinfeld86,
  author  = {D. Kleinfeld},
  title   = {Sequential State Generation by Model Neural Networks},
  journal = PNAS,
  volume  = {83},
  pages   = {9469--9473},
  year    = {1986},
}

@incollection{Kleinfeld89,
  author    = {D. Kleinfeld and H. Sompolinsky},
  title     = {Associative Network Models for Central Pattern Generators},
  editor    = {C. Koch and I. Segev},
  booktitle = {Methods in Neuronal Modeling: From Synapses to Networks},
  pages     = {195--246},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1989},
}

@book{Klopf82,
  author    = {A. H. Klopf},
  title     = {The Hedonistic Neuron: {A} Theory of Memory, Learning, and Intelligence},
  publisher = {Hemisphere},
  address   = {Washington},
  year      = {1982},
}

@inproceedings{Kneser95,
  author    = {Reinhard Kneser and Hermann Ney},
  title     = {Improved Backing-Off for {M}-Gram Language Modeling},
  booktitle = icassp,
  pages     = {181--184},
  year      = {1995},
}

@article{Koch86,
  author  = {C. Koch and J. Marroquin and A. Yuille},
  title   = {Analog ``Neuronal'' Networks in Early Vision},
  journal = PNAS,
  volume  = {83},
  pages   = {4263--4267},
  year    = {1986},
}

@inproceedings{Koch88,
  author    = {C. Koch and J. Luo and C. Mead and J. Hutchinson},
  title     = {Computing Motion Using Resistive Networks},
  editor    = nips87ed,
  booktitle = nips87,
  pages     = {422--431},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  year      = {1988},
}

@InProceedings{Kohavi95,
  author =       "Ron Kohavi",
  booktitle =    "Proceedings of the Fourteenth International Joint
                 Conference on Artificial Intelligence",
  title =        "A Study of Cross-Validation and Bootstrap for Accuracy
                 Estimation and Model Selection",
  publisher =    "Morgan Kaufmann",
  pages =        "1137--1143",
  year =         "1995",
}

@article{Kohavi+John-1997,
        address = {Essex, UK},
        author = {Kohavi, Ron and John, George H.},
        doi = {10.1016/S0004-3702(97)00043-X},
        issn = {0004-3702},
        journal = {Artificial Intelligence},
        number = {1--2},
        pages = {273--324},
        publisher = {Elsevier Science Publishers Ltd.},
        title = {Wrappers for feature subset selection},
        url = {http://portal.acm.org/citation.cfm?id=270627},
        volume = {97},
        year = {1997}
}

@article{Kohonen-ieee90,
  author  = {T. Kohonen},
  title   = {The Self-Organizing Map},
  journal = ieeeproc,
  volume  = {78},
  number  = {9},
  pages   = {1464--1480},
  year    = {1990},
  OPTnote = {Special Issue on Neural Networks},
}

@article{Kohonen74,
  author  = {T. Kohonen},
  title   = {An Adaptive Associative Memory Principle},
  journal = ieeetc,
  volume  = {C-23},
  pages   = {444--445},
  year    = {1974},
}

@Article{Kohonen82,
  author =       "T. Kohonen",
  title =        "Self-Organized Formation of Topologically Correct
                 Feature Maps",
  journal =      biocyb,
  volume =       "43",
  pages =        "59--69",
  year =         "1982",
}

@inproceedings{Kohonen84,
  author    = {T. Kohonen and K. M{\"a}kisara and T. Saram{\"a}ki},
  title     = {Phonotopic Maps --- Insightful Representation of Phonological Features for Speech Recognition},
  booktitle = {Proceedings of the Seventh International Conference on Pattern Recognition},
  pages     = {182--185},
  publisher = {IEEE, New York},
  address   = {Montreal 1984},
  year      = {1984},
}

@techreport{Kohonen86lvq,
  author      = {Teuvo Kohonen},
  title       = {Learning Vector Quantization for Pattern Recognition},
  institution = {Helsinki University of Technology},
  type        = {Report},
  number      = {TKK-F-A601},
  address     = {Espoo, Finland},
  year        = {1986},
}

@inproceedings{Kohonen88,
  author    = {T. Kohonen and G. Barna and R. Chrisley},
  title     = {Statistical Pattern Recognition with Neural Networks: Benchmarking Studies},
  booktitle = icnn,
  year      = {1988},
  volume    = {1},
  pages     = {61--68},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
}

% Edition is spelled as an ordinal word: classic .bst styles print the field
% verbatim followed by "edition", so a bare "3" would render as "3 edition".
@Book{Kohonen89,
  author =       "T. Kohonen",
  title =        "Self-Organization and Associative Memory",
  publisher =    "Springer-Verlag",
  address =      "Berlin",
  edition =      "Third",
  year =         "1989",
}

% Edition is spelled as an ordinal word: classic .bst styles print the field
% verbatim followed by "edition", so a bare "3" would render as "3 edition".
@Book{Kohonen-2001,
  author =       "T. Kohonen",
  title =        "Self-Organizing Maps",
  publisher =    "Springer",
  edition =      "Third",
  year =         "2001",
}

% Accent written as a LaTeX escape so the entry stays 7-bit clean for
% classic BibTeX (raw non-ASCII bytes break sorting and some encodings).
@Article{Kolchinskii2000,
  author =       "V. Koltchinskii and E. Gin{\'e}",
  title =        "Random matrix approximation of spectra of integral
                 operators",
  journal =      "Bernoulli",
  volume =       "6",
  number =       "1",
  pages =        "113--167",
  year =         "2000",
}

@techreport{Kolen+Pollack90,
  author      = {J. F. Kolen and J. B. Pollack},
  title       = {Back propagation is sensitive to initial conditions},
  institution = {The Ohio State University},
  type        = {Technical Report},
  number      = {TR 90-{JK}-{BPSIC}},
  year        = {1990},
  key         = {kolen},
}

@inproceedings{Kolen-nips94,
  author    = {John F. Kolen},
  title     = {Fool's Gold: Extracting Finite State Machines From Recurrent Network Dynamics},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  year      = {1994},
  publisher = {Morgan Kaufmann},
}

% Title corrected to the singular "legge"; journal abbreviation uses the
% Italian "Ist." (Istituto); page range added.
@Article{Kolmogorov33,
  author =       "A. N. Kolmogorov",
  title =        "Sulla determinazione empirica di una legge di
                 distribuzione",
  journal =      "G. Ist. Ital. Attuari",
  volume =       "4",
  pages =        "83--91",
  year =         "1933",
  note =         "translated in English in {\em Breakthroughs in
                 Statistics}, by Kotz and Johnson (editors),
                 Springer-Verlag, 1992",
}

% Journal abbreviation corrected (Doklady Akademii Nauk SSSR). The translation
% pointer lives in "note" because standard styles ignore "publisher" on
% article entries, so it was never printed.
@Article{Kolmogorov57,
  author =       "A. N. Kolmogorov",
  title =        "On the representation of continuous functions of many
                 variables by superposition of continuous functions of
                 one variable and addition",
  journal =      "Dokl. Akad. Nauk SSSR",
  volume =       "114",
  pages =        "953--956",
  year =         "1957",
  note =         "Translated in: American Mathematical Society
                 Translations 28 (1963) 55--59",
}

% Journal name corrected: "Problems of Information Transmission".
@Article{Kolmogorov65,
  author =       "A. N. Kolmogorov",
  title =        "Three approaches to the quantitative definition of
                 information",
  journal =      "Problems of Information Transmission",
  volume =       "1",
  number =       "1",
  pages =        "1--7",
  year =         "1965",
}

% Publisher name carries its umlaut as a LaTeX escape.
@InProceedings{Koltchinskii-1998,
  author =       "V. Koltchinskii",
  editor =       "Eberlein and Hahn and Talagrand",
  booktitle =    "Progress in Probability",
  title =        "Asymptotics of Spectral Projections of Some Random
                 Matrices Approximating Integral Operators",
  volume =       "43",
  publisher =    "Birkh{\"a}user",
  address =      "Basel",
  pages =        "191--227",
  year =         "1998",
}

@inproceedings{Kong95,
  author    = {Eun Bae Kong and Thomas G. Dietterich},
  title     = {Error-Correcting Output Coding Corrects Bias and Variance},
  booktitle = {International Conference on Machine Learning},
  year      = {1995},
  pages     = {313--321},
}

@inproceedings{Konig96,
  author    = {Y. Konig and H. Bourlard and N. Morgan},
  title     = {{REMAP}: Recursive Estimation and Maximization of {A} Posteriori Probabilities -- Application to transition-based connectionist speech recognition},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  year      = {1996},
  publisher = {MIT Press, Cambridge, MA},
}

@inproceedings{Koray-08,
  author    = {Kavukcuoglu, Koray and Ranzato, {Marc'Aurelio} and Fergus, Rob and {LeCun}, Yann},
  title     = {Learning Invariant Features through Topographic Filter Maps},
  booktitle = cvpr09,
  year      = {2009},
  publisher = {IEEE},
}

% Report identifier stored in "number" (where techreport styles expect it)
% rather than as free text in "note".
@techreport {koray-psd-08,
 original = "orig/koray-psd-08.pdf",
 title = "Fast Inference in Sparse Coding Algorithms with Applications to Object Recognition",
 author = "Kavukcuoglu, Koray and Ranzato, {Marc'Aurelio} and {LeCun}, Yann",
 institution = "Computational and Biological Learning Lab, Courant Institute, NYU",
 number = "CBLL-TR-2008-12-01",
 year = "2008"
}

% Author names without the spurious middle initials (each merely repeated
% the first letter of the given name).
@article{Kouh-Poggio-2008,
  author = {Minjoon Kouh and Tomaso Poggio},
  title = {A Canonical Neural Circuit for Cortical Nonlinear Operations},
  journal = {Neural Computation},
  volume = 20,
  number={6},
  pages = {1427--1451},
  year = 2008,
}

@techreport{Kouropteva+al-2002,
  author      = "O. Kouropteva and O. Okun and A. Hadid and M. Soriano and S. Marcos and M. Pietik{\"a}inen",
  title       = "Beyond locally linear embedding algorithm",
  institution = "Department of Electrical and Information Engineering, University of Oulu",
  address     = "Oulu, Finland",
  number      = "MVG-01-2002",
  year        = "2002",
}

% Acronym brace-protected against sentence-casing styles; editor surname
% "De Raedt" braced so it is parsed as a single last name.
@inproceedings{Kononenko-1994,
    author = {Kononenko, Igor},
    booktitle = ECML94,
    pages = {171--182},
    editor = {F. Bergadano and L. {De Raedt}},
    title = {Estimating Attributes: Analysis and Extensions of {RELIEF}},
    url = {http://citeseer.ist.psu.edu/kononenko94estimating.html},
    year = {1994}
}

@inproceedings{Kozma96,
  author    = {R. Kozma and M. Kitamura and S. Sato},
  title     = {Monitoring of {NPP} State using Structural Adaptation in a Neural Signal Processing System},
  booktitle = nipc-hmit96,
  year      = {1996},
  volume    = {1},
  pages     = {273--278},
  publisher = ans,
}

% Volume corrected to 37 (issue 2) and title to the plural "Networks".
@Article{Kramer1991,
  author =       "Mark Kramer",
  title =        "Nonlinear Principal Component Analysis Using
                 Autoassociative Neural Networks",
  journal =      "AIChE Journal",
  volume =       "37",
  number =       "2",
  pages =        "233--243",
  year =         "1991",
}

@inproceedings{Kramer89,
  author    = {A. H. Kramer and A. Sangiovanni-Vincentelli},
  title     = {Efficient Parallel Learning Algorithms for Neural Networks},
  booktitle = NIPS1,
  editor    = NIPS1ed,
  year      = {1989},
  pages     = {40--48},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
}

% Accent wrapped as a BibTeX "special character" so name sorting and label
% generation treat it as a single letter.
@Article{Krauth89,
  author =       "W. Krauth and M. M{\'e}zard",
  title =        "The Cavity Method and the Travelling-Salesman
                 Problem",
  journal =      eul,
  volume =       "8",
  pages =        "213--218",
  year =         "1989",
}

@book{Kreyszig90,
  author    = {E. Kreyszig},
  title     = {Introductory Functional Analysis with Applications},
  year      = {1990},
  publisher = {John Wiley \& Sons, Inc.},
  address   = {New York, NY},
}

@book{Krishnaiah82,
  editor    = {P. R. Krishnaiah and L. N. Kanal},
  title     = {Classification, Pattern Recognition, and Reduction of Dimensionality},
  series    = {Handbook of Statistics},
  volume    = {2},
  year      = {1982},
  publisher = {North Holland},
  address   = {Amsterdam},
}

@techreport{KrizhevskyHinton2009,
  author      = "Alex Krizhevsky and Geoffrey Hinton",
  title       = "Learning Multiple Layers of Features from Tiny Images",
  institution = "University of Toronto",
  chapter     = "3",
  year        = "2009",
}

@inproceedings{Krogh-nips8,
  author    = {A. Krogh and S. K. Riis},
  title     = {Prediction of beta sheets in proteins},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  year      = {1996},
  pages     = {917--923},
  publisher = {MIT Press, Cambridge, MA},
}

@article{Krogh88,
  author  = {A. Krogh and J. A. Hertz},
  title   = {Mean Field Analysis of Hierarchical Associative Networks with Magnetization},
  journal = jpa,
  year    = {1988},
  volume  = {21},
  pages   = {2211--2224},
}

@inproceedings{Krogh90a,
  author    = {A. Krogh and G. I. Thorbergsson and J. A. Hertz},
  title     = {A Cost Function for Internal Representations},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  year      = {1990},
  pages     = {733--740},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
}

@inproceedings{Krogh90b,
  author    = {A. Krogh and J. A. Hertz},
  title     = {Hebbian Learning of Principal Components},
  booktitle = {Parallel Processing in Neural Systems and Computers},
  editor    = {R. Eckmiller and G. Hartmann and G. Hauske},
  year      = {1990},
  pages     = {183--186},
  publisher = {Elsevier, Amsterdam},
  address   = {D{\"u}sseldorf 1990},
}

% Umlaut written as a LaTeX escape (file is otherwise 7-bit); journal name
% restored to "Journal of Molecular Biology".
@Article{Krogh94,
  author =       "A. Krogh and M. Brown and I. S. Mian and K. Sj{\"o}lander
                 and D. Haussler",
  title =        "Hidden Markov models in computational biology:
                 Applications to protein modeling",
  journal =      "Journal of Molecular Biology",
  volume =       "235",
  pages =        "1501--1531",
  year =         "1994",
}

@inproceedings{Krogh95,
  author    = {A. Krogh and J. Vedelsby},
  title     = {Neural network ensembles, cross validation and active learning},
  booktitle = NIPS7,
  editor    = NIPS7ed,
  year      = {1995},
  pages     = {231--238},
  publisher = {Cambridge MA: MIT Press},
}

@book{Krolzig97,
  author    = {H.-M. Krolzig},
  title     = {Markov-Switching Vector Autoregressions},
  year      = {1997},
  publisher = {Springer},
}

@article{Krueger+Dayan-2009,
  author  = "Kai A. Krueger and Peter Dayan",
  title   = "Flexible shaping: how learning in small steps helps",
  journal = "Cognition",
  year    = "2009",
  volume  = "110",
  pages   = "380--394",
}

% Third author's surname corrected from "Eawards" to "Edwards".
@Article{Ku92,
  author =       "C. C. Ku and K. Y. Lee and R. M. Edwards",
  title =        "Improved Nuclear Reactor Temperature Control Using
                 Diagonal Recurrent Neural Networks",
  journal =      "IEEE Transactions on Nuclear Science",
  volume =       "39",
  pages =        "2292--2308",
  year =         "1992",
}

@inproceedings{Kubala94,
  author    = {F. Kubala and A. Anastasakos and J. Makhoul and L. Nguyen and R. Schwartz and G. Zavaliagkos},
  title     = {Comparative experiments on large vocabulary speech recognition},
  booktitle = icassp,
  year      = {1994},
  pages     = {561--564},
  address   = {Adelaide, Australia},
}

@inproceedings{Kuhn+Herzberg90,
  author       = {G. Kuhn and N. Herzberg},
  title        = {Variations on training of recurrent networks},
  booktitle    = {Proc. 24th Conference on Information Sciences and Systems},
  year         = {1990},
  organization = {Princeton University},
  address      = {NJ},
}

@unpublished{Kuhn87,
  author = {G. Kuhn},
  title  = {A first look at phonetic discrimination using connectionist models with recurrent links},
  note   = {CCRP -- IDA SCIMP working paper No.4/87, Institute for Defense Analysis, Princeton, NJ},
  year   = {1987},
}

@article{Kuhn-et-al-90,
  author  = {G. Kuhn and R. L. Watrous and B. Ladendorf},
  title   = {Connected recognition with a recurrent network},
  journal = spcomm,
  year    = {1990},
  volume  = {9},
  pages   = {41--49},
  OPTnote = {},
}

@book{Kullback59,
  author    = {S. Kullback},
  title     = {Information Theory and Statistics},
  year      = {1959},
  publisher = {Wiley},
  address   = {New York},
}

@book{Kumar+al-1994,
  author    = {V. Kumar and A. Grama and A. Gupta and G. Karypis},
  title     = {Introduction to Parallel Computing: Design and Analysis of Algorithms},
  year      = {1994},
  publisher = {Benjamin Cummings},
  address   = {Redwood City, CA},
}

@article{Kumar+al-1994b,
  author  = {Vipin Kumar and Shashi Shekhar and Minesh B. Amin},
  title   = {A Scalable Parallel Formulation of the Backpropagation Algorithm for Hypercubes and Related Architectures},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year    = {1994},
  volume  = {5},
  number  = {10},
  pages   = {1073--1090},
}

@inproceedings{Kundu88,
  author    = {A. Kundu and L. R. Bahl},
  title     = {Recognition of handwritten script: a hidden {Markov} model based approach},
  booktitle = icassp,
  year      = {1988},
  pages     = {928--931},
  address   = {New York, NY},
}

@article{Kuperstein88,
  author  = {M. Kuperstein},
  title   = {Neural model of adaptive hand-eye coordination for single postures},
  journal = {Science},
  year    = {1988},
  volume  = {239},
  pages   = {1308--1311},
}

% Accent wrapped as a BibTeX "special character" so name sorting and label
% generation treat it as a single letter.
@Article{Kurkova95,
  author =       "V. Kurkov{\'a}",
  title =        "Approximation of functions by perceptron networks with
                 bounded number of hidden units",
  journal =      "Neural Networks",
  volume =       "8",
  pages =        "745--750",
  year =         "1995",
}

@book{Kushner78,
  author    = {H. J. Kushner and D. S. Clark},
  title     = {Stochastic Approximation Methods for Constrained and Unconstrained Systems},
  year      = {1978},
  publisher = {Springer-Verlag},
  address   = {New York},
}

@inproceedings{Kwok-Tsang-2003,
  author    = {J. T. Kwok and I. W. Tsang},
  title     = {Learning with idealized kernels},
  booktitle = ICML03,
  editor    = ICML03ed,
  year      = {2003},
  pages     = {400--407},
  publisher = ICML03publ,
}

% Booktitle typo fixed ("Proceedngs" to "Proceedings").
@InProceedings{Laaksonen97,
  author =       "Jorma Laaksonen",
  booktitle =    "Proceedings of the International Conference on
                 Artificial Neural Networks ICANN'97",
  title =        "Local Subspace Classifier",
  pages =        "637--642",
  year =         "1997",
  URL =          "http://www.cis.hut.fi/jorma/papers/abstracts.html#icann97",
}

@inproceedings{Lafferty-icml2001,
  author    = {John Lafferty and Andrew McCallum and Fernando C. N. Pereira},
  title     = {Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data},
  booktitle = ICML01,
  editor    = ICML01ed,
  year      = {2001},
  publisher = ICML01publ,
}

@article{Lai+Fyfe-2000,
  author  = "P. L. Lai and C. Fyfe",
  title   = "Kernel and Nonlinear Canonical Correlation Analysis",
  journal = "International Journal of Neural Systems",
  volume  = "10",
  number  = "5",
  pages   = "365--377",
  year    = "2000",
}

@inproceedings{Laj92,
  author    = {E. Laj and A. Paoloni},
  title     = {{AIDA}: The Italian Corpora},
  booktitle = {Proc. of the Second Workshop on Neural Networks for Speech Processing},
  editor    = {M. Gori},
  year      = {1992},
  pages     = {179--183},
  publisher = {LINT},
  address   = {Firenze (Italy)},
}

% Surname corrected to "El Ghaoui", matching the spelling used in the
% Lanckriet2004 journal entry.
@InProceedings{Lanckriet-2002,
  author =       "G. Lanckriet and N. Cristianini and P. Bartlett and L.
                 {El Ghaoui} and M. Jordan",
  booktitle =    ICML02,
  editor =       ICML02ed,
  publisher =    ICML02publ,
  title =        "Learning the kernel matrix with semi-definite
                 programming",
  pages =        "323--330",
  year =         "2002",
}

% Compound surname braced: unbraced, BibTeX would parse "El" as part of the
% given name and sort/abbreviate the author under "Ghaoui".
@Article{Lanckriet2004,
  author =       "Gert R. G. Lanckriet and Nello Cristianini and Peter
                 Bartlett and Laurent {El Ghaoui} and Michael I. Jordan",
  title =        "Learning the Kernel Matrix with Semidefinite
                 Programming",
  journal =      jmlr,
  volume =       "5",
  pages =        "27--72",
  year =         "2004",
}

@techreport{Lang+Hinton88,
  author      = {K. J. Lang and G. E. Hinton},
  title       = {The development of the Time-Delay Neural Network architecture for speech recognition},
  institution = {Carnegie-Mellon University},
  number      = {CMU-CS-88-152},
  year        = {1988},
}

@article{Langdell-00-nips,
  author  = {S. Langdell and Y. Bengio},
  title   = {Approximate {SVM} Solutions: a Datamining Tool},
  journal = {submitted to NIPS'2000},
  year    = {2000},
}

@inproceedings{Langford+Zadrozny-2005,
  author    = {John Langford and Bianca Zadrozny},
  title     = {Estimating Class Membership Probabilities using Classifier Learners},
  booktitle = aistats05,
  editor    = aistats05ed,
  year      = {2005},
  pages     = {198--205},
  publisher = {Society for Artificial Intelligence and Statistics},
}

@article{Lapedes86a,
  author  = {A. Lapedes and R. Farber},
  title   = {A Self-Optimizing, Nonsymmetrical Neural Net for Content Addressable Memory and Pattern Recognition},
  journal = physicaD,
  year    = {1986},
  volume  = {22},
  pages   = {247--259},
}

@inproceedings{Lapedes86b,
  author    = {A. Lapedes and R. Farber},
  title     = {Programming a Massively Parallel, Computation Universal System: Static Behavior},
  booktitle = snowbird,
  editor    = {J. S. Denker},
  year      = {1986},
  pages     = {283--298},
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
}

@techreport{Lapedes87,
  author      = {A. Lapedes and R. Farber},
  title       = {Nonlinear Signal Processing Using Neural Networks: Prediction and System Modelling},
  institution = {Los Alamos National Laboratory},
  address     = {Los Alamos, NM},
  number      = {LA--UR--87--2662},
  year        = {1987},
}

@inproceedings{Lapedes88,
  author    = {A. Lapedes and R. Farber},
  title     = {How Neural Nets Work},
  booktitle = nips87,
  editor    = nips87ed,
  year      = {1988},
  pages     = {442--456},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
}

@article{Lari90,
  author  = {K. Lari and S. J. Young},
  title   = {The estimation of stochastic context-free grammars using the Inside-Outside algorithm},
  journal = cspla,
  year    = {1990},
  volume  = {4},
  pages   = {35--56},
}

@inproceedings{Tieleman08,
  author    = "Tijmen Tieleman",
  title     = "Training restricted Boltzmann machines using approximations to the likelihood gradient",
  booktitle = ICML08,
  editor    = ICML08ed,
  publisher = ICML08publ,
  pages     = "1064--1071",
  year      = "2008",
  location  = "Helsinki, Finland",
}

% DOI stored bare (no resolver URL) so styles can build the link themselves.
@InProceedings{TielemanT2009,
 author =    {Tijmen Tieleman and Geoffrey Hinton},
 title =     {Using Fast Weights to Improve Persistent Contrastive Divergence},
 booktitle = ICML09,
 editor =    ICML09ed,
 publisher = ICML09publ,
 year =      "2009",
 isbn =      {978-1-60558-516-1},
 pages =     {1033--1040},
 location =  icml09loc,
 doi =       {10.1145/1553374.1553506},
}

@article{Larochelle-jmlr-toappear-2008,
  author  = "Hugo Larochelle and Yoshua Bengio and Jerome Louradour and Pascal Lamblin",
  title   = "Exploring Strategies for Training Deep Neural Networks",
  journal = jmlr,
  volume  = "10",
  pages   = "1--40",
  year    = "2009",
}

@inproceedings{LarochelleH2007-small,
  author    = {H. Larochelle and D. Erhan and A. Courville and J. Bergstra and Y. Bengio},
  title     = {An Empirical Evaluation of Deep Architectures on Problems with Many Factors of Variation},
  booktitle = {ICML 2007},
  year      = {2007},
}

@inproceedings{LarochelleH2007-short,
  author    = {H. Larochelle and D. Erhan and A. Courville and J. Bergstra and Y. Bengio},
  title     = {An Empirical Evaluation of Deep Architectures on Problems with Many Factors of Variation},
  booktitle = {Int. Conf. Mach. Learn.},
  pages     = {473--480},
  year      = {2007},
}

% DEPRECATED: the following entry is a duplicate of LarochelleH2007.
@inproceedings{larochelle-icml-2007,
  author    = {Hugo Larochelle and Dumitru Erhan and Aaron Courville and James Bergstra and Yoshua Bengio},
  title     = {An Empirical Evaluation of Deep Architectures on Problems with Many Factors of Variation},
  booktitle = ICML07,
  editor    = ICML07ed,
  publisher = ICML07publ,
  pages     = {473--480},
  year      = {2007},
  location  = {Corvallis, OR},
}
  %url =          "http://www.machinelearning.org/proceedings/icml2007/papers/331.pdf",

% DEPRECATED: the following entry is a duplicate of LarochelleH2007.
% Typed as a conference paper: the record carries booktitle/editor/publisher
% and no journal, so as an article it lost its venue and triggered a
% missing-field warning.
@InProceedings{larochelle:icml07,
  author =       "Hugo Larochelle and Dumitru Erhan and Aaron Courville and
                 James Bergstra and Yoshua Bengio",
  booktitle =    ICML07,
  editor =       ICML07ed,
  publisher =    ICML07publ,
  title =        "An empirical evaluation of deep architectures on
                 problems with many factors of variation",
  pages =        "473--480",
  year =         "2007",
  location =     "Corvallis, OR",
  url =          "http://www.machinelearning.org/proceedings/icml2007/papers/331.pdf",
}

@inproceedings{Larochelle+Bengio-2008-small,
  author    = {Hugo Larochelle and Yoshua Bengio},
  title     = {Classification using Discriminative Restricted {Boltzmann} Machines},
  booktitle = {Proceedings of ICML 2008},
  pages     = {536--543},
  year      = {2008},
}

% Booktitle typo fixed ("he" to "the"); editor M{\"u}ller written with its
% umlaut and standard initials.
@InCollection{Larsen98,
  author =       "Jan Larsen and Claus Svarer and Lars Nonboe Andersen
                 and Lars Kai Hansen",
  editor =       "G. B. Orr and K.-R. M{\"u}ller",
  booktitle =    "Neural Networks: Tricks of the Trade",
  title =        "Adaptive Regularization in Neural Networks Modeling",
  publisher =    "Springer",
  pages =        "113--132",
  year =         "1998",
}

 
% DOI stored bare (no resolver URL) so styles can build the link themselves.
@InProceedings{LasserreJ2006,
  author =       "Julia A. Lasserre and Christopher M. Bishop and
                 Thomas P. Minka",
  booktitle =    cvpr06,
  title =        "Principled Hybrids of Generative and Discriminative
                 Models",
  publisher =    "IEEE Computer Society",
  address =      "Washington, DC, USA",
  pages =        "87--94",
  year =         "2006",
  ISBN =         "0-7695-2597-0",
  doi =          "10.1109/CVPR.2006.227",
}


@techreport{Laub2003,
  author      = {J. Laub and K.-R. M{\"u}ller},
  title       = {Feature discovery: unraveling hidden structure in non-metric pairwise data},
  institution = {Fraunhofer FIRST.IDA},
  address     = {Germany},
  year        = {2003},
}

@article{Lauritzen95,
  author  = {Steffen L. Lauritzen},
  title   = {The {EM} algorithm for graphical association models with missing data},
  journal = {Computational Statistics and Data Analysis},
  year    = {1995},
  volume  = {19},
  pages   = {191--201},
}

@book{Lauritzen96,
  author    = {Steffen L. Lauritzen},
  title     = {Graphical Models},
  year      = {1996},
  publisher = {Clarendon Press},
  address   = {Oxford},
  isbn      = {0-19-852219-3},
}

@book{Lawler76,
  author    = {E. L. Lawler},
  title     = {Combinatorial Optimization: Networks and Matroids},
  year      = {1976},
  publisher = {Holt-Rinehart-Winston},
  address   = {New York},
}

@book{Lawler85,
  editor    = {E. L. Lawler and J. K. Lenstra and A. H. G. Rinnooy Kan and D. B. Shmoys},
  title     = {The Travelling Salesman Problem},
  year      = {1985},
  publisher = {Wiley},
  address   = {Chichester},
}

@inproceedings{Lawrence-Seeger-Herbrich-2003,
  author    = {Neil Lawrence and Matthias Seeger and Ralf Herbrich},
  title     = {Fast Sparse {G}aussian Process Methods: The Informative Vector Machine},
  booktitle = NIPS15,
  editor    = NIPS15ed,
  year      = {2003},
  pages     = {609--616},
  publisher = {{MIT} Press},
}

% Typed as a journal article: the record names a journal and has no
% booktitle/publisher, so the InCollection type dropped the venue. The empty
% pages field is replaced with the volume/issue/page data, and the author
% order follows the journal version.
@Article{Lawrence00,
  author =       "S. Lawrence and C. L. Giles and S. Fong",
  title =        "Natural Language Grammatical Inference with Recurrent
                 Neural Networks",
  journal =      "IEEE Trans. on Knowledge and Data Engineering",
  volume =       "12",
  number =       "1",
  pages =        "126--140",
  year =         "2000",
}

@incollection{Lawrence96,
  author    = {S. Lawrence and S. Fong and C. L. Giles},
  title     = {Natural Language Grammatical Inference: {A} Comparison of Recurrent Neural Networks and Machine Learning Methods},
  booktitle = {Lecture Notes on Artificial Intelligence, Connectionist, Statistical and Symbolic Approaches to Learning for Natural Language Processing},
  editor    = {S. Wermter and E. Riloff and G. Scheler},
  year      = {1996},
  publisher = {Springer-Verlag, NY},
}

@incollection{LawrenceN2005,
  author    = {Neil D. {Lawrence} and Michael I. {Jordan}},
  title     = {Semi-supervised Learning via {G}aussian Processes},
  booktitle = NIPS17,
  editor    = NIPS17ed,
  year      = {2005},
  pages     = {753--760},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  original  = {0753-257.PDF},
}

@techreport{LeBaron95,
  author      = {B. LeBaron and A. S. Weigend},
  title       = {Evaluating Neural Network Predictors by Bootstrapping},
  institution = {University of Colorado, Boulder},
  number      = {CU-CS-725-94},
  year        = {1995},
}

% Second author's given name carries its accent, matching the spelling used
% for the same author in the neighbouring LeCun entries.
@Article{LeCun+98,
  author =       "Yann {LeCun} and L{\'e}on Bottou and Yoshua Bengio and
                 Patrick Haffner",
  title =        "Gradient-Based Learning Applied to Document
                 Recognition",
  journal =      "Proceedings of the {IEEE}",
  volume =       "86",
  number =       "11",
  pages =        "2278--2324",
  month =        nov,
  year =         "1998",
}

% Series name and volume number split into their own fields (they were fused
% into one "series" string); editors and page range added; accents written
% as BibTeX special characters.
@InCollection{LeCun+98backprop,
  author =       "Yann {LeCun} and L{\'e}on Bottou and Genevieve B. Orr
                 and Klaus-Robert M{\"u}ller",
  editor =       "Genevieve B. Orr and Klaus-Robert M{\"u}ller",
  title =        "Efficient Backprop",
  booktitle =    "Neural Networks, Tricks of the Trade",
  series =       "Lecture Notes in Computer Science",
  volume =       "1524",
  publisher =    "Springer Verlag",
  pages =        "9--50",
  year =         "1998",
}
  %URL =          "http://leon.bottou.org/papers/lecun-98x",


@incollection{LeCun+98backprop-small,
  author    = {Y. {LeCun} and L. Bottou and G. B. Orr and K. M{\"{u}}ller},
  title     = {Efficient Backprop},
  booktitle = {Neural Networks, Tricks of the Trade},
  year      = {1998},
}


% Page range uses an en-dash (double hyphen); DOI stored bare, without the
% resolver URL.
@InProceedings{lecun-04,
  author =       "Yann {LeCun} and Fu-Jie Huang and L{\'e}on Bottou",
  booktitle =    cvpr04,
  title =        "Learning Methods for Generic Object Recognition with
                 Invariance to Pose and Lighting",
  volume = {2},
  year =         "2004",
  issn = {1063-6919},
  pages = {97--104},
  doi = {10.1109/CVPR.2004.144},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
}

@inproceedings{LeCun-cp89,
  author    = {Yann {LeCun}},
  title     = {Generalization and Network Design Strategies},
  booktitle = {Connectionism in Perspective},
  year      = {1989},
  publisher = {Elsevier Publishers},
}

% NOTE(review): same chapter (title, book, pages, year) as the LeCun86 entry
% in this file; consider consolidating the two keys.
% Editor accent wrapped as a BibTeX special character.
@InCollection{LeCun-dsbo86,
  author =       "Yann {LeCun}",
  editor =       "F. Fogelman-Souli{\'e} and E. Bienenstock and G.
                 Weisbuch",
  booktitle =    "Disordered Systems and Biological Organization",
  title =        "Learning Processes in an Asymmetric Threshold
                 Network",
  publisher =    "Springer-Verlag",
  address =      "Les Houches, France",
  pages =        "233--240",
  year =         "1986",
}

@inproceedings{lecun-huang-05,
  author    = {Yann {LeCun} and {Fu Jie} Huang},
  title     = {Loss Functions for Discriminative Training of Energy-Based Models},
  booktitle = aistats05,
  editor    = aistats05ed,
  year      = {2005},
  date      = {Jan 6-8, 2005},
  location  = {Savannah Hotel, Barbados},
}

@misc{LeCun-nips93-tutorial,
  author = {Yann {LeCun}},
  title  = {Efficient learning and second-order methods},
  note   = {Tutorial presented at NIPS'93, Denver, CO},
  year   = {1993},
}

% French spelling corrected ("connexionnistes", double n); accents written
% as BibTeX special characters.
@PhdThesis{Lecun-these87,
  author =       "Yann {LeCun}",
  title =        "Mod{\`e}les connexionnistes de l'apprentissage",
  school =       "Universit{\'e} de Paris VI",
  year =         "1987",
}

% Name spellings corrected: author Marc'Aurelio Ranzato (as written in the
% Koray-08 entries of this file), editors Hofmann and Sch{\"o}lkopf.
@InCollection{lecun2006,
  author =       "Yann {LeCun} and Sumit Chopra and Raia Hadsell and
                 {Marc'Aurelio} Ranzato and Fu-Jie Huang",
  editor =       "G. Bakir and T. Hofmann and B. Sch{\"o}lkopf and A. Smola
                 and B. Taskar",
  booktitle =    "Predicting Structured Data",
  title =        "A Tutorial on Energy-Based Learning",
  publisher =    "MIT Press",
  pages =        "191--246",
  year =         "2006",
}

@inproceedings{LeCun85,
  author    = {Yann {LeCun}},
  title     = {Une Proc\'edure d'Apprentissage pour {R}\'eseau \`a Seuil Assym\'etrique},
  booktitle = {Cognitiva 85: A la Fronti\`ere de l'Intelligence Artificielle, des Sciences de la Connaissance et des Neurosciences},
  year      = {1985},
  pages     = {599--604},
  publisher = {CESTA, Paris},
  address   = {Paris 1985},
}

% Editor accent wrapped as a BibTeX special character so the name sorts and
% abbreviates correctly.
@InCollection{LeCun86,
  author =       "Yann {LeCun}",
  editor =       "E. Bienenstock and F. Fogelman-Souli{\'e} and G.
                 Weisbuch",
  booktitle =    "Disordered Systems and Biological Organization",
  title =        "Learning Processes in an Asymmetric Threshold
                 Network",
  publisher =    "Springer-Verlag, Berlin",
  address =      "Les Houches 1985",
  pages =        "233--240",
  year =         "1986",
}

@article{LeCun89,
  author  = {Yann {LeCun} and Bernhard Boser and John S. Denker and Donnie Henderson and Richard E. Howard and Wayne Hubbard and Lawrence D. Jackel},
  title   = {Backpropagation Applied to Handwritten Zip Code Recognition},
  journal = nc,
  year    = {1989},
  volume  = {1},
  number  = {4},
  pages   = {541--551},
}

@techreport{LeCun89a,
  author      = {Yann {LeCun}},
  title       = {Generalization and Network Design Strategies},
  institution = {University of Toronto},
  type        = {Technical Report},
  number      = {CRG-TR-89-4},
  year        = {1989},
  key         = {LeCun},
}

@article{LeCun89d,
  author  = {Yann {LeCun} and Lawrence D. Jackel and B. Boser and J. S. Denker and Hans P. Graf and I. Guyon and D. Henderson and R. E. Howard and W. Hubbard},
  title   = {Handwritten Digit recognition: Applications of Neural Network Chips and Automatic Learning},
  journal = {IEEE Communications Magazine},
  year    = {1989},
  month   = nov,
  volume  = {27},
  number  = {11},
  pages   = {41--46},
}

@inproceedings{LeCun90a,
  author    = {Y. {LeCun} and B. Boser and J. S. Denker and D. Henderson and R. E. Howard and W. Hubbard and L. D. Jackel},
  title     = {Handwritten Digit Recognition with a Back-Propagation Network},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  year      = {1990},
  pages     = {396--404},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
}

@inproceedings{LeCun90b,
  author = {Y. {LeCun} and J. S. Denker and S. A. Solla},
  title = {Optimal Brain Damage},
  booktitle = NIPS2,
  editor = NIPS2ed,
  year = {1990},
  pages = {598--605},
  publisher = {Morgan Kaufmann, San Mateo},
  address = {Denver, CO},
}

@inproceedings{LeCun90c,
  author = {Y. LeCun and Y. Matan and B. Boser and J. S. Denker and D. Henderson and R. E. Howard and W. Hubbard and L. D. Jackel and H. S. Baird},
  title = {Handwritten Zip Code Recognition with Multilayer Networks},
  booktitle = {International Conference on Pattern Recognition},
  editor = {IAPR},
  year = {1990},
  publisher = {IEEE},
  address = {Atlantic City},
}

@inproceedings{LeCun91,
  author = {Y. {LeCun} and I. Kanter and S. Solla},
  title = {Second order properties of error surfaces: learning time, generalization},
  booktitle = NIPS3,
  editor = NIPS3ed,
  year = {1991},
  pages = {918--924},
  publisher = {Morgan Kaufmann},
  address = {Denver, CO},
}

@incollection{LeCun93,
  author = {Y. {LeCun} and P. Simard and B. Pearlmutter},
  title = {Automatic learning rate maximization by on-line estimation of the {Hessian}'s eigenvectors},
  booktitle = NIPS5,
  editor = NIPS5ed,
  year = {1993},
  pages = {156--163},
  publisher = {Morgan Kaufmann Publishers, San Mateo, CA},
}

@InProceedings{LeCun94b,
  author =       "Yann LeCun and Yoshua Bengio",
  editor =       "IEEE",
  booktitle =    ICPR94,
  title =        "Word-Level Training of a Handwritten Word Recognizer
                 based on Convolutional Neural Networks",
  address =      "Jerusalem 1994",
  year =         "1994",
}

@article{LeCun98-small,
  author = {Y. {LeCun} and L. Bottou and Y. Bengio and P. Haffner},
  title = {Gradient Based Learning Applied to Document Recognition},
  journal = {IEEE},
  year = {1998},
  month = nov,
  volume = {86},
  number = {11},
  pages = {2278--2324},
}

@incollection{LeCun98-tricks,
  author = {Y. {LeCun} and L. Bottou and G. B. Orr and K.-R. M{\"u}ller},
  title = {Efficient {BackProp}},
  booktitle = {Neural Networks: Tricks of the Trade},
  editor = {G. B. Orr and K.-R. M{\"u}ller},
  year = {1998},
  pages = {9--50},
  publisher = {Springer},
}

@techreport{LeCun-TR,
  author = {Yann {LeCun}},
  title = {Generalization and Network Design Strategies},
  institution = {Department of Computer Science, University of Toronto},
  number = {CRG-TR-89-4},
  year = {1989},
  key = {Lecun},
}

@article{Lee+Hon89,
  author = {Kai-Fu Lee and Hsiao-Wuen Hon},
  title = {Speaker-independent phone recognition using hidden {Markov} models},
  journal = {IEEE Trans. on Acoustics, Speech and Signal Processing},
  year = {1989},
  month = nov,
  volume = {37},
  number = {11},
  pages = {1641--1648},
}

@article{Lee+Lewicki-2002,
  author = {T-W. Lee and M. S. Lewicki},
  title = {Unsupervised classification segmentation and enhancement of images using {ICA} mixture models},
  journal = {IEEE Trans. Image Proc.},
  year = {2002},
  volume = {11},
  number = {3},
  pages = {270--279},
}

@incollection{Lee-2008,
  author = {Honglak Lee and Chaitanya Ekanadham and Andrew Ng},
  title = {Sparse deep belief net model for visual area {V}2},
  booktitle = NIPS20,
  editor = NIPS20ed,
  year = {2008},
  pages = {873--880},
  publisher = {MIT Press},
  address = {Cambridge, MA},
}

@book{Lee91,
  author = {Kai-Fu Lee},
  title = {Automatic Speech Recognition: the development of the {SPHINX} system},
  year = {1989},
  publisher = {Kluwer Academic Publ.},
}

@article{Lee-1996,
    author = "Tai Sing Lee",
    title = "Image Representation Using {2D} {Gabor} Wavelets",
    journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence",
    volume = "18",
    number = "10",
    pages = "959--971",
    year = "1996",
}

@InProceedings{Lee99a,
  author =       "Lillian Lee",
  booktitle =    "ACL99",
  title =        "Measures of Distributional Similarity",
  pages =        "25--32",
  year =         "1999",
}

@InProceedings{Lee99b,
  author =       "Lillian Lee and Fernando Pereira",
  title =        "Distributional Similarity Models: Clustering vs.
                 Nearest Neighbours",
  booktitle =    "ACL99",
  pages =        "33--40",
  year =         "1999",
}

@article{Lee+Mumford-2003,
 author = {Tai-Sing Lee and David Mumford},
 title = {Hierarchical {Bayesian} inference in the visual cortex},
 year = 2003,
 journal = {Journal of the Optical Society of America A},
 volume = 20,
 number = 7,
 pages = {1434--1448},
}


@Article{Leitch91,
  author =       "G. Leitch and J. E. Tanner",
  title =        "Economic Forecast Evaluation: Profits Versus The
                 Conventional Error Measures",
  journal =      "The American Economic Review",
  volume =       "81",
  number =       "3",
  pages =        "580--590",
  year =         "1991",
}

@article{Lengelle+Denoeux96,
  author = {R{\'e}gis Lengell{\'e} and Thierry Denoeux},
  title = {Training {MLP}s layer by layer using an objective function for internal representations},
  journal = {Neural Networks},
  year = {1996},
  volume = {9},
  pages = {83--97},
}

@inproceedings{Leprieur95,
  author = {H. Leprieur and P. Haffner},
  title = {Discriminant learning with minimum memory loss for improved non-vocabulary rejection},
  booktitle = {EUROSPEECH'95},
  year = {1995},
  address = {Madrid, Spain},
}

@book{lerdahl+jackendoff-1983,
  author = {F. Lerdahl and R. Jackendoff},
  title = {A {Generative} {Theory} of {Tonal} {Music}},
  year = {1983},
  publisher = {MIT Press},
  address = {Cambridge, Mass.},
}

@incollection{LeRoux+al-tonga-2008,
  author = {Nicolas {Le Roux} and Pierre-Antoine Manzagol and Yoshua Bengio},
  title = {Topmoumoute online natural gradient algorithm},
  booktitle = NIPS20,
  editor = NIPS20ed,
  year = {2008},
  pages = {849--856},
  publisher = {{MIT} Press},
  address = {Cambridge, MA},
}

@incollection{LeRoux+al-tonga-2008-small,
  author = {Nicolas {Le Roux} and Pierre-Antoine Manzagol and Yoshua Bengio},
  title = {Topmoumoute online natural gradient algorithm},
  booktitle = {NIPS 20},
  year = {2008},
  pages = {849--856},
}

@TechReport{LeRoux-comb-dens-2005,
  author =       "Nicolas {Le Roux} and Yoshua Bengio and R{\'e}jean
                 Ducharme",
  title =        "Combining density estimators to improve classification
                 accuracy",
  number =       "1261",
  institution =  "D{\'e}partement d'informatique et recherche
                 op{\'e}rationnelle, Universit{\'e} de Montr{\'e}al",
  year =         "2005",
}

@InProceedings{LeRoux-continuous-short,
  author =       "Nicolas {Le Roux} and Yoshua Bengio",
  booktitle =    aistats07,
  title =        "Continuous Neural Networks",
  year =         "2007",
  date =         "March 21-24, 2007",
}

@InProceedings{Lesk1986,
  author =       "Michael E. Lesk",
  booktitle =    "SIGDOC Conference",
  title =        "Automatic sense disambiguation using machine readable
                 dictionaries: How to tell a pine cone from an ice cream
                 cone.",
  address =      "Toronto, Canada",
  year =         "1986",
}

@inproceedings{Leung92,
  author = {H. C. Leung and I. L. Hetherington and V. W. Zue},
  title = {Speech recognition using stochastic segment neural networks},
  booktitle = icassp,
  year = {1992},
  volume = {1},
  pages = {613--16},
  publisher = {IEEE},
  address = {New York, NY, USA},
  institution = {Lab. for Comput. Sci., MIT, Cambridge, MA, USA},
}

@Article{Levenberg44,
  author =       "K. Levenberg",
  title =        "A method for the solution of certain non-linear
                 problems in least squares",
  journal =      "Quarterly of Applied Mathematics",
  volume =       "2",
  number =       "2",
  pages =        "164--168",
  year =         "1944",
}

@inproceedings{Levin90,
  author = {E. Levin},
  title = {Word Recognition using Hidden Control Neural Architecture},
  booktitle = icassp,
  year = {1990},
  pages = {433--436},
  address = {Albuquerque, NM},
}

@inproceedings{Levin92,
  author = {E. Levin and R. Pieraccini and E. Bocchieri},
  title = {Time-Warping Network: a Hybrid Framework for Speech Recognition},
  booktitle = NIPS4,
  editor = NIPS4ed,
  year = {1992},
  pages = {151--158},
  address = {Denver, CO},
}

@Article{Levinson83,
  author =       "S. E. Levinson and L. R. Rabiner and M. M. Sondhi",
  title =        "An Introduction to the Application of the Theory of
                 Probabilistic Functions of a {Markov} Process to
                 Automatic Speech Recognition",
  journal =      "Bell System Technical Journal",
  volume =       "62",
  number =       "4",
  pages =        "1035--1074",
  year =         "1983",
}

@InCollection{Levinson96,
  author =       "S. E. Levinson",
  editor =       "R. A. Cole and J. Mariani and H. Uszkoreit and A.
                 Zaenen and V. Zue",
  booktitle =    "Survey of the State of the Art in Human Language
                 Technology",
  title =        "Statistical Modeling and Classification",
  publisher =    "Cambridge University Press",
  url =          "http://www.cse.ogi.edu/CSLU/HLTsurvey/HLTsurvey.html",
  pages =        "395--401",
  year =         "1996",
}

@PhdThesis{Levner2008,
  title =        "Data Driven Object Segmentation",
  author =       "Ilya Levner",
  year =         "2008",
  school =       "Department of Computer Science, University of Alberta",
}

@inproceedings{Lewicki+Sejnowski-97,
  author = {Michael Lewicki and Terry Sejnowski},
  title = {Learning nonlinear overcomplete representations for efficient coding},
  booktitle = NIPS10,
  editor = NIPS10ed,
  year = {1998},
  pages = {556--562},
  publisher = {MIT Press},
  address = {Cambridge, MA, USA},
  location = {Denver, Colorado, United States},
  isbn = {0-262-10076-2},
}

@article{Lewicki+Sejnowski-2000,
    author = {Michael S. Lewicki and Terrence J. Sejnowski},
    title = {Learning Overcomplete Representations},
    journal = {Neural Computation},
    volume = {12},
    number = {2},
    year = {2000},
    issn = {0899-7667},
    pages = {337--365},
    doi = {10.1162/089976600300015826},
    publisher = {MIT Press},
    address = {Cambridge, MA, USA},
}

@inproceedings{LewisC62,
  author = {P. M. {Lewis II} and C. L. Coates},
  title = {A realization procedure for threshold gate networks},
  pages = {159--168},
  url = {http://theory.lcs.mit.edu/~dmjones/FOCS/focs.bib},
  crossref = {FOCS3},
}

@Article{lheureux-04-small,
  author =       "P.-J. {L'Heureux} and J. Carreau and Y. Bengio and O.
                 Delalleau and S. Y. Yue",
  title =        "Locally Linear Embedding for dimensionality reduction
                 in {QSAR}",
  journal =      "J. Computer-Aided Molecular Design",
  volume =       "18",
  pages =        "475--482",
  year =         "2004",
}

@Book{Li93,
  author =       "Ming Li and Paul Vitanyi",
  title =        "An Introduction to Kolmogorov Complexity and Its
                 Applications",
  edition =      "Second",
  publisher =    "Springer",
  address =      "New York, NY",
  year =         "1997",
}

@article{li99face,
  author = {S. Z. Li and J. W. Lu},
  title = {Face recognition using the nearest feature line method},
  journal = {IEEE Transactions on Neural Networks},
  year = {1999},
  volume = {10},
  number = {2},
  pages = {439--443},
  URL = {citeseer.nj.nec.com/li99face.html},
}

@inproceedings{Li+al-2005,
    author    = {Hongyu Li and Wenbin Chen and I-Fan Shen},
    title     = {Supervised Local Tangent Space Alignment for Classification},
    booktitle = {IJCAI},
    year      = {2005},
    pages     = {1620--1621},
    ee        = {http://www.ijcai.org/papers/post-0505.pdf},
    bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{Li+Guo-2006,
    author = {Chun-Guang Li and Jun Guo},
    title = {Supervised Isomap with Explicit Mapping},
    booktitle = {First International Conference on Innovative Computing, Information and Control},
    volume = {3},
    year = {2006},
    isbn = {0-7695-2616-0},
    pages = {345--348},
    doi = {10.1109/ICICIC.2006.530},
    publisher = {IEEE Computer Society},
    address = {Los Alamitos, CA, USA},
}

@InProceedings{lischuurmans08a,
  author =       {Li, Y. and Schuurmans, D.},
  title =        {Policy iteration for learning an exercise policy for {American} options},
  booktitle =    {Proceedings of the European Workshop on Reinforcement Learning (EWRL)},
  year =         {2008},
  note =         {Acceptance rate 33\%; all authors from my research group},
}

@InProceedings{lischuurmans08b,
  author =       {Li, Y. and Schuurmans, D.},
  title =        {Learning an exercise policy for {American} options on real data},
  booktitle =    {Proceedings of the International Symposium on Financial Engineering and Risk Management (FERM)},
  year =         {2008},
  note =         {All authors from my research group; unrefereed publication},
}

@inproceedings{Li+al-2007,
    author    = {Jun-Bao Li and Shu-Chuan Chu and Jeng-Shyang Pan},
    title     = {Locally Discriminant Projection with Kernels for Feature Extraction},
    booktitle = {Proceedings of the Third International Conference on Advanced Data Mining and Applications},
    editor    = {Reda Alhajj and Hong Gao and Xue Li and Jianzhong Li and Osmar R. Za{\"\i}ane},
    publisher = {Springer},
    year      = {2007},
    pages     = {586--593},
    ee        = {http://dx.doi.org/10.1007/978-3-540-73871-8_56},
    bibsource = {DBLP, http://dblp.uni-trier.de}
}

@incollection{Liang83,
  author = {F. M. Liang},
  title = {Ph.{D}.\ Thesis},
  booktitle = {The \TeX Book},
  editor = {D. E. Knuth},
  year = {1986},
  publisher = {Addison-Wesley},
  address = {Reading},
}

@inproceedings{LiangP2008,
 author = {Percy Liang and Michael I. Jordan},
 title = {An asymptotic analysis of generative, discriminative, and pseudolikelihood estimators},
 booktitle =    ICML08,
 editor =       ICML08ed,
 publisher =    ICML08publ,
 year = {2008},
 isbn = {978-1-60558-205-4},
 pages = {584--591},
 location = {Helsinki, Finland},
 doi = {10.1145/1390156.1390230},
 address = {New York, NY, USA},
 }

@article{Liberman67,
  author = {A. M. Liberman and F. S. Cooper and D. P. Shankweiler and M. Studdert-Kennedy},
  title = {Perception of the speech code},
  journal = {Psychological Review},
  year = {1967},
  volume = {74},
  pages = {431--461},
}

@Article{Lin+al-1991,
  author =       "W.-M. Lin and V. K. Prasanna and K. W. Przytula",
  title =        "Algorithmic mapping of neural network Models onto
                 Parallel {SIMD} Machines",
  journal =      "IEEE Transactions on Computers",
  volume =       "40",
  number =       "12",
  publisher =    "IEEE Computer Society",
  address =      "Los Alamitos, CA, USA",
  pages =        "1390--1401",
  year =         "1991",
  ISSN =         "0018-9340",
  doi =          "10.1109/12.106224",
}

@Article{Lin-2000,
  author =       "Dekang Lin",
  title =        "Word sense disambiguation with a similarity based
                 smoothed library",
  journal =      "Computers and the Humanities: special issue on
                 {SENSEVAL}",
  volume =       "34",
  pages =        "147--152",
  year =         "2000",
}

@inproceedings{Lin-99,
  author = {Dekang Lin},
  title = {A case-based algorithm for word sense disambiguation},
  booktitle = {Proceedings of the Conference of the Pacific Association for Computational Linguistics},
  year = {1999},
  address = {Waterloo, Canada},
}

@article{Lin73,
  author = {S. Lin and B. W. Kernighan},
  title = {An Effective Heuristic Algorithm for the Travelling Salesman Problem},
  journal = opres,
  year = {1973},
  volume = {21},
  pages = {498--516},
}

@TechReport{Lin95,
  author =       "T. Lin and B. G. Horne and P. Tino and C. L. Giles",
  title =        "Learning long-term dependencies is not as difficult
                 with {NARX} recurrent neural networks",
  number =       "UMIACS-TR-95-78",
  institution =  "Institute for Advanced Computer Studies, University of
                 Maryland",
  year =         "1995",
}

@inproceedings{Lin96,
  author = {C. Lin and S-C. Chang and K-J. Lin},
  title = {Simulation of the Balance of Plant of a Nuclear Power Plant by Neural Networks},
  booktitle = nipc-hmit96,
  year = {1996},
  volume = {1},
  pages = {251--255},
  publisher = ans,
}

@Article{Linde80,
  author =       "Y. Linde and A. Buzo and R. M. Gray",
  title =        "An algorithm for vector quantizer design",
  journal =      "IEEE Transactions on Communications",
  volume =       "COM-28",
  number =       "1",
  pages =        "84--95",
  month =        jan,
  year =         "1980",
}

@article{Lindgren78,
  author = {G. Lindgren},
  title = {{Markov} Regime Models for Mixed Distributions and Switching Regressions},
  journal = {Scan. J. Statist.},
  year = {1978},
  volume = {5},
  pages = {81--91},
}

@article{Linial93,
  author = {Nathan Linial and Yishay Mansour and Noam Nisan},
  title = {Constant depth circuits, {Fourier} transform, and learnability},
  journal = {J. ACM},
  year = {1993},
  volume = {40},
  number = {3},
  pages = {607--620},
  publisher = {ACM Press},
  address = {New York, NY, USA},
}

@article{Linsker86,
  author = {R. Linsker},
  title = {From Basic Network Principles to Neural Architecture},
  journal = PNAS,
  year = {1986},
  volume = {83},
  pages = {7508--7512, 8390--8394, 8779--8783},
}

@article{Linsker88,
  author = {R. Linsker},
  title = {Self-Organization in a Perceptual Network},
  journal = computer,
  year = {1988},
  month = mar,
  pages = {105--117},
}

@techreport{liporace-76,
  author = {L. A. Liporace},
  title = {{PTAH} on Continuous Multivariate Functions of {Markov} Chains},
  institution = {Institute for Defense Analysis, Communication Research Department},
  number = {80193},
  year = {1976},
  month = feb,
}

@article{Lippmann87,
  author = {R. P. Lippmann},
  title = {An Introduction to Computing with Neural Nets},
  journal = ieeeassp,
  year = {1987},
  month = apr,
  pages = {4--22},
}

@inproceedings{Lippmann87b,
  author = {R. P. Lippmann and B. Gold},
  title = {Neural Classifiers Useful for Speech Recognition},
  booktitle = {IEEE Proc. First Intl. Conf. on Neural Networks},
  year = {1987},
  volume = {IV},
  pages = {417--422},
  address = {San Diego, CA},
}

@article{Lippmann89,
  author = {R. P. Lippmann},
  title = {Review of Neural Networks for Speech Recognition},
  journal = nc,
  year = {1989},
  volume = {1},
  pages = {1--38},
}

@inproceedings{Lister90,
  author = {R. Lister},
  title = {Segment Reversal and the {TSP}},
  booktitle = ijcnn,
  year = {1990},
  volume = {1},
  pages = {424--427},
  publisher = {Lawrence Erlbaum, Hillsdale},
  address = {Washington 1990},
}

@article{Litkowski-2000,
  author = {K. Litkowski},
  title = {{SENSEVAL}: The {CL}-research experience},
  journal = {Computers and the Humanities: special issue on SENSEVAL},
  year = {2000},
  volume = {34},
  pages = {153--158},
}

@Book{Little+Rubin-2002,
  author =       "R. J. A. Little and D. B. Rubin",
  title =        "Statistical Analysis with Missing Data",
  publisher =    "Wiley",
  address =      "New York",
  edition =      "Second",
  year =         "2002",
}

@book{Little-Rubin,
  author = {R. J. A. Little and D. B. Rubin},
  title = {Statistical Analysis with Missing Data},
  year = {1987},
  publisher = {Wiley},
  address = {New York},
}

@article{Little74,
  author = {W. A. Little},
  title = {The Existence of Persistent States in the Brain},
  journal = mbio,
  year = {1974},
  volume = {19},
  pages = {101--120},
}

@article{Little75,
  author = {W. A. Little and G. L. Shaw},
  title = {A Statistical Theory of Short and Long Term Memory},
  journal = behbio,
  year = {1975},
  volume = {14},
}

@article{Little78,
  author = {W. A. Little and G. L. Shaw},
  title = {Analytic Study of the Memory Storage Capacity of a Neural Network},
  journal = mbio,
  year = {1978},
  volume = {39},
  pages = {281--290},
}

@article{littlestone-warmuth94,
  author = {N. Littlestone and M. K. Warmuth},
  title = {The weighted majority algorithm},
  journal = {Information and Computation},
  year = {1994},
  volume = {108},
  number = {2},
  pages = {212--261},
}

@misc{Littlestone86,
  author = {N. Littlestone and M. Warmuth},
  title = {Relating data compression and learnability},
  note = {Unpublished manuscript. University of California Santa Cruz. An extended version can be found in (Floyd and Warmuth 95)},
  year = {1986},
}

@InCollection{Liu2001,
  author =       "J. S. Liu and R. Chen and T. Logvinenko",
  editor =       "A. Doucet and N. de Freitas and N. Gordon",
  booktitle =    "Sequential Monte Carlo Methods in Practice",
  title =        "A theoretical framework for sequential importance
                 sampling and resampling",
  publisher =    "Springer-Verlag",
  year =         "2001",
}

@book{Ljung+Soderstrom83,
  author = {L. Ljung and T. Soderstrom},
  title = {Theory and Practice of recursive identification},
  year = {1983},
  publisher = {MIT Press},
}

@Book{Ljung-86,
  author =       "L. Ljung and T. S{\"o}derstr{\"o}m",
  title =        "Theory and Practice of Recursive Identification",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "1986",
}

@article{LloydS1982,
	author = {Stuart P. Lloyd},
	journal = {IEEE Transactions on Information Theory},
	number = {2},
	pages = {129--137},
	title = {Least squares quantization in {PCM}},
	volume = {28},
	year = {1982}
}

@Article{Loader96,
  author =       "C. R. Loader",
  title =        "Local likelihood density estimation",
  journal =      "Annals of Statistics",
  volume =       "24",
  number =       "4",
  pages =        "1602--1618",
  year =         "1996",
}

@article{Loftsgaarden+Quesenberry-65,
  author = {D. O. Loftsgaarden and C. P. Quesenberry},
  title = {A nonparametric estimate of a multivariate density function},
  journal = {Annals of Mathematical Statistics},
  year = {1965},
  volume = {36},
  pages = {1049--1051},
}

@InCollection{lognormal-A-85,
  author =       "C. E. Antle",
  booktitle =    "Encyclopedia of Statistical Sciences",
  title =        "Lognormal Distribution",
  volume =       "5",
  publisher =    "John Wiley \& Sons",
  pages =        "134--136",
  year =         "1985",
}

@article{Loh-Shih97,
  author = {Wei-Yin Loh and Yu-Shan Shih},
  title = {Split selection methods for classification trees},
  journal = {Statistica Sinica},
  year = {1997},
  volume = {7},
  pages = {815--840},
}

@incollection{loosli-canu-bottou-2006,
  author = {Loosli, Ga{\"e}lle and Canu, St{\'e}phane and Bottou, L{\'e}on},
  title = {Training Invariant Support Vector Machines using Selective Sampling},
  pages = {301--320},
  editor = {Bottou, L{\'e}on and Chapelle, Olivier and {DeCoste}, Dennis and Weston, Jason},
  booktitle = {Large Scale Kernel Machines},
  publisher = {MIT Press},
  address = {Cambridge, MA},
  year = {2007},
  url = {http://leon.bottou.org/papers/loosli-canu-bottou-2006},
}

@article{Lowe04,
  author = {D. G. Lowe},
  title = {Distinctive Image Features from Scale-Invariant Keypoints},
  journal = {International Journal of Computer Vision},
  year = {2004},
  volume = {60},
  number = {2},
  pages = {91--110},
}

@article{Lowe95,
  author = {D. G. Lowe},
  title = {Similarity metric learning for a variable-kernel classifier},
  journal = {Neural Computation},
  year = {1995},
  volume = {7},
  number = {1},
  pages = {72--85},
}

@inproceedings{lu04,
  author = {Wen-Cong Lu and Nian-Yi Chen and Guo-Zheng Li and Jie Yang},
  title = {Multitask learning using partial least square method},
  booktitle = {Proceedings of the Seventh International Conference on Information Fusion},
  editor = {Per Svensson and Johan Schubert},
  year = {2004},
  month = jun,
  volume = {I},
  pages = {79--84},
  publisher = {International Society of Information Fusion},
  address = {Mountain View, CA},
  location = {Stockholm, Sweden},
}

@book{Lue84,
  author = {D. G. Luenberger},
  title = {Linear and Nonlinear Programming},
  year = {1984},
  publisher = {Addison Wesley},
}

@book{Luenberger86,
  author = {D. G. Luenberger},
  title = {Linear and Nonlinear Programming},
  year = {1986},
  publisher = {Addison-Wesley},
  address = {Reading},
}

@inproceedings{Lyu09,
  author = {Siwei Lyu},
  title = {Interpretation and Generalization of Score Matching},
  booktitle = {The proceedings of the 25th Conference on Uncertainty in Artificial Intelligence},
  year = {2009},
}

@book{Ma85,
  author = {S.-K. Ma},
  title = {Statistical Mechanics},
  year = {1985},
  publisher = {World Scientific},
  address = {Philadelphia},
}

@InProceedings{Ma09,
 author = {Justin Ma and Lawrence K. Saul and Stefan Savage and Geoffrey M. Voelker},
 title = {Identifying Suspicious {URLs}: An Application of Large-Scale Online Learning},
 booktitle = {Proceedings of the International Conference on Machine Learning},
 year = {2009},
 pages = {681--688},
 location = {Montreal, Canada},
}

@Misc{MacKay+Neal94,
  author =       "D. MacKay and R. Neal",
  title =        "Automatic Relevance Determination",
  year =         "1994",
  note =         "Unpublished report. See also MacKay D., 1995, Probable
                 Networks and Plausible Predictions -- A Review of
                 Practical {Bayesian} Methods for Supervised Neural
                 Networks, in {\em Network: Computation in Neural
                 Systems}, v. 6, pp. 469--505",
}

@book{MacKay03,
  author = {David MacKay},
  title = {Information Theory, Inference and Learning Algorithms},
  year = {2003},
  publisher = {Cambridge University Press},
}

@misc{MacKay2001,
  author = {David MacKay},
  title = {Failures of the One-Step Learning Algorithm},
  note = {Unpublished report},
  year = {2001},
}

@article{MacKay90,
  author = {D. J. C. MacKay and K. D. Miller},
  title = {Analysis of Linsker's Simulation of Hebbian Rules},
  journal = nc,
  year = {1990},
  volume = {2},
  pages = {173--187},
}

@phdthesis{MacKay91,
  author = {D. J. C. MacKay},
  title = {Bayesian methods for adaptive models},
  year = {1991},
  school = {California Institute of Technology},
}

@Article{MacKay92a,
  author =       "David J. C. MacKay",
  title =        "Bayesian interpolation",
  journal =      "Neural Computation",
  volume =       "4",
  number =       "3",
  pages =        "415--447",
  year =         "1992",
}

@article{MacKay92b,
  author = {D. J. C. MacKay},
  title = {The evidence framework applied to classification networks},
  journal = {Neural Computation},
  year = {1992},
  volume = {4},
  number = {5},
  pages = {698--714},
}

@Article{MacKay92c,
  author =       "David J. C. MacKay",
  title =        "A practical {Bayesian} framework for backpropagation
                 networks",
  journal =      "Neural Computation",
  volume =       "4",
  number =       "3",
  pages =        "448--472",
  year =         "1992",
}

@Article{MacKay98,
  author =       "D. J. C. MacKay and R. J. McEliece and J-F. Cheng",
  title =        "Turbo-decoding as an instance of Pearl's belief
                 propagation algorithm",
  journal =      "IEEE Journal on Selected Areas in Communications",
  year =         "1998",
  note =         "In press",
}

@techreport{MacKay98b,
  author = {D. J. C. MacKay},
  title = {Introduction to {G}aussian Processes},
  institution = {Cambridge University},
  year = {1998},
  URL = {http://wol.ra.phy.cam.ac.uk/mackay/gpB.pdf},
}

@article{Mackey77,
  author = {M. C. Mackey and L. Glass},
  title = {Oscillation and Chaos in Physiological Control Systems},
  journal = science,
  year = {1977},
  volume = {197},
  pages = {287},
}

@InProceedings{Maclin-iwml91,
  author =       "R. Maclin and J. W. Shavlik",
  editor =       "L. Birnbaum and G. Collins",
  booktitle =    "Machine Learning: Proceedings of the Eighth
                 International Workshop",
  title =        "Refining Domain Theories Expressed as Finite-State
                 Automata",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo CA",
  year =         "1991",
}

@Article{Maclin-ml,
  author =       "R. Maclin and J. W. Shavlik",
  title =        "Using Knowledge-Based Neural Networks to Improve
                 Algorithms: Refining the Chou-Fasman Algorithm for
                 Protein Folding",
  journal =      mlearn,
  volume =       "11",
  pages =        "195--215",
  year =         "1993",
}

@InProceedings{MacQueen67,
  author =       "James B. MacQueen",
  booktitle =    "Proceedings of the Fifth Berkeley Symposium on
                 Mathematical Statistics and Probability",
  title =        "Some Methods for Classification and Analysis of
                 Multivariate Observations",
  volume =       "1",
  pages =        "281--296",
  year =         "1967",
}

% doi holds the bare identifier; styles add the resolver prefix themselves.
@Article{Mahapatra+al-1997,
  author =       "S. Mahapatra and R. N. Mahapatra and B. N. Chatterji",
  title =        "A parallel formulation of back-propagation learning on
                 distributed memory multiprocessors",
  journal =      "Parallel Computing",
  volume =       "22",
  number =       "12",
  publisher =    "Elsevier Science Publishers",
  address =      "Amsterdam, The Netherlands",
  pages =        "1661--1675",
  year =         "1997",
  ISSN =         "0167-8191",
  doi =          "10.1016/S0167-8191(96)00051-8",
}

@InCollection{Mairal-2009,
  author =       "Julien Mairal and Francis Bach and Jean Ponce and Guillermo Sapiro and Andrew Zisserman",
  editor =       NIPS21ed,
  booktitle =    NIPS21,
  title =        "Supervised Dictionary Learning",
  publisher =    "NIPS Foundation",
  pages =        "1033--1040",
  year =         "2009",
}
% month uses the predefined macro so the style can format or translate it.
@book{Maimon+Rokach-2005,
    author = {Maimon, O.  and Rokach, L. },
    howpublished = {Hardcover},
    isbn = {0387244352},
    month = sep,
    publisher = {Springer},
    title = {Data Mining and Knowledge Discovery Handbook},
    year = {2005}
}

@inproceedings{Makram-Ebeid89,
  author    = {S. Makram-Ebeid and J.-A. Sirat and J.-R. Viala},
  title     = {A Rationalized Back-Propagation Learning Algorithm},
  booktitle = ijcnn,
  volume    = {2},
  pages     = {373--380},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

@article{mallat93matching,
  author  = {S. Mallat and Z. Zhang},
  title   = {Matching pursuit with time-frequency dictionaries},
  journal = {IEEE Trans. Signal Proc.},
  volume  = {41},
  number  = {12},
  pages   = {3397--3415},
  month   = dec,
  year    = {1993},
}

% The field name must be "editor" (singular) for BibTeX to read it, and the
% city goes in address, not publisher.
@InProceedings{malouf2002conll,
  author =       "Robert Malouf",
  editor =       "Dan Roth and Antal van den Bosch",
  booktitle =    "Proceedings of CoNLL-2002",
  title =        "A comparison of algorithms for maximum entropy
                 parameter estimation",
  address =      "Taipei, Taiwan",
  pages =        "49--55",
  year =         "2002",
}

@book{Mandelbrot82,
  author    = {B. B. Mandelbrot},
  title     = {The Fractal Geometry of Nature},
  publisher = {Freeman},
  address   = {San Francisco},
  year      = {1982},
}

% The citation key keeps the unaccented spelling so existing \cite commands
% still resolve.
@Book{Manning+Schutze99,
  author =       "Christopher Manning and Hinrich Sch{\"u}tze",
  title =        "Foundations of Statistical Natural Language
                 Processing",
  publisher =    "MIT Press",
  year =         "1999",
}

@inproceedings{Mantysalo92firenze,
  author    = {Jyri M{\"{a}}ntysalo and Kari Torkkola and Teuvo Kohonen},
  title     = {Experiments on the use of {LVQ} in phoneme-level segmentation of speech},
  booktitle = {Proc. of the Second Workshop on Neural Networks for Speech Processing},
  publisher = {LINT},
  address   = {Firenze (Italy)},
  year      = {1992},
}

% No final period in the title: the bibliography style adds its own.
@article{Marcelja-1980,
    author = {Marcelja, S.},
    journal = {Journal of the Optical Society of America},
    month = nov,
    number = {11},
    pages = {1297--1300},
    title = {Mathematical description of the responses of simple cortical cells},
    url = {http://view.ncbi.nlm.nih.gov/pubmed/7463179},
    volume = {70},
    year = {1980}
}

% The accent is written as a BibTeX special character ({\'a}) so that
% sorting and label generation treat it as one letter.
@Article{Marchand90,
  author =       "M. Marchand and M. Golea and P. Ruj{\'a}n",
  title =        "A Convergence Theorem for Sequential Learning in
                 Two-Layer Perceptrons",
  journal =      eul,
  volume =       "11",
  pages =        "487--492",
  year =         "1990",
}

@article{Marcotte-92,
  author  = {P. Marcotte and G. Savard},
  title   = {Novel approaches to the discrimination problem},
  journal = {Zeitschrift f{\"u}r Operations Research (Theory)},
  volume  = {36},
  pages   = {517--545},
  year    = {1992},
}

% Same article as the entry keyed Marcus-et-al91; both keys are kept.
@article{Marcus91,
  author  = {C. M. Marcus and F. R. Waugh and R. M. Westervelt},
  title   = {Nonlinear Dynamics and Stability of Analog Neural Networks},
  journal = {Physica D},
  volume  = {51},
  pages   = {234--247},
  year    = {1991},
  note    = {(special issue)},
}

% Same article as the entry keyed Marcus91; the page range matches that
% entry (the pages field must not repeat the year).
@Article{Marcus-et-al91,
  author =       "C. M. Marcus and F. R. Waugh and R. M. Westervelt",
  title =        "Nonlinear Dynamics and Stability of Analog Neural
                 Networks",
  journal =      physicaD,
  volume =       "51",
  pages =        "234--247",
  year =         "1991",
  note =         "(special issue)",
}

@article{Markov13,
  author  = {A. A. Markov},
  title   = {An example of statistical investigation in the text of `Eugene Onyegin' illustrating coupling of `tests' in chains},
  journal = {Proceedings of the Academy of Science, St. Petersburg},
  volume  = {7},
  pages   = {153--162},
  year    = {1913},
}

% The citation key keeps its historical spelling so existing \cite commands
% still resolve; the author field carries the correct surname.
@Article{Markovitz-52,
  author =       "H. M. Markowitz",
  title =        "Portfolio Selection",
  journal =      "Journal of Finance",
  volume =       "7",
  number =       "1",
  pages =        "77--91",
  year =         "1952",
}

% Accents are written as BibTeX special characters ({\'a}, {\'e}) so that
% sorting and label generation treat each as one letter.
@InProceedings{maron98,
  author =       "Oded Maron and Tom{\'a}s Lozano-P{\'e}rez",
  editor =       NIPS10ed,
  booktitle =    NIPS10,
  title =        "A Framework for Multiple-Instance Learning",
  volume =       "10",
  publisher =    "{MIT} Press",
  year =         "1998",
}

@Article{Marquardt63,
  author =       "D. W. Marquardt",
  title =        "An algorithm for least-squares estimation of
                 non-linear parameters",
  journal =      "Journal of the Society for Industrial and Applied
                 Mathematics",
  volume =       "11",
  number =       "2",
  pages =        "431--441",
  year =         "1963",
}

@article{Marr69,
  author  = {D. Marr},
  title   = {A Theory of Cerebellar Cortex},
  journal = jphysiol,
  volume  = {202},
  pages   = {437--470},
  year    = {1969},
}

@article{Marr70,
  author  = {D. Marr},
  title   = {A Theory for Cerebral Neocortex},
  journal = PRSLB,
  volume  = {176},
  pages   = {161--234},
  year    = {1970},
}

@article{Marr71,
  author  = {D. Marr},
  title   = {Simple Memory: {A} Theory for Archicortex},
  journal = PTRSL,
  volume  = {262},
  pages   = {23--81},
  year    = {1971},
}

% journal is the string macro science (not defined in this part of the file).
@Article{Marr76,
  author =       "D. Marr and T. Poggio",
  title =        "Cooperative Computation of Stereo Disparity",
  journal =      science,
  volume =       "194",
  number =       "4262",
  pages =        "283--287",
  year =         "1976",
}

@book{Marr82,
  author    = {D. Marr},
  title     = {Vision},
  publisher = {Freeman},
  address   = {San Francisco},
  year      = {1982},
}

@article{Martin91,
  author  = {G. L. Martin and J. A. Pittman},
  title   = {Recognizing hand-printed letters and digits using backpropagation learning},
  journal = nc,
  volume  = {3},
  number  = {2},
  pages   = {258--267},
  year    = {1991},
}

@article{Mashouk+Reed91,
  author  = {K. A. Al-Mashouq and I. S. Reed},
  title   = {Including Hints in Training Neural Nets},
  journal = nc,
  volume  = {3},
  number  = {4},
  pages   = {418},
  year    = {1991},
}

% NOTE(review): the key says 98 but the venue macro is NIPS12 with year 1999;
% confirm which NIPS volume this paper appeared in.
@InProceedings{Mason98,
  author =       "L. Mason and P. L. Bartlett and J. Baxter",
  editor =       NIPS12ed,
  booktitle =    NIPS12,
  title =        "Direct Optimization of Margins Improves Generalization
                 in Combined Classifiers",
  year =         "1999",
}

% Same paper as the entry keyed mbbf-bagd-00; both keys are kept.
@inproceedings{Mason99,
  author    = {L. Mason and J. Baxter and P. Bartlett and M. Frean},
  editor    = NIPS12ed,
  title     = {Boosting Algorithms as Gradient Descent},
  booktitle = NIPS12,
  pages     = {512--518},
  publisher = {MIT Press},
  year      = {2000},
}

@inproceedings{Matan92,
  author    = {O. Matan and C. J. C. Burges and Y. {LeCun} and J. S. Denker},
  editor    = NIPS4ed,
  title     = {Multi-Digit Recognition Using a Space Displacement Neural Network},
  booktitle = NIPS4,
  pages     = {488--495},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  year      = {1992},
}

@InProceedings{matic-92a,
  author =       "N. Mati{\'c} and I. Guyon and L. Bottou and J. Denker
                 and V. Vapnik",
  booktitle =    "11th International Conference on Pattern Recognition",
  title =        "Computer Aided Cleaning of Large Databases for
                 Character Recognition",
  volume =       "II",
  pages =        "330--333",
  year =         "1992",
}

% No address is recorded; an empty field would only trigger a BibTeX warning.
@Misc{matrix-cookbook,
  author =       "K. B. Petersen and M. S. Pedersen",
  title =        "The Matrix Cookbook",
  publisher =    "Technical University of Denmark",
  month =        feb,
  year =         "2006",
  note =         "Version 20051003",
  abstract =     "Matrix identities, relations and approximations. A
                 desktop reference for quick overview of mathematics of
                 matrices.",
  keywords =     "Matrix identity, matrix relations, inverse, matrix
                 derivative",
}

@article{Mattis76,
  author  = {D. Mattis},
  title   = {Solvable Spin Systems with Random Interactions},
  journal = plettA,
  volume  = {56},
  pages   = {421--422},
  year    = {1976},
}

@article{MaxEnt96,
  author  = {Adam L. Berger and Vincent J. {Della Pietra} and Stephen A. {Della Pietra}},
  title   = {A maximum entropy approach to natural language processing},
  journal = {Computational Linguistics},
  volume  = {22},
  pages   = {39--71},
  year    = {1996},
}

@article{Mayraz+Hinton-2002,
  author  = {G. Mayraz and G. E. Hinton},
  title   = {Recognizing handwritten digits using hierarchical products of experts},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {24},
  pages   = {189--197},
  year    = {2002},
}

@inproceedings{Mazaika87,
  author    = {P. K. Mazaika},
  editor    = {M. Caudill and C. Butler},
  title     = {A Mathematical Model of the {Boltzmann} Machine},
  booktitle = icnn,
  volume    = {3},
  pages     = {157--163},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  year      = {1987},
}

% Same paper as the entry keyed Mason99; both keys are kept.
@inproceedings{mbbf-bagd-00,
  author    = {L. Mason and J. Baxter and P. L. Bartlett and M. Frean},
  editor    = NIPS12ed,
  title     = {Boosting algorithms as gradient descent},
  booktitle = NIPS12,
  pages     = {512--518},
  year      = {2000},
}

% Venue data is spelled out literally: the ICML08* macros used elsewhere in
% this file presumably name the 2008 conference (TODO confirm), whereas this
% paper appeared at ICML 1998.
@InProceedings{McCallum+Nigam-1998,
  author =       "A. {McCallum} and K. Nigam",
  booktitle =    "Proceedings of the Fifteenth International Conference
                 on Machine Learning ({ICML}'98)",
  editor =       "Jude W. Shavlik",
  publisher =    "Morgan Kaufmann",
  title =        "Employing {EM} and pool-based active learning for text
                 classification",
  pages =        "350--358",
  year =         "1998",
}

@inproceedings{McCallumA2006,
  author       = {Andrew McCallum and Chris Pal and Gregory Druck and Xuerui Wang},
  title        = {Multi-Conditional Learning: Generative/Discriminative Training for Clustering and Classification},
  booktitle    = {Twenty-first National Conference on Artificial Intelligence (AAAI-06)},
  publisher    = {AAAI Press},
  year         = {2006},
  OPTbibsource = {DBLP, http://dblp.uni-trier.de},
  OPTcrossref  = {DBLP:conf/aaai/2006},
}

@Article{McClelland+Rumelhart-81,
  author =       "James L. {McClelland} and David E. Rumelhart",
  title =        "An interactive activation model of context effects in letter perception",
  journal =      "Psychological Review",
  volume =       "88",
  pages =        "375--407",
  year =         "1981",
}

% The group name is braced so BibTeX treats it as one indivisible name
% instead of parsing it as a person.
@Book{McClelland86a,
  author =       "James L. McClelland and David E. Rumelhart and {the PDP
                 Research Group}",
  title =        "Parallel Distributed Processing: Explorations in the
                 Microstructure of Cognition",
  volume =       "2",
  publisher =    "MIT Press",
  address =      "Cambridge",
  year =         "1986",
}

@incollection{McClelland86b,
  author    = {J. L. McClelland and J. L. Elman},
  editor    = {J. L. McClelland and D. E. Rumelhart},
  title     = {Interactive Processes in Speech Perception: The {TRACE} Model},
  booktitle = pdp,
  volume    = {2},
  chapter   = {15},
  pages     = {58--121},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1986},
}

@book{McClelland88,
  author    = {J. L. McClelland and D. E. Rumelhart},
  title     = {Explorations in Parallel Distributed Processing},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1988},
}

@article{McCulloch43,
  author  = {W. S. McCulloch and W. Pitts},
  title   = {A Logical Calculus of Ideas Immanent in Nervous Activity},
  journal = bmbiophys,
  volume  = {5},
  pages   = {115--133},
  year    = {1943},
}

@inproceedings{Mcdermott89,
  author       = {E. McDermott and S. Katagiri},
  title        = {Shift-Invariant, Multi-Category Phoneme Recognition using {Kohonen's} {LVQ2}},
  booktitle    = icassp,
  volume       = {1},
  pages        = {81--84},
  organization = {IEEE},
  address      = {Glasgow, Scotland},
  year         = {1989},
}

@article{Mcdermott91,
  author   = {E. McDermott and S. Katagiri},
  title    = {{LVQ}-based shift-tolerant phoneme recognition},
  journal  = {IEEE Transactions on Signal Processing},
  volume   = {39},
  number   = {6},
  pages    = {1398--1411},
  year     = {1991},
  OPTmonth = {June},
}

@article{McEliece87,
  author  = {R. J. McEliece and E. C. Posner and E. R. Rodemich and S. S. Venkatesh},
  title   = {The Capacity of the Hopfield Associative Memory},
  journal = ieeeit,
  volume  = {33},
  pages   = {461--482},
  year    = {1987},
}

@inproceedings{McInerny89,
  author    = {J. M. McInerny and K. G. Haines and S. Biafore and R. Hecht-Nielsen},
  title     = {Back Propagation Error Surfaces Can Have Local Minima},
  booktitle = ijcnn,
  volume    = {2},
  pages     = {627},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

@book{McLachlan2000,
  author    = {G. J. McLachlan and D. Peel},
  title     = {Finite Mixture Models},
  publisher = {Wiley},
  address   = {New York},
  year      = {2000},
}

% No final period in the title: the bibliography style adds its own.
@Book{McLachlan88,
  author =       "G. J. McLachlan and K. E. Basford",
  title =        "Mixture models: Inference and applications to
                 clustering",
  publisher =    "Marcel Dekker",
  year =         "1988",
}

% The citation key keeps its original casing so existing \cite commands still
% resolve; the surname is spelled as in the other entries by this author.
@book{Mclachlan-2004,
    author = {Geoffrey J. McLachlan},
    howpublished = {Paperback},
    isbn = {0471691151},
    month = aug,
    publisher = {Wiley-Interscience},
    title = {Discriminant Analysis and Statistical Pattern Recognition},
    year = {2004}
}

@article{McLoone+Irwin-1997,
  author  = {S. McLoone and G. W. Irwin},
  title   = {Fast Parallel Off-Line Training of Multilayer Perceptrons},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {8},
  number  = {3},
  pages   = {646--653},
  year    = {1997},
}

@book{Mead89,
  author    = {C. Mead},
  title     = {Analog {VLSI} and Neural Systems},
  publisher = {Addison Wesley},
  address   = {Reading},
  year      = {1989},
}

% The proper noun in the title is braced so sentence-casing styles keep its
% capital letter.
@InProceedings{Meila96,
  author =       "M. Meila and M. I. Jordan",
  editor =       NIPS8ed,
  booktitle =    NIPS8,
  title =        "Learning fine motion by {Markov} mixtures of experts",
  publisher =    "MIT Press, Cambridge, MA",
  year =         "1996",
}

@inproceedings{Mel+Koch90,
  author    = {Bartlett W. Mel and Christof Koch},
  editor    = NIPS2ed,
  title     = {{Sigma}-{Pi} Learning: On Radial Basis Functions and Cortical Associative Learning},
  booktitle = NIPS2,
  pages     = {474--481},
  publisher = {Morgan Kaufmann},
  year      = {1990},
}

@inproceedings{Melvilleetal,
  author    = {P. Melville and R. J. Mooney and R. Nagarajan},
  title     = {Content-boosted collaborative filtering},
  booktitle = {Proceedings of the ACM SIGIR Workshop on Recommender Systems},
  month     = sep,
  year      = {2001},
  keywords  = {boosted collaborative filtering content},
  location  = {New Orleans, LA},
}

@inproceedings{Memisevic+Hinton-2007,
  author    = {Roland Memisevic and Geoffrey E. Hinton},
  title     = {Unsupervised learning of image transformations},
  booktitle = cvpr07,
  year      = {2007},
}

@PhdThesis{Memisevic-thesis,
  author =       "Roland Memisevic",
  title =        "Non-linear latent factor models for revealing
                 structure in high-dimensional data",
  school =       "Department of Computer Science, University of
                 Toronto",
  address =      "Toronto, Ontario, Canada",
  year =         "2007",
}

% The edition is stored in its own field so the style formats it.
@Book{Mendelson97,
  author =       "E. Mendelson",
  title =        "Introduction to Mathematical Logic",
  edition =      "Fourth",
  publisher =    "Chapman \& Hall",
  year =         "1997",
}

@inproceedings{Merkel-1994,
  author    = {Magnus Merkel and Bernt Nilsson and Lars Ahrenberg},
  title     = {A Phrase-Retrieval System Based on Recurrence},
  booktitle = {Proceedings of the 4th Workshop on Very Large Corpora},
  address   = {Tokyo, Japan},
  year      = {1994},
}

@inproceedings{Merkel-2000,
  author    = {Magnus Merkel and Mikael Andersson},
  title     = {Knowledge-lite extraction of multi-word units with language filters and entropy thresholds},
  booktitle = {Proceedings of RIAO'2000},
  volume    = {1},
  pages     = {737--746},
  year      = {2000},
}

@inproceedings{Merlo86,
  author    = {E. Merlo and R. De Mori and G. Mercier and M. Palakal},
  title     = {A continuous parameter and frequency domain based {Markov} model},
  booktitle = icassp,
  pages     = {1597--1600},
  year      = {1986},
}

@Article{Merzenich-2000,
  author =       "M. Merzenich",
  title =        "Seeing in the Sound Zone",
  journal =      "Nature",
  volume =       "404",
  pages =        "820--821",
  year =         "2000",
}

% journal is the string macro jcp (not defined in this part of the file).
@Article{Metropolis53,
  author =       "N. Metropolis and A. W. Rosenbluth and M. N.
                 Rosenbluth and A. H. Teller and E. Teller",
  title =        "Equation of State Calculations by Fast Computing
                 Machines",
  journal =      jcp,
  volume =       "21",
  number =       "6",
  pages =        "1087--1092",
  year =         "1953",
}

% The accent is written as a BibTeX special character ({\'e}) so that
% sorting and label generation treat it as one letter.
@Article{Mezard85,
  author =       "M. M{\'e}zard and G. Parisi",
  title =        "Replicas and Optimization",
  journal =      jppl,
  volume =       "46",
  pages =        "771--778",
  year =         "1985",
}

% The accent is written as a BibTeX special character ({\'e}) so that
% sorting and label generation treat it as one letter.
@Article{Mezard86,
  author =       "M. M{\'e}zard and G. Parisi",
  title =        "A Replica Analysis of the Travelling Salesman
                 Problem",
  journal =      jpp,
  volume =       "47",
  pages =        "1285--1296",
  year =         "1986",
}

% The accent is written as a BibTeX special character ({\'e}) so that
% sorting and label generation treat it as one letter.
@Book{Mezard87,
  author =       "M. M{\'e}zard and G. Parisi and M. A. Virasoro",
  title =        "Spin Glass Theory and Beyond",
  publisher =    "World Scientific",
  address =      "Singapore",
  year =         "1987",
}

% The accent is written as a BibTeX special character ({\'e}) so that
% sorting and label generation treat it as one letter.
@Article{Mezard88,
  author =       "M. M{\'e}zard and G. Parisi",
  title =        "The {Euclidean} Matching Problem",
  journal =      jpp,
  volume =       "49",
  pages =        "2019--2025",
  year =         "1988",
}

% The accent is written as a BibTeX special character ({\'e}) so that
% sorting and label generation treat it as one letter.
@Article{Mezard89,
  author =       "M. M{\'e}zard and J.-P. Nadal",
  title =        "Learning in Feedforward Layered Networks: The Tiling
                 Algorithm",
  journal =      jpa,
  volume =       "22",
  pages =        "2191--2204",
  year =         "1989",
}

@article{Micchelli-1986,
  author  = {C. A. Micchelli},
  title   = {Interpolation of scattered data: distance matrices and conditionally positive definite functions},
  journal = {Constructive Approximation},
  volume  = {2},
  pages   = {11--22},
  year    = {1986},
}

% A compound word takes a single hyphen; "--" would typeset an en-dash.
@InProceedings{micchelli05,
  author =       "Charles A. {Micchelli} and Massimiliano {Pontil}",
  editor =       NIPS17ed,
  booktitle =    NIPS17,
  title =        "Kernels for Multi-task Learning",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "921--928",
  year =         "2005",
}

% NOTE(review): confirm the venue in booktitle against the published paper.
@InProceedings{Mihalcea2002,
  author =       "Rada Mihalcea",
  booktitle =    "Proceedings of the 6th Conference on Natural Language
                 Learning",
  title =        "Instance Based Learning with Automatic Feature
                 Selection Applied to Word Sense Disambiguation",
  year =         "2002",
  URL =          "http://citeseer.nj.nec.com/587173.html",
}

@article{Miikkulainen91,
  author  = {R. Miikkulainen and M. G. Dyer},
  title   = {Natural language processing with modular {PDP} networks and distributed lexicon},
  journal = {Cognitive Science},
  volume  = {15},
  pages   = {343--399},
  year    = {1991},
}

@article{Miller+Sachs83,
  author  = {M. M. Miller and M. B. Sachs},
  title   = {Representation of stop consonants in the discharge patterns of auditory nerve fibers},
  journal = jasa,
  volume  = {74},
  number  = {2},
  pages   = {502--517},
  year    = {1983},
}

% Same thesis as the entry keyed miller02one; both keys are kept.
@phdthesis{miller02,
  author = {Erik G. Miller},
  title  = {Learning from one example in machine vision by sharing probability densities},
  school = {Massachusetts Institute of Technology},
  year   = {2002},
}

% Same thesis as the entry keyed miller02; both keys are kept.
@phdthesis{miller02one,
  author = {Erik Miller},
  title  = {Learning from one example in machine vision by sharing probability densities},
  school = {Massachusetts Institute of Technology, Department of Electrical Engineering and Computer Science},
  year   = {2002},
}

@inproceedings{Miller89,
  author    = {G. F. Miller and P. M. Todd and S. U. Hegde},
  editor    = {J. D. Schaffer},
  title     = {Designing Neural Networks Using Genetic Algorithms},
  booktitle = {Proceedings of the Third International Conference on Genetic Algorithms},
  pages     = {379--384},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Arlington 1989},
  year      = {1989},
}

@article{MillerD1996,
  author    = {David Miller and Kenneth Rose},
  title     = {Hierarchical, unsupervised learning with growing via phase transitions},
  journal   = {Neural Computation},
  volume    = {8},
  number    = {2},
  pages     = {425--450},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1996},
  issn      = {0899-7667},
}

@article{Miller-ijprai93,
  author  = {C. B. Miller and C. L. Giles},
  title   = {Experimental Comparison of the Effect of Order in Recurrent Neural Networks},
  journal = {Int. Journal of Pattern Recognition and Artificial Intelligence},
  pages   = {205--228},
  year    = {1993},
  note    = {Special Issue on Applications of Neural Networks to Pattern Recognition (I. Guyon Ed.)},
}

@book{Minc-88,
  author    = {H. Minc},
  title     = {Nonnegative Matrices},
  publisher = {John Wiley \& Sons},
  address   = {New York},
  year      = {1988},
}

@book{Minsky67,
  author    = {M. L. Minsky},
  title     = {Computation: Finite and Infinite Machines},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs},
  year      = {1967},
}

@book{Minsky69,
  author    = {M. L. Minsky and S. A. Papert},
  title     = {Perceptrons},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1969},
}

@article{Misra-1997,
  author  = {Manavendra Misra},
  title   = {Parallel Environments for Implementing Neural Networks},
  journal = {Neural Computing Surveys},
  volume  = {1},
  pages   = {48--60},
  year    = {1997},
}

@article{Mitchison89,
  author  = {G. J. Mitchison and R. M. Durbin},
  title   = {Bounds on the Learning Capacity of Some Multi-Layer Networks},
  journal = biocyb,
  volume  = {60},
  pages   = {345--356},
  year    = {1989},
}

@article{ML:Bauer:boost,
  author  = {Eric Bauer and Ron Kohavi},
  title   = {An empirical comparison of voting classification algorithms: Bagging, Boosting, and variants},
  journal = {Machine Learning},
  year    = {1998},
}

% Year of the journal issue, volume 24(2); the 1994 date belongs to the
% earlier technical-report version.
@Article{ML:Breiman:bagging,
  author =       "Leo Breiman",
  title =        "Bagging Predictors",
  journal =      "Machine Learning",
  volume =       "24",
  number =       "2",
  pages =        "123--140",
  year =         "1996",
}

% journal holds only the journal name; the submission status and download
% location are in note, without a forced line break.
@Article{ML:Dietterich:adaboost+noise,
  author =       "Thomas G. Dietterich",
  title =        "An experimental comparison of three methods for
                 constructing ensembles of decision trees: Bagging,
                 Boosting, and randomization",
  journal =      "Machine Learning",
  year =         "1998",
  note =         "Submitted. Available at {\tt
                 ftp://ftp.cs.orst.edu/pub/tgd/papers/tr-randomized-c4.ps.gz}",
}

@article{ML:Schapire:weaklearn,
  author  = {Robert E. Schapire},
  title   = {The strength of weak learnability},
  journal = {Machine Learning},
  volume  = {5},
  number  = {2},
  pages   = {197--227},
  year    = {1990},
}

@misc{MLJ-model-selection-combination-2001,
  author = {Y. Bengio and D. Schuurmans},
  title  = {Special Issue on New methods for model selection and model combination},
  year   = {2002},
  note   = {{\em Machine Learning}, 48(1)},
}

% Full-length variant; the entry keyed Mnih+Hinton-2007-small cites the same
% paper with an abbreviated venue.
@inproceedings{Mnih+Hinton-2007,
  author    = {Andriy Mnih and Geoffrey E. Hinton},
  editor    = ICML07ed,
  title     = {Three New Graphical Models for Statistical Language Modelling},
  booktitle = ICML07,
  pages     = {641--648},
  publisher = ICML07publ,
  year      = {2007},
}

% Abbreviated variant of the entry keyed Mnih+Hinton-2007.
@inproceedings{Mnih+Hinton-2007-small,
  author    = {Andriy Mnih and Geoffrey E. Hinton},
  title     = {Three New Graphical Models for Statistical Language Modelling},
  booktitle = {ICML 2007},
  year      = {2007},
}

@inproceedings{Mnih+Hinton-2009,
  author    = {Andriy Mnih and Geoffrey E. Hinton},
  editor    = NIPS21ed,
  title     = {A Scalable Hierarchical Distributed Language Model},
  booktitle = NIPS21,
  pages     = {1081--1088},
  year      = {2009},
}

% Page numbers are not recorded; an empty pages field would only trigger a
% BibTeX warning.
@InProceedings{mohri-pereira-riley96,
  author =       "M. Mohri and F. C. N. Pereira and M. D. Riley",
  booktitle =    "ECAI 96, 12th European Conference on Artificial
                 Intelligence",
  title =        "Weighted automata in text and speech processing",
  year =         "1996",
}

@article{Mohri96,
  author  = {M. Mohri},
  title   = {Finite-State Transducers in Language and Speech Processing},
  journal = {Computational Linguistics},
  volume  = {20},
  number  = {1},
  pages   = {1--33},
  year    = {1996},
}

% Only acronyms and proper names are brace-protected; bracing whole fields
% would stop the bibliography style from applying its own casing.
@InProceedings{Molina02,
  author =       "A. Molina and F. Pla and E. Segarra and L. Moreno",
  booktitle =    "Proceedings of 3rd International Conference on
                 Language Resources and Evaluation, {LREC2002}",
  title =        "Word Sense Disambiguation using Statistical Models
                 and {WordNet}",
  address =      "Las Palmas de Gran Canaria, Spain",
  year =         "2002",
}

@phdthesis{moller,
  author  = {M. {Moller}},
  title   = {Efficient Training of Feed-Forward Neural Networks},
  school  = {Aarhus University},
  address = {Aarhus, Denmark},
  year    = {1993},
}

% Standard styles never capitalise the first letter of a title, so it must be
% stored capitalised.
@InProceedings{moller-92,
  author =       "M. Moller",
  booktitle =    "Neural Networks for Signal Processing 2",
  title =        "Supervised learning on large redundant training sets",
  publisher =    "IEEE press",
  year =         "1992",
}

@inproceedings{Momma2003,
  author    = {M. Momma and K. P. Bennett},
  title     = {Sparse Kernel Partial Least Squares Regression},
  booktitle = colt03,
  year      = {2003},
}

@inproceedings{Montana89,
  author    = {D. J. Montana and L. Davis},
  editor    = {N. S. Sridharan},
  title     = {Training Feedforward Networks Using Genetic Algorithms},
  booktitle = {Eleventh International Joint Conference on Artificial Intelligence},
  pages     = {762--767},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Detroit 1989},
  year      = {1989},
}

% In this entry, address holds the meeting place and year, while the
% publisher's city is inside the publisher field.
@InProceedings{Moody88,
  author =       "J. Moody and C. Darken",
  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
  booktitle =    cmss88,
  title =        "Learning with Localized Receptive Fields",
  publisher =    "Morgan Kaufmann, San Mateo",
  address =      "Pittsburgh 1988",
  pages =        "133--143",
  year =         "1988",
}

@article{Moody89,
  author  = {J. Moody and C. Darken},
  title   = {Fast Learning in Networks of Locally-Tuned Processing Units},
  journal = nc,
  volume  = {1},
  pages   = {281--294},
  year    = {1989},
}

@inproceedings{Moody92,
  author    = {J. E. Moody},
  editor    = NIPS4ed,
  title     = {The Effective Number of Parameters: An Analysis of Generalization and Regularization in Nonlinear Learning Systems},
  booktitle = NIPS4,
  pages     = {847--854},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1992},
}

@inproceedings{Moody92b,
  author    = {J. Moody and J. Utans},
  editor    = NIPS4ed,
  title     = {Principled architecture selection for neural networks: application to corporate bond rating prediction},
  booktitle = NIPS4,
  pages     = {683--690},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1992},
}

@article{moody93,
  author  = {J. Moody and U. Levin and S. Rehfuss},
  title   = {Predicting the {U.S.} Index of Industrial Production},
  journal = {Neural Network World},
  volume  = {3},
  number  = {6},
  pages   = {791--794},
  year    = {1993},
}

@incollection{Moody94,
  author    = {J. Moody},
  title     = {Prediction Risk and Architecture Selection for Neural Networks},
  booktitle = {From Statistics to Neural Networks: Theory and Pattern Recognition Applications},
  publisher = {Springer},
  year      = {1994},
}

@InCollection{Moody98,
  author =       "J. Moody",
  editor =       "G. B. Orr and K.-R. M{\"u}ller",
  booktitle =    "Neural Networks: Tricks of the Trade",
  title =        "Forecasting the economy with neural nets: a survey of
                 challenges",
  publisher =    "Springer",
  pages =        "347--372",
  year =         "1998",
}

% In this entry, address holds the meeting place and year, while the
% publisher's city is inside the publisher field.
@InProceedings{Moore88,
  author =       "B. Moore",
  editor =       "D. Touretzky and G. Hinton and T. Sejnowski",
  booktitle =    cmss88,
  title =        "{ART}1 and Pattern Clustering",
  publisher =    "Morgan Kaufmann, San Mateo",
  address =      "Pittsburgh 1988",
  pages =        "174--185",
  year =         "1988",
}

%% booktitle takes the proceedings-title macro (NIPS19); NIPS19ed is the
%% editor-list macro and belongs only in the editor field.
@InProceedings{MoosmannF2007,
  author =       "Frank Moosmann and Bill Triggs and Frederic Jurie",
  editor =       NIPS19ed,
  booktitle =    NIPS19,
  title =        "Fast Discriminative Visual Codebooks using Randomized
                 Clustering Forests",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "985--992",
  year =         "2007",
}

@InCollection{More+Wu-1996,
  author    = {Jorge More and Zhijun Wu},
  title     = {Smoothing techniques for macromolecular global optimization},
  booktitle = {Nonlinear Optimization and Applications},
  editor    = {G. Di Pillo and F. Giannessi},
  publisher = {Plenum Press},
  year      = {1996},
}

%% NOTE(review): the earlier page range 413--416 duplicated the ICASSP entry
%% Morgan90; 630--637 is the range recalled for the NIPS 2 volume. Confirm
%% against the proceedings.
@InProceedings{Morgan+Bourlard90b,
  author =       "N. Morgan and H. Bourlard",
  editor =       NIPS2ed,
  booktitle =    NIPS2,
  title =        "Generalization and parameter estimation in feedforward
                 nets: some experiments",
  publisher =    "Morgan Kaufmann",
  address =      "Denver, CO",
  pages =        "630--637",
  year =         "1990",
}

@InProceedings{Morgan90,
  author    = {N. Morgan and H. Bourlard},
  title     = {Continuous Speech Recognition Using Multilayer Perceptrons with Hidden {Markov} Models},
  booktitle = icassp,
  pages     = {413--416},
  address   = {Albuquerque, NM},
  year      = {1990},
}

%% Address is written "San Mateo, CA", matching the other Morgan Kaufmann
%% NIPS entries in this file.
@InProceedings{Morgan93,
  author =       "M. Cohen and H. Franco and N. Morgan and D. Rumelhart
                 and V. Abrash",
  editor =       NIPS5ed,
  booktitle =    NIPS5,
  title =        "Context-Dependent Multiple Distribution Phonetic
                 Modeling with {MLP}s",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "649--657",
  year =         "1993",
}

@InProceedings{Morgan95,
  author    = {N. Morgan and Y. Konig and S. L. Wu and H. Bourlard},
  title     = {Transition-based Statistical Training for {ASR}},
  booktitle = {Proceedings of IEEE Automatic Speech Recognition Workshop (Snowbird)},
  pages     = {133--134},
  year      = {1995},
}

@InProceedings{Morin+Bengio-2005,
  author    = {Fr\'ed\'eric Morin and Yoshua Bengio},
  title     = {Hierarchical Probabilistic Neural Network Language Model},
  booktitle = aistats05,
  editor    = aistats05ed,
  pages     = {246--252},
  publisher = {},
  year      = {2005},
  date      = {Jan 6-8, 2005},
  location  = {Savannah Hotel, Barbados},
}

@Article{Mosesova-2006,
  author  = {S. A. Mosesova and H. A. Chipman and R. J. MacKay and S. H. Steiner},
  title   = {Profile monitoring using mixed effects models},
  journal = {Submitted to Technometrics},
  year    = {2006},
}

@Article{MosesY1996,
  author    = {Y. Moses and S. Ullman and S. Edelman},
  title     = {Generalization to novel images in upright and inverted faces},
  journal   = {Perception},
  year      = {1996},
  volume    = {25},
  number    = {4},
  pages     = {443--461},
  OPTannote = {},
  OPTkey    = {},
  OPTmonth  = {},
  OPTnote   = {},
}

@Article{Movellan-2002,
  author  = {Javier R. Movellan and Paul Mineiro and R. J. Williams},
  title   = {A Monte-Carlo {EM} approach for partially observable diffusion processes: theory and applications to neural networks},
  journal = {Neural Computation},
  year    = {2002},
  volume  = {14},
  pages   = {1501--1544},
}

@TechReport{Movelland+McClelland91,
  author      = {Javier R. Movellan and James L. McClelland},
  title       = {Learning Continuous Probability Distributions with the Contrastive {Hebbian} Algorithm},
  institution = {Carnegie Mellon University, Dept. of Psychology},
  number      = {PDP.CNS.91.2},
  address     = {Pittsburgh, PA},
  year        = {1991},
}

%% NIPS 1 chapter on pruning network units by relevance assessment.
@InCollection{Mozer+Smolensky89,
  author =       "M. C. Mozer and P. Smolensky",
  editor =       NIPS1ed,
  booktitle =    NIPS1,
  title =        "Skeletonization: {A} technique for trimming the fat
                 from a network via relevance assessment",
  publisher =    "Morgan Kaufmann",
  pages =        "107--115",
  year =         "1989",
}

@InProceedings{Mozer-nips92,
  author    = {M. C. Mozer},
  title     = {The induction of Multiscale Temporal Structure},
  booktitle = NIPS4,
  editor    = NIPS4ed,
  pages     = {275--282},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1992},
}

@Article{mozer-smolensky-89,
  author  = {M. C. Mozer and P. Smolensky},
  title   = {Using relevance to reduce network size automatically},
  journal = {Connection Science},
  year    = {1989},
  volume  = {1},
  number  = {1},
  pages   = {3--16},
  key     = {Mozer},
}

@Article{Mozer-trnn2000,
  author  = {M. C. Mozer and R. Wolniewicz and D. B. Grimes and E. Johnson and H. Kaushansky},
  title   = {Predicting Subscriber Dissatisfaction and Improving Retention in the Wireless Telecommunications Industry},
  journal = {IEEE Transactions on Neural Networks, special issue on Data Mining and Knowledge Discovery},
  year    = {2000},
  volume  = {11},
  number  = {3},
}

@Article{Mozer89,
  author  = {M. C. Mozer},
  title   = {A Focused Back-Propagation Algorithm for Temporal Pattern Recognition},
  journal = cs,
  year    = {1989},
  volume  = {3},
  pages   = {349--381},
}

@InCollection{Mozer93,
  author    = {M. C. Mozer},
  title     = {Neural net architectures for temporal sequence processing},
  booktitle = {Predicting the Future and Understanding the Past},
  editor    = {A. Weigend and N. Gershenfeld},
  pages     = {243--264},
  publisher = {Addison-Wesley},
  address   = {Redwood City, CA},
  year      = {1993},
}

%% NOTE(review): the number field holds a URL rather than a report number;
%% left as is because the real report number is not known from this file.
@TechReport{MPIforum,
  author =       "Jack Dongarra and David Walker and {The Message
                 Passing Interface Forum}",
  title =        "{MPI}: {A} Message Passing Interface Standard",
  number =       "http://www-unix.mcs.anl.gov/mpi",
  institution =  "University of Tennessee",
  year =         "1995",
}

%% Friedman, Grosse and Stuetzle (the "FGS" of the citation key).
@Article{multidimensional-FGS-83,
  author =       "J. H. Friedman and E. Grosse and W. Stuetzle",
  title =        "Multidimensional additive spline approximation",
  journal =      "SIAM Journal on Scientific and Statistical Computing",
  volume =       "4",
  number =       "2",
  pages =        "291--301",
  year =         "1983",
}

@InProceedings{Munro87,
  author    = {P. Munro},
  title     = {A Dual Back-Propagation Scheme for Scalar Reward Learning},
  booktitle = {The Ninth Annual Conference of the Cognitive Science Society},
  pages     = {165--176},
  publisher = {Lawrence Erlbaum, Hillsdale},
  address   = {Seattle 1987},
  year      = {1987},
}

@InProceedings{MurraySal09,
  author    = {Iain Murray and Ruslan Salakhutdinov},
  title     = {Evaluating probabilities under high-dimensional latent variable models},
  booktitle = NIPS21,
  editor    = NIPS21ed,
  volume    = {21},
  pages     = {1137--1144},
  year      = {2009},
}

%% NOTE(review): the tail of the title ("knowledge for continuous speech
%% recognition") looks like text merged in from another entry; verify against
%% the ICASSP proceedings.
@InProceedings{Murveit93,
  author =       "H. Murveit and J. Butzberger and V. Digalakis and M.
                 Weintraub",
  booktitle =    icassp,
  title =        "Large-vocabulary dictation using {SRI}'s {DECIPHER}
                 speech recognition system: Progressive search
                 techniques knowledge for continuous speech
                 recognition",
  address =      "Minneapolis, Minnesota",
  pages =        "319--322",
  year =         "1993",
}

@Article{Muselli97,
  author  = {M. Muselli},
  title   = {On convergence properties of pocket algorithm},
  journal = {IEEE Transactions on Neural Networks},
  year    = {1997},
  volume  = {8},
  pages   = {623--629},
}

@article{Mutch-Lowe-2008,
  author  = "Jim Mutch and David G. Lowe",
  title   = "Object class recognition and localization using sparse features with limited receptive fields",
  journal = "International Journal of Computer Vision",
  year    = "2008",
  volume  = "80",
  number  = "1",
  pages   = "45--57",
}

@Article{myles90multiclass,
  author  = {J. Myles and D. Hand},
  title   = {The Multi-Class Measure Problem in Nearest Neighbour Discrimination Rules},
  journal = {Pattern Recognition},
  year    = {1990},
  volume  = {23},
  pages   = {1291--1297},
}

%% Four authors; every pair of names must be separated by " and ".
@Article{Nadal86,
  author =       "J.-P. Nadal and J.-P. Changeux and G. Toulouse and S.
                 Dehaene",
  title =        "Networks of Formal Neurons and Memory Palimpsests",
  journal =      eul,
  volume =       "1",
  pages =        "535--542",
  year =         "1986",
}

@Article{Nadaraya64,
  author  = {E. A. Nadaraya},
  title   = {On estimating regression},
  journal = {Theory of Probability and its Applications},
  year    = {1964},
  volume  = {9},
  pages   = {141--142},
}

%% Journal name spelled as in the other Nadaraya entries of this file.
@Article{Nadaraya65,
  author =       "E. A. Nadaraya",
  title =        "On nonparametric estimates of density functions and
                 regression curves",
  journal =      "Theory of Probability and its Applications",
  volume =       "10",
  pages =        "186--190",
  year =         "1965",
}

%% The accented letter is written as a BibTeX special character so that
%% 8-bit BibTeX sorts and labels the name correctly.
%% NOTE(review): "copy = yes" relies on a string macro named yes; confirm
%% that it is defined elsewhere in the file.
@Article{Nadas85,
  author =       "Arthur N{\'a}das",
  title =        "On {Turing's} Formula for Word Probabilities",
  journal =      "IEEE Transactions on Acoustics, Speech, and Signal
                 Processing",
  volume =       "33",
  number =       "6",
  pages =        "1415--1417",
  month =        dec,
  year =         "1985",
  copy =         yes,
}

%% Short-journal variant of Nadas85. The accented letter is written as a
%% BibTeX special character so that 8-bit BibTeX handles the name correctly.
@Article{Nadas85-small,
  author =       "Arthur N{\'a}das",
  title =        "On {Turing's} Formula for Word Probabilities",
  journal =      "ASSP",
  volume =       "33",
  number =       "6",
  pages =        "1415--1417",
  month =        dec,
  year =         "1985",
  copy =         yes,
}

@Article{Nadas88,
  author  = {A. Nadas and D. Nahamoo and M. A. Picheny},
  title   = {On a model-robust training method for speech recognition},
  journal = {IEEE Transactions on Acoustics, Speech and Signal Processing},
  year    = {1988},
  volume  = {ASSP-36},
  number  = {9},
  pages   = {1432--1436},
}

@Article{Nadeau-Bengio-2003,
  author  = {Claude Nadeau and Yoshua Bengio},
  title   = {Inference for the Generalization Error},
  journal = {Machine Learning},
  year    = {2003},
  volume  = {52},
  number  = {3},
  pages   = {239--281},
}

@Article{Nadeau-Bengio-2003-small,
  author  = {Claude Nadeau and Yoshua Bengio},
  title   = {Inference for the Generalization Error},
  journal = {Machine Learning},
  year    = {2003},
  volume  = {52(3)},
  pages   = {239--281},
}

@InProceedings{Nadeau00-nips,
  author    = {Claude Nadeau and Yoshua Bengio},
  title     = {Inference for the Generalization Error},
  booktitle = NIPS12,
  editor    = NIPS12ed,
  pages     = {307--313},
  publisher = {MIT Press},
  year      = {2000},
}

@InProceedings{Bonneville+al-1998,
  author    = {M. Bonneville and J. Meunier and Y. Bengio and J.P. Soucy},
  title     = {Support Vector Machines for Improving the classification of Brain Pet Images},
  booktitle = {SPIE Medical Imaging 1998},
  address   = {San Diego},
  year      = {1998},
}

@TechReport{Nadeau99-TR,
  author      = {Claude Nadeau and Yoshua Bengio},
  title       = {Inference for the Generalization Error},
  institution = {CIRANO},
  address     = {Montreal, Quebec, Canada},
  year        = {1999},
}

@InProceedings{nag86,
  author    = {R. Nag and K. H. Wong and F. Fallside},
  title     = {Script recognition using hidden {Markov} models},
  booktitle = icassp,
  pages     = {2071--2074},
  address   = {Tokyo},
  year      = {1986},
}

@MastersThesis{Nahm-2005,
  author = "E. Nahm",
  title  = "Classification models for transactional graph data",
  school = "Department of Mathematics and Statistics, Acadia University",
  year   = "2005",
}

%% Page range uses the ASCII "--" form.
@article{Naka-Rushton-1966a,
 author = {K.I. Naka and W.A.H. Rushton},
 year = 1966,
 title = {{S}-potentials from colour units in the retina of fish (Cyprinidae)},
 journal = {J. Physiol.},
 volume = 185,
 pages = {536--555},
}

%% Page range uses the ASCII "--" form.
@article{Naka-Rushton-1966b,
 author = {K.I. Naka and W.A.H. Rushton},
 year = 1966,
 title = {An attempt to analyse colour perception by electrophysiology},
 journal = {J. Physiol.},
 volume = 185,
 pages = {556--586},
}


@InProceedings{NakagawaT04,
  author    = {Tetsuji Nakagawa and Taku Kudoh and Yuji Matsumoto},
  title     = {Unknown Word Guessing and Part-of-Speech Tagging Using Support Vector Machines},
  booktitle = {Proceedings of the Sixth Natural Language Processing Pacific Rim Symposium},
  pages     = {325--331},
  address   = {Tokyo, Japan},
  year      = {2001},
}

@Article{Naradraya70,
  author  = {E. A. Nadaraya},
  title   = {Remarks on nonparametric estimates for density functions and regression curves},
  journal = {Theory of Probability and its Applications},
  year    = {1970},
  volume  = {15},
  pages   = {134--137},
}

@Book{Narendra89,
  author    = {K. Narendra and M. A. L. Thathachar},
  title     = {Learning Automata: An Introduction},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs},
  year      = {1989},
}

@Book{narendra:1989,
  author    = {K. S. Narendra and M. A. L. Thathachar},
  title     = {Learning Automata: an introduction},
  publisher = {Prentice Hall},
  year      = {1989},
}

@Article{Nasrabadi88a,
  author  = {N. M. Nasrabadi and R. A. King},
  title   = {Image Coding Using Vector Quantization: {A} Review},
  journal = ieeetcomm,
  year    = {1988},
  volume  = {36},
  pages   = {957--971},
}

@InProceedings{Nasrabadi88b,
  author    = {N. M. Nasrabadi and Y. Feng},
  title     = {Vector Quantization of Images Based upon the Kohonen Self-Organizing Feature Maps},
  booktitle = icnn,
  volume    = {1},
  pages     = {101--108},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@Article{Nass75,
  author  = {M. M. Nass and L. N. Cooper},
  title   = {A Theory for the Development of Feature Detecting Cells in Visual Cortex},
  journal = biocyb,
  year    = {1975},
  volume  = {19},
  pages   = {1--18},
}

@Article{Naylor88,
  author  = {J. Naylor and K. P. Li},
  title   = {Analysis of a Neural Network Algorithm for Vector Quantization of Speech Parameters},
  journal = nnsupp,
  year    = {1988},
  volume  = {1},
  pages   = {310},
}

@Article{NC:Baldi93,
  author  = {P. Baldi and Y. Chauvin},
  title   = {Neural Networks for Fingerprint Recognition},
  journal = {Neural Computation},
  year    = {1993},
  volume  = {5},
  number  = {3},
  pages   = {402--418},
  type    = {Letter},
}

@Article{nc:Geman+Bienenstock+Doursat:1992,
  author  = {S. Geman and E. Bienenstock and R. Doursat},
  title   = {Neural Networks and the Bias/Variance Dilemma},
  journal = {Neural Computation},
  year    = {1992},
  volume  = {4},
  number  = {1},
  pages   = {1--58},
  type    = {View},
}

%% Second author's given name is Federico.
@Article{nc:Poggio+Girosi:1998,
  author =       "Tomaso Poggio and Federico Girosi",
  title =        "A Sparse Representation for Function Approximation",
  journal =      "Neural Computation",
  volume =       "10",
  number =       "6",
  pages =        "1445--1454",
  year =         "1998",
}

@TechReport{Neal-GP97,
  author      = {Radford M. Neal},
  title       = {Monte Carlo implementation of {G}aussian process models for {Bayesian} regression and classification},
  institution = {University of Toronto, Department of Statistics},
  number      = {9702},
  year        = {1997},
}

@Article{Neal92,
  author  = {Radford M. Neal},
  title   = {Connectionist learning of belief networks},
  journal = {Artificial Intelligence},
  year    = {1992},
  volume  = {56},
  pages   = {71--113},
}

@InProceedings{Neal93a,
  author    = {Radford M. Neal},
  title     = {Bayesian learning via stochastic dynamics},
  booktitle = NIPS5,
  editor    = NIPS5ed,
  pages     = {475--482},
  address   = {Denver, CO},
  year      = {1993},
}

@TechReport{Neal93b,
  author      = {Radford M. Neal},
  title       = {Probabilistic inference using {Markov} chain {Monte-Carlo} methods},
  institution = {Dept. of Computer Science, University of Toronto},
  number      = {{CRG-TR}-93-1},
  year        = {1993},
}

@PhdThesis{Neal94,
  author = {Radford M. Neal},
  title  = {Bayesian Learning for Neural Networks},
  school = {Dept. of Computer Science, University of Toronto},
  year   = {1994},
}

@TechReport{Neal94b,
  author      = {Radford M. Neal},
  title       = {Sampling from Multimodal Distributions Using Tempered Transitions},
  institution = {Dept. of Statistics, University of Toronto},
  number      = {9421},
  year        = {1994},
}

@InCollection{Neal98,
  author    = {Radford M. Neal},
  title     = {Assessing relevance determination methods using {DELVE}},
  booktitle = {Neural Networks and Machine Learning},
  editor    = {C. M. Bishop},
  pages     = {97--129},
  publisher = {Springer-Verlag},
  year      = {1998},
}

@Misc{neal98assessing,
  author = {Radford M. Neal},
  title  = {Assessing Relevance Determination Methods Using {DELVE} Generalization in Neural Networks and Machine Learning},
  year   = {1998},
  text   = {Neal, R. N. (1998). Assessing Relevance Determination Methods Using DELVE Generalization in Neural Networks and Machine Learning, C. M. Bishop (editor), SpringerVerlag.},
}

%% month uses the predefined BibTeX macro so each style can format it;
%% doi is the bare identifier taken from the resolver URL below.
@article{Neal-2001,
  author =      "Radford M. Neal",
  journal =     "Statistics and Computing",
  month =       apr,
  number =      "2",
  pages =       "125--139",
  title =       "Annealed importance sampling",
  doi =         "10.1023/A:1008923215028",
  url =         "http://dx.doi.org/10.1023/A:1008923215028",
  volume =      "11",
  year =        "2001"
}

@Article{Needleman+Wunsch70,
  author  = {S. B. Needleman and C. D. Wunsch},
  title   = {A general method applicable to the search of similarities in the amino acid sequence of two proteins},
  journal = {Journal of Molecular Biology},
  year    = {1970},
  volume  = {48},
  pages   = {443--453},
}

@Article{NeweyWest1987,
  author  = {W. Newey and K. West},
  title   = {A Simple, Positive Semi-Definite, Heteroscedasticity and Autocorrelation Consistent Covariance Matrix},
  journal = {Econometrica},
  year    = {1987},
  volume  = {55},
  pages   = {703--708},
}

@InProceedings{Ney+Kneser93,
  author    = {Hermann Ney and Reinhard Kneser},
  title     = {Improved clustering techniques for class-based statistical language modelling},
  booktitle = {European Conference on Speech Communication and Technology (Eurospeech)},
  pages     = {973--976},
  address   = {Berlin},
  year      = {1993},
}

@Article{Ney92,
  author  = {H. Ney and D. Mergel and A. Noll and A. Paesler},
  title   = {Data driven search organization for continuous speech recognition},
  journal = {IEEE Transactions on Signal Processing},
  year    = {1992},
  month   = feb,
  volume  = {40},
  number  = {2},
  pages   = {272--281},
}

%% First editor's given name is Aravind.
@InProceedings{Ng1996,
  author =       "Hwee Tou Ng and Hian Beng Lee",
  editor =       "Aravind Joshi and Martha Palmer",
  booktitle =    "Proceedings of the Thirty-Fourth Annual Meeting of the
                 Association for Computational Linguistics",
  title =        "Integrating Multiple Knowledge Sources to Disambiguate
                 Word Sense: An Exemplar-Based Approach",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "San Francisco",
  pages =        "40--47",
  year =         "1996",
  URL =          "citeseer.nj.nec.com/ng96integrating.html",
}

@InProceedings{Ng1997,
  author    = {Hwee Tou Ng},
  title     = {Getting Serious about Word Sense Disambiguation},
  booktitle = SIGLEX97,
  pages     = {1--7},
  address   = {Washington},
  year      = {1997},
}

@InProceedings{Ng2002,
  author    = {Andrew Y. Ng and Michael I. Jordan and Yair Weiss},
  title     = {On Spectral Clustering: analysis and an algorithm},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002},
  original  = {orig/AA35.ps},
}

%% Names are in "First Last" order, so the second author is written
%% "Chaitanya Ekanadham" (surname Ekanadham).
@InProceedings{Ng2008,
  author =       "Honglak Lee and Chaitanya Ekanadham and Andrew Y. Ng",
  editor =       NIPS20ed,
  booktitle =    NIPS20,
  title =        "Sparse deep belief net model for visual area {V2}",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2008",
}

@InProceedings{NgJ02,
  author    = {Andrew Y. Ng and Michael I. Jordan},
  title     = {On Discriminative vs. Generative Classifiers: A comparison of logistic regression and naive Bayes},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  pages     = {841--848},
  year      = {2002},
}

%% Fred: I deprecate the following entry, as its tag name has the year of the conference and not of the paper!
@InProceedings{NgJ01,
  author    = {Andrew Y. Ng and Michael I. Jordan},
  title     = {On Discriminative vs. Generative Classifiers: A comparison of logistic regression and naive Bayes},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  pages     = {841--848},
  year      = {2002},
}

@InProceedings{Nie99,
  author    = {J. Y. Nie and M. Simard and P. Isabelle and R. Durand},
  title     = {Cross-Language Information Retrieval based on Parallel Texts and Automatic Mining of Parallel Texts in the Web},
  booktitle = {22nd ACM-SIGIR},
  pages     = {74--81},
  address   = {Berkeley},
  year      = {1999},
}

%% The title carries no trailing period or space; the bibliography style
%% adds its own punctuation.
@INPROCEEDINGS{Niebles+Fei-Fei-2007,
  AUTHOR =       "Niebles, J.C. and Fei-Fei, L.",
  TITLE =        "A hierarchical model of shape and appearance for human action classification",
  BOOKTITLE =    cvpr07,
  YEAR =         "2007",
}

%% The field name must be spelled "volume"; unknown field names are silently
%% ignored by BibTeX, so the volume number would never be printed.
@Article{Nielsen96,
  author =       "H. Nielsen and J. Engelbrecht and G. {von Heijne} and
                 S. Brunak",
  title =        "Defining a similarity threshold for a functional
                 protein sequence pattern: the signal peptide cleavage
                 site",
  journal =      "Proteins",
  volume =       "24",
  pages =        "316--320",
  year =         "1996",
}

%% The field name must be spelled "volume"; unknown field names are silently
%% ignored by BibTeX, so the volume number would never be printed.
@Article{Nielsen97,
  author =       "H. Nielsen and J. Engelbrecht and S. Brunak and G.
                 {von Heijne}",
  title =        "Identification of prokaryotic and eukaryotic signal
                 peptides and prediction of their cleavage sites",
  journal =      "Prot. Eng.",
  volume =       "10",
  pages =        "1--6",
  year =         "1997",
}

@InProceedings{Niesler98,
  author    = {T. R. Niesler and E. W. D. Whittaker and P. C. Woodland},
  title     = {Comparison of part-of-speech and automatically derived category-based language models for speech recognition},
  booktitle = icassp,
  pages     = {177--180},
  year      = {1998},
}

@InProceedings{Niles90,
  author    = {L. T. Niles and H. F. Silverman},
  title     = {Combining Hidden {Markov} Models and Neural Network Classifiers},
  booktitle = icassp,
  pages     = {417--420},
  address   = {Albuquerque, NM},
  year      = {1990},
}

@Book{Nilsson-65,
  author    = {N. J. Nilsson},
  title     = {Learning Machines},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = {1965},
}

@Book{Nilsson-71,
  author    = {N. J. Nilsson},
  title     = {Problem-Solving Methods in Artificial Intelligence},
  publisher = {McGraw-Hill},
  address   = {New York},
  year      = {1971},
}

@InProceedings{nips-10:Baxter+Bartlett:1998,
  author    = {Jonathan Baxter and Peter Bartlett},
  title     = {The Canonical Distortion Measure in Feature Space and 1-{NN} Classification},
  booktitle = NIPS10,
  editor    = NIPS10ed,
  publisher = {MIT Press},
  year      = {1998},
}

@InProceedings{nips-10:Holger+Yoshua:1998,
  author    = {Holger Schwenk and Yoshua Bengio},
  title     = {Training Methods for Adaptive Boosting of Neural Networks},
  booktitle = NIPS10,
  editor    = NIPS10ed,
  pages     = {647--653},
  publisher = {MIT Press},
  year      = {1998},
}

@InProceedings{nips-6:Perrone:1994,
  author    = {Michael P. Perrone},
  title     = {Putting It All Together: Methods for Combining Neural Networks},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  pages     = {1188--1189},
  publisher = {Morgan Kaufmann Publishers, Inc.},
  year      = {1994},
}

@InProceedings{nips-9:Burges+Schoelkopf:1997,
  author    = {Chris J. C. Burges and B. Sch{\"o}lkopf},
  title     = {Improving the Accuracy and Speed of Support Vector Machines},
  booktitle = NIPS9,
  editor    = NIPS9ed,
  pages     = {375},
  publisher = {MIT Press},
  year      = {1997},
}

@InProceedings{nips02-LT09,
  author    = {G. Lebanon and J. Lafferty},
  title     = {Boosting and Maximum Likelihood for Exponential Models},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002},
  original  = {orig/LT09.ps},
}

@InCollection{NIPS2005-207,
  author    = {Jian Zhang and Zoubin Ghahramani and Yiming Yang},
  title     = {Learning Multiple Related Tasks using Latent Independent Component Analysis},
  booktitle = NIPS18,
  editor    = NIPS18ed,
  pages     = {1587--1594},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2006},
}

@InCollection{NIPS2007-812-small,
  author    = {Nicolas Chapados and Yoshua Bengio},
  title     = {Augmented Functional Time Series Representation and Forecasting with {G}aussian Processes},
  booktitle = {NIPS 20},
  pages     = {265--272},
  year      = {2008},
}

@InCollection{NIPS2007-925-small,
  author    = {Nicolas {Le Roux} and Yoshua Bengio and Pascal Lamblin and Marc Joliveau and Balazs Kegl},
  title     = {Learning the 2-{D} Topology of Images},
  booktitle = {NIPS 20},
  pages     = {841--848},
  year      = {2008},
}

@InProceedings{NIPS8:Drucker:AdaBoost-Trees,
  author    = {Harris Drucker and Corinna Cortes},
  title     = {Boosting decision trees},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  pages     = {479--485},
  publisher = {MIT Press},
  year      = {1996},
}

@InProceedings{NIPS8:Hofmann-Tresp,
  author    = {Reimar Hofmann and Volker Tresp},
  title     = {Discovering structure in continuous variables using {Bayesian} networks},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  pages     = {500--506},
  publisher = {MIT Press},
  year      = {1996},
}

@InProceedings{NIPS9:Monti-Cooper,
  author    = {Stefano Monti and Gregory F. Cooper},
  title     = {Learning {Bayesian} belief networks with neural network estimators},
  booktitle = NIPS9,
  editor    = NIPS9ed,
  pages     = {578--584},
  publisher = {MIT Press},
  year      = {1997},
}

@Article{Niranjan90,
  author  = {M. Niranjan and F. Fallside},
  title   = {Neural Networks and Radial Basis Functions in Classifying Static Speech Patterns},
  journal = cspla,
  year    = {1990},
  volume  = {4},
  pages   = {275--289},
}

@Article{Nishimori90,
  author  = {H. Nishimori and T. Nakamura and M. Shiino},
  title   = {Retrieval of Spatio-Temporal Sequence in Asynchronous Neural Network},
  journal = prA,
  year    = {1990},
  volume  = {41},
  pages   = {3346--3354},
}

%% Classic BibTeX styles print the edition field verbatim before the word
%% "edition", so it is given as an ordinal word.
@book{Nixon+Aguado+2007,
    author = {Nixon, M. S.  and Aguado, A. S. },
    publisher = {Academic Press},
    edition = {Second},
    title = {Feature Extraction and Image Processing},
    year = {2007}
}

%% Lugosi and Zeger (the "LZ" of the citation key).
@Article{nonparametric-LZ-95,
  author =       "G. Lugosi and K. Zeger",
  title =        "Nonparametric Estimation via Empirical Risk
                 Minimization",
  journal =      "IEEE Trans. on Information Theory",
  volume =       "41",
  number =       "3",
  pages =        "677--687",
  year =         "1995",
}

%% Journal name is stored in full; abbreviation is left to the style.
@Article{nonparametric-SK-96,
  author =       "M. Smith and R. Kohn",
  title =        "Nonparametric regression using {Bayesian} variable
                 selection",
  journal =      "Journal of Econometrics",
  volume =       "75",
  pages =        "317--344",
  year =         "1996",
}

@InProceedings{nonparametric-W-91,
  author    = {H. White},
  title     = {Nonparametric Estimation of Conditional Quantiles Using Neural Networks},
  booktitle = {Proceedings of 23rd Symposium on the Interface, Computer Science and Statistics},
  pages     = {190--199},
  publisher = {New-York: Springer-Verlag},
  year      = {1992},
}

%% NOTE(review): volume and issue were previously entered the other way
%% round (volume 3, number 14); 14(3) is the recalled citation, so confirm
%% against the journal.
@Article{NordStrom,
  author =       "T. Nordstrom and B. Svensson",
  title =        "Using and Designing Massively Parallel Computers for
                 Artificial Neural Networks",
  journal =      "Journal of Parallel and Distributed Computing",
  volume =       "14",
  number =       "3",
  pages =        "260--285",
  year =         "1992",
  OPTnote =      "",
}

@Article{Normandin94,
  author  = {Y. Normandin and R. Cardin and R. {DeMori}},
  title   = {High-performance connected digit recognition using maximum mutual information estimation},
  journal = {Transactions on Speech and Audio Processing},
  year    = {1994},
  volume  = {2},
  number  = {2},
  pages   = {299--311},
}

%% Publisher spelled "Kaufmann", as in the other entries of this file.
@InProceedings{Nowlan-nips90,
  author =       "S. J. Nowlan",
  editor =       NIPS2ed,
  booktitle =    NIPS2,
  title =        "Maximum Likelihood Competitive Learning",
  publisher =    "Morgan Kaufmann Publishers",
  address =      "San Mateo, CA",
  pages =        "574--582",
  year =         "1990",
}

@InProceedings{Nowlan-nips92,
  author    = {S. J. Nowlan and G. E. Hinton},
  title     = {Adaptive Soft Weight Tying using {G}aussian Mixtures},
  booktitle = NIPS4,
  editor    = NIPS4ed,
  pages     = {993--1000},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1992},
}

@PhdThesis{Nowlan-PhD,
  author  = {S. J. Nowlan},
  title   = {Soft Competitive Adaptation: Neural Network Learning Algorithms based on Fitting Statistical Mixtures},
  school  = {School of Computer Science, Carnegie Mellon University},
  address = {Pittsburgh, PA},
  type    = {{C}{M}{U}-{C}{S}-91-126},
  year    = {1991},
  month   = apr # " 14",
}

@Article{Nowlan88,
  author  = {S. J. Nowlan},
  title   = {Gain Variation in Recurrent Error Propagation Networks},
  journal = cs,
  year    = {1988},
  volume  = {2},
  pages   = {305--320},
}

@TechReport{Nowlan90,
  key         = {Nowlan},
  author      = {S. J. Nowlan},
  title       = {Competing Experts: {An} experimental investigation of associative mixture models},
  institution = {University of Toronto},
  type        = {Technical Report},
  number      = {CRG-TR-90-5},
  year        = {1990},
  annote      = {In CRG Library},
}

@Article{Nowlan92,
  author  = {S. J. Nowlan and G. E. Hinton},
  title   = {Simplifying Neural Networks by Soft Weight-Sharing},
  journal = {Neural Computation},
  type    = {Letter},
  year    = {1992},
  volume  = {4},
  number  = {4},
  pages   = {473--493},
}

@InProceedings{nsvnijcnn,
  author    = {Pascal Vincent and Yoshua Bengio},
  title     = {A Neural Support Vector Network Architecture with Adaptive Kernels},
  booktitle = ijcnn,
  year      = {2000},
  volume    = {5},
  pages     = {5187--5192},
}

@Book{NumOptBook,
  author    = {J. Nocedal and S. Wright},
  title     = {Numerical Optimization},
  year      = {2006},
  publisher = {Springer},
}

@Article{Nystrom-1928,
  author  = {E. J. Nystr{\"o}m},
  title   = {{\"U}ber die praktische {Aufl{\"o}sung} von linearen {Integralgleichungen} mit {Anwendungen} auf {Randwertaufgaben} der {Potentialtheorie}},
  journal = {Commentationes Physico-Mathematicae},
  volume  = {4},
  number  = {15},
  pages   = {1--52},
  year    = {1928},
}

@Book{O'Shaughnessy87,
  author    = {D. O'Shaughnessy},
  title     = {Speech Communication --- Human and Machine},
  year      = {1987},
  publisher = {Addison-Wesley},
}

@Article{Oja82,
  author  = {E. Oja},
  title   = {A Simplified Neuron Model As a Principal Component Analyzer},
  journal = jmathb,
  year    = {1982},
  volume  = {15},
  pages   = {267--273},
}

@Article{Oja85,
  author  = {E. Oja and J. Karhunen},
  title   = {On Stochastic Approximation of the Eigenvectors and Eigenvalues of the Expectation of a Random Matrix},
  journal = jama,
  year    = {1985},
  volume  = {106},
  pages   = {69--84},
}

@Article{Oja89,
  author  = {E. Oja},
  title   = {Neural Networks, Principal Components, and Subspaces},
  journal = {International Journal of Neural Systems},
  year    = {1989},
  volume  = {1},
  pages   = {61--68},
}

@Article{Olshausen+Field-1996,
  author  = {Bruno A. Olshausen and David J. Field},
  title   = {Emergence of simple-cell receptive field properties by learning a sparse code for natural images},
  journal = {Nature},
  year    = {1996},
  volume  = {381},
  pages   = {607--609},
}

@Article{Olshausen-97,
  author   = {B. A. Olshausen and D. J. Field},
  title    = {Sparse coding with an overcomplete basis set: a strategy employed by {V1}?},
  journal  = {Vision Research},
  volume   = {37},
  pages    = {3311--3325},
  month    = dec,
  year     = {1997},
  url      = {http://view.ncbi.nlm.nih.gov/pubmed/9425546},
  keywords = {sparse-coding, v1, vision},
}

@article{olshausen:2005,
    author  = {Bruno Olshausen and David J. Field},
    title   = {How Close are We to Understanding {V1}?},
    journal = {Neural Computation},
    volume  = {17},
    pages   = {1665--1699},
    year    = {2005},
}


@InProceedings{Omlin-ml92,
  author    = {C. W. Omlin and C. L. Giles},
  title     = {Training Second-Order Recurrent Neural Networks using Hints},
  editor    = {D. Sleeman and P. Edwards},
  booktitle = {Machine Learning: Proc. of the Ninth Int. Conference},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1992},
}

@InProceedings{Omohundro96,
  author    = {S. Omohundro},
  title     = {Family Discovery},
  editor    = NIPS8ed,
  booktitle = NIPS8,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1996},
}

@InProceedings{Ong-Smola-2003,
  author    = {C. S. Ong and A. J. Smola},
  title     = {Machine learning using hyperkernels},
  editor    = ICML03ed,
  booktitle = ICML03,
  year      = {2003},
  publisher = ICML03publ,
}

@Article{Opper90,
  author  = {M. Opper and W. Kinzel and J. Kleinz and R. Nehl},
  title   = {On the Ability of the Optimal Perceptron to Generalize},
  journal = jpa,
  year    = {1990},
  volume  = {23},
  pages   = {L581--L586},
}

@Article{Orland85,
  author  = {H. Orland},
  title   = {Mean-Field Theory for Optimization Problems},
  journal = jppl,
  year    = {1985},
  volume  = {46},
  pages   = {763--770},
}

@InProceedings{ormo-nips99,
  author    = {D. Ormoneit and T. Hastie},
  title     = {Optimal Kernel Shapes for Local Linear Regression},
  booktitle = NIPS12,
  editor    = NIPS12ed,
  year      = {2000},
  publisher = {MIT Press},
}

@Article{Orponen94,
  author  = {Pekka Orponen},
  title   = {Computational complexity of neural networks: a survey},
  journal = {Nordic Journal of Computing},
  volume  = {1},
  number  = {1},
  pages   = {94--110},
  month   = {Spring},
  year    = {1994},
  URL     = {http://citeseer.ist.psu.edu/article/orponen95computational.html},
}

@Book{Ortega70,
  author    = {J. M. Ortega and W. C. Rheinboldt},
  title     = {Iterative Solution of Non-linear Equations in Several Variables and Systems},
  year      = {1970},
  publisher = {Academic Press},
  address   = {New York},
  OPTnote   = {},
}

@Book{Ortega70a,
  author    = {J. M. Ortega and W. C. Rheinboldt},
  title     = {Iterative Solution of Non-linear Equations in Several Variables and Systems},
  year      = {1970},
  publisher = {Academic Press},
  address   = {New York},
}

@InProceedings{Osindero+Hinton-2008,
  author    = {Simon Osindero and Geoffrey E. Hinton},
  title     = {Modeling image patches with a directed hierarchy of {Markov} random fields},
  editor    = NIPS20ed,
  booktitle = NIPS20,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {1121--1128},
  year      = {2008},
}

@InProceedings{Osindero+Hinton-2008-small,
  author    = {S. Osindero and G. Hinton},
  title     = {Modeling image patches with a directed hierarchy of {Markov} random fields},
  booktitle = {NIPS 20},
  year      = {2008},
}

@Article{Osindero+Welling+Hinton-05,
  author  = {Simon Osindero and Max Welling and Geoffrey E. Hinton},
  title   = {Topographic Product Models Applied To Natural Scene Statistics},
  journal = {Neural Computation},
  volume  = {18},
  number  = {2},
  pages   = {381--414},
  year    = {2006},
}

@Article{OsinderoS2006,
  author    = {Simon Osindero and Max Welling and Geoffrey E. Hinton},
  title     = {Topographic Product Models Applied to Natural Scene Statistics},
  journal   = {Neural Computation},
  year      = {2006},
  volume    = {18},
  number    = {2},
  pages     = {381--414},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  ISSN      = {0899-7667},
}

@Article{OsinderoS2006-small,
  author  = {Simon Osindero and Max Welling and Geoffrey E. Hinton},
  title   = {Topographic Product Models Applied to Natural Scene Statistics},
  journal = {Neural Computation},
  year    = {2006},
  volume  = {18},
  number  = {2},
  pages   = {381--414},
}

@InProceedings{Ott76,
  author    = {R. Ott},
  title     = {Construction of quadratic polynomial classifiers},
  booktitle = {Third International Joint Conference on Pattern Recognition},
  year      = {1976},
  pages     = {161--165},
  publisher = {IEEE, CA},
  address   = {Coronado, CA},
}

@article{OttJ1976b,
     title = {Some Classification Procedures for Multivariate Binary Data Using Orthogonal Functions},
     author = {Ott, Jurg and Kronmal, Richard A.},
     journal = {Journal of the American Statistical Association},
     volume = {71},
     number = {354},
     pages = {391--399},
     year = {1976},
     publisher = {American Statistical Association},
     copyright = {Copyright {\copyright} 1976 American Statistical Association},
}


@InProceedings{Ouimet+Bengio-2005,
  author    = {Marie Ouimet and Yoshua Bengio},
  title     = {Greedy Spectral Embedding},
  editor    = aistats05ed,
  booktitle = aistats05,
  publisher = {Society for Artificial Intelligence and Statistics},
  date      = {Jan 6-8, 2005},
  location  = {Savannah Hotel, Barbados},
  pages     = {253--260},
  year      = {2005},
}

@InProceedings{Owens89,
  author    = {A. J. Owens and D. L. Filkin},
  title     = {Efficient Training of the Back Propagation Network by Solving a System of Stiff Ordinary Differential Equations},
  booktitle = ijcnn,
  year      = {1989},
  volume    = {2},
  pages     = {381--386},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
}

@InProceedings{Paccanaro2000,
  author    = {A. Paccanaro and G. E. Hinton},
  title     = {Extracting Distributed Representations of Concepts and Relations from Positive and Negative Propositions},
  booktitle = ijcnn,
  year      = {2000},
  publisher = {IEEE, New York},
  address   = {Como, Italy},
}

@Article{Packard80,
  author  = {N. H. Packard and J. P. Crutchfield and J. D. Farmer and R. S. Shaw},
  title   = {Geometry from a Time Series},
  journal = prl,
  volume  = {45},
  pages   = {712--716},
  year    = {1980},
}

@misc{Pal+al-2006,
  author = "Chris Pal and Michael Kelm and Xuerui Wang and Greg Druck and Andrew McCallum",
  title  = "On Discriminative and Semi-Supervised Dimensionality Reduction",
  note   = "Workshop on Novel Applications of Dimensionality Reduction, NIPS'06",
  year   = "2006",
}

@InCollection{Palmer88,
  author    = {R. G. Palmer},
  title     = {Statistical Mechanics Approaches to Complex Optimization Problems},
  booktitle = {The Economy As an Evolving Complex System},
  editor    = {P. W. Anderson and K. J. Arrow and D. Pines},
  series    = {SFI Studies in the Sciences of Complexity: Proceedings},
  volume    = {5},
  year      = {1988},
  pages     = {177--193},
  publisher = {Addison-Wesley},
  address   = {Redwood City},
}

@InCollection{Palmer89,
  author    = {R. G. Palmer},
  title     = {Neural Nets},
  booktitle = {Lectures in the Sciences of Complexity},
  editor    = {D. L. Stein},
  series    = {SFI Studies in the Sciences of Complexity: Lectures},
  volume    = {1},
  year      = {1989},
  pages     = {439--461},
  publisher = {Addison-Wesley},
  address   = {Redwood City},
}

@Book{Papadimitriou,
  author    = {C. H. Papadimitriou and K. Steiglitz},
  title     = {Combinatorial Optimization: Algorithms and Complexity},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs, NJ},
  year      = {1982},
}

@Book{Papadimitriou82,
  author    = {C. H. Papadimitriou and K. Steiglitz},
  title     = {Combinatorial Optimization: Algorithms and Complexity},
  year      = {1982},
  publisher = {Prentice-Hall},
  address   = {Englewood Cliffs},
}

@Article{Parga86,
  author  = {N. Parga and M. A. Virasoro},
  title   = {The Ultrametric Organization of Memories in a Neural Network},
  journal = jpp,
  year    = {1986},
  volume  = {47},
  pages   = {1857--1864},
}

@Article{Parisi86,
  author  = {G. Parisi},
  title   = {Asymmetric Neural Networks and the Process of Learning},
  journal = jpa,
  year    = {1986},
  volume  = {19},
  pages   = {L675--L680},
}

@Book{Parisi88,
  author    = {G. Parisi},
  title     = {Statistical Field Theory},
  year      = {1988},
  publisher = {Addison-Wesley},
  address   = {Redwood City, CA},
}

@Article{Park-nc91,
  author  = {J. Park and I. W. Sandberg},
  title   = {Universal Approximation Using Radial-Basis-Function Networks},
  journal = nc,
  year    = {1991},
  volume  = {3},
  number  = {2},
  pages   = {246--257},
}

@TechReport{Parker85,
  author      = {D. B. Parker},
  title       = {Learning Logic},
  institution = {Center for Computational Research in Economics and Management Science, Massachusetts Institute of Technology},
  address     = {Cambridge, MA},
  number      = {TR--47},
  year        = {1985},
}

@InProceedings{Parker87,
  author    = {D. B. Parker},
  title     = {Optimal Algorithms for Adaptive Networks: Second Order Back Propagation, Second Order Direct Propagation, and Second Order Hebbian Learning},
  booktitle = icnn,
  editor    = {M. Caudill and C. Butler},
  year      = {1987},
  volume    = {2},
  pages     = {593--600},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
}

@InProceedings{Parks87,
  author    = {M. Parks},
  title     = {Characterization of the {Boltzmann} Machine Learning Rate},
  booktitle = icnn,
  editor    = {M. Caudill and C. Butler},
  year      = {1987},
  volume    = {3},
  pages     = {715--719},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
}

@Article{Parlos94,
  author  = {A. G. Parlos and J. Muthusami and A. F. Atiya},
  title   = {Incipient Fault Detection and Identification in Process Systems using Accelerated Neural Network Learning},
  journal = {Nuclear Technology},
  year    = {1994},
  volume  = {105},
  pages   = {145},
}

@Article{Parzen62,
  author  = {Emanuel Parzen},
  title   = {On the estimation of a probability density function and mode},
  journal = {Annals of Mathematical Statistics},
  volume  = {33},
  number  = {3},
  pages   = {1065--1076},
  year    = {1962},
}

@InProceedings{pati93orthogonal,
  author    = {Y. Pati and R. Rezaiifar and P. Krishnaprasad},
  title     = {Orthogonal Matching Pursuit: Recursive Function Approximation with Applications to Wavelet Decomposition},
  booktitle = {Proceedings of the 27th Annual Asilomar Conference on Signals, Systems, and Computers},
  pages     = {40--44},
  month     = nov,
  year      = {1993},
}

@InProceedings{Paugam-Moisy-1992,
  author    = {H{\'e}l{\`e}ne {Paugam-Moisy}},
  title     = {On the Convergence of a Block-Gradient Algorithm for Back-Propagation Learning},
  booktitle = ijcnn,
  volume    = {3},
  publisher = {IEEE},
  address   = {New York},
  pages     = {919--924},
  year      = {1992},
}

@InProceedings{Paugam-Moisy-1992b,
  author    = {H{\'e}l{\`e}ne {Paugam-Moisy}},
  title     = {Optimal Speedup Conditions for a Parallel Back-Propagation Algorithm},
  booktitle = {CONPAR '92/ VAPP V: Proceedings of the Second Joint International Conference on Vector and Parallel Processing},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  pages     = {719--724},
  year      = {1992},
  ISBN      = {3-540-55895-0},
}

@InCollection{Paugam-Moisy-1993,
  author    = {H{\'e}l{\`e}ne {Paugam-Moisy}},
  title     = {Parallel Neural Computing Based on Network Duplicating},
  editor    = {I. Pitas},
  booktitle = {Parallel Algorithms for Digital Image Processing, Computer Vision and Neural Networks},
  publisher = {John Wiley},
  pages     = {305--340},
  year      = {1993},
}

@inproceedings{Pavlovic-2001,
  author    = "Vladimir Pavlovic and James M. Rehg and John MacCormick",
  title     = "Learning Switching Linear Models of Human Motion",
  booktitle = NIPS13,
  editor    = NIPS13ed,
  year      = {2001},
  pages     = {981--987},
  publisher = {{MIT} Press},
}
 

@Book{PdpManual,
  author    = {D. E. Rumelhart and J. L. McClelland},
  title     = {Explorations in Parallel Distributed Processing},
  volume    = {3},
  publisher = {MIT Press},
  year      = {1988},
}

@InProceedings{Pearl-Verma91,
  author    = {J. Pearl and T. S. Verma},
  title     = {A theory of inferred causation},
  editor    = {J. A. Allen and R. Fikes and E. Sandewall},
  booktitle = {Principles of Knowledge Representation and Reasoning: Proceedings of the Second International Conference},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  pages     = {441--452},
  year      = {1991},
}

@Book{Pearl88,
  author    = {Judea Pearl},
  title     = {Probabilistic Reasoning in Intelligent Systems: Networks of Plausible Inference},
  year      = {1988},
  publisher = {Morgan Kaufmann},
}

@InProceedings{Pearlmutter+Parra-96,
  author    = {Barak Pearlmutter and L. C. Parra},
  title     = {A context-sensitive generalization of {ICA}},
  booktitle = {International Conference On Neural Information Processing},
  editor    = {L. Xu},
  year      = {1996},
  pages     = {151--157},
  address   = {Hong-Kong},
}

@InProceedings{Pearlmutter86,
  author    = {B. A. Pearlmutter and G. E. Hinton},
  title     = {{G}-Maximization: An Unsupervised Learning Procedure for Discovering Regularities},
  booktitle = snowbird,
  editor    = {J. S. Denker},
  year      = {1986},
  pages     = {333--338},
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
}

@InProceedings{Pearlmutter89a,
  author    = {B. A. Pearlmutter},
  title     = {Learning State Space Trajectories in Recurrent Neural Networks},
  booktitle = ijcnn,
  year      = {1989},
  volume    = {2},
  pages     = {365--372},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
}

@Article{Pearlmutter89b,
  author  = {B. A. Pearlmutter},
  title   = {Learning State Space Trajectories in Recurrent Neural Networks},
  journal = nc,
  year    = {1989},
  volume  = {1},
  pages   = {263--269},
}

@article{Pearson-1901,
  author               = {Pearson, K. },
  title                = {On lines and planes of closest fit to systems of points in space},
  journal              = {Philosophical Magazine},
  year                 = {1901},
  volume               = {2},
  number               = {6},
  pages                = {559--572},
  keywords             = {pca},
  citeulike-article-id = {2013414},
  posted-at            = {2007-11-29 10:41:36},
  priority             = {2}
}

@InProceedings{Pedersen2001,
  author    = {Ted Pedersen},
  title     = {A decision tree of bigrams is an accurate predictor of word sense},
  booktitle = {Proceedings of the Second Annual Meeting of the North American Chapter of the Association for Computational Linguistics},
  pages     = {79--86},
  year      = {2001},
  URL       = {http://citeseer.nj.nec.com/pedersen01decision.html},
}

@InProceedings{Peeling86,
  author    = {S. M. Peeling and R. K. Moore and M. J. Tomlinson},
  title     = {The Multi-Layer Perceptron as a Tool for Speech Pattern Processing Research},
  booktitle = {Proceedings of the 10th Autumn Conference on Speech and Hearing},
  year      = {1986},
}

@InProceedings{peng04accurate,
  author    = {F. Peng and A. McCallum},
  title     = {Accurate information extraction from research papers using conditional random fields},
  booktitle = {Proceedings of Human Language Technology Conference / North American Chapter of the Association for Computational Linguistics annual meeting},
  year      = {2004},
  pages     = {329--336},
}

@InProceedings{Pennacchiotti+Pantel-2006,
  author    = {Marco Pennacchiotti and Patrick Pantel},
  title     = {Ontologizing Semantic Relations},
  booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and 44th Annual Meeting of the ACL},
  year      = {2006},
  pages     = {793--800},
  address   = {Sydney},
}

@Article{Penrose55,
  author  = {R. Penrose},
  title   = {A generalized inverse for matrices},
  journal = {Proc. Cambridge Philos. Soc.},
  volume  = {51},
  pages   = {406--413},
  year    = {1955},
}

@InProceedings{Pereira93,
  author    = {F. Pereira and N. Tishby and L. Lee},
  title     = {Distributional Clustering of English Words},
  booktitle = {31st Annual Meeting of the Association for Computational Linguistics},
  address   = {Columbus, Ohio},
  pages     = {183--190},
  year      = {1993},
}

@InProceedings{Pereira94,
  author    = {F. Pereira and M. Riley and R. Sproat},
  title     = {Weighted rational transductions and their application to human language processing},
  booktitle = {ARPA Natural Language Processing Workshop},
  year      = {1994},
}

@InCollection{Pereira97,
  author    = {F. C. N. Pereira and M. D. Riley},
  title     = {Speech recognition by composition of weighted finite automata},
  editor    = {Emmanuel Roche and Yves Schabes},
  booktitle = {Finite-State Language Processing},
  publisher = {MIT Press},
  address   = {Cambridge, Massachusetts},
  pages     = {431--453},
  year      = {1997},
}

@Article{Peretto84,
  author  = {P. Peretto},
  title   = {Collective Properties of Neural Networks: {A} Statistical Physics Approach},
  journal = biocyb,
  year    = {1984},
  volume  = {50},
  pages   = {51--62},
}

@InProceedings{Peretto86,
  author    = {P. Peretto and J. J. Niez},
  title     = {Collective Properties of Neural Networks},
  booktitle = {Disordered Systems and Biological Organization},
  editor    = {E. Bienenstock and F. Fogelman-Souli\'e and G. Weisbuch},
  year      = {1986},
  pages     = {171--185},
  publisher = {Springer-Verlag, Berlin},
  address   = {Les Houches 1985},
}

@Article{Peretto88,
  author  = {P. Peretto},
  title   = {On Learning Rules and Memory Storage Abilities of Asymmetrical Neural Networks},
  journal = jpp,
  year    = {1988},
  volume  = {49},
  pages   = {711--726},
}

@InProceedings{Perez+Rendell-1996,
  author    = {Eduardo P{\'e}rez and Larry A. Rendell},
  title     = {Learning Despite Concept Variation by Finding Structure in Attribute-based Data},
  editor    = ICML96ed,
  booktitle = ICML96,
  publisher = ICML96publ,
  pages     = {391--399},
  year      = {1996},
}

@Article{Perez75,
  author  = {R. P{\'e}rez and L. Glass and R. Shlaer},
  title   = {Development of Specificity in the Cat Visual Cortex},
  journal = jmathb,
  volume  = {1},
  pages   = {275--288},
  year    = {1975},
}

@MISC{Perez98markovrandom,
  title  = "Markov Random Fields and Images",
  author = "Patrick Perez",
  year   = "1998"
}

@article{PerpinanM2000,
  author    = {Miguel {\'A}. Carreira-Perpi{\~n}{\'a}n and Steve Renals},
  title     = {Practical Identifiability of Finite Mixtures of Multivariate {Bernoulli} Distributions},
  journal   = {Neural Computation},
  volume    = {12},
  number    = {1},
  pages     = {141--152},
  year      = {2000},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
}

@InProceedings{Perpinan+Hinton-2005,
  author    = {Miguel A. Carreira-Perpi{\~n}{\'a}n and Geoffrey E. Hinton},
  title     = {On Contrastive Divergence Learning},
  editor    = aistats05ed,
  booktitle = aistats05,
  publisher = {Society for Artificial Intelligence and Statistics},
  date      = {Jan 6-8, 2005},
  location  = {Savannah Hotel, Barbados},
  pages     = {33--40},
  year      = {2005},
}

@Article{Personnaz85,
  author  = {L. Personnaz and I. Guyon and G. Dreyfus},
  title   = {Information Storage and Retrieval in Spin-Glass-Like Neural Networks},
  journal = jppl,
  year    = {1985},
  volume  = {46},
  pages   = {359--365},
}

@Article{Personnaz86,
  author  = {L. Personnaz and I. Guyon and G. Dreyfus},
  title   = {Collective Computational Properties of Neural Networks: New Learning Mechanisms},
  journal = prA,
  year    = {1986},
  volume  = {34},
  pages   = {4217--4228},
}

@Article{Peterson2004,
  author  = {Gail B. Peterson},
  title   = {A day of great illumination: {B. F.} {Skinner}'s discovery of shaping},
  journal = {Journal of the Experimental Analysis of Behavior},
  year    = {2004},
  volume  = {82},
  number  = {3},
  pages   = {317--328},
}

@Article{Peterson87,
  author  = {C. Peterson and J. R. Anderson},
  title   = {A Mean Field Theory Learning Algorithm for Neural Networks},
  journal = cs,
  year    = {1987},
  volume  = {1},
  pages   = {995--1019},
}

@Article{Peterson89,
  author  = {C. Peterson and B. S{\"o}derberg},
  title   = {A New Method for Mapping Optimization Problems onto Neural Networks},
  journal = ijns,
  year    = {1989},
  volume  = {1},
  pages   = {3--22},
}

@Article{Peterson90,
  author  = {C. Peterson and S. Redfield and J. D. Keeler and E. Hartman},
  title   = {An Optoelectronic Architecture for Multilayer Learning in a Single Photorefractive Crystal},
  journal = nc,
  year    = {1990},
  volume  = {2},
  pages   = {25--34},
}

@PhdThesis{PhD:Perrone,
  author = {Michael P. Perrone},
  title  = {Improving Regression Estimation: Averaging Methods for Variance Reduction with Extensions to General Convex Measure Optimization},
  school = {Brown University, Institute for Brain and Neural Systems},
  month  = may,
  year   = {1993},
}

@Book{Piaget1952,
  author    = {J. Piaget},
  title     = {The origins of intelligence in children},
  publisher = {International Universities Press},
  address   = {New York},
  year      = {1952},
}

@Article{Pineda87,
  author  = {F. J. Pineda},
  title   = {Generalization of Back-Propagation to Recurrent Neural Networks},
  journal = prl,
  year    = {1987},
  volume  = {59},
  pages   = {2229--2232},
}

@Article{Pineda88,
  author  = {F. J. Pineda},
  title   = {Dynamics and Architecture for Neural Computation},
  journal = jcomp,
  year    = {1988},
  volume  = {4},
  pages   = {216--245},
}

@InProceedings{Pineda88-nips,
  author       = {F. Pineda},
  title        = {Generalization of Backpropagation to Recurrent and Higher Order Neural Networks},
  booktitle    = nips87,
  editor       = nips87ed,
  year         = {1988},
  pages        = {602--611},
  organization = {American Institute of Physics},
  address      = {New York, NY},
}

@Article{Pineda89,
  author  = {F. J. Pineda},
  title   = {Recurrent Back-Propagation and the Dynamical Approach to Adaptive Neural Computation},
  journal = nc,
  year    = {1989},
  volume  = {1},
  pages   = {161--172},
}

@InCollection{PINN,
  author    = {P. Frasconi and M. Gori and A. Tesi},
  title     = {Successes and Failures of Backpropagation: {A} Theoretical Investigation},
  booktitle = {Progress in Neural Networks},
  editor    = {O. Omidvar},
  year      = {1993},
  volume    = {5},
  publisher = {Ablex Publishing},
}

@article{Pinto08,
  author    = {Pinto, Nicolas and Cox, David D. and DiCarlo, James J.},
  title     = {Why is Real-World Visual Object Recognition Hard?},
  journal   = {PLoS Comput Biol},
  publisher = {Public Library of Science},
  volume    = {4},
  number    = {1},
  pages     = {e27},
  month     = jan,
  year      = {2008},
}

@inproceedings{Pinto-DiCarlo-2008,
  author    = {Nicolas Pinto and James {DiCarlo} and David Cox},
  title     = {Establishing Good Benchmarks and Baselines for Face Recognition},
  editor    = {Erik Learned-Miller and Andras Ferencz and Fr{\'e}d{\'e}ric Jurie},
  booktitle = {ECCV 2008 Faces in 'Real-Life' Images Workshop},
  address   = {Marseille, France},
  year      = {2008},
  audience  = {internationale},
  URL       = {http://hal.inria.fr/inria-00326732/en/},
}

@article{Pinto-2009,
  author    = {Pinto, Nicolas and Doukhan, David and DiCarlo, James J. and Cox, David D.},
  title     = {A High-Throughput Screening Approach to Discovering Good Forms of Biologically Inspired Visual Representation},
  journal   = {PLoS Comput Biol},
  publisher = {Public Library of Science},
  volume    = {5},
  number    = {11},
  pages     = {e1000579},
  month     = nov,
  year      = {2009},
}

@InCollection{Platt2000,
  author    = {J. Platt},
  title     = {Probabilities for support vector machines},
  editor    = {A. Smola and P. Bartlett and B. Sch{\"o}lkopf and D. Schuurmans},
  booktitle = {Advances in Large Margin Classifiers},
  publisher = {MIT Press},
  year      = {2000},
}

@Article{Platt91,
  author  = {J. Platt},
  title   = {A Resource-Allocating Network for Function Interpolation},
  journal = {Neural Computation},
  type    = {Letter},
  year    = {1991},
  volume  = {3},
  number  = {2},
  pages   = {213--225},
}

@InProceedings{Platt94,
  author    = {R. Wolf and J. Platt},
  title     = {Postal address block location using a convolutional locator network},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  year      = {1994},
  pages     = {745--752},
}

@Article{Plaut-csl87,
  author  = {D. C. Plaut and G. E. Hinton},
  title   = {Learning Sets of Filters Using Back-propagation},
  journal = cspla,
  volume  = {2},
  pages   = {35--61},
  year    = {1987},
}

@TechReport{Plaut86,
  author      = {D. Plaut and S. Nowlan and G. Hinton},
  title       = {Experiments on Learning by Back-Propagation},
  institution = {Department of Computer Science, Carnegie Mellon University},
  address     = {Pittsburgh, PA},
  number      = {CMU--CS--86--126},
  year        = {1986},
}

@article{PLS-Frank-Friedman,
  author = {Ildiko E. Frank and Jerome H. Friedman},
  title = {A statistical view of some chemometrics regression tools},
  journal = {Technometrics},
  year = {1993},
  volume = {35},
  number = {2},
  pages = {109--148},
}

% The journal field holds only the journal name; publication status lives in note.
@article{Podder-2006,
  author = {M. Podder and W. J. Welch and R. H. Zamar and S. J. Tebbutt},
  title = {Dynamic Variable Selection in {SNP} Genotype Autocalling from {APEX} Microarray Data},
  journal = {BMC Bioinformatics},
  year = {2006},
  note = {In revision},
}

@article{Poggio-ieee90,
  author = {T. Poggio and F. Girosi},
  title = {Networks for Approximation and Learning},
  journal = ieeeproc,
  year = {1990},
  volume = {78},
  number = {9},
  pages = {1481--1497},
}

@article{Poggio75,
  author = {T. Poggio},
  title = {On Optimal NonLinear Associative Recall},
  journal = biocyb,
  year = {1975},
  volume = {19},
  pages = {201},
}

@article{Poggio85,
  author = {T. Poggio and V. Torre and C. Koch},
  title = {Computational Vision and Regularization Theory},
  journal = {Nature},
  year = {1985},
  volume = {317},
  number = {26},
  pages = {314--319},
}

@techreport{Poggio89,
  author = {T. Poggio and F. Girosi},
  title = {A theory of networks for approximation and learning},
  institution = {MIT AI Laboratory},
  address = {Cambridge, MA},
  number = {1140},
  year = {1989},
}

@article{Poggio90,
  author = {T. Poggio and F. Girosi},
  title = {Regularization Algorithms for Learning That Are Equivalent to Multilayer Networks},
  journal = science,
  year = {1990},
  volume = {247},
  pages = {978--982},
}

@article{Pollack90,
  author = {Jordan B. Pollack},
  title = {Recursive Distributed Representations},
  journal = {Artificial Intelligence},
  year = {1990},
  volume = {46},
  number = {1},
  pages = {77--105},
}

@article{Pollack91,
  author = {Jordan B. Pollack},
  title = {The Induction of Dynamical Recognizers},
  journal = mlearn,
  year = {1991},
  volume = {7},
  number = {2},
  pages = {196--227},
}

@book{Pollard84,
  author = {D. Pollard},
  title = {Convergence of stochastic processes},
  year = {1984},
  publisher = {Springer-Verlag},
  address = {New York, NY},
}

@inproceedings{Pollit91,
  author = {M. D. Pollit and J. Peck},
  title = {Recent advances in lithological recognition based on rotary blasthole drill responses},
  booktitle = {Proc. 2nd Canadian Conf. on Computer Applications in the Mineral Industry},
  year = {1991},
  address = {Vancouver, Canada},
}

% address is the publisher's city (not the conference venue), matching the
% other Morgan Kaufmann proceedings entries in this file.
@inproceedings{Pomerleau89,
  author = {D. A. Pomerleau},
  title = {{ALVINN}: An Autonomous Land Vehicle in a Neural Network},
  editor = NIPS1ed,
  booktitle = NIPS1,
  publisher = {Morgan Kaufmann},
  address = {San Mateo, CA},
  pages = {305--313},
  year = {1989},
}

@techreport{Pontil98,
  author = {M. Pontil and A. Verri},
  title = {Properties of Support Vector Machines},
  institution = {MIT},
  number = {AI Memo 1612},
  year = {1998},
}

@inproceedings{Poritz88,
  author = {A. B. Poritz},
  title = {Hidden {Markov} models: a guided tour},
  booktitle = {Proc. Int. Conf. Acoustics, Speech, and Signal Processing},
  year = {1988},
  pages = {7--13},
}

% Same paper as Poston-ijcnn91 (same authors, pages and year); kept under this key.
@inproceedings{Poston,
  author = {T. Poston and C. Lee and Y. Choie and Y. Kwon},
  title = {Local minima and Backpropagation},
  booktitle = {Proc. of the IEEE-IJCNN91},
  year = {1991},
  address = {Seattle, WA},
  pages = {173--176},
}

@inproceedings{Poston-ijcnn91,
  author = {T. Poston and C. Lee and Y. Choie and Y. Kwon},
  title = {Local Minima and Backpropagation},
  booktitle = ijcnn,
  publisher = {IEEE Press},
  address = {Seattle, WA},
  pages = {173--176},
  year = {1991},
}

@article{Poterba+Summers,
  author = {J. M. Poterba and L. H. Summers},
  title = {Mean Reversion in Stock Prices},
  journal = {Journal of Financial Economics},
  year = {1988},
  volume = {22},
  pages = {27--59},
}

@article{potvin:1995:orsajc,
  author = {J.-Y. Potvin and S. Bengio},
  title = {The Vehicle Routing Problem with Time Windows -- Part {II}: Genetic Search},
  journal = {{ORSA} Journal on Computing},
  year = {1995},
}

% Book chapter: the structured fields below are taken from the free-form
% `text` field, which standard styles ignore and which is kept for reference.
@incollection{powell87radial,
  author = {M. J. D. Powell},
  title = {Radial basis functions for multivariable interpolation: {A} review},
  editor = {J. C. Mason and M. G. Cox},
  booktitle = {Algorithms for Approximation of Functions and Data},
  publisher = {Oxford University Press},
  pages = {143--167},
  year = {1987},
  text = {M. J. D. Powell. Radial basis functions for multivariable interpolation: A review. In J. C. Mason and M. G. Cox, editors, Algorithms for Approximation of Functions and Data, pages 143--167. Oxford University Press, 1987.},
}

@inproceedings{Pratt+Kamm91,
  author = {L. Y. Pratt and C. A. Kamm},
  title = {Improving a phoneme classification neural network through problem decomposition},
  booktitle = ijcnn,
  volume = {2},
  publisher = {IEEE Press},
  address = {Seattle, WA},
  pages = {821--826},
  year = {1991},
}

@inproceedings{pratt93,
  author = {Lorien Y. Pratt},
  title = {Discriminability-Based Transfer between Neural Networks},
  booktitle = NIPS5,
  editor = NIPS5ed,
  year = {1993},
  pages = {204--211},
  publisher = {Morgan Kaufmann},
  address = {San Mateo, CA},
}

@article{Presnell93,
  author = {S. R. Presnell and F. E. Cohen},
  title = {Artificial neural networks for pattern recognition in biochemical sequences},
  journal = {Ann. Rev. Biophys. Biomol. Struct.},
  year = {1993},
  volume = {22},
  pages = {283--298},
}

@book{Press86,
  author = {W. H. Press and B. P. Flannery and S. A. Teukolsky and W. T. Vetterling},
  title = {Numerical Recipes},
  year = {1986},
  publisher = {Cambridge University Press},
  address = {Cambridge},
}

% The edition is stored in its own field so the style formats it.
@book{Press92,
  author = {W. H. Press and S. A. Teukolsky and W. T. Vetterling and B. P. Flannery},
  title = {Numerical Recipes in {C}: The art of scientific computing},
  edition = {Second},
  publisher = {Cambridge University Press},
  address = {Cambridge},
  year = {1992},
}

@article{Priebe2005,
  author = {C. E. Priebe and J. M. Conroy and D. J. Marchette and Y. Park},
  title = {Scan Statistics on Enron Graphs},
  journal = {Computational and Mathematical Organization Theory},
  volume = {11},
  number = {3},
  pages = {229--247},
  month = oct,
  year = {2005},
  publisher = {Springer},
}

@book{Priestley81,
  author = {M. B. Priestley},
  title = {Spectral Analysis and Time Series, Vol.1: Univariate Series},
  year = {1981},
  publisher = {Academic Press},
}

@article{Principe92,
  author = {B. {de Vries} and J. C. Principe},
  title = {The gamma model -- {A} new neural net model for temporal processing},
  journal = nn,
  year = {1992},
  volume = {5},
  pages = {565--576},
  OPTnote = {},
}

@article{Psa88a,
  author = {D. Psaltis and C. H. Park and J. Hong},
  title = {Higher Order Associative Memories and Their Optical Implementations},
  journal = {Neural Networks},
  year = {1988},
  volume = {1},
  number = {2},
  pages = {149--163},
}

@inproceedings{Psaltis89,
  author = {D. Psaltis and D. Brady and K. Hsu},
  title = {Learning in optical neural computers},
  booktitle = ijcnn,
  year = {1989},
  volume = {2},
  pages = {72--75},
  address = {Washington D.C.},
}

% NOTE(review): institution/number/year were missing (required for techreport);
% the values below should be confirmed against the memo itself, as should the
% author list (the memo may list Girosi as sole author).
@techreport{publication-an,
  author = {Tomaso Poggio and Federico Girosi},
  title = {An Equivalence Between Sparse Approximation and Support Vector Machines},
  institution = {MIT AI Laboratory},
  number = {AI Memo 1606},
  year = {1997},
}

% NOTE(review): institution/number/year were missing (required for techreport);
% the values below should be confirmed against the memo itself.
@techreport{publication-notes,
  author = {Tomaso Poggio and Federico Girosi},
  title = {Notes on {PCA}, Regularization, Sparsity and Support Vector Machines},
  institution = {MIT AI Laboratory},
  number = {AI Memo 1632},
  year = {1998},
}

% Same paper as Qian88a (same authors, volume, pages and year).
@article{Qian+Sejnowski88,
  author = {N. Qian and T. J. Sejnowski},
  title = {Predicting the secondary structure of globular proteins using neural network models},
  journal = {Journal of Molecular Biology},
  year = {1988},
  volume = {202},
  pages = {865--884},
}

% Same paper as Qian+Sejnowski88 (same authors, volume, pages and year).
@article{Qian88a,
  author = {N. Qian and T. J. Sejnowski},
  title = {Predicting the Secondary Structure of Globular Proteins Using Neural Network Models},
  journal = jmolecb,
  year = {1988},
  volume = {202},
  pages = {865--884},
}

% address is the publisher's city; the previous value ("Pittsburg 1988") was the
% meeting place plus year, and the publisher field carried the city.
@inproceedings{Qian88b,
  author = {N. Qian and T. J. Sejnowski},
  title = {Learning to Solve Random-Dot Stereograms of Dense Transparent Surfaces with Recurrent Back-Propagation},
  editor = {D. Touretzky and G. Hinton and T. Sejnowski},
  booktitle = cmss88,
  publisher = {Morgan Kaufmann},
  address = {San Mateo, CA},
  pages = {435--443},
  year = {1988},
}

@article{quantiles-nc-2002,
  author = {Ichiro Takeuchi and Yoshua Bengio and Takafumi Kanamori},
  title = {Robust Regression with Asymmetric Heavy-Tail Noise Distributions},
  journal = {Neural Computation},
  year = {2002},
  volume = {14},
  number = {10},
  pages = {2469--2496},
}

@techreport{quantiles-TR,
  author = {Ichiro Takeuchi and Yoshua Bengio and Takafumi Kanamori},
  title = {Robust Regression with Asymmetric Heavy-Tail Noise},
  institution = {Dept. IRO, Universit\'e de Montr\'eal},
  number = {1198},
  year = {2001},
}

@article{Quinlan+Rivest89,
  author = {J. Ross Quinlan and Ronald L. Rivest},
  title = {Inferring Decision Trees Using the Minimum Description Length Principle},
  journal = {Information and Computation},
  year = {1989},
  volume = {80},
  pages = {227--248},
}

@article{Quinlan86,
  author = {J. Ross Quinlan},
  title = {Induction of Decision Trees},
  journal = {Machine Learning},
  year = {1986},
  volume = {1},
  number = {1},
  pages = {81--106},
}

@book{Quinlan93,
  author = {J. Ross Quinlan},
  title = {{C4}.5: Programs for Machine Learning},
  year = {1993},
  publisher = {Morgan Kaufmann},
}

@book{Rabiner+Gold75,
  author = {L. R. Rabiner and B. Gold},
  title = {Theory and application of digital signal processing},
  year = {1975},
  publisher = {Prentice-Hall},
}

@article{Rabiner85,
  author = {L. R. Rabiner and S. E. Levinson},
  title = {A speaker-independent, syntax-directed, connected word recognition system based on hidden {Markov} models and level building},
  journal = ieeetassp,
  year = {1985},
  volume = {33},
  number = {3},
  pages = {561--573},
}

% NOTE(review): the previous pages (257--285) match the 1989 Proc. IEEE tutorial
% (see Rabiner89), not this magazine article; volume/number/pages below should be
% confirmed against the publication.
@article{Rabiner86,
  author = {L. R. Rabiner and B. H. Juang},
  title = {An Introduction to Hidden {Markov} Models},
  journal = ieeeassp,
  volume = {3},
  number = {1},
  pages = {4--16},
  month = jan,
  year = {1986},
}

@article{Rabiner89,
  author = {L. R. Rabiner},
  title = {A Tutorial on Hidden {Markov} Models and Selected Applications in Speech Recognition},
  journal = {Proceedings of the IEEE},
  volume = {77},
  number = {2},
  pages = {257--286},
  year = {1989},
  OPTannote = {},
}

@article{Raetsch-2002,
  author = {Gunnar R{\"a}tsch and Ayhan Demiriz and Kristin P. Bennett},
  title = {Sparse Regression Ensembles in Infinite and Finite Hypothesis Spaces},
  journal = {Machine Learning},
  year = {2002},
  publisher = {Kluwer Academic Publishers},
}

% "Gilks and al." parsed as two editors ("Gilks" and "al."); the editors are
% now listed explicitly. NOTE(review): confirm editor names and the full book
% title against the volume.
@incollection{Raftery1996,
  author = {A. Raftery},
  title = {Hypothesis Testing and Model Selection},
  editor = {W. R. Gilks and S. Richardson and D. J. Spiegelhalter},
  booktitle = {Markov Chain Monte Carlo in Practice},
  publisher = {Chapman and Hall},
  pages = {163--188},
  year = {1996},
}


@inproceedings{RaginskyM2008,
  author = {Maxim Raginsky and Svetlana Lazebnik and Rebecca Willett and Jorge Silva},
  title = {Near-minimax recursive density estimation on the binary hypercube},
  editor = NIPS20ed,
  booktitle = NIPS20,
  year = {2008},
  pages = {1305--1312},
}

@inproceedings{RainaR2003,
  author = {Rajat Raina and Yirong Shen and Andrew Y. Ng and Andrew McCallum},
  title = {Classification with hybrid generative/discriminative models},
  booktitle = NIPS16,
  editor = NIPS16ed,
  publisher = {MIT Press},
  year = {2003},
}

@misc{raina+ng+koller-workshop-2005,
  author = {Rajat Raina and Andrew Y. Ng and Daphne Koller},
  title = {Transfer Learning by Constructing Informative Priors},
  howpublished = {'Inductive Transfer: 10 Years Later' NIPS Workshop},
  year = {2005},
  OPTkey = {},
}

@inproceedings{RainaR2007,
  author = {Rajat Raina and Alexis Battle and Honglak Lee and Benjamin Packer and Andrew Y. Ng},
  title = {Self-taught learning: transfer learning from unlabeled data},
  booktitle = ICML07,
  editor = ICML07ed,
  publisher = ICML07publ,
  year = {2007},
  pages = {759--766},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee = {http://doi.acm.org/10.1145/1273496.1273592},
}

@inproceedings{RainaR2007-small,
  author = {R. Raina and A. Battle and H. Lee and B. Packer and A. Y. Ng},
  title = {Self-taught learning: transfer learning from unlabeled data},
  booktitle = {ICML 2007},
  year = {2007},
}

% doi holds the bare identifier; styles and packages add the resolver prefix.
@inproceedings{RainaICML09,
  author = {Raina, Rajat and Madhavan, Anand and Ng, Andrew Y.},
  title = {Large-scale deep unsupervised learning using graphics processors},
  booktitle = ICML09,
  editor = ICML09ed,
  publisher = ICML09publ,
  year = {2009},
  isbn = {978-1-60558-516-1},
  pages = {873--880},
  location = {Montreal, Quebec, Canada},
  doi = {10.1145/1553374.1553486},
  address = {New York, NY, USA},
}

% address is the publisher's city; the previous value ("San Diego 1988") was the
% conference venue plus year, and the publisher field carried the city.
@inproceedings{Ramanujam88,
  author = {J. Ramanujam and P. Sadayappan},
  title = {Optimization by Neural Networks},
  booktitle = icnn,
  volume = {2},
  publisher = {IEEE},
  address = {New York, NY},
  pages = {325--332},
  year = {1988},
}

@inproceedings{ranzato-07,
  author = {{Marc'Aurelio} Ranzato and Christopher Poultney and Sumit Chopra and Yann {LeCun}},
  title = {Efficient Learning of Sparse Representations with an Energy-Based Model},
  booktitle = NIPS19,
  editor = NIPS19ed,
  year = {2007},
  pages = {1137--1144},
  publisher = {MIT Press},
}

@inproceedings{ranzato-07-small,
  author = {M. Ranzato and C. Poultney and S. Chopra and Y. {LeCun}},
  title = {Efficient Learning of Sparse Representations with an Energy-Based Model},
  booktitle = {NIPS'06},
  year = {2007},
}

@inproceedings{ranzato-07-short,
  author = {M. Ranzato and C. Poultney and S. Chopra and Y. {LeCun}},
  title = {Efficient Learning of Sparse Representations with an Energy-Based Model},
  booktitle = {Adv. Neural Inf. Proc. Sys. 19},
  year = {2007},
  pages = {1137--1144},
}

# Please do NOT use this citation as it is a duplicate of ranzato-07
# (pages copied from ranzato-07 so both keys render the same reference).
@incollection{ranzato-06,
  author = {{Marc'Aurelio} Ranzato and Christopher Poultney and Sumit Chopra and Yann {LeCun}},
  title = {Efficient Learning of Sparse Representations with an Energy-Based Model},
  editor = NIPS19ed,
  booktitle = NIPS19,
  publisher = {{MIT} Press},
  pages = {1137--1144},
  year = {2007},
}

# Duplicate of ranzato-07-small: please do NOT cite this key.
@incollection{ranzato-06-small,
  author = {M. Ranzato and C. Poultney and S. Chopra and Y. {LeCun}},
  title = {Efficient Learning of Sparse Representations with an Energy-Based Model},
  booktitle = {NIPS 19},
  year = {2007},
}


@inproceedings{ranzato-08,
  author = {{Marc'Aurelio} Ranzato and Y-Lan Boureau and Yann {LeCun}},
  title = {Sparse feature learning for deep belief networks},
  booktitle = NIPS20,
  editor = NIPS20ed,
  year = {2008},
  pages = {1185--1192},
  publisher = {MIT Press},
  address = {Cambridge, MA},
}
  %url =          "http://www.cs.nyu.edu/~ranzato/publications/ranzato-nips07.pdf",

@inproceedings{ranzato-08-small,
  author = {M. Ranzato and Y. Boureau and Y. {LeCun}},
  title = {Sparse feature learning for deep belief networks},
  booktitle = {NIPS 20},
  year = {2008},
}

@inproceedings{ranzato-08-short,
  author = {M. Ranzato and Y. Boureau and Y. {LeCun}},
  title = {Sparse feature learning for deep belief networks},
  booktitle = {Adv. Neural Inf. Proc. Sys. 20},
  pages = {1185--1192},
  year = {2008},
}

@inproceedings{ranzato-cvpr-07,
  author = {{Marc'Aurelio} Ranzato and {Fu-Jie} Huang and {Y-Lan} Boureau and Yann {LeCun}},
  title = {Unsupervised Learning of Invariant Feature Hierarchies with Applications to Object Recognition},
  booktitle = cvpr07,
  year = {2007},
  publisher = {IEEE Press},
  original = {orig/ranzato-cvpr-07.pdf},
}

@inproceedings{ranzato-cvpr-07-small,
  author = {{Marc'Aurelio} Ranzato and {Fu-Jie} Huang and {Y-Lan} Boureau and Yann {LeCun}},
  title = {Unsupervised Learning of Invariant Feature Hierarchies with Applications to Object Recognition},
  booktitle = {CVPR'07},
  year = {2007},
  original = {orig/ranzato-cvpr-07.pdf},
}

@inproceedings{Ranzato-icdar07,
  author = {{Marc'Aurelio} Ranzato and Yann {LeCun}},
  title = {A Sparse and Locally Shift Invariant Feature Extractor Applied to Document Images},
  booktitle = ICDAR07,
  pages = {1213--1217},
  year = {2007},
  publisher = {IEEE Computer Society},
  address = {Washington, DC, USA},
  isbn = {0-7695-2822-8},
}

@inproceedings{ranzato-unsup-07,
  author = {{Marc'Aurelio} Ranzato and {Y-Lan} Boureau and Sumit Chopra and Yann {LeCun}},
  title = {A Unified Energy-Based Framework for Unsupervised Learning},
  booktitle = aistats07,
  publisher = {Omnipress},
  date = {March 21-24, 2007},
  address = {San Juan, Puerto Rico},
  year = {2007},
}

% Publisher name and city are separate fields (as in ranzato-08).
@inproceedings{Rao+Ruderman-99,
  author = {R. P. N. Rao and D. L. Ruderman},
  title = {Learning {Lie} Groups for Invariant Visual Perception},
  editor = NIPS11ed,
  booktitle = NIPS11,
  publisher = {MIT Press},
  address = {Cambridge, MA},
  pages = {810--816},
  year = {1999},
}

@book{Rao71,
  author = {C. R. Rao and S. K. Mitra},
  title = {Generalized Inverse of Matrices and Its Applications},
  year = {1971},
  publisher = {Wiley},
  address = {New York},
}

@book{Rashevsky38,
  author = {N. Rashevsky},
  title = {Mathematical Biophysics},
  year = {1938},
  publisher = {University of Chicago Press},
  address = {Chicago},
}

@inproceedings{RasmussenC2000,
  author = {Carl Rasmussen},
  title = {The Infinite {G}aussian Mixture Model},
  booktitle = NIPS12,
  editor = NIPS12ed,
  year = {2000},
}

@misc{Rasmussen2001,
  author = {Carl Edward Rasmussen},
  title = {Conjugate gradient for Matlab},
  note = {http://www.kyb.tuebingen.mpg.de/bs/people/carl/code/minimize/},
  year = {2001},
}

% NOTE(review): volume was "341", which looks like "34" fused with an issue
% digit; volume/number below should be confirmed against the journal.
@article{Ratnaparkhi99,
  author = {A. Ratnaparkhi},
  title = {Learning to parse natural language with maximum entropy models},
  journal = {Machine Learning},
  volume = {34},
  number = {1--3},
  pages = {151--176},
  year = {1999},
}

@article{Rauch63,
  author = {H. E. Rauch},
  title = {Solutions to the linear smoothing problem},
  journal = {IEEE Transactions on Automatic Control},
  year = {1963},
  volume = {8},
  pages = {371--372},
}

@article{Refenes-94,
  author = {A. N. Refenes},
  title = {Stock Performance Modeling Using Neural Networks: a Comparative Study with Regression Models},
  journal = {Neural Networks},
  year = {1994},
  volume = {7},
  number = {2},
  pages = {375--388},
}

% "Last, Jr., First" form is required; "G. Bassett Jr." made "Jr." the surname.
@article{regression-KB-78,
  author = {Koenker, R. and Bassett, Jr., G.},
  title = {Regression Quantiles},
  journal = {Econometrica},
  volume = {46},
  number = {1},
  pages = {33--50},
  year = {1978},
}
@inproceedings{reid:1989,
  author = {Reid, M. B. and Spirkovska, L. and Ochoa, E.},
  title = {Rapid Training of Higher-Order Neural Networks for Invariant Pattern Recognition},
  booktitle = ijcnn,
  month = jun,
  year = {1989},
  address = {Washington, DC, USA},
}

@incollection{Rescorla72,
  author = {R. A. Rescorla and A. R. Wagner},
  title = {A Theory of Pavlovian Conditioning: The Effectiveness of Reinforcement and Nonreinforcement},
  booktitle = {Classical Conditioning II: Current Research and Theory},
  editor = {A. H. Black and W. F. Prokasy},
  year = {1972},
  pages = {64--69},
  publisher = {Appleton-Century-Crofts},
  address = {New York},
}

@inproceedings{Resnik-2002,
  author = {Mona Diab and Philip Resnik},
  title = {An unsupervised method for word sense tagging using parallel corpora},
  booktitle = {40th Annual Meeting of the {ACL}},
  year = {2002},
}

@article{Resnik-99,
  author = {Philip Resnik},
  title = {Semantic similarity in a taxonomy: an information-based measure and its application to problems of ambiguity in natural language},
  journal = {Journal of Artificial Intelligence Research},
  year = {1999},
  volume = {11},
  pages = {95--130},
}

@inproceedings{Resnik-99-web,
  author = {P. Resnik},
  title = {Mining the Web for Bilingual Text},
  booktitle = {37th Annual Meeting of the Association for Computational Linguistics (ACL'99)},
  year = {1999},
  month = jun,
  address = {College Park, Maryland},
}

@article{Rhodes-2008,
  author  = {Paul Rhodes},
  title   = {Recoding Patterns of Sensory Input: Higher-Order Features and the Function of Nonlinear Dendritic Trees},
  journal = {Neural Computation},
  year    = 2008,
  volume  = 20,
  number  = 8,
  pages   = {2000--2036},
}

@article{RicLip91,
  author = {Michael D. Richard and Richard P. Lippmann},
  title = {Neural Network Classifiers Estimate {Bayesian} a-posteriori Probabilities},
  journal = {Neural Computation},
  volume = {3},
  pages = {461--483},
  year = {1991},
  abstract = {Theoretical argumentation under which circumstances nets can estimate correctly and what this means for network engineering methodology. Experimental evaluations with different cost functions (mean squared error, cross entropy, normalized likelihood) and network types (multi layer perceptron, radial basis function, high order polynomial) show how accuracy degrades with insufficient data or inadequate network size. Discusses practical consequences. Contains references to work on other cost functions (e.g. information measures)},
  class = {nn, learning, theory},
}

% address is the publisher's city; the previous value ("San Diego 1988") was the
% conference venue plus year, and the publisher field carried the city.
@inproceedings{Ricotti88,
  author = {L. P. Ricotti and S. Ragazzini and G. Martinelli},
  title = {Learning of Word Stress in a Sub-Optimal Second Order Back-Propagation Neural Network},
  booktitle = icnn,
  volume = {1},
  publisher = {IEEE},
  address = {New York, NY},
  pages = {355--361},
  year = {1988},
}

@article{Riedel88,
  author = {U. Riedel and R. K{\"u}hn and J. L. van Hemmen},
  title = {Temporal Sequences and Chaos in Neural Nets},
  journal = prA,
  year = {1988},
  volume = {38},
  pages = {1105--1108},
}

@article{Riis96,
  author = {S. K. Riis and A. Krogh},
  title = {Improving prediction of protein secondary structure using structured neural networks and multiple sequence alignments},
  journal = {J. Comput. Biol.},
  year = {1996},
  volume = {3},
  pages = {163--183},
}

% Same paper as Riis96 (same journal, pages and year); title and volume are
% aligned with that entry. NOTE(review): confirm the title wording.
@article{RiisKrogh1996,
  author = {S. Riis and A. Krogh},
  title = {Improving prediction of protein secondary structure using structured neural networks and multiple sequence alignments},
  journal = {Journal of Computational Biology},
  volume = {3},
  pages = {163--183},
  year = {1996},
}

@techreport{Riley94,
  author = {M. D. Riley and F. C. N. Pereira},
  title = {Weighted-finite-automata tools with applications to speech and language processing},
  institution = {AT\&T Bell Laboratories},
  number = {Technical Memorandum 11222-931130-28TM},
  year = {1994},
}

% "Last, Jr., First" form is required; "G.G. Langdon Jr." made "Jr." the surname.
@article{Rissanen79,
  author = {Rissanen, J. J. and Langdon, Jr., G. G.},
  title = {Arithmetic coding},
  journal = {IBM Journal of Research and Development},
  volume = {23},
  number = {2},
  pages = {149--162},
  year = {1979},
}

@article{rissanen83,
  author = {J. J. Rissanen},
  title = {A universal data compression system},
  journal = {IEEE Transactions on Information Theory},
  volume = {29},
  pages = {656--664},
  year = {1983},
}

@article{Rissanen86,
  author = {J. Rissanen},
  title = {Stochastic complexity and modeling},
  journal = {Annals of Statistics},
  year = {1986},
  volume = {14},
  pages = {1080--1100},
}

@book{RissanenBook,
  author = {J. Rissanen},
  title = {Stochastic Complexity in Statistical Inquiry},
  year = {1990},
  publisher = {World Scientific},
  address = {Singapore},
}

@article{Ritter86,
  author = {H. Ritter and K. Schulten},
  title = {On the Stationary State of Kohonen's Self-Organizing Sensory Mapping},
  journal = biocyb,
  year = {1986},
  volume = {54},
  pages = {99--106},
}

% address is the publisher's city; the previous value ("Neuss 1987") was a place
% plus year, and the publisher field carried the city.
@inproceedings{Ritter88a,
  author = {H. Ritter and K. Schulten},
  title = {Extending Kohonen's Self-Organizing Mapping Algorithm to Learn Ballistic Movements},
  editor = {R. Eckmiller and Ch. von der Malsburg},
  booktitle = {Neural Computers},
  publisher = {Springer-Verlag},
  address = {Berlin},
  pages = {393--406},
  year = {1988},
}

@article{Ritter88b,
  author = {H. Ritter and K. Schulten},
  title = {Convergence Properties of Kohonen's Topology Conserving Maps: Fluctuations, Stability, and Dimension Selection},
  journal = biocyb,
  year = {1988},
  volume = {60},
  pages = {59--71},
}

% address is the publisher's city; the previous value ("San Diego 1988") was the
% conference venue plus year, and the publisher field carried the city.
@inproceedings{Ritter88c,
  author = {H. Ritter and K. Schulten},
  title = {Kohonen's Self-Organizing Maps: Exploring Their Computational Capabilities},
  booktitle = icnn,
  volume = {1},
  publisher = {IEEE},
  address = {New York, NY},
  pages = {109--116},
  year = {1988},
}

@book{Robert-1999,
  author = {Christian P. Robert and George Casella},
  title = {Monte Carlo Statistical Methods},
  year = {1999},
  publisher = {Springer},
}

@techreport{Robinson+Fallside90,
  author = {A. J. Robinson and F. Fallside},
  title = {Phoneme recognition from the {TIMIT} database using recurrent error propagation networks},
  institution = {Cambridge University Engineering Department},
  type = {Technical Report},
  number = {{CUED/F-INFENG/TR.42}},
  year = {1990},
  key = {Robinson},
}

% Shares volume, number, pages, month and year with Robinson91.
@article{Robinson+Fallside91,
  author = {A. J. Robinson and F. Fallside},
  title = {A recurrent error propagation network speech recognition system},
  journal = {Computer Speech and Language},
  year = {1991},
  month = jul,
  volume = {5},
  number = {3},
  pages = {259--274},
}

% address is the publisher's city; the previous value ("Denver, CO") was not,
% and the publisher field carried the city.
@inproceedings{Robinson88,
  author = {A. J. Robinson and F. Fallside},
  title = {Static and Dynamic Error Propagation Networks with Application to Speech Coding},
  editor = nips87ed,
  booktitle = nips87,
  publisher = {American Institute of Physics},
  address = {New York, NY},
  pages = {632--641},
  year = {1988},
}

% Shares volume, number, pages, month and year with Robinson+Fallside91.
@article{Robinson91,
  author = {T. Robinson and F. Fallside},
  title = {Recurrent Error Propagation Network Speech Recognition System},
  journal = cspla,
  year = {1991},
  month = jul,
  volume = {5},
  number = {3},
  pages = {259--274},
}

@inproceedings{Robinson92-icassp,
  author = {T. Robinson},
  title = {A Real-Time Recurrent Error Propagation Network Word Recognition System},
  booktitle = icassp,
  year = {1992},
  volume = {I},
  pages = {617--620},
}

% Journal name spelled out, matching the Rissanen86 entry in this file.
@article{robust-H-73,
  author = {P. J. Huber},
  title = {Robust regression: Asymptotics, Conjectures and {Monte} {Carlo}},
  journal = {Annals of Statistics},
  volume = {1},
  pages = {799--821},
  year = {1973},
}

@book{robust-H-82,
  author = {P. J. Huber},
  title = {Robust Statistics},
  year = {1982},
  publisher = {John Wiley \& Sons Inc.},
}

@book{robust-HRRS-86,
  author = {F. R. Hampel and E. M. Ronchetti and P. J. Rousseeuw and W. A. Stahel},
  title = {Robust Statistics, The Approach based on Influence Functions},
  year = {1986},
  publisher = {John Wiley \& Sons},
}

% "Van Aelst" and "Van Driessen" are compound surnames; the "Last, First"
% form keeps BibTeX from reading "Van" as a middle initial.
@techreport{robust-RAD-00,
  author      = {P. J. Rousseeuw and Van Aelst, S. and Van Driessen, K.},
  title       = {Robust Multivariate Regression},
  institution = {University of Antwerp},
  year        = {2000},
}

@book{robust-RL-87,
  author    = {P. J. Rousseeuw and A. M. Leroy},
  title     = {Robust Regression and Outlier Detection},
  publisher = {John Wiley \& Sons Inc.},
  year      = {1987},
}

@inproceedings{Rohwer-nips90,
  author    = {R. Rohwer},
  title     = {The `Moving Targets' Training Algorithm},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  pages     = {558--565},
  year      = {1990},
}

@inproceedings{Rohwer87,
  author    = {R. Rohwer and B. Forrest},
  title     = {Training Time-Dependence in Neural Networks},
  booktitle = icnn,
  editor    = {M. Caudill and C. Butler},
  volume    = {2},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  pages     = {701--708},
  year      = {1987},
}

% Shares author, booktitle, pages and year with Rohwer-nips90.
@inproceedings{Rohwer90,
  author    = {R. Rohwer},
  title     = {The ``Moving Targets'' Training Algorithm},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {558--565},
  year      = {1990},
}

% Initials are space-separated so BibTeX sees each one as its own
% given-name token ("D.L.T." would be abbreviated to just "D.").
@article{Rohde+Plaut-99,
  author  = {D. L. T. Rohde and D. C. Plaut},
  title   = {Language acquisition in the absence of explicit negative evidence: {H}ow important is starting small?},
  journal = {Cognition},
  volume  = {72},
  pages   = {67--109},
  year    = {1999},
}

@phdthesis{Romeo89,
  author = {F. I. Romeo},
  title  = {Simulated Annealing: Theory and Applications to Layout Problems},
  school = {University of California at Berkeley},
  year   = {1989},
  note   = {Memorandum UCB/ERL--M89/29},
}

% NOTE(review): the key says "Romer" but the first author is Rosales; the key
% is left alone because existing \cite commands depend on it.
@inproceedings{Romer+Frey2003,
  author    = {R. Rosales and B. Frey},
  title     = {Learning Generative Models of Affinity Matrices},
  booktitle = UAI03,
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Francisco, CA},
  pages     = {485--492},
  year      = {2003},
}

@inproceedings{Ron94,
  author    = {D. Ron and Y. Singer and N. Tishby},
  title     = {The power of amnesia},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  pages     = {176--183},
  year      = {1994},
}

@article{Ron96,
  author  = {D. Ron and Y. Singer and N. Tishby},
  title   = {The power of amnesia: Learning Probabilistic Automata with Variable Memory Length},
  journal = {Machine Learning},
  volume  = {25},
  year    = {1996},
}

% Three separate authors joined with " and " so BibTeX parses each name
% (the key "Ron98" reflects the first author).
@article{Ron98,
  author  = {Dana Ron and Yoram Singer and Naftali Tishby},
  title   = {On the Learnability and Usage of Acyclic Probabilistic Finite Automata},
  journal = {Journal of Computer and System Sciences},
  volume  = {56},
  number  = {2},
  pages   = {133--152},
  year    = {1998},
}

% First author's umlaut is written as a BibTeX special character {\"o}.
@inproceedings{Roscheisen-nips92,
  author    = {M. R{\"o}scheisen and R. Hofmann and V. Tresp},
  title     = {Neural Control for Rolling Mills: Incorporating Domain Theories to Overcome Data Deficiency},
  booktitle = NIPS4,
  editor    = NIPS4ed,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  pages     = {659--666},
  year      = {1992},
}

@book{Rose85,
  editor    = {D. Rose and V. G. Dobson},
  title     = {Models of the Visual Cortex},
  publisher = {Wiley},
  address   = {Chichester},
  year      = {1985},
}

@book{Rosenberg-1997,
  author    = {S. Rosenberg},
  title     = {The Laplacian on a Riemannian Manifold},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {1997},
}

@incollection{Rosenberg88,
  author    = {C. R. Rosenberg and G. Blelloch},
  title     = {An Implementation of Network Learning on the Connection Machine},
  booktitle = {Connectionist Models and their Implications},
  editor    = {D. Waltz and J. Feldman},
  publisher = {Ablex Pub. Corp},
  address   = {Norwood, NJ},
  year      = {1988},
}

@techreport{Rosenblatt57,
  author      = {Frank Rosenblatt},
  title       = {The Perceptron --- a perceiving and recognizing automaton},
  institution = {Cornell Aeronautical Laboratory},
  number      = {85-460-1},
  address     = {Ithaca, N.Y.},
  year        = {1957},
}

% Page range uses the ASCII "--" form; classic BibTeX is 8-bit and the rest
% of this file avoids raw Unicode punctuation.
@article{Rosenblatt-1958,
  author  = {Frank Rosenblatt},
  title   = {The perceptron: A probabilistic model for information storage and organization in the brain},
  journal = {Psychological Review},
  volume  = {65},
  pages   = {386--408},
  year    = {1958},
}

@book{Rosenblatt62,
  author    = {Frank Rosenblatt},
  title     = {Principles of Neurodynamics},
  publisher = {Spartan},
  address   = {New York},
  year      = {1962},
}

% URL carries an explicit scheme so url-aware styles emit a usable link.
% NOTE(review): the key says "02" while year is 2001 -- confirm which is right.
@article{rosenfeld02whole,
  author  = {Ronald Rosenfeld and Stanley F. Chen and Xiaojin Zhu},
  title   = {Whole-Sentence Exponential Language Models: {A} Vehicle For Linguistic-Statistical Integration},
  journal = CSL,
  volume  = {15},
  number  = {1},
  year    = {2001},
  url     = {http://citeseer.nj.nec.com/448532.html},
}

@article{Rosenfeld2000,
  author  = {Ronald Rosenfeld},
  title   = {Two decades of Statistical Language Modeling: Where Do We Go From Here?},
  journal = {Proceedings of the {IEEE}},
  volume  = {88},
  number  = {8},
  pages   = {1270--1278},
  year    = {2000},
}

@inproceedings{Rosipal2003,
  author    = {R. Rosipal and L. J. Trejo and B. Matthews},
  title     = {Kernel {PLS}-{SVC} for Linear and Nonlinear Classification},
  booktitle = ICML03,
  editor    = ICML03ed,
  publisher = ICML03publ,
  year      = {2003},
}

@phdthesis{Rossen89,
  author = {M. L. Rossen},
  title  = {Speech Syllable Recognition with a Neural Network},
  school = {Brown University},
  year   = {1989},
}

@article{Rost93,
  author  = {B. Rost and C. Sander},
  title   = {Improved prediction of protein secondary structure by use of sequence profiles and neural networks},
  journal = {Proc. Nat. Ac. Sci. USA},
  volume  = {90},
  pages   = {7558--7562},
  year    = {1993},
}

@article{Rost94,
  author  = {B. Rost and C. Sander},
  title   = {Combining evolutionary information and neural networks to predict protein secondary structure},
  journal = {Proteins},
  volume  = {19},
  pages   = {55--72},
  year    = {1994},
}

% No "number" field: the original carried an empty one, which validators
% flag and which adds nothing to the rendered reference.
@inproceedings{RothBlack2005,
  author    = {Stefan Roth and Michael J. Black},
  title     = {Fields of Experts: a framework for learning image priors},
  booktitle = cvpr05,
  volume    = {2},
  pages     = {860--867},
  year      = {2005},
}

@inproceedings{Roweis+Saul+Hinton-2002,
  author    = {S. Roweis and L. Saul and G. Hinton},
  title     = {Global coordination of local linear models},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002},
}

% DEPRECATED: cite Roweis2000-lle (the next entry) instead of this key.
@article{roweis00lle,
  author  = {Sam Roweis and Lawrence K. Saul},
  title   = {Nonlinear dimensionality reduction by locally linear embedding},
  journal = {Science},
  volume  = {290},
  number  = {5500},
  pages   = {2323--2326},
  month   = dec,
  year    = {2000},
}

% Field-for-field identical to the roweis00lle entry.
@article{Roweis2000-lle,
  author  = {Sam Roweis and Lawrence K. Saul},
  title   = {Nonlinear dimensionality reduction by locally linear embedding},
  journal = {Science},
  volume  = {290},
  number  = {5500},
  pages   = {2323--2326},
  month   = dec,
  year    = {2000},
}

% URL carries an explicit scheme so url-aware styles emit a usable link.
% NOTE(review): @techreport requires an "institution" field, which this entry
% lacks; it cannot be determined from the data here -- please supply it.
@techreport{roweis97unifying,
  author  = {Sam Roweis and Zoubin Ghahramani},
  title   = {A Unifying Review of Linear {G}aussian Models},
  address = {6 King's College Road, Toronto M5S 3H5, Canada},
  year    = {1997},
  url     = {http://citeseer.nj.nec.com/article/roweis97unifying.html},
}

% URL carries an explicit scheme so url-aware styles emit a usable link.
@inproceedings{roweis98em,
  author    = {Sam Roweis},
  title     = {{EM} Algorithms for {PCA} and {SPCA}},
  booktitle = NIPS10,
  editor    = NIPS10ed,
  volume    = {10},
  publisher = {{MIT} Press},
  year      = {1998},
  url       = {http://citeseer.nj.nec.com/roweis98em.html},
}

@inproceedings{RoweisNCA2005,
  author    = {Jacob Goldberger and Sam Roweis and Geoffrey E. Hinton and Ruslan Salakhutdinov},
  title     = {Neighbourhood Components Analysis},
  booktitle = NIPS17,
  editor    = NIPS17ed,
  publisher = {{MIT} Press},
  year      = {2005},
}

@book{Rubinstein1981,
  author    = {Reuven Y. Rubinstein},
  title     = {Simulation and the Monte Carlo Method},
  publisher = {John Wiley \& Sons},
  year      = {1981},
}

@article{Rubner89,
  author  = {J. Rubner and P. Tavan},
  title   = {A Self-Organizing Network for Principal-Component Analysis},
  journal = eul,
  volume  = {10},
  pages   = {693--698},
  year    = {1989},
}


@article{Rubner90,
  author  = {J. Rubner and K. Schulten},
  title   = {Development of Feature Detectors by Self-Organization},
  journal = biocyb,
  volume  = {62},
  pages   = {193--199},
  year    = {1990},
}

@article{Rumelhart85,
  author  = {D. E. Rumelhart and D. Zipser},
  title   = {Feature Discovery by Competitive Learning},
  journal = cogsci,
  volume  = {9},
  pages   = {75--112},
  year    = {1985},
  note    = {Reprinted in \cite[chapter 5]{Rumelhart86a}},
}

@book{Rumelhart86a,
  author    = {D. E. Rumelhart and J. L. McClelland and the PDP Research Group},
  title     = {Parallel Distributed Processing: Explorations in the Microstructure of Cognition},
  volume    = {1},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1986},
}

@article{Rumelhart86b,
  author  = {David E. Rumelhart and Geoffrey E. Hinton and Ronald J. Williams},
  title   = {Learning Representations by Back-Propagating Errors},
  journal = {Nature},
  volume  = {323},
  pages   = {533--536},
  year    = {1986},
}

@incollection{Rumelhart86c,
  author    = {D. E. Rumelhart and G. E. Hinton and R. J. Williams},
  title     = {Learning Internal Representations by Error Propagation},
  booktitle = pdp,
  editor    = {D. E. Rumelhart and J. L. McClelland},
  volume    = {1},
  chapter   = {8},
  pages     = {318--362},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1986},
}

@inproceedings{Russ+Geoff-nips-2007,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Using Deep Belief Nets to Learn Covariance Kernels for {Gaussian} Processes},
  booktitle = NIPS20,
  editor    = NIPS20ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {1249--1256},
  year      = {2008},
}
  %url =          "http://www.csri.utoronto.ca/~hinton/absps/dbngp.pdf",

% Compact variant of Russ+Geoff-nips-2007 (abbreviated booktitle, no pages).
@inproceedings{Russ+Geoff-nips-2007-small,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Using {D}eep {B}elief {N}ets to Learn Covariance Kernels for {G}aussian Processes},
  booktitle = {NIPS 20},
  year      = {2008},
}

% Abbreviated variant of Russ+Geoff-nips-2007. Initials are space-separated
% so BibTeX parses "G." and "E." as separate given-name tokens.
@inproceedings{Russ+Geoff-nips-2007-short,
  author    = {R. Salakhutdinov and G. E. Hinton},
  title     = {Using {D}eep {B}elief {N}ets to Learn Covariance Kernels for {G}aussian Processes},
  booktitle = {Adv. Neural Inf. Proc. Sys. 20},
  pages     = {1249--1256},
  year      = {2008},
}

% Page range uses "--" so styles typeset an en dash.
@article{rust:2005,
  author  = {Nicole Rust and Odelia Schwartz and J. Anthony Movshon and Eero Simoncelli},
  title   = {Spatiotemporal Elements of Macaque {V1} Receptive Fields},
  journal = {Neuron},
  volume  = {46},
  number  = {6},
  pages   = {945--956},
  year    = {2005}
}
% Page range uses "--"; the acronym MT is brace-protected so sentence-casing
% styles do not lowercase it.
@article{rust:2006,
  author  = {Nicole C. Rust and Valerio Mante and Eero P. Simoncelli and J. Anthony Movshon},
  title   = {How {MT} Cells Analyze the Motion of Visual Patterns},
  journal = {Nature Neuroscience},
  volume  = {9},
  number  = {11},
  pages   = {1421--1431},
  year    = {2006},
}

% Journal is IEEE TPAMI ("Pattern Analysis", not "Pattern Recognition"),
% spelled as in the Saund-1989 entry of this file.
% NOTE(review): volume/number/pages were added from the published record --
% confirm against the paper.
@article{RYsed98,
  author  = {Eric Sven Ristad and Peter N. Yianilos},
  title   = {Learning String Edit Distance},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {20},
  number  = {5},
  pages   = {522--532},
  month   = may,
  year    = {1998},
}

@book{Saad-1996,
  author    = {Y. Saad},
  title     = {Iterative Methods for Sparse Linear Systems},
  publisher = {{PWS} Publishing Company},
  address   = {Boston, MA},
  year      = {1996},
}

@techreport{Saad90a,
  author      = {D. Saad and E. Marom},
  title       = {Learning by Choice of Internal Representations --- An Energy Minimization Approach},
  institution = {Faculty of Engineering, Tel Aviv University},
  type        = {Preprint},
  address     = {Ramat-Aviv, Israel},
  year        = {1990},
}

@techreport{Saad90b,
  author      = {D. Saad and E. Marom},
  title       = {Training Feed Forward Nets with Binary Weights via a Modified {CHIR} Algorithm},
  institution = {Faculty of Engineering, Tel Aviv University},
  type        = {Preprint},
  address     = {Ramat-Aviv, Israel},
  year        = {1990},
}

@book{SaadOnlineLearning1999,
  editor    = {David Saad},
  title     = {On-Line Learning in Neural Networks},
  publisher = {Cambridge University Press},
  year      = {1999},
}

@article{Sachs+Young80,
  author  = {M. B. Sachs and E. D. Young},
  title   = {Effects of nonlinearities on speech encoding in the auditory nerve},
  journal = jasa,
  volume  = {68},
  number  = {3},
  pages   = {858--875},
  year    = {1980},
}

@article{Sakoe78,
  author  = {H. Sakoe and C. Chiba},
  title   = {Dynamic Programming Algorithm Optimization for Spoken Word Recognition},
  journal = ieeetassp,
  volume  = {26},
  number  = {1},
  pages   = {43--49},
  month   = feb,
  year    = {1978},
}

% Proceedings-level fields are inherited from the NIPS22 crossref target.
@inproceedings{Salakhutdinov-2010,
  author   = "Ruslan Salakhutdinov",
  title    = "Learning in {M}arkov Random Fields using Tempered Transitions",
  crossref = "NIPS22",
  year     = "2010"
}

@inproceedings{Salakhutdinov+Hinton2007,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Semantic Hashing},
  booktitle = {Proceedings of the 2007 Workshop on Information Retrieval and applications of Graphical Models (SIGIR 2007)},
  publisher = {Elsevier},
  address   = {Amsterdam},
  year      = {2007},
}

% Compact variant of Salakhutdinov+Hinton2007 (abbreviated booktitle).
@inproceedings{Salakhutdinov+Hinton2007-small,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Semantic Hashing},
  booktitle = {SIGIR},
  year      = {2007},
}

% Address spelled "Puerto Rico" (the original had "Porto Rico").
% NOTE(review): "date" holds free text; classic styles ignore it, but
% biblatex expects ISO 8601 here -- confirm the target toolchain.
@inproceedings{SalakhutdinovR2007,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Learning a Nonlinear Embedding by Preserving Class Neighbourhood Structure},
  booktitle = aistats07,
  publisher = {Omnipress},
  address   = {San Juan, Puerto Rico},
  date      = {March 21-24, 2007},
  year      = {2007},
}

% Compact variant of SalakhutdinovR2007; booktitle comes from the
% aistats07-small @string macro.
@inproceedings{SalakhutdinovR2007-small,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Learning a Nonlinear Embedding by Preserving Class Neighbourhood Structure},
  booktitle = aistats07-small,
  year      = {2007},
}

% Abbreviated variant of SalakhutdinovR2007. Initials are space-separated so
% BibTeX parses "G." and "E." as separate given-name tokens.
@inproceedings{SalakhutdinovR2007-short,
  author    = {R. Salakhutdinov and G. E. Hinton},
  title     = {Learning a Nonlinear Embedding by Preserving Class Neighbourhood Structure},
  booktitle = {AI \& Stat.'2007},
  year      = {2007},
}

% "location" spelled "Corvallis" (the original had "Corvalis").
@inproceedings{SalakhutdinovR2007b,
  author    = {Ruslan Salakhutdinov and Andriy Mnih and Geoffrey E. Hinton},
  title     = {Restricted {Boltzmann} machines for collaborative filtering},
  booktitle = ICML07,
  editor    = ICML07ed,
  publisher = ICML07publ,
  address   = {New York, NY, USA},
  pages     = {791--798},
  year      = {2007},
  location  = {Corvallis, Oregon},
}

% Compact variant of SalakhutdinovR2007b (abbreviated booktitle, no pages).
@inproceedings{SalakhutdinovR2007b-small,
  author    = {Ruslan Salakhutdinov and Andriy Mnih and Geoffrey E. Hinton},
  title     = {Restricted {Boltzmann} machines for collaborative filtering},
  booktitle = {ICML 2007},
  year      = {2007},
}

% Abbreviated variant of SalakhutdinovR2007b. Initials are space-separated so
% BibTeX parses "G." and "E." as separate given-name tokens.
@inproceedings{SalakhutdinovR2007b-short,
  author    = {R. Salakhutdinov and A. Mnih and G. E. Hinton},
  title     = {Restricted {Boltzmann} machines for collaborative filtering},
  booktitle = {Int. Conf. Mach. Learn. 2007},
  pages     = {791--798},
  year      = {2007},
}


@inproceedings{Salakhutdinov+Murray-2008,
  author    = {Ruslan Salakhutdinov and Iain Murray},
  title     = {On the Quantitative Analysis of Deep Belief Networks},
  booktitle = ICML08,
  editor    = ICML08ed,
  publisher = ICML08publ,
  volume    = {25},
  pages     = {872--879},
  year      = {2008},
}

@inproceedings{Salakhutdinov+Hinton-2009,
  author    = {Ruslan Salakhutdinov and Geoffrey E. Hinton},
  title     = {Deep {Boltzmann} Machines},
  booktitle = aistats09,
  volume    = {5},
  pages     = {448--455},
  year      = {2009},
  location  = {Clearwater (Florida), USA},
  date      = {April 16-18, 2009},
}

@article{Salamon88,
  author  = {P. Salamon and J. D. Nulton and J. Robinson and J. Petersen and G. Ruppeiner and L. Liao},
  title   = {Simulated Annealing with Constant Thermodynamic Speed},
  journal = cpc,
  volume  = {49},
  pages   = {423--428},
  year    = {1988},
}

@article{Salton+Buckley88,
  author  = {G. Salton and C. Buckley},
  title   = {Term weighting approaches in automatic text retrieval},
  journal = {Information Processing and Management},
  volume  = {24},
  number  = {5},
  pages   = {513--523},
  year    = {1988},
}

@article{Sanger89a,
  author  = {T. D. Sanger},
  title   = {Optimal Unsupervised Learning in a Single-Layer Linear Feedforward Neural Network},
  journal = nn,
  volume  = {2},
  pages   = {459--473},
  year    = {1989},
}

@inproceedings{Sanger89b,
  author    = {T. D. Sanger},
  title     = {An Optimality Principle for Unsupervised Learning},
  booktitle = NIPS1,
  editor    = NIPS1ed,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {11--19},
  year      = {1989},
}

@article{Sanger-1994,
  author  = "Terence D. Sanger",
  title   = "Neural network learning control of robot manipulators using gradually increasing task difficulty",
  journal = "{IEEE} Transactions on Robotics and Automation",
  volume  = "10",
  number  = "3",
  year    = "1994",
}

% Variant of Sanger-1994 with an abbreviated journal name.
@article{Sanger-1994-small,
  author  = "Terence D. Sanger",
  title   = "Neural network learning control of robot manipulators using gradually increasing task difficulty",
  journal = "{IEEE} Trans. on Robotics and Automation",
  volume  = "10",
  number  = "3",
  year    = "1994",
}

@inproceedings{sarawagi03,
  author    = {Sunita Sarawagi and Soumen Chakrabarti and Shantanu Godbole},
  title     = {Cross-training: learning probabilistic mappings between topics},
  booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {177--186},
  year      = {2003},
  location  = {Washington, D.C.},
}

@article{Sarkar-Moore-2005,
  author  = "P. Sarkar and A. Moore",
  title   = "Dynamic social network analysis using latent space models",
  journal = "{SIGKDD} Explorations",
  volume  = "7",
  number  = "2",
  pages   = "31--40",
  year    = "2005",
}

@article{Sato90,
  author  = {M. Sato},
  title   = {A Real Time Learning Algorithm for Recurrent Analog Neural Networks},
  journal = biocyb,
  volume  = {62},
  pages   = {237--241},
  year    = {1990},
}

@article{Saul+96,
  author  = {Lawrence K. Saul and Tommi Jaakkola and Michael I. Jordan},
  title   = {Mean field theory for sigmoid belief networks},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {4},
  pages   = {61--76},
  year    = {1996},
}

% No "number" or "month" fields: the original carried both as empty strings,
% which validators flag and which contribute nothing to the output.
@article{Saul+Roweis-2002,
  author  = {L. Saul and S. Roweis},
  title   = {Think globally, fit locally: unsupervised learning of low dimensional manifolds},
  journal = jmlr,
  volume  = {4},
  pages   = {119--155},
  year    = {2002},
}

% Publisher name and city are kept in separate fields, matching the
% MIT Press entries elsewhere in this file (e.g. Roweis+Saul+Hinton-2002).
@inproceedings{Saul95,
  author    = {Lawrence K. Saul and Michael I. Jordan},
  title     = {Boltzmann Chains and Hidden Markov Models},
  booktitle = NIPS7,
  editor    = NIPS7ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {435--442},
  year      = {1995},
}

% Publisher name and city are kept in separate fields, matching the
% MIT Press entries elsewhere in this file (e.g. Roweis+Saul+Hinton-2002).
@inproceedings{Saul96,
  author    = {Lawrence K. Saul and Michael I. Jordan},
  title     = {Exploiting tractable substructures in intractable networks},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1996},
}

@inproceedings{SaulJordan97,
  author    = {Lawrence K. Saul and Michael I. Jordan},
  title     = {A variational model for model-based interpolation},
  booktitle = NIPS9,
  editor    = NIPS9ed,
  publisher = {MIT Press},
  pages     = {375},
  year      = {1997},
}

@article{Saund-1989,
  author  = {Eric Saund},
  title   = {Dimensionality-reduction using connectionist networks},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {11},
  number  = {3},
  pages   = {304--314},
  year    = {1989},
}

@incollection{Scalettar88,
  author    = {R. Scalettar and A. Zee},
  title     = {Emergence of Grandmother Memory in Feed Forward Networks: Learning with Noise and Forgetfulness},
  booktitle = {Connectionist Models and Their Implications: Readings from Cognitive Science},
  editor    = {D. Waltz and J. A. Feldman},
  publisher = {Ablex},
  address   = {Norwood},
  pages     = {309--332},
  year      = {1988},
}

@article{schapire-90,
  author  = {Robert E. Schapire},
  title   = {The strength of weak learnability},
  journal = {Machine Learning},
  volume  = {5},
  number  = {2},
  pages   = {197--227},
  year    = {1990},
}

@article{Schapire-margin98,
  author  = {Robert E. Schapire and Yoav Freund and Peter Bartlett and Wee Sun Lee},
  title   = {Boosting the margin: {A} new explanation for the effectiveness of voting methods},
  journal = {The Annals of Statistics},
  volume  = {26},
  number  = {5},
  pages   = {1651--1686},
  year    = {1998},
}

% URL is written with the full "http://" prefix (the original "http:citeseer…"
% lacked the "//" and was not a valid absolute URL).
@inproceedings{schapire99theoretical,
  author    = {Robert E. Schapire},
  title     = {Theoretical Views of Boosting and Applications},
  booktitle = {Algorithmic Learning Theory, 10th International Conference, {ALT} '99, Tokyo, Japan, December 1999, Proceedings},
  volume    = {1720},
  publisher = {Springer},
  pages     = {13--25},
  year      = {1999},
  url       = {http://citeseer.ist.psu.edu/article/schapire99theoretical.html},
}

@inproceedings{SchapireSinger98,
  author    = {R. E. Schapire and Y. Singer},
  title     = {Improved Boosting Algorithms Using Confidence Rated Predictions},
  booktitle = {Proceedings of the 11th Annual Conference on Computational Learning Theory},
  year      = {1998},
}

% Umlaut written as the BibTeX special character {\"o}, the form used by the
% other Sch{\"o}lkopf entries in this file, so sorting and labels agree.
% NOTE(review): same title and names as Scholkopf98-book (year differs).
@book{SchBurSmo99,
  author    = {B. Sch{\"o}lkopf and C. J. C. Burges and A. J. Smola},
  title     = {Advances in Kernel Methods --- Support Vector Learning},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1999},
}

@inproceedings{ScheinA2001,
  author    = {Andrew I. Schein and Alexandrin Popescul and Lyle H. Ungar and David M. Pennock},
  title     = {Generative Models for Cold-Start Recommendations},
  booktitle = {Workshop on Recommender Systems at SIGIR},
  year      = {2001},
}

@inproceedings{ScheinA2002,
  author    = {Andrew I. Schein and Alexandrin Popescul and Lyle H. Ungar and David M. Pennock},
  title     = {Methods and metrics for cold-start recommendations},
  booktitle = {SIGIR '02},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  pages     = {253--260},
  year      = {2002},
}

@incollection{Scheines94,
  author    = {R. Scheines},
  title     = {Inferring causal structure among unmeasured variables},
  booktitle = {Selecting Models from Data: Artificial Intelligence and Statistics {IV}},
  editor    = {P. Cheeseman and R. W. Oldford},
  publisher = {Springer-Verlag},
  pages     = {197--204},
  year      = {1994},
}

@inproceedings{Schenkel93,
  author    = {M. Schenkel and H. Weissman and I. Guyon and C. Nohl and D. Henderson},
  title     = {Recognition-Based Segmentation of On-Line Hand-Printed Words},
  booktitle = NIPS5,
  editor    = NIPS5ed,
  address   = {Denver, CO},
  pages     = {723--730},
  year      = {1993},
}

@article{schenkel95,
  author    = {M. Schenkel and I. Guyon and D. Henderson},
  title     = {On-line Cursive Script Recognition using Time Delay Neural Networks and Hidden {Markov} Models},
  journal   = {{Machine} {Vision} and {Applications}},
  publisher = {Springer Verlag},
  pages     = {215--223},
  year      = {1995},
}

@inproceedings{SchGra03,
  author    = {Nicol N. Schraudolph and Thore Graepel},
  title     = {Combining Conjugate Direction Methods with Stochastic Approximation of Gradients},
  booktitle = {Proc.\ 9th Intl.\ Workshop Artificial Intelligence and Statistics (AIstats)},
  editor    = {Christopher M. Bishop and Brendan J. Frey},
  publisher = {Society for Artificial Intelligence and Statistics},
  address   = {Key West, Florida},
  pages     = {7--13},
  year      = {2003},
  isbn      = {0-9727358-0-1},
  abstract  = {The method of conjugate directions provides a very effective way
               to optimize large, deterministic systems by gradient descent. In
               its standard form, however, it is not amenable to stochastic
               approximation of the gradient. Here we explore ideas from
               conjugate gradient in the stochastic (online) setting, using
               fast Hessian-gradient products to set up low-dimensional Krylov
               subspaces within individual mini-batches. In our benchmark
               experiments the resulting online learning algorithms converge
               orders of magnitude faster than ordinary stochastic gradient
               descent.},
}

@article{Schmidhuber92,
  author  = {J{\"u}rgen Schmidhuber},
  title   = {Learning Complex, Extended Sequences using the Principle of History Compression},
  journal = nc,
  volume  = {4},
  number  = {2},
  pages   = {234--242},
  year    = {1992},
}

@article{Schmidhuber96,
  author  = {J{\"u}rgen Schmidhuber},
  title   = {Sequential Neural Text Compression},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {7},
  number  = {1},
  pages   = {142--146},
  year    = {1996},
}

% Online lecture notes: typed as @misc because there is no publisher, which
% @incollection requires. The former booktitle and address are kept together
% in howpublished.
@misc{Schmidt-2006,
  author       = {Volker Schmidt},
  title        = {Markov Chains and Monte-Carlo Simulation},
  howpublished = {Lecture Notes, Summer 2006, Ulm University, Department of Stochastics},
  year         = {2006},
  url          = {http://www.mathematik.uni-ulm.de/stochastik/lehre/ss06/markov/skript-engl/skript-engl.htm},
}

@article{Schmitt-2002,
  author  = {M. Schmitt},
  title   = {Descartes' Rule of Signs for Radial Basis Function Neural Networks},
  journal = {Neural Computation},
  volume  = {14},
  number  = {12},
  pages   = {2997--3011},
  year    = {2002},
}

@article{Schneider-2001,
  author  = {Tapio Schneider},
  title   = {Analysis of Incomplete Climate Data: Estimation of Mean Values and Covariance Matrices and Imputation of Missing Values},
  journal = {Journal of Climate},
  volume  = {14},
  pages   = {853--871},
  year    = {2001},
}

% month uses the predefined macro "dec" so each style can expand or
% abbreviate it.
% NOTE(review): "address" holds what looks like an author affiliation, not a
% publisher city; standard @article styles ignore it.
@article{Schneidman+al-2003,
  author  = {Schneidman, E. and Bialek, W. and Berry, M. J.},
  title   = {Synergy, redundancy, and independence in population codes},
  journal = {Journal of Neuroscience},
  volume  = {23},
  number  = {37},
  pages   = {11539--11553},
  month   = dec,
  year    = {2003},
  address = {Department of Molecular Biology, Princeton University, Princeton, New Jersey 08544, USA.},
  issn    = {1529-2401},
  url     = {http://www.jneurosci.org/cgi/content/abstract/23/37/11539}
}
    

@article{schoelkopf97comparing,
  author  = {B. Sch{\"o}lkopf and K. Sung and C. Burges and F. Girosi and P. Niyogi and T. Poggio and V. Vapnik},
  title   = {Comparing support vector machines with {G}aussian kernels to radial basis function classifiers},
  journal = {IEEE Transactions on Signal Processing},
  volume  = {45},
  pages   = {2758--2765},
  year    = {1997},
  text    = {Sch{\"o}lkopf, B., Sung, K., Burges, C., Girosi, F., Niyogi, P., Poggio, T., and Vapnik, V.: Comparing support vector machines with {G}aussian kernels to radial basis function classifiers. IEEE Transactions on Signal Processing, 45 (1997) 2758-2765.},
}

@book{Scholkopf02-book,
  author    = {B. Sch{\"o}lkopf and A. J. Smola},
  title     = {Learning with Kernels: Support Vector Machines, Regularization, Optimization and Beyond},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002},
}

% "T{\"u}bingen" is written with a LaTeX escape: classic BibTeX is 8-bit and
% the rest of this file escapes accented characters.
@techreport{Scholkopf96,
  author      = {B. Sch{\"o}lkopf and A. Smola and K.-R. M{\"u}ller},
  title       = {Nonlinear Component Analysis as a Kernel Eigenvalue Problem},
  institution = {Max Planck Institute for Biological Cybernetics, T{\"u}bingen, Germany},
  number      = {44},
  year        = {1996},
}

@article{Scholkopf98,
  author  = {B. Sch{\"o}lkopf and A. Smola and K.-R. M{\"u}ller},
  title   = {Nonlinear component analysis as a kernel eigenvalue problem},
  journal = {Neural Computation},
  volume  = {10},
  pages   = {1299--1319},
  year    = {1998},
}

@book{Scholkopf98-book,
  author    = {B. Sch{\"o}lkopf and C. J. C. Burges and A. J. Smola},
  title     = {Advances in kernel methods: support vector learning},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998},
}

@article{Scholkopf99,
  author  = {B. Sch{\"o}lkopf and S. Mika and C. Burges and P. Knirsch and
             K.-R. M{\"u}ller and G. R{\"a}tsch and A. Smola},
  title   = {Input Space Versus Feature Space in Kernel-Based Methods},
  journal = {IEEE Trans. Neural Networks},
  volume  = {10},
  number  = {5},
  pages   = {1000--1017},
  year    = {1999},
}

@article{Schraudolph02,
  author  = {Nicol N. Schraudolph},
  title   = {Fast Curvature Matrix-Vector Products for Second-Order Gradient Descent},
  journal = {Neural Computation},
  volume  = {14},
  number  = {7},
  pages   = {1723--1738},
  year    = {2002},
}

@inproceedings{Schraudolph99,
  author    = {Nicol N. Schraudolph},
  title     = {Local gain adaptation in stochastic gradient descent},
  booktitle = {Proceedings of the 9th International Conference on Artificial Neural Networks},
  pages     = {569--574},
  year      = {1999},
}

@inproceedings{Schutze92,
  author    = {Hinrich Sch{\"u}tze},
  title     = {Dimensions of Meaning},
  booktitle = {Supercomputing'92},
  address   = {Minneapolis MN},
  pages     = {787--796},
  year      = {1992},
}

@InProceedings{Schutze93,
  author    = {H. Sch{\"u}tze},
  title     = {Word space},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo CA},
  pages     = {895--902},
  year      = {1993},
}

@Misc{Schuurmans1999,
  author = {Dale Schuurmans},
  title  = {Greedy importance sampling: {A} new {Monte Carlo} inference method},
  year   = {1999},
  url    = {citeseer.nj.nec.com/25013.html},
}

@InProceedings{Schuurmans2000,
  author    = {Dale Schuurmans and Finnegan Southey},
  title     = {{Monte Carlo} inference via greedy importance sampling},
  booktitle = {Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence (UAI 2000)},
  pages     = {523--532},
  year      = {2000},
  url       = {citeseer.nj.nec.com/281712.html},
}

@article{Schuurmans2001,
  author  = {D. Schuurmans and F. Southey},
  title   = {Metric-based methods for adaptive model selection and regularization},
  journal = {Machine Learning},
  volume  = {48},
  number  = {1},
  pages   = {51--84},
  year    = {2002},
}

@inproceedings{Schuurmans97,
  author    = {D. Schuurmans},
  title     = {A new metric-based approach to model selection},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence (AAAI-97)},
  pages     = {552--558},
  year      = {1997},
}

@article{Schwartz90,
  author  = {D. B. Schwartz and V. K. Samalam and S. A. Solla and J. S. Denker},
  title   = {Exhaustive Learning},
  journal = nc,
  volume  = {2},
  pages   = {371--382},
  year    = {1990},
}

@article{Schwenk+Bengio00,
  author  = {Holger Schwenk and Yoshua Bengio},
  title   = {Boosting Neural Networks},
  journal = {Neural Computation},
  volume  = {12},
  number  = {8},
  pages   = {1869--1887},
  year    = {2000},
}

@InProceedings{Schwenk+Gauvain-2005,
  author    = {Holger Schwenk and Jean-Luc Gauvain},
  title     = {Building continuous space language models for
               transcribing {European} languages},
  booktitle = {Interspeech},
  pages     = {737--740},
  year      = {2005},
}

@inproceedings{Schwenk+Gauvain2002,
  author    = {H. Schwenk and J-L. Gauvain},
  title     = {Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition},
  booktitle = icassp,
  address   = {Orlando, Florida},
  pages     = {765--768},
  year      = {2002},
}

@inproceedings{Schwenk+Gauvain2002-short,
  author    = {H. Schwenk and J-L. Gauvain},
  title     = {Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition},
  booktitle = {Int. Conf. Acoust. Speech \& Sig. Proc.},
  address   = {Orlando, Florida},
  pages     = {765--768},
  year      = {2002},
}

@inproceedings{Schwenk05C,
  author    = {Holger Schwenk and Jean-Luc Gauvain},
  title     = {Training Neural Network Language Models On Very Large Corpora},
  booktitle = {Joint Human Language Technology Conference and Conference on
               Empirical Methods in Natural Language Processing (EMNLP)},
  address   = {Vancouver},
  pages     = {201--208},
  month     = oct,
  year      = {2005},
  url       = {ftp://tlp.limsi.fr/public/emnlp05.pdf},
}

@inproceedings{Schwenk05C-small,
  author    = {Holger Schwenk and Jean-Luc Gauvain},
  title     = {Training Neural Network Language Models On Very Large Corpora},
  booktitle = {EMNLP'2005},
  pages     = {201--208},
  year      = {2005},
}

@TechReport{Schwenk:2001:tr,
  author      = {Holger Schwenk},
  title       = {Language Modeling in the Continuous Domain},
  institution = {LIMSI-CNRS, Orsay, France},
  number      = {2001-20},
  month       = dec,
  year        = {2001},
}

@inproceedings{Schwenk:2002:icassp,
  author    = {Holger Schwenk and Jean-Luc Gauvain},
  title     = {Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition},
  booktitle = icassp,
  volume    = {1},
  pages     = {765--768},
  year      = {2002},
}

@InProceedings{Schwenk:2003:sspr,
  author    = {Holger Schwenk and Jean-Luc Gauvain},
  title     = {Using Continuous Space Language Models for
               Conversational Speech Recognition},
  booktitle = {ISCA \& IEEE Workshop on Spontaneous Speech Processing
               and Recognition},
  address   = {Tokyo},
  month     = apr,
  year      = {2003},
}

@inproceedings{Schwenk:2004:icslp,
  author    = {Holger Schwenk and Jean-Luc Gauvain},
  title     = {Using a Continuous Space Language Model for Conversational Speech Recognition},
  booktitle = icslp,
  year      = {2004},
  note      = {submitted},
}

@inproceedings{Schwenk:2004:ijcnn,
  author    = {Holger Schwenk},
  title     = {Efficient Training of Large Neural Networks for Language Modeling},
  booktitle = ijcnn,
  volume    = {4},
  pages     = {3050--3064},
  year      = {2004},
}

@inproceedings{SchYuGue07,
  author    = {Nicol N. Schraudolph and Jin Yu and Simon G{\"u}nter},
  title     = {A Stochastic Quasi-{Newton} Method for Online Convex Optimization},
  booktitle = {Proc.\ 11th Intl.\ Conf.\ Artificial Intelligence and Statistics (AIstats)},
  publisher = {Society for Artificial Intelligence and Statistics},
  address   = {San Juan, Puerto Rico},
  pages     = {433--440},
  year      = {2007},
  isbn      = {0-9727358-2-8},
}

@inproceedings{Scofield88,
  author    = {C. L. Scofield},
  title     = {Learning Internal Representations in the Coulomb Energy Network},
  booktitle = icnn,
  volume    = {1},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  pages     = {271--276},
  year      = {1988},
}

@inproceedings{Scott+al-2003,
  author    = {Scott S. L. Piao and Paul Rayson and Dawn Archer and
               Andrew Wilson and Tony McEnery},
  title     = {Extracting multiword expressions with a semantic tagger},
  booktitle = {Proceedings of the ACL 2003 workshop on Multiword expressions},
  publisher = {Association for Computational Linguistics},
  address   = {Morristown, NJ, USA},
  pages     = {49--56},
  year      = {2003},
}

@book{Scott92,
  author    = {D. W. Scott},
  title     = {Multivariate Density Estimation: Theory, Practice, and Visualization},
  publisher = {Wiley},
  address   = {New York},
  year      = {1992},
}

@Article{ScST95,
  author  = {A. Schaerf and Y. Shoham and M. Tennenholtz},
  title   = {Adaptive load balancing: a study in multi-agent learning},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {2},
  pages   = {475--500},
  year    = {1995},
}

@Article{Scudder65,
  author  = {Scudder, III, Henry J.},
  title   = {Probability of Error of Some Adaptive Pattern-Recognition Machines},
  journal = {IEEE Transactions on Information Theory},
  volume  = 11,
  pages   = {363--371},
  year    = 1965,
}

@techreport{Seeger-2005,
  author      = {Matthias Seeger},
  title       = {Low Rank Updates for the {Cholesky} Decomposition},
  institution = {Department of EECS, University of California at Berkeley},
  year        = {2005},
}

@inproceedings{Seeger-Williams-Lawrence-2003,
  author    = {M. Seeger and C. Williams and N. Lawrence},
  title     = {Fast Forward Selection to Speed Up Sparse {G}aussian Process Regression},
  booktitle = {Workshop on AI and Statistics},
  volume    = {9},
  year      = {2003},
}

@techreport{Seeger2001,
  author      = {M. Seeger},
  title       = {Learning with labeled and unlabeled data},
  institution = {Edinburgh University},
  year        = {2001},
}

@inproceedings{seidl91p1,
  author    = {D. R. Seidl and D. Lorenz},
  title     = {A structure by which a recurrent neural network can
               approximate a nonlinear dynamic system},
  booktitle = ijcnn,
  volume    = {2},
  pages     = {709--714},
  month     = jul,
  year      = {1991},
}

@TechReport{Sejnowski+Rosenberg86,
  author      = {T. J. Sejnowski and C. R. Rosenberg},
  key         = {Sejnowski},
  title       = {{NETtalk}: A parallel network that learns to read aloud},
  number      = {86-01},
  institution = {Department of Electrical Engineering and Computer
                 Science, Johns Hopkins University, Baltimore, MD},
  year        = {1986},
}

@article{Sejnowski86,
  author  = {T. J. Sejnowski and P. K. Kienker and G. Hinton},
  title   = {Learning Symmetry Groups with Hidden Units: Beyond the Perceptron},
  journal = physicaD,
  volume  = {22},
  pages   = {260--275},
  year    = {1986},
}

@Article{Sejnowski87,
  author  = {T. J. Sejnowski and C. R. Rosenberg},
  title   = {Parallel Networks that Learn to Pronounce {English} Text},
  journal = cs,
  volume  = {1},
  pages   = {145--168},
  year    = {1987},
}

@InProceedings{Seneff84,
  author    = {S. Seneff},
  title     = {Pitch and spectral estimation of speech based on an
               auditory synchrony model},
  booktitle = icassp,
  year      = {1984},
}

@TechReport{Seneff85,
  author      = {S. Seneff},
  title       = {Pitch and spectral estimation of speech based on an
                 auditory synchrony model},
  type        = {{RLE} Technical Report},
  number      = {504},
  institution = {Research Laboratory of Electronics, Massachusetts
                 Institute of Technology},
  address     = {Cambridge, MA},
  year        = {1985},
}

@inproceedings{Seneff86,
  author    = {S. Seneff},
  title     = {A computational model for the peripheral auditory
               system: application to speech recognition research},
  booktitle = icassp,
  pages     = {1983--1986},
  year      = {1986},
}

@article{Seneff88,
  author  = {S. Seneff},
  title   = {A joint synchrony/mean-rate model of auditory speech processing},
  journal = {Journal of Phonetics},
  volume  = {16},
  pages   = {55--76},
  year    = {1988},
}

@book{Seneta-81,
  author    = {E. Seneta},
  title     = {Nonnegative Matrices and {Markov} Chains},
  publisher = {Springer},
  address   = {New York},
  year      = {1981},
}

@Article{senseval-2000,
  author  = {Adam Kilgarriff and Joseph Rosenzweig},
  title   = {Framework and results for {English} {SENSEVAL}},
  journal = {Computers and the Humanities: special issue on {SENSEVAL}},
  volume  = {34},
  pages   = {15--48},
  year    = {2000},
}

@Article{Serbedzija-1996,
  author    = {Nikola B. {\v{S}}erbed{\v{z}}ija},
  title     = {Simulating Artificial Neural Networks on Parallel Architectures},
  journal   = {Computer},
  volume    = {29},
  number    = {3},
  pages     = {56--63},
  year      = {1996},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
  issn      = {0018-9162},
  doi       = {10.1109/2.485893},
}

@article{Serre2007,
  author  = {T. Serre and G. Kreiman and M. Kouh and C. Cadieu and
             U. Knoblich and T. Poggio},
  title   = {A quantitative theory of immediate visual recognition},
  journal = {Progress in Brain Research, Computational Neuroscience:
             Theoretical Insights into Brain Function},
  volume  = {165},
  pages   = {33--56},
  year    = {2007},
}

@article{Serre2007-small,
  author  = {T. Serre and G. Kreiman and M. Kouh and C. Cadieu and
             U. Knoblich and T. Poggio},
  title   = {A quantitative theory of immediate visual recognition},
  journal = {Progress in Brain Res., Comput. Neurosc.},
  volume  = {165},
  pages   = {33--56},
  year    = {2007},
}

@article{Serre-Wolf-2007,
  author    = {Thomas Serre and Lior Wolf and Stanley Bileschi and Maximilian Riesenhuber and Tomaso Poggio},
  title     = {Robust Object Recognition with Cortex-Like Mechanisms},
  journal   = {IEEE Trans. Pattern Anal. Mach. Intell.},
  volume    = {29},
  number    = {3},
  pages     = {411--426},
  year      = {2007},
  issn      = {0162-8828},
  doi       = {10.1109/TPAMI.2007.56},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}


@INPROCEEDINGS{SeungS1998,
    author    = {H. Sebastian Seung},
    title     = {Learning continuous attractors in recurrent networks},
    editor    = NIPS10ed,
    booktitle = NIPS10,
    year      = {1998},
    pages     = {654--660},
    publisher = {MIT Press},
}

@INPROCEEDINGS{Jain-Seung-08,
    author    = {Viren Jain and H. Sebastian Seung},
    title     = {Natural Image Denoising with Convolutional Networks},
    editor    = NIPS21ed,
    booktitle = NIPS21,
    year      = {2008},
}

@inproceedings{Sha+Saul-2005,
    author    = {Fei Sha and Lawrence K. Saul},
    title     = {Analysis and extension of spectral methods for nonlinear dimensionality reduction},
    booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
    year      = {2005},
    isbn      = {1-59593-180-5},
    pages     = {784--791},
    location  = {Bonn, Germany},
    doi       = {10.1145/1102351.1102450},
    publisher = {ACM},
    address   = {New York, NY},
}

@article{Shannon-1949,
    author  = {C. E. Shannon},
    title   = {Communication in the presence of noise},
    journal = {{Proceedings of the Institute of Radio Engineers}},
    volume  = {37},
    number  = 1,
    pages   = {10--21},
    year    = {1949},
}

@article{shapiro00lift,
  author    = {Gregory Piatetsky-Shapiro and Sam Steingold},
  title     = {Measuring lift quality in database marketing},
  journal   = {SIGKDD Explor. Newsl.},
  volume    = {2},
  number    = {2},
  pages     = {76--80},
  year      = {2000},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  issn      = {1931-0145},
}

@InProceedings{shardanand95,
  author    = {Upendra Shardanand and Pattie Maes},
  title     = {Social information filtering: algorithms for
               automating ``word of mouth''},
  booktitle = {CHI '95: Proceedings of the SIGCHI conference on Human
               factors in computing systems},
  publisher = {ACM Press/Addison-Wesley Publishing Co.},
  pages     = {210--217},
  year      = {1995},
  location  = {Denver, Colorado, United States},
}

@article{Sharma-2000,
    author  = {J. Sharma and A. Angelucci and M. Sur},
    title   = {Induction of Visual Orientation Modules in Auditory Cortex},
    journal = {Nature},
    volume  = {404},
    pages   = {841--847},
    year    = {2000},
}

@article{Sharpe-64,
  author  = {W. F. Sharpe},
  title   = {Capital Asset Prices: {A} Theory of Market Equilibrium under Conditions of Risk},
  journal = {Journal of Finance},
  volume  = {19},
  pages   = {425--442},
  year    = {1964},
}

@article{Sharpe-66,
  author  = {W. F. Sharpe},
  title   = {Mutual Fund Performance},
  journal = {Journal of Business},
  volume  = {39},
  number  = {1},
  pages   = {119--138},
  year    = {1966},
}

@InProceedings{Shaw+Jebara-2007,
  author    = {Blake Shaw and Tony Jebara},
  title     = {Minimum Volume Embedding},
  booktitle = aistats07,
  publisher = {Omnipress},
  address   = {San Juan, Puerto Rico},
  date      = {March 21-24, 2007},
  year      = {2007},
}

@inproceedings{Shawe-Taylor+Cristianini+Kandola-2002,
  author    = {J. Shawe-Taylor and N. Cristianini and J. Kandola},
  title     = {On the concentration of spectral properties},
  editor    = NIPS14ed,
  booktitle = NIPS14,
  publisher = {{MIT} Press},
  year      = {2002},
}

@inproceedings{Shawe-Taylor+Williams-2003,
  author    = {J. Shawe-Taylor and C. K. I. Williams},
  title     = {The Stability of Kernel Principal Components Analysis
               and its Relation to the Process Eigenspectrum},
  editor    = NIPS15ed,
  booktitle = NIPS15,
  publisher = {{MIT} Press},
  year      = {2003},
}

@article{Shawe-Taylor98,
  author  = {John Shawe-Taylor and Peter Bartlett and Robert Williamson and Martin Anthony},
  title   = {Structural Risk Minimization over Data-Dependent Hierarchies},
  journal = {IEEE Transactions on Information Theory},
  volume  = {44},
  number  = {5},
  pages   = {1926--1940},
  year    = {1998},
}

@article{Sherrington75,
  author  = {D. Sherrington and S. Kirkpatrick},
  title   = {Solvable Model of a Spin Glass},
  journal = prl,
  volume  = {35},
  pages   = {1792--1796},
  year    = {1975},
}

@Article{Shi+Malik-2000,
  author  = {Jianbo Shi and Jitendra Malik},
  title   = {Normalized Cuts and Image Segmentation},
  journal = {IEEE Transactions on Pattern Analysis and Machine
             Intelligence (PAMI)},
  volume  = {22},
  number  = {8},
  pages   = {888--905},
  year    = {2000},
}

@inproceedings{Shi+Malik-97,
  author    = {J. Shi and J. Malik},
  title     = {Normalized cuts and image segmentation},
  booktitle = cvpr97,
  pages     = {731--737},
  year      = {1997},
}

@inproceedings{Shimohara88,
  author    = {K. Shimohara and T. Uchiyama and Y. Tokunaga},
  title     = {Back-Propagation Networks for Event-Driven Temporal Sequence Processing},
  booktitle = icnn,
  volume    = {1},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  pages     = {665--672},
  year      = {1988},
}

@inproceedings{Shimohata+al-1997,
  author    = {Sayori Shimohata and Toshiyuki Sugio and Junji Nagata},
  title     = {Retrieving Collocations by Co-occurrences and Word Order Constraints},
  booktitle = {Proceedings of the 35th Conference of the Association
               for Computational Linguistics},
  address   = {Madrid},
  pages     = {476--481},
  year      = {1997},
}

@InProceedings{shin:1991,
    author   = "Yoan Shin and Joydeep Ghosh",
    title    = "The Pi-Sigma Network: An Efficient Higher-Order Neural Network for
                Pattern Classification and Function Approximation",
    crossref = "IJCNN:1991",
}
@Proceedings{IJCNN:1991,
    title     = "International Joint Conference on Neural Networks ({IJCNN})",
    booktitle = ijcnn,
    address   = "Seattle, Washington, USA",
    year      = "1991",
}

@Article{ShmulevichI2002,
  author  = "Ilya Shmulevich and Wei Zhang",
  title   = "Binary analysis and optimization-based normalization of gene expression data",
  journal = "Bioinformatics",
  volume  = "18",
  number  = "4",
  pages   = "555--565",
  year    = "2002",
}

@article{short81optimal,
  author  = {R. D. Short and K. Fukunaga},
  title   = {The optimal distance measure for nearest neighbor classification},
  journal = {IEEE Transactions on Information Theory},
  volume  = {27},
  pages   = {622--627},
  year    = {1981},
}

@inproceedings{ShrikiO2001,
  author    = {Oren Shriki and Haim Sompolinsky and Daniel D. Lee},
  title     = {An Information Maximization Approach to Overcomplete
               and Recurrent Representations},
  editor    = NIPS13ed,
  booktitle = NIPS13,
  publisher = {{MIT} Press},
  pages     = {933--938},
  year      = {2001},
}

@inproceedings{ShrikiO2001-small,
  author    = {Oren Shriki and Haim Sompolinsky and Daniel D. Lee},
  title     = {An Information Maximization Approach to Overcomplete
               and Recurrent Representations},
  booktitle = {NIPS 13},
  year      = {2001},
}

@article{Shumway82,
  author  = {R. H. Shumway and D. S. Stoffer},
  title   = {An approach to time series smoothing and forecasting
             using the {EM} algorithm},
  journal = {Journal of Time Series Analysis},
  volume  = {3},
  number  = {4},
  pages   = {253--264},
  year    = {1982},
}

@article{Shumway91,
  author  = {R. H. Shumway and D. S. Stoffer},
  title   = {Dynamic linear models with switching},
  journal = {J. Amer. Stat. Assoc.},
  volume  = {86},
  pages   = {763--769},
  year    = {1991},
}

% NOTE(review): volume/pages corrected from memory of the published record
% (Rev. Econ. Stat. 73, 1991); confirm against the journal before relying on them.
@Article{Sichel91,
  author  = {D. E. Sichel},
  title   = {Business cycle duration dependence: a parametric approach},
  journal = {Review of Economics and Statistics},
  volume  = {73},
  pages   = {254--260},
  year    = {1991},
}

@TechReport{Siegelmann92,
  author      = {H. T. Siegelmann and E. D. Sontag},
  title       = {Neural Networks with Real Weights: Analog Computational
                 Complexity},
  number      = {SYCON-92-05},
  institution = {Rutgers Center for System and Control},
  address     = {New Brunswick, NJ},
  month       = sep,
  year        = {1992},
}

@inproceedings{Sietsma88,
  author    = {J. Sietsma and R. J. F. Dow},
  title     = {Neural Net Pruning---Why and How},
  booktitle = icnn,
  volume    = {1},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  pages     = {325--333},
  year      = {1988},
}

@inproceedings{silver95,
  author    = {Daniel L. Silver and Robert E. Mercer},
  title     = {Toward a Model of Consolidation: The Retention and
               Transfer of Neural Net Task Knowledge},
  booktitle = {Proceedings of the INNS World Congress on Neural Networks},
  volume    = {3},
  address   = {Washington, DC},
  pages     = {164--169},
  month     = jul,
  year      = {1995},
}

@article{silver96,
  author  = {Daniel L. Silver and Robert E. Mercer},
  title   = {The Parallel Transfer of Task Knowledge Using Dynamic
             Learning Rates Based on a Measure of Relatedness},
  journal = {Connection Science, Special issue on Transfer in Inductive Systems},
  volume  = {8},
  number  = {2},
  pages   = {277--294},
  year    = {1996},
}

@techreport{silver97,
  author      = {Daniel L. Silver and Robert E. Mercer and Gilbert A. Hurwitz},
  title       = {The Functional Transfer of Knowledge for Coronary
                 Artery Disease Diagnosis},
  institution = {Department of Computer Science, University of Western Ontario},
  number      = {513},
  month       = jan,
  year        = {1997},
}

@incollection{Silverman-encyc86,
  author    = {B. W. Silverman},
  title     = {Penalized Likelihood},
  editor    = {N. L. Johnson and S. Kotz},
  booktitle = {Encyclopaedia of Statistical Sciences},
  volume    = {6},
  publisher = {Wiley, New York},
  pages     = {664--667},
  year      = {1986},
}

@book{Silverman86,
  author    = {Bernard W. Silverman},
  title     = {Density Estimation for Statistics and Data Analysis},
  publisher = {Chapman and Hall},
  address   = {London},
  year      = {1986},
}

@inproceedings{Silverman88,
  author    = {R. H. Silverman and A. S. Noetzel},
  title     = {Time-Sequential Self-Organization of Hierarchical Neural Networks},
  editor    = nips87ed,
  booktitle = nips87,
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  pages     = {709--714},
  year      = {1988},
}

@InProceedings{simard-03,
  author    = {P. Y. Simard and D. Steinkraus and J. C. Platt},
  title     = {Best Practices for Convolutional Neural Networks Applied
               to Visual Document Analysis},
  booktitle = ICDAR03,
  year      = {2003},
  isbn      = {0-7695-1960-1},
  pages     = {958},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  doi       = {10.1109/ICDAR.2003.1227801},
}

@InProceedings{Simard89,
  author    = {P. Y. Simard and M. B. Ottaway and D. H. Ballard},
  title     = {Analysis of Recurrent Backpropagation},
  editor    = {D. Touretzky and G. Hinton and T. Sejnowski},
  booktitle = cmss88,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Pittsburgh 1988},
  pages     = {103--112},
  year      = {1989},
}

@inproceedings{Simard92,
  author    = {Patrice Simard and Bernard Victorri and Yann LeCun and John Denker},
  title     = {Tangent Prop - {A} formalism for specifying selected
               invariances in an adaptive network},
  editor    = NIPS4ed,
  booktitle = NIPS4,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  pages     = {895--903},
  year      = {1992},
}

@inproceedings{Simard93,
  author    = {P. Y. Simard and Y. {LeCun} and J. Denker},
  title     = {Efficient pattern recognition using a new transformation distance},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  publisher = {Morgan Kaufmann, San Mateo},
  pages     = {50--58},
  year      = {1993},
}

@article{Simard98,
  author          = {P. Y. Simard and Y. A. {LeCun} and J. S. Denker and B. Victorri},
  title           = {Transformation Invariance in Pattern Recognition ---
                     Tangent Distance and Tangent Propagation},
  journal         = {Lecture Notes in Computer Science},
  volume          = {1524},
  year            = {1998},
  coden           = {LNCSD9},
  issn            = {0302-9743},
  bibdate         = {Tue Jan 5 08:21:58 1999},
  acknowledgement = ack-nhfb,
  OPTpages        = {239--??},
}

@inproceedings{Simard-nips92,
  author    = {P. Simard and Y. {LeCun}},
  title     = {Reverse {TDNN}: An Architecture for Trajectory Generation},
  editor    = NIPS4ed,
  booktitle = NIPS4,
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  pages     = {579--588},
  year      = {1992},
}

@phdthesis{Simard-PhD,
  author  = {P. Y. Simard},
  title   = {Learning State Space Dynamics in Recurrent Networks},
  school  = {University of Rochester},
  address = {Rochester, NY},
  year    = {1991},
  note    = {Tech. Rep. 383},
}

@article{Simic90,
  author  = {P. D. Simic},
  title   = {Statistical Mechanics As the Underlying Theory of
             ``Elastic'' and ``Neural'' Optimizations},
  journal = network,
  volume  = {1},
  pages   = {89--103},
  year    = {1990},
}

@article{Simoncelli+al-1992,
    author    = {Eero P. Simoncelli and William T. Freeman and Edward H. Adelson and David J. Heeger},
    title     = {Shiftable Multi-scale Transforms},
    journal   = {IEEE Transactions on Information Theory},
    volume    = {38},
    number    = {2},
    pages     = {587--607},
    year      = {1992},
    publisher = {The IEEE Computer Society},
}

@inproceedings{Simoncelli97,
  author    = {E. P. Simoncelli},
  title     = {Statistical Models for Images: Compression, Restoration and Synthesis},
  booktitle = {Proc. 31st Asilomar Conference on Signals, Systems and Computers},
  publisher = {IEEE},
  year      = {1997},
}

@InProceedings{Simoncelli99,
  author    = {E. P. Simoncelli},
  title     = {Modeling the Joint Statistics of Images in the Wavelet Domain},
  booktitle = {Proc. SPIE, 44th annual meeting},
  volume    = {3813},
  publisher = {SPIE},
  year      = {1999},
}

@article{Sinex+Geisler83,
  author  = {D. G. Sinex and C. D. Geisler},
  title   = {Response of auditory nerve fibers to consonant-vowel syllables},
  journal = jasa,
  volume  = {73},
  number  = {2},
  pages   = {602--615},
  year    = {1983},
}

@article{Singer,
  author  = {A. Singer},
  title   = {Implementations of Artificial Neural Networks on the Connection Machine},
  journal = {Parallel Computing},
  volume  = {14},
  pages   = {305--315},
  year    = {1990},
  OPTnote = {},
}

@inproceedings{Singer-1990,
  author    = {Alexander Singer},
  title     = {Exploiting the Inherent Parallelism of Artificial
               Neural Networks to Achieve 1300 Million Interconnects
               per Second},
  booktitle = {Proceedings of the International Neural Networks Conference},
  pages     = {656--660},
  year      = {1990},
}

@inproceedings{singer00leveraged,
  author    = {Y. Singer},
  title     = {Leveraged vector machines},
  editor    = NIPS12ed,
  booktitle = NIPS12,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  pages     = {610--616},
  year      = {2000},
}

@inproceedings{Singer96,
  author    = {Y. Singer},
  title     = {Adaptive Mixtures of Probabilistic Transducers},
  editor    = NIPS8ed,
  booktitle = NIPS8,
  publisher = {MIT Press, Cambridge, MA},
  year      = {1996},
}

@article{Singer97,
  author  = {Y. Singer},
  title   = {Adaptive Mixtures of Probabilistic Transducers},
  journal = {Neural Computation},
  volume  = {9},
  number  = {8},
  year    = {1997},
}

@inproceedings{singer:1996:nips,
  author    = {Y. Singer},
  title     = {Adaptive Mixtures of Probabilistic Transducers},
  editor    = NIPS8ed,
  booktitle = NIPS8,
  publisher = {MIT Press, Cambridge, MA},
  year      = {1996},
}

@inproceedings{Singh92,
  author    = {S. P. Singh},
  title     = {Reinforcement learning with a hierarchy of abstract models},
  booktitle = {Proceedings of the 10th National Conference on Artificial Intelligence},
  year      = {1992},
  pages     = {202--207},
  publisher = {MIT/AAAI Press},
}

@inproceedings{SinkkonenJ2002,
  author    = {Janne Sinkkonen and Samuel Kaski and Janne Nikkil{\"{a}}},
  title     = {Discriminative Clustering: Optimal Contingency Tables by Learning Metrics},
  booktitle = ECML02,
  year      = {2002},
  pages     = {418--430},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  ISBN      = {3-540-44036-4},
}

@TechReport{Sirat90,
  author =       "J.-A. Sirat and J.-P. Nadal",
  title =        "Neural Trees: {A} New Tool for Classification",
  type =         "Preprint",
  institution =  "Laboratoires d'Electronique Philips",
  address =      "Limeil-Br{\'e}vannes, France",
  year =         "1990",
}

@inproceedings{SiroshJ1994,
  author    = {Joseph Sirosh and Risto Miikkulainen},
  title     = {Ocular Dominance and Patterned Lateral Connections in a Self-Organizing Model of the Primary Visual Cortex},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  year      = {1994},
  pages     = {109--116},
  publisher = {Morgan Kaufmann},
}

@inproceedings{SiroshJ1994-small,
  author    = {J. Sirosh and R. Miikkulainen},
  title     = {Ocular Dominance and Patterned Lateral Connections in a Self-Organizing Model of the Primary Visual Cortex},
  booktitle = {NIPS 6},
  year      = {1994},
}

@inproceedings{Sivilotti87,
  author    = {M. A. Sivilotti and M. A. Mahowald and C. A. Mead},
  title     = {Real-Time Visual Computations Using Analog {CMOS} Processing Arrays},
  booktitle = {Advanced Research in VLSI: Proceedings of the 1987 Stanford Conference},
  editor    = {P. Losleben},
  year      = {1987},
  pages     = {295--312},
  publisher = {MIT Press, Cambridge},
}

@TechReport{Sjoberg92,
  author =       "Jonas Sj{\"o}berg and Lennart Ljung",
  title =        "Overtraining, Regularization, and Searching for
                 Minimum in Neural Networks",
  institution =  "Link{\"o}ping University",
  address =      "S-581 83 Link{\"o}ping, Sweden",
  year =         "1992",
}

@article{Sjoberg95,
  title={Overtraining, regularization and searching for a minimum, with application to neural networks},
  author={Sj{\"o}berg, J. and Ljung, L.},
  journal={International Journal of Control},
  volume={62},
  number={6},
  pages={1391--1407},
  year={1995},
  publisher={Taylor \& Francis}
}

@article{Skinner1958,
  author  = {Burrhus F. Skinner},
  title   = {Reinforcement Today},
  journal = {American Psychologist},
  year    = {1958},
  volume  = {13},
  pages   = {94--99},
}

@phdthesis{Small1980,
  author = {Steven L. Small},
  title  = {Word Expert Parsing: {A} Theory of Distributed Word-Based Natural Language Understanding},
  school = {University of Maryland},
  year   = {1980},
}

@article{smilde97,
  author  = {A. K. Smilde},
  title   = {Comments on multilinear {PLS}},
  journal = {Journal of Chemometrics},
  year    = {1997},
  volume  = {11},
  pages   = {367--377},
}

@Article{Smith+Waterman81,
  author =       "T. F. Smith and M. S. Waterman",
  title =        "Identification of common molecular subsequences",
  journal =      "Journal of Molecular Biology",
  volume =       "147",
  pages =        "195--197",
  year =         "1981",
}

@Article{Smith95,
  author =       "S. P. Smith",
  title =        "Differentiation of the {Cholesky} algorithm",
  journal =      "Journal of Computational and Graphical Statistics",
  volume =       "4",
  pages =        "134--147",
  year =         "1995",
}

@inproceedings{smola00sparsegreedy,
  author    = {A. J. Smola and B. Sch{\"o}lkopf},
  title     = {Sparse greedy matrix approximation for machine learning},
  booktitle = {International Conference on Machine Learning},
  editor    = {P. Langley},
  year      = {2000},
  pages     = {911--918},
  publisher = {Morgan Kaufmann},
  address   = {San Francisco},
}

@inproceedings{Smola2000sparsegreedy,
  author    = {A. J. Smola and P. Bartlett},
  title     = {Sparse Greedy {G}aussian Process Regression},
  booktitle = NIPS13,
  editor    = NIPS13ed,
  year      = {2001},
}

@InProceedings{Smola99semiparametricSVM,
  author =       "A. J. Smola and T. Friess and B. Sch{\"o}lkopf",
  editor =       NIPS11ed,
  booktitle =    NIPS11,
  title =        "Semiparametric Support Vector and Linear Programming
                 Machines",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  pages =        "585--591",
  year =         "1999",
}

@incollection{Smolensky86,
  author    = {Paul Smolensky},
  title     = {Information Processing in Dynamical Systems: Foundations of Harmony Theory},
  booktitle = pdp,
  editor    = {D. E. Rumelhart and J. L. McClelland},
  year      = {1986},
  volume    = {1},
  chapter   = {6},
  pages     = {194--281},
  publisher = {MIT Press},
  address   = {Cambridge},
}

@Article{Smyth94,
  author =       "P. Smyth",
  title =        {Hidden {Markov} models for fault detection in dynamic
                 systems},
  journal =      "Pattern Recognition",
  volume =       "27",
  number =       "1",
  pages =        "149--164",
  year =         "1994",
}

@Article{Smyth97,
  author =       "P. Smyth and D. Heckerman and M. I. Jordan",
  title =        {Probabilistic independence networks for hidden {Markov}
                 probability models},
  journal =      "Neural Computation",
  volume =       "9",
  number =       "2",
  pages =        "227--269",
  year =         "1997",
}

@InProceedings{Smyth97-nips,
  author =       "P. Smyth",
  editor =       NIPS9ed,
  booktitle =    NIPS9,
  title =        {Clustering sequences with hidden {Markov} models},
  publisher =    "MIT Press",
  year =         "1997",
}

@Article{Smyth98,
  author =       "P. Smyth",
  title =        {Belief Networks, Hidden {Markov} Models, and {Markov}
                 Random Fields: a Unifying View},
  journal =      "Pattern Recognition Letters",
  year =         "1998",
}

@techreport{Snapp+Venkatesh-1998,
  author      = {Robert R. Snapp and Santosh S. Venkatesh},
  title       = {Asymptotic derivation of the finite-sample risk of the k nearest neighbor classifier},
  institution = {Department of Computer Science, University of Vermont},
  year        = {1998},
  number      = {UVM-CS-1998-0101},
}

@incollection{SNE-nips15,
  author    = {G. E. Hinton and S. Roweis},
  title     = {Stochastic Neighbor Embedding},
  booktitle = NIPS15,
  editor    = NIPS15ed,
  year      = {2003},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
}

@inproceedings{Snow+al-2006,
  author    = {Rion Snow and Daniel Jurafsky and Andrew Y. Ng},
  title     = {Semantic taxonomy induction from heterogenous evidence},
  booktitle = {Proceedings of COLING/ACL 2006},
  year      = {2006},
}

@book{SocietyNeuro-2006,
    author = "{Society for Neuroscience}",
    title = "Brain Facts: A Primer on the Brain and Nervous System",
    publisher = "{Society for Neuroscience}",
    year = 2006,
    edition = "Fifth",
    note = "{http://sfn.org}",
}

@article{Soffer86,
  author  = {B. H. Soffer and G. J. Dunning and Y. Owechko and E. Marom},
  title   = {Associative Holographic Memory with Feedback Using Phase-Conjugate Mirrors},
  journal = optlett,
  year    = {1986},
  volume  = {11},
  pages   = {118--120},
}

@article{Sola94,
  author  = {M. Sola and J. Driffill},
  title   = {Testing the term structure of interest rates using a stationary vector autoregression with regime switching},
  journal = {Journal of Economic Dynamics and Control},
  year    = {1994},
  volume  = {18},
  pages   = {601--628},
}

@article{Solla88,
  author  = {S. A. Solla and E. Levin and M. Fleisher},
  title   = {Accelerated Learning in Layered Neural Networks},
  journal = cs,
  year    = {1988},
  volume  = {2},
  pages   = {625--639},
}

@inproceedings{Solla89,
  author    = {S. A. Solla},
  title     = {Learning and Generalization in Layered Neural Networks: The Contiguity Problem},
  booktitle = {Neural Networks from Models to Applications},
  editor    = {L. Personnaz and G. Dreyfus},
  year      = {1989},
  pages     = {168--177},
  publisher = {I.D.S.E.T., Paris},
  address   = {Paris 1988},
}

@article{Solomonoff64,
  author  = {Ray J. Solomonoff},
  title   = {A formal theory of inductive inference},
  journal = {Information and Control},
  year    = {1964},
  volume  = {7},
  pages   = {1--22, 224--254},
}

@article{Sompolinsky86,
  author  = {H. Sompolinsky and I. Kanter},
  title   = {Temporal Association in Asymmetric Neural Networks},
  journal = prl,
  year    = {1986},
  volume  = {57},
  pages   = {2861--2864},
}

@InProceedings{Sompolinsky87,
  author =       "H. Sompolinsky",
  editor =       "J. L. van Hemmen and I. Morgenstern",
  booktitle =    "Heidelberg Colloquium on Glassy Dynamics",
  title =        "The Theory of Neural Networks: The {Hebb} Rules and
                 Beyond",
  publisher =    "Springer-Verlag, Berlin",
  address =      "Heidelberg 1986",
  pages =        "485--527",
  year =         "1987",
}

@article{Sompolinsky88,
  author  = {H. Sompolinsky and A. Crisanti and H. J. Sommers},
  title   = {Chaos in Random Neural Networks},
  journal = prl,
  year    = {1988},
  volume  = {61},
  pages   = {259--262},
}

@Article{Sondik73,
  author =       "E. J. Sondik",
  title =        "The optimal control of partially observable {Markov}
                 processes over the finite horizon",
  journal =      "Operations Research",
  volume =       "21",
  pages =        "1071--1088",
  year =         "1973",
}

@Article{Sondik78,
  author =       "E. J. Sondik",
  title =        "The optimal control of partially observable {Markov}
                 processes over the infinite horizon: discounted case",
  journal =      "Operations Research",
  volume =       "26",
  pages =        "282--304",
  year =         "1978",
}

@article{Song+al-2008a,
    author = {Yangqiu Song and Feiping Nie and Changshui Zhang},
    title = {Semi-Supervised Sub-Manifold Discriminant Analysis},
    journal = {Pattern Recognition Letters},
    year = 2008,
}

@article{Song+al-2008b,
  author  = "Yangqiu Song and Feiping Nie and Changshui Zhang and Shiming Xiang",
  title   = "A Unified Framework for Semi-Supervised Dimensionality Reduction",
  journal = "Pattern Recognition",
  year    = "2008",
  volume  = "41",
  number  = "9",
  pages   = "2789--2799",
}

@incollection{Song+al-2008c,
  author    = "Le Song and Alex Smola and Karsten Borgwardt and Arthur Gretton",
  title     = "Colored Maximum Variance Unfolding",
  booktitle = NIPS20,
  editor    = NIPS20ed,
  year      = "2008",
  pages     = "1385--1392",
  publisher = "MIT Press",
  address   = "Cambridge, MA",
}

@Article{Sontag-cs89,
  author =       "E. D. Sontag and H. J. Sussmann",
  title =        "Backpropagation Can Give Rise to Spurious Local Minima
                 Even for Networks without Hidden Layers",
  journal =      "Complex Systems",
  volume =       "3",
  pages =        "91--106",
  year =         "1989",
}

@InProceedings{Sontag-ijcnn89,
  author =       "E. D. Sontag and H. J. Sussmann",
  booktitle =    ijcnn,
  title =        "Backpropagation Separates when Perceptrons Do",
  publisher =    "IEEE Press",
  address =      "Washington DC",
  pages =        "639--642",
  year =         "1989",
}

@techreport{sontag92t1,
  author      = {E. D. Sontag},
  title       = {Systems Combining Linearity and Saturations and Relations to Neural Networks},
  institution = {Rutgers Center for Systems and Control},
  year        = {1992},
  number      = {SYCON--92--01},
}

@Article{Soukoulis83,
  author =       "C. M. Soukoulis and K. Levin and G. S. Grest",
  title =        "Irreversibility and Metastability in Spin-Glasses.
                 {I}. {Ising} Model",
  journal =      prB,
  volume =       "28",
  pages =        "1495--1509",
  year =         "1983",
}

@article{Specht90,
  author  = {D. F. Specht},
  title   = {Probabilistic Neural Networks},
  journal = nn,
  year    = {1990},
  volume  = {3},
  pages   = {109--118},
}

@article{Specht91,
  author  = {D. F. Specht},
  title   = {A General Regression Neural Network},
  journal = {IEEE Trans. Neural Networks},
  year    = {1991},
  month   = nov,
  volume  = {2},
  number  = {6},
  pages   = {568--576},
}

@article{Spiegelhalter93,
  author  = {D. J. Spiegelhalter and A. P. Dawid and S. L. Lauritzen and R. G. Cowell},
  title   = {Bayesian Analysis in Expert Systems},
  journal = {Statistical Science},
  year    = {1993},
  volume  = {8},
  pages   = {219--283},
}

@inproceedings{Spielman-96,
  author    = {D. Spielman and S. Teng},
  title     = {Spectral partitioning works: planar graphs and finite element meshes},
  booktitle = {Proceedings of the 37th Annual Symposium on Foundations of Computer Science},
  year      = {1996},
}

@techreport{Spielman-96b,
  author      = {Daniel A. Spielman and Shang-Hua Teng},
  title       = {Spectral Partitioning Works: Planar Graphs and Finite Element Meshes},
  institution = {U.C. Berkeley},
  year        = {1996},
  number      = {UCB CSD-96-898},
}

@InProceedings{spirkovska:1990,
    author={Spirkovska, L. and Reid, M. B.},
    title={Connectivity Strategies for Higher-Order Neural Networks Applied to
        Pattern Recognition},
    booktitle=ijcnn,
    year={1990},
    month=jun,
    volume={1},
    pages={21--26},
    keywords={computerised pattern recognition, neural nets, connection
        strategies, higher-order neural networks, interconnections, pattern
            recognition, pattern-recognition, regional connectivity},
    doi={10.1109/IJCNN.1990.137538},
}


@book{Spirtes-book93,
  author    = {P. Spirtes and C. Glymour and R. Scheines},
  title     = {Causation, Prediction, and Search},
  year      = {1993},
  publisher = {Springer-Verlag, New York},
}

@Article{Spirtes-Glymour91,
  author =       "P. Spirtes and C. Glymour",
  title =        "An algorithm for fast recovery of sparse causal
                 graphs",
  journal =      "Social Science Computer Review",
  volume =       "9",
  number =       "1",
  pages =        "62--72",
  year =         "1991",
}

@inproceedings{Srebro-Jaakkola,
  author    = {N. Srebro and T. Jaakkola},
  title     = {Weighted Low-Rank Approximations},
  booktitle = ICML03,
  editor    = ICML03ed,
  year      = {2003},
  pages     = {720--727},
  publisher = ICML03publ,
  address   = {Washington, D.C.},
}

@Book{SSL-Book-2006,
  author =       "Olivier Chapelle and Bernhard Sch{\"{o}}lkopf and Alexander Zien",
  title =        "Semi-Supervised Learning",
  publisher =    "MIT Press",
  address =      "Cambridge, MA",
  year =         "2006",
}

@Article{Steels2003,
  author =       "L. Steels",
  title =        "Evolving grounded communication for robots",
  journal =      "Trends in Cognitive Sciences",
  volume =       "7",
  number =       "7",
  pages =        "308--312",
  month =        jul,
  year =         "2003",
  URL =          "http://www.csl.sony.fr/downloads/papers/2003/steels-03c.pdf",
}

@article{Steinbuch61,
  author  = {K. Steinbuch},
  title   = {Die Lernmatrix},
  journal = kyb,
  year    = {1961},
  volume  = {1},
  pages   = {36--45},
}

@Article{SteinhausH1956,
  author = 	 {Hugo Steinhaus},
  title = 	 {Sur la division des corps mat\'eriels en parties},
  journal = 	 {Bulletin L'Acad\'emie Polonaise des Sciences},
  year = 	 {1956},
  volume = 	 {4},
  pages = 	 {801--804},
}

@InCollection{Stevens+Blumstein81,
  author =       "K. N. Stevens and S. E. Blumstein",
  editor =       "P. D. Eimas and J. L. Miller",
  booktitle =    "Perspectives on the study of speech",
  title =        "The search for invariant acoustic correlates of
                 phonetic features",
  publisher =    "Lawrence Erlbaum Associates",
  pages =        "1--38",
  year =         "1981",
}

@incollection{Stevens75,
  author    = {K. N. Stevens},
  title     = {The potential role of properties detectors in the perception of consonants},
  booktitle = {Auditory analysis and perception of speech},
  editor    = {G. Fant and M. A. Tatham},
  year      = {1975},
  pages     = {303--330},
  publisher = {Academic Press, London},
}

@Article{Stevenson90,
  author =       "M. Stevenson and R. Winter and B. Widrow",
  title =        "Sensitivity of Feedforward Neural Networks to Weight
                 Errors",
  journal =      "IEEE Trans. Neural Networks",
  volume =       "1",
  number =       "1",
  pages =        "71--80",
  month =        mar,
  year =         "1990",
  keywords =     "neural network fault tolerance robustness reliability
                 adaline weight errors",
}

@book{Stewart-1998,
  author    = {G. W. Stewart},
  title     = {Matrix Algorithms, Volume {I}: Basic Decompositions},
  year      = {1998},
  publisher = {SIAM},
  address   = {Philadelphia},
}

@book{Stewart73,
  author    = {G. W. Stewart},
  title     = {Introduction to matrix computations},
  year      = {1973},
  publisher = {Academic Press},
}

@inproceedings{Stinchcombe+White89,
  author    = {M. Stinchcombe and H. White},
  title     = {Universal approximation using feedforward networks with non-sigmoid hidden layer activation function},
  booktitle = ijcnn,
  year      = {1989},
  pages     = {613--617},
  publisher = {IEEE},
  address   = {Washington DC},
}

@techreport{Stokbro90,
  author      = {K. Stokbro and D. K. Umberger and J. A. Hertz},
  title       = {Exploiting Neurons with Localized Receptive Fields to Learn Chaos},
  institution = {Nordita},
  address     = {Copenhagen, Denmark},
  year        = {1990},
  type        = {Preprint},
  number      = {90/28 S},
}

@InProceedings{Stolcke-ICSLP02,
  author =       "A. Stolcke",
  booktitle =    "Proceedings of the International Conference on
                 Spoken Language Processing",
  title =        "{SRILM} - An extensible language modeling toolkit",
  address =      "Denver, Colorado",
  year =         "2002",
}

@inproceedings{Stolcke93,
  author    = {A. Stolcke and S. Omohundro},
  title     = {Hidden {Markov} model induction by {Bayesian} model merging},
  booktitle = NIPS5,
  editor    = NIPS5ed,
  year      = {1993},
  pages     = {11--18},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
}

@techreport{Stolcke94a,
  author      = {A. Stolcke and S. M. Omohundro},
  title       = {Best-first Model Merging for Hidden {Markov} Model Induction},
  institution = {International Computer Science Institute},
  address     = {Berkeley, CA},
  year        = {1994},
  month       = jan,
  number      = {TR-94-003},
}

@techreport{Stolcke94b,
  author      = {A. Stolcke and J. Segal},
  title       = {Precise n-gram Probabilities from Stochastic Context-free Grammars},
  institution = {International Computer Science Institute},
  address     = {Berkeley, CA},
  year        = {1994},
  month       = jan,
  number      = {TR-94-007},
}

@article{Stone-80,
  author  = {C. J. Stone},
  title   = {Optimal rates of convergence for nonparametric estimators},
  journal = {Annals of Statistics},
  year    = {1980},
  volume  = {8},
  number  = {6},
  pages   = {1348--1360},
}

@Article{Stormo82,
  author =       "G. D. Stormo and T. D. Schneider and L. Gold and A.
                 Ehrenfeucht",
  title =        "Use of the perceptron algorithm to distinguish
                 translational initiation sites in {{\it E. coli}}",
  journal =      "Nucleic Acids Research",
  volume =       "10",
  pages =        "2997--3010",
  year =         "1982",
}

@inproceedings{Stornetta88,
  author    = {W. S. Stornetta and T. Hogg and B. A. Huberman},
  title     = {A Dynamical Approach to Temporal Pattern Processing},
  booktitle = nips87,
  editor    = nips87ed,
  year      = {1988},
  pages     = {750--759},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
}

@book{Strang80,
  author    = {G. Strang},
  title     = {Linear Algebra and Its Applications},
  year      = {1980},
  publisher = {Academic Press},
  address   = {New York},
}

@phdthesis{Suaudeau94,
  author  = {N. Suaudeau},
  title   = {Un mod\`ele probabiliste pour int\'egrer la dimension temporelle dans un syst\`eme de reconnaissance automatique de la parole},
  school  = {Universit\'e de Rennes I},
  address = {France},
  year    = {1994},
}

@article{suddarth91,
  author    = {Steven C. Suddarth and Alistair D. C. Holden},
  title     = {Symbolic-neural systems and the use of hints for developing complex systems},
  journal   = {Int. J. Man-Mach. Stud.},
  year      = {1991},
  volume    = {35},
  number    = {3},
  pages     = {291--311},
  publisher = {Academic Press Ltd.},
  address   = {London, UK},
}

@article{Sudderth-2007,
  author    = "Erik B. Sudderth and Antonio Torralba and William T. Freeman and Alan S. Willsky",
  title     = "Describing visual scenes using transformed objects and parts",
  journal   = "Int. Journal of Computer Vision",
  year      = "2007",
  volume    = "77",
  pages     = "291--330",
  publisher = "Springer",
}

@article{Sugiyama-2007,
  author  = "Masashi Sugiyama",
  title   = "Dimensionality reduction of multimodal labeled data by local {F}isher discriminant analysis",
  journal = jmlr,
  volume  = "8",
  pages   = "1027--1061",
  year    = "2007",
}

@InProceedings{Sun-ijcnn90,
  author =       "G. Z. Sun and H. H. Chen and Y. C. Lee and C. L.
                 Giles",
  booktitle =    ijcnn,
  title =        "Recurrent Neural Networks, Hidden {Markov} Models and
                 Stochastic Grammars",
  volume =       "I",
  address =      "San Diego CA",
  pages =        "729--734",
  year =         "1990",
}

@book{Sundararajan+Saratchandran-1998,
  author    = {N. Sundararajan and P. Saratchandran},
  title     = {Parallel Architectures for Artificial Neural Networks: Paradigms and Implementations},
  year      = {1998},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA},
  ISBN      = {0-8186-8399-6},
}

@InProceedings{Sutskever+Hinton-2007,
  author =       "Ilya Sutskever and Geoffrey E. Hinton",
  booktitle =    aistats07,
  title =        "Learning Multilevel Distributed Representations for
                 High-Dimensional Sequences",
  publisher =    "Omnipress",
  date =         "March 21-24, 2007",
  address =      "San Juan, Puerto Rico",
  year =         "2007",
}

@Article{Sutskever+Hinton-2008,
  author =       "Ilya Sutskever and Geoffrey E. Hinton",
  title =        "Deep Narrow Sigmoid Belief Networks are Universal
                 Approximators",
  journal =      "Neural Computation",
  volume =       "20",
  number =       "11",
  pages =        "2629--2636",
  year =         "2008",
}

@book{Sutton+Barto-98,
  author    = {Richard Sutton and Andrew Barto},
  title     = {Reinforcement Learning: An Introduction},
  year      = {1998},
  publisher = {MIT Press},
}

@incollection{sutton06introduction,
  author    = {Charles Sutton and Andrew McCallum},
  title     = {An Introduction to Conditional Random Fields for Relational Learning},
  booktitle = {Introduction to Statistical Relational Learning},
  editor    = {Lise Getoor and Ben Taskar},
  year      = {2006},
  publisher = {MIT Press},
  note      = {},
  URL       = {publications/crf-tutorial.pdf},
  tags      = {recent},
}

@phdthesis{Sutton84,
  author  = {R. S. Sutton},
  title   = {Temporal Credit Assignment in Reinforcement Learning},
  school  = {University of Massachusetts},
  address = {Amherst},
  year    = {1984},
}

@article{Sutton88,
  author  = {R. S. Sutton},
  title   = {Learning to Predict by the Methods of Temporal Differences},
  journal = mlearn,
  year    = {1988},
  volume  = {3},
  pages   = {9--44},
}

@InCollection{Sutton91,
  author =       "R. S. Sutton and A. G. Barto",
  editor =       "M. Gabriel and J. W. Moore",
  booktitle =    "Learning and Computational Neuroscience",
  title =        "Time Derivative Models of {Pavlovian} Reinforcement",
  publisher =    "MIT Press",
  address =      "Cambridge",
  year =         "1991",
}

@inproceedings{Sutton95,
  author    = {R. S. Sutton},
  title     = {{TD} models: modeling the world at a mixture of time scales},
  booktitle = {Proceedings of the 12th International Conference on Machine Learning},
  year      = {1995},
  publisher = {Morgan Kaufmann},
}

@inproceedings{Szu86,
  author    = {H. Szu},
  title     = {Fast Simulated Annealing},
  booktitle = snowbird,
  editor    = {J. S. Denker},
  year      = {1986},
  pages     = {420--425},
  publisher = {American Institute of Physics, New York},
  address   = {Snowbird 1986},
}

@InProceedings{Szummer+Jaakkola-2002,
  author =       "M. Szummer and T. Jaakkola",
  editor =       NIPS14ed,
  booktitle =    NIPS14,
  title =        "Partially labeled classification with {Markov} random
                 walks",
  publisher =    "{MIT} Press",
  address =      "Cambridge, MA",
  year =         "2002",
}


@article{Takabatake+al-2007,
  author  = "Hiroki Takabatake and Manabu Kotani and Seiichi Ozawa",
  title   = "Feature extraction by supervised independent component analysis based on category information",
  journal = "Electrical Engineering in Japan",
  year    = "2007",
  volume  = "161",
  number  = "2",
  pages   = "25--32",
}

@InProceedings{TakahashiN2001,
  author =       "Naoto Takahashi and Minoru Motoki and Yoshio Shimazu
                 and Yoichi Tomiura and Toru Hitaka",
  booktitle =    "Proceedings of the Second Workshop on Natural Language
                 Processing and Neural Networks",
  title =        "{PP}-attachment Ambiguity Resolution Using a Neural
                 Network with Modified {FGREP} Method",
  address =      "Tokyo",
  year =         "2001",
}

@InProceedings{Takens81,
  author =       "F. Takens",
  editor =       "D. A. Rand and L.-S. Young",
  booktitle =    "Dynamical Systems and Turbulence",
  title =        "Detecting Strange Attractors In Turbulence",
  volume =       "898",
  publisher =    "Springer-Verlag, Berlin",
  address =      "Warwick 1980",
  pages =        "366--381",
  year =         "1981",
  series =       "Lecture Notes in Mathematics",
}

@article{Takeuchi79,
  author  = {A. Takeuchi and S. Amari},
  title   = {Formation of Topographic Maps and Columnar Microstructures in Nerve Fields},
  journal = biocyb,
  year    = {1979},
  volume  = {35},
  pages   = {63--72},
}

@InCollection{Tam+Perkel89,
  author =       "D. C. Tam and D. H. Perkel",
  editor =       "R. D. Hawkins and G. H. Bower",
  booktitle =    "Computational Models of Learning in Simple Neural
                 Systems",
  title =        "Quantitative modeling of synaptic plasticity",
  publisher =    "Academic Press",
  pages =        "1--30",
  year =         "1989",
}

@article{Tank86,
  author  = {D. W. Tank and J. J. Hopfield},
  title   = {Simple ``Neural'' Optimization Networks: An {A}/{D} Converter, Signal Decision Circuit, and a Linear Programming Circuit},
  journal = ieeetcas,
  year    = {1986},
  volume  = {33},
  pages   = {533--541},
}

@article{Tank87a,
  author  = {D. W. Tank and J. J. Hopfield},
  title   = {Neural Computation by Time Compression},
  journal = PNAS,
  year    = {1987},
  volume  = {84},
  pages   = {1896--1900},
}

@inproceedings{Tank87b,
  author    = {D. W. Tank and J. J. Hopfield},
  title     = {Concentrating Information in Time: Analog Neural Networks with Applications to Speech Recognition Problems},
  booktitle = icnn,
  editor    = {M. Caudill and C. Butler},
  year      = {1987},
  volume    = {2},
  pages     = {455--468},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
}

@book{Tanner1993,
  author    = {M. Tanner},
  title     = {Tools for statistical inference: Methods for exploration of posterior distributions and likelihood functions},
  year      = {1993},
  publisher = {Springer},
  address   = {New York},
}

@Article{Tappert90,
  author =       "C. Tappert and C. Suen and T. Wakahara",
  title =        "The state of the art in on-line handwriting
                 recognition",
  journal =      ieeetpami,
  volume =       "12",
  number =       "8",
  pages =        "787--808",
  year =         "1990",
}

@incollection{Taylor+2007,
  author    = {Graham Taylor and Geoffrey E. Hinton and Sam Roweis},
  title     = {Modeling Human Motion Using Binary Latent Variables},
  booktitle = NIPS19,
  editor    = NIPS19ed,
  year      = {2007},
  pages     = {1345--1352},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
}

%%FRED: I deprecate this one because the year in the key is that of the conference, not of the publication!
@inproceedings{Taylor2006,
  author    = {Graham Taylor and Geoffrey E. Hinton and Sam Roweis},
  title     = {Modeling Human Motion Using Binary Latent Variables},
  booktitle = NIPS19,
  editor    = NIPS19ed,
  year      = {2007},
  pages     = {1345--1352},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
}

@InProceedings{Taylor2006-small,
  author =       "Graham Taylor and Geoffrey E. Hinton and Sam Roweis",
  booktitle =    "NIPS 19",
  title =        "Modeling Human Motion Using Binary Latent Variables",
  year =         "2006",
}

@InProceedings{TaylorHintonICML2009,
  author =    {Graham Taylor and Geoffrey Hinton},
  title =     {Factored Conditional Restricted {Boltzmann} Machines for Modeling Motion Style},
  booktitle = {Proceedings of the 26th International Conference on Machine Learning (ICML'09)},
  pages =     {1025--1032},
  year =      2009,
  editor =    {L\'{e}on Bottou and Michael Littman},
  address =   {Montreal},
  month =     jun,
  publisher = {Omnipress}
}

@InProceedings{Taylor56,
  author =       "W. K. Taylor",
  editor =       "C. Cherry",
  booktitle =    "Information Theory",
  title =        "Electrical Simulation of Some Nervous System
                 Functional Activities",
  publisher =    "Butterworths, London",
  address =      "London 1955",
  pages =        "314--328",
  year =         "1956",
}

@inproceedings{Tebelskis91,
  author    = {J. Tebelskis and A. Waibel and B. Petek and O. Schmidbauer},
  title     = {Continuous Speech Recognition Using Linked Predictive Networks},
  editor    = NIPS3ed,
  booktitle = NIPS3,
  pages     = {199--205},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Denver, CO},
  year      = {1991},
}

@Article{Teh-2003,
  author =       "{Yee Whye} Teh and Max Welling and Simon Osindero and
                 Geoffrey E. Hinton",
  title =        "Energy-Based Models for Sparse Overcomplete
                 Representations",
  journal =      jmlr,
  volume =       "4",
  pages =        "1235--1260",
  year =         "2003",
}

@inproceedings{Teh-Roweis-2003,
  author    = {Y. Whye Teh and S. Roweis},
  title     = {Automatic Alignment of Local Representations},
  editor    = NIPS15ed,
  booktitle = NIPS15,
  publisher = {{MIT} Press},
  year      = {2003},
}

@article{TehY2006,
title=          "Hierarchical {D}irichlet Processes",
author=         "Y. W. Teh and M. I. Jordan and M. J. Beal and D. M. Blei",
journal=        "Journal of the American Statistical Association",
volume=         "101",
number=         "476",
pages=          "1566--1581",
year=           "2006"
}

@article{tenenbaum00separating,
  author  = {Joshua B. Tenenbaum and William T. Freeman},
  title   = {Separating Style and Content with Bilinear Models},
  journal = {Neural Computation},
  volume  = {12},
  number  = {6},
  pages   = {1247--1283},
  year    = {2000},
}

@article{Tenenbaum2000-isomap,
  author  = {Joshua Tenenbaum and Vin {de Silva} and John C. Langford},
  title   = {A Global Geometric Framework for Nonlinear Dimensionality Reduction},
  journal = {Science},
  volume  = {290},
  number  = {5500},
  pages   = {2319--2323},
  month   = dec,
  year    = {2000},
}

@article{Terrell+Scott-1992,
  author  = {G. R. Terrell and D. W. Scott},
  title   = {Variable Kernel Density Estimation},
  journal = {Annals of Statistics},
  volume  = {20},
  pages   = {1236--1265},
  year    = {1992},
}

@article{Tesauro86,
  author  = {G. Tesauro},
  title   = {Simple Neural Models of Classical Conditioning},
  journal = biocyb,
  volume  = {55},
  pages   = {187--200},
  year    = {1986},
}

@article{Tesauro88a,
  author  = {G. Tesauro and B. Janssens},
  title   = {Scaling Relationships in Back-Propagation Learning},
  journal = cs,
  volume  = {2},
  pages   = {39--44},
  year    = {1988},
}

@inproceedings{Tesauro88b,
  author    = {G. Tesauro and T. J. Sejnowski},
  title     = {A ``Neural'' Network That Learns to Play Backgammon},
  editor    = nips87ed,
  booktitle = nips87,
  pages     = {442--456},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
  year      = {1988},
}

@Article{Tesauro90,
  author =       "G. Tesauro",
  title =        "Neurogammon Wins Computer Olympiad",
  journal =      nc,
  volume =       "1",
  pages =        "321--323",
  year =         "1989",
}

@article{Tesauro92,
  author  = {G. Tesauro},
  title   = {Practical issues in temporal difference learning},
  journal = {Machine Learning},
  volume  = {8},
  pages   = {257--277},
  year    = {1992},
}

@article{tesauro:1994:nc,
  author  = {G. Tesauro},
  title   = {{TD-Gammon}, a Self-Teaching Backgammon Program, Achieves Master-Level Play},
  journal = nc,
  volume  = {6},
  number  = {2},
  pages   = {215--219},
  year    = {1994},
}

@article{Thakoor87,
  author  = {A. P. Thakoor and A. Moopenn and J. Lambe and S. K. Khanna},
  title   = {Electronic Hardware Implementations of Neural Networks},
  journal = applopt,
  volume  = {26},
  pages   = {5085--5092},
  year    = {1987},
}

@InProceedings{THastie95,
  author =       "Trevor Hastie and Patrice Simard and Eduard
                 S{\"a}ckinger",
  editor =       NIPS7ed,
  booktitle =    NIPS7,
  title =        "Learning Prototype Models for Tangent Distance",
  publisher =    "MIT Press",
  pages =        "999--1006",
  year =         "1995",
}

@article{THastie98,
  author  = {T. Hastie and P. Simard},
  title   = {Metrics and Models for Handwritten Character Recognition},
  journal = {Statistical Science},
  volume  = {13},
  number  = {1},
  pages   = {54--65},
  month   = jan,
  year    = {1998},
  url     = {citeseer.ist.psu.edu/hastie97metrics.html},
}

@book{thrun+pratt-book-1998,
  editor    = {Sebastian Thrun and Lorien Y. Pratt},
  title     = {Learning to Learn},
  publisher = {Kluwer Academic},
  year      = {1998},
}

@InProceedings{Thrun1995,
  author =       "S. Thrun and T. Mitchell",
  booktitle =    "Proceedings of the 14th International Joint Conference
                 on Artificial Intelligence (IJCAI)",
  title =        "Learning One More Thing",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  month =        aug,
  year =         "1995",
}

@TechReport{thrun95,
  author =       "S. Thrun and J. O'Sullivan",
  title =        "Clustering learning tasks and the selective cross-task
                 transfer of knowledge",
  number =       "CMU-CS-95-209",
  institution =  "School of Computer Science, Carnegie Mellon
                 University",
  year =         "1995",
}

@techreport{thrun95a,
  author      = {Sebastian Thrun},
  title       = {Lifelong Learning: {A} Case Study},
  institution = {School of Computer Science, Carnegie Mellon University},
  number      = {CMU-CS-95-208},
  address     = {Pittsburgh, PA 15213},
  month       = nov,
  year        = {1995},
}

@inproceedings{thrun95b,
  author       = {Sebastian Thrun and Tom M. Mitchell},
  title        = {Learning One More Thing},
  booktitle    = {Proceedings of IJCAI-95},
  organization = {IJCAI},
  address      = {Montreal, Canada},
  year         = {1995},
}

@inproceedings{Thrun96a,
  author    = {S. Thrun},
  title     = {Is Learning the $n$-th Thing Any Easier Than Learning the First?},
  editor    = NIPS8ed,
  booktitle = NIPS8,
  pages     = {640--646},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1996},
}

@book{Thrun96b,
  author    = "S. Thrun",
  title     = "Explanation-Based Neural Network Learning: A Lifelong Learning Approach",
  publisher = "Kluwer Academic Publishers",
  address   = "Boston, MA",
  year      = "1996",
}

@Article{Tibshirani95,
  author =       "Robert J. Tibshirani",
  title =        "Regression shrinkage and selection via the lasso",
  journal =      "Journal of the Royal Statistical Society B",
  volume =       "58",
  number =       "1",
  pages =        "267--288",
  year =         "1996",
}

@article{Ticknor87,
  author  = {A. J. Ticknor and H. Barrett},
  title   = {Optical Implementations of {Boltzmann} Machines},
  journal = opteng,
  volume  = {26},
  pages   = {16--21},
  year    = {1987},
}

@book{Tikhonov+Arsenin77,
  author    = {A. N. Tikhonov and V. Y. Arsenin},
  title     = {Solutions of Ill-posed Problems},
  publisher = {W. H. Winston},
  address   = {Washington D.C.},
  year      = {1977},
}

@inproceedings{tipping00relevance,
  author     = {M. E. Tipping},
  title      = {The Relevance Vector Machine},
  editor     = NIPS12ed,
  booktitle  = NIPS12,
  pages      = {652--658},
  publisher  = {MIT Press},
  year       = {2000},
  OPTaddress = {Cambridge, MA},
}

@article{tipping99mixtures,
  author  = {M. E. Tipping and C. M. Bishop},
  title   = {Mixtures of Probabilistic Principal Component Analysers},
  journal = {Neural Computation},
  volume  = {11},
  number  = {2},
  pages   = {443--482},
  year    = {1999},
  url     = {citeseer.nj.nec.com/tipping98mixtures.html},
}

@inproceedings{Tishby89,
  author    = {N. Tishby and E. Levin and S. A. Solla},
  title     = {Consistent Inference of Probabilities in Layered Networks: Predictions and Generalization},
  booktitle = ijcnn,
  volume    = {2},
  pages     = {403--410},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

@inproceedings{Titov+Henderson-2007,
  author    = {Ivan Titov and James Henderson},
  title     = {Constituent Parsing with Incremental Sigmoid Belief Networks},
  booktitle = {Proc. 45th Meeting of Association for Computational Linguistics (ACL'07)},
  pages     = {632--639},
  address   = {Prague, Czech Republic},
  year      = {2007},
  url       = {http://aclweb.org/anthology-new/P/P07/P07-1080.pdf},
}

@inproceedings{ToMa00,
  author    = {Kristina Toutanova and Christopher D. Manning},
  title     = {Enriching the Knowledge Sources Used in a Maximum Entropy Part-of-Speech Tagger},
  booktitle = {EMNLP/VLC 2000},
  pages     = {63--70},
  year      = {2000},
}

@inproceedings{Tomita82,
  author    = {M. Tomita},
  title     = {Dynamic Construction of Finite-state Automata from Examples Using Hill-Climbing},
  booktitle = {Proceedings of the Fourth Annual Cognitive Science Conference},
  pages     = {105--108},
  address   = {Ann Arbor, MI},
  year      = {1982},
}

@book{Tong83,
  author    = {H. Tong},
  title     = {Threshold Models in Nonlinear Time Series Analysis},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = {1983},
}

@inproceedings{TongKoller2000,
  author    = {S. Tong and D. Koller},
  title     = {Restricted Bayes Optimal Classifiers},
  booktitle = {Proceedings of the 17th National Conference on Artificial Intelligence (AAAI)},
  pages     = {658--664},
  address   = {Austin, Texas},
  year      = {2000},
}

@article{Torgerson52,
  author  = {W. Torgerson},
  title   = {Multidimensional scaling, 1: Theory and method},
  journal = {Psychometrika},
  volume  = {17},
  pages   = {401--419},
  year    = {1952},
}

@inproceedings{Torralba+Fergus+Weiss-2008,
 author = {Antonio Torralba and Robert Fergus and Yair Weiss},
 title = {Small codes and large databases for recognition},
 booktitle = cvpr08,
 pages = "1--8",
 year = 2008,
}

@incollection{Torresani+Lee-2007,
  author    = "Lorenzo Torresani and Kuang-Chih Lee",
  title     = "Large Margin Component Analysis",
  editor    = NIPS19ed,
  booktitle = NIPS19,
  pages     = "1385--1392",
  publisher = "MIT Press",
  address   = "Cambridge, MA",
  year      = "2007",
}

@inproceedings{Torresen+al-1995,
  author    = {J. Torresen and S. Mori and H. Nakashima and S. Tomita and O. Landsverk},
  title     = {Exploiting multiple degrees of {BP} parallelism on the highly parallel computer {AP1000}},
  booktitle = {Proceedings of the Fourth International Conference on Artificial Neural Networks},
  pages     = {483--488},
  address   = {Cambridge, UK},
  year      = {1995},
}

@inproceedings{Torresen+al-1995b,
  author    = {J. Torresen and S. Tomita and O. Landsverk},
  title     = {The relation of Weight Update Frequency to Convergence of {BP}},
  booktitle = {World Congress on Neural Networks},
  address   = {Washington D.C., USA},
  year      = {1995},
}

@article{Torresen-1997,
  author  = {Jim Torresen},
  title   = {The Convergence of Backpropagation Trained Neural Networks for Various Weight Update Frequencies},
  journal = {International Journal of Neural Systems},
  volume  = {8},
  number  = {3},
  year    = {1997},
}

@article{Toulouse86,
  author  = {G. Toulouse and S. Dehaene and J.-P. Changeux},
  title   = {Spin Glass Model of Learning by Selection},
  journal = PNAS,
  volume  = {83},
  pages   = {1695--1698},
  year    = {1986},
}

@article{Touretzky89,
  author  = {D. S. Touretzky and D. A. Pomerleau},
  title   = {What's Hidden in the Hidden Layers?},
  journal = BYTE,
  pages   = {227--233},
  month   = aug,
  year    = {1989},
}

@InProceedings{ToutanovaKMS03,
  author =       "Kristina Toutanova and Dan Klein and Christopher D.
                 Manning and Yoram Singer",
  booktitle =    "HLT-NAACL",
  title =        "Feature-Rich Part-of-Speech Tagging with a Cyclic
                 Dependency Network",
  year =         "2003",
  bibsource =    "DBLP, http://dblp.uni-trier.de",
  ee =           "http://acl.ldc.upenn.edu/N/N03/N03-1033.pdf",
}

@InProceedings{Towell-nips92,
  author =       "G. G. Towell and J. W. Shavlik",
  editor =       NIPS4ed,
  booktitle =    NIPS4,
  title =        "Interpretation of Artificial Neural Networks: Mapping
                 Knowledge-Based Neural Networks into Rules",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "977--984",
  year =         "1992",
}

@InProceedings{towell93,
  author =       "G. G. Towell and J. W. Shavlik",
  editor =       NIPS4ed,
  booktitle =    NIPS4,
  title =        "Interpretation of Artificial Neural Networks: Mapping
                 Knowledge-Based Neural Networks into rules",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "977--984",
  year =         "1992",
}

@InProceedings{Towell-aaai90,
  author =       "G. G. Towell and J. W. Shavlik and M. O. Noordewier",
  booktitle =    "Proceedings of the Eighth National Conference on
                 Artificial Intelligence (AAAI-90)",
  title =        "Refinement of Approximate Domain Theories by
                 Knowledge-Based Neural Networks",
  pages =        "861--866",
  year =         "1990",
  OPTnote =      "",
}

@techreport{TR:Breiman.arcing,
  author      = {Leo Breiman},
  title       = {Bias, variance, and Arcing classifiers},
  institution = {Statistics Department, University of California at Berkeley},
  number      = {460},
  year        = {1996},
}

@techreport{TR:Breiman:edge,
  author      = {Leo Breiman},
  title       = {Arcing the edge},
  institution = {Statistics Department, University of California at Berkeley},
  number      = {486},
  year        = {1997},
}

@techreport{TR:Breiman:gametheorie,
  author      = {Leo Breiman},
  title       = {Prediction games and arcing classifiers},
  institution = {Statistics Department, University of California at Berkeley},
  number      = {504},
  year        = {1997},
}

@TechReport{TR:Friedman+Hastie+Tibshirani:AdaBoost-theory,
  author =       "J. Friedman and T. Hastie and R. Tibshirani",
  title =        "Additive Logistic Regression: a Statistical View of
                 Boosting",
  institution =  "Department of Statistics, Stanford University",
  month =        aug,
  year =         "1998",
}

@TechReport{TR:Tibshirani:bias+var,
  author =       "R. Tibshirani",
  title =        "Bias, Variance and Prediction Error for Classification
                 Rules",
  institution =  "Department of Statistics, University of Toronto",
  year =         "1996",
}

@article{Traven91,
  author  = {H. G. C. Traven},
  title   = {A neural network approach to statistical pattern classification by semiparametric estimation of probability density functions},
  journal = ieeetrnn,
  volume  = {2},
  number  = {3},
  pages   = {366--377},
  year    = {1991},
}

@InCollection{TreHolAhm93,
  author =       "V. Tresp and J. Hollatz and S. Ahmad",
  editor =       NIPS5ed,
  booktitle =    NIPS5,
  title =        "Network structuring and training using rule-based
                 knowledge",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  year =         "1993",
}

@inproceedings{Tresp-nips93,
  author    = {V. Tresp and J. Hollatz and S. Ahmad},
  title     = {Network Structuring and Training Using Rule-based Knowledge},
  editor    = NIPS5ed,
  booktitle = NIPS5,
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1993},
}

@article{tresp2001,
  author  = {V. Tresp},
  title   = {Scaling Kernel-Based Systems to Large Data Sets},
  journal = {Data Mining and Knowledge Discovery},
  volume  = {5},
  number  = {3},
  pages   = {197--211},
  year    = {2001},
}

@InCollection{Tresp94,
  author =       "V. Tresp and S. Ahmad and R. Neuneier",
  editor =       NIPS6ed,
  booktitle =    NIPS6,
  title =        "Training neural networks with deficient data",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "128--135",
  year =         "1994",
}

@article{TRNN:Tsoi94,
  author  = {A. C. Tsoi and A. Back},
  title   = {Locally Recurrent Globally Feedforward Networks, {A} Critical Review of Architectures},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {5},
  number  = {2},
  pages   = {229--239},
  year    = {1994},
}

@inproceedings{Tseng-1998,
  author    = {Yuen-Hsien Tseng},
  title     = {Multilingual Keyword Extraction for Term Suggestion},
  booktitle = {SIGIR '98: Proceedings of the 21st Annual
               International ACM SIGIR Conference on Research and
               Development in Information Retrieval, August 24-28
               1998, Melbourne, Australia},
  pages     = {377--378},
  publisher = {ACM},
  year      = {1998},
}

@article{TsochantaridisI2005,
  author    = {Ioannis Tsochantaridis and Thorsten Joachims and Thomas Hofmann and Yasemin Altun},
  title     = {Large Margin Methods for Structured and Interdependent Output Variables},
  journal   = {J. Mach. Learn. Res.},
  volume    = {6},
  pages     = {1453--1484},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {2005},
  issn      = {1533-7928},
}

@article{Tsodyks88,
  author  = {M. V. Tsodyks and M. V. Feigel'man},
  title   = {The Enhanced Storage Capacity in Neural Networks with Low Activity Level},
  journal = eul,
  volume  = {6},
  pages   = {101--105},
  year    = {1988},
}

@inproceedings{Tsoi+Pearson91,
  author    = {A. C. Tsoi and R. A. Pearson},
  title     = {Comparison of three classification techniques: {CART}, {C4}.5, and multi-layer perceptron},
  editor    = NIPS3ed,
  booktitle = NIPS3,
  pages     = {},
  publisher = {Morgan Kaufmann},
  address   = {Denver, CO},
  year      = {1991},
}

@book{TSP93,
  editor    = {A. Weigend and N. Gershenfeld},
  title     = {Time Series Prediction: Forecasting the future and understanding the past},
  publisher = {Addison-Wesley},
  year      = {1993},
}

@inproceedings{Tsuda99,
  author    = {K. Tsuda},
  title     = {Optimal Hyperplane Classifier based on Entropy Number Bound},
  booktitle = {ICANN'99},
  pages     = {419--424},
  year      = {1999},
}

@phdthesis{Turian07thesis,
  author = {Joseph Turian},
  title  = {Constituent Parsing by Classification},
  school = {New York University},
  year   = {2007},
}

@article{tzanetakis+cook:2002,
  author  = {George Tzanetakis and Perry Cook},
  title   = {Musical Genre Classification of Audio Signals},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume  = {10},
  number  = {5},
  pages   = {293--302},
  month   = jul,
  year    = {2002},
}

@article{Uberbacher91,
  author  = {E. C. Uberbacher and R. J. Mural},
  title   = {Locating protein-coding regions in human {DNA} sequences by a multiple sensor-neural network approach},
  journal = {Proc. Natl. Acad. Sci. USA},
  volume  = {88},
  pages   = {11261--11265},
  year    = {1991},
}

@article{Uhrig91,
  author  = {R. E. Uhrig},
  title   = {Potential Applications of Neural Networks to the Operation of a Nuclear Power Plant},
  journal = {Nuclear Safety},
  volume  = {32},
  number  = {1},
  year    = {1991},
}

@article{Uhrig94,
  author  = {R. E. Uhrig},
  title   = {Artificial Neural Networks in Nuclear Power Plants},
  journal = {Nuclear News},
  volume  = {37},
  number  = {9},
  pages   = {38},
  year    = {1994},
}

@article{Utgoff-2002,
  author  = {Paul E. Utgoff and David J. Stracuzzi},
  title   = {Many-Layered Learning},
  journal = {Neural Computation},
  volume  = {14},
  pages   = {2497--2539},
  year    = {2002},
}

@article{Valiant84,
  author  = {L. G. Valiant},
  title   = {A Theory of the Learnable},
  journal = {Communications of the ACM},
  volume  = {27},
  number  = {11},
  pages   = {1134--1142},
  year    = {1984},
}

@inproceedings{VandenBout88,
  author    = {D. E. Van den Bout and T. K. Miller},
  title     = {A Travelling Salesman Objective Function That Works},
  booktitle = icnn,
  volume    = {2},
  pages     = {299--303},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@article{VandenBout89,
  author  = {D. E. Van den Bout and T. K. Miller},
  title   = {Improving the Performance of the Hopfield-Tank Neural Network Through Normalization and Annealing},
  journal = biocyb,
  volume  = {62},
  pages   = {129--139},
  year    = {1989},
}

@Article{VanDerMaaten08,
  author =       "Laurens {van der Maaten} and Geoffrey E. Hinton",
  title =        {Visualizing Data using {t-SNE}},
  journal =      jmlr,
  year =         "2008",
  keywords =     {dimension-reduction, locality, nearest-neighbors, spectral, visualization},
  month =        nov,
  pages =        {2579--2605},
  url =          {http://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf},
  volume =       {9},
}

@book{VanDerVaart+Wellner-1996,
  author    = {A. W. {van der Vaart} and J. Wellner},
  title     = {Weak Convergence and Empirical Processes with applications to Statistics},
  publisher = {Springer},
  address   = {New York},
  year      = {1996},
}

@article{vanHemmen79,
  author  = {J. L. van Hemmen and R. G. Palmer},
  title   = {The Replica Method and a Solvable Spin Glass Model},
  journal = jpa,
  volume  = {12},
  pages   = {563--580},
  year    = {1979},
}

@article{vanHemmen86,
  author  = {J. L. van Hemmen and R. K{\"u}hn},
  title   = {Nonlinear Neural Networks},
  journal = prl,
  volume  = {57},
  pages   = {913--916},
  year    = {1986},
}

@article{vanHemmen90,
  author  = {J. L. van Hemmen and L. B. Ioffe and R. K{\"u}hn and M. Vaas},
  title   = {Increasing the Efficiency of a Neural Network through Unlearning},
  journal = physicaA,
  volume  = {163},
  pages   = {386--392},
  year    = {1990},
}

% HUGO: Haven't found what A. stands for...
@article{VapnikV63,
  author  = {Vladimir Vapnik and A. Lerner},
  title   = {Pattern Recognition using Generalized Portrait Method},
  journal = {Automation and Remote Control},
  volume  = {24},
  year    = {1963},
}

@article{Vapnik71,
  author  = {V. N. Vapnik and A. Y. Chervonenkis},
  title   = {On the Uniform Convergence of Relative Frequencies of Events to Their Probabilities},
  journal = tprobapp,
  volume  = {16},
  pages   = {264--280},
  year    = {1971},
}

@book{Vapnik82,
  author    = {V. N. Vapnik},
  title     = {Estimation of Dependences Based on Empirical Data},
  publisher = {Springer-Verlag},
  address   = {Berlin},
  year      = {1982},
}

@article{Vapnik93,
  author  = {V. Vapnik and L. Bottou},
  title   = {Local algorithms for pattern recognition and dependencies estimation},
  journal = nc,
  volume  = {5},
  number  = {6},
  pages   = {893--909},
  year    = {1993},
}

@book{Vapnik95,
  author    = {V. N. Vapnik},
  title     = {The Nature of Statistical Learning Theory},
  publisher = {Springer},
  address   = {New York},
  year      = {1995},
}

@Book{Vapnik98,
  author =       "Vladimir Vapnik",
  title =        "Statistical Learning Theory",
  publisher =    "Wiley",
  address =      "New York",
  year =         "1998",
}

@incollection{variational99,
  author    = {M. I. Jordan and Z. Ghahramani and T. Jaakkola and L. Saul},
  title     = {An introduction to variational methods in graphical models},
  editor    = {M. I. Jordan},
  booktitle = {Learning in Graphical Models},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1999},
}

@inproceedings{Venka+PC-2004,
  author    = {Shailaja Venkatsubramanyan and Jose Perez-Carballo},
  title     = {Multiword Expression Filtering for Building Knowledge Maps},
  booktitle = {Second ACL Workshop on Multiword Expressions},
  pages     = {40--47},
  year      = {2004},
}

@inproceedings{Verbeek-2004,
  author    = {Jakob J. Verbeek and Sam T. Roweis and Nikos Vlassis},
  title     = {Non-linear {CCA} and {PCA} by Alignment of Local Models},
  editor    = NIPS16ed,
  booktitle = NIPS16,
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2004},
  keywords  = {dimensionality reduction, spectral methods, mixture density, CCA, PCA},
}

@inproceedings{Veronis1990,
  author    = {Jean Veronis and Nancy Ide},
  title     = {Word Sense Disambiguation with Very Large Neural Networks Extracted from Machine Readable Dictionaries},
  booktitle = {COLING'90},
  year      = {1990},
}

@misc{Veronis98,
  author = {Jean Veronis},
  title  = {A study of polysemy judgements and inter-annotator agreement},
  year   = {1998},
  url    = {citeseer.nj.nec.com/veronis98study.html},
  text   = {Veronis, J., 1998. A study of polysemy judgements and
            inter-annotator agreement. In Programme and advanced
            papers of the Senseval workshop. Herstmonceux Castle,
            England.},
}

@inproceedings{Vilalta+al-1997,
  author    = {Ricardo Vilalta and Gunnar Blix and Larry Rendell},
  title     = {Global Data Analysis and the Fragmentation Problem in Decision Tree Induction},
  booktitle = ECML97,
  pages     = {312--327},
  publisher = {Springer-Verlag},
  year      = {1997},
}

@inproceedings{Vincent-Bengio-2003-short,
  author    = {Pascal Vincent and Yoshua Bengio},
  title     = {Manifold Parzen Windows},
  booktitle = NIPS15,
  publisher = {MIT Press},
  year      = {2003},
}

@techreport{Vincent-TR1316-small,
  author      = {P. Vincent and H. Larochelle and Y. Bengio and P.-A. Manzagol},
  title       = {Extracting and Composing Robust Features with Denoising Autoencoders},
  institution = {Universit\'e de Montr\'eal, dept. IRO},
  number      = {1316},
  year        = {2008},
}

@Article{Vincent2001,
  author =       "P. Vincent and Y. Bengio",
  title =        "Kernel Matching Pursuit",
  journal =      "Machine Learning",
  volume =       "48",
  number =       "1--3",
  pages =        "165--187",
  year =         "2002",
}

@inproceedings{Vincent2002,
  author    = {P. Vincent and Y. Bengio},
  title     = {{K}-Local Hyperplane and Convex Distance Nearest Neighbor Algorithms},
  editor    = NIPS14ed,
  booktitle = NIPS14,
  pages     = {985--992},
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2002},
}

@inproceedings{VincentPLarochelleH2008-small,
  author    = {Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol},
  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
  booktitle = {ICML 2008},
  year      = {2008},
}

@inproceedings{VincentPLarochelleH2008-short,
  author    = {Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol},
  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
  booktitle = {Int. Conf. Mach. Learn.},
  pages     = {1096--1103},
  year      = {2008},
}


@InProceedings{vincent:icml08,
   author =     "Pascal Vincent and Hugo Larochelle and Yoshua Bengio and Pierre-Antoine Manzagol",
   title =      "Extracting and composing robust features with denoising autoencoders",
   booktitle =  "Proceedings of the 25th Annual International Conference on Machine Learning (ICML 2008)",
   location =   "Helsinki, Finland",
   editor =     "Andrew McCallum and Sam Roweis",
   publisher =  "Omnipress",
   year =       "2008",
   pages =      "1096--1103",
}
   %url =        "http://icml2008.cs.helsinki.fi/papers/592.pdf",

@inproceedings{VincentPLarochelleH2008-very-small,
  author    = {P. Vincent and H. Larochelle and Y. Bengio and P.-A. Manzagol},
  title     = {Extracting and Composing Robust Features with Denoising Autoencoders},
  booktitle = {ICML 2008},
  year      = {2008},
}

@Article{Viterbi67,
  author =       "A. Viterbi",
  title =        "Error bounds for convolutional codes and an
                 asymptotically optimum decoding algorithm",
  journal =      ieeeit,
  volume =       "13",
  number =       "2",
  pages =        "260--269",
  year =         "1967",
}

@inproceedings{Vlachos-2002,
  author    = {Michail Vlachos and Carlotta Domeniconi and Dimitrios Gunopulos and George Kollios and Nick Koudas},
  title     = {Non-Linear Dimensionality Reduction Techniques for Classification and Visualization},
  booktitle = {Proc. of 8th SIGKDD},
  address   = {Edmonton, Canada},
  year      = {2002},
  url       = {citeseer.ist.psu.edu/573153.html},
}

@Article{vogl-88,
  author =       "T. P. Vogl and J. K. Mangis and A. K. Rigler and W. T.
                 Zink and D. L. Alkon",
  title =        "Accelerating the Convergence of the Back-Propagation
                 Method",
  journal =      "Biological Cybernetics",
  volume =       "59",
  pages =        "257--263",
  year =         "1988",
}

@article{Vogl88,
  author  = {T. P. Vogl and J. K. Mangis and A. K. Rigler and W. T. Zink and D. L. Alkon},
  title   = {Accelerating the Convergence of the Back-Propagation Method},
  journal = biocyb,
  volume  = {59},
  pages   = {257--263},
  year    = {1988},
}

@book{Volterra,
  author    = {V. Volterra},
  title     = {Theory of Functionals and of Integrals and Integro-Differential Equations},
  publisher = {Dover},
  address   = {New York},
  year      = {1959},
}

% Journal name comes from the kyb string macro defined elsewhere in this file.
@article{vonderMalsburg73,
  author  = {Ch. von der Malsburg},
  title   = {Self-Organization of Orientation Sensitive Cells in the Striate Cortex},
  journal = kyb,
  volume  = {14},
  pages   = {85--100},
  year    = {1973},
}

@article{vonderMalsburg82,
  author  = {Ch. von der Malsburg and J. D. Cowan},
  title   = {Outline of a Theory for the Ontogenesis of Iso-Orientation Domains in Visual Cortex},
  journal = biocyb,
  volume  = {45},
  pages   = {49--56},
  year    = {1982},
}

@inproceedings{vonLehman88,
  author    = {A. von Lehman and E. G. Paek and P. F. Liao and A. Marrakchi and J. S. Patel},
  title     = {Factors Influencing Learning by Back-Propagation},
  booktitle = icnn,
  volume    = {1},
  pages     = {335--341},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

% The key spelling ("Luxurg") is kept because documents cite it; the author is von Luxburg.
@article{vonLuxurg07,
  author  = {U. von Luxburg},
  title   = {A Tutorial on Spectral Clustering},
  journal = {Statistics and Computing},
  volume  = {17},
  number  = {4},
  pages   = {395--416},
  year    = {2007},
}

@incollection{vonNeumann56,
  author    = {J. von Neumann},
  title     = {Probabilistic Logics and the Synthesis of Reliable Organisms from Unreliable Components},
  editor    = {C. E. Shannon and J. McCarthy},
  booktitle = {Automata Studies},
  pages     = {43--98},
  publisher = {Princeton University Press},
  address   = {Princeton},
  year      = {1956},
}

@article{Wagner87,
  author  = {K. Wagner and D. Psaltis},
  title   = {Multilayer Optical Learning Networks},
  journal = applopt,
  volume  = {26},
  pages   = {5061--5076},
  year    = {1987},
}

% Editors are recorded by surname only.
@incollection{Wahba82,
  author    = {G. Wahba},
  title     = {Constrained regularization for ill-posed linear operator equations, with applications in meteorology and medicine},
  editor    = {Gupta and Berger},
  booktitle = {Statistical Decision Theory and Related Topics III},
  publisher = {Academic Press},
  year      = {1982},
}

% Monograph published as a numbered volume of a SIAM series, hence a book
% entry with series/volume rather than a paper inside proceedings.
@book{Wahba90,
  author    = {G. Wahba},
  title     = {Spline models for observational data},
  series    = {CBMS-NSF Regional Conference Series in Applied Mathematics},
  volume    = {59},
  publisher = {Society for Industrial and Applied Mathematics (SIAM)},
  address   = {Philadelphia, PA},
  year      = {1990},
}

@article{Waibel89a,
  author  = {A. Waibel},
  title   = {Modular Construction of Time-Delay Neural Networks for Speech Recognition},
  journal = nc,
  volume  = {1},
  pages   = {39--46},
  year    = {1989},
}

@article{Waibel89b,
  author  = {A. Waibel and T. Hanazawa and G. E. Hinton and K. Shikano and K. Lang},
  title   = {Phoneme Recognition Using Time-Delay Neural Networks},
  journal = ieeetassp,
  volume  = {37},
  pages   = {328--339},
  year    = {1989},
}

% Journal name comes from the ieeetassp string macro defined elsewhere in this file.
@article{Waibel89c,
  author  = {A. Waibel and H. Sawai and K. Shikano},
  title   = {Modularity and Scaling in Large Phonemic Neural Networks},
  journal = ieeetassp,
  volume  = {37},
  pages   = {1888--1898},
  year    = {1989},
}

@article{Wallace+Boulton-1968,
  author  = {C. S. Wallace and D. M. Boulton},
  title   = {An information measure for classification},
  journal = {Computer Journal},
  volume  = {11},
  number  = {2},
  pages   = {185--194},
  year    = {1968},
}

% Same chapter as Wan93a, which records a different page range; both keys are
% kept so that existing citations resolve.
@incollection{Wan93,
  author    = {E. A. Wan},
  title     = {Time series prediction by using a connectionist network with internal delay lines},
  editor    = {A. S. Weigend and N. A. Gershenfeld},
  booktitle = {Time Series Prediction: Forecasting the Future and Understanding the Past},
  pages     = {195--217},
  publisher = {Addison-Wesley},
  year      = {1993},
}

@incollection{Wan93a,
  author    = {E. A. Wan},
  title     = {Time Series Prediction by Using a Connectionist Network with Internal Delay Lines},
  editor    = {A. Weigend and N. Gershenfeld},
  booktitle = {Predicting the future and understanding the past},
  pages     = {175--193},
  publisher = {Addison-Wesley},
  address   = {Redwood City, CA},
  year      = {1993},
}

@inproceedings{Wang-ijcnn91,
  author    = {S. D. Wang and C. H. Hsu},
  title     = {Terminal Attractor Learning Algorithms for Backpropagation Neural Networks},
  booktitle = ijcnn,
  pages     = {183--189},
  publisher = {IEEE Press},
  address   = {Singapore},
  month     = nov,
  year      = {1991},
}

@inproceedings{WangC1994,
  author    = "Changfeng Wang and Santosh S. Venkatesh and J. Stephen Judd",
  title     = "Optimal stopping and effective machine complexity in learning",
  booktitle = NIPS6,
  editor    = NIPS6ed,
  pages     = "303--310",
  publisher = "Morgan Kaufmann",
  year      = "1994"
}

@inproceedings{wangetal08,
  author    = {Wang, Q. and Lin, D. and Schuurmans, D.},
  title     = {Semi-supervised convex training for dependency parsing},
  booktitle = {Proceedings of the Forty-sixth Annual Conference of the Association for Computational Linguistics: Human Language Technologies (ACL)},
  year      = {2008},
  note      = {Acceptance rate 25\%; Wang a trainee}
}

@inproceedings{wangetal07,
  author    = {Wang, T. and Lizotte, D. and Bowling, M. and Schuurmans, D.},
  title     = {Stable dual dynamic programming},
  booktitle = NIPS20,
  editor    = NIPS20ed,
  year      = {2007},
  note      = {Acceptance rate 22\%; Wang and Lizotte trainees}
}


@misc{Wang02,
  author       = {L. Wang and K. Luk Chan},
  title        = {Learning Kernel Parameters by using Class Separability Measure},
  howpublished = {6th kernel machines workshop, in conjunction with Neural Information Processing Systems (NIPS)},
  year         = {2002},
  url          = {http://users.rsise.anu.edu.au/~wanglei/#Publication},
}

@article{Wang89,
  author  = {H. Wang and J. Wu and P. Tang},
  title   = {Superfamily expands},
  journal = {Nature},
  volume  = {337},
  pages   = {514},
  year    = {1989},
}

@inproceedings{WangHarper2002,
  author    = {Wen Wang and Mary P. Harper},
  title     = {The Super{ARV} language model: investigating the effectiveness of tightly integrating multiple knowledge sources},
  booktitle = {EMNLP '02: Proceedings of the ACL-02 conference on Empirical methods in natural language processing},
  pages     = {238--247},
  publisher = {Association for Computational Linguistics},
  address   = {Morristown, NJ, USA},
  year      = {2002},
}

@article{Warmuth95,
  author  = {Sally Floyd and Manfred Warmuth},
  title   = {Sample Compression, Learnability, and the Vapnik-Chervonenkis Dimension},
  journal = {Machine Learning},
  volume  = {21},
  number  = {3},
  pages   = {269--304},
  year    = {1995},
}

% Subtitle is separated by a colon; {A} keeps its capital under sentence-casing styles.
@book{Wasserman-2004,
  author    = {Larry Wasserman},
  title     = {All of Statistics: {A} Concise Course in Statistical Inference},
  publisher = {Springer},
  year      = {2004},
}

@phdthesis{Watkins-PhD,
  author  = {C. J. C. H. Watkins},
  title   = {Learning from Delayed Rewards},
  school  = {Cambridge University},
  address = {Cambridge, England},
  year    = {1989},
}

@inproceedings{Watrous87,
  author    = {R. L. Watrous},
  title     = {Learning Algorithms for Connectionist Networks: Applied Gradient Methods of Nonlinear Optimization},
  editor    = {M. Caudill and C. Butler},
  booktitle = icnn,
  volume    = {2},
  pages     = {619--627},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  year      = {1987},
}

@techreport{Watrous89,
  author      = {R. L. Watrous},
  title       = {Context-modulated discrimination of similar vowels using second-order connectionist networks},
  institution = {University of Toronto},
  number      = {{CRG-TR}-89-5},
  year        = {1989},
}

@article{Watrous-nc92,
  author  = {R. L. Watrous and G. M. Kuhn},
  title   = {Induction of Finite-State Languages Using Second-Order Recurrent Networks},
  journal = nc,
  volume  = {4},
  number  = {3},
  pages   = {406--414},
  year    = {1992},
}

@article{Watson64,
  author  = {G. S. Watson},
  title   = {Smooth regression analysis},
  journal = {Sankhya - The Indian Journal of Statistics},
  volume  = {26},
  pages   = {359--372},
  year    = {1964},
}

% The address field records the conference city (Dublin).
@inproceedings{Weber-2000,
  author    = {Markus Weber and Max Welling and Pietro Perona},
  title     = {Unsupervised Learning of Models for Recognition},
  booktitle = {Proc. 6th Europ. Conf. Comp. Vis., ECCV2000},
  address   = {Dublin},
  pages     = {18--32},
  year      = {2000},
  url       = {http://link.springer.de/link/service/series/0558/bibs/1842/18420018.htm},
}

@book{Webster88,
  editor    = {Webster},
  title     = {Webster's Ninth New Collegiate Dictionary},
  publisher = {Merriam-Webster},
  address   = {Springfield},
  year      = {1988},
}

@book{Wegener87,
  author    = {Ingo Wegener},
  title     = {The Complexity of Boolean Functions},
  publisher = {John Wiley \& Sons},
  year      = {1987},
}

@incollection{Weigend93,
  author    = {N. A. Gershenfeld and A. S. Weigend},
  title     = {The Future of Time Series: Learning and Understanding},
  editor    = {A. Weigend and N. Gershenfeld},
  booktitle = {Predicting the future and understanding the past},
  pages     = {1--70},
  publisher = {Addison-Wesley},
  address   = {Redwood City, CA},
  year      = {1993},
}

@article{Weigend95,
  author  = {A. S. Weigend and A. N. Srivastava},
  title   = {Predicting Conditional Probability Distributions: {A} Connectionist Approach},
  journal = {International Journal of Neural Systems},
  volume  = {6},
  year    = {1995},
}

@inproceedings{Weinberger+Saul-06,
  author    = {K. Q. Weinberger and L. K. Saul},
  title     = {An Introduction to Nonlinear Dimensionality Reduction by Maximum Variance Unfolding},
  booktitle = {Proceedings of the National Conference on Artificial Intelligence (AAAI)},
  address   = {Boston, MA},
  year      = {2006},
}

@inproceedings{weinberger-learningkernel-04,
  author    = {Kilian Q. Weinberger and Fei Sha and Lawrence K. Saul},
  title     = {Learning a kernel matrix for nonlinear dimensionality reduction},
  booktitle = ICML04,
  editor    = ICML04ed,
  publisher = ICML04publ,
  address   = {Banff, Canada},
  pages     = {839--846},
  year      = {2004},
}

@inproceedings{Weinberger04a,
  author    = {K. Q. Weinberger and L. K. Saul},
  title     = {Unsupervised Learning of Image Manifolds by Semidefinite Programming},
  booktitle = cvpr04,
  volume    = {2},
  pages     = {988--995},
  address   = {Washington D.C.},
  year      = {2004},
}

% NOTE(review): year, volume and pages are those of the 1995 publication the
% key refers to; the earlier year/pages belonged to a different (1983) paper.
% Verify against the journal before relying on the page numbers.
@article{weinberger95,
  author  = {M. J. Weinberger and J. Rissanen and M. Feder},
  title   = {A universal finite memory source},
  journal = {IEEE Transactions on Information Theory},
  volume  = {41},
  number  = {3},
  pages   = {643--652},
  year    = {1995},
}

@incollection{WeinbergerK2006,
  author    = {Kilian Q. Weinberger and John Blitzer and Lawrence K. Saul},
  title     = {Distance Metric Learning for Large Margin Nearest Neighbor Classification},
  booktitle = NIPS18,
  editor    = NIPS18ed,
  pages     = {1473--1480},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2006},
}

% Conference paper; inproceedings is the canonical name for the conference alias.
@inproceedings{WeinbergerK2007,
  author    = {Kilian Q. Weinberger and Gerald Tesauro},
  title     = {Metric Learning for Kernel Regression},
  booktitle = {Proc. of the 11th International Conference on Artificial Intelligence and Statistics},
  year      = {2007},
}
  %url = {http://www.stat.umn.edu/~aistat/proceedings/data/papers/077.pdf}

@article{Weingartner,
  author  = {H. M. Weingartner and D. N. Ness},
  title   = {Methods for the Solution of the Multi-Dimensional 0/1 Knapsack Problem},
  journal = {Operations Research},
  volume  = {15},
  pages   = {83--103},
  year    = {1967},
}

% The accent is written as a brace-wrapped special character ({\'e}) so BibTeX
% treats it as a single letter when sorting and building labels.
@article{Weisbuch85,
  author  = {G. Weisbuch and F. Fogelman-Souli{\'e}},
  title   = {Scaling Laws for the Attractors of Hopfield Networks},
  journal = jppl,
  volume  = {46},
  pages   = {623--630},
  year    = {1985},
}

@inproceedings{Weiss-99,
  author    = {Yair Weiss},
  title     = {Segmentation using eigenvectors: a unifying view},
  booktitle = ICCV99,
  pages     = {975--982},
  year      = {1999},
}

@article{Weiss2000,
  author  = {Yair Weiss},
  title   = {Correctness of local probability propagation in graphical models with loops},
  journal = {Neural Computation},
  volume  = {12},
  pages   = {1--41},
  year    = {2000},
}

@book{Weiss90,
  author    = {S. M. Weiss and C. A. Kulikowski},
  title     = {Computer Systems That Learn},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
  year      = {1990},
}

% Same paper as WellingNIPS17; the page range matches that entry. Both keys
% are kept so that existing citations resolve.
@inproceedings{Welling05,
  author    = {Max Welling and Michal Rosen-Zvi and Geoffrey E. Hinton},
  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
  booktitle = NIPS17,
  editor    = NIPS17ed,
  volume    = {17},
  pages     = {1481--1488},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2005},
}

@inproceedings{Welling05-small,
  author    = {M. Welling and M. Rosen-Zvi and G. E. Hinton},
  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
  booktitle = {NIPS 17},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2005},
}

@inproceedings{Welling2003,
  author    = {Max Welling and Richard Zemel and Geoffrey E. Hinton},
  title     = {Self-Supervised Boosting},
  booktitle = NIPS15,
  editor    = NIPS15ed,
  pages     = {665--672},
  publisher = {{MIT} Press},
  year      = {2003},
}

@inproceedings{WellingM2002,
  author    = {Max Welling and Geoffrey E. Hinton},
  title     = {A New Learning Algorithm for Mean Field {Boltzmann} Machines},
  booktitle = {ICANN '02: Proceedings of the International Conference on Artificial Neural Networks},
  pages     = {351--357},
  publisher = {Springer-Verlag},
  address   = {London, UK},
  year      = {2002},
  isbn      = {3-540-44074-7},
}

@inproceedings{WellingNIPS17,
  author    = {Max Welling and Michal Rosen-Zvi and Geoffrey E. Hinton},
  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
  booktitle = NIPS17,
  editor    = NIPS17ed,
  pages     = {1481--1488},
  publisher = {{MIT} Press},
  address   = {Cambridge, MA},
  year      = {2005},
}

@inproceedings{WellingNIPS17-small,
  author    = {M. Welling and M. Rosen-Zvi and G. E. Hinton},
  title     = {Exponential Family Harmoniums with an Application to Information Retrieval},
  booktitle = {NIPS 17},
  publisher = {{MIT} Press},
  year      = {2005},
}


@inproceedings{WellingUAI2009,
  author    = {Max Welling},
  title     = {Herding Dynamic Weights for Partially Observed Random Field Models},
  booktitle = UAI09,
  publisher = {Morgan Kaufmann},
  year      = {2009},
}

@inproceedings{WellingICML2009,
  author    = {Max Welling},
  title     = {Herding Dynamic Weights to Learn},
  booktitle = ICML09,
  editor    = ICML09ed,
  publisher = ICML09publ,
  year      = {2009},
}

@inproceedings{Werbos-icnn88,
  author    = {P. J. Werbos},
  title     = {Back-Propagation: Past and Future},
  booktitle = icnn,
  publisher = {IEEE Press},
  address   = {New York, NY},
  year      = {1988},
  OPTpages  = {343--353},
}

@phdthesis{Werbos74,
  author = {P. Werbos},
  title  = {Beyond Regression: New Tools for Prediction and Analysis in the Behavioral Sciences},
  school = {Harvard University},
  year   = {1974},
}

@article{Werbos87,
  author  = {P. J. Werbos},
  title   = {Building and Understanding Adaptive Systems: {A} Statistical/Numerical Approach to Factory Automation and Brain Research},
  journal = ieeesmc,
  volume  = {17},
  pages   = {7--20},
  year    = {1987},
}

@article{Werbos88,
  author  = {P. J. Werbos},
  title   = {Generalization of Backpropagation with Application to a Recurrent Gas Market Model},
  journal = nn,
  volume  = {1},
  pages   = {339--356},
  year    = {1988},
}

@inproceedings{wermuth+cox92,
  author    = {N. Wermuth and D. R. Cox},
  title     = {Graphical models for dependencies and associations},
  booktitle = {Proceedings of the 10th Symposium on Computational Statistics},
  volume    = {1},
  pages     = {235--249},
  address   = {Physica, Heidelberg},
  year      = {1992},
}

@article{wermuth+lauritzen90,
  author  = {N. Wermuth and S. L. Lauritzen},
  title   = {On substantive research hypotheses, conditional independence graphs and graphical chain models},
  journal = {J. Roy. Statist. Soc. Ser. B},
  volume  = {52},
  pages   = {21--72},
  year    = {1990},
}

% Journal name comes from the ieeetrnn string macro defined elsewhere in this file.
@article{Wessels-trnn92,
  author  = {L. F. A. Wessels and E. Barnard},
  title   = {Avoiding False Local Minima by Proper Initialization of Connections},
  journal = ieeetrnn,
  volume  = {3},
  number  = {6},
  pages   = {899--905},
  year    = {1992},
}

% Accents are written as brace-wrapped special characters ({\'e}, {\"o}) so
% BibTeX treats each as a single letter when sorting and building labels.
@article{weston03zeronorm,
  author    = {Jason Weston and Andr{\'e} Elisseeff and Bernhard Sch{\"o}lkopf and Mike Tipping},
  title     = {Use of the zero norm with linear models and kernel methods},
  journal   = jmlr,
  volume    = {3},
  pages     = {1439--1461},
  publisher = {MIT Press},
  year      = {2003},
  issn      = {1533-7928},
}

% The editor's umlaut is written as a brace-wrapped special character ({\"o})
% so BibTeX treats it as a single letter when sorting and building labels.
@inproceedings{weston99density,
  author    = {J. Weston and A. Gammerman and M. Stitson and V. Vapnik and V. Vovk and C. Watkins},
  title     = {Density estimation using support vector machines},
  editor    = {B. Sch{\"o}lkopf and C. J. C. Burges and A. J. Smola},
  booktitle = {Advances in Kernel Methods --- Support Vector Learning},
  pages     = {293--306},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1999},
}

% booktitle, editor and publisher come from the ICML08* string macros defined
% elsewhere in this file. The doi field holds the bare DOI (no resolver prefix).
@inproceedings{WestonJ2008,
  author    = {Jason Weston and Fr{\'e}d{\'e}ric Ratle and Ronan Collobert},
  title     = {Deep Learning via Semi-Supervised Embedding},
  booktitle = ICML08,
  editor    = ICML08ed,
  publisher = ICML08publ,
  address   = {New York, NY, USA},
  location  = {Helsinki, Finland},
  pages     = {1168--1175},
  year      = {2008},
  isbn      = {978-1-60558-205-4},
  doi       = {10.1145/1390156.1390303},
}
  %url =          "http://www.kyb.tuebingen.mpg.de/bs/people/weston/papers/deep-embed.pdf",

@inproceedings{WestonJ2008-small,
  author    = {J. Weston and F. Ratle and R. Collobert},
  title     = {Deep Learning via Semi-Supervised Embedding},
  booktitle = {ICML 2008},
  year      = {2008},
}

@inproceedings{WestonJ2008-short,
  author    = {J. Weston and F. Ratle and R. Collobert},
  title     = {Deep Learning via Semi-Supervised Embedding},
  booktitle = {Int. Conf. Mach. Learn. 2008},
  pages     = "1168--1175",
  year      = {2008},
}

% month uses the predefined jun macro so styles can localise or abbreviate it.
@inproceedings{MobahiCollobertWestonICML2009,
  author    = {Hossein Mobahi and Ronan Collobert and Jason Weston},
  title     = {Deep Learning from Temporal Coherence in Video},
  booktitle = {Proceedings of the 26th International Conference on Machine Learning},
  editor    = {L{\'e}on Bottou and Michael Littman},
  pages     = {737--744},
  publisher = {Omnipress},
  address   = {Montreal},
  month     = jun,
  year      = 2009
}

@article{White89,
  author  = {H. White},
  title   = {Learning in Artificial Neural Networks: {A} Statistical Perspective},
  journal = {Neural Computation},
  volume  = {1},
  number  = {4},
  pages   = {425--464},
  year    = {1989},
  type    = {Review},
}

@article{White90,
  author    = {H. White},
  title     = {Connectionist nonparametric regression: {Multilayer} feedforward networks can learn arbitrary mappings},
  journal   = {Neural Networks},
  volume    = {3},
  number    = {5},
  pages     = {535--549},
  publisher = {Pergamon Press Ltd., Inc.},
  year      = {1990},
}

% NOTE(review): booktitle "?" and the empty pages field are unresolved
% placeholders; the venue is not recorded in this file. Confirm the real
% proceedings title and page range before citing this entry.
@InProceedings{White91,
  author =       "H. White",
  booktitle =    "?",
  title =        "An overview of representation and convergence results
                 for multilayer feedforward networks",
  pages =        "",
  year =         "1991",
}

@inproceedings{Whitley89,
  author    = {D. Whitley and T. Hanson},
  title     = {Optimizing Neural Networks Using Faster, More Accurate Genetic Search},
  editor    = {J. D. Schaffer},
  booktitle = {Proceedings of the Third International Conference on Genetic Algorithms},
  pages     = {391--396},
  publisher = {Morgan Kaufmann, San Mateo},
  address   = {Arlington 1989},
  year      = {1989},
}

@book{whittaker90,
  author    = {J. Whittaker},
  title     = {Graphical Models in Applied Multivariate Statistics},
  publisher = {Wiley, Chichester},
  year      = {1990},
}

@incollection{Widrow60,
  author    = {B. Widrow and M. E. Hoff},
  title     = {Adaptive Switching Circuits},
  booktitle = {1960 IRE WESCON Convention Record},
  volume    = {4},
  pages     = {96--104},
  publisher = {IRE},
  address   = {New York},
  year      = {1960},
}

@inproceedings{Widrow62,
  author    = {B. Widrow},
  title     = {Generalization and Information Storage in Networks of Adaline ``Neurons''},
  editor    = {M. C. Yovits and G. T. Jacobi and G. D. Goldstein},
  booktitle = {Self-Organizing Systems 1962},
  pages     = {435--461},
  publisher = {Spartan, Washington},
  address   = {Chicago 1962},
  year      = {1962},
}

@article{Widrow73,
  author  = {B. Widrow and N. K. Gupta and S. Maitra},
  title   = {Punish/Reward: Learning with a Critic in Adaptive Threshold Systems},
  journal = ieeesmc,
  volume  = {3},
  pages   = {455--465},
  year    = {1973},
}

@book{Wiener48,
  author    = {N. Wiener},
  title     = {Cybernetics, or Control and Communication in the Animal and the Machine},
  publisher = {Wiley},
  address   = {New York},
  year      = {1948},
}

@book{Wiener49,
  author    = {N. Wiener},
  title     = {The Extrapolation, Interpolation and Smoothing of Stationary Time Series with Engineering Applications},
  publisher = {Wiley},
  address   = {New York},
  year      = {1949},
}

@article{Wilbur+Lipman83,
  author  = {W. J. Wilbur and D. J. Lipman},
  title   = {Rapid similarity searches of nucleic acids and protein data banks},
  journal = {Proc. Natl. Acad. Sci. USA},
  volume  = {80},
  pages   = {726--730},
  year    = {1983},
}

@techreport{Wilks1996,
  author      = {Yorick Wilks and Mark Stevenson},
  title       = {The grammar of sense: Is word sense tagging much more than part-of-speech tagging?},
  institution = {University of Sheffield},
  year        = {1996},
}

@article{Williams+Barclay88,
  author  = {A. F. Williams and A. N. Barclay},
  title   = {The immunoglobulin superfamily domains for cell surface recognition},
  journal = {Annual Review of Immunology},
  volume  = {6},
  pages   = {381--405},
  year    = {1988},
}

@inproceedings{Williams+Rasmussen-nips8,
  author    = {C. K. I. Williams and C. E. Rasmussen},
  title     = {Gaussian Processes for Regression},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  pages     = {514--520},
  publisher = {MIT Press, Cambridge, MA},
  year      = {1996},
}

@inproceedings{Williams+Seeger-2000,
  author    = {C. K. I. Williams and M. Seeger},
  title     = {The Effect of the Input Density Distribution on Kernel-based Classifiers},
  booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
  publisher = {Morgan Kaufmann},
  year      = {2000},
}

@inproceedings{Williams+Seeger-2001,
  author    = {Christopher K. I. Williams and Matthias Seeger},
  title     = {Using the {Nystr{\"o}m} Method to Speed Up Kernel Machines},
  booktitle = NIPS13,
  editor    = NIPS13ed,
  pages     = {682--688},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2001},
}

@inproceedings{Williams2001,
  author    = {C. K. I. Williams},
  title     = {On a Connection between Kernel {PCA} and Metric Multidimensional Scaling},
  booktitle = NIPS13,
  editor    = NIPS13ed,
  pages     = {675--681},
  publisher = {{MIT} Press},
  year      = {2001},
}

@inproceedings{Williams87,
  author    = {R. J. Williams},
  title     = {A Class of Gradient-Estimating Algorithms for Reinforcement Learning in Neural Networks},
  editor    = {M. Caudill and C. Butler},
  booktitle = icnn,
  volume    = {2},
  pages     = {601--608},
  publisher = {IEEE, New York},
  address   = {San Diego 1987},
  year      = {1987},
}

@inproceedings{Williams88a,
  author    = {R. J. Williams},
  title     = {On the Use of Back-Propagation in Associative Reinforcement Learning},
  booktitle = icnn,
  volume    = {1},
  pages     = {263--270},
  publisher = {IEEE, New York},
  address   = {San Diego 1988},
  year      = {1988},
}

@techreport{Williams88b,
  author      = {R. J. Williams},
  title       = {Towards a Theory of Reinforcement-Learning Connectionist Systems},
  institution = {College of Computer Science, Northeastern University},
  number      = {NU--CCS--88--3},
  address     = {Boston, MA},
  year        = {1988},
}

@inproceedings{Williams89a,
  author    = {R. J. Williams and J. Peng},
  title     = {Reinforcement Learning Algorithms As Function Optimizers},
  booktitle = ijcnn,
  volume    = {2},
  pages     = {89--95},
  publisher = {IEEE, New York},
  address   = {Washington 1989},
  year      = {1989},
}

@article{Williams89b,
  author  = {R. J. Williams and D. Zipser},
  title   = {A Learning Algorithm for Continually Running Fully Recurrent Neural Networks},
  journal = nc,
  volume  = {1},
  pages   = {270--280},
  year    = {1989},
}

@article{Williams89c,
  author  = {R. J. Williams and D. Zipser},
  title   = {Experimental Analysis of the Real-Time Recurrent Learning Algorithm},
  journal = connsci,
  volume  = {1},
  pages   = {87--111},
  year    = {1989},
}

@inproceedings{Williams93,
  author    = {William Evans and Sridhar Rajagopalan and Umesh Vazirani},
  title     = {Choosing a Reliable Hypothesis},
  booktitle = {Proceedings of the 6th Annual Conference on Computational Learning Theory},
  pages     = {269--276},
  publisher = {ACM Press},
  address   = {Santa Cruz, CA, USA},
  month     = jul,
  year      = {1993},
  isbn      = {0-89791-611-5},
}

@inproceedings{williams95gaussian,
  author    = {Christopher K. I. Williams and Carl Edward Rasmussen},
  title     = {{Gaussian} Processes for Regression},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  volume    = {8},
  publisher = {{MIT} Press},
  year      = {1995},
  isbn      = {0-262-20107-0},
}

@inproceedings{Williams96-nips,
  author    = {C. K. I. Williams},
  title     = {Computing with infinite networks},
  booktitle = NIPS9,
  editor    = NIPS9ed,
  publisher = {MIT Press},
  year      = {1997},
}

@inproceedings{WilliamsC1990,
  author    = "Christopher K. I. Williams and Geoffrey E. Hinton",
  title     = "Mean field networks that learn to discriminate temporally distorted strings",
  booktitle = "Connectionist Models: Proceedings of the 1990 Connectionist Summer School",
  address   = "San Mateo, CA",
  year      = "1990",
}

% Journal name comes from the nature string macro defined elsewhere in this file.
@article{Willshaw69,
  author  = {D. J. Willshaw and O. P. Buneman and H. C. Longuet-Higgins},
  title   = {Non-Holographic Associative Memory},
  journal = nature,
  volume  = {222},
  pages   = {960--962},
  year    = {1969},
}

@article{Willshaw76,
  author  = {D. J. Willshaw and C. von der Malsburg},
  title   = {How Patterned Neural Connections Can Be Set Up by Self-Organization},
  journal = PRSLB,
  volume  = {194},
  pages   = {431--445},
  year    = {1976},
}

@article{Wilson-2003,
  author    = {D. Randall Wilson and Tony R. Martinez},
  title     = {The general inefficiency of batch training for gradient descent learning},
  journal   = {Neural Networks},
  volume    = {16},
  number    = {10},
  pages     = {1429--1451},
  publisher = {Elsevier Science Ltd.},
  address   = {Oxford, UK},
  year      = {2003},
  issn      = {0893-6080},
}

@inproceedings{Wilson2007,
  author    = {D. Keith Wilson},
  title     = {Weather effects and outdoor noise exposure: Where, when, and how often to measure?},
  booktitle = {Proceedings of NOISE-CON 2007},
  address   = {Reno, Nevada},
  year      = {2007},
}

@article{Wilson73,
  author  = {H. R. Wilson and J. D. Cowan},
  title   = {A Mathematical Theory of the Functional Dynamics of Cortical and Thalamic Nervous Tissue},
  journal = kyb,
  volume  = {13},
  pages   = {55--80},
  year    = {1973},
}

@article{Wilson88,
  author  = {G. V. Wilson and G. S. Pawley},
  title   = {On the Stability of the Travelling Salesman Problem Algorithm of Hopfield and Tank},
  journal = biocyb,
  volume  = {58},
  pages   = {63--70},
  year    = {1988},
}

@inproceedings{wilson97instance,
  author    = {D. Randall Wilson and Tony R. Martinez},
  title     = {Instance pruning techniques},
  booktitle = {Proc. 14th International Conference on Machine Learning},
  pages     = {403--411},
  publisher = {Morgan Kaufmann},
  year      = {1997},
  url       = {citeseer.nj.nec.com/wilson97instance.html},
}

@book{Winograd63,
  author    = {S. Winograd and J. D. Cowan},
  title     = {Reliable Computation in the Presence of Noise},
  year      = {1963},
  publisher = {MIT Press},
  address   = {Cambridge},
}

@article{Winters89,
  author  = {J. H. Winters and C. Rose},
  title   = {Minimum Distance Automata in Parallel Networks for Optimum Classification},
  journal = nn,
  year    = {1989},
  volume  = {2},
  pages   = {127--132},
}

@Article{WisSej2002,
  author =       "L. Wiskott and T. J. Sejnowski",
  title =        "Slow Feature Analysis: Unsupervised Learning of
                 Invariances",
  journal =      "Neural Computation",
  volume =       "14",
  number =       "4",
  pages =        "715--770",
  year =         "2002",
  urlabstract =  "{http://itb.biologie.hu-berlin.de/~wiskott/Abstracts/WisSej2002.html}",
  urlpaper =     "{http://itb.biologie.hu-berlin.de/~wiskott/Publications/WisSej2002-LearningInvariances-NC.ps.gz}",
}

@techreport{Witbrock+Zagha-1989,
  author      = {Michael Witbrock and Marco Zagha},
  title       = {An Implementation of Back-Propagation Learning on {GF11}, a Large {SIMD} Parallel Computer},
  institution = {Carnegie Mellon University},
  number      = {CMU-CS-89-208},
  year        = {1989},
}

@book{Wittgenstein58,
  author    = {L. Wittgenstein},
  title     = {Philosophical Investigations},
  year      = {1958},
  publisher = {Blackwell},
  address   = {Oxford},
}

@inproceedings{Wittner88,
  author    = {B. S. Wittner and J. S. Denker},
  title     = {Strategies for Teaching Layered Networks Classification Tasks},
  booktitle = nips87,
  editor    = nips87ed,
  year      = {1988},
  pages     = {850--859},
  publisher = {American Institute of Physics, New York},
  address   = {Denver, CO},
}

@book{WL90,
  author    = {A. Waibel and K. F. Lee},
  title     = {Readings in Speech Recognition},
  year      = {1990},
  publisher = {Morgan Kaufmann},
}

@article{Wolpert-1996,
  author  = {D. H. Wolpert},
  title   = {The lack of a priori distinction between learning algorithms},
  journal = {Neural Computation},
  year    = {1996},
  volume  = {8},
  number  = {7},
  pages   = {1341--1390},
}

@article{Wolpert92,
  author  = {D. H. Wolpert},
  title   = {Stacked Generalization},
  journal = {Neural Networks},
  year    = {1992},
  volume  = {5},
  pages   = {241--249},
}

@techreport{wolpert95,
  author      = {D. Wolpert and W. Macready},
  title       = {No free lunch theorems for search},
  institution = {The Santa Fe Institute},
  number      = {SFI-TR-95-02-010},
  year        = {1995},
}

@article{wolpert96no,
  author =       "D. Wolpert and W. Macready",
  title =        "No free lunch theorems for optimization",
  year =         "1997",
  journal =      "IEEE Transactions on Evolutionary Computation",
  volume =       1,
  number =       1,
  pages =       {67--82},
}

@book{wordnet-book98,
  author    = {Christiane Fellbaum},
  title     = {{WordNet}: An Electronic Lexical Database},
  year      = {1998},
  publisher = {MIT Press},
}

@techreport{wrong-delve-citation,
  author      = {G. Hinton and R. Neal and R. Tibshirani},
  title       = {Assessing learning procedures using {DELVE}},
  year        = {1995},
  institution = {University of Toronto, Department of Computer Science,
                 http://www.cs.utoronto.ca/neuron/delve/delve.html.},
}

% NOTE(review): this paper is usually credited to J. J. More and Z. Wu -- confirm the author list.
@Article{Wu-97,
  author =       "Zhijun Wu",
  title =        "Global continuation for distance geometry problems",
  journal =      "{SIAM} Journal on Optimization",
  volume =       "7",
  pages =        "814--836",
  year =         "1997",
}

@article{Wu-97-short,
  author  = {Z. Wu},
  title   = {Global continuation for distance geometry problems},
  journal = {{SIAM} J. Optimization},
  year    = {1997},
  volume  = {7},
  pages   = {814--836},
}

@article{Wu97,
  author  = {C. H. Wu},
  title   = {Artificial neural networks for molecular sequence analysis},
  journal = {Comp. Chem.},
  year    = {1997},
  volume  = {21},
  pages   = {237--256},
}

@InProceedings{XingE2005,
  author =       "Eric P. Xing and Rong Yan and Alexander G. Hauptmann",
  booktitle =    UAI05,
  title =        "Mining Associated Text and Images with Dual-Wing
                 Harmoniums",
  publisher =    "AUAI Press",
  pages =        "633--641",
  year =         "2005",
  ISBN =         "0-9749039-1-4",
  date =         "2007-07-26",
  OPTcrossref =  "conf/uai/2005",
  OPTdescription = "dblp",
  OPTee =        "http://uai.sis.pitt.edu/displayArticleDetails.jsp?mmnu=1&smnu=2&article-id=1184&proceeding-id=21",
  OPTkeywords =  "dblp",
}
  %url =       "http://dblp.uni-trier.de/db/conf/uai/uai2005.html#XingYH05",

@InProceedings{Xu+Rudnicky-2000,
  author =       "Wei Xu and Alex Rudnicky",
  booktitle =    "International Conference on Spoken Language
                 Processing",
  title =        "Can Artificial Neural Networks Learn Language Models?",
  address =      "Beijing, China",
  pages =        "M1--13",
  year =         "2000",
}

@inproceedings{Xu-Emami-Jelinek-2003,
  author    = {P. Xu and A. Emami and F. Jelinek},
  title     = {Training Connectionist Models for the Structured Language Model},
  booktitle = {Proceedings of the 2003 Conference on Empirical Methods in Natural Language Processing (EMNLP'2003)},
  year      = {2003},
  volume    = {10},
  pages     = {160--167},
}

@Misc{xu-jordan-94,
  author =       "L. Xu and M. I. Jordan",
  title =        "Theoretical and experimental studies of convergence
                 properties of the {EM} algorithm for unsupervised
                 learning based on finite mixtures",
  address =      "Snowbird, Utah",
  year =         "1994",
  note =         "Presented at the Neural Networks for Computing
                 Conference",
}

@InProceedings{xuetal04,
  author    = {Xu, L. and Neufeld, J. and Larson, B. and Schuurmans, D.},
  title     = {Maximum margin clustering},
  booktitle = NIPS17,
  editor    = NIPS17ed,
  year      = {2004},
}

@InProceedings{Xu-ICML-2006,
  author    = {Xu, L. and Wilkinson, D. and Southey, F. and Schuurmans, D.},
  title     = {Discriminative unsupervised learning of structured predictors},
  editor    = ICML06ed,
  booktitle = ICML06,
  publisher = ICML06publ,
  year      = {2006},
}

@inproceedings{Xu-AAAI-2006,
  author    = {L. Xu and K. Crammer and D. Schuurmans},
  title     = {Robust support vector machine training via convex outlier ablation},
  booktitle = {Twenty-first National Conference on Artificial Intelligence (AAAI-06)},
  year      = {2006},
}



@misc{YA97a,
  author = {Howard Hua Yang and {Shun-ichi} Amari},
  title  = {Natural Gradient Descent for Training Multi-Layer Perceptrons},
  year   = {1997},
  url    = {citeseer.ist.psu.edu/hua96natural.html},
}

@article{yang98complexity,
  author  = {Howard Hua Yang and {Shun-ichi} Amari},
  title   = {Complexity Issues in Natural Gradient Descent Method for Training Multi-Layer Perceptrons},
  journal = {Neural Computation},
  year    = {1998},
  volume  = {10},
  number  = {8},
  pages   = {2137--2157},
  url     = {citeseer.ist.psu.edu/91462.html},
}

@inproceedings{Yang+al-2006,
    author = {Xin Yang and Haoying Fu and Hongyuan Zha and Jesse Barlow},
    title = {Semi-supervised nonlinear dimensionality reduction},
    booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
    year = {2006},
    isbn = {1-59593-383-2},
    pages = {1065--1072},
    location = {Pittsburgh, Pennsylvania},
    doi = {10.1145/1143844.1143978},
    publisher = {ACM},
    address = {New York, NY, USA},
}

@misc{Yang+Jin-2006,
    author = {Liu Yang and Rong Jin},
    title = {Distance Metric Learning: A Comprehensive Survey},
    year = 2006,
    note = {http://www.cse.msu.edu/$\sim$yangliu1/frame\_survey\_v2.pdf},
}

@misc{Yang-2007,
    author = {Liu Yang},
    title = {An Overview of Distance Metric Learning},
    year = 2007,
    note = {http://www.cse.msu.edu/$\sim$yangliu1/dist\_overview.pdf},
}

@inproceedings{YangL2007,
  author    = {Liu Yang and Rong Jin and Caroline Pantofaru and Rahul Sukthankar},
  title     = {Discriminative Cluster Refinement: Improving Object Category Recognition Given Limited Training Data},
  booktitle = cvpr07,
  year      = {2007},
  month     = jun,
}

@inproceedings{Yao85,
  author    = {Andrew Yao},
  title     = {Separating the polynomial-time hierarchy by oracles},
  booktitle = {Proceedings of the 26th Annual {IEEE} Symposium on Foundations of Computer Science},
  year      = {1985},
  pages     = {1--10},
}

@inproceedings{Yarowsky-92,
  author    = {David Yarowsky},
  title     = {Word-sense disambiguation using statistical models of {Roget}'s categories trained on large corpora},
  booktitle = {Proceedings of the 14th International Conference on Computational Linguistics (COLING-92)},
  year      = {1992},
  pages     = {454--460},
  address   = {Nantes, France},
}

@inproceedings{Yarowsky-93,
  author    = {David Yarowsky},
  title     = {One sense per collocation},
  booktitle = {{ARPA} Workshop on Human Language Technology},
  year      = {1993},
  address   = {Princeton, {NJ}},
}

@inproceedings{Yarowsky-95,
  author    = {David Yarowsky},
  title     = {Unsupervised word sense disambiguation rivaling supervised methods},
  booktitle = {33rd Annual Meeting of the {ACL}},
  year      = {1995},
  pages     = {189--196},
  address   = {Cambridge, {MA}},
}

@inproceedings{Yarowsky1994,
  author    = {David Yarowsky},
  title     = {Decision Lists for Lexical Ambiguity Resolution: Application to Accent Restoration in Spanish and French},
  booktitle = {Meeting of the Association for Computational Linguistics},
  year      = {1994},
  pages     = {88--95},
  url       = {citeseer.nj.nec.com/yarowsky94decision.html},
}

@inproceedings{Yarowsky1995,
  author    = {David Yarowsky},
  title     = {Unsupervised Word Sense Disambiguation Rivaling Supervised Methods},
  booktitle = {Meeting of the Association for Computational Linguistics},
  year      = {1995},
  pages     = {189--196},
  url       = {citeseer.nj.nec.com/yarowsky95unsupervised.html},
}

@techreport{Yianilos95,
  author      = {Peter N. Yianilos},
  title       = {Metric Learning via Normal Mixtures},
  institution = {NEC Research Institute},
  address     = {Princeton, NJ},
  year        = {1995},
  month       = oct,
}

% NOTE(review): journal article, not a proceedings paper; volume, number and year taken from the published record -- confirm.
@Article{Younes98onthe,
    author = {Laurent Younes},
    title = {On The Convergence Of Markovian Stochastic Algorithms With Rapidly Decreasing Ergodicity Rates},
    journal = {Stochastics and Stochastics Reports},
    volume = {65},
    number = {3},
    year = {1999},
    pages = {177--228}
}

@article{Young+Sachs79,
  author  = {E. D. Young and M. B. Sachs},
  title   = {Representation of steady-state vowels in the temporal aspects of the discharge pattern of population of auditory nerve fibers},
  journal = jasa,
  year    = {1979},
  volume  = {66},
  number  = {5},
  pages   = {1381--1403},
}

@inproceedings{Yu+Simmons90,
  author    = {Y. H. Yu and R. F. Simmons},
  title     = {Extra output biased learning},
  booktitle = ijcnn,
  year      = {1990},
  publisher = {Lawrence Erlbaum, Hillsdale},
  address   = {Washington 1990},
}

@article{Yu-trnn92,
  author  = {X. H. Yu},
  title   = {Can Backpropagation Error Surface Not Have Local Minima?},
  journal = ieeetrnn,
  year    = {1992},
  volume  = {3},
  number  = {6},
  pages   = {1019--1020},
}

@article{Yu92,
  author  = {X. H. Yu},
  title   = {Can Backpropagation Error Surface Not Have Local Minima?},
  journal = ieeetrnn,
  year    = {1992},
  volume  = {3},
  number  = {6},
  pages   = {1019--1020},
}

@inproceedings{Yuille2005,
  author    = {Alan L. Yuille},
  title     = {The Convergence of Contrastive Divergences},
  booktitle = NIPS17,
  editor    = NIPS17ed,
  year      = {2005},
  pages     = {1593--1600},
  publisher = {{MIT} Press},
}

@article{Yuille89,
  author  = {Alan L. Yuille and D. M. Kammen and D. S. Cohen},
  title   = {Quadrature and the Development of Orientation Selective Cortical Cells by Hebb Rules},
  journal = biocyb,
  year    = {1989},
  volume  = {61},
  pages   = {183--194},
}

@article{Yuille90,
  author  = {Alan L. Yuille},
  title   = {Generalized Deformable Models, Statistical Physics, and Matching Problems},
  journal = {Neural Computation},
  year    = {1990},
  volume  = {2},
  number  = {1},
  pages   = {1--24},
}

@article{Zak-nn92,
  author  = {M. Zak},
  title   = {Terminal Attractors in Neural Networks},
  journal = nn,
  year    = {1989},
  volume  = {2},
  pages   = {259--274},
}

@article{Zak88,
  author  = {M. Zak},
  title   = {Terminal Attractors for Addressable Memory in Neural Networks},
  journal = plettA,
  year    = {1988},
  volume  = {133},
  pages   = {18--22},
}

@article{Zak89,
  author  = {M. Zak},
  title   = {Terminal Attractors in Neural Networks},
  journal = nn,
  year    = {1989},
  volume  = {2},
  pages   = {259--274},
}

@article{Zavaliagkos93,
  author  = {G. Zavaliagkos and S. Austin and J. Makhoul and R. Schwartz},
  title   = {A Hybrid Continuous Speech Recognition System Using Segmental Neural Nets with Hidden {Markov} Models},
  journal = {Int. Journal of Pattern Recognition and Artificial Intelligence},
  year    = {1993},
  pages   = {305--319},
  note    = {Special Issue on Applications of Neural Networks to Pattern Recognition (I. Guyon Ed.)},
}

@inproceedings{Zell+al-1993,
  author    = {Andreas Zell and Niels Mache and Michael Vogt and Markus H{\"u}ttel},
  title     = {Problems of Massive Parallelism in Neural Network Simulation},
  booktitle = {Proceedings of the IEEE International Conference on Neural Networks},
  year      = {1993},
  volume    = {3},
  pages     = {1890--1895},
  address   = {San Francisco, CA},
}

@inproceedings{Zemel90,
  author    = {R. S. Zemel and M. C. Mozer and G. E. Hinton},
  title     = {Recognizing objects using hierarchical reference frame transformations},
  booktitle = NIPS2,
  editor    = NIPS2ed,
  year      = {1990},
  address   = {San Mateo, CA},
}

@phdthesis{Zemel93-thesis,
  author = {Richard S. Zemel},
  title  = {A Minimum Description Length Framework for Unsupervised Learning},
  school = {University of Toronto},
  year   = {1993},
}

@inproceedings{Zha2002,
  author    = {H. Zha and C. Ding and M. Gu and X. He and H. Simon},
  title     = {Spectral relaxation for {K}-means clustering},
  booktitle = NIPS14,
  editor    = NIPS14ed,
  year      = {2002},
  publisher = {{MIT} Press},
}

@InProceedings{Zhang-nips90,
  author =       "X. Zhang and others",
  editor =       NIPS2ed,
  booktitle =    NIPS2,
  title =        "An Efficient Implementation of the Backpropagation
                 Algorithm on the Connection Machine {CM}-2",
  publisher =    "Morgan Kaufmann",
  address =      "San Mateo, CA",
  pages =        "801--809",
  year =         "1990",
}

@misc{zhang-workshop-2005,
  author       = {Jian Zhang},
  title        = {Sparsity Models for Multi-task Learning},
  howpublished = {'Inductive Transfer: 10 Years Later' NIPS Workshop},
  year         = {2005},
  OPTkey       = {},
}

@TechReport{Zhang2001,
  author =       "Bin Zhang",
  title =        "Is the Maximal Margin Hyperplane Special in a Feature
                 Space?",
  number =       "HPL-2001-89",
  institution =  "Hewlett-Packard Labs",
  year =         "2001",
}

@Article{Zhang+Zha-2005,
  author    = {Zhang, Zhenyue and Zha, Hongyuan},
  title     = {Principal Manifolds and Nonlinear Dimensionality Reduction via Tangent Space Alignment},
  journal   = {SIAM Journal on Scientific Computing},
  year      = {2005},
  volume    = {26},
  number    = {1},
  pages     = {313--338},
  publisher = {Society for Industrial and Applied Mathematics},
  address   = {Philadelphia, PA},
  issn      = {1064-8275},
  doi       = {10.1137/S1064827502419154},
  url       = {http://portal.acm.org/citation.cfm?id=1024004.1039898},
}

@inproceedings{Zhang+al-2007,
  author    = {D. Zhang and Z. H. Zhou and S. Chen},
  title     = {Semi-supervised dimensionality reduction},
  booktitle = {Proceedings of the 7th SIAM International Conference on Data Mining},
  year      = {2007},
  address   = {Minneapolis, MN},
}

@article{Zhao+al-2006,
    author = {Haitao Zhao and Shaoyuan Sun and Zhongliang Jing and Jingyu Yang},
    title = {Local structure based supervised feature extraction},
    journal = {Pattern Recognition},
    volume = {39},
    number = {8},
    year = {2006},
    issn = {0031-3203},
    pages = {1546--1550},
    doi = {10.1016/j.patcog.2006.02.023},
    publisher = {Elsevier Science Inc.},
    address = {New York, NY, USA},
}

@inproceedings{Zhou+al-2004,
  author    = {D. Zhou and O. Bousquet and T. {Navin Lal} and J. Weston and B. Sch{\"o}lkopf},
  title     = {Learning with local and global consistency},
  booktitle = NIPS16,
  editor    = NIPS16ed,
  year      = {2004},
  pages     = {321--328},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  keywords  = {semi-supervised learning, manifold, kernel methods},
}

@inproceedings{Zhou+Dapkus-1995,
  author    = {J. Zhou and P. Dapkus},
  title     = {Automatic Suggestion of Significant Terms for a Predefined Topic},
  booktitle = {Proceedings of the Third Workshop on Very Large Corpora},
  year      = {1995},
  pages     = {131--147},
  address   = {Cambridge},
}

@inproceedings{Zhou+Tanner-1997,
  author    = {Joe Zhou and Troy Tanner},
  title     = {Construction and visualization of key term hierarchies},
  booktitle = {Proceedings of the fifth conference on Applied natural language processing},
  year      = {1997},
  pages     = {307--311},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  location  = {Washington, DC},
}

@InProceedings{zhou2002,
  author =       "Z.-H. Zhou and M.-L. Zhang",
  booktitle =    "Proceedings of the International Conference on
                 Intelligent Information Technology",
  title =        "Neural Networks for Multi-Instance Learning",
  address =      "Beijing, China",
  pages =        "455--459",
  year =         "2002",
}

@InProceedings{ZhouX2007,
  author =       "Xiaojin Zhu and Timothy J. Rogers and Ruichen Qian and
                 Chuck Kalish",
  booktitle =    "AAAI",
  title =        "Humans Perform Semi-Supervised Classification Too",
  publisher =    "AAAI Press",
  pages =        "864",
  year =         "2007",
  ISBN =         "978-1-57735-323-2",
  URL =          "http://dblp.uni-trier.de/db/conf/aaai/aaai2007.html#ZhuRQK07",
  date =         "2007-09-05",
  description =  "dblp",
  keywords =     "dblp",
}

@Article{Zhu2009,
  author  = {Long Zhu and Yuanhao Chen and Alan Yuille},
  title   = {Unsupervised Learning of Probabilistic Grammar-Markov Models for Object Categories},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  year    = {2009},
  volume  = {31},
  number  = {1},
  pages   = {114--128},
}

@inproceedings{Zhu+al-2003,
  author    = {Xiaojin Zhu and Zoubin Ghahramani and John Lafferty},
  title     = {Semi-supervised learning using {Gaussian} fields and harmonic functions},
  editor    = ICML03ed,
  booktitle = ICML03,
  publisher = ICML03publ,
  year      = {2003},
  pages     = {912--919},
}

@techreport{Zhu+al-TR2003,
  author      = {Xiaojin Zhu and John Lafferty and Zoubin Ghahramani},
  title       = {Semi-Supervised Learning: From {G}aussian Fields to {G}aussian Processes},
  institution = {CMU},
  number      = {CMU-CS-03-175},
  year        = {2003},
}

@article{Zhu-2006,
  author  = {M. Zhu and W. Su and H. A. Chipman},
  title   = {{LAGO}: {A} computationally efficient approach for statistical detection},
  journal = {Technometrics},
  year    = {2006},
  volume  = {48},
  number  = {2},
  pages   = {193--205},
}

@inproceedings{Zhu-ijcai-2005,
  author    = {Tingshao Zhu and Russ Greiner and Gerald Haeubl and Kevin Jewell and Bob Price},
  title     = {Using Learned Browsing Behavior Models to Recommend Relevant Web Pages},
  booktitle = {Nineteenth International Joint Conference on Artificial Intelligence (IJCAI-05)},
  year      = {2005},
  pages     = {1589--1591},
  address   = {Edinburgh, U.K.},
}

@techreport{Zhu-Lafferty-Ghahramani-2003,
  author      = {Xiaojin Zhu and John Lafferty and Zoubin Ghahramani},
  title       = {Semi-supervised learning: from {G}aussian fields to {G}aussian processes},
  institution = {School of Computer Science, Carnegie Mellon University},
  number      = {CMU-CS-03-175},
  year        = {2003},
}

@article{zhu-rohwer96,
  author  = {H. Zhu and R. Rohwer},
  title   = {No free lunch for cross validation},
  journal = {Neural Computation},
  year    = {1996},
  volume  = {8},
  number  = {7},
  pages   = {1421--1426},
}

@TechReport{zhu05survey,
  author =       "Xiaojin Zhu",
  title =        "Semi-Supervised Learning Literature Survey",
  number =       "1530",
  institution =  "Computer Science, University of Wisconsin-Madison",
  year =         "2005",
  note =         "http://www.cs.wisc.edu/$\sim$jerryzhu/pub/ssl\_survey.pdf",
}

@techreport{ZhuX2002,
  author      = {Xiaojin Zhu and Zoubin Ghahramani},
  title       = {Towards semisupervised classification with Markov random fields},
  institution = {Carnegie Mellon University},
  year        = {2002},
}

@InProceedings{Zinkevich-2003,
  author    = "Martin Zinkevich",
  title     = "Online convex programming and generalized infinitesimal gradient ascent",
  editor    = ICML03ed,
  booktitle = ICML03,
  publisher = ICML03publ,
  year      = {2003},
  pages     = {928--936},
}

@inproceedings{Zoubin-nips8,
  author    = {Z. Ghahramani and M. I. Jordan},
  title     = {Factorial Hidden Markov Models},
  booktitle = NIPS8,
  editor    = NIPS8ed,
  year      = {1996},
  publisher = {MIT Press, Cambridge, MA},
}

@inproceedings{Zoubin-nips94,
  author    = {Z. Ghahramani and M. I. Jordan},
  title     = {Supervised learning from incomplete data via an {EM} approach},
  booktitle = NIPS6,
  editor    = NIPS6ed,
  year      = {1994},
  publisher = {Morgan Kaufmann},
  address   = {San Mateo, CA},
}

@techreport{Zoubin-tr93,
  author      = {Z. Ghahramani and M. I. Jordan},
  title       = {Function approximation via density estimation using the {E}{M} approach},
  institution = {MIT},
  type        = {Computational Cognitive Science},
  number      = {TR 9304},
  year        = {1993},
}

% NOTE(review): report number changed from CRG-TR-91-1 to CRG-TR-96-2 to match the 1996 Toronto CRG series -- confirm.
@TechReport{Zoubin96,
  author =       "Z. Ghahramani and G. E. Hinton",
  title =        "Parameter estimation for linear dynamical systems",
  number =       "CRG-TR-96-2",
  institution =  "University of Toronto",
  year =         "1996",
}

% NOTE(review): report number changed from CRG-TR-91-3 to CRG-TR-96-3 to match the 1996 Toronto CRG series -- confirm.
@TechReport{Zoubin96b,
  author =       "Z. Ghahramani and G. E. Hinton",
  title =        "Switching state-space models",
  number =       "CRG-TR-96-3",
  institution =  "University of Toronto",
  year =         "1996",
}

@article{Zue90a,
  author  = {V. Zue and S. Seneff and J. Glass},
  title   = {Speech database development: {TIMIT} and beyond},
  journal = spcomm,
  year    = {1990},
  month   = aug,
  volume  = {9},
  number  = {4},
  pages   = {351--356},
}

@InProceedings{Zue90b,
  author =       "V. Zue and J. Glass and D. Goddeau and D. Goodine and
                 H. Leung and M. McCandless and M. Phillips and J.
                 Polifroni and S. Seneff and D. Whitney",
  booktitle =    "Proc. Int. Conf. Spoken Language Processing",
  title =        "Recent progress on the {MIT} {VOYAGER} spoken language
                 system",
  address =      "Kobe, Japan",
  pages =        "29.6.1",
  year =         "1990",
}

@inproceedings{Zwald+al-2004,
  author    = {Laurent Zwald and Olivier Bousquet and Gilles Blanchard},
  title     = {Statistical Properties of Kernel Principal Component Analysis},
  booktitle = colt04,
  editor    = {John Shawe-Taylor and Yoram Singer},
  year      = {2004},
  series    = {Lecture Notes in Computer Science},
  volume    = {3120},
  pages     = {594--608},
  publisher = {Springer-Verlag},
}

@InProceedings{Zweig+Russel-AAAI98,
  author =       "G. Zweig and S. Russell",
  booktitle =    "Proceedings of the AAAI Conference",
  title =        "Speech Recognition with Dynamic {Bayesian} Networks",
  publisher =    "AAAI Press",
  address =      "Madison, Wisconsin",
  year =         "1998",
}

@InProceedings{Zweig+Russel-ICSLP98,
  author =       "G. Zweig and S. Russell",
  booktitle =    "Proceedings of the International Conference on
                 Spoken Language Processing",
  title =        "Probabilistic Modeling with {Bayesian} Networks for
                 {ASR}",
  address =      "Sydney, Australia",
  year =         "1998",
}

@article{Zwicker+Terhardt80,
  author  = {E. Zwicker and E. Terhardt},
  title   = {Analytical expressions for critical band rate and critical bandwidths as a function of frequency},
  journal = jasa,
  year    = {1980},
  volume  = {68},
  number  = {5},
  pages   = {1523--1525},
}

@proceedings{colt03,
  title     = {Computational Learning Theory and Kernel Machines,
               16th Annual Conference on Computational Learning Theory
               and 7th Kernel Workshop, {COLT}/Kernel 2003,
               Washington, {DC}, {USA}, August 24-27, 2003,
               Proceedings},
  booktitle = colt03,
  editor    = {Bernhard Sch{\"o}lkopf and Manfred K. Warmuth},
  year      = {2003},
  series    = {Lecture Notes in Computer Science},
  volume    = {2777},
  publisher = {Springer},
}

@proceedings{FOCS3,
  title        = {Proceedings of the Third Annual Symposium on Switching Circuit Theory and Logical Design},
  booktitle    = {Proceedings of the Third Annual Symposium on Switching Circuit Theory and Logical Design},
  organization = {American Institute of Electrical Engineers},
  address      = {Chicago, Illinois},
  year         = {1962},
  month        = "7--12" # oct,
  crossrefonly = {1},
  url          = {http://theory.lcs.mit.edu/~dmjones/FOCS/focs.bib},
}

@Book{TricksOfTheTrade,
  editor =       "Genevieve Orr and Klaus-Robert M{\"u}ller",
  booktitle =    "Neural networks: tricks of the trade",
  title =        "Neural networks: tricks of the trade",
  volume =       "1524",
  publisher =    "Springer-Verlag Inc.",
  address =      "New York, NY, USA",
  pages =        "vi + 432",
  year =         "1998",
  ISBN =         "3-540-65311-2 (paperback)",
  ISSN =         "0302-9743",
  LCCN =         "QA76.87.N4913 1998",
  bibdate =      "Sat Jan 9 14:35:31 1999",
  series =       "Lecture Notes in Computer Science",
  acknowledgement = ack-nhfb,
  keywords =     "Neural networks (Computer science)",
}

@article{Besag75pseudolikelihood,
  author  = {Julian Besag},
  title   = {Statistical analysis of non-lattice data},
  journal = {The Statistician},
  year    = {1975},
  volume  = {24},
  number  = {3},
  pages   = {179--195},
}

@INPROCEEDINGS{Marlin05unsupervisedlearning,
    author = {Benjamin Marlin and Richard S. Zemel and Sam T. Roweis},
    title = {Unsupervised learning with non-ignorable missing data},
    booktitle = {Proceedings of the Tenth International Workshop on Artificial Intelligence and Statistics (AISTATS 2005)},
    year = {2005},
    pages = {222--229}
}

@phdthesis{MarlinThesis08,
  author = {Benjamin M. Marlin},
  title  = {Missing Data Problems in Machine Learning},
  year   = {2008},
  school = {Dept. of Computer Science, University of Toronto},
}

@inproceedings{odonnellservedio08,
author = "{O'Donnell}, R. and Servedio, R.",
title = "The {Chow} parameters problem",
booktitle = "Proceedings of the Fortieth Annual Symposium on Theory of
Computing (STOC)",
year = 2008,
pages = "517--526",
}

@article{bendaviddichterman98,
author = "{Ben-David}, S. and Dichterman, E.",
title = "Learning with restricted focus of attention",
journal = "Journal of Computer and System Sciences",
volume = 56,
number = 3,
year = 1998,
pages = "277--298",
}

@techreport{cma07,
author = "{Canadian Medical Association}",
title = "Information technology and health care in Canada: 2007 status report",
institution = "Canadian Medical Association",
year = 2007,
}

@article{hanetal05,
author = "Y. Han and J. Carcillo and S. Venkataraman and R. Clark and
R. Watson and T. Nguyen and H. Bayir and R. Orr",
title = "Unexpected increased mortality after implementation
of a commercially sold computerized physician order entry system",
journal = "Pediatrics",
volume = "116",
number = 6,
pages = "1506--1512",
year = 2005,
}

@inproceedings{conf/uai/McCallum03,
  author    = {Andrew McCallum},
  title     = {Efficiently Inducing Features of Conditional Random Fields},
  booktitle = UAI03,
  editor    = {Christopher Meek and Uffe Kj{\ae}rulff},
  year      = {2003},
  date      = {August 7-10},
  pages     = {403--410},
  publisher = {Morgan Kaufmann},
  location  = {Acapulco, Mexico},
  isbn      = {0-127-05664-5},
}


@inproceedings{conf/uai/McCallum03-small,
  author    = {A. McCallum},
  title     = {Efficiently Inducing Features of Conditional Random Fields},
  booktitle = {UAI},
  year      = {2003},
}


@inproceedings{conf/icml/RanzatoS08,
  author    = {Marc'Aurelio Ranzato and Martin Szummer},
  title     = {Semi-supervised learning of compact document representations with deep networks},
  editor    = ICML08ed,
  booktitle = ICML08,
  publisher = ICML08publ,
  year      = {2008},
  date      = {June 5-9, 2008},
  series    = {ACM International Conference Proceeding Series},
  volume    = {307},
  pages     = {792--799},
  location  = {Helsinki, Finland},
  isbn      = {978-1-60558-205-4},
  url       = {http://doi.acm.org/10.1145/1390156.1390256},
}

@inproceedings{conf/icml/RanzatoS08-small,
  author    = {M. Ranzato and M. Szummer},
  title     = {Semi-supervised learning of compact document representations with deep networks},
  booktitle = {ICML},
  year      = {2008},
}

%% A thesis has a single author; "Prof Murat Kunt" was presumably the thesis
%% director picked up by an automatic export (TODO confirm). The name is kept
%% in a field that styles ignore.
@phdthesis{Cosatto02sample-basedtalking-head,
  author  = {Eric Cosatto},
  title   = {Sample-Based Talking-Head Synthesis},
  school  = {Signal Processing Lab, Swiss Federal Institute of Technology},
  year    = {2002},
  advisor = {Murat Kunt},
}

@incollection{SutskeverHintonTaylor2009,
  author    = {Ilya Sutskever and Geoffrey E Hinton and Graham Taylor},
  title     = {The Recurrent Temporal Restricted Boltzmann Machine},
  booktitle = NIPS21,
  editor    = NIPS21ed,
  publisher = NIPS21publ,
  pages     = {1601--1608},
  year      = {2009},
}

@techreport{Bergstra+2009-small,
  author      = {J. Bergstra and G. Desjardins and P. Lamblin and Y. Bengio},
  title       = {Quadratic Polynomials Learn Better Image Features},
  institution = {DIRO, Universit\'e de Montr\'eal},
  number      = {1337},
  year        = {2009},
}

@inproceedings{Haffner+al-1998,
  author    = {Haffner, P. and Bottou, L. and Howard, P. G. and Simard, P. and Bengio, Y. and {LeCun}, Y.},
  title     = {Browsing through High Quality Document Images with {DjVu}},
  booktitle = {Proceedings of the Advances in Digital Libraries Conference (ADL'98)},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  pages     = {309},
  year      = {1998},
  isbn      = {0-8186-8464-X},
}

@inproceedings{Bottou+Howard+Bengio-1998,
  title     = {The {Z}-Coder Adaptive Binary Coder},
  author    = {Bottou, L. and Howard, P. G. and Bengio, Y.},
  booktitle = {Proceedings of the Conference on Data Compression (DCC'98)},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
  pages     = {13},
  year      = {1998},
}

@inproceedings{Pigeon+Bengio-1998,
  title     = {A Memory-Efficient Adaptive Huffman Coding Algorithm for Very Large Sets of Symbols},
  author    = {Steven Pigeon and Yoshua Bengio},
  booktitle = {Proceedings of the Conference on Data Compression (DCC'98)},
  pages     = {568},
  year      = {1998},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://dlib.computer.org/conferen/dcc/8406/pdf/84060568.pdf},
}

@inproceedings{LeCun+Bottou+Bengio-1997,
  author    = {{LeCun}, Yann and Bottou, L. and Bengio, Y.},
  title     = {Reading checks with multilayer graph transformer networks},
  booktitle = {IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP'97)},
  volume    = {1},
  pages     = {151--154},
  month     = apr,
  year      = {1997},
  doi       = {10.1109/ICASSP.1997.599580},
  keywords  = {backpropagation, banking, cheque processing, document image processing, image segmentation, optical character recognition, business checks, business cheques, check reading system, cheque reading system, convolutional neural network character recognizers, gradient-based learning algorithms, graph-based stochastic models, machine learning paradigm, multilayer graph transformer networks, personal checks, personal cheques},
}

@inproceedings{Rahim97discriminativefeature,
  author    = {Mazin Rahim and Yoshua Bengio and Yann {LeCun}},
  title     = {Discriminative Feature And Model Design For Automatic Speech Recognition},
  booktitle = {Proc. of Eurospeech},
  pages     = {75--78},
  year      = {1997},
}

@inproceedings{Bengio-nncm-1996,
  author    = {Yoshua Bengio},
  title     = {Training A Neural Network with a Financial Criterion Rather than a Prediction Criterion},
  booktitle = {Proceedings of the Fourth International Conference on Neural Networks in the Capital Markets (NNCM-96)},
  editor    = {A. S. Weigend and Y. S. Abu-Mostafa and A.-P. N. Refenes},
  publisher = {World Scientific},
  pages     = {433--443},
  year      = {1997},
}

@inproceedings{Bengio+Bengio+Cloutier-1994,
  author    = {Bengio, S. and Bengio, Y. and Cloutier, J.},
  title     = {Use of genetic programming for the search of a new learning rule for neural networks},
  booktitle = {Proceedings of the First IEEE Conference on Evolutionary Computation},
  volume    = {1},
  pages     = {324--327},
  month     = jun,
  year      = {1994},
  doi       = {10.1109/ICEC.1994.349932},
  keywords  = {backpropagation, genetic algorithms, learning (artificial intelligence), neural nets, optimisation, search problems, backpropagation algorithm, classification tasks, genetic algorithms, genetic programming, gradient descent, learning rule, neural networks, optimization, parametric function, rule parameters, search, simulated annealing, standard optimization methods},
}

@article{Chakraborty+al-2002,
  author    = {Chakraborty, Basabi and Chakraborty, Goutam},
  title     = {A new feature extraction technique for on-line recognition of handwritten alphanumeric characters},
  journal   = {Inf. Sci. Appl.},
  volume    = {148},
  number    = {1-4},
  pages     = {55--70},
  year      = {2002},
  issn      = {0020-0255},
  doi       = {10.1016/S0020-0255(02)00276-1},
  publisher = {Elsevier Science Inc.},
  address   = {New York, NY, USA},
}


@inproceedings{LeCun+al-1993,
  author    = {{LeCun}, Y. and Bengio, Y. and Henderson, D. and Weisbuch, A.},
  title     = {On-Line handwriting recognition with neural networks: spatial representation versus temporal representation},
  booktitle = {Proceedings of the International Conference on Handwriting and Drawing},
  year      = {1993},
  location  = {Ecole Nationale Superieure des Telecommunications},
}

@inproceedings{Bengio+al-92,
  author    = {Yoshua Bengio and Samy Bengio and Jocelyn Cloutier and Jan Gecsei},
  title     = {On the Optimization of a Synaptic Learning Rule},
  booktitle = {Conference on Optimality in Biological and Artificial Networks},
  year      = {1992},
}

@inproceedings{Bengio+al-91,
  title     = {Learning a Synaptic Learning Rule},
  author    = {Yoshua Bengio and Samy Bengio and Jocelyn Cloutier and Jan Gecsei},
  booktitle = ijcnn,
  pages     = {II-A969},
  year      = {1991},
  location  = {Seattle, WA},
}

@inproceedings{Bengio91acomparative,
  author    = {Yoshua Bengio and Renato De Mori and Giovanni Flammia and Ralf Kompe},
  title     = {A Comparative Study On Hybrid Acoustic Phonetic Decoders Based On Artificial Neural Networks},
  booktitle = {Proceedings of EuroSpeech},
  year      = {1991},
  location  = {Genova, Italy},
}

%% Chapter 9 of an edited book, hence incollection. The original "editors"
%% field name is not recognised by BibTeX styles; "editor" is.
@incollection{lecun-01a,
  author    = {{LeCun}, Y. and Bottou, L. and Bengio, Y. and Haffner, P.},
  title     = {Gradient-Based Learning Applied to Document Recognition},
  booktitle = {Intelligent Signal Processing},
  editor    = {Haykin, S. and Kosko, B.},
  publisher = {IEEE Press},
  chapter   = {9},
  pages     = {306--351},
  year      = {2001},
  original  = {orig/lecun-01a.ps.gz},
}

%% "Informatik, Fakultat F." in the exported author list was an affiliation
%% (Fakultaet fuer Informatik), not a person, and has been removed.
%% The abstract is truncated in the source export.
@incollection{Hochreiter+al-2000,
  author    = {Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo and Schmidhuber, J{\"u}rgen},
  title     = {Gradient Flow in Recurrent Nets: the Difficulty of Learning Long-Term Dependencies},
  booktitle = {Field Guide to Dynamical Recurrent Networks},
  editor    = {J. Kolen and S. Kremer},
  publisher = {IEEE Press},
  year      = {2000},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.7321},
  keywords  = {gradient-descent, long-term-dependencies, rnn},
  abstract  = {Introduction Recurrent networks (crossreference Chapter 12) can, in principle, use their feedback connections to store representations of recent input events in the form of activations. The most widely used algorithms for learning what to put in short-term memory, however, take too much time to be feasible or do not work well at all, especially when minimal time lags between inputs and corresponding teacher signals are long. Although theoretically fascinating, they do not provide clear practical advantages over, say, backprop in feedforward networks with limited time windows (see crossreference Chapters 11 and 12). With conventional ``algorithms based on the computation of the complete gradient'', such as ``Back-Propagation Through Time'' (BPTT, e.g., [22, 27, 26]) or ``Real-Time Recurrent Learning'' (RTRL, e.g., [21]) error signals ``flowing backwards in time'' tend to either (1) blow up or (2) vanish: the temporal evolution of the backpropagated error ex},
  citeulike-article-id = {4450697},
  citeulike-linkout-0  = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.7321},
  posted-at = {2009-05-02 00:58:01},
  priority  = {2},
}

@inproceedings{Lecun99objectrecognition,
  author    = {Yann {LeCun} and Patrick Haffner and L{\'e}on Bottou and Yoshua Bengio},
  title     = {Object Recognition with Gradient-Based Learning},
  booktitle = {Shape, Contour and Grouping in Computer Vision},
  publisher = {Springer},
  pages     = {319--345},
  year      = {1999},
}


% non-refereed conference presentations
@misc{snowbird_learn_conf,
  author       = {many authors},
  title        = {Snowbirds papers},
  howpublished = {Learning Conference, Snowbird},
  year         = {many},
  location     = {Utah},
}

%% NOTE(review): the key says 2001 but the year field says 2002 -- confirm.
@misc{Collobert+Bengio-2001,
  author       = {Collobert, R. and Bengio, Y.},
  title        = {Magic Mix},
  howpublished = {Learning Conference, Snowbird},
  year         = {2002},
  location     = {Utah},
}

@misc{Bengio+al-2001,
  author       = {Yoshua Bengio and Pascal Vincent and Florence d'Alch{\'e}-Buc},
  title        = {Learning a Distributed Representation for Statistical Language Modeling and Information Retrieval},
  howpublished = {Learning Conference, Snowbird},
  year         = {2001},
  location     = {Utah},
}

@misc{Bengio+Nadeau-2000,
  author       = {Yoshua Bengio and C. Nadeau},
  title        = {About Realistic Comparisons Between Learning Algorithms},
  howpublished = {Learning Conference, Snowbird},
  year         = {2000},
  location     = {Utah},
}
@misc{Bengio-1999,
  author       = {Yoshua Bengio},
  title        = {Learning from Structured High-Dimensional Data},
  howpublished = {Meeting of the Mathematical Society of Canada},
  year         = {1999},
  location     = {Montreal, Canada},
}

@misc{Bengio+al-1999,
  author       = {Yoshua Bengio and S. Latendresse and Charles Dugas},
  title        = {Gradient-Based Learning of Hyper-Parameters},
  howpublished = {Learning Conference, Snowbird},
  year         = {1999},
  location     = {Utah},
}

@misc{Bengio+al-1999b,
  author       = {Yoshua Bengio and J-J. Brault and F. Major and R. Neal and S. Pigeon},
  title        = {Learning Algorithms for Sorting Compounds from Titration Curves},
  howpublished = {Symposium on New Perspectives for Computer-Aided Drug Design},
  year         = {1999},
  location     = {Montreal, Canada},
}

@misc{Bengio+al-1998,
  author       = {Yoshua Bengio and S. Latendresse and Charles Dugas},
  title        = {Stochastic learning of strategic equilibria for auctions},
  howpublished = {Machines That Learn Conference, Snowbird},
  year         = {1998},
  location     = {Utah},
}

@misc{Bengio+al-1997,
  author       = {Bengio, Y. and Bengio, S. and Singer, Y. and Isabelle, J-F.},
  title        = {On the Clusterization of Probabilistic Transducers},
  howpublished = {1997 Neural Networks for Computing Conference, Snowbird},
  year         = {1997},
  location     = {Utah},
}

%% NOTE(review): the author list is identical to the 1997 Snowbird entry and
%% the key suggests a single author -- possibly a copy-paste; confirm.
@misc{Bengio-1995,
  author       = {Bengio, Y. and Bengio, S. and Singer, Y. and Isabelle, J-F.},
  title        = {Fast High Capacity Classifiers},
  howpublished = {1995 Neural Networks for Computing Conference, Snowbird},
  year         = {1995},
  location     = {Utah},
}

@misc{Bengio+Frasconi-1994,
  author       = {Bengio, Y. and Frasconi, P.},
  title        = {R{\'e}seaux de neurones Markoviens pour l'inf{\'e}rence grammaticale},
  howpublished = {1994 ACFAS Conference, neural networks colloquium},
  year         = {1994},
  location     = {Montr{\'e}al, Qu{\'e}bec},
}

@misc{Bengio+LeCun-1994,
  author       = {Bengio, Y. and {LeCun}, Y.},
  title        = {Reconnaissance de mots manuscrits avec r{\'e}seaux de neurones et mod{\`e}les de {Markov}},
  howpublished = {1994 ACFAS Conference, neural networks colloquium},
  year         = {1994},
  location     = {Montr{\'e}al, Qu{\'e}bec},
}

@misc{Bengio+al-1994,
  author       = {Bengio, S. and Bengio, Y. and Cloutier, J. and Gecsei, J.},
  title        = {Optimisation d'une r{\`e}gle d'apprentissage pour r{\'e}seaux de neurones artificiels},
  howpublished = {1994 ACFAS Conference, neural networks colloquium},
  year         = {1994},
  location     = {Montr{\'e}al, Qu{\'e}bec},
}

@misc{Bengio+Frasconi-1994b,
  author       = {Bengio, Y. and Frasconi, P.},
  title        = {An {EM} Algorithm for Target Propagation},
  howpublished = {1994 Neural Networks for Computing Conference, Snowbird},
  year         = {1994},
  location     = {Utah},
}

%% howpublished previously read "1994 ..." although key and year say 1993;
%% presumably copied from the 1994 entry -- confirm.
@misc{Bengio+al-1993,
  author       = {Bengio, Y. and Simard, P. and Frasconi, P.},
  title        = {The Problem of Learning Long-Term Dependencies in Recurrent Networks},
  howpublished = {1993 Neural Networks for Computing Conference, Snowbird},
  year         = {1993},
  location     = {Utah},
}
@misc{Bengio-1992,
  author       = {Bengio, Y.},
  title        = {Representations Based on Articulatory Dynamics for Speech Recognition},
  howpublished = {1992 Neural Networks for Computing Conference, Snowbird},
  year         = {1992},
  location     = {Utah},
}

@misc{Bengio+al-1991,
  author       = {Bengio, Y. and Bengio, S. and Cloutier, J.},
  title        = {Learning a Synaptic Learning Rule},
  howpublished = {1991 Neural Networks for Computing Conference, Snowbird},
  year         = {1991},
  location     = {Utah},
}

@misc{Bengio+DeMori-1990,
  author       = {Bengio, Y. and De Mori, R.},
  title        = {Recurrent networks with Radial Basis Functions for speech recognition},
  howpublished = {1990 Neural Networks for Computing Conference, Snowbird},
  year         = {1990},
  location     = {Utah},
}


%%tech reports
@techreport{Bardou+Bengio-TR2002,
  author      = {O. Bardou and Yoshua Bengio},
  title       = {R{\'e}gularisation du prix des options : Stacking},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-44},
  year        = {2002},
}

%% The author list previously duplicated the preceding Bardou+Bengio entry;
%% corrected to match the citation key -- confirm against the CIRANO report.
@techreport{Dugas+Bengio-TR2002,
  author      = {C. Dugas and Y. Bengio},
  title       = {{\'E}tude du biais dans le prix des options},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-45},
  year        = {2002},
}

@techreport{Dugas+al-TR2002,
  author      = {C. Dugas and Y. Bengio and F. B{\'e}lisle and C. Nadeau and R. Garcia},
  title       = {Incorporating Second-Order Functional Knowledge for Better Option Pricing},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-46},
  year        = {2002},
}

@techreport{Bengio+al-TR2002,
  author      = {Y. Bengio and V.-P. Lauzon and R. Ducharme},
  title       = {Experiments on the Application of {IOHMMs} to Model Financial Returns Series},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-47},
  year        = {2002},
}

@techreport{Bengio+al-TR2002b,
  author      = {Y. Bengio and R. Ducharme and O. Bardou and N. Chapados},
  title       = {Valorisation d'options par optimisation du {Sharpe} Ratio},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-48},
  year        = {2002},
}

@techreport{Chapados+Bengio-TR2002,
  author      = {N. Chapados and Y. Bengio},
  title       = {Cost Functions and Model Combination for {VaR}-based Asset Allocation using Neural Networks},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-49},
  year        = {2002},
}

@techreport{Bengio+Dugas-TR2002,
  author      = {Y. Bengio and C. Dugas},
  title       = {Forecasting Non-Stationary Volatility with Hyper-Parameters},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-50},
  year        = {2002},
}

@techreport{Gingras+al-TR2002,
  author      = {F. Gingras and Y. Bengio and C. Nadeau},
  title       = {On Out-of-Sample Statistics for Time-Series},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-51},
  year        = {2002},
}

@techreport{Chapados+Bengio-TR2002b,
  author      = {N. Chapados and Y. Bengio},
  title       = {Input Decay: Simple and Effective Soft Variable Selection},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-52},
  year        = {2002},
}

@techreport{Ghosn+Bengio-TR2002,
  author      = {J. Ghosn and Y. Bengio},
  title       = {Multi-Task Learning For Option Pricing},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {2002s-53},
  year        = {2002},
}

%% The author list previously duplicated the preceding Ghosn+Bengio entry.
%% Corrected to the authors of IDIAP-RR-01-12 -- confirm against the report.
@techreport{Collobert+al-TR2001,
  author      = {R. Collobert and S. Bengio and Y. Bengio},
  title       = {A Parallel Mixture of {SVM}s for Very Large Scale Problems},
  institution = {IDIAP},
  number      = {IDIAP-RR-01-12},
  year        = {2001},
  location    = {Switzerland},
}

%% location was "Switzerland", apparently copied from the IDIAP entry above;
%% another entry in this file gives DIRO as part of Universite de Montreal.
@techreport{Vincent+Bengio-TR2001,
  author      = {Vincent, P. and Bengio, Y.},
  title       = {K-Local Hyperplane and Convex Distance Nearest Neighbor Algorithms},
  institution = DIRO,
  number      = {1197},
  year        = {2001},
  location    = {Montr{\'e}al, Canada},
}

@techreport{Chapados+al-TR2001,
  author      = {Chapados, N. and Bengio, Y. and Vincent, P. and Ghosn, J. and Dugas, C. and Takeuchi, I. and Meng, L.},
  title       = {Estimating Car Insurance Premia: a Case Study in High-Dimensional Data Inference},
  institution = DIRO,
  number      = {1199},
  year        = {2001},
}

%% The author list previously duplicated the seven authors of the preceding
%% entry; corrected to match the citation key -- confirm against the report.
@techreport{Bengio+Chapados-TR2001,
  author      = {Bengio, Y. and Chapados, N.},
  title       = {Extending Metric-Based Model Selection and Regularization in the Absence of Unlabeled Data},
  institution = DIRO,
  number      = {1200},
  year        = {2001},
}

%% year was 2002, contradicting both the key and the report number (99s-25).
@techreport{Nadeau+Bengio-TR1999,
  author      = {Nadeau, C. and Bengio, Y.},
  title       = {Inference and the Generalization Error},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {99s-25},
  year        = {1999},
}

@techreport{Gingras+al-TR1999,
  author      = {Gingras, F. and Bengio, Y. and Nadeau, C.},
  title       = {On Out-of-Sample Statistics for Financial Time-Series},
  institution = {Centre de Recherches Math{\'e}matiques, Universit{\'e} de Montr{\'e}al},
  number      = {2585},
  year        = {1999},
}

@techreport{Bengio-1998-TR,
  author      = {Bengio, Y.},
  title       = {Using a financial training criterion rather than a prediction criterion},
  institution = {CIRANO},
  type        = {Cahier Scientifique},
  number      = {98s-21},
  year        = {1998},
}

@techreport{Bengio+DeMori-1990-TR,
  title       = {Some connectionist models and their application to speech recognition},
  author      = {Bengio, Y. and De Mori, R.},
  number      = {TR-SOCS-90-12},
  institution = {School of Computer Science, McGill University},
  year        = {1990},
}

@article{becker+hinton:1993,
  title   = {Learning Mixture Models of Spatial Coherence},
  author  = {Becker, S. and Hinton, G. E.},
  journal = {Neural Computation},
  year    = {1993},
  volume  = {5},
  pages   = {267--277},
}
@article{berkes:2005,
  author  = {Berkes, Pietro and Wiskott, Laurenz},
  title   = {Slow Feature Analysis Yields a Rich Repertoire of Complex Cell Properties},
  journal = {Journal of Vision},
  volume  = {5},
  number  = {6},
  pages   = {579--602},
  month   = jul,
  year    = {2005},
  issn    = {1534-7362},
  url     = {http://journalofvision.org/5/6/9/},
  eprint  = {http://journalofvision.org/5/6/9/Berkes-2005-jov-5-6-9.pdf},
}
@inproceedings{hurri+hyvarinen:2003,
  title     = {Temporal Coherence, Natural Image Sequences, and the Visual Cortex.},
  author    = {Hurri, J. and Hyv{\"a}rinen, A.},
  booktitle = {Advances in Neural Information Processing Systems 15 ({NIPS*02})},
  pages     = {141--148},
  year      = {2003},
}
@article{wiskott:2002,
  author  = {Laurenz Wiskott and Terrence Sejnowski},
  title   = {Slow Feature Analysis: Unsupervised Learning of Invariances},
  journal = {Neural Computation},
  volume  = {14},
  number  = {4},
  pages   = {715--770},
  year    = {2002},
  url     = {http://itb.biologie.hu-berlin.de/~wiskott/Publications/WisSej2002-LearningInvariances-NC.ps.gz},
}

@article{KouhPoggio2008,
  author  = {Minjoon M. Kouh and Tomaso T. Poggio},
  title   = {A Canonical Neural Circuit for Cortical Nonlinear Operations},
  journal = {Neural Computation},
  volume  = {20},
  number  = {6},
  pages   = {1427--1451},
  year    = {2008},
}
@article{NykampRingach2002,
  author  = {D. Q. Nykamp and D. L. Ringach},
  title   = {Full Identification of a Linear-Nonlinear System via Cross-Correlation Analysis},
  journal = {Journal of Vision},
  volume  = {2},
  pages   = {1--11},
  year    = {2002},
}
@incollection{cadieu+olshausen:2009,
  author    = {Charles Cadieu and Bruno Olshausen},
  title     = {Learning Transformational Invariants from Natural Movies},
  booktitle = {Advances in Neural Information Processing Systems 21},
  editor    = {D. Koller and D. Schuurmans and Y. Bengio and L. Bottou},
  publisher = {MIT Press},
  pages     = {209--216},
  year      = {2009},
}
@book{DayanAbbott2001,
  title     = {Theoretical Neuroscience},
  author    = {Peter Dayan and L. F. Abbott},
  year      = {2001},
  publisher = {The {MIT} Press},
}

@inproceedings{Chechik-MIR2008,
  title     = {Large-scale content-based audio retrieval from text queries},
  author    = {G. Chechik and E. Ie and M. Rehn and S. Bengio and D. Lyon},
  year      = {2008},
  booktitle = {ACM International Conference on Multimedia Information Retrieval (MIR'08)},
}

@inproceedings{Bai-ECIR2009,
  author    = {B. Bai and J. Weston and R. Collobert and D. Grangier},
  title     = {Supervised Semantic Indexing},
  booktitle = {European Conference on Information Retrieval (ECIR'09)},
  year      = {2009},
}

@article{Attwell+Laughlin-2001,
  title   = {An energy budget for signaling in the grey matter of the brain},
  author  = {David Attwell and Simon B. Laughlin},
  journal = {Journal of Cerebral Blood Flow And Metabolism},
  volume  = {21},
  pages   = {1133--1145},
  year    = {2001},
}

%% month uses the standard macro so that styles can format or translate it;
%% the day of month is appended by string concatenation.
@article{Lennie-2003,
  author  = {Peter Lennie},
  title   = {The cost of cortical computation},
  journal = {Current Biology},
  volume  = {13},
  number  = {6},
  pages   = {493--497},
  month   = mar # "~18",
  year    = {2003},
}

@inproceedings{LowdD2005,
  title     = {Naive Bayes models for probability estimation},
  author    = {Lowd, Daniel and Domingos, Pedro},
  editor    = ICML05ed,
  booktitle = ICML05,
  publisher = ICML05publ,
  address   = {New York, NY, USA},
  pages     = {529--536},
  year      = {2005},
  location  = {Bonn, Germany},
}

@incollection{NairV2009,
  author    = {Vinod Nair and Geoffrey E Hinton},
  title     = {Implicit Mixtures of Restricted Boltzmann Machines},
  editor    = NIPS21ed,
  booktitle = NIPS21,
  publisher = NIPS21publ,
  year      = {2009},
  pages     = {1145--1152},
}

@incollection{Goodfellow2009,
  author    = {Ian Goodfellow and Quoc Le and Andrew Saxe and Andrew Ng},
  title     = {Measuring Invariances in Deep Networks},
  editor    = NIPS22ed,
  booktitle = NIPS22,
  year      = {2009},
  pages     = {646--654},
}

@incollection{Xiao2009,
  author    = {Lin Xiao},
  title     = {Dual Averaging Method for Regularized Stochastic Learning and Online Optimization},
  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
  booktitle = {Advances in Neural Information Processing Systems 22},
  year      = {2009},
  pages     = {2116--2124},
}

@incollection{Kwok2009,
  author    = {Chonghai Hu and James Kwok and Weike Pan},
  title     = {Accelerated Gradient Methods for Stochastic Optimization and Online Learning},
  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
  booktitle = {Advances in Neural Information Processing Systems 22},
  year      = {2009},
  pages     = {781--789},
}

%% The rate is big-O; the braces keep styles from re-casing the math.
@article{Nesterov83,
  author  = {Yu Nesterov},
  title   = {A method for unconstrained convex minimization problem with the rate of convergence {$O(1/k^2)$}},
  journal = {Doklady AN SSSR (translated as Soviet Math. Dokl.)},
  volume  = {269},
  pages   = {543--547},
  year    = {1983},
}

@incollection{Bai2009,
  author    = {Bing Bai and Jason Weston and David Grangier and Ronan Collobert and Kunihiko Sadamasa and Yanjun Qi and Corinna Cortes and Mehryar Mohri},
  title     = {Polynomial Semantic Indexing},
  booktitle = {Advances in Neural Information Processing Systems 22},
  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
  pages     = {64--72},
  year      = {2009},
}

@incollection{Chechik2009,
  author    = {Gal Chechik and Uri Shalit and Varun Sharma and Samy Bengio},
  title     = {An Online Algorithm for Large Scale Image Similarity Learning},
  editor    = {Y. Bengio and D. Schuurmans and J. Lafferty and C. K. I. Williams and A. Culotta},
  booktitle = {Advances in Neural Information Processing Systems 22},
  year      = {2009},
  pages     = {306--314},
}

@incollection{Klampfl+Maass-2009,
  author    = {Stefan Klampfl and Wolfgang Maass},
  title     = {Replacing supervised classification learning by Slow Feature Analysis in spiking neural networks},
  editor    = NIPS22ed,
  booktitle = NIPS22,
  year      = {2009},
  pages     = {988--996},
}



@article{GrandvaletCanuBoucheron97,
  author  = {Yves Grandvalet and St{\'e}phane Canu and St{\'e}phane Boucheron},
  title   = {Noise Injection: Theoretical Prospects},
  journal = {Neural Computation},
  volume  = {9},
  number  = {5},
  pages   = {1093--1108},
  year    = {1997},
}

@article{SietsmaDow91,
  title   = {Creating artificial neural networks that generalize},
  author  = {J. Sietsma and R. Dow},
  journal = {Neural Networks},
  year    = {1991},
  volume  = {4},
  number  = {1},
  pages   = {67--79},
}

@article{HolmstromKoistinen92,
  author  = {Lasse Holmstr{\"o}m and Petri Koistinen},
  title   = {Using additive noise in back-propagation training},
  journal = {{IEEE} Transactions on Neural Networks},
  volume  = {3},
  number  = {1},
  pages   = {24--38},
  year    = {1992},
}

@inproceedings{Baird90,
  author    = {H. Baird},
  title     = {Document image defect models},
  booktitle = {IAPR Workshop on Syntactic and Structural Pattern Recognition},
  address   = {Murray Hill, NJ},
  pages     = {38--46},
  year      = {1990},
}

%% "type" replaces the default "Technical Report" label, so the report prints
%% as "A.I. Memo 1347" rather than "Technical Report A.I. Memo No. 1347".
@techreport{Poggio+Vetter92,
  author      = {T. Poggio and T. Vetter},
  title       = {Recognition and structure from one {2D} model view: Observations on prototypes, object classes and symmetries},
  institution = {Artificial Intelligence Laboratory, Massachusetts Institute of Technology},
  type        = {A.I. Memo},
  number      = {1347},
  year        = {1992},
}

%% Series and volume moved out of booktitle into their own fields.
%% NOTE(review): volume corrected from "112" to 1112 (the LNCS volume for
%% ICANN'96) -- confirm against the proceedings.
@inproceedings{Scholkopf96invariances,
  author    = {Bernhard Sch{\"o}lkopf and Chris Burges and Vladimir Vapnik},
  title     = {Incorporating Invariances in Support Vector Learning Machines},
  booktitle = {Artificial Neural Networks --- {ICANN}'96},
  editor    = {C. von der Malsburg and W. von Seelen and J. C. Vorbr{\"u}ggen and B. Sendhoff},
  series    = {Lecture Notes in Computer Science},
  volume    = {1112},
  pages     = {47--52},
  publisher = {Springer},
  year      = {1996},
}

@inproceedings{Cho+Saul09,
  author    = {Youngmin Cho and Lawrence Saul},
  title     = {Kernel Methods for Deep Learning},
  editor    = NIPS22ed,
  booktitle = NIPS22,
  publisher = {NIPS Foundation},
  year      = {2010},
  pages     = {342--350},
}


@inproceedings{Linsker89,
  author    = {R. Linsker},
  title     = {An application of the principle of maximum information preservation to linear systems},
  booktitle = NIPS1,
  editor    = NIPS1ed,
  publisher = NIPS1publ,
  year      = {1989},
}

@article{An96AddingNoise,
  title   = {The effects of adding noise during backpropagation training on a generalization performance},
  author  = {Guozhong An},
  journal = {Neural Computation},
  year    = {1996},
  volume  = {8},
  number  = {3},
  pages   = {643--674},
}

@article{DruckerLeCun92,
  author  = {Harris Drucker and Yann LeCun},
  title   = {Improving generalisation performance using double back-propagation.},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {3},
  number  = {6},
  pages   = {991--997},
  year    = {1992},
}

@article{BellSejnowski-97,
  title   = {The independent components of natural scenes are edge filters},
  author  = {A. Bell and T. J. Sejnowski},
  journal = {Vision Research},
  year    = {1997},
  volume  = {37},
  pages   = {3327--3338},
}


%% Names must be separated by " and " (commas made BibTeX read the list as a
%% single malformed name). The original "\:" is a math-mode space, not the
%% umlaut accent.
@article{Dokur1997,
  author  = {Z{\"u}mray Dokur and Tamer {\"O}lmez and Ertugrul Yazgan and Okan K. Ersoy},
  title   = {Detection of {ECG} waveforms by neural networks},
  journal = {Medical Engineering \& Physics},
  volume  = {19},
  number  = {8},
  pages   = {738--741},
  month   = oct,
  year    = {1997},
}

@article{Hu1993,
  title   = {Applications of artificial neural networks for {ECG} signal detection and classification},
  author  = {Y. H. Hu and W. J. Tompkins and J. L. Urrusti and V. X. Afonso},
  journal = JEC,
  volume  = {26s},
  pages   = {66--73},
  year    = {1993},
}

@article{Unser1996,
  author  = {M. Unser and A. Aldroubi},
  title   = {A Review of Wavelets in Biomedical Applications},
  journal = {Proceedings of the {IEEE}},
  volume  = {84},
  number  = {4},
  pages   = {626--638},
  month   = apr,
  year    = {1996},
}

%% The misspelled "booktile" field was silently ignored, leaving the entry
%% without a booktitle. The year was missing; 2002 is taken from the key and
%% the ICASSP'2002 venue. Author spelling corrected to match the key.
@inproceedings{Povey+Woodland-2002,
  author    = {D. Povey and P. C. Woodland},
  title     = {Minimum error and {I}-smoothing for improved discriminative training},
  booktitle = {Proceedings of the International Conference on Acoustics,
               Speech, and Signal Processing (ICASSP'2002)},
  publisher = {IEEE},
  address   = {Orlando, Florida, USA},
  volume    = {1},
  pages     = {I-105--I-108},
  year      = {2002},
}

%% The second author was truncated to "Geoffrey E."; restored as Hinton,
%% matching how the name appears in other entries of this file -- confirm.
@incollection{Susskind2008,
  author    = {Joshua M. Susskind and Geoffrey E. Hinton and Javier R. Movellan and Adam K. Anderson},
  title     = {Generating Facial Expressions with Deep Belief Nets},
  booktitle = {Affective Computing, Emotion Modelling, Synthesis and Recognition},
  editor    = {V. Kordic},
  publisher = {ARS Publishers},
  pages     = {421--440},
  year      = {2008},
}

@incollection{Li2005,
  author    = {Peng Li and Kap Luk Chan and Sheng Fu and S. M. Krishnan},
  title     = {An Abnormal {ECG} Beat Detection Approach for Long-Term Monitoring of Heart Patients Based on Hybrid Kernel Machine Ensemble},
  booktitle = {Multiple Classifier Systems},
  series    = {Lecture Notes in Computer Science},
  volume    = {3541},
  pages     = {346--355},
  publisher = {Springer},
  address   = {Berlin / Heidelberg},
  year      = {2005},
}

@incollection{Hughes_NIPS2003,
  author    = {Nicholas P. Hughes and Lionel Tarassenko and Stephen J. Roberts},
  title     = {Markov Models for Automated {ECG} Interval Analysis},
  booktitle = NIPS16,
  editor    = NIPS16ed,
  publisher = NIPS16publ,
  address   = NIPS16addr,
  year      = {2004},
  keywords  = {hidden Markov models, Markov models, wavelets, segmentation, probabilistic models, biomedical signal processing, time series},
}

@inproceedings{Salem2009,
  title     = {Machine Learning in Electrocardiogram Diagnosis},
  author    = {Abdel-Badeeh M. Salem and Kenneth Revett and El-Sayed A. El-Dahshan},
  booktitle = {Proceedings of the International Multiconference on Computer Science and Information Technology},
  publisher = {IEEE},
  year      = {2009},
  volume    = {4},
  pages     = {429--433},
}

@book{Clifford2006,
  author    = {G. D. Clifford and F. Azuaje and P. E. McSharry},
  title     = {Advanced Methods and Tools for {ECG} Analysis},
  publisher = {Artech House Publishing},
  year      = {2006},
}

@inproceedings{Lin2009,
  author    = {Lin, Jessica and Li, Yuan},
  title     = {Finding Structural Similarity in Time Series Data Using Bag-of-Patterns Representation},
  booktitle = {SSDBM 2009: Proceedings of the 21st International Conference on Scientific and Statistical Database Management},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  pages     = {461--477},
  year      = {2009},
  isbn      = {978-3-642-02278-4},
  doi       = {10.1007/978-3-642-02279-1_33},
  location  = {New Orleans, LA, USA},
}

%% The third author's surname was broken across two lines as "Galv\," + "{a}o"
%% ("\," is a thin space, not an accent); restored to Galvao with a tilde.
@article{Froese2006,
  author    = {Froese, Tom and Hadjiloucas, Sillas and Galv{\~a}o, Roberto K. H. and Becerra, Victor M. and Coelho, Clarimar Jos{\'e}},
  title     = {Comparison of extrasystolic {ECG} signal classifiers using discrete wavelet transforms},
  journal   = {Pattern Recogn. Lett.},
  volume    = {27},
  number    = {5},
  pages     = {393--407},
  year      = {2006},
  issn      = {0167-8655},
  doi       = {10.1016/j.patrec.2005.09.002},
  publisher = {Elsevier Science Inc.},
  address   = {New York, NY, USA},
}

@article{Crowe1992,
  author  = {J. A. Crowe and N. M. Gibson and M. S. Woolfson and M. G. Somekh},
  title   = {Wavelet transform as a potential tool for {ECG} analysis and compression},
  journal = {Journal of Biomedical Engineering},
  volume  = {14},
  number  = {3},
  pages   = {268--272},
  month   = may,
  year    = {1992},
}

%% Journal name is supplied by the IEEE_trans_biomed string macro (declared outside this excerpt).
@article{Hilton1997,
  author  = "Hilton, Michael",
  title   = "Wavelet and Wavelet Packet Compression of Electrocardiograms",
  journal = IEEE_trans_biomed,
  volume  = "44",
  pages   = "394--402",
  year    = "1997",
}

%% month uses the standard three-letter macro so each style can format it.
@article{Li1995,
  author  = {C. Li and C. Zheng and C. Tai},
  title   = {Detection of {ECG} characteristic points using wavelet transforms},
  journal = IEEE_trans_biomed,
  volume  = {42},
  number  = {1},
  pages   = {21--28},
  month   = jan,
  year    = {1995},
}

%% A field may appear only once per entry, so all keywords live in a single
%% comma-separated keywords field. The doi field holds the bare DOI.
@article{Polat2007,
  author   = {Kemal Polat and Salih G{\"u}nes},
  title    = {Detection of {ECG} Arrhythmia using a differential expert system approach based on principal component analysis and least square support vector machine},
  journal  = {Applied Mathematics and Computation},
  volume   = {186},
  number   = {1},
  pages    = {898--906},
  year     = {2007},
  issn     = {0096-3003},
  doi      = {10.1016/j.amc.2006.08.020},
  url      = {http://www.sciencedirect.com/science/article/B6TY8-4KXDWBF-5/2/a9e1d7e2dfc4c88935386ea04ca9cb94},
  keywords = {ECG Arrhythmia, Principal component analysis (PCA), Least square support vector machine (LSSVM), ROC curves},
}

%% Journal name comes from the IJCAS string macro (declared outside this excerpt).
%% month uses the standard three-letter macro so each style can format it.
@article{Song2005,
  author  = {Mi Hye Song and Jeon Lee and Sung Pil Cho and Kyoung Joung Lee and Sun Kook Yoo},
  title   = {Support Vector Machine Based Arrhythmia Classification Using Reduced Features},
  journal = IJCAS,
  volume  = {3},
  number  = {4},
  pages   = {571--579},
  month   = dec,
  year    = {2005},
}

%% {\"U} is written as a BibTeX special character so the surname sorts and
%% labels as starting with U. The doi field holds the bare DOI.
@article{Ubeyli2009,
  author    = {Elif Derya {\"U}beyli},
  title     = {Combining recurrent neural networks with eigenvector methods for classification of {ECG} beats},
  journal   = DSP,
  volume    = {19},
  number    = {2},
  pages     = {320--329},
  year      = {2009},
  issn      = {1051-2004},
  doi       = {10.1016/j.dsp.2008.09.002},
  publisher = {Academic Press, Inc.},
  address   = {Orlando, FL, USA},
}

%% {\"U} is written as a BibTeX special character so the surname sorts and
%% labels as starting with U.
@article{Ubeyli2007,
  author  = {Elif Derya {\"U}beyli},
  title   = {{ECG} beats classification using multiclass support vector machines with error correcting output codes},
  journal = DSP,
  volume  = {17},
  pages   = {675--684},
  year    = {2007},
}

%% month uses the standard three-letter macro so each style can format it.
@article{Soman2005,
  author  = {T. Soman and P. O. Bobbie},
  title   = {Classification of Arrhythmia Using Machine Learning Techniques},
  journal = {WSEAS Transactions on Computers},
  volume  = {4},
  number  = {6},
  pages   = {548--552},
  month   = jun,
  year    = {2005},
}

@inproceedings{Chengwei2006,
  author    = "Li Chengwei and Wang Shoubin and Xu Aijun and Peng Hui",
  title     = "Clinical Diagnosis of Cardiac Disease Based on Support Vector Machine",
  editor    = "R. Magjarevic and J. H. Nagel",
  booktitle = "World Congress on Medical Physics and Biomedical Engineering",
  series    = "IFMBE Proceedings",
  volume    = "14",
  pages     = "1273--1276",
  publisher = "Springer Berlin Heidelberg",
  year      = "2006",
}

%% Journal name comes from the BME string macro (declared outside this excerpt).
%% month uses the standard three-letter macro so each style can format it.
@article{Chiu2005,
  author  = {Chuang-Chien Chiu and Tong-Hong Lin and Ben-Yi Liau},
  title   = {Using correlation coefficient in {ECG} waveform for arrhythmia detection},
  journal = BME,
  volume  = {17},
  number  = {3},
  pages   = {147--152},
  month   = jun,
  year    = {2005},
}

%% month uses the standard three-letter macro so each style can format it.
@article{Silipo1998,
  author  = {Rosaria Silipo and Carlo Marchesi},
  title   = {Artificial Neural Networks for Automatic {ECG} Analysis},
  journal = IEEE_trans_SP,
  volume  = {46},
  number  = {5},
  pages   = {1417--1425},
  month   = may,
  year    = {1998},
}

%% month uses the standard three-letter macro so each style can format it.
@article{Osowski2004,
  author  = {Stanislaw Osowski and Linh Tran Hoai and Tomasz Markiewicz},
  title   = {Support Vector Machine-Based Expert System for Reliable Heartbeat Recognition},
  journal = IEEE_trans_biomed,
  volume  = {51},
  number  = {4},
  pages   = {582--589},
  month   = apr,
  year    = {2004},
}

%% author, year and note are filled in from the string macros PhysioNetAuthors,
%% PhysioNetYear and PhysioNetNote, which are declared outside this excerpt.
@article{PhysioNet,
  author  = PhysioNetAuthors,
  title   = {{PhysioBank, PhysioToolkit, and PhysioNet}: Components of a New Research Resource for Complex Physiologic Signals},
  journal = {Circulation},
  volume  = {101},
  number  = {23},
  pages   = {e215--e220},
  year    = PhysioNetYear,
  note    = PhysioNetNote,
}

%% {SAX} is brace-protected so sentence-casing styles keep the acronym in capitals.
%% The citeulike-*, posted-at and priority fields are export metadata that
%% standard styles ignore.
@article{Lin2007,
  author  = {Lin, Jessica and Keogh, Eamonn and Wei, Li and Lonardi, Stefano},
  title   = {Experiencing {SAX}: a novel symbolic representation of time series},
  journal = DMKD,
  volume  = {15},
  number  = {2},
  pages   = {107--144},
  month   = oct,
  day     = {18},
  year    = {2007},
  doi     = {10.1007/s10618-007-0064-z},
  url     = {http://dx.doi.org/10.1007/s10618-007-0064-z},
  keywords = {simulation},
  citeulike-article-id = {2821475},
  citeulike-linkout-0 = {http://dblp.uni-trier.de/rec/bibtex/journals/datamine/LinKWL07},
  citeulike-linkout-1 = {http://dx.doi.org/10.1007/s10618-007-0064-z},
  citeulike-linkout-2 = {http://www.springerlink.com/content/g69808822l82t325},
  posted-at = {2008-05-21 23:56:04},
  priority = {2},
}

%% Describes the same paper as entry Lin2009; this variant takes the
%% proceedings title from the SSDBM2009 string macro (declared outside this
%% excerpt). The key is kept so existing citations keep resolving.
@inproceedings{Lin2010,
  author    = {Lin, Jessica and Li, Yuan},
  title     = {Finding Structural Similarity in Time Series Data Using Bag-of-Patterns Representation},
  booktitle = SSDBM2009,
  year      = {2009},
  pages     = {461--477},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  location  = {New Orleans, LA, USA},
  isbn      = {978-3-642-02278-4},
  doi       = {10.1007/978-3-642-02279-1_33},
}

%% {ARTMAP} is brace-protected so sentence-casing styles keep the acronym in capitals.
@article{Ham1996,
  author  = {F. M. Ham and Soowhan Han},
  title   = {Classification of cardiac arrhythmias using fuzzy {ARTMAP}},
  journal = IEEE_trans_biomed,
  volume  = {43},
  number  = {4},
  pages   = {425--429},
  month   = apr,
  year    = {1996},
}
%% A field may appear only once per entry, so all keywords live in a single
%% comma-separated keywords field. The doi field holds the bare DOI.
@article{Engin2004,
  author   = {Mehmet Engin},
  title    = {{ECG} beat classification using neuro-fuzzy network},
  journal  = PRL,
  volume   = {25},
  number   = {15},
  pages    = {1715--1722},
  year     = {2004},
  issn     = {0167-8655},
  doi      = {10.1016/j.patrec.2004.06.014},
  url      = {http://www.sciencedirect.com/science/article/B6V15-4D0Y5TH-2/2/b83f364f61d79f96abeb1bc1b1898ab9},
  keywords = {ECG beat classification, MIT/BIH database, Neuro-fuzzy networks, Higher-order statistics, Wavelet transform, AR modelling, Pattern recognition},
}

@article{Turaga2010,
  author  = "S. C. Turaga and J. F. Murray and V. Jain and F. Roth and M. Helmstaedter and K. Briggman and W. Denk and H. S. Seung",
  title   = "Convolutional networks can learn to generate affinity graphs for image segmentation",
  journal = "Neural Computation",
  year    = "2010",
  volume  = "22",
  pages   = "511--538",
}

%% Initials are space-separated so that abbreviating styles keep every initial.
@article{Hahnloser-2003,
  author  = {Richard H. R. Hahnloser and H. Sebastian Seung and J. J. Slotine},
  title   = {Permitted and forbidden sets in symmetric threshold-linear networks},
  journal = {Neural Computation},
  volume  = {15},
  pages   = {621--638},
  year    = {2003},
}

%% The arXiv identifier is stored in the institution field of this report entry.
@techreport{Jenatton-2009,
  author      = "Jenatton, R. and Audibert, J.-Y. and Bach, F.",
  title       = "Structured Variable Selection with Sparsity-Inducing Norms",
  institution = "arXiv:0904.3523",
  year        = "2009",
}

%% Journal name is supplied by the jmlr string macro (declared outside this excerpt).
@article{Erhan2010,
  author   = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Manzagol, Pierre-Antoine and Vincent, Pascal and Bengio, Samy},
  title    = {Why Does Unsupervised Pre-training Help Deep Learning?},
  journal  = jmlr,
  volume   = "11",
  pages    = "625--660",
  month    = feb,
  year     = "2010",
  abstract = {Much recent research has been devoted to learning algorithms for deep architectures such as Deep Belief Networks and stacks of auto-encoder variants, with impressive results obtained in several areas, mostly on vision and language datasets. The best results obtained on supervised learning tasks involve an unsupervised learning component, usually in an unsupervised pre-training phase. Even though these new algorithms have enabled training deep models, many questions remain as to the nature of this difficult learning problem. The main question investigated here is the following: why does unsupervised pre-training work and why does it work so well? Answering these questions is important if learning in deep architectures is to be further improved. We propose several explanatory hypotheses and test them through extensive simulations. We empirically show the influence of pre-training with respect to architecture depth, model capacity, and number of training examples. The experiments confirm and clarify the advantage of unsupervised pre-training. The results suggest that unsupervised pre-training guides the learning towards basins of attraction of minima that are better in terms of the underlying data distribution; the evidence from these results supports a regularization explanation for the effect of pre-training.},
}

%% journal and note come from the string macros FTML and Bengio2009FTML_note
%% (declared outside this excerpt). The abstract uses only standard LaTeX
%% (\emph) so that a style printing abstracts needs no custom macro.
@article{Bengio2009FTML,
  author   = {Bengio, Yoshua},
  title    = {Learning deep architectures for {AI}},
  journal  = FTML,
  volume   = {2},
  number   = {1},
  pages    = {1--127},
  year     = {2009},
  note     = Bengio2009FTML_note,
  abstract = {Theoretical results suggest that in order to learn the kind of
    complicated functions that can represent high-level abstractions (e.g. in
    vision, language, and other AI-level tasks), one may need \emph{deep
    architectures}. Deep architectures are composed of multiple levels of
    non-linear operations, such as in neural nets with many hidden layers or in
    complicated propositional formulae re-using many sub-formulae. Searching the
    parameter space of deep architectures is a difficult task, but learning
    algorithms such as those for Deep Belief Networks have recently been
    proposed to tackle this problem with notable success, beating the
    state-of-the-art in certain areas. This paper discusses the motivations and
    principles regarding learning algorithms for deep architectures, in
    particular those exploiting as building blocks unsupervised learning of
    single-layer models such as Restricted {Boltzmann} Machines, used to
    construct deeper models such as Deep Belief Networks.},
}

%% optnote, topics and cat are project-specific fields that standard styles ignore.
@article{Bengio1994ITNN,
  author   = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
  title    = {Learning Long-Term Dependencies with Gradient Descent is Difficult},
  journal  = IEEE_trans_NN,
  volume   = {5},
  number   = {2},
  pages    = {157--166},
  year     = {1994},
  abstract = {Recurrent neural networks can be used to map input sequences to output sequences, such as for recognition, production or prediction problems. However, practical difficulties have been reported in training recurrent neural networks to perform tasks in which the temporal contingencies present in the input/output sequences span long intervals. We show why gradient based learning algorithms face an increasingly difficult problem as the duration of the dependencies to be captured increases. These results expose a trade-off between efficient learning by gradient descent and latching on information for long periods. Based on an understanding of this problem, alternatives to standard gradient descent are considered.},
  optnote  = {(Special Issue on Recurrent Neural Networks)},
  topics   = {LongTerm},
  cat      = {J},
}

%% NOTE: the citation key says 1992 but the year field is 2002; the key is
%% kept unchanged so existing citations keep resolving.
%% {QRS} is brace-protected so sentence-casing styles keep the acronym in capitals.
@article{Kohler1992,
  author   = {Kohler, B. U. and Hennig, C. and Orglmeister, R.},
  title    = {The principles of software {QRS} detection},
  journal  = eng_med_bio,
  volume   = {21},
  number   = {1},
  pages    = {42--57},
  year     = {2002},
  url      = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=993193},
  keywords = {detector, ecg\_processing, qrs, qt\_interval, review\_article, rr\_interval},
  abstract = {The QRS complex is the most striking waveform within the electrocardiogram (ECG). Since it reflects the electrical activity within the heart during the ventricular contraction, the time of its occurrence as well as its shape provide much information about the current state of the heart. Due to its characteristic shape it serves as the basis for the automated determination of the heart rate, as an entry point for classification schemes of the cardiac cycle, and often it is also used in ECG data compression algorithms. In that sense, QRS detection provides the fundamentals for almost all automated ECG analysis algorithms. Software QRS detection has been a research topic for more than 30 years. The evolution of these algorithms clearly reflects the great advances in computer technology. Within the last decade many new approaches to QRS detection have been proposed; for example, algorithms from the field of artificial neural networks, genetic algorithms, wavelet transforms, filter banks as well as heuristic methods mostly based on nonlinear transforms. The authors provide an overview of these recent developments as well as of formerly proposed algorithms.},
  citeulike-article-id = {546409},
  citeulike-linkout-0 = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=993193},
  posted-at = {2007-11-25 20:38:19},
  priority = {2},
}

%% NOTE(review): typed as a conference paper because the venue macro
%% (ICTAI06, declared outside this excerpt) and the DOI both point to the
%% ICTAI 2006 proceedings -- confirm the macro expands to a proceedings title.
%% The placeholder volume 0 from the publisher export is not carried over.
@inproceedings{Thomas2006,
  author    = {Julien Thomas and Cedric Rose and Francois Charpillet},
  title     = {A {Multi-HMM} Approach to {ECG} Segmentation},
  booktitle = ICTAI06,
  year      = {2006},
  pages     = {609--616},
  issn      = {1082-3409},
  doi       = {10.1109/ICTAI.2006.17},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
}

%% {\'e} is written as a BibTeX special character so the surname sorts and
%% labels correctly. The proceedings title comes from the iapr string macro
%% (declared outside this excerpt).
@inproceedings{Cortes+al-2000,
  author    = {Juan Carlos P{\'e}rez-Cortes and Rafael Llobet and Joaquim Arlandis},
  title     = {Fast and Accurate Handwritten Character Recognition Using Approximate Nearest Neighbours Search on Large Databases},
  booktitle = iapr,
  year      = {2000},
  pages     = {767--776},
  isbn      = {3-540-67946-4},
  publisher = {Springer-Verlag},
  address   = {London, UK},
}


%% Full variant of this reference (with month, DOI and ISSN); a shorter
%% variant lives under the key Oliveira+al-2002-short.
@article{Oliveira+al-2002,
  author  = {Oliveira, L. S. and Sabourin, R. and Bortolozzi, F. and Suen, C. Y.},
  title   = {Automatic recognition of handwritten numerical strings: a recognition and verification strategy},
  journal = ieeetpami,
  volume  = {24},
  number  = {11},
  pages   = {1438--1454},
  month   = nov,
  year    = {2002},
  doi     = {10.1109/TPAMI.2002.1046154},
  issn    = {0162-8828},
}

%% Short variant of entry Oliveira+al-2002 (no month, DOI or ISSN).
@article{Oliveira+al-2002-short,
  author  = {Oliveira, L. S. and Sabourin, R. and Bortolozzi, F. and Suen, C. Y.},
  title   = {Automatic recognition of handwritten numerical strings: a recognition and verification strategy},
  journal = ieeetpami,
  volume  = {24},
  number  = {11},
  pages   = {1438--1454},
  year    = {2002},
}

%% Cross-references the proceedings entry DBLP:conf/icdar/2003; classic BibTeX
%% requires that parent entry to appear later in the file than this one.
@inproceedings{SimardSP03,
  author    = {Patrice Simard and David Steinkraus and John C. Platt},
  title     = {Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis},
  booktitle = {ICDAR},
  year      = {2003},
  pages     = {958--962},
  ee        = {http://csdl.computer.org/comp/proceedings/icdar/2003/1960/02/196020958abs.htm},
  crossref  = {DBLP:conf/icdar/2003},
  bibsource = {DBLP, http://dblp.uni-trier.de},
}

%% Short variant of entry SimardSP03 (no crossref, no DBLP metadata).
@inproceedings{SimardSP03-short,
  author    = {Patrice Simard and David Steinkraus and John C. Platt},
  title     = {Best Practices for Convolutional Neural Networks Applied to Visual Document Analysis},
  booktitle = {ICDAR},
  year      = {2003},
  pages     = {958--962},
}

%% NOTE(review): the page range previously read 906--1911 (a 1000-page span);
%% the first page had lost its leading digit. Confirm 1906--1911 against the
%% proceedings.
@inproceedings{Milgram+al-2005,
  author    = {Milgram, J. and Cheriet, M. and Sabourin, R.},
  title     = {Estimating accurate multi-class probabilities with support vector machines},
  booktitle = {Int. Joint Conf. on Neural Networks},
  year      = {2005},
  pages     = {1906--1911},
  location  = {Montreal, Canada},
}

%% Parent proceedings entry referenced through crossref by SimardSP03. Classic
%% BibTeX requires it to stay below every entry that cross-references it.
@proceedings{DBLP:conf/icdar/2003,
  title     = "7th International Conference on Document Analysis and Recognition (ICDAR 2003), 2-Volume Set, 3-6 August 2003, Edinburgh, Scotland, UK",
  booktitle = "ICDAR",
  year      = "2003",
  publisher = "IEEE Computer Society",
  isbn      = "0-7695-1960-1",
  bibsource = "DBLP, http://dblp.uni-trier.de",
}


%% NOTE(review): the author list previously ended with "Catolica Parana",
%% which looks like an affiliation captured as a person. The list below
%% (including Henniges) should be confirmed against the published article.
@article{Granger+al-2007,
  author  = {Granger, Eric and Henniges, Philippe and Sabourin, Robert and Oliveira, Luiz S.},
  title   = {Supervised Learning of Fuzzy {ARTMAP} Neural Networks Through Particle Swarm Optimization},
  journal = jprr,
  volume  = {2},
  number  = {1},
  pages   = {27--60},
  year    = {2007},
}

@inproceedings{SnowEtAl2008,
  author    = "Snow, R. and O'Connor, B. and Jurafsky, D. and Ng, A.",
  title     = "Cheap and Fast -- But is it Good? Evaluating Non-Expert Annotations for Natural Language Tasks",
  booktitle = "Proc. Empirical Methods in NLP",
  year      = "2008",
  pages     = "254--263",
}

%% The report number lives in the number field and the issuing body in
%% institution. The CiteSeerX identifier is kept in a non-standard field
%% because it is not a DOI.
%% NOTE(review): the truncated author "Gerald T. C" (a partial repeat of
%% Candela) was dropped -- confirm the author list against the report.
@techreport{Garris94+al-1994,
  author      = {Michael D. Garris and James L. Blue and Gerald T. Candela and Darrin L. Dimmick and Jon Geist and Patrick J. Grother and Stanley A. Janet and Charles L. Wilson},
  title       = {{NIST} Form-Based Handprint Recognition System},
  institution = {National Institute of Standards and Technology},
  number      = {NISTIR 5469},
  year        = {1994},
  note        = {Technical report and CD-ROM},
  citeseerx   = {10.1.1.45.1560},
}

%% The service name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{SorokinAndForsyth2008,
  author    = {Sorokin, A. and Forsyth, D.},
  title     = {Utility data annotation with {Amazon Mechanical Turk}},
  booktitle = {CVPR Workshops},
  year      = {2008},
  pages     = {1--8},
}

%% bibsource records where the reference was copied from; standard styles ignore it.
@inproceedings{Grother-1995,
  author    = "Grother, P. J.",
  title     = "Handprinted Forms and Character Database, {NIST} Special Database 19",
  booktitle = "National Institute of Standards and Technology (NIST) Intelligent Systems Division (NISTIR)",
  year      = "1995",
  bibsource = "http://www.visionbib.com/bibliography/char1015.html#TT105853",
}

@inproceedings{whitehill09,
  author    = "J. Whitehill and P. Ruvolo and T. Wu and J. Bergsma and J. Movellan",
  title     = "Whose Vote Should Count More: Optimal Integration of Labels from Labelers of Unknown Expertise",
  booktitle = "NIPS 22",
  year      = "2009",
  pages     = "2035--2043",
}

%% Placeholder entry: author, title and institution are literal anonymised strings.
@techreport{ift6266-tr-anonymous,
  author      = {Anonymous authors},
  title       = {Anonymous title},
  institution = {University X.},
  year        = {2010},
}