# HG changeset patch # User pascanur # Date 1284754438 14400 # Node ID fe6c25eb1e3741533e9c1e792e4efd9a2bca668f # Parent a0f178bc905277aad16a5ba41352146a56ad075c# Parent fab72f424ee0eec654f9beda4735e6c26bfbb7ad merge diff -r a0f178bc9052 -r fe6c25eb1e37 .hgignore --- a/.hgignore Fri Sep 17 16:12:33 2010 -0400 +++ b/.hgignore Fri Sep 17 16:13:58 2010 -0400 @@ -2,5 +2,6 @@ *~ *.swp *.pyc +*.orig core.* html \ No newline at end of file diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/API_coding_style.txt --- a/doc/v2_planning/API_coding_style.txt Fri Sep 17 16:12:33 2010 -0400 +++ b/doc/v2_planning/API_coding_style.txt Fri Sep 17 16:13:58 2010 -0400 @@ -2,6 +2,10 @@ Coding Style Guidelines ========================= +Note: until the Pylearn documentation is properly compiled, you can view +the HTML version of this document `here +`_. + Main Goals ========== @@ -58,20 +62,81 @@ """ + * Standard library imports can (and should) be on the same line, to avoid + wasting space on straighforward imports: + + .. code-block:: python + + # Good. + import os, sys, time + # Good when it does not fit on a single line. + import std_lib_module_1, std_lib_module_2, std_lib_module_3 + import std_lib_module_4, std_lib_module_5, std_lib_module_6 + # Bad. + import os + import sys + import time + + * Importing class / functions from a module is allowed when these are + used multiple times, and no ambiguity is possible. + + .. code-block:: python + + # Good when Bar and Blah are used many times. + from foo import Bar, Blah + do_something_with(Bar(), Blah(), Bar(), Blah(), Bar(), Blah()) + # Good in most situations. + import foo + do_something_with(foo.Bar(), foo.Blah()) + # Bad. + from foo import * + from numpy import any # Potential ambiguity with __builtin__.any + Excerpts ~~~~~~~~ We emphasize here a few important topics that are found in the official guidelines: + * Only use ASCII characters in code files. + + * Code indent must be done with four blank characters (no tabs). 
+ + * Limit lines to 79 characters. + + * Naming conventions: ``ClassName``, ``TOP_LEVEL_CONSTANT``, + ``everything_else``. + + * Comments should start with a capital letter (unless the first word is a + code identifier) and end with a period (short inline comments may skip + the period at the end). + + * Imports should be listed in alphabetical order. It makes it easier to + verify that something is imported, and avoids duplicated imports. + + * Use absolute imports only. This is compatible across a wider range of + Python versions, and avoids confusion about what is being + imported. + + * Avoid renaming imported modules. This makes code more difficult to + re-use, and is not grep-friendly. + + .. code-block:: python + + # Good. + from theano import tensor + # Bad. + from theano import tensor as T + * Avoid using lists if all you care about is iterating on something. Using lists: - - uses more memory (and possibly more CPU if the code may break out of - the iteration), - - can lead to ugly code when converted to Python 3 with 2to3, - - can have a different behavior if evaluating elements in the list has - side effects (if you want these side effects, make it explicit by - assigning the list to some variable before iterating on it). + + - uses more memory (and possibly more CPU if the code may break out of + the iteration), + - can lead to ugly code when converted to Python 3 with 2to3, + - can have a different behavior if evaluating elements in the list has + side effects (if you want these side effects, make it explicit by + assigning the list to some variable before iterating on it). +------------------------+------------------------+ | Iterative version | List version | @@ -101,7 +166,7 @@ for f_x in imap(f, x): ... all_f_x = map(f, x) - map(f, x) + map(f, x) # f has some side effect. # Bad. for element in map(f, x): ... 
@@ -133,12 +198,107 @@ has_key = my_dict.has_key(key) has_substring = my_string.find(substring) >= 0 + * Do not use mutable arguments as default values. Instead, use a helper + function (conditional expressions are forbidden at this point, see + below). + + .. code-block:: python + + # Good. + def f(array=None): + array = pylearn.if_none(array, []) + ... + # Bad. + def f(array=[]): # Dangerous if `array` is modified down the road. + ... + + * Use a leading underscore '_' in names of internal attributes / methods, + but avoid the double underscore '__' unless you know what you are + doing. + Additional Recommendations -------------------------- Things you should do even if they are not listed in official guidelines: + * All Python code files should start like this: + + .. code-block:: python + + """Module docstring as the first line, as usual.""" + + __authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley" + __copyright__ = "(c) 2010, Universite de Montreal" + __license__ = "3-clause BSD License" + __contact__ = "Name Of Current Guardian of this file " + + * Use ``//`` for integer division and ``/ float(...)`` if you want the + floating point operation (for readability and compatibility across all + versions of Python). + + .. code-block:: python + + # Good. + n_samples_per_split = n_samples // n_splits + mean_x = sum(x) / float(len(x)) + # Bad. + n_samples_per_split = n_samples / n_splits + mean_x = sum(x) / len(x) + + * Always raise an exception with ``raise MyException(args)`` where ``MyException`` + inherits from ``Exception``. This is required for compatibility across + all versions of Python. + + .. code-block:: python + + # Good. + raise NotImplementedError('The Pylearn team is too lazy.') + # Bad. + raise NotImplementedError, 'The Pylearn team is too lazy.' + raise 'The Pylearn team is too lazy to implement this.' + + * Use either ``try ... except`` or ``try ... 
finally``, but do not mix + ``except`` with ``finally`` (which is not supported in Python 2.4). + You can however embed one into the other to mimic the ``try ... except ... + finally`` behavior. + + .. code-block:: python + + # Good. + try: + try: + something_that_may_fail() + except SomeError: + do_something_if_it_failed() + finally: + always_do_this_regardless_of_what_happened() + # Bad. + try: + something_that_may_fail() + except SomeError: + do_something_if_it_failed() + finally: + always_do_this_regardless_of_what_happened() + + * No conditional expression (not supported in Python 2.4). These are + expressions of the form ``x = y if condition else z``. + + * Do not use the ``all`` and ``any`` builtin functions (they are not supported + in Python 2.4). Instead, import them from ``theano.gof.python25`` (or + use ``numpy.all`` / ``numpy.any`` for array data). + + * Do not use the ``hashlib`` module (not supported in Python 2.4). We will + probably provide a wrapper around it to be compatible with all Python + versions. + + * Use ``numpy.inf`` and ``numpy.nan`` rather than + ``float('inf')`` / ``float('nan')`` (should be slightly more efficient even + if efficiency is typically not an issue here, the main goal being code + consistency). Also, always use ``numpy.isinf`` / ``numpy.isnan`` to + test infinite / NaN values. This is important because ``numpy.nan != + float('nan')``. + * Avoid backslashes whenever possible. They make it more difficult to edit code, and they are ugly (as well as potentially dangerous if there are trailing white spaces). @@ -195,6 +355,50 @@ my_everything]: ... + * Use the ``key`` argument instead of ``cmp`` when sorting (for Python 3 + compatibility). + + .. code-block:: python + + # Good. + my_list.sort(key=abs) + # Bad. + my_list.sort(cmp=lambda x, y: cmp(abs(x), abs(y))) + + * Whenever you read / write binary files, specify it in the mode ('rb' for + reading, 'wb' for writing). 
This is important for cross-platform and + Python 3 compatibility (e.g. when pickling / unpickling objects). + + .. code-block:: python + + # Good. + cPickle.dump(obj, open('my_obj.pkl', 'wb'), protocol=-1) + # Bad. + cPickle.dump(obj, open('my_obj.pkl', 'w'), protocol=-1) + + * Avoid tuple parameter unpacking as it can lead to very ugly code when + converting to Python 3. + + .. code-block:: python + + # Good. + def f(x, y_z): + y, z = y_z + ... + # Bad. + def f(x, (y, z)): + ... + + * Only use ``cPickle``, not ``pickle`` (except for debugging purposes since + error messages from ``pickle`` are sometimes easier to understand). + + * A script's only top-level code should be something like: + + .. code-block:: python + + if __name__ == '__main__': + sys.exit(main()) + The ``logging`` Module vs. the ``warning`` Module ================================================= @@ -246,10 +450,73 @@ Code Sample =========== -The following code sample illustrates many of the coding guidelines one should -follow in Pylearn. +The following code sample illustrates some of the coding guidelines one should +follow in Pylearn. This is still a work-in-progress. .. code-block:: python + #! /usr/bin/env python + + """Sample code. There may still be mistakes / missing elements.""" + + __authors__ = "Olivier Delalleau" + __copyright__ = "(c) 2010, Universite de Montreal" + __license__ = "3-clause BSD License" + __contact__ = "Olivier Delalleau " + + # Standard library imports are on a single line. import os, sys, time + # Third-party imports come after standard library imports, and there is + # only one import per line. Imports are sorted lexicographically. + import numpy + import scipy + import theano + # Put 'from' imports below. + from numpy import argmax + from theano import tensor + + # Application-specific imports come last. 
+ from pylearn import dataset + from pylearn.optimization import minimize + + def print_files_in(directory): + """Print the first line of each file in given directory.""" + # TODO To be continued... + + def main(): + if len(sys.argv) != 2: + # Note: conventions on how to display script documentation and + # parse arguments are still to-be-determined. + print("""\ + Usage: %s + Print first line of each file in given directory (in alphabetic order).""" + % os.path.basename(sys.argv[0])) + return 1 + print_files_in(sys.argv[1]) + return 0 + + # Top-level executable code should be minimal. + if __name__ == '__main__': + sys.exit(main()) + + +Automatic Code Verification +=========================== + +Tools will be available to make it easier to automatically ensure that code +committed to Pylearn complies with the above specifications. This work is not +finalized yet, but David started a `Wiki page`_ with helpful configuration +tips for Vim. + +.. _Wiki page: http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations + +TODO +==== + +Things still missing from this document, being discussed in coding_style.txt: + - Proper style for C code and Mercurial commits + - Enforcing 100% test coverage of the code base + - Providing ways to add type checking for function arguments + - Conventions for script usage documentation and argument parsing + diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/API_formulas.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/v2_planning/API_formulas.txt Fri Sep 17 16:13:58 2010 -0400 @@ -0,0 +1,96 @@ +.. _v2planning_formulas: + +Math formulas API +================= + +Why we need a formulas API +-------------------------- + +There are a few reasons why having a library of mathematical formulas for Theano is a good idea: + +* Some formulas need special handling for the GPU. + * Sometimes we need to cast to floatX... +* Some formulas have numerical stability problems. 
+* Some formulas' gradients have numerical stability problems. (This happens more frequently than the previous ones.) + * If Theano does not always do some stability optimization, we could do it manually in the formulas. +* Some formulas are complex to implement and take many tries to get right. +* We can mimic the hierarchy of other libraries to ease the migration to Theano. + +Having a library helps in that we solve those problems only once. + +What is a formula +----------------- + +We define formulas as something that does not have a state. They are implemented as +Python functions that take Theano variables as input and output Theano +variables. If you want state, look at what the other committees will do. + +Formulas documentation +---------------------- + +We must respect what the coding committee has set for the docstring of the file and of the function. + +* A LaTeX mathematical description of the formula (for picture representation in generated documentation) +* Tags (for searching): + * a list of lower-level functions used + * category (name of the submodule itself) +* Tell if we did some work to make it more numerically stable. Does Theano do the optimization needed? +* Tell if the gradient is numerically stable. Does Theano do the optimization needed? +* Tell if it works / does not work / is unknown to work on GPU. +* Tell alternate names. +* Tell the domain and range of the input/output (range should use the English notation for including or excluding endpoints) + +Proposed hierarchy +------------------ + +Here is the proposed hierarchy for formulas: + +* pylearn.formulas.costs: generic / common cost functions, e.g. various cross-entropies, squared error, + abs. 
error, various sparsity penalties (L1, Student) +* pylearn.formulas.regularization: formulas for regularization +* pylearn.formulas.linear: formulas for linear classifier, linear regression, factor analysis, PCA +* pylearn.formulas.nnet: formulas for building layers of various kinds, various activation functions, + layers which could be plugged with various costs & penalties, and stacked +* pylearn.formulas.ae: formulas for auto-encoders and denoising auto-encoder variants +* pylearn.formulas.noise: formulas for corruption processes +* pylearn.formulas.rbm: energies, free energies, conditional distributions, Gibbs sampling +* pylearn.formulas.trees: formulas for decision trees +* pylearn.formulas.boosting: formulas for boosting variants +* pylearn.formulas.maths for other math formulas +* pylearn.formulas.scipy.stats: example to implement the same interface as existing lib + +etc. + +Example +------- +.. code-block:: python + + """ + This script defines a few often used cost functions. + """ + import theano + from theano import tensor + from tags import tags + + @tags('cost','binary','cross-entropy') + def binary_crossentropy(output, target): + """ Compute the crossentropy of binary output wrt binary target. + + .. math:: + L_{CE} \equiv -(t\log(o) + (1-t)\log(1-o)) + + :type output: Theano variable + :param output: Binary output or prediction :math:`\in[0,1]` + :type target: Theano variable + :param target: Binary target usually :math:`\in\{0,1\}` + """ + return -(target * tensor.log(output) + (1.0 - target) * tensor.log(1.0 - output)) + + +TODO +---- +* define a list of search tags to start with +* Add to the HTML page a list of the tags and, for each tag, a list of the functions associated with it. +* move existing formulas to pylearn as examples and add other basic ones. +* theano.tensor.nnet will probably be copied to pylearn.formulas.nnet and deprecated. 
+ diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/API_learner.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/v2_planning/API_learner.txt Fri Sep 17 16:13:58 2010 -0400 @@ -0,0 +1,95 @@ +# A list of "task types" + +''' + List of tasks types: + Attributes + + sequential + spatial + structured + semi-supervised + missing-values + + + Supervised (x,y) + + classification + regression + probabilistic classification + ranking + conditional density estimation + collaborative filtering + ordinal regression ?= ranking + + Unsupervised (x) + + de-noising + feature learning ( transformation ) PCA, DAA + density estimation + inference + + Other + + generation (sampling) + structure learning ??? + + +Notes on metrics & statistics: + - some are applied to an example, others on a batch + - most statistics are on the dataset +''' + + +class Learner(Object): + ''' + Takes data as inputs, and learns a prediction function (or several). + + A learner is parametrized by hyper-parameters, which can be set from the + outside (a "client" from Learner, that can be a HyperLearner, a + Tester,...). + + The data can be given all at a time as a data set, or incrementally. + Some learner need to be fully trained in one step, whereas other can be + trained incrementally. + + The question of statistics collection during training remains open. 
+ ''' + #def use_dataset(dataset) + + # return a dictionary of hyperparameters names(keys) + # and value(values) + def get_hyper_parameters() + def set_hyper_parameters(dictionary) + + + + + # Ver B + def eval(dataset) + def predict(dataset) + + # Trainable + def train(dataset) # train until complition + + # Incremental + def use_dataset(dataset) + def adapt(n_steps =1) + def has_converged() + + # + + +# Some example cases + +class HyperLearner(Learner): + + ### def get_hyper_parameter_distribution(name) + def set_hyper_parameters_distribution(dictionary) + + +def bagging(learner_factory): + for i in range(N): + learner_i = learner_factory.new() + # todo: get dataset_i ?? + learner_i.use_dataset(dataset_i) + learner_i.train() diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/coding_style.txt --- a/doc/v2_planning/coding_style.txt Fri Sep 17 16:12:33 2010 -0400 +++ b/doc/v2_planning/coding_style.txt Fri Sep 17 16:13:58 2010 -0400 @@ -11,6 +11,15 @@ Open for public debate ---------------------- + * File header: + - Do we put the accents in 'Universite de Montreal'? + OD: No (restricting code to ASCII characters is much safer) + - Do we put the Mercurial version number in each file? + OD: No (useless in my experience, if it's a release the version + number can be provided in the README for instance, and in + addition Mercurial IDs cannot be easily compared to figure + out which of two versions is most recent) + * Avoid contractions in code comments (particularly in documentation): "We do not add blue to red because it does not look good" rather than "We don't add blue to red because it doesn't look good". @@ -288,86 +297,13 @@ Have a sample code that showcases everything one should comply to. -Some coding guidelines (work-in-progress from OD) -------------------------------------------------- - - - * Use the `key` argument instead of `cmp` when sorting (for Python 3 - compatibility). 
- Yes: - my_list.sort(key=abs) - No: - my_list.sort(cmp=lambda x, y: cmp(abs(x), abs(y))) - - * Use // for integer division (for readability and Python 3 compatibility). - Yes: - n_samples_per_split = n_samples // n_splits - No: - n_samples_per_split = n_samples / n_splits - - * Only use ASCII characters in code files. - - * Code indent must be done with four blank characters (not with tabs). - - * Limit lines to 79 characters. - - * Comments should start with a capital letter (unless the first word is a - code identifier) and end with a period (very short inline comments may - ignore this rule). - - * Whenever you read / write binary files, specify it in the mode ('rb' for - reading, 'wb' for writing). This is important for cross-platform and - Python 3 compatibility (e.g. when pickling / unpickling objects). - - * Avoid tuple parameter unpacking to avoid very ugly code when converting - to Python 3. - Yes: - def f(x, y_z): - y, z = y_z - No: - def f(x, (y, z)) +Fred's suggestion to solve issue with hashlib not available in Python 2.4: +-------------------------------------------------------------------------- - * Only use cPickle, not pickle. - - * Always raise exception with - raise MyException(args) - where MyException inherits from Exception. - - * Imports should be listed in alphabetical order. It makes it easier to - verify that something is imported, and avoids duplicated imports. - - * Use absolute imports only. This is compatible across a wider range of - Python versions, and avoids confusion about what is being - imported. - - * Use a leading underscore '_' for internal attributes / methods, - but avoid the double underscore '__' unless you know what you are - doing. - - * A script's only top-level code should be something like: - if __name__ == '__main__': - sys.exit(main()) +You can do as in theano.gof.cc: - * No conditional expression (not supported in Python 2.4). 
These are - expressions of the form - x = y if condition else z - - * Use either "try ... except" or "try ... finally", but do not mix - "except" with "finally" (which is not supported in Python 2.4). - You can make a try... except inside a try... finally if you need both. + ..code:: - * Do not use the `all` and `any` builtin functions (they are not supported - in Python 2.4). - You can use numpy.{all,any} instead of import theano.gof.python25 that - define all and any. - OD: I think we should have something like pylearn.compat.{all,any}. - numpy.{all,any} are meant to be used on arrays only. - OD: As agreed during committee's meeting, we will use - theano.gof.python25 - - * Do not use the `hashlib` module (not supported in Python 2.4). - You can do as in theano.gof.cc: - ..code:: if sys.version_info[:2] >= (2,5): import hashlib def hash_from_code(msg): @@ -376,17 +312,7 @@ import md5 def hash_from_code(msg): return md5.new(msg).hexdigest() - OD: Yep, we could probably come up with such a wrapper in a pylearn.compat - module. - * Do not use mutable arguments as default values. Instead, use a helper - function: - Yes: - def f(array=None): - array = pylearn.if_none(array, []) - No: - def f(array=[]): - # Dangerous if `array` is modified down the road. Mercurial commits ----------------- @@ -509,15 +435,6 @@ OD: This was discussed in committee's meeting. We agreed to provide ways to do this, but not to enforce its usage. -Consistent inf / nan --------------------- - -OD: Use numpy.inf and numpy.nan rather than float('inf') / float('nan')? -(should be slightly more efficient even if efficiency usually doesn't matter -here - the main goal would be for everyone to use the same inf / nan to make -the code consistent). -OD: Approved during committee's meeting. 
- Enforcing strict testing policy ------------------------------- @@ -561,23 +478,9 @@ * Make public some configuration files / plugins for vim * Come up with official common file header (license in particular) -Suggested per-file boilerplate ------------------------------- - -"""Module docstring as the first line, as usual.""" - -__authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley" -__copyright__ = "(c) 2010, Université de Montréal" -__license__ = "3-clause BSD License" -__contact__ = "Name Of Current Guardian of this file " +Script usage documentation +-------------------------- -We could also pull Mercurial revision info and put it in __version__, this -seems to be common. +OD: It would be nice to have some standardized way of parsing a script's +arguments and displaying the script usage doc to the user. -Editor setup ------------- - -(DWF:) Some enhanced configuration files for Vim that I've put a little bit -of work into modifying in some cases can be found at: - -http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/formulas.txt --- a/doc/v2_planning/formulas.txt Fri Sep 17 16:12:33 2010 -0400 +++ b/doc/v2_planning/formulas.txt Fri Sep 17 16:13:58 2010 -0400 @@ -9,47 +9,6 @@ - Olivier B. - Nicolas -TODO ----- -* define a list of search tag to start with -* propose an interface(many inputs, outputs, doc style, hierrache, to search, html output?) -* find existing repositories with files for formulas. -* move existing formulas to pylearn as examples and add other basics ones. -** theano.tensor.nnet will probably be copied to pylearn.formulas.nnet and depricated. - -Why we need formulas --------------------- - -Their is a few reasons why having a library of mathematical formula for theano is a good reason: - -* Some formula have some special thing needed for the gpu. - * Sometimes we need to cast to floatX... -* Some formula have numerical stability problem. 
-* Some formula gradiant have numerical stability problem. (Happen more frequently then the previous ones) - * If theano don't always do some stability optimization, we could do it manually in the formulas -* Some formula as complex to implement and take many try to do correctly. - -Having a library help in that we solve those problem only once. - -Formulas definition -------------------- - -We define formulas as something that don't have a state. They are implemented as python function -that take theano variable as input and output theano variable. If you want state, look at what the -learner commity will do. - -Formulas doc must have ----------------------- - -* A latex mathematical description of the formulas(for picture representation in generated documentation) -* Tags(for searching): - * a list of lower lovel fct used - * category(name of the submodule itself) -* Tell if we did some work to make it more numerical stable. Do theano do the optimization needed? -* Tell if the grad is numericaly stable? Do theano do the optimization needed? -* Tell if work on gpu/not/unknow -* Tell alternate name -* Tell the domaine, range of the input/output(range should use the english notation of including or excluding) List of existing repos ---------------------- @@ -57,33 +16,3 @@ Olivier B. ? Xavier G.: git@github.com:glorotxa/DeepANN.git, see file deepANN/{Activations.py(to nnet),Noise.py,Reconstruction_cost.py(to costs),Regularization.py(to regularization} -Proposed hierarchy ------------------- - -Here is the proposed hierarchy for formulas - -pylearn.formulas.costs: generic / common cost functions, e.g. various cross-entropies, squared error, -abs. 
error, various sparsity penalties (L1, Student) - -pylearn.formulas.regularization: formulas for regularization - -pylearn.formulas.linear: formulas for linear classifier, linear regression, factor analysis, PCA - -pylearn.formulas.nnet: formulas for building layers of various kinds, various activation functions, -layers which could be plugged with various costs & penalties, and stacked - -pylearn.formulas.ae: formulas for auto-encoders and denoising auto-encoder variants - -pylearn.formulas.noise: formulas for corruption processes - -pylearn.formulas.rbm: energies, free energies, conditional distributions, Gibbs sampling - -pylearn.formulas.trees: formulas for decision trees - -pylearn.formulas.boosting: formulas for boosting variants - -pylearn.formulas.maths for other math formulas - -pylearn.formulas.scipy.stats: example to implement the same interface as existing lib - -etc. diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/index.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/v2_planning/index.txt Fri Sep 17 16:13:58 2010 -0400 @@ -0,0 +1,8 @@ +.. _libdoc: + +.. toctree:: + :maxdepth: 1 + + API_formulas + API_coding_style + api_optimization diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/learn_meeting.py --- a/doc/v2_planning/learn_meeting.py Fri Sep 17 16:12:33 2010 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ - - -def bagging(learner_factory): - for i in range(N): - learner_i = learner_factory.new() - # todo: get dataset_i ?? 
- learner_i.use_dataset(dataset_i) - learner_i.train() -''' - List of tasks types: - Attributes - - sequential - spatial - structured - semi-supervised - missing-values - - - Supervised (x,y) - - classification - regression - probabilistic classification - ranking - conditional density estimation - collaborative filtering - ordinal regression ?= ranking - - Unsupervised (x) - - de-noising - feature learning ( transformation ) PCA, DAA - density estimation - inference - - Other - - generation (sampling) - structure learning ??? - - -Notes on metrics & statistics: - - some are applied to an example, others on a batch - - most statistics are on the dataset -''' -class Learner(Object): - - #def use_dataset(dataset) - - # return a dictionary of hyperparameters names(keys) - # and value(values) - def get_hyper_parameters() - def set_hyper_parameters(dictionary) - - - - - # Ver B - def eval(dataset) - def predict(dataset) - - # Trainable - def train(dataset) # train until complition - - # Incremental - def use_dataset(dataset) - def adapt(n_steps =1) - def has_converged() - - # - -class HyperLearner(Learner): - - ### def get_hyper_parameter_distribution(name) - def set_hyper_parameters_distribution(dictionary) diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/learner.txt --- a/doc/v2_planning/learner.txt Fri Sep 17 16:12:33 2010 -0400 +++ b/doc/v2_planning/learner.txt Fri Sep 17 16:13:58 2010 -0400 @@ -1,6 +1,6 @@ Comittee: AB, PL, GM, IG, RP, NB, PV -Leader: ? +Leader: PL Discussion of Function Specification for Learner Types ====================================================== diff -r a0f178bc9052 -r fe6c25eb1e37 doc/v2_planning/plugin_RP.py