# HG changeset patch # User Olivier Delalleau # Date 1285959361 14400 # Node ID b5673b32e8ec0833d96f44a02dc9c05825ef4e86 # Parent a8f909502886725caa72e502df0b8c9d644aece6 API_coding_style: More work on code example diff -r a8f909502886 -r b5673b32e8ec doc/v2_planning/API_coding_style.txt --- a/doc/v2_planning/API_coding_style.txt Fri Oct 01 14:55:41 2010 -0400 +++ b/doc/v2_planning/API_coding_style.txt Fri Oct 01 14:56:01 2010 -0400 @@ -259,10 +259,10 @@ """Module docstring as the first line, as usual.""" - __authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley" + __authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley" __copyright__ = "(c) 2010, Universite de Montreal" - __license__ = "3-clause BSD License" - __contact__ = "Name Of Current Guardian of this file " + __license__ = "3-clause BSD License" + __contact__ = "Name Of Current Guardian of this file " * Use ``//`` for integer division and ``/ float(...)`` if you want the floating point operation (for readability and compatibility across all @@ -345,6 +345,8 @@ The position of the first element (on the same line or a new line) should be chosen depending on what is easiest to read (sometimes both can be ok). + Other formattings may be ok depending on the specific situation, use + common sense and pick whichever looks best. .. code-block:: python @@ -474,18 +476,19 @@ =========== The following code sample illustrates some of the coding guidelines one should -follow in Pylearn. This is still a work-in-progress. +follow in Pylearn. This is still a work-in-progress. Feel free to improve it and +add more! .. code-block:: python #! /usr/env/bin python - """Sample code. There may still be mistakes / missing elements.""" + """Sample code. 
Edit it as you like!""" - __authors__ = "Olivier Delalleau" + __authors__ = "Olivier Delalleau" __copyright__ = "(c) 2010, Universite de Montreal" - __license__ = "3-clause BSD License" - __contact__ = "Olivier Delalleau " + __license__ = "3-clause BSD License" + __contact__ = "Olivier Delalleau " # Standard library imports are on a single line. import os, sys, time @@ -495,30 +498,145 @@ import numpy import scipy import theano - # Put 'from' imports below. + # Individual 'from' imports come after packages. from numpy import argmax from theano import tensor # Application-specific imports come last. - from pylearn import dataset - from pylearn.optimization import minimize + # The absolute path should always be used. + from pylearn import datasets, learner + from pylearn.formulas import noise + + + # All exceptions inherit from Exception. + class PylearnError(Exception): + # TODO Write doc. + pass + + # All top-level classes inherit from object. + class StorageExample(object): + # TODO Write doc. + pass + + + # Two blank lines between definitions of top-level classes and functions. + class AwesomeLearner(learner.Learner): + # TODO Write doc. + + def __init__(self, print_fields=None): + # TODO Write doc. + # print_fields is a list of strings whose counts found in the + # training set should be printed at the end of training. If None, + # then nothing is printed. + # Do not forget to call the parent class constructor. + super(AwesomeLearner, self).__init__() + # Use None instead of an empty list as default argument to + # print_fields to avoid issues with mutable default arguments. + self.print_fields = if_none(print_fields, []) + + # One blank line between method definitions. + def add_field(self, field): + # TODO Write doc. + # Test if something belongs to a container with `in`, not + # container-specific methods like `find`. + if field in self.print_fields: + # TODO Print a warning and do nothing. 
+ pass + else: + # This is why using [] as default to print_fields in the + # constructor would have been a bad idea. + self.print_fields.append(field) - def print_files_in(directory): - """Print the first line of each file in given directory.""" - # TODO To be continued... + def train(self, dataset): + # TODO Write doc (store the mean of each field in the training + # set). + self.mean_fields = {} + count = {} + for sample_dict in dataset: + # Whenever it is enough for what you need, use iterative + # instead of list versions of dictionary methods. + for field, value in sample_dict.iteritems(): + # Keep line length to max 80 characters, using parentheses + # instead of \ to continue long lines. + self.mean_fields[field] = (self.mean_fields.get(field, 0) + + value) + count[field] = count.get(field, 0) + 1 + for field in self.mean_fields: + self.mean_fields[field] /= float(count[field]) + for field in self.print_fields: + # Test is done with `in`, not `has_key`. + if field in self.sum_fields: + # TODO Use log module instead. + print '%s: %s' % (field, self.sum_fields[field]) + else: + # TODO Print warning. + pass + + def test_error(self, dataset): + # TODO Write doc. + if not hasattr(self, 'sum_fields'): + # Exceptions should be raised as follows (in particular, no + # string exceptions!). + raise PylearnError('Cannot test a learner that was not ' + 'trained.') + error = 0 + count = 0 + for sample_dict in dataset: + for field, value in sample_dict.iteritems(): + try: + # Keep the code inside a try statement minimal. + mean = self.mean_fields[field] + # Always specify which kind of exception you are + # intercepting with except. + except KeyError: + raise PylearnError( + "Found in a test sample a field ('%s') that had " + "never been seen in the training set." % field) + error += (value - self.mean_fields[field])**2 + count += 1 + # Remember to divide by a floating point number unless you + # explicitly want an integer division (in which case you should + # use //). 
+ mse = error / float(count) + # TODO Use log module instead. + print 'MSE: %s' % mse + return mse + + + def if_none(val_if_not_none, val_if_none): + # TODO Write doc. + if val_if_not_none is not None: + return val_if_not_none + else: + return val_if_none + + + def print_subdirs_in(directory): + # TODO Write doc. + # Using list comprehension rather than filter. + sub_dirs = sorted([d for d in os.listdir(directory) + if os.path.isdir(os.path.join(directory, d))]) + print '%s: %s' % (directory, ' '.join(sub_dirs)) + # A `for` loop is often easier to read than a call to `map`. + for d in sub_dirs: + print_subdirs_in(os.path.join(directory, d)) + def main(): if len(sys.argv) != 2: # Note: conventions on how to display script documentation and - # parse arguments are still to-be-determined. + # parse arguments are still to-be-determined. This is just one + # way to do it. print("""\ Usage: %s - Print first line of each file in given directory (in alphabetic order).""" + For the given directory and all sub-directories found inside it, print + the list of the directories they contain.""" % os.path.basename(sys.argv[0])) return 1 - print_files_in(sys.argv[1]) + print_subdirs_in(sys.argv[1]) return 0 + # Top-level executable code should be minimal. if __name__ == '__main__': sys.exit(main())