diff doc/v2_planning/API_coding_style.txt @ 1174:fe6c25eb1e37

merge
author pascanur
date Fri, 17 Sep 2010 16:13:58 -0400
parents a0f178bc9052 53340a8df1fa
children 67f4edabb0cc
line wrap: on
line diff
--- a/doc/v2_planning/API_coding_style.txt	Fri Sep 17 16:12:33 2010 -0400
+++ b/doc/v2_planning/API_coding_style.txt	Fri Sep 17 16:13:58 2010 -0400
@@ -2,6 +2,10 @@
  Coding Style Guidelines
 =========================
 
+Note: until the Pylearn documentation is properly compiled, you can view
+the HTML version of this document `here
+<http://www.iro.umontreal.ca/~delallea/tmp/coding_style/html/API_coding_style.html>`_.
+
 Main Goals
 ==========
 
@@ -58,20 +62,81 @@
 
         """
 
+    * Standard library imports can (and should) be on the same line, to avoid
+      wasting space on straighforward imports:
+
+      .. code-block:: python
+
+        # Good.
+        import os, sys, time
+        # Good when it does not fit on a single line.
+        import std_lib_module_1, std_lib_module_2, std_lib_module_3
+        import std_lib_module_4, std_lib_module_5, std_lib_module_6
+        # Bad.
+        import os
+        import sys
+        import time
+
+    * Importing class / functions from a module is allowed when these are
+      used multiple times, and no ambiguity is possible.
+
+      .. code-block:: python
+
+        # Good when Bar and Blah are used many times.
+        from foo import Bar, Blah
+        do_something_with(Bar(), Blah(), Bar(), Blah(), Bar(), Blah())
+        # Good in most situations.
+        import foo
+        do_something_with(foo.Bar(), foo.Blah())
+        # Bad.
+        from foo import *
+        from numpy import any   # Potential ambiguity with __builtin__.any
+
 Excerpts
 ~~~~~~~~
 
 We emphasize here a few important topics that are found in the official
 guidelines:
 
+    * Only use ASCII characters in code files.
+
+    * Code indent must be done with four blank characters (no tabs).
+
+    * Limit lines to 79 characters.
+
+    * Naming conventions: ``ClassName``, ``TOP_LEVEL_CONSTANT``,
+      ``everything_else``.
+
+    * Comments should start with a capital letter (unless the first word is a
+      code identifier) and end with a period (short inline comments may skip
+      the period at the end).
+
+    * Imports should be listed in alphabetical order. It makes it easier to
+      verify that something is imported, and avoids duplicated imports.
+
+    * Use absolute imports only. This is compatible across a wider range of
+      Python versions, and avoids confusion about what is being
+      imported.
+
+    * Avoid renaming imported modules. This makes code more difficult to
+      re-use, and is not grep-friendly.
+
+      .. code-block:: python
+
+        # Good.
+        from theano import tensor
+        # Bad.
+        from theano import tensor as T
+
     * Avoid using lists if all you care about is iterating on something. Using
       lists:
-        - uses more memory (and possibly more CPU if the code may break out of
-          the iteration),
-        - can lead to ugly code when converted to Python 3 with 2to3,
-        - can have a different behavior if evaluating elements in the list has
-          side effects (if you want these side effects, make it explicit by
-          assigning the list to some variable before iterating on it).
+
+         - uses more memory (and possibly more CPU if the code may break out of
+           the iteration),
+         - can lead to ugly code when converted to Python 3 with 2to3,
+         - can have a different behavior if evaluating elements in the list has
+           side effects (if you want these side effects, make it explicit by
+           assigning the list to some variable before iterating on it).
 
       +------------------------+------------------------+
       | Iterative version      |    List version        |
@@ -101,7 +166,7 @@
         for f_x in imap(f, x):
             ...
         all_f_x = map(f, x)
-        map(f, x)
+        map(f, x)   # f has some side effect.
         # Bad.
         for element in map(f, x):
             ...
@@ -133,12 +198,107 @@
         has_key = my_dict.has_key(key)
         has_substring = my_string.find(substring) >= 0
 
+    * Do not use mutable arguments as default values. Instead, use a helper
+      function (conditional expressions are forbidden at this point, see
+      below).
+
+      .. code-block:: python
+
+        # Good.
+        def f(array=None):
+            array = pylearn.if_none(array, [])
+            ...
+        # Bad.
+        def f(array=[]): # Dangerous if `array` is modified down the road.
+            ...
+
+    * Use a leading underscore '_' in names of internal attributes / methods,
+      but avoid the double underscore '__' unless you know what you are
+      doing.
+
 
 Additional Recommendations
 --------------------------
 
 Things you should do even if they are not listed in official guidelines:
 
+    * All Python code files should start like this:
+
+      .. code-block:: python
+
+        """Module docstring as the first line, as usual."""
+
+        __authors__ = "Olivier Delalleau, Frederic Bastien, David Warde-Farley"
+        __copyright__ = "(c) 2010, Universite de Montreal"
+        __license__ = "3-clause BSD License"
+        __contact__ = "Name Of Current Guardian of this file <email@address>"
+
+    * Use ``//`` for integer division and ``/ float(...)`` if you want the
+      floating point operation (for readability and compatibility across all
+      versions of Python).
+
+      .. code-block:: python
+
+        # Good.
+        n_samples_per_split = n_samples // n_splits
+        mean_x = sum(x) / float(len(x))
+        # Bad.
+        n_samples_per_split = n_samples / n_splits
+        mean_x = sum(x) / len(x)
+
+    * Always raise an exception with ``raise MyException(args)`` where ``MyException``
+      inherits from ``Exception``. This is required for compatibility across
+      all versions of Python.
+
+      .. code-block:: python
+
+        # Good.
+        raise NotImplementedError('The Pylearn team is too lazy.')
+        # Bad.
+        raise NotImplementedError, 'The Pylearn team is too lazy.'
+        raise 'The Pylearn team is too lazy to implement this.'
+
+    * Use either ``try ... except`` or ``try ... finally``, but do not mix
+      ``except`` with ``finally`` (which is not supported in Python 2.4).
+      You can however embed one into the other to mimic the ``try ... except ...
+      finally`` behavior.
+
+      .. code-block:: python
+    
+        # Good.
+        try:
+            try:
+                something_that_may_fail()
+            except SomeError:
+                do_something_if_it_failed()
+        finally:
+            always_do_this_regardless_of_what_happened()
+        # Bad.
+        try:
+            something_that_may_fail()
+        except SomeError:
+            do_something_if_it_failed()
+        finally:
+            always_do_this_regardless_of_what_happened()
+
+    * No conditional expression (not supported in Python 2.4). These are
+      expressions of the form ``x = y if condition else z``.
+
+    * Do not use the ``all`` and ``any`` builtin functions (they are not supported
+      in Python 2.4). Instead, import them from ``theano.gof.python25`` (or
+      use ``numpy.all`` / ``numpy.any`` for array data).
+
+    * Do not use the ``hashlib`` module (not supported in Python 2.4). We will
+      probably provide a wrapper around it to be compatible with all Python
+      versions.
+
+    * Use ``numpy.inf`` and ``numpy.nan`` rather than
+      ``float('inf')`` / ``float('nan')`` (should be slightly more efficient even
+      if efficiency is typically not an issue here, the main goal being code
+      consistency). Also, always use ``numpy.isinf`` / ``numpy.isnan`` to
+      test infinite / NaN values. This is important because ``numpy.nan !=
+      float('nan')``.
+
     * Avoid backslashes whenever possible. They make it more
       difficult to edit code, and they are ugly (as well as potentially
       dangerous if there are trailing white spaces).
@@ -195,6 +355,50 @@
                                            my_everything]:
             ...
 
+    * Use the ``key`` argument instead of ``cmp`` when sorting (for Python 3
+      compatibility).
+
+      .. code-block:: python
+
+        # Good.
+        my_list.sort(key=abs)
+        # Bad.
+        my_list.sort(cmp=lambda x, y: cmp(abs(x), abs(y)))
+
+    * Whenever you read / write binary files, specify it in the mode ('rb' for
+      reading, 'wb' for writing). This is important for cross-platform and
+      Python 3 compatibility (e.g. when pickling / unpickling objects).
+
+      .. code-block:: python
+
+        # Good.
+        cPickle.dump(obj, open('my_obj.pkl', 'wb', protocol=-1))
+        # Bad.
+        cPickle.dump(obj, open('my_obj.pkl', 'w', protocol=-1))
+
+    * Avoid tuple parameter unpacking as it can lead to very ugly code when
+      converting to Python 3.
+
+      .. code-block:: python
+
+        # Good.
+        def f(x, y_z):
+            y, z = y_z
+            ...
+        # Bad.
+        def f(x, (y, z)):
+            ...
+
+    * Only use ``cPickle``, not ``pickle`` (except for debugging purpose since
+      error messages from ``pickle`` are sometimes easier to understand).
+
+    * A script's only top-level code should be something like:
+
+      .. code-block:: python
+
+        if __name__ == '__main__':
+            sys.exit(main())
+
 
 The ``logging`` Module vs. the ``warning`` Module
 =================================================
@@ -246,10 +450,73 @@
 Code Sample
 ===========
 
-The following code sample illustrates many of the coding guidelines one should
-follow in Pylearn.
+The following code sample illustrates some of the coding guidelines one should
+follow in Pylearn. This is still a work-in-progress.
 
 .. code-block:: python
 
+    #! /usr/env/bin python
+
+    """Sample code. There may still be mistakes / missing elements."""
+
+    __authors__ = "Olivier Delalleau"
+    __copyright__ = "(c) 2010, Universite de Montreal"
+    __license__ = "3-clause BSD License"
+    __contact__ = "Olivier Delalleau <delallea@iro>"
+
+    # Standard library imports are on a single line.
     import os, sys, time
 
+    # Third-party imports come after standard library imports, and there is
+    # only one import per line. Imports are sorted lexicographically.
+    import numpy
+    import scipy
+    import theano
+    # Put 'from' imports below.
+    from numpy import argmax
+    from theano import tensor
+    
+    # Application-specific imports come last.
+    from pylearn import dataset
+    from pylearn.optimization import minimize
+
+    def print_files_in(directory):
+        """Print the first line of each file in given directory."""
+        # TODO To be continued...
+
+    def main():
+        if len(sys.argv) != 2:
+            # Note: conventions on how to display script documentation and
+            # parse arguments are still to-be-determined.
+            print("""\
+    Usage: %s <directory>
+    Print first line of each file in given directory (in alphabetic order)."""
+                  % os.path.basename(sys.argv[0]))
+            return 1
+        print_files_in(sys.argv[1])
+        return 0
+
+    # Top-level executable code should be minimal.
+    if __name__ == '__main__':
+        sys.exit(main())
+    
+
+Automatic Code Verification
+===========================
+
+Tools will be available to make it easier to automatically ensure that code
+committed to Pylearn complies to above specifications. This work is not
+finalized yet, but David started a `Wiki page`_ with helpful configuration
+tips for Vim.
+
+.. _Wiki page: http://www.iro.umontreal.ca/~lisa/twiki/bin/view.cgi/Divers/VimPythonRecommendations
+
+TODO
+====
+
+Things still missing from this document, being discussed in coding_style.txt:
+    - Proper style for C code and Mercurial commits
+    - Enforcing 100% test coverage of the code base
+    - Providing ways to add type checking for function arguments
+    - Conventions for script usage documentation and argument parsing
+