changeset 576:ef424abb7458

bugfixes and a lot of documentation
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Thu, 04 Dec 2008 16:51:46 -0500
parents 9f5891cd4048
children df2e2c7ba4ac
files pylearn/dbdict/newstuff.py
diffstat 1 files changed, 289 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/dbdict/newstuff.py	Wed Dec 03 23:23:03 2008 -0500
+++ b/pylearn/dbdict/newstuff.py	Thu Dec 04 16:51:46 2008 -0500
@@ -39,7 +39,7 @@
 def convert(obj):
     try:
         return eval(obj, {}, {})
-    except NameError:
+    except (NameError, SyntaxError):
         return obj
 
 def flatten(obj):
@@ -121,6 +121,8 @@
 def format_help(topic):
     if topic is None:
         return 'No help.'
+    elif isinstance(topic, str):
+        help = topic
     elif hasattr(topic, 'help'):
         help = topic.help()
     else:
@@ -244,7 +246,7 @@
         v = self.COMPLETE
         with self:
             try:
-                v = self.experiment(self, self.state)
+                v = self.experiment(self.state, self)
             finally:
                 self.state.dbdict.status = self.DONE if v is self.COMPLETE else self.START
 
@@ -326,8 +328,8 @@
 
 class RSyncChannel(StandardChannel):
 
-    def __init__(self, path, remote_path, experiment, state):
-        super(RSyncChannel, self).__init__(path, experiment, state)
+    def __init__(self, path, remote_path, experiment, state, redirect_stdout = False, redirect_stderr = False):
+        super(RSyncChannel, self).__init__(path, experiment, state, redirect_stdout, redirect_stderr)
 
         ssh_prefix='ssh://'
         if remote_path.startswith(ssh_prefix):
@@ -351,9 +353,9 @@
 
         # TODO: use something more portable than os.system
         if direction == 'push':
-            rsync_cmd = 'rsync -ar "%s/" "%s/"' % (path, remote_path)
+            rsync_cmd = 'rsync -ac "%s/" "%s/"' % (path, remote_path)
         elif direction == 'pull':
-            rsync_cmd = 'rsync -ar "%s/" "%s/"' % (remote_path, path)
+            rsync_cmd = 'rsync -ac "%s/" "%s/"' % (remote_path, path)
         else:
             raise RSyncException('invalid direction', direction)
 
@@ -368,7 +370,7 @@
                                                                         path = self.remote_path))
         else:
             touch_cmd = ("mkdir -p '%(path)s'" % dict(path = self.remote_path))
-        print "ECHO", touch_cmd
+        # print "ECHO", touch_cmd
         touch_rval = os.system(touch_cmd)
         if 0 != touch_rval:
             raise Exception('touch failure', (touch_rval, touch_cmd))
@@ -393,7 +395,7 @@
 
     RESTART_PRIORITY = 2.0
 
-    def __init__(self, username, password, hostname, dbname, tablename, path, remote_root):
+    def __init__(self, username, password, hostname, dbname, tablename, path, remote_root, redirect_stdout = False, redirect_stderr = False):
         self.username, self.password, self.hostname, self.dbname, self.tablename \
             = username, password, hostname, dbname, tablename
 
@@ -413,9 +415,9 @@
             state = expand(self.dbstate)
             experiment = resolve(state.dbdict.experiment)
             remote_path = os.path.join(remote_root, self.dbname, self.tablename, str(self.dbstate.id))
-            super(DBRSyncChannel, self).__init__(path, remote_path, experiment, state)
+            super(DBRSyncChannel, self).__init__(path, remote_path, experiment, state, redirect_stdout, redirect_stderr)
         except:
-            self.dbstate['dbdict.status'] = self.START
+            self.dbstate['dbdict.status'] = self.DONE
             raise
 
     def save(self):
@@ -463,25 +465,14 @@
     """
     Start an experiment with parameters given on the command line.
 
-    Usage: cmdline <experiment> <prop1::type> <prop1=value1> <prop2=value2> ...
+    Usage: cmdline <experiment> <prop1::type> <parameters>
 
     Run an experiment with parameters provided on the command
-    line.  The symbol described by <experiment> will be imported
-    using the normal python import rules and will be called with
-    the dictionary described on the command line.
+    line. See the help topics for experiment and parameters for
+    syntax information.
 
-    The signature of the function located at <experiment> must
-    look like:
-        def my_experiment(state, channel):
-            ...
-
-    Examples of setting parameters:
-        a=2 => state['a'] = 2
-        b.c=3 => state['b']['c'] = 3
-        p::mymodule.Something => state['p']['__builder__']=mymodule.Something
-
-    Example call:
-        run_experiment cmdline mymodule.my_experiment \\
+    Example use:
+        dbdict-run cmdline mymodule.my_experiment \\
             stopper::pylearn.stopper.nsteps \\ # use pylearn.stopper.nsteps
             stopper.n=10000 \\ # the argument "n" of nsteps is 10000
             lr=0.03
@@ -498,6 +489,36 @@
 
 
 def runner_sqlschedule(dbdescr, experiment, *strings):
+    """
+    Schedule a job to run using the sql command.
+
+    Usage: sqlschedule <tablepath> <experiment> <parameters>
+
+    See the experiment and parameters topics for more information about
+    these parameters.
+
+    Assuming that a postgres database is running on `host`, contains a
+    database called `dbname` and that `user` has the permissions to
+    create, read and modify tables on that database, tablepath should
+    be of the following form:
+        postgres://user:pass@host/dbname/tablename
+
+    If no table is named `tablename`, one will be created
+    automatically. The state corresponding to the experiment and
+    parameters specified in the command will be saved in the database,
+    but no experiment will be run.
+
+    To run an experiment scheduled using sqlschedule, see the sql
+    command.
+
+    Example use:
+        dbdict-run sqlschedule postgres://user:pass@host/dbname/tablename \\
+            mymodule.my_experiment \\
+            stopper::pylearn.stopper.nsteps \\ # use pylearn.stopper.nsteps
+            stopper.n=10000 \\ # the argument "n" of nsteps is 10000
+            lr=0.03
+    """
+
     try:
         username, password, hostname, dbname, tablename \
             = sql.parse_dbstring(dbdescr)
@@ -512,6 +533,10 @@
         table_prefix = tablename)
 
     state = parse(*strings)
+    try:
+        resolve(experiment)
+    except:
+        raise UsageError('The first parameter to sqlschedule must be a valid, importable symbol.')
     state['dbdict.experiment'] = experiment
     sql.add_experiments_to_db([state], db, verbose = 1)
 
@@ -520,16 +545,50 @@
 
 
 def runner_sql(dbdescr, exproot):
+    """
+    Run jobs from a sql table.
+
+    Usage: sql <tablepath> <exproot>
+
+    The jobs should be scheduled first with the sqlschedule command.
+
+    Assuming that a postgres database is running on `host`, contains a
+    database called `dbname` and that `user` has the permissions to
+    create, read and modify tables on that database, tablepath should
+    be of the following form:
+        postgres://user:pass@host/dbname/tablename
+
+    exproot can be a local path or a remote path. Examples of exproots:
+      /some/local/path
+      ssh://some_host:/some/remote/path # relative to the filesystem root
+      ssh://some_host:other/remote/path # relative to the HOME on some_host
+
+    The exproot will contain a subdirectory hierarchy corresponding to
+    the dbname, tablename and job id which is a unique integer.
+
+    The sql runner will pick any job in the table which is not running
+    and is not done and will terminate when that job ends. You may call
+    the same command multiple times, sequentially or in parallel, to
+    run as many unfinished jobs as have been scheduled in that table
+    with sqlschedule.
+
+    Example use:
+        dbdict-run sql \\
+            postgres://user:pass@host/dbname/tablename \\
+            ssh://central_host:myexperiments
+    """
     try:
         username, password, hostname, dbname, tablename \
             = sql.parse_dbstring(dbdescr)
     except:
         raise UsageError('Wrong syntax for dbdescr')
     workdir = tempfile.mkdtemp()
-    print 'wdir', workdir
+    #print 'wdir', workdir
     channel = DBRSyncChannel(username, password, hostname, dbname, tablename,
                              workdir,
-                             exproot)
+                             exproot,
+                             redirect_stdout = True,
+                             redirect_stderr = True)
     channel.run()
     shutil.rmtree(workdir, ignore_errors=True)
 
@@ -545,13 +604,169 @@
 
     Usage: help <topic>
     """
+    def bold(x):
+        return '\033[1m%s\033[0m' % x
     if topic is None:
-        print 'Available commands: (use help <command> for more info)'
+        print bold('Topics: (use help <topic> for more info)')
+        print 'example        Example of defining and running an experiment.'
+        print 'experiment     How to define an experiment.'
+        print 'parameters     How to list the parameters for an experiment.'
         print
+        print bold('Available commands: (use help <command> for more info)')
         for name, command in sorted(runner_registry.iteritems()):
             print name.ljust(20), format_help(command).split('\n')[0]
         return
-    print format_help(runner_registry.get(topic, None))
+    elif topic == 'experiment':
+        helptext = """
+
+        dbdict-run serves to run experiments. To define an experiment, you
+        only have to define a function respecting the following protocol in
+        a python file or module:
+
+        def my_experiment(state, channel):
+           # experiment code goes here
+
+        The return value of my_experiment may be channel.COMPLETE or
+        channel.INCOMPLETE. If the latter is returned, the experiment may
+        be resumed at a later point. Note that the return value `None`
+        is interpreted as channel.COMPLETE.
+
+        If a command defined by dbdict-run has an <experiment> parameter,
+        that parameter must be a string such that it could be used in a
+        python import statement to import the my_experiment function. For
+        example if you defined my_experiment in my_module.py, you can pass
+        'my_module.my_experiment' as the experiment parameter.
+
+        When entering my_experiment, the current working directory will be
+        set for you to a directory specially created for the experiment.
+        The location and name of that directory vary depending on which
+        dbdict-run command you run. You may create logs, save files, pictures,
+        results, etc. in it.
+
+        state is an object containing the parameters given to the experiment.
+        For example, if you run the following command:
+
+        dbdict-run cmdline my_module.my_experiment a.x=6
+
+        `state.a.x` will contain the integer 6, and so will `state['a']['x']`.
+        If the state is changed, it will be saved when the experiment ends
+        or when channel.save() is called. The next time the experiment is run
+        with the same working directory, the modified state will be provided.
+
+        It is not recommended to store large amounts of data in the state.  It
+        should be limited to scalar or string parameters. Results such as
+        weight matrices should be stored in files in the working directory.
+
+        channel is an object with the following important methods:
+
+         - channel.switch() (or channel()) will give the control back to the
+            user, if it is appropriate to do so. If a call to channel.switch()
+            returns the string 'stop', it typically means that the signal
+            SIGTERM (or SIGINT) was received. Therefore, the experiment may be
+            killed soon, so it should save and return True or
+            channel.INCOMPLETE so it can be resumed later. This should be
+            checked periodically or data loss may be incurred.
+
+         - channel.save() will save the current state. It is automatically
+            called when the function returns, but it is a good idea to do it
+            periodically.
+
+         - channel.save_and_switch() is a useful shortcut to do both operations
+            described above.
+        """
+
+    elif topic == 'parameters':
+        helptext = """
+        If a command takes <parameters> arguments, the arguments should each
+        take one of the following forms:
+
+        key=value
+
+          Set a parameter with name `key` to `value`. The value will be cast
+          to an appropriate type automatically and it will be accessible to
+          the experiment using `state.key`.
+
+          If `key` is a dotted name, the value will be set in nested
+          dictionaries corresponding to each part.
+
+          Examples:
+            a=1           state.a <- 1
+            b=2.3         state.b <- 2.3
+            c.d="hello"   state.c.d <- "hello"
+
+        key::builder
+
+          This is equivalent to key.__builder__=builder.
+
+          The builder should be a symbol that can be used with import or
+          __import__ and it should be callable.
+
+          If a key has a builder defined, the experiment code may easily make
+          an object out of it using the `make` function. `obj = make(state.key)`.
+          This will call the builder on the substate corresponding to state.key,
+          as will be made clear in the example:
+
+          Example:
+            regexp::re.compile
+            regexp.pattern='a.*c'
+
+          from pylearn.dbdict.newstuff import make
+          def experiment(state, channel):
+              regexp = make(state.regexp) # regexp is now re.compile(pattern = 'a.*c')
+              print regexp.sub('blahblah', 'hello abbbbc there')
+
+          If the above experiment was called with the state produced by the
+          parameters in the example, it would print 'hello blahblah there'.
+        """
+
+    elif topic == 'example':
+        helptext = """
+        Example of an experiment that trains some model for 100000 iterations:
+
+        # defined in: my_experiments.py
+        def experiment(state, channel):
+            try:
+                model = cPickle.load(open('model', 'r'))
+            except:
+                model = my_model(state.some_param, state.other_param)
+                state.n = 0
+            dataset = my_dataset(skipto = state.n)
+            for i in xrange(100000 - state.n):
+                model.update(dataset.next())
+                if i and i % 1000 == 0:
+                    if channel.save_and_switch() == 'stop':
+                        state.n += i + 1
+                        rval = channel.INCOMPLETE
+                        break
+            else:
+                state.result = model.cost(some_test_set)
+                rval = channel.COMPLETE
+            cPickle.dump(model, open('model', 'w'))
+            return rval
+
+        And then you could run it this way:
+        
+        dbdict-run cmdline my_experiments.experiment \\
+                           some_param=1 \\
+                           other_param=0.4
+
+        Or this way:
+
+        dbdict-run sqlschedule postgres://user:pass@host/dbname/tablename \\
+                           my_experiments.experiment \\
+                           some_param=1 \\
+                           other_param=0.4
+
+        dbdict-run sql postgres://user:pass@host/dbname/tablename workdir
+
+        You need to make sure that the module `my_experiments` is accessible
+        from python. You can check with the command
+
+        $ python -m my_experiments
+        """
+    else:
+        helptext = runner_registry.get(topic, None)
+    print format_help(helptext)
 
 runner_registry['help'] = help
 
@@ -612,3 +827,47 @@
 # class GreenletChannel(MultipleChannel):
 #     # uses a single process for all tasks, using greenlets to switch between them
 #     pass
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Hello,
+
+# I have changed/improved the dbdict interface a bit. All the new features are in pylearn.dbdict.newstuff and I copied and modified some existing features in another new file, pylearn.dbdict.sql.
+
+# I'm not completely sure how to organize the help right now (will do tomorrow) but in a nutshell you can use it in several ways already:
+
+# command line:
+# python newstuff.py cmdline mymodule.some_experiment a=2 b=3 c.x=4 c.y="hello"
+
+# register an experiment in sql:
+# python newstuff.py sqlschedule postgres://user:pass@host/dbname/tablename mymodule.some_experiment a=2 b=3 c.x=4 c.y="hello"
+
+# run experiments found in an sql table (and ideally registered using sqlschedule):
+# python newstuff.py sql postgres://user:pass@host/dbname/tablename ssh://host:remote_directory
+
+# I verified that SIGINT and SIGTERM are caught correctly. They are. I am missing handling for push_error.
+
+# Options that I am planning to add:
+# cmdline: --force to run a job that's already running or already done
+# sql: -n<n> to run n jobs instead of just one
+# sql: --retry-failed to re-run a job that raised an exception (if your mistake was at the end of training, it's nice to have) (this would require a special status in the db)
+# sql: --display to display stdout and stderr in addition to redirecting them to files
+# sqlschedule: --force to reset the status of the job described to START, if it already exists (useful if a bug leaves the status stuck at RUNNING)
+
+# The experiment is a function like:
+# def run(state, channel):
+
+# The state is a sort of nested dictionary where getattr and getitem, setattr and setitem are aliased.
+
+# Olivier
+