# HG changeset patch # User Olivier Breuleux # Date 1228427506 18000 # Node ID ef424abb745878f439a272e3fdf45858ff930604 # Parent 9f5891cd4048feff9d45472f5d2b8242d6d66fc9 bugfixes and a lot of documentation diff -r 9f5891cd4048 -r ef424abb7458 pylearn/dbdict/newstuff.py --- a/pylearn/dbdict/newstuff.py Wed Dec 03 23:23:03 2008 -0500 +++ b/pylearn/dbdict/newstuff.py Thu Dec 04 16:51:46 2008 -0500 @@ -39,7 +39,7 @@ def convert(obj): try: return eval(obj, {}, {}) - except NameError: + except (NameError, SyntaxError): return obj def flatten(obj): @@ -121,6 +121,8 @@ def format_help(topic): if topic is None: return 'No help.' + elif isinstance(topic, str): + help = topic elif hasattr(topic, 'help'): help = topic.help() else: @@ -244,7 +246,7 @@ v = self.COMPLETE with self: try: - v = self.experiment(self, self.state) + v = self.experiment(self.state, self) finally: self.state.dbdict.status = self.DONE if v is self.COMPLETE else self.START @@ -326,8 +328,8 @@ class RSyncChannel(StandardChannel): - def __init__(self, path, remote_path, experiment, state): - super(RSyncChannel, self).__init__(path, experiment, state) + def __init__(self, path, remote_path, experiment, state, redirect_stdout = False, redirect_stderr = False): + super(RSyncChannel, self).__init__(path, experiment, state, redirect_stdout, redirect_stderr) ssh_prefix='ssh://' if remote_path.startswith(ssh_prefix): @@ -351,9 +353,9 @@ # TODO: use something more portable than os.system if direction == 'push': - rsync_cmd = 'rsync -ar "%s/" "%s/"' % (path, remote_path) + rsync_cmd = 'rsync -ac "%s/" "%s/"' % (path, remote_path) elif direction == 'pull': - rsync_cmd = 'rsync -ar "%s/" "%s/"' % (remote_path, path) + rsync_cmd = 'rsync -ac "%s/" "%s/"' % (remote_path, path) else: raise RSyncException('invalid direction', direction) @@ -368,7 +370,7 @@ path = self.remote_path)) else: touch_cmd = ("mkdir -p '%(path)s'" % dict(path = self.remote_path)) - print "ECHO", touch_cmd + # print "ECHO", touch_cmd touch_rval = 
os.system(touch_cmd) if 0 != touch_rval: raise Exception('touch failure', (touch_rval, touch_cmd)) @@ -393,7 +395,7 @@ RESTART_PRIORITY = 2.0 - def __init__(self, username, password, hostname, dbname, tablename, path, remote_root): + def __init__(self, username, password, hostname, dbname, tablename, path, remote_root, redirect_stdout = False, redirect_stderr = False): self.username, self.password, self.hostname, self.dbname, self.tablename \ = username, password, hostname, dbname, tablename @@ -413,9 +415,9 @@ state = expand(self.dbstate) experiment = resolve(state.dbdict.experiment) remote_path = os.path.join(remote_root, self.dbname, self.tablename, str(self.dbstate.id)) - super(DBRSyncChannel, self).__init__(path, remote_path, experiment, state) + super(DBRSyncChannel, self).__init__(path, remote_path, experiment, state, redirect_stdout, redirect_stderr) except: - self.dbstate['dbdict.status'] = self.START + self.dbstate['dbdict.status'] = self.DONE raise def save(self): @@ -463,25 +465,14 @@ """ Start an experiment with parameters given on the command line. - Usage: cmdline ... + Usage: cmdline Run an experiment with parameters provided on the command - line. The symbol described by will be imported - using the normal python import rules and will be called with - the dictionary described on the command line. + line. See the help topics for experiment and parameters for + syntax information. - The signature of the function located at must - look like: - def my_experiment(state, channel): - ... 
- - Examples of setting parameters: - a=2 => state['a'] = 2 - b.c=3 => state['b']['c'] = 3 - p::mymodule.Something => state['p']['__builder__']=mymodule.Something - - Example call: - run_experiment cmdline mymodule.my_experiment \\ + Example use: + dbdict-run cmdline mymodule.my_experiment \\ stopper::pylearn.stopper.nsteps \\ # use pylearn.stopper.nsteps stopper.n=10000 \\ # the argument "n" of nsteps is 10000 lr=0.03 @@ -498,6 +489,36 @@ def runner_sqlschedule(dbdescr, experiment, *strings): + """ + Schedule a job to run using the sql command. + + Usage: sqlschedule + + See the experiment and parameters topics for more information about + these parameters. + + Assuming that a postgres database is running on `host`, contains a + database called `dbname` and that `user` has the permissions to + create, read and modify tables on that database, tablepath should + be of the following form: + postgres://user:pass@host/dbname/tablename + + If no table is named `tablename`, one will be created + automatically. The state corresponding to the experiment and + parameters specified in the command will be saved in the database, + but no experiment will be run. + + To run an experiment scheduled using sqlschedule, see the sql + command. + + Example use: + dbdict-run sqlschedule postgres://user:pass@host/dbname/tablename \\ + mymodule.my_experiment \\ + stopper::pylearn.stopper.nsteps \\ # use pylearn.stopper.nsteps + stopper.n=10000 \\ # the argument "n" of nsteps is 10000 + lr=0.03 + """ + try: username, password, hostname, dbname, tablename \ = sql.parse_dbstring(dbdescr) @@ -512,6 +533,10 @@ table_prefix = tablename) state = parse(*strings) + try: + resolve(experiment) + except: + raise UsageError('The first parameter to sqlschedule must be a valid, importable symbol.') state['dbdict.experiment'] = experiment sql.add_experiments_to_db([state], db, verbose = 1) @@ -520,16 +545,50 @@ def runner_sql(dbdescr, exproot): + """ + Run jobs from a sql table. 
+ + Usage: sql + + The jobs should be scheduled first with the sqlschedule command. + + Assuming that a postgres database is running on `host`, contains a + database called `dbname` and that `user` has the permissions to + create, read and modify tables on that database, tablepath should + be of the following form: + postgres://user:pass@host/dbname/tablename + + exproot can be a local path or a remote path. Examples of exproots: + /some/local/path + ssh://some_host:/some/remote/path # relative to the filesystem root + ssh://some_host:other/remote/path # relative to the HOME on some_host + + The exproot will contain a subdirectory hierarchy corresponding to + the dbname, tablename and job id which is a unique integer. + + The sql runner will pick any job in the table which is not running + and is not done and will terminate when that job ends. You may call + the same command multiple times, sequentially or in parallel, to + run as many unfinished jobs as have been scheduled in that table + with sqlschedule. + + Example use: + dbdict-run sql \\ + postgres://user:pass@host/dbname/tablename \\ + ssh://central_host:myexperiments + """ try: username, password, hostname, dbname, tablename \ = sql.parse_dbstring(dbdescr) except: raise UsageError('Wrong syntax for dbdescr') workdir = tempfile.mkdtemp() - print 'wdir', workdir + #print 'wdir', workdir channel = DBRSyncChannel(username, password, hostname, dbname, tablename, workdir, - exproot) + exproot, + redirect_stdout = True, + redirect_stderr = True) channel.run() shutil.rmtree(workdir, ignore_errors=True) @@ -545,13 +604,169 @@ Usage: help """ + def bold(x): + return '\033[1m%s\033[0m' % x if topic is None: - print 'Available commands: (use help for more info)' + print bold('Topics: (use help for more info)') + print 'example Example of defining and running an experiment.' + print 'experiment How to define an experiment.' + print 'parameters How to list the parameters for an experiment.' 
print + print bold('Available commands: (use help for more info)') for name, command in sorted(runner_registry.iteritems()): print name.ljust(20), format_help(command).split('\n')[0] return - print format_help(runner_registry.get(topic, None)) + elif topic == 'experiment': + helptext = """ + + dbdict-run serves to run experiments. To define an experiment, you + only have to define a function respecting the following protocol in + a python file or module: + + def my_experiment(state, channel): + # experiment code goes here + + The return value of my_experiment may be channel.COMPLETE or + channel.INCOMPLETE. If the latter is returned, the experiment may + be resumed at a later point. Note that the return value `None` + is interpreted as channel.COMPLETE. + + If a command defined by dbdict-run has an experiment parameter, + that parameter must be a string such that it could be used in a + python import statement to import the my_experiment function. For + example if you defined my_experiment in my_module.py, you can pass + 'my_module.my_experiment' as the experiment parameter. + + When entering my_experiment, the current working directory will be + set for you to a directory specially created for the experiment. + The location and name of that directory vary depending on which + dbdict-run command you run. You may create logs, save files, pictures, + results, etc. in it. + + state is an object containing the parameters given to the experiment. + For example, if you run the following command: + + dbdict-run cmdline my_module.my_experiment a.x=6 + + `state.a.x` will contain the integer 6, and so will `state['a']['x']`. + If the state is changed, it will be saved when the experiment ends + or when channel.save() is called. The next time the experiment is run + with the same working directory, the modified state will be provided. + + It is not recommended to store large amounts of data in the state. It + should be limited to scalar or string parameters. 
Results such as + weight matrices should be stored in files in the working directory. + + channel is an object with the following important methods: + + - channel.switch() (or channel()) will give the control back to the + user, if it is appropriate to do so. If a call to channel.switch() + returns the string 'stop', it typically means that the signal + SIGTERM (or SIGINT) was received. Therefore, the experiment may be + killed soon, so it should save and return True or + channel.INCOMPLETE so it can be resumed later. This should be + checked periodically or data loss may be incurred. + + - channel.save() will save the current state. It is automatically + called when the function returns, but it is a good idea to do it + periodically. + + - channel.save_and_switch() is a useful shortcut to do both operations + described above. + """ + + elif topic == 'parameters': + helptext = """ + If a command takes arguments, the arguments should each + take one of the following forms: + + key=value + + Set a parameter with name `key` to `value`. The value will be cast + to an appropriate type automatically and it will be accessible to + the experiment using `state.key`. + + If `key` is a dotted name, the value will be set in nested + dictionaries corresponding to each part. + + Examples: + a=1 state.a <- 1 + b=2.3 state.b <- 2.3 + c.d="hello" state.c.d <- "hello" + + key::builder + + This is equivalent to key.__builder__=builder. + + The builder should be a symbol that can be used with import or + __import__ and it should be callable. + + If a key has a builder defined, the experiment code may easily make + an object out of it using the `make` function. `obj = make(state.key)`. 
+ This will call the builder on the substate corresponding to state.key, + as will be made clear in the example: + + Example: + regexp::re.compile + regexp.pattern='a.*c' + + from pylearn.dbdict.newstuff import make + def experiment(state, channel): + regexp = make(state.regexp) # regexp is now re.compile(pattern = 'a.*c') + print regexp.sub('blahblah', 'hello abbbbc there') + + If the above experiment was called with the state produced by the + parameters in the example, it would print 'hello blahblah there'. + """ + + elif topic == 'example': + helptext = """ + Example of an experiment that trains some model for 100000 iterations: + + # defined in: my_experiments.py + def experiment(state, channel): + try: + model = cPickle.load(open('model', 'r')) + except: + model = my_model(state.some_param, state.other_param) + state.n = 0 + dataset = my_dataset(skipto = state.n) + for i in xrange(100000 - state.n): + model.update(dataset.next()) + if i and i % 1000 == 0: + if channel.save_and_switch() == 'stop': + state.n += i + 1 + rval = channel.INCOMPLETE + break + else: + state.result = model.cost(some_test_set) + rval = channel.COMPLETE + cPickle.dump(model, open('model', 'w')) + return rval + + And then you could run it this way: + + dbdict-run cmdline my_experiments.experiment \\ + some_param=1 \\ + other_param=0.4 + + Or this way: + + dbdict-run sqlschedule postgres://user:pass@host/dbname/tablename \\ + my_experiments.experiment \\ + some_param=1 \\ + other_param=0.4 + + dbdict-run sql postgres://user:pass@host/dbname/tablename workdir + + You need to make sure that the module `my_experiments` is accessible + from python. 
You can check with the command + + $ python -m my_experiments + """ + else: + helptext = runner_registry.get(topic, None) + print format_help(helptext) runner_registry['help'] = help @@ -612,3 +827,47 @@ # class GreenletChannel(MultipleChannel): # # uses a single process for all tasks, using greenlets to switch between them # pass + + + + + + + + + + + + + +# Hello, + +# I have changed/improved the dbdict interface a bit. All the new features are in pylearn.dbdict.newstuff and I copied and modified some existing features in another new file, pylearn.dbdict.sql. + +# I'm not completely sure how to organize the help right now (will do tomorrow) but in a nutshell you can use it in several ways already: + +# command line: +# python newstuff.py cmdline mymodule.some_experiment a=2 b=3 c.x=4 c.y="hello" + +# register an experiment in sql: +# python newstuff.py sqlschedule postgres://user:pass@host/dbname/tablename mymodule.some_experiment a=2 b=3 c.x=4 c.y="hello" + +# run experiments found in an sql table (and ideally registered using sqlschedule): +# python newstuff.py sql postgres://user:pass@host/dbname/tablename ssh://host:remote_directory + +# I verified that SIGINT and SIGTERM are caught correctly. They are. I am missing handling for push_error. 
+ +# Options that I am planning to add: +# cmdline: --force to run a job that's already running or already done +# sql: -n to run n jobs instead of just one +# sql: --retry-failed to re-run a job that raised an exception (if your mistake was at the end of training, it's nice to have) (this would require a special status in the db) +# sql: --display to display stdout and stderr in addition to redirecting them to files +# sqlschedule: --force to reset the status of the job described to START, if it already exists (useful if a bug leaves the status to RUNNING) + +# The experiment is a function like: +# def run(state, channel): + +# The state is a sort of nested dictionary where getattr and getitem, setattr and setitem are aliased. + +# Olivier +