# HG changeset patch # User desjagui@atchoum.iro.umontreal.ca # Date 1232343988 18000 # Node ID 123ca7751a805088b8c0c1a55c4abbe814a59c71 # Parent 25d9f91f1afa68be966473681d00ccb5f1e29653 Added hash value for each dict in Trial table. The hash is used when inserting experiments in the db to skip insertion of duplicate values diff -r 25d9f91f1afa -r 123ca7751a80 pylearn/dbdict/api0.py --- a/pylearn/dbdict/api0.py Sun Jan 18 22:17:56 2009 -0500 +++ b/pylearn/dbdict/api0.py Mon Jan 19 00:46:28 2009 -0500 @@ -3,6 +3,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy import Table, Column, MetaData, ForeignKey from sqlalchemy import Integer, String, Float, Boolean, DateTime, Text, Binary +from sqlalchemy.databases import postgres from sqlalchemy.orm import mapper, relation, backref, eagerload from sqlalchemy.sql import operators, select from sql_commands import crazy_sql_command @@ -69,7 +70,7 @@ h_self._link_table = link_table #TODO: replace this crude algorithm (ticket #17) - if ['id', 'create', 'write', 'read', 'status', 'priority'] != [c.name for c in dict_table.c]: + if ['id', 'create', 'write', 'read', 'status', 'priority','hash'] != [c.name for c in dict_table.c]: raise ValueError(h_self.e_bad_table, dict_table) if ['id', 'name', 'ntype', 'fval', 'sval', 'bval'] != [c.name for c in pair_table.c]: raise ValueError(h_self.e_bad_table, pair_table) @@ -276,6 +277,7 @@ # helper routine by update() and __setitem__ def _set_in_session(d_self, key, val, session): """Modify an existing key or create a key to hold val""" + #FIRST SOME MIRRORING HACKS if key == 'dbdict.status': ival = int(val) @@ -283,6 +285,9 @@ if key == 'dbdict.sql.priority': fval = float(val) d_self.priority = fval + if key == 'dbdict.hash': + ival = int(val) + d_self.hash = ival if key in d_self._forbidden_keys: raise KeyError(key) @@ -521,7 +526,8 @@ Column('write', DateTime), Column('read', DateTime), Column('status', Integer), - Column('priority', Float(53)) + Column('priority', Float(53)), + Column('hash', postgres.PGBigInteger) ) t_keyval = Table(table_prefix+keyval_suffix, metadata, diff -r 25d9f91f1afa -r 123ca7751a80 pylearn/dbdict/sql.py --- a/pylearn/dbdict/sql.py Sun Jan 18 22:17:56 2009 -0500 +++ b/pylearn/dbdict/sql.py Mon Jan 19 00:46:28 2009 -0500 @@ -15,6 +15,7 @@ EXPERIMENT = 'dbdict.experiment' #using the dictionary to store these is too slow STATUS = 'dbdict.status' +HASH = 'dbdict.hash' PRIORITY = 'dbdict.sql.priority' HOST = 'dbdict.sql.hostname' @@ -122,6 +123,7 @@ wait = numpy.random.rand(1)*retry_max_sleep if verbose: print 'another process stole our dct. Waiting %f secs' % wait time.sleep(wait) + if dct: str(dct) # for loading of attrs in UGLY WAY!!! s.close() @@ -213,19 +215,32 @@ :param force_dup: forces insertion even if an identical dictionary is already in the db """ + # compute hash for the job, will be used to avoid duplicates job = copy.copy(jobdict) + jobhash = hash(`job`) + if session is None: s = db.session() - do_insert = force_dup or (None is db.query(s).filter_eq_dct(job).first()) - s.close() + print 'here1' else: - do_insert = force_dup or (None is db.query(session).filter_eq_dct(job).first()) + s = session + print 'here2' + + do_insert = force_dup or (None is s.query(db._Dict).filter(db._Dict.hash==jobhash).first()) + print 'do_insert = ', do_insert + + rval = None if do_insert: job[STATUS] = START + job[HASH] = jobhash job[PRIORITY] = 1.0 - return db.insert(job, session=session) - else: - return None + rval = db.insert(job, session=s) + s.commit() + + if session is None: + s.close() + return rval + def insert_job(experiment_fn, state, db, force_dup=False, session=None): state = copy.copy(state) @@ -233,6 +248,9 @@ return insert_dict(state, db, force_dup=force_dup, session=session) +# TODO: FIXME: WARNING +# Should use insert_dict instead of db.insert. Need one entry point for adding jobs to +# database, so that hashing can be done consistently def add_experiments_to_db(jobs, db, verbose=0, add_dups=False, type_check=None, session=None): """Add experiments paramatrized by jobs[i] to database db.