changeset 625:123ca7751a80

Added a hash value for each dict in the Trial table. The hash is used when inserting experiments into the db, so that insertion of duplicate values can be skipped.
author desjagui@atchoum.iro.umontreal.ca
date Mon, 19 Jan 2009 00:46:28 -0500
parents 25d9f91f1afa
children 886d8f289fb7
files pylearn/dbdict/api0.py pylearn/dbdict/sql.py
diffstat 2 files changed, 32 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/dbdict/api0.py	Sun Jan 18 22:17:56 2009 -0500
+++ b/pylearn/dbdict/api0.py	Mon Jan 19 00:46:28 2009 -0500
@@ -3,6 +3,7 @@
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy import Table, Column, MetaData, ForeignKey    
 from sqlalchemy import Integer, String, Float, Boolean, DateTime, Text, Binary
+from sqlalchemy.databases import postgres
 from sqlalchemy.orm import mapper, relation, backref, eagerload
 from sqlalchemy.sql import operators, select
 from sql_commands import crazy_sql_command
@@ -69,7 +70,7 @@
         h_self._link_table = link_table
 
         #TODO: replace this crude algorithm (ticket #17)
-        if ['id', 'create', 'write', 'read', 'status', 'priority'] != [c.name for c in dict_table.c]:
+        if ['id', 'create', 'write', 'read', 'status', 'priority','hash'] != [c.name for c in dict_table.c]:
             raise ValueError(h_self.e_bad_table, dict_table)
         if ['id', 'name', 'ntype', 'fval', 'sval', 'bval'] != [c.name for c in pair_table.c]:
             raise ValueError(h_self.e_bad_table, pair_table)
@@ -276,6 +277,7 @@
             # helper routine by update() and __setitem__
             def _set_in_session(d_self, key, val, session):
                 """Modify an existing key or create a key to hold val"""
+                
                 #FIRST SOME MIRRORING HACKS
                 if key == 'dbdict.status':
                     ival = int(val)
@@ -283,6 +285,9 @@
                 if key == 'dbdict.sql.priority':
                     fval = float(val)
                     d_self.priority = fval
+                if key == 'dbdict.hash':
+                    ival = int(val)
+                    d_self.hash = ival
 
                 if key in d_self._forbidden_keys:
                     raise KeyError(key)
@@ -521,7 +526,8 @@
             Column('write', DateTime),
             Column('read', DateTime),
             Column('status', Integer),
-            Column('priority', Float(53))
+            Column('priority', Float(53)),
+            Column('hash', postgres.PGBigInteger)
             )
 
     t_keyval = Table(table_prefix+keyval_suffix, metadata,
--- a/pylearn/dbdict/sql.py	Sun Jan 18 22:17:56 2009 -0500
+++ b/pylearn/dbdict/sql.py	Mon Jan 19 00:46:28 2009 -0500
@@ -15,6 +15,7 @@
 EXPERIMENT = 'dbdict.experiment'
 #using the dictionary to store these is too slow
 STATUS = 'dbdict.status'
+HASH = 'dbdict.hash'
 PRIORITY = 'dbdict.sql.priority'
 
 HOST = 'dbdict.sql.hostname'
@@ -122,6 +123,7 @@
             wait = numpy.random.rand(1)*retry_max_sleep
             if verbose: print 'another process stole our dct. Waiting %f secs' % wait
             time.sleep(wait)
+
     if dct:
         str(dct) # for loading of attrs in UGLY WAY!!!
     s.close()
@@ -213,19 +215,32 @@
     :param force_dup: forces insertion even if an identical dictionary is already in the db
 
     """
+    # compute hash for the job, will be used to avoid duplicates
     job = copy.copy(jobdict)
+    jobhash = hash(`job`)
+
     if session is None:
         s = db.session()
-        do_insert = force_dup or (None is db.query(s).filter_eq_dct(job).first())
-        s.close()
+        print 'here1'
     else:
-        do_insert = force_dup or (None is db.query(session).filter_eq_dct(job).first())
+        s = session
+        print 'here2'
+
+    do_insert = force_dup or (None is s.query(db._Dict).filter(db._Dict.hash==jobhash).first())
+    print 'do_insert = ', do_insert
+
+    rval = None
     if do_insert:
         job[STATUS] = START
+        job[HASH] = jobhash
         job[PRIORITY] = 1.0
-        return db.insert(job, session=session)
-    else:
-        return None
+        rval = db.insert(job, session=s)
+        s.commit()
+
+    if session is None:
+        s.close()
+    return rval
+
 
 def insert_job(experiment_fn, state, db, force_dup=False, session=None):
     state = copy.copy(state)
@@ -233,6 +248,9 @@
     return insert_dict(state, db, force_dup=force_dup, session=session)
 
 
+# TODO: FIXME: WARNING
+# This should use insert_dict instead of db.insert. We need a single entry point for
+# adding jobs to the database, so that hashing is done consistently.
def add_experiments_to_db(jobs, db, verbose=0, add_dups=False, type_check=None, session=None):
     """Add experiments parametrized by jobs[i] to database db.