changeset 645:d8af9c8a4bbb

Changed database schema to be more efficient: * got rid of link table * use indexes for keyval.name, and for ids used in LEFT OUTER JOIN statement (keyval.dict_id and trial.id) * rewrote sql code for view (much simpler now!) to join on the foreign key Validated that the results are the same as the icml09 results. TODO: port icml09 & test4 results to "icml09full" table under the current schema
author desjagui@atchoum.iro.umontreal.ca
date Mon, 02 Feb 2009 17:41:54 -0500
parents 48411bd1f5b7
children fa6c399dc292 37a259d6946a
files pylearn/dbdict/api0.py pylearn/dbdict/sql_commands.py
diffstat 2 files changed, 23 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/dbdict/api0.py	Fri Jan 30 16:54:06 2009 -0500
+++ b/pylearn/dbdict/api0.py	Mon Feb 02 17:41:54 2009 -0500
@@ -1,7 +1,7 @@
 from sqlalchemy import create_engine, desc
 import sqlalchemy.pool
 from sqlalchemy.orm import sessionmaker
-from sqlalchemy import Table, Column, MetaData, ForeignKey    
+from sqlalchemy import Table, Column, MetaData, ForeignKey, ForeignKeyConstraint
 from sqlalchemy import Integer, String, Float, Boolean, DateTime, Text, Binary
 from sqlalchemy.databases import postgres
 from sqlalchemy.orm import mapper, relation, backref, eagerload
@@ -18,7 +18,6 @@
     Attributes:
     dict_table
     pair_table
-    link_table
 
 
 
@@ -51,30 +50,19 @@
             #TODO: Consider union?
             #TODO: Are there stanard ways of doing this kind of thing?
 
-    link_table
-
-        An SqlAlchemy-mapped class corresponding to database table with the
-        following schema:
-
-            Column('dict_id', Integer, ForeignKey('%s.id' % t_trial), primary_key=True),
-            Column('keyval_id', Integer, ForeignKey('%s.id' % t_keyval), primary_key=True))
-
     """
 
     e_bad_table = 'incompatible columns in table'
 
-    def __init__(h_self, Session, engine, dict_table, pair_table, link_table):
+    def __init__(h_self, Session, engine, dict_table, pair_table):
         h_self._engine = engine;
         h_self._dict_table = dict_table
         h_self._pair_table = pair_table
-        h_self._link_table = link_table
 
         #TODO: replace this crude algorithm (ticket #17)
         if ['id', 'create', 'write', 'read', 'status', 'priority','hash'] != [c.name for c in dict_table.c]:
             raise ValueError(h_self.e_bad_table, dict_table)
-        if ['id', 'name', 'ntype', 'fval', 'sval', 'bval'] != [c.name for c in pair_table.c]:
-            raise ValueError(h_self.e_bad_table, pair_table)
-        if ['dict_id', 'pair_id'] != [c.name for c in link_table.c]:
+        if ['id', 'dict_id', 'name', 'ntype', 'fval', 'sval', 'bval'] != [c.name for c in pair_table.c]:
             raise ValueError(h_self.e_bad_table, pair_table)
 
         h_self._session_fn = Session
@@ -305,7 +293,6 @@
         mapper(Dict, dict_table,
                 properties = {
                     '_attrs': relation(KeyVal, 
-                        secondary=link_table, 
                         cascade="all, delete-orphan")
                     })
 
@@ -473,11 +460,9 @@
         # generate raw sql command string
         viewsql = crazy_sql_command(view.name, cols, \
                                     h_self._dict_table.name, \
-                                    h_self._pair_table.name, \
-                                    h_self._link_table.name);
+                                    h_self._pair_table.name)
         
-        #print 'Creating sql view with command:\n', viewsql;
-
+        print 'Creating sql view with command:\n', viewsql;
         h_self._engine.execute(viewsql);
         s.commit();
         s.close()
@@ -508,8 +493,7 @@
 def db_from_engine(engine, 
         table_prefix='DbHandle_default_',
         trial_suffix='trial',
-        keyval_suffix='keyval',
-        link_suffix='link'):
+        keyval_suffix='keyval'):
     """Create a DbHandle instance
 
     @type engine: sqlalchemy engine (e.g. from create_engine)
@@ -518,7 +502,6 @@
     @type table_prefix: string
     @type trial_suffix: string
     @type keyval_suffix: string
-    @type link_suffix: string
 
     @rtype: DbHandle instance
 
@@ -526,7 +509,6 @@
     many-to-many pattern that it needs: 
      - I{table_prefix + trial_suffix},
      - I{table_prefix + keyval_suffix}
-     - I{table_prefix + link_suffix}
 
     """
     Session = sessionmaker(autoflush=True, autocommit=False)
@@ -540,29 +522,25 @@
             Column('read', DateTime),
             Column('status', Integer),
             Column('priority', Float(53)),
-            Column('hash', postgres.PGBigInteger)
-            )
+            Column('hash', postgres.PGBigInteger))
 
     t_keyval = Table(table_prefix+keyval_suffix, metadata,
             Column('id', Integer, primary_key=True),
-            Column('name', String(128), nullable=False), #name of attribute
+            Column('dict_id', Integer, index=True),
+            Column('name', String(128), index=True, nullable=False), #name of attribute
             Column('ntype', Boolean),
             Column('fval', Float(53)),
             Column('sval', Text),
-            Column('bval', Binary))
+            Column('bval', Binary),
+            ForeignKeyConstraint(['dict_id'], [table_prefix+trial_suffix+'.id']))
 
-    t_link = Table(table_prefix+link_suffix, metadata,
-            Column('dict_id', Integer, ForeignKey('%s.id' % t_trial),
-                primary_key=True),
-            Column('pair_id', Integer, ForeignKey('%s.id' % t_keyval),
-                primary_key=True))
-
+                #, ForeignKey('%s.id' % t_trial)),
     metadata.bind = engine
     metadata.create_all() # no-op when tables already exist
     #warning: tables can exist, but have incorrect schema
     # see bug mentioned in DbHandle constructor
 
-    return DbHandle(Session, engine, t_trial, t_keyval, t_link)
+    return DbHandle(Session, engine, t_trial, t_keyval)
 
 def sqlite_memory_db(echo=False, **kwargs):
     """Return a DbHandle backed by a memory-based database"""
--- a/pylearn/dbdict/sql_commands.py	Fri Jan 30 16:54:06 2009 -0500
+++ b/pylearn/dbdict/sql_commands.py	Mon Feb 02 17:41:54 2009 -0500
@@ -1,5 +1,5 @@
 
-def crazy_sql_command(viewname, cols, dicttab, keytab, linktab, id_col='id', dict_id='dict_id', pair_id='pair_id'):
+def crazy_sql_command(viewname, cols, dicttab, keytab, id_col='id', dict_id='dict_id'):
 
     #create or replace view expview as select * from (select id as v1_id, value as nhid from
     #test0 where name='nhid') nhid LEFT OUTER JOIN (select id as v2_id, value as lrate from
@@ -8,37 +8,21 @@
     col_queries = []
     colname0 = None
     for i, (colname, table_col) in enumerate(cols):
-        safe_col = colname.replace('__','')
-        safe_col = safe_col.replace('.','__')
+        safe_col = colname.replace('_','')   # get rid of underscores
+        safe_col = safe_col.replace('.','_') # replace dots with underscores
       
         cols[i][0] = safe_col
 
-        if i == 0:
-            q = """(select %(dict_id)s as v%(i)s_id, %(table_col)s as %(safe_col)s 
-                    from \"%(dicttab)s\", \"%(keytab)s\", \"%(linktab)s\"
-                    where name='%(colname)s'
-                    and \"%(keytab)s\".%(id_col)s = \"%(linktab)s\".%(pair_id)s
-                    and \"%(dicttab)s\".%(id_col)s = \"%(linktab)s\".%(dict_id)s)
-                    %(safe_col)s """ % locals()
-            colname0 = safe_col
-        else:
-            q = """ LEFT OUTER JOIN (select %(dict_id)s as v%(i)s_id, %(table_col)s as %(safe_col)s 
-                    from \"%(dicttab)s\", \"%(keytab)s\", \"%(linktab)s\" 
-                    where name='%(colname)s'
-                    and \"%(keytab)s\".%(id_col)s = \"%(linktab)s\".%(pair_id)s
-                    and \"%(dicttab)s\".%(id_col)s = \"%(linktab)s\".%(dict_id)s)
-                    %(safe_col)s 
-                    on %(colname0)s.v0_id = %(safe_col)s.v%(i)s_id""" % locals()
-
+        q = """LEFT OUTER JOIN 
+               (select %(dict_id)s, %(table_col)s as %(safe_col)s from \"%(keytab)s\"
+                where name='%(colname)s') %(safe_col)s 
+                on %(safe_col)s.dict_id = %(dicttab)s.%(id_col)s""" % locals()
+        
         col_queries.append(q)
        
-    header = " create or replace view %s as select %s.v0_id as id, %s from " \
-            % (viewname, colname0, (", ".join([c[0] for c in cols])))
+    header = "create or replace view %s as select %s.%s, %s from %s " \
+            % (viewname, dicttab, id_col, (", ".join([c[0] for c in cols])), dicttab)
 
     rval = header + "\n".join(col_queries)
 
     return rval
-
-
-#print crazy_sql_command('test0', 'expview', (('nhid', 'value'), ('lrate', 'value'), ('a','value')))
-