# HG changeset patch # User desjagui@atchoum.iro.umontreal.ca # Date 1233614514 18000 # Node ID d8af9c8a4bbbbfd67dd198d33175a8fcd4179984 # Parent 48411bd1f5b7626778605add61112d8d61df2fb0 Changed database schema to be more efficient: * got rid of link table * use indexes for keyval.name, and for ids used in LEFT OUTER JOIN statement (keyval.dict_id and trial.id) * rewrote sql code for view (much simpler now!) to join on the foreign key Validated that the results are the same as the icml09 results. TODO: port icml09 & test4 results to "icml09full" table under the current schema diff -r 48411bd1f5b7 -r d8af9c8a4bbb pylearn/dbdict/api0.py --- a/pylearn/dbdict/api0.py Fri Jan 30 16:54:06 2009 -0500 +++ b/pylearn/dbdict/api0.py Mon Feb 02 17:41:54 2009 -0500 @@ -1,7 +1,7 @@ from sqlalchemy import create_engine, desc import sqlalchemy.pool from sqlalchemy.orm import sessionmaker -from sqlalchemy import Table, Column, MetaData, ForeignKey +from sqlalchemy import Table, Column, MetaData, ForeignKey, ForeignKeyConstraint from sqlalchemy import Integer, String, Float, Boolean, DateTime, Text, Binary from sqlalchemy.databases import postgres from sqlalchemy.orm import mapper, relation, backref, eagerload @@ -18,7 +18,6 @@ Attributes: dict_table pair_table - link_table @@ -51,30 +50,19 @@ #TODO: Consider union? #TODO: Are there stanard ways of doing this kind of thing? - link_table - - An SqlAlchemy-mapped class corresponding to database table with the - following schema: - - Column('dict_id', Integer, ForeignKey('%s.id' % t_trial), primary_key=True), - Column('keyval_id', Integer, ForeignKey('%s.id' % t_keyval), primary_key=True)) - """ e_bad_table = 'incompatible columns in table' - def __init__(h_self, Session, engine, dict_table, pair_table, link_table): + def __init__(h_self, Session, engine, dict_table, pair_table): h_self._engine = engine; h_self._dict_table = dict_table h_self._pair_table = pair_table - h_self._link_table = link_table #TODO: replace this crude algorithm (ticket #17) if ['id', 'create', 'write', 'read', 'status', 'priority','hash'] != [c.name for c in dict_table.c]: raise ValueError(h_self.e_bad_table, dict_table) - if ['id', 'name', 'ntype', 'fval', 'sval', 'bval'] != [c.name for c in pair_table.c]: - raise ValueError(h_self.e_bad_table, pair_table) - if ['dict_id', 'pair_id'] != [c.name for c in link_table.c]: + if ['id', 'dict_id', 'name', 'ntype', 'fval', 'sval', 'bval'] != [c.name for c in pair_table.c]: raise ValueError(h_self.e_bad_table, pair_table) h_self._session_fn = Session @@ -305,7 +293,6 @@ mapper(Dict, dict_table, properties = { '_attrs': relation(KeyVal, - secondary=link_table, cascade="all, delete-orphan") }) @@ -473,11 +460,9 @@ # generate raw sql command string viewsql = crazy_sql_command(view.name, cols, \ h_self._dict_table.name, \ - h_self._pair_table.name, \ - h_self._link_table.name); + h_self._pair_table.name) - #print 'Creating sql view with command:\n', viewsql; - + print 'Creating sql view with command:\n', viewsql; h_self._engine.execute(viewsql); s.commit(); s.close() @@ -508,8 +493,7 @@ def db_from_engine(engine, table_prefix='DbHandle_default_', trial_suffix='trial', - keyval_suffix='keyval', - link_suffix='link'): + keyval_suffix='keyval'): """Create a DbHandle instance @type engine: sqlalchemy engine (e.g. from create_engine) @@ -518,7 +502,6 @@ @type table_prefix: string @type trial_suffix: string @type keyval_suffix: string - @type link_suffix: string @rtype: DbHandle instance @@ -526,7 +509,6 @@ many-to-many pattern that it needs: - I{table_prefix + trial_suffix}, - I{table_prefix + keyval_suffix} - - I{table_prefix + link_suffix} """ Session = sessionmaker(autoflush=True, autocommit=False) @@ -540,29 +522,25 @@ Column('read', DateTime), Column('status', Integer), Column('priority', Float(53)), - Column('hash', postgres.PGBigInteger) - ) + Column('hash', postgres.PGBigInteger)) t_keyval = Table(table_prefix+keyval_suffix, metadata, Column('id', Integer, primary_key=True), - Column('name', String(128), nullable=False), #name of attribute + Column('dict_id', Integer, index=True), + Column('name', String(128), index=True, nullable=False), #name of attribute Column('ntype', Boolean), Column('fval', Float(53)), Column('sval', Text), - Column('bval', Binary)) + Column('bval', Binary), + ForeignKeyConstraint(['dict_id'], [table_prefix+trial_suffix+'.id'])) - t_link = Table(table_prefix+link_suffix, metadata, - Column('dict_id', Integer, ForeignKey('%s.id' % t_trial), - primary_key=True), - Column('pair_id', Integer, ForeignKey('%s.id' % t_keyval), - primary_key=True)) - + #, ForeignKey('%s.id' % t_trial)), metadata.bind = engine metadata.create_all() # no-op when tables already exist #warning: tables can exist, but have incorrect schema # see bug mentioned in DbHandle constructor - return DbHandle(Session, engine, t_trial, t_keyval, t_link) + return DbHandle(Session, engine, t_trial, t_keyval) def sqlite_memory_db(echo=False, **kwargs): """Return a DbHandle backed by a memory-based database""" diff -r 48411bd1f5b7 -r d8af9c8a4bbb pylearn/dbdict/sql_commands.py --- a/pylearn/dbdict/sql_commands.py Fri Jan 30 16:54:06 2009 -0500 +++ b/pylearn/dbdict/sql_commands.py Mon Feb 02 17:41:54 2009 -0500 @@ -1,5 +1,5 @@ -def crazy_sql_command(viewname, cols, dicttab, keytab, linktab, id_col='id', dict_id='dict_id', pair_id='pair_id'): +def crazy_sql_command(viewname, cols, dicttab, keytab, id_col='id', dict_id='dict_id'): #create or replace view expview as select * from (select id as v1_id, value as nhid from #test0 where name='nhid') nhid LEFT OUTER JOIN (select id as v2_id, value as lrate from @@ -8,37 +8,21 @@ col_queries = [] colname0 = None for i, (colname, table_col) in enumerate(cols): - safe_col = colname.replace('__','') - safe_col = safe_col.replace('.','__') + safe_col = colname.replace('_','') # get rid of underscores + safe_col = safe_col.replace('.','_') # replace dots with underscores cols[i][0] = safe_col - if i == 0: - q = """(select %(dict_id)s as v%(i)s_id, %(table_col)s as %(safe_col)s - from \"%(dicttab)s\", \"%(keytab)s\", \"%(linktab)s\" - where name='%(colname)s' - and \"%(keytab)s\".%(id_col)s = \"%(linktab)s\".%(pair_id)s - and \"%(dicttab)s\".%(id_col)s = \"%(linktab)s\".%(dict_id)s) - %(safe_col)s """ % locals() - colname0 = safe_col - else: - q = """ LEFT OUTER JOIN (select %(dict_id)s as v%(i)s_id, %(table_col)s as %(safe_col)s - from \"%(dicttab)s\", \"%(keytab)s\", \"%(linktab)s\" - where name='%(colname)s' - and \"%(keytab)s\".%(id_col)s = \"%(linktab)s\".%(pair_id)s - and \"%(dicttab)s\".%(id_col)s = \"%(linktab)s\".%(dict_id)s) - %(safe_col)s - on %(colname0)s.v0_id = %(safe_col)s.v%(i)s_id""" % locals() - + q = """LEFT OUTER JOIN + (select %(dict_id)s, %(table_col)s as %(safe_col)s from \"%(keytab)s\" + where name='%(colname)s') %(safe_col)s + on %(safe_col)s.dict_id = %(dicttab)s.%(id_col)s""" % locals() + col_queries.append(q) - header = " create or replace view %s as select %s.v0_id as id, %s from " \ - % (viewname, colname0, (", ".join([c[0] for c in cols]))) + header = "create or replace view %s as select %s.%s, %s from %s " \ + % (viewname, dicttab, id_col, (", ".join([c[0] for c in cols])), dicttab) rval = header + "\n".join(col_queries) return rval - - -#print crazy_sql_command('test0', 'expview', (('nhid', 'value'), ('lrate', 'value'), ('a','value'))) -