# HG changeset patch # User catherine@DellZilla # Date 1254856186 14400 # Node ID e3dd9e4467d1ca5b826f243a0635624be4e16b17 # Parent 846a59c10cf47c61ace28fa2e748ae626ad7d8ff# Parent 0f019dfb795f5e183eefc9caa6552a3bf121ed53 first-pass metadata collection working diff -r 846a59c10cf4 -r e3dd9e4467d1 sqlpython/mysqlpy.py --- a/sqlpython/mysqlpy.py Fri Oct 02 18:12:46 2009 -0400 +++ b/sqlpython/mysqlpy.py Tue Oct 06 15:09:46 2009 -0400 @@ -211,4 +211,7 @@ sys.argv = [sys.argv[0]] # the --test argument upsets unittest.main() unittest.main() else: + #import cProfile, pstats + #cProfile.run('run()', 'stats.txt') run() + diff -r 846a59c10cf4 -r e3dd9e4467d1 sqlpython/schemagroup.py --- a/sqlpython/schemagroup.py Fri Oct 02 18:12:46 2009 -0400 +++ b/sqlpython/schemagroup.py Tue Oct 06 15:09:46 2009 -0400 @@ -6,60 +6,56 @@ childtype = OracleSchemaGroup grp = childtype(gerald_connstring, connection, user) return grp + +def gerald_connection_string(sqlalchemy_connection_string): + return sqlalchemy_connection_string.split('/?mode=')[0].replace('//','/') class RefreshThread(threading.Thread): def __init__(self, schemagroup, owner): - print 'beginning a thread for ' + owner threading.Thread.__init__(self) self.schemagroup = schemagroup self.owner = owner self.schema = self.schemagroup.schemas.get(self.owner) def run(self): - if self.schema and (self.schema.age() < self.schema.refreshed): + if (not self.schema) or (self.schema.is_stale()): + self.schema = self.schemagroup.childtype(self.owner, self.schemagroup) + else: self.schema.refreshed = self.schema.time() - else: - self.schema = self.schemagroup.childtype(self.owner, self.schemagroup) self.schemagroup.schemas[self.owner] = self.schema - print 'finished thread for ' + self.owner - -class SchemaGroup(object): - def __init__(self, gerald_connstring, connection, user): - self.connstring = gerald_connstring - self.connection = connection - self.cursor = connection.cursor() - self.user = user - def refresh(self): - # should trigger the beginning of a refresh - # or maybe check whether one is needed? - now = self.schemas[self.user].time() - for schemaname in self.all_schemanames(): - schema_update_thread = RefreshThread(self, schemaname) - schema_update_thread.start() -class OracleSchema(gerald.OracleSchema): - def __init__(self, schemaname, parent_group): - self.parent_group = parent_group - gerald.OracleSchema.__init__(self, schemaname, - self.parent_group.connstring) - self.refreshed = self.time() - def time(self): - curs = self.parent_group.connection.cursor() +class RefreshGroupThread(threading.Thread): + def __init__(self, schemas): + threading.Thread.__init__(self) + self.schemas = schemas + self.daemon = True + def run(self): + self.schemas.refresh() + +class SchemaDict(dict): + schema_types = {'oracle': gerald.OracleSchema} + def __init__(self, dct, rdbms, user, connection, connection_string): + dict.__init__(self, dct) + self.child_type = self.schema_types[rdbms] + self.user = user + self.connection = connection + self.gerald_connection_string = gerald_connection_string(connection_string) + self.refresh_thread = RefreshGroupThread(self) + self.complete = False + # do we need a second thread for a second run? + def refresh(self): + curs = self.connection.cursor() curs.execute('SELECT sysdate FROM dual') - return curs.fetchone()[0] - def age(self): - curs = self.parent_group.connection.cursor() - curs.execute('''SELECT max(last_ddl_time) - FROM all_objects - WHERE owner = :owner''', - {'owner': self.name}) - return curs.fetchone()[0] - -class OracleSchemaGroup(SchemaGroup): - childtype = OracleSchema - def __init__(self, connection_string, connection, user): - SchemaGroup.__init__(self, connection_string, connection, user) - self.schemas = {user: OracleSchema(user, self)} - def all_schemanames(self): - self.cursor.execute('SELECT DISTINCT owner FROM all_objects') - return [r[0] for r in self.cursor.fetchall()] + current_database_time = curs.fetchone()[0] + curs.execute('''SELECT owner, MAX(last_ddl_time) + FROM all_objects + GROUP BY owner + -- sort :username to top + ORDER BY REPLACE(owner, :username, 'A'), owner''', + {'username': self.user.upper()}) + for (owner, last_ddl_time) in curs.fetchall(): + if (owner not in self) or (self[owner].refreshed < last_ddl_time): + self[owner] = self.child_type(owner, self.gerald_connection_string) + self[owner].refreshed = current_database_time + # what if a user's last object is deleted? + self.complete = True \ No newline at end of file diff -r 846a59c10cf4 -r e3dd9e4467d1 sqlpython/sqlpyPlus.py --- a/sqlpython/sqlpyPlus.py Fri Oct 02 18:12:46 2009 -0400 +++ b/sqlpython/sqlpyPlus.py Tue Oct 06 15:09:46 2009 -0400 @@ -1441,25 +1441,35 @@ ''' seek = '^%s$' % (arg.replace('*', '.*').replace('?','.'). \ replace('%', '.*')) -# import pdb; pdb.set_trace() - schemas = self.connections[self.connection_number]['schemas'].schemas - schema = schemas[self.connections[self.connection_number]['user']].schema + schemas = self.connections[self.connection_number]['schemas'] result = [] - for (name, obj) in schema.items(): - if hasattr(obj, 'type'): - dbtype = obj.type + username = self.connections[self.connection_number]['user'].upper() + for (schema_name, schema) in schemas.items(): + if opts.all or schema_name == username: + for (name, obj) in schema.schema.items(): + if hasattr(obj, 'type'): + dbtype = obj.type + else: + dbtype = str(type(obj)).rstrip("'>").split('.')[-1] + if opts.all: + name = '%s.%s' % (schema_name, name) + descriptor = '%s/%s' % (dbtype, name) + descriptor = descriptor.upper() + if (not arg) or \ + re.search(seek, descriptor, re.IGNORECASE) or \ + re.search(seek, name, re.IGNORECASE) or \ + re.search(seek, dbtype, re.IGNORECASE): + result.append(descriptor) + # if opts.long: status, last_ddl_time + if not schemas.complete: + if opts.all: + qualifier = 'may be ' else: - dbtype = str(type(obj)).rstrip("'>").split('.')[-1] - descriptor = '%s/%s' % (dbtype, name) - descriptor = descriptor.upper() - if (not arg) or \ - re.search(seek, descriptor, re.IGNORECASE) or \ - re.search(seek, name, re.IGNORECASE) or \ - re.search(seek, dbtype, re.IGNORECASE): - result.append(descriptor) - # if opts.long: status, last_ddl_time - result.sort(reverse=bool(opts.reverse)) - self.poutput('\n'.join(result)) + qualifier = '' + self.perror('Metadata discovery still in progress - results %sincomplete' % qualifier) + if result: + result.sort(reverse=bool(opts.reverse)) + self.poutput('\n'.join(result)) @options([make_option('-i', '--ignore-case', dest='ignorecase', action='store_true', help='Case-insensitive search')]) def do_grep(self, arg, opts): diff -r 846a59c10cf4 -r e3dd9e4467d1 sqlpython/sqlpy_gerald.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sqlpython/sqlpy_gerald.py Tue Oct 06 15:09:46 2009 -0400 @@ -0,0 +1,22 @@ +import gerald + +class SchemaSet(object): + def __init__(self, connection, rdbms, connect_string): + self.schemas = {} + curs = connection.cursor() + if rdbms == 'postgres': + curs.execute('SELECT schema_name FROM information_schema.schemata') + for row in curs.fetchall(): + schema = row[0] + self.schemas[schema] = gerald.PostgresSchema(schema, connect_string) + elif rdbms == 'oracle': + curs.execute('SELECT DISTINCT owner FROM all_objects') + for row in curs.fetchall(): + schema = row[0] + + + +import psycopg2 +connstr = 'postgres:/catherine:catherine@localhost/catherine' +conn = psycopg2.connect("dbname='catherine' user='catherine' password='catherine' host='localhost'") +ss = SchemaSet(conn, 'postgres', connstr) diff -r 846a59c10cf4 -r e3dd9e4467d1 sqlpython/sqlpython.py