Mercurial > pylearn
view bin/pkldu.py @ 1441:c9179b0ed002
pca - better comments
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 02 Mar 2011 13:13:40 -0500 |
parents | 4b27456d3bce |
children | 509d6669429d |
line wrap: on
line source
#!/usr/bin/env python
"""
Script to analyze disk usage of pickled files. See usage.
"""
__authors__ = "Ian Goodfellow, Razvan Pascanu"
__copyright__ = "(c) 2010, Universite de Montreal"
__contact__ = "Razvan Pascanu <r.pascanu@gmail>"

import optparse
import sys
import time

try:
    import cPickle
except ImportError:
    # Python 3 has no separate cPickle module; plain pickle is accelerated.
    import pickle as cPickle

try:
    _string_types = (basestring,)
except NameError:
    # Python 3: basestring is gone, str is the only text type.
    _string_types = (str,)

usage = """
pkldu [OPTIONS] file indices

First argument of the program is the file to analyze. Following arguments
help you indexing in the object. For example :

    pkldu.py foo.pkl .my_field [my_key] 3

will load an object obj from foo.pkl and analyze obj.my_field["my_key"][3]
"""

# (unit name, size of one unit) pairs, in increasing order of magnitude.
space_units = [(' B', 1),
               ('kB', 2**10),
               ('MB', 2**20),
               ('GB', 2**30),
               ('TB', 2**40)]

time_units = [('s', 1),
              ('m', 60),
              ('h', 3600)]


def load(filepath):
    """Unpickle and return the object stored in `filepath`.

    NOTE: unpickling executes code chosen by whoever wrote the file; only
    run this tool on pickles you trust.
    """
    # The context manager closes the file even when unpickling raises.
    with open(filepath, 'rb') as f:
        return cPickle.load(f)


def format_string(s, maxlen):
    """Return `s` truncated or right-padded with spaces to `maxlen` chars.

    Non-string values (e.g. integer dictionary keys) are converted with
    str() first so that they can be displayed as well.
    """
    if not isinstance(s, _string_types):
        s = str(s)
    s = s[:maxlen]
    return s + ' ' * (maxlen - len(s))


def prettyprint(size, units, human_readable=False):
    """Express `size` in the most suitable unit of `units`.

    :param size: quantity expressed in the first (smallest) unit
    :param units: list of (name, factor) pairs sorted by increasing factor
    :param human_readable: if False, `size` is returned untouched together
        with the name of the first unit
    :returns: (value, unit name) tuple
    """
    unit_name = units[0][0]
    rval = size
    if human_readable:
        for unit, val in units:
            # >= so that exactly one unit (e.g. 1024 B) is shown as 1 kB.
            if float(size) / val >= 1:
                unit_name = unit
                rval = float(size) / val
    return (rval, unit_name)


def _resolve(obj, indices):
    """Follow `indices` starting from `obj` and return the object reached.

    Each index is either `[key]` (string key lookup), `.name` (attribute
    access) or any other text, which is evaluated as a Python expression
    and used as a subscript (e.g. `3`).  The program exits if the path
    comes back to an object it already went through.
    """
    obj_name = 'root_obj'
    cycle_check = {id(obj): obj_name}
    for field in indices:
        if field.startswith('['):
            if not field.endswith(']'):
                raise ValueError('unbalanced brackets in index %r' % field)
            obj_name += '[' + field[1:-1] + ']'
            obj = obj[field[1:-1]]
        elif field.startswith('.'):
            # `field` already carries its leading dot.
            obj_name += field
            obj = getattr(obj, field[1:])
        else:
            obj_name += '[' + field + ']'
            # SECURITY: eval() runs arbitrary code; acceptable only because
            # the text comes from the command line of the person running us.
            obj = obj[eval(field)]
        if id(obj) in cycle_check:
            print("You're going in circles, " + obj_name +
                  " is the same as " + cycle_check[id(obj)])
            sys.exit()
        cycle_check[id(obj)] = obj_name
    return obj


def _measure(obj):
    """Return (pickled size in bytes, seconds needed to unpickle) of `obj`."""
    s = cPickle.dumps(obj)
    t1 = time.time()
    cPickle.loads(s)
    t2 = time.time()
    return len(s), t2 - t1


def analyze(options, filepath, indices):
    """Print the pickled size and load time of an object and of its parts.

    :param options: object with attributes `human` (bool, use human
        readable units), `reserved` (bool, also report `__xxx` fields) and
        `order` ('none', 'desc' or 'asc': sort the parts by size)
    :param filepath: path of the pickle file to load
    :param indices: list of strings describing how to reach the object of
        interest from the unpickled one (see `usage`)

    Dictionaries are broken down by key, lists and tuples by position and
    any other object by attribute.
    """
    orig_obj = _resolve(load(filepath), indices)

    prev_bytes, prev_t = _measure(orig_obj)
    print('original object : \t\t\t\t%6.2f %s'
          % prettyprint(prev_bytes, space_units, options.human))
    print('load time: %6.2f %s'
          % prettyprint(prev_t, time_units, options.human))

    # Compare by value: the option string usually comes from sys.argv and
    # is not the same object as the literal even when equal.
    sort_output = options.order in ('desc', 'asc')
    print_entries = []

    def record(template, label, progress, new_bytes, new_t):
        # Print the report line now, or keep it for sorting and print
        # `progress` instead so the user sees something is happening.
        print_entry = template % (
            (label,)
            + prettyprint(new_bytes, space_units, options.human)
            + prettyprint(new_t, time_units, options.human))
        if sort_output:
            print(progress)
            print_entries.append((new_bytes, print_entry))
        else:
            print(print_entry)

    if isinstance(orig_obj, dict):
        print('Object is a dictionary')
        for key in list(orig_obj):
            key_name = format_string(key, 40)
            new_bytes, new_t = _measure(orig_obj[key])
            record('key: %40s %6.2f %s ( loads in %6.2f %s)',
                   key_name, 'Processed %s' % key_name, new_bytes, new_t)
    elif isinstance(orig_obj, (tuple, list)):
        print('Object is a list/tuple of  %d elements' % len(orig_obj))
        for idx, v in enumerate(orig_obj):
            new_bytes, new_t = _measure(v)
            record('entry: %03d \t\t\t\t %6.2f %s ( loads in %6.2f %s)',
                   idx, 'Processed entry number  %d' % idx, new_bytes, new_t)
    else:
        print('Object is a ' + str(type(orig_obj)))
        for field in dir(orig_obj):
            if field.startswith('__') and not options.reserved:
                # We skip reserved fields (and keep looking at the others).
                continue
            field_name = format_string(field, 40)
            try:
                new_bytes, new_t = _measure(getattr(orig_obj, field))
            except Exception:
                # Many attributes are simply not picklable; report and go on.
                print('Could not pickle field ' + field_name)
            else:
                record('field: %40s %6.2f %s ( loads in %6.2f %s)',
                       field_name, 'Processed field  ' + field_name,
                       new_bytes, new_t)

    if sort_output:
        print_entries.sort(key=lambda entry: entry[0],
                           reverse=(options.order == 'desc'))
        for entry in print_entries:
            print(entry[1])


def process_options(argv=None):
    """Parse the command line and return the (options, args) pair.

    :param argv: list of arguments to parse; defaults to sys.argv[1:]
    """
    parser = optparse.OptionParser(usage)

    parser.add_option('-H', "--human-readable",
                      dest='human',
                      action="store_true",
                      default=False,
                      help=(' If information should be presented in '
                            'human readable format'))

    parser.add_option('-r', "--reserved-fields",
                      dest='reserved',
                      action="store_true",
                      default=False,
                      help=(' If information about python reserved '
                            ' fields (i.e. starting with `__`) '
                            ' should be displayed'))

    # A 'choice' option makes optparse reject unknown orderings instead of
    # letting them silently produce no output.
    parser.add_option('-o', "--order-fields",
                      dest='order',
                      type='choice',
                      choices=('none', 'desc', 'asc'),
                      default='none',
                      help=(' Order fields acording the their size.'
                            ' Possible values are {none, desc, asc}'))

    return parser.parse_args(argv)


if __name__ == '__main__':
    (options, args) = process_options()
    if not args:
        sys.exit('pkldu: no file to analyze was given\n' + usage)
    analyze(options, args[0], args[1:])