Mercurial > pylearn
view bin/pkldu.py @ 1430:931a19eeab5a
'allow to randomize the sparse valid/test utlc dataset at load time'
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Tue, 08 Feb 2011 16:19:18 -0500 |
parents | ea5d27727869 |
children | 14ba52c38f07 |
line wrap: on
line source
#!/bin/env python
"""
Analyze how much each part of a pickled object costs on disk and at load time.

Usage:
    pkldu.py <pickle file> [selector ...]

The first argument is a cPickle file to load.
If no more arguments are supplied, the script analyzes the disk usage of each
element of the root-level object stored in the file.
Subsequent arguments let you index into fields / dictionary entries of the
object:

    .name    attribute access        (obj.name)
    [key]    lookup by string key    (obj["key"])
    expr     lookup by eval(expr)    (obj[3] for the argument "3")

For example,
    pkldu.py foo.pkl .my_field [my_key] 3
will load an object obj from foo.pkl and analyze obj.my_field["my_key"][3]

The analysis is destructive for the in-memory object: attributes, dictionary
entries and list elements are deleted one at a time, and after each deletion
the object is re-pickled and re-loaded to measure how many bytes and how much
load time that piece accounted for.
"""
import sys
import time

try:
    import cPickle
except ImportError:
    # Interpreters without the cPickle module (Python 3): the pickle module
    # offers the same dumps/loads interface.
    import pickle as cPickle

from util import serial


def report_removal(label, obj, prev_bytes, prev_t):
    """Re-measure obj after something was removed from it and print the delta.

    label      -- text identifying what was just removed
    obj        -- the object in its current (reduced) state
    prev_bytes -- pickled size of obj before the removal
    prev_t     -- unpickling time of obj before the removal

    Prints the saved bytes and saved load time, and returns the new
    (pickled size, load time) pair to feed into the next call.
    """
    s = cPickle.dumps(obj)
    new_bytes = len(s)
    t1 = time.time()
    cPickle.loads(s)
    t2 = time.time()
    new_t = t2 - t1
    diff_bytes = prev_bytes - new_bytes
    diff_t = prev_t - new_t
    print(label + ': \t\t\t\t' + str(diff_bytes) + '\t\t\t' + str(diff_t))
    return new_bytes, new_t


if len(sys.argv) < 2:
    sys.stderr.write('usage: ' + sys.argv[0] +
                     ' <pickle file> [.attr | [key] | expr] ...\n')
    sys.exit(1)

filepath = sys.argv[1]

orig_obj = serial.load(filepath)

# Maps id() of every object visited while following the selectors to the
# path that reached it, so that selecting the same object twice is reported.
cycle_check = {}

obj_name = 'root_obj'
cycle_check[id(orig_obj)] = obj_name

for field in sys.argv[2:]:
    if field.startswith('['):
        if not field.endswith(']'):
            raise ValueError("selector " + field + " starts with '[' but "
                             "does not end with ']'")
        obj_name += '[' + field[1:-1] + ']'
        orig_obj = orig_obj[field[1:-1]]
    elif field.startswith('.'):
        # field already carries its leading dot
        obj_name += field
        orig_obj = getattr(orig_obj, field[1:])
    else:
        obj_name += '[' + field + ']'
        # NOTE: eval() executes arbitrary code taken from the command line.
        # That is the documented way to pass non-string keys (e.g. 3), but
        # never feed this script selectors from an untrusted source.
        orig_obj = orig_obj[eval(field)]
    if id(orig_obj) in cycle_check:
        print("You're going in circles, " + obj_name + " is the same as " +
              cycle_check[id(orig_obj)])
        sys.exit()
    cycle_check[id(orig_obj)] = obj_name

# Baseline measurement of the selected object before anything is removed.
s = cPickle.dumps(orig_obj)
prev_bytes = len(s)
print('orig_obj bytes: \t\t\t\t' + str(prev_bytes))
t1 = time.time()
cPickle.loads(s)
t2 = time.time()
prev_t = t2 - t1
print('orig load time: ' + str(prev_t))

# Delete the attributes one by one. idx counts the attributes that could not
# be removed (deletion failed, or the name is still listed afterwards); a
# successful deletion shrinks dir(orig_obj), so the next candidate slides
# into position idx without idx being advanced.
idx = 0
while True:
    fields = dir(orig_obj)
    if idx >= len(fields):
        break
    field = fields[idx]

    try:
        delattr(orig_obj, field)
    except Exception:
        print("got error trying to delete " + field)
        idx += 1
        continue

    if field in dir(orig_obj):
        # e.g. a class attribute that was shadowed by an instance attribute
        print(field + ' reappears after being deleted')
        idx += 1

    prev_bytes, prev_t = report_removal(field, orig_obj, prev_bytes, prev_t)

if isinstance(orig_obj, dict):
    print('orig_obj is a dictionary')

    # Snapshot the keys: the dictionary is mutated while we walk over them.
    keys = list(orig_obj.keys())

    for key in keys:
        del orig_obj[key]
        prev_bytes, prev_t = report_removal(str(key), orig_obj,
                                            prev_bytes, prev_t)

if isinstance(orig_obj, list):
    print('orig_obj is a list')
    i = 0
    while len(orig_obj) > 0:
        # Label each element by its original index and a short preview.
        stringrep = str(orig_obj[0])
        if len(stringrep) > 15:
            stringrep = stringrep[0:12] + "..."
        del orig_obj[0]
        prev_bytes, prev_t = report_removal('[' + str(i) + '] ' + stringrep,
                                            orig_obj, prev_bytes, prev_t)
        i += 1