#!/usr/bin/env python
# Provenance: HG changeset ea5d27727869d746840c5b96b1c341a86cdce320
# (parent 8c209c8470873e389e78a6233652dc4fe5361eab), user Ian Goodfellow,
# Mon Feb 07 17:04:56 2011 -0500:
# "added pickle disk usage inspection utility 'pkldu'" as bin/pkldu.py.
"""Inspect how much each part of a pickled object costs to store and load.

Usage:
    The first argument is a cPickle file to load.  If no more arguments are
    supplied, the disk usage of each element of the root-level object stored
    in the file is analyzed.  Subsequent arguments index into fields,
    dictionary entries or sequence positions of the object.

    For example,
        pkldu.py foo.pkl .my_field [my_key] 3
    loads an object obj from foo.pkl and analyzes obj.my_field["my_key"][3].

The analysis is destructive for the in-memory object: parts are deleted one
at a time and the object is re-pickled after each deletion, so the reported
numbers are the drop in pickled size (bytes) and in unpickling time
(seconds) caused by removing that part.
"""

import sys
import time

try:
    import cPickle  # Python 2
except ImportError:  # Python 3 has no cPickle; pickle is the C version
    import pickle as cPickle


def descend(obj, obj_name, field):
    """Follow one command-line selector from *obj*.

    Selector forms:
        ``.name``  -> ``getattr(obj, 'name')``
        ``[key]``  -> ``obj['key']`` (the key is used as a string)
        anything else is evaluated as a Python expression and used as an
        index, e.g. ``3`` -> ``obj[3]``

    Returns a ``(child, child_name)`` tuple where *child_name* is *obj_name*
    extended with the selector.  Raises ValueError for a ``[`` selector
    without a closing ``]``; lookup errors propagate unchanged.
    """
    if field.startswith('['):
        if not field.endswith(']'):
            raise ValueError("selector " + field + " is missing a closing ']'")
        key = field[1:-1]
        return obj[key], obj_name + '[' + key + ']'
    if field.startswith('.'):
        return getattr(obj, field[1:]), obj_name + field
    # NOTE(review): eval() runs arbitrary code from the command line.  That is
    # acceptable for a developer tool driven by its own user, but never feed
    # this selectors that come from an untrusted source.
    return obj[eval(field)], obj_name + '[' + field + ']'


def measure(obj):
    """Return ``(pickled_size_in_bytes, seconds_to_unpickle)`` for *obj*."""
    data = cPickle.dumps(obj)
    start = time.time()
    cPickle.loads(data)
    return len(data), time.time() - start


def short_repr(value):
    """Return ``str(value)``, shortened to 12 characters plus '...' if it
    is longer than 15 characters."""
    text = str(value)
    if len(text) > 15:
        text = text[0:12] + "..."
    return text


class UsageTracker(object):
    """Remembers the last measured size/load time of an object so that the
    effect of each successive deletion can be reported as a difference."""

    def __init__(self, obj):
        self.obj = obj
        self.num_bytes, self.load_time = measure(obj)

    def record(self, label):
        """Re-measure the object, print one report row and return it.

        The row is ``(label, bytes_saved, seconds_saved)`` relative to the
        previous measurement; the stored baseline is then updated.
        """
        new_bytes, new_time = measure(self.obj)
        row = (label, self.num_bytes - new_bytes, self.load_time - new_time)
        self.num_bytes, self.load_time = new_bytes, new_time
        print(label + ': \t\t\t\t' + str(row[1]) + '\t\t\t' + str(row[2]))
        return row


def strip_attributes(tracker):
    """Delete the tracked object's attributes one by one, reporting each.

    Attributes that cannot be deleted are skipped with a message.  Returns
    the list of rows produced by ``tracker.record``.
    """
    obj = tracker.obj
    rows = []
    idx = 0
    while True:
        # dir() is re-read every pass because a successful deletion shrinks
        # it; idx only advances past names that could not be removed.
        fields = dir(obj)
        if idx >= len(fields):
            break
        field = fields[idx]
        try:
            delattr(obj, field)
        except Exception:
            print("got error trying to delete " + field)
            idx += 1
            continue
        if field in dir(obj):
            # e.g. an instance attribute that was shadowing a class attribute
            print(field + ' reappears after being deleted')
            idx += 1
        rows.append(tracker.record(field))
    return rows


def strip_dict_entries(tracker):
    """Delete every entry of the tracked dict, reporting each key's cost."""
    obj = tracker.obj
    rows = []
    # Snapshot the keys: the dict is mutated while we walk them.
    for key in list(obj.keys()):
        del obj[key]
        rows.append(tracker.record(str(key)))
    return rows


def strip_list_items(tracker):
    """Delete the tracked list's items front to back, reporting each one.

    Each row is labelled with the item's original index and a shortened
    string form of the item.
    """
    obj = tracker.obj
    rows = []
    position = 0
    while len(obj) > 0:
        label = '[' + str(position) + '] ' + short_repr(obj[0])
        del obj[0]
        rows.append(tracker.record(label))
        position += 1
    return rows


def main(argv=None):
    """Command-line entry point; see the module docstring for usage."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('usage: pkldu.py <pickle file> [.attr | [key] | index] ...')

    # Imported here rather than at module level so that the helpers above
    # stay importable when the project's util package is not on the path.
    from util import serial

    obj = serial.load(argv[1])
    obj_name = 'root_obj'
    # Maps id() of every object visited so far to the name it was reached by.
    seen = {id(obj): obj_name}

    for field in argv[2:]:
        obj, obj_name = descend(obj, obj_name, field)
        if id(obj) in seen:
            print("You're going in circles, " + obj_name +
                  " is the same as " + seen[id(obj)])
            sys.exit()
        seen[id(obj)] = obj_name

    tracker = UsageTracker(obj)
    print('orig_obj bytes: \t\t\t\t' + str(tracker.num_bytes))
    print('orig load time: ' + str(tracker.load_time))

    strip_attributes(tracker)

    if isinstance(obj, dict):
        print('orig_obj is a dictionary')
        strip_dict_entries(tracker)

    if isinstance(obj, list):
        print('orig_obj is a list')
        strip_list_items(tracker)


if __name__ == '__main__':
    main()