diff bin/pkldu.py @ 1423:ea5d27727869

added pickle disk usage inspection utility 'pkldu'
author Ian Goodfellow
date Mon, 07 Feb 2011 17:04:56 -0500
parents
children 14ba52c38f07
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bin/pkldu.py	Mon Feb 07 17:04:56 2011 -0500
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+import sys
+from util import serial
+import cPickle
+import time
+
+""" Usage:
+first argument is a cPickle file to load
+if no more arguments are supplied, will analyze the disk usage of each element of the root-level object stored in the file
+subsequent arguments let you index into fields / dictionary entries of the object
+For example,
+pkldu.py foo.pkl .my_field [my_key] 3
+will load an object obj from foo.pkl and analyze obj.my_field["my_key"][3]
+"""
+
+filepath = sys.argv[1]
+
+orig_obj = serial.load(filepath)
+
+cycle_check = {}
+
+obj_name = 'root_obj'
+cycle_check[id(orig_obj)] = obj_name
+
+for field in sys.argv[2:]:
+    if field.startswith('['):
+        assert field.endswith(']')
+        obj_name += '[' + field[1:-1] + ']'
+        orig_obj = orig_obj[field[1:-1]]
+    elif field.startswith('.'):
+        obj_name += '.' + field
+        orig_obj = getattr(orig_obj,field[1:])
+    else:
+        obj_name + '[' + field + ']'
+        orig_obj = orig_obj[eval(field)]
+    if id(orig_obj) in cycle_check:
+        print "You're going in circles, "+obj_name+" is the same as "+cycle_check[id(orig_obj)]
+        quit()
+    cycle_check[id(orig_obj)] = obj_name
+
+s = cPickle.dumps(orig_obj)
+prev_bytes = len(s)
+print 'orig_obj bytes: \t\t\t\t'+str(prev_bytes)
+t1 = time.time()
+x = cPickle.loads(s)
+t2 = time.time()
+prev_t = t2 - t1
+print 'orig load time: '+str(prev_t)
+
+
+idx = 0
+
+while len(dir(orig_obj)) > idx:
+    stop = False
+
+    while True:
+        fields = dir(orig_obj)
+        if idx >= len(fields):
+            stop = True
+            break
+        field = fields[idx]
+
+        success = True
+        try:
+            delattr(orig_obj,field)
+
+        except:
+            print "got error trying to delete "+field
+            idx += 1
+            success = False
+        if success and field in dir(orig_obj):
+            print field + ' reappears after being deleted'
+            idx += 1
+        if success:
+            break
+
+    if stop:
+        break
+
+    s = cPickle.dumps(orig_obj)
+    new_bytes = len(s)
+    diff_bytes = prev_bytes - new_bytes
+    prev_bytes = new_bytes
+    t1 = time.time()
+    x = cPickle.loads(s)
+    t2 = time.time()
+    new_t = t2 - t1
+    diff_t = prev_t - new_t
+    prev_t = new_t
+    print field+': \t\t\t\t'+str(diff_bytes)+'\t\t\t'+str(diff_t)
+
+if type(orig_obj) == type({}):
+    print 'orig_obj is a dictionary'
+
+    keys = [ key for key in orig_obj.keys() ]
+
+    for key in keys:
+        del orig_obj[key]
+
+        s = cPickle.dumps(orig_obj)
+        new_bytes = len(s)
+        t1 = time.time()
+        x = cPickle.loads(s)
+        t2 = time.time()
+        new_t = t2 - t1
+        diff_t = prev_t - new_t
+        prev_t = new_t
+        print field+': \t\t\t\t'+str(diff_bytes)+'\t\t\t'+str(diff_t)
+
+
+if type(orig_obj) == type([]):
+    print 'orig_obj is a list'
+
+    i = 0
+    while len(orig_obj) > 0:
+        stringrep = str(orig_obj[0])
+        if len(stringrep) > 15:
+            stringrep = stringrep[0:12] + "..."
+        del orig_obj[0]
+        
+        s = cPickle.dumps(orig_obj)
+        new_bytes = len(s)
+        diff_bytes = prev_bytes - new_bytes
+        prev_bytes = new_bytes
+
+        t1 = time.time()
+        x = cPickle.loads(s)
+        t2 = time.time()
+        new_t = t2 - t1
+        diff_t = prev_t - new_t
+        prev_t = new_t
+        print field+': \t\t\t\t'+str(diff_bytes)+'\t\t\t'+str(diff_t)
+
+
+        i+= 1
+