annotate bin/pkldu.py @ 1428:3823dbfff6cf

add parameter to randomize the valid and test data.
author Frederic Bastien <nouiz@nouiz.org>
date Tue, 08 Feb 2011 12:57:15 -0500
parents ea5d27727869
children 14ba52c38f07
rev   line source
1423
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
1 #!/bin/env python
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
2 import sys
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
3 from util import serial
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
4 import cPickle
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
5 import time
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
6
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
7 """ Usage:
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
8 first argument is a cPickle file to load
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
9 if no more arguments are supplied, will analyze the disk usage of each element of the root-level object stored in the file
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
10 subsequent arguments let you index into fields / dictionary entries of the object
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
11 For example,
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
12 pkldu.py foo.pkl .my_field [my_key] 3
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
13 will load an object obj from foo.pkl and analyze obj.my_field["my_key"][3]
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
14 """
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
15
ea5d27727869 added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff changeset
def timed_pickle_roundtrip(obj):
    """Pickle ``obj`` and time how long unpickling the result takes.

    Returns a ``(num_bytes, seconds)`` tuple: the length of the pickled
    string and the wall-clock duration of ``cPickle.loads`` on it.
    """
    payload = cPickle.dumps(obj)
    start = time.time()
    cPickle.loads(payload)
    return len(payload), time.time() - start


class PickleMeter(object):
    """Remembers the last measured pickle size / load time of one object.

    ``obj`` is measured once on construction; every call to ``delta()``
    measures it again and reports the change since the previous measurement.
    """

    def __init__(self, obj):
        self.obj = obj
        self.num_bytes, self.load_time = timed_pickle_roundtrip(obj)

    def delta(self):
        """Re-measure ``obj``; return ``(bytes_saved, seconds_saved)``."""
        num_bytes, load_time = timed_pickle_roundtrip(self.obj)
        saved = (self.num_bytes - num_bytes, self.load_time - load_time)
        self.num_bytes, self.load_time = num_bytes, load_time
        return saved


def resolve_target(root, selectors, root_name='root_obj'):
    """Follow command-line selectors from ``root`` down to the object to analyze.

    Each selector is one of:
      ``[key]``  -> index with the string ``key``
      ``.name``  -> attribute ``name``
      anything else -> index with ``eval(selector)`` (e.g. ``3``)

    Returns ``(obj, name)`` where ``name`` is the path that was followed,
    e.g. ``root_obj.my_field[my_key][3]``.

    Raises ValueError for a ``[`` selector with no closing ``]``.
    Prints a message and exits if a selector leads to an object that was
    already visited on the way down.
    """
    obj = root
    name = root_name
    visited = {id(obj): name}
    # id() values are only unique among live objects, so hold a reference to
    # everything visited; otherwise a recycled id could look like a cycle.
    keep_alive = [obj]

    for selector in selectors:
        if selector.startswith('['):
            if not selector.endswith(']'):
                raise ValueError("selector %r is missing its closing ']'"
                                 % (selector,))
            key = selector[1:-1]
            name += '[' + key + ']'
            obj = obj[key]
        elif selector.startswith('.'):
            # The selector already carries its leading dot.
            name += selector
            obj = getattr(obj, selector[1:])
        else:
            # NOTE(security): eval() runs an arbitrary expression taken from
            # the command line; only pass selectors you trust.
            name += '[' + selector + ']'
            obj = obj[eval(selector)]

        if id(obj) in visited:
            print("You're going in circles, " + name + " is the same as "
                  + visited[id(obj)])
            sys.exit()
        visited[id(obj)] = name
        keep_alive.append(obj)

    return obj, name


def attribute_savings(meter):
    """Delete the attributes of ``meter.obj`` one at a time.

    Yields ``(attr_name, bytes_saved, seconds_saved)`` after every deletion
    that did not raise. Attributes that cannot be deleted are reported on
    stdout and skipped. The object is mutated in place.
    """
    obj = meter.obj
    idx = 0
    while True:
        # dir() is re-read every pass because a successful deletion shrinks it.
        names = dir(obj)
        if idx >= len(names):
            return
        attr = names[idx]

        # NOTE(review): on ordinary class instances `__dict__` sorts ahead of
        # regular attribute names and may itself be deletable, which would
        # drop every attribute in one step -- confirm whether to skip it.
        try:
            delattr(obj, attr)
        except Exception:
            print("got error trying to delete " + attr)
            idx += 1
            continue

        if attr in dir(obj):
            # Still listed after deletion: step past it so the scan
            # cannot get stuck on this name.
            print(attr + ' reappears after being deleted')
            idx += 1

        yield (attr,) + meter.delta()


def dict_savings(meter):
    """Delete the entries of the dict ``meter.obj`` one at a time.

    Yields ``(str(key), bytes_saved, seconds_saved)`` per removed entry.
    The dict is emptied in place.
    """
    mapping = meter.obj
    # Snapshot the keys first; the dict is mutated while we iterate.
    for key in list(mapping.keys()):
        del mapping[key]
        yield (str(key),) + meter.delta()


def list_savings(meter):
    """Delete the elements of the list ``meter.obj`` from the front.

    Yields ``(label, bytes_saved, seconds_saved)`` per removed element, where
    ``label`` is ``str(element)`` shortened to 15 characters. The list is
    emptied in place.
    """
    items = meter.obj
    while len(items) > 0:
        label = str(items[0])
        if len(label) > 15:
            label = label[0:12] + "..."
        del items[0]
        yield (label,) + meter.delta()


def print_row(label, bytes_saved, seconds_saved):
    """Print one report line: label, bytes saved, load time saved."""
    print(label + ': \t\t\t\t' + str(bytes_saved) + '\t\t\t'
          + str(seconds_saved))


def main(argv):
    """Load the pickle named by ``argv[1]`` and report what each part costs.

    ``argv[2:]`` are selectors (see ``resolve_target``) choosing the object
    to analyze. That object is then dismantled -- attributes first, then dict
    entries or list elements -- and after each removal the reduction in
    pickled size and unpickling time is printed.
    """
    if len(argv) < 2:
        sys.exit("usage: pkldu.py <pickle file> [selector ...]")

    target, _ = resolve_target(serial.load(argv[1]), argv[2:])

    meter = PickleMeter(target)
    print('orig_obj bytes: \t\t\t\t' + str(meter.num_bytes))
    print('orig load time: ' + str(meter.load_time))

    for row in attribute_savings(meter):
        print_row(*row)

    if isinstance(target, dict):
        print('orig_obj is a dictionary')
        for row in dict_savings(meter):
            print_row(*row)

    if isinstance(target, list):
        print('orig_obj is a list')
        for row in list_savings(meter):
            print_row(*row)


if __name__ == '__main__':
    main(sys.argv)