Mercurial > pylearn
annotate bin/pkldu.py @ 1428:3823dbfff6cf
add parameter to randomize the valid and test data.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Tue, 08 Feb 2011 12:57:15 -0500 |
parents | ea5d27727869 |
children | 14ba52c38f07 |
rev | line source |
---|---|
1423
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
1 #!/bin/env python |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
2 import sys |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
3 from util import serial |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
4 import cPickle |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
5 import time |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
6 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
7 """ Usage: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
8 first argument is a cPickle file to load |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
9 if no more arguments are supplied, will analyze the disk usage of each element of the root-level object stored in the file |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
10 subsequent arguments let you index into fields / dictionary entries of the object |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
11 For example, |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
12 pkldu.py foo.pkl .my_field [my_key] 3 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
13 will load an object obj from foo.pkl and analyze obj.my_field["my_key"][3] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
14 """ |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
15 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
16 filepath = sys.argv[1] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
17 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
18 orig_obj = serial.load(filepath) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
19 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
20 cycle_check = {} |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
21 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
22 obj_name = 'root_obj' |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
23 cycle_check[id(orig_obj)] = obj_name |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
24 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
25 for field in sys.argv[2:]: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
26 if field.startswith('['): |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
27 assert field.endswith(']') |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
28 obj_name += '[' + field[1:-1] + ']' |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
29 orig_obj = orig_obj[field[1:-1]] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
30 elif field.startswith('.'): |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
31 obj_name += '.' + field |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
32 orig_obj = getattr(orig_obj,field[1:]) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
33 else: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
34 obj_name + '[' + field + ']' |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
35 orig_obj = orig_obj[eval(field)] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
36 if id(orig_obj) in cycle_check: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
37 print "You're going in circles, "+obj_name+" is the same as "+cycle_check[id(orig_obj)] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
38 quit() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
39 cycle_check[id(orig_obj)] = obj_name |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
40 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
41 s = cPickle.dumps(orig_obj) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
42 prev_bytes = len(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
43 print 'orig_obj bytes: \t\t\t\t'+str(prev_bytes) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
44 t1 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
45 x = cPickle.loads(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
46 t2 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
47 prev_t = t2 - t1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
48 print 'orig load time: '+str(prev_t) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
49 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
50 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
51 idx = 0 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
52 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
53 while len(dir(orig_obj)) > idx: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
54 stop = False |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
55 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
56 while True: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
57 fields = dir(orig_obj) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
58 if idx >= len(fields): |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
59 stop = True |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
60 break |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
61 field = fields[idx] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
62 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
63 success = True |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
64 try: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
65 delattr(orig_obj,field) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
66 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
67 except: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
68 print "got error trying to delete "+field |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
69 idx += 1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
70 success = False |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
71 if success and field in dir(orig_obj): |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
72 print field + ' reappears after being deleted' |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
73 idx += 1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
74 if success: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
75 break |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
76 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
77 if stop: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
78 break |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
79 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
80 s = cPickle.dumps(orig_obj) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
81 new_bytes = len(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
82 diff_bytes = prev_bytes - new_bytes |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
83 prev_bytes = new_bytes |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
84 t1 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
85 x = cPickle.loads(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
86 t2 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
87 new_t = t2 - t1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
88 diff_t = prev_t - new_t |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
89 prev_t = new_t |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
90 print field+': \t\t\t\t'+str(diff_bytes)+'\t\t\t'+str(diff_t) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
91 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
92 if type(orig_obj) == type({}): |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
93 print 'orig_obj is a dictionary' |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
94 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
95 keys = [ key for key in orig_obj.keys() ] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
96 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
97 for key in keys: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
98 del orig_obj[key] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
99 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
100 s = cPickle.dumps(orig_obj) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
101 new_bytes = len(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
102 t1 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
103 x = cPickle.loads(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
104 t2 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
105 new_t = t2 - t1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
106 diff_t = prev_t - new_t |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
107 prev_t = new_t |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
108 print field+': \t\t\t\t'+str(diff_bytes)+'\t\t\t'+str(diff_t) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
109 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
110 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
111 if type(orig_obj) == type([]): |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
112 print 'orig_obj is a list' |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
113 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
114 i = 0 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
115 while len(orig_obj) > 0: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
116 stringrep = str(orig_obj[0]) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
117 if len(stringrep) > 15: |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
118 stringrep = stringrep[0:12] + "..." |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
119 del orig_obj[0] |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
120 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
121 s = cPickle.dumps(orig_obj) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
122 new_bytes = len(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
123 diff_bytes = prev_bytes - new_bytes |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
124 prev_bytes = new_bytes |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
125 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
126 t1 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
127 x = cPickle.loads(s) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
128 t2 = time.time() |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
129 new_t = t2 - t1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
130 diff_t = prev_t - new_t |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
131 prev_t = new_t |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
132 print field+': \t\t\t\t'+str(diff_bytes)+'\t\t\t'+str(diff_t) |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
133 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
134 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
135 i+= 1 |
ea5d27727869
added pickle disk usage inspection utility 'pkldu'
Ian Goodfellow
parents:
diff
changeset
|
136 |