Mercurial > pylearn
comparison doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c
plugin_JB - added SPAWN control element and demo program
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 22 Sep 2010 01:37:55 -0400 |
parents | |
children | 9fac28d80fb7 |
comparison
equal
deleted
inserted
replaced
1211:e7ac87720fee | 1212:478bb1f8215c |
---|---|
1 """plugin_JB_main - main functions illustrating control flow library""" | |
2 | |
3 from plugin_JB import * #TODO: don't do this | |
4 | |
5 | |
6 #################################################### | |
7 # [Dummy] Components involved in learning algorithms | |
8 | |
class Dataset(object):
    """Dummy cyclic stream over an indexable collection of examples.

    `data` must support indexing and `len()`.  Reading past the last
    element wraps the cursor back to the first one.
    """

    def __init__(self, data):
        self.data = data
        self.pos = 0  # index of the next example to hand out

    def seek(self, pos):
        """Move the read cursor to index `pos` (no bounds checking)."""
        self.pos = pos

    def next(self):
        """Return the example under the cursor, then advance (with wrap-around)."""
        current = self.data[self.pos]
        advanced = self.pos + 1
        # Wrap to the start once the cursor steps just past the last element.
        self.pos = 0 if advanced == len(self.data) else advanced
        return current
21 | |
class KFold(object):
    """Dummy K-fold evaluation driver around a seekable example stream.

    `data` must provide `seek(pos)` and `next()`.  `scores` holds one slot
    per fold, filled in by `store_scores`.
    """

    def __init__(self, data, K):
        self.K = K
        self.k = -1  # index of the current fold; advanced by next_fold()
        self.data = data
        self.scores = [None] * K

    def next_fold(self):
        """Advance to the next fold and rewind the underlying stream."""
        self.k += 1
        self.data.seek(0)  # restart the stream

    def next(self):
        """Return the next training example."""
        # TODO: skip the examples that are omitted in this split
        return self.data.next()

    def init_test(self):
        """Hook called before the test phase of a fold; currently a no-op."""
        pass

    def next_test(self):
        """Return the next test example (read from the same stream)."""
        return self.data.next()

    def test_size(self):
        """Return the number of test examples per fold (hard-coded to 5)."""
        return 5

    def store_scores(self, scores):
        """Record `scores` in the slot of the current fold."""
        self.scores[self.k] = scores

    def prog(self, clear, train, test):
        """Build the control-flow program for the whole K-fold procedure.

        `clear`, `train` and `test` are control-flow elements supplied by
        the caller.  Each of the K repetitions selects the next fold, runs
        `clear` and `train`, then buffers `test_size()` results of
        (next_test, test) and passes them to `store_scores`.
        """
        fold_steps = [
            CALL(self.next_fold),
            clear,
            train,
            CALL(self.init_test),
            BUFFER_REPEAT(self.test_size(),
                          SEQ([CALL(self.next_test), test])),
            FILT(self.store_scores),
        ]
        return REPEAT(self.K, fold_steps)
52 | |
class PCA_Analysis(object):
    """Dummy stand-in for a PCA component.

    Only the mean is actually computed; `eigvecs` and `eigvals` are
    placeholder scalars.
    """

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset the mean and the placeholder eigen-quantities to 0."""
        self.mean = self.eigvecs = self.eigvals = 0

    def analyze(self, X):
        """Fit on `X`: store the mean along axis 0 and set placeholders to 1."""
        self.mean = numpy.mean(X, axis=0)
        self.eigvecs = self.eigvals = 1

    def filt(self, X):
        """Return `X` centred by the stored mean and scaled by `eigvecs`."""
        centred = X - self.mean
        return centred * self.eigvecs  # TODO: divide by root eigvals?

    def pseudo_inverse(self, Y):
        """Return `Y` unchanged (placeholder for the inverse projection)."""
        return Y
69 | |
class Layer(object):
    """Dummy layer holding a single weight `w`."""

    def __init__(self, w):
        self.w = w

    def clear(self):
        """Reset the weight to 0."""
        self.w = 0

    def filt(self, x):
        """Return the input scaled by the weight."""
        scaled = self.w * x
        return scaled
77 | |
def cd1_update(X, layer, lr):
    """Update `layer.w` in place from the observation `X`.

    `X` must provide `.mean()`; `lr` scales the step.  Returns None.
    """
    # TODO: not exactly correct math!
    step = X.mean() * lr
    layer.w += step
81 | |
82 | |
83 ############################################################### | |
84 # Example algorithms written in this control flow mini-language | |
85 | |
def main_weave():
    """Use WEAVE to show the interleaving of two bufferings of one stream."""
    # A one-element list lets the nested function mutate the running total.
    total = [0]

    def accumulate(step):
        # Print the state before each update so the interleaving is visible.
        print(total)
        total[0] += step
        return total[0]

    program = WEAVE(1, [
        BUFFER_REPEAT(3, CALL(accumulate, 1)),
        BUFFER_REPEAT(5, CALL(accumulate, 1)),
    ])
    print(program.run())
99 | |
def main_weave_popen():
    """Use WEAVE and POPEN to control a program with asynchronous parallelism."""
    branches = [
        SEQ([POPEN(['sleep', '5']), PRINT('done 1')]),
        SEQ([POPEN(['sleep', '10']), PRINT('done 2')]),
        LOOP([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 1),
        ]),
    ]
    # The LOOP would run forever if the WEAVE were not configured to stop
    # after 2 of its elements complete.
    program = WEAVE(2, branches)

    # Note that the program can be run multiple times...
    for _ in range(2):
        program.run()
116 | |
def main_spawn():
    """Illustrate the use of SPAWN to drive control programs in other processes."""
    data1 = {0: "blah data1"}
    data2 = {1: "foo data2"}

    def show(label):
        # Print a banner followed by the current contents of both dicts.
        print(label)
        print(data1)
        print(data2)

    branches = [
        SPAWN(data1, REPEAT(3, [
            CALL(importable_fn, data1),
            PRINT("hello from 1"),
        ])),
        SPAWN(data2, REPEAT(1, [
            CALL(importable_fn, data2),
            PRINT("hello from 2"),
        ])),
        LOOP([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 0.5),
        ]),
    ]
    program = WEAVE(2, branches)

    show('BEFORE')
    program.run()
    show('AFTER')
139 | |
def main_kfold_dbn():
    """Define and run the k-fold evaluation of a (dummy) DBN.

    Uses many of the control-flow elements.  The algorithm is not quite
    right, but the example is meant to show off the required control-flow
    elements.  Prints the per-fold scores at the end.
    """
    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13, 1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)

    pca_batchsize = 1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm

    train_pca = SEQ([
        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)),
        FILT(pca.analyze)])

    train_layer1 = REPEAT(n_cd_updates_layer1, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(cd1_update, layer=layer1, lr=.01)])

    train_layer2 = REPEAT(n_cd_updates_layer2, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(layer1.filt),
        FILT(cd1_update, layer=layer2, lr=.01)])

    kfold_prog = kf.prog(
        clear=SEQ([  # FRAGMENT 1: this bit is the reset/clear stage
            CALL(pca.clear),
            CALL(layer1.clear),
            CALL(layer2.clear),
        ]),
        train=SEQ([
            train_pca,
            WEAVE(1, [  # Silly example of how to do debugging / logging with WEAVE
                train_layer1,
                LOOP(CALL(print_obj_attr, layer1, 'w'))]),
            train_layer2,
        ]),
        test=SEQ([
            FILT(pca.filt),       # may want to allow this SEQ to be
            FILT(layer1.filt),    # optimized into a shorter one that
            FILT(layer2.filt),    # compiles these calls together with
            FILT(numpy.mean)]))   # Theano

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)  # programs can be copied

    # Pickling round-trip is only a demonstration; its result is not used.
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt and SystemExit are not swallowed here.
    try:
        cPickle.loads(cPickle.dumps(pkg1))
    except Exception:
        sys.stderr.write("pickling doesnt work, but it can be fixed I think\n")

    pkg = pkg2

    # running a program updates the variables in its package, but not the other package
    pkg['prog'].run()
    print(pkg['kf'].scores)
206 | |
207 | |
if __name__ == '__main__':
    # The first command-line argument is evaluated as a Python expression and
    # its value becomes the exit status, so the demo is presumably invoked
    # with a call expression such as 'main_weave()' -- TODO confirm.
    # NOTE(security): eval executes arbitrary code from the command line;
    # acceptable for a local demo script, never for untrusted input.
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: %s '<python expression, e.g. main_weave()>'"
                 % sys.argv[0])
    sys.exit(eval(sys.argv[1]))