comparison doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c

plugin_JB - added SPAWN control element and demo program
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 22 Sep 2010 01:37:55 -0400
parents
children 9fac28d80fb7
comparison
equal deleted inserted replaced
1211:e7ac87720fee 1212:478bb1f8215c
1 """plugin_JB_main - main functions illustrating control flow library"""
2
3 from plugin_JB import * #TODO: don't do this
4
5
6 ####################################################
7 # [Dummy] Components involved in learning algorithms
8
class Dataset(object):
    """Dummy dataset: a cyclic stream over an indexable collection.

    `data` must support integer indexing and `len()`.  `pos` is the index of
    the element that the next call to `next()` will return.
    """

    def __init__(self, data):
        self.data = data
        self.pos = 0

    def next(self):
        """Return the element under the cursor, then advance the cursor.

        The cursor wraps back to 0 once it reaches `len(self.data)`.
        """
        item = self.data[self.pos]
        self.pos += 1
        reached_end = (self.pos == len(self.data))
        if reached_end:
            self.pos = 0
        return item

    def seek(self, pos):
        """Move the cursor to `pos`; no bounds checking is done here."""
        self.pos = pos
21
class KFold(object):
    """Dummy K-fold driver wrapped around a seekable data stream.

    `data` must provide `next()` and `seek(pos)`.  `k` is the index of the
    current fold (-1 until `next_fold()` is first called) and `scores` holds
    one slot per fold, filled in by `store_scores()`.
    """

    def __init__(self, data, K):
        self.K = K
        self.data = data
        self.scores = [None] * K
        self.k = -1

    def next_fold(self):
        """Advance to the next fold and rewind the underlying stream."""
        self.k = self.k + 1
        self.data.seek(0)  # restart the stream

    def next(self):
        """Return the next training example from the stream."""
        # TODO: skip the examples that are omitted in this split
        return self.data.next()

    def init_test(self):
        """Hook run before testing a fold; currently does nothing."""
        pass

    def next_test(self):
        """Return the next test example (read from the same stream)."""
        return self.data.next()

    def test_size(self):
        """Return the number of test examples per fold (hard-coded to 5)."""
        return 5

    def store_scores(self, scores):
        """Record `scores` in the slot belonging to the current fold."""
        self.scores[self.k] = scores

    def prog(self, clear, train, test):
        """Build the control-flow program evaluating all K folds.

        `clear`, `train` and `test` are control-flow elements supplied by the
        caller.  Each repetition starts a fold, runs `clear` and `train`,
        then buffers `test_size()` repetitions of (next_test, test) and hands
        the result to `store_scores`.
        """
        start_fold = CALL(self.next_fold)
        prepare_test = CALL(self.init_test)
        n_test = self.test_size()
        score_one_example = SEQ([CALL(self.next_test), test])
        collect_scores = BUFFER_REPEAT(n_test, score_one_example)
        save_scores = FILT(self.store_scores)
        return REPEAT(self.K, [
            start_fold,
            clear,
            train,
            prepare_test,
            collect_scores,
            save_scores])
52
class PCA_Analysis(object):
    """Dummy stand-in for a PCA component.

    Only the mean is actually computed; `eigvecs` and `eigvals` are scalar
    placeholders (0 after `clear()`, 1 after `analyze()`).
    """

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset the mean and both eigen placeholders to 0."""
        self.mean = self.eigvecs = self.eigvals = 0

    def analyze(self, X):
        """Store the mean of `X` along axis 0 and set the placeholders to 1."""
        self.mean = numpy.mean(X, axis=0)
        self.eigvecs = self.eigvals = 1

    def filt(self, X):
        """Return `X` with the stored mean subtracted, times `eigvecs`."""
        centered = X - self.mean
        return centered * self.eigvecs  # TODO: divide by root eigvals?

    def pseudo_inverse(self, Y):
        """Return `Y` unchanged (placeholder implementation)."""
        return Y
69
class Layer(object):
    """Dummy layer holding a single weight `w`."""

    def __init__(self, w):
        self.w = w

    def filt(self, x):
        """Return the weight multiplied by `x`."""
        weight = self.w
        return weight * x

    def clear(self):
        """Reset the weight to 0."""
        self.w = 0
77
def cd1_update(X, layer, lr):
    """Update `layer.w` in place from the observation `X`.

    The weight is incremented by the mean of `X` scaled by the learning rate
    `lr`.  Nothing is returned.
    """
    step = X.mean() * lr  # TODO: not exactly correct math!
    layer.w += step
81
82
83 ###############################################################
84 # Example algorithms written in this control flow mini-language
85
def main_weave():
    """Use WEAVE to demonstrate interleaving two bufferings of one stream.

    Both buffers call the same closure, which prints and then increments a
    shared counter; the value returned by running the WEAVE is printed.
    """
    # A one-element list lets the closure mutate the counter in place.
    total = [0]

    def f(increment):
        print(total)
        total[0] += increment
        return total[0]

    short_buffer = BUFFER_REPEAT(3, CALL(f, 1))
    long_buffer = BUFFER_REPEAT(5, CALL(f, 1))
    weave = WEAVE(1, [
        short_buffer,
        long_buffer,
        ])
    print(weave.run())
99
def main_weave_popen():
    """Use WEAVE and POPEN to control a program with asynchronous parallelism.

    Two `sleep` commands are each followed by a PRINT, and a third element
    polls once per second.  The whole program is run twice.
    """
    first_job = SEQ([POPEN(['sleep', '5']), PRINT('done 1')])
    second_job = SEQ([POPEN(['sleep', '10']), PRINT('done 2')])
    poller = LOOP([
        CALL(print_obj, 'polling...'),
        CALL(time.sleep, 1)])

    # The LOOP would run forever if the WEAVE were not configured to stop
    # after 2 of its elements complete.
    p = WEAVE(2, [first_job, second_job, poller])

    p.run()
    # Note that the program can be run multiple times...
    p.run()
116
def main_spawn():
    """Illustrate the use of SPAWN to drive control programs in other processes.

    Two SPAWN elements, each given its own data dict, are woven together with
    a polling loop.  Both dicts are printed before and after the run.
    """
    data1 = {0: "blah data1"}
    data2 = {1: "foo data2"}

    def report(label):
        # Print the label followed by the current contents of both dicts.
        for item in (label, data1, data2):
            print(item)

    worker1 = SPAWN(data1, REPEAT(3, [
        CALL(importable_fn, data1),
        PRINT("hello from 1")]))
    worker2 = SPAWN(data2, REPEAT(1, [
        CALL(importable_fn, data2),
        PRINT("hello from 2")]))
    poller = LOOP([
        CALL(print_obj, 'polling...'),
        CALL(time.sleep, 0.5)])
    p = WEAVE(2, [worker1, worker2, poller])

    report('BEFORE')
    p.run()
    report('AFTER')
139
def main_kfold_dbn():
    """Define and run the k-fold evaluation of a (dummy) DBN.

    Uses many of the control-flow elements.  The algorithm is not quite
    right, but the example is meant to show off all of the required
    control-flow elements.

    The program is built once, deep-copied, and the copy is run; the scores
    stored by the copy's KFold object are printed at the end.
    """
    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13, 1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)

    pca_batchsize = 1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm

    train_pca = SEQ([
        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)),
        FILT(pca.analyze)])

    train_layer1 = REPEAT(n_cd_updates_layer1, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(cd1_update, layer=layer1, lr=.01)])

    train_layer2 = REPEAT(n_cd_updates_layer2, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(layer1.filt),
        FILT(cd1_update, layer=layer2, lr=.01)])

    kfold_prog = kf.prog(
        clear=SEQ([  # FRAGMENT 1: this bit is the reset/clear stage
            CALL(pca.clear),
            CALL(layer1.clear),
            CALL(layer2.clear),
            ]),
        train=SEQ([
            train_pca,
            WEAVE(1, [  # Silly example of how to do debugging / logging with WEAVE
                train_layer1,
                LOOP(CALL(print_obj_attr, layer1, 'w'))]),
            train_layer2,
            ]),
        test=SEQ([
            FILT(pca.filt),      # may want to allow this SEQ to be
            FILT(layer1.filt),   # optimized into a shorter one that
            FILT(layer2.filt),   # compiles these calls together with
            FILT(numpy.mean)]))  # Theano

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)  # programs can be copied

    # Best-effort check that a program survives a pickle round-trip.  Only
    # ordinary errors are reported: a bare `except:` would also swallow
    # KeyboardInterrupt and SystemExit.
    try:
        cPickle.loads(cPickle.dumps(pkg1))
    except Exception:
        sys.stderr.write("pickling doesnt work, but it can be fixed I think\n")

    pkg = pkg2

    # running a program updates the variables in its package, but not the other package
    pkg['prog'].run()
    print(pkg['kf'].scores)
206
207
if __name__ == '__main__':
    # The first command-line argument is a Python expression naming the demo
    # to run, e.g.  python plugin_JB_main.py "main_weave()"
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s EXPR   (e.g. %s 'main_weave()')"
                 % (sys.argv[0], sys.argv[0]))
    # NOTE(review): eval() executes arbitrary code taken from the command
    # line.  That is tolerable for a local demo script, but never feed it
    # input from an untrusted source.
    sys.exit(eval(sys.argv[1]))