diff doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c
plugin_JB - added SPAWN control element and demo program
| author | James Bergstra <bergstrj@iro.umontreal.ca> |
|---|---|
| date | Wed, 22 Sep 2010 01:37:55 -0400 |
| parents | |
| children | 9fac28d80fb7 |
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/arch_src/plugin_JB_main.py	Wed Sep 22 01:37:55 2010 -0400
@@ -0,0 +1,209 @@
"""plugin_JB_main - main functions illustrating control flow library"""

import copy
import cPickle
import sys
import time

import numpy

from plugin_JB import * #TODO: don't do this


####################################################
# [Dummy] Components involved in learning algorithms

class Dataset(object):
    def __init__(self, data):
        self.pos = 0
        self.data = data
    def next(self):
        rval = self.data[self.pos]
        self.pos += 1
        if self.pos == len(self.data):
            self.pos = 0
        return rval
    def seek(self, pos):
        self.pos = pos

class KFold(object):
    def __init__(self, data, K):
        self.data = data
        self.k = -1
        self.scores = [None] * K
        self.K = K
    def next_fold(self):
        self.k += 1
        self.data.seek(0)  # restart the stream
    def next(self):
        #TODO: skip the examples that are omitted in this split
        return self.data.next()
    def init_test(self):
        pass
    def next_test(self):
        return self.data.next()
    def test_size(self):
        return 5
    def store_scores(self, scores):
        self.scores[self.k] = scores

    def prog(self, clear, train, test):
        return REPEAT(self.K, [
            CALL(self.next_fold),
            clear,
            train,
            CALL(self.init_test),
            BUFFER_REPEAT(self.test_size(),
                SEQ([CALL(self.next_test), test])),
            FILT(self.store_scores)])

class PCA_Analysis(object):
    def __init__(self):
        self.clear()

    def clear(self):
        self.mean = 0
        self.eigvecs = 0
        self.eigvals = 0
    def analyze(self, X):
        self.mean = numpy.mean(X, axis=0)
        self.eigvecs = 1
        self.eigvals = 1
    def filt(self, X):
        return (X - self.mean) * self.eigvecs  #TODO: divide by root eigvals?
    def pseudo_inverse(self, Y):
        return Y

class Layer(object):
    def __init__(self, w):
        self.w = w
    def filt(self, x):
        return self.w * x
    def clear(self):
        self.w = 0

def cd1_update(X, layer, lr):
    # update layer.w from observation X
    layer.w += X.mean() * lr  #TODO: not exactly correct math!


###############################################################
# Example algorithms written in this control flow mini-language

def main_weave():
    # Uses WEAVE to demonstrate the interleaving of two bufferings of a single stream

    l = [0]
    def f(a):
        print l
        l[0] += a
        return l[0]

    print WEAVE(1, [
        BUFFER_REPEAT(3, CALL(f, 1)),
        BUFFER_REPEAT(5, CALL(f, 1)),
        ]).run()

def main_weave_popen():
    # Uses WEAVE and POPEN to demonstrate the control of a program with some
    # asynchronous parallelism

    p = WEAVE(2, [
        SEQ([POPEN(['sleep', '5']), PRINT('done 1')]),
        SEQ([POPEN(['sleep', '10']), PRINT('done 2')]),
        LOOP([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 1)])])
    # The LOOP would run forever if the WEAVE were not configured to stop after
    # 2 of its elements complete.

    p.run()
    # Note that the program can be run multiple times...
    p.run()

def main_spawn():
    # illustrate the use of SPAWN to drive a set of control programs
    # in other processes
    data1 = {0: "blah data1"}
    data2 = {1: "foo data2"}
    p = WEAVE(2, [
        SPAWN(data1, REPEAT(3, [
            CALL(importable_fn, data1),
            PRINT("hello from 1")])),
        SPAWN(data2, REPEAT(1, [
            CALL(importable_fn, data2),
            PRINT("hello from 2")])),
        LOOP([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 0.5)])])
    print 'BEFORE'
    print data1
    print data2
    p.run()
    print 'AFTER'
    print data1
    print data2

def main_kfold_dbn():
    # Uses many of the control-flow elements to define the k-fold evaluation of a DBN.
    # The algorithm is not quite right, but the example shows off all of the required
    # control-flow elements, I think.

    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13, 1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)

    pca_batchsize = 1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm

    train_pca = SEQ([
        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)),
        FILT(pca.analyze)])

    train_layer1 = REPEAT(n_cd_updates_layer1, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(cd1_update, layer=layer1, lr=.01)])

    train_layer2 = REPEAT(n_cd_updates_layer2, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(layer1.filt),
        FILT(cd1_update, layer=layer2, lr=.01)])

    kfold_prog = kf.prog(
        clear=SEQ([               # FRAGMENT 1: this bit is the reset/clear stage
            CALL(pca.clear),
            CALL(layer1.clear),
            CALL(layer2.clear),
            ]),
        train=SEQ([
            train_pca,
            WEAVE(1, [            # Silly example of how to do debugging / logging with WEAVE
                train_layer1,
                LOOP(CALL(print_obj_attr, layer1, 'w'))]),
            train_layer2,
            ]),
        test=SEQ([
            FILT(pca.filt),       # may want to allow this SEQ to be
            FILT(layer1.filt),    # optimized into a shorter one that
            FILT(layer2.filt),    # compiles these calls together with
            FILT(numpy.mean)]))   # Theano

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)  # programs can be copied

    try:
        pkg3 = cPickle.loads(cPickle.dumps(pkg1))
    except Exception:
        print >> sys.stderr, "pickling doesn't work, but it can be fixed I think"

    pkg = pkg2

    # running a program updates the variables in its package, but not the other package
    pkg['prog'].run()
    print pkg['kf'].scores


if __name__ == '__main__':
    sys.exit(eval(sys.argv[1]))
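Because the `__main__` block simply eval()s its first command-line argument, each demo above can be invoked by name from the shell, for example (assuming a Python 2 interpreter and that the companion plugin_JB module is importable from the working directory):

    python plugin_JB_main.py 'main_weave()'
    python plugin_JB_main.py 'main_weave_popen()'
    python plugin_JB_main.py 'main_spawn()'
    python plugin_JB_main.py 'main_kfold_dbn()'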
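The control elements themselves (CALL, SEQ, REPEAT, BUFFER_REPEAT, FILT, WEAVE, LOOP, SPAWN, POPEN, PRINT) come from plugin_JB, which is not part of this changeset. As a rough mental model only, inferred from how the demos use them and not the actual plugin_JB implementation, the three simplest elements might be sketched like this (buffering, interleaving, and process control are omitted):

    # Sketch only -- NOT the plugin_JB implementation.  It assumes each control
    # element is an object exposing run(), which is how the demos above use them;
    # BUFFER_REPEAT/FILT buffering, WEAVE interleaving and SPAWN/POPEN process
    # control are left out.
    class CALL(object):
        # call a fixed function with fixed arguments when run
        def __init__(self, fn, *args, **kwargs):
            self.fn, self.args, self.kwargs = fn, args, kwargs
        def run(self):
            return self.fn(*self.args, **self.kwargs)

    class SEQ(object):
        # run a list of control elements one after the other
        def __init__(self, elements):
            self.elements = elements
        def run(self):
            rval = None
            for element in self.elements:
                rval = element.run()
            return rval

    class REPEAT(object):
        # run a list of control elements n times over
        def __init__(self, n, elements):
            self.n = n
            self.body = SEQ(elements)
        def run(self):
            rval = None
            for _ in range(self.n):
                rval = self.body.run()
            return rval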