diff doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c

plugin_JB - added SPAWN control element and demo program
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 22 Sep 2010 01:37:55 -0400
parents
children 9fac28d80fb7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/arch_src/plugin_JB_main.py	Wed Sep 22 01:37:55 2010 -0400
@@ -0,0 +1,209 @@
+"""plugin_JB_main - main functions illustrating control flow library"""
+
+from plugin_JB import * #TODO: don't do this
+
+
+####################################################
+# [Dummy] Components involved in learning algorithms
+
+class Dataset(object):
+    def __init__(self, data):
+        self.pos = 0
+        self.data = data
+    def next(self):
+        rval = self.data[self.pos]
+        self.pos += 1
+        if self.pos == len(self.data):
+            self.pos = 0
+        return rval
+    def seek(self, pos):
+        self.pos = pos
+
+class KFold(object):  # [dummy] K-fold driver: wraps a data stream and keeps one score slot per fold
+    def __init__(self, data, K):
+        self.data = data  # stream object; this class calls its seek() and next()
+        self.k = -1  # index of the current fold; incremented by next_fold()
+        self.scores = [None]*K  # one slot per fold, filled by store_scores()
+        self.K = K  # number of folds
+    def next_fold(self):
+        self.k += 1
+        self.data.seek(0) # restart the stream
+    def next(self):
+        #TODO: skip the examples that are omitted in this split
+        return self.data.next()
+    def init_test(self):
+        pass  # no-op
+    def next_test(self):
+        return self.data.next()  # reads from the same stream as next()
+    def test_size(self):
+        return 5  # hard-coded constant
+    def store_scores(self, scores):
+        self.scores[self.k] = scores  # slot is selected by the current fold index k
+    # Build the K-fold control program from three caller-supplied sub-programs (clear, train, test).
+    def prog(self, clear, train, test):
+        return REPEAT(self.K, [  # REPEAT/CALL/BUFFER_REPEAT/SEQ/FILT come from plugin_JB (not shown here)
+            CALL(self.next_fold),
+            clear,
+            train,
+            CALL(self.init_test),
+            BUFFER_REPEAT(self.test_size(),  # test_size() is evaluated now, while the program is built
+                SEQ([ CALL(self.next_test), test])),
+            FILT(self.store_scores) ])  # presumably receives the buffered test results -- TODO confirm
+
+class PCA_Analysis(object):
+    def __init__(self):
+        self.clear()
+
+    def clear(self):
+        self.mean = 0
+        self.eigvecs=0
+        self.eigvals=0
+    def analyze(self, X):
+        self.mean = numpy.mean(X, axis=0)
+        self.eigvecs=1
+        self.eigvals=1
+    def filt(self, X):
+        return (X - self.mean) * self.eigvecs #TODO: divide by root eigvals?
+    def pseudo_inverse(self, Y):
+        return Y
+
+class Layer(object):
+    def __init__(self, w):
+        self.w = w
+    def filt(self, x):
+        return self.w*x
+    def clear(self):
+        self.w =0
+
+def cd1_update(X, layer, lr):
+    # update self.layer from observation X
+    layer.w += X.mean() * lr #TODO: not exactly correct math!
+
+
+###############################################################
+# Example algorithms written in this control flow mini-language
+
+def main_weave():
+    # Uses weave to demonstrate the interleaving of two bufferings of a single stream
+
+    l = [0]
+    def f(a):
+        print l
+        l[0] += a
+        return l[0]
+
+    print WEAVE(1, [
+        BUFFER_REPEAT(3,CALL(f,1)),
+        BUFFER_REPEAT(5,CALL(f,1)),
+        ]).run()
+
+def main_weave_popen():
+    # Uses weave and Popen to demonstrate the control of a program with some asynchronous
+    # parallelism
+    # Three woven elements: two POPEN(['sleep', ...]) steps, each followed by a PRINT, plus a polling LOOP.
+    p = WEAVE(2,[
+        SEQ([POPEN(['sleep', '5']), PRINT('done 1')]),
+        SEQ([POPEN(['sleep', '10']), PRINT('done 2')]),
+        LOOP([ 
+            CALL(print_obj, 'polling...'),
+            CALL(time.sleep, 1)])])
+    # The LOOP would run forever if the WEAVE were not configured to stop after 2 of its elements
+    # complete.
+    # First run of the program object.
+    p.run()
+    # Note that the program can be run multiple times...
+    p.run()
+
+def main_spawn():
+    # illustrate the use of SPAWN to drive a set of control programs
+    # in other processes
+    data1 = {0:"blah data1"}  # handed to the first SPAWN and to importable_fn inside it
+    data2 = {1:"foo data2"}  # handed to the second SPAWN and to importable_fn inside it
+    p = WEAVE(2,[  # presumably stops once 2 of the 3 elements finish -- TODO confirm against plugin_JB
+        SPAWN(data1, REPEAT(3, [
+            CALL(importable_fn, data1), 
+            PRINT("hello from 1")])),
+        SPAWN(data2, REPEAT(1, [
+            CALL(importable_fn, data2), 
+            PRINT("hello from 2")])),
+        LOOP([ 
+            CALL(print_obj, 'polling...'),
+            CALL(time.sleep, 0.5)])])
+    print 'BEFORE'  # dump both dicts before the run ...
+    print data1
+    print data2
+    p.run()
+    print 'AFTER'  # ... and again afterwards, so any change made during the run is visible
+    print data1
+    print data2
+
+def main_kfold_dbn():
+    # Uses many of the control-flow elements to define the k-fold evaluation of a dbn
+    # The algorithm is not quite right, but the example shows off all of the required
+    # control-flow elements I think.
+
+    # create components
+    dataset = Dataset(numpy.random.RandomState(123).randn(13,1))
+    pca = PCA_Analysis()
+    layer1 = Layer(w=4)
+    layer2 = Layer(w=3)
+    kf = KFold(dataset, K=10)
+
+    pca_batchsize=1000
+    cd_batchsize = 5
+    n_cd_updates_layer1 = 10
+    n_cd_updates_layer2 = 10
+
+    # create algorithm
+
+    train_pca = SEQ([
+        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)), 
+        FILT(pca.analyze)])
+
+    train_layer1 = REPEAT(n_cd_updates_layer1, [
+        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
+        FILT(pca.filt), 
+        FILT(cd1_update, layer=layer1, lr=.01)])
+
+    train_layer2 = REPEAT(n_cd_updates_layer2, [
+        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
+        FILT(pca.filt), 
+        FILT(layer1.filt),
+        FILT(cd1_update, layer=layer2, lr=.01)])
+
+    kfold_prog = kf.prog(
+            clear = SEQ([   # FRAGMENT 1: this bit is the reset/clear stage
+                CALL(pca.clear),
+                CALL(layer1.clear),
+                CALL(layer2.clear),
+                ]),
+            train = SEQ([
+                train_pca,
+                WEAVE(1, [    # Silly example of how to do debugging / loggin with WEAVE
+                    train_layer1, 
+                    LOOP(CALL(print_obj_attr, layer1, 'w'))]),
+                train_layer2,
+                ]),
+            test=SEQ([
+                FILT(pca.filt),       # may want to allow this SEQ to be 
+                FILT(layer1.filt),    # optimized into a shorter one that
+                FILT(layer2.filt),    # compiles these calls together with 
+                FILT(numpy.mean)]))   # Theano
+
+    pkg1 = dict(prog=kfold_prog, kf=kf)
+    pkg2 = copy.deepcopy(pkg1)       # programs can be copied
+
+    try:
+        pkg3 = cPickle.loads(cPickle.dumps(pkg1)) 
+    except:
+        print >> sys.stderr, "pickling doesnt work, but it can be fixed I think"
+
+    pkg = pkg2
+
+    # running a program updates the variables in its package, but not the other package
+    pkg['prog'].run()
+    print pkg['kf'].scores
+
+
+if __name__ == '__main__':  # the first command-line argument is a Python expression, e.g. "main_weave()"
+    sys.exit(eval(sys.argv[1]))  # NOTE(review): eval of a raw argv string -- only run with trusted input