changeset 1199:98954d8cb92d

v2planning - modifs to plugin_JB
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 20 Sep 2010 02:56:11 -0400
parents 1387771296a8
children acfd5e747a75
files doc/v2_planning/plugin_JB.py
diffstat 1 files changed, 60 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/doc/v2_planning/plugin_JB.py	Mon Sep 20 02:34:23 2010 -0400
+++ b/doc/v2_planning/plugin_JB.py	Mon Sep 20 02:56:11 2010 -0400
@@ -95,7 +95,6 @@
     def step(self):
         pass
 
-
 class BUFFER_REPEAT(ELEMENT):
     """
     Accumulate a number of return values into one list / array.
@@ -160,7 +159,7 @@
         else:
             return self.fn(*self.args, **self.kwargs)
     def __getstate__(self):
-        rval = self.__dict__
+        rval = dict(self.__dict__)
         if type(self.fn) is type(self.step): #instancemethod
             fn = rval.pop('fn')
             rval['i fn'] = fn.im_func, fn.im_self, fn.im_class
@@ -170,8 +169,12 @@
             dct['fn'] = type(self.step)(*dct.pop('i fn'))
         self.__dict__.update(dct)
 
-def FILT(*args, **kwargs):
-    return CALL(use_start_arg=True, *args, **kwargs)
+def FILT(fn, **kwargs):
+    """
+    Return a CALL object that uses the return value from the previous CALL as the first and
+    only positional argument.
+    """
+    return CALL(fn, use_start_arg=True, **kwargs)
 
 def CHOOSE(which, options):
     """
@@ -284,6 +287,16 @@
     def store_scores(self, scores):
         self.scores[self.k] = scores
 
+    def prog(self, clear, train, test):
+        return REPEAT(self.K, [
+            CALL(self.next_fold),
+            clear,
+            train,
+            CALL(self.init_test),
+            BUFFER_REPEAT(self.test_size(),
+                SEQ([ CALL(self.next_test), test])),
+            FILT(self.store_scores) ])
+
 class PCA_Analysis(object):
     def __init__(self):
         self.clear()
@@ -316,14 +329,9 @@
 def no_op(*args, **kwargs):
     pass
 
-class cd1_update(object):
-    def __init__(self, layer, lr):
-        self.layer = layer
-        self.lr = lr
-
-    def __call__(self, X):
-        # update self.layer from observation X
-        self.layer.w += X.mean() * self.lr #TODO: not exactly correct math
+def cd1_update(X, layer, lr):
+    # update layer.w in place from observation X
+    layer.w += X.mean() * lr #TODO: not exactly correct math!
 
 def simple_main():
 
@@ -346,53 +354,60 @@
     layer2 = Layer(w=3)
     kf = KFold(dataset, K=10)
 
+    pca_batchsize=1000
+    cd_batchsize = 5
+    n_cd_updates_layer1 = 10
+    n_cd_updates_layer2 = 10
+
     # create algorithm
 
     train_pca = SEQ([
-        BUFFER_REPEAT(1000, CALL(kf.next)), 
+        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)), 
         FILT(pca.analyze)])
 
-    train_layer1 = REPEAT(10, [
-        BUFFER_REPEAT(10, CALL(kf.next)),
+    train_layer1 = REPEAT(n_cd_updates_layer1, [
+        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
         FILT(pca.filt), 
-        FILT(cd1_update(layer1, lr=.01))])
+        FILT(cd1_update, layer=layer1, lr=.01)])
 
-    train_layer2 = REPEAT(10, [
-        BUFFER_REPEAT(10, CALL(kf.next)),
+    train_layer2 = REPEAT(n_cd_updates_layer2, [
+        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
         FILT(pca.filt), 
         FILT(layer1.filt),
-        FILT(cd1_update(layer2, lr=.01))])
-
-    train_prog = SEQ([
-        train_pca,
-        WEAVE([
-            train_layer1, 
-            LOOP(CALL(print_obj_attr, layer1, 'w'))]),
-        train_layer2,
-        ])
+        FILT(cd1_update, layer=layer2, lr=.01)])
 
-    kfold_prog = REPEAT(10, [
-        CALL(kf.next_fold),
-        CALL(pca.clear),
-        CALL(layer1.clear),
-        CALL(layer2.clear),
-        train_prog,
-        CALL(kf.init_test),
-        BUFFER_REPEAT(kf.test_size(),
-            SEQ([
-                CALL(kf.next_test),  
+    kfold_prog = kf.prog(
+            clear = SEQ([   # FRAGMENT 1: this bit is the reset/clear stage
+                CALL(pca.clear),
+                CALL(layer1.clear),
+                CALL(layer2.clear),
+                ]),
+            train = SEQ([
+                train_pca,
+                WEAVE([    # Silly example of how to do debugging / logging with WEAVE
+                    train_layer1, 
+                    LOOP(CALL(print_obj_attr, layer1, 'w'))]),
+                train_layer2,
+                ]),
+            test=SEQ([
                 FILT(pca.filt),       # may want to allow this SEQ to be 
                 FILT(layer1.filt),    # optimized into a shorter one that
-                FILT(layer2.filt),
-                FILT(numpy.mean)])), # chains together theano graphs
-        FILT(kf.store_scores),
-        ])
+                FILT(layer2.filt),    # compiles these calls together with 
+                FILT(numpy.mean)]))   # Theano
+
+    pkg1 = dict(prog=kfold_prog, kf=kf)
+    pkg2 = copy.deepcopy(pkg1)       # programs can be copied
 
-    vm = VirtualMachine(kfold_prog)
+    try:
+        pkg3 = cPickle.loads(cPickle.dumps(pkg1)) 
+    except:
+        print >> sys.stderr, "pickling doesnt work, but it can be fixed I think"
 
-    #vm2 = copy.deepcopy(vm)
-    vm.run(n_steps=200000)
-    print kf.scores
+    pkg = pkg2
+
+    # running a program updates the variables in its package, but not the other package
+    VirtualMachine(pkg['prog']).run()
+    print pkg['kf'].scores
 
 
 if __name__ == '__main__':