view doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c

plugin_JB - added SPAWN control element and demo program
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 22 Sep 2010 01:37:55 -0400
parents
children 9fac28d80fb7
line wrap: on
line source

"""plugin_JB_main - main functions illustrating control flow library"""

from plugin_JB import * #TODO: don't do this


####################################################
# [Dummy] Components involved in learning algorithms

class Dataset(object):
    """Endless, seekable stream over an indexable collection of examples.

    `next` hands out `data[pos]` in order and wraps back to position 0 once
    the last element has been returned.
    """

    def __init__(self, data):
        """Wrap `data` (anything supporting indexing and `len`), starting at position 0."""
        self.data = data
        self.pos = 0

    def seek(self, pos):
        """Move the cursor so that the following `next` call returns `data[pos]`."""
        self.pos = pos

    def next(self):
        """Return the example under the cursor, then advance (wrapping at the end)."""
        example = self.data[self.pos]
        following = self.pos + 1
        # Stepping past the last example restarts the stream from the beginning.
        self.pos = 0 if following == len(self.data) else following
        return example

class KFold(object):
    """[Dummy] K-fold evaluation driver wrapped around a seekable data stream.

    Attributes:
        data:   stream object providing `seek(pos)` and `next()`.
        K:      number of folds.
        k:      index of the fold in progress; -1 until `next_fold` is called.
        scores: list with one slot per fold, filled in by `store_scores`.
    """

    def __init__(self, data, K):
        self.data = data
        self.K = K
        self.k = -1
        self.scores = [None] * K

    def next_fold(self):
        """Move on to the next fold and restart the data stream."""
        self.k += 1
        self.data.seek(0)

    def next(self):
        """Return the next training example from the underlying stream."""
        # TODO: skip the examples that are omitted in this split
        return self.data.next()

    def init_test(self):
        """Hook invoked before the test examples are drawn; currently a no-op."""

    def next_test(self):
        """Return the next test example (currently just the next stream element)."""
        return self.data.next()

    def test_size(self):
        """Return the number of test examples per fold (hard-coded to 5)."""
        return 5

    def store_scores(self, scores):
        """Record `scores` in the slot of the fold in progress."""
        self.scores[self.k] = scores

    def prog(self, clear, train, test):
        """Assemble the k-fold control program from three sub-programs.

        The returned REPEAT runs K times over this sequence: `next_fold`,
        `clear`, `train`, `init_test`, a BUFFER_REPEAT of `test_size()` rounds
        of (`next_test`, `test`), and finally a FILT around `store_scores`.
        NOTE(review): the exact semantics of REPEAT / BUFFER_REPEAT / FILT are
        defined in plugin_JB -- confirm there.
        """
        fold_steps = [
            CALL(self.next_fold),
            clear,
            train,
            CALL(self.init_test),
            ]
        # Draw one test example and run `test` on it, test_size() times over.
        one_test_round = SEQ([CALL(self.next_test), test])
        fold_steps.append(BUFFER_REPEAT(self.test_size(), one_test_round))
        fold_steps.append(FILT(self.store_scores))
        return REPEAT(self.K, fold_steps)

class PCA_Analysis(object):
    """[Dummy] PCA component: keeps a mean plus placeholder eigen-quantities."""

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset the mean, eigenvectors and eigenvalues to 0."""
        self.mean = self.eigvecs = self.eigvals = 0

    def analyze(self, X):
        """Record the mean of `X` over axis 0 and set both eigen-placeholders to 1."""
        self.mean = numpy.mean(X, axis=0)
        self.eigvecs = self.eigvals = 1

    def filt(self, X):
        """Return `X` centered on the stored mean and multiplied by `eigvecs`."""
        centered = X - self.mean
        return centered * self.eigvecs  # TODO: divide by root eigvals?

    def pseudo_inverse(self, Y):
        """Return `Y` unchanged."""
        return Y

class Layer(object):
    """[Dummy] layer that multiplies its input by the stored value `w`."""

    def __init__(self, w):
        self.w = w

    def clear(self):
        """Reset `w` to 0."""
        self.w = 0

    def filt(self, x):
        """Return `w * x`."""
        weight = self.w
        return weight * x

def cd1_update(X, layer, lr):
    """Update `layer.w` in place from observation `X`.

    Adds `X.mean() * lr` to `layer.w`; nothing is returned.
    """
    step = X.mean() * lr  # TODO: not exactly correct math!
    layer.w += step


###############################################################
# Example algorithms written in this control flow mini-language

def main_weave():
    """Use WEAVE to demonstrate the interleaving of two bufferings of a single stream."""
    # One-element list, so that the nested function can update the running total.
    running_total = [0]

    def accumulate(increment):
        # Show the state before the update, then return the updated total.
        print(running_total)
        running_total[0] += increment
        return running_total[0]

    interleaved = WEAVE(1, [
        BUFFER_REPEAT(3, CALL(accumulate, 1)),
        BUFFER_REPEAT(5, CALL(accumulate, 1)),
        ])
    print(interleaved.run())

def main_weave_popen():
    """Use WEAVE and POPEN to demonstrate controlling a program with some
    asynchronous parallelism."""
    short_job = SEQ([POPEN(['sleep', '5']), PRINT('done 1')])
    long_job = SEQ([POPEN(['sleep', '10']), PRINT('done 2')])
    poller = LOOP([
        CALL(print_obj, 'polling...'),
        CALL(time.sleep, 1)])

    # The LOOP would run forever if the WEAVE were not configured to stop after 2 of
    # its elements complete.
    p = WEAVE(2, [short_job, long_job, poller])

    p.run()
    # Note that the program can be run multiple times...
    p.run()

def main_spawn():
    """Illustrate the use of SPAWN to drive a set of control programs in other
    processes."""
    data1 = {0: "blah data1"}
    data2 = {1: "foo data2"}

    worker1 = SPAWN(data1, REPEAT(3, [
        CALL(importable_fn, data1),
        PRINT("hello from 1")]))
    worker2 = SPAWN(data2, REPEAT(1, [
        CALL(importable_fn, data2),
        PRINT("hello from 2")]))
    poller = LOOP([
        CALL(print_obj, 'polling...'),
        CALL(time.sleep, 0.5)])
    p = WEAVE(2, [worker1, worker2, poller])

    # Dump both dictionaries before and after the run so the two states can be compared.
    print('BEFORE')
    for payload in (data1, data2):
        print(payload)
    p.run()
    print('AFTER')
    for payload in (data1, data2):
        print(payload)

def main_kfold_dbn():
    """Define and run the k-fold evaluation of a (toy) DBN.

    Uses many of the control-flow elements.  The algorithm is not quite right,
    but the example shows off all of the required control-flow elements I
    think.  The program is deep-copied, the copy is run, and the scores stored
    in the copy's KFold object are printed.
    """

    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13,1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)

    pca_batchsize=1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm

    train_pca = SEQ([
        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)),
        FILT(pca.analyze)])

    train_layer1 = REPEAT(n_cd_updates_layer1, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(cd1_update, layer=layer1, lr=.01)])

    train_layer2 = REPEAT(n_cd_updates_layer2, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(layer1.filt),
        FILT(cd1_update, layer=layer2, lr=.01)])

    kfold_prog = kf.prog(
            clear = SEQ([   # FRAGMENT 1: this bit is the reset/clear stage
                CALL(pca.clear),
                CALL(layer1.clear),
                CALL(layer2.clear),
                ]),
            train = SEQ([
                train_pca,
                WEAVE(1, [    # Silly example of how to do debugging / logging with WEAVE
                    train_layer1,
                    LOOP(CALL(print_obj_attr, layer1, 'w'))]),
                train_layer2,
                ]),
            test=SEQ([
                FILT(pca.filt),       # may want to allow this SEQ to be
                FILT(layer1.filt),    # optimized into a shorter one that
                FILT(layer2.filt),    # compiles these calls together with
                FILT(numpy.mean)]))   # Theano

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)       # programs can be copied

    try:
        # Round-trip through pickle only to show whether it works; the result
        # is not used afterwards.
        cPickle.loads(cPickle.dumps(pkg1))
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit still propagate instead of being silently swallowed.
        sys.stderr.write("pickling doesnt work, but it can be fixed I think\n")

    pkg = pkg2

    # running a program updates the variables in its package, but not the other package
    pkg['prog'].run()
    print(pkg['kf'].scores)


if __name__ == '__main__':
    # The first command-line argument is a Python expression selecting the demo
    # to run, e.g. "main_weave()"; its value becomes the process exit status.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of an IndexError traceback.
        sys.exit("usage: %s '<python expression>'   e.g. 'main_weave()'" % sys.argv[0])
    # NOTE(security): eval() executes arbitrary code taken from the command
    # line.  Never pass untrusted input to this script.
    sys.exit(eval(sys.argv[1]))