view doc/v2_planning/arch_src/plugin_JB_main.py @ 1419:cff305ad9f60

TensorFnDataset - added x_ attribute that caches the dataset function return value, but does not get pickled.
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 04 Feb 2011 16:05:22 -0500
parents 9fac28d80fb7
children
line wrap: on
line source

"""plugin_JB_main - main functions illustrating control flow library"""

from plugin_JB import * #TODO: don't do this


####################################################
# [Dummy] Components involved in learning algorithms

class Dataset(object):
    """[Dummy] sequential cursor over an indexable collection of examples.

    `data` only has to support slicing and `len()`.
    """

    def __init__(self, data):
        self.data = data
        self.pos = 0

    def next(self, n=1):
        """Return the slice of up to `n` examples starting at the cursor.

        The cursor then advances by `n`.  Once it reaches or passes the end of
        the data it is rewound to 0, so the following call starts over from
        the beginning.  The last batch before a rewind may therefore contain
        fewer than `n` examples.
        """
        start = self.pos
        stop = start + n
        batch = self.data[start:stop]
        self.pos = stop
        if stop >= len(self.data):
            # Ran off (or exactly onto) the end: wrap around.
            self.pos = 0
        return batch

    def seek(self, pos):
        """Move the cursor to the absolute position `pos`."""
        self.pos = pos

class KFold(object):
    """[Dummy] K-fold evaluation driver wrapped around a seekable stream.

    `data` must provide `seek(pos)` and `next(n)`.  `self.scores` holds one
    slot per fold (initially None) and `self.k` is the index of the current
    fold; it is -1 until `next_fold` has been called for the first time.
    """

    def __init__(self, data, K):
        self.K = K
        self.k = -1
        self.data = data
        self.scores = [None] * K

    def next_fold(self):
        """Move on to the next fold and rewind the underlying stream."""
        self.k += 1
        self.data.seek(0)  # restart the stream

    def next(self, n=1):
        """Fetch training examples by delegating to `self.data.next(n)`."""
        #TODO: skip the examples that are omitted in this split
        return self.data.next(n)

    def init_test(self):
        """Hook run before the test loop of a fold; currently does nothing."""

    def next_test(self, n=1):
        """Fetch test examples; reads from the same stream as `next`."""
        return self.data.next(n)

    def test_size(self):
        """Return the number of test iterations per fold (hard-coded to 5)."""
        return 5

    def store_scores(self, scores):
        """Save `scores` in the slot belonging to the current fold."""
        self.scores[self.k] = scores

    def prog(self, clear, train, test, test_data_reg, test_counter_reg, test_scores_reg):
        """Assemble the control-flow program for a K-fold run.

        `clear`, `train` and `test` are sub-programs supplied by the caller.
        The result is a REPEAT over `self.K` iterations of a SEQ made of:
        CALL(next_fold), `clear`, `train`, CALL(init_test), a nested REPEAT of
        [CALL(next_test) with `_set=test_data_reg`, `test`] using
        `counter=test_counter_reg`, and finally
        CALL(store_scores, test_scores_reg).

        NOTE(review): `_set` presumably names the register receiving the
        call's return value -- confirm against plugin_JB.
        """
        start_fold = CALL(self.next_fold)
        prepare_test = CALL(self.init_test)
        # test_size() is evaluated once, while the program is being built.
        n_test_steps = self.test_size()
        fetch_test = CALL(self.next_test, _set=test_data_reg)
        test_loop = REPEAT(
            n_test_steps,
            SEQ([fetch_test, test]),
            counter=test_counter_reg)
        save_scores = CALL(self.store_scores, test_scores_reg)
        one_fold = SEQ([
            start_fold,
            clear,
            train,
            prepare_test,
            test_loop,
            save_scores])
        return REPEAT(self.K, one_fold)

class PCA_Analysis(object):
    """[Dummy] PCA stand-in: only the mean is really estimated.

    `eigvecs` and `eigvals` are placeholder scalars (0 when cleared, 1 after
    `analyze`), not an actual eigendecomposition.
    """

    def __init__(self):
        self.clear()

    def clear(self):
        """Reset mean, eigvecs and eigvals to 0."""
        self.mean = self.eigvecs = self.eigvals = 0

    def analyze(self, X):
        """Store the mean of `X` along axis 0 and set the placeholders to 1."""
        self.mean = numpy.mean(X, axis=0)
        self.eigvecs = self.eigvals = 1

    def filt(self, X):
        """Return `X` centred on the stored mean, times `eigvecs`."""
        centred = X - self.mean
        #TODO: divide by root eigvals?
        return centred * self.eigvecs

    def pseudo_inverse(self, Y):
        """Return `Y` unchanged (placeholder for the inverse mapping)."""
        return Y

class Layer(object):
    """[Dummy] layer that holds a single multiplicative weight `w`."""

    def __init__(self, w):
        self.w = w

    def clear(self):
        """Reset the weight to 0."""
        self.w = 0

    def filt(self, x):
        """Return the weight applied to `x`, i.e. `self.w * x`."""
        weight = self.w
        return weight * x

def cd1_update(X, layer, lr):
    """Update `layer.w` in place from the observation batch `X`.

    The weight is incremented by `X.mean() * lr`.  This is only a stand-in
    for a real CD-1 step.
    """
    #TODO: not exactly correct math!
    step = X.mean() * lr
    layer.w += step


###############################################################
# Example algorithms written in this control flow mini-language

def main_weave():
    # Uses weave to demonstrate the interleaving of two bufferings of a single stream

    l = [0]
    def f(a):
        print l
        l[0] += a
        return l[0]

    print WEAVE(1, [
        REPEAT(3,CALL(f,1)),
        REPEAT(5,CALL(f,1)),
        ]).run()

def main_weave_popen():
    """Demo: WEAVE plus POPEN controlling a program with some asynchronous
    parallelism.

    Two `sleep` subprocesses (5 and 10 seconds) each print a message when
    done, while a third element polls once per second.
    """
    short_job = SEQ([POPEN(['sleep', '5']), PRINT('done 1')])
    long_job = SEQ([POPEN(['sleep', '10']), PRINT('done 2')])
    poller = LOOP(SEQ([
        CALL(print_obj, 'polling...'),
        CALL(time.sleep, 1)]))

    # The LOOP would run forever if the WEAVE were not configured to stop
    # after 2 of its elements complete.
    p = WEAVE(2, [short_job, long_job, poller])

    p.run()
    # Note that the program can be run multiple times...
    p.run()

def main_spawn():
    # illustate the use of SPAWN to drive a set of control programs 
    # in other processes
    data1 = {0:"blah data1"}
    data2 = {1:"foo data2"}
    p = WEAVE(2,[
        SPAWN(data1, REPEAT(3, SEQ([
            CALL(importable_fn, data1), 
            PRINT("hello from 1")]))),
        SPAWN(data2, REPEAT(1, SEQ([
            CALL(importable_fn, data2), 
            PRINT("hello from 2")]))),
        LOOP(SEQ([ 
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 0.5)]))])
    print 'BEFORE'
    print data1
    print data2
    p.run()
    print 'AFTER'
    print data1
    print data2

def main_kfold_dbn():
    # Uses many of the control-flow elements to define the k-fold evaluation of a dbn
    # The algorithm is not quite right, but the example shows off all of the required
    # control-flow elements I think.

    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13,1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)
    reg = Registers()

    pca_batchsize=1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm

    train_pca = SEQ([
        CALL(kf.next, pca_batchsize, _set=reg('x')), 
        CALL(pca.analyze, reg('x'))])

    train_layer1 = REPEAT(n_cd_updates_layer1, SEQ([
        CALL(kf.next, cd_batchsize, _set=reg('x')),
        CALL(pca.filt, reg('x'), _set=reg('x')), 
        CALL(cd1_update, reg('x'), layer=layer1, lr=.01)]))

    train_layer2 = REPEAT(n_cd_updates_layer2, SEQ([
        CALL(kf.next, cd_batchsize, _set=reg('x')),
        CALL(pca.filt, reg('x'), _set=reg('x')), 
        CALL(layer1.filt, reg('x'), _set=reg('x')),
        CALL(cd1_update, reg('x'), layer=layer2, lr=.01)]))

    kfold_prog = kf.prog(
            clear = SEQ([   # FRAGMENT 1: this bit is the reset/clear stage
                CALL(pca.clear),
                CALL(layer1.clear),
                CALL(layer2.clear),
                ]),
            train = SEQ([
                train_pca,
                WEAVE(1, [    # Silly example of how to do debugging / loggin with WEAVE
                    train_layer1, 
                    LOOP(PRINT(reg('x')))]),
                train_layer2,
                ]),
            test=SEQ([
                CALL(pca.filt, reg('testx'), _set=reg('x')),  
                CALL(layer1.filt, reg('x'), _set=reg('x')),
                CALL(layer2.filt, reg('x'), _set=reg('x')),
                CALL(numpy.mean, reg('x'), _set=reg('score'))]),
            test_data_reg=reg('testx'),
            test_counter_reg=reg('i'),
            test_scores_reg=reg('score'))

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)       # programs can be copied

    try:
        pkg3 = cPickle.loads(cPickle.dumps(pkg1)) 
    except:
        print >> sys.stderr, "pickling doesnt work, but it can be fixed I think"

    pkg = pkg2

    # running a program updates the variables in its package, but not the other package
    pkg['prog'].run()
    print pkg['kf'].scores


if __name__ == '__main__':
    try:
        sys.argv[1]
    except:
        print """You have to tell which main function to use, try:
    - python plugin_JB_main.py 'main_kfold_dbn()'
    - python plugin_JB_main.py 'main_weave()'
    - python plugin_JB_main.py 'main_weave_popen()'
    - python plugin_JB_main.py 'main_spawn()'
        """
        sys.exit(1)
    sys.exit(eval(sys.argv[1]))