pylearn: doc/v2_planning/arch_src/plugin_JB

comparison doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c

plugin_JB - added SPAWN control element and demo program

author	James Bergstra <bergstrj@iro.umontreal.ca>
date	Wed, 22 Sep 2010 01:37:55 -0400
parents
children	9fac28d80fb7

comparison

equal deleted inserted replaced

-:e7ac87720fee
+:478bb1f8215c
+"""plugin_JB_main - main functions illustrating control flow library"""
+from plugin_JB import * #TODO: don't do this
+####################################################
+# [Dummy] Components involved in learning algorithms
class Dataset(object):
    """Dummy dataset: an endlessly cycling stream over an indexable sequence.

    `data` must support integer indexing and `len()`.
    """

    def __init__(self, data):
        """Wrap `data` and position the stream at its first element."""
        self.pos = 0
        self.data = data

    def next(self):
        """Return the element at the current position and advance by one.

        When the position reaches `len(self.data)` it wraps back to 0, so
        the stream never runs out.
        """
        rval = self.data[self.pos]
        self.pos += 1
        if self.pos == len(self.data):
            # wrap around: the next call starts over from the first element
            self.pos = 0
        return rval

    def seek(self, pos):
        """Move the stream so that the next element returned is `data[pos]`."""
        self.pos = pos
class KFold(object):
    """Dummy K-fold driver around a seekable data stream.

    `data` must provide `seek(pos)` and `next()`.  One score slot is kept
    per fold in `self.scores`.
    """

    def __init__(self, data, K):
        """Store the stream and allocate `K` empty score slots."""
        self.data = data
        # index of the current fold; -1 until next_fold() is first called
        self.k = -1
        self.scores = [None]*K
        self.K = K

    def next_fold(self):
        """Advance to the next fold and rewind the data stream."""
        self.k += 1
        self.data.seek(0) # restart the stream

    def next(self):
        """Return the next training example from the stream."""
        #TODO: skip the examples that are omitted in this split
        return self.data.next()

    def init_test(self):
        """Hook called before the test phase of a fold; currently a no-op."""
        pass

    def next_test(self):
        """Return the next test example (currently just the next stream item)."""
        return self.data.next()

    def test_size(self):
        """Return the number of test examples per fold (hard-coded to 5)."""
        return 5

    def store_scores(self, scores):
        """Record `scores` in the slot of the current fold."""
        self.scores[self.k] = scores

    def prog(self, clear, train, test):
        """Build the control-flow program that evaluates all `K` folds.

        `clear`, `train` and `test` are control-flow elements supplied by
        the caller.  REPEAT, CALL, BUFFER_REPEAT, SEQ and FILT are not
        defined in this file (presumably provided by plugin_JB -- confirm
        their exact semantics there).
        """
        return REPEAT(self.K, [
            CALL(self.next_fold),
            clear,
            train,
            CALL(self.init_test),
            BUFFER_REPEAT(self.test_size(),
                SEQ([ CALL(self.next_test), test])),
            FILT(self.store_scores) ])
class PCA_Analysis(object):
    """Dummy stand-in for a PCA component.

    Only the mean is actually estimated; the eigenvectors and eigenvalues
    are placeholder scalars (0 after clear(), 1 after analyze()).
    """

    def __init__(self):
        """Start in the cleared state."""
        self.clear()

    def clear(self):
        """Reset mean, eigvecs and eigvals to 0."""
        self.mean = 0
        self.eigvecs=0
        self.eigvals=0

    def analyze(self, X):
        """Fit to `X`: store its mean along axis 0 and set the placeholders to 1."""
        self.mean = numpy.mean(X, axis=0)
        self.eigvecs=1
        self.eigvals=1

    def filt(self, X):
        """Return `X` centred by the stored mean and scaled by `eigvecs`."""
        return (X - self.mean) * self.eigvecs #TODO: divide by root eigvals?

    def pseudo_inverse(self, Y):
        """Return `Y` unchanged (placeholder for the inverse transform)."""
        return Y
class Layer(object):
    """Dummy layer holding a single weight `w`."""

    def __init__(self, w):
        """Store the initial weight."""
        self.w = w

    def filt(self, x):
        """Return the input multiplied by the weight."""
        return self.w*x

    def clear(self):
        """Reset the weight to 0."""
        self.w =0
def cd1_update(X, layer, lr):
    """Update `layer.w` in place from the observation batch `X`.

    Adds `X.mean() * lr` to the weight, so `X` must provide a `mean()`
    method.  Returns None.
    """
    layer.w += X.mean() * lr #TODO: not exactly correct math!
+###############################################################
+# Example algorithms written in this control flow mini-language
def main_weave():
    """Demonstrate WEAVE interleaving two bufferings of a single stream.

    Both BUFFER_REPEAT elements call the same closure `f`, which shares one
    counter.  The result of running the WEAVE program is printed.
    """
    # one-element list so the nested function can mutate the shared counter
    counter = [0]

    def f(a):
        # show the counter before this call advances it
        print(counter)
        counter[0] += a
        return counter[0]

    print(WEAVE(1, [
        BUFFER_REPEAT(3,CALL(f,1)),
        BUFFER_REPEAT(5,CALL(f,1)),
        ]).run())
def main_weave_popen():
    """Demonstrate WEAVE and POPEN controlling a program with some
    asynchronous parallelism.

    Two `sleep` subprocesses run alongside a polling loop; the program is
    then run a second time to show that it can be re-run.
    """
    p = WEAVE(2,[
        SEQ([POPEN(['sleep', '5']), PRINT('done 1')]),
        SEQ([POPEN(['sleep', '10']), PRINT('done 2')]),
        LOOP([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 1)])])
    # The LOOP would run forever if the WEAVE were not configured to stop
    # after 2 of its elements complete.
    p.run()
    # Note that the program can be run multiple times...
    p.run()
def main_spawn():
    """Illustrate the use of SPAWN to drive a set of control programs in
    other processes.

    Prints `data1` and `data2` before and after the run so that any change
    made to them by the program is visible.
    """
    data1 = {0:"blah data1"}
    data2 = {1:"foo data2"}
    p = WEAVE(2,[
        SPAWN(data1, REPEAT(3, [
            CALL(importable_fn, data1),
            PRINT("hello from 1")])),
        SPAWN(data2, REPEAT(1, [
            CALL(importable_fn, data2),
            PRINT("hello from 2")])),
        LOOP([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 0.5)])])
    print('BEFORE')
    print(data1)
    print(data2)
    p.run()
    print('AFTER')
    print(data1)
    print(data2)
def main_kfold_dbn():
    """Define and run the k-fold evaluation of a (dummy) DBN.

    Uses many of the control-flow elements.  The algorithm is not quite
    right, but the example shows off all of the required control-flow
    elements I think.  The per-fold scores are printed at the end.
    """
    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13,1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)

    pca_batchsize=1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm
    train_pca = SEQ([
        BUFFER_REPEAT(pca_batchsize, CALL(kf.next)),
        FILT(pca.analyze)])

    train_layer1 = REPEAT(n_cd_updates_layer1, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(cd1_update, layer=layer1, lr=.01)])

    train_layer2 = REPEAT(n_cd_updates_layer2, [
        BUFFER_REPEAT(cd_batchsize, CALL(kf.next)),
        FILT(pca.filt),
        FILT(layer1.filt),
        FILT(cd1_update, layer=layer2, lr=.01)])

    kfold_prog = kf.prog(
        clear = SEQ([   # FRAGMENT 1: this bit is the reset/clear stage
            CALL(pca.clear),
            CALL(layer1.clear),
            CALL(layer2.clear),
            ]),
        train = SEQ([
            train_pca,
            WEAVE(1, [    # Silly example of how to do debugging / logging with WEAVE
                train_layer1,
                LOOP(CALL(print_obj_attr, layer1, 'w'))]),
            train_layer2,
            ]),
        test=SEQ([
            FILT(pca.filt),       # may want to allow this SEQ to be
            FILT(layer1.filt),    # optimized into a shorter one that
            FILT(layer2.filt),    # compiles these calls together with
            FILT(numpy.mean)]))   # Theano

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)       # programs can be copied

    try:
        # round-trip through pickle only to check that it works; the result
        # is not used afterwards
        cPickle.loads(cPickle.dumps(pkg1))
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt
        # and SystemExit are not swallowed here
        sys.stderr.write("pickling doesnt work, but it can be fixed I think\n")

    pkg = pkg2

    # running a program updates the variables in its package, but not the other package
    pkg['prog'].run()

    print(pkg['kf'].scores)
if __name__ == '__main__':
    # The demo to run is given as a Python expression on the command line,
    # e.g. "main_weave()"; its value is used as the process exit status.
    if len(sys.argv) < 2:
        sys.stderr.write(
            "usage: %s '<expression, e.g. main_weave()>'\n" % sys.argv[0])
        sys.exit(2)
    # NOTE(review): eval() executes arbitrary code taken from the command
    # line.  Acceptable for a local demo script; never expose this to
    # untrusted input.
    sys.exit(eval(sys.argv[1]))

Mercurial > pylearn

comparison doc/v2_planning/arch_src/plugin_JB_main.py @ 1212:478bb1f8215c