Mercurial > pylearn
view doc/v2_planning/arch_src/plugin_JB_main.py @ 1419:cff305ad9f60
TensorFnDataset - added x_ attribute that caches the dataset function return
value, but does not get pickled.
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 04 Feb 2011 16:05:22 -0500 |
parents | 9fac28d80fb7 |
children |
line wrap: on
line source
"""plugin_JB_main - main functions illustrating control flow library""" from plugin_JB import * #TODO: don't do this #################################################### # [Dummy] Components involved in learning algorithms class Dataset(object): def __init__(self, data): self.pos = 0 self.data = data def next(self, n=1): rval = self.data[self.pos:self.pos+n] self.pos += n if self.pos >= len(self.data): self.pos = 0 return rval def seek(self, pos): self.pos = pos class KFold(object): def __init__(self, data, K): self.data = data self.k = -1 self.scores = [None]*K self.K = K def next_fold(self): self.k += 1 self.data.seek(0) # restart the stream def next(self, n=1): #TODO: skip the examples that are ommitted in this split return self.data.next(n) def init_test(self): pass def next_test(self, n=1): return self.data.next(n) def test_size(self): return 5 def store_scores(self, scores): self.scores[self.k] = scores def prog(self, clear, train, test, test_data_reg, test_counter_reg, test_scores_reg): return REPEAT(self.K, SEQ([ CALL(self.next_fold), clear, train, CALL(self.init_test), REPEAT(self.test_size(), SEQ([ CALL(self.next_test, _set=test_data_reg), test]), counter=test_counter_reg), CALL(self.store_scores, test_scores_reg)])) class PCA_Analysis(object): def __init__(self): self.clear() def clear(self): self.mean = 0 self.eigvecs=0 self.eigvals=0 def analyze(self, X): self.mean = numpy.mean(X, axis=0) self.eigvecs=1 self.eigvals=1 def filt(self, X): return (X - self.mean) * self.eigvecs #TODO: divide by root eigvals? def pseudo_inverse(self, Y): return Y class Layer(object): def __init__(self, w): self.w = w def filt(self, x): return self.w*x def clear(self): self.w =0 def cd1_update(X, layer, lr): # update self.layer from observation X layer.w += X.mean() * lr #TODO: not exactly correct math! 
###############################################################
# Example algorithms written in this control flow mini-language
#
# NOTE: every print below uses the single-argument parenthesised form. Under
# Python 2 that is still a print statement producing the same output, and it
# also parses as a function call under Python 3.

def main_weave():
    """Use WEAVE to demonstrate the interleaving of two bufferings of a
    single stream."""
    acc = [0]  # one-element list so the closure below can mutate the total

    def f(a):
        print(acc)
        acc[0] += a
        return acc[0]

    print(WEAVE(1, [
        REPEAT(3, CALL(f, 1)),
        REPEAT(5, CALL(f, 1)),
        ]).run())


def main_weave_popen():
    """Use WEAVE and POPEN to demonstrate the control of a program with some
    asynchronous parallelism."""
    p = WEAVE(2, [
        SEQ([POPEN(['sleep', '5']), PRINT('done 1')]),
        SEQ([POPEN(['sleep', '10']), PRINT('done 2')]),
        LOOP(SEQ([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 1)]))])
    # The LOOP would run forever if the WEAVE were not configured to stop
    # after 2 of its elements complete.

    p.run()
    # Note that the program can be run multiple times...
    p.run()


def main_spawn():
    """Illustrate the use of SPAWN to drive a set of control programs in
    other processes.

    `data1` and `data2` are printed before and after the run.
    """
    data1 = {0: "blah data1"}
    data2 = {1: "foo data2"}
    p = WEAVE(2, [
        SPAWN(data1,
              REPEAT(3,
                     SEQ([
                         CALL(importable_fn, data1),
                         PRINT("hello from 1")]))),
        SPAWN(data2,
              REPEAT(1,
                     SEQ([
                         CALL(importable_fn, data2),
                         PRINT("hello from 2")]))),
        LOOP(SEQ([
            CALL(print_obj, 'polling...'),
            CALL(time.sleep, 0.5)]))])
    print('BEFORE')
    print(data1)
    print(data2)
    p.run()
    print('AFTER')
    print(data1)
    print(data2)


def main_kfold_dbn():
    """Define and run the k-fold evaluation of a (dummy) DBN.

    Uses many of the control-flow elements. The algorithm is not quite
    right, but the example shows off all of the required control-flow
    elements I think. Prints the per-fold scores at the end.
    """
    # create components
    dataset = Dataset(numpy.random.RandomState(123).randn(13, 1))
    pca = PCA_Analysis()
    layer1 = Layer(w=4)
    layer2 = Layer(w=3)
    kf = KFold(dataset, K=10)
    reg = Registers()

    pca_batchsize = 1000
    cd_batchsize = 5
    n_cd_updates_layer1 = 10
    n_cd_updates_layer2 = 10

    # create algorithm

    train_pca = SEQ([
        CALL(kf.next, pca_batchsize, _set=reg('x')),
        CALL(pca.analyze, reg('x'))])

    train_layer1 = REPEAT(n_cd_updates_layer1, SEQ([
        CALL(kf.next, cd_batchsize, _set=reg('x')),
        CALL(pca.filt, reg('x'), _set=reg('x')),
        CALL(cd1_update, reg('x'), layer=layer1, lr=.01)]))

    train_layer2 = REPEAT(n_cd_updates_layer2, SEQ([
        CALL(kf.next, cd_batchsize, _set=reg('x')),
        CALL(pca.filt, reg('x'), _set=reg('x')),
        CALL(layer1.filt, reg('x'), _set=reg('x')),
        CALL(cd1_update, reg('x'), layer=layer2, lr=.01)]))

    kfold_prog = kf.prog(
        clear=SEQ([  # FRAGMENT 1: this bit is the reset/clear stage
            CALL(pca.clear),
            CALL(layer1.clear),
            CALL(layer2.clear),
            ]),
        train=SEQ([
            train_pca,
            WEAVE(1, [  # Silly example of how to do debugging / logging with WEAVE
                train_layer1,
                LOOP(PRINT(reg('x')))]),
            train_layer2,
            ]),
        test=SEQ([
            CALL(pca.filt, reg('testx'), _set=reg('x')),
            CALL(layer1.filt, reg('x'), _set=reg('x')),
            CALL(layer2.filt, reg('x'), _set=reg('x')),
            CALL(numpy.mean, reg('x'), _set=reg('score'))]),
        test_data_reg=reg('testx'),
        test_counter_reg=reg('i'),
        test_scores_reg=reg('score'))

    pkg1 = dict(prog=kfold_prog, kf=kf)
    pkg2 = copy.deepcopy(pkg1)  # programs can be copied

    # Probe whether the package survives a pickle round-trip; the result is
    # not needed. Catch Exception rather than using a bare except so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    try:
        cPickle.loads(cPickle.dumps(pkg1))
    except Exception:
        sys.stderr.write(
            "pickling doesnt work, but it can be fixed I think\n")

    pkg = pkg2
    # running a program updates the variables in its package, but not the
    # other package
    pkg['prog'].run()
    print(pkg['kf'].scores)


if __name__ == '__main__':
    # Explicit argument check instead of try/bare-except around sys.argv[1].
    if len(sys.argv) < 2:
        print("""You have to tell which main function to use, try:
    - python plugin_JB_main.py 'main_kfold_dbn()'
    - python plugin_JB_main.py 'main_weave()'
    - python plugin_JB_main.py 'main_weave_popen()'
    - python plugin_JB_main.py 'main_spawn()'
    """)
        sys.exit(1)
    # NOTE(review): eval() executes arbitrary code taken from the command
    # line. It is kept because the documented usage passes an expression
    # such as 'main_weave()'; do not expose this to untrusted input.
    sys.exit(eval(sys.argv[1]))