# HG changeset patch
# User Razvan Pascanu
# Date 1285166379 14400
# Node ID 681b5e7e3b81ea9198537420dbfc68969f4d7049
# Parent  33513a46c41bcf6b2bc17c6c284dcf0421a73d54
a few comments on James version

diff -r 33513a46c41b -r 681b5e7e3b81 doc/v2_planning/arch_src/plugin_JB_comments_RP.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/v2_planning/arch_src/plugin_JB_comments_RP.txt	Wed Sep 22 10:39:39 2010 -0400
@@ -0,0 +1,60 @@
+I agree with Ian, maybe using caps is not the best idea. It reminds me of BASIC, which I used to do a long
+time ago :). It also makes the code look a bit scary.
+
+I like the approach, and I think it is close to my earliest proposition and to what I am proposing for the
+layer committee (though we did not have a meeting yet). I would, though, write it in a more Theano-like way
+(Ian has an example of how that would look). I would also drop the CALL and FLIT constructs and instead have
+a decorator (or something) that wraps around a function to transform it into a call or flit. I hope this is
+only syntactic sugar (does it change anything in the actual implementation?) that makes things more natural.
+What I want to reach is something that looks very much like Theano, just that now you are creating the graph
+of execution steps. Refactoring what you wrote, this would look like:
+
+x = buffer_repeat( 1000, dataset.next())
+train_pca = pca.analyze(x)
+
+train_pca.run()
+
+If you allow a FLIT to also take multiple inputs (so not just one), which comes naturally in this way of
+writing, you can describe a DAG that not only gives the order of execution but also deals with what takes
+data from what. I'm sorry for not being there yesterday; from what I remember, I have the feeling that for
+you this is done under the hood and not handled by these flow control structures.
+
+To be a bit more explicit, in the way of writing the code above you can see that:
+ a) dataset.next() has to run before pca.analyze
+ b) pca.analyze needs the result (data) object of buffer_repeat( dataset.next())
+
+I've actually elaborated on this idea here and there, and figured out what the result of such a control
+flow thing is, and how to make everything explicit in the graph. Parts of this are in my plugin_RP.py
+(Step 1), though it is a bit of a moving target. I also have a slightly different way of writing REPEAT
+and BUFFER_REPEAT, though I think it is mostly the same. I actually did not know how to deal with
+distributed things until I saw how you deal with that in your code. Here is a copy-pasted version of a
+SDAA in my way of writing:
+
+    ## Layer 1:
+
+    data_x, data_y = GPU_transform(load_mnist())
+    noisy_data_x = gaussian_noise(data_x, amount = 0.1)
+    hidden1 = tanh(dotW_b(data_x, n_units = 200))
+    reconstruct1 = reconstruct(hidden1.replace(data_x, noisy_data_x),
+                               noisy_data_x)
+    err1 = cross_entropy(reconstruct1, data_x)
+    learner1 = SGD(err1)
+
+    # Layer 2 :
+    noisy_hidden1 = gaussian_noise(hidden1, amount = 0.1)
+    hidden2 = tanh(dotW_b(hidden1, n_units = 200))
+    reconstruct2 = reconstruct(hidden2.replace(hidden1, noisy_hidden1),
+                               noisy_hidden1)
+    err2 = cross_entropy(reconstruct2, hidden1)
+    learner2 = SGD(err2)
+
+    # Top layer:
+
+    output = sigmoid(dotW_b(hidden2, n_units = 10))
+    err = cross_entropy(output, data_y)
+    learner = SGD(err)
+
+GPU_transform, gaussian_noise and so on are functions that have been decorated (or classes if you want)
+that you would write using FLIT. Reconstruct, for me, is a different CONTROL FLOW element.
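+
+To make the decorator idea concrete, a minimal stand-alone sketch could look like the following. (This is
+illustration only; `step` and `Node` are placeholder names, not part of any proposed API.) Calling a wrapped
+function records an execution step instead of running it, and run() walks the resulting graph:
+
+    class Node(object):
+        def __init__(self, fn, args):
+            self.fn = fn
+            self.args = args          # parent nodes or plain values
+        def run(self):
+            vals = [a.run() if isinstance(a, Node) else a for a in self.args]
+            return self.fn(*vals)
+
+    def step(fn):
+        def wrapper(*args):
+            return Node(fn, args)     # build the graph instead of executing
+        return wrapper
+
+    @step
+    def double(x):
+        return 2 * x
+
+    @step
+    def add(a, b):
+        return a + b
+
+    y = add(double(3), 4)             # nothing has run yet, only the graph exists
+    print(y.run())                    # -> 10
+
+A real version would of course have to handle buffering, messages and the other FLIT specifics; the point
+is only that plain function-call syntax is enough to record the DAG of execution steps.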
+
+In this case I don't use REPEAT or BUFFER_REPEAT or the other very cool control flow elements, but you
+can easily imagine writing something like
+
+pretrained_in_parallel = weave( learner1, learner2)
+results = spawn(repeat(5000,learner1),repeat(500,learner2))
+
+
diff -r 33513a46c41b -r 681b5e7e3b81 doc/v2_planning/plugin_RP.py
--- a/doc/v2_planning/plugin_RP.py	Wed Sep 22 10:05:48 2010 -0400
+++ b/doc/v2_planning/plugin_RP.py	Wed Sep 22 10:39:39 2010 -0400
@@ -54,7 +54,7 @@
 ======
 
 I will start with step 2 ( because I think that is more of a hot subject
-right now). I will assume you have the write plugins at had.
+right now). I will assume you have the right plugins at hand.
 This is a DBN with early stopping and ..
 
 .. code-block:: python
@@ -76,7 +76,7 @@
 x1 = recurrent_layer()
 x1.t0 = x0
 x1.value = binomial_sample(sigmoid( reconstruct( binomial_sample(h1), x0)))
-cost = free_energy(train_x) - free_energy(x1.tp(5))
+cost = free_energy(train_x) - free_energy(x1.t(5))
 grads = [ (g.var, T.grad(cost.var, g.var)) for g in cost.params ]
 pseudo_cost = sum([ pl.sum(pl.abs(g)) for g in cost.params])
 rbm1 = SGD( cost = pseudo_cost, grads = grads)
@@ -96,17 +96,21 @@
 ### Constructing Modes ###
 
-pretrain_layer1 = ca.mode('pretrain0')
+class pretrain_layer1():
+
+    def register(self):
+        pass
+
 pretrain_layer2 = ca.mode('pretrain1')
 early_stopping = ca.mode('early')
-valid1 = ca.mode('stuff')
+code_block = ca.mode('code_block')
 kfolds = ca.mode('kfolds')
 
 # Construct modes dependency graph
-valid0.include([ pretrian_layer1, pretrain_layer2, early_stopper])
-kfolds.include( valid0 )
+code_block.include([ pretrain_layer1, pretrain_layer2, early_stopper])
+kfolds.include( code_block )
 
-pretrain_layer1.act( on = valid1.begin(), when = always())
+pretrain_layer1.act( on = code_block.begin(), when = always())
 pretrain_layer2.act( on = pretrain_layer1.end(), when = always())
 early_stopping.act ( on = pretrain_layer2.end(), when = always())
@@ -128,44 +132,49 @@
 # Construct pre-training plugins
-rbm1_plugin = plugin_wrapper(rbm1, sched = pretrain_layer1)
+rbm1_plugin = pretrain_layer1.include(plugin_wrapper(rbm1))
+rbm2_plugin = pretrain_layer2.include(plugin_wrapper(rbm2))
+rbm1_counter = pretrain_layer1.include(counter)
+rbm2_counter = pretrain_layer2.include(counter)
+
 rbm1_plugin.listen(Message('init'), update_hyperparameters)
-rbm2_plugin = plugin_wrapper(rbm2, sched = pretrain_layer2)
+rbm1_plugin.listen(Message('continue'), dataset_restart)
 rbm2_plugin.listen(Message('init'), update_hyperparameters)
-rbm1_counter = pretrain_layer1.register(counter)
-rbm2_counter = pretrain_layer2.register(counter)
+rbm2_plugin.listen(Message('continue'), dataset_restart)
 
 # Dependency graph for pre-training layer 0
-rbm1_plugin.act( on = [ pretrain_layer1.begin()
-                        Message('continue') ],
+rbm1_plugin.act( on = [ pretrain_layer1.begin() ,
+                        rbm1_plugin.value() ] ,
                  when = always())
 rbm1_counter.act( on = rbm1_plugin.eod(), when = always() )
 
 # Dependency graph for pre-training layer 1
-rbm2_plugin.act( on = pretrain_layer2.begin(), when = always())
+rbm2_plugin.act( on = [ pretrain_layer2.begin() ,
+                        rbm2_plugin.value() ] ,
+                 when = always())
 pretrain_layer2.stop( on = rbm2_plugin.eod(), when = always())
 
 # Constructing fine-tunning plugins
-learner = early_stopper.register(plugin_wrapper(logreg))
+learner = early_stopper.include(plugin_wrapper(logreg))
+validation = early_stopper.include( plugin_wrapper(valid_err))
+clock = early_stopper.include( ca.generate_clock())
+early_stopper_plugin = early_stopper.include( early_stopper_plugin)
+
+
+def save_model(plugin):
+    cPickle.dump(plugin.object, open('just_the_model.pkl', 'w'))
+
 learner.listen(Message('init'), update_hyperparameters)
-validation = early_stopper.register( plugin_wrapper(valid_err)))
 validation.listen(Message('init'), update_hyperparameters)
-clock = early_stopper.register( ca.generate_clock())
-early_stopper_plugin = early_stopper.register( early_stopper_plugin)
-
-@FnPlugin
-def save_weights(self, message):
-    cPickle.dump(logreg, open('model.pkl'))
-
+validation.listen(early_stopper_plugin.new_best_score(), save_model)
 
 learner.act( on = early_stopper.begin(), when = always())
 learner.act( on = learner.value(), when = always())
 validation.act( on = clock.hour(), when = every(n = 1))
 early_stopper.act( on = validation.value(), when = always())
-save_model.act( on = early_stopper.new_best_error(), when =always())
 
 @FnPlugin
 def kfolds_plugin(self,event):
@@ -183,10 +192,10 @@
         self.fire(Message('terminate'))
 
-kfolds.register(kfolds_plugin)
-kfolds_plugin.act(kfolds.begin(), when = always())
-kfolds_plugin.act(valid0.end(), always() )
-valid0.act(Message('new split'), always() )
+kfolds.include(kfolds_plugin)
+kfolds_plugin.act([kfolds.begin(), Message('new split')], when = always())
+kfolds_plugin.act(code_block.end(), always() )
+code_block.act(Message('new split'), always() )
 
 sched.include(kfolds)
@@ -194,6 +203,8 @@
 
 '''
 
+
+
 Notes: when a mode is regstered to begin with a certain message, it
 will rebroadcast that message when it starts, with only switching the
@@ -276,6 +287,8 @@
   the ones given as values; throws an exception if it is
   impossible
 
+* replace(nodes, dict) -> function; call replace on all nodes given that dictionary
+
 * reconstruct(dict) -> transform; tries to reconstruct the
   nodes given as keys starting from the nodes given as values
   by going through the inverse of all transforms that
@@ -298,12 +311,15 @@
 * switch(hyperparam, dict) -> transform; a lazy switch that allows
   you do construct by hyper-parameters
 
-* get_hyperparameter -> method; given a name it will return the first node
+* get_hyperparameter(name) -> method; given a name it will return the first node
   starting from top that is a hyper parameter and has that name
 
-* get_parameter -> method; given a name it will return the first node
+* get_parameter(name) -> method; given a name it will return the first node
   starting from top that is a parameter and has that name
 
+* get_hyperparameters()
+* get_parameters()
+
 
@@ -392,7 +408,7 @@
 # sketch of writing a RNN
 x = load_mnist()
 y = recurrent_layer()
-y.value = tanh(dotW(x, n=50) + dotW(y.tm(1),50))
+y.value = tanh(dotW(x, n=50).t(0) + dotW(y.t(-1),50))
 y.t0 = zeros( (50,))
 out = dotW(y,10)
 
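+
+# Illustration only, not the proposed API: one way to read y.t(k) above is
+# "unroll the recurrence k steps starting from y.t0". A stand-alone,
+# plain-Python sketch of that unrolling (all names here are made up):
+
+def unroll(f, x, y0, k):
+    y = y0
+    for _ in range(k):
+        y = f(x, y)                  # one step of y_t = f(x, y_{t-1})
+    return y
+
+# e.g. y_t = x + 0.5*y_{t-1}, started at 0.0 and unrolled 5 steps -> 1.9375
+print(unroll(lambda x, y: x + 0.5 * y, 1.0, 0.0, 5))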