changeset 1154:f923dddf0bf7

a better version of the script
author pascanur
date Thu, 16 Sep 2010 23:42:26 -0400
parents ae5ba6206fd3
children b70a1fcb7b4f
files doc/v2_planning/plugin_RP.py
diffstat 1 files changed, 91 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/doc/v2_planning/plugin_RP.py	Thu Sep 16 17:34:30 2010 -0400
+++ b/doc/v2_planning/plugin_RP.py	Thu Sep 16 23:42:26 2010 -0400
@@ -28,13 +28,26 @@
 .. code-block::
 '''
     sch = Schedular()
-    p = ProducerFactory()
-    p = sched.schedule_plugin(event = every(p.outputStuffs()), p )
-    p = sched.schedule_plugin(event = Event("begin"), p)
-    c = sched.schedule_plugin(event = every(p.outputStuffs()), ConsumerFactory )
-    pc= sched.schedule_plugin(event = every(p.outputStuffs()), ProducerConsumerFactory )
+
+    @FnPlugin(sch)
+    def producer(self,event):
+        self.fire('stuff', value = 'some text')
+
+    @FnPlugin(sch)
+    def consumer(self,event):
+        print event.value
 
-    sched.run()
+    @FnPlugin(sch)
+    def prod_consumer(self,event):
+        print event.value
+        self.fire('stuff2', value = 'stuff')
+
+    producer.act( on = Event('begin'), when = once() )
+    producer.act( on = Event('stuff'), when = always() )
+    consumer.act( on = Event('stuff'), when = always() )
+    prod_consumer.act( on = Event('stuff'), when = always() )
+
+    sch.run()
 
 
 
@@ -53,113 +66,96 @@
 Possible script
 ---------------
 
- Sorry for long variable names, I wanted to make it clear what things are ..
+Note: this would look the same for any other architecture that does not
+involve pre-training (unlike, e.g., deep networks); for example, the MLP.
 
 .. code-block::
 '''
-    sched = Schedular()
-    # This is a shortcut .. I've been to the dataset committee and they have
-    # something else in mind, a bit more fancy; I totally agree with their
-    # ideas I just wrote it like this for brevity;
-    train_data, valid_data, test_data = load_mnist()
 
-    # This part was not actually discussed into details ; I have my own
-    # opinions of how this part should be done .. but for now I decomposed it 
-    # in two functions for convinience
-    logreg = generate_logreg_model()
-
+sched = Schedular()
 
-    
-    # Note that this is not meant to replace the string idea of Olivier. I
-    # actually think that is a cool idea, when writing things down I realized
-    # it might be a bit more intuitive if you would get that object by calling
-    # a method of the instance of the plugin with a significant name
-    # I added a warpping function that sort of tells on which such events 
-    # you can have similar to what Olivier wrote { every, at .. }
-    doOneTrainingStepPlugin =ModelPluginFactory( model = logreg )
-    trainDataPlugin = sched.schedule_plugin(
-                       event = every(doOneTrainingStepPlugin.new_train_error),
-                                        DatasetsPluginFactory( data = train_data) )
+# Data / Model Building : 
+# I skipped over how to design this part
+# though I have some ideas
+real_train_data, real_valid_data = load_mnist()
+model = logreg()
 
-    trainDataPlugin = sched.schedule_plugin(
-                       event = Event('begin'), trainDataPlugin )
-
-    clock = sched.schedule_plugin( event = all_events, ClockFactory())
-
-    doOneTrainingStepPlugin = sched.schedule_plugin( 
-                             event = every(trainDataPlugin.new_batch()),
-                             ModelFactory( model = logreg))
-
-
+# Main Plugins ( already provided in the library ); 
+# These wrappers also register the plugin
+train_data = create_data_plugin( sched, data = real_train_data)
+valid_data = create_data_plugin( sched, data = real_valid_data)
+train_model    = create_train_model(sched, model = model)
+validate_model = create_valid_model(sched, model = model, data = valid_data)
+early_stopper  = create_early_stopper(sched)
 
 
-    # Arguably we wouldn't need such a plugin. I added just to show how to
-    # deal with multiple events from same plugin; the plugin is suppose to 
-    # reset the index of the dataset to 0, so that you start a new epoch 
-    resetDataset = sched.schedule_plugin(
-                           event = every(trainDataPlugin.end_of_dataset()),
-                           ResetDatasetFactory( data = train_data) )
+# On the fly plugins ( print random stuff); the main difference between my
+# FnPlugin and Olivier's version is that mine also registers the plugin in sched
+@FnPlugin(sched)
+def print_error(self, event):  # collects event.value on train_model.error() events; prints their mean on train_data.eod()
+    if event.type == Event('begin'):  # NOTE(review): the wiring in this script never subscribes print_error to Event('begin') - confirm self.value gets initialised
+        self.value = []
+    elif event.type == train_model.error():
+        self.value += [event.value]
+    elif event.type == train_data.eod():  # was `else <condition>:`, which is a syntax error
+        print 'Error :', numpy.mean(self.value)
+
+@FnPlugin(sched)
+def save_model(self, event):  # on-the-fly plugin: pickles model.parameters() to 'best_params.pkl' on a new-best-error event
+    if event.type == early_stopper.new_best_error():
+        cPickle.dump(model.parameters(), open('best_params.pkl','wb'))  # NOTE(review): the file object is never closed explicitly
 
 
-    checkValidationPlugin = sched.schedule_plugin(
-                             event =every_nth(doOneTrainingStepPlugin.done(), n=1000),
-                             ValidationFactory( model = logreg data = valid_data))
-
-    # You have the options to also do :
-    #
-    # checkValidationPlugin = sched.schedule_plugin(
-    #                         event =every(trainDataPlugin.end_of_dataset()),
-    #                         ValidationFactory( model = logreg, data = valid_data))
-    # checkValidationPlugin = sched.schedule_plugin(
-    #                         event =every(clock.hour()),
-    #                         ValidationFactory( model = logreg, data = valid_data))
+# Create the dependency graph describing what does what 
+train_model.act(on = train_data.batch(), when = always())
+validate_model.act(on = train_model.done(), when = every(n=10000))
+early_stopper.act(on = validate_model.error(), when = always())
+print_error.act( on = train_model.error(), when = always() )
+print_error.act( on = train_data.eod(), when = always() )
+save_model.act( on = early_stopper.new_best_error(), when = always() )  # same event that save_model tests for
 
-    # This plugin would be responsible to send the Event("terminate") when the
-    # patience expired.
-    earlyStopperPlugin = sched.schedule_plugin(
-                            event = every(checkValidationPlugin.new_validation_error()),
-                            earlyStopperFactory(initial_patience = 10) )
-
-    # Printing & Saving plugins
+# Run the entire thing
+sched.run()
 
-    printTrainingError = sched.schedule_plugin(
-                            event = every(doOneTrainingStepPlugin.new_train_error()),
-                            AggregateAndPrintFactory())
-
-    printTrainingError = sched.schedule_plugin( 
-                            event = every(trainDataPlugin.end_of_dataset()),
-                            printTrainingError)
-    saveWeightsPlugin = sched.schedule_plugin(
-                            event = every(earlyStopperPlugin.new_best_valid_error()),
-                            saveWeightsFactory( model = logreg) )
-
-    sched.run()
 
 '''
 Notes
 =====
 
- In my code schedule_plugin returns the plugin that it regsiters. I think that 
- writing something like 
-   x = f( .. ) 
-   y = f(x) 
-
- makes more readable then writing f( .., event_belongs_to = x), or even worse,
- you only see text, and you would have to go to the plugins to see what events 
- they actually produce.
+ * I think we should have a FnPlugin decorator ( exactly like Olivier's),
+ except that it also attaches the newly created plugin to the scheduler. This
+ way you can create plugins on the fly ( as long as they are simple functions
+ that print stuff, or compute simple statistics ).
+ * I added a method `act` to Plugin. You use it to create the dependency
+ graph ( it could also be named `listen`, to give a more plugin-like interface)
+ * Plugins are obtained in 3 ways  :
+     - by wrapping a dataset / model or something similar
+     - by a function that constructs it from nothing
+     - by decorating a function
+   In all cases I would suggest that when creating them you should provide
+   the scheduler as well, and the constructor should also register the plugin
 
- At this point I am more concern with how the scripts will look ( the cognitive 
- load to understand them) and how easy is to go to hack into them. From this point 
- of view I would have the following suggestions : 
-   * dataset and model creation should create outside the schedular with possibly 
-   other mechanisms
-   * there are two types of plugins, those that do not affect the experiment, 
-   they just compute statistics and print them, or save different data and those
-   plugin that change the state of the model, like train, or influence the life 
-   of the experiment. There should be a minimum of plugins of the second category,
-   to still have the code readable. ( When understanding a script, you only need 
-   to understand that part, the rest you assume is just printing stuff). 
-   The different categories should also be grouped.
+ * The plugin concept works well as long as the plugins lean towards
+ heavy-duty computation, disregarding printing plugins and such. If you have
+ many small plugins, this system might only introduce overhead. I would
+ argue that the use of theano is confined to each plugin. Therefore I would
+ strongly suggest that the architecture be built outside the scheduler,
+ with a different approach.
+
+ * I would suggest that the framework be used only for the training loop
+ (after you get the adapt function, compute error function), so it is more about
+ the meta-learner, hyper-learner learner level.
+
+ * A general remark that I guess everyone will agree on. We should make 
+ sure that implementing a new plugin is as easy/simple as possible. We 
+ have to hide all the complexity in the schedular ( it is the part of the 
+ code we will not need or we would rarely need to work on). 
+
+ * I have not gone into how to implement the different components, but
+ following Olivier's code I think that part would be more or less
+ straightforward.
+
+ '''
 
 
 '''