# HG changeset patch # User Olivier Delalleau # Date 1284658130 14400 # Node ID 4f03a9a743dc5c4ff261c60c8a115ba7f9466e6c # Parent 6c79394b6b209ceba6d3bd6614a8dff6a568b96b# Parent 9f0502f8c7a5a5e598b1f98fd4fc9baf3b4e95c1 Merged diff -r 6c79394b6b20 -r 4f03a9a743dc doc/v2_planning/plugin_architecture_GD.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/doc/v2_planning/plugin_architecture_GD.txt Thu Sep 16 13:28:50 2010 -0400 @@ -0,0 +1,267 @@ +Overview +======== + +The "central authority" (CA) is the glue which takes care of interfacing plugins +with one another. It has 3 basic roles: +* it maintains a list of "registered" or "active" plugins +* it receives and queues the various messages sent by the plugins +* dispatches the messages to the recipient, based on various "events" + +Events can take different forms: +* the CA can trigger various events based on running time +* can be linked to messages emitted by the various plugins. Events can be + triggered based on the frequency of such messages. +* Once an event is triggered, it is relayed to the appropriate "recipient + plugin(s)" + +It is the responsibility of each plugin to inform the CA of which "events" it +cares about. + + +Generic Pseudo-code +=================== + +I'll try to write this in pseudo-python as best I can. I'll do this in +traditional OOP, as this is what I'm more comfortable with. I'll leave it up to +James and OB to python-ize this :) + + +class MessageX(Message): + """ + A message is basically a data container. This could very well be replaced by + a generic Python object. + """ + +class Plugin(object): + """ + The base plugin object doesn't do much. It contains a reference to the CA + (upon plugin being registered with the CA), provides boilerplate code + for storing which "events" this plugin is susceptible to, as well as code + for registering callback functions for the various messages. + """ + + CA = None # to be initialized upon plugin registration + active_msg = {} # dictionary of messages this plugin is susceptible to + callbacks = {} # mapping of message class names --> callback function + + def listen(msg_class, interval): + """ + :param msg_class: reference to the "message" class we are interested in. + These messages will be forwarded to this plugin, when + the trigger condition is met. + :param interval: integer. Forward the message to this plugin every 'interval' + such messages. + """ + self.active_msg[msg_class] = interval + + + def check_trigger(msg_class, time): + """ + Checks whether or not the "trigger" condition associated with message of + class 'msg_class' is satisfied or not. This could be the default + behavior, and be overridden by the various plugins. + """ + return time % self.active_msg[msg_class] == 0 + + + def handler(msg_class, callback): + """ + Decorator which registers a callback function for the given message + type. + + NOTE: I don't think what I wrote would work as a Python decorator. I am + not sure how to handle decoraters with multiple parameters (one + explicit, and the other as the reference to the function). I'm pretty + sure James or OB could figure it out though ! + + :params msg_class: reference to the message class for which we are + registering a callback function + :params callback : reference to which function to call for a given message + """ + + self.callbacks[msg_class] = callback + + + def execute(self, message): + """ + Boiler-plate code which executes the right callback function, for the + given message type. + """ + for (msg_class, callback) in self.callbacks.iteritems(): + if message.__class__ == msg_class: + callback(message) + + +class ProducerPlugin(Plugin): + + def dostuff(): + """ + A typical "producer" plugin. It basically performs an arbitrary action + and asks the CA to forward the results (in the form of a message) to + other plugins. + """ + + # iteratively do stuff and relay messages to other plugins + while(condition): + + msga = # do something + ca.send(msga) # ask CA to forward to other plugins + + +class ConsumerPlugin(Plugin): + + @handler(MessageA) + def func(msga): + """ + A consumer or "passive plugin" (eg. logger, etc). This function is + register as being the callback function for Message A objects. + """ + # do something with message A + + +class ConsumerProducerPlugin(Plugin): + + @handler(MessageA) + def func(msga): + """ + Example of a consumer / producer plugin. It receives MessageA messages, + processes the data, then asks the CA to send a new message (MessageB) as + the result of its computation. The CA will automatically forward to all + interested parties. + + :param msga: MessageA instance + """ + + data = dostuff(msga) # process message + msgb = MessageB(data) # generate new message for other plugins + ca.send(msgb) # ask CA to forward to other plugins + + + +class CentralAuthority(object): + + active_plugins = [] # contains a list of registered plugins + + mailmain = {} # dictionary which contains, for each message class, a + # list of plugins interested in this message + + event_count = {} # dictionary of "event" counts for various messages + + def register(plugin): + """ + Registers the plugin and adds it as a listener for the various messages + it is interested in. + :param plugin: plugin instance which we want to "activate" + """ + + # each plugin must have a reference to the CA + plugin.ca = self + + # maintain list of active plugins + active_plugins.append(plugin) + + # remember which messages this plugin cares about + for msg in plugin.active_msg.keys(): + self.mailman[msg].append(plugin) + self.event_count[msg] = 0 + + def send(msg): + """ + This function relays the message to the appropriate plugins, based on + their "trigger" condition. It also keeps track of the number of times + this event was raised. + + :param msg: message instance + """ + + event_count[msg.__class__] += 1 + + # for all plugins interested in this message ... + for plugin in self.mailman[msg.__class__]: + + # check if trigger condition is met + if plugin.check_trigger(msg, self.event_count[msg.__class__]): + + # have the plugin execute the message + plugin.execute(msg) + + + def run(self): + """ + This would be the main loop of the program. I won't go into details + because its still somewhat blurry in my head :) But basically, the CA + could be configured to send out its own messages, independently from all + other plugins. + + These could be "synchronous" messages such as: "5 seconds have passed", + or others such as "save state we are about to get killed". + + NOTE: seems like this would almost have to live in its own thread ... + """ + + # the following would be parametrized obviously + while(True): + msg = ElapsedTimeMessage(5) + self.send(msg) + sleep(5) + + + +Putting it all-together +======================= + + +def main(): + + ca = CentralAuthority() + + producer = ProducerPlugin() + ca.register(producer) + + consumer = ConsumerPlugin() + consumer.listen(MessageB, 1) + ca.register(consumer)) + + other = ConsumerProducerPlugin() + other.listen(MessageB, 10) + ca.register(other) + + # this is the function call which gets the ball rolling + producer.dostuff() + + +DISCUSSION: blocking vs. non-blocking +===================================== + +In the above example, I used "blocking" sends. However it is not-clear that this +is the best option. + +In the example, the producer basically acts as the main loop. It relinquishes +control of the main loop when the CA decides to forward the message to other +plugins. Control will only be returned once the cascade of send/receives +initiated with MessageA is complete (all subplugins have processed MessageA and +any messages sent as a side-effect have also been processed). + +This definitely imposes constraints on what the plugins can do, and how they do +it. For the type of single-processor / linear jobs we tend to run, this might be +enough (??). + +The good news is that going forward, the above plugin architecture can also +scale to distributed systems, by changing the sends to be non-blocking. Plugins +could then live on different machines and process data as they see fit. +Synchronization would be enforced by way of messages. In the above, the "main +producer" would thus become a consumer/producer who listens for "done processing +MessageA" messages and produces a new MessageA as a result. + +On single-processor systems, the synchronization overhead might be too costly +however. That is something we would have to investigate. On the plus side +however, our plugins would be "future proof" and lend themselves well to the +type of "massively parallel jobs" we wish to run (i.e. meta-learners, etc.) + + + +Logistic Regression +=================== + + +TO COME SOON (?)