changeset 1332:837768915081

added test idea to test_mcRBM
author James Bergstra <bergstrj@iro.umontreal.ca>
date Mon, 18 Oct 2010 08:53:08 -0400
parents 0541e7d6e916
children 6fd2610c1706
files pylearn/algorithms/tests/test_mcRBM.py
diffstat 1 files changed, 108 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/tests/test_mcRBM.py	Thu Oct 14 23:55:55 2010 -0400
+++ b/pylearn/algorithms/tests/test_mcRBM.py	Mon Oct 18 08:53:08 2010 -0400
@@ -486,3 +486,111 @@
         def checkpoint():
             return checkpoint
         run_classif_experiment(checkpoint=checkpoint)
+
+
+
+if 0: # TEST IDEA OUT HERE
+
+
+    class doc_db(dict):
+        """A key->document dictionary; a "document" is itself a dictionary.
+
+        A "document" can be a small or large object, but it cannot be partially retrieved.
+
+        This simple data structure is used in pylearn to cache intermediate results between
+        several process invocations.
+        """
+
+    class UNSPECIFIED(object): pass  # sentinel type: default for CtrlObj.get's `default_val`
+
+    class CtrlObj(object):
+        """Sketch of a persistent key->value store for cached results; every method is a stub."""
+        def get(self, key, default_val=UNSPECIFIED, copy=True):
+            # Default to return a COPY because a set() is required to make a change persistent.
+            # Inplace changes that the CtrlObj does not know about (via set) will not be saved.
+            pass
+
+        def get_key(self, val):
+            """Return the key that retrieved `val`.
+
+            This is useful for specifying cache keys for unhashable (e.g. numpy) objects that
+            happen to be stored in the db.
+            """
+            # TODO: not implemented; intended to look up whether val is an obj in the db.
+            pass
+
+        def set(self, key, val):
+            pass
+        def delete(self, key):
+            pass
+        def checkpoint(self):
+            pass
+
+        @staticmethod
+        def cache_pickle(pass_ctrl=False):
+            def decorator(f):
+                # cache rval using pickle mechanism
+                def rval(*args, **kwargs):
+                    pass  # stub: f is not called yet, so the wrapper returns None
+                return rval
+            return decorator
+
+        @staticmethod
+        def cache_dict(pass_ctrl=False):
+            def decorator(f):
+                # cache rval dict directly
+                def rval(*args, **kwargs):
+                    pass  # stub: f is not called yet, so the wrapper returns None
+                return rval
+            return decorator
+
+        @staticmethod
+        def cache_numpy(pass_ctrl=False, memmap_thresh=100*1000*1000):
+            def decorator(f):
+                # cache rval as a numpy array (memmap_thresh presumably picks memmap vs in-memory)
+                def rval(*args, **kwargs):
+                    pass  # stub: f is not called yet, so the wrapper returns None
+                return rval
+            return decorator
+
+    @CtrlObj.cache_numpy()
+    def get_whitened_dataset(pca_parameters):
+        """Placeholder for the whitening computations; currently yields None."""
+        return
+
+    @CtrlObj.cache_pickle(pass_ctrl=True)
+    def train_mcRBM(data, lr, n_hid, ctrl):
+        """Placeholder training loop that calls `ctrl.checkpoint()` after every iteration."""
+        rbm = 45
+        for i in range(10000):  # an int is not iterable; range() gives the 10000 iterations
+            # do some training
+            rbm += 1
+            ctrl.checkpoint()
+        return rbm
+
+    def run_experiment(args):
+        """Sketch of an experiment whose stages all share one cache controller, `ctrl`."""
+        ctrl = CtrlObj.factory(args)  # NOTE: factory() is not defined on CtrlObj yet
+        # Could use db, or filesystem, or both, etc.
+        # There would be generic ones, but the experimenter should be very aware of what is being
+        # cached where, when, and how.  This is how results are stored and retrieved after all.
+        # Cluster-friendly jobs should not use local files directly, but should store cached
+        # computations and results to such a database.
+        #  Different jobs should avoid using the same keys in the database because coordinating
+        #  writes is difficult, and conflicts will inevitably arise.
+
+        raw_data = get_raw_data(ctrl=ctrl)
+        raw_data_key = ctrl.get_key(raw_data)
+        pca = get_pca(raw_data, max_energy=.05, ctrl=ctrl,
+                _ctrl_raw_data_key=raw_data_key)
+        whitened_data = get_whitened_dataset(pca, ctrl=ctrl,
+                _ctrl_data_key=raw_data_key)
+
+        rbm = train_mcRBM(
+                data=whitened_data,
+                lr=0.01,
+                n_hid=100,
+                ctrl=ctrl,
+                _ctrl_data_key=raw_data_key
+                )
+