# HG changeset patch
# User James Bergstra
# Date 1287406388 14400
# Node ID 83776891508101cb519be9bc350735530b83c7b8
# Parent  0541e7d6e91621440b7756965f3b082a48330d91
added test idea to test_mcRBM

diff -r 0541e7d6e916 -r 837768915081 pylearn/algorithms/tests/test_mcRBM.py
--- a/pylearn/algorithms/tests/test_mcRBM.py	Thu Oct 14 23:55:55 2010 -0400
+++ b/pylearn/algorithms/tests/test_mcRBM.py	Mon Oct 18 08:53:08 2010 -0400
@@ -486,3 +486,111 @@
     def checkpoint():
         return checkpoint
     run_classif_experiment(checkpoint=checkpoint)
+
+
+
+if 0: # TEST IDEA OUT HERE
+
+
+    class doc_db(dict):
+        # A key->document dictionary.
+        # A "document" is itself a dictionary.
+        #
+        # A "document" can be a small or large object, but it cannot be partially retrieved.
+        #
+        # This simple data structure is used in pylearn to cache intermediate results between
+        # several process invocations.
+        pass
+
+    class UNSPECIFIED(object): pass
+
+    class CtrlObj(object):
+
+        def get(self, key, default_val=UNSPECIFIED, copy=True):
+            # Return a COPY by default, because a set() is required to make a change
+            # persistent.  In-place changes that the CtrlObj does not know about (via set)
+            # will not be saved.
+            pass
+
+        def get_key(self, val):
+            """Return the key that retrieved `val`.
+
+            This is useful for specifying cache keys for unhashable (e.g. numpy) objects
+            that happen to be stored in the db.
+            """
+            # look up whether val is an object previously returned by get()
+            pass
+
+        def set(self, key, val):
+            pass
+
+        def delete(self, key):
+            pass
+
+        def checkpoint(self):
+            pass
+
+        @staticmethod
+        def cache_pickle(pass_ctrl=False):
+            def decorator(f):
+                # cache rval using the pickle mechanism
+                def rval(*args, **kwargs):
+                    pass
+                return rval
+            return decorator
+
+        @staticmethod
+        def cache_dict(pass_ctrl=False):
+            def decorator(f):
+                # cache rval dict directly
+                def rval(*args, **kwargs):
+                    pass
+                return rval
+            return decorator
+
+        @staticmethod
+        def cache_numpy(pass_ctrl=False, memmap_thresh=100*1000*1000):
+            def decorator(f):
+                # cache numpy rval directly, memory-mapping arrays above memmap_thresh bytes
+                def rval(*args, **kwargs):
+                    pass
+                return rval
+            return decorator
+
+    @CtrlObj.cache_numpy()
+    def get_whitened_dataset(pca_parameters):
+        # do computations
+        return None
+
+    @CtrlObj.cache_pickle(pass_ctrl=True)
+    def train_mcRBM(data, lr, n_hid, ctrl):
+        rbm = 45
+        for i in range(10000):
+            # do some training
+            rbm += 1
+            ctrl.checkpoint()
+        return rbm
+
+    def run_experiment(args):
+
+        ctrl = CtrlObj.factory(args)
+        # Could use a db, or the filesystem, or both, etc.
+        # There would be generic implementations, but the experimenter should be very
+        # aware of what is being cached where, when, and how.  This is how results are
+        # stored and retrieved, after all.
+        # Cluster-friendly jobs should not use local files directly, but should store
+        # cached computations and results in such a database.
+        # Different jobs should avoid using the same keys in the database, because
+        # coordinating writes is difficult and conflicts will inevitably arise.
+
+        raw_data = get_raw_data(ctrl=ctrl)
+        raw_data_key = ctrl.get_key(raw_data)
+        pca = get_pca(raw_data, max_energy=.05, ctrl=ctrl,
+                _ctrl_raw_data_key=raw_data_key)
+        whitened_data = get_whitened_dataset(pca, ctrl=ctrl,
+                _ctrl_data_key=raw_data_key)
+
+        rbm = train_mcRBM(
+                data=whitened_data,
+                lr=0.01,
+                n_hid=100,
+                ctrl=ctrl,
+                _ctrl_data_key=raw_data_key)
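
Every body in the sketch above is left as `pass`. To make the intended control flow concrete, here is a minimal, self-contained sketch of the cache_pickle mechanism against an in-memory backend. The MemCtrl class, the key scheme (pickling the function name together with its arguments), and the toy function `double` are hypothetical stand-ins for illustration, not part of the proposed pylearn API; the `_ctrl_*_key` convention from the sketch is omitted for brevity.

    import pickle

    class MemCtrl(object):
        """Hypothetical in-memory stand-in for CtrlObj: maps key -> pickled document."""
        def __init__(self):
            self._db = {}
        def get(self, key, default_val=None):
            if key in self._db:
                return pickle.loads(self._db[key])  # loads() hands back a fresh copy
            return default_val
        def set(self, key, val):
            self._db[key] = pickle.dumps(val)
        def checkpoint(self):
            pass  # a persistent backend would flush partial state to disk here

    def cache_pickle(pass_ctrl=False):
        def decorator(f):
            def rval(*args, **kwargs):
                ctrl = kwargs.pop('ctrl')
                # Key on the function name plus its pickled arguments.
                key = pickle.dumps((f.__name__, args, sorted(kwargs.items())))
                missing = object()
                cached = ctrl.get(key, default_val=missing)
                if cached is not missing:
                    return cached
                if pass_ctrl:
                    kwargs['ctrl'] = ctrl
                out = f(*args, **kwargs)
                ctrl.set(key, out)
                return out
            return rval
        return decorator

    calls = []

    @cache_pickle()
    def double(x):
        calls.append(x)  # record actual computations, to make cache hits visible
        return 2 * x

    ctrl = MemCtrl()
    assert double(3, ctrl=ctrl) == 6
    assert double(3, ctrl=ctrl) == 6
    assert calls == [3]  # the second call was answered from the cache

Pickling the arguments to form the key only works for cheaply picklable, comparable values; that is exactly why the sketch introduces get_key() and the `_ctrl_*_key` kwargs for unhashable numpy objects already stored in the db.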