diff transformations/pipeline.py @ 50:ff59670cd1f9

Ajouté l'enregistrement de la complexité, et un strict minimum pour reloader les fichiers d'images et de paramètres
author fsavard
date Thu, 04 Feb 2010 14:13:57 -0500
parents fabf910467b2
children c89defea1e65
line wrap: on
line diff
--- a/transformations/pipeline.py	Thu Feb 04 13:40:44 2010 -0500
+++ b/transformations/pipeline.py	Thu Feb 04 14:13:57 2010 -0500
@@ -14,8 +14,10 @@
 # To debug locally, also call with -s 1 (to stop after 1 batch ~= 100)
 # (otherwise we allocate all needed memory, might be loonnng and/or crash
 # if, lucky like me, you have an age-old laptop creaking from everywhere)
-DEBUG = True
-DEBUG_X = False # Debug under X (pylab.show())
+DEBUG = False
+DEBUG_X = False
+if DEBUG:
+    DEBUG_X = False # Debug under X (pylab.show())
 
 DEBUG_IMAGES_PATH = None
 if DEBUG:
@@ -89,11 +91,12 @@
     def init_memory(self):
         self.init_num_params_stored()
 
-        total = (self.num_batches + 1) * self.batch_size
+        total = self.num_batches * self.batch_size
         num_px = self.image_size[0] * self.image_size[1]
 
         self.res_data = numpy.empty((total, num_px))
-        self.params = numpy.empty((total, self.num_params_stored))
+        # +1 to store complexity
+        self.params = numpy.empty((total, self.num_params_stored+1))
 
     def run(self, batch_iterator, complexity_iterator):
         img_size = self.image_size
@@ -114,7 +117,9 @@
 
                 img = img.reshape(img_size)
 
-                param_idx = 0
+                param_idx = 1
+                # store complexity along with other params
+                self.params[global_idx, 0] = complexity
                 for mod in self.modules:
                     # This used to be done _per batch_,
                     # ie. out of the "for img" loop                   
@@ -192,11 +197,27 @@
 
 def just_nist_iterator(nist, batch_size, stop_after=None):
     for i in xrange(0, nist.dim[0], batch_size):
+        # stop_after counts batches while i counts images, so scale it
+        if stop_after is not None and i >= stop_after * batch_size:
+            break
         nist.train_data.seek(0)
         yield ft.read(nist.train_data, slice(i, i+batch_size)).astype(numpy.float32)/255
 
-        if not stop_after is None and i >= stop_after:
-            break
+
+
+# Debugging aid: check that the written images and parameters can be read
+# back. Prints the images' dimensions, then the parameters' shape and values.
+def reload(output_file_path, params_output_file_path):
+    images_ft = open(output_file_path, 'rb')
+    try:
+        # only the header is read; its element 3 is taken as the dimensions
+        images_ft_dim = tuple(ft._read_header(images_ft)[3])
+    finally:
+        images_ft.close() # release the handle even if the header is bad
+    print "Images dimensions: ", images_ft_dim
+    params = numpy.load(params_output_file_path)
+    print "Params dimensions: ", params.shape
+    print params
 
 ##############################################################################
 # MAIN
@@ -225,23 +246,22 @@
     output_file_path = None
     params_output_file_path = None
     stop_after = None
-
-    import sys
-    print "python version: ", sys.version
+    reload_mode = False
 
     try:
-        opts, args = getopt.getopt(get_argv(), "m:z:o:p:s:", ["max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "stop-after="])
+        opts, args = getopt.getopt(get_argv(), "rm:z:o:p:s:", ["reload","max-complexity=", "probability-zero=", "output-file=", "params-output-file=", "stop-after="])
     except getopt.GetoptError, err:
         # print help information and exit:
         print str(err) # will print something like "option -a not recognized"
         usage()
         sys.exit(2)
-    output = None
-    verbose = False
+
     for o, a in opts:
         if o in ('-m', '--max-complexity'):
             max_complexity = float(a)
             assert max_complexity >= 0.0 and max_complexity <= 1.0
+        elif o in ('-r', '--reload'):
+            reload_mode = True
         elif o in ("-z", "--probability-zero"):
             probability_zero = float(a)
             assert probability_zero >= 0.0 and probability_zero <= 1.0
@@ -260,26 +280,29 @@
         usage()
         sys.exit(2)
 
-    if DEBUG_IMAGES_PATH:
-        '''
-        # This code is yet untested
-        debug_images = DebugImages(DEBUG_IMAGES_PATH)
-        num_batches = 1
-        batch_size = len(debug_images.filelist)
-        pl = Pipeline(modules=MODULE_INSTANCES, num_batches=num_batches, batch_size=BATCH_SIZE, image_size=(32,32))
-        batch_it = debug_images_iterator(debug_images)
-        '''
+    if reload_mode:
+        reload(output_file_path, params_output_file_path)
     else:
-        nist = NistData()
-        num_batches = nist.dim[0]/BATCH_SIZE
-        if stop_after:
-            num_batches = stop_after
-        pl = Pipeline(modules=MODULE_INSTANCES, num_batches=num_batches, batch_size=BATCH_SIZE, image_size=(32,32))
-        batch_it = just_nist_iterator(nist, BATCH_SIZE, stop_after)
+        if DEBUG_IMAGES_PATH:
+            '''
+            # This code is yet untested
+            debug_images = DebugImages(DEBUG_IMAGES_PATH)
+            num_batches = 1
+            batch_size = len(debug_images.filelist)
+            pl = Pipeline(modules=MODULE_INSTANCES, num_batches=num_batches, batch_size=BATCH_SIZE, image_size=(32,32))
+            batch_it = debug_images_iterator(debug_images)
+            '''
+        else:
+            nist = NistData()
+            num_batches = nist.dim[0]/BATCH_SIZE
+            if stop_after:
+                num_batches = stop_after
+            pl = Pipeline(modules=MODULE_INSTANCES, num_batches=num_batches, batch_size=BATCH_SIZE, image_size=(32,32))
+            batch_it = just_nist_iterator(nist, BATCH_SIZE, stop_after)
 
-    cpx_it = range_complexity_iterator(probability_zero, max_complexity)
-    pl.run(batch_it, cpx_it)
-    pl.write_output(output_file_path, params_output_file_path)
+        cpx_it = range_complexity_iterator(probability_zero, max_complexity)
+        pl.run(batch_it, cpx_it)
+        pl.write_output(output_file_path, params_output_file_path)
 
 _main()