changeset 760:1e0fa60bfacd

Finalize new validation split: indices are now randomized again inside training set and validation set.
author Pascal Lamblin <lamblinp@iro.umontreal.ca>
date Tue, 02 Jun 2009 22:29:21 -0400
parents 8447bc9bb2d4
children 60394c460390
files pylearn/datasets/norb_small.py
diffstat 1 files changed, 13 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/datasets/norb_small.py	Tue Jun 02 21:15:41 2009 -0400
+++ b/pylearn/datasets/norb_small.py	Tue Jun 02 22:29:21 2009 -0400
@@ -98,11 +98,21 @@
             train_info = read(open(train['info']))
 
             ordered_itrain = numpy.nonzero(train_info[:,0] != valid_variant)[0]
+            max_ntrain = ordered_itrain.shape[0]
             ordered_ivalid = numpy.nonzero(train_info[:,0] == valid_variant)[0]
+            max_nvalid = ordered_ivalid.shape[0]
+
+            if self.ntrain > max_ntrain:
+                print 'WARNING: ntrain is %i, but there are only %i training samples available' % (self.ntrain, max_ntrain)
+                self.ntrain = max_ntrain
 
-            # TODO: randomize
-            self.itr = ordered_itrain
-            self.ival = ordered_ivalid
+            if self.nvalid > max_nvalid:
+                print 'WARNING: nvalid is %i, but there are only %i validation samples available' % (self.nvalid, max_nvalid)
+                self.nvalid = max_nvalid
+
+            # Randomize: shuffle each split over its own full index range, then keep the first ntrain / nvalid entries
+            self.itr = ordered_itrain[rng.permutation(max_ntrain)][0:self.ntrain]
+            self.ival = ordered_ivalid[rng.permutation(max_nvalid)][0:self.nvalid]
 
         self.current = None