pylearn: dataset.py comparison

comparison dataset.py @ 80:40476a7746e8

bugfix

author	Frederic Bastien <bastienf@iro.umontreal.ca>
date	Mon, 05 May 2008 10:56:58 -0400
parents	dde1fb1b63ba
children	158653a9bc7c

comparison

Legend: equal | deleted (-) | inserted (+) | replaced

-:427e02ef0629
+:40476a7746e8
 Using the third syntax, i1, i2, i3 will be list-like containers of the
 f1, f2, and f3 fields of a batch of examples on each loop iteration.
 The minibatches iterator is expected to return upon each call to next()
 a DataSetFields object, which is a LookupList (indexed by the field names) whose
-elements are iterable over the minibatch examples, and which keeps a pointer to
+elements are iterable and indexable over the minibatch examples, and which keeps a pointer to
 a sub-dataset that can be used to iterate over the individual examples
 in the minibatch. Hence a minibatch can be converted back to a regular
 dataset or its fields can be looked at individually (and possibly iterated over).
 PARAMETERS
 def __len__(self):
 return self.length
 def __getitem__(self,i):
-if type(i) in (int,slice,list):
+if type(i) in (slice,list):
 return DataSetFields(MinibatchDataSet(
-Example(self._fields.keys(),[field[i] for field in self._fields])),self._fields)
+Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames())
+if type(i) is int:
+return DataSetFields(MinibatchDataSet(
+Example(self._fields.keys(),[[field[i]] for field in self._fields])),self.fieldNames())
 if self.hasFields(i):
 return self._fields[i]
 assert i in self.__dict__ # else it means we are trying to access a non-existing property
 return self.__dict__[i]
 return self.fields_columns.keys()
 def __len__(self):
 return len(self.data)
-def __getitem__(self,i):
+def __getitem__(self,key):
 """More efficient implementation than the default __getitem__"""
 fieldnames=self.fields_columns.keys()
-if type(i) is int:
+if type(key) is int:
 return Example(fieldnames,
-[self.data[i,self.fields_columns[f]] for f in fieldnames])
+[self.data[key,self.fields_columns[f]] for f in fieldnames])
-if type(i) in (slice,list):
+if type(key) is slice:
 return MinibatchDataSet(Example(fieldnames,
-[self.data[i,self.fields_columns[f]] for f in fieldnames]))
+[self.data[key,self.fields_columns[f]] for f in fieldnames]))
+if type(key) is list:
+for i in range(len(key)):
+if self.hasFields(key[i]):
+key[i]=self.fields_columns[key[i]]
+return MinibatchDataSet(Example(fieldnames,
+[self.data[key,self.fields_columns[f]] for f in fieldnames]))
 # else check for a fieldname
-if self.hasFields(i):
+if self.hasFields(key):
-return Example([i],[self.data[self.fields_columns[i],:]])
+return self.data[self.fields_columns[key],:]
 # else we are trying to access a property of the dataset
-assert i in self.__dict__ # else it means we are trying to access a non-existing property
+assert key in self.__dict__ # else it means we are trying to access a non-existing property
-return self.__dict__[i]
+return self.__dict__[key]
 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset):
 class ArrayDataSetIterator(object):
 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset):

Mercurial > pylearn

comparison dataset.py @ 80:40476a7746e8