pylearn: dataset.py comparison

comparison dataset.py @ 4:f7dcfb5f9d5b

Added test for dataset.

author	bengioy@bengiomac.local
date	Sun, 23 Mar 2008 22:14:10 -0400
parents	378b68d5c4ad
children	8039918516fe

comparison

Legend: equal | deleted | inserted | replaced

-:378b68d5c4ad
+:f7dcfb5f9d5b
 """dataset[i:j] returns the subdataset with examples i,i+1,...,j-1."""
 raise NotImplementedError
 # we may want ArrayDataSet defined in another python file
-from numpy import *
+import numpy
 class ArrayDataSet(FiniteDataSet):
 """
 A fixed-length and fixed-width dataset in which each element is a numpy.array
 or a number, hence the whole dataset corresponds to a numpy.array. Fields
 each 'example' is just a one-row ArrayDataSet, otherwise it is a numpy.array.
 Any dataset can also be converted to a numpy.array (losing the notion of fields)
 by the asarray(dataset) call.
 """
-def __self__(self,dataset=None,data=None,fields={}):
+def __init__(self,dataset=None,data=None,fields={}):
 """
 Construct an ArrayDataSet, either from a DataSet, or from
 a numpy.array plus an optional specification of fields (by
 a dictionary of column slices indexed by field names).
 """
 self.current_row=-1 # used for view of this dataset as an iterator
-if dataset:
+if dataset!=None:
 assert data==None and fields=={}
 # convert dataset to an ArrayDataSet
 raise NotImplementedError
-if data:
+if data!=None:
 assert dataset==None
 self.data=data
 self.fields=fields
 self.width = data.shape[1]
 for fieldname in fields:
 fieldslice=fields[fieldname]
-assert fieldslice.start>=0 and fieldslice.stop<=width)
+# make sure fieldslice.start and fieldslice.step are defined
+start=fieldslice.start
+step=fieldslice.step
+if not start:
+start=0
+if not step:
+step=1
+if not fieldslice.start or not fieldslice.step:
+fieldslice = slice(start,fieldslice.stop,step)
+# and coherent with the data array
+assert fieldslice.start>=0 and fieldslice.stop<=self.width
 def next(self):
-"""Return the next example in the dataset. If the dataset has fields,
+"""
-the 'example' is just a one-row ArrayDataSet, otherwise it is a numpy.array."""
+Return the next example in the dataset. If the dataset has fields,
-if fields:
+the 'example' is just a one-row ArrayDataSet, otherwise it is a numpy.array.
+"""
+if self.fields:
 self.current_row+=1
 if self.current_row==len(self.data):
-self.current_row=0
+self.current_row=-1
+raise StopIteration
 return self[self.current_row]
 else:
 return self.data[self.current_row]
 def __getattr__(self,fieldname):
 """Return a sub-dataset containing only the given fieldname as field."""
-data = self.fields[fieldname]
+data=self.data[self.fields[fieldname]]
-return ArrayDataSet(data=data)
+if len(data)==1:
+return data
+else:
+return ArrayDataSet(data=data)
 def __call__(self,*fieldnames):
 """Return a sub-dataset containing only the given fieldnames as fields."""
 min_col=self.data.shape[1]
 max_col=0
 """dataset[i:j] returns the subdataset with examples i,i+1,...,j-1."""
 return ArrayDataSet(data=self.data[slice(slice_args)],fields=self.fields)
 def asarray(self):
 if self.fields:
-columns_used = zeros((self.data.shape[1]),dtype=bool)
+columns_used = numpy.zeros((self.data.shape[1]),dtype=bool)
 for field_slice in self.fields.values():
 for c in xrange(field_slice.start,field_slice.stop,field_slice.step):
 columns_used[c]=True
 # try to figure out if we can map all the slices into one slice:
 mappable_to_one_slice = True
 # else make contiguous copy
 n_columns = sum(columns_used)
 result = zeros((len(self.data),n_columns)+self.data.shape[2:],self.data.dtype)
 c=0
 for field_slice in self.fields.values():
-slice_width=field_slice.stop-field_slice.start
+slice_width=field_slice.stop-field_slice.start/field_slice.step
-if field_slice.step:
-slice_width /= field_slice.step
 # copy the field here
 result[:,slice(c,slice_width)]=self.data[field_slice]
 c+=slice_width
 return result
 return self.data

Mercurial > pylearn

comparison dataset.py @ 4:f7dcfb5f9d5b