diff dataset.py @ 9:de616c423dbd

Improving comments in dataset.py
author bengioy@esprit.iro.umontreal.ca
date Mon, 24 Mar 2008 16:52:47 -0400
parents d1c394486037
children be128b9127c8 88168361a5ab
line wrap: on
line diff
--- a/dataset.py	Mon Mar 24 15:56:53 2008 -0400
+++ b/dataset.py	Mon Mar 24 16:52:47 2008 -0400
@@ -10,9 +10,9 @@
     One can obtain a sub-dataset by taking dataset.field or dataset(field1,field2,field3,...).
     Fields are not mutually exclusive, i.e. two fields can overlap in their actual content.
     The content of a field can be of any type, but often will be a numpy array.
-    The minibatch_size field, if different than 1, means that the iterator (next() method)
+    The minibatch_size attribute, if different from 1, means that the iterator (next() method)
     returns not a single example but an array of length minibatch_size, i.e., an indexable
-    object.
+    object with minibatch_size examples in it.
     """
 
     def __init__(self,minibatch_size=1):
@@ -25,7 +25,7 @@
         minibatch in the dataset. A minibatch (of length > 1) should be something one 
         can iterate on again in order to obtain the individual examples. If the dataset 
         has fields, then the example or the minibatch must have the same fields
-        (typically this is implemented by returning another (small) dataset, when
+        (typically this is implemented by returning another smaller dataset, when
         there are fields).
         """
         raise NotImplementedError
@@ -102,19 +102,22 @@
 
 class ArrayDataSet(FiniteDataSet):
     """
-    A fixed-length and fixed-width dataset in which each element is a numpy array
-    or a number, hence the whole dataset corresponds to a numpy array. Fields
-    must correspond to a slice of columns. If the dataset has fields,
+    An ArrayDataSet behaves like a numpy array but adds the notion of fields
+    and minibatch_size from DataSet. It is a fixed-length and fixed-width dataset
+    in which each element is a numpy array or a number, hence the whole 
+    dataset corresponds to a numpy array. Fields
+    must correspond to a slice of array columns. If the dataset has fields,
     each 'example' is just a one-row ArrayDataSet, otherwise it is a numpy array.
-    Any dataset can also be converted to a numpy array (losing the notion of fields)
-    by the numpy.array(dataset) call.
+    Any dataset can also be converted to a numpy array (losing the notion of fields
+    and of minibatch_size) by the numpy.array(dataset) call.
     """
 
     def __init__(self,dataset=None,data=None,fields={},minibatch_size=1):
         """
-        Construct an ArrayDataSet, either from a DataSet, or from
-        a numpy array plus an optional specification of fields (by
-        a dictionary of column slices indexed by field names).
+        There are two ways to construct an ArrayDataSet: (1) from an
+        existing dataset (which may result in a copy of the data in a numpy array),
+        or (2) from a numpy array (the data argument), along with an optional description
+        of the fields (a dictionary of column slices indexed by field names).
         """
         FiniteDataSet.__init__(self,minibatch_size)
         if dataset!=None: