Mercurial > pylearn
diff dataset.py @ 9:de616c423dbd
Improving comments in dataset.py
author | bengioy@esprit.iro.umontreal.ca |
---|---|
date | Mon, 24 Mar 2008 16:52:47 -0400 |
parents | d1c394486037 |
children | be128b9127c8 88168361a5ab |
line wrap: on
line diff
--- a/dataset.py Mon Mar 24 15:56:53 2008 -0400
+++ b/dataset.py Mon Mar 24 16:52:47 2008 -0400
@@ -10,9 +10,9 @@
 One can obtain a sub-dataset by taking dataset.field or dataset(field1,field2,field3,...). Fields are not mutually exclusive, i.e. two fields can overlap in their actual content. The content of a field can be of any type, but often will be a numpy array.
- The minibatch_size field, if different than 1, means that the iterator (next() method)
+ The minibatch_size attribute, if different than 1, means that the iterator (next() method)
 returns not a single example but an array of length minibatch_size, i.e., an indexable
- object.
+ object with minibatch_size examples in it.
 """
 def __init__(self,minibatch_size=1):
@@ -25,7 +25,7 @@
 minibatch in the dataset. A minibatch (of length > 1) should be something one can iterate on again in order to obtain the individual examples. If the dataset has fields, then the example or the minibatch must have the same fields
- (typically this is implemented by returning another (small) dataset, when
+ (typically this is implemented by returning another smaller dataset, when
 there are fields).
 """
 raise NotImplementedError
@@ -102,19 +102,22 @@
 class ArrayDataSet(FiniteDataSet):
 """
- A fixed-length and fixed-width dataset in which each element is a numpy array
- or a number, hence the whole dataset corresponds to a numpy array. Fields
- must correspond to a slice of columns. If the dataset has fields,
+ An ArrayDataSet behaves like a numpy array but adds the notion of fields
+ and minibatch_size from DataSet. It is a fixed-length and fixed-width dataset
+ in which each element is a numpy array or a number, hence the whole
+ dataset corresponds to a numpy array. Fields
+ must correspond to a slice of array columns. If the dataset has fields,
 each 'example' is just a one-row ArrayDataSet, otherwise it is a numpy array.
- Any dataset can also be converted to a numpy array (losing the notion of fields)
- by the numpy.array(dataset) call.
+ Any dataset can also be converted to a numpy array (losing the notion of fields
+ and of minibatch_size) by the numpy.array(dataset) call.
 """
 def __init__(self,dataset=None,data=None,fields={},minibatch_size=1):
 """
- Construct an ArrayDataSet, either from a DataSet, or from
- a numpy array plus an optional specification of fields (by
- a dictionary of column slices indexed by field names).
+ There are two ways to construct an ArrayDataSet: (1) from an
+ existing dataset (which may result in a copy of the data in a numpy array),
+ or (2) from a numpy.array (the data argument), along with an optional description
+ of the fields (dictionary of column slices indexed by field names).
 """
 FiniteDataSet.__init__(self,minibatch_size)
 if dataset!=None: