Mercurial > ift6266
diff datasets/dataset.py @ 163:4b28d7382dbf
Add initial implementation of datasets.
For the moment only nist_digits is defined.
author | Arnaud Bergeron <abergeron@gmail.com> |
---|---|
date | Thu, 25 Feb 2010 18:40:01 -0500 |
parents | |
children | d6672a7daea5 |
line wrap: on
line diff
from dsetiter import DataIterator


class DataSet(object):
    r"""
    Base class exposing the test, training and validation parts of a
    dataset as iterators.

    The public methods (`test`, `train`, `valid`) forward to
    `_return_it`, passing `self._test`, `self._train` or `self._valid`
    respectively.  This base class defines none of those attributes and
    `_return_it` always raises, so a concrete subclass must provide the
    three attributes and override `_return_it`.
    """

    def test(self, batchsize, bufsize=None):
        r"""
        Returns an iterator over the test examples.

        Parameters
          batchsize (int) -- the size of the minibatches, 0 means
                             return the whole set at once.
          bufsize (int, optional) -- the size of the in-memory buffer,
                                     0 to disable.
        """
        return self._return_it(batchsize, bufsize, self._test)

    def train(self, batchsize, bufsize=None):
        r"""
        Returns an iterator over the training examples.

        Parameters
          batchsize (int) -- the size of the minibatches, 0 means
                             return the whole set at once.
          bufsize (int, optional) -- the size of the in-memory buffer,
                                     0 to disable.
        """
        return self._return_it(batchsize, bufsize, self._train)

    def valid(self, batchsize, bufsize=None):
        r"""
        Returns an iterator over the validation examples.

        Parameters
          batchsize (int) -- the size of the minibatches, 0 means
                             return the whole set at once.
          bufsize (int, optional) -- the size of the in-memory buffer,
                                     0 to disable.
        """
        return self._return_it(batchsize, bufsize, self._valid)

    def _return_it(self, batchsize, bufsize, data):
        r"""
        Must return an iterator over the specified dataset (`data`).

        Implement this in subclasses.

        Parameters
          batchsize -- forwarded unchanged from `test`/`train`/`valid`.
          bufsize -- forwarded unchanged from `test`/`train`/`valid`.
          data -- the `_test`, `_train` or `_valid` attribute selected
                  by the calling method.

        Raises
          NotImplementedError -- always, in this base class.
        """
        # `self` is required: the public methods call this as a bound
        # method.  NotImplementedError (not the NotImplemented constant,
        # which is not an exception) is the correct thing to raise here.
        raise NotImplementedError(
            "%s must implement _return_it()" % type(self).__name__)