Mercurial > ift6266
annotate datasets/dataset.py @ 395:f61a04074723
code for amazon MT
author | goldfinger |
---|---|
date | Tue, 27 Apr 2010 13:45:32 -0400 |
parents | d6672a7daea5 |
children |
rev | line source |
---|---|
163
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
1 from dsetiter import DataIterator |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
2 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
3 class DataSet(object): |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
4 def test(self, batchsize, bufsize=None): |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
5 r""" |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
6 Returns an iterator over the test examples. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
7 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
8 Parameters |
176
d6672a7daea5
Update comments in the dataset definition (you can't pass 0 as minibatch size).
Arnaud Bergeron <abergeron@gmail.com>
parents:
163
diff
changeset
|
9 batchsize (int) -- the size of the minibatches |
163
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
10 bufsize (int, optional) -- the size of the in-memory buffer, |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
11 0 to disable. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
12 """ |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
13 return self._return_it(batchsize, bufsize, self._test) |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
14 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
15 def train(self, batchsize, bufsize=None): |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
16 r""" |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
17 Returns an iterator over the training examples. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
18 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
19 Parameters |
176
d6672a7daea5
Update comments in the dataset definition (you can't pass 0 as minibatch size).
Arnaud Bergeron <abergeron@gmail.com>
parents:
163
diff
changeset
|
20 batchsize (int) -- the size of the minibatches |
163
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
21 bufsize (int, optional) -- the size of the in-memory buffer, |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
22 0 to disable. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
23 """ |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
24 return self._return_it(batchsize, bufsize, self._train) |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
25 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
26 def valid(self, batchsize, bufsize=None): |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
27 r""" |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
28 Returns an iterator over the validation examples. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
29 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
30 Parameters |
176
d6672a7daea5
Update comments in the dataset definition (you can't pass 0 as minibatch size).
Arnaud Bergeron <abergeron@gmail.com>
parents:
163
diff
changeset
|
31 batchsize (int) -- the size of the minibatches |
163
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
32 bufsize (int, optional) -- the size of the in-memory buffer, |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
33 0 to disable. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
34 """ |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
35 return self._return_it(batchsize, bufsize, self._valid) |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
36 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
37 def _return_it(batchsize, bufsize, data): |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
38 r""" |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
39 Must return an iterator over the specified dataset (`data`). |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
40 |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
41 Implement this in subclassses. |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
42 """ |
4b28d7382dbf
Add inital implementation of datasets.
Arnaud Bergeron <abergeron@gmail.com>
parents:
diff
changeset
|
43 raise NotImplemented |