pylearn: comparison of dataset.py @ 135:0d8e721cc63c
Fixed bugs in dataset to make test_mlp.py work
| author | Yoshua Bengio <bengioy@iro.umontreal.ca> |
| --- | --- |
| date | Mon, 12 May 2008 14:30:21 -0400 |
| parents | 3f4e5c9bdc5e |
| children | ceae4de18981 |
```diff
--- dataset.py (134:3f4e5c9bdc5e)
+++ dataset.py (135:0d8e721cc63c)
@@ -427,12 +427,12 @@
             return DataSet.MinibatchToSingleExampleIterator(
                 self.minibatches(minibatch_size=1,n_batches=1,offset=i)).next()
         rows=None
         # or a slice
         if type(i) is slice:
-            if not i.start: i.start=0
-            if not i.step: i.step=1
+            if not i.start: i=slice(0,i.stop,i.step)
+            if not i.step: i=slice(i.start,i.stop,1)
             if i.step is 1:
                 return self.minibatches(minibatch_size=i.stop-i.start,n_batches=1,offset=i.start).next().examples()
             rows = range(i.start,i.stop,i.step)
         # or a list of indices
         elif type(i) is list:
```
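The two replaced lines work around the fact that Python `slice` objects are immutable: assigning to `i.start` or `i.step` raises an `AttributeError`, so missing bounds have to be filled in by constructing a new slice. A minimal standalone sketch of the same normalization (illustrative only, not pylearn code):

```python
# Slice attributes are read-only, so the old `i.start = 0` could never work.
s = slice(None, 10, None)
try:
    s.start = 0
except AttributeError as err:
    print("cannot mutate a slice:", err)

# The fix rebuilds the slice with defaults filled in, as the new code does.
if not s.start:
    s = slice(0, s.stop, s.step)
if not s.step:
    s = slice(s.start, s.stop, 1)
print(s)  # slice(0, 10, 1)
```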
```diff
@@ -495,19 +495,19 @@
     def __or__(self,other):
         """
         dataset1 | dataset2 returns a dataset whose list of fields is the concatenation of the list of
         fields of the argument datasets. This only works if they all have the same length.
         """
-        return HStackedDataSet(self,other)
+        return HStackedDataSet([self,other])
 
     def __and__(self,other):
         """
         dataset1 & dataset2 is a dataset that concatenates the examples from the argument datasets
         (and whose length is the sum of the length of the argument datasets). This only
         works if they all have the same fields.
         """
-        return VStackedDataSet(self,other)
+        return VStackedDataSet([self,other])
 
 def hstack(datasets):
     """
     hstack(dataset1,dataset2,...) returns dataset1 | datataset2 | ...
     which is a dataset whose fields list is the concatenation of the fields
```
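Both operator overloads now hand the stacking classes a list rather than two positional arguments, which suggests constructors that accept an arbitrary number of datasets. For intuition, the two stacking directions behave like combining per-example records: `|` merges the fields of aligned examples, while `&` appends examples that share the same fields. A runnable toy analogy with plain dicts (not the pylearn classes):

```python
# Toy stand-ins for two datasets of equal length with disjoint fields.
d1 = [{"x": 1.0}, {"x": 2.0}]
d2 = [{"y": 0},   {"y": 1}]

# "|" / HStackedDataSet: concatenate the field lists, example by example.
hstacked = [dict(a, **b) for a, b in zip(d1, d2)]
print(hstacked)   # [{'x': 1.0, 'y': 0}, {'x': 2.0, 'y': 1}]

# "&" / VStackedDataSet: concatenate the examples (fields must match).
d3 = [{"x": 3.0}, {"x": 4.0}]
vstacked = d1 + d3
print(vstacked)   # [{'x': 1.0}, {'x': 2.0}, {'x': 3.0}, {'x': 4.0}]
```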
```diff
@@ -1066,11 +1066,11 @@
                 self.current=offset
             def __iter__(self): return self
             def next(self):
                 upper = self.current+minibatch_size
                 cache_len = len(self.dataset.cached_examples)
-                if upper>=cache_len: # whole minibatch is not already in cache
+                if upper>cache_len: # whole minibatch is not already in cache
                     # cache everything from current length to upper
                     for example in self.dataset.source_dataset[cache_len:upper]:
                         self.dataset.cached_examples.append(example)
                 all_fields_minibatch = Example(self.dataset.fieldNames(),
                                                self.dataset.cached_examples[self.current:self.current+minibatch_size])
```
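The change from `>=` to `>` tightens the cache check: when `upper == cache_len` the cached examples already cover the requested minibatch, so the iterator should not go back to the source dataset at all. A small standalone sketch of the same grow-on-demand caching pattern (plain lists standing in for the pylearn objects):

```python
source = list(range(100))   # stands in for source_dataset
cached = []                 # stands in for cached_examples
fetches = 0                 # counts trips to the source, for illustration

def get_minibatch(current, minibatch_size):
    global fetches
    upper = current + minibatch_size
    if upper > len(cached):              # corrected test: fetch only when the cache is too short
        fetches += 1
        cached.extend(source[len(cached):upper])
    return cached[current:upper]

print(get_minibatch(0, 10), fetches)     # fills the cache with items 0..9 (1 fetch)
print(get_minibatch(5, 5), fetches)      # upper == len(cached): served from cache, still 1 fetch
```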