comparison dataset.py @ 135:0d8e721cc63c

Fixed bugs in dataset to make test_mlp.py work
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Mon, 12 May 2008 14:30:21 -0400
parents 3f4e5c9bdc5e
children ceae4de18981
comparison
equal deleted inserted replaced
134:3f4e5c9bdc5e 135:0d8e721cc63c
427 return DataSet.MinibatchToSingleExampleIterator( 427 return DataSet.MinibatchToSingleExampleIterator(
428 self.minibatches(minibatch_size=1,n_batches=1,offset=i)).next() 428 self.minibatches(minibatch_size=1,n_batches=1,offset=i)).next()
429 rows=None 429 rows=None
430 # or a slice 430 # or a slice
431 if type(i) is slice: 431 if type(i) is slice:
432 if not i.start: i.start=0 432 if not i.start: i=slice(0,i.stop,i.step)
433 if not i.step: i.step=1 433 if not i.step: i=slice(i.start,i.stop,1)
434 if i.step is 1: 434 if i.step is 1:
435 return self.minibatches(minibatch_size=i.stop-i.start,n_batches=1,offset=i.start).next().examples() 435 return self.minibatches(minibatch_size=i.stop-i.start,n_batches=1,offset=i.start).next().examples()
436 rows = range(i.start,i.stop,i.step) 436 rows = range(i.start,i.stop,i.step)
437 # or a list of indices 437 # or a list of indices
438 elif type(i) is list: 438 elif type(i) is list:
495 def __or__(self,other): 495 def __or__(self,other):
496 """ 496 """
497 dataset1 | dataset2 returns a dataset whose list of fields is the concatenation of the list of 497 dataset1 | dataset2 returns a dataset whose list of fields is the concatenation of the list of
498 fields of the argument datasets. This only works if they all have the same length. 498 fields of the argument datasets. This only works if they all have the same length.
499 """ 499 """
500 return HStackedDataSet(self,other) 500 return HStackedDataSet([self,other])
501 501
502 def __and__(self,other): 502 def __and__(self,other):
503 """ 503 """
504 dataset1 & dataset2 is a dataset that concatenates the examples from the argument datasets 504 dataset1 & dataset2 is a dataset that concatenates the examples from the argument datasets
505 (and whose length is the sum of the length of the argument datasets). This only 505 (and whose length is the sum of the length of the argument datasets). This only
506 works if they all have the same fields. 506 works if they all have the same fields.
507 """ 507 """
508 return VStackedDataSet(self,other) 508 return VStackedDataSet([self,other])
509 509
510 def hstack(datasets): 510 def hstack(datasets):
511 """ 511 """
512 hstack(dataset1,dataset2,...) returns dataset1 | dataset2 | ... 512 hstack(dataset1,dataset2,...) returns dataset1 | dataset2 | ...
513 which is a dataset whose fields list is the concatenation of the fields 513 which is a dataset whose fields list is the concatenation of the fields
1066 self.current=offset 1066 self.current=offset
1067 def __iter__(self): return self 1067 def __iter__(self): return self
1068 def next(self): 1068 def next(self):
1069 upper = self.current+minibatch_size 1069 upper = self.current+minibatch_size
1070 cache_len = len(self.dataset.cached_examples) 1070 cache_len = len(self.dataset.cached_examples)
1071 if upper>=cache_len: # whole minibatch is not already in cache 1071 if upper>cache_len: # whole minibatch is not already in cache
1072 # cache everything from current length to upper 1072 # cache everything from current length to upper
1073 for example in self.dataset.source_dataset[cache_len:upper]: 1073 for example in self.dataset.source_dataset[cache_len:upper]:
1074 self.dataset.cached_examples.append(example) 1074 self.dataset.cached_examples.append(example)
1075 all_fields_minibatch = Example(self.dataset.fieldNames(), 1075 all_fields_minibatch = Example(self.dataset.fieldNames(),
1076 self.dataset.cached_examples[self.current:self.current+minibatch_size]) 1076 self.dataset.cached_examples[self.current:self.current+minibatch_size])