pylearn: dataset.py comparison

comparison dataset.py @ 156:cc8b032417db

merged

author	James Bergstra <bergstrj@iro.umontreal.ca>
date	Mon, 12 May 2008 16:17:22 -0400
parents	f8a1ae7eb83e
children	28a988bd19c3

comparison

equal deleted inserted replaced

-:ae5651a3696b
+:cc8b032417db
 the record most likely to be accessed next.
 """
 def __init__(self,source_dataset,cache_all_upon_construction=False):
 self.source_dataset=source_dataset
 self.cache_all_upon_construction=cache_all_upon_construction
+self.cached_examples = []
 if cache_all_upon_construction:
 # this potentially brings all the source examples
 # into memory at once, which may be too much
 # the work could possibly be done by minibatches
 # that are as large as possible but no more than what memory allows.
 fields_values = source_dataset.minibatches(minibatch_size=len(source_dataset)).__iter__().next()
-self.cached_examples = zip(*fields_values)
+assert all([len(self)==len(field_values) for field_values in fields_values])
-else:
+for example in fields_values.examples():
-self.cached_examples = []
+self.cached_examples.append(example)
 self.fieldNames = source_dataset.fieldNames
 self.hasFields = source_dataset.hasFields
 self.valuesHStack = source_dataset.valuesHStack
 self.valuesVStack = source_dataset.valuesVStack
 if upper>cache_len: # whole minibatch is not already in cache
 # cache everything from current length to upper
 for example in self.dataset.source_dataset[cache_len:upper]:
 self.dataset.cached_examples.append(example)
 all_fields_minibatch = Example(self.dataset.fieldNames(),
-*self.dataset.cached_examples[self.current:self.current+minibatch_size])
+zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size]))
 if self.dataset.fieldNames()==fieldnames:
 return all_fields_minibatch
 return Example(fieldnames,[all_fields_minibatch[name] for name in fieldnames])
 return CacheIterator(self)
+def __getitem__(self,i):
+if type(i)==int and len(self.cached_examples)>i:
+return self.cached_examples[i]
+else:
+return DataSet.__getitem__(self,i)
 class ApplyFunctionDataSet(DataSet):
 """
 A dataset that contains as fields the results of applying a given function
 example-wise or minibatch-wise to all the fields of an input dataset.

Mercurial > pylearn

comparison dataset.py @ 156:cc8b032417db