Mercurial > pylearn
diff dataset.py @ 258:19b14afe04b7
merged
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Tue, 03 Jun 2008 16:06:21 -0400 |
parents | 4ad6bc9b4f03 8ec867d12428 |
children | 6e69fb91f3c0 6226ebafefc3 |
line wrap: on
line diff
--- a/dataset.py Tue Jun 03 16:05:28 2008 -0400
+++ b/dataset.py Tue Jun 03 16:06:21 2008 -0400
@@ -1150,6 +1150,7 @@
             def __init__(self,dataset):
                 self.dataset=dataset
                 self.current=offset
+                self.all_fields = self.dataset.fieldNames()==fieldnames
             def __iter__(self): return self
             def next(self):
                 upper = self.current+minibatch_size
@@ -1161,7 +1162,7 @@
                 all_fields_minibatch = Example(self.dataset.fieldNames(),
                                                zip(*self.dataset.cached_examples[self.current:self.current+minibatch_size]))
                 self.current+=minibatch_size
-                if self.dataset.fieldNames()==fieldnames:
+                if self.all_fields:
                     return all_fields_minibatch
                 return Example(fieldnames,[all_fields_minibatch[name] for name in fieldnames])
         return CacheIterator(self)
@@ -1170,8 +1171,31 @@
         if type(i)==int and len(self.cached_examples)>i:
             return self.cached_examples[i]
         else:
-            return DataSet.__getitem__(self,i)
-
+            return self.source_dataset[i]
+
+    def __iter__(self):
+        class CacheIteratorIter(object):
+            def __init__(self,dataset):
+                self.dataset=dataset
+                self.l = len(dataset)
+                self.current = 0
+                self.fieldnames = self.dataset.fieldNames()
+                self.example = LookupList(self.fieldnames,[0]*len(self.fieldnames))
+            def __iter__(self): return self
+            def next(self):
+                if self.current>=self.l:
+                    raise StopIteration
+                cache_len = len(self.dataset.cached_examples)
+                if self.current>=cache_len: # whole minibatch is not already in cache
+                    # cache everything from current length to upper
+                    self.dataset.cached_examples.append(
+                        self.dataset.source_dataset[self.current])
+                self.example._values = self.dataset.cached_examples[self.current]
+                self.current+=1
+                return self.example
+
+        return CacheIteratorIter(self)
+
 class ApplyFunctionDataSet(DataSet):
     """
     A L{DataSet} that contains as fields the results of applying a