# HG changeset patch # User Yoshua Bengio # Date 1213646819 14400 # Node ID 09140ba68e17501e9f676db12b9ddf0fbaac2e08 # Parent 2480024bf401aa87f8563c3c4b07e95d81da5c72 Added untested RenamedFieldsDataSet diff -r 2480024bf401 -r 09140ba68e17 dataset.py --- a/dataset.py Mon Jun 16 12:57:32 2008 -0400 +++ b/dataset.py Mon Jun 16 16:06:59 2008 -0400 @@ -664,7 +664,50 @@ def dontuse__getitem__(self,i): return FieldsSubsetDataSet(self.src[i],self.fieldnames) - +class RenamedFieldsDataSet(DataSet): + """ + A sub-class of L{DataSet} that selects and renames a subset of the fields. + """ + def __init__(self,src,src_fieldnames,new_fieldnames): + self.src=src + self.src_fieldnames=src_fieldnames + self.new_fieldnames=new_fieldnames + assert src.hasFields(*src_fieldnames) + assert len(src_fieldnames)==len(new_fieldnames) + self.valuesHStack = src.valuesHStack + self.valuesVStack = src.valuesVStack + + def __len__(self): return len(self.src) + + def fieldNames(self): + return self.new_fieldnames + + def __iter__(self): + class FieldsSubsetIterator(object): + def __init__(self,ds): + self.ds=ds + self.src_iter=ds.src.__iter__() + self.example=None + def __iter__(self): return self + def next(self): + complete_example = self.src_iter.next() + if self.example: + self.example._values=[complete_example[field] + for field in self.ds.src_fieldnames] + else: + self.example=Example(self.ds.new_fieldnames, + [complete_example[field] + for field in self.ds.src_fieldnames]) + return self.example + return FieldsSubsetIterator(self) + + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + assert self.hasFields(*fieldnames) + return self.src.minibatches_nowrap(fieldnames,minibatch_size,n_batches,offset) + def __getitem__(self,i): + return FieldsSubsetDataSet(self.src[i],self.new_fieldnames) + + class DataSetFields(Example): """ Although a L{DataSet} iterates over examples (like rows of a matrix), an associated