comparison dataset.py @ 80:40476a7746e8

bugfix
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Mon, 05 May 2008 10:56:58 -0400
parents dde1fb1b63ba
children 158653a9bc7c
comparison
equal deleted inserted replaced
79:427e02ef0629 80:40476a7746e8
257 Using the third syntax, i1, i2, i3 will be list-like containers of the 257 Using the third syntax, i1, i2, i3 will be list-like containers of the
258 f1, f2, and f3 fields of a batch of examples on each loop iteration. 258 f1, f2, and f3 fields of a batch of examples on each loop iteration.
259 259
260 The minibatches iterator is expected to return upon each call to next() 260 The minibatches iterator is expected to return upon each call to next()
261 a DataSetFields object, which is a LookupList (indexed by the field names) whose 261 a DataSetFields object, which is a LookupList (indexed by the field names) whose
262 elements are iterable over the minibatch examples, and which keeps a pointer to 262 elements are iterable and indexable over the minibatch examples, and which keeps a pointer to
263 a sub-dataset that can be used to iterate over the individual examples 263 a sub-dataset that can be used to iterate over the individual examples
264 in the minibatch. Hence a minibatch can be converted back to a regular 264 in the minibatch. Hence a minibatch can be converted back to a regular
265 dataset or its fields can be looked at individually (and possibly iterated over). 265 dataset or its fields can be looked at individually (and possibly iterated over).
266 266
267 PARAMETERS 267 PARAMETERS
607 607
608 def __len__(self): 608 def __len__(self):
609 return self.length 609 return self.length
610 610
611 def __getitem__(self,i): 611 def __getitem__(self,i):
612 if type(i) in (int,slice,list): 612 if type(i) in (slice,list):
613 return DataSetFields(MinibatchDataSet( 613 return DataSetFields(MinibatchDataSet(
614 Example(self._fields.keys(),[field[i] for field in self._fields])),self._fields) 614 Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames())
615 if type(i) is int:
616 return DataSetFields(MinibatchDataSet(
617 Example(self._fields.keys(),[[field[i]] for field in self._fields])),self.fieldNames())
618
615 if self.hasFields(i): 619 if self.hasFields(i):
616 return self._fields[i] 620 return self._fields[i]
617 assert i in self.__dict__ # else it means we are trying to access a non-existing property 621 assert i in self.__dict__ # else it means we are trying to access a non-existing property
618 return self.__dict__[i] 622 return self.__dict__[i]
619 623
916 return self.fields_columns.keys() 920 return self.fields_columns.keys()
917 921
918 def __len__(self): 922 def __len__(self):
919 return len(self.data) 923 return len(self.data)
920 924
921 def __getitem__(self,i): 925 def __getitem__(self,key):
922 """More efficient implementation than the default __getitem__""" 926 """More efficient implementation than the default __getitem__"""
923 fieldnames=self.fields_columns.keys() 927 fieldnames=self.fields_columns.keys()
924 if type(i) is int: 928 if type(key) is int:
925 return Example(fieldnames, 929 return Example(fieldnames,
926 [self.data[i,self.fields_columns[f]] for f in fieldnames]) 930 [self.data[key,self.fields_columns[f]] for f in fieldnames])
927 if type(i) in (slice,list): 931 if type(key) is slice:
928 return MinibatchDataSet(Example(fieldnames, 932 return MinibatchDataSet(Example(fieldnames,
929 [self.data[i,self.fields_columns[f]] for f in fieldnames])) 933 [self.data[key,self.fields_columns[f]] for f in fieldnames]))
934 if type(key) is list:
935 for i in range(len(key)):
936 if self.hasFields(key[i]):
937 key[i]=self.fields_columns[key[i]]
938 return MinibatchDataSet(Example(fieldnames,
939 [self.data[key,self.fields_columns[f]] for f in fieldnames]))
940
930 # else check for a fieldname 941 # else check for a fieldname
931 if self.hasFields(i): 942 if self.hasFields(key):
932 return Example([i],[self.data[self.fields_columns[i],:]]) 943 return self.data[self.fields_columns[key],:]
933 # else we are trying to access a property of the dataset 944 # else we are trying to access a property of the dataset
934 assert i in self.__dict__ # else it means we are trying to access a non-existing property 945 assert key in self.__dict__ # else it means we are trying to access a non-existing property
935 return self.__dict__[i] 946 return self.__dict__[key]
936 947
937 948
938 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): 949 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset):
939 class ArrayDataSetIterator(object): 950 class ArrayDataSetIterator(object):
940 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): 951 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset):