comparison dataset.py @ 82:158653a9bc7c

Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Mon, 05 May 2008 11:02:03 -0400
parents 3499918faa9d 40476a7746e8
children c0f211213a58
comparison
equal deleted inserted replaced
78:3499918faa9d 82:158653a9bc7c
279 Using the third syntax, i1, i2, i3 will be list-like containers of the 279 Using the third syntax, i1, i2, i3 will be list-like containers of the
280 f1, f2, and f3 fields of a batch of examples on each loop iteration. 280 f1, f2, and f3 fields of a batch of examples on each loop iteration.
281 281
282 The minibatches iterator is expected to return upon each call to next() 282 The minibatches iterator is expected to return upon each call to next()
283 a DataSetFields object, which is a LookupList (indexed by the field names) whose 283 a DataSetFields object, which is a LookupList (indexed by the field names) whose
284 elements are iterable over the minibatch examples, and which keeps a pointer to 284 elements are iterable and indexable over the minibatch examples, and which keeps a pointer to
285 a sub-dataset that can be used to iterate over the individual examples 285 a sub-dataset that can be used to iterate over the individual examples
286 in the minibatch. Hence a minibatch can be converted back to a regular 286 in the minibatch. Hence a minibatch can be converted back to a regular
287 dataset or its fields can be looked at individually (and possibly iterated over). 287 dataset or its fields can be looked at individually (and possibly iterated over).
288 288
289 PARAMETERS 289 PARAMETERS
630 630
631 def __len__(self): 631 def __len__(self):
632 return self.length 632 return self.length
633 633
634 def __getitem__(self,i): 634 def __getitem__(self,i):
635 if type(i) in (slice,list):
636 return DataSetFields(MinibatchDataSet(
637 Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames())
635 if type(i) is int: 638 if type(i) is int:
636 return Example(self._fields.keys(),[field[i] for field in self._fields]) 639 return DataSetFields(MinibatchDataSet(
637 if type(i) in (slice,list): 640 Example(self._fields.keys(),[[field[i]] for field in self._fields])),self.fieldNames())
638 return MinibatchDataSet(Example(self._fields.keys(), 641
639 [field[i] for field in self._fields]),
640 self.valuesVStack,self.valuesHStack)
641 if self.hasFields(i): 642 if self.hasFields(i):
642 return self._fields[i] 643 return self._fields[i]
643 assert i in self.__dict__ # else it means we are trying to access a non-existing property 644 assert i in self.__dict__ # else it means we are trying to access a non-existing property
644 return self.__dict__[i] 645 return self.__dict__[i]
645 646
937 return self.fields_columns.keys() 938 return self.fields_columns.keys()
938 939
939 def __len__(self): 940 def __len__(self):
940 return len(self.data) 941 return len(self.data)
941 942
942 def __getitem__(self,i): 943 def __getitem__(self,key):
943 """More efficient implementation than the default __getitem__""" 944 """More efficient implementation than the default __getitem__"""
944 fieldnames=self.fields_columns.keys() 945 fieldnames=self.fields_columns.keys()
945 if type(i) is int: 946 if type(key) is int:
946 return Example(fieldnames, 947 return Example(fieldnames,
947 [self.data[i,self.fields_columns[f]] for f in fieldnames]) 948 [self.data[key,self.fields_columns[f]] for f in fieldnames])
948 if type(i) in (slice,list): 949 if type(key) is slice:
949 return MinibatchDataSet(Example(fieldnames, 950 return MinibatchDataSet(Example(fieldnames,
950 [self.data[i,self.fields_columns[f]] for f in fieldnames]), 951 [self.data[key,self.fields_columns[f]] for f in fieldnames]))
952 if type(key) is list:
953 for i in range(len(key)):
954 if self.hasFields(key[i]):
955 key[i]=self.fields_columns[key[i]]
956 return MinibatchDataSet(Example(fieldnames,
957 [self.data[key,self.fields_columns[f]] for f in fieldnames]),
951 self.valuesVStack,self.valuesHStack) 958 self.valuesVStack,self.valuesHStack)
959
952 # else check for a fieldname 960 # else check for a fieldname
953 if self.hasFields(i): 961 if self.hasFields(key):
954 return Example([i],[self.data[self.fields_columns[i],:]]) 962 return self.data[self.fields_columns[key],:]
955 # else we are trying to access a property of the dataset 963 # else we are trying to access a property of the dataset
956 assert i in self.__dict__ # else it means we are trying to access a non-existing property 964 assert key in self.__dict__ # else it means we are trying to access a non-existing property
957 return self.__dict__[i] 965 return self.__dict__[key]
958 966
959 967
960 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): 968 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset):
961 class ArrayDataSetIterator(object): 969 class ArrayDataSetIterator(object):
962 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): 970 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset):