Mercurial > pylearn
comparison dataset.py @ 80:40476a7746e8
bugfix
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Mon, 05 May 2008 10:56:58 -0400 |
parents | dde1fb1b63ba |
children | 158653a9bc7c |
comparison
equal
deleted
inserted
replaced
79:427e02ef0629 | 80:40476a7746e8 |
---|---|
257 Using the third syntax, i1, i2, i3 will be list-like containers of the | 257 Using the third syntax, i1, i2, i3 will be list-like containers of the |
258 f1, f2, and f3 fields of a batch of examples on each loop iteration. | 258 f1, f2, and f3 fields of a batch of examples on each loop iteration. |
259 | 259 |
260 The minibatches iterator is expected to return upon each call to next() | 260 The minibatches iterator is expected to return upon each call to next() |
261 a DataSetFields object, which is a LookupList (indexed by the field names) whose | 261 a DataSetFields object, which is a LookupList (indexed by the field names) whose |
262 elements are iterable over the minibatch examples, and which keeps a pointer to | 262 elements are iterable and indexable over the minibatch examples, and which keeps a pointer to |
263 a sub-dataset that can be used to iterate over the individual examples | 263 a sub-dataset that can be used to iterate over the individual examples |
264 in the minibatch. Hence a minibatch can be converted back to a regular | 264 in the minibatch. Hence a minibatch can be converted back to a regular |
265 dataset or its fields can be looked at individually (and possibly iterated over). | 265 dataset or its fields can be looked at individually (and possibly iterated over). |
266 | 266 |
267 PARAMETERS | 267 PARAMETERS |
607 | 607 |
608 def __len__(self): | 608 def __len__(self): |
609 return self.length | 609 return self.length |
610 | 610 |
611 def __getitem__(self,i): | 611 def __getitem__(self,i): |
612 if type(i) in (int,slice,list): | 612 if type(i) in (slice,list): |
613 return DataSetFields(MinibatchDataSet( | 613 return DataSetFields(MinibatchDataSet( |
614 Example(self._fields.keys(),[field[i] for field in self._fields])),self._fields) | 614 Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames()) |
615 if type(i) is int: | |
616 return DataSetFields(MinibatchDataSet( | |
617 Example(self._fields.keys(),[[field[i]] for field in self._fields])),self.fieldNames()) | |
618 | |
615 if self.hasFields(i): | 619 if self.hasFields(i): |
616 return self._fields[i] | 620 return self._fields[i] |
617 assert i in self.__dict__ # else it means we are trying to access a non-existing property | 621 assert i in self.__dict__ # else it means we are trying to access a non-existing property |
618 return self.__dict__[i] | 622 return self.__dict__[i] |
619 | 623 |
916 return self.fields_columns.keys() | 920 return self.fields_columns.keys() |
917 | 921 |
918 def __len__(self): | 922 def __len__(self): |
919 return len(self.data) | 923 return len(self.data) |
920 | 924 |
921 def __getitem__(self,i): | 925 def __getitem__(self,key): |
922 """More efficient implementation than the default __getitem__""" | 926 """More efficient implementation than the default __getitem__""" |
923 fieldnames=self.fields_columns.keys() | 927 fieldnames=self.fields_columns.keys() |
924 if type(i) is int: | 928 if type(key) is int: |
925 return Example(fieldnames, | 929 return Example(fieldnames, |
926 [self.data[i,self.fields_columns[f]] for f in fieldnames]) | 930 [self.data[key,self.fields_columns[f]] for f in fieldnames]) |
927 if type(i) in (slice,list): | 931 if type(key) is slice: |
928 return MinibatchDataSet(Example(fieldnames, | 932 return MinibatchDataSet(Example(fieldnames, |
929 [self.data[i,self.fields_columns[f]] for f in fieldnames])) | 933 [self.data[key,self.fields_columns[f]] for f in fieldnames])) |
934 if type(key) is list: | |
935 for i in range(len(key)): | |
936 if self.hasFields(key[i]): | |
937 key[i]=self.fields_columns[key[i]] | |
938 return MinibatchDataSet(Example(fieldnames, | |
939 [self.data[key,self.fields_columns[f]] for f in fieldnames])) | |
940 | |
930 # else check for a fieldname | 941 # else check for a fieldname |
931 if self.hasFields(i): | 942 if self.hasFields(key): |
932 return Example([i],[self.data[self.fields_columns[i],:]]) | 943 return self.data[self.fields_columns[key],:] |
933 # else we are trying to access a property of the dataset | 944 # else we are trying to access a property of the dataset |
934 assert i in self.__dict__ # else it means we are trying to access a non-existing property | 945 assert key in self.__dict__ # else it means we are trying to access a non-existing property |
935 return self.__dict__[i] | 946 return self.__dict__[key] |
936 | 947 |
937 | 948 |
938 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): | 949 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): |
939 class ArrayDataSetIterator(object): | 950 class ArrayDataSetIterator(object): |
940 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): | 951 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): |