Mercurial > pylearn
comparison dataset.py @ 82:158653a9bc7c
Automated merge with ssh://p-omega1@lgcm.iro.umontreal.ca/tlearn
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Mon, 05 May 2008 11:02:03 -0400 |
parents | 3499918faa9d 40476a7746e8 |
children | c0f211213a58 |
comparison
equal
deleted
inserted
replaced
78:3499918faa9d | 82:158653a9bc7c |
---|---|
279 Using the third syntax, i1, i2, i3 will be list-like containers of the | 279 Using the third syntax, i1, i2, i3 will be list-like containers of the |
280 f1, f2, and f3 fields of a batch of examples on each loop iteration. | 280 f1, f2, and f3 fields of a batch of examples on each loop iteration. |
281 | 281 |
282 The minibatches iterator is expected to return upon each call to next() | 282 The minibatches iterator is expected to return upon each call to next() |
283 a DataSetFields object, which is a LookupList (indexed by the field names) whose | 283 a DataSetFields object, which is a LookupList (indexed by the field names) whose |
284 elements are iterable over the minibatch examples, and which keeps a pointer to | 284 elements are iterable and indexable over the minibatch examples, and which keeps a pointer to |
285 a sub-dataset that can be used to iterate over the individual examples | 285 a sub-dataset that can be used to iterate over the individual examples |
286 in the minibatch. Hence a minibatch can be converted back to a regular | 286 in the minibatch. Hence a minibatch can be converted back to a regular |
287 dataset or its fields can be looked at individually (and possibly iterated over). | 287 dataset or its fields can be looked at individually (and possibly iterated over). |
288 | 288 |
289 PARAMETERS | 289 PARAMETERS |
630 | 630 |
631 def __len__(self): | 631 def __len__(self): |
632 return self.length | 632 return self.length |
633 | 633 |
634 def __getitem__(self,i): | 634 def __getitem__(self,i): |
635 if type(i) in (slice,list): | |
636 return DataSetFields(MinibatchDataSet( | |
637 Example(self._fields.keys(),[field[i] for field in self._fields])),self.fieldNames()) | |
635 if type(i) is int: | 638 if type(i) is int: |
636 return Example(self._fields.keys(),[field[i] for field in self._fields]) | 639 return DataSetFields(MinibatchDataSet( |
637 if type(i) in (slice,list): | 640 Example(self._fields.keys(),[[field[i]] for field in self._fields])),self.fieldNames()) |
638 return MinibatchDataSet(Example(self._fields.keys(), | 641 |
639 [field[i] for field in self._fields]), | |
640 self.valuesVStack,self.valuesHStack) | |
641 if self.hasFields(i): | 642 if self.hasFields(i): |
642 return self._fields[i] | 643 return self._fields[i] |
643 assert i in self.__dict__ # else it means we are trying to access a non-existing property | 644 assert i in self.__dict__ # else it means we are trying to access a non-existing property |
644 return self.__dict__[i] | 645 return self.__dict__[i] |
645 | 646 |
937 return self.fields_columns.keys() | 938 return self.fields_columns.keys() |
938 | 939 |
939 def __len__(self): | 940 def __len__(self): |
940 return len(self.data) | 941 return len(self.data) |
941 | 942 |
942 def __getitem__(self,i): | 943 def __getitem__(self,key): |
943 """More efficient implementation than the default __getitem__""" | 944 """More efficient implementation than the default __getitem__""" |
944 fieldnames=self.fields_columns.keys() | 945 fieldnames=self.fields_columns.keys() |
945 if type(i) is int: | 946 if type(key) is int: |
946 return Example(fieldnames, | 947 return Example(fieldnames, |
947 [self.data[i,self.fields_columns[f]] for f in fieldnames]) | 948 [self.data[key,self.fields_columns[f]] for f in fieldnames]) |
948 if type(i) in (slice,list): | 949 if type(key) is slice: |
949 return MinibatchDataSet(Example(fieldnames, | 950 return MinibatchDataSet(Example(fieldnames, |
950 [self.data[i,self.fields_columns[f]] for f in fieldnames]), | 951 [self.data[key,self.fields_columns[f]] for f in fieldnames])) |
952 if type(key) is list: | |
953 for i in range(len(key)): | |
954 if self.hasFields(key[i]): | |
955 key[i]=self.fields_columns[key[i]] | |
956 return MinibatchDataSet(Example(fieldnames, | |
957 [self.data[key,self.fields_columns[f]] for f in fieldnames]), | |
951 self.valuesVStack,self.valuesHStack) | 958 self.valuesVStack,self.valuesHStack) |
959 | |
952 # else check for a fieldname | 960 # else check for a fieldname |
953 if self.hasFields(i): | 961 if self.hasFields(key): |
954 return Example([i],[self.data[self.fields_columns[i],:]]) | 962 return self.data[self.fields_columns[key],:] |
955 # else we are trying to access a property of the dataset | 963 # else we are trying to access a property of the dataset |
956 assert i in self.__dict__ # else it means we are trying to access a non-existing property | 964 assert key in self.__dict__ # else it means we are trying to access a non-existing property |
957 return self.__dict__[i] | 965 return self.__dict__[key] |
958 | 966 |
959 | 967 |
960 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): | 968 def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): |
961 class ArrayDataSetIterator(object): | 969 class ArrayDataSetIterator(object): |
962 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): | 970 def __init__(self,dataset,fieldnames,minibatch_size,n_batches,offset): |