# HG changeset patch # User Thierry Bertin-Mahieux # Date 1213217943 14400 # Node ID e2eab74b6a2837dea5506d624ddf181d275e76a6 # Parent 14081904d8f3add01628700f9019c8cfc4b8f30d NArraysDataSet, a generalization ArrayDataSet where every field is a ndarray, is implemented. Not really tested aside basic stuff... diff -r 14081904d8f3 -r e2eab74b6a28 dataset.py --- a/dataset.py Wed Jun 11 16:40:47 2008 -0400 +++ b/dataset.py Wed Jun 11 16:59:03 2008 -0400 @@ -1056,24 +1056,24 @@ Every tensor is treated as a numpy matrix (using numpy.asmatrix) """ - ArrayFieldsDataSet.__init(self,**kwargs) + ArrayFieldsDataSet.__init__(self,**kwargs) assert len(data_arrays) == len(fieldnames) assert len(fieldnames) > 0 num_examples = numpy.asmatrix(data_arrays[0]).shape[0] - for k in range(data_arrays) : + for k in range(len(data_arrays)) : assert numpy.asmatrix(data_arrays[k]).shape[0] == num_examples self._fieldnames = fieldnames self._datas = [] - for k in range(data_arrays) : + for k in range(len(data_arrays)) : self._datas.append( numpy.asmatrix(data_arrays[k]) ) - raise NotImplemented + #raise NotImplemented def __len__(self) : """ Length of the dataset is based on the first array = data_arrays[0], using its shape """ - return self.datas[0].shape[0] + return self._datas[0].shape[0] def fieldNames(self) : """ @@ -1081,6 +1081,23 @@ """ return self._fieldnames + def minibatches_nowrap(self,fieldnames,minibatch_size,n_batches,offset): + cursor = Example(fieldnames,[0]*len(fieldnames)) + fieldnames = self.fieldNames() if fieldnames is None else fieldnames + for n in xrange(n_batches): + if offset == len(self): + break + for f in range(len(cursor._names)) : + idx = self._fieldnames.index(cursor._names[f]) + assert idx >= 0 + sub_data = self._datas[f][offset : offset+minibatch_size] + cursor._values[f] = sub_data + offset += len(sub_data) #can be less than minibatch_size at end + yield cursor + + #return ArrayDataSetIterator(self,fieldnames,minibatch_size,n_batches,offset) + + class ArrayDataSet(ArrayFieldsDataSet):