Mercurial > pylearn
comparison dataset.py @ 309:923de30457f0
get item now returns LookupLists
author | Thierry Bertin-Mahieux <bertinmt@iro.umontreal.ca> |
---|---|
date | Wed, 11 Jun 2008 11:18:14 -0400 |
parents | f5d33f9c0b9c |
children | ebccfd05ccd5 |
comparison
equal
deleted
inserted
replaced
308:9ebc960260c5 | 309:923de30457f0 |
---|---|
435 @returns: single or multiple examples | 435 @returns: single or multiple examples |
436 | 436 |
437 @type i: integer or slice or <iterable> of integers | 437 @type i: integer or slice or <iterable> of integers |
438 @param i: | 438 @param i: |
439 dataset[i] returns the (i+1)-th example of the dataset. | 439 dataset[i] returns the (i+1)-th example of the dataset. |
440 dataset[i:j] returns the subdataset with examples i,i+1,...,j-1. | 440 dataset[i:j] returns a LookupList with examples i,i+1,...,j-1. |
441 dataset[i:j:s] returns the subdataset with examples i,i+s,i+2s,... (all indices below j). | 441 dataset[i:j:s] returns a LookupList with examples i,i+s,i+2s,... (all indices below j). |
442 dataset[[i1,i2,..,in]] returns the subdataset with examples i1,i2,...,in. | 442 dataset[[i1,i2,..,in]] returns a LookupList with examples i1,i2,...,in. |
443 | 443 |
444 @note: | 444 @note: |
445 Some stream datasets may be unable to implement random access, i.e. | 445 Some stream datasets may be unable to implement random access, i.e. |
446 arbitrary slicing/indexing because they can only iterate through | 446 arbitrary slicing/indexing because they can only iterate through |
447 examples one or a minibatch at a time and do not actually store or keep | 447 examples one or a minibatch at a time and do not actually store or keep |
452 always be the most efficient way to obtain the result, especially if | 452 always be the most efficient way to obtain the result, especially if |
453 the data are actually stored in a memory array. | 453 the data are actually stored in a memory array. |
454 """ | 454 """ |
455 | 455 |
456 if type(i) is int: | 456 if type(i) is int: |
457 #TODO: consider asserting that i >= 0 | 457 assert i >= 0 # TBM: see if someone complains and wants negative i |
458 i_batch = self.minibatches_nowrap(self.fieldNames(), | 458 i_batch = self.minibatches_nowrap(self.fieldNames(), |
459 minibatch_size=1, n_batches=1, offset=i) | 459 minibatch_size=1, n_batches=1, offset=i) |
460 return DataSet.MinibatchToSingleExampleIterator(i_batch).next() | 460 return DataSet.MinibatchToSingleExampleIterator(i_batch).next() |
461 | 461 |
462 #if i is a contiguous slice | 462 #if i is a contiguous slice |
463 if type(i) is slice and (i.step in (None, 1)): | 463 if type(i) is slice and (i.step in (None, 1)): |
464 offset = 0 if i.start is None else i.start | 464 offset = 0 if i.start is None else i.start |
465 upper_bound = len(self) if i.stop is None else i.stop | 465 upper_bound = len(self) if i.stop is None else i.stop |
466 return MinibatchDataSet(self.minibatches_nowrap(self.fieldNames(), | 466 #return MinibatchDataSet(self.minibatches_nowrap(self.fieldNames(), |
467 # minibatch_size=upper_bound - offset, | |
468 # n_batches=1, | |
469 # offset=offset).next()) | |
470 # now returns a LookupList | |
471 return self.minibatches_nowrap(self.fieldNames(), | |
467 minibatch_size=upper_bound - offset, | 472 minibatch_size=upper_bound - offset, |
468 n_batches=1, | 473 n_batches=1, |
469 offset=offset).next()) | 474 offset=offset).next() |
470 | 475 |
471 # if slice has a step param, convert it to list and handle it with the | 476 # if slice has a step param, convert it to list and handle it with the |
472 # list code | 477 # list code |
473 if type(i) is slice: | 478 if type(i) is slice: |
474 offset = 0 if i.start is None else i.start | 479 offset = 0 if i.start is None else i.start |
492 f.append(v) | 497 f.append(v) |
493 #build them into a LookupList (a.k.a. Example) | 498 #build them into a LookupList (a.k.a. Example) |
494 zz = zip(self.fieldNames(),field_values) | 499 zz = zip(self.fieldNames(),field_values) |
495 vst = [self.valuesVStack(fieldname,field_values) for fieldname,field_values in zz] | 500 vst = [self.valuesVStack(fieldname,field_values) for fieldname,field_values in zz] |
496 example = Example(self.fieldNames(), vst) | 501 example = Example(self.fieldNames(), vst) |
497 return MinibatchDataSet(example, self.valuesVStack, self.valuesHStack) | 502 #return MinibatchDataSet(example, self.valuesVStack, self.valuesHStack) |
503 # now returns a LookupList | |
504 return example | |
505 | |
506 # what in the world is i? | |
498 raise TypeError(i, type(i)) | 507 raise TypeError(i, type(i)) |
499 | 508 |
500 def valuesHStack(self,fieldnames,fieldvalues): | 509 def valuesHStack(self,fieldnames,fieldvalues): |
501 """ | 510 """ |
502 Return a value that corresponds to concatenating (horizontally) several field values. | 511 Return a value that corresponds to concatenating (horizontally) several field values. |