# HG changeset patch # User James Bergstra # Date 1296853405 18000 # Node ID 383d4c061546e5690ea7336ec43a6e2a637ebcf9 # Parent f49801e39fe311e702a7989ff5bddf7f441eb17e TensorFnDataset - cleaned up docs a little diff -r f49801e39fe3 -r 383d4c061546 pylearn/dataset_ops/protocol.py --- a/pylearn/dataset_ops/protocol.py Fri Feb 04 16:02:57 2011 -0500 +++ b/pylearn/dataset_ops/protocol.py Fri Feb 04 16:03:25 2011 -0500 @@ -67,18 +67,25 @@ ^ hash(self.batch_size)) class TensorFnDataset(TensorDataset): - """A good base class for TensorDatasets that can be read from disk and cached in memory + """A good base class for TensorDatasets that are backed by indexed objects. + E.g. numpy ndarrays and memmaps. - The dataset is accessed via a function call to make this Op pickle-able. If the function - is a normal module-level function, then this Op will be picklable. If the dataset were a - property of this Op, then pickling the Op would require pickling the entire dataset. + This Op looks up the dataset by a function call, rather than by storing it + as a member variable. This is done to make the graph serializable without + having to save the dataset itself, which is typically large. + + This Op is picklable if (and only if) the function that accesses the dataset + can be serialized. """ def __init__(self, dtype, bcast, fn, single_shape=None, batch_size=None): """ :type fn: callable or (callable, args) tuple [MUST BE PICKLABLE!] + :param fn: function that returns the dataset as an ndarray-like object. - :param fn: function that returns the dataset as a tensor. Leading index is the example - index, others are considered part of each example. + :type bcast: tuple of bool + :param bcast: the broadcastable flag for the return value if this op is + indexed by a scalar (the one-example case). A (False,) will be + prepended to this pattern when the Op is indexed by a vector. """ super(TensorFnDataset, self).__init__(dtype, bcast, single_shape, batch_size) try: