changeset 1418:383d4c061546

TensorFnDataset - cleaned up docs a little
author James Bergstra <bergstrj@iro.umontreal.ca>
date Fri, 04 Feb 2011 16:03:25 -0500
parents f49801e39fe3
children cff305ad9f60
files pylearn/dataset_ops/protocol.py
diffstat 1 files changed, 13 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/dataset_ops/protocol.py	Fri Feb 04 16:02:57 2011 -0500
+++ b/pylearn/dataset_ops/protocol.py	Fri Feb 04 16:03:25 2011 -0500
@@ -67,18 +67,25 @@
                 ^ hash(self.batch_size))
 
 class TensorFnDataset(TensorDataset):
-    """A good base class for TensorDatasets that can be read from disk and cached in memory
+    """A good base class for TensorDatasets that are backed by indexed objects.
+    E.g. numpy ndarrays and memmaps.
 
-    The dataset is accessed via a function call to make this Op pickle-able.  If the function
-    is a normal module-level function, then this Op will be picklable.  If the dataset were a
-    property of this Op, then pickling the Op would require pickling the entire dataset.
+    This Op looks up the dataset by a function call, rather than by storing it
+    as a member variable.  This is done to make the graph serializable without
+    having to save the dataset itself, which is typically large.
+
+    This Op is picklable if (and only if) the function that accesses the dataset
+    can be serialized.
     """
     def __init__(self, dtype, bcast, fn, single_shape=None, batch_size=None):
         """
         :type fn: callable or (callable, args) tuple [MUST BE PICKLABLE!]
+        :param fn: function that returns the dataset as an ndarray-like object.
 
-        :param fn: function that returns the dataset as a tensor. Leading index is the example
-        index, others are considered part of each example.
+        :type bcast: tuple of bool
+        :param bcast: the broadcastable pattern of the return value when this Op
+            is indexed by a scalar (the one-example case).  A (False,) will be
+            prepended to this pattern when the Op is indexed by a vector.
         """
         super(TensorFnDataset, self).__init__(dtype, bcast, single_shape, batch_size)
         try: