diff dataset.py @ 167:4803cb76e26b

Updated documentation
author Joseph Turian <turian@gmail.com>
date Mon, 12 May 2008 18:51:42 -0400
parents ee11ed427ba8
children 895b4b60f5e8
line wrap: on
line diff
--- a/dataset.py	Mon May 12 18:40:17 2008 -0400
+++ b/dataset.py	Mon May 12 18:51:42 2008 -0400
@@ -530,7 +530,7 @@
 
 class FieldsSubsetDataSet(DataSet):
     """
-    A sub-class of DataSet that selects a subset of the fields.
+    A sub-class of L{DataSet} that selects a subset of the fields.
     """
     def __init__(self,src,fieldnames):
         self.src=src
@@ -571,7 +571,7 @@
         
 class DataSetFields(LookupList):
     """
-    Although a DataSet iterates over examples (like rows of a matrix), an associated
+    Although a L{DataSet} iterates over examples (like rows of a matrix), an associated
     DataSetFields iterates over fields (like columns of a matrix), and can be understood
     as a transpose of the associated dataset.
 
@@ -638,7 +638,7 @@
     
 class MinibatchDataSet(DataSet):
     """
-    Turn a LookupList of same-length (iterable) fields into an example-iterable dataset.
+    Turn a L{LookupList} of same-length (iterable) fields into an example-iterable dataset.
     Each element of the lookup-list should be an iterable and sliceable, all of the same length.
     """
     def __init__(self,fields_lookuplist,values_vstack=DataSet().valuesVStack,
@@ -708,14 +708,14 @@
     
 class HStackedDataSet(DataSet):
     """
-    A DataSet that wraps several datasets and shows a view that includes all their fields,
+    A L{DataSet} that wraps several datasets and shows a view that includes all their fields,
     i.e. whose list of fields is the concatenation of their lists of fields.
 
     If a field name is found in more than one of the datasets, then either an error is
     raised or the fields are renamed (either by prefixing the __name__ attribute 
     of the dataset + ".", if it exists, or by suffixing the dataset index in the argument list).
 
-    TODO: automatically detect a chain of stacked datasets due to A | B | C | D ...
+    @todo: automatically detect a chain of stacked datasets due to A | B | C | D ...
     """
     def __init__(self,datasets,accept_nonunique_names=False,description=None,field_types=None):
         DataSet.__init__(self,description,field_types)
@@ -807,11 +807,11 @@
 
 class VStackedDataSet(DataSet):
     """
-    A DataSet that wraps several datasets and shows a view that includes all their examples,
+    A L{DataSet} that wraps several datasets and shows a view that includes all their examples,
     in the order provided. This clearly assumes that they all have the same field names
     and all (except possibly the last one) are of finite length.
 
-    TODO: automatically detect a chain of stacked datasets due to A + B + C + D ...
+    @todo: automatically detect a chain of stacked datasets due to A + B + C + D ...
     """
     def __init__(self,datasets):
         self.datasets=datasets
@@ -1025,7 +1025,7 @@
 
 class CachedDataSet(DataSet):
   """
-  Wrap a dataset whose values are computationally expensive to obtain
+  Wrap a L{DataSet} whose values are computationally expensive to obtain
   (e.g. because they involve some computation, or disk access),
   so that repeated accesses to the same example are done cheaply,
   by caching every example value that has been accessed at least once.
@@ -1034,10 +1034,10 @@
   (and cached) upon construction of the CachedDataSet, rather than at the
   first access.
 
-  @todo when cache_all_upon_construction create mini-batches that are as 
+  @todo: when cache_all_upon_construction create mini-batches that are as 
   large as possible but not so large as to fill up memory.
   
-  @todo add disk-buffering capability, so that when the cache becomes too
+  @todo: add disk-buffering capability, so that when the cache becomes too
   big for memory, we cache things on disk, trying to keep in memory only
   the record most likely to be accessed next.
   """
@@ -1092,18 +1092,18 @@
                       
 class ApplyFunctionDataSet(DataSet):
   """
-  A dataset that contains as fields the results of applying a given function
-  example-wise or minibatch-wise to all the fields of an input dataset.
-  The output of the function should be an iterable (e.g. a list or a LookupList)
-  over the resulting values.
+  A L{DataSet} that contains as fields the results of applying a
+  given function example-wise or minibatch-wise to all the fields of
+  an input dataset.  The output of the function should be an iterable
+  (e.g. a list or a L{LookupList}) over the resulting values.
 
-  In minibatch mode, the function is expected to work on minibatches (takes
-  a minibatch in input and returns a minibatch in output). More precisely,
-  it means that each element of the input or output list should be iterable
-  and indexable over the individual example values (typically these
-  elements will be numpy arrays). All of the elements in the input and
-  output lists should have the same length, which is the length of the
-  minibatch.
+  In minibatch mode, the function is expected to work on minibatches
+  (takes a minibatch in input and returns a minibatch in output). More
+  precisely, it means that each element of the input or output list
+  should be iterable and indexable over the individual example values
+  (typically these elements will be numpy arrays). All of the elements
+  in the input and output lists should have the same length, which is
+  the length of the minibatch.
 
   The function is applied each time an example or a minibatch is accessed.
   To avoid re-doing computation, wrap this dataset inside a CachedDataSet.
@@ -1186,9 +1186,10 @@
 
 def supervised_learning_dataset(src_dataset,input_fields,target_fields,weight_field=None):
     """
-    Wraps an arbitrary DataSet into one for supervised learning tasks by forcing the
-    user to define a set of fields as the 'input' field and a set of fields
-    as the 'target' field. Optionally, a single weight_field can also be defined.
+    Wraps an arbitrary L{DataSet} into one for supervised learning tasks
+    by forcing the user to define a set of fields as the 'input' field
+    and a set of fields as the 'target' field. Optionally, a single
+    weight_field can also be defined.
     """
     args = ((input_fields,'input'),(output_fields,'target'))
     if weight_field: args+=(([weight_field],'weight'))