diff pylearn/sandbox/random_transformation.py @ 537:b054271b2504

new file structure layout, factories, etc.
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 12 Nov 2008 21:57:54 -0500
parents random_transformation.py@18702ceb2096
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/sandbox/random_transformation.py	Wed Nov 12 21:57:54 2008 -0500
@@ -0,0 +1,132 @@
+"""
+New L{Op}s that aren't in core theano
+"""
+
+from theano import sparse
+from theano import tensor
+from theano import scalar
+from theano.gof import op
+
+from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
+
+import scipy.sparse
+
+import numpy
+
+class RowRandomTransformation(op.Op):
+    """
+    Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
+    multiply it by a deterministic random matrix of shape (dimensions,
+    length) to obtain random transformation output of shape (exmpls,
+    length).
+
+    Each element of the deterministic random matrix is selected uniformly
+    from [-1, +1).
+    @todo: Use another random distribution?
+
+    @note: This function should be written such that if length is
+    increased, we obtain the same results (except longer). Similarly,
+    the rows should be able to be permuted and get the same result in
+    the same fashion.
+
+    @todo: This may be slow?
+    @todo: Rewrite for dense matrices too?
+    @todo: Is there any way to verify the convention that each row is
+    an example? Should I rename the variables in the code to make the
+    semantics more explicit?
+    @todo: AUTOTEST: Autotest that dense and sparse versions of this are identical.
+    @todo: Rename? Is Row the correct name? Maybe column-wise?
+
+    @type  x: L{scipy.sparse.spmatrix}
+    @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions)
+    @type  length: int
+    @param length: The number of transformations of C{x} to be performed.
+    @param initial_seed: Initial seed for the RNG.
+    @rtype: L{numpy.ndarray}
+    @return: Array with C{length} random transformations, with shape (exmpls, length)
+    """
+
+    import random
+    # RNG used for random transformations.
+    # Does not share state with rest of program; as a class attribute it
+    # is shared by every instance, and each value lookup reseeds it.
+    # @todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic.
+    _trng = random.Random()
+
+    def __init__(self, x, length, initial_seed=0, **kwargs):
+        """
+        Store the sparse input C{x}, the output C{length} and the seed.
+        @note: Disabled for now by the C{assert 0} below.
+        @todo: Which broadcastable values should I use?
+        """
+        assert 0        # Needs to be updated to Olivier's new Op creation approach
+        op.Op.__init__(self, **kwargs)
+        x = sparse.as_sparse(x)
+        self.initial_seed = initial_seed
+        self.length = length
+        self.inputs = [x]
+        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]
+
+    def _random_matrix_value(self, row, col, rows):
+        """
+        From a deterministic random matrix, find one element.
+        @param row: The row of the element to be read.
+        @param col: The column of the element to be read.
+        @param rows: The number of rows in the matrix.
+        @type row: int
+        @type col: int
+        @type rows: int
+        @rtype: float
+        @return: The entry at (row, col), drawn uniformly between -1 and +1.
+        @note: This function is designed such that if we extend
+        the number of columns in the random matrix, the values of
+        the earlier entries are unchanged.
+        @todo: Make this static
+        """
+        # Column-major index of entry (row, col); independent of column count.
+        rngidx = col * rows + row
+        # Reseed so the value depends only on the index and seed (may be slow).
+        self._trng.seed(rngidx + self.initial_seed)
+        return self._trng.uniform(-1, +1)
+
+    def impl(self, xorig):
+        """
+        Multiply sparse C{xorig} by the deterministic random matrix.
+        @param xorig: Sparse matrix with shape (exmpls, dimensions).
+        @rtype: L{numpy.ndarray}
+        @return: Dense array with shape (exmpls, C{self.length}).
+        """
+        assert _is_sparse(xorig)
+        assert len(xorig.shape) == 2
+        # Since conversions to and from the COO format are quite fast, you
+        # can use this approach to efficiently implement lots of computations
+        # on sparse matrices.
+        x = xorig.tocoo()
+        (rows, cols) = x.shape
+        out = numpy.zeros((rows, self.length))
+        # Sort by column so entries sharing a column are adjacent and can
+        # reuse one random value; sorted() also works where zip() is lazy.
+        entries = sorted(zip(x.col, x.row, x.data))  # TODO: Maybe this is very slow?
+        for l in range(self.length):
+            lastc = None    # forces a fresh random value for each new l
+            for (c, r, data) in entries:
+                assert c < cols
+                assert r < rows
+                if c != lastc:
+                    lastc = c
+                    val = self._random_matrix_value(c, l, cols)
+                out[r, l] += val * data
+        return out
+
+    def __copy__(self):
+        """Return a new instance built from the current input, length and seed."""
+        return self.__class__(self.inputs[0], self.length, self.initial_seed)
+
+    def clone_with_new_inputs(self, *new_inputs):
+        """Return a new instance on C{new_inputs[0]} with the same length and seed."""
+        return self.__class__(new_inputs[0], self.length, self.initial_seed)
+
+    def desc(self, *new_inputs):
+        """Return the tuple (class, length, initial_seed) describing this Op."""
+        return (self.__class__, self.length, self.initial_seed)
+row_random_transformation = RowRandomTransformation()  # NOTE(review): __init__ requires x and length, so this zero-argument call looks like it fails at import -- confirm against the new Op creation API