pylearn: random_transformation.py comparison

comparison random_transformation.py @ 356:18702ceb2096

Added more functions

author	Joseph Turian <turian@iro.umontreal.ca>
date	Thu, 19 Jun 2008 16:18:37 -0400
parents
children

comparison

equal deleted inserted replaced

-:430c9e92cd23
+:18702ceb2096
+"""
+New L{Op}s that aren't in core theano
+"""
+from theano import sparse
+from theano import tensor
+from theano import scalar
+from theano.gof import op
+from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
+import scipy.sparse
+import numpy
+class RowRandomTransformation(op.Op):
+"""
+Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
+multiply it by a deterministic random matrix of shape (dimensions,
+length) to obtain random transformation output of shape (exmpls,
+length).
+Each element of the deterministic random matrix is selected uniformly
+from [-1, +1).
+@todo: Use another random distribution?
+@note: This function should be written such that if length is
+increased, we obtain the same results (except longer). Similarly,
+the rows should be able to be permuted and get the same result in
+the same fashion.
+@todo: This may be slow?
+@todo: Rewrite for dense matrices too?
+@todo: Is there any way to verify the convention that each row is
+an example? Should I rename the variables in the code to make the
+semantics more explicit?
+@todo: AUTOTEST: Autotest that dense and spare versions of this are identical.
+@todo: Rename? Is Row the correct name? Maybe column-wise?
+@type  x: L{scipy.sparse.spmatrix}
+@param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions)
+@type  length: int
+@param length: The number of transformations of C{x} to be performed.
+@param initial_seed: Initial seed for the RNG.
+@rtype: L{numpy.ndarray}
+@return: Array with C{length} random transformations, with shape (exmpls, length)
+"""
+import random
+"""
+RNG used for random transformations.
+Does not share state with rest of program.
+@todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic.
+"""
+_trng = random.Random()
+def __init__(self, x, length, initial_seed=0, **kwargs):
+"""
+@todo: Which broadcastable values should I use?
+"""
+assert 0        # Needs to be updated to Olivier's new Op creation approach
+op.Op.__init__(self, **kwargs)
+x = sparse.as_sparse(x)
+self.initial_seed = initial_seed
+self.length = length
+self.inputs = [x]
+self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]
+#        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])]
+def _random_matrix_value(self, row, col, rows):
+"""
+From a deterministic random matrix, find one element.
+@param row: The row of the element to be read.
+@param col: The column of the element to be read.
+@param row: The number of rows in the matrix.
+@type row: int
+@type col: int
+@type rows: int
+@note: This function is designed such that if we extend
+the number of columns in the random matrix, the values of
+the earlier entries is unchanged.
+@todo: Make this static
+"""
+# Choose the random entry at (l, c)
+rngidx = col * rows + row
+# Set the random number state for this random entry
+# Note: This may be slow
+self._trng.seed(rngidx + self.initial_seed)
+# Determine the value for this entry
+val = self._trng.uniform(-1, +1)
+#       print "Exmpl #%d, dimension #%d => Random projection #%d has idx %d (+ seed %d) and value %f" % (r, c, j, rngidx, self.initial_seed, val)
+return val
+def impl(self, xorig):
+assert _is_sparse(xorig)
+assert len(xorig.shape) == 2
+# Since conversions to and from the COO format are quite fast, you
+# can use this approach to efficiently implement lots computations
+# on sparse matrices.
+x = xorig.tocoo()
+(rows, cols) = x.shape
+tot = rows * cols
+out = numpy.zeros((rows, self.length))
+#        print "l = %d" % self.length
+#        print "x.getnnz() = %d" % x.getnnz()
+all = zip(x.col, x.row, x.data)
+all.sort()      # TODO: Maybe this is very slow?
+lastc = None
+lastl = None
+lastval = None
+for l in range(self.length):
+for (c, r, data) in all:
+assert c < cols
+assert r < rows
+if not c == lastc or not l == lastl:
+lastc = c
+lastl = l
+lastval = self._random_matrix_value(c, l, cols)
+val = lastval
+#                val = self._random_matrix_value(c, l, cols)
+#                val = self._trng.uniform(-1, +1)
+#                val = 1.0
+out[r][l] += val * data
+return out
+def __copy__(self):
+return self.__class__(self.inputs[0], self.length, self.initial_seed)
+def clone_with_new_inputs(self, *new_inputs):
+return self.__class__(new_inputs[0], self.length, self.initial_seed)
+def desc(self, *new_inputs):
+return (self.__class__, self.length, self.initial_seed)
+# NOTE(review): RowRandomTransformation.__init__ takes required arguments
+# ``x`` and ``length`` (and currently starts with ``assert 0``), so this
+# zero-argument call looks like it would raise when the module is imported.
+# TODO confirm how instances are meant to be created under the new Op
+# creation approach.
+row_random_transformation = RowRandomTransformation()

Mercurial > pylearn

comparison random_transformation.py @ 356:18702ceb2096