Mercurial > pylearn
diff random_transformation.py @ 356:18702ceb2096
Added more functions
author | Joseph Turian <turian@iro.umontreal.ca> |
---|---|
date | Thu, 19 Jun 2008 16:18:37 -0400 |
parents | |
children |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/random_transformation.py Thu Jun 19 16:18:37 2008 -0400
# @@ -0,0 +1,132 @@
"""
New L{Op}s that aren't in core theano
"""

from theano import sparse
from theano import tensor
from theano import scalar
from theano.gof import op

from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result

import scipy.sparse

import numpy


class RowRandomTransformation(op.Op):
    """
    Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
    multiply it by a deterministic random matrix of shape (dimensions,
    length) to obtain random transformation output of shape (exmpls,
    length).

    Each element of the deterministic random matrix is selected uniformly
    from [-1, +1).
    @todo: Use another random distribution?

    @note: This function should be written such that if length is
    increased, we obtain the same results (except longer). Similarly,
    the rows should be able to be permuted and get the same result in
    the same fashion.

    @todo: This may be slow?
    @todo: Rewrite for dense matrices too?
    @todo: Is there any way to verify the convention that each row is
    an example? Should I rename the variables in the code to make the
    semantics more explicit?
    @todo: AUTOTEST: Autotest that dense and sparse versions of this are identical.
    @todo: Rename? Is Row the correct name? Maybe column-wise?

    @type  x: L{scipy.sparse.spmatrix}
    @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions)
    @type  length: int
    @param length: The number of transformations of C{x} to be performed.
    @param initial_seed: Initial seed for the RNG.
    @rtype: L{numpy.ndarray}
    @return: Array with C{length} random transformations, with shape (exmpls, length)
    """

    import random
    # RNG used for random transformations.
    # Does not share state with rest of program.
    # It is a class attribute, so all instances share this one generator;
    # _random_matrix_value() re-seeds it before every draw.
    # @todo: Make STATIC and private. Ask James or Olivier how to make this
    # more Pythonic.
    _trng = random.Random()

    def __init__(self, x, length, initial_seed=0, **kwargs):
        """
        Build the op for input C{x}.

        @param x: Input to be transformed; passed through L{sparse.as_sparse}.
        @type  length: int
        @param length: Number of output columns (random transformations).
        @param initial_seed: Offset added to every per-entry RNG seed.
        @param kwargs: Forwarded unchanged to C{op.Op.__init__}.
        @todo: Which broadcastable values should I use?
        """
        # Construction is deliberately disabled: everything below this
        # assert is unreachable until the op is ported.
        assert 0 # Needs to be updated to Olivier's new Op creation approach
        op.Op.__init__(self, **kwargs)
        x = sparse.as_sparse(x)
        self.initial_seed = initial_seed
        self.length = length
        self.inputs = [x]
        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]
#        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])]

    def _random_matrix_value(self, row, col, rows):
        """
        From a deterministic random matrix, find one element.

        @param row: The row of the element to be read.
        @param col: The column of the element to be read.
        @param rows: The number of rows in the matrix.
        @type  row: int
        @type  col: int
        @type  rows: int
        @return: The result of C{uniform(-1, +1)} on the class RNG after
        seeding it for this entry.
        @note: This function is designed such that if we extend
        the number of columns in the random matrix, the values of
        the earlier entries are unchanged.
        @todo: Make this static
        """
        # Column-major linear index of entry (row, col). It does not depend
        # on the number of columns, which is what keeps earlier entries
        # stable when columns are added.
        rngidx = col * rows + row
        # Set the random number state for this random entry
        # Note: This may be slow
        self._trng.seed(rngidx + self.initial_seed)

        # Determine the value for this entry
        return self._trng.uniform(-1, +1)

    def impl(self, xorig):
        """
        Multiply sparse matrix C{xorig} by the deterministic random matrix.

        @param xorig: Two-dimensional sparse matrix; must satisfy
        C{_is_sparse} and provide C{tocoo()}.
        @rtype: L{numpy.ndarray}
        @return: Array created by C{numpy.zeros} with shape
        (rows of C{xorig}, C{self.length}); entry C{[r][l]} accumulates
        C{random_value(c, l) * xorig[r, c]} over the stored entries.
        """
        assert _is_sparse(xorig)
        assert len(xorig.shape) == 2
        # Since conversions to and from the COO format are quite fast, you
        # can use this approach to efficiently implement lots of
        # computations on sparse matrices.
        x = xorig.tocoo()
        (rows, cols) = x.shape
        out = numpy.zeros((rows, self.length))
        # Order the stored entries by (column, row, value) so that entries
        # sharing a column are adjacent and the random value for that column
        # is generated only once per output column.
        # sorted() returns a list whether zip() yields a list or an iterator.
        entries = sorted(zip(x.col, x.row, x.data))  # TODO: Maybe this is very slow?
        lastc = None
        lastl = None
        lastval = None
        for l in range(self.length):
            for (c, r, data) in entries:
                assert c < cols
                assert r < rows
                # Regenerate the random-matrix entry only when the
                # (input dimension, output column) pair changes.
                if not c == lastc or not l == lastl:
                    lastc = c
                    lastl = l
                    # The random matrix has one row per input dimension,
                    # so it is indexed as (row=c, col=l) with cols rows.
                    lastval = self._random_matrix_value(c, l, cols)
                out[r][l] += lastval * data
        return out

    def __copy__(self):
        """Return a new instance built from the same input, length and seed."""
        return self.__class__(self.inputs[0], self.length, self.initial_seed)

    def clone_with_new_inputs(self, *new_inputs):
        """Return a new instance on C{new_inputs[0]} with the same length and seed."""
        return self.__class__(new_inputs[0], self.length, self.initial_seed)

    def desc(self, *new_inputs):
        """Return the tuple (class, length, initial_seed); C{new_inputs} is ignored."""
        return (self.__class__, self.length, self.initial_seed)


# NOTE(review): __init__ requires x and length, so this no-argument call
# raises TypeError as written (and __init__ itself is disabled by assert 0).
# Left unchanged so the exported name stays where callers expect it --
# confirm the intended construction once the op is ported.
row_random_transformation = RowRandomTransformation()