Mercurial > pylearn
view random_transformation.py @ 492:6dfdcee64e9b
merge
author | Olivier Breuleux <breuleuo@iro.umontreal.ca> |
---|---|
date | Tue, 28 Oct 2008 11:39:47 -0400 |
parents | 18702ceb2096 |
children |
line wrap: on
line source
""" New L{Op}s that aren't in core theano """

import random

from theano import sparse
from theano import tensor
from theano import scalar
from theano.gof import op
from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result

import scipy.sparse
import numpy


class RowRandomTransformation(op.Op):
    """
    Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
    multiply it by a deterministic random matrix of shape (dimensions,
    length) to obtain random transformation output of shape (exmpls,
    length).

    Each element of the deterministic random matrix is selected uniformly
    from [-1, +1).
    @todo: Use another random distribution?

    @note: This function should be written such that if length is
    increased, we obtain the same results (except longer). Similarly,
    the rows should be able to be permuted and get the same result in
    the same fashion.

    @todo: This may be slow?
    @todo: Rewrite for dense matrices too?
    @todo: Is there any way to verify the convention that each row is
    an example? Should I rename the variables in the code to make the
    semantics more explicit?
    @todo: AUTOTEST: Autotest that dense and sparse versions of this are identical.
    @todo: Rename? Is Row the correct name? Maybe column-wise?

    @type  x: L{scipy.sparse.spmatrix}
    @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions)
    @type  length: int
    @param length: The number of transformations of C{x} to be performed.
    @param initial_seed: Initial seed for the RNG.
    @rtype: L{numpy.ndarray}
    @return: Array with C{length} random transformations, with shape (exmpls, length)
    """

    # RNG used for random transformations.
    # Does not share state with rest of program.
    # It is a class attribute, so every instance reseeds the same generator
    # (see _random_matrix_value, which seeds it before every draw).
    # @todo: Make STATIC and private. Ask James or Olivier how to make this
    # more Pythonic.
    _trng = random.Random()

    def __init__(self, x, length, initial_seed=0, **kwargs):
        """
        Record the transformation parameters and declare the Op's
        input (C{x} as a sparse result) and its single matrix output.

        @param x: Value convertible by L{sparse.as_sparse}.
        @param length: Number of output columns (random projections).
        @param initial_seed: Offset added to every per-entry RNG seed.
        @param kwargs: Forwarded unchanged to C{op.Op.__init__}.

        @todo: Which broadcastable values should I use?
        """
        # Deliberately disabled by the original author: construction fails
        # until the class is ported to the new Op creation approach.
        assert 0 # Needs to be updated to Olivier's new Op creation approach
        op.Op.__init__(self, **kwargs)
        x = sparse.as_sparse(x)
        self.initial_seed = initial_seed
        self.length = length
        self.inputs = [x]
        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]
#        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])]

    def _random_matrix_value(self, row, col, rows):
        """
        From a deterministic random matrix, find one element.
        @param row: The row of the element to be read.
        @param col: The column of the element to be read.
        @param rows: The number of rows in the matrix.
        @type row: int
        @type col: int
        @type rows: int
        @rtype: float
        @return: The value drawn with C{uniform(-1, +1)} after seeding the
        RNG with the entry's index plus C{self.initial_seed}.
        @note: This function is designed such that if we extend
        the number of columns in the random matrix, the values of
        the earlier entries is unchanged.
        @todo: Make this static
        """
        # Column-major linear index of the entry: adding columns only appends
        # new indices, so existing entries keep their seeds.
        # The operands are converted to built-in ints because callers pass
        # numpy integer scalars (COO index arrays); this keeps the arithmetic
        # in arbitrary precision and hands random.seed a plain int.
        rngidx = int(col) * int(rows) + int(row)

        # Set the random number state for this random entry
        # Note: This may be slow
        self._trng.seed(rngidx + self.initial_seed)

        # Determine the value for this entry
        return self._trng.uniform(-1, +1)

    def impl(self, xorig):
        """
        Multiply the sparse matrix C{xorig} by the deterministic random
        matrix and return the dense result.

        @param xorig: Two-dimensional sparse matrix (checked with
        L{_is_sparse}); it must provide C{tocoo()}.
        @rtype: L{numpy.ndarray}
        @return: Array of shape (rows of C{xorig}, C{self.length}), created
        with C{numpy.zeros} and accumulated entry by entry.
        """
        assert _is_sparse(xorig)
        assert len(xorig.shape) == 2
        # Since conversions to and from the COO format are quite fast, you
        # can use this approach to efficiently implement lots computations
        # on sparse matrices.
        x = xorig.tocoo()
        (rows, cols) = x.shape
        out = numpy.zeros((rows, self.length))

        # Order the non-zero entries by column so that consecutive entries
        # sharing a column can reuse one random-matrix value.
        # sorted() also works when zip() returns an iterator.
        entries = sorted(zip(x.col, x.row, x.data))      # TODO: Maybe this is very slow?

        for l in range(self.length):
            # The cached value is only valid for the current output column l.
            lastc = None
            lastval = None
            for (c, r, data) in entries:
                assert c < cols
                assert r < rows
                if lastc is None or c != lastc:
                    lastc = c
                    # Entry (c, l) of the (cols x length) random matrix.
                    lastval = self._random_matrix_value(c, l, cols)
                out[r, l] += lastval * data
        return out

    def __copy__(self):
        """Return a new instance built from the same input, length and seed."""
        return self.__class__(self.inputs[0], self.length, self.initial_seed)

    def clone_with_new_inputs(self, *new_inputs):
        """Return a new instance using C{new_inputs[0]} with the same length and seed."""
        return self.__class__(new_inputs[0], self.length, self.initial_seed)

    def desc(self, *new_inputs):
        """Return the tuple (class, length, initial_seed); C{new_inputs} is ignored."""
        return (self.__class__, self.length, self.initial_seed)


# NOTE(review): this calls the constructor with no arguments although
# __init__ requires ``x`` and ``length``, so it raises TypeError when the
# module is imported. Left untouched because the intended replacement depends
# on the pending port to the new Op creation approach -- confirm and fix there.
row_random_transformation = RowRandomTransformation()