comparison pylearn/sandbox/random_transformation.py @ 537:b054271b2504

new file structure layout, factories, etc.
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 12 Nov 2008 21:57:54 -0500
parents random_transformation.py@18702ceb2096
children
comparison
equal deleted inserted replaced
518:4aa7f74ea93f 537:b054271b2504
1 """
2 New L{Op}s that aren't in core theano
3 """
4
5 from theano import sparse
6 from theano import tensor
7 from theano import scalar
8 from theano.gof import op
9
10 from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
11
12 import scipy.sparse
13
14 import numpy
15
class RowRandomTransformation(op.Op):
    """
    Multiply a sparse matrix by a deterministic pseudo-random matrix.

    Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
    multiply it by a deterministic random matrix of shape (dimensions,
    length) to obtain random transformation output of shape (exmpls,
    length).

    Each element of the deterministic random matrix is drawn with
    C{random.Random.uniform(-1, +1)} after seeding the generator from the
    element's position, so the matrix never has to be stored.
    @todo: Use another random distribution?

    @note: This function should be written such that if length is
    increased, we obtain the same results (except longer). Similarly,
    the rows should be able to be permuted and get the same result in
    the same fashion.

    @todo: This may be slow?
    @todo: Rewrite for dense matrices too?
    @todo: Is there any way to verify the convention that each row is
    an example? Should I rename the variables in the code to make the
    semantics more explicit?
    @todo: AUTOTEST: Autotest that dense and sparse versions of this are
    identical.
    @todo: Rename? Is Row the correct name? Maybe column-wise?

    @type x: L{scipy.sparse.spmatrix}
    @param x: Sparse matrix to be randomly transformed with shape
    (exmpls, dimensions)
    @type length: int
    @param length: The number of transformations of C{x} to be performed.
    @param initial_seed: Initial seed for the RNG.
    @rtype: L{numpy.ndarray}
    @return: Array with C{length} random transformations, with shape
    (exmpls, length)
    """

    # Imported in the class body, as in the original, so that ``random``
    # stays reachable as a class attribute.
    import random

    # RNG used for random transformations.
    # It is a class attribute, so every instance shares this one generator;
    # it does not share state with the module-level ``random`` functions.
    # @todo: Make STATIC and private. Ask James or Olivier how to make this
    # more Pythonic.
    _trng = random.Random()

    def __init__(self, x, length, initial_seed=0, **kwargs):
        """
        Record the transformation parameters and declare inputs/outputs.

        @param x: Matrix to transform; passed through C{sparse.as_sparse}.
        @param length: Number of output columns (random transformations).
        @param initial_seed: Offset added to every per-element RNG seed.
        @todo: Which broadcastable values should I use?
        """
        # Deliberately disabled until this Op is ported to the new Op
        # creation approach; everything below is the old-style set-up.
        assert 0, "Needs to be updated to Olivier's new Op creation approach"
        op.Op.__init__(self, **kwargs)
        x = sparse.as_sparse(x)
        self.initial_seed = initial_seed
        self.length = length
        self.inputs = [x]
        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]

    def _random_matrix_value(self, row, col, rows):
        """
        From a deterministic random matrix, find one element.

        @param row: The row of the element to be read.
        @param col: The column of the element to be read.
        @param rows: The number of rows in the matrix.
        @type row: int
        @type col: int
        @type rows: int
        @rtype: float
        @return: The value C{uniform(-1, +1)} yields after seeding the RNG
        with this element's index plus C{self.initial_seed}.
        @note: This function is designed such that if we extend
        the number of columns in the random matrix, the values of
        the earlier entries are unchanged.
        @todo: Make this static
        """
        # Column-major index of the entry: appending columns leaves the
        # indices (and therefore the values) of existing entries untouched.
        # Callers pass numpy integer scalars taken from the COO index
        # arrays; plain Python ints avoid fixed-width overflow in the
        # product and are a type random.Random.seed() accepts.
        rngidx = int(col) * int(rows) + int(row)

        # Re-seed for this single entry so its value depends only on its
        # position and the initial seed. Note: this may be slow.
        self._trng.seed(rngidx + self.initial_seed)

        return self._trng.uniform(-1, +1)

    def impl(self, xorig):
        """
        Compute the random transformation of one sparse matrix.

        @param xorig: Sparse matrix with shape (exmpls, dimensions).
        @rtype: L{numpy.ndarray}
        @return: Dense array with shape (exmpls, C{self.length}).
        """
        assert _is_sparse(xorig)
        assert len(xorig.shape) == 2

        # Since conversions to and from the COO format are quite fast, you
        # can use this approach to efficiently implement lots of
        # computations on sparse matrices.
        x = xorig.tocoo()
        (rows, cols) = x.shape

        # NOTE(review): the accumulator uses numpy's default dtype, while
        # __init__ declares the output with x.dtype -- confirm these are
        # meant to agree.
        out = numpy.zeros((rows, self.length))

        # Order the non-zero entries by column so consecutive entries of
        # the same column can reuse one random value.
        # TODO: Maybe this is very slow?
        entries = sorted(zip(x.col, x.row, x.data))

        lastc = None
        lastj = None
        lastval = None
        for j in range(self.length):
            for (c, r, data) in entries:
                assert c < cols
                assert r < rows
                # Only draw a new random value when the (column, output
                # index) pair changes; drawing re-seeds the RNG each time.
                if c != lastc or j != lastj:
                    lastc = c
                    lastj = j
                    lastval = self._random_matrix_value(c, j, cols)
                out[r, j] += lastval * data
        return out

    def __copy__(self):
        """Build a new instance from this one's input, length and seed."""
        return self.__class__(self.inputs[0], self.length, self.initial_seed)

    def clone_with_new_inputs(self, *new_inputs):
        """Build a new instance on C{new_inputs[0]}, same length and seed."""
        return self.__class__(new_inputs[0], self.length, self.initial_seed)

    def desc(self, *new_inputs):
        """Return the (class, length, initial_seed) tuple for this Op."""
        return (self.__class__, self.length, self.initial_seed)


# NOTE(review): __init__ requires ``x`` and ``length`` (and currently
# asserts), so this zero-argument call looks like it cannot succeed as
# written; presumably it anticipates the new Op creation approach -- confirm.
row_random_transformation = RowRandomTransformation()