356
|
1 """
|
|
2 New L{Op}s that aren't in core theano
|
|
3 """
|
|
4
|
|
5 from theano import sparse
|
|
6 from theano import tensor
|
|
7 from theano import scalar
|
|
8 from theano.gof import op
|
|
9
|
|
10 from theano.sparse import _is_dense, _is_sparse, _is_dense_result, _is_sparse_result
|
|
11
|
|
12 import scipy.sparse
|
|
13
|
|
14 import numpy
|
|
15
|
|
class RowRandomTransformation(op.Op):
    """
    Randomly project the rows of a sparse matrix onto C{length} outputs.

    Given C{x}, a (sparse) matrix with shape (exmpls, dimensions), we
    multiply it by a deterministic random matrix of shape (dimensions,
    length) to obtain random transformation output of shape (exmpls,
    length).

    Each element of the deterministic random matrix is selected uniformly
    from [-1, +1).
    @todo: Use another random distribution?

    @note: This function should be written such that if length is
    increased, we obtain the same results (except longer). Similarly,
    the rows should be able to be permuted and get the same result in
    the same fashion.

    @todo: This may be slow?
    @todo: Rewrite for dense matrices too?
    @todo: Is there any way to verify the convention that each row is
    an example? Should I rename the variables in the code to make the
    semantics more explicit?
    @todo: AUTOTEST: Autotest that dense and sparse versions of this are identical.
    @todo: Rename? Is Row the correct name? Maybe column-wise?

    @type  x: L{scipy.sparse.spmatrix}
    @param x: Sparse matrix to be randomly transformed with shape (exmpls, dimensions)
    @type  length: int
    @param length: The number of transformations of C{x} to be performed.
    @type  initial_seed: int
    @param initial_seed: Initial seed for the RNG.
    @rtype: L{numpy.ndarray}
    @return: Array with C{length} random transformations, with shape (exmpls, length)
    """

    import random
    # RNG used for random transformations.
    # Does not share state with rest of program (it is a dedicated
    # random.Random instance, stored on the class).
    # @todo: Make STATIC and private. Ask James or Olivier how to make this more Pythonic.
    _trng = random.Random()

    def __init__(self, x, length, initial_seed=0, **kwargs):
        """
        Old-style Op construction: bind the input C{x} and the parameters.

        @note: The leading C{assert 0} deliberately disables this
        constructor until it is ported to the new Op creation approach;
        the statements after it document the intended set-up.
        @todo: Which broadcastable values should I use?
        """
        assert 0        # Needs to be updated to Olivier's new Op creation approach
        op.Op.__init__(self, **kwargs)
        x = sparse.as_sparse(x)
        self.initial_seed = initial_seed
        self.length = length
        self.inputs = [x]
        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[False, False])]
#        self.outputs = [tensor.Tensor(x.dtype, broadcastable=[True, True])]

    def _random_matrix_value(self, row, col, rows):
        """
        From a deterministic random matrix, find one element.

        @param row: The row of the element to be read.
        @param col: The column of the element to be read.
        @param rows: The number of rows in the matrix.
        @type  row: int
        @type  col: int
        @type  rows: int
        @rtype: float
        @return: The value drawn for entry (row, col).
        @note: This function is designed such that if we extend
        the number of columns in the random matrix, the values of
        the earlier entries is unchanged.
        @todo: Make this static
        """
        # Linear (column-major) index of the entry at (row, col): adding
        # columns never changes the index of an existing entry.
        rngidx = col * rows + row
        # Set the random number state for this random entry.
        # int() because the indices usually arrive as numpy integers, which
        # random.Random.seed() does not accept on recent Python versions.
        # Note: This may be slow
        self._trng.seed(int(rngidx + self.initial_seed))

        # Determine the value for this entry
        return self._trng.uniform(-1, +1)

    def impl(self, xorig):
        """
        Compute the random transformation of the sparse matrix C{xorig}.

        @param xorig: Sparse 2-d matrix with shape (exmpls, dimensions).
        @rtype: L{numpy.ndarray}
        @return: Dense array of shape (exmpls, C{self.length}) where entry
        (r, l) is the sum over stored entries (r, c) of
        C{xorig[r, c] * random_matrix[c, l]}.
        """
        assert _is_sparse(xorig)
        assert len(xorig.shape) == 2
        # Since conversions to and from the COO format are quite fast, you
        # can use this approach to efficiently implement lots of computations
        # on sparse matrices.
        x = xorig.tocoo()
        (rows, cols) = x.shape
        out = numpy.zeros((rows, self.length))
        # Sort the stored entries by column so that every entry sharing a
        # column is visited consecutively and the random value for that
        # column is generated once per output index.
        entries = sorted(zip(x.col, x.row, x.data))  # TODO: Maybe this is very slow?
        for l in range(self.length):
            cached_col = None
            cached_val = None
            for (c, r, data) in entries:
                assert c < cols
                assert r < rows
                if c != cached_col:
                    cached_col = c
                    # Random matrix has shape (cols, length): row c, column l.
                    cached_val = self._random_matrix_value(c, l, cols)
                out[r, l] += cached_val * data
        return out

    def __copy__(self):
        """Return a new Op on the same input with the same parameters."""
        return self.__class__(self.inputs[0], self.length, self.initial_seed)

    def clone_with_new_inputs(self, *new_inputs):
        """Return a new Op with the same parameters applied to C{new_inputs[0]}."""
        return self.__class__(new_inputs[0], self.length, self.initial_seed)

    def desc(self, *new_inputs):
        """Return the tuple (class, length, initial_seed); C{new_inputs} is unused."""
        return (self.__class__, self.length, self.initial_seed)


# NOTE(review): __init__ requires `x` and `length` (and currently asserts
# unconditionally), so this zero-argument instantiation fails as written. It
# looks like the newer "Op instance" convention -- confirm when porting.
row_random_transformation = RowRandomTransformation()
|