Mercurial > pylearn
annotate pylearn/algorithms/pca_online_estimator.py @ 1438:8f15ef656598
put a file to the same license as the rest of the project.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Fri, 25 Feb 2011 16:37:48 -0500 |
parents | cedb48a300fc |
children | c584d8f8f280 |
rev | line source |
---|---|
1409
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
1 import numpy |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
2 from scipy import linalg |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
3 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
4 # Todo: |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
5 # - complete docstring (explain arguments, pseudo code) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
6 # - Consider case with discount = 1.0 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
7 # - reevaluation when not at the end of a minibatch |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
8 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
9 class PcaOnlineEstimator(object): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
10 """Online estimation of the leading eigen values/vectors of the covariance |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
11 of some samples. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
12 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
13 Maintains a moving (with discount) low rank (n_eigen) estimate of the |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
14 covariance matrix of some observations. New observations are accumulated |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
15 until the batch is complete, at which point the low rank estimate is |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
16 reevaluated. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
17 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
18 Example: |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
19 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
20 pca_esti = pca_online_estimator.PcaOnlineEstimator(dimension_of_the_samples) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
21 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
22 for i in range(number_of_samples): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
23 pca_esti.observe(samples[i]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
24 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
25 [eigvals, eigvecs] = pca_esti.getLeadingEigen() |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
26 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
27 """ |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
28 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
29 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
30 def __init__(self, n_dim, n_eigen = 10, minibatch_size = 25, gamma = 0.999, regularizer = 1e-6, centering = True): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
31 # dimension of the observations |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
32 self.n_dim = n_dim |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
33 # rank of the low-rank estimate |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
34 self.n_eigen = n_eigen |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
35 # how many observations between reevaluations of the low rank estimate |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
36 self.minibatch_size = minibatch_size |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
37 # the discount factor in the moving estimate |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
38 self.gamma = gamma |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
39 # regularizer of the covariance estimate |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
40 self.regularizer = regularizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
41 # wether we center the observations or not: obtain leading eigen of |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
42 # covariance (centering = True) vs second moment (centering = False) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
43 self.centering = centering |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
44 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
45 # Total number of observations: to compute the normalizer for the mean and |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
46 # the covariance. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
47 self.n_observations = 0 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
48 # Index in the current minibatch |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
49 self.minibatch_index = 0 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
50 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
51 # Matrix containing on its *rows*: |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
52 # - the current unnormalized eigen vector estimates |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
53 # - the observations since the last reevaluation |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
54 self.Xt = numpy.zeros([self.n_eigen + self.minibatch_size, self.n_dim]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
55 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
56 # The discounted sum of the observations. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
57 self.x_sum = numpy.zeros([self.n_dim]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
58 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
59 # The Gram matrix of the observations, ie Xt Xt' (since Xt is rowwise) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
60 self.G = numpy.zeros([self.n_eigen + self.minibatch_size, self.n_eigen + self.minibatch_size]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
61 for i in range(self.n_eigen): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
62 self.G[i,i] = self.regularizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
63 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
64 # I don't think it's worth "allocating" these 3 next (though they need to be |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
65 # declared). I don't know how to do in place operations... |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
66 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
67 # Hold the results of the eigendecomposition of the Gram matrix G |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
68 # (eigen vectors on columns of V). |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
69 self.d = numpy.zeros([self.n_eigen + self.minibatch_size]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
70 self.V = numpy.zeros([self.n_eigen + self.minibatch_size, self.n_eigen + self.minibatch_size]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
71 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
72 # Holds the unnormalized eigenvectors of the covariance matrix before |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
73 # they're copied back to Xt. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
74 self.Ut = numpy.zeros([self.n_eigen, self.n_dim]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
75 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
76 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
77 def observe(self, x): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
78 assert(numpy.size(x) == self.n_dim) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
79 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
80 self.n_observations += 1 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
81 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
82 # Add the *non-centered* observation to Xt. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
83 row = self.n_eigen + self.minibatch_index |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
84 self.Xt[row] = x |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
85 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
86 # Update the discounted sum of the observations. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
87 self.x_sum *= self.gamma |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
88 self.x_sum += x |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
89 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
90 # To get the mean, we must normalize the sum by: |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
91 # /gamma^(n_observations-1) + /gamma^(n_observations-2) + ... + 1 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
92 normalizer = (1.0 - pow(self.gamma, self.n_observations)) /(1.0 - self.gamma); |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
93 #print "normalizer: ", normalizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
94 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
95 # Now center the observation. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
96 # We will lose the first observation as it is the only one in the mean. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
97 if self.centering: |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
98 self.Xt[row] -= self.x_sum / normalizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
99 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
100 # Multiply the observation by the discount compensator. Basically |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
101 # we make this observation look "younger" than the previous ones. The actual |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
102 # discount is applied in the reevaluation (and when solving the equations in |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
103 # the case of TONGA) by multiplying every direction with the same aging factor. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
104 rn = pow(self.gamma, -0.5*(self.minibatch_index+1)); |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
105 self.Xt[row] *= rn |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
106 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
107 # Update the Gram Matrix. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
108 # The column. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
109 self.G[:row+1,row] = numpy.dot( self.Xt[:row+1,:], self.Xt[row,:].transpose() ) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
110 # The symetric row. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
111 # There are row+1 values, but the diag doesn't need to get copied. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
112 self.G[row,:row] = self.G[:row,row].transpose() |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
113 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
114 self.minibatch_index += 1 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
115 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
116 if self.minibatch_index == self.minibatch_size: |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
117 self.reevaluate() |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
118 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
119 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
120 def reevaluate(self): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
121 # TODO do the modifications to handle when this is not true. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
122 assert(self.minibatch_index == self.minibatch_size); |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
123 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
124 # Regularize - not necessary but in case |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
125 for i in range(self.n_eigen + self.minibatch_size): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
126 self.G[i,i] += self.regularizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
127 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
128 # The Gram matrix is up to date. Get its low rank eigendecomposition. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
129 # NOTE: the eigenvalues are in ASCENDING order and the vectors are on |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
130 # the columns. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
131 # With scipy 0.7, you can ask for only some eigenvalues (the n_eigen top |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
132 # ones) but it doesn't look loke it for scipy 0.6. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
133 self.d, self.V = linalg.eigh(self.G) #, overwrite_a=True) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
134 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
135 # Convert the n_eigen LAST eigenvectors of the Gram matrix contained in V |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
136 # into *unnormalized* eigenvectors U of the covariance (unnormalized wrt |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
137 # the eigen values, not the moving average). |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
138 self.Ut = numpy.dot(self.V[:,-self.n_eigen:].transpose(), self.Xt) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
139 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
140 # Take into account the discount factor. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
141 # Here, minibatch index is minibatch_size. We age everyone. Because of the |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
142 # previous multiplications to make some observations "younger" we multiply |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
143 # everyone by the same factor. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
144 # TODO VERIFY THIS! |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
145 rn = pow(self.gamma, -0.5*(self.minibatch_index+1)) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
146 inv_rn2 = 1.0/(rn*rn) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
147 self.Ut *= 1.0/rn |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
148 self.d *= inv_rn2; |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
149 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
150 #print "*** Reevaluate! ***" |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
151 #normalizer = (1.0 - pow(self.gamma, self.n_observations)) /(1.0 - self.gamma) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
152 #print "normalizer: ", normalizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
153 #print self.d / normalizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
154 #print self.Ut # unnormalized eigen vectors (wrt eigenvalues AND moving average). |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
155 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
156 # Update Xt, G and minibatch_index |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
157 self.Xt[:self.n_eigen,:] = self.Ut |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
158 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
159 for i in range(self.n_eigen): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
160 self.G[i,i] = self.d[-self.n_eigen+i] |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
161 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
162 self.minibatch_index = 0 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
163 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
164 # Returns a copy of the current estimate of the eigen values and vectors |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
165 # (normalized vectors on rows), normalized by the discounted number of observations. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
166 def getLeadingEigen(self): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
167 # We subtract self.minibatch_index in case this call is not right after a reevaluate call. |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
168 normalizer = (1.0 - pow(self.gamma, self.n_observations - self.minibatch_index)) /(1.0 - self.gamma) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
169 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
170 eigvals = self.d[-self.n_eigen:] / normalizer |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
171 eigvecs = numpy.zeros([self.n_eigen, self.n_dim]) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
172 for i in range(self.n_eigen): |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
173 eigvecs[i] = self.Ut[-self.n_eigen+i] / numpy.sqrt(numpy.dot(self.Ut[-self.n_eigen+i], self.Ut[-self.n_eigen+i])) |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
174 |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
175 return [eigvals, eigvecs] |
cedb48a300fc
added a pca online estimator
Philippe Hamel <higgsbosonh@hotmail.com>
parents:
diff
changeset
|
176 |