changeset 1441:c9179b0ed002

pca - better comments
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 02 Mar 2011 13:13:40 -0500
parents a19c371a8d3a
children 08beb6f28809 490616262500
files pylearn/preprocessing/pca.py
diffstat 1 files changed, 9 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/preprocessing/pca.py	Wed Mar 02 13:05:20 2011 -0500
+++ b/pylearn/preprocessing/pca.py	Wed Mar 02 13:13:40 2011 -0500
@@ -36,6 +36,8 @@
 
     :param cov: data covariance matrix
     :type cov: a numpy ndarray 
+
+    :returns: (eigvals, eigvecs) of decomposition
     """
 
     w, v = scipy.linalg.eigh(a=cov, lower=lower)
@@ -71,7 +73,7 @@
 
 
 def pca_from_examples(X, max_components=None, max_energy_fraction=None, x_centered=False):
-    """Return (eigvals, eigvecs), centered_X of observations `X` (1-per-row)
+    """Return ((eigvals, eigvecs), centered_X) of observations `X` (1-per-row)
 
     This function exists to wrap several algorithms for getting the principal components.
 
@@ -85,13 +87,17 @@
     :param x_centered:
         True means to consider X as having mean 0 (even if it actually doesn't!)
 
+    :returns: ((eigvals, eigvecs), centered_X) of PCA decomposition
+
     """
     if x_centered:
         centered_X = X
     else:
         centered_X = X - numpy.mean(X, axis=0)
-    return pca_from_cov( numpy.cov(centered_X.T), max_components=max_components,
-            max_energy_fraction=max_energy_fraction), centered_X
+    cov_X = numpy.dot(centered_X.T, centered_X) / (len(X)- 1)
+    evals, evecs = pca_from_cov(cov_X, max_components=max_components,
+            max_energy_fraction=max_energy_fraction)
+    return ((evals, evecs), centered_X)
 
 
 def pca_whiten((eigvals, eigvecs), centered_X,eps=1e-14):