# HG changeset patch
# User James Bergstra
# Date 1299089620 18000
# Node ID c9179b0ed002ad7682de4ec6dfd0db0cd7ce6913
# Parent  a19c371a8d3af208b49e9c759721bff5a98b1bd0
pca - better comments

diff -r a19c371a8d3a -r c9179b0ed002 pylearn/preprocessing/pca.py
--- a/pylearn/preprocessing/pca.py	Wed Mar 02 13:05:20 2011 -0500
+++ b/pylearn/preprocessing/pca.py	Wed Mar 02 13:13:40 2011 -0500
@@ -36,6 +36,8 @@
 
     :param cov: data covariance matrix
     :type cov: a numpy ndarray
+
+    :returns: (eigvals, eigvecs) of decomposition
     """
     w, v = scipy.linalg.eigh(a=cov, lower=lower)
 
@@ -71,7 +73,7 @@
 
 def pca_from_examples(X, max_components=None, max_energy_fraction=None,
         x_centered=False):
-    """Return (eigvals, eigvecs), centered_X of observations `X` (1-per-row)
+    """Return ((eigvals, eigvecs), centered_X) of observations `X` (1-per-row)
 
     This function exists to wrap several algorithms for getting the principle
     components.
@@ -85,13 +87,17 @@
 
     :param x_centered: True means to consider X as having mean 0 (even if it actually doesn't!)
+    :returns: ((eigvals, eigvecs), centered_X) of PCA decomposition
+
     """
     if x_centered:
        centered_X = X
    else:
        centered_X = X - numpy.mean(X, axis=0)
-    return pca_from_cov( numpy.cov(centered_X.T), max_components=max_components,
-            max_energy_fraction=max_energy_fraction), centered_X
+    cov_X = numpy.dot(centered_X.T, centered_X) / (len(X)- 1)
+    evals, evecs = pca_from_cov(cov_X, max_components=max_components,
+            max_energy_fraction=max_energy_fraction)
+    return ((evals, evecs), centered_X)
 
 def pca_whiten((eigvals, eigvecs), centered_X,eps=1e-14):
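
Note: besides the docstring additions, the final hunk swaps numpy.cov(centered_X.T) for an explicit dot-product covariance and changes the return value to a nested tuple. Below is a minimal sketch, not part of the changeset, of why the two covariance forms agree on mean-centered data and of how a caller would unpack the new return value; the data matrix X and the commented pca_from_examples call are illustrative stand-ins, not code from the patch.

    import numpy

    # Illustrative data matrix, not from the patch.
    X = numpy.random.randn(50, 4)
    centered_X = X - numpy.mean(X, axis=0)

    # The covariance the new code builds by hand...
    cov_explicit = numpy.dot(centered_X.T, centered_X) / (len(X) - 1)
    # ...matches numpy.cov on mean-centered rows (rows are observations,
    # so we pass the transpose, as the old code did).
    assert numpy.allclose(cov_explicit, numpy.cov(centered_X.T))

    # With this changeset, callers unpack the nested tuple documented in the
    # new :returns: line (hypothetical call, shown for illustration only):
    # (eigvals, eigvecs), centered_X = pca_from_examples(X, max_components=2)

One plausible reason for the explicit form is that numpy.cov always re-centers its input, whereas the dot-product version respects the x_centered=True contract of treating X as zero-mean even when it is not.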