Mercurial > pylearn
changeset 972:0b392d1401c5
mcRBM - adding math and comments
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Mon, 23 Aug 2010 15:59:21 -0400 |
parents | 507159eea97e |
children | aa201f357d7b |
files | pylearn/algorithms/mcRBM.py |
diffstat | 1 files changed, 34 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/pylearn/algorithms/mcRBM.py Mon Aug 23 15:54:54 2010 -0400 +++ b/pylearn/algorithms/mcRBM.py Mon Aug 23 15:59:21 2010 -0400 @@ -40,6 +40,8 @@ - \sum_j \sum_i W_{ij} g_j v_i - \sum_j c_j g_j +For the energy function to correspond to a probability distribution, P must be non-positive. + Conventions in this file ======================== @@ -70,6 +72,16 @@ # Recall: # Q(x) = exp(-E(x))/Z ==> -log(Q(x)) - log(Z) = E(x) # +# +# E (v, h, g) = +# - 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}|^2 |v|^2) +# - \sum_k b_k h_k +# + 0.5 \sum_i v_i^2 +# - \sum_j \sum_i W_{ij} g_j v_i +# - \sum_j c_j g_j +# - \sum_i a_i v_i +# +# # Derivation, in which partition functions are ignored. # # E(v) = -\log(Q(v)) @@ -80,14 +92,20 @@ # - \sum_k b_k h_k # + 0.5 \sum_i v_i^2 # - \sum_j \sum_i W_{ij} g_j v_i -# - \sum_j c_j g_j )) +# - \sum_j c_j g_j +# - \sum_i a_i v_i )) +# +# Get rid of double negatives in exp # = -\log( \sum_{h} exp( # + 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}| * |v|) # + \sum_k b_k h_k # - 0.5 \sum_i v_i^2 # ) * \sum_{g} exp( # + \sum_j \sum_i W_{ij} g_j v_i -# + \sum_j c_j g_j ))) +# + \sum_j c_j g_j)) +# - \sum_i a_i v_i +# +# Break up log # = -\log( \sum_{h} exp( # + 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}|*|v|) # + \sum_k b_k h_k @@ -96,15 +114,28 @@ # + \sum_j \sum_i W_{ij} g_j v_i # + \sum_j c_j g_j ))) # + 0.5 \sum_i v_i^2 +# - \sum_i a_i v_i +# +# Use the fact that h and g are binary to turn log(sum(exp(sum(...)))) into sum(log(1 + exp(...)))
# = -\log(\sum_{h} exp( # + 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}|* |v|) # + \sum_k b_k h_k # )) # - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j )) # + 0.5 \sum_i v_i^2 -# = - \sum_{k} \log(1 + exp(b_k + 0.5 \sum_f P_{fk}( \sum_i U_{if} v_i )^2 / (|U_{*f}|* # |v|))) +# - \sum_i a_i v_i +# +# = - \sum_{k} \log(1 + exp(b_k + 0.5 \sum_f P_{fk}( \sum_i U_{if} v_i )^2 / (|U_{*f}|*|v|))) # - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j )) # + 0.5 \sum_i v_i^2 +# - \sum_i a_i v_i +# +# For negative-one-diagonal P (i.e. P = -I) this gives: +# +# = - \sum_{k} \log(1 + exp(b_k - 0.5 ( \sum_i U_{ik} v_i )^2 / (|U_{*k}|*|v|))) +# - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j )) +# + 0.5 \sum_i v_i^2 +# - \sum_i a_i v_i import sys import logging