# HG changeset patch # User James Bergstra # Date 1282593561 14400 # Node ID 0b392d1401c5be5a0ba9478df8231debe93668bc # Parent 507159eea97ef5306e16611cc9eab8f33e651436 mcRBM - adding math and comments diff -r 507159eea97e -r 0b392d1401c5 pylearn/algorithms/mcRBM.py --- a/pylearn/algorithms/mcRBM.py Mon Aug 23 15:54:54 2010 -0400 +++ b/pylearn/algorithms/mcRBM.py Mon Aug 23 15:59:21 2010 -0400 @@ -40,6 +40,8 @@ - \sum_j \sum_i W_{ij} g_j v_i - \sum_j c_j g_j +For the energy function to correspond to a probability distribution, P must be non-positive. + Conventions in this file ======================== @@ -70,6 +72,16 @@ # Recall: # Q(x) = exp(-E(x))/Z ==> -log(Q(x)) - log(Z) = E(x) # +# +# E (v, h, g) = +# - 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}|^2 |v|^2) +# - \sum_k b_k h_k +# + 0.5 \sum_i v_i^2 +# - \sum_j \sum_i W_{ij} g_j v_i +# - \sum_j c_j g_j +# - \sum_i a_i v_i +# +# # Derivation, in which partition functions are ignored. # # E(v) = -\log(Q(v)) @@ -80,14 +92,20 @@ # - \sum_k b_k h_k # + 0.5 \sum_i v_i^2 # - \sum_j \sum_i W_{ij} g_j v_i -# - \sum_j c_j g_j )) +# - \sum_j c_j g_j +# - \sum_i a_i v_i )) +# +# Get rid of the double negatives inside exp # = -\log( \sum_{h} exp( # + 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}| * |v|) # + \sum_k b_k h_k # - 0.5 \sum_i v_i^2 # ) * \sum_{g} exp( # + \sum_j \sum_i W_{ij} g_j v_i -# + \sum_j c_j g_j ))) +# + \sum_j c_j g_j)) +# - \sum_i a_i v_i +# +# Break up log # = -\log( \sum_{h} exp( # + 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}|*|v|) # + \sum_k b_k h_k @@ -96,15 +114,28 @@ # + \sum_j \sum_i W_{ij} g_j v_i # + \sum_j c_j g_j ))) # + 0.5 \sum_i v_i^2 +# - \sum_i a_i v_i +# +# Use the fact that h is binary to turn log(sum(exp(sum ...))) into sum(log(...))
# = -\log(\sum_{h} exp( # + 0.5 \sum_f \sum_k P_{fk} h_k ( \sum_i U_{if} v_i )^2 / (|U_{*f}|* |v|) # + \sum_k b_k h_k # )) # - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j )) # + 0.5 \sum_i v_i^2 -# = - \sum_{k} \log(1 + exp(b_k + 0.5 \sum_f P_{fk}( \sum_i U_{if} v_i )^2 / (|U_{*f}|* # |v|))) +# - \sum_i a_i v_i +# +# = - \sum_{k} \log(1 + exp(b_k + 0.5 \sum_f P_{fk}( \sum_i U_{if} v_i )^2 / (|U_{*f}|*|v|))) # - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j )) # + 0.5 \sum_i v_i^2 +# - \sum_i a_i v_i +# +# For P equal to the negative identity (P_{fk} = -1 if f == k, else 0) this gives: +# +# = - \sum_{k} \log(1 + exp(b_k - 0.5 ( \sum_i U_{ik} v_i )^2 / (|U_{*k}|*|v|))) +# - \sum_{j} \log(1 + exp(\sum_i W_{ij} v_i + c_j )) +# + 0.5 \sum_i v_i^2 +# - \sum_i a_i v_i import sys import logging