import numpy

import theano
import theano.tensor as T

from theano.compile.sandbox.sharedvalue import shared
from theano.compile.sandbox.pfunc import pfunc
from theano.compile.sandbox.shared_randomstreams import RandomStreams
from theano.tensor.nnet import sigmoid

class RBM():

    def __init__(self, input=None, vsize=None, hsize=None, bsize=10, lr=1e-1, seed=123):
        """
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        :param input: None for standalone RBMs or symbolic variable if RBM is
                      part of a larger graph.
        :param vsize: number of visible units
        :param hsize: number of hidden units
        :param bsize: size of minibatch
        :param lr: unsupervised learning rate
        :param seed: seed for random number generator
        """
        assert vsize and hsize

        self.vsize = vsize
        self.hsize = hsize
        self.lr = shared(lr, 'lr')

        # setup theano random number generator
        self.random = RandomStreams(seed)

        #### INITIALIZATION ####

        # initialize input layer for standalone RBM or layer0 of DBN
        self.input = input if input is not None else T.dmatrix('input')
        # initialize biases to zero
        self.b = shared(numpy.zeros(vsize), 'b')
        self.c = shared(numpy.zeros(hsize), 'c')
        # initialize weights uniformly in +/- 1/sqrt(max(vsize, hsize))
        rngseed = numpy.random.RandomState(seed).randint(2**30)
        rng = numpy.random.RandomState(rngseed)
        ubound = 1. / numpy.sqrt(max(self.vsize, self.hsize))
        self.w = shared(rng.uniform(low=-ubound, high=ubound, size=(hsize, vsize)), 'w')

        #### POSITIVE AND NEGATIVE PHASE ####

        # define graph for positive phase
        ph, ph_s = self.def_propup(self.input)
        # function which computes p(h|v=x) and a sample drawn from it
        self.pos_phase = pfunc([self.input], [ph, ph_s])

        # define graph for negative phase
        nv, nv_s = self.def_propdown(ph_s)
        nh, nh_s = self.def_propup(nv_s)
        # function which computes p(v|h=ph_s) and p(h|v=nv_s), along with
        # samples drawn from each
        self.neg_phase = pfunc([ph_s], [nv, nv_s, nh, nh_s])

        # calculate CD gradients for each parameter
        db = T.mean(self.input, axis=0) - T.mean(nv, axis=0)
        dc = T.mean(ph, axis=0) - T.mean(nh, axis=0)
        dwp = T.dot(ph.T, self.input) / nv.shape[0]
        dwn = T.dot(nh.T, nv) / nv.shape[0]
        dw = dwp - dwn
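
        # For reference, these are minibatch estimates of the standard CD-k
        # statistics; for the weights,
        #   d/dW log p(x) ~= E[h v' | v = x] - E[h v' | model],
        # with the data term taken from (ph, input) and the model term from
        # the k-step Gibbs chain (nh, nv).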

        # define dictionary of stochastic gradient update equations;
        # CD ascends the log-likelihood, so the (positive - negative)
        # statistics are added to the parameters
        updates = {self.b: self.b + self.lr * db,
                   self.c: self.c + self.lr * dc,
                   self.w: self.w + self.lr * dw}

        # compiled function which performs one step in the direction of the CD gradient
        self.cd_step = pfunc([self.input, ph, nv, nh], [], updates=updates)
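
        # (ph, nv and nh are declared as inputs of cd_step because the
        #  positive and negative phases are computed by the separate compiled
        #  functions above; cd() feeds their numeric outputs back in here.)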

    def def_propup(self, vis):
        """ Symbolic definition of p(hid|vis) = sigmoid(vis W' + c) """
        hid_activation = T.dot(vis, self.w.T) + self.c
        hid = sigmoid(hid_activation)
        # draw a Bernoulli sample per unit; *1.0 casts the samples to float
        hid_sample = self.random.binomial(T.shape(hid), 1, hid) * 1.0
        return hid, hid_sample

    def def_propdown(self, hid):
        """ Symbolic definition of p(vis|hid) = sigmoid(hid W + b) """
        vis_activation = T.dot(hid, self.w) + self.b
        vis = sigmoid(vis_activation)
        vis_sample = self.random.binomial(T.shape(vis), 1, vis) * 1.0
        return vis, vis_sample
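
    # Note: both conditionals above follow from the joint energy of a binary
    # RBM, E(v, h) = -b'v - c'h - h'Wv, with W of shape (hsize, vsize) as
    # initialized in the constructor.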

    def cd(self, x, k=1):
        """ Performs one CD-k update on minibatch x """
        ph, ph_s = self.pos_phase(x)

        # run k steps of alternating Gibbs sampling, starting the chain
        # from the positive-phase hidden sample
        nh_s = ph_s
        for ki in range(k):
            nv, nv_s, nh, nh_s = self.neg_phase(nh_s)

        # update parameters; the sampled reconstruction nv_s is plugged in
        # for the symbolic nv, while mean-field values are used for ph and nh
        self.cd_step(x, ph, nv_s, nh)
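
    # A minimal monitoring sketch (an addition, not in the original code):
    # one-step reconstruction error is a cheap proxy for tracking training
    # progress.
    def reconstruction_error(self, x):
        """ Hypothetical helper: mean squared error of the one-step
        mean-field reconstruction of x. """
        ph, ph_s = self.pos_phase(x)
        nv, nv_s, nh, nh_s = self.neg_phase(ph_s)
        return ((x - nv) ** 2).mean()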


from pylearn.datasets import MNIST

if __name__ == '__main__':

    bsize = 10

    # initialize dataset
    dataset = MNIST.first_1k()
    # initialize RBM with 784 visible units and 500 hidden units
    r = RBM(vsize=784, hsize=500, bsize=bsize, lr=0.1)

    # for a fixed number of epochs ...
    for e in range(10):

        print '@epoch %i' % e

        # iterate over all training set mini-batches
        for i in range(len(dataset.train.x) // bsize):

            rng = range(i*bsize, (i+1)*bsize)  # index range of the next mini-batch
            x = dataset.train.x[rng]           # next mini-batch
            r.cd(x)                            # perform CD update
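
    # Sketch (an addition, not part of the original script): once trained,
    # approximate samples from the model can be drawn by continuing the
    # alternating Gibbs chain from a training example for many steps:
    #
    #   ph, ph_s = r.pos_phase(dataset.train.x[0:bsize])
    #   nh_s = ph_s
    #   for step in range(1000):
    #       nv, nv_s, nh, nh_s = r.neg_phase(nh_s)
    #   # nv now holds mean-field activations of approximate model samples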