comparison code_tutoriel/rbm.py @ 0:fda5f787baa6

initial commit
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Thu, 21 Jan 2010 11:26:43 -0500
parents
children
import numpy
import theano
import theano.tensor as T

from theano.compile.sandbox.sharedvalue import shared
from theano.compile.sandbox.pfunc import pfunc
from theano.compile.sandbox.shared_randomstreams import RandomStreams
from theano.tensor.nnet import sigmoid

class RBM(object):

    def __init__(self, input=None, vsize=None, hsize=None, bsize=10, lr=1e-1, seed=123):
20 """
21 RBM constructor. Defines the parameters of the model along with
22 basic operations for inferring hidden from visible (and vice-versa), as well
23 as for performing CD updates.
24 param input: None for standalone RBMs or symbolic variable if RBM is
25 part of a larger graph.
26 param vsize: number of visible units
27 param hsize: number of hidden units
28 param bsize: size of minibatch
29 param lr: unsupervised learning rate
30 param seed: seed for random number generator
31 """
        assert vsize and hsize

        self.vsize = vsize
        self.hsize = hsize
        self.lr = shared(lr, 'lr')

        # set up the theano random number generator
        self.random = RandomStreams(seed)

        #### INITIALIZATION ####

        # initialize input layer for standalone RBM or layer0 of DBN
        self.input = input if input is not None else T.dmatrix('input')
        # initialize biases to zero
        self.b = shared(numpy.zeros(vsize), 'b')
        self.c = shared(numpy.zeros(hsize), 'c')
        # initialize weights uniformly in +/- 1/sqrt(max(vsize, hsize)),
        # a common small-random-weight heuristic
        rngseed = numpy.random.RandomState(seed).randint(2**30)
        rng = numpy.random.RandomState(rngseed)
        ubound = 1. / numpy.sqrt(max(self.vsize, self.hsize))
        self.w = shared(rng.uniform(low=-ubound, high=ubound, size=(hsize, vsize)), 'w')

        #### POSITIVE AND NEGATIVE PHASE ####

        # define graph for positive phase
        ph, ph_s = self.def_propup(self.input)
        # function which computes p(h|v=x) and a sample from it
        self.pos_phase = pfunc([self.input], [ph, ph_s])

        # define graph for negative phase
        nv, nv_s = self.def_propdown(ph_s)
        nh, nh_s = self.def_propup(nv_s)
        # function which computes p(v|h=ph_s), a sample from it, and p(h|v=nv_s)
        self.neg_phase = pfunc([ph_s], [nv, nv_s, nh, nh_s])

        # calculate CD gradient estimates for each parameter
        db = T.mean(self.input, axis=0) - T.mean(nv, axis=0)
        dc = T.mean(ph, axis=0) - T.mean(nh, axis=0)
        dwp = T.dot(ph.T, self.input) / nv.shape[0]
        dwn = T.dot(nh.T, nv) / nv.shape[0]
        dw = dwp - dwn
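        # in the usual CD notation, the estimates above are
        #     db = <v>_data - <v>_model
        #     dc = <h>_data - <h>_model
        #     dw = <h v^T>_data - <h v^T>_model
        # with each expectation taken as a mean over the minibatch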

        # stochastic gradient update equations: db, dc and dw point in the
        # ascent direction of the log-likelihood, so they are added to the
        # parameters
        updates = {self.b: self.b + self.lr * db,
                   self.c: self.c + self.lr * dc,
                   self.w: self.w + self.lr * dw}

        # compiled function which performs one step along the CD gradient estimate
        self.cd_step = pfunc([self.input, ph, nv, nh], [], updates=updates)

    def def_propup(self, vis):
        """ Symbolic definition of p(hid|vis) """
        hid_activation = T.dot(vis, self.w.T) + self.c
        hid = sigmoid(hid_activation)
        # sample each unit as an independent Bernoulli; *1.0 casts to float
        hid_sample = self.random.binomial(T.shape(hid), 1, hid) * 1.0
        return hid, hid_sample

    def def_propdown(self, hid):
        """ Symbolic definition of p(vis|hid) """
        vis_activation = T.dot(hid, self.w) + self.b
        vis = sigmoid(vis_activation)
        vis_sample = self.random.binomial(T.shape(vis), 1, vis) * 1.0
        return vis, vis_sample

    def cd(self, x, k=1):
        """ Performs one CD-k update on minibatch x """
        ph, ph_s = self.pos_phase(x)

        nh_s = ph_s
        for ki in range(k):
            nv, nv_s, nh, nh_s = self.neg_phase(nh_s)

        # note: the sampled visibles nv_s are bound to the nv input of cd_step
        self.cd_step(x, ph, nv_s, nh)


from pylearn.datasets import MNIST

if __name__ == '__main__':

    bsize = 10

    # load the first 1000 MNIST training examples
    dataset = MNIST.first_1k()
    # initialize RBM with 784 (= 28x28) visible units and 500 hidden units
    r = RBM(vsize=784, hsize=500, bsize=bsize, lr=0.1)

    # for a fixed number of epochs ...
    for e in range(10):

        print '@epoch %i' % e

        # iterate over all training set mini-batches
        for i in range(len(dataset.train.x) / bsize):

            batch = range(i * bsize, (i + 1) * bsize)  # index range of the next mini-batch
            x = dataset.train.x[batch]                 # next mini-batch of examples
            r.cd(x)                                    # perform one CD-1 update

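    # a minimal sanity-check sketch: reconstruct the first minibatch with one
    # full Gibbs step, reusing the compiled pos_phase/neg_phase functions
    x = dataset.train.x[:bsize]
    ph, ph_s = r.pos_phase(x)
    nv, nv_s, nh, nh_s = r.neg_phase(ph_s)
    print 'mean squared reconstruction error: %f' % numpy.mean((x - nv) ** 2)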