annotate algorithms/logistic_regression.py @ 501:4fb6f7320518

N-class logistic regression top-layer works
author Joseph Turian <turian@gmail.com>
date Tue, 28 Oct 2008 13:54:01 -0400
parents a419edf4e06c
children 17945defd813
rev   line source
470
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 import theano
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 from theano import tensor as T
495
7560817a07e8 nnet_ops => nnet
Joseph Turian <turian@gmail.com>
parents: 491
diff changeset
3 from theano.tensor import nnet
470
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 from theano.compile import module
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5 from theano import printing, pprint
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 from theano import compile
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8 import numpy as N
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9
499
a419edf4e06c removed unpicklable nested classes in logistic regression
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 497
diff changeset
class LogRegInstanceType(module.FancyModuleInstance):
    """Instance type shared by the logistic regression modules in this file."""

    def initialize(self, n_in, n_out=1, rng=N.random, seed=None):
        """Reset the weights and bias to zero and set the default learning rate.

        ``self.component`` is the LogisticRegressionTemplate instance that
        built this object.

        :param n_in: number of rows of the weight matrix.
        :param n_out: number of columns of the weight matrix, and the length
            of the bias vector.
        :param rng: accepted but not used by this method.
        :param seed: accepted but not used by this method.

        @todo: Remove seed. Used only to keep Stacker happy.
        """
        weight_shape = (n_in, n_out)
        bias_shape = (n_out,)

        self.w = N.zeros(weight_shape)
        self.b = N.zeros(bias_shape)
        self.lr = 0.01
        self.__hide__ = ['params']
470
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
21
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class Module_Nclass(module.FancyModule):
    """N-class logistic regression expressed as a Theano module.

    The cost is the summed result of ``nnet.crossentropy_softmax_1hot``
    applied to ``dot(x, w) + b`` and the integer targets; the prediction is
    the argmax of that same affine expression along axis 1.
    """
    InstanceType = LogRegInstanceType

    def __init__(self, x=None, targ=None, w=None, b=None, lr=None, regularize=False):
        """Build the symbolic graph and the ``apply`` / ``update`` methods.

        :param x: symbolic input; defaults to ``T.matrix('input')``.
        :param targ: symbolic targets; defaults to ``T.lvector()``.
        :param w: weight variable; defaults to a new ``dmatrix`` member.
        :param b: bias variable; defaults to a new ``dvector`` member.
        :param lr: learning rate variable; defaults to a new ``dscalar`` member.
        :param regularize: accepted but not used by this constructor.
        """
        super(Module_Nclass, self).__init__() #boilerplate

        if x is None:
            self.x = T.matrix('input')
        else:
            self.x = x
        if targ is None:
            self.targ = T.lvector()
        else:
            self.targ = targ

        if w is None:
            self.w = module.Member(T.dmatrix())
        else:
            self.w = w
        if b is None:
            self.b = module.Member(T.dvector())
        else:
            self.b = b
        if lr is None:
            self.lr = module.Member(T.dscalar())
        else:
            self.lr = lr

        # Only variables with no owner are kept as trainable parameters
        # (presumably so w/b computed by another graph are left alone --
        # TODO confirm).
        trainable = []
        for candidate in [self.w, self.b]:
            if candidate.owner is None:
                trainable.append(candidate)
        self.params = trainable

        activation = T.dot(self.x, self.w) + self.b
        xent, output = nnet.crossentropy_softmax_1hot(activation, self.targ)
        sum_xent = T.sum(xent)

        self.output = output
        self.sum_xent = sum_xent

        #compatibility with current implementation of stacker/daa or something
        #TODO: remove this, make a wrapper
        self.cost = sum_xent
        self.input = self.x

        #define the apply method
        scores = T.dot(self.input, self.w) + self.b
        self.pred = T.argmax(scores, axis=1)
        self.apply = module.Method([self.input], self.pred)

        if self.params:
            gparams = T.grad(sum_xent, self.params)

            # One gradient-descent step per parameter: p <- p - lr * grad(p).
            sgd_updates = {}
            for param, gparam in zip(self.params, gparams):
                sgd_updates[param] = param - self.lr * gparam

            self.update = module.Method([self.input, self.targ], sum_xent,
                                        updates=sgd_updates)
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
58
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class Module(module.FancyModule):
    """Binary logistic regression expressed as a Theano module.

    The output is ``sigmoid(dot(input, w) + b)``; the cost is the summed
    binary cross-entropy between that output and ``targ``; the prediction is
    ``dot(input, w) + b > 0``.
    """
    InstanceType = LogRegInstanceType

    def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False):
        """Build the symbolic graph and the ``apply`` / ``update`` methods.

        :param input: symbolic input; defaults to ``T.matrix('input')``.
        :param targ: symbolic targets; defaults to ``T.lcol()``.
        :param w: weight variable; defaults to a new ``dmatrix`` member.
        :param b: bias variable; defaults to a new ``dvector`` member.
        :param lr: learning rate variable; defaults to a new ``dscalar`` member.
        :param regularize: accepted but not used by this constructor.
        """
        super(Module, self).__init__() #boilerplate

        self.input = input if input is not None else T.matrix('input')
        self.targ = targ if targ is not None else T.lcol()

        self.w = w if w is not None else module.Member(T.dmatrix())
        self.b = b if b is not None else module.Member(T.dvector())
        self.lr = lr if lr is not None else module.Member(T.dscalar())

        # Only variables with no owner are kept as trainable parameters
        # (presumably so w/b computed by another graph are left alone --
        # TODO confirm).
        self.params = [p for p in [self.w, self.b] if p.owner is None]

        # The affine activation is built from self.input: this class never
        # defines self.x, so the previous self.x reference could not resolve.
        # The bias is included so that the trained output agrees with
        # self.pred below and the cost actually depends on self.b.
        activation = T.dot(self.input, self.w) + self.b
        output = nnet.sigmoid(activation)
        xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output)
        sum_xent = T.sum(xent)

        self.output = output
        self.xent = xent
        self.sum_xent = sum_xent
        self.cost = sum_xent

        #define the apply method
        # sigmoid(a) > 0.5 exactly when a > 0, so thresholding the activation
        # at zero gives the class decision.
        self.pred = activation > 0.0
        self.apply = module.Method([self.input], self.pred)

        #if this module has any internal parameters, define an update function for them
        if self.params:
            gparams = T.grad(sum_xent, self.params)
            # One gradient-descent step per parameter: p <- p - lr * grad(p).
            self.update = module.Method([self.input, self.targ], sum_xent,
                                        updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
92
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class Learner(object):
    """Placeholder with no behaviour yet.

    TODO: Encapsulate the algorithm for finding an optimal regularization
    coefficient.
    """
bd937e845bbb new stuff: algorithms/logistic_regression, datasets/MNIST
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
96