pylearn/algorithms/logistic_regression.py @ 537:b054271b2504
new file structure layout, factories, etc.

author:   James Bergstra <bergstrj@iro.umontreal.ca>
date:     Wed, 12 Nov 2008 21:57:54 -0500
parents:  algorithms/logistic_regression.py@c7ce66b4e8f4
children: 85d3300c9a9c

comparison of 518:4aa7f74ea93f with 537:b054271b2504
import sys, copy
import theano
from theano import tensor as T
from theano.tensor import nnet
from theano.compile import module
from theano import printing, pprint
from theano import compile

import numpy as N

from ..datasets import make_dataset
from .minimizer import make_minimizer
from .stopper import make_stopper

class LogRegN(module.FancyModule):

    def __init__(self,
            n_in=None, n_out=None,
            input=None, target=None,
            w=None, b=None,
            l2=None, l1=None):
        super(LogRegN, self).__init__() #boilerplate

        self.n_in = n_in
        self.n_out = n_out

        self.input = input if input is not None else T.matrix()
        self.target = target if target is not None else T.lvector()

        self.w = w if w is not None else module.Member(T.dmatrix())
        self.b = b if b is not None else module.Member(T.dvector())

        #the params of the model are the ones we fit to the data
        self.params = [p for p in [self.w, self.b] if p.owner is None]

        #the hyper-parameters of the model are not fit to the data
        self.l2 = l2 if l2 is not None else module.Member(T.dscalar())
        self.l1 = l1 if l1 is not None else module.Member(T.dscalar())

        #here we actually build the model
        self.linear_output = T.dot(self.input, self.w) + self.b
        if 0:
            # disabled draft: this branch would require a one-hot target
            # matrix, while the active branch below consumes the integer
            # label vector self.target directly; cross-entropy is the
            # negative log-likelihood, hence the minus sign
            self.softmax = nnet.softmax(self.linear_output)

            self._max_pr, self.argmax = T.max_and_argmax(self.linear_output)
            self._xent = -self.target * T.log(self.softmax)
        else:
            (self._xent, self.softmax, self._max_pr, self.argmax) =\
                    nnet.crossentropy_softmax_max_and_argmax_1hot(
                        self.linear_output, self.target)

        self.unregularized_cost = T.sum(self._xent)
        self.l1_cost = self.l1 * T.sum(abs(self.w))
        self.l2_cost = self.l2 * T.sum(self.w**2)
        self.regularized_cost = self.unregularized_cost + self.l1_cost + self.l2_cost
        self._loss_zero_one = T.mean(T.neq(self.argmax, self.target))

        # METHODS
        # fit_logistic_regression_online below calls logreg.validate, so
        # these methods must be defined unconditionally
        self.predict = module.Method([self.input], self.argmax)
        self.label_probs = module.Method([self.input], self.softmax)
        self.validate = module.Method([self.input, self.target],
                [self._loss_zero_one, self.regularized_cost, self.unregularized_cost])

    def _instance_initialize(self, obj):
        obj.w = N.zeros((self.n_in, self.n_out))
        obj.b = N.zeros(self.n_out)
        obj.__pp_hide__ = ['params']

def logistic_regression(n_in, n_out, l1, l2, minimizer=None):
    if n_out == 2:
        raise NotImplementedError()
    else:
        rval = LogRegN(n_in=n_in, n_out=n_out, l1=l1, l2=l2)
        rval.minimizer = minimizer([rval.input, rval.target], rval.regularized_cost,
                rval.params)
        return rval.make()

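# Hedged usage sketch (not part of the original file): driving the factory
# above by hand. The algo='dummy' keyword mirrors _fit_logreg_defaults below
# and assumes make_minimizer accepts it; shapes and data are illustrative.
def _example_logistic_regression():
    classifier = logistic_regression(n_in=784, n_out=10, l1=0.0, l2=0.0,
            minimizer=make_minimizer(algo='dummy'))
    x = N.random.RandomState(42).randn(5, 784)
    return classifier.predict(x)
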
#TODO: grouping parameters by prefix does not play well with providing defaults. Think...
class _fit_logreg_defaults(object):
    minimizer_algo = 'dummy'
    #minimizer_lr = 0.001
    dataset = 'MNIST_1k'
    l1 = 0.0
    l2 = 0.0
    batchsize = 8
    verbose = 1

from ..datasets import MNIST
import sgd #TODO: necessary to add it to factory list
# consider pre-importing each file in algorithms, datasets (possibly with try/except
# around each import so that any import failure is ignored); see the sketch below
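
# Hedged sketch of the pre-import idea described in the comment above (not
# part of the original file): import each sibling module for its factory
# registration side effects, skipping any that fail to import. The module
# names are illustrative.
def _pre_import_factories(names=('sgd', 'minimizer', 'stopper')):
    for name in names:
        try:
            __import__(name, globals(), locals())
        except ImportError:
            pass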

def fit_logistic_regression_online(state, channel=lambda *args, **kwargs: None):
    #use stochastic gradient descent
    state.use_defaults(_fit_logreg_defaults)

    dataset = make_dataset(**state.subdict(prefix='dataset_'))
    train = dataset.train
    valid = dataset.valid
    test = dataset.test

    logreg = logistic_regression(
            n_in=train.x.shape[1],
            n_out=dataset.n_classes,
            l2=state.l2,
            l1=state.l1,
            minimizer=make_minimizer(**state.subdict(prefix='minimizer_')))

    batchsize = state.batchsize
    verbose = state.verbose
    iter = [0]

    def step():
        # step by making a pass through the training set
        for j in xrange(0, len(train.x)-batchsize+1, batchsize):
            cost_j = logreg.minimizer.step_cost(train.x[j:j+batchsize], train.y[j:j+batchsize])
            if verbose > 1:
                print 'estimated train cost', cost_j
        #TODO: consult iter[0] for periodic saving to cwd (model, minimizer, and stopper)

    def check():
        validate = logreg.validate(valid.x, valid.y)
        if verbose > 0:
            print 'iter', iter[0], 'validate', validate
            sys.stdout.flush()
        iter[0] += 1
        return validate[0]

    def save():
        return copy.deepcopy(logreg)

    stopper = make_stopper(**state.subdict(prefix='stopper_'))
    stopper.find_min(step, check, save)

    state.train_01, state.train_rcost, state.train_cost = logreg.validate(train.x, train.y)
    state.valid_01, state.valid_rcost, state.valid_cost = logreg.validate(valid.x, valid.y)
    state.test_01, state.test_rcost, state.test_cost = logreg.validate(test.x, test.y)

    state.n_train = len(train.y)
    state.n_valid = len(valid.y)
    state.n_test = len(test.y)

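# Hedged sketch (not part of the original file) of the minimal interface that
# fit_logistic_regression_online assumes of its `state` argument; a
# jobman-style state object would provide this. Illustrative stand-in only.
class _ExampleState(object):
    def use_defaults(self, defaults):
        # adopt class attributes of `defaults` that are not already set
        for name in dir(defaults):
            if not name.startswith('_') and not hasattr(self, name):
                setattr(self, name, getattr(defaults, name))

    def subdict(self, prefix=''):
        # collect attributes that share `prefix`, with the prefix stripped,
        # e.g. minimizer_algo='dummy' -> {'algo': 'dummy'}
        return dict((name[len(prefix):], value)
                for name, value in self.__dict__.items()
                if name.startswith(prefix))
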
class LogReg2(module.FancyModule):
    def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False):
        super(LogReg2, self).__init__() #boilerplate

        self.input = input if input is not None else T.matrix('input')
        self.targ = targ if targ is not None else T.lcol()

        self.w = w if w is not None else module.Member(T.dmatrix())
        self.b = b if b is not None else module.Member(T.dvector())
        self.lr = lr if lr is not None else module.Member(T.dscalar())

        self.params = [p for p in [self.w, self.b] if p.owner is None]

        output = nnet.sigmoid(T.dot(self.input, self.w) + self.b)
        xent = -self.targ * T.log(output) - (1.0 - self.targ) * T.log(1.0 - output)
        sum_xent = T.sum(xent)

        self.output = output
        self.xent = xent
        self.sum_xent = sum_xent
        self.cost = sum_xent

        #define the apply method
        self.pred = (T.dot(self.input, self.w) + self.b) > 0.0
        self.apply = module.Method([self.input], self.pred)

        #if this module has any internal parameters, define an update function for them
        if self.params:
            gparams = T.grad(sum_xent, self.params)
            self.update = module.Method([self.input, self.targ], sum_xent,
                    updates=dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))

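# Hedged usage sketch (not part of the original file): one way to train a
# LogReg2 instance. Initializing w, b, and lr by attribute assignment mirrors
# LogRegN._instance_initialize above; values and shapes are illustrative.
def _example_logreg2(x, y, n_in, n_epochs=100):
    binary = LogReg2().make()
    binary.w = N.zeros((n_in, 1))
    binary.b = N.zeros(1)
    binary.lr = 0.01
    for i in xrange(n_epochs):
        print 'epoch', i, 'xent', binary.update(x, y)  # one full-batch gradient step
    return binary.apply(x)  # thresholded predictions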