Mercurial repository: ift6266
File: deep/deep_mlp/job.py @ changeset 626:75dbbe409578

Added code for deep MLP, experiment code to go along with it. Also added code I used to filter the P07 / PNIST07 datasets to keep only digits.

author: fsavard
date:   Wed, 16 Mar 2011 13:43:32 -0400
parent: 625:128bc92897f2
#!/usr/bin/env python
# coding: utf-8

'''
Launching:

jobman sqlschedules postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/mlp_dumi mlp_jobman.experiment mlp_jobman.conf \
    'n_hidden={{500,1000,2000}}' \
    'n_hidden_layers={{2,3}}' \
    'train_on={{NIST,NISTP,P07}}' \
    'train_subset={{DIGITS_ONLY,ALL}}' \
    'learning_rate_log10={{-1.,-2.,-3.}}'

in mlp_jobman.conf:
rng_seed=1234
L1_reg=0.0
L2_reg=0.0
n_epochs=10
minibatch_size=20
'''
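
# Note: for a quick local smoke test that bypasses jobman scheduling entirely,
# this module can also be run directly ("python job.py"): __main__ calls
# run_test() below, which sets TEST_RUN and runs jobman_entrypoint with the
# hardcoded TEST_HP hyperparameters and a mock channel.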

import os, sys, copy, operator, time
import theano
import theano.tensor as T
import numpy
from mlp import MLP
from ift6266 import datasets
from pylearn.io.seriestables import *
import tables
from jobman.tools import DD

N_INPUTS = 32*32
REDUCE_EVERY = 250

TEST_RUN = False

TEST_HP = DD({'n_hidden': 200,
              'n_hidden_layers': 2,
              'train_on': 'NIST',
              'train_subset': 'ALL',
              'learning_rate_log10': -2,
              'rng_seed': 1234,
              'L1_reg': 0.0,
              'L2_reg': 0.0,
              'n_epochs': 2,
              'minibatch_size': 20})

###########################################
# digits datasets
# nist_digits is already in NIST_PATH and in ift6266.datasets
# NOTE: for these datasets the test and valid sets are wrong
# (they don't correspond to the training set... they're just placeholders)

# FTDataSet was not imported in the original file; assuming here that it is
# exposed by ift6266.datasets.defs, since it is needed for the two dataset
# definitions below
from ift6266.datasets.defs import NIST_PATH, DATA_PATH, FTDataSet
TRANSFORMED_DIGITS_PATH = '/data/lisatmp/ift6266h10/data/transformed_digits'

P07_digits = FTDataSet(
    train_data = [os.path.join(TRANSFORMED_DIGITS_PATH,
                               'data/P07_train'+str(i)+'_data.ft')
                  for i in range(0, 100)],
    train_lbl = [os.path.join(TRANSFORMED_DIGITS_PATH,
                              'data/P07_train'+str(i)+'_labels.ft')
                 for i in range(0, 100)],
    test_data = [os.path.join(DATA_PATH, 'data/P07_test_data.ft')],
    test_lbl = [os.path.join(DATA_PATH, 'data/P07_test_labels.ft')],
    valid_data = [os.path.join(DATA_PATH, 'data/P07_valid_data.ft')],
    valid_lbl = [os.path.join(DATA_PATH, 'data/P07_valid_labels.ft')],
    indtype=theano.config.floatX, inscale=255., maxsize=None)

# Added PNIST
PNIST07_digits = FTDataSet(
    train_data = [os.path.join(TRANSFORMED_DIGITS_PATH,
                               'PNIST07_train'+str(i)+'_data.ft')
                  for i in range(0, 100)],
    train_lbl = [os.path.join(TRANSFORMED_DIGITS_PATH,
                              'PNIST07_train'+str(i)+'_labels.ft')
                 for i in range(0, 100)],
    test_data = [os.path.join(DATA_PATH, 'data/PNIST07_test_data.ft')],
    test_lbl = [os.path.join(DATA_PATH, 'data/PNIST07_test_labels.ft')],
    valid_data = [os.path.join(DATA_PATH, 'data/PNIST07_valid_data.ft')],
    valid_lbl = [os.path.join(DATA_PATH, 'data/PNIST07_valid_labels.ft')],
    indtype=theano.config.floatX, inscale=255., maxsize=None)

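# Both digit datasets stream their training data from 100 .ft shards. The
# keyword meanings are inferred from the parameter names and usage, not
# confirmed against FTDataSet's source: indtype casts inputs to theano's
# floatX, inscale=255. presumably rescales raw byte pixel values into [0, 1],
# and maxsize=None means no cap on the number of examples read.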

# building valid_test_datasets
# - we want dataset_obj instances for the 3 datasets
# - so it's just a matter of building FTDataSet(train=whatever, test=..., valid=pNIST, etc.)
# - in the array we want pointers to the test or valid member functions,
#   so NOT dataset_obj, but e.g. dataset_obj.train (without the parentheses)
def build_test_valid_sets():
    nist_ds = datasets.nist_all()
    pnist_ds = datasets.PNIST07()
    p07_ds = datasets.nist_P07()

    test_valid_fns = [nist_ds.test, nist_ds.valid,
                      pnist_ds.test, pnist_ds.valid,
                      p07_ds.test, p07_ds.valid]

    test_valid_names = ["nist_all__test", "nist_all__valid",
                        "NISTP__test", "NISTP__valid",
                        "P07__test", "P07__valid"]

    return test_valid_fns, test_valid_names
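
# Example of how the returned callables are consumed (see
# compute_and_save_errors below): each entry is a bound method that, given a
# minibatch size, iterates over (inputs, labels) minibatches, e.g.
#
#   fns, names = build_test_valid_sets()
#   for mb_x, mb_y in fns[0](100):   # 100-example minibatches of the nist_all test set
#       ...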

def add_error_series(series, error_name, hdf5_file,
                     index_names=('minibatch_idx',), use_accumulator=False,
                     reduce_every=250):
    series_base = ErrorSeries(error_name=error_name,
                              table_name=error_name,
                              hdf5_file=hdf5_file,
                              index_names=index_names)

    if use_accumulator:
        series[error_name] = \
            AccumulatorSeriesWrapper(base_series=series_base,
                                     reduce_every=reduce_every)
    else:
        series[error_name] = series_base

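# Typical use, as done below in jobman_entrypoint: register a series once,
# then append (index, value) pairs as training progresses:
#
#   series = {}
#   add_error_series(series, "training_error", h5f,
#                    index_names=('minibatch_idx',), use_accumulator=True,
#                    reduce_every=REDUCE_EVERY)
#   series["training_error"].append((mb_idx,), cost)
#
# With use_accumulator=True, appended values are accumulated and only a
# reduced value is written out every reduce_every appends, rather than a row
# per call.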
TEST_VALID_FNS, TEST_VALID_NAMES = None, None
def compute_and_save_errors(state, mlp, series, hdf5_file, minibatch_idx):
    global TEST_VALID_FNS, TEST_VALID_NAMES

    # build the test/valid sets lazily, only on the first call
    if TEST_VALID_FNS is None:
        TEST_VALID_FNS, TEST_VALID_NAMES = build_test_valid_sets()

    # if the training is on digits only, then there'll be a ~100%
    # error on the non-digit classes in the valid/test sets... just
    # ignore those

    test_fn = theano.function([mlp.input], mlp.logRegressionLayer.y_pred)

    test_batch_size = 100
    for test_ds_fn, test_ds_name in zip(TEST_VALID_FNS, TEST_VALID_NAMES):
        # reset error counts for every test/valid set
        # note: floats, so the error fractions below use true division
        total_errors = total_digit_errors = \
            total_uppercase_errors = total_lowercase_errors = 0.

        total_all = total_lowercase = total_uppercase = total_digit = 0

        for mb_x, mb_y in test_ds_fn(test_batch_size):
            # class masks; this assumes labels < 10 are digits and labels
            # >= 36 are uppercase (if the dataset instead puts uppercase at
            # 10-35 and lowercase at 36-61, the two letter masks are swapped)
            digit_mask = mb_y < 10
            uppercase_mask = mb_y >= 36
            lowercase_mask = numpy.ones((len(mb_x),)) \
                             - digit_mask - uppercase_mask

            total_all += len(mb_x)
            total_digit += sum(digit_mask)
            total_uppercase += sum(uppercase_mask)
            total_lowercase += sum(lowercase_mask)

            predictions = test_fn(mb_x)

            all_errors = (mb_y != predictions)
            total_errors += sum(all_errors)

            if len(all_errors) != len(digit_mask):
                print "size all", all_errors.shape, " digit", digit_mask.shape
            total_digit_errors += sum(numpy.multiply(all_errors, digit_mask))
            total_uppercase_errors += sum(numpy.multiply(all_errors, uppercase_mask))
            total_lowercase_errors += sum(numpy.multiply(all_errors, lowercase_mask))

        four_errors = [float(total_errors) / total_all,
                       float(total_digit_errors) / total_digit,
                       float(total_lowercase_errors) / total_lowercase,
                       float(total_uppercase_errors) / total_uppercase]

        four_errors_names = ["all", "digits", "lower", "upper"]

        # record stats per set
        print "Errors on", test_ds_name, ",".join(four_errors_names), \
            ":", ",".join([str(e) for e in four_errors])

        # now in the state
        for err, errname in zip(four_errors, four_errors_names):
            error_full_name = 'error__'+test_ds_name+'_'+errname
            min_name = 'min_'+error_full_name
            minpos_name = 'minpos_'+error_full_name

            if state.has_key(min_name):
                if state[min_name] > err:
                    state[min_name] = err
                    state[minpos_name] = minibatch_idx
            else:
                # also create the series the first time around
                add_error_series(series, error_full_name, hdf5_file,
                                 index_names=('minibatch_idx',))
                state[min_name] = err
                state[minpos_name] = minibatch_idx

            series[error_full_name].append((minibatch_idx,), err)

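# After a run, the jobman state thus accumulates entries of the form
#   state['min_error__<set>_<group>']     e.g. state['min_error__P07__test_digits']
#   state['minpos_error__<set>_<group>']  the minibatch index where that minimum occurred
# alongside one HDF5 error series per (set, class group) pair.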
def jobman_entrypoint(state, channel):
    global TEST_RUN
    minibatch_size = state.minibatch_size

    print_every = 100000
    COMPUTE_ERROR_EVERY = 10**7 / minibatch_size # compute error every 10 million examples
    if TEST_RUN:
        print_every = 100
        COMPUTE_ERROR_EVERY = 1000 / minibatch_size

    print "entrypoint, state is"
    print state

    ######################
    # select dataset and dataset subset, plus adjust the epoch count to make
    # the number of examples seen independent of the dataset
    # example: for the DIGITS_ONLY case the number of epochs must be changed,
    # and for the pure NIST case (no transformations) it must be multiplied
    # by 100 from the start, since we don't have the transformed variants

    # compute this in terms of the P07 dataset size (=80M)
    MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size

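    # Worked example: with the conf values from the docstring (n_epochs=10,
    # minibatch_size=20), MINIBATCHES_TO_SEE = 10 * 8*10**6 / 20 = 4,000,000
    # minibatches, i.e. 80M training examples seen over the whole run.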
    if state.train_on == 'NIST' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_all()
    elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = datasets.nist_digits()
    elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
        dataset_obj = datasets.PNIST07()
    elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
        dataset_obj = PNIST07_digits
    elif state.train_on == 'P07' and state.train_subset == 'ALL':
        dataset_obj = datasets.nist_P07()
    elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
        # P07_digits is defined at module level above, not in ift6266.datasets
        dataset_obj = P07_digits

    dataset = dataset_obj

    if state.train_subset == 'ALL':
        n_classes = 62
    elif state.train_subset == 'DIGITS_ONLY':
        n_classes = 10
    else:
        raise NotImplementedError()
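    # 62 classes = 10 digits + 26 uppercase + 26 lowercase letters; the
    # DIGITS_ONLY subsets keep only the 10 digit classes.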

    ###############################
    # construct model

    print "constructing model..."
    x = T.matrix('x')
    y = T.ivector('y')

    rng = numpy.random.RandomState(state.rng_seed)

    # construct the MLP class
    model = MLP(rng=rng, input=x, n_in=N_INPUTS,
                n_hidden_layers=state.n_hidden_layers,
                n_hidden=state.n_hidden, n_out=n_classes)

    # cost and training fn
    cost = T.mean(model.negative_log_likelihood(y)) \
           + state.L1_reg * model.L1 \
           + state.L2_reg * model.L2_sqr

    print "L1, L2: ", state.L1_reg, state.L2_reg

    gradient_nll_wrt_params = []
    for param in model.params:
        gparam = T.grad(cost, param)
        gradient_nll_wrt_params.append(gparam)

    learning_rate = 10**float(state.learning_rate_log10)
    print "Learning rate", learning_rate

    train_updates = {}
    for param, gparam in zip(model.params, gradient_nll_wrt_params):
        train_updates[param] = param - learning_rate * gparam

    train_fn = theano.function([x, y], cost, updates=train_updates)

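    # The above is plain minibatch SGD: each call train_fn(mb_x, mb_y) does
    #   param <- param - learning_rate * d(cost)/d(param)
    # for every parameter and returns the minibatch cost, where cost is the
    # mean NLL plus the L1/L2 penalty terms.
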
    #######################
    # create series
    basedir = os.getcwd()

    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")

    series = {}
    add_error_series(series, "training_error", h5f,
                     index_names=('minibatch_idx',), use_accumulator=True,
                     reduce_every=REDUCE_EVERY)

    ##########################
    # training loop

    start_time = time.clock()

    print "begin training..."
    print "will train for", MINIBATCHES_TO_SEE, "minibatches"

    mb_idx = 0

    # loop over the training stream until enough minibatches have been seen;
    # the inner generator is restarted by the outer loop to emulate epochs
    while mb_idx < MINIBATCHES_TO_SEE:

        last_costs = []

        for mb_x, mb_y in dataset.train(minibatch_size):
            if mb_idx >= MINIBATCHES_TO_SEE or (TEST_RUN and mb_idx > 1000):
                break

            last_cost = train_fn(mb_x, mb_y)
            series["training_error"].append((mb_idx,), last_cost)

            last_costs.append(last_cost)
            if (len(last_costs)+1) % print_every == 0:
                print "Mean over last", print_every, "minibatches: ", numpy.mean(last_costs)
                last_costs = []

            if (mb_idx+1) % COMPUTE_ERROR_EVERY == 0:
                # compute errors
                print "computing errors on all datasets..."
                print "Time since training began: ", (time.clock()-start_time)/60., "minutes"
                compute_and_save_errors(state, model, series, h5f, mb_idx)

                channel.save()

            mb_idx += 1

            sys.stdout.flush()

        if TEST_RUN and mb_idx > 1000:
            break

    end_time = time.clock()

    print "-"*80
    print "Finished. Training took", (end_time-start_time)/60., "minutes"
    print state


def run_test():
    global TEST_RUN
    from fsml.job_management import mock_channel
    TEST_RUN = True
    jobman_entrypoint(TEST_HP, mock_channel)

if __name__ == '__main__':
    run_test()