comparison deep/stacked_dae/v_guillaume/nist_sda_retrieve.py @ 436:0ca069550abd

Added: single class version of SDA
author Guillaume Sicard <guitch21@gmail.com>
date Mon, 03 May 2010 06:14:05 -0400
#!/usr/bin/python
# coding: utf-8

import ift6266
import pylearn

import numpy
import theano
import time

import pylearn.version
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

import copy
import sys
import os
import os.path

from jobman import DD
import jobman, jobman.sql
from pylearn.io import filetensor

from utils import produit_cartesien_jobs
from copy import copy

from sgd_optimization import SdaSgdOptimizer

#from ift6266.utils.scalar_series import *
from ift6266.utils.seriestables import *
import tables

from ift6266 import datasets
from config2 import *
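# config2 is expected to define (at least) the module-level constants used
# below: PATH_NIST, PATH_P07, NIST_ALL_TRAIN_SIZE, REDUCE_TRAIN_TO,
# FINETUNE_SET, MAX_FINETUNING_EPOCHS, REDUCE_EVERY, JOB_VALS, JOBDB,
# EXPERIMENT_PATH and DEFAULT_HP_NIST.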

'''
Function called by jobman upon launching each job
Its path is the one given when inserting jobs: see EXPERIMENT_PATH
'''
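# EXPERIMENT_PATH (from config2) is assumed to be the dotted path jobman uses
# to locate this function, e.g. something like
# 'ift6266.deep.stacked_dae.v_guillaume.nist_sda_retrieve.jobman_entrypoint'
# (the exact module path depends on how the repository is laid out).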
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if state.has_key('decrease_lr'):
        decrease_lr = state['decrease_lr']
    else:
        decrease_lr = 0

    n_ins = 32*32
    n_outs = 62 # 10 digits + 26*2 letters (lower case and capitals)

    examples_per_epoch = NIST_ALL_TRAIN_SIZE
    # Pre-initialize these so the variables exist even if no branch below is taken
    PATH = ''
    nom_pretrain = ''
    nom_serie = ""
    if state['pretrain_choice'] == 0:
        PATH = PATH_NIST
        nom_pretrain = 'NIST'
        nom_serie = "series_NIST.h5"
    elif state['pretrain_choice'] == 1:
        PATH = PATH_P07
        nom_pretrain = 'P07'
        nom_serie = "series_P07.h5"

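    # Note: PATH_NIST / PATH_P07 (from config2) are assumed to be the directories
    # where an earlier pretraining run stored its parameters; PATH is used below
    # as optimizer.reload_parameters(PATH+'params_pretrain.txt').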
    series = create_series(state.num_hidden_layers, nom_serie)

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                series=series,
                                max_minibatches=rtt)

    parameters = []
    # Number of files of P07 used for pretraining
    nb_file = 0
##    if state['pretrain_choice'] == 0:
##        print('\n\tpretraining with NIST\n')
##        optimizer.pretrain(datasets.nist_all())
##    elif state['pretrain_choice'] == 1:
##        # To know how many files will be used during pretraining
##        nb_file = state['pretraining_epochs_per_layer']
##        state['pretraining_epochs_per_layer'] = 1 # Only 1 pass over the dataset
##        if nb_file >= 100:
##            sys.exit("The code does not support this many pretraining epochs (99 max with P07).\n"+
##                     "You have to correct the code (and be patient, P07 is huge !!)\n"+
##                     "or reduce the number of pretraining epochs to run the code (better idea).\n")
##        print('\n\tpretraining with P07')
##        optimizer.pretrain(datasets.nist_P07(min_file=0, max_file=nb_file))

    print('Retrieve pre-train done earlier ( '+nom_pretrain+' )')

    sys.stdout.flush()
    channel.save()

    # Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice = state['finetune_set']
    else:
        finetune_choice = FINETUNE_SET

    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST = state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST = MAX_FINETUNING_EPOCHS

    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07 = state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07 = max_finetune_epoch_NIST

    # Decide how the finetuning is done

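    # finetune_choice values (FINETUNE_SET from config2, or state['finetune_set']):
    #   0  : finetune on NIST (also tested on P07)
    #   1  : finetune on P07 (also tested on NIST)
    #   2  : finetune on P07, then on NIST
    #   3  : finetune on NIST, logistic regression layer only (validation on P07)
    #   -1 : run all four variants in sequence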
    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(), max_finetune_epoch_P07, ind_test=0, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with P07 followed by NIST\n\n')
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(), max_finetune_epoch_P07, ind_test=20, decrease=decrease_lr)
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=21, decrease=decrease_lr)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n'
              'All hidden unit outputs are inputs to the logistic regression\n\n')
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, special=1, decrease=decrease_lr)

    if finetune_choice == -1:
        print('\nSERIES OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(), datasets.nist_all(), max_finetune_epoch_P07, ind_test=0, decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with P07 (done earlier) followed by NIST (written here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('params_finetune_P07.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=21, decrease=decrease_lr)
        channel.save()
        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n'
              'All hidden unit outputs are inputs to the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters(PATH+'params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(), datasets.nist_P07(), max_finetune_epoch_NIST, ind_test=1, special=1, decrease=decrease_lr)
        channel.save()

    channel.save()

    return channel.COMPLETE

# These Series objects are used to save various statistics
# during training.
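# The optimizer is assumed to feed these series as it trains, with calls of
# the form series['training_error'].append((epoch, minibatch), value) (the
# exact signature is whatever seriestables' ErrorSeries expects); all values
# end up in the HDF5 file named by nom_serie.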
def create_series(num_hidden_layers, nom_serie):

    # Replace series we don't want to save with DummySeries, e.g.
    # series['training_error'] = DummySeries()

    series = {}

    basedir = os.getcwd()

    h5f = tables.openFile(os.path.join(basedir, nom_serie), "w")

    # reconstruction
    reconstruction_base = \
        ErrorSeries(error_name="reconstruction_error",
                    table_name="reconstruction_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'),
                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
    series['reconstruction_error'] = \
        AccumulatorSeriesWrapper(base_series=reconstruction_base,
                                 reduce_every=REDUCE_EVERY)

    # train
    training_base = \
        ErrorSeries(error_name="training_error",
                    table_name="training_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'),
                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
    series['training_error'] = \
        AccumulatorSeriesWrapper(base_series=training_base,
                                 reduce_every=REDUCE_EVERY)

    # valid and test are not accumulated/mean, saved directly
    series['validation_error'] = \
        ErrorSeries(error_name="validation_error",
                    table_name="validation_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'))

    series['test_error'] = \
        ErrorSeries(error_name="test_error",
                    table_name="test_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'))

    param_names = []
    for i in range(num_hidden_layers):
        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
    param_names += ['logreg_layer_W', 'logreg_layer_b']

    # comment out series we don't want to save
    series['params'] = SharedParamsStatisticsWrapper(
                        new_group_name="params",
                        base_group="/",
                        arrays_names=param_names,
                        hdf5_file=h5f,
                        index_names=('epoch',))

    return series

# Insert jobs into the Postgres DB based on the combinations
# of hyperparameter values above
# (see comment for produit_cartesien_jobs() to know how it works)
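# JOB_VALS (from config2) is assumed to map each hyperparameter name to a list
# of candidate values; produit_cartesien_jobs() then yields one job (a dict of
# hyperparameters) per element of the Cartesian product of those lists.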
def jobman_insert_nist():
    jobs = produit_cartesien_jobs(JOB_VALS)

    db = jobman.sql.db(JOBDB)
    for job in jobs:
        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
        jobman.sql.insert_dict(job, db)

    print "inserted"

if __name__ == '__main__':

    args = sys.argv[1:]

    #if len(args) > 0 and args[0] == 'load_nist':
    #    test_load_nist()

    if len(args) > 0 and args[0] == 'jobman_insert':
        jobman_insert_nist()

    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
        chanmock = DD({'COMPLETE':0, 'save':(lambda:None)})
        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)

    else:
        print "Bad arguments"
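# Typical invocations (sketch, assuming config2 and the jobman DB are set up):
#   python nist_sda_retrieve.py jobman_insert          # insert the JOB_VALS grid into JOBDB
#   python nist_sda_retrieve.py test_jobman_entrypoint # run one job locally with DEFAULT_HP_NIST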