comparison deep/stacked_dae/v_sylvain/nist_sda_retrieve.py @ 287:f9b93ae45723

Program to resume only part of the experiments. Useful only for a very specific use case.
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Fri, 26 Mar 2010 16:17:56 -0400
parents 286:1cc535f3e254
children a78dbbc61f37
#!/usr/bin/python
# coding: utf-8

import ift6266
import pylearn

import numpy
import theano
import time

import pylearn.version
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

import copy
import sys
import os
import os.path

from jobman import DD
import jobman, jobman.sql
from pylearn.io import filetensor

from utils import produit_cartesien_jobs
from copy import copy  # note: this shadows the 'copy' module imported above

from sgd_optimization import SdaSgdOptimizer

#from ift6266.utils.scalar_series import *
from ift6266.utils.seriestables import *
import tables

from ift6266 import datasets
# config2 presumably provides the experiment constants used below
# (REDUCE_TRAIN_TO, NIST_ALL_TRAIN_SIZE, FINETUNE_SET, MAX_FINETUNING_EPOCHS,
# REDUCE_EVERY, JOBDB, EXPERIMENT_PATH, JOB_VALS, DEFAULT_HP_NIST).
from config2 import *

'''
Function called by jobman upon launching each job.
Its path is the one given when inserting jobs: see EXPERIMENT_PATH.
'''
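# Illustrative note (not part of the original file): EXPERIMENT_PATH is assumed
# to be the dotted path of the entrypoint below, something like
# 'ift6266.deep.stacked_dae.v_sylvain.nist_sda_retrieve.jobman_entrypoint';
# the actual value is defined in config2.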
def jobman_entrypoint(state, channel):
    # record mercurial versions of each package
    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
    # TODO: remove this, bad for number of simultaneous requests on DB
    channel.save()

    # For test runs, we don't want to use the whole dataset so
    # reduce it to fewer elements if asked to.
    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    n_ins = 32*32
    n_outs = 62 # 10 digits, 26*2 (lower, capitals)

    examples_per_epoch = NIST_ALL_TRAIN_SIZE

    series = create_series(state.num_hidden_layers)

    print "Creating optimizer with state, ", state

    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
                                hyperparameters=state,
                                n_ins=n_ins, n_outs=n_outs,
                                examples_per_epoch=examples_per_epoch,
                                series=series,
                                max_minibatches=rtt)

    parameters=[]
    #Number of files of P07 used for pretraining
    nb_file=0
##    if state['pretrain_choice'] == 0:
##        print('\n\tpretraining with NIST\n')
##        optimizer.pretrain(datasets.nist_all())
##    elif state['pretrain_choice'] == 1:
##        #To know how many files will be used during pretraining
##        nb_file = state['pretraining_epochs_per_layer']
##        state['pretraining_epochs_per_layer'] = 1 #Only 1 pass over the dataset
##        if nb_file >= 100:
##            sys.exit("The code does not support this many pretraining epochs (99 max with P07).\n"+
##                     "You have to correct the code (and be patient, P07 is huge !!)\n"+
##                     "or reduce the number of pretraining epochs to run the code (better idea).\n")
##        print('\n\tpretraining with P07')
##        optimizer.pretrain(datasets.nist_P07(min_file=0,max_file=nb_file))
    print ('Retrieve pre-train done earlier')

    sys.stdout.flush()

    #Set some of the parameters used for the finetuning
    if state.has_key('finetune_set'):
        finetune_choice=state['finetune_set']
    else:
        finetune_choice=FINETUNE_SET

    if state.has_key('max_finetuning_epochs'):
        max_finetune_epoch_NIST=state['max_finetuning_epochs']
    else:
        max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS

    if state.has_key('max_finetuning_epochs_P07'):
        max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
    else:
        max_finetune_epoch_P07=max_finetune_epoch_NIST

    #Decide how the finetuning is done:
    #  0: NIST;  1: P07;  2: NIST followed by P07;
    #  3: NIST, logistic regression layer only;  -1: all of the above in sequence

    if finetune_choice == 0:
        print('\n\n\tfinetune with NIST\n\n')
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1)
        channel.save()
    if finetune_choice == 1:
        print('\n\n\tfinetune with P07\n\n')
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
        channel.save()
    if finetune_choice == 2:
        print('\n\n\tfinetune with NIST followed by P07\n\n')
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=21)
        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20)
        channel.save()
    if finetune_choice == 3:
        print('\n\n\tfinetune with NIST only on the logistic regression on top (but validation on P07).\n'
              'All hidden unit outputs are inputs to the logistic regression\n\n')
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1,special=1)

    if finetune_choice == -1:
        print('\nSERIES OF 4 DIFFERENT FINETUNINGS')
        print('\n\n\tfinetune with NIST\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1)
        channel.save()
        print('\n\n\tfinetune with P07\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=0)
        channel.save()
        print('\n\n\tfinetune with NIST (done earlier) followed by P07 (done here)\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_finetune_NIST.txt')
        optimizer.finetune(datasets.nist_P07(min_file=nb_file),datasets.nist_all(),max_finetune_epoch_P07,ind_test=20)
        channel.save()
        print('\n\n\tfinetune with NIST only on the logistic regression on top.\n'
              'All hidden unit outputs are inputs to the logistic regression\n\n')
        sys.stdout.flush()
        optimizer.reload_parameters('/u/pannetis/IFT6266/ift6266/deep/stacked_dae/v_sylvain/ift6266h10_db/pannetis_finetuningSDA/1/params_pretrain.txt')
        optimizer.finetune(datasets.nist_all(),datasets.nist_P07(min_file=nb_file),max_finetune_epoch_NIST,ind_test=1,special=1)
        channel.save()

    channel.save()

    return channel.COMPLETE

# These Series objects are used to save various statistics
# during the training.
def create_series(num_hidden_layers):

    # Replace series we don't want to save with DummySeries, e.g.
    # series['training_error'] = DummySeries()

    series = {}

    basedir = os.getcwd()

    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")

    # reconstruction
    reconstruction_base = \
        ErrorSeries(error_name="reconstruction_error",
                    table_name="reconstruction_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'),
                    title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
    series['reconstruction_error'] = \
        AccumulatorSeriesWrapper(base_series=reconstruction_base,
                                 reduce_every=REDUCE_EVERY)

    # train
    training_base = \
        ErrorSeries(error_name="training_error",
                    table_name="training_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'),
                    title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
    series['training_error'] = \
        AccumulatorSeriesWrapper(base_series=training_base,
                                 reduce_every=REDUCE_EVERY)

    # valid and test are not accumulated/mean, saved directly
    series['validation_error'] = \
        ErrorSeries(error_name="validation_error",
                    table_name="validation_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'))

    series['test_error'] = \
        ErrorSeries(error_name="test_error",
                    table_name="test_error",
                    hdf5_file=h5f,
                    index_names=('epoch','minibatch'))

    param_names = []
    for i in range(num_hidden_layers):
        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
    param_names += ['logreg_layer_W', 'logreg_layer_b']

    # comment out series we don't want to save
    series['params'] = SharedParamsStatisticsWrapper(
                        new_group_name="params",
                        base_group="/",
                        arrays_names=param_names,
                        hdf5_file=h5f,
                        index_names=('epoch',))

    return series
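
# Illustrative sketch (not part of the original file): the dict returned by
# create_series() is passed to SdaSgdOptimizer, which is assumed to log values
# through an append(index, value) call matching each series' index_names, e.g.:
#
#     series['training_error'].append((epoch, minibatch_index), train_cost)
#     series['params'].append((epoch,), all_params)
#
# The exact append() signature is an assumption based on
# ift6266.utils.seriestables; series one does not want can be silenced with
# DummySeries() as noted in create_series().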

# Perform insertion into the Postgres DB based on combination
# of hyperparameter values above
# (see comment for produit_cartesien_jobs() to know how it works)
def jobman_insert_nist():
    jobs = produit_cartesien_jobs(JOB_VALS)

    db = jobman.sql.db(JOBDB)
    for job in jobs:
        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
        jobman.sql.insert_dict(job, db)

    print "inserted"
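
# Illustrative sketch (not part of the original file) of what
# produit_cartesien_jobs(JOB_VALS) is assumed to do: JOB_VALS maps each
# hyperparameter name to a list of candidate values, and one job dict is
# produced per element of the Cartesian product. The real helper lives in
# utils.py; the function below is only a hypothetical equivalent and is not
# called anywhere.
def _cartesian_jobs_sketch(job_vals):
    import itertools
    names = sorted(job_vals.keys())
    value_lists = [job_vals[name] for name in names]
    return [dict(zip(names, values)) for values in itertools.product(*value_lists)]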

if __name__ == '__main__':

    args = sys.argv[1:]

    #if len(args) > 0 and args[0] == 'load_nist':
    #    test_load_nist()

    if len(args) > 0 and args[0] == 'jobman_insert':
        jobman_insert_nist()

    elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
        jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)

    else:
        print "Bad arguments"
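
# Usage (as read from the __main__ block above; the module filename comes from
# the changeset header, the exact invocation environment is assumed):
#   python nist_sda_retrieve.py jobman_insert           # insert the JOB_VALS grid into the jobman DB
#   python nist_sda_retrieve.py test_jobman_entrypoint  # run one job locally with DEFAULT_HP_NIST and a mock channel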