comparison deep/stacked_dae/v_sylvain/nist_sda.py @ 250:6d49cf134a40

ajout de fonctionnalite pour different finetune dataset
author SylvainPL <sylvain.pannetier.lebeuf@umontreal.ca>
date Tue, 16 Mar 2010 21:24:09 -0400
parents 9fc641d7adda
children f14fb56b3f8d
comparison
equal deleted inserted replaced
249:1bf046c0c84a 250:6d49cf134a40
19 19
20 from jobman import DD 20 from jobman import DD
21 import jobman, jobman.sql 21 import jobman, jobman.sql
22 from pylearn.io import filetensor 22 from pylearn.io import filetensor
23 23
24 from ift6266 import datasets
25
26 from utils import produit_cartesien_jobs 24 from utils import produit_cartesien_jobs
25 from copy import copy
27 26
28 from sgd_optimization import SdaSgdOptimizer 27 from sgd_optimization import SdaSgdOptimizer
29 28
30 #from ift6266.utils.scalar_series import * 29 #from ift6266.utils.scalar_series import *
31 from ift6266.utils.seriestables import * 30 from ift6266.utils.seriestables import *
32 import tables 31 import tables
33 32
34 ############################################################################## 33 from ift6266 import datasets
35 # GLOBALS 34 from config import *
36
37 TEST_CONFIG = False
38
39 #NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
40 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/sylvainpl_sda_vsylvain'
41 EXPERIMENT_PATH = "ift6266.deep.stacked_dae.v_sylvain.nist_sda.jobman_entrypoint"
42
43 REDUCE_TRAIN_TO = None
44 MAX_FINETUNING_EPOCHS = 1000
45 # number of minibatches before taking means for valid error etc.
46 REDUCE_EVERY = 100
47
48 if TEST_CONFIG:
49 REDUCE_TRAIN_TO = 1000
50 MAX_FINETUNING_EPOCHS = 2
51 REDUCE_EVERY = 10
52 MINIBATCH_SIZE=20
53
54 # Possible values the hyperparameters can take. These are then
55 # combined with produit_cartesien_jobs so we get a list of all
56 # possible combinations, each one resulting in a job inserted
57 # in the jobman DB.
58 JOB_VALS = {'pretraining_lr': [0.1],#, 0.01],#, 0.001],#, 0.0001],
59 'pretraining_epochs_per_layer': [10],
60 'hidden_layers_sizes': [500],
61 'corruption_levels': [0.1],
62 'minibatch_size': [20],
63 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
64 'finetuning_lr':[0.1], #0.001 was very bad, so we leave it out
65 'num_hidden_layers':[1,1]}
66
67 # Just useful for tests... minimal number of epochs
68 DEFAULT_HP_NIST = DD({'finetuning_lr':0.1,
69 'pretraining_lr':0.1,
70 'pretraining_epochs_per_layer':2,
71 'max_finetuning_epochs':2,
72 'hidden_layers_sizes':500,
73 'corruption_levels':0.2,
74 'minibatch_size':20,
75 'reduce_train_to':10000,
76 'num_hidden_layers':1})
77 35
78 ''' 36 '''
79 Function called by jobman upon launching each job 37 Function called by jobman upon launching each job
80 Its path is the one given when inserting jobs: see EXPERIMENT_PATH 38 Its path is the one given when inserting jobs: see EXPERIMENT_PATH
81 ''' 39 '''
83 # record mercurial versions of each package 41 # record mercurial versions of each package
84 pylearn.version.record_versions(state,[theano,ift6266,pylearn]) 42 pylearn.version.record_versions(state,[theano,ift6266,pylearn])
85 # TODO: remove this, bad for number of simultaneous requests on DB 43 # TODO: remove this, bad for number of simultaneous requests on DB
86 channel.save() 44 channel.save()
87 45
88 workingdir = os.getcwd()
89
90 ########### Il faudrait arranger ici pour train plus petit
91
92 ## print "Will load NIST"
93 ##
94 ## nist = NIST(minibatch_size=20)
95 ##
96 ## print "NIST loaded"
97 ##
98 # For test runs, we don't want to use the whole dataset so 46 # For test runs, we don't want to use the whole dataset so
99 # reduce it to fewer elements if asked to. 47 # reduce it to fewer elements if asked to.
100 rtt = None 48 rtt = None
101 if state.has_key('reduce_train_to'): 49 if state.has_key('reduce_train_to'):
102 rtt = int(state['reduce_train_to']/state['minibatch_size']) 50 rtt = state['reduce_train_to']
103 elif REDUCE_TRAIN_TO: 51 elif REDUCE_TRAIN_TO:
104 rtt = int(REDUCE_TRAIN_TO/MINIBATCH_SIZE) 52 rtt = REDUCE_TRAIN_TO
105 53
106 if rtt:
107 print "Reducing training set to "+str(rtt*state['minibatch_size'])+ " examples"
108 else:
109 rtt=float('inf') #No reduction
110 ## nist.reduce_train_set(rtt)
111 ##
112 ## train,valid,test = nist.get_tvt()
113 ## dataset = (train,valid,test)
114
115 n_ins = 32*32 54 n_ins = 32*32
116 n_outs = 62 # 10 digits, 26*2 (lower, capitals) 55 n_outs = 62 # 10 digits, 26*2 (lower, capitals)
117 56
57 examples_per_epoch = NIST_ALL_TRAIN_SIZE
58
118 series = create_series(state.num_hidden_layers) 59 series = create_series(state.num_hidden_layers)
119 60
120 print "Creating optimizer with state, ", state 61 print "Creating optimizer with state, ", state
121 62
122 optimizer = SdaSgdOptimizer(dataset=datasets.nist_all, hyperparameters=state, \ 63 optimizer = SdaSgdOptimizer(dataset=datasets.nist_all,
64 hyperparameters=state, \
123 n_ins=n_ins, n_outs=n_outs,\ 65 n_ins=n_ins, n_outs=n_outs,\
124 series=series) 66 examples_per_epoch=examples_per_epoch, \
125 67 series=series,
126 optimizer.pretrain(datasets.nist_all,rtt) 68 max_minibatches=rtt)
69
70 parameters=[]
71 optimizer.pretrain(datasets.nist_all)
127 channel.save() 72 channel.save()
128 73
129 optimizer.finetune(datasets.nist_all,rtt) 74 #Set some of the parameters used for the finetuning
75 if state.has_key('finetune_set'):
76 finetune_choice=state['finetune_set']
77 else:
78 finetune_choice=FINETUNE_SET
79
80 if state.has_key('max_finetuning_epochs'):
81 max_finetune_epoch_NIST=state['max_finetuning_epochs']
82 else:
83 max_finetune_epoch_NIST=MAX_FINETUNING_EPOCHS
84
85 if state.has_key('max_finetuning_epochs_P07'):
86 max_finetune_epoch_P07=state['max_finetuning_epochs_P07']
87 else:
88 max_finetune_epoch_P07=max_finetune_epoch_NIST
89
90 #Decide how the finetune is done
91
92 if finetune_choice==0:
93 print('\n\n\tfinetune avec nist\n\n')
94 optimizer.reload_parameters()
95 optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST)
96 if finetune_choice==1:
97 print('\n\n\tfinetune avec P07\n\n')
98 optimizer.reload_parameters()
99 optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07)
100 if finetune_choice==2:
101 print('\n\n\tfinetune avec nist suivi de P07\n\n')
102 optimizer.reload_parameters()
103 optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST)
104 optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07)
105
106 if finetune_choice==-1:
107 print('\nSerie de 3 essais de fine-tuning')
108 print('\n\n\tfinetune avec nist\n\n')
109 optimizer.reload_parameters()
110 optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST)
111 channel.save()
112 print('\n\n\tfinetune avec P07\n\n')
113 optimizer.reload_parameters()
114 optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07)
115 channel.save()
116 print('\n\n\tfinetune avec nist suivi de P07\n\n')
117 optimizer.reload_parameters()
118 optimizer.finetune(datasets.nist_all,max_finetune_epoch_NIST)
119 optimizer.finetune(datasets.nist_P07,max_finetune_epoch_P07)
120 channel.save()
121
130 channel.save() 122 channel.save()
131 123
132 return channel.COMPLETE 124 return channel.COMPLETE
133 125
134 # These Series objects are used to save various statistics 126 # These Series objects are used to save various statistics
205 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) 197 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
206 jobman.sql.insert_dict(job, db) 198 jobman.sql.insert_dict(job, db)
207 199
208 print "inserted" 200 print "inserted"
209 201
210 class NIST:
211 def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
212 global NIST_ALL_LOCATION
213
214 self.minibatch_size = minibatch_size
215 self.basepath = basepath and basepath or NIST_ALL_LOCATION
216
217 self.set_filenames()
218
219 # arrays of 2 elements: .x, .y
220 self.train = [None, None]
221 self.test = [None, None]
222
223 self.load_train_test()
224
225 self.valid = [[], []]
226 self.split_train_valid()
227 if reduce_train_to:
228 self.reduce_train_set(reduce_train_to)
229
230 def get_tvt(self):
231 return self.train, self.valid, self.test
232
233 def set_filenames(self):
234 self.train_files = ['all_train_data.ft',
235 'all_train_labels.ft']
236
237 self.test_files = ['all_test_data.ft',
238 'all_test_labels.ft']
239
240 def load_train_test(self):
241 self.load_data_labels(self.train_files, self.train)
242 self.load_data_labels(self.test_files, self.test)
243
244 def load_data_labels(self, filenames, pair):
245 for i, fn in enumerate(filenames):
246 f = open(os.path.join(self.basepath, fn))
247 pair[i] = filetensor.read(f)
248 f.close()
249
250 def reduce_train_set(self, max):
251 self.train[0] = self.train[0][:max]
252 self.train[1] = self.train[1][:max]
253
254 if max < len(self.test[0]):
255 for ar in (self.test, self.valid):
256 ar[0] = ar[0][:max]
257 ar[1] = ar[1][:max]
258
259 def split_train_valid(self):
260 test_len = len(self.test[0])
261
262 new_train_x = self.train[0][:-test_len]
263 new_train_y = self.train[1][:-test_len]
264
265 self.valid[0] = self.train[0][-test_len:]
266 self.valid[1] = self.train[1][-test_len:]
267
268 self.train[0] = new_train_x
269 self.train[1] = new_train_y
270
271 def test_load_nist():
272 print "Will load NIST"
273
274 import time
275 t1 = time.time()
276 nist = NIST(20)
277 t2 = time.time()
278
279 print "NIST loaded. time delta = ", t2-t1
280
281 tr,v,te = nist.get_tvt()
282
283 print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
284
285 raw_input("Press any key")
286
287 if __name__ == '__main__': 202 if __name__ == '__main__':
288 203
289 import sys
290
291 args = sys.argv[1:] 204 args = sys.argv[1:]
292 205
293 if len(args) > 0 and args[0] == 'load_nist': 206 #if len(args) > 0 and args[0] == 'load_nist':
294 test_load_nist() 207 # test_load_nist()
295 208
296 elif len(args) > 0 and args[0] == 'jobman_insert': 209 if len(args) > 0 and args[0] == 'jobman_insert':
297 jobman_insert_nist() 210 jobman_insert_nist()
298 211
299 elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': 212 elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
300 chanmock = DD({'COMPLETE':0,'save':(lambda:None)}) 213 chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
301 jobman_entrypoint(DEFAULT_HP_NIST, chanmock) 214 jobman_entrypoint(DD(DEFAULT_HP_NIST), chanmock)
302 215
303 else: 216 else:
304 print "Bad arguments" 217 print "Bad arguments"
305 218