comparison deep/stacked_dae/nist_sda.py @ 201:25444fc301e0

Branch merge
author Arnaud Bergeron <abergeron@gmail.com>
date Wed, 03 Mar 2010 16:46:16 -0500
parents e656edaedb48
children 6ea5dcf0541e e1f5f66dd7dd
diff -r 3f2cc90ad51c -r 25444fc301e0 deep/stacked_dae/nist_sda.py
--- a/deep/stacked_dae/nist_sda.py
+++ b/deep/stacked_dae/nist_sda.py
@@ -19,21 +19,21 @@
 
 from jobman import DD
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
-from utils import produit_croise_jobs
+from utils import produit_cartesien_jobs
 
 from sgd_optimization import SdaSgdOptimizer
 
 from ift6266.utils.scalar_series import *
 
 TEST_CONFIG = False
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
 
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/fsavard_sda2'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
 REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
 if TEST_CONFIG:
@@ -41,10 +41,14 @@
     MAX_FINETUNING_EPOCHS = 2
     REDUCE_EVERY = 10
 
 EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
         'corruption_levels': [0.1,0.2,0.3],
         'minibatch_size': [20],
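From the five keys visible in this hunk alone, the grid already yields 2 * 2 * 2 * 3 * 1 = 24 combinations; the JOB_VALS keys in the elided lines multiply that count further. A generic way to count the jobs such a grid expands to (an illustrative sketch, not code from this changeset):

    # Number of jobs a grid of hyperparameter value lists expands to:
    # the product of the lengths of all the value lists.
    def count_jobs(job_vals):
        n = 1
        for values in job_vals.values():
            n *= len(values)
        return n

    print count_jobs(JOB_VALS)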
@@ -61,46 +65,47 @@
         'corruption_levels':0.2,
         'minibatch_size':20,
         #'reduce_train_to':300,
         'num_hidden_layers':2})
 
+# Function called by jobman upon launching each job
+# Its path is the one given when inserting jobs:
+# ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
 def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
     channel.save()
 
     workingdir = os.getcwd()
 
     print "Will load NIST"
 
-    nist = NIST(20)
+    nist = NIST(minibatch_size=20)
 
     print "NIST loaded"
 
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
     rtt = None
     if state.has_key('reduce_train_to'):
         rtt = state['reduce_train_to']
     elif REDUCE_TRAIN_TO:
         rtt = REDUCE_TRAIN_TO
 
     if rtt:
-        print "Reducing training set to "+str( rtt)+ " examples"
+        print "Reducing training set to "+str(rtt)+ " examples"
         nist.reduce_train_set(rtt)
 
     train,valid,test = nist.get_tvt()
     dataset = (train,valid,test)
 
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
-    hls = state.hidden_layers_sizes
-    cl = state.corruption_levels
-    nhl = state.num_hidden_layers
-    state.hidden_layers_sizes = [hls] * nhl
-    state.corruption_levels = [cl] * nhl
-
-    # b,b',W for each hidden layer + b,W of last layer (logreg)
-    numparams = nhl * 3 + 2
+    # b,b',W for each hidden layer
+    # + b,W of last layer (logreg)
+    numparams = state.num_hidden_layers * 3 + 2
     series_mux = None
     series_mux = create_series(workingdir, numparams)
 
     print "Creating optimizer with state, ", state
 
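numparams counts the parameter arrays that the "params" series will track: a weight matrix W, a hidden bias b and a reconstruction bias b' for each denoising-autoencoder layer, plus the W and b of the final logistic regression layer. With num_hidden_layers = 2, as in DEFAULT_HP_NIST above, that gives 2 * 3 + 2 = 8 arrays.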
@@ -112,15 +117,14 @@
     channel.save()
 
     optimizer.finetune()
     channel.save()
 
-    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
-    channel.save()
-
     return channel.COMPLETE
 
+# These Series objects are used to save various statistics
+# during the training.
 def create_series(basedir, numparams):
     mux = SeriesMultiplexer()
 
     # comment out series we don't want to save
     mux.add_series(AccumulatorSeries(name="reconstruction_error",
@@ -138,12 +142,15 @@
 
     mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
 
     return mux
 
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
 def jobman_insert_nist():
-    jobs = produit_croise_jobs(JOB_VALS)
+    jobs = produit_cartesien_jobs(JOB_VALS)
 
     db = jobman.sql.db(JOBDB)
     for job in jobs:
         job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
         jobman.sql.insert_dict(job, db)
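produit_cartesien_jobs itself lives in utils.py and is not part of this changeset; judging from the comment above, it expands JOB_VALS into one flat mapping per combination of values. A minimal sketch of such a Cartesian-product expansion (an illustrative stand-in, not the project's implementation):

    import itertools

    def cartesian_jobs_sketch(job_vals):
        # job_vals maps each hyperparameter name to the list of
        # values it can take; yield one {name: value} dict per
        # combination of one value per key.
        keys = sorted(job_vals.keys())
        for combo in itertools.product(*[job_vals[k] for k in keys]):
            yield dict(zip(keys, combo))

Each resulting mapping is then tagged with EXPERIMENT_PATH and inserted with jobman.sql.insert_dict, as in the loop above.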
@@ -225,39 +232,10 @@
 
     print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
 
     raw_input("Press any key")
 
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
-    global DEFAULT_HP_NIST
-    hp = hp and hp or DEFAULT_HP_NIST
-
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20, reduce_train_to=100)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    print train[0][15]
-    print type(train[0][1])
-
-
-    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
-    optimizer.train()
-
 if __name__ == '__main__':
 
     import sys
 
     args = sys.argv[1:]
@@ -275,7 +253,6 @@
         jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
 
     elif len(args) > 0 and args[0] == 'estimate':
         estimate_total_time()
     else:
-        sgd_optimization_nist()
-
+        print "Bad arguments"
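The branch above runs jobman_entrypoint locally with a chanmock defined in the elided lines. Going only by what the entrypoint uses of its channel (save() and the COMPLETE return value), a stand-alone mock might look like this (an assumed shape for illustration, not chanmock's actual definition):

    class MockChannel(object):
        # Minimal stand-in for a jobman channel: jobman_entrypoint
        # only calls channel.save() and returns channel.COMPLETE.
        COMPLETE = 'COMPLETE'
        def save(self):
            pass

    # jobman_entrypoint(DEFAULT_HP_NIST, MockChannel())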