comparison: deep/stacked_dae/nist_sda.py @ 207:43af74a348ac

Merge branches from main repo.

author      Arnaud Bergeron <abergeron@gmail.com>
date        Thu, 04 Mar 2010 20:43:21 -0500
parents     10a801240bfc
children    acb942530923
comparing   206:e12702b88a2d (before) with 207:43af74a348ac (after)
@@ -19,41 +19,43 @@
 
 from jobman import DD
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
-from utils import produit_croise_jobs
+from utils import produit_cartesien_jobs
 
 from sgd_optimization import SdaSgdOptimizer
 
-SERIES_AVAILABLE = False
-try:
-    from scalar_series import *
-    SERIES_AVAILABLE = True
-except ImportError:
-    print "Could not import Series"
+from ift6266.utils.scalar_series import *
+
+##############################################################################
+# GLOBALS
 
 TEST_CONFIG = False
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
-REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 1000
+
 if TEST_CONFIG:
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
     REDUCE_EVERY = 10
 
-EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
-
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
         'corruption_levels': [0.1,0.2,0.3],
         'minibatch_size': [20],
         'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
         'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out
         'num_hidden_layers':[2,3]}
 
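The hunk above renames produit_croise_jobs to produit_cartesien_jobs; both live in utils.py, which this diff does not show. Judging from the new comments, the helper expands JOB_VALS into one job per combination of hyperparameter values. A hypothetical reimplementation, for illustration only:

    # Hypothetical sketch of produit_cartesien_jobs; the real one lives in
    # utils.py and is not shown in this diff.
    import itertools

    def produit_cartesien_jobs(val_dict):
        # Fix the key order so each combination lines up with its values.
        keys = list(val_dict.keys())
        value_lists = [val_dict[k] for k in keys]
        return [dict(zip(keys, combo))
                for combo in itertools.product(*value_lists)]

With the JOB_VALS above, this yields 2*2*2*3*1*1*2*2 = 96 combinations, hence 96 jobs inserted into the jobman DB.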
@@ -60,57 +62,59 @@
 # Just useful for tests... minimal number of epochs
-DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
-        'pretraining_lr':0.01,
-        'pretraining_epochs_per_layer':1,
-        'max_finetuning_epochs':1,
-        'hidden_layers_sizes':1000,
+DEFAULT_HP_NIST = DD({'finetuning_lr':0.1,
+        'pretraining_lr':0.1,
+        'pretraining_epochs_per_layer':20,
+        'max_finetuning_epochs':2,
+        'hidden_layers_sizes':300,
         'corruption_levels':0.2,
         'minibatch_size':20,
-        'reduce_train_to':1000,
-        'num_hidden_layers':1})
+        #'reduce_train_to':300,
+        'num_hidden_layers':2})
 
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs:
+ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+'''
 def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
     channel.save()
 
     workingdir = os.getcwd()
 
     print "Will load NIST"
-    sys.stdout.flush()
 
-    nist = NIST(20)
+    nist = NIST(minibatch_size=20)
 
     print "NIST loaded"
-    sys.stdout.flush()
 
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
     rtt = None
     if state.has_key('reduce_train_to'):
         rtt = state['reduce_train_to']
     elif REDUCE_TRAIN_TO:
         rtt = REDUCE_TRAIN_TO
 
     if rtt:
-        print "Reducing training set to ", rtt, " examples"
+        print "Reducing training set to "+str(rtt)+ " examples"
         nist.reduce_train_set(rtt)
 
     train,valid,test = nist.get_tvt()
     dataset = (train,valid,test)
 
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
-    hls = state.hidden_layers_sizes
-    cl = state.corruption_levels
-    nhl = state.num_hidden_layers
-    state.hidden_layers_sizes = [hls] * nhl
-    state.corruption_levels = [cl] * nhl
-
-    # b,b',W for each hidden layer + b,W of last layer (logreg)
-    numparams = nhl * 3 + 2
+    # b,b',W for each hidden layer
+    # + b,W of last layer (logreg)
+    numparams = state.num_hidden_layers * 3 + 2
     series_mux = None
-    if SERIES_AVAILABLE:
-        series_mux = create_series(workingdir, numparams)
+    series_mux = create_series(workingdir, numparams)
+
+    print "Creating optimizer with state, ", state
 
     optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \
                     n_ins=n_ins, n_outs=n_outs,\
                     input_divider=255.0, series_mux=series_mux)
 
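The rewritten numparams line follows directly from the comment next to it: each hidden (denoising autoencoder) layer owns three parameter arrays, W, b and the reconstruction bias b', and the final logistic regression layer adds its own W and b. A quick check with the new DEFAULT_HP_NIST values:

    # Worked check of the parameter count passed to create_series.
    num_hidden_layers = 2                 # DEFAULT_HP_NIST['num_hidden_layers']
    numparams = num_hidden_layers * 3 + 2
    assert numparams == 8                 # 2*(W, b, b') + (W, b) of the logreg layer

This count is what ParamsArrayStats, registered in create_series below, uses to track the parameter arrays.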
@@ -118,15 +122,14 @@
     channel.save()
 
     optimizer.finetune()
     channel.save()
 
-    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
-    channel.save()
-
     return channel.COMPLETE
 
+# These Series objects are used to save various statistics
+# during the training.
 def create_series(basedir, numparams):
     mux = SeriesMultiplexer()
 
     # comment out series we don't want to save
     mux.add_series(AccumulatorSeries(name="reconstruction_error",
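The AccumulatorSeries call is cut off here because the unchanged middle of create_series is collapsed in this view. The visible pattern is nonetheless clear: every statistic is registered by name on a SeriesMultiplexer, the optimizer writes through the single series_mux handle, and disabling a statistic means commenting out its add_series line. A hypothetical extra registration, with the directory keyword assumed by analogy with the ParamsArrayStats call below rather than taken from the real scalar_series API:

    # Hypothetical: 'directory' is borrowed from the ParamsArrayStats call
    # below; AccumulatorSeries' other arguments are collapsed out of this
    # view, so none are guessed at here.
    mux.add_series(AccumulatorSeries(name="total_loss", directory=basedir))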
@@ -144,12 +147,15 @@
 
     mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
 
     return mux
 
+# Perform insertion into the Postgres DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
 def jobman_insert_nist():
-    jobs = produit_croise_jobs(JOB_VALS)
+    jobs = produit_cartesien_jobs(JOB_VALS)
 
     db = jobman.sql.db(JOBDB)
     for job in jobs:
         job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
         jobman.sql.insert_dict(job, db)
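For context on what insertion buys: each job dict carries its hyperparameters plus the jobman.sql.EXPERIMENT key pointing at EXPERIMENT_PATH, and jobman workers later dequeue a row and call jobman_entrypoint(state, channel) with those values. A sketch of queueing a single hand-picked job instead of the full Cartesian product, using only calls already present in this file (the hyperparameter values are illustrative):

    import jobman, jobman.sql   # already imported at the top of the file

    db = jobman.sql.db(JOBDB)
    job = {'pretraining_lr': 0.1, 'finetuning_lr': 0.1,
           'pretraining_epochs_per_layer': 10, 'hidden_layers_sizes': 300,
           'corruption_levels': 0.2, 'minibatch_size': 20,
           'max_finetuning_epochs': MAX_FINETUNING_EPOCHS,
           'num_hidden_layers': 2}
    job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
    jobman.sql.insert_dict(job, db)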
@@ -231,39 +237,10 @@
 
     print "Lengths: ", len(tr[0]), len(v[0]), len(te[0])
 
     raw_input("Press any key")
 
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
-    global DEFAULT_HP_NIST
-    hp = hp and hp or DEFAULT_HP_NIST
-
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20, reduce_train_to=100)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    print train[0][15]
-    print type(train[0][1])
-
-
-    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
-    optimizer.train()
-
 if __name__ == '__main__':
 
     import sys
 
     args = sys.argv[1:]
@@ -273,13 +250,11 @@
 
     elif len(args) > 0 and args[0] == 'jobman_insert':
         jobman_insert_nist()
 
     elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
-        chanmock = DD({'COMPLETE':0})
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
         jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
 
-    elif len(args) > 0 and args[0] == 'estimate':
-        estimate_total_time()
     else:
-        sgd_optimization_nist()
+        print "Bad arguments"
 
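The chanmock change in this last hunk is the one behavioural fix: jobman_entrypoint calls channel.save() several times, so the old bare mock DD({'COMPLETE':0}) would break on the first save() when the entrypoint is exercised locally through the test_jobman_entrypoint argument. Giving the mock a no-op save keeps that code path runnable without a real jobman channel:

    # The mock only needs the two channel attributes the entrypoint touches.
    chanmock = DD({'COMPLETE': 0, 'save': (lambda: None)})
    chanmock.save()                               # harmless no-op
    jobman_entrypoint(DEFAULT_HP_NIST, chanmock)  # full run, outside jobman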