ift6266: comparison of deep/stacked_dae/nist_sda.py @ 207:43af74a348ac
Merge branches from main repo.
| author | Arnaud Bergeron <abergeron@gmail.com> |
|---|---|
| date | Thu, 04 Mar 2010 20:43:21 -0500 |
| parents | 10a801240bfc |
| children | acb942530923 |
comparison of 206:e12702b88a2d and 207:43af74a348ac
```diff
--- a/deep/stacked_dae/nist_sda.py  206:e12702b88a2d
+++ b/deep/stacked_dae/nist_sda.py  207:43af74a348ac
@@ -19,16 +19,14 @@
 
 from jobman import DD
 import jobman, jobman.sql
 from pylearn.io import filetensor
 
-from utils import produit_croise_jobs
+from utils import produit_cartesien_jobs
 
 from sgd_optimization import SdaSgdOptimizer
 
-SERIES_AVAILABLE = False
-try:
-    from scalar_series import *
-    SERIES_AVAILABLE = True
-except ImportError:
-    print "Could not import Series"
+from ift6266.utils.scalar_series import *
+
+##############################################################################
+# GLOBALS
 
```
```diff
@@ -35,14 +33,16 @@
 TEST_CONFIG = False
 
 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2'
+JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
+EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
 
 REDUCE_TRAIN_TO = None
 MAX_FINETUNING_EPOCHS = 1000
-REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
+# number of minibatches before taking means for valid error etc.
+REDUCE_EVERY = 1000
+
 if TEST_CONFIG:
     REDUCE_TRAIN_TO = 1000
     MAX_FINETUNING_EPOCHS = 2
     REDUCE_EVERY = 10
 
```
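The relocated `EXPERIMENT_PATH` constant (deleted from its old spot in the next hunk) is the dotted import path that jobman stores with each inserted job and later resolves to call `jobman_entrypoint(state, channel)`. A sketch of that resolution step, using standard-library machinery rather than jobman's internals (the helper name is mine, not from the changeset):

```python
# Illustration of resolving a dotted experiment path to a callable;
# jobman does something equivalent when it dequeues a job.
import importlib

def resolve_entrypoint(dotted_path):
    module_name, func_name = dotted_path.rsplit('.', 1)
    module = importlib.import_module(module_name)
    return getattr(module, func_name)

# entry = resolve_entrypoint("ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint")
# entry(state, channel)   # roughly what jobman does per queued job
```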
```diff
@@ -49,22 +49,24 @@
-EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
-
+# Possible values the hyperparameters can take. These are then
+# combined with produit_cartesien_jobs so we get a list of all
+# possible combinations, each one resulting in a job inserted
+# in the jobman DB.
 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
         'pretraining_epochs_per_layer': [10,20],
         'hidden_layers_sizes': [300,800],
         'corruption_levels': [0.1,0.2,0.3],
         'minibatch_size': [20],
         'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
         'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out
         'num_hidden_layers':[2,3]}
 
 # Just useful for tests... minimal number of epochs
-DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
-                      'pretraining_lr':0.01,
-                      'pretraining_epochs_per_layer':1,
-                      'max_finetuning_epochs':1,
-                      'hidden_layers_sizes':1000,
+DEFAULT_HP_NIST = DD({'finetuning_lr':0.1,
+                      'pretraining_lr':0.1,
+                      'pretraining_epochs_per_layer':20,
+                      'max_finetuning_epochs':2,
+                      'hidden_layers_sizes':300,
                       'corruption_levels':0.2,
                       'minibatch_size':20,
-                      'reduce_train_to':1000,
-                      'num_hidden_layers':1})
+                      #'reduce_train_to':300,
+                      'num_hidden_layers':2})
 
```
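The new comment block above `JOB_VALS` describes the job-generation scheme: every combination of the listed hyperparameter values becomes one job. A minimal sketch of that cartesian-product expansion, assuming plain dicts stand in for jobman mappings (the real `produit_cartesien_jobs` lives in `utils.py` and is not part of this changeset):

```python
# Illustrative re-implementation only; the real produit_cartesien_jobs
# in utils.py returns jobman-compatible mappings.
from itertools import product

def cartesian_jobs(job_vals):
    """Expand {param_name: [values]} into one dict per combination."""
    names = sorted(job_vals)
    return [dict(zip(names, combo))
            for combo in product(*[job_vals[n] for n in names])]

# With JOB_VALS above: 2 * 2 * 2 * 3 * 1 * 1 * 2 * 2 = 96 jobs,
# each destined to become one row in the jobman DB.
```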
```diff
@@ -71,30 +73,36 @@
 
+'''
+Function called by jobman upon launching each job
+Its path is the one given when inserting jobs:
+ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+'''
 def jobman_entrypoint(state, channel):
+    # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
     channel.save()
 
     workingdir = os.getcwd()
 
     print "Will load NIST"
-    sys.stdout.flush()
 
-    nist = NIST(20)
+    nist = NIST(minibatch_size=20)
 
     print "NIST loaded"
-    sys.stdout.flush()
 
+    # For test runs, we don't want to use the whole dataset so
+    # reduce it to fewer elements if asked to.
     rtt = None
     if state.has_key('reduce_train_to'):
         rtt = state['reduce_train_to']
     elif REDUCE_TRAIN_TO:
         rtt = REDUCE_TRAIN_TO
 
     if rtt:
-        print "Reducing training set to ", rtt, " examples"
+        print "Reducing training set to "+str(rtt)+ " examples"
         nist.reduce_train_set(rtt)
 
     train,valid,test = nist.get_tvt()
     dataset = (train,valid,test)
 
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
 
```
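For reference, `NIST` is the dataset wrapper presumably defined in the folded part of this file. A rough sketch of the two methods used above, under the assumption that `reduce_train_set` simply truncates the training split in place and `get_tvt` returns the three splits:

```python
# Hypothetical sketch of the NIST methods used by jobman_entrypoint;
# the actual class lives elsewhere in nist_sda.py (not shown here).
class NISTSketch(object):
    def __init__(self, train, valid, test):
        self.train, self.valid, self.test = train, valid, test

    def reduce_train_set(self, n):
        # keep only the first n (examples, labels) pairs
        x, y = self.train
        self.train = (x[:n], y[:n])

    def get_tvt(self):
        return self.train, self.valid, self.test
```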
```diff
@@ -101,16 +109,12 @@
-    hls = state.hidden_layers_sizes
-    cl = state.corruption_levels
-    nhl = state.num_hidden_layers
-    state.hidden_layers_sizes = [hls] * nhl
-    state.corruption_levels = [cl] * nhl
-
-    # b,b',W for each hidden layer + b,W of last layer (logreg)
-    numparams = nhl * 3 + 2
+    # b,b',W for each hidden layer
+    # + b,W of last layer (logreg)
+    numparams = state.num_hidden_layers * 3 + 2
     series_mux = None
-    if SERIES_AVAILABLE:
-        series_mux = create_series(workingdir, numparams)
+    series_mux = create_series(workingdir, numparams)
+
+    print "Creating optimizer with state, ", state
 
     optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \
                     n_ins=n_ins, n_outs=n_outs,\
                     input_divider=255.0, series_mux=series_mux)
 
```
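The `numparams` arithmetic counts parameter arrays for the `ParamsArrayStats` series: per the source comment, each hidden denoising-autoencoder layer contributes a weight matrix `W`, an encoder bias `b`, and a decoder bias `b'`, and the logistic-regression output layer adds one more `W` and `b`. A worked check:

```python
def count_param_arrays(num_hidden_layers):
    # 3 arrays (W, b, b') per hidden layer + 2 (W, b) for the logreg layer
    return num_hidden_layers * 3 + 2

assert count_param_arrays(2) == 8    # DEFAULT_HP_NIST uses 2 hidden layers
assert count_param_arrays(3) == 11   # largest setting in JOB_VALS
```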
```diff
@@ -118,15 +122,14 @@
     channel.save()
 
     optimizer.finetune()
     channel.save()
 
-    pylearn.version.record_versions(state,[theano,ift6266,pylearn])
-    channel.save()
-
     return channel.COMPLETE
 
+# These Series objects are used to save various statistics
+# during the training.
 def create_series(basedir, numparams):
     mux = SeriesMultiplexer()
 
     # comment out series we don't want to save
     mux.add_series(AccumulatorSeries(name="reconstruction_error",
```
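`SeriesMultiplexer` and the series classes come from `ift6266.utils.scalar_series`, now imported unconditionally (first hunk). A toy stand-in conveying the idea, assuming the multiplexer fans recorded values out to series registered by name; this is a guess at the shape of the interface, not the real API:

```python
# Toy illustration only; see ift6266.utils.scalar_series for the real classes.
class ToySeries(object):
    def __init__(self, name):
        self.name, self.values = name, []

    def append(self, value):
        self.values.append(value)

class ToyMultiplexer(object):
    def __init__(self):
        self._series = {}

    def add_series(self, series):
        self._series[series.name] = series

    def append(self, name, value):
        # dispatch a recorded statistic to the series of that name
        self._series[name].append(value)
```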
```diff
@@ -144,12 +147,15 @@
 
     mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
 
     return mux
 
+# Perform insertion into the Postgre DB based on combination
+# of hyperparameter values above
+# (see comment for produit_cartesien_jobs() to know how it works)
 def jobman_insert_nist():
-    jobs = produit_croise_jobs(JOB_VALS)
+    jobs = produit_cartesien_jobs(JOB_VALS)
 
     db = jobman.sql.db(JOBDB)
     for job in jobs:
         job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
         jobman.sql.insert_dict(job, db)
```
```diff
@@ -231,39 +237,10 @@
 
     print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
 
     raw_input("Press any key")
 
-# hp for hyperparameters
-def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
-    global DEFAULT_HP_NIST
-    hp = hp and hp or DEFAULT_HP_NIST
-
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20, reduce_train_to=100)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
-    print train[0][15]
-    print type(train[0][1])
-
-
-    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
-
-    n_ins = 32*32
-    n_outs = 62 # 10 digits, 26*2 (lower, capitals)
-
-    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
-    optimizer.train()
-
 if __name__ == '__main__':
 
     import sys
 
     args = sys.argv[1:]
```
```diff
@@ -273,13 +250,11 @@
 
     elif len(args) > 0 and args[0] == 'jobman_insert':
         jobman_insert_nist()
 
     elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
-        chanmock = DD({'COMPLETE':0})
+        chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
         jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
 
-    elif len(args) > 0 and args[0] == 'estimate':
-        estimate_total_time()
     else:
-        sgd_optimization_nist()
+        print "Bad arguments"
 
```
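The updated `test_jobman_entrypoint` branch works because jobman's `DD` exposes its keys as attributes, so `chanmock.COMPLETE` and `chanmock.save()` both resolve; the no-op lambda satisfies the several `channel.save()` calls inside `jobman_entrypoint`. An equivalent mock written as a plain class, for clarity (naming mine, not from the changeset):

```python
# Equivalent stand-in for the DD-based mock channel used above.
class FakeChannel(object):
    COMPLETE = 0    # value jobman_entrypoint returns on success

    def save(self):
        pass        # jobman_entrypoint calls channel.save(); ignore it

# usage sketch: jobman_entrypoint(DEFAULT_HP_NIST, FakeChannel())
```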