ift6266 Mercurial repository: comparison of scripts/stacked_dae/nist_sda.py @ 139:7d8366fb90bf
Added __init__.py files throughout the tree so the scripts can be used with jobman-style module paths, and made quite a few changes in stacked_dae so that work already done can be reused across tests where the pretraining is the same.
author | fsavard |
date | Mon, 22 Feb 2010 13:38:25 -0500 |
parents | 5c79a2557f2f |
children | 3346fcd3818b |
138:128507ac4edf | 139:7d8366fb90bf |
---|---|
4 import numpy | 4 import numpy |
5 import theano | 5 import theano |
6 import time | 6 import time |
7 import theano.tensor as T | 7 import theano.tensor as T |
8 from theano.tensor.shared_randomstreams import RandomStreams | 8 from theano.tensor.shared_randomstreams import RandomStreams |
9 | 9 import copy |
10 | |
11 import sys | |
10 import os.path | 12 import os.path |
11 | 13 |
12 from sgd_optimization import sgd_optimization | 14 from sgd_optimization import SdaSgdOptimizer |
13 | 15 |
14 from jobman import DD | 16 from jobman import DD |
17 import jobman, jobman.sql | |
15 from pylearn.io import filetensor | 18 from pylearn.io import filetensor |
16 | 19 |
17 from utils import produit_croise_jobs | 20 from utils import produit_croise_jobs |
18 | 21 |
22 TEST_CONFIG = True | |
23 | |
19 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' | 24 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' |
20 | 25 |
26 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/' | |
27 REDUCE_TRAIN_TO = None | |
28 MAX_FINETUNING_EPOCHS = 1000 | |
29 if TEST_CONFIG: | |
30 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/' | |
31 REDUCE_TRAIN_TO = 1000 | |
32 MAX_FINETUNING_EPOCHS = 2 | |
33 | |
34 JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs' | |
35 JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results' | |
36 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint" | |
37 | |
38 # There used to be | |
39 # 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1] | |
40 # and | |
41 # 'num_hidden_layers':[1,2,3] | |
42 # but this is now handled by a special mechanism in SdaSgdOptimizer | |
43 # to reuse intermediate results (for the same pretraining of the lower layers, | |
44 # we can test many finetuning_lr values) | |
45 JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001], | |
46 'pretraining_epochs_per_layer': [10,20], | |
47 'hidden_layers_sizes': [300,800], | |
48 'corruption_levels': [0.1,0.2], | |
49 'minibatch_size': [20], | |
50 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]} | |
51 FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001] | |
52 NUM_HIDDEN_LAYERS_VALS = [1,2,3] | |
53 | |
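
The grid above is expanded into individual jobs by produit_croise_jobs from utils, which is not part of this diff. A plausible sketch of such a cross-product helper, assuming it returns one dict per combination of values:

```python
# Hypothetical sketch of produit_croise_jobs (the real one lives in
# utils, not shown in this changeset): cross product of all value
# lists, yielding one job dict per combination.
import itertools

def produit_croise_jobs(val_dict):
    keys = val_dict.keys()
    value_lists = [val_dict[k] for k in keys]
    jobs = []
    for combination in itertools.product(*value_lists):
        jobs.append(dict(zip(keys, combination)))
    return jobs

# For JOB_VALS above: 3 * 2 * 2 * 2 * 1 * 1 = 24 jobs.
```
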
21 # Just useful for tests... minimal number of epochs | 54 # Just useful for tests... minimal number of epochs |
22 DEFAULT_HP_NIST = DD({'finetuning_lr':0.1, | 55 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01, |
23 'pretraining_lr':0.1, | 56 'pretraining_lr':0.01, |
24 'pretraining_epochs_per_layer':1, | 57 'pretraining_epochs_per_layer':1, |
25 'max_finetuning_epochs':1, | 58 'max_finetuning_epochs':1, |
26 'hidden_layers_sizes':[1000,1000], | 59 'hidden_layers_sizes':[1000], |
27 'corruption_levels':[0.2,0.2], | 60 'corruption_levels':[0.2], |
28 'minibatch_size':20}) | 61 'minibatch_size':20}) |
29 | 62 |
30 def jobman_entrypoint_nist(state, channel): | 63 def jobman_entrypoint(state, channel): |
31 sgd_optimization_nist(state) | 64 state = copy.copy(state) |
65 | |
66 print "Will load NIST" | |
67 nist = NIST(20) | |
68 print "NIST loaded" | |
69 | |
70 rtt = None | |
71 if state.has_key('reduce_train_to'): | |
72 rtt = state['reduce_train_to'] | |
73 elif REDUCE_TRAIN_TO: | |
74 rtt = REDUCE_TRAIN_TO | |
75 | |
76 if rtt: | |
77 print "Reducing training set to ", rtt, " examples" | |
78 nist.reduce_train_set(rtt) | |
79 | |
80 train,valid,test = nist.get_tvt() | |
81 dataset = (train,valid,test) | |
82 | |
83 n_ins = 32*32 | |
84 n_outs = 62 # 10 digits, 26*2 (lower, capitals) | |
85 | |
86 db = jobman.sql.db(JOBDB_RESULTS) | |
87 optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\ | |
88 input_divider=255.0, job_tree=True, results_db=db, \ | |
89 experiment=EXPERIMENT_PATH, \ | |
90 finetuning_lr_to_try=FINETUNING_LR_VALS, \ | |
91 num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS) | |
92 optimizer.train() | |
93 | |
94 return channel.COMPLETE | |
95 | |
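
For a quick local run of this entrypoint without the jobman scheduler, all that is needed is a state carrying the hyperparameters and a channel exposing COMPLETE; the test_job_tree branch in __main__ below does exactly this (note the entrypoint still opens the results DB unless the sql inserts are commented out, as the comment there warns). A minimal sketch, assuming jobman's DD for the mock channel so the channel.COMPLETE attribute access resolves:

```python
# Minimal local-run sketch (assumes this module's globals are in scope).
# DD is a dict with attribute access, so channel.COMPLETE works.
from jobman import DD
import copy

chanmock = DD({'COMPLETE': 0})
state = copy.copy(DEFAULT_HP_NIST)
state.update({'reduce_train_to': 100})  # keep the test run cheap
jobman_entrypoint(state, chanmock)
```
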
96 def estimate_pretraining_time(job): | |
97 job = DD(job) | |
98 # time spent on pretraining estimated as O(n^2) where n = num hidden units | |
99 # no need to multiply by num_hidden_layers, as results from num=1 | |
100 # are reused for num=2 or 3, so in the end we get the same time | |
101 # as if we were training a single layer 3 times | |
102 # constants: | |
103 # - 20 mins to pretrain a layer with 1000 units (per 1 epoch) | |
104 # - 12 mins to finetune (per 1 epoch) | |
105 # basically the job_tree trick speeds up pretraining by a factor of | |
106 # len(FINETUNING_LR_VALS), since pretraining is reused across finetuning_lr values, | |
107 # and gives a further x2 speedup by reusing previously trained layers | |
108 # when exploring num_hidden_layers | |
109 return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \ | |
110 * job.hidden_layers_sizes * job.hidden_layers_sizes) | |
111 | |
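
As a sanity check on the formula, using the constant stated above (20 minutes per pretraining epoch for a 1000-unit layer) at the two extremes of the JOB_VALS grid:

```python
# Worked examples, using the 'hidden_layers_sizes' key that JOB_VALS
# actually defines (estimate_pretraining_time wraps the dict in DD).
# 20 epochs/layer, 800 units: 20 * 20 * (800/1000.)**2 = 256 minutes
print estimate_pretraining_time({'pretraining_epochs_per_layer': 20,
                                 'hidden_layers_sizes': 800})  # -> 256.0
# 10 epochs/layer, 300 units: 10 * 20 * (300/1000.)**2 = 18 minutes
print estimate_pretraining_time({'pretraining_epochs_per_layer': 10,
                                 'hidden_layers_sizes': 300})  # -> 18.0
```
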
112 def estimate_total_time(): | |
113 jobs = produit_croise_jobs(JOB_VALS) | |
114 sumtime = 0.0 | |
115 sum_without = 0.0 | |
116 for job in jobs: | |
117 sumtime += estimate_pretraining_time(job) | |
118 # 12 mins per finetuning epoch * 20 epochs | |
119 # one full finetuning run per finetuning_lr per pretraining combination | |
120 sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS) | |
121 sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20 | |
122 print "num jobs=", len(jobs) | |
123 print "estimate", sumtime/60, " hours" | |
124 print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without | |
32 | 125 |
33 def jobman_insert_nist(): | 126 def jobman_insert_nist(): |
34 vals = {'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1], | 127 jobs = produit_croise_jobs(JOB_VALS) |
35 'pretraining_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1], | 128 |
36 'pretraining_epochs_per_layer': [2,5,20], | 129 db = jobman.sql.db(JOBDB_JOBS) |
37 'hidden_layer_sizes': [100,300,1000], | |
38 'num_hidden_layers':[1,2,3], | |
39 'corruption_levels': [0.1,0.2,0.4], | |
40 'minibatch_size': [5,20,100]} | |
41 | |
42 jobs = produit_croise_jobs(vals) | |
43 | |
44 for job in jobs: | 130 for job in jobs: |
45 insert_job(job) | 131 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) |
46 | 132 jobman.sql.insert_dict(job, db) |
133 | |
134 print "inserted" | |
47 | 135 |
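
Once inserted, these jobs would be consumed by jobman workers pointed at the same table. A plausible invocation (the exact CLI syntax depends on the installed jobman version; the URL shown is the sandbox table produced by the TEST_CONFIG branch above):

```
# Hypothetical worker launch: each process pulls jobs from the table
# and runs EXPERIMENT_PATH on them until the queue is empty.
jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/fsavard_sda1_jobs' .
```
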
48 class NIST: | 136 class NIST: |
49 def __init__(self, minibatch_size, basepath=None): | 137 def __init__(self, minibatch_size, basepath=None, reduce_train_to=None): |
50 global NIST_ALL_LOCATION | 138 global NIST_ALL_LOCATION |
51 | 139 |
52 self.minibatch_size = minibatch_size | 140 self.minibatch_size = minibatch_size |
53 self.basepath = basepath and basepath or NIST_ALL_LOCATION | 141 self.basepath = basepath and basepath or NIST_ALL_LOCATION |
54 | 142 |
59 self.test = [None, None] | 147 self.test = [None, None] |
60 | 148 |
61 self.load_train_test() | 149 self.load_train_test() |
62 | 150 |
63 self.valid = [[], []] | 151 self.valid = [[], []] |
64 #self.split_train_valid() | 152 self.split_train_valid() |
65 | 153 if reduce_train_to: |
154 self.reduce_train_set(reduce_train_to) | |
66 | 155 |
67 def get_tvt(self): | 156 def get_tvt(self): |
68 return self.train, self.valid, self.test | 157 return self.train, self.valid, self.test |
69 | 158 |
70 def set_filenames(self): | 159 def set_filenames(self): |
82 for i, fn in enumerate(filenames): | 171 for i, fn in enumerate(filenames): |
83 f = open(os.path.join(self.basepath, fn)) | 172 f = open(os.path.join(self.basepath, fn)) |
84 pair[i] = filetensor.read(f) | 173 pair[i] = filetensor.read(f) |
85 f.close() | 174 f.close() |
86 | 175 |
176 def reduce_train_set(self, max): | |
177 self.train[0] = self.train[0][:max] | |
178 self.train[1] = self.train[1][:max] | |
179 | |
180 if max < len(self.test[0]): | |
181 for ar in (self.test, self.valid): | |
182 ar[0] = ar[0][:max] | |
183 ar[1] = ar[1][:max] | |
184 | |
87 def split_train_valid(self): | 185 def split_train_valid(self): |
88 test_len = len(self.test[0]) | 186 test_len = len(self.test[0]) |
89 | 187 |
90 new_train_x = self.train[0][:-test_len] | 188 new_train_x = self.train[0][:-test_len] |
91 new_train_y = self.train[1][:-test_len] | 189 new_train_y = self.train[1][:-test_len] |
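
The rest of split_train_valid is elided by the diff context, but the slicing pattern is clear: the validation set is carved from the tail of the training set, with as many examples as the test set. A toy illustration with made-up sizes:

```python
# Toy illustration (hypothetical sizes, not the real NIST counts).
import numpy

train_x = numpy.arange(10)          # pretend 10 training examples
test_len = 3                        # pretend the test set has 3
new_train_x = train_x[:-test_len]   # examples 0..6 stay in train
valid_x = train_x[-test_len:]       # examples 7..9 become validation
```
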
119 | 217 |
120 print "Will load NIST" | 218 print "Will load NIST" |
121 | 219 |
122 import time | 220 import time |
123 t1 = time.time() | 221 t1 = time.time() |
124 nist = NIST(20) | 222 nist = NIST(20, reduce_train_to=100) |
125 t2 = time.time() | 223 t2 = time.time() |
126 | 224 |
127 print "NIST loaded. time delta = ", t2-t1 | 225 print "NIST loaded. time delta = ", t2-t1 |
128 | 226 |
129 train,valid,test = nist.get_tvt() | 227 train,valid,test = nist.get_tvt() |
130 dataset = (train,valid,test) | 228 dataset = (train,valid,test) |
131 | 229 |
132 print "Lenghts train, valid, test: ", len(train[0]), len(valid[0]), len(test[0]) | 230 print train[0][15] |
231 print type(train[0][1]) | |
232 | |
233 | |
234 print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0]) | |
133 | 235 |
134 n_ins = 32*32 | 236 n_ins = 32*32 |
135 n_outs = 62 # 10 digits, 26*2 (lower, capitals) | 237 n_outs = 62 # 10 digits, 26*2 (lower, capitals) |
136 | 238 |
137 sgd_optimization(dataset, hp, n_ins, n_outs) | 239 optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0) |
240 optimizer.train() | |
138 | 241 |
139 if __name__ == '__main__': | 242 if __name__ == '__main__': |
140 | 243 |
141 import sys | 244 import sys |
142 | 245 |
143 args = sys.argv[1:] | 246 args = sys.argv[1:] |
144 | 247 |
145 if len(args) > 0 and args[0] == 'load_nist': | 248 if len(args) > 0 and args[0] == 'load_nist': |
146 test_load_nist() | 249 test_load_nist() |
147 | 250 |
251 elif len(args) > 0 and args[0] == 'jobman_insert': | |
252 jobman_insert_nist() | |
253 elif len(args) > 0 and args[0] == 'test_job_tree': | |
254 # don't forget to comment out the sql inserts and set reduce_train_to=100 | |
255 print "TESTING JOB TREE" | |
256 chanmock = DD({'COMPLETE':0}) | |
257 hp = copy.copy(DEFAULT_HP_NIST) | |
258 hp.update({'reduce_train_to':100}) | |
259 jobman_entrypoint(hp, chanmock) | |
260 elif len(args) > 0 and args[0] == 'estimate': | |
261 estimate_total_time() | |
148 else: | 262 else: |
149 sgd_optimization_nist() | 263 sgd_optimization_nist() |
150 | 264 |
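
To summarize the dispatch above, the script's command-line modes would be invoked as follows (run from the directory containing nist_sda.py):

```
python nist_sda.py load_nist      # time a full NIST load (test_load_nist)
python nist_sda.py jobman_insert  # expand JOB_VALS and fill the jobman table
python nist_sda.py test_job_tree  # local entrypoint run, train set reduced to 100
python nist_sda.py estimate       # print total time estimates for the whole grid
python nist_sda.py                # default: run sgd_optimization_nist() locally
```
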