Mercurial repository: ift6266
deep/stacked_dae/nist_sda.py @ changeset 167:1f5937e9e530
"More moves - transformations into data_generation, added 'deep' folder"

author:   Dumitru Erhan <dumitru.erhan@gmail.com>
date:     Fri, 26 Feb 2010 14:15:38 -0500
parents:  scripts/stacked_dae/nist_sda.py@3346fcd3818b
children: b9ea8e2d071a
comparison: 166:17ae5a1a4dd1 -> 167:1f5937e9e530
#!/usr/bin/python
# coding: utf-8

import numpy
import theano
import time
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import copy

import sys
import os.path

from sgd_optimization import SdaSgdOptimizer

from jobman import DD
import jobman, jobman.sql
from pylearn.io import filetensor

from utils import produit_croise_jobs

TEST_CONFIG = False

NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'

JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
REDUCE_TRAIN_TO = None
MAX_FINETUNING_EPOCHS = 1000
if TEST_CONFIG:
    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
    REDUCE_TRAIN_TO = 1000
    MAX_FINETUNING_EPOCHS = 2

JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"

# There used to be
#   'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
# and
#   'num_hidden_layers': [1, 2, 3]
# in JOB_VALS, but these are now handled by a special mechanism in
# SdaSgdOptimizer that reuses intermediate results (for the same training
# of the lower layers, we can try many finetuning_lr values).
JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
            'pretraining_epochs_per_layer': [10,20],
            'hidden_layers_sizes': [300,800],
            'corruption_levels': [0.1,0.2],
            'minibatch_size': [20],
            'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
NUM_HIDDEN_LAYERS_VALS = [1,2,3]
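
# Size of the grid, for reference (assuming produit_croise_jobs enumerates
# the Cartesian product of the value lists above, one dict per combination):
#   jobs = produit_croise_jobs(JOB_VALS)
#   len(jobs) == 3 * 2 * 2 * 2 * 1 * 1 == 24 pretraining configurations.
# Each of those is then reused across the 3 FINETUNING_LR_VALS and the 3
# NUM_HIDDEN_LAYERS_VALS inside SdaSgdOptimizer, i.e. roughly 24 * 3 * 3 = 216
# (pretraining, finetuning) combinations explored in total.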

# Just useful for tests... minimal number of epochs
DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
                      'pretraining_lr':0.01,
                      'pretraining_epochs_per_layer':1,
                      'max_finetuning_epochs':1,
                      'hidden_layers_sizes':[1000],
                      'corruption_levels':[0.2],
                      'minibatch_size':20})

def jobman_entrypoint(state, channel):
    state = copy.copy(state)

    print "Will load NIST"
    nist = NIST(20)
    print "NIST loaded"

    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if rtt:
        print "Reducing training set to ", rtt, " examples"
        nist.reduce_train_set(rtt)

    train, valid, test = nist.get_tvt()
    dataset = (train, valid, test)

    n_ins = 32*32
    n_outs = 62 # 10 digits, 26*2 (lower, capitals)

    db = jobman.sql.db(JOBDB_RESULTS)
    optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,
                                input_divider=255.0, job_tree=True, results_db=db,
                                experiment=EXPERIMENT_PATH,
                                finetuning_lr_to_try=FINETUNING_LR_VALS,
                                num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
    optimizer.train()

    return channel.COMPLETE

def estimate_pretraining_time(job):
    job = DD(job)
    # Time spent on pretraining is estimated as O(n^2), where n = num hidden units.
    # No need to multiply by num_hidden_layers: results from num=1 are reused
    # for num=2 and 3, so in the end we get the same time as if we were
    # training a single layer 3 times.
    # Constants:
    # - 20 mins to pretrain a layer with 1000 units (per 1 epoch)
    # - 12 mins to finetune (per 1 epoch)
    # Basically the job_tree trick gives us a 5x speedup on the pretraining
    # time by reusing it across finetuning_lr values, and another 2x speedup
    # by reusing previous layers when exploring num_hidden_layers.
    return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000)
            * job.hidden_layers_sizes * job.hidden_layers_sizes)

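# Worked example (illustrative): with pretraining_epochs_per_layer=20 and
# hidden_layers_sizes=800 (the largest grid values above), the estimate is
#   20 * 20 / (1000.0*1000) * 800 * 800 = 256 minutes of pretraining,
# versus 20 * 20 / (1000.0*1000) * 300 * 300 = 36 minutes for the 300-unit case.
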
def estimate_total_time():
    jobs = produit_croise_jobs(JOB_VALS)
    sumtime = 0.0
    sum_without = 0.0
    for job in jobs:
        sumtime += estimate_pretraining_time(job)
    # 12 mins per finetuning epoch * 20 epochs
    # len(FINETUNING_LR_VALS) finetuning_lr values per pretraining combination
    sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
    print "num jobs=", len(jobs)
    print "estimate", sumtime/60, " hours"
    print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without

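# Rough numbers with the JOB_VALS grid above (24 jobs): the pretraining sum
# works out to 6 * (18 + 36 + 128 + 256) = 2628 minutes, finetuning adds
# 3 * 24 * 12 * 20 = 17280 minutes, so sumtime is about 19908 min (~332 h),
# against roughly 33048 min (~551 h) without the tree optimization
# (ratio ~0.60). Purely illustrative arithmetic, not measured runtimes.
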
def jobman_insert_nist():
    jobs = produit_croise_jobs(JOB_VALS)

    db = jobman.sql.db(JOBDB_JOBS)
    for job in jobs:
        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
        jobman.sql.insert_dict(job, db)

    print "inserted"

class NIST:
    def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
        global NIST_ALL_LOCATION

        self.minibatch_size = minibatch_size
        self.basepath = basepath and basepath or NIST_ALL_LOCATION

        self.set_filenames()

        # arrays of 2 elements: .x, .y
        self.train = [None, None]
        self.test = [None, None]

        self.load_train_test()

        self.valid = [[], []]
        self.split_train_valid()
        if reduce_train_to:
            self.reduce_train_set(reduce_train_to)

    def get_tvt(self):
        return self.train, self.valid, self.test

    def set_filenames(self):
        self.train_files = ['all_train_data.ft',
                            'all_train_labels.ft']

        self.test_files = ['all_test_data.ft',
                           'all_test_labels.ft']

    def load_train_test(self):
        self.load_data_labels(self.train_files, self.train)
        self.load_data_labels(self.test_files, self.test)

    def load_data_labels(self, filenames, pair):
        for i, fn in enumerate(filenames):
            f = open(os.path.join(self.basepath, fn))
            pair[i] = filetensor.read(f)
            f.close()

    def reduce_train_set(self, max):
        self.train[0] = self.train[0][:max]
        self.train[1] = self.train[1][:max]

        if max < len(self.test[0]):
            for ar in (self.test, self.valid):
                ar[0] = ar[0][:max]
                ar[1] = ar[1][:max]

    def split_train_valid(self):
        test_len = len(self.test[0])

        new_train_x = self.train[0][:-test_len]
        new_train_y = self.train[1][:-test_len]

        self.valid[0] = self.train[0][-test_len:]
        self.valid[1] = self.train[1][-test_len:]

        self.train[0] = new_train_x
        self.train[1] = new_train_y

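# Usage sketch (sizes below are hypothetical, just to illustrate the split):
#   nist = NIST(minibatch_size=20)
#   (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = nist.get_tvt()
# split_train_valid() carves the validation set off the end of the training
# set, making it the same length as the test set; e.g. with a 20000-example
# test set, the last 20000 training examples become the validation set and
# are removed from train.
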
def test_load_nist():
    print "Will load NIST"

    import time
    t1 = time.time()
    nist = NIST(20)
    t2 = time.time()

    print "NIST loaded. time delta = ", t2-t1

    tr, v, te = nist.get_tvt()

    print "Lengths: ", len(tr[0]), len(v[0]), len(te[0])

    raw_input("Press any key")

# hp for hyperparameters
def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
    global DEFAULT_HP_NIST
    hp = hp and hp or DEFAULT_HP_NIST

    print "Will load NIST"

    import time
    t1 = time.time()
    nist = NIST(20, reduce_train_to=100)
    t2 = time.time()

    print "NIST loaded. time delta = ", t2-t1

    train, valid, test = nist.get_tvt()
    dataset = (train, valid, test)

    print train[0][15]
    print type(train[0][1])

    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])

    n_ins = 32*32
    n_outs = 62 # 10 digits, 26*2 (lower, capitals)

    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
    optimizer.train()

if __name__ == '__main__':

    import sys

    args = sys.argv[1:]

    if len(args) > 0 and args[0] == 'load_nist':
        test_load_nist()

    elif len(args) > 0 and args[0] == 'jobman_insert':
        jobman_insert_nist()
    elif len(args) > 0 and args[0] == 'test_job_tree':
        # don't forget to comment out sql.inserts and make reduce_train_to=100
        print "TESTING JOB TREE"
        chanmock = DD({'COMPLETE':0})  # DD so channel.COMPLETE attribute access works
        hp = copy.copy(DEFAULT_HP_NIST)
        hp.update({'reduce_train_to':100})
        jobman_entrypoint(hp, chanmock)
    elif len(args) > 0 and args[0] == 'estimate':
        estimate_total_time()
    else:
        sgd_optimization_nist()

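# Command-line usage, as wired up in __main__ above:
#   python nist_sda.py load_nist       # load NIST and print timing / set lengths
#   python nist_sda.py jobman_insert   # insert the JOB_VALS grid into the jobman DB
#   python nist_sda.py test_job_tree   # run jobman_entrypoint locally with a mock channel
#   python nist_sda.py estimate        # print the total pretraining/finetuning time estimate
#   python nist_sda.py                 # default: sgd_optimization_nist() with DEFAULT_HP_NIST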