deep/stacked_dae/nist_sda.py @ 167:1f5937e9e530

More moves - transformations into data_generation, added "deep" folder
author Dumitru Erhan <dumitru.erhan@gmail.com>
date Fri, 26 Feb 2010 14:15:38 -0500
parents scripts/stacked_dae/nist_sda.py@3346fcd3818b
children b9ea8e2d071a
#!/usr/bin/python
# coding: utf-8

import numpy
import theano
import time
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
import copy

import sys
import os.path

from sgd_optimization import SdaSgdOptimizer

from jobman import DD
import jobman, jobman.sql
from pylearn.io import filetensor

from utils import produit_croise_jobs

TEST_CONFIG = False

NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'

JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/'
REDUCE_TRAIN_TO = None
MAX_FINETUNING_EPOCHS = 1000
if TEST_CONFIG:
    JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
    REDUCE_TRAIN_TO = 1000
    MAX_FINETUNING_EPOCHS = 2

JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs'
JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"

# There used to be
# 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
# and
# 'num_hidden_layers': [1,2,3]
# but this is now handled by a special mechanism in SdaSgdOptimizer
# that reuses intermediate results (for the same training of the lower
# layers, we can try many finetuning_lr values)
JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
            'pretraining_epochs_per_layer': [10,20],
            'hidden_layers_sizes': [300,800],
            'corruption_levels': [0.1,0.2],
            'minibatch_size': [20],
            'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]}
FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001]
NUM_HIDDEN_LAYERS_VALS = [1,2,3]

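# A note on how the grid above becomes individual jobs.  Illustration only,
# assuming produit_croise_jobs (from utils) returns the cartesian product of
# the value lists, one dict per combination (order may differ):
#
#   produit_croise_jobs({'a': [1, 2], 'b': [10, 20]})
#   # -> [{'a': 1, 'b': 10}, {'a': 1, 'b': 20}, {'a': 2, 'b': 10}, {'a': 2, 'b': 20}]
#
# With JOB_VALS as above this yields 3*2*2*2*1*1 = 24 pretraining
# configurations; finetuning_lr and num_hidden_layers are then multiplied in
# by the job_tree mechanism of SdaSgdOptimizer rather than by the grid itself.
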
# Just useful for tests... minimal number of epochs
DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
                      'pretraining_lr':0.01,
                      'pretraining_epochs_per_layer':1,
                      'max_finetuning_epochs':1,
                      'hidden_layers_sizes':[1000],
                      'corruption_levels':[0.2],
                      'minibatch_size':20})

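# jobman calls jobman_entrypoint(state, channel) once per row inserted by
# jobman_insert_nist() below: state is a DD-like mapping holding one
# combination of the JOB_VALS keys (optionally plus 'reduce_train_to' for
# quick test runs), and channel.COMPLETE is returned to mark the job done.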
def jobman_entrypoint(state, channel):
    state = copy.copy(state)

    print "Will load NIST"
    nist = NIST(20)
    print "NIST loaded"

    rtt = None
    if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
    elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO

    if rtt:
        print "Reducing training set to ", rtt, " examples"
        nist.reduce_train_set(rtt)

    train,valid,test = nist.get_tvt()
    dataset = (train,valid,test)

    n_ins = 32*32
    n_outs = 62 # 10 digits, 26*2 (lower, capitals)

    db = jobman.sql.db(JOBDB_RESULTS)
    optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,
                                input_divider=255.0, job_tree=True, results_db=db,
                                experiment=EXPERIMENT_PATH,
                                finetuning_lr_to_try=FINETUNING_LR_VALS,
                                num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS)
    optimizer.train()

    return channel.COMPLETE

def estimate_pretraining_time(job):
    job = DD(job)
    # time spent on pretraining estimated as O(n^2) where n = number of hidden units
    # no need to multiply by num_hidden_layers, as results from num=1
    # are reused for num=2 or 3, so in the end we get the same time
    # as if we were training a single layer 3 times
    # constants:
    # - 20 mins to pretrain a layer with 1000 units (per epoch)
    # - 12 mins to finetune (per epoch)
    # basically the job_tree trick gives us a 5x speedup on the
    # pretraining time due to reusing it across finetuning_lr values,
    # and a second 2x speedup from reusing previous layers
    # when exploring num_hidden_layers
    return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \
                    * job.hidden_layers_sizes * job.hidden_layers_sizes)
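
# Sanity check on the formula above (illustrative arithmetic, not a measurement):
# for hidden_layers_sizes=800 and pretraining_epochs_per_layer=20,
# 20 * 20 / (1000.0*1000) * 800 * 800 = 256 minutes of pretraining,
# i.e. roughly 4.3 hours for that configuration before finetuning.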

def estimate_total_time():
    jobs = produit_croise_jobs(JOB_VALS)
    sumtime = 0.0
    sum_without = 0.0
    for job in jobs:
        sumtime += estimate_pretraining_time(job)
    # 12 mins per finetuning epoch * 20 epochs,
    # times one run per finetuning_lr value per pretraining combination
    sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
    sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
    print "num jobs=", len(jobs)
    print "estimate", sumtime/60, " hours"
    print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without

def jobman_insert_nist():
    jobs = produit_croise_jobs(JOB_VALS)

    db = jobman.sql.db(JOBDB_JOBS)
    for job in jobs:
        job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
        jobman.sql.insert_dict(job, db)

    print "inserted"
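
    # Once the rows are inserted, the jobs are typically launched with the
    # jobman sql dispatcher, e.g. something along the lines of
    #   jobman sql 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda1_jobs' .
    # (shown as an illustration only; check the jobman documentation for the
    # exact invocation on your setup)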

class NIST:
    def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
        global NIST_ALL_LOCATION

        self.minibatch_size = minibatch_size
        self.basepath = basepath and basepath or NIST_ALL_LOCATION

        self.set_filenames()

        # arrays of 2 elements: .x, .y
        self.train = [None, None]
        self.test = [None, None]

        self.load_train_test()

        self.valid = [[], []]
        self.split_train_valid()
        if reduce_train_to:
            self.reduce_train_set(reduce_train_to)

    def get_tvt(self):
        return self.train, self.valid, self.test

    def set_filenames(self):
        self.train_files = ['all_train_data.ft',
                            'all_train_labels.ft']

        self.test_files = ['all_test_data.ft',
                           'all_test_labels.ft']

    def load_train_test(self):
        self.load_data_labels(self.train_files, self.train)
        self.load_data_labels(self.test_files, self.test)

    def load_data_labels(self, filenames, pair):
        for i, fn in enumerate(filenames):
            f = open(os.path.join(self.basepath, fn))
            pair[i] = filetensor.read(f)
            f.close()

    def reduce_train_set(self, max):
        self.train[0] = self.train[0][:max]
        self.train[1] = self.train[1][:max]

        if max < len(self.test[0]):
            for ar in (self.test, self.valid):
                ar[0] = ar[0][:max]
                ar[1] = ar[1][:max]

    def split_train_valid(self):
        test_len = len(self.test[0])

        new_train_x = self.train[0][:-test_len]
        new_train_y = self.train[1][:-test_len]

        self.valid[0] = self.train[0][-test_len:]
        self.valid[1] = self.train[1][-test_len:]

        self.train[0] = new_train_x
        self.train[1] = new_train_y

def test_load_nist():
    print "Will load NIST"

    import time
    t1 = time.time()
    nist = NIST(20)
    t2 = time.time()

    print "NIST loaded. time delta = ", t2-t1

    tr,v,te = nist.get_tvt()

    print "Lengths: ", len(tr[0]), len(v[0]), len(te[0])

    raw_input("Press Enter to continue")

# hp for hyperparameters
def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
    global DEFAULT_HP_NIST
    hp = hp and hp or DEFAULT_HP_NIST

    print "Will load NIST"

    import time
    t1 = time.time()
    nist = NIST(20, reduce_train_to=100)
    t2 = time.time()

    print "NIST loaded. time delta = ", t2-t1

    train,valid,test = nist.get_tvt()
    dataset = (train,valid,test)

    print train[0][15]
    print type(train[0][1])

    print "Lengths train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])

    n_ins = 32*32
    n_outs = 62 # 10 digits, 26*2 (lower, capitals)

    optimizer = SdaSgdOptimizer(dataset, hp, n_ins, n_outs, input_divider=255.0)
    optimizer.train()

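# Command-line modes handled below:
#   load_nist      -- just load the NIST data and print the split sizes
#   jobman_insert  -- insert the JOB_VALS cross product into the jobman DB
#   test_job_tree  -- run jobman_entrypoint locally with a mock channel
#   estimate       -- print rough time estimates for the whole grid
#   (no argument)  -- run a small local sgd_optimization_nist() with DEFAULT_HP_NIST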
if __name__ == '__main__':

    import sys

    args = sys.argv[1:]

    if len(args) > 0 and args[0] == 'load_nist':
        test_load_nist()

    elif len(args) > 0 and args[0] == 'jobman_insert':
        jobman_insert_nist()
    elif len(args) > 0 and args[0] == 'test_job_tree':
        # don't forget to comment out sql.inserts and make reduce_train_to=100
        print "TESTING JOB TREE"
        # mock channel with a COMPLETE attribute, like jobman's real channel
        chanmock = DD({'COMPLETE':0})
        hp = copy.copy(DEFAULT_HP_NIST)
        hp.update({'reduce_train_to':100})
        jobman_entrypoint(hp, chanmock)
    elif len(args) > 0 and args[0] == 'estimate':
        estimate_total_time()
    else:
        sgd_optimization_nist()