comparison: deep/stacked_dae/nist_sda.py @ 275:7b4507295eba

merge

author    Xavier Glorot <glorotxa@iro.umontreal.ca>
date      Mon, 22 Mar 2010 10:20:10 -0400
parents   acb942530923 b077d9e97a3b
children  206374eed2fb
--- deep/stacked_dae/nist_sda.py    274:44409b6652aa
+++ deep/stacked_dae/nist_sda.py    275:7b4507295eba
@@ -1,7 +1,10 @@
 #!/usr/bin/python
 # coding: utf-8
+
+# Must be imported first
+from config import *
 
 import ift6266
 import pylearn
 
 import numpy
@@ -23,49 +26,10 @@
 
 from utils import produit_cartesien_jobs
 
 from sgd_optimization import SdaSgdOptimizer
 
-from ift6266.utils.scalar_series import *
+#from ift6266.utils.scalar_series import *
+from ift6266.utils.seriestables import *
+import tables
 
-##############################################################################
-# GLOBALS
-
-TEST_CONFIG = False
-
-NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
-JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda4'
-EXPERIMENT_PATH = "ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint"
-
-REDUCE_TRAIN_TO = None
-MAX_FINETUNING_EPOCHS = 1000
-# number of minibatches before taking means for valid error etc.
-REDUCE_EVERY = 1000
-
-if TEST_CONFIG:
-    REDUCE_TRAIN_TO = 1000
-    MAX_FINETUNING_EPOCHS = 2
-    REDUCE_EVERY = 10
-
-# Possible values the hyperparameters can take. These are then
-# combined with produit_cartesien_jobs so we get a list of all
-# possible combinations, each one resulting in a job inserted
-# in the jobman DB.
-JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
-            'pretraining_epochs_per_layer': [10,20],
-            'hidden_layers_sizes': [300,800],
-            'corruption_levels': [0.1,0.2,0.3],
-            'minibatch_size': [20],
-            'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
-            'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out
-            'num_hidden_layers':[2,3]}
-
-# Just useful for tests... minimal number of epochs
-DEFAULT_HP_NIST = DD({'finetuning_lr':0.1,
-                      'pretraining_lr':0.1,
-                      'pretraining_epochs_per_layer':20,
-                      'max_finetuning_epochs':2,
-                      'hidden_layers_sizes':800,
-                      'corruption_levels':0.2,
-                      'minibatch_size':20,
-                      #'reduce_train_to':300,
-                      'num_hidden_layers':2})
+from ift6266 import datasets
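The JOB_VALS block removed above defined the hyperparameter grid that gets expanded into individual jobman jobs. produit_cartesien_jobs itself lives in utils and is not shown in this diff; the standalone sketch below, built on itertools.product, only illustrates the kind of expansion it performs (the name cartesian_jobs is hypothetical, not the project's implementation):

    import itertools

    def cartesian_jobs(job_vals):
        # Expand a dict of {hyperparameter: list of values} into one
        # dict per point of the Cartesian product, i.e. one job each.
        keys = sorted(job_vals.keys())
        for combo in itertools.product(*[job_vals[k] for k in keys]):
            yield dict(zip(keys, combo))

With the JOB_VALS above (2 * 2 * 2 * 3 * 1 * 1 * 2 * 2 value choices), this yields 96 combinations, each becoming one row inserted in the jobman DB.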
@@ -72,36 +36,23 @@
 
 '''
 Function called by jobman upon launching each job
-Its path is the one given when inserting jobs:
-ift6266.deep.stacked_dae.nist_sda.jobman_entrypoint
+Its path is the one given when inserting jobs: see EXPERIMENT_PATH
 '''
 def jobman_entrypoint(state, channel):
     # record mercurial versions of each package
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
+    # TODO: remove this, bad for number of simultaneous requests on DB
     channel.save()
-
-    workingdir = os.getcwd()
-
-    print "Will load NIST"
-
-    nist = NIST(minibatch_size=20)
-
-    print "NIST loaded"
 
     # For test runs, we don't want to use the whole dataset so
     # reduce it to fewer elements if asked to.
     rtt = None
     if state.has_key('reduce_train_to'):
        rtt = state['reduce_train_to']
     elif REDUCE_TRAIN_TO:
        rtt = REDUCE_TRAIN_TO
 
-    if rtt:
-        print "Reducing training set to "+str(rtt)+" examples"
-        nist.reduce_train_set(rtt)
-
-    train,valid,test = nist.get_tvt()
-    dataset = (train,valid,test)
-
     n_ins = 32*32
     n_outs = 62 # 10 digits, 26*2 (lower, capitals)
+
+    examples_per_epoch = NIST_ALL_TRAIN_SIZE
@@ -108,21 +59,20 @@
 
-    # b,b',W for each hidden layer
-    # + b,W of last layer (logreg)
-    numparams = state.num_hidden_layers * 3 + 2
-    series_mux = None
-    series_mux = create_series(workingdir, numparams)
+    series = create_series(state.num_hidden_layers)
 
     print "Creating optimizer with state, ", state
 
-    optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \
+    optimizer = SdaSgdOptimizer(dataset=datasets.nist_all(),
+                                hyperparameters=state, \
                     n_ins=n_ins, n_outs=n_outs,\
-                    input_divider=255.0, series_mux=series_mux)
+                    examples_per_epoch=examples_per_epoch, \
+                    series=series,
+                    max_minibatches=rtt)
 
-    optimizer.pretrain()
+    optimizer.pretrain(datasets.nist_all())
     channel.save()
 
-    optimizer.finetune()
+    optimizer.finetune(datasets.nist_all())
     channel.save()
 
     return channel.COMPLETE
 
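The rewritten entrypoint no longer builds a (train, valid, test) tuple of in-memory arrays: it hands the datasets.nist_all() object straight to the optimizer and caps iteration with max_minibatches=rtt instead of physically truncating the training set. The datasets module is not part of this diff, so the interface below is only an assumed sketch of what the new SdaSgdOptimizer calls appear to rely on; the class and method names are guesses:

    # Assumed shape of the object returned by datasets.nist_all().
    class MinibatchDataset(object):
        def train(self, batch_size):
            # Yield (x, y) minibatch pairs; callers stop early via
            # max_minibatches rather than slicing the data up front.
            raise NotImplementedError
        def valid(self, batch_size):
            raise NotImplementedError
        def test(self, batch_size):
            raise NotImplementedError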
@@ -129,27 +79,67 @@
 # These Series objects are used to save various statistics
 # during the training.
-def create_series(basedir, numparams):
-    mux = SeriesMultiplexer()
+def create_series(num_hidden_layers):
+
+    # Replace series we don't want to save with DummySeries, e.g.
+    # series['training_error'] = DummySeries()
+
+    series = {}
+
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")
+
+    # reconstruction
+    reconstruction_base = \
+                ErrorSeries(error_name="reconstruction_error",
+                            table_name="reconstruction_error",
+                            hdf5_file=h5f,
+                            index_names=('epoch','minibatch'),
+                            title="Reconstruction error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['reconstruction_error'] = \
+                AccumulatorSeriesWrapper(base_series=reconstruction_base,
+                                         reduce_every=REDUCE_EVERY)
+
+    # train
+    training_base = \
+                ErrorSeries(error_name="training_error",
+                            table_name="training_error",
+                            hdf5_file=h5f,
+                            index_names=('epoch','minibatch'),
+                            title="Training error (mean over "+str(REDUCE_EVERY)+" minibatches)")
+    series['training_error'] = \
+                AccumulatorSeriesWrapper(base_series=training_base,
+                                         reduce_every=REDUCE_EVERY)
+
+    # valid and test are not accumulated/mean, saved directly
+    series['validation_error'] = \
+                ErrorSeries(error_name="validation_error",
+                            table_name="validation_error",
+                            hdf5_file=h5f,
+                            index_names=('epoch','minibatch'))
+
+    series['test_error'] = \
+                ErrorSeries(error_name="test_error",
+                            table_name="test_error",
+                            hdf5_file=h5f,
+                            index_names=('epoch','minibatch'))
+
+    param_names = []
+    for i in range(num_hidden_layers):
+        param_names += ['layer%d_W'%i, 'layer%d_b'%i, 'layer%d_bprime'%i]
+    param_names += ['logreg_layer_W', 'logreg_layer_b']
 
     # comment out series we don't want to save
-    mux.add_series(AccumulatorSeries(name="reconstruction_error",
-                                reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
-                                mean=True,
-                                directory=basedir, flush_every=1))
+    series['params'] = SharedParamsStatisticsWrapper(
+                        new_group_name="params",
+                        base_group="/",
+                        arrays_names=param_names,
+                        hdf5_file=h5f,
+                        index_names=('epoch',))
 
-    mux.add_series(AccumulatorSeries(name="training_error",
-                                reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
-                                mean=True,
-                                directory=basedir, flush_every=1))
-
-    mux.add_series(BaseSeries(name="validation_error", directory=basedir, flush_every=1))
-    mux.add_series(BaseSeries(name="test_error", directory=basedir, flush_every=1))
-
-    mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
-
-    return mux
+    return series
 
 # Perform insertion into the Postgres DB based on combination
 # of hyperparameter values above
 # (see comment for produit_cartesien_jobs() to know how it works)
 def jobman_insert_nist():
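create_series now returns a plain dict mapping series names to SeriesTables objects backed by a single HDF5 file, replacing the old SeriesMultiplexer. How the optimizer consumes that dict is not shown in this diff; the append(index, value) call shape below is an assumption read off the index_names declared above, and DummySeries is sketched from the comment inside create_series:

    # Presumed usage inside the training loop (variable names are
    # hypothetical):
    #   series['training_error'].append((epoch, minibatch_index), train_cost)
    #   series['validation_error'].append((epoch, minibatch_index), valid_error)

    # A no-op stand-in matching the "Replace series we don't want to
    # save with DummySeries" comment might look like:
    class DummySeries(object):
        def append(self, index, value):
            pass  # drop the statistic instead of writing it to HDF5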
@@ -160,5 +150,5 @@
         job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
         jobman.sql.insert_dict(job, db)
 
     print "inserted"
 
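Only the tail of jobman_insert_nist is visible in this comparison; the elided lines stay elided. For orientation, an illustrative insertion loop assembled from the pieces this file does show (JOBDB, EXPERIMENT_PATH, produit_cartesien_jobs, jobman.sql.insert_dict) would read as follows; insert_all_jobs is a hypothetical name and is not claimed to match the elided body:

    import jobman, jobman.sql

    def insert_all_jobs(job_vals):
        # One jobman DB row per hyperparameter combination; EXPERIMENT
        # points jobman back at jobman_entrypoint in this module.
        db = jobman.sql.db(JOBDB)
        for job in produit_cartesien_jobs(job_vals):
            job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
            jobman.sql.insert_dict(job, db)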
@@ -165,77 +154,0 @@
-class NIST:
-    def __init__(self, minibatch_size, basepath=None, reduce_train_to=None):
-        global NIST_ALL_LOCATION
-
-        self.minibatch_size = minibatch_size
-        self.basepath = basepath and basepath or NIST_ALL_LOCATION
-
-        self.set_filenames()
-
-        # arrays of 2 elements: .x, .y
-        self.train = [None, None]
-        self.test = [None, None]
-
-        self.load_train_test()
-
-        self.valid = [[], []]
-        self.split_train_valid()
-        if reduce_train_to:
-            self.reduce_train_set(reduce_train_to)
-
-    def get_tvt(self):
-        return self.train, self.valid, self.test
-
-    def set_filenames(self):
-        self.train_files = ['all_train_data.ft',
-                            'all_train_labels.ft']
-
-        self.test_files = ['all_test_data.ft',
-                           'all_test_labels.ft']
-
-    def load_train_test(self):
-        self.load_data_labels(self.train_files, self.train)
-        self.load_data_labels(self.test_files, self.test)
-
-    def load_data_labels(self, filenames, pair):
-        for i, fn in enumerate(filenames):
-            f = open(os.path.join(self.basepath, fn))
-            pair[i] = filetensor.read(f)
-            f.close()
-
-    def reduce_train_set(self, max):
-        self.train[0] = self.train[0][:max]
-        self.train[1] = self.train[1][:max]
-
-        if max < len(self.test[0]):
-            for ar in (self.test, self.valid):
-                ar[0] = ar[0][:max]
-                ar[1] = ar[1][:max]
-
-    def split_train_valid(self):
-        test_len = len(self.test[0])
-
-        new_train_x = self.train[0][:-test_len]
-        new_train_y = self.train[1][:-test_len]
-
-        self.valid[0] = self.train[0][-test_len:]
-        self.valid[1] = self.train[1][-test_len:]
-
-        self.train[0] = new_train_x
-        self.train[1] = new_train_y
-
-def test_load_nist():
-    print "Will load NIST"
-
-    import time
-    t1 = time.time()
-    nist = NIST(20)
-    t2 = time.time()
-
-    print "NIST loaded. time delta = ", t2-t1
-
-    tr,v,te = nist.get_tvt()
-
-    print "Lengths: ", len(tr[0]), len(v[0]), len(te[0])
-
-    raw_input("Press any key")
-
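The deleted NIST class (now superseded by ift6266.datasets) carved its validation set off the tail of the training arrays, sized to match the test set. A worked example of that split_train_valid logic, under made-up sizes:

    import numpy

    # 100 training examples, 20 test examples: the last 20 training
    # examples become the validation set, leaving 80 for training.
    train_x = numpy.arange(100)
    test_len = 20
    valid_x = train_x[-test_len:]   # indices 80..99
    train_x = train_x[:-test_len]   # indices 0..79
    assert len(valid_x) == test_len and len(train_x) == 80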
@@ -242,15 +155,13 @@
 if __name__ == '__main__':
-
-    import sys
 
     args = sys.argv[1:]
 
-    if len(args) > 0 and args[0] == 'load_nist':
-        test_load_nist()
+    #if len(args) > 0 and args[0] == 'load_nist':
+    #    test_load_nist()
 
-    elif len(args) > 0 and args[0] == 'jobman_insert':
+    if len(args) > 0 and args[0] == 'jobman_insert':
         jobman_insert_nist()
 
     elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
         chanmock = DD({'COMPLETE':0,'save':(lambda:None)})
         jobman_entrypoint(DEFAULT_HP_NIST, chanmock)