comparison deep/stacked_dae/nist_sda.py @ 185:b9ea8e2d071a
Removed the machinery for reusing pretraining results (too complicated for too little benefit: the finetuning is what really takes long).
author | fsavard |
---|---|
date | Fri, 26 Feb 2010 17:45:52 -0500 |
parents | 1f5937e9e530 |
children | d364a130b221 |
172:4d3d3627df3e | 185:b9ea8e2d071a |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # coding: utf-8 | 2 # coding: utf-8 |
3 | |
4 import ift6266 | |
5 import pylearn | |
3 | 6 |
4 import numpy | 7 import numpy |
5 import theano | 8 import theano |
6 import time | 9 import time |
10 | |
11 import pylearn.version | |
7 import theano.tensor as T | 12 import theano.tensor as T |
8 from theano.tensor.shared_randomstreams import RandomStreams | 13 from theano.tensor.shared_randomstreams import RandomStreams |
14 | |
9 import copy | 15 import copy |
10 | |
11 import sys | 16 import sys |
17 import os | |
12 import os.path | 18 import os.path |
13 | |
14 from sgd_optimization import SdaSgdOptimizer | |
15 | 19 |
16 from jobman import DD | 20 from jobman import DD |
17 import jobman, jobman.sql | 21 import jobman, jobman.sql |
18 from pylearn.io import filetensor | 22 from pylearn.io import filetensor |
19 | 23 |
20 from utils import produit_croise_jobs | 24 from utils import produit_croise_jobs |
21 | 25 |
22 TEST_CONFIG = False | 26 from sgd_optimization import SdaSgdOptimizer |
27 | |
28 SERIES_AVAILABLE = False | |
29 try: | |
30 from scalar_series import * | |
31 SERIES_AVAILABLE = True | |
32 except ImportError: | |
33 print "Could not import Series" | |
34 | |
35 TEST_CONFIG = True | |
23 | 36 |
24 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' | 37 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' |
25 | 38 |
26 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/' | 39 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2' |
40 | |
27 REDUCE_TRAIN_TO = None | 41 REDUCE_TRAIN_TO = None |
28 MAX_FINETUNING_EPOCHS = 1000 | 42 MAX_FINETUNING_EPOCHS = 1000 |
43 REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc. | |
29 if TEST_CONFIG: | 44 if TEST_CONFIG: |
30 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/' | |
31 REDUCE_TRAIN_TO = 1000 | 45 REDUCE_TRAIN_TO = 1000 |
32 MAX_FINETUNING_EPOCHS = 2 | 46 MAX_FINETUNING_EPOCHS = 2 |
33 | 47 REDUCE_EVERY = 10 |
34 JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs' | 48 |
35 JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results' | |
36 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint" | 49 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint" |
37 | 50 |
38 # There used to be | 51 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001], |
39 # 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1] | |
40 # and | |
41 # 'num_hidden_layers':[1,2,3] | |
42 # but this is now handled by a special mechanism in SgdOptimizer | |
43 # to reuse intermediate results (for the same training of lower layers, | |
44 # we can test many finetuning_lr) | |
45 JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001], | |
46 'pretraining_epochs_per_layer': [10,20], | 52 'pretraining_epochs_per_layer': [10,20], |
47 'hidden_layers_sizes': [300,800], | 53 'hidden_layers_sizes': [300,800], |
48 'corruption_levels': [0.1,0.2], | 54 'corruption_levels': [0.1,0.2,0.3], |
49 'minibatch_size': [20], | 55 'minibatch_size': [20], |
50 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]} | 56 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS], |
51 FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001] | 57 'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out |
52 NUM_HIDDEN_LAYERS_VALS = [1,2,3] | 58 'num_hidden_layers':[2,3]} |
53 | 59 |
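`produit_croise_jobs` ("cross product of jobs") is imported from the local `utils` module; judging from its use here and in `jobman_insert_nist` below, it expands `JOB_VALS` into one job dict per combination of hyperparameter values. A minimal sketch of that behavior, assuming the real implementation in `utils` does the same:

```python
import itertools

def produit_croise_jobs(val_dict):
    # One job dict per element of the cartesian product of the value lists.
    keys = sorted(val_dict.keys())
    return [dict(zip(keys, combo))
            for combo in itertools.product(*[val_dict[k] for k in keys])]

# With the new JOB_VALS above: 2*2*2*3*1*1*2*2 = 96 jobs.
```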
54 # Just useful for tests... minimal number of epochs | 60 # Just useful for tests... minimal number of epochs |
55 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01, | 61 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01, |
56 'pretraining_lr':0.01, | 62 'pretraining_lr':0.01, |
57 'pretraining_epochs_per_layer':1, | 63 'pretraining_epochs_per_layer':1, |
58 'max_finetuning_epochs':1, | 64 'max_finetuning_epochs':1, |
59 'hidden_layers_sizes':[1000], | 65 'hidden_layers_sizes':1000, |
60 'corruption_levels':[0.2], | 66 'corruption_levels':0.2, |
61 'minibatch_size':20}) | 67 'minibatch_size':20, |
68 'reduce_train_to':1000, | |
69 'num_hidden_layers':1}) | |
62 | 70 |
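`DEFAULT_HP_NIST` is a jobman `DD`, a dict whose entries are also reachable as attributes; `jobman_entrypoint` below relies on this (`state.hidden_layers_sizes`, `state.num_hidden_layers`, ...). A tiny illustration:

```python
from jobman import DD

# DD entries can be read either as keys or as attributes.
hp = DD({'finetuning_lr': 0.01, 'num_hidden_layers': 1})
assert hp.finetuning_lr == hp['finetuning_lr'] == 0.01
```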
63 def jobman_entrypoint(state, channel): | 71 def jobman_entrypoint(state, channel): |
64 state = copy.copy(state) | 72 pylearn.version.record_versions(state,[theano,ift6266,pylearn]) |
73 channel.save() | |
74 | |
75 workingdir = os.getcwd() | |
65 | 76 |
66 print "Will load NIST" | 77 print "Will load NIST" |
78 sys.stdout.flush() | |
79 | |
67 nist = NIST(20) | 80 nist = NIST(20) |
81 | |
68 print "NIST loaded" | 82 print "NIST loaded" |
83 sys.stdout.flush() | |
69 | 84 |
70 rtt = None | 85 rtt = None |
71 if state.has_key('reduce_train_to'): | 86 if state.has_key('reduce_train_to'): |
72 rtt = state['reduce_train_to'] | 87 rtt = state['reduce_train_to'] |
73 elif REDUCE_TRAIN_TO: | 88 elif REDUCE_TRAIN_TO: |
81 dataset = (train,valid,test) | 96 dataset = (train,valid,test) |
82 | 97 |
83 n_ins = 32*32 | 98 n_ins = 32*32 |
84 n_outs = 62 # 10 digits, 26*2 (lower, capitals) | 99 n_outs = 62 # 10 digits, 26*2 (lower, capitals) |
85 | 100 |
86 db = jobman.sql.db(JOBDB_RESULTS) | 101 hls = state.hidden_layers_sizes |
87 optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\ | 102 cl = state.corruption_levels |
88 input_divider=255.0, job_tree=True, results_db=db, \ | 103 nhl = state.num_hidden_layers |
89 experiment=EXPERIMENT_PATH, \ | 104 state.hidden_layers_sizes = [hls] * nhl |
90 finetuning_lr_to_try=FINETUNING_LR_VALS, \ | 105 state.corruption_levels = [cl] * nhl |
91 num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS) | 106 |
92 optimizer.train() | 107 # b,b',W for each hidden layer + b,W of last layer (logreg) |
108 numparams = nhl * 3 + 2 | |
109 series_mux = None | |
110 if SERIES_AVAILABLE: | |
111 series_mux = create_series(workingdir, numparams) | |
112 | |
113 optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \ | |
114 n_ins=n_ins, n_outs=n_outs,\ | |
115 input_divider=255.0, series_mux=series_mux) | |
116 | |
117 optimizer.pretrain() | |
118 channel.save() | |
119 | |
120 optimizer.finetune() | |
121 channel.save() | |
122 | |
123 pylearn.version.record_versions(state,[theano,ift6266,pylearn]) | |
124 channel.save() | |
93 | 125 |
94 return channel.COMPLETE | 126 return channel.COMPLETE |
95 | 127 |
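Since jobman stores each hyperparameter as a scalar, the entrypoint above replicates `hidden_layers_sizes` and `corruption_levels` across the `num_hidden_layers` layers before handing the state to the optimizer. A worked instance of that expansion and of the `numparams` count (values here are illustrative):

```python
nhl = 3                    # state.num_hidden_layers
hls, cl = 800, 0.2         # scalar hidden_layers_sizes / corruption_levels
hidden_layers_sizes = [hls] * nhl   # -> [800, 800, 800]
corruption_levels   = [cl] * nhl    # -> [0.2, 0.2, 0.2]
# b, b', W per hidden layer, plus b, W for the final logistic regression:
numparams = nhl * 3 + 2             # -> 11 parameter arrays tracked by the series
```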
96 def estimate_pretraining_time(job): | 128 def create_series(basedir, numparams): |
97 job = DD(job) | 129 mux = SeriesMultiplexer() |
98 # time spent on pretraining estimated as O(n^2) where n=num hidden units | 130 |
99 # no need to multiply by num_hidden_layers, as results from num=1 | 131 # comment out series we don't want to save |
100 # is reused for num=2, or 3, so in the end we get the same time | 132 mux.add_series(AccumulatorSeries(name="reconstruction_error", |
101 # as if we were training 3 times a single layer | 133 reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save |
102 # constants: | 134 mean=True, |
103 # - 20 mins to pretrain a layer with 1000 units (per 1 epoch) | 135 directory=basedir, flush_every=1)) |
104 # - 12 mins to finetune (per 1 epoch) | 136 |
105 # basically the job_tree trick gives us a 5 times speedup on the | 137 mux.add_series(AccumulatorSeries(name="training_error", |
106 # pretraining time due to reusing for finetuning_lr | 138 reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save |
107 # and gives us a second x2 speedup for reusing previous layers | 139 mean=True, |
108 # to explore num_hidden_layers | 140 directory=basedir, flush_every=1)) |
109 return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \ | 141 |
110 * job.hidden_layers_sizes * job.hidden_layers_sizes) | 142 mux.add_series(BaseSeries(name="validation_error", directory=basedir, flush_every=1)) |
111 | 143 mux.add_series(BaseSeries(name="test_error", directory=basedir, flush_every=1)) |
112 def estimate_total_time(): | 144 |
113 jobs = produit_croise_jobs(JOB_VALS) | 145 mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir)) |
114 sumtime = 0.0 | 146 |
115 sum_without = 0.0 | 147 return mux |
116 for job in jobs: | |
117 sumtime += estimate_pretraining_time(job) | |
118 # 12 mins per epoch * 20 epochs | |
119 # 5 finetuning_lr per pretraining combination | |
120 sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS) | |
121 sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20 | |
122 print "num jobs=", len(jobs) | |
123 print "estimate", sumtime/60, " hours" | |
124 print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without | |
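To make the constants in the (now removed) `estimate_pretraining_time` concrete, a worked instance, using values from `JOB_VALS` and the 20-minutes-per-1000-units-per-epoch figure quoted in the comments above:

```python
# Time scales as O(n^2) in layer width, normalized so that a
# 1000-unit layer takes 20 minutes per pretraining epoch.
pretraining_epochs_per_layer = 10
hidden_layers_sizes = 800
minutes = pretraining_epochs_per_layer * 20 / (1000.0 * 1000) \
          * hidden_layers_sizes * hidden_layers_sizes   # = 128.0 minutes
```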
125 | 148 |
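The `scalar_series` classes used in `create_series` are not shown in this file; from the calls above, an `AccumulatorSeries` with `mean=True` buffers appended values and writes out their mean every `reduce_every` appends. A minimal sketch of that behavior (the actual `scalar_series` API may differ; this is only illustrative):

```python
class MeanAccumulatorSketch(object):
    """Buffers values; every `reduce_every` appends, records their mean."""
    def __init__(self, reduce_every):
        self.reduce_every = reduce_every
        self.buf = []
        self.points = []   # what would be flushed to disk, one point per reduction

    def append(self, value):
        self.buf.append(value)
        if len(self.buf) == self.reduce_every:
            self.points.append(sum(self.buf) / float(len(self.buf)))
            self.buf = []

# With REDUCE_EVERY = 1000 and minibatch_size = 20, one point is saved
# per 1000 minibatches, i.e. per 20000 training examples.
```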
126 def jobman_insert_nist(): | 149 def jobman_insert_nist(): |
127 jobs = produit_croise_jobs(JOB_VALS) | 150 jobs = produit_croise_jobs(JOB_VALS) |
128 | 151 |
129 db = jobman.sql.db(JOBDB_JOBS) | 152 db = jobman.sql.db(JOBDB) |
130 for job in jobs: | 153 for job in jobs: |
131 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) | 154 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) |
132 jobman.sql.insert_dict(job, db) | 155 jobman.sql.insert_dict(job, db) |
133 | 156 |
134 print "inserted" | 157 print "inserted" |
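Once inserted, the rows wait in the database for jobman workers, which presumably use the `jobman.sql.EXPERIMENT` entry (set to `EXPERIMENT_PATH`) to call back into `jobman_entrypoint`, one hyperparameter combination per job.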
248 if len(args) > 0 and args[0] == 'load_nist': | 271 if len(args) > 0 and args[0] == 'load_nist': |
249 test_load_nist() | 272 test_load_nist() |
250 | 273 |
251 elif len(args) > 0 and args[0] == 'jobman_insert': | 274 elif len(args) > 0 and args[0] == 'jobman_insert': |
252 jobman_insert_nist() | 275 jobman_insert_nist() |
253 elif len(args) > 0 and args[0] == 'test_job_tree': | 276 |
254 # don't forget to comment out sql.inserts and make reduce_train_to=100 | 277 elif len(args) > 0 and args[0] == 'test_jobman_entrypoint': |
255 print "TESTING JOB TREE" | 278 chanmock = DD({'COMPLETE':0}) |
256 chanmock = {'COMPLETE':0} | 279 jobman_entrypoint(DEFAULT_HP_NIST, chanmock) |
257 hp = copy.copy(DEFAULT_HP_NIST) | 280 |
258 hp.update({'reduce_train_to':100}) | |
259 jobman_entrypoint(hp, chanmock) | |
260 elif len(args) > 0 and args[0] == 'estimate': | 281 elif len(args) > 0 and args[0] == 'estimate': |
261 estimate_total_time() | 282 estimate_total_time() |
262 else: | 283 else: |
263 sgd_optimization_nist() | 284 sgd_optimization_nist() |
264 | 285 |