comparison deep/stacked_dae/nist_sda.py @ 185:b9ea8e2d071a

Removed the code for reusing pretraining results across jobs (too complicated for too little benefit: it's the finetuning that is really long).
author fsavard
date Fri, 26 Feb 2010 17:45:52 -0500
parents 1f5937e9e530
children d364a130b221
comparison: 172:4d3d3627df3e -> 185:b9ea8e2d071a
1 #!/usr/bin/python 1 #!/usr/bin/python
2 # coding: utf-8 2 # coding: utf-8
3
4 import ift6266
5 import pylearn
3 6
4 import numpy 7 import numpy
5 import theano 8 import theano
6 import time 9 import time
10
11 import pylearn.version
7 import theano.tensor as T 12 import theano.tensor as T
8 from theano.tensor.shared_randomstreams import RandomStreams 13 from theano.tensor.shared_randomstreams import RandomStreams
14
9 import copy 15 import copy
10
11 import sys 16 import sys
17 import os
12 import os.path 18 import os.path
13
14 from sgd_optimization import SdaSgdOptimizer
15 19
16 from jobman import DD 20 from jobman import DD
17 import jobman, jobman.sql 21 import jobman, jobman.sql
18 from pylearn.io import filetensor 22 from pylearn.io import filetensor
19 23
20 from utils import produit_croise_jobs 24 from utils import produit_croise_jobs
21 25
22 TEST_CONFIG = False 26 from sgd_optimization import SdaSgdOptimizer
27
28 SERIES_AVAILABLE = False
29 try:
30 from scalar_series import *
31 SERIES_AVAILABLE = True
32 except ImportError:
33 print "Could not import Series"
34
35 TEST_CONFIG = True
23 36
24 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all' 37 NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'
25 38
26 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/' 39 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_db/fsavard_sda2'
40
27 REDUCE_TRAIN_TO = None 41 REDUCE_TRAIN_TO = None
28 MAX_FINETUNING_EPOCHS = 1000 42 MAX_FINETUNING_EPOCHS = 1000
43 REDUCE_EVERY = 1000 # number of minibatches before taking means for valid error etc.
29 if TEST_CONFIG: 44 if TEST_CONFIG:
30 JOBDB = 'postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/'
31 REDUCE_TRAIN_TO = 1000 45 REDUCE_TRAIN_TO = 1000
32 MAX_FINETUNING_EPOCHS = 2 46 MAX_FINETUNING_EPOCHS = 2
33 47 REDUCE_EVERY = 10
34 JOBDB_JOBS = JOBDB + 'fsavard_sda1_jobs' 48
35 JOBDB_RESULTS = JOBDB + 'fsavard_sda1_results'
36 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint" 49 EXPERIMENT_PATH = "ift6266.scripts.stacked_dae.nist_sda.jobman_entrypoint"
37 50
38 # There used to be 51 JOB_VALS = {'pretraining_lr': [0.1, 0.01],#, 0.001],#, 0.0001],
39 # 'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1]
40 # and
41 # 'num_hidden_layers':[1,2,3]
42 # but this is now handled by a special mechanism in SgdOptimizer
43 # to reuse intermediate results (for the same training of lower layers,
44 # we can test many finetuning_lr)
45 JOB_VALS = {'pretraining_lr': [0.1, 0.01, 0.001],#, 0.0001],
46 'pretraining_epochs_per_layer': [10,20], 52 'pretraining_epochs_per_layer': [10,20],
47 'hidden_layers_sizes': [300,800], 53 'hidden_layers_sizes': [300,800],
48 'corruption_levels': [0.1,0.2], 54 'corruption_levels': [0.1,0.2,0.3],
49 'minibatch_size': [20], 55 'minibatch_size': [20],
50 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS]} 56 'max_finetuning_epochs':[MAX_FINETUNING_EPOCHS],
51 FINETUNING_LR_VALS = [0.1, 0.01, 0.001]#, 0.0001] 57 'finetuning_lr':[0.1, 0.01], #0.001 was very bad, so we leave it out
52 NUM_HIDDEN_LAYERS_VALS = [1,2,3] 58 'num_hidden_layers':[2,3]}
53 59
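Note on the grid above: produit_croise_jobs (imported from utils, not shown in this changeset) presumably expands JOB_VALS into one job dict per combination of the listed values, i.e. a plain cross product; with the values above that makes 2*2*2*3*1*1*2*2 = 96 jobs. A minimal sketch of such a helper, under that assumption (the real utils.produit_croise_jobs may differ):

    # Hypothetical stand-in for utils.produit_croise_jobs, for illustration only.
    import itertools

    def produit_croise_jobs_sketch(val_dict):
        # val_dict maps each hyperparameter name to a list of candidate values;
        # return one dict per element of the cross product of those lists.
        keys = sorted(val_dict.keys())
        return [dict(zip(keys, combo))
                for combo in itertools.product(*[val_dict[k] for k in keys])]

    # len(produit_croise_jobs_sketch(JOB_VALS)) == 96 with the JOB_VALS above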
54 # Just useful for tests... minimal number of epochs 60 # Just useful for tests... minimal number of epochs
55 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01, 61 DEFAULT_HP_NIST = DD({'finetuning_lr':0.01,
56 'pretraining_lr':0.01, 62 'pretraining_lr':0.01,
57 'pretraining_epochs_per_layer':1, 63 'pretraining_epochs_per_layer':1,
58 'max_finetuning_epochs':1, 64 'max_finetuning_epochs':1,
59 'hidden_layers_sizes':[1000], 65 'hidden_layers_sizes':1000,
60 'corruption_levels':[0.2], 66 'corruption_levels':0.2,
61 'minibatch_size':20}) 67 'minibatch_size':20,
68 'reduce_train_to':1000,
69 'num_hidden_layers':1})
62 70
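DEFAULT_HP_NIST is a jobman DD, a dict whose keys are also readable as attributes, which is why jobman_entrypoint below can write state.hidden_layers_sizes rather than state['hidden_layers_sizes']. Note also that hidden_layers_sizes and corruption_levels are now stored as scalars and only expanded to per-layer lists inside the entrypoint. A quick illustration, assuming DD's usual dict-with-attribute-access behaviour:

    from jobman import DD

    hp = DD({'hidden_layers_sizes': 1000, 'num_hidden_layers': 1})
    assert hp.hidden_layers_sizes == hp['hidden_layers_sizes'] == 1000
    # the entrypoint then builds the per-layer list:
    sizes = [hp.hidden_layers_sizes] * hp.num_hidden_layers   # [1000]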
63 def jobman_entrypoint(state, channel): 71 def jobman_entrypoint(state, channel):
64 state = copy.copy(state) 72 pylearn.version.record_versions(state,[theano,ift6266,pylearn])
73 channel.save()
74
75 workingdir = os.getcwd()
65 76
66 print "Will load NIST" 77 print "Will load NIST"
78 sys.stdout.flush()
79
67 nist = NIST(20) 80 nist = NIST(20)
81
68 print "NIST loaded" 82 print "NIST loaded"
83 sys.stdout.flush()
69 84
70 rtt = None 85 rtt = None
71 if state.has_key('reduce_train_to'): 86 if state.has_key('reduce_train_to'):
72 rtt = state['reduce_train_to'] 87 rtt = state['reduce_train_to']
73 elif REDUCE_TRAIN_TO: 88 elif REDUCE_TRAIN_TO:
81 dataset = (train,valid,test) 96 dataset = (train,valid,test)
82 97
83 n_ins = 32*32 98 n_ins = 32*32
84 n_outs = 62 # 10 digits, 26*2 (lower, capitals) 99 n_outs = 62 # 10 digits, 26*2 (lower, capitals)
85 100
86 db = jobman.sql.db(JOBDB_RESULTS) 101 hls = state.hidden_layers_sizes
87 optimizer = SdaSgdOptimizer(dataset, state, n_ins, n_outs,\ 102 cl = state.corruption_levels
88 input_divider=255.0, job_tree=True, results_db=db, \ 103 nhl = state.num_hidden_layers
89 experiment=EXPERIMENT_PATH, \ 104 state.hidden_layers_sizes = [hls] * nhl
90 finetuning_lr_to_try=FINETUNING_LR_VALS, \ 105 state.corruption_levels = [cl] * nhl
91 num_hidden_layers_to_try=NUM_HIDDEN_LAYERS_VALS) 106
92 optimizer.train() 107 # b,b',W for each hidden layer + b,W of last layer (logreg)
108 numparams = nhl * 3 + 2
109 series_mux = None
110 if SERIES_AVAILABLE:
111 series_mux = create_series(workingdir, numparams)
112
113 optimizer = SdaSgdOptimizer(dataset=dataset, hyperparameters=state, \
114 n_ins=n_ins, n_outs=n_outs,\
115 input_divider=255.0, series_mux=series_mux)
116
117 optimizer.pretrain()
118 channel.save()
119
120 optimizer.finetune()
121 channel.save()
122
123 pylearn.version.record_versions(state,[theano,ift6266,pylearn])
124 channel.save()
93 125
94 return channel.COMPLETE 126 return channel.COMPLETE
95 127
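The reworked entrypoint expands the scalar hyperparameters into per-layer lists and counts the parameter arrays the series will track: each stacked-DAE hidden layer contributes an encoder bias b, a reconstruction bias b' and a weight matrix W, and the final logistic-regression layer adds its own W and b, hence nhl * 3 + 2. The same bookkeeping in isolation, with illustrative values:

    nhl = 2                                # num_hidden_layers from the job grid
    hls, cl = 800, 0.2                     # scalar hidden_layers_sizes / corruption_levels
    hidden_layers_sizes = [hls] * nhl      # [800, 800]
    corruption_levels   = [cl] * nhl       # [0.2, 0.2]
    numparams = nhl * 3 + 2                # 3 arrays per DAE layer + 2 for the log-reg layer -> 8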
96 def estimate_pretraining_time(job): 128 def create_series(basedir, numparams):
97 job = DD(job) 129 mux = SeriesMultiplexer()
98 # time spent on pretraining estimated as O(n^2) where n=num hidens 130
99 # no need to multiply by num_hidden_layers, as results from num=1 131 # comment out series we don't want to save
100 # is reused for num=2, or 3, so in the end we get the same time 132 mux.add_series(AccumulatorSeries(name="reconstruction_error",
101 # as if we were training 3 times a single layer 133 reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
102 # constants: 134 mean=True,
103 # - 20 mins to pretrain a layer with 1000 units (per 1 epoch) 135 directory=basedir, flush_every=1))
104 # - 12 mins to finetune (per 1 epoch) 136
105 # basically the job_tree trick gives us a 5 times speedup on the 137 mux.add_series(AccumulatorSeries(name="training_error",
106 # pretraining time due to reusing for finetuning_lr 138 reduce_every=REDUCE_EVERY, # every 1000 batches, we take the mean and save
107 # and gives us a second x2 speedup for reusing previous layers 139 mean=True,
108 # to explore num_hidden_layers 140 directory=basedir, flush_every=1))
109 return (job.pretraining_epochs_per_layer * 20 / (1000.0*1000) \ 141
110 * job.hidden_layer_sizes * job.hidden_layer_sizes) 142 mux.add_series(BaseSeries(name="validation_error", directory=basedir, flush_every=1))
111 143 mux.add_series(BaseSeries(name="test_error", directory=basedir, flush_every=1))
112 def estimate_total_time(): 144
113 jobs = produit_croise_jobs(JOB_VALS) 145 mux.add_series(ParamsArrayStats(numparams,name="params",directory=basedir))
114 sumtime = 0.0 146
115 sum_without = 0.0 147 return mux
116 for job in jobs:
117 sumtime += estimate_pretraining_time(job)
118 # 12 mins per epoch * 30 epochs
119 # 5 finetuning_lr per pretraining combination
120 sum_without = (12*20*len(jobs) + sumtime*2) * len(FINETUNING_LR_VALS)
121 sumtime += len(FINETUNING_LR_VALS) * len(jobs) * 12 * 20
122 print "num jobs=", len(jobs)
123 print "estimate", sumtime/60, " hours"
124 print "estimate without tree optimization", sum_without/60, "ratio", sumtime / sum_without
125 148
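create_series builds the series_mux handed to SdaSgdOptimizer using the course-local scalar_series module. From the arguments used here, an AccumulatorSeries with reduce_every=REDUCE_EVERY and mean=True presumably buffers appended values and writes out their mean once every REDUCE_EVERY appends, and flush_every=1 then pushes each reduced value to disk immediately. A minimal stand-in with that assumed behaviour, purely for illustration (the real scalar_series API is not shown in this changeset and may differ):

    class AccumulatorSeriesSketch(object):
        # Assumed behaviour: average every `reduce_every` appended values, then emit.
        def __init__(self, name, reduce_every, mean=True):
            self.name = name
            self.reduce_every = reduce_every
            self.mean = mean
            self.buffer = []
            self.reduced = []               # stands in for the on-disk series
        def append(self, value):
            self.buffer.append(value)
            if len(self.buffer) >= self.reduce_every:
                total = sum(self.buffer)
                self.reduced.append(total / float(len(self.buffer)) if self.mean else total)
                self.buffer = []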
126 def jobman_insert_nist(): 149 def jobman_insert_nist():
127 jobs = produit_croise_jobs(JOB_VALS) 150 jobs = produit_croise_jobs(JOB_VALS)
128 151
129 db = jobman.sql.db(JOBDB_JOBS) 152 db = jobman.sql.db(JOBDB)
130 for job in jobs: 153 for job in jobs:
131 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH}) 154 job.update({jobman.sql.EXPERIMENT: EXPERIMENT_PATH})
132 jobman.sql.insert_dict(job, db) 155 jobman.sql.insert_dict(job, db)
133 156
134 print "inserted" 157 print "inserted"
248 if len(args) > 0 and args[0] == 'load_nist': 271 if len(args) > 0 and args[0] == 'load_nist':
249 test_load_nist() 272 test_load_nist()
250 273
251 elif len(args) > 0 and args[0] == 'jobman_insert': 274 elif len(args) > 0 and args[0] == 'jobman_insert':
252 jobman_insert_nist() 275 jobman_insert_nist()
253 elif len(args) > 0 and args[0] == 'test_job_tree': 276
254 # dont forget to comment out sql.inserts and make reduce_train_to=100 277 elif len(args) > 0 and args[0] == 'test_jobman_entrypoint':
255 print "TESTING JOB TREE" 278 chanmock = DD({'COMPLETE':0})
256 chanmock = {'COMPLETE':0} 279 jobman_entrypoint(DEFAULT_HP_NIST, chanmock)
257 hp = copy.copy(DEFAULT_HP_NIST) 280
258 hp.update({'reduce_train_to':100})
259 jobman_entrypoint(hp, chanmock)
260 elif len(args) > 0 and args[0] == 'estimate': 281 elif len(args) > 0 and args[0] == 'estimate':
261 estimate_total_time() 282 estimate_total_time()
262 else: 283 else:
263 sgd_optimization_nist() 284 sgd_optimization_nist()
264 285
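Usage, as inferred from the dispatch above: 'jobman_insert' fills the fsavard_sda2 table with the 96 grid jobs, 'test_jobman_entrypoint' runs DEFAULT_HP_NIST locally against a mock channel (DD({'COMPLETE':0})), 'load_nist' only exercises the data loader, and no argument falls through to sgd_optimization_nist(). The 'estimate' branch still calls estimate_total_time(), which this revision appears to remove, so that branch would now fail. Inserted jobs are meant to be picked up later by jobman workers pointed at JOBDB and EXPERIMENT_PATH; the exact dispatch command is not part of this file.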