comparison scripts/stacked_dae/nist_sda.py @ 131:5c79a2557f2f

Un peu de ménage dans code pour stacked DAE, splitté en fichiers dans un nouveau sous-répertoire.
author savardf
date Fri, 19 Feb 2010 08:43:10 -0500
parents
children 7d8366fb90bf
comparison
equal deleted inserted replaced
130:38929c29b602 131:5c79a2557f2f
1 #!/usr/bin/python
2 # coding: utf-8
3
4 import numpy
5 import theano
6 import time
7 import theano.tensor as T
8 from theano.tensor.shared_randomstreams import RandomStreams
9
10 import os.path
11
12 from sgd_optimization import sgd_optimization
13
14 from jobman import DD
15 from pylearn.io import filetensor
16
17 from utils import produit_croise_jobs
18
# Default directory searched by the NIST class for its train/test .ft files.
NIST_ALL_LOCATION = '/data/lisa/data/nist/by_class/all'

# Fallback hyperparameters used when sgd_optimization_nist() receives none.
# Only meant for quick tests: one pretraining epoch per layer and one
# finetuning epoch, i.e. the minimal number of epochs.
DEFAULT_HP_NIST = DD({
    'finetuning_lr': 0.1,
    'pretraining_lr': 0.1,
    'pretraining_epochs_per_layer': 1,
    'max_finetuning_epochs': 1,
    'hidden_layers_sizes': [1000, 1000],
    'corruption_levels': [0.2, 0.2],
    'minibatch_size': 20,
})
29
def jobman_entrypoint_nist(state, channel):
    """Jobman entry point: run the NIST experiment described by ``state``.

    state -- hyperparameters for the run, forwarded unchanged to
             sgd_optimization_nist().
    channel -- accepted as part of the entry-point signature; not used here.

    Returns None.
    """
    sgd_optimization_nist(hp=state)
32
def jobman_insert_nist():
    """Insert one job per combination of the hyperparameter grid below.

    The grid is expanded by produit_croise_jobs() and every resulting job
    is handed to insert_job().
    """
    # NOTE(review): insert_job is neither defined nor imported in the part
    # of the file visible here -- confirm where it is supposed to come from.
    # NOTE(review): the keys 'hidden_layer_sizes' / 'num_hidden_layers' are
    # spelled differently from DEFAULT_HP_NIST's 'hidden_layers_sizes';
    # verify which spelling the optimizer expects.
    search_space = {
        'finetuning_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1],
        'pretraining_lr': [0.00001, 0.0001, 0.001, 0.01, 0.1],
        'pretraining_epochs_per_layer': [2, 5, 20],
        'hidden_layer_sizes': [100, 300, 1000],
        'num_hidden_layers': [1, 2, 3],
        'corruption_levels': [0.1, 0.2, 0.4],
        'minibatch_size': [5, 20, 100],
    }

    for candidate in produit_croise_jobs(search_space):
        insert_job(candidate)
46
47
class NIST:
    """Holds the NIST train, validation and test sets in memory.

    Each set is a two-element list ``[inputs, labels]``.  The train and
    test sets are read from disk on construction; the validation set stays
    empty until split_train_valid() is called explicitly.
    """

    def __init__(self, minibatch_size, basepath=None):
        """Read the train and test files found under ``basepath``.

        minibatch_size -- stored on the instance; not used elsewhere in
                          this class.
        basepath -- directory containing the .ft files.  Any falsy value
                    (None, '') selects the module-level NIST_ALL_LOCATION.
        """
        self.minibatch_size = minibatch_size
        self.basepath = basepath if basepath else NIST_ALL_LOCATION

        self.set_filenames()

        # Two-element lists: index 0 holds the inputs, index 1 the labels.
        self.train = [None, None]
        self.test = [None, None]

        self.load_train_test()

        # The validation split is deliberately not performed here; callers
        # that want one must call split_train_valid() themselves.
        self.valid = [[], []]

    def get_tvt(self):
        """Return the ``(train, valid, test)`` tuple of [inputs, labels] lists."""
        return self.train, self.valid, self.test

    def set_filenames(self):
        """Set the [data, labels] file names, relative to ``self.basepath``."""
        self.train_files = ['all_train_data.ft',
                            'all_train_labels.ft']

        self.test_files = ['all_test_data.ft',
                           'all_test_labels.ft']

    def load_train_test(self):
        """Fill ``self.train`` and ``self.test`` from their respective files."""
        self.load_data_labels(self.train_files, self.train)
        self.load_data_labels(self.test_files, self.test)

    def load_data_labels(self, filenames, pair):
        """Read each file of ``filenames`` into the matching slot of ``pair``.

        filenames -- file names relative to ``self.basepath``.
        pair -- list mutated in place; pair[i] receives the content of
                filenames[i] as returned by filetensor.read().
        """
        for i, fn in enumerate(filenames):
            # Opened in binary mode since the files are read by a tensor
            # reader, and through ``with`` so the handle is released even
            # if the read raises.
            with open(os.path.join(self.basepath, fn), 'rb') as f:
                pair[i] = filetensor.read(f)

    def split_train_valid(self):
        """Move the tail of the training set into the validation set.

        The validation set receives as many trailing examples as there are
        test examples; the training set keeps the rest.  An empty test set
        leaves the training set untouched and the validation set empty.
        """
        test_len = len(self.test[0])

        for i in (0, 1):
            data = self.train[i]
            # An explicit split index is used instead of negative slicing:
            # with test_len == 0, data[:-0] is empty and data[-0:] is the
            # whole array, which would swap the two sets.
            split_at = max(len(data) - test_len, 0)
            self.valid[i] = data[split_at:]
            self.train[i] = data[:split_at]
98
99 def test_load_nist():
100 print "Will load NIST"
101
102 import time
103 t1 = time.time()
104 nist = NIST(20)
105 t2 = time.time()
106
107 print "NIST loaded. time delta = ", t2-t1
108
109 tr,v,te = nist.get_tvt()
110
111 print "Lenghts: ", len(tr[0]), len(v[0]), len(te[0])
112
113 raw_input("Press any key")
114
115 # hp for hyperparameters
116 def sgd_optimization_nist(hp=None, dataset_dir='/data/lisa/data/nist'):
117 global DEFAULT_HP_NIST
118 hp = hp and hp or DEFAULT_HP_NIST
119
120 print "Will load NIST"
121
122 import time
123 t1 = time.time()
124 nist = NIST(20)
125 t2 = time.time()
126
127 print "NIST loaded. time delta = ", t2-t1
128
129 train,valid,test = nist.get_tvt()
130 dataset = (train,valid,test)
131
132 print "Lenghts train, valid, test: ", len(train[0]), len(valid[0]), len(test[0])
133
134 n_ins = 32*32
135 n_outs = 62 # 10 digits, 26*2 (lower, capitals)
136
137 sgd_optimization(dataset, hp, n_ins, n_outs)
138
if __name__ == '__main__':
    # Command line: "load_nist" as first argument runs the loading smoke
    # test; anything else (or no argument) runs the default optimization.
    import sys

    if sys.argv[1:2] == ['load_nist']:
        test_load_nist()
    else:
        sgd_optimization_nist()
150