# HG changeset patch # User humel # Date 1268785153 14400 # Node ID 1bf046c0c84aef2293be171f639789730dd649bc # Parent 7e6fecabb6562388e74ff2bcb1894b7e8261f150 Fixed a bug with image_shape. diff -r 7e6fecabb656 -r 1bf046c0c84a deep/convolutional_dae/stacked_convolutional_dae.py --- a/deep/convolutional_dae/stacked_convolutional_dae.py Tue Mar 16 14:46:25 2010 -0400 +++ b/deep/convolutional_dae/stacked_convolutional_dae.py Tue Mar 16 20:19:13 2010 -0400 @@ -21,7 +21,7 @@ def __init__(self, rng, input, n_in, n_out): self.input = input - + W_values = numpy.asarray( rng.uniform( \ low = -numpy.sqrt(6./(n_in+n_out)), \ high = numpy.sqrt(6./(n_in+n_out)), \ @@ -37,8 +37,7 @@ class dA_conv(object): def __init__(self, input, filter_shape, corruption_level = 0.1, - shared_W = None, shared_b = None, image_shape = None, - poolsize = (2,2)): + shared_W = None, shared_b = None, image_shape = None): theano_rng = RandomStreams() @@ -70,7 +69,9 @@ self.tilde_x = theano_rng.binomial( self.x.shape, 1, 1 - corruption_level,dtype=theano.config.floatX) * self.x conv1_out = conv.conv2d(self.tilde_x, self.W, filter_shape=filter_shape, - image_shape=image_shape, unroll_kern=4,unroll_batch=4, border_mode='valid') + image_shape=image_shape, + unroll_kern=4,unroll_batch=4, + border_mode='valid') self.y = T.tanh(conv1_out + self.b.dimshuffle('x', 0, 'x', 'x')) @@ -78,7 +79,8 @@ da_filter_shape = [ filter_shape[1], filter_shape[0], filter_shape[2],\ filter_shape[3] ] - da_image_shape = [ image_shape[0], filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] + da_image_shape = [ batch_size, filter_shape[0], image_shape[2]-filter_shape[2]+1, image_shape[3]-filter_shape[3]+1 ] + #import pdb; pdb.set_trace() initial_W_prime = numpy.asarray( numpy.random.uniform( \ low = -numpy.sqrt(6./(fan_in+fan_out)), \ high = numpy.sqrt(6./(fan_in+fan_out)), \ @@ -113,7 +115,8 @@ self.b = theano.shared(value=b_values) conv_out = conv.conv2d(input, self.W, - filter_shape=filter_shape, image_shape=image_shape, unroll_kern=4,unroll_batch=4) + filter_shape=filter_shape, image_shape=image_shape, + unroll_kern=4,unroll_batch=4) fan_in = numpy.prod(filter_shape[1:]) @@ -151,7 +154,7 @@ max_poolsize=conv_hidden_layers_sizes[i][2] if i == 0 : - layer_input=self.x.reshape((self.x.shape[0], 1, 32, 32)) + layer_input=self.x.reshape((batch_size, 1, 32, 32)) else: layer_input=self.layers[-1].output @@ -167,7 +170,7 @@ da_layer = dA_conv(corruption_level = corruption_levels[0], input = layer_input, shared_W = layer.W, shared_b = layer.b, - filter_shape = filter_shape, + filter_shape=filter_shape, image_shape = image_shape ) gparams = T.grad(da_layer.cost, da_layer.params) @@ -218,9 +221,9 @@ self.errors = self.logLayer.errors(self.y) -def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 1, \ +def sgd_optimization_mnist( learning_rate=0.1, pretraining_epochs = 0, \ pretrain_lr = 0.1, training_epochs = 1000, \ - kernels = [ [4,5,5] , [4,3,3] ], mlp_layers=[500], \ + kernels = [ [2,5,5] , [2,3,3] ], mlp_layers=[500], \ corruption_levels = [ 0.2, 0.2, 0.2], batch_size = batch_size, \ max_pool_layers = [ [2,2] , [2,2] ], \ dataset=datasets.nist_digits): @@ -232,7 +235,7 @@ y = T.ivector('y') # the labels are presented as 1d vector of # [int] labels - layer0_input = x.reshape((x.shape[0],1,32,32)) + layer0_input = x.reshape((batch_size,1,32,32)) rng = numpy.random.RandomState(1234) conv_layers=[] @@ -241,14 +244,15 @@ max_pool_layers[0] ] conv_layers.append(init_layer) - conv_n_out = (32-kernels[0][2]+1)/max_pool_layers[0][0] - + conv_n_out = int((32-kernels[0][2]+1)/max_pool_layers[0][0]) + print init_layer[1] + for i in range(1,len(kernels)): layer = [ [ kernels[i][0],kernels[i-1][0],kernels[i][1],kernels[i][2] ],\ - [ batch_size, kernels[i-1][0], conv_n_out,conv_n_out ], + [ batch_size, kernels[i-1][0],conv_n_out,conv_n_out ], max_pool_layers[i] ] conv_layers.append(layer) - conv_n_out = (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0] + conv_n_out = int( (conv_n_out - kernels[i][2]+1)/max_pool_layers[i][0]) print layer [1] network = SdA(input = layer0_input, n_ins_mlp = kernels[-1][0]*conv_n_out**2, conv_hidden_layers_sizes = conv_layers, @@ -263,7 +267,9 @@ for i in xrange(len(network.layers)-len(mlp_layers)): for epoch in xrange(pretraining_epochs): for x, y in dataset.train(batch_size): - c = network.pretrain_functions[i](x) + if x.shape[0] == batch_size: + c = network.pretrain_functions[i](x) + print 'pre-training convolution layer %i, epoch %d, cost '%(i,epoch), c patience = 10000 # look as this many examples regardless @@ -285,12 +291,16 @@ while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for x, y in dataset.train(batch_size): - + if x.shape[0] != batch_size: + continue cost_ij = network.finetune(x, y) iter += 1 if iter % validation_frequency == 0: - validation_losses = [test_model(xv, yv) for xv, yv in dataset.valid(batch_size)] + validation_losses = [] + for xv, yv in dataset.valid(batch_size): + if xv.shape[0] == batch_size: + validation_losses.append(test_model(xv, yv)) this_validation_loss = numpy.mean(validation_losses) print('epoch %i, iter %i, validation error %f %%' % \ (epoch, iter, this_validation_loss*100.)) @@ -308,7 +318,10 @@ best_iter = iter # test it on the test set - test_losses = [test_model(xt, yt) for xt, yt in dataset.test(batch_size)] + test_losses=[] + for xt, yt in dataset.test(batch_size): + if xt.shape[0] == batch_size: + test_losses.append(test_model(xt, yt)) test_score = numpy.mean(test_losses) print((' epoch %i, iter %i, test error of best ' 'model %f %%') %