# HG changeset patch # User Yoshua Bengio # Date 1300368124 14400 # Node ID ca20f94448dc6b902719af103760cd91dac1ea51 # Parent 249a180795e39674b88f46aea878a55d0d49f8ca# Parent 75dbbe40957887a9793e145cd3ade55b9425ae61 merge diff -r 249a180795e3 -r ca20f94448dc data_generation/mnist_resized/rescale_mnist.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_generation/mnist_resized/rescale_mnist.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,46 @@ +import numpy,cPickle,gzip,Image,pdb,sys + + +def zeropad(vect,img_size=(28,28),out_size=(32,32)): + delta = (numpy.abs(img_size[0]-out_size[0])/2,numpy.abs(img_size[1]-out_size[1])/2) + newvect = numpy.zeros(out_size) + newvect[delta[0]:-delta[0],delta[1]:-delta[1]] = vect.reshape(img_size) + return newvect.flatten() + +def rescale(vect,img_size=(28,28),out_size=(32,32), filter=Image.NEAREST): + im = Image.fromarray(numpy.asarray(vect.reshape(img_size)*255.,dtype='uint8')) + return (numpy.asarray(im.resize(out_size,filter),dtype='float32')/255.).flatten() + + +#pdb.set_trace() +def rescale_mnist(newsize=(32,32),output_file='mnist_rescaled_32_32.pkl',mnist=cPickle.load(gzip.open('mnist.pkl.gz'))): + newmnist = [] + for set in mnist: + newset=numpy.zeros((len(set[0]),newsize[0]*newsize[1])) + for i in xrange(len(set[0])): + print i, + sys.stdout.flush() + newset[i] = rescale(set[0][i]) + newmnist.append((newset,set[1])) + cPickle.dump(newmnist,open(output_file,'w'),protocol=-1) + print 'Done rescaling' + + +def zeropad_mnist(newsize=(32,32),output_file='mnist_zeropadded_32_32.pkl',mnist=cPickle.load(gzip.open('mnist.pkl.gz'))): + newmnist = [] + for set in mnist: + newset=numpy.zeros((len(set[0]),newsize[0]*newsize[1])) + for i in xrange(len(set[0])): + print i, + sys.stdout.flush() + newset[i] = zeropad(set[0][i]) + newmnist.append((newset,set[1])) + cPickle.dump(newmnist,open(output_file,'w'),protocol=-1) + print 'Done padding' + +if __name__ =='__main__': + print 'Creating resized datasets' + mnist_ds = cPickle.load(gzip.open('mnist.pkl.gz')) + #zeropad_mnist(mnist=mnist_ds) + rescale_mnist(mnist=mnist_ds) + print 'Finished.' diff -r 249a180795e3 -r ca20f94448dc data_generation/pipeline/filter_nist.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_generation/pipeline/filter_nist.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,62 @@ +import numpy +from pylearn.io import filetensor as ft +from ift6266 import datasets +from ift6266.datasets.ftfile import FTDataSet + +dataset_str = 'P07_' # NISTP # 'P07safe_' + +#base_path = '/data/lisatmp/ift6266h10/data/'+dataset_str +#base_output_path = '/data/lisatmp/ift6266h10/data/transformed_digits/'+dataset_str+'train' + +base_path = '/data/lisa/data/ift6266h10/data/'+dataset_str +base_output_path = '/data/lisatmp/ift6266h10/data/transformed_digits/'+dataset_str+'train' + +for fileno in range(100): + print "Processing file no ", fileno + + output_data_file = base_output_path+str(fileno)+'_data.ft' + output_labels_file = base_output_path+str(fileno)+'_labels.ft' + + print "Reading from ",base_path+'train'+str(fileno)+'_data.ft' + + dataset = lambda maxsize=None, min_file=0, max_file=100: \ + FTDataSet(train_data = [base_path+'train'+str(fileno)+'_data.ft'], + train_lbl = [base_path+'train'+str(fileno)+'_labels.ft'], + test_data = [base_path+'_test_data.ft'], + test_lbl = [base_path+'_test_labels.ft'], + valid_data = [base_path+'_valid_data.ft'], + valid_lbl = [base_path+'_valid_labels.ft']) + # no conversion or scaling... 
keep data as is + #indtype=theano.config.floatX, inscale=255., maxsize=maxsize) + + ds = dataset() + + all_x = [] + all_y = [] + + all_count = 0 + + for mb_x,mb_y in ds.train(1): + if mb_y[0] <= 9: + all_x.append(mb_x[0]) + all_y.append(mb_y[0]) + + if (all_count+1) % 100000 == 0: + print "Done next 100k" + + all_count += 1 + + # data is stored as uint8 on 0-255 + merged_x = numpy.asarray(all_x, dtype=numpy.uint8) + merged_y = numpy.asarray(all_y, dtype=numpy.int32) + + print "Kept", len(all_x), "(shape ", merged_x.shape, ") examples from", all_count + + f = open(output_data_file, 'wb') + ft.write(f, merged_x) + f.close() + + f = open(output_labels_file, 'wb') + ft.write(f, merged_y) + f.close() + diff -r 249a180795e3 -r ca20f94448dc data_generation/pipeline/visualize_filtered.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_generation/pipeline/visualize_filtered.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,43 @@ +import numpy +import pylab +from pylearn.io import filetensor as ft +from ift6266 import datasets +from ift6266.datasets.ftfile import FTDataSet + +import time +import matplotlib.cm as cm + + +dataset_str = 'P07safe_' #'PNIST07_' # NISTP + +base_path = '/data/lisatmp/ift6266h10/data/'+dataset_str +base_output_path = '/data/lisatmp/ift6266h10/data/transformed_digits/'+dataset_str+'train' + +fileno = 15 + +output_data_file = base_output_path+str(fileno)+'_data.ft' +output_labels_file = base_output_path+str(fileno)+'_labels.ft' + +dataset_obj = lambda maxsize=None, min_file=0, max_file=100: \ + FTDataSet(train_data = [output_data_file], + train_lbl = [output_labels_file], + test_data = [base_path+'_test_data.ft'], + test_lbl = [base_path+'_test_labels.ft'], + valid_data = [base_path+'_valid_data.ft'], + valid_lbl = [base_path+'_valid_labels.ft']) + # no conversion or scaling... keep data as is + #indtype=theano.config.floatX, inscale=255., maxsize=maxsize) + +dataset = dataset_obj() +train_ds = dataset.train(1) + +for i in range(2983): + if i < 2900: + continue + ex = train_ds.next() + pylab.ion() + pylab.clf() + pylab.imshow(ex[0].reshape(32,32),cmap=cm.gray) + pylab.draw() + time.sleep(0.5) + diff -r 249a180795e3 -r ca20f94448dc deep/deep_mlp/job.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/deep_mlp/job.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,335 @@ +#!/usr/bin/env python +# coding: utf-8 + +''' +Launching + +jobman sqlschedules postgres://ift6266h10@gershwin/ift6266h10_sandbox_db/mlp_dumi mlp_jobman.experiment mlp_jobman.conf +'n_hidden={{500,1000,2000}}' +'n_hidden_layers={{2,3}}' +'train_on={{NIST,NISTP,P07}}' +'train_subset={{DIGITS_ONLY,ALL}}' +'learning_rate_log10={{-1.,-2.,-3.}}' + +in mlp_jobman.conf: +rng_seed=1234 +L1_reg=0.0 +L2_reg=0.0 +n_epochs=10 +minibatch_size=20 +''' + +import os, sys, copy, operator, time +import theano +import theano.tensor as T +import numpy +from mlp import MLP +from ift6266 import datasets +from pylearn.io.seriestables import * +import tables +from jobman.tools import DD + +N_INPUTS = 32*32 +REDUCE_EVERY = 250 + +TEST_RUN = False + +TEST_HP = DD({'n_hidden':200, + 'n_hidden_layers': 2, + 'train_on':'NIST', + 'train_subset':'ALL', + 'learning_rate_log10':-2, + 'rng_seed':1234, + 'L1_reg':0.0, + 'L2_reg':0.0, + 'n_epochs':2, + 'minibatch_size':20}) + +########################################### +# digits datasets +# nist_digits is already in NIST_PATH and in ift6266.datasets +# NOTE: for these datasets the test and valid sets are wrong +# (don't correspond to the training set... 
they're just placeholders)
+
+from ift6266.datasets.defs import NIST_PATH, DATA_PATH
+# FTDataSet is used below but was not imported; bring it in explicitly
+from ift6266.datasets.ftfile import FTDataSet
+
+TRANSFORMED_DIGITS_PATH = '/data/lisatmp/ift6266h10/data/transformed_digits'
+
+P07_digits = FTDataSet(\
+                train_data = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'data/P07_train'+str(i)+'_data.ft')\
+                                for i in range(0, 100)],
+                train_lbl = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'data/P07_train'+str(i)+'_labels.ft')\
+                                for i in range(0,100)],
+                test_data = [os.path.join(DATA_PATH,'data/P07_test_data.ft')],
+                test_lbl = [os.path.join(DATA_PATH,'data/P07_test_labels.ft')],
+                valid_data = [os.path.join(DATA_PATH,'data/P07_valid_data.ft')],
+                valid_lbl = [os.path.join(DATA_PATH,'data/P07_valid_labels.ft')],
+                indtype=theano.config.floatX, inscale=255., maxsize=None)
+
+# Added PNIST
+PNIST07_digits = FTDataSet(train_data = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'PNIST07_train'+str(i)+'_data.ft')\
+                                for i in range(0,100)],
+                train_lbl = [os.path.join(TRANSFORMED_DIGITS_PATH,\
+                                'PNIST07_train'+str(i)+'_labels.ft')\
+                                for i in range(0,100)],
+                test_data = [os.path.join(DATA_PATH,'data/PNIST07_test_data.ft')],
+                test_lbl = [os.path.join(DATA_PATH,'data/PNIST07_test_labels.ft')],
+                valid_data = [os.path.join(DATA_PATH,'data/PNIST07_valid_data.ft')],
+                valid_lbl = [os.path.join(DATA_PATH,'data/PNIST07_valid_labels.ft')],
+                indtype=theano.config.floatX, inscale=255., maxsize=None)
+
+
+# building valid/test datasets
+# - we want a dataset_obj for each of the 3 datasets
+# - so we just build FTDataset(train=anything, test=..., valid=PNIST, etc.)
+# - in the array we want pointers to either the test or the valid function,
+#   so NOT dataset_obj, but dataset_obj.train (without the parentheses)
+def build_test_valid_sets():
+    nist_ds = datasets.nist_all()
+    pnist_ds = datasets.PNIST07()
+    p07_ds = datasets.nist_P07()
+
+    test_valid_fns = [nist_ds.test, nist_ds.valid,
+                      pnist_ds.test, pnist_ds.valid,
+                      p07_ds.test, p07_ds.valid]
+
+    test_valid_names = ["nist_all__test", "nist_all__valid",
+                        "NISTP__test", "NISTP__valid",
+                        "P07__test", "P07__valid"]
+
+    return test_valid_fns, test_valid_names
+
+def add_error_series(series, error_name, hdf5_file,
+                     index_names=('minibatch_idx',), use_accumulator=False,
+                     reduce_every=250):
+    # train
+    series_base = ErrorSeries(error_name=error_name,
+                              table_name=error_name,
+                              hdf5_file=hdf5_file,
+                              index_names=index_names)
+
+    if use_accumulator:
+        series[error_name] = \
+            AccumulatorSeriesWrapper(base_series=series_base,
+                                     reduce_every=reduce_every)
+    else:
+        series[error_name] = series_base
+
+TEST_VALID_FNS,TEST_VALID_NAMES = None, None
+def compute_and_save_errors(state, mlp, series, hdf5_file, minibatch_idx):
+    global TEST_VALID_FNS,TEST_VALID_NAMES
+
+    TEST_VALID_FNS,TEST_VALID_NAMES = build_test_valid_sets()
+
+    # if the training is on digits only, then there'll be a 100%
+    # error on the non-digit classes in the valid/test set... just ignore them
+
+    test_fn = theano.function([mlp.input], mlp.logRegressionLayer.y_pred)
+
+    test_batch_size = 100
+    for test_ds_fn,test_ds_name in zip(TEST_VALID_FNS,TEST_VALID_NAMES):
+        # reset error counts for every test/valid set
+        # note: float
+        total_errors = total_digit_errors = \
+            total_uppercase_errors = total_lowercase_errors = 0.
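+        # The masks computed below assume the 62-class NIST label layout:
+        # labels < 10 are digits, labels >= 36 feed the "upper" counters,
+        # and the remaining labels feed the "lower" counters; error rates
+        # are then reported per group as "all", "digits", "lower", "upper".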
+
+        total_all = total_lowercase = total_uppercase = total_digit = 0
+
+        for mb_x,mb_y in test_ds_fn(test_batch_size):
+            digit_mask = mb_y < 10
+            uppercase_mask = mb_y >= 36
+            lowercase_mask = numpy.ones((len(mb_x),)) \
+                                - digit_mask - uppercase_mask
+
+            total_all += len(mb_x)
+            total_digit += sum(digit_mask)
+            total_uppercase += sum(uppercase_mask)
+            total_lowercase += sum(lowercase_mask)
+
+            predictions = test_fn(mb_x)
+
+            all_errors = (mb_y != predictions)
+            total_errors += sum(all_errors)
+
+            if len(all_errors) != len(digit_mask):
+                print "size all", all_errors.shape, " digit", digit_mask.shape
+
+            total_digit_errors += sum(numpy.multiply(all_errors, digit_mask))
+            total_uppercase_errors += sum(numpy.multiply(all_errors, uppercase_mask))
+            total_lowercase_errors += sum(numpy.multiply(all_errors, lowercase_mask))
+
+        four_errors = [float(total_errors) / total_all,
+                       float(total_digit_errors) / total_digit,
+                       float(total_lowercase_errors) / total_lowercase,
+                       float(total_uppercase_errors) / total_uppercase]
+
+        four_errors_names = ["all", "digits", "lower", "upper"]
+
+        # record stats per set
+        print "Errors on", test_ds_name, ",".join(four_errors_names),\
+            ":", ",".join([str(e) for e in four_errors])
+
+        # now in the state
+        for err, errname in zip(four_errors, four_errors_names):
+            error_full_name = 'error__'+test_ds_name+'_'+errname
+            min_name = 'min_'+error_full_name
+            minpos_name = 'minpos_'+error_full_name
+
+            if state.has_key(min_name):
+                if state[min_name] > err:
+                    state[min_name] = err
+                    state[minpos_name] = minibatch_idx
+            else:
+                # also create the series
+                add_error_series(series, error_full_name, hdf5_file,
+                                 index_names=('minibatch_idx',))
+                state[min_name] = err
+                state[minpos_name] = minibatch_idx
+
+            series[error_full_name].append((minibatch_idx,), err)
+
+def jobman_entrypoint(state, channel):
+    global TEST_RUN
+    minibatch_size = state.minibatch_size
+
+    print_every = 100000
+    COMPUTE_ERROR_EVERY = 10**7 / minibatch_size # compute error every 10 million examples
+    if TEST_RUN:
+        print_every = 100
+        COMPUTE_ERROR_EVERY = 1000 / minibatch_size
+
+    print "entrypoint, state is"
+    print state
+
+    ######################
+    # select dataset and dataset subset, plus adjust epoch num to make number
+    # of examples seen independent of dataset
+    # e.g. for the DIGITS_ONLY case the number of epochs must be changed,
+    # and for plain NIST (no transformations) it must be multiplied by 100
+    # from the start, since we do not have the transformed variations
+
+    # compute this in terms of the P07 dataset size (=80M)
+    MINIBATCHES_TO_SEE = state.n_epochs * 8 * (10**6) / minibatch_size
+
+    if state.train_on == 'NIST' and state.train_subset == 'ALL':
+        dataset_obj = datasets.nist_all()
+    elif state.train_on == 'NIST' and state.train_subset == 'DIGITS_ONLY':
+        dataset_obj = datasets.nist_digits()
+    elif state.train_on == 'NISTP' and state.train_subset == 'ALL':
+        dataset_obj = datasets.PNIST07()
+    elif state.train_on == 'NISTP' and state.train_subset == 'DIGITS_ONLY':
+        dataset_obj = PNIST07_digits
+    elif state.train_on == 'P07' and state.train_subset == 'ALL':
+        dataset_obj = datasets.nist_P07()
+    elif state.train_on == 'P07' and state.train_subset == 'DIGITS_ONLY':
+        dataset_obj = P07_digits
+
+    dataset = dataset_obj
+
+    if state.train_subset == 'ALL':
+        n_classes = 62
+    elif state.train_subset == 'DIGITS_ONLY':
+        n_classes = 10
+    else:
+        raise NotImplementedError()
+
+    ###############################
+    # construct model
+
+    print "constructing model..."
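+    # What follows builds the Theano graph: symbolic minibatch inputs x
+    # (rasterized 32x32 images, N_INPUTS = 32*32) and integer labels y,
+    # an MLP with state.n_hidden_layers tanh hidden layers of
+    # state.n_hidden units each, and the regularized cost
+    #     cost = mean NLL + L1_reg * L1 + L2_reg * L2_sqr
+    # minimized by plain SGD updates
+    #     param <- param - 10**learning_rate_log10 * grad(cost, param).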
+    x = T.matrix('x')
+    y = T.ivector('y')
+
+    rng = numpy.random.RandomState(state.rng_seed)
+
+    # construct the MLP class
+    model = MLP(rng = rng, input=x, n_in=N_INPUTS,
+                n_hidden_layers = state.n_hidden_layers,
+                n_hidden = state.n_hidden, n_out=n_classes)
+
+    # cost and training fn
+    cost = T.mean(model.negative_log_likelihood(y)) \
+                + state.L1_reg * model.L1 \
+                + state.L2_reg * model.L2_sqr
+
+    print "L1, L2: ", state.L1_reg, state.L2_reg
+
+    gradient_nll_wrt_params = []
+    for param in model.params:
+        gparam = T.grad(cost, param)
+        gradient_nll_wrt_params.append(gparam)
+
+    learning_rate = 10**float(state.learning_rate_log10)
+    print "Learning rate", learning_rate
+
+    train_updates = {}
+    for param, gparam in zip(model.params, gradient_nll_wrt_params):
+        train_updates[param] = param - learning_rate * gparam
+
+    train_fn = theano.function([x,y], cost, updates=train_updates)
+
+    #######################
+    # create series
+    basedir = os.getcwd()
+
+    h5f = tables.openFile(os.path.join(basedir, "series.h5"), "w")
+
+    series = {}
+    add_error_series(series, "training_error", h5f,
+                     index_names=('minibatch_idx',), use_accumulator=True,
+                     reduce_every=REDUCE_EVERY)
+
+    ##########################
+    # training loop
+
+    start_time = time.clock()
+
+    print "begin training..."
+    print "will train for", MINIBATCHES_TO_SEE, "minibatches"
+
+    mb_idx = 0
+    last_costs = []
+
+    # train until MINIBATCHES_TO_SEE minibatches have been processed,
+    # iterating over the training stream; cap the run early in TEST_RUN mode
+    while mb_idx*minibatch_size < MINIBATCHES_TO_SEE*minibatch_size:
+        for mb_x, mb_y in dataset.train(minibatch_size):
+            if TEST_RUN and mb_idx > 1000:
+                break
+
+            last_cost = train_fn(mb_x, mb_y)
+            series["training_error"].append((mb_idx,), last_cost)
+
+            last_costs.append(last_cost)
+            if (len(last_costs)+1) % print_every == 0:
+                print "Mean over last", print_every, "minibatches: ", numpy.mean(last_costs)
+                last_costs = []
+
+            if (mb_idx+1) % COMPUTE_ERROR_EVERY == 0:
+                # compute errors
+                print "computing errors on all datasets..."
+                print "Time since training began: ", (time.clock()-start_time)/60., "minutes"
+                compute_and_save_errors(state, model, series, h5f, mb_idx)
+
+            channel.save()
+
+            sys.stdout.flush()
+
+            mb_idx += 1
+
+    end_time = time.clock()
+
+    print "-"*80
+    print "Finished.
Training took", (end_time-start_time)/60., "minutes" + print state + +def run_test(): + global TEST_RUN + from fsml.job_management import mock_channel + TEST_RUN = True + jobman_entrypoint(TEST_HP, mock_channel) + +if __name__ == '__main__': + run_test() + diff -r 249a180795e3 -r ca20f94448dc deep/deep_mlp/logistic_sgd.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/deep_mlp/logistic_sgd.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,223 @@ +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T + +class LogisticRegression(object): + def __init__(self, input, n_in, n_out): + self.W = theano.shared(value=numpy.zeros((n_in,n_out), + dtype = theano.config.floatX), + name='W') + self.b = theano.shared(value=numpy.zeros((n_out,), + dtype = theano.config.floatX), + name='b') + + self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W)+self.b) + + self.y_pred=T.argmax(self.p_y_given_x, axis=1) + + self.params = [self.W, self.b] + + def negative_log_likelihood(self, y): + return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]),y]) + + + def errors(self, y): + if y.ndim != self.y_pred.ndim: + raise TypeError('y should have the same shape as self.y_pred', + ('y', target.type, 'y_pred', self.y_pred.type)) + + if y.dtype.startswith('int'): + return T.mean(T.neq(self.y_pred, y)) + else: + raise NotImplementedError() + + +def load_data(dataset): + ''' Loads the dataset + + :type dataset: string + :param dataset: the path to the dataset (here MNIST) + ''' + + ############# + # LOAD DATA # + ############# + print '... loading data' + + # Load the dataset + f = gzip.open(dataset,'rb') + train_set, valid_set, test_set = cPickle.load(f) + f.close() + + + def shared_dataset(data_xy): + """ Function that loads the dataset into shared variables + + The reason we store our dataset in shared variables is to allow + Theano to copy it into the GPU memory (when code is run on GPU). + Since copying data into the GPU is slow, copying a minibatch everytime + is needed (the default behaviour if the data is not in a shared + variable) would lead to a large decrease in performance. + """ + data_x, data_y = data_xy + shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX)) + shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX)) + # When storing data on the GPU it has to be stored as floats + # therefore we will store the labels as ``floatX`` as well + # (``shared_y`` does exactly that). But during our computations + # we need them as ints (we use labels as index, and if they are + # floats it doesn't make sense) therefore instead of returning + # ``shared_y`` we will have to cast it to int. 
This little hack + # lets ous get around this issue + return shared_x, T.cast(shared_y, 'int32') + + test_set_x, test_set_y = shared_dataset(test_set) + valid_set_x, valid_set_y = shared_dataset(valid_set) + train_set_x, train_set_y = shared_dataset(train_set) + + rval = [(train_set_x, train_set_y), (valid_set_x,valid_set_y), (test_set_x, test_set_y)] + return rval + + + + +def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000, dataset='../data/mnist.pkl.gz', + batch_size = 600): + datasets = load_data(dataset) + + train_set_x, train_set_y = datasets[0] + valid_set_x, valid_set_y = datasets[1] + test_set_x , test_set_y = datasets[2] + + # compute number of minibatches for training, validation and testing + n_train_batches = train_set_x.value.shape[0] / batch_size + n_valid_batches = valid_set_x.value.shape[0] / batch_size + n_test_batches = test_set_x.value.shape[0] / batch_size + + + ###################### + # BUILD ACTUAL MODEL # + ###################### + print '... building the model' + + + # allocate symbolic variables for the data + index = T.lscalar() # index to a [mini]batch + x = T.matrix('x') # the data is presented as rasterized images + y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + + # construct the logistic regression class + # Each MNIST image has size 28*28 + classifier = LogisticRegression( input=x, n_in=28*28, n_out=10) + + # the cost we minimize during training is the negative log likelihood of + # the model in symbolic format + cost = classifier.negative_log_likelihood(y) + + # compiling a Theano function that computes the mistakes that are made by + # the model on a minibatch + test_model = theano.function(inputs = [index], + outputs = classifier.errors(y), + givens={ + x:test_set_x[index*batch_size:(index+1)*batch_size], + y:test_set_y[index*batch_size:(index+1)*batch_size]}) + + validate_model = theano.function( inputs = [index], + outputs = classifier.errors(y), + givens={ + x:valid_set_x[index*batch_size:(index+1)*batch_size], + y:valid_set_y[index*batch_size:(index+1)*batch_size]}) + + # compute the gradient of cost with respect to theta = (W,b) + g_W = T.grad(cost = cost, wrt = classifier.W) + g_b = T.grad(cost = cost, wrt = classifier.b) + + # specify how to update the parameters of the model as a dictionary + updates ={classifier.W: classifier.W - learning_rate*g_W,\ + classifier.b: classifier.b - learning_rate*g_b} + + # compiling a Theano function `train_model` that returns the cost, but in + # the same time updates the parameter of the model based on the rules + # defined in `updates` + train_model = theano.function(inputs = [index], + outputs = cost, + updates = updates, + givens={ + x:train_set_x[index*batch_size:(index+1)*batch_size], + y:train_set_y[index*batch_size:(index+1)*batch_size]}) + + ############### + # TRAIN MODEL # + ############### + print '... training the model' + # early-stopping parameters + patience = 5000 # look as this many examples regardless + patience_increase = 2 # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(n_train_batches, patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + best_params = None + best_validation_loss = float('inf') + test_score = 0. 
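+    # Early stopping, as used below: training ends either after n_epochs or
+    # once `patience` (counted in minibatch iterations) runs out.  Whenever
+    # the validation error drops below best_validation_loss *
+    # improvement_threshold, patience is extended to at least
+    # iter * patience_increase and the model is re-evaluated on the test set.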
+ start_time = time.clock() + + done_looping = False + epoch = 0 + while (epoch < n_epochs) and (not done_looping): + epoch = epoch + 1 + for minibatch_index in xrange(n_train_batches): + + minibatch_avg_cost = train_model(minibatch_index) + # iteration number + iter = epoch * n_train_batches + minibatch_index + + if (iter+1) % validation_frequency == 0: + # compute zero-one loss on validation set + validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] + this_validation_loss = numpy.mean(validation_losses) + + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1,n_train_batches, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + best_validation_loss = this_validation_loss + # test it on the test set + + test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_score = numpy.mean(test_losses) + + print((' epoch %i, minibatch %i/%i, test error of best ' + 'model %f %%') % \ + (epoch, minibatch_index+1, n_train_batches,test_score*100.)) + + if patience <= iter : + done_looping = True + break + + end_time = time.clock() + print(('Optimization complete with best validation score of %f %%,' + 'with test performance %f %%') % + (best_validation_loss * 100., test_score*100.)) + print 'The code run for %d epochs, with %f epochs/sec'%(epoch,1.*epoch/(end_time-start_time)) + print >> sys.stderr, ('The code for file '+os.path.split(__file__)[1]+' ran for %.1fs' % ((end_time-start_time))) + +if __name__ == '__main__': + sgd_optimization_mnist() + + diff -r 249a180795e3 -r ca20f94448dc deep/deep_mlp/mlp.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deep/deep_mlp/mlp.py Thu Mar 17 09:22:04 2011 -0400 @@ -0,0 +1,210 @@ +__docformat__ = 'restructedtext en' + +import numpy, time, cPickle, gzip, sys, os + +import theano +import theano.tensor as T + +from logistic_sgd import LogisticRegression, load_data + +class HiddenLayer(object): + def __init__(self, rng, input, n_in, n_out, activation = T.tanh): + print "Creating HiddenLayer with params" + print locals() + + self.input = input + + W_values = numpy.asarray( rng.uniform( + low = - numpy.sqrt(6./(n_in+n_out)), + high = numpy.sqrt(6./(n_in+n_out)), + size = (n_in, n_out)), dtype = theano.config.floatX) + if activation == theano.tensor.nnet.sigmoid: + W_values *= 4 + + self.W = theano.shared(value = W_values, name ='W') + + b_values = numpy.zeros((n_out,), dtype= theano.config.floatX) + self.b = theano.shared(value= b_values, name ='b') + + self.output = activation(T.dot(input, self.W) + self.b) + + self.params = [self.W, self.b] + + +class MLP(object): + def __init__(self, rng, input, n_in, n_hidden_layers, n_hidden, n_out): + print "Creating MLP with params" + print locals() + + self.input = input + + self.hiddenLayers = [] + + last_input = input + last_n_out = n_in + for i in range(n_hidden_layers): + self.hiddenLayers.append(\ + HiddenLayer(rng = rng, input = last_input, + n_in = last_n_out, + n_out = n_hidden, + activation = T.tanh)) + last_input = self.hiddenLayers[-1].output + last_n_out = n_hidden + + self.logRegressionLayer = LogisticRegression( + input = self.hiddenLayers[-1].output, + n_in = n_hidden, + n_out = n_out) + + self.L1 = abs(self.logRegressionLayer.W).sum() + for h in self.hiddenLayers: + 
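+            # accumulate each hidden layer's weights into the L1 penalty
+            # (the logistic regression layer's weights were added above)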
self.L1 += abs(h.W).sum() + + self.L2_sqr = (self.logRegressionLayer.W**2).sum() + for h in self.hiddenLayers: + self.L2_sqr += (h.W**2).sum() + + self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood + + self.errors = self.logRegressionLayer.errors + + self.params = [] + for hl in self.hiddenLayers: + self.params += hl.params + self.params += self.logRegressionLayer.params + + +def test_mlp( learning_rate=0.01, L1_reg = 0.00, L2_reg = 0.0001, n_epochs=1000, + dataset = '../data/mnist.pkl.gz', batch_size = 20): + datasets = load_data(dataset) + + train_set_x, train_set_y = datasets[0] + valid_set_x, valid_set_y = datasets[1] + test_set_x , test_set_y = datasets[2] + + n_train_batches = train_set_x.value.shape[0] / batch_size + n_valid_batches = valid_set_x.value.shape[0] / batch_size + n_test_batches = test_set_x.value.shape[0] / batch_size + + ###################### + # BUILD ACTUAL MODEL # + ###################### + print '... building the model' + + # allocate symbolic variables for the data + index = T.lscalar() # index to a [mini]batch + x = T.matrix('x') # the data is presented as rasterized images + y = T.ivector('y') # the labels are presented as 1D vector of + # [int] labels + + rng = numpy.random.RandomState(1234) + + # construct the MLP class + classifier = MLP( rng = rng, input=x, n_in=28*28, n_hidden = 500, n_out=10) + + # the cost we minimize during training is the negative log likelihood of + # the model plus the regularization terms (L1 and L2); cost is expressed + # here symbolically + cost = classifier.negative_log_likelihood(y) \ + + L1_reg * classifier.L1 \ + + L2_reg * classifier.L2_sqr + + # compiling a Theano function that computes the mistakes that are made + # by the model on a minibatch + test_model = theano.function(inputs = [index], + outputs = classifier.errors(y), + givens={ + x:test_set_x[index*batch_size:(index+1)*batch_size], + y:test_set_y[index*batch_size:(index+1)*batch_size]}) + + validate_model = theano.function(inputs = [index], + outputs = classifier.errors(y), + givens={ + x:valid_set_x[index*batch_size:(index+1)*batch_size], + y:valid_set_y[index*batch_size:(index+1)*batch_size]}) + + # compute the gradient of cost with respect to theta (sotred in params) + # the resulting gradients will be stored in a list gparams + gparams = [] + for param in classifier.params: + gparam = T.grad(cost, param) + gparams.append(gparam) + + + # specify how to update the parameters of the model as a dictionary + updates = {} + # given two list the zip A = [ a1,a2,a3,a4] and B = [b1,b2,b3,b4] of + # same length, zip generates a list C of same size, where each element + # is a pair formed from the two lists : + # C = [ (a1,b1), (a2,b2), (a3,b3) , (a4,b4) ] + for param, gparam in zip(classifier.params, gparams): + updates[param] = param - learning_rate*gparam + + # compiling a Theano function `train_model` that returns the cost, but + # in the same time updates the parameter of the model based on the rules + # defined in `updates` + train_model =theano.function( inputs = [index], outputs = cost, + updates = updates, + givens={ + x:train_set_x[index*batch_size:(index+1)*batch_size], + y:train_set_y[index*batch_size:(index+1)*batch_size]}) + + ############### + # TRAIN MODEL # + ############### + print '... 
training' + + # early-stopping parameters + patience = 10000 # look as this many examples regardless + patience_increase = 2 # wait this much longer when a new best is + # found + improvement_threshold = 0.995 # a relative improvement of this much is + # considered significant + validation_frequency = min(n_train_batches,patience/2) + # go through this many + # minibatche before checking the network + # on the validation set; in this case we + # check every epoch + + + best_params = None + best_validation_loss = float('inf') + best_iter = 0 + test_score = 0. + start_time = time.clock() + + epoch = 0 + done_looping = False + + while (epoch < n_epochs) and (not done_looping): + epoch = epoch + 1 + for minibatch_index in xrange(n_train_batches): + + minibatch_avg_cost = train_model(minibatch_index) + # iteration number + iter = epoch * n_train_batches + minibatch_index + + if (iter+1) % validation_frequency == 0: + # compute zero-one loss on validation set + validation_losses = [validate_model(i) for i in xrange(n_valid_batches)] + this_validation_loss = numpy.mean(validation_losses) + + print('epoch %i, minibatch %i/%i, validation error %f %%' % \ + (epoch, minibatch_index+1,n_train_batches, \ + this_validation_loss*100.)) + + + # if we got the best validation score until now + if this_validation_loss < best_validation_loss: + #improve patience if loss improvement is good enough + if this_validation_loss < best_validation_loss * \ + improvement_threshold : + patience = max(patience, iter * patience_increase) + + best_validation_loss = this_validation_loss + # test it on the test set + + test_losses = [test_model(i) for i in xrange(n_test_batches)] + test_score = numpy.mean(test_losses) + +