comparison deep/rbm/rbm.py @ 372:1e99dc965b5b

correcting some bugs
author goldfinger
date Sun, 25 Apr 2010 13:28:45 -0400
parents d81284e13d77
children e36ccffb3870
@@ -14,14 +14,17 @@
 import numpy
 import pylab
 import time
 import theano.tensor.nnet
 import pylearn
-import ift6266
-import theano,pylearn.version,ift6266
+#import ift6266
+import theano,pylearn.version #,ift6266
 from pylearn.io import filetensor as ft
-from ift6266 import datasets
+#from ift6266 import datasets
+
+from jobman.tools import DD, flatten
+from jobman import sql
 
 from theano.tensor.shared_randomstreams import RandomStreams
 
 from utils import tile_raster_images
 from logistic_sgd import load_data
@@ -238,12 +241,11 @@
 
         return cross_entropy
 
 
 
-def test_rbm(b_size = 25, nhidden = 1000, kk = 1, persistance = 0,
-             dataset= 0):
+def test_rbm(b_size = 20, nhidden = 1000, kk = 1, persistance = 0):
     """
     Demonstrate ***
 
     This is demonstrated on MNIST.
 
@@ -255,32 +257,51 @@
 
     """
 
     learning_rate=0.1
 
-    if data_set==0:
-        datasets=datasets.nist_all()
-    elif data_set==1:
-        datasets=datasets.nist_P07()
-    elif data_set==2:
-        datasets=datasets.PNIST07()
+#    if data_set==0:
+#        datasets=datasets.nist_all()
+#    elif data_set==1:
+#        datasets=datasets.nist_P07()
+#    elif data_set==2:
+#        datasets=datasets.PNIST07()
 
 
+    data_path = '/data/lisa/data/nist/by_class/'
+    f = open(data_path+'all/all_train_data.ft')
+    g = open(data_path+'all/all_train_labels.ft')
+    h = open(data_path+'all/all_test_data.ft')
+    i = open(data_path+'all/all_test_labels.ft')
+
+    train_set_x = theano.shared(ft.read(f))
+    train_set_y = ft.read(g)
+    test_set_x = ft.read(h)
+    test_set_y = ft.read(i)
+
+    f.close()
+    g.close()
+    i.close()
+    h.close()
+
+    #t = len(train_set_x)
+    print train_set_x.value.shape
+
     # TODO: rework the data loading
     ## dataset = load_data(dataset)
     ##
     ## train_set_x, train_set_y = datasets[0]
     ## test_set_x , test_set_y = datasets[2]
-    ## training_epochs = 10 # to be determined
+    training_epochs = 1 # to be determined
 
     batch_size = b_size    # size of the minibatch
 
     # compute number of minibatches for training, validation and testing
-    #n_train_batches = train_set_x.value.shape[0] / batch_size
+    n_train_batches = train_set_x.value.shape[0] / batch_size
 
     # allocate symbolic variables for the data
-    index = T.lscalar()    # index to a [mini]batch
+    index = T.scalar()    # index to a [mini]batch
     x = T.matrix('x')  # the data is presented as rasterized images
 
     rng = numpy.random.RandomState(123)
     theano_rng = RandomStreams( rng.randint(2**30))
 
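
The new loading block pulls the NIST train and test sets straight from filetensor (.ft) files, and wraps only the training inputs in a theano shared variable, which the index-based givens below require. For reference, a minimal round-trip sketch of the pylearn.io.filetensor calls used here; the array shape and file name are made up, and it assumes a pylearn checkout on the path:

    import numpy
    from pylearn.io import filetensor as ft

    a = numpy.random.rand(10, 32*32).astype('float32')  # hypothetical data

    f = open('example.ft', 'wb')   # write the array in .ft format
    ft.write(f, a)
    f.close()

    f = open('example.ft', 'rb')   # read it back as a numpy array
    b = ft.read(f)
    f.close()

    assert (a == b).all()          # lossless round trip
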
@@ -302,32 +323,36 @@
     cost, updates = rbm.cd(lr=learning_rate, persistent=None, k= kk)
 
     #################################
     #     Training the RBM         #
     #################################
-    dirname = 'data=%i'%dataset + ' persistance=%i'%persistance + ' n_hidden=%i'%n_hidden + 'batch_size=i%'%b_size
+    #os.chdir('~')
+    dirname = str(persistance) + '_' + str(nhidden) + '_' + str(b_size) + '_'+ str(kk)
     os.makedirs(dirname)
     os.chdir(dirname)
-
+    print 'yes'
     # it is ok for a theano function to have no output
     # the purpose of train_rbm is solely to update the RBM parameters
-    train_rbm = theano.function([x], cost,
+    train_rbm = theano.function([index], cost,
            updates = updates,
-           )
+           givens = { x: train_set_x[index*batch_size:(index+1)*batch_size]})
 
+    print 'yep'
     plotting_time = 0.0
     start_time = time.clock()
     bufsize = 1000
 
     # go through training epochs
     costs = []
     for epoch in xrange(training_epochs):
 
         # go through the training set
         mean_cost = []
-        for mini_x, mini_y in datasets.train(b_size):
-            mean_cost += [train_rbm(mini_x)]
+        for batch_index in xrange(n_train_batches):
+            mean_cost += [train_rbm(batch_index)]
+#        for mini_x, mini_y in datasets.train(b_size):
+#            mean_cost += [train_rbm(mini_x)]
 ##        learning_rate = learning_rate - 0.0001
 ##        learning_rate = learning_rate/(tau+( epoch*batch_index*batch_size))
 
     #learning_rate = learning_rate/10
 
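
The substantive change in this hunk is the minibatch plumbing: instead of iterating over dataset minibatches and feeding arrays in, the compiled function now takes only an index, and givens substitutes the matching slice of the shared training matrix for x at compile time. Note, though, that the hunk also switches index from T.lscalar() to T.scalar(), which defaults to a float, while the Theano tutorials use an integer scalar for slicing; the self-contained sketch below (toy quadratic cost, hypothetical sizes) keeps the integer version:

    import numpy
    import theano
    import theano.tensor as T

    batch_size = 20
    data = theano.shared(numpy.random.rand(100, 5).astype('float32'))
    w = theano.shared(numpy.zeros(5, dtype='float32'))

    index = T.lscalar()    # integer minibatch index
    x = T.matrix('x')

    cost = T.mean((T.dot(x, w) - 1) ** 2)    # stand-in for the RBM cost
    updates = {w: w - 0.1 * T.grad(cost, w)}

    # givens swaps a slice of the shared data in for x, so no minibatch
    # arrays cross the python/theano boundary on each call
    train = theano.function([index], cost, updates = updates,
            givens = {x: data[index*batch_size:(index+1)*batch_size]})

    for i in xrange(100 / batch_size):
        print train(i)
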
@@ -346,22 +371,20 @@
     end_time = time.clock()
 
     pretraining_time = (end_time - start_time) - plotting_time
 
 
-
-
-
 
     #################################
     #     Sampling from the RBM     #
     #################################
 
     # find out the number of test samples
-    number_of_test_samples = 1000
-
-    test_set_x, test_y = datasets.test(100*b_size)
+    #number_of_test_samples = 100
+    number_of_test_samples = test_set_x.value.shape[0]
+
+    #test_set_x, test_y = datasets.test(100*b_size)
     # pick random test examples, with which to initialize the persistent chain
     test_idx = rng.randint(number_of_test_samples - b_size)
     persistent_vis_chain = theano.shared(test_set_x.value[test_idx:test_idx+b_size])
 
     # define one step of Gibbs sampling (mf = mean-field)
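
The hunk ends at the comment introducing the sampler. For reference, one vhv Gibbs step in a binary RBM is two mean-field sigmoids, each followed by a Bernoulli sample; a plain numpy sketch of that step (the file builds the same thing symbolically with theano_rng):

    import numpy

    def sigmoid(a):
        return 1.0 / (1.0 + numpy.exp(-a))

    def gibbs_vhv(v, W, hbias, vbias, rng):
        h_mean = sigmoid(numpy.dot(v, W) + hbias)           # P(h=1 | v), the mf value
        h_sample = rng.binomial(n=1, p=h_mean)              # sample the hiddens
        v_mean = sigmoid(numpy.dot(h_sample, W.T) + vbias)  # P(v=1 | h)
        v_sample = rng.binomial(n=1, p=v_mean)              # sample the visibles
        return v_mean, v_sample
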
@@ -401,27 +424,45 @@
             image.save('sample_%i_step_%i.png'%(idx,idx*jdx))
 
     #save the model
     model = [rbm.W, rbm.vbias, rbm.hbias]
     f = open('params.txt', 'w')
-    pickle.dump(model, f)
+    cPickle.dump(model, f, protocol = -1)
     f.close()
     #os.chdir('./..')
-    return numpy.mean(costs), pretraining_time/360
+    return numpy.mean(costs), pretraining_time*36
 
 
 def experiment(state, channel):
 
     (mean_cost, time_execution) = test_rbm(b_size = state.b_size,\
                                            nhidden = state.nhidden,\
                                            kk = state.kk,\
                                            persistance = state.persistance,\
-                                           dataset = state.dataset)
+                                           )
 
     state.mean_costs = mean_cost
     state.time_execution = time_execution
     pylearn.version.record_versions(state,[theano,ift6266,pylearn])
     return channel.COMPLETE
 
 if __name__ == '__main__':
 
-    test_rbm()
+    TABLE_NAME='RBM_tapha'
+
+    # DB path...
+    test_rbm()
+    #db = sql.db('postgres://ift6266h10:f0572cd63b@gershwin/ift6266h10_db/'+ TABLE_NAME)
+
+    #state = DD()
+    #for b_size in 50, 75, 100:
+    #    state.b_size = b_size
+    #    for nhidden in 1000,1250,1500:
+    #        state.nhidden = nhidden
+    #        for kk in 1,2,3,4:
+    #            state.kk = kk
+    #            for persistance in 0,1:
+    #                state.persistance = persistance
+    #                sql.insert_job(rbm.experiment, flatten(state), db)
+
+
+    #db.createView(TABLE_NAME + 'view')
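
The commented-out block is the intended jobman workflow: build a DD state (a dict with attribute access), set one hyperparameter combination per innermost iteration, and insert one job row per combination, flattened to the dotted-key dict the sql module expects. A cleaned-up sketch of that sweep, assuming the postgres database in the URL exists and that experiment (referenced as rbm.experiment in the original, i.e. this module) is importable by the jobman workers:

    from jobman.tools import DD, flatten
    from jobman import sql

    TABLE_NAME = 'RBM_tapha'
    # connection URL copied from the file; the database must already exist
    db = sql.db('postgres://ift6266h10:f0572cd63b@gershwin/ift6266h10_db/' + TABLE_NAME)

    state = DD()
    for b_size in 50, 75, 100:
        state.b_size = b_size
        for nhidden in 1000, 1250, 1500:
            state.nhidden = nhidden
            for kk in 1, 2, 3, 4:
                state.kk = kk
                for persistance in 0, 1:
                    state.persistance = persistance
                    # one database row per hyperparameter combination
                    sql.insert_job(experiment, flatten(state), db)

    db.createView(TABLE_NAME + 'view')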