pylearn/sandbox/train_mcRBM.py @ 1488:440e1afe28a3
changeset: Fix a mistake in .hgignore.
author:    Valentin Bisson <bissonva@iro.umontreal.ca>
date:      Fri, 22 Jul 2011 11:41:32 -0400
parents:   d4a14c6c36e0
""" This is a copy of mcRBM training that James modified to print out more information, visualize filters, etc. Once mcRBM is stable, it can be deleted. """ # mcRBM training # Refer to Ranzato and Hinton CVPR 2010 "Modeling Pixel Means and Covariances Using Factorized Third-Order BMs" # # Marc'Aurelio Ranzato # 28 July 2010 import sys import numpy as np import cudamat as cmt from scipy.io import loadmat, savemat #import gpu_lock # put here you locking system package, if any from ConfigParser import * demodata = None from pylearn.io import image_tiling def tile(X, fname): X = np.dot(X, demodata['invpcatransf'].T) R=16 C=16 X = (X[:,:256], X[:,256:512], X[:,512:], None) #X = (X[:,0::3], X[:,1::3], X[:,2::3], None) _img = image_tiling.tile_raster_images(X, img_shape=(R,C), min_dynamic_range=1e-2) image_tiling.save_tiled_raster_images(_img, fname) def save_imshow(X, fname): image_tiling.Image.fromarray( (image_tiling.scale_to_unit_interval(X)*255).astype('uint8'), 'L').save(fname) ###################################################################### # compute the value of the free energy at a given input # F = - sum log(1+exp(- .5 FH (VF data/norm(data))^2 + bias_cov)) +... # - sum log(1+exp(w_mean data + bias_mean)) + ... # - bias_vis data + 0.5 data^2 # NOTE: FH is constrained to be positive # (in the paper the sign is negative but the sign in front of it is also flipped) def compute_energy_mcRBM(data,normdata,vel,energy,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t6,feat,featsq,feat_mean,length,lengthsq,normcoeff,small,num_vis): # normalize input data vectors data.mult(data, target = t6) # DxP (nr input dims x nr samples) t6.sum(axis = 0, target = lengthsq) # 1xP lengthsq.mult(0.5, target = energy) # energy of quadratic regularization term lengthsq.mult(1./num_vis) # normalize by number of components (like std) lengthsq.add(small) # small prevents division by 0 cmt.sqrt(lengthsq, target = length) length.reciprocal(target = normcoeff) # 1xP data.mult_by_row(normcoeff, target = normdata) # normalized data ## potential # covariance contribution cmt.dot(VF.T, normdata, target = feat) # HxP (nr factors x nr samples) feat.mult(feat, target = featsq) # HxP cmt.dot(FH.T,featsq, target = t1) # OxP (nr cov hiddens x nr samples) t1.mult(-0.5) t1.add_col_vec(bias_cov) # OxP cmt.exp(t1) # OxP t1.add(1, target = t2) # OxP cmt.log(t2) t2.mult(-1) energy.add_sums(t2, axis=0) # mean contribution cmt.dot(w_mean.T, data, target = feat_mean) # HxP (nr mean hiddens x nr samples) feat_mean.add_col_vec(bias_mean) # HxP cmt.exp(feat_mean) feat_mean.add(1) cmt.log(feat_mean) feat_mean.mult(-1) energy.add_sums(feat_mean, axis=0) # visible bias term data.mult_by_col(bias_vis, target = t6) t6.mult(-1) # DxP energy.add_sums(t6, axis=0) # 1xP # kinetic vel.mult(vel, target = t6) energy.add_sums(t6, axis = 0, mult = .5) ################################################################# # compute the derivative if the free energy at a given input def compute_gradient_mcRBM(data,normdata,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t3,t4,t6,feat,featsq,feat_mean,gradient,normgradient,length,lengthsq,normcoeff,small,num_vis): # normalize input data data.mult(data, target = t6) # DxP t6.sum(axis = 0, target = lengthsq) # 1xP lengthsq.mult(1./num_vis) # normalize by number of components (like std) lengthsq.add(small) cmt.sqrt(lengthsq, target = length) length.reciprocal(target = normcoeff) # 1xP data.mult_by_row(normcoeff, target = normdata) # normalized data cmt.dot(VF.T, normdata, target = feat) # HxP feat.mult(feat, 
target = featsq) # HxP cmt.dot(FH.T,featsq, target = t1) # OxP t1.mult(-.5) t1.add_col_vec(bias_cov) # OxP t1.apply_sigmoid(target = t2) # OxP cmt.dot(FH,t2, target = t3) # HxP t3.mult(feat) cmt.dot(VF, t3, target = normgradient) # VxP # final bprop through normalization length.mult(lengthsq, target = normcoeff) normcoeff.reciprocal() # 1xP normgradient.mult(data, target = gradient) # VxP gradient.sum(axis = 0, target = t4) # 1xP t4.mult(-1./num_vis) data.mult_by_row(t4, target = gradient) normgradient.mult_by_row(lengthsq, target = t6) gradient.add(t6) gradient.mult_by_row(normcoeff) # add quadratic term gradient gradient.add(data) # add visible bias term gradient.add_col_mult(bias_vis, -1) # add MEAN contribution to gradient cmt.dot(w_mean.T, data, target = feat_mean) # HxP feat_mean.add_col_vec(bias_mean) # HxP feat_mean.apply_sigmoid() # HxP gradient.subtract_dot(w_mean,feat_mean) # VxP ############################################################3 # Hybrid Monte Carlo sampler def draw_HMC_samples(data,negdata,normdata,vel,gradient,normgradient,new_energy,old_energy,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,hmc_step,hmc_step_nr,hmc_ave_rej,hmc_target_ave_rej,t1,t2,t3,t4,t5,t6,t7,thresh,feat,featsq,batch_size,feat_mean,length,lengthsq,normcoeff,small,num_vis): vel.fill_with_randn() negdata.assign(data) compute_energy_mcRBM(negdata,normdata,vel,old_energy,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t6,feat,featsq,feat_mean,length,lengthsq,normcoeff,small,num_vis) compute_gradient_mcRBM(negdata,normdata,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t3,t4,t6,feat,featsq,feat_mean,gradient,normgradient,length,lengthsq,normcoeff,small,num_vis) # half step vel.add_mult(gradient, -0.5*hmc_step) negdata.add_mult(vel,hmc_step) # full leap-frog steps for ss in range(hmc_step_nr - 1): ## re-evaluate the gradient compute_gradient_mcRBM(negdata,normdata,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t3,t4,t6,feat,featsq,feat_mean,gradient,normgradient,length,lengthsq,normcoeff,small,num_vis) # update variables vel.add_mult(gradient, -hmc_step) negdata.add_mult(vel,hmc_step) # final half-step compute_gradient_mcRBM(negdata,normdata,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t3,t4,t6,feat,featsq,feat_mean,gradient,normgradient,length,lengthsq,normcoeff,small,num_vis) vel.add_mult(gradient, -0.5*hmc_step) # compute new energy compute_energy_mcRBM(negdata,normdata,vel,new_energy,VF,FH,bias_cov,bias_vis,w_mean,bias_mean,t1,t2,t6,feat,featsq,feat_mean,length,lengthsq,normcoeff,small,num_vis) # rejecton old_energy.subtract(new_energy, target = thresh) cmt.exp(thresh) t4.fill_with_rand() t4.less_than(thresh) # update negdata and rejection rate t4.mult(-1) t4.add(1) # now 1's detect rejections t4.sum(axis = 1, target = t5) t5.copy_to_host() rej = t5.numpy_array[0,0]/batch_size data.mult_by_row(t4, target = t6) negdata.mult_by_row(t4, target = t7) negdata.subtract(t7) negdata.add(t6) hmc_ave_rej = 0.9*hmc_ave_rej + 0.1*rej if hmc_ave_rej < hmc_target_ave_rej: hmc_step = min(hmc_step*1.01,0.25) else: hmc_step = max(hmc_step*0.99,.001) return hmc_step, hmc_ave_rej ###################################################### # mcRBM trainer: sweeps over the training set. # For each batch of samples compute derivatives to update the parameters # at the training samples and at the negative samples drawn calling HMC sampler. 
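
# ----------------------------------------------------------------------
# Added note (not part of the original Ranzato/James code): the cudamat
# calls above are hard to read at a glance, so here is a small NumPy
# sketch of the *potential* part of compute_energy_mcRBM (the kinetic
# 0.5*||vel||^2 term is left out).  The name free_energy_np and the
# default small=0.5 are illustrative assumptions; biases are column
# vectors (Ox1, Vx1, Hx1), matching the GPU code.  Use it only as a CPU
# cross-check on tiny batches.
def free_energy_np(data, VF, FH, bias_cov, bias_vis, w_mean, bias_mean, small=0.5):
    num_vis = data.shape[0]                               # data is DxP
    lengthsq = (data ** 2).sum(axis=0) / num_vis + small  # per-sample squared norm (1xP)
    normdata = data / np.sqrt(lengthsq)                   # normalized columns
    featsq = np.dot(VF.T, normdata) ** 2                  # HxP, squared factor outputs
    t1 = -0.5 * np.dot(FH.T, featsq) + bias_cov           # OxP, cov hidden inputs
    energy = 0.5 * (data ** 2).sum(axis=0)                # quadratic term
    energy -= np.logaddexp(0., t1).sum(axis=0)            # - sum log(1+exp(t1))
    fm = np.dot(w_mean.T, data) + bias_mean               # HxP, mean hidden inputs
    energy -= np.logaddexp(0., fm).sum(axis=0)            # - sum log(1+exp(fm))
    energy -= (bias_vis * data).sum(axis=0)               # visible bias term
    return energy                                         # one value per column of data
# ----------------------------------------------------------------------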
######################################################
# mcRBM trainer: sweeps over the training set.
# For each batch of samples, compute derivatives to update the parameters
# at the training samples and at the negative samples drawn by calling the HMC sampler.
def train_mcRBM():

    config = ConfigParser()
    config.read('input_configuration')

    verbose = config.getint('VERBOSITY', 'verbose')

    num_epochs = config.getint('MAIN_PARAMETER_SETTING', 'num_epochs')
    batch_size = config.getint('MAIN_PARAMETER_SETTING', 'batch_size')
    startFH = config.getint('MAIN_PARAMETER_SETTING', 'startFH')
    startwd = config.getint('MAIN_PARAMETER_SETTING', 'startwd')
    doPCD = config.getint('MAIN_PARAMETER_SETTING', 'doPCD')

    # model parameters
    num_fac = config.getint('MODEL_PARAMETER_SETTING', 'num_fac')
    num_hid_cov = config.getint('MODEL_PARAMETER_SETTING', 'num_hid_cov')
    num_hid_mean = config.getint('MODEL_PARAMETER_SETTING', 'num_hid_mean')
    apply_mask = config.getint('MODEL_PARAMETER_SETTING', 'apply_mask')

    # load data
    data_file_name = config.get('DATA', 'data_file_name')
    d = loadmat(data_file_name)  # input in the format PxD (P vectorized samples with D dimensions)
    global demodata
    demodata = d
    totnumcases = d["whitendata"].shape[0]
    # keep an integer number of minibatches (int() keeps the slice index integral)
    d = d["whitendata"][0:int(np.floor(totnumcases / batch_size)) * batch_size, :].copy()
    totnumcases = d.shape[0]
    num_vis = d.shape[1]
    num_batches = int(totnumcases / batch_size)
    dev_dat = cmt.CUDAMatrix(d.T)  # VxP

    tile(d[:100], "100_whitened_data.png")

    # training parameters
    epsilon = config.getfloat('OPTIMIZER_PARAMETERS', 'epsilon')
    epsilonVF = 2 * epsilon
    epsilonFH = 0.02 * epsilon
    epsilonb = 0.02 * epsilon
    epsilonw_mean = 0.2 * epsilon
    epsilonb_mean = 0.1 * epsilon
    weightcost_final = config.getfloat('OPTIMIZER_PARAMETERS', 'weightcost_final')

    # HMC setting
    hmc_step_nr = config.getint('HMC_PARAMETERS', 'hmc_step_nr')
    hmc_step = 0.01
    hmc_target_ave_rej = config.getfloat('HMC_PARAMETERS', 'hmc_target_ave_rej')
    hmc_ave_rej = hmc_target_ave_rej

    # initialize weights
    VF = cmt.CUDAMatrix(np.array(0.02 * np.random.randn(num_vis, num_fac), dtype=np.float32, order='F'))  # VxH
    if apply_mask == 0:
        FH = cmt.CUDAMatrix(np.array(np.eye(num_fac, num_hid_cov), dtype=np.float32, order='F'))  # HxO
    else:
        dd = loadmat('your_FHinit_mask_file.mat')  # see CVPR2010paper_material/topo2D_3x3_stride2_576filt.mat for an example
        FH = cmt.CUDAMatrix(np.array(dd["FH"], dtype=np.float32, order='F'))
    bias_cov = cmt.CUDAMatrix(np.array(2.0 * np.ones((num_hid_cov, 1)), dtype=np.float32, order='F'))
    bias_vis = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
    w_mean = cmt.CUDAMatrix(np.array(0.05 * np.random.randn(num_vis, num_hid_mean), dtype=np.float32, order='F'))  # VxH
    bias_mean = cmt.CUDAMatrix(np.array(-2.0 * np.ones((num_hid_mean, 1)), dtype=np.float32, order='F'))

    # initialize variables to store derivatives
    VFinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, num_fac)), dtype=np.float32, order='F'))
    FHinc = cmt.CUDAMatrix(np.array(np.zeros((num_fac, num_hid_cov)), dtype=np.float32, order='F'))
    bias_covinc = cmt.CUDAMatrix(np.array(np.zeros((num_hid_cov, 1)), dtype=np.float32, order='F'))
    bias_visinc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, 1)), dtype=np.float32, order='F'))
    w_meaninc = cmt.CUDAMatrix(np.array(np.zeros((num_vis, num_hid_mean)), dtype=np.float32, order='F'))
    bias_meaninc = cmt.CUDAMatrix(np.array(np.zeros((num_hid_mean, 1)), dtype=np.float32, order='F'))

    # initialize temporary storage
    data = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))  # VxP
    normdata = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))  # VxP
    negdataini = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))  # VxP
    feat = cmt.CUDAMatrix(np.array(np.empty((num_fac, batch_size)), dtype=np.float32, order='F'))
    featsq = cmt.CUDAMatrix(np.array(np.empty((num_fac, batch_size)), dtype=np.float32, order='F'))
    negdata = cmt.CUDAMatrix(np.array(np.random.randn(num_vis, batch_size), dtype=np.float32, order='F'))
    old_energy = cmt.CUDAMatrix(np.array(np.zeros((1, batch_size)), dtype=np.float32, order='F'))
    new_energy = cmt.CUDAMatrix(np.array(np.zeros((1, batch_size)), dtype=np.float32, order='F'))
    gradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))  # VxP
    normgradient = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))  # VxP
    thresh = cmt.CUDAMatrix(np.array(np.zeros((1, batch_size)), dtype=np.float32, order='F'))
    feat_mean = cmt.CUDAMatrix(np.array(np.empty((num_hid_mean, batch_size)), dtype=np.float32, order='F'))
    vel = cmt.CUDAMatrix(np.array(np.random.randn(num_vis, batch_size), dtype=np.float32, order='F'))
    length = cmt.CUDAMatrix(np.array(np.zeros((1, batch_size)), dtype=np.float32, order='F'))  # 1xP
    lengthsq = cmt.CUDAMatrix(np.array(np.zeros((1, batch_size)), dtype=np.float32, order='F'))  # 1xP
    normcoeff = cmt.CUDAMatrix(np.array(np.zeros((1, batch_size)), dtype=np.float32, order='F'))  # 1xP

    if apply_mask == 1:  # used to constrain very large FH matrices, only allowing values in a neighborhood to change
        dd = loadmat('your_FHinit_mask_file.mat')
        mask = cmt.CUDAMatrix(np.array(dd["mask"], dtype=np.float32, order='F'))
    normVF = 1
    small = 0.5

    # other temporary vars
    t1 = cmt.CUDAMatrix(np.array(np.empty((num_hid_cov, batch_size)), dtype=np.float32, order='F'))
    t2 = cmt.CUDAMatrix(np.array(np.empty((num_hid_cov, batch_size)), dtype=np.float32, order='F'))
    t3 = cmt.CUDAMatrix(np.array(np.empty((num_fac, batch_size)), dtype=np.float32, order='F'))
    t4 = cmt.CUDAMatrix(np.array(np.empty((1, batch_size)), dtype=np.float32, order='F'))
    t5 = cmt.CUDAMatrix(np.array(np.empty((1, 1)), dtype=np.float32, order='F'))
    t6 = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))
    t7 = cmt.CUDAMatrix(np.array(np.empty((num_vis, batch_size)), dtype=np.float32, order='F'))
    t8 = cmt.CUDAMatrix(np.array(np.empty((num_vis, num_fac)), dtype=np.float32, order='F'))
    t9 = cmt.CUDAMatrix(np.array(np.zeros((num_fac, num_hid_cov)), dtype=np.float32, order='F'))
    t10 = cmt.CUDAMatrix(np.array(np.empty((1, num_fac)), dtype=np.float32, order='F'))
    t11 = cmt.CUDAMatrix(np.array(np.empty((1, num_hid_cov)), dtype=np.float32, order='F'))

    # start training
    for epoch in range(num_epochs):

        def print_stuff():
            print "VF: " + '%3.2e' % VF.euclid_norm() \
                    + ", DVF: " + '%3.2e' % (VFinc.euclid_norm() * (epsilonVFc / batch_size)) \
                    + ", VF_inc: " + '%3.2e' % (VFinc.euclid_norm()) \
                    + ", FH: " + '%3.2e' % FH.euclid_norm() \
                    + ", DFH: " + '%3.2e' % (FHinc.euclid_norm() * (epsilonFHc / batch_size)) \
                    + ", bias_cov: " + '%3.2e' % bias_cov.euclid_norm() \
                    + ", Dbias_cov: " + '%3.2e' % (bias_covinc.euclid_norm() * (epsilonbc / batch_size)) \
                    + ", bias_vis: " + '%3.2e' % bias_vis.euclid_norm() \
                    + ", Dbias_vis: " + '%3.2e' % (bias_visinc.euclid_norm() * (epsilonbc / batch_size)) \
                    + ", wm: " + '%3.2e' % w_mean.euclid_norm() \
                    + ", Dwm: " + '%3.2e' % (w_meaninc.euclid_norm() * (epsilonw_meanc / batch_size)) \
                    + ", bm: " + '%3.2e' % bias_mean.euclid_norm() \
                    + ", Dbm: " + '%3.2e' % (bias_meaninc.euclid_norm() * (epsilonb_meanc / batch_size)) \
                    + ", step: " + '%3.2e' % hmc_step \
                    + ", rej: " + '%3.2e' % hmc_ave_rej
            sys.stdout.flush()

        def save_stuff():
            VF.copy_to_host()
            FH.copy_to_host()
            bias_cov.copy_to_host()
            w_mean.copy_to_host()
            bias_mean.copy_to_host()
            bias_vis.copy_to_host()
            savemat("ws_temp", {
                'VF': VF.numpy_array,
                'FH': FH.numpy_array,
                'bias_cov': bias_cov.numpy_array,
                'bias_vis': bias_vis.numpy_array,
                'w_mean': w_mean.numpy_array,
                'bias_mean': bias_mean.numpy_array,
                'epoch': epoch})
            tile(VF.numpy_array.T, 'VF_%000i.png' % epoch)
            tile(w_mean.numpy_array.T, 'w_mean_%000i.png' % epoch)
            save_imshow(FH.numpy_array, 'FH_%000i.png' % epoch)

        # anneal learning rates
        epsilonVFc = epsilonVF / max(1, epoch / 20)
        epsilonFHc = epsilonFH / max(1, epoch / 20)
        epsilonbc = epsilonb / max(1, epoch / 20)
        epsilonw_meanc = epsilonw_mean / max(1, epoch / 20)
        epsilonb_meanc = epsilonb_mean / max(1, epoch / 20)

        weightcost = weightcost_final

        if epoch <= startFH:
            epsilonFHc = 0
        if epoch <= startwd:
            weightcost = 0

        print "Epoch " + str(epoch + 1), 'num_batches', num_batches
        if epoch == 0:
            print_stuff()

        for batch in range(num_batches):

            # get current minibatch
            data = dev_dat.slice(batch * batch_size, (batch + 1) * batch_size)  # DxP (nr dims x nr samples)

            # normalize input data
            data.mult(data, target = t6)             # DxP
            t6.sum(axis = 0, target = lengthsq)      # 1xP
            lengthsq.mult(1. / num_vis)              # normalize by number of components (like std)
            lengthsq.add(small)                      # small avoids division by 0
            cmt.sqrt(lengthsq, target = length)
            length.reciprocal(target = normcoeff)    # 1xP
            data.mult_by_row(normcoeff, target = normdata)  # normalized data

            ## compute positive sample derivatives
            # covariance part
            cmt.dot(VF.T, normdata, target = feat)   # HxP (nr facs x nr samples)
            feat.mult(feat, target = featsq)         # HxP
            cmt.dot(FH.T, featsq, target = t1)       # OxP (nr cov hiddens x nr samples)
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)                 # OxP
            t1.apply_sigmoid(target = t2)            # OxP
            cmt.dot(featsq, t2.T, target = FHinc)    # HxO
            cmt.dot(FH, t2, target = t3)             # HxP
            t3.mult(feat)
            cmt.dot(normdata, t3.T, target = VFinc)  # VxH
            t2.sum(axis = 1, target = bias_covinc)
            bias_covinc.mult(-1)

            # visible bias
            data.sum(axis = 1, target = bias_visinc)
            bias_visinc.mult(-1)

            # mean part
            cmt.dot(w_mean.T, data, target = feat_mean)  # HxP (nr mean hiddens x nr samples)
            feat_mean.add_col_vec(bias_mean)         # HxP
            feat_mean.apply_sigmoid()                # HxP
            feat_mean.mult(-1)
            cmt.dot(data, feat_mean.T, target = w_meaninc)
            feat_mean.sum(axis = 1, target = bias_meaninc)

            # HMC sampling: draw an approximate sample from the model
            if doPCD == 0:  # CD-1 (set negative data to current training samples)
                hmc_step, hmc_ave_rej = draw_HMC_samples(data, negdata, normdata,
                        vel, gradient, normgradient, new_energy, old_energy, VF,
                        FH, bias_cov, bias_vis, w_mean, bias_mean, hmc_step,
                        hmc_step_nr, hmc_ave_rej, hmc_target_ave_rej, t1, t2, t3,
                        t4, t5, t6, t7, thresh, feat, featsq, batch_size,
                        feat_mean, length, lengthsq, normcoeff, small, num_vis)
            else:  # PCD-1 (use previous negative data as starting point for chain)
                negdataini.assign(negdata)
                hmc_step, hmc_ave_rej = draw_HMC_samples(negdataini, negdata,
                        normdata, vel, gradient, normgradient, new_energy,
                        old_energy, VF, FH, bias_cov, bias_vis, w_mean, bias_mean,
                        hmc_step, hmc_step_nr, hmc_ave_rej, hmc_target_ave_rej,
                        t1, t2, t3, t4, t5, t6, t7, thresh, feat, featsq,
                        batch_size, feat_mean, length, lengthsq, normcoeff,
                        small, num_vis)

            # compute derivatives at the negative samples
            # normalize input data
            negdata.mult(negdata, target = t6)       # DxP
            t6.sum(axis = 0, target = lengthsq)      # 1xP
            lengthsq.mult(1. / num_vis)              # normalize by number of components (like std)
            lengthsq.add(small)
            cmt.sqrt(lengthsq, target = length)
            length.reciprocal(target = normcoeff)    # 1xP
            negdata.mult_by_row(normcoeff, target = normdata)  # normalized data

            # covariance part
            cmt.dot(VF.T, normdata, target = feat)   # HxP
            feat.mult(feat, target = featsq)         # HxP
            cmt.dot(FH.T, featsq, target = t1)       # OxP
            t1.mult(-0.5)
            t1.add_col_vec(bias_cov)                 # OxP
            t1.apply_sigmoid(target = t2)            # OxP
            FHinc.subtract_dot(featsq, t2.T)         # HxO
            FHinc.mult(0.5)
            cmt.dot(FH, t2, target = t3)             # HxP
            t3.mult(feat)
            VFinc.subtract_dot(normdata, t3.T)       # VxH
            bias_covinc.add_sums(t2, axis = 1)

            # visible bias
            bias_visinc.add_sums(negdata, axis = 1)

            # mean part
            cmt.dot(w_mean.T, negdata, target = feat_mean)  # HxP
            feat_mean.add_col_vec(bias_mean)         # HxP
            feat_mean.apply_sigmoid()                # HxP
            w_meaninc.add_dot(negdata, feat_mean.T)
            bias_meaninc.add_sums(feat_mean, axis = 1)

            # update parameters
            VFinc.add_mult(VF.sign(), weightcost)    # L1 regularization
            VF.add_mult(VFinc, -epsilonVFc / batch_size)
            # normalize columns of VF: normalize by running average of their norm
            VF.mult(VF, target = t8)
            t8.sum(axis = 0, target = t10)
            cmt.sqrt(t10)
            t10.sum(axis = 1, target = t5)
            t5.copy_to_host()
            normVF = .95 * normVF + (.05 / num_fac) * t5.numpy_array[0, 0]  # estimate norm
            t10.reciprocal()
            VF.mult_by_row(t10)
            VF.mult(normVF)
            bias_cov.add_mult(bias_covinc, -epsilonbc / batch_size)
            bias_vis.add_mult(bias_visinc, -epsilonbc / batch_size)

            if epoch > startFH:
                FHinc.add_mult(FH.sign(), weightcost)  # L1 regularization
                FH.add_mult(FHinc, -epsilonFHc / batch_size)  # update
                # set to 0 negative entries in FH
                FH.greater_than(0, target = t9)
                FH.mult(t9)
                if apply_mask == 1:
                    FH.mult(mask)
                # normalize columns of FH: L1 norm set to 1 in each column
                FH.sum(axis = 0, target = t11)
                t11.reciprocal()
                FH.mult_by_row(t11)

            w_meaninc.add_mult(w_mean.sign(), weightcost)
            w_mean.add_mult(w_meaninc, -epsilonw_meanc / batch_size)
            bias_mean.add_mult(bias_meaninc, -epsilonb_meanc / batch_size)

            if verbose == 1:
                print_stuff()

        # back-up every once in a while
        if np.mod(epoch, 1) == 0:
            save_stuff()

    # final back-up
    VF.copy_to_host()
    FH.copy_to_host()
    bias_cov.copy_to_host()
    bias_vis.copy_to_host()
    w_mean.copy_to_host()
    bias_mean.copy_to_host()
    savemat("ws_fac" + str(num_fac) + "_cov" + str(num_hid_cov) + "_mean" + str(num_hid_mean),
            {'VF': VF.numpy_array, 'FH': FH.numpy_array,
             'bias_cov': bias_cov.numpy_array, 'bias_vis': bias_vis.numpy_array,
             'w_mean': w_mean.numpy_array, 'bias_mean': bias_mean.numpy_array,
             'epoch': epoch})

####################################################################
# main
if __name__ == "__main__":
    # initialize CUDA
    #cmt.cuda_set_device(gpu_lock.obtain_lock_id())  # uncomment if you have a locking system or want to choose the GPU board number
    cmt.cublas_init()
    cmt.CUDAMatrix.init_random(1)
    train_mcRBM()
    cmt.cublas_shutdown()
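
# ----------------------------------------------------------------------
# Added note (not part of the original code): train_mcRBM() reads a file
# named 'input_configuration' from the working directory via ConfigParser.
# The sections and option names below are exactly the ones queried above;
# the numeric values and the data file name are placeholders only, NOT the
# settings used in the CVPR 2010 paper.
#
#   [VERBOSITY]
#   verbose = 1
#
#   [MAIN_PARAMETER_SETTING]
#   num_epochs = 100
#   batch_size = 128
#   startFH = 10
#   startwd = 10
#   doPCD = 0
#
#   [MODEL_PARAMETER_SETTING]
#   num_fac = 256
#   num_hid_cov = 256
#   num_hid_mean = 100
#   apply_mask = 0
#
#   [DATA]
#   data_file_name = whitened_patches.mat
#
#   [OPTIMIZER_PARAMETERS]
#   epsilon = 0.15
#   weightcost_final = 0.001
#
#   [HMC_PARAMETERS]
#   hmc_step_nr = 20
#   hmc_target_ave_rej = 0.1
#
# The .mat file must contain a 'whitendata' array (P samples x D dims);
# the tile() helper above additionally expects an 'invpcatransf' array.
# With those in place the script is run as
#   python train_mcRBM.py
# on a machine with a CUDA-capable GPU and cudamat installed.
# ----------------------------------------------------------------------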