comparison data_generation/amt/amt_generate.py @ 394:ae8102aad586

Generation script for AMT data
author humel
date Tue, 27 Apr 2010 13:19:00 -0400
parents
children 8973abe35a9d
comparison
equal deleted inserted replaced
393:4c840798d290 394:ae8102aad586
1 import numpy, Image
2 from pylearn.io import filetensor as ft
3
4
# Root directory under which all datasets live.
DATAPATH = '/data/lisa/data/'

# Which dataset to sample from; must be a key of DATASET_PATH
# ('nist', 'p07' or 'pnist').
DATASET = 'nist'

# Number of batches to generate and number of examples in each batch.
NUM_BATCHES = 250
BATCH_SIZE = 10

# Shape each flat example is reshaped to before being rendered.
IMGSHP = (32, 32)

# Width, in pixels, of the white column appended after every image.
WHITE_SPACE_THICKNESS = 1

# For each dataset name: [path of the data file, path of the labels file].
DATASET_PATH = {
    'nist': [
        DATAPATH + 'nist/by_class/all/all_test_data.ft',
        DATAPATH + 'nist/by_class/all/all_test_labels.ft',
    ],
    'p07': [
        DATAPATH + 'ift6266h10/data/P07_test_data.ft',
        DATAPATH + 'ift6266h10/data/P07_test_labels.ft',
    ],
    'pnist': [
        DATAPATH + 'ift6266h10/data/PNIST07_test_data.ft',
        DATAPATH + 'ift6266h10/data/PNIST07_test_labels.ft',
    ],
}
17
def generate_batches():
    """Sample random test examples of DATASET and write them out as batches.

    NUM_BATCHES * BATCH_SIZE examples are drawn uniformly at random, with
    replacement, from the test files listed in DATASET_PATH[DATASET].

    For every batch ``i`` (numbered from 1) two files are written in the
    current working directory:

    * ``<DATASET>_<iiii>.jpeg`` -- the image returned by generate_image()
      for the examples of the batch;
    * ``<DATASET>_<iiii>.txt``  -- the text returned by generate_labels()
      (labels of the batch followed by the sampled dataset indexes).

    Finally all sampled examples and labels are stored through
    generate_ft_file() under the base name ``AMT_<DATASET>_<NUM_BATCHES>``.
    """
    total = NUM_BATCHES * BATCH_SIZE
    data_path, labels_path = DATASET_PATH[DATASET]

    # The files are handed to the filetensor reader, so open them in binary
    # mode; the ``with`` blocks close them even if reading fails.
    with open(data_path, 'rb') as f:
        test_data = ft.read(f)
    with open(labels_path, 'rb') as g:
        test_labels = ft.read(g)

    resulting_data = numpy.zeros((total, IMGSHP[0] * IMGSHP[1]))
    resulting_labels = numpy.zeros((total,))

    # Create a matrix of random integers within the range
    # [0, length_dataset - 1].  randint() excludes its upper bound, so every
    # value is a valid index.  (random_integers(ds_size) draws from
    # [1, ds_size]: it never picks example 0 and can overrun the dataset.)
    ds_size = len(test_data)
    rand_seq = numpy.random.randint(0, ds_size,
                                    size=(NUM_BATCHES, BATCH_SIZE))

    for i in range(NUM_BATCHES):
        start = i * BATCH_SIZE
        stop = start + BATCH_SIZE

        for j in range(BATCH_SIZE):
            resulting_data[start + j] = test_data[rand_seq[i, j]]
            resulting_labels[start + j] = test_labels[rand_seq[i, j]]

        image = generate_image(resulting_data[start:stop])
        text = generate_labels(resulting_labels[start:stop], rand_seq[i])

        # Batch files are numbered from 1 and zero-padded to four digits.
        filename = '%s_%04d' % (DATASET, i + 1)
        image.save(filename + '.jpeg')
        save_text(text, filename)

    ft_name = 'AMT_' + DATASET + '_' + str(NUM_BATCHES)
    generate_ft_file(resulting_data, resulting_labels, ft_name)
49
def save_text(text, filename):
    """Write *text* to the file ``<filename>.txt``.

    :param text: string to write.
    :param filename: path of the output file without the ``.txt``
        extension; an existing file is overwritten.
    """
    # ``with`` guarantees the file is closed even when write() raises.
    with open(filename + '.txt', 'w') as f:
        f.write(text)
def generate_ft_file(data, labels, ft_name):
    """Store *data* and *labels* with the filetensor writer.

    :param data: array written to ``<ft_name>_data.ft``.
    :param labels: array written to ``<ft_name>_labels.ft``.
    :param ft_name: common base name (path without suffix) of both files.
    """
    # Both files are opened in binary mode because ft.write() emits a tensor
    # dump, not text; the ``with`` blocks close them even when a write fails.
    with open(ft_name + '_data.ft', 'wb') as fdata:
        with open(ft_name + '_labels.ft', 'wb') as flabels:
            ft.write(fdata, data)
            ft.write(flabels, labels)
60
def generate_image(seq):
    """Render the examples of *seq* side by side as a single image.

    Each element of *seq* is reshaped to IMGSHP, multiplied by 255 and cast
    to uint8.  A white column WHITE_SPACE_THICKNESS pixels wide is appended
    after every example, and the pieces are concatenated horizontally.

    :param seq: sequence of flat arrays, each reshapeable to IMGSHP.
        NOTE(review): the ``* 255`` scaling presumably expects values in
        [0, 1] -- confirm against the dataset files.
    :return: the image built by ``Image.fromarray`` from the uint8 strip.
    """
    # Column of 255s (white) as tall as one example.
    spacer = (numpy.zeros((IMGSHP[0], WHITE_SPACE_THICKNESS)) + 255.).astype('uint8')

    tiles = [numpy.asarray(flat.reshape(IMGSHP) * 255., dtype='uint8')
             for flat in seq]

    # Interleave: tile, spacer, tile, spacer, ...
    pieces = []
    for tile in tiles:
        pieces.extend((tile, spacer))

    strip = numpy.hstack(pieces)
    return Image.fromarray(strip)
71
def generate_labels(seq, indexes):
    """Return the text describing one batch.

    The result is ``str(seq)``, a newline, then ``str(indexes)``.

    :param seq: labels of the batch.
    :param indexes: dataset indexes the batch was sampled from.
    """
    parts = [str(seq), str(indexes)]
    return '\n'.join(parts)
74
# Script entry point: announce the run, then generate every batch.
if __name__ == '__main__':
    print('Starting data generation')
    generate_batches()