Mercurial > ift6266
comparison data_generation/amt/amt_generate.py @ 394:ae8102aad586
Generation script for AMT data
author | humel |
---|---|
date | Tue, 27 Apr 2010 13:19:00 -0400 |
parents | |
children | 8973abe35a9d |
comparison
equal
deleted
inserted
replaced
393:4c840798d290 | 394:ae8102aad586 |
---|---|
1 import numpy, Image | |
2 from pylearn.io import filetensor as ft | |
3 | |
4 | |
# Root directory under which every dataset file below is located.
DATAPATH = '/data/lisa/data/'
# Key into DATASET_PATH selecting which test set to sample: nist, p07, pnist
DATASET = 'nist'
# Number of batches generated per run, and number of examples in each batch.
NUM_BATCHES = 250
BATCH_SIZE = 10
# Shape every flat example row is reshaped to before being rendered.
IMGSHP = (32, 32)
# Width, in pixel columns, of the white separator placed after each character.
WHITE_SPACE_THICKNESS = 1

_NIST_DIR = DATAPATH + 'nist/by_class/all/'
_IFT_DIR = DATAPATH + 'ift6266h10/data/'

# Maps a dataset name to [data file, labels file].
DATASET_PATH = {
    'nist': [
        _NIST_DIR + 'all_test_data.ft',
        _NIST_DIR + 'all_test_labels.ft',
    ],
    'p07': [
        _IFT_DIR + 'P07_test_data.ft',
        _IFT_DIR + 'P07_test_labels.ft',
    ],
    'pnist': [
        _IFT_DIR + 'PNIST07_test_data.ft',
        _IFT_DIR + 'PNIST07_test_labels.ft',
    ],
}
17 | |
def generate_batches():
    """Sample NUM_BATCHES random batches from the DATASET test set.

    For every batch this writes, in the current directory:
      * '<DATASET>_<NNNN>.jpeg' -- the BATCH_SIZE examples rendered side by
        side (see generate_image);
      * '<DATASET>_<NNNN>.txt'  -- the labels and the dataset indexes of
        those examples (see generate_labels).
    NNNN is the 1-based, zero-padded batch number.

    Once all batches are done, every sampled example and label is also
    written to 'AMT_<DATASET>_<NUM_BATCHES>_data.ft' / '..._labels.ft'.
    Examples are drawn with replacement.
    """
    total = NUM_BATCHES * BATCH_SIZE

    # filetensor files are binary, so open them in 'rb' mode; 'with' makes
    # sure the handles are released even if reading fails.
    with open(DATASET_PATH[DATASET][0], 'rb') as data_file:
        test_data = ft.read(data_file)
    with open(DATASET_PATH[DATASET][1], 'rb') as labels_file:
        test_labels = ft.read(labels_file)

    resulting_data = numpy.zeros((total, IMGSHP[0] * IMGSHP[1]))
    resulting_labels = numpy.zeros((total,))

    # Create a matrix of random integers within the range
    # [0, length_dataset - 1].  randint's upper bound is exclusive, so every
    # drawn index is valid (random_integers(ds_size) drew from [1, ds_size],
    # which skipped example 0 and could index one past the end).
    ds_size = len(test_data)
    rand_seq = numpy.random.randint(0, ds_size,
                                    size=(NUM_BATCHES, BATCH_SIZE))

    for i in range(NUM_BATCHES):
        start = i * BATCH_SIZE
        stop = start + BATCH_SIZE
        batch_indexes = rand_seq[i]

        # Copy the whole batch at once instead of one example at a time.
        resulting_data[start:stop] = test_data[batch_indexes]
        resulting_labels[start:stop] = test_labels[batch_indexes]

        image = generate_image(resulting_data[start:stop])
        text = generate_labels(resulting_labels[start:stop], batch_indexes)

        filename = '%s_%04d' % (DATASET, i + 1)
        image.save(filename + '.jpeg')
        save_text(text, filename)

    ft_name = 'AMT_' + DATASET + '_' + str(NUM_BATCHES)
    generate_ft_file(resulting_data, resulting_labels, ft_name)
49 | |
def save_text(text, filename):
    """Write `text` to the file '<filename>.txt', replacing any previous content.

    The file is closed even if the write raises.
    """
    with open(filename + '.txt', 'w') as f:
        f.write(text)
def generate_ft_file(data, labels, ft_name):
    """Write `data` and `labels` as filetensor files.

    Produces '<ft_name>_data.ft' and '<ft_name>_labels.ft'.  The files are
    opened in binary mode because the filetensor format is binary, and each
    handle is closed even if the corresponding write raises.
    """
    with open(ft_name + '_data.ft', 'wb') as fdata:
        ft.write(fdata, data)
    with open(ft_name + '_labels.ft', 'wb') as flabels:
        ft.write(flabels, labels)
60 | |
def generate_image(seq):
    """Render the examples in `seq` side by side as a single image.

    Each row of `seq` is reshaped to IMGSHP, multiplied by 255 and cast to
    uint8; a white column WHITE_SPACE_THICKNESS pixels wide is appended after
    every character.  Returns the result of Image.fromarray on the strip.

    NOTE(review): the *255 scaling assumes pixel values in [0, 1] -- confirm
    against the dataset files.
    """
    separator = numpy.asarray(
        numpy.zeros((IMGSHP[0], WHITE_SPACE_THICKNESS)) + 255.,
        dtype='uint8')

    pieces = []
    for row in seq:
        glyph = numpy.asarray(row.reshape(IMGSHP) * 255., dtype='uint8')
        pieces.append(glyph)
        pieces.append(separator)

    strip = numpy.hstack(pieces)
    return Image.fromarray(strip)
71 | |
def generate_labels(seq, indexes):
    """Return the text for one batch: str(seq) on the first line, str(indexes) on the second."""
    return '\n'.join([str(seq), str(indexes)])
74 | |
if __name__ == '__main__':
    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print('Starting data generation')
    generate_batches()