comparison scripts/setup_batches.py @ 346:7bc555cc9aab

Ajouté dans set_batches : choix de la classe principale
author Guillaume Sicard <guitch21@gmail.com>
date Mon, 19 Apr 2010 07:09:44 -0400
parents 5b260cc8f477
children 22919039f7ab
comparison
equal deleted inserted replaced
345:74287f0371bf 346:7bc555cc9aab
13 digits_test_data = 'digits/digits_test_data.ft' 13 digits_test_data = 'digits/digits_test_data.ft'
14 digits_test_labels = 'digits/digits_test_labels.ft' 14 digits_test_labels = 'digits/digits_test_labels.ft'
15 15
16 lower_train_data = 'lower/lower_train_data.ft' 16 lower_train_data = 'lower/lower_train_data.ft'
17 lower_train_labels = 'lower/lower_train_labels.ft' 17 lower_train_labels = 'lower/lower_train_labels.ft'
18 lower_test_data = 'lower/lower_test_data.ft'
19 lower_test_labels = 'lower/lower_test_labels.ft'
20
18 upper_train_data = 'upper/upper_train_data.ft' 21 upper_train_data = 'upper/upper_train_data.ft'
19 upper_train_labels = 'upper/upper_train_labels.ft' 22 upper_train_labels = 'upper/upper_train_labels.ft'
23 upper_test_data = 'upper/upper_test_data.ft'
24 upper_test_labels = 'upper/upper_test_labels.ft'
25
20 test_data = 'all/all_test_data.ft' 26 test_data = 'all/all_test_data.ft'
21 test_labels = 'all/all_test_labels.ft' 27 test_labels = 'all/all_test_labels.ft'
22 28
23 print 'Opening data...' 29 print 'Opening data...'
24 30
27 f_digits_test_data = open(data_path + digits_test_data) 33 f_digits_test_data = open(data_path + digits_test_data)
28 f_digits_test_labels = open(data_path + digits_test_labels) 34 f_digits_test_labels = open(data_path + digits_test_labels)
29 35
30 f_lower_train_data = open(data_path + lower_train_data) 36 f_lower_train_data = open(data_path + lower_train_data)
31 f_lower_train_labels = open(data_path + lower_train_labels) 37 f_lower_train_labels = open(data_path + lower_train_labels)
38 f_lower_test_data = open(data_path + lower_test_data)
39 f_lower_test_labels = open(data_path + lower_test_labels)
40
32 f_upper_train_data = open(data_path + upper_train_data) 41 f_upper_train_data = open(data_path + upper_train_data)
33 f_upper_train_labels = open(data_path + upper_train_labels) 42 f_upper_train_labels = open(data_path + upper_train_labels)
34 43 f_upper_test_data = open(data_path + upper_test_data)
35 f_test_data = open(data_path + test_data) 44 f_upper_test_labels = open(data_path + upper_test_labels)
36 f_test_labels = open(data_path + test_labels) 45
46 #f_test_data = open(data_path + test_data)
47 #f_test_labels = open(data_path + test_labels)
37 48
38 self.raw_digits_train_data = ft.read(f_digits_train_data) 49 self.raw_digits_train_data = ft.read(f_digits_train_data)
39 self.raw_digits_train_labels = ft.read(f_digits_train_labels) 50 self.raw_digits_train_labels = ft.read(f_digits_train_labels)
40 self.raw_digits_test_data = ft.read(f_digits_test_data) 51 self.raw_digits_test_data = ft.read(f_digits_test_data)
41 self.raw_digits_test_labels = ft.read(f_digits_test_labels) 52 self.raw_digits_test_labels = ft.read(f_digits_test_labels)
42 53
43 self.raw_lower_train_data = ft.read(f_lower_train_data) 54 self.raw_lower_train_data = ft.read(f_lower_train_data)
44 self.raw_lower_train_labels = ft.read(f_lower_train_labels) 55 self.raw_lower_train_labels = ft.read(f_lower_train_labels)
56 self.raw_lower_test_data = ft.read(f_lower_test_data)
57 self.raw_lower_test_labels = ft.read(f_lower_test_labels)
58
45 self.raw_upper_train_data = ft.read(f_upper_train_data) 59 self.raw_upper_train_data = ft.read(f_upper_train_data)
46 self.raw_upper_train_labels = ft.read(f_upper_train_labels) 60 self.raw_upper_train_labels = ft.read(f_upper_train_labels)
61 self.raw_upper_test_data = ft.read(f_upper_test_data)
62 self.raw_upper_test_labels = ft.read(f_upper_test_labels)
47 63
48 self.raw_test_data = ft.read(f_test_data) 64 self.raw_test_data = ft.read(f_test_data)
49 self.raw_test_labels = ft.read(f_test_labels) 65 self.raw_test_labels = ft.read(f_test_labels)
50 66
51 f_digits_train_data.close() 67 f_digits_train_data.close()
53 f_digits_test_data.close() 69 f_digits_test_data.close()
54 f_digits_test_labels.close() 70 f_digits_test_labels.close()
55 71
56 f_lower_train_data.close() 72 f_lower_train_data.close()
57 f_lower_train_labels.close() 73 f_lower_train_labels.close()
74 f_lower_test_data.close()
75 f_lower_test_labels.close()
76
58 f_upper_train_data.close() 77 f_upper_train_data.close()
59 f_upper_train_labels.close() 78 f_upper_train_labels.close()
79 f_upper_test_data.close()
80 f_upper_test_labels.close()
60 81
61 f_test_data.close() 82 f_test_data.close()
62 f_test_labels.close() 83 f_test_labels.close()
63 84
64 print 'Data opened' 85 print 'Data opened'
65 86
66 def set_batches(self, start_ratio = -1, end_ratio = -1, batch_size = 20, verbose = False): 87 def set_batches(self, main_class = "d", start_ratio = -1, end_ratio = -1, batch_size = 20, verbose = False):
67 self.batch_size = batch_size 88 self.batch_size = batch_size
68 89
69 digits_train_size = len(self.raw_digits_train_labels) 90 digits_train_size = len(self.raw_digits_train_labels)
70 digits_test_size = len(self.raw_digits_test_labels) 91 digits_test_size = len(self.raw_digits_test_labels)
71 92
72 lower_train_size = len(self.raw_lower_train_labels) 93 lower_train_size = len(self.raw_lower_train_labels)
94
73 upper_train_size = len(self.raw_upper_train_labels) 95 upper_train_size = len(self.raw_upper_train_labels)
96 upper_test_size = len(self.raw_upper_test_labels)
74 97
75 if verbose == True: 98 if verbose == True:
76 print 'digits_train_size = %d' %digits_train_size 99 print 'digits_train_size = %d' %digits_train_size
77 print 'digits_test_size = %d' %digits_test_size 100 print 'digits_test_size = %d' %digits_test_size
78 print 'lower_train_size = %d' %lower_train_size 101 print 'lower_train_size = %d' %lower_train_size
79 print 'upper_train_size = %d' %upper_train_size 102 print 'upper_train_size = %d' %upper_train_size
80 103 print 'upper_test_size = %d' %upper_test_size
81 # define main and other datasets 104
82 raw_main_train_data = self.raw_digits_train_data 105 if main_class == "u":
83 raw_other_train_data1 = self.raw_lower_train_labels 106 # define main and other datasets
84 raw_other_train_data2 = self.raw_upper_train_labels 107 raw_main_train_data = self.raw_upper_train_data
85 raw_test_data = self.raw_digits_test_data 108 raw_other_train_data1 = self.raw_lower_train_labels
86 #raw_test_data = self.raw_test_data 109 raw_other_train_data2 = self.raw_digits_train_labels
87 110 raw_test_data = self.raw_upper_test_data
88 raw_main_train_labels = self.raw_digits_train_labels 111
89 raw_other_train_labels1 = self.raw_lower_train_labels 112 raw_main_train_labels = self.raw_upper_train_labels
90 raw_other_train_labels2 = self.raw_upper_train_labels 113 raw_other_train_labels1 = self.raw_lower_train_labels
91 raw_test_labels = self.raw_digits_test_labels 114 raw_other_train_labels2 = self.raw_digits_train_labels
92 #raw_test_labels = self.raw_test_labels 115 raw_test_labels = self.raw_upper_test_labels
116
117 elif main_class == "l":
118 # define main and other datasets
119 raw_main_train_data = self.raw_lower_train_data
120 raw_other_train_data1 = self.raw_upper_train_labels
121 raw_other_train_data2 = self.raw_digits_train_labels
122 raw_test_data = self.raw_lower_test_data
123
124 raw_main_train_labels = self.raw_lower_train_labels
125 raw_other_train_labels1 = self.raw_upper_train_labels
126 raw_other_train_labels2 = self.raw_digits_train_labels
127 raw_test_labels = self.raw_lower_test_labels
128
129 else:
130 main_class = "d"
131 # define main and other datasets
132 raw_main_train_data = self.raw_digits_train_data
133 raw_other_train_data1 = self.raw_lower_train_labels
134 raw_other_train_data2 = self.raw_upper_train_labels
135 raw_test_data = self.raw_digits_test_data
136
137 raw_main_train_labels = self.raw_digits_train_labels
138 raw_other_train_labels1 = self.raw_lower_train_labels
139 raw_other_train_labels2 = self.raw_upper_train_labels
140 raw_test_labels = self.raw_digits_test_labels
93 141
94 main_train_size = len(raw_main_train_labels) 142 main_train_size = len(raw_main_train_labels)
95 other_train_size1 = len(raw_other_train_labels1) 143 other_train_size1 = len(raw_other_train_labels1)
96 other_train_size2 = len(raw_other_train_labels2) 144 other_train_size2 = len(raw_other_train_labels2)
97 other_train_size = other_train_size1 + other_train_size2 145 other_train_size = other_train_size1 + other_train_size2
111 self.end_ratio = float(main_train_size - test_size) / float(main_train_size + other_train_size) 159 self.end_ratio = float(main_train_size - test_size) / float(main_train_size + other_train_size)
112 else: 160 else:
113 self.end_ratio = end_ratio 161 self.end_ratio = end_ratio
114 162
115 if verbose == True: 163 if verbose == True:
164 print 'main class : %s' %main_class
116 print 'start_ratio = %f' %self.start_ratio 165 print 'start_ratio = %f' %self.start_ratio
117 print 'end_ratio = %f' %self.end_ratio 166 print 'end_ratio = %f' %self.end_ratio
118 167
119 i_main = 0 168 i_main = 0
120 i_other1 = 0 169 i_other1 = 0