Mercurial > pylearn
comparison _test_dataset.py @ 284:8e923cb2e8fc
renamed file
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Fri, 06 Jun 2008 13:52:37 -0400 |
parents | _test2_dataset.py@2e22cc120688 |
children | 3af204aa71e5 174374d59405 |
comparison
equal
deleted
inserted
replaced
283:275b92d40ea6 | 284:8e923cb2e8fc |
---|---|
1 #!/bin/env python | |
2 from dataset import * | |
3 from math import * | |
4 import numpy,unittest | |
5 from misc import * | |
6 | |
7 def have_raised(to_eval, **var): | |
8 have_thrown = False | |
9 try: | |
10 eval(to_eval) | |
11 except : | |
12 have_thrown = True | |
13 return have_thrown | |
14 | |
def have_raised2(f, *args, **kwargs):
    """Tell whether calling ``f(*args, **kwargs)`` raises.

    Returns True when the call raised an ``Exception`` and False when it
    returned normally (the return value of ``f`` is discarded).
    ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed, so a test run
    can still be interrupted.
    """
    try:
        f(*args, **kwargs)
    except Exception:
        return True
    return False
22 | |
23 def test1(): | |
24 print "test1" | |
25 global a,ds | |
26 a = numpy.random.rand(10,4) | |
27 print a | |
28 ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]}) | |
29 print "len(ds)=",len(ds) | |
30 assert(len(ds)==10) | |
31 print "example 0 = ",ds[0] | |
32 # assert | |
33 print "x=",ds["x"] | |
34 print "x|y" | |
35 for x,y in ds("x","y"): | |
36 print x,y | |
37 minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4) | |
38 minibatch = minibatch_iterator.__iter__().next() | |
39 print "minibatch=",minibatch | |
40 for var in minibatch: | |
41 print "var=",var | |
42 print "take a slice and look at field y",ds[1:6:2]["y"] | |
43 | |
44 del a,ds,x,y,minibatch_iterator,minibatch,var | |
45 | |
def test_iterate_over_examples(array, ds):
    """Exercise every supported way of iterating over ``ds``.

    ``array`` is the reference 2-D numpy array; ``ds`` must expose its rows
    through three fields: 'x' (columns 0-2), 'y' (column 3) and 'z'
    (columns 0 and 2).  The minibatch sections hard-code ``minibatch_size=3``
    and expect exactly 4 minibatches (true for a 10-example dataset).

    Raises AssertionError as soon as one access pattern disagrees with
    ``array``.
    """

    def check_field_size(minibatch_field, minibatch_size, len_ds, nb_iter_finished):
        # A field holds either a full minibatch, or the shorter final one
        # that exactly completes the dataset.
        is_full = len(minibatch_field) == minibatch_size
        is_last = ((nb_iter_finished * minibatch_size + len(minibatch_field)) == len_ds
                   and len(minibatch_field) < minibatch_size)
        assert is_full or is_last

    def check_minibatch_size(minibatch, minibatch_size, len_ds, nb_field, nb_iter_finished):
        # Apply the size check to each of the first ``nb_field`` fields.
        for idx in range(nb_field):
            check_field_size(minibatch[idx], minibatch_size, len_ds, nb_iter_finished)

    # - ds[i] for every valid index (not in the documentation)
    for idx in range(len(ds)):
        assert (ds[idx]['x'] == array[idx][:3]).all()
        assert ds[idx]['y'] == array[idx][3]
        assert (ds[idx]['z'] == array[idx][[0, 2]]).all()

    # - for example in dataset:
    i = 0
    for example in ds:
        assert len(example) == 3
        assert (example['x'] == array[i][:3]).all()
        assert example['y'] == array[i][3]
        assert (example['z'] == array[i][0:3:2]).all()
        assert (numpy.append(example['x'], example['y']) == array[i]).all()
        i += 1
    assert i == len(ds)

    # - for val1,val2,... in dataset:
    i = 0
    for x, y, z in ds:
        assert (x == array[i][:3]).all()
        assert y == array[i][3]
        assert (z == array[i][0:3:2]).all()
        assert (numpy.append(x, y) == array[i]).all()
        i += 1
    assert i == len(ds)

    # - for example in dataset(field1, field2, field3, ...):
    i = 0
    for example in ds('x', 'y', 'z'):
        assert len(example) == 3
        assert (example['x'] == array[i][:3]).all()
        assert example['y'] == array[i][3]
        assert (example['z'] == array[i][0:3:2]).all()
        assert (numpy.append(example['x'], example['y']) == array[i]).all()
        i += 1
    assert i == len(ds)

    i = 0
    for example in ds('y', 'x'):
        assert len(example) == 2
        assert (example['x'] == array[i][:3]).all()
        assert example['y'] == array[i][3]
        assert (numpy.append(example['x'], example['y']) == array[i]).all()
        i += 1
    assert i == len(ds)

    # - for val1,val2,val3 in dataset(field1, field2, field3):
    i = 0
    for x, y, z in ds('x', 'y', 'z'):
        assert (x == array[i][:3]).all()
        assert y == array[i][3]
        assert (z == array[i][0:3:2]).all()
        assert (numpy.append(x, y) == array[i]).all()
        i += 1
    assert i == len(ds)

    i = 0
    for y, x in ds('y', 'x',):
        assert (x == array[i][:3]).all()
        assert y == array[i][3]
        assert (numpy.append(x, y) == array[i]).all()
        i += 1
    assert i == len(ds)

    # - for minibatch in dataset.minibatches([field1, field2, ...], minibatch_size=N):
    i = 0
    mi = 0
    m = ds.minibatches(['x', 'z'], minibatch_size=3)
    assert isinstance(m, DataSet.MinibatchWrapAroundIterator)
    for minibatch in m:
        assert isinstance(minibatch, DataSetFields)
        assert len(minibatch) == 2
        check_minibatch_size(minibatch, m.minibatch_size, len(ds), 2, mi)
        # Exact type check kept from the original: only this branch uses
        # 2-D slicing on the 'x' field of the minibatch.
        if type(ds) == ArrayDataSet:
            assert (minibatch[0][:, ::2] == minibatch[1]).all()
        else:
            for j in range(len(minibatch[0])):
                # The original computed this comparison without asserting
                # it, so this branch verified nothing.
                assert (minibatch[0][j][::2] == minibatch[1][j]).all()
        mi += 1
        i += len(minibatch[0])
    assert i == len(ds)
    assert mi == 4

    i = 0
    mi = 0
    m = ds.minibatches(['x', 'y'], minibatch_size=3)
    assert isinstance(m, DataSet.MinibatchWrapAroundIterator)
    for minibatch in m:
        assert len(minibatch) == 2
        check_minibatch_size(minibatch, m.minibatch_size, len(ds), 2, mi)
        mi += 1
        for k in range(len(minibatch[0])):
            assert (numpy.append(minibatch[0][k], minibatch[1][k]) == array[i]).all()
            i += 1
    assert i == len(ds)
    assert mi == 4

    # - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N):
    i = 0
    mi = 0
    m = ds.minibatches(['x', 'z'], minibatch_size=3)
    assert isinstance(m, DataSet.MinibatchWrapAroundIterator)
    for x, z in m:
        check_field_size(x, m.minibatch_size, len(ds), mi)
        check_field_size(z, m.minibatch_size, len(ds), mi)
        for k in range(len(x)):
            assert (x[k][::2] == z[k]).all()
            i += 1
        mi += 1
    assert i == len(ds)
    assert mi == 4

    i = 0
    mi = 0
    m = ds.minibatches(['x', 'y'], minibatch_size=3)
    for x, y in m:
        check_field_size(x, m.minibatch_size, len(ds), mi)
        check_field_size(y, m.minibatch_size, len(ds), mi)
        mi += 1
        for k in range(len(x)):
            assert (numpy.append(x[k], y[k]) == array[i]).all()
            i += 1
    assert i == len(ds)
    assert mi == 4

    # - explicit n_batches and offset (not in the documentation).
    # With 20 batches the iterator wraps around the dataset, hence the
    # modulo; for 1 and 2 batches on 10 rows it changes nothing.
    for n_batches in (1, 2, 20):
        i = 0
        m = ds.minibatches(['x', 'y'], n_batches=n_batches, minibatch_size=3, offset=4)
        assert isinstance(m, DataSet.MinibatchWrapAroundIterator)
        for x, y in m:
            assert len(x) == m.minibatch_size
            assert len(y) == m.minibatch_size
            for k in range(m.minibatch_size):
                assert (numpy.append(x[k], y[k]) == array[(i + 4) % array.shape[0]]).all()
                i += 1
        assert i == m.n_batches * m.minibatch_size

    # TODO (original author): is it intended that a minibatch cannot be
    # bigger than the dataset?
    assert have_raised2(ds.minibatches, ['x', 'y'], n_batches=1,
                        minibatch_size=len(array) + 1, offset=0)
    assert not have_raised2(ds.minibatches, ['x', 'y'], n_batches=1,
                            minibatch_size=len(array), offset=0)
232 | |
233 def test_ds_iterator(array,iterator1,iterator2,iterator3): | |
234 l=len(iterator1) | |
235 i=0 | |
236 for x,y in iterator1: | |
237 assert (x==array[i][:3]).all() | |
238 assert y==array[i][3] | |
239 assert (numpy.append(x,y)==array[i]).all() | |
240 i+=1 | |
241 assert i==l | |
242 i=0 | |
243 for y,z in iterator2: | |
244 assert y==array[i][3] | |
245 assert (z==array[i][0:3:2]).all() | |
246 i+=1 | |
247 assert i==l | |
248 i=0 | |
249 for x,y,z in iterator3: | |
250 assert (x==array[i][:3]).all() | |
251 assert y==array[i][3] | |
252 assert (z==array[i][0:3:2]).all() | |
253 assert (numpy.append(x,y)==array[i]).all() | |
254 i+=1 | |
255 assert i==l | |
256 | |
def test_getitem(array, ds):
    """Check the indexing forms of ``ds`` against the reference ``array``.

    Covers ds[:n], ds[i1:i2:s], ds[i] and ds[[i1, i2, ...]], then checks
    which field combinations ``hstack`` accepts.
    """

    def check_subset(orig, subset, index):
        # Example number k of ``subset`` must equal ``orig[index[k]]`` and
        # row ``index[k]`` of ``array``, field by field.
        assert len(subset) == len(index)
        pos = 0
        for x, z, y in subset('x', 'z', 'y'):
            src = index[pos]
            assert (orig[src]['x'] == array[src][:3]).all()
            assert (orig[src]['x'] == x).all()
            assert orig[src]['y'] == array[src][3]
            assert orig[src]['y'] == y
            assert (orig[src]['z'] == array[src][0:3:2]).all()
            assert (orig[src]['z'] == z).all()
            pos += 1

        # Smoke checks: these accesses only have to not raise.
        # NOTE(review): the source's indentation was lost; the length guards
        # suggest these lines belong to this helper - confirm.
        subset[0]
        if len(subset) > 2:
            subset[:1]
            subset[1:1]
            subset[1:1:1]
        if len(subset) > 5:
            subset[[1, 2, 3]]
        for _ in subset:
            pass

    # ds[:n] returns a dataset with the n first examples.
    head = ds[:3]
    assert isinstance(head, DataSet)
    check_subset(ds, head, index=[0, 1, 2])

    # ds[i1:i2:s] returns a dataset with the examples i1, i1+s, ... i2-s.
    strided = ds[1:7:2]
    assert isinstance(strided, DataSet)
    check_subset(ds, strided, [1, 3, 5])

    # ds[i] returns a single example; len(ds) itself is out of range.
    single = ds[5]
    assert isinstance(single, Example)
    assert have_raised("var['ds'][" + str(len(ds)) + "]", ds=ds)  # index not defined
    assert not have_raised("var['ds'][" + str(len(ds) - 1) + "]", ds=ds)

    # ds[[i1,i2,...in]] returns a dataset with examples i1, i2, ... in.
    picked = ds[[4, 7, 2, 8]]
    assert isinstance(picked, DataSet)
    check_subset(ds, picked, [4, 7, 2, 8])

    # Not tested yet: ds.<property>, which should return the value of the
    # property named <property>.  Properties that should be supported:
    #   - 'description': a textual description or name for the ds
    #   - 'fieldtypes': a list of types (one per field)

    # ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3]) (open question)
    # Still disabled in the original:
    #   assert hstack([ds('x','y'),ds('z')])==ds
    #   hstack([ds('z','y'),ds('x')])==ds
    assert have_raised2(hstack, [ds('x'), ds('x')])
    assert have_raised2(hstack, [ds('y', 'x'), ds('x')])
    assert not have_raised2(hstack, [ds('x'), ds('y')])

    # Still disabled in the original:
    #   i=0
    #   for example in hstack([ds('x'),ds('y'),ds('z')]):
    #       example==ds[i]
    #       i+=1
    # ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3]) (open question)
323 | |
def test_fields_fct(ds):
    """Check ``fields()`` on ``ds`` and on field subsets of it.

    Expects ``ds`` to have 3 fields and 10 examples, so iterating over every
    field visits 3*10 values.
    """
    # TODO (original author): fill correctly.

    def count_fields_and_values(fields):
        # Return (number of fields, total number of values seen when
        # iterating over each field).
        n_fields = 0
        n_values = 0
        for field in fields:
            n_fields += 1
            for _ in field:  # one value per example of the dataset
                n_values += 1
        return n_fields, n_values

    assert len(ds.fields()) == 3
    assert count_fields_and_values(ds.fields()) == (3, 3 * 10)
    assert count_fields_and_values(ds('x', 'z').fields()) == (2, 2 * 10)
    assert count_fields_and_values(ds.fields('x', 'y')) == (2, 2 * 10)
    assert count_fields_and_values(ds.fields()) == (3, 3 * 10)

    assert ds == ds.fields().examples()
    assert len(ds('x', 'y').fields()) == 2
    assert len(ds('x', 'z').fields()) == 2
    assert len(ds('y').fields()) == 1
def test_all(array, ds):
    """Run every generic dataset check on ``ds``, using ``array`` as reference.

    ``ds`` must contain exactly 10 examples.
    """
    assert len(ds) == 10

    for check in (test_iterate_over_examples, test_getitem):
        check(array, ds)

    xy_view = ds('x', 'y')
    yz_view = ds('y', 'z')
    xyz_view = ds('x', 'y', 'z')
    test_ds_iterator(array, xy_view, yz_view, xyz_view)

    test_fields_fct(ds)
380 | |
class T_DataSet(unittest.TestCase):
    """unittest wrapper running ``test_all`` on several DataSet classes.

    Methods that only raise NotImplementedError are placeholders for dataset
    classes that have no test yet.
    """

    def test_ArrayDataSet(self):
        # Known gaps: streams, non-float values, missing values, tuples and
        # properties are not tested, and 'y' is not always tested.
        reference = numpy.random.rand(10, 4)
        # Building from a plain dict is only checked for not raising: the
        # instance is replaced right away by the LookupList-based one.
        ds = ArrayDataSet(reference, {'x': slice(3), 'y': 3, 'z': [0, 2]})
        ds = ArrayDataSet(reference,
                          LookupList(['x', 'y', 'z'], [slice(3), 3, [0, 2]]))
        # Open question from the original author: should ds == reference hold?

        test_all(reference, ds)

    def test_CachedDataSet(self):
        reference = numpy.random.rand(10, 4)
        source = ArrayDataSet(reference,
                              LookupList(['x', 'y', 'z'], [slice(3), 3, [0, 2]]))
        default_cache = CachedDataSet(source)
        full_cache = CachedDataSet(source, cache_all_upon_construction=True)

        test_all(reference, default_cache)
        test_all(reference, full_cache)

    def test_DataSetFields(self):
        raise NotImplementedError()

    def test_ApplyFunctionDataSet(self):
        base = numpy.random.rand(10, 4)
        # Every field is shifted by one, so the reference array is too.
        expected = base + 1
        source = ArrayDataSet(base,
                              LookupList(['x', 'y', 'z'], [slice(3), 3, [0, 2]]))

        per_example = ApplyFunctionDataSet(
            source,
            lambda x, y, z: (x + 1, y + 1, z + 1),
            ['x', 'y', 'z'],
            minibatch_mode=False)
        per_minibatch = ApplyFunctionDataSet(
            source,
            lambda x, y, z: (numpy.array(x) + 1, numpy.array(y) + 1, numpy.array(z) + 1),
            ['x', 'y', 'z'],
            minibatch_mode=True)

        test_all(expected, per_example)
        test_all(expected, per_minibatch)

    def test_FieldsSubsetDataSet(self):
        raise NotImplementedError()

    def test_MinibatchDataSet(self):
        raise NotImplementedError()

    def test_HStackedDataSet(self):
        raise NotImplementedError()

    def test_VStackedDataSet(self):
        raise NotImplementedError()

    def test_ArrayFieldsDataSet(self):
        raise NotImplementedError()
438 | |
439 | |
# Run the unittest suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
442 |