comparison _test_dataset.py @ 284:8e923cb2e8fc

renamed file
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Fri, 06 Jun 2008 13:52:37 -0400
parents _test2_dataset.py@2e22cc120688
children 3af204aa71e5 174374d59405
comparison
equal deleted inserted replaced
283:275b92d40ea6 284:8e923cb2e8fc
1 #!/bin/env python
2 from dataset import *
3 from math import *
4 import numpy,unittest
5 from misc import *
6
def have_raised(to_eval, **var):
    """Report whether evaluating the expression string ``to_eval`` raises.

    The expression is evaluated with ``eval`` inside this function, so it can
    reach the extra keyword arguments through the local dict ``var``, e.g.
    ``have_raised("var['ds'][3]", ds=ds)``.

    Returns ``True`` if the evaluation raised an ``Exception``, ``False`` if it
    completed normally.
    """
    # NOTE: eval() executes arbitrary code; only pass expression strings that
    # were written by the test author.
    try:
        eval(to_eval)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must keep
        # propagating instead of being reported as "the expression raised".
        return True
    return False
14
def have_raised2(f, *args, **kwargs):
    """Report whether calling ``f(*args, **kwargs)`` raises.

    Returns ``True`` if the call raised an ``Exception``, ``False`` if it
    returned normally.  The return value of ``f`` is discarded.
    """
    try:
        f(*args, **kwargs)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must keep
        # propagating instead of being reported as "the call raised".
        return True
    return False
22
23 def test1():
24 print "test1"
25 global a,ds
26 a = numpy.random.rand(10,4)
27 print a
28 ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]})
29 print "len(ds)=",len(ds)
30 assert(len(ds)==10)
31 print "example 0 = ",ds[0]
32 # assert
33 print "x=",ds["x"]
34 print "x|y"
35 for x,y in ds("x","y"):
36 print x,y
37 minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4)
38 minibatch = minibatch_iterator.__iter__().next()
39 print "minibatch=",minibatch
40 for var in minibatch:
41 print "var=",var
42 print "take a slice and look at field y",ds[1:6:2]["y"]
43
44 del a,ds,x,y,minibatch_iterator,minibatch,var
45
def test_iterate_over_examples(array,ds):
    """Check the ways of iterating over ``ds`` against the reference ``array``.

    ``array`` is indexed as a 2-D array with one example per row.  ``ds`` is
    expected to expose field 'x' as columns 0-2, field 'y' as column 3 and
    field 'z' as columns 0 and 2 of each row.

    Covered: indexing by position, iterating over examples, iterating over a
    field selection (as examples and as unpacked values), and minibatch
    iteration (plain, with ``n_batches``/``offset``, and wrapping around the
    end of the dataset).

    Raises AssertionError on the first mismatch.
    """

    def test_minibatch_size(minibatch,minibatch_size,len_ds,nb_field,nb_iter_finished):
        """Apply test_minibatch_field_size to the first ``nb_field`` fields."""
        for idx in range(nb_field):
            test_minibatch_field_size(minibatch[idx],minibatch_size,len_ds,nb_iter_finished)

    def test_minibatch_field_size(minibatch_field,minibatch_size,len_ds,nb_iter_finished):
        """A field batch is full, or is a shorter batch that ends the dataset."""
        size = len(minibatch_field)
        is_full = size==minibatch_size
        is_last_partial = ((nb_iter_finished*minibatch_size+size)==len_ds
                           and size<minibatch_size)
        assert is_full or is_last_partial

    # - ds[i] for every position (not in the documentation)
    for idx in range(len(ds)):
        assert (ds[idx]['x']==array[idx][:3]).all()
        assert ds[idx]['y']==array[idx][3]
        assert (ds[idx]['z']==array[idx][[0,2]]).all()

    # - for example in dataset:
    i=0
    for example in ds:
        assert len(example)==3
        assert (example['x']==array[i][:3]).all()
        assert example['y']==array[i][3]
        assert (example['z']==array[i][0:3:2]).all()
        assert (numpy.append(example['x'],example['y'])==array[i]).all()
        i+=1
    assert i==len(ds)

    # - for val1,val2,... in dataset:
    i=0
    for x,y,z in ds:
        assert (x==array[i][:3]).all()
        assert y==array[i][3]
        assert (z==array[i][0:3:2]).all()
        assert (numpy.append(x,y)==array[i]).all()
        i+=1
    assert i==len(ds)

    # - for example in dataset(field1, field2,field3, ...):
    i=0
    for example in ds('x','y','z'):
        assert len(example)==3
        assert (example['x']==array[i][:3]).all()
        assert example['y']==array[i][3]
        assert (example['z']==array[i][0:3:2]).all()
        assert (numpy.append(example['x'],example['y'])==array[i]).all()
        i+=1
    assert i==len(ds)
    i=0
    for example in ds('y','x'):
        assert len(example)==2
        assert (example['x']==array[i][:3]).all()
        assert example['y']==array[i][3]
        assert (numpy.append(example['x'],example['y'])==array[i]).all()
        i+=1
    assert i==len(ds)

    # - for val1,val2,val3 in dataset(field1, field2,field3):
    i=0
    for x,y,z in ds('x','y','z'):
        assert (x==array[i][:3]).all()
        assert y==array[i][3]
        assert (z==array[i][0:3:2]).all()
        assert (numpy.append(x,y)==array[i]).all()
        i+=1
    assert i==len(ds)
    i=0
    for y,x in ds('y','x',):
        assert (x==array[i][:3]).all()
        assert y==array[i][3]
        assert (numpy.append(x,y)==array[i]).all()
        i+=1
    assert i==len(ds)

    # - for minibatch in dataset.minibatches([field1, field2, ...],minibatch_size=N):
    i=0
    mi=0
    m=ds.minibatches(['x','z'], minibatch_size=3)
    assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
    for minibatch in m:
        assert isinstance(minibatch,DataSetFields)
        assert len(minibatch)==2
        test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi)
        if type(ds)==ArrayDataSet:
            # Whole-batch comparison: z is every second column of x.
            assert (minibatch[0][:,::2]==minibatch[1]).all()
        else:
            for j in range(len(minibatch[0])):
                # This comparison used to be evaluated and thrown away; it
                # must be asserted for the check to have any effect.
                assert (minibatch[0][j][::2]==minibatch[1][j]).all()
        mi+=1
        i+=len(minibatch[0])
    assert i==len(ds)
    # Hard-coded batch count: assumes 10 examples (3+3+3+1), as test_all asserts.
    assert mi==4

    i=0
    mi=0
    m=ds.minibatches(['x','y'], minibatch_size=3)
    assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
    for minibatch in m:
        assert len(minibatch)==2
        test_minibatch_size(minibatch,m.minibatch_size,len(ds),2,mi)
        mi+=1
        for k in range(len(minibatch[0])):
            assert (numpy.append(minibatch[0][k],minibatch[1][k])==array[i]).all()
            i+=1
    assert i==len(ds)
    assert mi==4

    # - for mini1,mini2,mini3 in dataset.minibatches([field1, field2, field3], minibatch_size=N):
    i=0
    mi=0
    m=ds.minibatches(['x','z'], minibatch_size=3)
    assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
    for x,z in m:
        test_minibatch_field_size(x,m.minibatch_size,len(ds),mi)
        test_minibatch_field_size(z,m.minibatch_size,len(ds),mi)
        for k in range(len(x)):
            assert (x[k][::2]==z[k]).all()
            i+=1
        mi+=1
    assert i==len(ds)
    assert mi==4
    i=0
    mi=0
    m=ds.minibatches(['x','y'], minibatch_size=3)
    for x,y in m:
        test_minibatch_field_size(x,m.minibatch_size,len(ds),mi)
        test_minibatch_field_size(y,m.minibatch_size,len(ds),mi)
        mi+=1
        for k in range(len(x)):
            assert (numpy.append(x[k],y[k])==array[i]).all()
            i+=1
    assert i==len(ds)
    assert mi==4

    # - n_batches / offset variants (not in the documentation)
    i=0
    m=ds.minibatches(['x','y'],n_batches=1,minibatch_size=3,offset=4)
    assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
    for x,y in m:
        assert len(x)==m.minibatch_size
        assert len(y)==m.minibatch_size
        for k in range(m.minibatch_size):
            assert (numpy.append(x[k],y[k])==array[i+4]).all()
            i+=1
    assert i==m.n_batches*m.minibatch_size

    i=0
    m=ds.minibatches(['x','y'],n_batches=2,minibatch_size=3,offset=4)
    assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
    for x,y in m:
        assert len(x)==m.minibatch_size
        assert len(y)==m.minibatch_size
        for k in range(m.minibatch_size):
            assert (numpy.append(x[k],y[k])==array[i+4]).all()
            i+=1
    assert i==m.n_batches*m.minibatch_size

    # More batches than the dataset holds: the expected row index wraps
    # around modulo the number of rows.
    i=0
    m=ds.minibatches(['x','y'],n_batches=20,minibatch_size=3,offset=4)
    assert isinstance(m,DataSet.MinibatchWrapAroundIterator)
    for x,y in m:
        assert len(x)==m.minibatch_size
        assert len(y)==m.minibatch_size
        for k in range(m.minibatch_size):
            assert (numpy.append(x[k],y[k])==array[(i+4)%array.shape[0]]).all()
            i+=1
    assert i==m.n_batches*m.minibatch_size

    #@todo: we can't do minibatch bigger than the size of the dataset???
    assert have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array)+1,offset=0)
    assert not have_raised2(ds.minibatches,['x','y'],n_batches=1,minibatch_size=len(array),offset=0)
232
233 def test_ds_iterator(array,iterator1,iterator2,iterator3):
234 l=len(iterator1)
235 i=0
236 for x,y in iterator1:
237 assert (x==array[i][:3]).all()
238 assert y==array[i][3]
239 assert (numpy.append(x,y)==array[i]).all()
240 i+=1
241 assert i==l
242 i=0
243 for y,z in iterator2:
244 assert y==array[i][3]
245 assert (z==array[i][0:3:2]).all()
246 i+=1
247 assert i==l
248 i=0
249 for x,y,z in iterator3:
250 assert (x==array[i][:3]).all()
251 assert y==array[i][3]
252 assert (z==array[i][0:3:2]).all()
253 assert (numpy.append(x,y)==array[i]).all()
254 i+=1
255 assert i==l
256
def test_getitem(array,ds):
    """Check the ``ds[...]`` indexing forms and a few ``hstack`` calls.

    Exercised forms: ``ds[:n]``, ``ds[i1:i2:s]``, ``ds[i]`` and
    ``ds[[i1, i2, ...]]``.  Sub-datasets are compared, example by example,
    with the corresponding rows of ``array`` and examples of ``ds``.

    Raises AssertionError on the first mismatch.
    """

    def test_ds(orig, subset, index):
        """Check that ``subset`` holds the examples ``orig[index[k]]``, in order."""
        assert len(subset) == len(index)
        for pos, (x, z, y) in enumerate(subset('x', 'z', 'y')):
            row = index[pos]
            assert (orig[row]['x'] == array[row][:3]).all()
            assert (orig[row]['x'] == x).all()
            assert orig[row]['y'] == array[row][3]
            assert orig[row]['y'] == y
            assert (orig[row]['z'] == array[row][0:3:2]).all()
            assert (orig[row]['z'] == z).all()

        # NOTE(review): the smoke checks below are taken to belong to this
        # helper, since their length guards only make sense for sub-datasets
        # of varying size -- confirm against the upstream file.
        # They only verify that indexing/iterating the subset does not raise.
        subset[0]
        if len(subset) > 2:
            subset[:1]
            subset[1:1]
            subset[1:1:1]
        if len(subset) > 5:
            subset[[1, 2, 3]]
        for x in subset:
            pass

    # ds[:n] returns a dataset with the n first examples.
    head = ds[:3]
    assert isinstance(head, DataSet)
    test_ds(ds, head, index=[0, 1, 2])

    # ds[i1:i2:s] returns a ds with the examples i1,i1+s,...i2-s.
    strided = ds[1:7:2]
    assert isinstance(strided, DataSet)
    test_ds(ds, strided, [1, 3, 5])

    # ds[i] returns a single example; len(ds) itself is out of range.
    single = ds[5]
    assert isinstance(single, Example)
    assert have_raised("var['ds'][" + str(len(ds)) + "]", ds=ds)  # index not defined
    assert not have_raised("var['ds'][" + str(len(ds) - 1) + "]", ds=ds)

    # ds[[i1,i2,...in]] returns a ds with examples i1,i2,...in.
    picked = ds[[4, 7, 2, 8]]
    assert isinstance(picked, DataSet)
    test_ds(ds, picked, [4, 7, 2, 8])

    # ds.<property> returns the value of a property associated with
    # the name <property>. The following properties should be supported:
    #    - 'description': a textual description or name for the ds
    #    - 'fieldtypes': a list of types (one per field)
    # (not tested here)

    # * ds1 | ds2 | ds3 == ds.hstack([ds1,ds2,ds3])  #????
    # assert hstack([ds('x','y'),ds('z')])==ds
    # hstack([ds('z','y'),ds('x')])==ds
    # Stacking datasets that share a field name must fail; disjoint fields
    # must be accepted.
    assert have_raised2(hstack, [ds('x'), ds('x')])
    assert have_raised2(hstack, [ds('y', 'x'), ds('x')])
    assert not have_raised2(hstack, [ds('x'), ds('y')])

    # i=0
    # for example in hstack([ds('x'),ds('y'),ds('z')]):
    #     example==ds[i]
    #     i+=1
    # del i,example
    # * ds1 & ds2 & ds3 == ds.vstack([ds1,ds2,ds3])  #????
323
def test_fields_fct(ds):
    """Check ``ds.fields()`` and the field selections derived from it.

    The expected counts are hard-coded for a dataset with 3 fields and
    10 examples.

    Raises AssertionError on the first mismatch.
    """
    #@todo, fill correctly

    def tally(fields):
        """Return (number of fields, total number of values over all fields)."""
        field_count = 0
        value_count = 0
        for one_field in fields:
            # iterate over the values associated to that field for all the ds examples
            for _ in one_field:
                value_count += 1
            field_count += 1
        return field_count, value_count

    assert len(ds.fields()) == 3

    # all fields
    n_fields, n_values = tally(ds.fields())
    assert n_fields == 3
    assert n_values == 3*10

    # fields of a two-field selection of the dataset
    n_fields, n_values = tally(ds('x', 'z').fields())
    assert n_fields == 2
    assert n_values == 2*10

    # two fields selected through fields() itself
    n_fields, n_values = tally(ds.fields('x', 'y'))
    assert n_fields == 2
    assert n_values == 2*10

    # all fields, a second time
    n_fields, n_values = tally(ds.fields())
    assert n_fields == 3
    assert n_values == 3*10

    assert ds == ds.fields().examples()
    assert len(ds('x', 'y').fields()) == 2
    assert len(ds('x', 'z').fields()) == 2
    assert len(ds('y').fields()) == 1
def test_all(array,ds):
    """Run every generic dataset check on ``ds``, using ``array`` as reference.

    ``ds`` must hold exactly 10 examples.
    """
    assert len(ds) == 10

    test_iterate_over_examples(array, ds)
    test_getitem(array, ds)
    test_ds_iterator(array,
                     ds('x', 'y'),
                     ds('y', 'z'),
                     ds('x', 'y', 'z'))
    test_fields_fct(ds)
380
class T_DataSet(unittest.TestCase):
    """unittest entry points running the generic checks on each dataset class.

    Test methods for classes that are not covered yet raise
    NotImplementedError.
    """

    def test_ArrayDataSet(self):
        """Run test_all on an ArrayDataSet over a random 10x4 array."""
        # Known gaps of this test:
        #  - streams are not tested
        #  - only float values are tested
        #  - y is not always tested
        #  - missing values are not tested
        #  - tuples are not tested
        #  - properties are not tested
        data = numpy.random.rand(10, 4)
        # The dict-based field spec is only constructed, not checked further.
        ds = ArrayDataSet(data, {'x': slice(3), 'y': 3, 'z': [0, 2]})  ###???tuple not tested
        ds = ArrayDataSet(data, LookupList(['x', 'y', 'z'], [slice(3), 3, [0, 2]]))  ###???tuple not tested
        #assert ds==a? should this work?

        test_all(data, ds)

    def test_CachedDataSet(self):
        """Run test_all on CachedDataSet wrappers, lazy and fully pre-cached."""
        data = numpy.random.rand(10, 4)
        source = ArrayDataSet(data, LookupList(['x', 'y', 'z'], [slice(3), 3, [0, 2]]))  ###???tuple not tested
        lazy_cache = CachedDataSet(source)
        eager_cache = CachedDataSet(source, cache_all_upon_construction=True)

        test_all(data, lazy_cache)
        test_all(data, eager_cache)

    def test_DataSetFields(self):
        raise NotImplementedError()

    def test_ApplyFunctionDataSet(self):
        """Run test_all on ApplyFunctionDataSet adding 1 to every field."""
        data = numpy.random.rand(10, 4)
        shifted = data + 1  # reference values after the function is applied
        source = ArrayDataSet(data, LookupList(['x', 'y', 'z'], [slice(3), 3, [0, 2]]))  ###???tuple not tested

        per_example = ApplyFunctionDataSet(
            source,
            lambda x, y, z: (x+1, y+1, z+1),
            ['x', 'y', 'z'],
            minibatch_mode=False)
        per_minibatch = ApplyFunctionDataSet(
            source,
            lambda x, y, z: (numpy.array(x)+1, numpy.array(y)+1, numpy.array(z)+1),
            ['x', 'y', 'z'],
            minibatch_mode=True)

        test_all(shifted, per_example)
        test_all(shifted, per_minibatch)

    def test_FieldsSubsetDataSet(self):
        raise NotImplementedError()

    def test_MinibatchDataSet(self):
        raise NotImplementedError()

    def test_HStackedDataSet(self):
        raise NotImplementedError()

    def test_VStackedDataSet(self):
        raise NotImplementedError()

    def test_ArrayFieldsDataSet(self):
        raise NotImplementedError()
438
439
# Run the unittest test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
442