view test_dataset.py @ 86:fdf72ea4f2bc

Added function test_ds in test_ArrayDataSet, which tests a sub-dataset
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Mon, 05 May 2008 14:37:34 -0400
parents aa9e786ee849
children 3fd6879e0f76
line wrap: on
line source

#!/bin/env python
from dataset import *
from math import *
import numpy

def have_raised(to_eval, global_vars=None, local_vars=None):
    """Return True if evaluating the expression string ``to_eval`` raises.

    By default the expression is evaluated in the namespace of the *caller*
    (its globals and locals), so it may refer to the caller's local
    variables.  Evaluating it in this function's own scope would make any
    reference to a caller-local name fail with NameError, and the check
    would then report "raised" no matter what the expression does.

    Parameters:
        to_eval: a Python expression, as a string.  It is passed to
            ``eval``, so it must only ever come from trusted test code.
        global_vars: optional globals dict to evaluate in.  Defaults to the
            caller's globals.
        local_vars: optional locals mapping to evaluate in.  Defaults to the
            caller's locals when ``global_vars`` is not given, otherwise to
            ``global_vars`` (the same rule ``eval`` itself applies).

    Returns:
        True if the evaluation raised an ``Exception``, False otherwise.
        KeyboardInterrupt and SystemExit are deliberately not swallowed.
    """
    import sys

    if global_vars is None:
        # sys._getframe(1) is the frame of whoever called have_raised.
        caller = sys._getframe(1)
        global_vars = caller.f_globals
        if local_vars is None:
            local_vars = caller.f_locals
    elif local_vars is None:
        local_vars = global_vars

    try:
        eval(to_eval, global_vars, local_vars)
    except Exception:
        return True
    return False

def test1():
    print "test1"
    global a,ds
    a = numpy.random.rand(10,4)
    print a
    ds = ArrayDataSet(a,{'x':slice(3),'y':3,'z':[0,2]})
    print "len(ds)=",len(ds)
    assert(len(ds)==10)
    print "example 0 = ",ds[0]
#    assert
    print "x=",ds["x"]
    print "x|y"
    for x,y in ds("x","y"):
        print x,y
    minibatch_iterator = ds.minibatches(fieldnames=['z','y'],n_batches=1,minibatch_size=3,offset=4)
    minibatch = minibatch_iterator.__iter__().next()
    print "minibatch=",minibatch
    for var in minibatch:
        print "var=",var
    print "take a slice and look at field y",ds[1:6:2]["y"]

def test_ArrayDataSet():
    """Exercise ArrayDataSet on a random 10x4 float array.

    Covered: length, per-example field access, iteration over examples
    (both as examples and as unpacked field values), minibatches, lookup of
    an undefined field, ``fields()`` / ``examples()``, and sub-datasets
    obtained by slicing.

    Not covered yet: streams, non-float values, missing values, field
    specifications given as tuples, and the items listed in the TODO notes
    in the body.

    Array values are compared element-wise with ``numpy.array_equal``.
    Comparing ``x.all() == y.all()`` only compares two truth values and
    passes for almost any pair of arrays.
    """
    print("test_ArrayDataSet")
    a = numpy.random.rand(10, 4)
    # TODO: a field specification given as a tuple is not tested.
    ds = ArrayDataSet(a, {'x': slice(3), 'y': 3, 'z': [0, 2]})

    assert len(ds) == 10
    # TODO: should `ds == a` work?

    # Field access on single examples: 'x' is slice(3), 'y' is column 3 and
    # 'z' is columns [0, 2] of the corresponding row.
    for i, row in enumerate(a):
        example = ds[i]
        assert numpy.array_equal(example['x'], row[:3])
        assert example['y'] == row[3]
        assert numpy.array_equal(example['z'], row[0:3:2])

    # Iterating over ('x', 'y') as whole examples: x followed by y rebuilds
    # the full row.
    for i, example in enumerate(ds('x', 'y')):
        assert numpy.array_equal(numpy.append(example['x'], example['y']), a[i])

    # Same check with the example unpacked into its field values.
    for i, (x, y) in enumerate(ds('x', 'y')):
        assert numpy.array_equal(numpy.append(x, y), a[i])

    # In every minibatch, columns 0 and 2 of 'x' must match 'z'.
    for minibatch in ds.minibatches(['x', 'z'], minibatch_size=3):
        assert numpy.array_equal(minibatch[0][:, 0:3:2], minibatch[1])
    for x, z in ds.minibatches(['x', 'z'], minibatch_size=3):
        assert numpy.array_equal(x[:, 0:3:2], z)

    # TODO: also check minibatches(fieldnames=['z','y'], n_batches=1,
    # minibatch_size=3, offset=4) and ds[1:6:2]["y"]; test1 only prints them.

    # 'h' is not a field of the dataset, so both lookups must raise.
    assert have_raised("ds['h']")
    assert have_raised("ds[['h']]")

    assert len(ds.fields()) == 3
    for field in ds.fields():
        # Iterate over the values of that field for all the ds examples.
        for field_value in field:
            pass
    for field in ds('x', 'z').fields():
        pass
    for field in ds.fields('x', 'y'):
        pass
    for field_examples in ds.fields():
        for example_value in field_examples:
            pass

    assert ds == ds.fields().examples()

    def test_ds(orig, ds, index):
        """Check that sub-dataset ``ds`` holds the examples ``index`` of ``orig``.

        ``index[k]`` is the position in ``orig`` (and in the array ``a``) of
        the k-th example of ``ds``.
        """
        assert len(ds) == len(index)
        for i, (x, z, y) in enumerate(ds('x', 'z', 'y')):
            print("%s %s %s" % (x, y, z))
            source = index[i]
            expected = orig[source]
            assert numpy.array_equal(expected['x'], a[source][:3])
            assert numpy.array_equal(expected['x'], x)
            assert expected['y'] == a[source][3]
            assert expected['y'] == y
            assert numpy.array_equal(expected['z'], a[source][0:3:2])
            assert numpy.array_equal(expected['z'], z)

    # ds[:n] returns a dataset with the n first examples.
    ds2 = ds[:3]
    test_ds(ds, ds2, index=[0, 1, 2])

    # ds[i1:i2:s] returns a dataset with the examples i1, i1+s, ..., i2-s.
    ds2 = ds[1:7:2]
    ds2[1]  # indexing into a sliced dataset must not raise
    test_ds(ds, ds2, [1, 3, 5])

    # TODO: ds[[i1, i2, ..., in]] should return a dataset with the examples
    # i1, i2, ..., in, but this currently fails:
    #     ds2 = ds[[4, 7, 2, 8]]
    #     assert len(ds2) == 4
    #     test_ds(ds, ds2, [4, 7, 2, 8])
    # TODO: ds[i1, i2, ...] -- should this be accepted?
    # TODO: ds[fieldname] -- an iterable over the values of the field
    #   fieldname across the ds (the iterable is obtained by default by
    #   calling valuesVStack over the values for individual examples).
    # TODO: ds.<property> -- returns the value of a property associated with
    #   the name <property>.  The following properties should be supported:
    #     - 'description': a textual description or name for the ds
    #     - 'fieldtypes': a list of types (one per field)
    # TODO: ds1 | ds2 | ds3 == ds.hstack([ds1, ds2, ds3])
    # TODO: ds1 & ds2 & ds3 == ds.vstack([ds1, ds2, ds3])
    # TODO: iterate over ds('x','y') and a in lockstep; no working variant
    #   has been found yet, e.g.
    #     for (x, y) in (ds('x', 'y'), a):
    #         assert numpy.append(x, y) == z

def test_LookupList():
    """Walk through basic LookupList usage.

    Covers setting a field, unpacking, access by position and by name,
    keys()/values()/items(), append_keyval, len() and concatenation with
    ``+``.
    """
    # TODO: is testing only the example from the doc enough?
    print("test_LookupList")
    example = LookupList(['x', 'y', 'z'], [1, 2, 3])
    example['x'] = [1, 2, 3]  # set or change a field

    # Unpacking, positional access and access by name must all work.
    x, y, z = example
    x = example[0]
    x = example["x"]

    expected_keys = ['x', 'y', 'z']
    expected_values = [[1, 2, 3], 2, 3]
    assert example.keys() == expected_keys
    assert example.values() == expected_values
    assert example.items() == list(zip(expected_keys, expected_values))

    # Adds an item with name 'u' and value 0, so there are now 4 items.
    example.append_keyval('u', 0)
    assert len(example) == 4

    example2 = LookupList(['v', 'w'], ['a', 'b'])
    example3 = LookupList(['x', 'y', 'z', 'u', 'v', 'w'],
                          [[1, 2, 3], 2, 3, 0, 'a', 'b'])
    assert example + example2 == example3
    assert have_raised("example+example")

def test_ApplyFunctionDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_ApplyFunctionDataSet")
    raise NotImplementedError
def test_CacheDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_CacheDataSet")
    raise NotImplementedError
def test_FieldsSubsetDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_FieldsSubsetDataSet")
    raise NotImplementedError
def test_DataSetFields():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_DataSetFields")
    raise NotImplementedError
def test_MinibatchDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_MinibatchDataSet")
    raise NotImplementedError
def test_HStackedDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_HStackedDataSet")
    raise NotImplementedError
def test_VStackedDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_VStackedDataSet")
    raise NotImplementedError
def test_ArrayFieldsDataSet():
    """Placeholder test: print its name, then raise NotImplementedError."""
    print("test_ArrayFieldsDataSet")
    raise NotImplementedError

# Run the implemented tests; these calls execute whenever the module is
# loaded, since they are not guarded by a __main__ check.
# test1 goes first: it is the one that assigns the module globals `a` and `ds`.
# The test_* placeholders that raise NotImplementedError are not invoked here.
test1()
test_LookupList()
test_ArrayDataSet()