# HG changeset patch # User James Bergstra # Date 1256252967 14400 # Node ID b2948ae5087c1a07c97d57b9aff1e77b9e286a57 # Parent c19085585464fdddaba7da611400569c31c28baa added forget() to tearDown of cifar unittests diff -r c19085585464 -r b2948ae5087c pylearn/dataset_ops/tests/test_cifar10.py --- a/pylearn/dataset_ops/tests/test_cifar10.py Thu Oct 22 18:53:50 2009 -0400 +++ b/pylearn/dataset_ops/tests/test_cifar10.py Thu Oct 22 19:09:27 2009 -0400 @@ -1,121 +1,131 @@ +import unittest import numpy import theano from theano.compile.sandbox import pfunc, shared from theano import tensor -from pylearn.dataset_ops.cifar10 import cifar10 - -def test_single(): - - s_idx = theano.tensor.iscalar() - - for dtype in ('uint8', 'float64', 'float32'): - x, y = cifar10(s_idx, split='train', dtype=dtype, rasterized=False, color='grey') - assert x.dtype == dtype - -def test_shape_range(): - """Test that the image numbers come out in the right range for various dtypes""" - s_idx = theano.tensor.iscalar() +from pylearn.dataset_ops.cifar10 import cifar10, forget - #uint8 not-rasterized grey - x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=False, color='grey') - f = pfunc([s_idx], [x,y]) - xval, yval = f(0) - assert str(xval.dtype) == 'uint8' - assert xval.min() >= 0 - assert xval.max() < 256 - assert xval.max() > 1 - assert xval.shape == (32,32) - - #uint8 not-rasterized rgb - x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=False, color='rgb') - f = pfunc([s_idx], [x,y]) - xval, yval = f(0) - assert str(xval.dtype) == 'uint8' - assert xval.min() >= 0 - assert xval.max() < 256 - assert xval.max() > 1 - assert xval.shape == (32,32, 3) - - #uint8 rasterized grey - x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=True, color='grey') - f = pfunc([s_idx], [x,y]) - xval, yval = f(0) - assert str(xval.dtype) == 'uint8' - assert xval.min() >= 0 - assert xval.max() < 256 - assert xval.max() > 1 - assert xval.shape == (1024,) +class TestCifar10(unittest.TestCase): - #uint8 rasterized rgb - x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=True, color='rgb') - f = pfunc([s_idx], [x,y]) - xval, yval = f(0) - assert str(xval.dtype) == 'uint8' - assert xval.min() >= 0 - assert xval.max() < 256 - assert xval.max() > 1 - assert xval.shape == (1024, 3) - - # ranges are handled independently from shapes, so I'll consider the shapes have been - # tested above, and now I just look at ranges for floating-point dtypes - - #float32 - x, y = cifar10(s_idx, split='train', dtype='float32', rasterized=False, color='grey') - f = pfunc([s_idx], [x,y]) - xval, yval = f(0) - assert str(xval.dtype) == 'float32' - assert xval.min() >= 0.0 - assert xval.max() <= 1.0 - assert xval.max() > 0.01 - assert xval.shape == (32,32) + def setUp(self): + pass - #float64 - x, y = cifar10(s_idx + range(5), split='train', dtype='float64', rasterized=True, color='rgb') - f = pfunc([s_idx], [x,y]) - xval, yval = f(0) - assert str(xval.dtype) == 'float64' - assert xval.min() >= 0.0 - assert xval.max() <= 1.0 - assert xval.max() > 0.01 - assert xval.shape == (5, 1024, 3) - -def test_split_different(): - s_idx = theano.tensor.iscalar() - x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=False, color='grey') - f = pfunc([s_idx], [x,y]) - train_xval, train_yval = f(0) - - x, y = cifar10(s_idx, split='valid', dtype='uint8', rasterized=False, color='grey') - f = pfunc([s_idx], [x,y]) - valid_xval, valid_yval = f(0) - - x, y = cifar10(s_idx, split='test', dtype='uint8', rasterized=False, color='grey') - f = pfunc([s_idx], [x,y]) - test_xval, test_yval = f(0) - - assert not numpy.all(train_xval == valid_xval) - assert not numpy.all(train_xval == test_xval) - assert not numpy.all(valid_xval == test_xval) + def tearDown(self): + forget() -def test_split_length(): - """test that each split has the correct length""" - s_idx = theano.tensor.iscalar() - for bsize in [1, 3, 5]: - for (split, goodlen) in [('train', 40000), ('valid', 10000), ('test', 10000)]: - if bsize == 1: - x, y = cifar10(s_idx, split=split, dtype='uint8', rasterized=False, color='grey') - else: - x, y = cifar10(s_idx*bsize + range(bsize), split=split, dtype='uint8', rasterized=False, color='grey') + def test_single(self): + + s_idx = theano.tensor.iscalar() + + for dtype in ('uint8', 'float64', 'float32'): + x, y = cifar10(s_idx, split='train', dtype=dtype, rasterized=False, color='grey') + assert x.dtype == dtype + + def test_shape_range(self): + """Test that the image numbers come out in the right range for various dtypes""" + s_idx = theano.tensor.iscalar() + + #uint8 not-rasterized grey + x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=False, color='grey') + f = pfunc([s_idx], [x,y]) + xval, yval = f(0) + assert str(xval.dtype) == 'uint8' + assert xval.min() >= 0 + assert xval.max() < 256 + assert xval.max() > 1 + assert xval.shape == (32,32) + + #uint8 not-rasterized rgb + x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=False, color='rgb') + f = pfunc([s_idx], [x,y]) + xval, yval = f(0) + assert str(xval.dtype) == 'uint8' + assert xval.min() >= 0 + assert xval.max() < 256 + assert xval.max() > 1 + assert xval.shape == (32,32, 3) + + #uint8 rasterized grey + x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=True, color='grey') + f = pfunc([s_idx], [x,y]) + xval, yval = f(0) + assert str(xval.dtype) == 'uint8' + assert xval.min() >= 0 + assert xval.max() < 256 + assert xval.max() > 1 + assert xval.shape == (1024,) + + #uint8 rasterized rgb + x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=True, color='rgb') + f = pfunc([s_idx], [x,y]) + xval, yval = f(0) + assert str(xval.dtype) == 'uint8' + assert xval.min() >= 0 + assert xval.max() < 256 + assert xval.max() > 1 + assert xval.shape == (1024, 3) + + # ranges are handled independently from shapes, so I'll consider the shapes have been + # tested above, and now I just look at ranges for floating-point dtypes - f = pfunc([s_idx], [x,y]) - i = 0 - while i < 900000: - try: - f(i) - except IndexError: - break - i += 1 - assert i == (goodlen / bsize) # when goodlen % bsize, we should skip the extra bit + #float32 + x, y = cifar10(s_idx, split='train', dtype='float32', rasterized=False, color='grey') + f = pfunc([s_idx], [x,y]) + xval, yval = f(0) + assert str(xval.dtype) == 'float32' + assert xval.min() >= 0.0 + assert xval.max() <= 1.0 + assert xval.max() > 0.01 + assert xval.shape == (32,32) + + #float64 + x, y = cifar10(s_idx + range(5), split='train', dtype='float64', rasterized=True, color='rgb') + f = pfunc([s_idx], [x,y]) + xval, yval = f(0) + assert str(xval.dtype) == 'float64' + assert xval.min() >= 0.0 + assert xval.max() <= 1.0 + assert xval.max() > 0.01 + assert xval.shape == (5, 1024, 3) + + def test_split_different(self): + s_idx = theano.tensor.iscalar() + x, y = cifar10(s_idx, split='train', dtype='uint8', rasterized=False, color='grey') + f = pfunc([s_idx], [x,y]) + train_xval, train_yval = f(0) + + x, y = cifar10(s_idx, split='valid', dtype='uint8', rasterized=False, color='grey') + f = pfunc([s_idx], [x,y]) + valid_xval, valid_yval = f(0) + x, y = cifar10(s_idx, split='test', dtype='uint8', rasterized=False, color='grey') + f = pfunc([s_idx], [x,y]) + test_xval, test_yval = f(0) + + assert not numpy.all(train_xval == valid_xval) + assert not numpy.all(train_xval == test_xval) + assert not numpy.all(valid_xval == test_xval) + + + def test_split_length(self): + """test that each split has the correct length""" + s_idx = theano.tensor.iscalar() + for bsize in [1, 3, 5]: + for (split, goodlen) in [('train', 40000), ('valid', 10000), ('test', 10000)]: + if bsize == 1: + x, y = cifar10(s_idx, split=split, dtype='uint8', rasterized=False, color='grey') + else: + x, y = cifar10(s_idx*bsize + range(bsize), split=split, dtype='uint8', rasterized=False, color='grey') + + f = pfunc([s_idx], [x,y]) + i = 0 + while i < 900000: + try: + f(i) + except IndexError: + break + i += 1 + assert i == (goodlen / bsize) # when goodlen % bsize, we should skip the extra bit +