Mercurial > pylearn

import unittest
import numpy
import theano
from theano.compile import pfunc, shared
from theano import tensor

from pylearn.dataset_ops.cifar10 import cifar10, forget

class TestCifar10(unittest.TestCase):
    """Tests for the ``cifar10`` dataset op.

    The tests check the dtype, shape and value range of the images the op
    returns, that the three splits yield different first examples, and that
    each split exposes the expected number of (mini)batches.
    """

    def setUp(self):
        """No per-test fixture is required."""

    def tearDown(self):
        """Call ``forget()`` after every test.

        NOTE(review): presumably this drops whatever dataset state the op
        keeps cached so that tests do not share it -- confirm against
        ``pylearn.dataset_ops.cifar10``.
        """
        forget()

    # -- helpers -----------------------------------------------------------

    def _eval_at_zero(self, s_idx, index, **kwargs):
        """Build ``cifar10(index, **kwargs)``, compile it and evaluate at 0.

        :param s_idx: symbolic integer scalar used as the only input of the
            compiled function.
        :param index: index expression passed to ``cifar10`` (either
            ``s_idx`` itself or an expression derived from it).
        :param kwargs: keyword arguments forwarded verbatim to ``cifar10``.
        :returns: the pair ``(xval, yval)`` produced by calling the compiled
            function with ``s_idx = 0``.
        """
        x, y = cifar10(index, **kwargs)
        f = pfunc([s_idx], [x, y])
        xval, yval = f(0)
        return xval, yval

    def _check_uint8_image(self, xval, shape):
        """Check that ``xval`` is uint8, spans more than {0, 1}, and has ``shape``."""
        self.assertEqual(str(xval.dtype), 'uint8')
        self.assertTrue(xval.min() >= 0)
        self.assertTrue(xval.max() < 256)
        # A maximum above 1 shows the pixels were not rescaled to [0, 1].
        self.assertTrue(xval.max() > 1)
        self.assertEqual(xval.shape, shape)

    def _check_float_image(self, xval, dtype, shape):
        """Check that ``xval`` has float ``dtype``, lies in [0, 1], and has ``shape``."""
        self.assertEqual(str(xval.dtype), dtype)
        self.assertTrue(xval.min() >= 0.0)
        self.assertTrue(xval.max() <= 1.0)
        # Guard against an (almost) all-zero image.
        self.assertTrue(xval.max() > 0.01)
        self.assertEqual(xval.shape, shape)

    # -- tests -------------------------------------------------------------

    def test_single(self):
        """The symbolic image variable carries the requested dtype."""
        s_idx = theano.tensor.iscalar()

        for dtype in ('uint8', 'float64', 'float32'):
            x, _ = cifar10(s_idx, split='train', dtype=dtype,
                           rasterized=False, color='grey')
            self.assertEqual(x.dtype, dtype)

    def test_shape_range(self):
        """Test that the image numbers come out in the right range for various dtypes"""
        s_idx = theano.tensor.iscalar()

        # uint8: every combination of rasterized / color, with its shape.
        uint8_cases = [
            (False, 'grey', (32, 32)),
            (False, 'rgb', (32, 32, 3)),
            (True, 'grey', (1024,)),
            (True, 'rgb', (1024, 3)),
        ]
        for rasterized, color, shape in uint8_cases:
            xval, _ = self._eval_at_zero(
                s_idx, s_idx, split='train', dtype='uint8',
                rasterized=rasterized, color=color)
            self._check_uint8_image(xval, shape)

        # Ranges are handled independently from shapes, so the shapes are
        # considered tested above; from here on the focus is on the value
        # range of the floating-point dtypes.

        # float32, single example
        xval, _ = self._eval_at_zero(
            s_idx, s_idx, split='train', dtype='float32',
            rasterized=False, color='grey')
        self._check_float_image(xval, 'float32', (32, 32))

        # float64, minibatch of 5 examples.  list(...) keeps the operand a
        # plain list on Python 3, where range() is a lazy object.
        xval, _ = self._eval_at_zero(
            s_idx, s_idx + list(range(5)), split='train', dtype='float64',
            rasterized=True, color='rgb')
        self._check_float_image(xval, 'float64', (5, 1024, 3))

    def test_split_different(self):
        """The first example of each split differs from the other splits'."""
        s_idx = theano.tensor.iscalar()

        first = {}
        for split in ('train', 'valid', 'test'):
            first[split], _ = self._eval_at_zero(
                s_idx, s_idx, split=split, dtype='uint8',
                rasterized=False, color='grey')

        self.assertFalse(numpy.all(first['train'] == first['valid']))
        self.assertFalse(numpy.all(first['train'] == first['test']))
        self.assertFalse(numpy.all(first['valid'] == first['test']))

    def test_split_length(self):
        """test that each split has the correct length"""
        s_idx = theano.tensor.iscalar()
        # Upper bound on calls so a missing IndexError cannot loop forever.
        max_calls = 900000

        for bsize in [1, 3, 5]:
            for (split, goodlen) in [('train', 40000), ('valid', 10000), ('test', 10000)]:
                if bsize == 1:
                    index = s_idx
                else:
                    # list(...) keeps the operand a plain list on Python 3.
                    index = s_idx * bsize + list(range(bsize))
                x, y = cifar10(index, split=split, dtype='uint8',
                               rasterized=False, color='grey')

                f = pfunc([s_idx], [x, y])

                # Count how many consecutive batch indices can be fetched
                # before the op raises IndexError.
                n_batches = 0
                while n_batches < max_calls:
                    try:
                        f(n_batches)
                    except IndexError:
                        break
                    n_batches += 1

                # Floor division: when goodlen % bsize != 0 the trailing
                # partial batch is skipped.  True division (Python 3 `/`)
                # would yield a float that never equals the integer count.
                self.assertEqual(n_batches, goodlen // bsize)
author	Frederic Bastien <nouiz@nouiz.org>
date	Mon, 09 Sep 2013 10:08:05 -0400
parents	912be602c3ac
children