view stat_ops.py @ 431:0f8c81b0776d

Adding file make_test_datasets to host simple data-generating processes to create artificial datasets meant to test various learning algorithms.
author Yoshua Bengio <bengioy@iro.umontreal.ca>
date Tue, 29 Jul 2008 10:19:25 -0400
parents fe57b96f33d4
children
line wrap: on
line source


import theano
from theano import gof
from theano import tensor
import numpy


class ExampleWiseMean(gof.Op):
    
    def __init__(self):
        self.destroy_map = {0: [1, 2]}

    def make_node(self, x):
        return gof.Apply(self,
                         [x, tensor.value(float('nan')), tensor.value(0)],
                         [tensor.Tensor(dtype = 'float64',
                                        broadcastable = x.type.broadcastable)()])

    def perform(self, node, (x, sum, n), (out,)):
        if numpy.isnan(sum).any():
            sum.resize(x.shape, refcheck=0)
            sum[:] = x
        else:
            sum += x
        n += 1
        out[0] = sum / n

    def c_code(self, name, node, (x, sum, n), (out, ), sub):
        return """
        PyObject* multi;
        int nelems;
        if (isnan(((double*)(%(sum)s->data))[0])) {
            PyArray_Dims dims;
            dims.len = %(x)s->nd;
            dims.ptr = %(x)s->dimensions;
            PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER);
            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
            nelems = PyArray_SIZE(%(sum)s);
            while (nelems--) {
                // Copy %(x)s in %(sum)s
                *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1);
                PyArray_MultiIter_NEXT(multi);
            }
        }
        else {
            // Add some error checking on the size of x
            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
            nelems = PyArray_SIZE(%(sum)s);
            while (nelems--) {
                // Add %(x)s to %(sum)s
                *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1);
                PyArray_MultiIter_NEXT(multi);
            }
        }
        ((npy_int64*)(%(n)s->data))[0]++;
        int n = ((npy_int64*)(%(n)s->data))[0];
        if (%(out)s == NULL) {
            %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0);
        }
        multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s);
        nelems = PyArray_SIZE(%(sum)s);
        while (nelems--) {
            // %(out)s <- %(sum)s / %(n)s
            *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n;
            PyArray_MultiIter_NEXT(multi);
        }        
        """ % dict(locals(), **sub)



if __name__ == '__main__':
    
    vectors = numpy.random.RandomState(666).rand(10, 2)

    x = tensor.dvector()
    e = ExampleWiseMean()(x)

    # f = theano.function([x], [e], linker = 'py')

    # for i, v in enumerate(vectors):
    #     print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)

    # print

    f = theano.function([x], [e], linker = 'c|py')

    for i, v in enumerate(vectors):
        print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)