changeset 326:fe57b96f33d4

made ExampleWiseMean Op
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Fri, 13 Jun 2008 15:26:35 -0400
parents 7a734dba4cac
children 2480024bf401
files learner.py stat_ops.py statscollector.py
diffstat 3 files changed, 94 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/learner.py	Fri Jun 13 11:14:59 2008 -0400
+++ b/learner.py	Fri Jun 13 15:26:35 2008 -0400
@@ -17,7 +17,7 @@
     in machine learning:  an offline learning algorithm is applied
     to a training dataset, 
 
-       model = learning_algorithm(training_set)
+        model = learning_algorithm(training_set)
         
     resulting in a fully trained model that can be applied to another dataset
     in order to perform some desired computation:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/stat_ops.py	Fri Jun 13 15:26:35 2008 -0400
@@ -0,0 +1,92 @@
+
+import theano
+from theano import gof
+from theano import tensor
+import numpy
+
+
+class ExampleWiseMean(gof.Op):
+    
+    def __init__(self):
+        self.destroy_map = {0: [1, 2]}
+
+    def make_node(self, x):
+        return gof.Apply(self,
+                         [x, tensor.value(float('nan')), tensor.value(0)],
+                         [tensor.Tensor(dtype = 'float64',
+                                        broadcastable = x.type.broadcastable)()])
+
+    def perform(self, node, (x, sum, n), (out,)):
+        if numpy.isnan(sum).any():
+            sum.resize(x.shape, refcheck=0)
+            sum[:] = x
+        else:
+            sum += x
+        n += 1
+        out[0] = sum / n
+
+    def c_code(self, name, node, (x, sum, n), (out, ), sub):
+        return """
+        PyObject* multi;
+        int nelems;
+        if (isnan(((double*)(%(sum)s->data))[0])) {
+            PyArray_Dims dims;
+            dims.len = %(x)s->nd;
+            dims.ptr = %(x)s->dimensions;
+            PyArray_Resize(%(sum)s, &dims, 0, PyArray_CORDER);
+            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
+            nelems = PyArray_SIZE(%(sum)s);
+            while (nelems--) {
+                // Copy %(x)s in %(sum)s
+                *(double*)PyArray_MultiIter_DATA(multi, 0) = *(double*)PyArray_MultiIter_DATA(multi, 1);
+                PyArray_MultiIter_NEXT(multi);
+            }
+        }
+        else {
+            // Add some error checking on the size of x
+            multi = PyArray_MultiIterNew(2, %(sum)s, %(x)s);
+            nelems = PyArray_SIZE(%(sum)s);
+            while (nelems--) {
+                // Add %(x)s to %(sum)s
+                *(double*)PyArray_MultiIter_DATA(multi, 0) += *(double*)PyArray_MultiIter_DATA(multi, 1);
+                PyArray_MultiIter_NEXT(multi);
+            }
+        }
+        ((npy_int64*)(%(n)s->data))[0]++;
+        int n = ((npy_int64*)(%(n)s->data))[0];
+        if (%(out)s == NULL) {
+            %(out)s = (PyArrayObject*)PyArray_EMPTY(%(sum)s->nd, %(sum)s->dimensions, NPY_FLOAT64, 0);
+        }
+        multi = PyArray_MultiIterNew(2, %(sum)s, %(out)s);
+        nelems = PyArray_SIZE(%(sum)s);
+        while (nelems--) {
+            // %(out)s <- %(sum)s / %(n)s
+            *(double*)PyArray_MultiIter_DATA(multi, 1) = *(double*)PyArray_MultiIter_DATA(multi, 0) / n;
+            PyArray_MultiIter_NEXT(multi);
+        }        
+        """ % dict(locals(), **sub)
+
+
+
+if __name__ == '__main__':
+    
+    vectors = numpy.random.RandomState(666).rand(10, 2)
+
+    x = tensor.dvector()
+    e = ExampleWiseMean()(x)
+
+    # f = theano.function([x], [e], linker = 'py')
+
+    # for i, v in enumerate(vectors):
+    #     print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
+
+    # print
+
+    f = theano.function([x], [e], linker = 'c|py')
+
+    for i, v in enumerate(vectors):
+        print v, "->", f(v), numpy.mean(vectors[:i+1], axis=0)
+
+
+
+
--- a/statscollector.py	Fri Jun 13 11:14:59 2008 -0400
+++ b/statscollector.py	Fri Jun 13 15:26:35 2008 -0400
@@ -3,7 +3,7 @@
 
 #    def my_stats((residue,nll),(regularizer)):
 #            mse=examplewise_mean(square_norm(residue))
-# 	         training_loss=regularizer+examplewise_sum(nll)
+#            training_loss=regularizer+examplewise_sum(nll)
 #            set_names(locals())
 #            return ((residue,nll),(regularizer),(),(mse,training_loss))
 #    my_stats_collector = make_stats_collector(my_stats)