changeset 649:c433b9cf9d09

merge
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 04 Feb 2009 15:56:20 -0500
parents 4a7d413c3425 (diff) fa6c399dc292 (current diff)
children 83e8fe9b1c82
files bin/dbdict-query bin/dbdict-run bin/dbdict-run-job pylearn/algorithms/logistic_regression.py pylearn/dbdict/__init__.py pylearn/dbdict/api0.py pylearn/dbdict/crap.py pylearn/dbdict/dbdict_run.py pylearn/dbdict/dbdict_run_sql.py pylearn/dbdict/dconfig.py pylearn/dbdict/design.txt pylearn/dbdict/experiment.py pylearn/dbdict/newstuff.py pylearn/dbdict/sample_create_jobs.py pylearn/dbdict/scratch.py pylearn/dbdict/sql.py pylearn/dbdict/sql_commands.py pylearn/dbdict/test_api0.py pylearn/dbdict/tests/test_experiment.py pylearn/dbdict/tools.py
diffstat 2 files changed, 84 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/algorithms/logistic_regression.py	Tue Feb 03 16:47:40 2009 -0500
+++ b/pylearn/algorithms/logistic_regression.py	Wed Feb 04 15:56:20 2009 -0500
@@ -98,6 +98,8 @@
 #FIX : Guillaume suggested a convention: plugin handlers (dataset_factory, minimizer_factory,
 #      etc.) should never provide default arguments for parameters, and accept **kwargs to catch
 #      irrelevant parameters.
+#SOLUTION: the jobman deals in nested dictionaries.  This means that there is no [dumb] reason that
+#          irrelevant arguments should be passed at all.
 class _fit_logreg_defaults(object):
     minimizer_algo = 'dummy'
     #minimizer_lr = 0.001
@@ -107,9 +109,6 @@
     batchsize = 8
     verbose = 1
 
-# consider pre-importing each file in algorithms, datasets (possibly with try/catch around each
-# import so that this import failure is ignored)
-
 def fit_logistic_regression_online(state, channel=lambda *args, **kwargs:None):
     #use stochastic gradient descent
     state.use_defaults(_fit_logreg_defaults)
@@ -193,3 +192,62 @@
                                         updates = dict((p, p - self.lr * g) for p, g in zip(self.params, gparams)))
 
 
+class LogReg_New(module.FancyModule):
+    """A symbolic module for performing multi-class logistic regression."""
+
+    params = property(
+            lambda self: [p for p in [self.w, self.b] if p.owner is None],
+            doc="WRITEME"
+            )
+
+    def __init__(self, n_in=None, n_out=None, w=None, b=None):
+        super(LogReg_New, self).__init__() #boilerplate
+
+        self.n_in = n_in
+        self.n_out = n_out
+
+        self.w = w if w is not None else module.Member(T.dmatrix())
+        self.b = b if b is not None else module.Member(T.dvector())
+
+    def l1(self):
+        return abs(self.w).sum()
+
+    def l2(self):
+        return (self.w**2).sum()
+
+    def activation(self, input):
+        return T.dot(input, self.w) + self.b
+
+    def softmax(self, input):
+        return nnet.softmax(self.activation(input))
+
+    def argmax(self, input):
+        return T.max_and_argmax(self.activation(input))[1]
+
+    def xent(self, input, target):
+        """The cross-entropy between the prediction from `input`, and the true `target`.
+
+        This function returns a symbolic vector, with the cross-entropy for each row in
+        `input`.  
+        
+        Hint: To sum these costs into a scalar value, use "xent(input, target).sum()"
+        """
+        return -target * T.log(self.softmax(input))
+
+    def errors(self, input, target):
+        """The zero-one error of the prediction from `input`, with respect to the true `target`.
+
+        This function returns a symbolic vector, with the incorrectness of each prediction
+        (made row-wise from `input`).
+        
+        Hint: Count errors with "errors(input, target).sum()", and get the error-rate with
+        "errors(input, target).mean()"
+
+        """
+        return T.neq(self.argmax(input), target)
+
+    def _instance_initialize(self, obj):
+        obj.w = N.zeros((self.n_in, self.n_out))
+        obj.b = N.zeros(self.n_out)
+        obj.__pp_hide__ = ['params']
+
--- a/pylearn/datasets/flickr.py	Tue Feb 03 16:47:40 2009 -0500
+++ b/pylearn/datasets/flickr.py	Wed Feb 04 15:56:20 2009 -0500
@@ -26,6 +26,10 @@
     valid = filetensor.read(open(os.path.join(root, path_valid_10class)))
     test = filetensor.read(open(os.path.join(root, path_test_10class)))
 
+    assert train.shape[1] == 75*75 +1
+    assert valid.shape[1] == 75*75 +1
+    assert test.shape[1] == 75*75 +1
+
     rval = Dataset()
 
     rval.train = Dataset.Obj(
@@ -46,3 +50,22 @@
 def translations_10class():
     raise NotImplementedError('TODO')
 
+
+def render_a_few_images(n=10, prefix='flickr_img', suffix='png'):
+    #TODO: document this and move it to a more common 
+    #      place where other datasets can use it
+    from PIL import Image
+    root = os.path.join(data_root(), 'flickr')
+    valid = filetensor.read(open(os.path.join(root, path_valid_10class)))
+    assert valid.shape == (1000,75*75+1)
+    for i in xrange(n):
+        pixelarray = valid[i,0:-1].reshape((75,75)).T
+        assert numpy.all(pixelarray >= 0)
+        assert numpy.all(pixelarray <= 1)
+
+        pixel_uint8 = numpy.asarray( pixelarray * 255.0, dtype='uint8')
+        im = Image.frombuffer('L', pixel_uint8.shape, pixel_uint8.data, 'raw', 'L', 0, 1)
+        im.save(prefix + str(i) + '.' + suffix)
+        
+
+