changeset 604:52a99d83f06d

merge
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Thu, 15 Jan 2009 17:12:36 -0500
parents 84e3af9d0110 (current diff) 28f7dc848efc (diff)
children 8f40262297cf 7cee8c7f0449
files pylearn/dbdict/newstuff.py
diffstat 7 files changed, 90 insertions(+), 43 deletions(-)
--- a/pylearn/algorithms/__init__.py	Thu Jan 15 17:07:49 2009 -0500
+++ b/pylearn/algorithms/__init__.py	Thu Jan 15 17:12:36 2009 -0500
@@ -1,5 +0,0 @@
-
-from .minimizer import make_minimizer, minimizer_factory
-from .stopper import make_stopper, stopper_factory
-from .stacker import Stacker
-from .regressor import BinRegressor
--- a/pylearn/algorithms/logistic_regression.py	Thu Jan 15 17:07:49 2009 -0500
+++ b/pylearn/algorithms/logistic_regression.py	Thu Jan 15 17:12:36 2009 -0500
@@ -8,11 +8,20 @@
 
 import numpy as N
 
-from ..datasets import make_dataset
-from .minimizer import make_minimizer
-from .stopper import make_stopper
+class LogRegN(module.FancyModule):
+    """
+    A symbolic module for performing N-class logistic regression.
+
+    Notable variables
+    -----------------
 
-class LogRegN(module.FancyModule):
+    self.input
+    self.target 
+    self.softmax
+    self.argmax
+    self.regularized_cost
+    self.unregularized_cost
+    """
 
     def __init__(self, 
             n_in=None, n_out=None,
@@ -94,8 +103,6 @@
     batchsize = 8
     verbose = 1
 
-from ..datasets import MNIST
-import sgd #TODO:  necessary to add it to factory list
 # consider pre-importing each file in algorithms, datasets (possibly with try/catch around each
 # import so that this import failure is ignored)
 
@@ -103,7 +110,7 @@
     #use stochastic gradient descent
     state.use_defaults(_fit_logreg_defaults)
 
-    dataset = make_dataset(**state.subdict(prefix='dataset_'))
+    dataset = make(state.dataset)
     train = dataset.train
     valid = dataset.valid
     test = dataset.test
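The new docstring above enumerates LogRegN's notable symbolic variables. A minimal usage sketch, not part of the changeset, assuming hypothetical layer sizes and that the module is used through Theano's module API of the era:

# Hypothetical sketch: build the N-class logistic regression module and
# reference the notable variables the docstring lists.
from pylearn.algorithms.logistic_regression import LogRegN

logreg = LogRegN(n_in=784, n_out=10)   # assumed sizes, e.g. MNIST-like data
probs = logreg.softmax                 # symbolic class probabilities
pred = logreg.argmax                   # symbolic predicted class
cost = logreg.regularized_cost         # objective to hand to a minimizer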
--- a/pylearn/algorithms/sgd.py	Thu Jan 15 17:07:49 2009 -0500
+++ b/pylearn/algorithms/sgd.py	Thu Jan 15 17:12:36 2009 -0500
@@ -4,8 +4,6 @@
 from theano.compile import module
 from theano import tensor as T
 
-from minimizer import minimizer_factory
-
 class StochasticGradientDescent(module.FancyModule):
     """Fixed stepsize gradient descent"""
     def __init__(self, args, cost, params, gradients=None, stepsize=None):
@@ -40,8 +38,6 @@
     def _instance_initialize(self, obj):
         pass
 
-
-@minimizer_factory('sgd')
 def sgd_minimizer(stepsize=None, **args):
     def m(i,c,p,g=None):
         return StochasticGradientDescent(i, c, p, stepsize=stepsize, **args)
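With the @minimizer_factory('sgd') registration removed, callers presumably obtain the builder by calling sgd_minimizer directly. A hedged sketch; the symbolic input, cost, and parameter list below are illustrative stand-ins:

# Hypothetical sketch: sgd_minimizer curries the stepsize and returns a
# builder m(inputs, cost, params) for a StochasticGradientDescent module.
from theano import tensor as T
from pylearn.algorithms.sgd import sgd_minimizer

x = T.dmatrix('x')                      # hypothetical symbolic input
cost = (x ** 2).sum()                   # hypothetical cost expression
params = []                             # hypothetical parameter list
make_sgd = sgd_minimizer(stepsize=0.01)
minimizer = make_sgd([x], cost, params)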
--- a/pylearn/algorithms/stopper.py	Thu Jan 15 17:07:49 2009 -0500
+++ b/pylearn/algorithms/stopper.py	Thu Jan 15 17:12:36 2009 -0500
@@ -57,21 +57,6 @@
                     best = (save(), stp.iter, stp.score)
         return best
 
-_stoppers = {}
-
-def stopper_factory(algo):
-    def decorator(fn):
-        if algo in _stoppers:
-            raise Exception('stopper in use', algo)
-        else:
-            _stoppers[algo] = fn
-        return fn
-    return decorator
-
-def make_stopper(algo, **kwargs):
-    return _stoppers[algo](**kwargs)
-
-
 class ICML08Stopper(Stopper):
     @staticmethod
     def icml08(ntrain, batchsize):
@@ -131,13 +116,10 @@
     #TODO: could optimize next() function. Most of what's in ICML08Stopper.next()
     #is not necessary
 
-
-@stopper_factory('icml08')
-def icml08_stopper(i_wait, v_int, min_improvement, patience, hard_limit):
+def geometric_patience(i_wait, v_int, min_improvement, patience, hard_limit):
     return ICML08Stopper(i_wait, v_int, min_improvement, patience, hard_limit)
 
-@stopper_factory('nstages')
-def nstages_stopper(hard_limit, v_int):
+def nstages(hard_limit, v_int):
     return ICML08Stopper(hard_limit, v_int, 1.0, 1.0, hard_limit)
 
 
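The registry indirection (stopper_factory / make_stopper) is dropped in favour of plainly named constructors. A sketch of the direct calls, with illustrative argument values only:

# Hypothetical sketch: construct stoppers directly instead of via
# make_stopper('icml08', ...) or make_stopper('nstages', ...).
from pylearn.algorithms.stopper import geometric_patience, nstages

stop = geometric_patience(i_wait=100, v_int=10, min_improvement=0.001,
                          patience=2.0, hard_limit=100000)
quick = nstages(hard_limit=1000, v_int=10)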
--- a/pylearn/datasets/embeddings/parameters.py	Thu Jan 15 17:07:49 2009 -0500
+++ b/pylearn/datasets/embeddings/parameters.py	Thu Jan 15 17:12:36 2009 -0500
@@ -1,10 +1,10 @@
 """
 Locations of the embedding data files.
 """
-#WEIGHTSFILE     = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt"
-#VOCABFILE       = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc"
-WEIGHTSFILE     = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt"
-VOCABFILE       = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc"
+WEIGHTSFILE     = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt"
+VOCABFILE       = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc"
+#WEIGHTSFILE     = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt"
+#VOCABFILE       = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc"
 NUMBER_OF_WORDS = 30000
 DIMENSIONS      = 50
 UNKNOWN         = "UNKNOWN"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/datasets/flickr.py	Thu Jan 15 17:12:36 2009 -0500
@@ -0,0 +1,52 @@
+"""
+Routines to load variations on the Flickr image dataset.
+"""
+from __future__ import absolute_import
+
+import os
+import numpy
+
+from ..io import filetensor
+if 0:
+    from .config import data_root
+else:
+    def data_root():
+        return '/u/lisa/db/flickr/filetensor'
+from .dataset import Dataset
+
+
+path_test_10class = 'flickr_10classes_test.ft'
+
+path_train_10class = 'flickr_10classes_train.ft'
+
+path_valid_10class = 'flickr_10classes_valid.ft'
+
+def basic_10class(folder = None):
+    """Return the basic flickr image classification problem.
+    The images are 75x75, and there are 7500 training examples.
+    """
+    root = data_root() if folder is None else folder
+    train = filetensor.read(open(os.path.join(root, path_train_10class)))
+    valid = filetensor.read(open(os.path.join(root, path_valid_10class)))
+    test = filetensor.read(open(os.path.join(root, path_test_10class)))
+
+    rval = Dataset()
+
+    rval.train = Dataset.Obj(
+            x=train[:, 0:-1],
+            y=numpy.asarray(train[:, -1], dtype='int64'))
+    rval.valid = Dataset.Obj(
+            x=valid[:, 0:-1],
+            y=numpy.asarray(valid[:, -1], dtype='int64'))
+    rval.test = Dataset.Obj(
+            x=test[:, 0:-1],
+            y=numpy.asarray(test[:, -1], dtype='int64'))
+
+    rval.n_classes = 10
+    rval.img_shape = (75,75)
+
+    return rval
+
+def translations_10class():
+    raise NotImplementedError('TODO')
+
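A short usage sketch for the new loader, grounded in the code above; only the default data location and the field names shown there are assumed:

# Hypothetical sketch: load the 10-class Flickr problem added above.
from pylearn.datasets import flickr

ds = flickr.basic_10class()             # reads from data_root() by default
x, y = ds.train.x, ds.train.y           # pixel rows and int64 labels
assert ds.n_classes == 10 and ds.img_shape == (75, 75)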
--- a/pylearn/dbdict/newstuff.py	Thu Jan 15 17:07:49 2009 -0500
+++ b/pylearn/dbdict/newstuff.py	Thu Jan 15 17:12:36 2009 -0500
@@ -26,11 +26,26 @@
 ### resolve
 ################################################################################
 
-def resolve(name):
+def resolve(name, try_import=True):
+    """
+    Resolve a string of the form X.Y...Z to a Python object by repeatedly
+    applying getattr and, where needed, __import__ (X is imported first,
+    then each of Y, ..., Z is looked up in turn).
+    """
     symbols = name.split('.')
     builder = __import__(symbols[0])
-    for sym in symbols[1:]:
-        builder = getattr(builder, sym)
+    try:
+        for sym in symbols[1:]:
+            try:
+                builder = getattr(builder, sym)
+            except AttributeError, e:
+                if try_import:
+                    __import__(builder.__name__, fromlist=[sym])
+                    builder = getattr(builder, sym)
+                else:
+                    raise e
+    except (AttributeError, ImportError), e:
+        raise type(e)('Failed to resolve compound symbol %s' % name, e)
     return builder
 
 ################################################################################
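The reworked resolve now imports submodules on demand when a plain getattr fails. A sketch of the intended behaviour; the dotted paths are just examples:

# Hypothetical sketch: attribute lookups proceed left to right, with
# __import__ used as a fallback for submodules not yet imported.
import os
from pylearn.dbdict.newstuff import resolve

assert resolve('os.path.join') is os.path.join
# A path into a package exercises the __import__ fallback:
flickr_loader = resolve('pylearn.datasets.flickr.basic_10class')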