# HG changeset patch
# User James Bergstra
# Date 1228342458 18000
# Node ID ecbad22bd2f54ca1951e386d4ade0dee96ca4739
# Parent  96221aa02fcb130eb355d4b0f0cba580d35ff221
# Parent  c6563c62998477690d87442b8c78a0dd9b56f38b
merged from pylearn_trunk

diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/algorithms/kernel_regression.py
--- a/pylearn/algorithms/kernel_regression.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/kernel_regression.py	Wed Dec 03 17:14:18 2008 -0500
@@ -4,16 +4,16 @@
 
 from pylearn.learner import OfflineLearningAlgorithm
 from theano import tensor as T
-from nnet_ops import prepend_1_to_each_row
+from theano.tensor.nnet import prepend_1_to_each_row
 from theano.scalar import as_scalar
 from common.autoname import AutoName
 import theano
 import numpy
 
 # map a N-vector to a 1xN matrix
-row_vector = theano.elemwise.DimShuffle((False,),['x',0])
+row_vector = theano.tensor.DimShuffle((False,),['x',0])
 # map a N-vector to a Nx1 matrix
-col_vector = theano.elemwise.DimShuffle((False,),[0,'x'])
+col_vector = theano.tensor.DimShuffle((False,),[0,'x'])
 
 class KernelRegression(OfflineLearningAlgorithm):
     """
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/algorithms/linear_regression.py
--- a/pylearn/algorithms/linear_regression.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/linear_regression.py	Wed Dec 03 17:14:18 2008 -0500
@@ -6,7 +6,7 @@
 
 from pylearn.learner import OfflineLearningAlgorithm,OnlineLearningAlgorithm
 from theano import tensor as T
-from nnet_ops import prepend_1_to_each_row
+from theano.tensor.nnet import prepend_1_to_each_row
 from theano.scalar import as_scalar
 from common.autoname import AutoName
 import theano
@@ -96,14 +96,14 @@
 
     __compiled = False
     @classmethod
-    def compile(cls,linker='c|py'):
+    def compile(cls, mode = "FAST_RUN"):
         if cls.__compiled:
             return
         def fn(input_vars,output_vars):
-            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
+            return staticmethod(theano.function(input_vars, output_vars, mode=mode))
 
-        cls.compute_outputs = fn([cls.inputs,cls.theta],[cls.outputs])
-        cls.compute_errors = fn([cls.outputs,cls.targets],[cls.squared_errors])
+        cls.compute_outputs = fn([cls.inputs,cls.theta],cls.outputs)
+        cls.compute_errors = fn([cls.outputs,cls.targets],cls.squared_errors)
 
         cls.__compiled = True
 
@@ -115,17 +115,17 @@
     XtX = T.matrix() # (n_inputs+1) x (n_inputs+1)
     XtY = T.matrix() # (n_inputs+1) x n_outputs
     extended_input = prepend_1_to_each_row(P.inputs)
-    new_XtX = T.add_inplace(XtX,T.dot(extended_input.T,extended_input))
-    new_XtY = T.add_inplace(XtY,T.dot(extended_input.T,P.targets))
+    new_XtX = T.add(XtX,T.dot(extended_input.T,extended_input))
+    new_XtY = T.add(XtY,T.dot(extended_input.T,P.targets))
 
     __compiled = False
 
     @classmethod
-    def compile(cls,linker='c|py'):
+    def compile(cls, mode="FAST_RUN"):
         if cls.__compiled:
             return
         def fn(input_vars,output_vars):
-            return staticmethod(theano.function(input_vars,output_vars, linker=linker))
+            return staticmethod(theano.function(input_vars, output_vars, mode=mode))
 
         cls.update = fn([cls.XtX,cls.XtY,cls.P.inputs,cls.P.targets],[cls.new_XtX,cls.new_XtY])
 
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/algorithms/logistic_regression.py
--- a/pylearn/algorithms/logistic_regression.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/logistic_regression.py	Wed Dec 03 17:14:18 2008 -0500
@@ -149,12 +149,12 @@
     def __init__(self, input=None, targ=None, w=None, b=None, lr=None, regularize=False):
         super(LogReg2, self).__init__() #boilerplate
 
-        self.input = input if input is not None else T.matrix('input')
-        self.targ = targ if targ is not None else T.lcol()
+        self.input = module.Member(input) if input is not None else T.matrix('input')
+        self.targ = module.Member(targ) if targ is not None else T.lcol()
 
-        self.w = w if w is not None else module.Member(T.dmatrix())
-        self.b = b if b is not None else module.Member(T.dvector())
-        self.lr = lr if lr is not None else module.Member(T.dscalar())
+        self.w = module.Member(w) if w is not None else module.Member(T.dmatrix())
+        self.b = module.Member(b) if b is not None else module.Member(T.dvector())
+        self.lr = module.Member(lr) if lr is not None else module.Member(T.dscalar())
 
         self.params = [p for p in [self.w, self.b] if p.owner is None]
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/algorithms/sandbox/_test_onehotop.py
--- a/pylearn/algorithms/sandbox/_test_onehotop.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/sandbox/_test_onehotop.py	Wed Dec 03 17:14:18 2008 -0500
@@ -3,7 +3,7 @@
 import unittest
 from theano import compile
 from theano import gradient
-
+from theano import function
 from theano.tensor import as_tensor
 
 import random
@@ -14,8 +14,8 @@
         x = as_tensor([3, 2, 1])
         y = as_tensor(5)
         o = one_hot(x, y)
-        y = compile.eval_outputs([o])
-        self.failUnless(numpy.all(y == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
+        f = function([], o)
+        self.failUnless(numpy.all(f() == numpy.asarray([[0, 0, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0, 0]])))
 
 if __name__ == '__main__':
     unittest.main()
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/algorithms/tests/test_daa.py
--- a/pylearn/algorithms/tests/test_daa.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/tests/test_daa.py	Wed Dec 03 17:14:18 2008 -0500
@@ -28,7 +28,7 @@
             model.local_update[l]([[0, 1, 0, 1]])
             model.local_update[l]([[1, 0, 1, 0]])
 
-    for i in range(1):
+    for i in range(10):
         model.update([[0, 1, 0, 1]], [[1]])
         model.update([[1, 0, 1, 0]], [[0]])
     print model.classify([[0, 1, 0, 1]])
@@ -41,23 +41,31 @@
     daa = models.Stacker([(models.SigmoidXEDenoisingAA, 'hidden')] * ndaa + [(pylearn.algorithms.logistic_regression.Module_Nclass, 'pred')],
                          regularize = False)
 
-    model = daa.make([4, 20, 20, 20, 10],
+    model = daa.make([4] + [20] * ndaa + [10],
                      lr = 0.01,
                      mode = mode,
                      seed = 10)
 
-    model.layers[0].noise_level = 0.3
-    model.layers[1].noise_level = 0.3
-    model.layers[2].noise_level = 0.3
+    for l in range(ndaa): model.layers[l].noise_level = 0.3
 
-    for l in range(3):
+    instances = [([[0, 1, 0, 1]], [1]), ([[1, 0, 1, 0]], [0])]
+
+    for l in range(ndaa):
         for i in range(10):
-            model.local_update[l]([[0, 1, 0, 1]])
-            model.local_update[l]([[1, 0, 1, 0]])
+            for (input, output) in instances:
+                model.local_update[l](input)
 
-    for i in range(1):
-        model.update([[0, 1, 0, 1]], [1])
-        model.update([[1, 0, 1, 0]], [0])
+    for i in range(10):
+        for (input, output) in instances:
+#            model.update(input, output)
+            print "OLD:",
+            print model.validate(input, output)
+            oldloss = model.update(input, output)
+            print oldloss
+            print "NEW:"
+            print model.validate(input, output)
+            print
+
     print model.apply([[0, 1, 0, 1]])
     print model.apply([[1, 0, 1, 0]])
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/algorithms/tests/test_linear_regression.py
--- a/pylearn/algorithms/tests/test_linear_regression.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/algorithms/tests/test_linear_regression.py	Wed Dec 03 17:14:18 2008 -0500
@@ -21,5 +21,18 @@
         print 'mse = ',mse
 
 if __name__ == '__main__':
-    unittest.main()
-
+    import sys
+
+    if len(sys.argv)==1:
+        unittest.main()
+    else:
+        assert sys.argv[1]=="--debug"
+        tests = []
+        for arg in sys.argv[2:]:
+            tests.append(arg)
+        if tests:
+            unittest.TestSuite(map(T_DataSet, tests)).debug()
+        else:
+            module = __import__("_test_linear_regression")
+            tests = unittest.TestLoader().loadTestsFromModule(module)
+            tests.debug()
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/datasets/embeddings/parameters.py
--- a/pylearn/datasets/embeddings/parameters.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/datasets/embeddings/parameters.py	Wed Dec 03 17:14:18 2008 -0500
@@ -1,10 +1,10 @@
 """
 Locations of the embedding data files.
 """
-WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt"
-VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc"
-#WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt"
-#VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc"
+#WEIGHTSFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/lm-weights.txt"
+#VOCABFILE = "/home/fringant2/lisa/data/word_embeddings.collobert-and-weston/words.asc"
+WEIGHTSFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/lm-weights.txt"
+VOCABFILE = "/home/joseph/data/word_embeddings.collobert-and-weston/words.asc"
 NUMBER_OF_WORDS = 30000
 DIMENSIONS = 50
 UNKNOWN = "UNKNOWN"
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/datasets/embeddings/process.py
--- a/pylearn/datasets/embeddings/process.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/datasets/embeddings/process.py	Wed Dec 03 17:14:18 2008 -0500
@@ -11,6 +11,12 @@
 __word_to_embedding = None
 __read = False
 
+def length():
+    """
+    @return: The length of embeddings
+    """
+    return len(__word_to_embedding[__words[0]])
+
 def word_to_embedding(w):
     read_embeddings()
     return __word_to_embedding[w]
@@ -39,29 +45,21 @@
         w = __words[i]
         __word_to_embedding[w] = l
     __read = True
+    for w in __word_to_embedding: assert len(__word_to_embedding[__words[0]]) == len(__word_to_embedding[w])
     sys.stderr.write("...done reading %s\n" % WEIGHTSFILE)
 
 import re
 numberre = re.compile("[0-9]")
-slashre = re.compile("\\\/")
 
-def preprocess_word(origw):
+def preprocess_word(w):
     """
     Convert a word so that it can be embedded directly.
     Returned the preprocessed sequence.
-    @note: Preprocessing is appropriate for Penn Treebank style documents.
+    @note: Perhaps run L{common.penntreebank.preprocess} on the word first.
     """
     read_embeddings()
-    if origw == "-LRB-": w = "("
-    elif origw == "-RRB-": w = ")"
-    elif origw == "-LCB-": w = "{"
-    elif origw == "-RCB-": w = "}"
-    elif origw == "-LSB-": w = "["
-    elif origw == "-RSB-": w = "]"
-    else:
-        w = origw
+    if w not in __word_to_embedding:
         w = string.lower(w)
-        w = slashre.sub("/", w)
         w = numberre.sub("NUMBER", w)
     if w not in __word_to_embedding:
#        sys.stderr.write("Word not in vocabulary, using %s: %s (original %s)\n" % (UNKNOWN, w, origw))
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/datasets/make_test_datasets.py
--- a/pylearn/datasets/make_test_datasets.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/datasets/make_test_datasets.py	Wed Dec 03 17:14:18 2008 -0500
@@ -1,4 +1,4 @@
-import dataset
+from dataset import ArrayDataSet
 from shapeset.dset import Polygons
 from linear_regression import linear_predictor
 from kernel_regression import kernel_predictor
@@ -110,6 +110,7 @@
 #    testset = ArrayDataSet(inputs[n_examples/2:],{'input':slice(0,n_inputs)}) | \
 #              ArrayDataSet(targets[n_examples/2:],{'target':slice(0,n_targets)})
     data = hstack((inputs,targets))
+
     trainset = ArrayDataSet(data[0:n_train],
                             {'input':slice(0,n_inputs),'target':slice(n_inputs,n_inputs+n_targets)})
     testset = ArrayDataSet(data[n_train:],
diff -r 96221aa02fcb -r ecbad22bd2f5 pylearn/old_dataset/_test_dataset.py
--- a/pylearn/old_dataset/_test_dataset.py	Mon Dec 01 16:16:21 2008 -0500
+++ b/pylearn/old_dataset/_test_dataset.py	Wed Dec 03 17:14:18 2008 -0500
@@ -315,6 +315,7 @@
     #ds[i] returns the (i+1)-th example of the dataset.
     ds2=ds[5]
     assert isinstance(ds2,Example)
+    test_ds(ds,ds2,[5])
     assert have_raised("var['ds']["+str(len(ds))+"]",ds=ds) # index not defined
     assert not have_raised("var['ds']["+str(len(ds)-1)+"]",ds=ds)
     del ds2
diff -r 96221aa02fcb -r ecbad22bd2f5 squashfn.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/squashfn.py	Wed Dec 03 17:14:18 2008 -0500
@@ -0,0 +1,14 @@
+
+def squashfn(str):
+    if str == "sigmoid":
+        import theano.tensor.nnet as nnet
+        return nnet.sigmoid
+    elif str == "tanh":
+        import theano.tensor as t
+        return t.tanh
+    elif str == "softsign":
+        from theano.sandbox.softsign import softsign
+        return softsign
+    else: assert 0
+
+