view external/wrap_libsvm.py @ 530:844bad76459c

replaced the use of removed function compile.eval_outputs with theano.function.
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Mon, 17 Nov 2008 14:18:28 -0500
parents 716c04512dbe
children
line wrap: on
line source

"""Run an experiment using libsvm.
"""
import numpy
from ..datasets import dataset_from_descr

# libsvm currently has no python installation instructions/convention.
#
# This module uses a specific convention for libsvm's installation.
# I base this on installing libsvm-2.88.
# To install libsvm's python module, do three things:
# 1. Build libsvm (run make in both the root dir and the python subdir).
# 2. touch a '__init__.py' file in the python subdir
# 3. add a symbolic link to a PYTHONPATH location that looks like this:
#    libsvm -> <your root path>/libsvm-2.88/python/
#
# That is the sort of thing that this module expects from 'import libsvm'

import libsvm

def score_01(x, y, model):
    assert len(x) == len(y)
    size = len(x)
    errors = 0
    for i in range(size):
        prediction = model.predict(x[i])
        #probability = model.predict_probability
        if (y[i] != prediction):
            errors = errors + 1
    return float(errors)/size

#this is the dbdict experiment interface... if you happen to use dbdict
class State(object):
    #TODO: parametrize to get all the kernel types, not hardcode for RBF
    dataset = 'MNIST_1k'
    C = 10.0
    kernel = 'RBF'
    # rel_gamma is related to the procedure Jerome used. He mentioned why in
    # quadratic_neurons/neuropaper/draft3.pdf.
    rel_gamma = 1.0   

    def __init__(self, **kwargs):
        for k, v in kwargs:
            setattr(self, k, type(getattr(self, k))(v))


def dbdict_run_svm_experiment(state, channel=lambda *args, **kwargs:None):
    """Parameters are described in state, and returned in state.

    :param state: object instance to store parameters and return values
    :param channel: not used

    :returns: None

    This is the kind of function that dbdict-run can use.

    """
    ((train_x, train_y), (valid_x, valid_y), (test_x, test_y)) = dataset_from_descr(state.dataset)

    #libsvm needs stuff in int32 on a 32bit machine
    #TODO: test this on a 64bit machine
    train_y = numpy.asarray(train_y, dtype='int32')
    valid_y = numpy.asarray(valid_y, dtype='int32')
    test_y = numpy.asarray(test_y, dtype='int32')
    problem = svm.svm_problem(train_y, train_x);

    gamma0 = 0.5 / numpy.sum(numpy.var(train_x, axis=0))

    param = svm.svm_parameter(C=state.C,
            kernel_type=getattr(svm, state.kernel),
            gamma=state.rel_gamma * gamma0)

    model = svm.svm_model(problem, param) #this is the expensive part

    state.train_01 = score_01(train_x, train_y, model)
    state.valid_01 = score_01(valid_x, valid_y, model)
    state.test_01 = score_01(test_x, test_y, model)

    state.n_train = len(train_y)
    state.n_valid = len(valid_y)
    state.n_test = len(test_y)

def run_svm_experiment(**kwargs):
    """Python-friendly interface to dbdict_run_svm_experiment

    Parameters are used to construct a `State` instance, which is returned after running
    `dbdict_run_svm_experiment` on it.

    .. code-block:: python
        results = run_svm_experiment(dataset='MNIST_1k', C=100.0, rel_gamma=0.01)
        print results.n_train
        # 1000
        print results.valid_01, results.test_01
        # 0.14, 0.10  #.. or something...

    """
    state = State(**kwargs)
    state_run_svm_experiment(state)
    return state