517
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 """Run an experiment using libsvm.
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 """
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 import numpy
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 from ..datasets import dataset_from_descr
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 # libsvm currently has no python installation instructions/convention.
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 #
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 # This module uses a specific convention for libsvm's installation.
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
9 # I base this on installing libsvm-2.88.
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
10 # To install libsvm's python module, do three things:
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
11 # 1. Build libsvm (run make in both the root dir and the python subdir).
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
12 # 2. touch a '__init__.py' file in the python subdir
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
13 # 3. add a symbolic link to a PYTHONPATH location that looks like this:
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
14 # libsvm -> <your root path>/libsvm-2.88/python/
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
15 #
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
16 # That is the sort of thing that this module expects from 'import libsvm'
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
17
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
18 import libsvm
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
19
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def score_01(x, y, model):
    """Return the 0-1 error rate of `model` on the examples `x` with labels `y`.

    :param x: indexable collection of examples; each `x[i]` is passed to
        `model.predict`
    :param y: indexable collection of labels, same length as `x`
    :param model: object exposing a `predict(example)` method

    :returns: float, the fraction of positions `i` at which
        `y[i] != model.predict(x[i])`

    """
    assert len(x) == len(y)
    n_examples = len(x)
    # Predict one example at a time, then count the disagreements.
    predicted = [model.predict(x[idx]) for idx in range(n_examples)]
    n_wrong = sum(1 for idx in range(n_examples) if y[idx] != predicted[idx])
    return float(n_wrong) / n_examples
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
30
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
31 #this is the dbdict experiment interface... if you happen to use dbdict
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
class State(object):
    """Parameter container for the libsvm experiment.

    The class attributes below are the default parameter values. Keyword
    arguments given to the constructor override them on the instance; each
    value is converted to the type of the corresponding class-level default
    (so ``C='100'`` is stored as ``100.0``).

    :raises AttributeError: if a keyword does not name an existing attribute
    """
    #TODO: parametrize to get all the kernel types, not hardcode for RBF
    dataset = 'MNIST_1k'
    C = 10.0
    kernel = 'RBF'
    # rel_gamma is related to the procedure Jerome used. He mentioned why in
    # quadratic_neurons/neuropaper/draft3.pdf.
    rel_gamma = 1.0

    def __init__(self, **kwargs):
        # Iterate over (name, value) pairs; iterating the dict itself would
        # only yield the keys.
        for k, v in kwargs.items():
            # Coerce to the type of the default so that string-valued
            # parameters end up with the expected numeric type.
            setattr(self, k, type(getattr(self, k))(v))
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
44
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
45
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def dbdict_run_svm_experiment(state, channel=lambda *args, **kwargs:None):
    """Parameters are described in state, and returned in state.

    Reads `state.dataset`, `state.C`, `state.kernel` and `state.rel_gamma`.
    Writes the 0-1 error rates `state.train_01`, `state.valid_01` and
    `state.test_01` (computed by `score_01`) and the split sizes
    `state.n_train`, `state.n_valid` and `state.n_test`.

    :param state: object instance to store parameters and return values
    :param channel: not used

    :returns: None

    This is the kind of function that dbdict-run can use.

    """
    # The model classes and kernel constants are looked up on the `svm`
    # submodule of the libsvm package; the module-level `import libsvm` alone
    # does not bind the name `svm`.  (Assumes the libsvm-2.88 python layout
    # described in the install notes at the top of this file.)
    from libsvm import svm

    ((train_x, train_y), (valid_x, valid_y), (test_x, test_y)) = dataset_from_descr(state.dataset)

    #libsvm needs stuff in int32 on a 32bit machine
    #TODO: test this on a 64bit machine
    train_y = numpy.asarray(train_y, dtype='int32')
    valid_y = numpy.asarray(valid_y, dtype='int32')
    test_y = numpy.asarray(test_y, dtype='int32')
    problem = svm.svm_problem(train_y, train_x)

    # Baseline kernel width: 0.5 over the summed per-column variance of the
    # training inputs.  state.rel_gamma scales this baseline.
    gamma0 = 0.5 / numpy.sum(numpy.var(train_x, axis=0))

    param = svm.svm_parameter(C=state.C,
            kernel_type=getattr(svm, state.kernel),  # e.g. 'RBF' -> svm.RBF
            gamma=state.rel_gamma * gamma0)

    model = svm.svm_model(problem, param) #this is the expensive part

    state.train_01 = score_01(train_x, train_y, model)
    state.valid_01 = score_01(valid_x, valid_y, model)
    state.test_01 = score_01(test_x, test_y, model)

    state.n_train = len(train_y)
    state.n_valid = len(valid_y)
    state.n_test = len(test_y)
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
81
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def run_svm_experiment(**kwargs):
    """Python-friendly interface to dbdict_run_svm_experiment

    Parameters are used to construct a `State` instance, which is returned after running
    `dbdict_run_svm_experiment` on it.

    :param kwargs: forwarded unchanged to the `State` constructor

    :returns: the `State` instance, after `dbdict_run_svm_experiment` has
        stored its results on it

    .. code-block:: python

        results = run_svm_experiment(dataset='MNIST_1k', C=100.0, rel_gamma=0.01)
        print results.n_train
        # 1000
        print results.valid_01, results.test_01
        # 0.14, 0.10 #.. or something...

    """
    state = State(**kwargs)
    # The experiment runner defined in this module is
    # `dbdict_run_svm_experiment`; it fills `state` in place and returns None.
    dbdict_run_svm_experiment(state)
    return state
|
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
99
|