Mercurial > pylearn
annotate sandbox/sparse_random_autoassociator/model.py @ 534:eaa5ad4089a1
Another bugfix in pylearn.embeddings.length()
author | Joseph Turian <turian@gmail.com> |
---|---|
date | Tue, 18 Nov 2008 03:49:37 -0500 |
parents | 36baeb7125a4 |
children |
rev | line source |
---|---|
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
1 """ |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
2 The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
3 Weston's sampling trick (2008). |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
4 """ |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
5 |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
6 from graph import trainfn |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
7 import parameters |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
8 |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
9 import globals |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
10 from globals import LR |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
11 |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
12 import numpy |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
13 import random |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
14 random.seed(globals.SEED) |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
15 |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
16 def _select_indices(instance): |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
17 """ |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
18 Choose nonzero and zero indices (feature columns) of the instance. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
19 We select B{all} nonzero indices. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
20 We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly, |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
21 without replacement. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
22 @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
23 an endless loop. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
24 @return: (nonzero_indices, zero_indices) |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
25 """ |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
26 # Get the nonzero indices |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
27 nonzero_indices = instance.keys() |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
28 nonzero_indices.sort() |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
29 |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
30 # Get the zero indices |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
31 # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
32 zero_indices = [] |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
33 while len(zero_indices) < globals.ZERO_SAMPLE_SIZE: |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
34 idx = random.randint(0, globals.INPUT_DIMENSION - 1) |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
35 if idx in nonzero_indices or idx in zero_indices: continue |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
36 zero_indices.append(idx) |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
37 zero_indices.sort() |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
38 |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
39 return (nonzero_indices, zero_indices) |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
40 |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
41 class Model: |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
42 def __init__(self): |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
43 self.parameters = parameters.Parameters(randomly_initialize=True) |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
44 |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
45 def update(self, instance): |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
46 """ |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
47 Update the L{Model} using one training instance. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
48 @param instance: A dict from feature index to (non-zero) value. |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
49 @todo: Should assert that nonzero_indices and zero_indices |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
50 are correct (i.e. are truly nonzero/zero). |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
51 """ |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
52 (nonzero_indices, zero_indices) = _select_indices(instance) |
373
42cc94cf6c12
No update if there aren't any non-zeros
Joseph Turian <turian@gmail.com>
parents:
372
diff
changeset
|
53 # No update if there aren't any non-zeros. |
42cc94cf6c12
No update if there aren't any non-zeros
Joseph Turian <turian@gmail.com>
parents:
372
diff
changeset
|
54 if len(nonzero_indices) == 0: return |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
55 xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices]) |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
56 print |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
57 print "xnonzero:", xnonzero |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
58 |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
59 (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
60 print "OLD ynonzero:", ynonzero |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
61 print "OLD yzero:", yzero |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
62 print "OLD total loss:", loss |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
63 |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
64 # SGD update |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
65 self.parameters.w1[nonzero_indices, :] -= LR * gw1nonzero |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
66 self.parameters.b1 -= LR * gb1 |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
67 self.parameters.w2[:, nonzero_indices] -= LR * gw2nonzero |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
68 self.parameters.w2[:, zero_indices] -= LR * gw2zero |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
69 self.parameters.b2[nonzero_indices] -= LR * gb2nonzero |
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
70 self.parameters.b2[zero_indices] -= LR * gb2zero |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
71 |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
72 # Recompute the loss, to make sure it's descreasing |
372
75bab24bb2d8
Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents:
370
diff
changeset
|
73 (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices]) |
370
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
74 print "NEW ynonzero:", ynonzero |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
75 print "NEW yzero:", yzero |
a1bbcde6b456
Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff
changeset
|
76 print "NEW total loss:", loss |