annotate sandbox/sparse_random_autoassociator/model.py @ 672:27b1344a57b1

Added preprocessing back in
author Joseph Turian <turian@gmail.com>
date Thu, 20 Nov 2008 06:38:06 -0500
parents 36baeb7125a4
children
rev   line source
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
1 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
2 The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
3 Weston's sampling trick (2008).
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
4 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
5
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
6 from graph import trainfn
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
7 import parameters
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
8
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
9 import globals
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
10 from globals import LR
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
11
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
12 import numpy
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
13 import random
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
14 random.seed(globals.SEED)
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
15
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
16 def _select_indices(instance):
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
17 """
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
18 Choose nonzero and zero indices (feature columns) of the instance.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
19 We select B{all} nonzero indices.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
20 We select L{globals.ZERO_SAMPLE_SIZE} zero indices randomly,
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
21 without replacement.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
22 @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
23 an endless loop.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
24 @return: (nonzero_indices, zero_indices)
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
25 """
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
26 # Get the nonzero indices
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
27 nonzero_indices = instance.keys()
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
28 nonzero_indices.sort()
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
29
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
30 # Get the zero indices
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
31 # @bug: If there are not ZERO_SAMPLE_SIZE zeroes, we will enter an endless loop.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
32 zero_indices = []
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
33 while len(zero_indices) < globals.ZERO_SAMPLE_SIZE:
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
34 idx = random.randint(0, globals.INPUT_DIMENSION - 1)
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
35 if idx in nonzero_indices or idx in zero_indices: continue
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
36 zero_indices.append(idx)
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
37 zero_indices.sort()
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
38
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
39 return (nonzero_indices, zero_indices)
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
40
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
41 class Model:
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
42 def __init__(self):
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
43 self.parameters = parameters.Parameters(randomly_initialize=True)
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
44
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
45 def update(self, instance):
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
46 """
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
47 Update the L{Model} using one training instance.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
48 @param instance: A dict from feature index to (non-zero) value.
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
49 @todo: Should assert that nonzero_indices and zero_indices
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
50 are correct (i.e. are truly nonzero/zero).
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
51 """
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
52 (nonzero_indices, zero_indices) = _select_indices(instance)
373
42cc94cf6c12 No update if there aren't any non-zeros
Joseph Turian <turian@gmail.com>
parents: 372
diff changeset
53 # No update if there aren't any non-zeros.
42cc94cf6c12 No update if there aren't any non-zeros
Joseph Turian <turian@gmail.com>
parents: 372
diff changeset
54 if len(nonzero_indices) == 0: return
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
55 xnonzero = numpy.asarray([instance[idx] for idx in nonzero_indices])
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
56 print
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
57 print "xnonzero:", xnonzero
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
58
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
59 (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices])
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
60 print "OLD ynonzero:", ynonzero
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
61 print "OLD yzero:", yzero
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
62 print "OLD total loss:", loss
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
63
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
64 # SGD update
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
65 self.parameters.w1[nonzero_indices, :] -= LR * gw1nonzero
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
66 self.parameters.b1 -= LR * gb1
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
67 self.parameters.w2[:, nonzero_indices] -= LR * gw2nonzero
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
68 self.parameters.w2[:, zero_indices] -= LR * gw2zero
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
69 self.parameters.b2[nonzero_indices] -= LR * gb2nonzero
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
70 self.parameters.b2[zero_indices] -= LR * gb2zero
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
71
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
72 # Recompute the loss, to make sure it's descreasing
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
73 (ynonzero, yzero, loss, gw1nonzero, gb1, gw2nonzero, gw2zero, gb2nonzero, gb2zero) = trainfn(xnonzero, self.parameters.w1[nonzero_indices, :], self.parameters.b1, self.parameters.w2[:, nonzero_indices], self.parameters.w2[:, zero_indices], self.parameters.b2[nonzero_indices], self.parameters.b2[zero_indices])
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
74 print "NEW ynonzero:", ynonzero
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
75 print "NEW yzero:", yzero
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
76 print "NEW total loss:", loss