annotate sandbox/simple_autoassociator/model.py @ 437:2d8490d76b3e

added two methods to make_test_datasets
author Olivier Breuleux <breuleuo@iro.umontreal.ca>
date Wed, 06 Aug 2008 19:39:36 -0400
parents 4f61201fa9a9
children
rev   line source
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
1 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
2 The model for an autoassociator for sparse inputs, using Ronan Collobert + Jason
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
3 Weston's sampling trick (2008).
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
4 """
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
5
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
6 from graph import trainfn
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
7 import parameters
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
8
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
9 import numpy
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
10 import random
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
11
416
8849eba55520 Can now do minibatch update
Joseph Turian <turian@iro.umontreal.ca>
parents: 411
diff changeset
12 import pylearn.sparse_instance
8849eba55520 Can now do minibatch update
Joseph Turian <turian@iro.umontreal.ca>
parents: 411
diff changeset
13
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
14 class Model:
417
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
15 """
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
16 @todo: Add momentum.
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
17 @todo: Add learning rate decay schedule.
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
18 """
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
19 def __init__(self, input_dimension, hidden_dimension, learning_rate = 0.1, weight_decay = 0.0002, random_seed = 666):
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
20 self.input_dimension = input_dimension
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
21 self.hidden_dimension = hidden_dimension
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
22 self.learning_rate = learning_rate
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
23 self.weight_decay = weight_decay
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
24 self.random_seed = random_seed
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
25
417
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
26 random.seed(random_seed)
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
27
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
28 self.parameters = parameters.Parameters(input_dimension=self.input_dimension, hidden_dimension=self.hidden_dimension, randomly_initialize=True, random_seed=self.random_seed)
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
29
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
30 def deterministic_reconstruction(self, x):
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
31 (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
32 return y
416
8849eba55520 Can now do minibatch update
Joseph Turian <turian@iro.umontreal.ca>
parents: 411
diff changeset
33
8849eba55520 Can now do minibatch update
Joseph Turian <turian@iro.umontreal.ca>
parents: 411
diff changeset
34 def update(self, instances):
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
35 """
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
36 Update the L{Model} using a minibatch of training instances.
416
8849eba55520 Can now do minibatch update
Joseph Turian <turian@iro.umontreal.ca>
parents: 411
diff changeset
37 @param instances: A list of dict from feature index to (non-zero) value.
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
38 @todo: Should assert that nonzero_indices and zero_indices
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
39 are correct (i.e. are truly nonzero/zero).
417
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
40 @todo: Multiply L{self.weight_decay} by L{self.learning_rate}, as done in Semantic Hashing?
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
41 @todo: Decay the biases too?
372
75bab24bb2d8 Moved more logic into model.py
Joseph Turian <turian@gmail.com>
parents: 370
diff changeset
42 """
416
8849eba55520 Can now do minibatch update
Joseph Turian <turian@iro.umontreal.ca>
parents: 411
diff changeset
43 minibatch = len(instances)
417
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
44 x = pylearn.sparse_instance.to_vector(instances, self.input_dimension)
386
a474341861fa Added a simple AA
Joseph Turian <turian@gmail.com>
parents: 373
diff changeset
45
411
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
46 (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
47 # print
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
48 # print "instance:", instance
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
49 # print "x:", x
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
50 # print "OLD y:", y
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
51 print "OLD total loss:", loss
411
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
52 # print "gw1:", gw1
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
53 # print "gb1:", gb1
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
54 # print "gw2:", gw2
faffaae0d2f9 Autoassociator now seems to work
Joseph Turian <turian@iro.umontreal.ca>
parents: 404
diff changeset
55 # print "gb2:", gb2
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
56
417
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
57 self.parameters.w1 *= (1 - self.weight_decay)
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
58 self.parameters.w2 *= (1 - self.weight_decay)
370
a1bbcde6b456 Moved sparse_random_autoassociator from my repository
Joseph Turian <turian@gmail.com>
parents:
diff changeset
59
417
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
60 # SGD update
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
61 self.parameters.w1 -= self.learning_rate * gw1 / minibatch
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
62 self.parameters.b1 -= self.learning_rate * gb1 / minibatch
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
63 self.parameters.w2 -= self.learning_rate * gw2 / minibatch
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
64 self.parameters.b2 -= self.learning_rate * gb2 / minibatch
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
65
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
66 # # Recompute the loss, to make sure it's decreasing
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
67 # (y, h, loss, gw1, gb1, gw2, gb2) = trainfn(x, self.parameters.w1, self.parameters.b1, self.parameters.w2, self.parameters.b2)
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
68 ## print "NEW y:", y
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
69 # print "NEW total loss:", loss
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
70 ## print "h:", h
4f61201fa9a9 Parameters are no longer global
Joseph Turian <turian@iro.umontreal.ca>
parents: 416
diff changeset
71 ## print self.parameters