annotate pylearn/dataset_ops/majorminer.py @ 998:8ba8b08e0442

added the image_patches dataset used in RanzatoHinton2010 modified mcRBM to use it.
author James Bergstra <bergstrj@iro.umontreal.ca>
date Tue, 24 Aug 2010 16:51:53 -0400
parents bb8ef344d0a9
children
rev   line source
915
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 from __future__ import absolute_import
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 import os
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 import numpy
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 import theano
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 import theano.sparse
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8 import scipy.sparse
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
10 from ..datasets.majorminer import Meta
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
11
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
12 _meta = None
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
13
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
class MajorMiner(theano.Op):
    """Theano Op that looks up a single MajorMiner track by index.

    Input
        idx: a 0-dimensional (scalar) index identifying one track.

    Outputs
        tag_counts: a scipy CSR matrix of shape ``(1, len(_meta.tags))`` and
            dtype int8, holding ``count`` at column ``tag_id`` for every
            ``(tag_id, count)`` pair in ``_meta.track_tags[idx]``.
        track_path: ``_meta.tracks[idx]`` passed through as a generic value
            (presumably the path of the track's audio file -- confirm against
            ``Meta``).

    The dataset meta-information is kept in the module-level ``_meta`` global
    rather than on the instance: Op instances might get pickled, and we do not
    want to pickle the whole dataset along with them.
    """

    def __init__(self, meta=None):
        """Make sure the module-level meta-information is initialised.

        :param meta: meta-information object to install globally.  When
            omitted, the already-installed object is reused, or a fresh
            ``Meta()`` is built if none has been installed yet.
        :raises NotImplementedError: if `meta` is given but the global
            meta-information was already set by an earlier construction.
        """
        global _meta
        if _meta is None:
            # First construction in this process: install the configuration.
            _meta = Meta() if meta is None else meta
        elif meta is not None:
            # Replacing the shared configuration is not supported.
            raise NotImplementedError('global MajorMiner meta-information already set')
        # Otherwise (global set, no meta given): nothing to do, reuse _meta.

    def __eq__(self, other):
        # The Op carries no per-instance state, so any two instances of the
        # same class are interchangeable.
        return type(self) == type(other)

    def __hash__(self):
        # Kept consistent with __eq__: depends on the class only.
        return hash(type(self))

    def make_node(self, idx):
        """Build the Apply node for the scalar index `idx`.

        `idx` is converted to a 0-dimensional tensor variable; the node has
        two outputs, a sparse CSR matrix (tag counts) and a generic variable
        (track path).
        """
        _idx = theano.tensor.as_tensor_variable(idx, ndim=0)
        return theano.Apply(self,
                [_idx],
                [theano.sparse.csr_matrix('MajorMiner.tag_counts'),
                 theano.generic('MajorMiner.track_path')])

    def perform(self, node, inputs, out_storage):
        """Compute both outputs for the track selected by the single input.

        :param node: the Apply node being evaluated (unused).
        :param inputs: one-element sequence holding the track index.
        :param out_storage: output cells; ``out_storage[0][0]`` receives the
            tag-count CSR matrix and ``out_storage[1][0]`` the track entry.
        """
        # Unpack here instead of in the signature: tuple parameters were
        # removed in Python 3 (PEP 3113); this form works on 2.x and 3.x.
        (idx,) = inputs

        # Fill a LIL matrix entry by entry, then convert to CSR once at the
        # end.
        # NOTE(review): dtype int8 cannot represent counts above 127 --
        # confirm that tag counts stay within that range.
        tag_counts = scipy.sparse.lil_matrix((1, len(_meta.tags)), dtype='int8')
        for tag_id, count in _meta.track_tags[idx]:
            tag_counts[0, tag_id] = count

        out_storage[0][0] = tag_counts.tocsr()
        out_storage[1][0] = _meta.tracks[idx]

    def grad(self, inputs, output):
        """Return ``None`` for every input: no gradient is defined."""
        return [None for _ in inputs]
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
52
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
53
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def test_basic():
    """Smoke-test MajorMiner: one scalar track index in, two outputs out.

    MajorMiner.make_node coerces its input to a 0-dimensional tensor (one
    track per call), so the compiled function is driven with scalar indices
    rather than index vectors.
    """
    a = theano.tensor.lscalar()
    f = theano.function([a], MajorMiner()(a))
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('f(0): %s' % (f(0),))

    tags_0, path_0 = f(0)
    tags_0_again, path_0_again = f(0)
    tags_1, path_1 = f(1)
    tags_8, path_8 = f(8)

    # tag_counts is built as a single-row sparse matrix
    assert tags_0.shape[0] == 1

    assert path_0 == path_0_again  # same track -> same track entry
    assert path_1 != path_8  # track 1 != track 8
5cb947647432 adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
63