Mercurial > pylearn
annotate pylearn/dataset_ops/majorminer.py @ 998:8ba8b08e0442
added the image_patches dataset used in RanzatoHinton2010
modified mcRBM to use it.
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Tue, 24 Aug 2010 16:51:53 -0400 |
parents | bb8ef344d0a9 |
children |
rev | line source |
---|---|
915
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 from __future__ import absolute_import |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 import os |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 import numpy |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 import theano |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 import theano.sparse |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 import scipy.sparse |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
9 |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
10 from ..datasets.majorminer import Meta |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
11 |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
# Process-wide dataset metadata shared by every MajorMiner op instance.
# It is kept at module level (not on the op) because op instances might get
# pickled and we don't want to pickle the whole dataset along with them.
_meta = None


class MajorMiner(theano.Op):
    """Theano op that looks up one track of the major-miner dataset.

    Given a scalar track index, the op produces two outputs:

    * a 1 x len(_meta.tags) sparse CSR matrix (dtype int8) holding the
      per-tag counts recorded for that track, and
    * the corresponding entry of ``_meta.tracks`` (the output variable is
      named ``MajorMiner.track_path``).

    All instances read the same module-level ``_meta`` object, so every
    instance is interchangeable: they compare equal and hash alike.
    """

    def __init__(self, meta=None):
        """Make sure the *global* meta-information is set.

        :param meta: metadata object to install as the global one.  When
            omitted, a default ``Meta()`` is built the first time an op is
            constructed and reused afterwards.
        :raises NotImplementedError: if a global metadata object is already
            installed and a *different* object is passed.
        """
        global _meta
        if _meta is None:
            _meta = Meta() if meta is None else meta
        elif meta is not None and meta is not _meta:
            # Re-passing the object that is already installed is harmless;
            # only a genuinely different one conflicts with the global.
            raise NotImplementedError('global MajorMiner meta-information already set')

    def __eq__(self, other):
        # The op carries no per-instance state, so type equality is enough.
        return type(self) == type(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so keep the two consistent.
        return not self == other

    def __hash__(self):
        return hash(type(self))

    def make_node(self, idx):
        """Build the Apply node for a single (0-dimensional) track index.

        :param idx: anything ``theano.tensor.as_tensor_variable`` accepts
            with ``ndim=0``.
        :returns: an Apply node whose outputs are the sparse tag-count
            variable and the generic track-path variable.
        """
        _idx = theano.tensor.as_tensor_variable(idx, ndim=0)
        outputs = [
            theano.sparse.csr_matrix('MajorMiner.tag_counts'),
            theano.generic('MajorMiner.track_path'),
        ]
        return theano.Apply(self, [_idx], outputs)

    def perform(self, node, inputs, out_storage):
        """Fill ``out_storage`` with the tag counts and track entry for idx.

        :param node: the Apply node being evaluated (unused).
        :param inputs: one-element sequence holding the track index.
        :param out_storage: output cells; ``out_storage[0][0]`` receives the
            CSR tag-count row and ``out_storage[1][0]`` the track entry.
        """
        # Unpack here instead of in the signature: tuple parameters are
        # Python-2-only syntax.
        (idx,) = inputs
        # The index arrives as a 0-d array; use a plain int so the lookups
        # below work whether the containers are sequences or mappings.
        track = int(idx)

        row = scipy.sparse.lil_matrix((1, len(_meta.tags)), dtype='int8')
        for tag_id, count in _meta.track_tags[track]:
            row[0, tag_id] = count

        out_storage[0][0] = row.tocsr()
        out_storage[1][0] = _meta.tracks[track]

    def grad(self, inputs, output):
        # No gradient is defined with respect to the track index.
        return [None for i in inputs]
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
52 |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
53 |
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def test_basic():
    """Smoke-test the MajorMiner op on a few individual tracks.

    The op accepts exactly one scalar track index per call (its make_node
    converts the input with ndim=0), so the graph is built from a scalar
    variable and each track is fetched with its own call.
    """
    a = theano.tensor.lscalar()
    f = theano.function([a], MajorMiner()(a))
    # Single-argument, pre-formatted print behaves the same on Python 2 and 3.
    print('f(0): %s' % (f(0),))

    # Each result is [tag_counts, track_path].
    rval_0 = f(0)
    rval_0_again = f(0)
    rval_1 = f(1)
    rval_8 = f(8)

    assert rval_0[0].shape[0] == 1  # one row of tag counts per track
    assert rval_0[1] == rval_0_again[1]  # same track -> same path
    assert rval_1[1] != rval_8[1]  # track 1 != track 8
5cb947647432
adding majorminer dataset
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
63 |