Mercurial > pylearn
annotate pylearn/datasets/imagenet.py @ 1492:e7c4d031d333
Fix for Windows paths
author | Olivier Delalleau <delallea@iro> |
---|---|
date | Tue, 16 Aug 2011 15:44:01 -0400 |
parents | 4c24b2023f32 |
children |
rev | line source |
---|---|
695
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
1 import sys, time |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
2 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
3 from ..io.filetensor import arraylike |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
4 from .config import data_root |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
5 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
6 _filenames = [ |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
7 'n01627424.ft', #short compared to other ones (50K examples) |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
8 'n01861778.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
9 'n03405725.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
10 'n04451818.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
11 #'n09287968.ft', #short compared to other ones (6K examples) |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
12 'n13134947.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
13 'n01503061.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
14 'n01661091.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
15 'n02512053.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
16 'n03800933.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
17 'n04524313.ft', |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
18 'n11669921.ft'] |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
19 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def _arraylike_list():
    """Open every data file named in ``_filenames`` and wrap it.

    Returns a list holding one ``arraylike(fileobj, rank=1)`` object per
    entry of ``_filenames``, in the same order, so the index of an element
    in the returned list matches the index of its file name.

    The files live under
    ``<data_root()>/image_net/filetensor/12_top_classes``.  They are opened
    in binary mode ('rb'): text mode would translate newlines (and stop at
    Ctrl-Z) on Windows and would decode bytes on Python 3, either of which
    corrupts non-text data.

    The file objects are handed over to ``arraylike`` and are not closed
    here.
    NOTE(review): presumably ``arraylike`` keeps reading from the open file
    after construction -- confirm before adding any explicit close.
    """
    # Local import: the module header only brings in sys and time.
    import os

    directory = os.path.join(
        data_root(), 'image_net', 'filetensor', '12_top_classes')
    return [arraylike(open(os.path.join(directory, f), 'rb'), rank=1)
            for f in _filenames]
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
25 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
26 class _loop_range(object): |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
27 def __init__(self, a, start=0, stop=None): |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
28 self.a = a |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
29 self.start = start |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
30 self.stop = len(a) if stop is None else stop |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
31 self.len = self.stop - self.start |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
32 if self.len <= 0: |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
33 raise ValueError('len must be positive') |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
34 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
35 def __getitem__(self, i): |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
36 return self.a[self.start + i % self.len] |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
37 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def example_stream_train(test_per_class=10000):
    """Yield ``(example, label)`` training pairs forever, round-robin by class.

    For each object returned by ``_arraylike_list()`` the first
    ``test_per_class`` entries are skipped (``example_stream_test`` draws
    from that leading part) and the remainder is wrapped in a
    ``_loop_range`` so that it restarts from its beginning once exhausted.

    Labels are the positions of the classes in ``_arraylike_list()``.  The
    generator visits label 0, 1, ..., n-1, 0, 1, ... and never terminates;
    the caller decides when to stop.

    ``_loop_range`` raises ValueError (on the first ``next()``) if a class
    has no entries left after skipping ``test_per_class``.
    """
    streams = []
    for arr in _arraylike_list():
        streams.append(_loop_range(arr, start=test_per_class))

    n_classes = len(streams)
    # One independent read cursor per class; _loop_range wraps it around.
    cursors = [0] * n_classes

    label = 0
    while True:
        yield streams[label][cursors[label]], label
        cursors[label] += 1
        label = (label + 1) % n_classes
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
48 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def example_stream_test(test_per_class=10000):
    """Yield ``(example, label)`` test pairs, class after class.

    For every object returned by ``_arraylike_list()`` (its position in
    that list is the label) the first ``test_per_class`` entries are
    yielded, then the next class is started.  The generator ends after the
    last class.

    If a class holds fewer than ``test_per_class`` entries, all of them are
    yielded and a warning line with the label and the number of examples
    actually yielded is written to ``sys.stderr``.

    The number of yielded examples is counted explicitly instead of being
    derived from the last loop index, so that
      * a class with exactly ``test_per_class`` entries does not warn,
      * an empty class warns instead of raising NameError or silently
        reusing the index left over from the previous class.
    """
    for label, a in enumerate(_arraylike_list()):
        n_yielded = 0
        for a_i in a:
            if n_yielded == test_per_class:
                break
            yield a_i, label
            n_yielded += 1
        if n_yielded < test_per_class:
            # sys.stderr.write works on both Python 2 and Python 3.
            sys.stderr.write(
                "Warning: class too short: %s %s\n" % (label, n_yielded))
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
58 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
59 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
60 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def test_example_stream_test():
    """Drain ``example_stream_test`` and check how many examples it yields.

    Prints the elapsed wall-clock time in seconds, then asserts that the
    stream produced exactly ``ntestperclass`` examples for every file in
    ``_filenames``.  Reads the real data files, so it is slow.
    """
    t = time.time()
    ntestperclass = 10000
    # Count explicitly: relying on the last enumerate() index would raise
    # NameError (rather than fail the assertion) if the stream were empty.
    n_examples = 0
    for _ in example_stream_test(ntestperclass):
        n_examples += 1
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(time.time() - t)
    assert n_examples == len(_filenames) * ntestperclass
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
69 |
4c24b2023f32
added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff
changeset
|
def test_example_stream_train():
    """Time how long it takes to read a fixed number of training examples.

    Pulls items from the endless ``example_stream_train(10000)`` generator
    until the running index reaches ``len(_filenames) * 50000``, then prints
    that index and the elapsed wall-clock time in seconds.  There is no
    assertion; the test only fails if reading raises.  Reads the real data
    files, so it is slow.
    """
    t = time.time()
    # Loop-invariant stopping point, computed once.
    n_target = len(_filenames) * 50000
    # Unpacking (data, label) checks that every item is a 2-tuple.
    for i, (data, label) in enumerate(example_stream_train(10000)):
        if i == n_target:
            break
    # One pre-formatted string keeps the output identical on Python 2 and 3.
    print('reading %i examples took %s' % (i, time.time() - t))