annotate pylearn/datasets/imagenet.py @ 1492:e7c4d031d333

Fix for Windows paths
author Olivier Delalleau <delallea@iro>
date Tue, 16 Aug 2011 15:44:01 -0400
parents 4c24b2023f32
children
rev   line source
695
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 import sys, time
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 from ..io.filetensor import arraylike
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 from .config import data_root
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 _filenames = [
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 'n01627424.ft', #short compared to other ones (50K examples)
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8 'n01861778.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9 'n03405725.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
10 'n04451818.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
11 #'n09287968.ft', #short compared to other ones (6K examples)
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
12 'n13134947.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
13 'n01503061.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
14 'n01661091.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
15 'n02512053.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
16 'n03800933.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
17 'n04524313.ft',
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
18 'n11669921.ft']
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
19
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def _arraylike_list():
    """Open every file in ``_filenames`` and wrap it with ``arraylike``.

    Returns:
        A list with one ``arraylike(file, rank=1)`` object per entry of
        ``_filenames``, in the same order.

    Raises:
        IOError/OSError: if one of the files cannot be opened.

    The files live under
    ``<data_root()>/image_net/filetensor/12_top_classes/``.
    """
    # Function-scope import so this block does not depend on a change to the
    # module-level import line.
    import os

    base_dir = os.path.join(
        data_root(), 'image_net', 'filetensor', '12_top_classes')

    # Filetensor files are binary: open them with 'rb' so that Windows does
    # not translate line endings / stop at Ctrl-Z, and so that Python 3 does
    # not try to decode the bytes as text.
    # NOTE(review): the handles are intentionally not closed here --
    # presumably ``arraylike`` keeps reading from the open file; confirm.
    return [
        arraylike(open(os.path.join(base_dir, f), 'rb'), rank=1)
        for f in _filenames
    ]
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
25
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
26 class _loop_range(object):
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
27 def __init__(self, a, start=0, stop=None):
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
28 self.a = a
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
29 self.start = start
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
30 self.stop = len(a) if stop is None else stop
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
31 self.len = self.stop - self.start
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
32 if self.len <= 0:
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
33 raise ValueError('len must be positive')
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
34
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
35 def __getitem__(self, i):
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
36 return self.a[self.start + i % self.len]
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
37
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def example_stream_train(test_per_class=10000):
    """Yield ``(example, label)`` training pairs forever, one class at a time.

    The first ``test_per_class`` examples of every class are skipped (they
    are what ``example_stream_test`` yields).  Labels are visited
    round-robin (0, 1, ..., n-1, 0, ...), and each class wraps around to its
    first training example once exhausted, so the generator never ends.

    Args:
        test_per_class: number of leading examples per class to skip.

    Raises:
        ValueError: from ``_loop_range`` when a class has no example left
            after skipping ``test_per_class`` of them.
    """
    streams = []
    for arr in _arraylike_list():
        streams.append(_loop_range(arr, start=test_per_class))
    n_classes = len(streams)

    # Per-class read position; _loop_range applies the wrap-around.
    cursors = [0] * n_classes

    label = 0
    while True:
        cursor = cursors[label]
        yield streams[label][cursor], label
        cursors[label] = cursor + 1
        label = (label + 1) % n_classes
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
48
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def example_stream_test(test_per_class=10000):
    """Yield ``(example, label)`` test pairs, class after class.

    For every class (in ``_filenames`` order) the first ``test_per_class``
    examples are yielded with the class index as label.  When a class holds
    fewer than ``test_per_class`` examples, a warning with the label and the
    number of examples actually yielded is written to ``sys.stderr``.

    Args:
        test_per_class: maximum number of examples to yield per class.
    """
    for label, a in enumerate(_arraylike_list()):
        # Count the yielded examples explicitly rather than relying on the
        # loop index after the loop: the index is unbound for an empty
        # class and is ``test_per_class - 1`` (not ``test_per_class``) when
        # a class holds exactly the requested number of examples, which
        # used to trigger a spurious warning.
        n_yielded = 0
        for a_i in a:
            if n_yielded == test_per_class:
                break
            yield a_i, label
            n_yielded += 1
        if n_yielded < test_per_class:
            # sys.stderr.write works on both Python 2 and Python 3, unlike
            # the ``print >>`` statement.
            sys.stderr.write(
                "Warning: class too short: %s %s\n" % (label, n_yielded))
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
58
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
59
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
60
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def test_example_stream_test():
    """Check that the test stream yields ``ntestperclass`` examples per class.

    Consumes the whole ``example_stream_test`` generator, prints the elapsed
    wall-clock time in seconds, then asserts the total number of examples.
    Requires the dataset files to be present on disk.
    """
    t = time.time()
    ntestperclass = 10000
    # Count explicitly so that an empty stream fails the assertion below
    # instead of raising NameError on an unbound loop variable.
    n_seen = 0
    for _ in example_stream_test(ntestperclass):
        n_seen += 1
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(time.time() - t)
    assert n_seen == len(_filenames) * ntestperclass
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
69
4c24b2023f32 added datasets/imagenet
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def test_example_stream_train():
    """Time reading ``50000 * len(_filenames)`` examples from the train stream.

    The training stream is infinite, so the loop stops explicitly once the
    target number of examples has been read; the count and the elapsed
    wall-clock time in seconds are then printed.  Requires the dataset files
    to be present on disk.
    """
    t = time.time()
    n_target = len(_filenames) * 50000
    n_read = 0
    for data, label in example_stream_train(10000):
        n_read += 1
        # Stop after exactly n_target examples so the reported count matches
        # what was actually consumed (the old index-based check read one
        # extra example).
        if n_read >= n_target:
            break
    # %-formatting keeps the output identical on Python 2 and 3.
    print('reading %s examples took %s' % (n_read, time.time() - t))