annotate pylearn/datasets/flickr.py @ 1492:e7c4d031d333

Fix for Windows paths
author Olivier Delalleau <delallea@iro>
date Tue, 16 Aug 2011 15:44:01 -0400
parents 4a7d413c3425
children
rev   line source
600
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
1 """
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
2 Routines to load variations on the Flickr image dataset.
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
3 """
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
4 from __future__ import absolute_import
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
5
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
6 import os
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
7 import numpy
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
8
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
9 from ..io import filetensor
614
f6c74f34cd35 Use usual data_root
Pascal Lamblin <lamblinp@iro.umontreal.ca>
parents: 602
diff changeset
10 from .config import data_root
600
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
11 from .dataset import Dataset
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
12
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
13
602
28f7dc848efc fixed flickr relpath mistake
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 601
diff changeset
# File names of the three 10-class splits.  They are joined onto the flickr
# data directory by the loaders in this module.
path_test_10class = 'flickr_10classes_test.ft'
path_train_10class = 'flickr_10classes_train.ft'
path_valid_10class = 'flickr_10classes_valid.ft'
600
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
19
602
28f7dc848efc fixed flickr relpath mistake
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 601
diff changeset
def basic_10class(folder = None):
    """Return the basic flickr image classification problem.

    The images are 75x75, and there are 7500 training examples.

    :param folder: directory holding the three ``flickr_10classes_*.ft``
        files.  When None, the ``flickr`` sub-directory of ``data_root()``
        is used.
    :returns: a ``Dataset`` with ``train``, ``valid`` and ``test``
        attributes.  Each is a ``Dataset.Obj`` whose ``x`` is every column
        but the last and whose ``y`` is the last column converted to int64.
        ``n_classes`` (10) and ``img_shape`` ((75, 75)) are set as well.
    :raises AssertionError: if a loaded matrix does not have 75*75 + 1
        columns.
    """
    root = os.path.join(data_root(), 'flickr') if folder is None else folder

    def load(filename):
        # Binary mode: the .ft payload must not go through newline
        # translation (this matters on Windows).  The handle is closed
        # explicitly rather than left to the garbage collector.
        # NOTE(review): assumes filetensor.read returns fully loaded data
        # that stays valid after the file is closed -- confirm.
        ft_file = open(os.path.join(root, filename), 'rb')
        try:
            return filetensor.read(ft_file)
        finally:
            ft_file.close()

    def check_width(matrix):
        # One row per example: 75*75 pixel columns plus one label column.
        assert matrix.shape[1] == 75*75 + 1

    def as_obj(matrix):
        # The label is stored in the last column; everything before it is
        # the flattened image.
        return Dataset.Obj(
                x=matrix[:, 0:-1],
                y=numpy.asarray(matrix[:, -1], dtype='int64'))

    train = load(path_train_10class)
    valid = load(path_valid_10class)
    test = load(path_test_10class)

    check_width(train)
    check_width(valid)
    check_width(test)

    rval = Dataset()

    rval.train = as_obj(train)
    rval.valid = as_obj(valid)
    rval.test = as_obj(test)

    rval.n_classes = 10
    rval.img_shape = (75,75)

    return rval
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
49
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
def translations_10class():
    """Placeholder for a translated variant of the 10-class problem.

    Not implemented yet: calling it always raises NotImplementedError.
    """
    raise NotImplementedError('TODO')
e56303df3c77 initial flickr
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
diff changeset
52
648
4a7d413c3425 adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 614
diff changeset
53
4a7d413c3425 adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 614
diff changeset
def render_a_few_images(n=10, prefix='flickr_img', suffix='png'):
    """Write the first `n` validation images to disk as greyscale files.

    Image number ``i`` is saved as ``prefix + str(i) + '.' + suffix``; the
    name is not joined onto any directory here.

    :param n: number of images to render, taken from the start of the
        validation set (which is asserted to hold 1000 rows).
    :param prefix: leading part of every output file name.
    :param suffix: file extension, without the dot.
    :raises AssertionError: if the validation matrix is not
        (1000, 75*75 + 1) or a rendered pixel lies outside [0, 1].
    """
    #TODO: move it to a more common
    #      place where other datasets can use it
    from PIL import Image
    root = os.path.join(data_root(), 'flickr')
    # Binary mode avoids newline translation on Windows; the handle is
    # closed explicitly instead of being leaked.
    # NOTE(review): assumes filetensor.read returns fully loaded data that
    # stays valid after the file is closed -- confirm.
    ft_file = open(os.path.join(root, path_valid_10class), 'rb')
    try:
        valid = filetensor.read(ft_file)
    finally:
        ft_file.close()
    assert valid.shape == (1000,75*75+1)
    # range rather than xrange so the function is not tied to Python 2.
    for i in range(n):
        # Drop the trailing label column, fold the row into a 75x75 image
        # and transpose it.
        pixelarray = valid[i,0:-1].reshape((75,75)).T
        assert numpy.all(pixelarray >= 0)
        assert numpy.all(pixelarray <= 1)

        # The transpose is a non-C-contiguous view and the scaled copy may
        # inherit that layout.  The raw buffer handed to PIL is read in row
        # order, so force a C-contiguous array to make the transpose
        # actually show up in the image.
        pixel_uint8 = numpy.ascontiguousarray(pixelarray * 255.0, dtype='uint8')
        # PIL wants (width, height), numpy's shape is (rows, cols).
        size = pixel_uint8.shape[::-1]
        im = Image.frombuffer('L', size, pixel_uint8.data, 'raw', 'L', 0, 1)
        im.save(prefix + str(i) + '.' + suffix)
4a7d413c3425 adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 614
diff changeset
69
4a7d413c3425 adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 614
diff changeset
70
4a7d413c3425 adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents: 614
diff changeset
71