Mercurial > pylearn
annotate pylearn/datasets/flickr.py @ 1476:8c10bda4bb5f
Configured default train/valid/test split for icml07.MNIST_rotated_background
dataset. Defaults are the ones used by Hugo in the ICML07 paper and in all
contracting auto-encoder papers.
author | gdesjardins |
---|---|
date | Fri, 20 May 2011 16:53:00 -0400 |
parents | 4a7d413c3425 |
children |
rev | line source |
---|---|
600 | 1 """ |
2 Routines to load variations on the Flickr image dataset. | |
3 """ | |
4 from __future__ import absolute_import | |
5 | |
6 import os | |
7 import numpy | |
8 | |
9 from ..io import filetensor | |
614
f6c74f34cd35
Use usual data_root
Pascal Lamblin <lamblinp@iro.umontreal.ca>
parents:
602
diff
changeset
|
10 from .config import data_root |
600 | 11 from .dataset import Dataset |
12 | |
13 | |
602
28f7dc848efc
fixed flickr relpath mistake
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
601
diff
changeset
|
# File names of the three 10-class splits.  They are relative names: callers
# join them onto the directory that holds the flickr data.
path_test_10class = 'flickr_10classes_test.ft'

path_train_10class = 'flickr_10classes_train.ft'

path_valid_10class = 'flickr_10classes_valid.ft'
600 | 19 |
602
28f7dc848efc
fixed flickr relpath mistake
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
601
diff
changeset
|
def basic_10class(folder=None):
    """Return the basic flickr image classification problem.

    The images are 75x75, and there are 7500 training examples.

    :param folder: directory containing the three ``.ft`` files.  When it is
        None, the ``flickr`` sub-directory of ``data_root()`` is used.
    :returns: a ``Dataset`` with ``train``, ``valid`` and ``test`` attributes
        (each a ``Dataset.Obj`` whose ``x`` is every column but the last and
        whose ``y`` is the last column cast to int64), plus ``n_classes``
        and ``img_shape``.
    :raises AssertionError: if a loaded matrix does not have 75*75 + 1
        columns.
    """
    root = os.path.join(data_root(), 'flickr') if folder is None else folder

    def load(filename):
        # The .ft files are binary, so open them in 'rb' mode, and use a
        # context manager so the handle is closed as soon as the tensor has
        # been read instead of being left for the garbage collector.
        with open(os.path.join(root, filename), 'rb') as f:
            return filetensor.read(f)

    train = load(path_train_10class)
    valid = load(path_valid_10class)
    test = load(path_test_10class)

    # Each row is one flattened 75x75 image followed by its label.
    assert train.shape[1] == 75*75 + 1
    assert valid.shape[1] == 75*75 + 1
    assert test.shape[1] == 75*75 + 1

    def split(matrix):
        # Separate the pixel columns from the trailing label column.
        return Dataset.Obj(
            x=matrix[:, 0:-1],
            y=numpy.asarray(matrix[:, -1], dtype='int64'))

    rval = Dataset()

    rval.train = split(train)
    rval.valid = split(valid)
    rval.test = split(test)

    rval.n_classes = 10
    rval.img_shape = (75, 75)

    return rval
49 | |
def translations_10class():
    """Placeholder for a translated variant of the 10-class problem.

    :raises NotImplementedError: always; this loader has not been written.
    """
    raise NotImplementedError('TODO')
52 | |
648
4a7d413c3425
adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
614
diff
changeset
|
53 |
4a7d413c3425
adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
614
diff
changeset
|
def render_a_few_images(n=10, prefix='flickr_img', suffix='png'):
    """Write the first `n` images of the validation set to image files.

    Image number ``i`` is saved under the name
    ``prefix + str(i) + '.' + suffix``.

    :param n: number of validation rows to render, starting at row 0.
    :param prefix: leading part of every output file name.
    :param suffix: file extension, without the dot, passed on to PIL through
        the file name given to ``save``.
    :raises AssertionError: if the validation matrix is not of shape
        (1000, 75*75 + 1), or if a rendered image has a pixel outside [0, 1].
    """
    # TODO: document this and move it to a more common
    #       place where other datasets can use it
    from PIL import Image
    root = os.path.join(data_root(), 'flickr')
    # Binary mode plus a context manager: the .ft file is binary data and the
    # handle should be closed once the tensor has been read.
    with open(os.path.join(root, path_valid_10class), 'rb') as f:
        valid = filetensor.read(f)
    assert valid.shape == (1000, 75*75 + 1)
    # range() rather than xrange() so the loop also runs on Python 3.
    for i in range(n):
        # Drop the trailing label column and view the row as a 75x75 image.
        # NOTE(review): the transpose presumably compensates for the stored
        # pixel ordering -- confirm against the code that wrote the files.
        pixelarray = valid[i, 0:-1].reshape((75, 75)).T
        assert numpy.all(pixelarray >= 0)
        assert numpy.all(pixelarray <= 1)

        # Rescale from [0, 1] to 8-bit grey levels.
        pixel_uint8 = numpy.asarray(pixelarray * 255.0, dtype='uint8')
        im = Image.frombuffer('L', pixel_uint8.shape, pixel_uint8.data, 'raw', 'L', 0, 1)
        im.save(prefix + str(i) + '.' + suffix)
4a7d413c3425
adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
614
diff
changeset
|
69 |
4a7d413c3425
adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
614
diff
changeset
|
70 |
4a7d413c3425
adding a little hacky script to flickr that renders a few images as png
James Bergstra <bergstrj@iro.umontreal.ca>
parents:
614
diff
changeset
|
71 |