Mercurial > pylearn
view pylearn/datasets/tinyimages.py @ 1380:785aeb7a4df2
added a fn to datasets/tiny_images to output a mosaic of images from the dataset
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Fri, 03 Dec 2010 09:09:00 -0500 |
parents | 976539956475 |
children | 234e5e48d60d |
line wrap: on
line source
"""Code for loading the tinyimages dataset."""

__authors__ = "James Bergstra"
__copyright__ = "(c) 2010, Universite de Montreal"
__license__ = "3-clause BSD License"
__contact__ = "bergstrj@iro.umontreal.ca"

import itertools
import logging
import os
import sys

import numpy
import PIL.Image

import pylearn.io.image_tiling

logger = logging.getLogger('pylearn.datasets.tinyimages')


def sorted_listdir(*path):
    """Return the sorted directory listing of ``os.path.join(*path)``."""
    return sorted(os.listdir(os.path.join(*path)))


# Default root directory used when no explicit `path` is given.
_original = '/data/lisa/data/tinyimages/tinyimages/original'


def iterate_over_filenames(path=_original):
    """
    Generate (root, letter, label, filename) tuples for each image file in the
    dataset.

    The tree under `path` is walked three levels deep (letter / label / image)
    and every level is visited in sorted order, so the sequence of tuples is
    deterministic.  Joining a tuple with ``os.path.join(*t)`` gives the path of
    the image file.
    """
    for letter in sorted_listdir(path):
        for label in sorted_listdir(path, letter):
            for img in sorted_listdir(path, letter, label):
                yield path, letter, label, img


def load_image(path):
    """Return the image at `path` as a numpy ndarray."""
    return numpy.asarray(PIL.Image.open(path))


def image_generator(path=_original):
    """
    Generate numpy ndarrays of size (32,32,3) and dtype 'uint8' for each image
    in the dataset rooted at `path`.

    Images that load with shape (32,32) are expanded to three identical
    channels before being yielded.

    Be careful with this generator because the dataset in total is close to
    20GB!
    """
    n_colour_conversions = 0
    n_yielded = 0
    # Honour the caller's `path`; the dataset root must not be hard-coded here.
    for p in iterate_over_filenames(path=path):
        y = load_image(os.path.join(*p))
        n_yielded += 1
        if y.shape == (32, 32):
            # Count this conversion before logging so that both numbers in the
            # message are 1-based.
            n_colour_conversions += 1
            logger.info("put %i'th/%i images in colour",
                        n_colour_conversions, n_yielded)
            # Stack the single channel three times -> (3,32,32), then move the
            # channel axis last -> (32,32,3); copy() makes it contiguous.
            y = numpy.asarray([y, y, y]).transpose((1, 2, 0)).copy()
        assert y.shape == (32, 32, 3), (p, y.shape)
        assert y.dtype == numpy.uint8, (p, y.dtype)
        yield y


def load_first_N(N):
    """
    Generate the first `N` (root, letter, label, filename) tuples of the
    dataset at the default location.

    Fewer than `N` tuples are produced if the dataset has fewer files.
    """
    it = iterate_over_filenames()
    i = 0
    while i < N:
        try:
            entry = next(it)
        except StopIteration:
            # Finish cleanly: a StopIteration leaking out of a generator body
            # is turned into RuntimeError by PEP 479.
            return
        yield entry
        i += 1


def arrange_first_N_into_tiling(R, C, filename):
    """
    Save a mosaic of the first R*C images of the dataset to `filename`.

    :param R: first grid dimension (anything accepted by ``int()``)
    :param C: second grid dimension (anything accepted by ``int()``)
    :param filename: destination passed to
        `pylearn.io.image_tiling.save_tiled_raster_images`
    :raises ValueError: if the dataset has fewer than R*C images
    """
    R = int(R)
    C = int(C)
    n_tiles = R * C
    # islice stops after exactly n_tiles images; no extra image is loaded.
    images = list(itertools.islice(image_generator(), n_tiles))
    if len(images) != n_tiles:
        raise ValueError(
            'requested %i images for a %ix%i tiling but only %i were available'
            % (n_tiles, R, C, len(images)))
    A = numpy.asarray(images, dtype='float32')
    print(A.shape)
    # Flatten each 32x32 image into a row of 1024 pixels, keeping channels last.
    A = A.reshape((n_tiles, 32 * 32, 3))
    pylearn.io.image_tiling.save_tiled_raster_images(
        pylearn.io.image_tiling.tile_raster_images(
            (A[:, :, 0], A[:, :, 1], A[:, :, 2], None),
            (32, 32)),
        filename)


# Number of image files expected in the dataset; checked by `main`.
n_images = 1608356


def main(argv=None):
    """
    Command-line entry point.

    With arguments ``R C filename``, write a mosaic of the first R*C images to
    `filename`.  Without arguments, count the files of the dataset, check the
    count against `n_images`, and load the first 10 images.

    :returns: 2 if arguments were given but fewer than three, otherwise None
    """
    if argv:
        if len(argv) < 3:
            sys.stderr.write('usage: %s R C output_filename\n' % sys.argv[0])
            return 2
        arrange_first_N_into_tiling(argv[0], argv[1], argv[2])
    else:
        n_files = sum(1 for _ in iterate_over_filenames())
        print('got %i files' % n_files)
        assert n_images == n_files
        # NOTE(review): the original indentation was lost; this loading check
        # is assumed to belong to the no-argument branch -- confirm.
        for p in load_first_N(10):
            load_image(os.path.join(*p))


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))