view pylearn/io/ubyte.py @ 1473:91a475ca9b6d

image_tiling with better scaling for integer inputs
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 18 May 2011 10:52:22 -0400
parents 43e726898cf9
children
line wrap: on
line source

"""Provides read_ubyte_matrix
"""
import numpy

def read_ubyte_matrix(filename, size0, size1, offset, write=False, align=True, as_dtype='float32'):
    """Read a matrix in 'ubyte' format.

    This function basically exists in order to read the MNIST files distributed by Yann LeCun.
    Offset should be 16 for the image files, and 8 for the label files.

    :param filename: path of the file to read (opened in binary mode).
    :param size0: number of rows of the returned matrix.
    :param size1: number of columns of the returned matrix.
    :param offset: number of header bytes to skip from the start of the file.
    :param write: if True, the returned array is a fresh writeable copy;
        if False, the returned array is flagged read-only.
    :param align: forwarded to `ndarray.setflags(align=...)` on the result.
    :param as_dtype: numpy dtype of the returned matrix.

    :raises ValueError: if the file holds fewer than `size0 * size1` bytes
        after `offset`.

    Returns a matrix of size0 x size1 elements of type `as_dtype`.
    """
    n_bytes = size0 * size1  # one byte per uint8 element

    # `with` guarantees the handle is closed even if the read fails.
    with open(filename, 'rb') as f:
        f.seek(offset, 0)
        buf = f.read(n_bytes)

    if len(buf) != n_bytes:
        raise ValueError(
            'expected %i bytes after offset %i in %s, but only read %i'
            % (n_bytes, offset, filename, len(buf)))

    arr = numpy.frombuffer(buf, dtype='uint8').reshape((size0, size1))

    if write:
        # The frombuffer view is read-only; a writeable result needs its own
        # copy of the data.
        rval = numpy.array(arr, dtype=as_dtype, copy=True)
    else:
        # Copy only when the dtype conversion requires it. `asarray` has this
        # meaning on every numpy version, whereas `numpy.array(copy=False)`
        # raises on numpy >= 2 whenever a copy is unavoidable.
        rval = numpy.asarray(arr, dtype=as_dtype)

    rval.setflags(write=write, align=align)
    return rval