# HG changeset patch
# User James Bergstra
# Date 1253143109 14400
# Node ID 43e726898cf9f850270b71cd1aed55e9af1ff632
# Parent ee38dfda37001ea3eeeeafd695dff74c3e4cb7cf
# added io/ubyte file for reading MNIST official files
#
# Patch target: pylearn/io/ubyte.py (new file)
"""Provides read_ubyte_matrix
"""
import numpy


def read_ubyte_matrix(filename, size0, size1, offset, write=False, align=True, as_dtype='float32'):
    """Read a matrix stored as raw unsigned bytes ('ubyte' format).

    This function basically exists in order to read the MNIST files
    distributed by Yann LeCun.  Offset should be 16 for the image files, and
    8 for the label files.

    Parameters
    ----------
    filename : path of the file to read (opened in binary mode).
    size0, size1 : number of rows and columns of the matrix to read.
    offset : number of header bytes to skip from the start of the file.
    write : if True the returned array is a fresh, writeable copy; if False
        the array is flagged read-only and a copy is made only when the dtype
        conversion requires one.
    align : value passed to ``ndarray.setflags(align=...)`` on the result.
    as_dtype : dtype of the returned array.

    Returns
    -------
    A matrix of size0 x size1 elements of type `as_dtype`.

    Raises
    ------
    ValueError
        If the file holds fewer than ``size0 * size1`` bytes after `offset`.
    """
    dtype_size = 1  # each stored element is a single unsigned byte
    n_bytes = size0 * size1 * dtype_size

    # `with` guarantees the handle is released even if the read fails.
    with open(filename, 'rb') as f:
        f.seek(offset, 0)
        # A negative product (e.g. size0=-1) means "read to EOF" so that
        # reshape can infer the missing dimension.
        buf = f.read(n_bytes if n_bytes >= 0 else -1)

    if n_bytes >= 0 and len(buf) != n_bytes:
        raise ValueError(
            'expected %d bytes at offset %d of %r but read only %d'
            % (n_bytes, offset, filename, len(buf)))

    arr = numpy.frombuffer(buf, dtype='uint8')
    # astype(copy=False) copies only when the dtype actually changes, whereas
    # numpy.array(..., copy=False) raises on NumPy >= 2 when a copy is needed.
    rval = arr.reshape((size0, size1)).astype(as_dtype, copy=write)
    rval.setflags(write=write, align=align)
    return rval