Mercurial > pylearn
changeset 826:43e726898cf9
added io/ubyte file for reading MNIST official files
author | James Bergstra <bergstrj@iro.umontreal.ca> |
---|---|
date | Wed, 16 Sep 2009 19:18:29 -0400 |
parents | ee38dfda3700 |
children | 9945cd79fe79 67b92a42f86b |
files | pylearn/io/ubyte.py |
diffstat | 1 files changed, 20 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pylearn/io/ubyte.py Wed Sep 16 19:18:29 2009 -0400 @@ -0,0 +1,20 @@
"""Provides read_ubyte_matrix
"""
import numpy


def read_ubyte_matrix(filename, size0, size1, offset, write=False, align=True, as_dtype='float32'):
    """Read a matrix stored as raw unsigned bytes ('ubyte' format).

    This function basically exists in order to read the MNIST files
    distributed by Yann LeCun.  Offset should be 16 for the image files,
    and 8 for the label files.

    :param filename: path of the file to read (opened in binary mode).
    :param size0: number of rows of the returned matrix.
    :param size1: number of columns of the returned matrix.
    :param offset: number of bytes to skip from the start of the file
        before the matrix data begins.
    :param write: if True, the data is always copied and the returned array
        is writeable; if False, a copy is made only when the dtype
        conversion requires one and the returned array is read-only.
    :param align: value given to the returned array's ALIGNED flag.
    :param as_dtype: dtype of the returned matrix.

    :returns: a matrix of size0 x size1 elements of type `as_dtype`.

    :raises ValueError: if fewer than size0 * size1 bytes are available
        after `offset`.
    """
    dtype_size = 1  # every element is stored as a single unsigned byte
    n_bytes = size0 * size1 * dtype_size

    # `open` works on both Python 2 and 3 (the `file` builtin is Python 2
    # only), and the `with` block guarantees the handle is closed.
    with open(filename, 'rb') as f:
        f.seek(offset, 0)
        buf = f.read(n_bytes)

    # Report a truncated file (or wrong offset/size) explicitly instead of
    # letting reshape fail with a less helpful message.
    if len(buf) < n_bytes:
        raise ValueError(
            'expected %d bytes at offset %d of %r but only %d were available'
            % (n_bytes, offset, filename, len(buf)))

    arr = numpy.frombuffer(buf, dtype='uint8')
    # astype(copy=False) means "copy only if needed" on every NumPy version,
    # whereas numpy.array(..., copy=False) raises in NumPy 2 whenever the
    # dtype conversion forces a copy.
    rval = arr.reshape((size0, size1)).astype(as_dtype, copy=write)
    rval.setflags(write=write, align=align)
    return rval