changeset 826:43e726898cf9

added io/ubyte file for reading MNIST official files
author James Bergstra <bergstrj@iro.umontreal.ca>
date Wed, 16 Sep 2009 19:18:29 -0400
parents ee38dfda3700
children 9945cd79fe79 67b92a42f86b
files pylearn/io/ubyte.py
diffstat 1 files changed, 20 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pylearn/io/ubyte.py	Wed Sep 16 19:18:29 2009 -0400
@@ -0,0 +1,20 @@
+"""Provides read_ubyte_matrix
+"""
+import numpy
+
+def read_ubyte_matrix(filename, size0, size1, offset, write=False, align=True, as_dtype='float32'):
+    """Read a size0 x size1 matrix of unsigned bytes from `filename`, starting at byte `offset`.
+
+    This function basically exists in order to read the MNIST files distributed by Yann LeCun.
+    Offset should be 16 for the image files, and 8 for the label files.
+    Returns an array of dtype `as_dtype` whose writeable/aligned flags are set from `write`/`align`.
+    Raises ValueError if the file holds fewer than size0 * size1 bytes after `offset`.
+    """
+    with open(filename, 'rb') as f:  # `with` guarantees the handle is closed, even on error
+        f.seek(offset, 0)
+        buf = f.read(size0 * size1)  # one byte per matrix element
+    if len(buf) != size0 * size1:  # short read: report it clearly instead of failing in reshape
+        raise ValueError('expected %d bytes at offset %d of %s, got %d' % (size0 * size1, offset, filename, len(buf)))
+    rval = numpy.frombuffer(buf, dtype='uint8').reshape((size0, size1)).astype(as_dtype, copy=write)
+    rval.setflags(write=write, align=align)
+    return rval