changeset 1401:e06c0ff46d2a

Allow pylearn.io.filetensor to accept a gzip file handle.
author Frederic Bastien <nouiz@nouiz.org>
date Thu, 20 Jan 2011 11:28:08 -0500
parents 08a00dea117d
children b14f3d6f5cd4
files pylearn/io/filetensor.py
diffstat 1 files changed, 20 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/pylearn/io/filetensor.py	Thu Jan 13 17:53:38 2011 -0500
+++ b/pylearn/io/filetensor.py	Thu Jan 20 11:28:08 2011 -0500
@@ -20,7 +20,8 @@
 @todo: add complex type support
 
 """
-import sys
+import gzip
+
 import numpy
 
 def _prod(lst):
@@ -52,10 +53,19 @@
     s_array = numpy.fromstring(s, dtype='int32')
     return s_array.item()
 
-def _read_header(f, debug=False, fromgzip=False):
+def _read_header(f, debug=False, fromgzip=None):
     """
+    :param f: an open file handle. 
+    :type f: a file or gzip.GzipFile object
+
+    :param fromgzip: whether f is a gzip file; if None, determine it from the type of the file handle.
+    :type fromgzip: bool or None
+
     :returns: data type, element size, rank, shape, size
     """
+    if fromgzip is None:
+        fromgzip = isinstance(f, gzip.GzipFile)
+
     #what is the data type of this matrix?
     #magic_s = f.read(4)
     #magic = numpy.fromstring(magic_s, dtype='int32')
@@ -173,7 +183,7 @@
     """Load all or part of file 'f' into a numpy ndarray
 
     @param f: file from which to read
-    @type f: file-like object
+    @type f: file-like object. Can be an open gzip file.
 
     If subtensor is not None, it should be like the argument to
     numpy.ndarray.__getitem__.  The following two expressions should return
@@ -190,7 +200,13 @@
     f_start = f.tell()
 
     rval = None
-    if subtensor is None:
+    if isinstance(f, gzip.GzipFile):
+        assert subtensor is None, "Not implemented the subtensor case for gzip file"
+        d = f.read()
+        rval = numpy.fromstring(d, dtype=magic_t, 
+                                count=_prod(dim)).reshape(dim)
+        del d
+    elif subtensor is None:
         rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
     elif isinstance(subtensor, slice):
         if subtensor.step not in (None, 1):