# HG changeset patch # User Frederic Bastien # Date 1294418663 18000 # Node ID 337253b82409fb0bbcdf48a058b53c29014b7868 # Parent 212b142dcfc8c736639bee014239aa3d1ae7968b repair the class/fct that allow to read pnist07 and others by allowing them to read gziped file. diff -r 212b142dcfc8 -r 337253b82409 datasets/ftfile.py --- a/datasets/ftfile.py Thu Jan 06 14:23:41 2011 -0500 +++ b/datasets/ftfile.py Fri Jan 07 11:44:23 2011 -0500 @@ -1,8 +1,12 @@ +from itertools import izip +import os + +import numpy from pylearn.io.filetensor import _read_header, _prod -import numpy, theano + from dataset import DataSet from dsetiter import DataIterator -from itertools import izip, imap + class FTFile(object): def __init__(self, fname, scale=1, dtype=None): @@ -10,8 +14,16 @@ Tests: >>> f = FTFile('/data/lisa/data/nist/by_class/digits/digits_test_labels.ft') """ - self.file = open(fname, 'rb') - self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False) + if os.path.exists(fname): + self.file = open(fname, 'rb') + self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False) + self.gz=False + else: + import gzip + self.file = gzip.open(fname+'.gz','rb') + self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False, True) + self.gz=True + self.size = self.dim[0] self.scale = scale self.dtype = dtype @@ -81,7 +93,11 @@ num = self.size self.dim[0] = num self.size -= num - res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) + if self.gz: + d = self.file.read(_prod(self.dim)*self.elsize) + res = numpy.fromstring(d, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) + else: + res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) if self.dtype is not None: res = res.astype(self.dtype) if self.scale != 1: