# HG changeset patch # User Frederic Bastien # Date 1294340225 18000 # Node ID 5e481b2241173ffa9c443d3bb388bfb00cf0bcf1 # Parent 21d53fd07f6e057963087f1ef7c2dc7f4e8add0f fix the reading of PNIST dataset following Dumi compression of the data. diff -r 21d53fd07f6e -r 5e481b224117 datasets/ftfile.py --- a/datasets/ftfile.py Mon Dec 20 11:54:35 2010 -0500 +++ b/datasets/ftfile.py Thu Jan 06 13:57:05 2011 -0500 @@ -1,8 +1,12 @@ +from itertools import izip +import os + +import numpy from pylearn.io.filetensor import _read_header, _prod -import numpy, theano + from dataset import DataSet from dsetiter import DataIterator -from itertools import izip, imap + class FTFile(object): def __init__(self, fname, scale=1, dtype=None): @@ -10,8 +14,17 @@ Tests: >>> f = FTFile('/data/lisa/data/nist/by_class/digits/digits_test_labels.ft') """ - self.file = open(fname, 'rb') - self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False) + if os.path.exists(fname): + self.file = open(fname, 'rb') + self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False) + self.gz=False + else: + import gzip + self.file = gzip.open(fname+'.gz','rb') + self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file.read(100), False, True) + self.file.seek(0) + self.gz=True + self.size = self.dim[0] self.scale = scale self.dtype = dtype @@ -81,7 +94,10 @@ num = self.size self.dim[0] = num self.size -= num - res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) + if self.gz: + res = numpy.fromstring(self.file.read(), dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) + else: + res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) if self.dtype is not None: res = res.astype(self.dtype) if self.scale != 1: