comparison datasets/ftfile.py @ 614:212b142dcfc8

reverted previous change as it did not work.
author Frederic Bastien <nouiz@nouiz.org>
date Thu, 06 Jan 2011 14:23:41 -0500
parents 5e481b224117
children 337253b82409
comparison
equal deleted inserted replaced
613:5e481b224117 614:212b142dcfc8
1 from itertools import izip
2 import os
3
4 import numpy
5 from pylearn.io.filetensor import _read_header, _prod 1 from pylearn.io.filetensor import _read_header, _prod
6 2 import numpy, theano
7 from dataset import DataSet 3 from dataset import DataSet
8 from dsetiter import DataIterator 4 from dsetiter import DataIterator
9 5 from itertools import izip, imap
10 6
11 class FTFile(object): 7 class FTFile(object):
12 def __init__(self, fname, scale=1, dtype=None): 8 def __init__(self, fname, scale=1, dtype=None):
13 r""" 9 r"""
14 Tests: 10 Tests:
15 >>> f = FTFile('/data/lisa/data/nist/by_class/digits/digits_test_labels.ft') 11 >>> f = FTFile('/data/lisa/data/nist/by_class/digits/digits_test_labels.ft')
16 """ 12 """
17 if os.path.exists(fname): 13 self.file = open(fname, 'rb')
18 self.file = open(fname, 'rb') 14 self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False)
19 self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False)
20 self.gz=False
21 else:
22 import gzip
23 self.file = gzip.open(fname+'.gz','rb')
24 self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file.read(100), False, True)
25 self.file.seek(0)
26 self.gz=True
27
28 self.size = self.dim[0] 15 self.size = self.dim[0]
29 self.scale = scale 16 self.scale = scale
30 self.dtype = dtype 17 self.dtype = dtype
31 18
32 def skip(self, num): 19 def skip(self, num):
92 """ 79 """
93 if num > self.size: 80 if num > self.size:
94 num = self.size 81 num = self.size
95 self.dim[0] = num 82 self.dim[0] = num
96 self.size -= num 83 self.size -= num
97 if self.gz: 84 res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim)
98 res = numpy.fromstring(self.file.read(), dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim)
99 else:
100 res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim)
101 if self.dtype is not None: 85 if self.dtype is not None:
102 res = res.astype(self.dtype) 86 res = res.astype(self.dtype)
103 if self.scale != 1: 87 if self.scale != 1:
104 res /= self.scale 88 res /= self.scale
105 return res 89 return res