Mercurial > ift6266
comparison datasets/ftfile.py @ 614:212b142dcfc8
Reverted the previous change, as it did not work.
author | Frederic Bastien <nouiz@nouiz.org> |
---|---|
date | Thu, 06 Jan 2011 14:23:41 -0500 |
parents | 5e481b224117 |
children | 337253b82409 |
comparison
equal
deleted
inserted
replaced
613:5e481b224117 | 614:212b142dcfc8 |
---|---|
1 from itertools import izip | |
2 import os | |
3 | |
4 import numpy | |
5 from pylearn.io.filetensor import _read_header, _prod | 1 from pylearn.io.filetensor import _read_header, _prod |
6 | 2 import numpy, theano |
7 from dataset import DataSet | 3 from dataset import DataSet |
8 from dsetiter import DataIterator | 4 from dsetiter import DataIterator |
9 | 5 from itertools import izip, imap |
10 | 6 |
11 class FTFile(object): | 7 class FTFile(object): |
12 def __init__(self, fname, scale=1, dtype=None): | 8 def __init__(self, fname, scale=1, dtype=None): |
13 r""" | 9 r""" |
14 Tests: | 10 Tests: |
15 >>> f = FTFile('/data/lisa/data/nist/by_class/digits/digits_test_labels.ft') | 11 >>> f = FTFile('/data/lisa/data/nist/by_class/digits/digits_test_labels.ft') |
16 """ | 12 """ |
17 if os.path.exists(fname): | 13 self.file = open(fname, 'rb') |
18 self.file = open(fname, 'rb') | 14 self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False) |
19 self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file, False) | |
20 self.gz=False | |
21 else: | |
22 import gzip | |
23 self.file = gzip.open(fname+'.gz','rb') | |
24 self.magic_t, self.elsize, _, self.dim, _ = _read_header(self.file.read(100), False, True) | |
25 self.file.seek(0) | |
26 self.gz=True | |
27 | |
28 self.size = self.dim[0] | 15 self.size = self.dim[0] |
29 self.scale = scale | 16 self.scale = scale |
30 self.dtype = dtype | 17 self.dtype = dtype |
31 | 18 |
32 def skip(self, num): | 19 def skip(self, num): |
92 """ | 79 """ |
93 if num > self.size: | 80 if num > self.size: |
94 num = self.size | 81 num = self.size |
95 self.dim[0] = num | 82 self.dim[0] = num |
96 self.size -= num | 83 self.size -= num |
97 if self.gz: | 84 res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) |
98 res = numpy.fromstring(self.file.read(), dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) | |
99 else: | |
100 res = numpy.fromfile(self.file, dtype=self.magic_t, count=_prod(self.dim)).reshape(self.dim) | |
101 if self.dtype is not None: | 85 if self.dtype is not None: |
102 res = res.astype(self.dtype) | 86 res = res.astype(self.dtype) |
103 if self.scale != 1: | 87 if self.scale != 1: |
104 res /= self.scale | 88 res /= self.scale |
105 return res | 89 return res |