pylearn: filetensor.py comparison

comparison filetensor.py @ 248:82ba488b2c24

polished filetensor a little

author	James Bergstra <bergstrj@iro.umontreal.ca>
date	Tue, 03 Jun 2008 13:14:45 -0400
parents	2b6656b2ef52
children	040cb796f4e0

comparison

equal deleted inserted replaced

-:c702abb7f875
+:82ba488b2c24
 - for vector: rank=1, dimensions = [?, 1, 1]
 - for matrix: rank=2, dimensions = [?, ?, 1]
 For rank >= 3, the number of dimensions matches the rank exactly.
+@todo: add complex type support
 """
 import sys
 import numpy
-def prod(lst):
+def _prod(lst):
 p = 1
 for l in lst:
 p *= l
 return p
 _magic_dtype = {
 0x1E3D4C51 : ('float32', 4),
-0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix?
+#0x1E3D4C52 : ('packed matrix', 0), #what is a packed matrix?
 0x1E3D4C53 : ('float64', 8),
 0x1E3D4C54 : ('int32', 4),
 0x1E3D4C55 : ('uint8', 1),
 0x1E3D4C56 : ('int16', 2),
 }
 _dtype_magic = {
 'float32': 0x1E3D4C51,
-'packed matrix': 0x1E3D4C52,
+#'packed matrix': 0x1E3D4C52,
 'float64': 0x1E3D4C53,
 'int32': 0x1E3D4C54,
 'uint8': 0x1E3D4C55,
 'int16': 0x1E3D4C56
 }
-def _unused():
-f.seek(0,2) #seek to end
-f_len =  f.tell()
-f.seek(f_data_start,0) #seek back to where we were
-if debug: print 'length:', f_len
-f_data_bytes = (f_len - f_data_start)
-if debug: print 'data bytes according to header: ', dim_size * elsize
-if debug: print 'data bytes according to file  : ', f_data_bytes
-if debug: print 'reading data...'
-sys.stdout.flush()
-def _write_int32(f, i):
-i_array = numpy.asarray(i, dtype='int32')
-if 0: print 'writing int32', i, i_array
-i_array.tofile(f)
-def _read_int32(f):
-s = f.read(4)
-s_array = numpy.fromstring(s, dtype='int32')
-return s_array.item()
-def read_ndarray(f, dim, dtype):
-return numpy.fromfile(f, dtype=dtype, count=prod(dim)).reshape(dim)
 #
 # TODO: implement item selection:
 #  e.g. read('some mat', subtensor=(slice(None, 6), slice(2, 5)))
 #
 Support for subtensors is currently spotty, so check the code to see if your
 particular type of subtensor is supported.
 """
+def _read_int32(f):
+s = f.read(4)
+s_array = numpy.fromstring(s, dtype='int32')
+return s_array.item()
 if isinstance(f, str):
 if debug: print 'f', f
 f = file(f, 'r')
 ndim = _read_int32(f)
 if debug: print 'header ndim', ndim
 #what are the dimensions of the tensor?
 dim = numpy.fromfile(f, dtype='int32', count=max(ndim,3))[:ndim]
-dim_size = prod(dim)
+dim_size = _prod(dim)
 if debug: print 'header dim', dim, dim_size
 rval = None
 if subtensor is None:
-rval = read_ndarray(f, dim, magic_t)
+rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
 elif isinstance(subtensor, slice):
 if subtensor.step not in (None, 1):
 raise NotImplementedError('slice with step', subtensor.step)
 if subtensor.start not in (None, 0):
-bytes_per_row = prod(dim[1:]) * elsize
+bytes_per_row = _prod(dim[1:]) * elsize
 raise NotImplementedError('slice with start', subtensor.start)
 dim[0] = min(dim[0], subtensor.stop)
-rval = read_ndarray(f, dim, magic_t)
+rval = numpy.fromfile(f, dtype=magic_t, count=_prod(dim)).reshape(dim)
 else:
 raise NotImplementedError('subtensor access not written yet:', subtensor)
 return rval
 def write(f, mat):
+"""Write a numpy.ndarray to file.
+If 'f' is a string, then it will be interpreted as a filename. This filename
+will be opened in 'w+' mode, and (automatically) closed at the end of the function.
+"""
+def _write_int32(f, i):
+i_array = numpy.asarray(i, dtype='int32')
+if 0: print 'writing int32', i, i_array
+i_array.tofile(f)
 if isinstance(f, str):
-f = file(f, 'w')
+f = file(f, 'w+')
-_write_int32(f, _dtype_magic[str(mat.dtype)])
+try:
+_write_int32(f, _dtype_magic[str(mat.dtype)])
+except KeyError:
+raise TypeError('Invalid ndarray dtype for filetensor format', mat.dtype)
 _write_int32(f, len(mat.shape))
 shape = mat.shape
 if len(shape) < 3:
 shape = list(shape) + [1] * (3 - len(shape))
-print 'writing shape =', shape
+if 0: print 'writing shape =', shape
 for sh in shape:
 _write_int32(f, sh)
 mat.tofile(f)
-if __name__ == '__main__':
-#a small test script, starts by reading sys.argv[1]
-rval = read(sys.argv[1], None, debug=True) #load from filename
-print 'rval', rval.shape, rval.size
-if 0:
-f = file('/tmp/some_mat', 'w');
-write(f, rval)
-print ''
-f.close()
-f = file('/tmp/some_mat', 'r');
-rval2 = read(f) #load from file handle
-print 'rval2', rval2.shape, rval2.size
-assert rval.dtype == rval2.dtype
-assert rval.shape == rval2.shape
-assert numpy.all(rval == rval2)
-print 'ok'

Mercurial > pylearn

comparison filetensor.py @ 248:82ba488b2c24