Mercurial > pylearn
changeset 380:c2f17f231960
added function to load amat file
author | Frederic Bastien <bastienf@iro.umontreal.ca> |
---|---|
date | Wed, 09 Jul 2008 16:55:27 -0400 |
parents | 74b402b5a81b |
children | b9f545594207 |
files | pmat.py |
diffstat | 1 files changed, 56 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# Functions added to pmat.py by this changeset: the .amat loader sits next to
# the module's other ArrayDataSet loader, and the two parsing helpers
# (copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py) follow the
# PMat class.


def load_amat_as_array_dataset(fname):
    """Load a PLearn .amat file and wrap it in an ArrayDataSet.

    fname -- path of the .amat file; it is handed to readAMat().

    Returns dataset.ArrayDataSet(a, lookup) where `a` is the parsed array and
    `lookup` is a lookup_list.LookupList built from the field names and the
    column indices 0 .. a.shape[1]-1.  When the file declares no field names
    (no "#:" line), the names "field_0", "field_1", ... are generated.
    """
    # Project-local modules, imported at call time.
    import dataset
    import lookup_list

    # Load the amat as an array plus its declared field names.
    (a, fieldnames) = readAMat(fname)

    # Fall back to generated names.  This must rebind the local variable:
    # this is a module-level function, so there is no `self` to assign to.
    if len(fieldnames) == 0:
        fieldnames = ["field_" + str(i) for i in range(a.shape[1])]

    column_indices = list(range(a.shape[1]))
    return dataset.ArrayDataSet(
        a, lookup_list.LookupList(fieldnames, column_indices))


def safefloat(str):
    """Convert the given string to its float value.

    It is 'safe' in the sense that missing values ('nan', in any letter case)
    are converted to fpconst.NaN instead of going through float(str), whose
    handling of 'nan' was not reliable on all platforms.

    str -- the text to convert.  The parameter name shadows the builtin
           `str`; it is kept unchanged so keyword callers keep working.

    Raises ValueError (from float()) when the text is not a valid number.
    """
    if str.lower() == 'nan':
        # Only needed for missing values, so import it where it is used.
        import fpconst
        return fpconst.NaN
    return float(str)


def readAMat(amatname):
    """Read a PLearn .amat file and return it as a NumPy array.

    amatname -- path of the .amat file to read.

    Returns a tuple (array, fieldnames): the array holds one row per
    non-empty data line, and fieldnames is the list of strings found on the
    "#:" line (an empty list when the file has none).

    Line handling:
      "#size: L W"  -- must hold exactly two tokens (ValueError otherwise);
                       the declared dimensions are not used any further.
      "#sizes: ..." -- input/target/weight/extra sizes, ignored.
      "#: n1 n2 .." -- field names (the last such line wins).
      other "#..."  -- comment, ignored.
      anything else -- whitespace-separated values parsed with safefloat().
    """
    import numpy

    ### NOTE: collecting the rows in a list and converting once at the end is
    ### much faster than first creating the array and updating each row as
    ### it is read... Bizarrely enough.
    rows = []
    fieldnames = []

    f = open(amatname)
    try:
        for line in f:
            if line.startswith("#size:"):
                # Unpacking keeps the original check that the size header
                # carries exactly two tokens; the values are otherwise unused.
                (length, width) = line[6:].strip().split()
            elif line.startswith("#sizes:"):
                # Ignore input/target/weight/extra sizes.
                continue
            elif line.startswith("#:"):
                fieldnames = line[2:].strip().split()
            elif not line.startswith('#'):
                # Add all non-comment, non-blank lines.
                row = [safefloat(x) for x in line.strip().split()]
                if row:
                    rows.append(row)
    finally:
        # Close the file even when a row fails to parse.
        f.close()

    # numpy.array is what numpy.numarray.array delegated to for list input;
    # the numarray compatibility module was removed in NumPy 1.9.
    return numpy.array(rows), fieldnames