changeset 380:c2f17f231960

added function to load amat file
author Frederic Bastien <bastienf@iro.umontreal.ca>
date Wed, 09 Jul 2008 16:55:27 -0400
parents 74b402b5a81b
children b9f545594207
files pmat.py
diffstat 1 files changed, 56 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/pmat.py	Mon Jul 07 12:27:06 2008 -0400
+++ b/pmat.py	Wed Jul 09 16:55:27 2008 -0400
@@ -36,6 +36,7 @@
 
 #import numarray, sys, os, os.path
 import numpy.numarray, sys, os, os.path
+import fpconst
 
 def array_columns( a, cols ):
     indices = None
@@ -97,6 +98,18 @@
 
     return dataset.ArrayDataSet(a,lookup_list.LookupList(fieldnames,[x for x in range(a.shape[1])]))
 
+def load_amat_as_array_dataset(fname):
+    import dataset,lookup_list
+    
+    #load the amat as array
+    (a,fieldnames)=readAMat(fname)
+    
+    #load the fieldnames
+    if len(fieldnames)==0:
+        self.fieldnames = [ "field_"+str(i) for i in range(a.shape[1]) ]
+
+    return dataset.ArrayDataSet(a,lookup_list.LookupList(fieldnames,[x for x in range(a.shape[1])]))
+
 def save_array_dataset_as_pmat(fname,ds):
     ar=ds.data
     save_array_as_pmat(fname,ar,ds.fieldNames())
@@ -440,6 +453,49 @@
     def __len__(self):
         return self.length
 
+
+
+#copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py
+def safefloat(str):
+    """Convert the given string to its float value. It is 'safe' in the sense
+    that missing values ('nan') will be properly converted to the corresponding
+    float value under all platforms, contrarily to 'float(str)'.
+    """
+    if str.lower() == 'nan':
+        return fpconst.NaN
+    else:
+        return float(str)
+
+#copied from PLEARNDIR:python_modules/plearn/vmat/readAMat.py
+def readAMat(amatname):
+    """Read a PLearn .amat file and return it as a numarray Array.
+
+    Return a tuple, with as the first argument the array itself, and as
+    the second argument the fieldnames (list of strings).
+    """
+    ### NOTE: this version is much faster than first creating the array and
+    ### updating each row as it is read...  Bizarrely enough
+    f = open(amatname)
+    a = []
+    fieldnames = []
+    for line in f:
+        if line.startswith("#size:"):
+            (length,width) = line[6:].strip().split()
+        elif line.startswith("#sizes:"):  # ignore input/target/weight/extra sizes
+            continue
+
+        elif line.startswith("#:"):
+            fieldnames = line[2:].strip().split()
+            pass
+        elif not line.startswith('#'):
+            # Add all non-comment lines.
+            row = [ safefloat(x) for x in line.strip().split() ]
+            if row:
+                a.append(row)
+
+    f.close()
+    return numpy.numarray.array(a), fieldnames
+
             
 if __name__ == '__main__':
     pmat = PMat( 'tmp.pmat', 'w', fieldnames=['F1', 'F2'] )