comparison featuremap.py @ 356:18702ceb2096

Added more functions
author Joseph Turian <turian@iro.umontreal.ca>
date Thu, 19 Jun 2008 16:18:37 -0400
parents
children
comparison
equal deleted inserted replaced
355:430c9e92cd23 356:18702ceb2096
1 """
2 Feature mapping.
3
4 A feature map is identified by a unique name, e.g. "parsing features, experiment 35".
5 This unique name also determines the name of the on-disk version of the feature map.
6
7 @todo: This should be rewritten to be more Pythonic. Perhaps use a class?
8 @todo: Maybe look at older C++ Id/Vocab code? Id could have a __str__ method
9 @todo: Clearer documentation.
10 @todo: Create an fmap directory
11 @todo: Use cPickle, not pickle
12
13 @todo: Autosynchronize mode: Each time a new entry is added
14 to a L{FeatureMap}, the on-disk version of the feature map is
15 updated. Alternately, synchronize to disk when the object is destroyed.
16 """
17
18 from common import myopen
19 import pickle
20
# Module-level registry shared by get() and free_memory():
# feature-map name -> the single FeatureMap instance for that name.
name_to_fmap = dict()
23
def get(name=None, synchronize=True):
    """
    Return the L{FeatureMap} registered under C{name}, creating it on first use.

    The instance is cached in the module-level C{name_to_fmap} registry, so
    repeated calls with the same name return the same object.

    @param name: Unique name of the feature map.
    @param synchronize: Passed to the L{FeatureMap} constructor when the map
        is first created; must match the value the cached map was created with.
    @return: The L{FeatureMap} for C{name}.
    @raise AssertionError: If the cached map's name or synchronize flag does
        not match the arguments.
    """
    registry = name_to_fmap
    if name in registry:
        fmap = registry[name]
    else:
        # First request for this name: build the map and remember it.
        fmap = registry[name] = FeatureMap(name, synchronize)
    assert fmap.name == name
    assert fmap.synchronize == synchronize
    return fmap
36
def free_memory():
    """
    Forget every cached feature map.

    Rebinds the module-level C{name_to_fmap} registry to a fresh, empty dict.
    """
    global name_to_fmap
    name_to_fmap = dict()
43
class KeyError(Exception):
    """Exception raised for keys missing from a readonly FeatureMap.

    Note that this class shadows the builtin C{KeyError} inside this module.

    Attributes:
        name -- Name of the FeatureMap raising the error.
        key -- Key not present.
    """
    def __init__(self, name, key):
        # Initialise the base class so that e.args is populated; without
        # this the exception prints with an empty message and cannot be
        # reconstructed when unpickled.
        Exception.__init__(self, name, key)
        self.name = name
        self.key = key

    def __str__(self):
        """Return a human-readable description of the missing key."""
        return "Feature map %r does not contain key %r" % (self.name, self.key)
53
54
class FeatureMap(object):
    """
    Map from a feature string to a numerical ID (starting from 0).

    IDs are handed out in insertion order: a new string receives the
    current number of entries as its ID. A reverse map (ID -> string)
    is maintained alongside the forward map.

    If synchronize is False, the feature map is considered temporary
    and we never actually synchronize it with disk. It expires with the
    lifetime of this execution.

    Attributes:
        name -- Unique name of this map; also determines L{filename}.
        synchronize -- If True, the map is loaded from disk on construction
                       and may be written back with L{dump}.
        map -- dict from feature string to ID.
        reverse_map -- dict from ID to feature string.
        readonly -- If True, L{id} raises KeyError for an unknown string
                    instead of assigning a new ID. Defaults to False.

    @warning: Do not construct this directly. Instead, use the global get() method.
    @todo: More documentation
    """

    def __init__(self, name=None, synchronize=True):
        """
        Create the map and, when synchronizing, try to load it from disk.

        @param name: Unique name of the map (required when synchronize is True).
        @param synchronize: Whether the map is backed by an on-disk file.
        @raise AssertionError: If synchronize is True but no name is given.
        """
        self.name = name
        self.synchronize = synchronize
        self.map = {}
        self.reverse_map = {}
        self.readonly = False

        # There must be a name provided, or we cannot perform synchronization
        assert self.name or not self.synchronize

        if self.synchronize:
            # Try loading map from disk
            self.load()

    def exists(self, str):
        """ Return True iff this str is in the map """
        return str in self.map

    def id(self, str):
        """
        Get the ID for this string. Add a new ID if none is available.

        @param str: The feature string to look up.
        @return: The existing ID, or a newly assigned one.
        @raise KeyError: If the string is unknown and the map is readonly.
            The exception is constructed as KeyError(name, str).
        @todo: Don't want to synchronize every add, this may be too slow.
        """
        if str in self.map:
            return self.map[str]
        if self.readonly:
            raise KeyError(self.name, str)
        # The next free ID is the current number of entries.
        new_id = self.len
        self.map[str] = new_id
        self.reverse_map[new_id] = str
        assert new_id + 1 == self.len
        return new_id

    def str(self, id):
        """ Get the string for this ID. """
        return self.reverse_map[id]

    # This next function should just convert a list to a list
    # (disabled draft, kept for reference):
    # def ids(self, lst):
    #     """ Get the IDs for the elements of a list. Return the ID numbers of these keys as a map. """
    #     idset = {}
    #     for k in lst:
    #         try:
    #             idset[self.id(k)] = True
    #         except KeyError, e:
    #             print "Feature map '%s' does not contain key '%s'. Skipping..." % (e.name, e.key)
    #     return idset

    len = property(lambda self: len(self.map), doc="Number of different feature IDs")
    filename = property(lambda self: "fmap.%s.pkl.gz" % self.name, doc="The on-disk file synchronized to this feature map.")

    def load(self):
        """
        Load the map from disk.

        Replaces C{map} and C{reverse_map} with the pair unpickled from
        L{filename}. If an IOError occurs, a message is printed and the
        in-memory maps are left as they were.

        @warning: Uses pickle; only load feature-map files from a trusted source.
        @raise AssertionError: If this map is not synchronized.
        """
        assert self.synchronize
        try:
            f = myopen(self.filename, "rb")
            try:
                (self.map, self.reverse_map) = pickle.load(f)
            finally:
                # Always release the handle, even if unpickling fails.
                # NOTE(review): assumes myopen returns a file-like object
                # with close() -- confirm against common.myopen.
                f.close()
        except IOError:
            print("Could not open %s" % self.filename)

    def dump(self):
        """
        Dump the map to disk.

        Pickles the pair (map, reverse_map) to L{filename}.

        @raise AssertionError: If this map is not synchronized.
        """
        assert self.synchronize
        f = myopen(self.filename, "wb")
        try:
            pickle.dump((self.map, self.reverse_map), f)
        finally:
            # Close explicitly so that pending output is flushed to disk
            # rather than relying on garbage collection of the handle.
            f.close()