356
|
1 """
|
|
2 Feature mapping.
|
|
3
|
|
4 A feature map is identified by a unique name, e.g. "parsing features, experiment 35".
|
|
5 This unique name also determines the name of the on-disk version of the feature map.
|
|
6
|
|
7 @todo: This should be rewritten to be more Pythonic. Perhaps use a class?
|
|
8 @todo: Maybe look at older C++ Id/Vocab code? Id could have a __str__ method
|
|
9 @todo: Clearer documentation.
|
|
10 @todo: Create an fmap directory
|
|
11 @todo: Use cPickle, not pickle
|
|
12
|
|
13 @todo: Autosynchronize mode: Each time a new entry is added
|
|
14 to a L{FeatureMap}, the on-disk version of the feature map is
|
|
15 updated. Alternately, synchronize to disk when the object is destroyed.
|
|
16 """
|
|
17
|
|
18 from common import myopen
|
|
19 import pickle
|
|
20
|
|
# Process-wide registry: feature-map name -> the one shared L{FeatureMap}
# for that name. It is meant to behave as a singleton; get() fills it in
# and free_memory() resets it.
name_to_fmap = dict()
|
|
23
|
|
def get(name=None, synchronize=True):
    """
    Return the shared L{FeatureMap} registered under C{name}.

    The first request for a name constructs the map (with the given
    C{synchronize} flag) and stores it in the module-level registry;
    subsequent requests return that same object.

    @param name: Unique name of the feature map.
    @param synchronize: Passed to the L{FeatureMap} constructor on first
        use. Later calls must pass the same value that was used to create
        the map, otherwise the sanity check below fails.
    @return: The L{FeatureMap} for C{name}.
    """
    # NOTE: deliberately no try/except KeyError here -- this module defines
    # its own KeyError class, which shadows the builtin one.
    fmap = name_to_fmap.get(name)
    if fmap is None:
        # First request for this name: build the map and register it.
        fmap = FeatureMap(name, synchronize)
        name_to_fmap[name] = fmap

    # Sanity checks: the cached map must match what the caller asked for.
    assert fmap.name == name
    assert fmap.synchronize == synchronize
    return fmap
|
|
36
|
|
def free_memory():
    """
    Forget every cached feature map so its memory can be reclaimed.

    The module-level registry is rebound to a brand-new empty dict; the
    previous dict object itself is left untouched.
    """
    global name_to_fmap
    name_to_fmap = dict()
|
|
43
|
|
class KeyError(Exception):
    """Exception raised for keys missing from a readonly FeatureMap.

    Note that, inside this module, this class shadows the builtin KeyError.

    Attributes:
        name -- Name of the FeatureMap raising the error.
        key -- Key not present.
    """
    def __init__(self, name, key):
        # Initialise the base class explicitly so that ``args`` is always
        # populated with (name, key).
        Exception.__init__(self, name, key)
        self.name = name
        self.key = key

    def __str__(self):
        # Human-readable message for tracebacks and logs (instead of the
        # bare tuple repr of the arguments).
        return "Feature map '%s' does not contain key '%s'" % (self.name, self.key)
|
|
53
|
|
54
|
|
class FeatureMap(object):
    """
    Map from a feature string to a numerical ID (starting from 0).

    If synchronize is False, the feature map is considered temporary
    and we never actually synchronize it with disk. It expires with the
    lifetime of this execution.

    Attributes:
        name -- Unique name of the map; it also determines L{filename}.
        synchronize -- If True, the map is loaded from disk on construction
            and L{load} / L{dump} may be called.
        map -- dict from feature string to ID.
        reverse_map -- dict from ID back to feature string.
        readonly -- If True, L{id} raises this module's KeyError for an
            unknown string instead of assigning it a new ID.

    Nothing in this class writes to disk automatically; call L{dump}.

    @warning: Do not construct this directly. Instead, use the global get() method.
    @todo: More documentation
    """

    def __init__(self, name=None, synchronize=True):
        """
        @param name: Unique name of the map. Required if C{synchronize}
            is True, since the on-disk filename is derived from it.
        @param synchronize: If True, try to load the map from disk now.
        """
        self.name = name
        self.synchronize = synchronize
        self.map = {}
        self.reverse_map = {}
        self.readonly = False

        # There must be a name provided, or we cannot perform synchronization
        assert self.name or not self.synchronize

        if self.synchronize:
            # Try loading map from disk
            self.load()

    def exists(self, str):
        """ Return True iff this str is in the map """
        return str in self.map

    def id(self, str):
        """
        Get the ID for this string, assigning a new ID if none is available.

        New IDs are handed out consecutively: the next ID is the current
        number of entries in the map.

        @raise KeyError: (this module's KeyError class) if the string is
            unknown and the map is readonly.
        @todo: Don't want to synchronize every add, this may be too slow.
        """
        if str in self.map:
            return self.map[str]
        if self.readonly:
            raise KeyError(self.name, str)
        new_id = self.len
        self.map[str] = new_id
        self.reverse_map[new_id] = str
        # Sanity check: exactly one entry was added.
        assert new_id + 1 == self.len
        return new_id

    def str(self, id):
        """ Get the string for this ID. """
        return self.reverse_map[id]

    # This next function should just convert a list to a list
    # def ids(self, lst):
    #     """ Get the IDs for the elements of a list. Return the ID numbers of these keys as a map. """
    #     idset = {}
    #     for k in lst:
    #         try:
    #             idset[self.id(k)] = True
    #         except KeyError, e:
    #             print "Feature map '%s' does not contain key '%s'. Skipping..." % (e.name, e.key)
    #     return idset

    # Inside the lambda, ``len`` resolves to the builtin (class-body names
    # are not visible from nested function scopes), so this does not recurse.
    len = property(lambda self: len(self.map), doc="Number of different feature IDs")
    filename = property(lambda self: "fmap.%s.pkl.gz" % self.name, doc="The on-disk file synchronized to this feature map.")

    def load(self):
        """
        Load the map from disk.

        If an IOError occurs, a message is printed and the in-memory map
        is left as it was.
        """
        assert self.synchronize
        try:
            f = myopen(self.filename, "rb")
            try:
                # NOTE(review): pickle.load can execute arbitrary code; only
                # load feature-map files from trusted locations.
                (self.map, self.reverse_map) = pickle.load(f)
            finally:
                # Always release the handle (assumes myopen returns a
                # file-like object with close() -- TODO confirm).
                f.close()
        except IOError:
            # Call form of print produces the same output on Python 2 and 3.
            print("Could not open %s" % self.filename)

    def dump(self):
        """ Dump the map to disk. """
        assert self.synchronize
        f = myopen(self.filename, "wb")
        try:
            pickle.dump((self.map, self.reverse_map), f)
        finally:
            # Close explicitly rather than relying on garbage collection to
            # flush the written data (assumes a file-like object with
            # close() -- TODO confirm).
            f.close()
|