Mercurial > pylearn
comparison featuremap.py @ 356:18702ceb2096
Added more functions
author | Joseph Turian <turian@iro.umontreal.ca> |
---|---|
date | Thu, 19 Jun 2008 16:18:37 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
355:430c9e92cd23 | 356:18702ceb2096 |
---|---|
1 """ | |
2 Feature mapping. | |
3 | |
4 A feature map is identified by a unique name, e.g. "parsing features, experiment 35". | |
5 This unique name also determines the name of the on-disk version of the feature map. | |
6 | |
7 @todo: This should be rewritten to be more Pythonic. Perhaps use a class? | |
8 @todo: Maybe look at older C++ Id/Vocab code? Id could have a __str__ method | |
9 @todo: Clearer documentation. | |
10 @todo: Create an fmap directory | |
11 @todo: Use cPickle, not pickle | |
12 | |
13 @todo: Autosynchronize mode: Each time a new entry is added | |
14 to a L{FeatureMap}, the on-disk version of the feature map is | |
15 updated. Alternately, synchronize to disk when the object is destroyed. | |
16 """ | |
17 | |
18 from common import myopen | |
19 import pickle | |
20 | |
# Module-level registry: feature-map name -> FeatureMap instance.
# get() consults it so that each name is served by one shared instance.
name_to_fmap = {}


def get(name=None, synchronize=True):
    """
    Get the L{FeatureMap} for a particular feature name.

    The first request for a name constructs a L{FeatureMap} and stores it
    in the module-level registry; later requests return that same object.

    @param name: Unique name of the feature map.
    @param synchronize: Passed to the L{FeatureMap} constructor when the
        map is first created. Later calls for the same name must pass the
        same value.
    @return: The L{FeatureMap} registered under C{name}.
    @raise AssertionError: If a map already registered under C{name} was
        created with a different C{synchronize} value.
    """
    # No "global" statement is needed: the registry is mutated, not rebound.
    fmap = name_to_fmap.get(name)
    if fmap is None:
        fmap = name_to_fmap[name] = FeatureMap(name, synchronize)

    # Internal invariant: maps are registered under their own name.
    assert fmap.name == name

    # This is validation of the caller's argument, so it must survive
    # "python -O" (which strips assert statements). AssertionError is kept
    # as the exception type so existing callers see the same error.
    if fmap.synchronize != synchronize:
        raise AssertionError(
            "Feature map %r was created with synchronize=%r, "
            "but synchronize=%r was requested"
            % (name, fmap.synchronize, synchronize))
    return fmap
36 | |
def free_memory():
    """
    Forget every cached feature map.

    The module-level registry is rebound to a fresh, empty dict, so this
    module no longer holds a reference to any previously created map.
    Nothing is written to disk here.
    """
    global name_to_fmap
    name_to_fmap = dict()
43 | |
class KeyError(Exception):
    """Exception raised for keys missing from a readonly FeatureMap.

    @note: Inside this module this class shadows the builtin KeyError.

    Attributes:
        name -- Name of the FeatureMap raising the error.
        key -- Key not present.
    """
    def __init__(self, name, key):
        # Forward both values to Exception so that e.args is populated.
        # Without this, args is empty: the traceback shows no detail and
        # copying/unpickling (which re-calls the class with args) fails.
        Exception.__init__(self, name, key)
        self.name = name
        self.key = key

    def __str__(self):
        return "Feature map %r does not contain key %r" % (self.name, self.key)
53 | |
54 | |
class FeatureMap:
    """
    Map from a feature string to a numerical ID (starting from 0).

    IDs are assigned in insertion order: a new string receives the current
    number of entries as its ID. A reverse map (ID -> string) is maintained
    alongside the forward map.

    If synchronize is False, the feature map is considered temporary
    and we never actually synchronize it with disk. It expires with the
    lifetime of this execution.

    Attributes:
        name -- Unique name of this map; also determines the on-disk filename.
        synchronize -- If True, the map is loaded from disk on construction
            and may be written back with dump().
        map -- dict from feature string to ID.
        reverse_map -- dict from ID to feature string.
        readonly -- If True, id() raises this module's KeyError for a
            string that is not present instead of adding it.

    @warning: Do not construct this directly. Instead, use the global get() method.
    @todo: More documentation
    """

    def __init__(self, name=None, synchronize=True):
        self.name = name
        self.synchronize = synchronize
        self.map = {}
        self.reverse_map = {}
        self.readonly = False

        # There must be a name provided, or we cannot perform synchronization
        assert self.name or not self.synchronize

        if self.synchronize:
            # Try loading map from disk
            self.load()

    def exists(self, str):
        """ Return True iff this str is in the map """
        return str in self.map

    def id(self, str):
        """
        Get the ID for this string. Add a new ID if none is available.

        @raise KeyError: (the class defined in this module, not the
            builtin) if the string is absent and the map is readonly.
        """
        # @todo: Don't want to synchronize every add, this may be too slow.
        if str in self.map:
            return self.map[str]
        if self.readonly:
            raise KeyError(self.name, str)
        new_id = self.len
        self.map[str] = new_id
        self.reverse_map[new_id] = str
        # Sanity check: exactly one entry was added.
        assert new_id + 1 == self.len
        return new_id

    def str(self, id):
        """ Get the string for this ID. """
        return self.reverse_map[id]

    # This next function should just convert a list to a list
    # def ids(self, lst):
    #     """ Get the IDs for the elements of a list. Return the ID numbers of these keys as a map. """
    #     idset = {}
    #     for k in lst:
    #         try:
    #             idset[self.id(k)] = True
    #         except KeyError, e:
    #             print "Feature map '%s' does not contain key '%s'. Skipping..." % (e.name, e.key)
    #     return idset

    # Inside the lambda, "len" is the builtin: class-level names are not
    # visible from nested function scopes.
    len = property(lambda self: len(self.map), doc="Number of different feature IDs")
    filename = property(lambda self: "fmap.%s.pkl.gz" % self.name, doc="The on-disk file synchronized to this feature map.")

    def load(self):
        """
        Load the map from disk.

        Best effort: if an IOError occurs while opening or reading the
        file, a message is printed and the in-memory maps are left as
        they were.
        """
        assert self.synchronize
        try:
            f = myopen(self.filename, "rb")
            try:
                # NOTE: pickle can execute arbitrary code while loading;
                # only load feature-map files from a trusted location.
                (self.map, self.reverse_map) = pickle.load(f)
            finally:
                # Always release the handle, even if unpickling fails.
                # Assumes myopen() returns a file-like object with close().
                f.close()
        except IOError:
            # Single-argument parenthesised form prints identically on
            # Python 2 and also parses on Python 3.
            print("Could not open %s" % self.filename)

    def dump(self):
        """ Dump the map to disk. """
        assert self.synchronize
        f = myopen(self.filename, "wb")
        try:
            pickle.dump((self.map, self.reverse_map), f)
        finally:
            # Close explicitly so buffered data is flushed to the file
            # rather than relying on garbage collection of the handle.
            f.close()