Mercurial > lcfOS
comparison python/pylex.py @ 178:c694ec551f34
Added lex yacc test scripts
author | Windel Bouwman |
---|---|
date | Sat, 04 May 2013 12:07:17 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
177:460db5669efa | 178:c694ec551f34 |
---|---|
1 import collections | |
2 | |
3 """ Simple script to try fsa construction """ | |
4 # 0. possible characters: | |
5 chars = ['a', 'b', 'c', 'd'] | |
6 # 1. pattern definitions: | |
7 P1 = "aab" | |
8 P2 = "abbad" | |
9 P3 = "caababbac" | |
10 patterns = [P1, P2, P3] | |
11 # 2. input string: | |
12 txt = "ababaabaaabaabaacabbadbababaabbaab" | |
13 | |
14 # Generator functions to generate states and transitions: | |
15 class Item: | |
16 def __init__(self, regex, dotpos): | |
17 self.regex = regex | |
18 self.dotpos = dotpos | |
19 def __eq__(self, other): | |
20 if type(self) is type(other): | |
21 return self.regex == other.regex and self.dotpos == other.dotpos | |
22 return False | |
23 def __repr__(self): | |
24 return '{0} _dot_ {1}'.format(self.regex[0:self.dotpos], self.regex[self.dotpos:]) | |
25 def __hash__(self): | |
26 return (self.regex, self.dotpos).__hash__() | |
27 def matches(self, ch): | |
28 if self.dotpos < len(self.regex): | |
29 if self.regex[self.dotpos] == ch: | |
30 return True | |
31 return False | |
32 @property | |
33 def IsReduce(self): | |
34 return self.dotpos == len(self.regex) | |
35 | |
36 def InitialItemSet(): | |
37 s = set() | |
38 for p in patterns: | |
39 s.add(Item(p, 0)) | |
40 return frozenset(s) | |
41 | |
42 def NextItemSet(its, ch): | |
43 nis = set() | |
44 for item in its: | |
45 if item.matches(ch): | |
46 nis.add(Item(item.regex, item.dotpos + 1)) | |
47 return frozenset(nis) | |
48 | |
49 def ClassOfToken(its): | |
50 for i in its: | |
51 if i.IsReduce: | |
52 return i.regex | |
53 return None | |
54 | |
55 sw = frozenset() # Empty set | |
56 states = set() | |
57 transitions = {} | |
58 s0 = InitialItemSet() | |
59 | |
60 def preCalc(): | |
61 states.add(s0) | |
62 worklist = [s0] | |
63 while len(worklist) > 0: | |
64 its = worklist.pop(0) | |
65 for ch in chars: | |
66 nis = NextItemSet(its, ch) | |
67 if nis and not nis in states: | |
68 states.add(nis) | |
69 worklist.append(nis) | |
70 transitions[(its, ch)] = nis | |
71 | |
72 preCalc() | |
73 print(len(states), states) | |
74 print(len(transitions), transitions) | |
75 | |
76 # Lex function: | |
77 Token = collections.namedtuple('Token', 'pos len name') | |
78 | |
79 def getNextToken(): | |
80 readIndex = 0 | |
81 while True: | |
82 startTok = readIndex | |
83 endTok = None | |
84 classTok = None | |
85 itemSet = s0 | |
86 while itemSet: | |
87 ch = txt[readIndex] | |
88 # Lookup next itemset: | |
89 if (itemSet, ch) in transitions: | |
90 itemSet = transitions[(itemSet, ch)] | |
91 else: | |
92 itemSet = sw | |
93 # check what we recognized: | |
94 ct = ClassOfToken(itemSet) | |
95 if ct: | |
96 classTok = ct | |
97 endTok = readIndex | |
98 readIndex += 1 | |
99 print('restart') | |
100 if classTok: | |
101 token = Token(startTok, endTok - startTok, classTok) | |
102 readIndex = endTok + 1 | |
103 else: | |
104 readIndex = startTok + 1 | |
105 token = None | |
106 yield token | |
107 | |
108 for tok in getNextToken(): | |
109 print(tok) | |
110 if tok: | |
111 print(txt) | |
112 print(' ' * tok.pos + tok.name) | |
113 |