comparison python/pylex.py @ 178:c694ec551f34

Added lex yacc test scripts
author Windel Bouwman
date Sat, 04 May 2013 12:07:17 +0200
parents
children
comparison
equal deleted inserted replaced
177:460db5669efa 178:c694ec551f34
1 import collections
2
3 """ Simple script to try fsa construction """
4 # 0. possible characters:
5 chars = ['a', 'b', 'c', 'd']
6 # 1. pattern definitions:
7 P1 = "aab"
8 P2 = "abbad"
9 P3 = "caababbac"
10 patterns = [P1, P2, P3]
11 # 2. input string:
12 txt = "ababaabaaabaabaacabbadbababaabbaab"
13
14 # Generator functions to generate states and transitions:
15 class Item:
16 def __init__(self, regex, dotpos):
17 self.regex = regex
18 self.dotpos = dotpos
19 def __eq__(self, other):
20 if type(self) is type(other):
21 return self.regex == other.regex and self.dotpos == other.dotpos
22 return False
23 def __repr__(self):
24 return '{0} _dot_ {1}'.format(self.regex[0:self.dotpos], self.regex[self.dotpos:])
25 def __hash__(self):
26 return (self.regex, self.dotpos).__hash__()
27 def matches(self, ch):
28 if self.dotpos < len(self.regex):
29 if self.regex[self.dotpos] == ch:
30 return True
31 return False
32 @property
33 def IsReduce(self):
34 return self.dotpos == len(self.regex)
35
36 def InitialItemSet():
37 s = set()
38 for p in patterns:
39 s.add(Item(p, 0))
40 return frozenset(s)
41
42 def NextItemSet(its, ch):
43 nis = set()
44 for item in its:
45 if item.matches(ch):
46 nis.add(Item(item.regex, item.dotpos + 1))
47 return frozenset(nis)
48
49 def ClassOfToken(its):
50 for i in its:
51 if i.IsReduce:
52 return i.regex
53 return None
54
55 sw = frozenset() # Empty set
56 states = set()
57 transitions = {}
58 s0 = InitialItemSet()
59
60 def preCalc():
61 states.add(s0)
62 worklist = [s0]
63 while len(worklist) > 0:
64 its = worklist.pop(0)
65 for ch in chars:
66 nis = NextItemSet(its, ch)
67 if nis and not nis in states:
68 states.add(nis)
69 worklist.append(nis)
70 transitions[(its, ch)] = nis
71
72 preCalc()
73 print(len(states), states)
74 print(len(transitions), transitions)
75
76 # Lex function:
77 Token = collections.namedtuple('Token', 'pos len name')
78
79 def getNextToken():
80 readIndex = 0
81 while True:
82 startTok = readIndex
83 endTok = None
84 classTok = None
85 itemSet = s0
86 while itemSet:
87 ch = txt[readIndex]
88 # Lookup next itemset:
89 if (itemSet, ch) in transitions:
90 itemSet = transitions[(itemSet, ch)]
91 else:
92 itemSet = sw
93 # check what we recognized:
94 ct = ClassOfToken(itemSet)
95 if ct:
96 classTok = ct
97 endTok = readIndex
98 readIndex += 1
99 print('restart')
100 if classTok:
101 token = Token(startTok, endTok - startTok, classTok)
102 readIndex = endTok + 1
103 else:
104 readIndex = startTok + 1
105 token = None
106 yield token
107
108 for tok in getNextToken():
109 print(tok)
110 if tok:
111 print(txt)
112 print(' ' * tok.pos + tok.name)
113