lcfOS: python/pylex.py comparison

comparison python/pylex.py @ 178:c694ec551f34

Added lex yacc test scripts

author	Windel Bouwman
date	Sat, 04 May 2013 12:07:17 +0200
parents
children

comparison

equal deleted inserted replaced

-:460db5669efa
+:c694ec551f34
+import collections
+""" Simple script to try fsa construction """
+# 0. possible characters:
+chars = ['a', 'b', 'c', 'd']
+# 1. pattern definitions:
+P1 = "aab"
+P2 = "abbad"
+P3 = "caababbac"
+patterns = [P1, P2, P3]
+# 2. input string:
+txt = "ababaabaaabaabaacabbadbababaabbaab"
+# Generator functions to generate states and transitions:
+class Item:
+def __init__(self, regex, dotpos):
+self.regex = regex
+self.dotpos = dotpos
+def __eq__(self, other):
+if type(self) is type(other):
+return self.regex == other.regex and self.dotpos == other.dotpos
+return False
+def __repr__(self):
+return '{0} _dot_ {1}'.format(self.regex[0:self.dotpos], self.regex[self.dotpos:])
+def __hash__(self):
+return (self.regex, self.dotpos).__hash__()
+def matches(self, ch):
+if self.dotpos < len(self.regex):
+if self.regex[self.dotpos] == ch:
+return True
+return False
+@property
+def IsReduce(self):
+return self.dotpos == len(self.regex)
+def InitialItemSet():
+s = set()
+for p in patterns:
+s.add(Item(p, 0))
+return frozenset(s)
+def NextItemSet(its, ch):
+nis = set()
+for item in its:
+if item.matches(ch):
+nis.add(Item(item.regex, item.dotpos + 1))
+return frozenset(nis)
+def ClassOfToken(its):
+for i in its:
+if i.IsReduce:
+return i.regex
+return None
+sw = frozenset() # Empty set
+states = set()
+transitions = {}
+s0 = InitialItemSet()
+def preCalc():
+states.add(s0)
+worklist = [s0]
+while len(worklist) > 0:
+its = worklist.pop(0)
+for ch in chars:
+nis = NextItemSet(its, ch)
+if nis and not nis in states:
+states.add(nis)
+worklist.append(nis)
+transitions[(its, ch)] = nis
+preCalc()
+print(len(states), states)
+print(len(transitions), transitions)
+# Lex function:
+Token = collections.namedtuple('Token', 'pos len name')
+def getNextToken():
+readIndex = 0
+while True:
+startTok = readIndex
+endTok = None
+classTok = None
+itemSet = s0
+while itemSet:
+ch = txt[readIndex]
+# Lookup next itemset:
+if (itemSet, ch) in transitions:
+itemSet = transitions[(itemSet, ch)]
+else:
+itemSet = sw
+# check what we recognized:
+ct = ClassOfToken(itemSet)
+if ct:
+classTok = ct
+endTok = readIndex
+readIndex += 1
+print('restart')
+if classTok:
+token = Token(startTok, endTok - startTok, classTok)
+readIndex = endTok + 1
+else:
+readIndex = startTok + 1
+token = None
+yield token
+for tok in getNextToken():
+print(tok)
+if tok:
+print(txt)
+print(' ' * tok.pos + tok.name)

Mercurial > lcfOS

comparison python/pylex.py @ 178:c694ec551f34