diff python/pylex.py @ 178:c694ec551f34
Added lex yacc test scripts
author | Windel Bouwman
date | Sat, 04 May 2013 12:07:17 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/pylex.py	Sat May 04 12:07:17 2013 +0200
@@ -0,0 +1,113 @@
+import collections
+
+""" Simple script to try fsa construction """
+# 0. possible characters:
+chars = ['a', 'b', 'c', 'd']
+# 1. pattern definitions:
+P1 = "aab"
+P2 = "abbad"
+P3 = "caababbac"
+patterns = [P1, P2, P3]
+# 2. input string:
+txt = "ababaabaaabaabaacabbadbababaabbaab"
+
+# Generator functions to generate states and transitions:
+class Item:
+    def __init__(self, regex, dotpos):
+        self.regex = regex
+        self.dotpos = dotpos
+    def __eq__(self, other):
+        if type(self) is type(other):
+            return self.regex == other.regex and self.dotpos == other.dotpos
+        return False
+    def __repr__(self):
+        return '{0} _dot_ {1}'.format(self.regex[0:self.dotpos], self.regex[self.dotpos:])
+    def __hash__(self):
+        return (self.regex, self.dotpos).__hash__()
+    def matches(self, ch):
+        if self.dotpos < len(self.regex):
+            if self.regex[self.dotpos] == ch:
+                return True
+        return False
+    @property
+    def IsReduce(self):
+        return self.dotpos == len(self.regex)
+
+def InitialItemSet():
+    s = set()
+    for p in patterns:
+        s.add(Item(p, 0))
+    return frozenset(s)
+
+def NextItemSet(its, ch):
+    nis = set()
+    for item in its:
+        if item.matches(ch):
+            nis.add(Item(item.regex, item.dotpos + 1))
+    return frozenset(nis)
+
+def ClassOfToken(its):
+    for i in its:
+        if i.IsReduce:
+            return i.regex
+    return None
+
+sw = frozenset() # Empty set
+states = set()
+transitions = {}
+s0 = InitialItemSet()
+
+def preCalc():
+    states.add(s0)
+    worklist = [s0]
+    while len(worklist) > 0:
+        its = worklist.pop(0)
+        for ch in chars:
+            nis = NextItemSet(its, ch)
+            if nis and not nis in states:
+                states.add(nis)
+                worklist.append(nis)
+            transitions[(its, ch)] = nis
+
+preCalc()
+print(len(states), states)
+print(len(transitions), transitions)
+
+# Lex function:
+Token = collections.namedtuple('Token', 'pos len name')
+
+def getNextToken():
+    readIndex = 0
+    while True:
+        startTok = readIndex
+        endTok = None
+        classTok = None
+        itemSet = s0
+        while itemSet:
+            ch = txt[readIndex]
+            # Lookup next itemset:
+            if (itemSet, ch) in transitions:
+                itemSet = transitions[(itemSet, ch)]
+            else:
+                itemSet = sw
+            # check what we recognized:
+            ct = ClassOfToken(itemSet)
+            if ct:
+                classTok = ct
+                endTok = readIndex
+            readIndex += 1
+        print('restart')
+        if classTok:
+            token = Token(startTok, endTok - startTok, classTok)
+            readIndex = endTok + 1
+        else:
+            readIndex = startTok + 1
+            token = None
+        yield token
+
+for tok in getNextToken():
+    print(tok)
+    if tok:
+        print(txt)
+        print(' ' * tok.pos + tok.name)
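
The script above precomputes the item-set transition table once in preCalc() and then scans txt with it; note that the inner loop in getNextToken only stops once txt[readIndex] runs past the end of the input and raises an IndexError. For comparison, here is a minimal standalone sketch of the same dotted-item, maximal-munch idea. The names scan and the local Token tuple are illustrative and not part of pylex.py; the sketch advances item sets on the fly instead of precomputing transitions, terminates cleanly at end of input, and records the full match length rather than pylex.py's endTok - startTok, so it is not a drop-in replacement.

from collections import namedtuple

# Illustrative tuple; mirrors the namedtuple used in pylex.py.
Token = namedtuple('Token', 'pos len name')

def scan(text, patterns):
    """Maximal-munch scan of text against literal patterns.

    An item is a pair (pattern, dotpos): the first dotpos characters of
    pattern have been matched.  This mirrors the Item class above, but
    advances item sets on the fly instead of using a precomputed table.
    """
    pos = 0
    while pos < len(text):
        items = {(p, 0) for p in patterns}   # initial item set
        best = None                          # longest complete match so far
        i = pos
        while items and i < len(text):
            ch = text[i]
            # Advance the dot in every item whose next character matches ch.
            items = {(p, d + 1) for (p, d) in items if d < len(p) and p[d] == ch}
            # Any item with the dot at the end is a complete match.
            for (p, d) in items:
                if d == len(p):
                    best = Token(pos, d, p)
            i += 1
        if best:
            yield best
            pos = best.pos + best.len        # continue after the match
        else:
            pos += 1                         # no match starting here; skip one char

if __name__ == '__main__':
    txt = "ababaabaaabaabaacabbadbababaabbaab"
    for tok in scan(txt, ["aab", "abbad", "caababbac"]):
        print(tok)
        print(txt)
        print(' ' * tok.pos + tok.name)

Run on the same input string, this prints each recognized token together with a marker line showing where in txt it starts, in the same spirit as the loop at the bottom of pylex.py.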