view python/baselex.py @ 357:818be710e13d

Added acceptance function to burg
author Windel Bouwman
date Fri, 14 Mar 2014 15:14:29 +0100
parents 8d07a4254f04
children 0c44e494ef58
line wrap: on
line source


import re
from ppci import Token, CompilerError

def tokenize(tok_spec, txt):
    """Lazily split ``txt`` into tokens according to ``tok_spec``.

    ``tok_spec`` is an iterable of entries indexed as
    ``(name, regex, func)``.  All regexes are combined into one
    alternation of named groups, so earlier entries take priority over
    later ones.  For each match:

    * if ``func`` is falsy the matched text is discarded;
    * otherwise ``func(name, text)`` is called, must return a
      ``(typ, val)`` pair, and ``Token(typ, val)`` is yielded.

    This is a generator: nothing is scanned until it is iterated.

    Raises:
        CompilerError: when scanning stops before the end of ``txt``,
            either because no pattern matches or because the only match
            is empty.  The message contains the unconsumed text.
    """
    # The spec is walked twice below (regex and handler table); take a
    # snapshot so that one-shot iterables work as well.
    tok_spec = list(tok_spec)
    tok_re = '|'.join(
        '(?P<{}>{})'.format(spec[0], spec[1]) for spec in tok_spec)
    gettok = re.compile(tok_re).match
    func_map = {spec[0]: spec[2] for spec in tok_spec}

    pos = 0
    mo = gettok(txt, pos)
    while mo:
        end = mo.end()
        if end == pos:
            # A zero-width match makes no progress; matching again at the
            # same position would loop forever.  Stop here and let the
            # check below decide whether unconsumed input is an error.
            break
        typ = mo.lastgroup
        val = mo.group(typ)
        func = func_map[typ]
        if func:
            typ, val = func(typ, val)
            yield Token(typ, val)
        pos = end
        mo = gettok(txt, pos)
    if len(txt) != pos:
        raise CompilerError('Lex fault at {}'.format(txt[pos:]))