view python/baselex.py @ 334:6f4753202b9a

Added more recipes
author Windel Bouwman
date Thu, 13 Feb 2014 22:02:08 +0100
parents 8d07a4254f04
children 818be710e13d
line wrap: on
line source


import re
from ppci import Token

def tokenize(tok_spec, txt):
    """Split *txt* into tokens according to *tok_spec*.

    Args:
        tok_spec: Iterable of entries indexed as ``(name, regex, func)``.
            ``name`` becomes a named group in one combined regular
            expression, so earlier entries take priority over later ones.
            ``func`` is called as ``func(typ, val)`` and must return a
            ``(typ, val)`` pair; when ``func`` is falsy the matched text is
            consumed without producing a token.
        txt: The string to scan.

    Yields:
        ``Token(typ, val)`` for every match whose entry has a callback.

    Raises:
        ParserException: When part of *txt* cannot be matched by any entry.
    """
    # Materialise once: the spec is walked twice below, which would silently
    # exhaust a generator and leave func_map empty.
    specs = list(tok_spec)
    tok_re = '|'.join('(?P<{}>{})'.format(spec[0], spec[1]) for spec in specs)
    gettok = re.compile(tok_re).match
    func_map = {spec[0]: spec[2] for spec in specs}

    pos = 0
    mo = gettok(txt, pos)
    while mo:
        end = mo.end()
        if end == pos:
            # A zero-width match makes no progress; continuing would loop
            # forever. Stop here and let the check below decide whether any
            # input was left unconsumed.
            break
        typ = mo.lastgroup
        val = mo.group(typ)
        func = func_map[typ]
        if func:
            typ, val = func(typ, val)
            yield Token(typ, val)
        pos = end
        mo = gettok(txt, pos)
    if len(txt) != pos:
        # NOTE(review): ParserException is not imported in the visible part
        # of this module -- confirm where it is defined and import it.
        raise ParserException('Lex fault at {}'.format(txt[pos:]))