Mercurial > lcfOS
diff python/c3/lexer.py @ 287:1c7c1e619be8
File movage
author | Windel Bouwman |
---|---|
date | Thu, 21 Nov 2013 11:57:27 +0100 |
parents | 05184b95fa16 |
children | a747a45dcd78 |
line wrap: on
line diff
--- a/python/c3/lexer.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/lexer.py Thu Nov 21 11:57:27 2013 +0100 @@ -12,14 +12,18 @@ 'struct', 'cast', \ 'import', 'module' ] -def tokenize(s): - """ +def tokenize(input_file): + """ Tokenizer, generates an iterator that returns tokens! + Input is a file like object. + This GREAT example was taken from python re doc page! - """ - tok_spec = [ + """ + filename = input_file.name if hasattr(input_file, 'name') else '' + s = input_file.read() + tok_spec = [ ('REAL', r'\d+\.\d+'), ('HEXNUMBER', r'0x[\da-fA-F]+'), ('NUMBER', r'\d+'), @@ -32,13 +36,13 @@ ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'), ('STRING', r"'.*?'") ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - incomment = False - while mo is not None: + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + incomment = False + while mo is not None: typ = mo.lastgroup val = mo.group(typ) if typ == 'NEWLINE': @@ -69,14 +73,14 @@ val = float(val) elif typ == 'STRING': val = val[1:-1] - loc = SourceLocation(line, mo.start()-line_start, mo.end() - mo.start()) + loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start()) yield Token(typ, val, loc) pos = mo.end() mo = gettok(s, pos) - if pos != len(s): - col = pos - line_start - loc = SourceLocation(line, col, 1) - raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc) - loc = SourceLocation(line, 0, 0) - yield Token('END', '', loc) + if pos != len(s): + col = pos - line_start + loc = SourceLocation(filename, line, col, 1) + raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc) + loc = SourceLocation(filename, line, 0, 0) + yield Token('END', '', loc)