lcfOS: comparison of python/ppci/c3/lexer.py @ 396:fb3c1f029b30
Added baselexer into c3 lexer
author | Windel Bouwman
---|---
date | Tue, 27 May 2014 22:19:32 +0200
parents | 6ae782a085e0
children | 5d03c10fe19d
Changes from 395:3b0c495e3008 to 396:fb3c1f029b30:
```diff
diff -r 3b0c495e3008 -r fb3c1f029b30 python/ppci/c3/lexer.py
--- a/python/ppci/c3/lexer.py
+++ b/python/ppci/c3/lexer.py
@@ -1,7 +1,7 @@
 import re
-from ppci import CompilerError, SourceLocation, Token
+from ppci import CompilerError, SourceLocation, Token, make_num
 from baselex import BaseLexer
 
 """
 Lexical analyzer part. Splits the input character stream into tokens.
 """
@@ -12,87 +12,53 @@
             'function', 'var', 'type', 'const',
             'struct', 'cast', 'sizeof',
             'import', 'module']
 
 
-class Lexer:
+class Lexer(BaseLexer):
     """ Generates a sequence of token from an input stream """
     def __init__(self, diag):
         self.diag = diag
+        tok_spec = [
+            ('REAL', r'\d+\.\d+', lambda typ, val: (typ, float(val))),
+            ('HEXNUMBER', r'0x[\da-fA-F]+', lambda typ, val: ('NUMBER', make_num(val))),
+            ('NUMBER', r'\d+', lambda typ, val: (typ, int(val))),
+            ('ID', r'[A-Za-z][A-Za-z\d_]*', self.handle_id),
+            ('NEWLINE', r'\n', lambda typ, val: self.newline()),
+            ('SKIP', r'[ \t]', None),
+            ('COMMENTS', r'//.*', None),
+            ('LONGCOMMENTBEGIN', r'\/\*', self.handle_comment_start),
+            ('LONGCOMMENTEND', r'\*\/', self.handle_comment_stop),
+            ('LEESTEKEN', r'==|->|<<|>>|!=|\+\+|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|', lambda typ, val: (val, val)),
+            ('STRING', r'".*?"', lambda typ, val: (typ, val[1:-1]))
+        ]
+        super().__init__(tok_spec)
 
-    def lex(self, source):
-        return self.tokenize(source)
-
-    def tokenize(self, input_file):
-        """
-        Tokenizer, generates an iterator that
-        returns tokens!
-
-        Input is a file like object.
-
-        This GREAT example was taken from python re doc page!
-        """
+    def lex(self, input_file):
         filename = input_file.name if hasattr(input_file, 'name') else ''
         s = input_file.read()
         input_file.close()
         self.diag.addSource(filename, s)
-        tok_spec = [
-            ('REAL', r'\d+\.\d+'),
-            ('HEXNUMBER', r'0x[\da-fA-F]+'),
-            ('NUMBER', r'\d+'),
-            ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-            ('NEWLINE', r'\n'),
-            ('SKIP', r'[ \t]'),
-            ('COMMENTS', r'//.*'),
-            ('LONGCOMMENTBEGIN', r'\/\*'),
-            ('LONGCOMMENTEND', r'\*\/'),
-            ('LEESTEKEN', r'==|->|<<|>>|!=|\+\+|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
-            ('STRING', r'".*?"')
-        ]
-        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-        gettok = re.compile(tok_re).match
-        line = 1
-        pos = line_start = 0
-        mo = gettok(s)
-        incomment = False
-        while mo is not None:
-            typ = mo.lastgroup
-            val = mo.group(typ)
-            if typ == 'NEWLINE':
-                line_start = pos
-                line += 1
-            elif typ == 'COMMENTS':
-                pass
-            elif typ == 'LONGCOMMENTBEGIN':
-                incomment = True
-            elif typ == 'LONGCOMMENTEND':
-                incomment = False
-            elif typ == 'SKIP':
-                pass
-            elif incomment:
+        self.filename = filename
+        return self.tokenize(s)
+
+    def handle_comment_start(self, typ, val):
+        self.incomment = True
+
+    def handle_comment_stop(self, typ, val):
+        self.incomment = False
+
+    def tokenize(self, text):
+        """ Keeps track of the long comments """
+        self.incomment = False
+        for token in super().tokenize(text):
+            if self.incomment:
                 pass # Wait until we are not in a comment section
             else:
-                if typ == 'ID':
-                    if val in keywords:
-                        typ = val
-                elif typ == 'LEESTEKEN':
-                    typ = val
-                elif typ == 'NUMBER':
-                    val = int(val)
-                elif typ == 'HEXNUMBER':
-                    val = int(val[2:], 16)
-                    typ = 'NUMBER'
-                elif typ == 'REAL':
-                    val = float(val)
-                elif typ == 'STRING':
-                    val = val[1:-1]
-                loc = SourceLocation(filename, line, mo.start() - line_start,
-                                     mo.end() - mo.start())
-                yield Token(typ, val, loc)
-            pos = mo.end()
-            mo = gettok(s, pos)
-        if pos != len(s):
-            col = pos - line_start
-            loc = SourceLocation(filename, line, col, 1)
-            raise CompilerError('Unexpected: "{0}"'.format(s[pos]), loc)
-        loc = SourceLocation(filename, line, 0, 0)
-        yield Token('END', '', loc)
+                yield token
+        loc = SourceLocation(self.filename, self.line, 0, 0)
+yield Token('EOF', 'EOF', loc)
+
+    def handle_id(self, typ, val):
+        if val in keywords:
+            typ = val
+        return typ, val
```