diff python/c3/lexer.py @ 293:6aa721e7b10b

Try to improve build sequence
author Windel Bouwman
date Thu, 28 Nov 2013 20:39:37 +0100
parents a747a45dcd78
children
--- a/python/c3/lexer.py	Wed Nov 27 08:06:42 2013 +0100
+++ b/python/c3/lexer.py	Thu Nov 28 20:39:37 2013 +0100
@@ -13,75 +13,81 @@
    'struct', 'cast',
    'import', 'module']
 
-def tokenize(input_file):
-    """
-       Tokenizer, generates an iterator that
-       returns tokens!
+
+class Lexer:
+    def __init__(self, diag):
+        self.diag = diag
 
-       Input is a file like object.
+    def tokenize(self, input_file):
+        """
+           Tokenizer, generates an iterator that
+           returns tokens!
+
+           Input is a file like object.
 
-       This GREAT example was taken from python re doc page!
-    """
-    filename = input_file.name if hasattr(input_file, 'name') else ''
-    s = input_file.read()
-    input_file.close()
-    tok_spec = [
-       ('REAL', r'\d+\.\d+'),
-       ('HEXNUMBER', r'0x[\da-fA-F]+'),
-       ('NUMBER', r'\d+'),
-       ('ID', r'[A-Za-z][A-Za-z\d_]*'),
-       ('NEWLINE', r'\n'),
-       ('SKIP', r'[ \t]'),
-       ('COMMENTS', r'//.*'),
-       ('LONGCOMMENTBEGIN', r'\/\*'),
-       ('LONGCOMMENTEND', r'\*\/'),
-       ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
-       ('STRING', r"'.*?'")
-     ]
-    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-    gettok = re.compile(tok_re).match
-    line = 1
-    pos = line_start = 0
-    mo = gettok(s)
-    incomment = False
-    while mo is not None:
-        typ = mo.lastgroup
-        val = mo.group(typ)
-        if typ == 'NEWLINE':
-            line_start = pos
-            line += 1
-        elif typ == 'COMMENTS':
-            pass
-        elif typ == 'LONGCOMMENTBEGIN':
-            incomment = True
-        elif typ == 'LONGCOMMENTEND':
-            incomment = False
-        elif typ == 'SKIP':
-            pass
-        elif incomment:
-            pass # Wait until we are not in a comment section
-        else:
-            if typ == 'ID':
-                if val in keywords:
+           This GREAT example was taken from python re doc page!
+        """
+        filename = input_file.name if hasattr(input_file, 'name') else ''
+        s = input_file.read()
+        input_file.close()
+        self.diag.addSource(filename, s)
+        tok_spec = [
+           ('REAL', r'\d+\.\d+'),
+           ('HEXNUMBER', r'0x[\da-fA-F]+'),
+           ('NUMBER', r'\d+'),
+           ('ID', r'[A-Za-z][A-Za-z\d_]*'),
+           ('NEWLINE', r'\n'),
+           ('SKIP', r'[ \t]'),
+           ('COMMENTS', r'//.*'),
+           ('LONGCOMMENTBEGIN', r'\/\*'),
+           ('LONGCOMMENTEND', r'\*\/'),
+           ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
+           ('STRING', r"'.*?'")
+         ]
+        tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+        gettok = re.compile(tok_re).match
+        line = 1
+        pos = line_start = 0
+        mo = gettok(s)
+        incomment = False
+        while mo is not None:
+            typ = mo.lastgroup
+            val = mo.group(typ)
+            if typ == 'NEWLINE':
+                line_start = pos
+                line += 1
+            elif typ == 'COMMENTS':
+                pass
+            elif typ == 'LONGCOMMENTBEGIN':
+                incomment = True
+            elif typ == 'LONGCOMMENTEND':
+                incomment = False
+            elif typ == 'SKIP':
+                pass
+            elif incomment:
+                pass # Wait until we are not in a comment section
+            else:
+                if typ == 'ID':
+                    if val in keywords:
+                        typ = val
+                elif typ == 'LEESTEKEN':
                     typ = val
-            elif typ == 'LEESTEKEN':
-                typ = val
-            elif typ == 'NUMBER':
-                val = int(val)
-            elif typ == 'HEXNUMBER':
-                val = int(val[2:], 16)
-                typ = 'NUMBER'
-            elif typ == 'REAL':
-                val = float(val)
-            elif typ == 'STRING':
-                val = val[1:-1]
-            loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start())
-            yield Token(typ, val, loc)
-        pos = mo.end()
-        mo = gettok(s, pos)
-    if pos != len(s):
-        col = pos - line_start
-        loc = SourceLocation(filename, line, col, 1)
-        raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
-    loc = SourceLocation(filename, line, 0, 0)
-    yield Token('END', '', loc)
+                elif typ == 'NUMBER':
+                    val = int(val)
+                elif typ == 'HEXNUMBER':
+                    val = int(val[2:], 16)
+                    typ = 'NUMBER'
+                elif typ == 'REAL':
+                    val = float(val)
+                elif typ == 'STRING':
+                    val = val[1:-1]
+                loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start())
+                yield Token(typ, val, loc)
+            pos = mo.end()
+            mo = gettok(s, pos)
+        if pos != len(s):
+            col = pos - line_start
+            loc = SourceLocation(filename, line, col, 1)
+            raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
+        loc = SourceLocation(filename, line, 0, 0)
+        yield Token('END', '', loc)
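
The docstring credits the scanning approach to the tokenizer example in the Python re documentation: join the (name, pattern) pairs into one master regex of named groups, call .match() at the current offset, and dispatch on mo.lastgroup. Below is a minimal, self-contained sketch of that technique; the token names and patterns are illustrative stand-ins, not the c3 token set from the diff.

    # Sketch of the named-group master-regex scanning technique (per the
    # Python re docs example the docstring refers to). Illustrative tokens only.
    import re

    def scan(text):
        tok_spec = [
            ('NUMBER', r'\d+'),
            ('ID',     r'[A-Za-z_]\w*'),
            ('OP',     r'[+\-*/=]'),
            ('SKIP',   r'[ \t\n]+'),
        ]
        master = re.compile('|'.join('(?P<%s>%s)' % pair for pair in tok_spec))
        pos = 0
        while pos < len(text):
            mo = master.match(text, pos)
            if mo is None:
                # Mirrors the "Unexpected character" error path in the diff.
                raise ValueError('Unexpected character %r at %d' % (text[pos], pos))
            if mo.lastgroup != 'SKIP':
                yield mo.lastgroup, mo.group()
            pos = mo.end()

    print(list(scan('x = 40 + 2')))
    # [('ID', 'x'), ('OP', '='), ('NUMBER', '40'), ('OP', '+'), ('NUMBER', '2')]

Because the alternatives in the master pattern are tried left to right, order in tok_spec matters: more specific patterns must come before more general ones, which is why REAL and HEXNUMBER precede NUMBER in the diff above.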
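The refactor turns the module-level tokenize() generator into a Lexer class that is constructed with a diagnostics object and registers the source text via diag.addSource(filename, s) before scanning. A hedged usage sketch, assuming the module is importable as c3.lexer and substituting a minimal stand-in for the real diagnostics object (the diff only shows that it must provide addSource):

    # Hypothetical driver for the refactored Lexer; DiagnosticsStub is a
    # stand-in, not the project's real diagnostics class.
    import io
    from c3.lexer import Lexer

    class DiagnosticsStub:
        def addSource(self, filename, source):
            self.filename, self.source = filename, source

    lexer = Lexer(DiagnosticsStub())
    source = io.StringIO("module demo; var int x;")
    for token in lexer.tokenize(source):
        print(token)

Token, SourceLocation and CompilerError are referenced by the diff but defined elsewhere in the module, so the printed form of each token depends on Token's repr.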