diff python/c3/lexer.py @ 287:1c7c1e619be8

File movage
author Windel Bouwman
date Thu, 21 Nov 2013 11:57:27 +0100
parents 05184b95fa16
children a747a45dcd78
line wrap: on
line diff
--- a/python/c3/lexer.py	Fri Nov 15 13:52:32 2013 +0100
+++ b/python/c3/lexer.py	Thu Nov 21 11:57:27 2013 +0100
@@ -12,14 +12,18 @@
    'struct', 'cast', \
    'import', 'module' ]
 
-def tokenize(s):
-     """
+def tokenize(input_file):
+    """
        Tokenizer, generates an iterator that
        returns tokens!
 
+       Input is a file-like object.
+
        This GREAT example was taken from python re doc page!
-     """
-     tok_spec = [
+    """
+    filename = input_file.name if hasattr(input_file, 'name') else ''
+    s = input_file.read()
+    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
@@ -32,13 +36,13 @@
        ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'),
        ('STRING', r"'.*?'")
      ]
-     tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
-     gettok = re.compile(tok_re).match
-     line = 1
-     pos = line_start = 0
-     mo = gettok(s)
-     incomment = False
-     while mo is not None:
+    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
+    gettok = re.compile(tok_re).match
+    line = 1
+    pos = line_start = 0
+    mo = gettok(s)
+    incomment = False
+    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
@@ -69,14 +73,14 @@
            val = float(val)
          elif typ == 'STRING':
            val = val[1:-1]
-         loc = SourceLocation(line, mo.start()-line_start, mo.end() - mo.start())
+         loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start())
          yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
-     if pos != len(s):
-         col = pos - line_start
-         loc = SourceLocation(line, col, 1)
-         raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
-     loc = SourceLocation(line, 0, 0)
-     yield Token('END', '', loc)
+    if pos != len(s):
+        col = pos - line_start
+        loc = SourceLocation(filename, line, col, 1)
+        raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc)
+    loc = SourceLocation(filename, line, 0, 0)
+    yield Token('END', '', loc)