Mercurial > lcfOS
changeset 148:e5263f74b287
Added c3 language frontend initial parser
author | Windel Bouwman |
---|---|
date | Fri, 01 Mar 2013 10:24:01 +0100 |
parents | 4e79484a9d47 |
children | 74241ca312cc |
files | python/c3/astnodes.py python/c3/lexer.py python/c3/parser.py python/c3/semantics.py python/ppci/errors.py python/testc3.py |
diffstat | 6 files changed, 617 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/c3/astnodes.py Fri Mar 01 10:24:01 2013 +0100 @@ -0,0 +1,196 @@ +""" +AST nodes for the c3 language. +""" + +class Node: + location = None + def getChildren(self): + children = [] + members = dir(self) + for member in members: + member = getattr(self, member) + if isinstance(member, Node): + children.append(member) + elif type(member) is list: + for mi in member: + if isinstance(mi, Node): + children.append(mi) + return children + + +class Id(Node): + def __init__(self, tok, pub): + self.name = tok.val + self.is_public = pub + def __repr__(self): + return 'ID {0}'.format(self.name) + +# Selectors: +class Designator(Node): + def __init__(self, obj, selectors, typ): + self.obj = obj + self.selectors = selectors + self.typ = typ + def __repr__(self): + return 'DESIGNATOR {0}, selectors {1}, type {2}'.format(self.obj, self.selectors, self.typ) + +""" +Type classes +""" +def isType(a, b): + """ Compare types a and b and check if they are equal """ + if type(a) is type(b): + if type(a) is BaseType: + return (a.name == b.name) and (a.size == b.size) + elif type(a) is ProcedureType: + if len(a.parameters) != len(b.parameters): + print('Number of parameters does not match') + return False + for aparam, bparam in zip(a.parameters, b.parameters): + if not isType(aparam.typ, bparam.typ): + print('Parameter {0} does not match parameter {1}'.format(aparam, bparam)) + return False + if a.result is None: + # TODO: how to handle a None return type?? + pass + if not isType(a.result, b.result): + print('Procedure return value mismatch {0} != {1}'.format(a.result, b.result)) + return False + return True + else: + print(a) + print(b) + Error('Not implemented {0}'.format(a)) + else: + return False + +class Type: + def isType(self, b): + return isType(self, b) + +class BaseType(Type): + def __init__(self, name): + self.name = name + def __repr__(self): + return '[TYPE {0}]'.format(self.name) + +class FunctionType(Type): + def __init__(self, parameters, returntype): + self.parameters = parameters + self.returntype = returntype + def __repr__(self): + return '[PROCTYPE {0} RET {1}]'.format(self.parameters, self.returntype) + +class DefinedType(Type): + def __init__(self, name, typ): + self.name = name + self.typ = typ + def __repr__(self): + return 'Named type {0} of type {1}'.format(self.name, self.typ) + +# Variables, parameters, local variables, constants: +class Symbol(Node): + pass + +class Constant(Symbol): + def __init__(self, value, typ, name=None, public=False): + self.name = name + self.value = value + self.typ = typ + self.public = public + def __repr__(self): + return 'CONSTANT {0} = {1}'.format(self.name, self.value) + +class Variable(Symbol): + def __init__(self, name, typ, public): + self.name = name + self.typ = typ + self.public = public + self.isLocal = False + self.isReadOnly = False + self.isParameter = False + def __repr__(self): + txt = '[public] ' if self.public else '' + return '{2}VAR {0} : {1}'.format(self.name, self.typ, txt) + +class Parameter(Node): + """ A parameter has a passing method, name and typ """ + def __init__(self, name, typ): + self.name = name + self.typ = typ + def __repr__(self): + return 'PARAM {0} {1}'.format(self.name, self.typ) + +# Operations: +class Unop(Node): + def __init__(self, a, op): + self.a = a + self.op = op + def __repr__(self): + return 'UNOP {0}'.format(self.op) + +class Binop(Node): + def __init__(self, a, op, b): + self.a = a + self.b = b + self.op = op # Operation: '+', '-', '*', '/', 'mod' + def __repr__(self): + return 'BINOP {0} {1}'.format(self.op, self.typ) + +# Modules +class Package(Node): + def __init__(self, name): + self.name = name + def __repr__(self): + return 'PACKAGE {0}'.format(self.name) + +# Procedure types +class Procedure(Symbol): + """ Actual implementation of a function """ + def __init__(self, name, typ, block): + self.name = name + self.block = block + self.typ = typ + def __repr__(self): + return 'PROCEDURE {0} {1}'.format(self.name, self.typ) + +# Statements +class CompoundStatement(Node): + def __init__(self, statements): + self.statements = statements + def __repr__(self): + return 'COMPOUND STATEMENT' + +class EmptyStatement(Node): + def __repr__(self): + return 'EMPTY STATEMENT' + +class Assignment(Node): + def __init__(self, lval, rval): + self.lval = lval + self.rval = rval + def __repr__(self): + return 'ASSIGNMENT' + +class ProcedureCall(Node): + def __init__(self, proc, args): + self.proc = proc + self.args = args + def __repr__(self): + return 'CALL {0} '.format(self.proc) + +class IfStatement(Node): + def __init__(self, condition, truestatement, falsestatement=None): + self.condition = condition + self.truestatement = truestatement + self.falsestatement = falsestatement + def __repr__(self): + return 'IF-statement' + +class WhileStatement(Node): + def __init__(self, condition, statements): + self.condition = condition + self.dostatements = statements + def __repr__(self): + return 'WHILE-statement' +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/c3/lexer.py Fri Mar 01 10:24:01 2013 +0100 @@ -0,0 +1,73 @@ +import collections, re +from ppci.errors import CompilerException, SourceLocation + +""" + Lexical analyzer part. Splits the input character stream into tokens. +""" + +# Token is used in the lexical analyzer: +Token = collections.namedtuple('Token', 'typ val loc') + +keywords = ['and', 'or', 'not','true', 'false', \ + 'else', 'if', 'while', 'return', \ + 'public', 'function', 'var', 'type', \ + 'import', 'package' ] + +def tokenize(s): + """ + Tokenizer, generates an iterator that + returns tokens! + + This GREAT example was taken from python re doc page! + """ + tok_spec = [ + ('REAL', r'\d+\.\d+'), + ('HEXNUMBER', r'0x[\da-fA-F]+'), + ('NUMBER', r'\d+'), + ('ID', r'[A-Za-z][A-Za-z\d_]*'), + ('NEWLINE', r'\n'), + ('SKIP', r'[ \t]'), + ('COMMENTS', r'//.*'), + ('LEESTEKEN', r'==|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}'), + ('STRING', r"'.*?'") + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + while mo is not None: + typ = mo.lastgroup + val = mo.group(typ) + if typ == 'NEWLINE': + line_start = pos + line += 1 + elif typ == 'COMMENTS': + pass + elif typ == 'SKIP': + pass + else: + if typ == 'ID': + if val in keywords: + typ = val + elif typ == 'LEESTEKEN': + typ = val + elif typ == 'NUMBER': + val = int(val) + elif typ == 'HEXNUMBER': + val = int(val[2:], 16) + typ = 'NUMBER' + elif typ == 'REAL': + val = float(val) + elif typ == 'STRING': + val = val[1:-1] + loc = SourceLocation(line, mo.start()-line_start) + yield Token(typ, val, loc) + pos = mo.end() + mo = gettok(s, pos) + if pos != len(s): + col = pos - line_start + pos = line + raise CompilerException('Unexpected character {0}'.format(s[pos]), pos) + yield Token('END', '', line) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/c3/parser.py Fri Mar 01 10:24:01 2013 +0100 @@ -0,0 +1,217 @@ +from . import astnodes, lexer, semantics +from ppci.errors import CompilerException, SourceLocation + +# binop precedence for expressions: +binopPrecs = {'or': 5, 'and': 10, \ + '<': 20, '>': 20, '==': 20, '<=': 20, '>=': 20, '!=': 20, \ + '+': 30, '-': 30, '*': 40, '/': 40 } + +def getTokenPrecedence(typ): + if typ in binopPrecs: + return binopPrecs[typ] + return -1 + +class Parser: + """ Parses sourcecode into an abstract syntax tree (AST) """ + def __init__(self, sema, diag): + self.sema = sema + self.diag = diag + def parseSource(self, source): + self.initLex(source) + try: + self.parsePackage() + except CompilerException as e: + self.diag.diag(e) + def Error(self, msg): + raise CompilerException(msg, self.token.loc) + def skipToSemiCol(self): + while not (self.Peak == ';' or self.Peak == 'END'): + self.NextToken() + # Lexer helpers: + def Consume(self, typ): + if self.Peak == typ: + return self.NextToken() + else: + self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) + @property + def Peak(self): + return self.token.typ + @property + def PeakPrec(self): + return getTokenPrecedence(self.Peak) + def hasConsumed(self, typ): + if self.Peak == typ: + self.Consume(typ) + return True + return False + def NextToken(self): + t = self.token + if t.typ != 'END': + self.token = self.tokens.__next__() + return t + def initLex(self, source): + self.tokens = lexer.tokenize(source) # Lexical stage + self.token = self.tokens.__next__() + + def parsePackage(self): + self.Consume('package') + name = self.Consume('ID') + self.Consume(';') + self.sema.handlePackage(name.val, name.loc) + # TODO: parse uses + while self.Peak != 'END': + self.parseTopLevel() + self.Consume('END') + + def parseTopLevel(self): + is_public = self.hasConsumed('public') + if self.Peak == 'function': + self.parseFunctionDefinition(is_public) + elif self.Peak == 'var': + self.parseVarDef(is_public) + + def parseDesignator(self): + """ A designator designates an object """ + name = self.Consume('ID') + return name + + # Type system + def parseType(self): + d = self.parseDesignator() + return d + + # Variable declarations: + def parseVarDef(self, is_public): + self.Consume('var') + typ = self.parseType() + ID = self.Consume('ID') + self.Consume(';') + v = Variable(i.name, typename, public=is_public) + self.curScope.add(v) + + # Procedures + def parseFunctionDefinition(self, is_pub): + self.Consume('function') + returntype = self.parseType() + procname = self.Consume('ID') + self.Consume('(') + parameters = [] + if not self.hasConsumed(')'): + typ = self.parseType() + name = self.Consume('ID') + parameters.append(astnodes.Parameter(name, typ)) + while self.hasConsumed(','): + typ = self.parseType() + name = self.Consume('ID') + parameters.append(astnodes.Parameter(name, typ)) + self.Consume(')') + proctyp = astnodes.FunctionType(parameters, returntype) + body = self.parseCompoundStatement() + return astnodes.Procedure(procname, proctyp, body) + + # Statements: + def parseAssignment(self, lval): + self.Consume('=') + rval = self.parseExpression() + return astnodes.Assignment(lval, rval) + + def parseProcedureCall(self, procedure): + self.Consume('(') + args = [] + if not self.hasConsumed(')'): + args.append(self.parseExpression()) + while self.hasConsumed(','): + args.append(self.parseExpression()) + self.Consume(')') + return ProcedureCall(procedure, args) + + def parseLocal(self, t): + name = self.Consume('ID') + if self.hasConsumed('='): + ival = self.parseExpression() + else: + ival = None + self.sema.actOnLocal(t, name, ival) + def parseLocalDeclaration(self): + self.Consume('var') + t = self.parseType() + self.parseLocal(t) + while self.hasConsumed(','): + self.parseLocal(t) + + def parseIfStatement(self): + self.Consume('if') + self.Consume('(') + condition = self.parseExpression() + self.Consume(')') + truestatement = self.parseStatement() + if self.hasConsumed('else'): + els = self.parseStatement() + return astnodes.IfStatement(condition, truestatement, els) + return astnodes.IfStatement(condition, truestatement) + + def parseWhileStatement(self): + self.Consume('while') + self.Consume('(') + condition = self.parseExpression() + self.Consume(')') + statements = self.parseStatement() + def parseReturnStatement(self): + self.Consume('return') + expr = self.parseExpression() + + def parseCompoundStatement(self): + self.Consume('{') + statements = [self.parseStatement()] + while self.hasConsumed(';'): + statements.append(self.parseStatement()) + self.Consume('}') + return astnodes.CompoundStatement(statements) + + def parseStatement(self): + # Determine statement type based on the pending token: + if self.Peak == 'if': + return self.parseIfStatement() + elif self.Peak == 'while': + return self.parseWhileStatement() + elif self.Peak == '{': + return self.parseCompoundStatement() + elif self.Peak == 'var': + return self.parseLocalDeclaration() + elif self.Peak == 'return': + return self.parseReturnStatement() + elif self.Peak == 'ID': + # Assignment or procedure call + designator = self.parseDesignator() + if self.Peak == '(': + return self.parseProcedureCall(designator) + elif self.Peak == '=': + return self.parseAssignment(designator) + self.Error('Unable to determine statement') + + # Parsing expressions: + def parseExpression(self): + return self.parseBinopRhs(self.parsePrimary(), 0) + def parsePrimary(self): + if self.hasConsumed('('): + e = self.parseExpression() + self.Consume(')') + return e + elif self.Peak == 'NUMBER': + val = self.Consume('NUMBER') + return astnodes.Constant(val, val) + elif self.Peak == 'ID': + d = self.parseDesignator() + return d + self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) + + def parseBinopRhs(self, lhs, min_prec): + while self.PeakPrec >= min_prec: + op_prec = self.PeakPrec + op = self.Consume(self.Peak) + rhs = self.parsePrimary() + while self.PeakPrec > op_prec: + rhs = self.parseBinopRhs(rhs, self.PeakPrec) + lhs = astnodes.Binop(lhs, op, rhs) + return lhs +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/c3/semantics.py Fri Mar 01 10:24:01 2013 +0100 @@ -0,0 +1,50 @@ +from . import astnodes + +class Scope: + """ A scope contains all symbols in a scope """ + def __init__(self, parent=None): + self.symbols = {} + self.parent = parent + def getType(self, name): + t = self.getSymbol(name) + print(t) + assert isinstance(t, astnodes.Type) + return t + def getSymbol(self, name): + if name in self.symbols: + return self.symbols[name] + # Look for symbol: + if self.parent: + return self.parent.getSymbol(name) + raise CompilerException("Symbol {0} not found".format(name), name.loc) + def hasSymbol(self, name): + if name in self.symbols: + return True + if self.parent: + return self.parent.hasSymbol(name) + return False + + def addSymbol(self, sym): + self.symbols[sym.name] = sym + +def createBuiltins(scope): + scope.addSymbol(astnodes.BaseType('int')) + +class Semantics: + """ This class constructs the AST from parser input """ + def __init__(self, diag): + self.diag = diag + def handlePackage(self, name, loc): + self.mod = astnodes.Package(name) + self.mod.loc = loc + self.mod.scope = self.curScope = Scope() + createBuiltins(self.curScope) + def handleBinop(self, lhs, op, rhs): + pass + def actOnLocal(self, t, name, ival): + s = astnodes.Variable(name, t, False) + self.curScope.addSymbol(s) + def actOnType(self, tok): + # Try to lookup type, in case of failure return void + pass +
--- a/python/ppci/errors.py Fri Feb 22 10:33:48 2013 +0100 +++ b/python/ppci/errors.py Fri Mar 01 10:24:01 2013 +0100 @@ -1,15 +1,21 @@ -""" Error handling routines """ +""" + Error handling routines + Diagnostic utils +""" + +from collections import namedtuple + +SourceLocation = namedtuple('SourceLocation', ['row', 'col']) +SourceRange = namedtuple('SourceRange', ['p1', 'p2']) class CompilerException(Exception): - def __init__(self, msg, row=0, col=0, filename=None): + def __init__(self, msg, loc): self.msg = msg - self.row = row - self.col = col - self.filename = filename + self.loc = loc def __repr__(self): - return self.msg + return 'error {0} at {1}'.format(self.msg, self.loc) def __str__(self): - return self.msg + return 'error {0} at {1}'.format(self.msg, self.loc) class ErrorNode: def __init__(self, row, col, msg): @@ -25,23 +31,31 @@ def printError(source, e): def printLine(row, txt): print(str(row)+':'+txt) - if e.row == 0: + if e.loc.row == 0: print('Error: {0}'.format(e.msg)) else: lines = source.split('\n') - prerow = e.row - 3 + ro, co = e.loc.row, e.loc.col + prerow = ro - 2 if prerow < 1: prerow = 1 - afterrow = e.row + 3 + afterrow = ro + 3 if afterrow > len(lines): afterrow = len(lines) # print preceding source lines: - for r in range(prerow, e.row): + for r in range(prerow, ro): printLine(r, lines[r-1]) # print source line containing error: - printLine(e.row, lines[e.row-1]) - print(' '*(len(str(e.row)+':')+e.col-1) + '^ Error: {0}'.format(e.msg)) + printLine(ro, lines[ro-1]) + print(' '*(len(str(ro)+':')+co-1) + '^ Error: {0}'.format(e.msg)) # print trailing source line: - for r in range(e.row+1, afterrow+1): + for r in range(ro+1, afterrow+1): printLine(r, lines[r-1]) + +class Diagnostics: + def __init__(self): + self.diags = [] + def diag(self, d): + self.diags.append(d) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/testc3.py Fri Mar 01 10:24:01 2013 +0100 @@ -0,0 +1,53 @@ +import c3.parser, c3.semantics +from ppci.errors import printError, Diagnostics + +testsrc = """ +package test; + +public function void test1() +{ + var u32 b; + var int a = 10; + b = 20; + var int buf; + var int i; + i = 2; + if (i > 1) + { + buf = b + 22 * i - 13 + (55 * 2 *9-2) / 44 + } +} + +public function int t2(u32 a, u32 b) +{ + return a + b; + a = 2 +} + +""" + +def printAst(ast): + print(ast) + for c in ast.getChildren(): + printAst(c) + if isinstance(ast, c3.astnodes.Package): + print('PACK', ast.scope) + +def do(): + print('[0] source:') + print(testsrc) + print('[1] parsing') + diag = Diagnostics() + sema = c3.semantics.Semantics(diag) + p = c3.parser.Parser(sema, diag) + + p.parseSource(testsrc) + + for d in diag.diags: + print('ERROR:', d) + printError(testsrc, d) + print('[2] ast:') + printAst(sema.mod) + +do() +