# HG changeset patch # User Windel Bouwman # Date 1373015928 -7200 # Node ID 003c8a976fffebd0c38d4e6f5cbb7176b7a81519 # Parent 62386bcee1bae003c39d0da0c780af48f503adac Merge of semantics and parser again .. diff -r 62386bcee1ba -r 003c8a976fff README.md --- a/README.md Sun Jun 30 19:00:41 2013 +0200 +++ b/README.md Fri Jul 05 11:18:48 2013 +0200 @@ -1,4 +1,3 @@ - # Project goals * To write a microkernel sort of OS. @@ -7,10 +6,6 @@ * Create python scripts that form the major part of the OS. * Make IDE in python that can compile the OS. -Status badge: - -[![Build Status](https://drone.io/bitbucket.org/windel/lcfos/status.png)](https://drone.io/bitbucket.org/windel/lcfos/latest) - # Directory structure 'os' contains the os written in oberon like language. @@ -20,36 +15,12 @@ # Software dependencies * python3 * pyqt4 -Optional: - * bochs - * nasm # How to start the IDE -$ cd ide -$ python runide.py - -= About the C version of the OS = - -To build the C kernel, enter: -$ cd cos -$ make - -Running the OS with bochs: -$ bochs -q + cd python + python runide.py -Running the OS with kvm: -$ kvm -fda bootdisk.img -or: -$ qemu-kvm -fda bootdisk.img +[![Build Status](https://drone.io/bitbucket.org/windel/lcfos/status.png)](https://drone.io/bitbucket.org/windel/lcfos/latest) -required tools: -- bochs: for simulating the OS -- mtools: for copying files to the bootdisk -- nasm: for assembler instructions -- gcc: for compiling the C sources -- make: for building the system -- python 3: for building the initial ramdisk -Enjoy! - diff -r 62386bcee1ba -r 003c8a976fff python/asm.py --- a/python/asm.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/asm.py Fri Jul 05 11:18:48 2013 +0200 @@ -1,4 +1,4 @@ -import re, sys, argparse +import re, argparse import pyyacc from ppci import Token, CompilerError, SourceLocation from target import Target diff -r 62386bcee1ba -r 003c8a976fff python/c3/astnodes.py --- a/python/c3/astnodes.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/c3/astnodes.py Fri Jul 05 11:18:48 2013 +0200 @@ -1,18 +1,40 @@ """ -AST nodes for the c3 language. +AST (abstract syntax tree) nodes for the c3 language. +The tree is build by the parser. +Then it is checked +Finally code is generated from it. """ class Node: pass +# Modules +class Package(Node): + def __init__(self, name, loc): + self.name = name + self.loc = loc + self.declarations = [] + def __repr__(self): + return 'PACKAGE {}'.format(self.name) + class Designator(Node): - def __init__(self, tname): - self.tname = tname - def __repr__(self): - return 'DESIGNATOR {}'.format(self.tname) + def __init__(self, tname, loc): + self.tname = tname + self.loc = loc + def __repr__(self): + return 'DESIGNATOR {}'.format(self.tname) """ Type classes + +types must be comparable. + +There are the following types: +- base type +- struct type +- pointer type +- typedef type +- function type """ class Type(Node): @@ -33,6 +55,14 @@ params = ', '.join([str(v) for v in self.parametertypes]) return '{1} f({0})'.format(params, self.returntype) +class PointerType(Type): + def __init__(self, ptype): + self.ptype = ptype + +class StructureType(Type): + def __init__(self, mems): + self.mems = mems + class DefinedType(Type): def __init__(self, name, typ): self.name = name @@ -42,28 +72,28 @@ # Variables, parameters, local variables, constants: class Symbol(Node): - def __init__(self, name): + def __init__(self, name): self.name = name self.refs = [] - def addRef(self, r): + def addRef(self, r): self.refs.append(r) - @property - def References(self): + @property + def References(self): return self.refs class Constant(Symbol): - def __init__(self, name, typ, value): + def __init__(self, name, typ, value): super().__init__(name) self.typ = typ self.value = value - def __repr__(self): + def __repr__(self): return 'CONSTANT {0} = {1}'.format(self.name, self.value) class Variable(Symbol): - def __init__(self, name, typ, ival=None): + def __init__(self, name, typ): super().__init__(name) self.typ = typ - self.ival = ival + self.ival = None self.isLocal = False self.isReadOnly = False self.isParameter = False @@ -72,11 +102,13 @@ # Procedure types class Function(Symbol): - """ Actual implementation of a function """ - def __init__(self, name): - super().__init__(name) - def __repr__(self): - return '{}'.format(self.name) + """ Actual implementation of a function """ + def __init__(self, name, loc): + super().__init__(name) + self.loc = loc + + def __repr__(self): + return '{}'.format(self.name) # Operations / Expressions: class Unop(Node): @@ -87,32 +119,29 @@ return 'UNOP {}'.format(self.op) class Binop(Node): - def __init__(self, a, op, b): + def __init__(self, a, op, b, loc): self.a = a self.b = b self.op = op # Operation: '+', '-', '*', '/', 'mod' + self.loc = loc def __repr__(self): return 'BINOP {}'.format(self.op) class VariableUse(Node): - def __init__(self, target): + def __init__(self, target, loc): self.target = target + self.loc = loc def __repr__(self): nm = self.target.name if hasattr(self.target, 'name') else '' return 'VAR USE {}'.format(nm) class Literal(Node): - def __init__(self, val): + def __init__(self, val, loc): self.val = val + self.loc = loc def __repr__(self): return 'LITERAL {}'.format(self.val) -# Modules -class Package(Node): - def __init__(self, name): - self.name = name - def __repr__(self): - return 'PACKAGE {}'.format(self.name) # Statements class CompoundStatement(Node): @@ -132,31 +161,35 @@ return 'RETURN STATEMENT' class Assignment(Node): - def __init__(self, lval, rval): + def __init__(self, lval, rval, loc): self.lval = lval self.rval = rval + self.loc = loc def __repr__(self): return 'ASSIGNMENT' class FunctionCall(Node): - def __init__(self, proc, args): + def __init__(self, proc, args, loc): self.proc = proc self.args = args + self.loc = loc def __repr__(self): return 'CALL {0} '.format(self.proc) class IfStatement(Node): - def __init__(self, condition, truestatement, falsestatement): + def __init__(self, condition, truestatement, falsestatement, loc): self.condition = condition self.truestatement = truestatement self.falsestatement = falsestatement + self.loc = loc def __repr__(self): return 'IF-statement' class WhileStatement(Node): - def __init__(self, condition, statement): + def __init__(self, condition, statement, loc): self.condition = condition - self.dostatement = statement + self.statement = statement + self.loc = loc def __repr__(self): return 'WHILE-statement' diff -r 62386bcee1ba -r 003c8a976fff python/c3/builder.py --- a/python/c3/builder.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/c3/builder.py Fri Jul 05 11:18:48 2013 +0200 @@ -16,9 +16,9 @@ def build(self, src): """ Create IR-code from sources """ pkg = self.parser.parseSource(src) - self.pkg = pkg if not pkg: return + self.pkg = pkg if not self.al.analyzePackage(pkg): return if not self.tc.checkPackage(pkg): diff -r 62386bcee1ba -r 003c8a976fff python/c3/lexer.py --- a/python/c3/lexer.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/c3/lexer.py Fri Jul 05 11:18:48 2013 +0200 @@ -6,9 +6,10 @@ Lexical analyzer part. Splits the input character stream into tokens. """ -keywords = ['and', 'or', 'not','true', 'false', \ +keywords = ['and', 'or', 'not', 'true', 'false', \ 'else', 'if', 'while', 'return', \ 'function', 'var', 'type', 'const', \ + 'struct', \ 'import', 'package' ] def tokenize(s): diff -r 62386bcee1ba -r 003c8a976fff python/c3/parser.py --- a/python/c3/parser.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/c3/parser.py Fri Jul 05 11:18:48 2013 +0200 @@ -1,4 +1,4 @@ -from . import astnodes, lexer, semantics +from . import astnodes, lexer from ppci import CompilerError # binop precedence for expressions: @@ -7,140 +7,173 @@ '+': 30, '-': 30, '*': 40, '/': 40 } class Parser: - """ Parses sourcecode into an abstract syntax tree (AST) """ - def __init__(self, diag): - self.sema = semantics.Semantics(diag) - self.diag = diag - def parseSource(self, source): + """ Parses sourcecode into an abstract syntax tree (AST) """ + def __init__(self, diag): + self.diag = diag + + def parseSource(self, source): self.initLex(source) - self.sema.reinit() try: self.parsePackage() - return self.sema.mod + return self.mod except CompilerError as e: self.diag.addDiag(e) - def Error(self, msg): + def Error(self, msg): raise CompilerError(msg, self.token.loc) - # Lexer helpers: - def Consume(self, typ): + # Lexer helpers: + def Consume(self, typ): if self.Peak == typ: return self.NextToken() else: self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) - @property - def Peak(self): + @property + def Peak(self): return self.token.typ - @property - def PeakPrec(self): + @property + def PeakPrec(self): if self.Peak in binopPrecs: return binopPrecs[self.Peak] return -1 - def hasConsumed(self, typ): + def hasConsumed(self, typ): if self.Peak == typ: self.Consume(typ) return True return False - def NextToken(self): + + def NextToken(self): t = self.token if t.typ != 'END': self.token = self.tokens.__next__() return t - def initLex(self, source): + + def initLex(self, source): self.tokens = lexer.tokenize(source) # Lexical stage self.token = self.tokens.__next__() - def skipToSemi(self, tt): - while self.Peak != tt and self.Peak != 'END': - self.NextToken() - if self.Peak == tt: - self.Consume(tt) - - def parsePackage(self): + + def parseUses(self): + pass + + def parsePackage(self): self.Consume('package') name = self.Consume('ID') self.Consume(';') - self.sema.handlePackage(name.val, name.loc) + self.mod = astnodes.Package(name.val, name.loc) + self.parseUses() # TODO: parse uses while self.Peak != 'END': self.parseTopLevel() self.Consume('END') - def parseTopLevel(self): + def parseTopLevel(self): if self.Peak == 'function': - self.parseFunctionDefinition() + self.parseFunctionDef() elif self.Peak == 'var': self.parseVarDef() elif self.Peak == 'const': self.parseConstDef() + elif self.Peak == 'type': + self.parseTypeDef() else: - self.Error('Expected function or variable') + self.Error('Expected function, var, const or type') - def parseDesignator(self): + def parseDesignator(self): """ A designator designates an object """ name = self.Consume('ID') - return self.sema.actOnDesignator(name.val, name.loc) - - # Type system - def parseType(self): - d = self.parseDesignator() + d = astnodes.Designator(name.val, name.loc) return d - # Variable declarations: - def parseVarDef(self): + # Type system + def parseTypeSpec(self): + # For now, do simple type spec, just parse an ID: + return self.parseDesignator() + if self.Peak == 'struct': + self.Consume('struct') + self.Consume('{') + mems = [] + while self.Peak != '}': + mem_t = self.parseTypeSpec() + mem_n = self.Consume('ID') + mems.append((mem_t, mem_n)) + while self.hasConsumed(','): + mem_n = self.Consume('ID') + mems.append((mem_t, mem_n)) + self.Consume(';') + self.Consume('}') + theT = astnodes.StructureType(mems) + else: + theT = self.parseDesignator() + # Check for pointer suffix: + while self.hasConsumed('*'): + theT = astnodes.PointerType(theT) + return theT + + def parseTypeDef(self): + self.Consume('type') + newtype = self.parseTypeSpec() + typename = self.Consume('ID') + # TODO: action here :) + self.Consume(';') + return astnodes.DefinedType(typename, newtype) + + # Variable declarations: + def parseVarDef(self): self.Consume('var') - t = self.parseType() + t = self.parseTypeSpec() def parseVar(): name = self.Consume('ID') - ival = None + v = astnodes.Variable(name.val, t) + v.loc = name.loc if self.hasConsumed('='): - ival = self.parseExpression() - self.sema.actOnVarDef(name.val, name.loc, t, ival) + v.ival = self.parseExpression() parseVar() while self.hasConsumed(','): parseVar() self.Consume(';') - def parseConstDef(self): + def parseConstDef(self): self.Consume('const') - t = self.parseType() + t = self.parseTypeSpec() def parseConst(): name = self.Consume('ID') self.Consume('=') val = self.parseExpression() - self.sema.actOnConstDef(name.val, name.loc, t, val) + c = astnodes.Constant(name.val, t, val) + c.loc = name.loc parseConst() while self.hasConsumed(','): parseConst() self.Consume(';') - # Procedures - def parseFunctionDefinition(self): - self.Consume('function') - returntype = self.parseType() - pname = self.Consume('ID') - self.sema.actOnFuncDef1(pname.val, pname.loc) + # Procedures + def parseFunctionDef(self): + loc = self.Consume('function').loc + returntype = self.parseTypeSpec() + fname = self.Consume('ID').val + f = astnodes.Function(fname, loc) self.Consume('(') parameters = [] if not self.hasConsumed(')'): def parseParameter(): - typ = self.parseType() + typ = self.parseTypeSpec() name = self.Consume('ID') - parameters.append(self.sema.actOnParameter(name.val, name.loc, typ)) + param = astnodes.Variable(name.val, typ) + param.loc = name.loc + parameters.append(param) parseParameter() while self.hasConsumed(','): parseParameter() self.Consume(')') body = self.parseCompoundStatement() - self.sema.actOnFuncDef2(parameters, returntype, body) - # Statements: - def parseAssignment(self, lval): - lval = self.sema.actOnVariableUse(lval, lval.loc) + # Statements: + def parseAssignment(self, lval): + lval = astnodes.VariableUse(lval, lval.loc) loc = self.Consume('=').loc rval = self.parseExpression() self.Consume(';') - return self.sema.actOnAssignment(lval, rval, loc) + return astnodes.Assignment(lval, rval, loc) - def parseProcedureCall(self, func): + def parseCall(self, func): self.Consume('(') args = [] if not self.hasConsumed(')'): @@ -148,9 +181,9 @@ while self.hasConsumed(','): args.append(self.parseExpression()) self.Consume(')') - return self.sema.actOnFunctionCall(func, args, func.loc) + return astnodes.FunctionCall(func, args, func.loc) - def parseIfStatement(self): + def parseIfStatement(self): loc = self.Consume('if').loc self.Consume('(') condition = self.parseExpression() @@ -160,23 +193,23 @@ no = self.parseCompoundStatement() else: no = astnodes.EmptyStatement() - return self.sema.actOnIfStatement(condition, yes, no, loc) + return astnodes.IfStatement(condition, yes, no, loc) - def parseWhileStatement(self): - self.Consume('while') + def parseWhileStatement(self): + loc = self.Consume('while').loc self.Consume('(') condition = self.parseExpression() self.Consume(')') statements = self.parseCompoundStatement() - return astnodes.WhileStatement(condition, statements) + return astnodes.WhileStatement(condition, statements, loc) - def parseReturnStatement(self): + def parseReturnStatement(self): self.Consume('return') expr = self.parseExpression() self.Consume(';') return astnodes.ReturnStatement(expr) - def parseCompoundStatement(self): + def parseCompoundStatement(self): self.Consume('{') statements = [] while not self.hasConsumed('}'): @@ -185,7 +218,7 @@ statements.append(s) return astnodes.CompoundStatement(statements) - def parseStatement(self): + def parseStatement(self): # Determine statement type based on the pending token: if self.Peak == 'if': return self.parseIfStatement() @@ -200,49 +233,51 @@ return astnodes.EmptyStatement() elif self.Peak == 'return': return self.parseReturnStatement() - elif self.Peak == 'ID': + else: designator = self.parseDesignator() if self.Peak == '(': - return self.parseProcedureCall(designator) + return self.parseCall(designator) elif self.Peak == '=': return self.parseAssignment(designator) - self.Error('Unable to determine statement') + else: + self.Error('Unable to determine statement') - # Parsing expressions: - def parseExpression(self): + # Parsing expressions: + def parseExpression(self): return self.parseBinopRhs(self.parsePrimary(), 0) - def parsePrimary(self): + + def parsePrimary(self): if self.hasConsumed('('): e = self.parseExpression() self.Consume(')') return e elif self.Peak == 'NUMBER': val = self.Consume('NUMBER') - return self.sema.actOnNumber(val.val, val.loc) + return astnodes.Literal(val.val, val.loc) elif self.Peak == 'REAL': val = self.Consume('REAL') - return self.sema.actOnNumber(val.val, val.loc) + return astnodes.Literal(val.val, val.loc) elif self.Peak == 'true': val = self.Consume('true') - return self.sema.actOnNumber(True, val.loc) + return astnodes.Literal(True, val.loc) elif self.Peak == 'false': val = self.Consume('false') - return self.sema.actOnNumber(False, val.loc) + return astnodes.Literal(False, val.loc) elif self.Peak == 'ID': d = self.parseDesignator() if self.Peak == '(': - return self.parseProcedureCall(d) + return self.parseCall(d) else: - return self.sema.actOnVariableUse(d, d.loc) + return astnodes.VariableUse(d, d.loc) self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) - def parseBinopRhs(self, lhs, min_prec): + def parseBinopRhs(self, lhs, min_prec): while self.PeakPrec >= min_prec: op_prec = self.PeakPrec op = self.Consume(self.Peak) rhs = self.parsePrimary() while self.PeakPrec > op_prec: rhs = self.parseBinopRhs(rhs, self.PeakPrec) - lhs = self.sema.actOnBinop(lhs, op.typ, rhs, op.loc) + lhs = astnodes.Binop(lhs, op.typ, rhs, op.loc) return lhs diff -r 62386bcee1ba -r 003c8a976fff python/c3/semantics.py --- a/python/c3/semantics.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/c3/semantics.py Fri Jul 05 11:18:48 2013 +0200 @@ -6,74 +6,16 @@ """ This class constructs the AST from parser input """ def __init__(self, diag): self.diag = diag - def reinit(self): - # Set mod to empty package: - self.mod = astnodes.Package('unnamed') - self.mod.scope = Scope(topScope) def addSymbol(self, s): if self.curScope.hasSymbol(s.name): msg = 'Redefinition of {0}'.format(s.name) raise CompilerError(msg, s.loc) else: self.curScope.addSymbol(s) - def handlePackage(self, name, loc): - self.mod = astnodes.Package(name) - self.mod.loc = loc - self.mod.scope = self.curScope = Scope(topScope) - def actOnVarDef(self, name, loc, t, ival): - s = astnodes.Variable(name, t) - s.loc = loc - self.addSymbol(s) - def actOnConstDef(self, name, loc, t, val): - s = astnodes.Constant(name, t, val) - s.loc = loc - self.addSymbol(s) - def actOnFuncDef1(self, name, loc): - self.curFunc = astnodes.Function(name) - self.curFunc.loc = loc - self.addSymbol(self.curFunc) - self.curScope = self.curFunc.scope = Scope(self.curScope) - def actOnParameter(self, name, loc, t): - p = astnodes.Variable(name, t) - p.isParameter = True - p.loc = loc - self.addSymbol(p) - return p def actOnFuncDef2(self, parameters, returntype, body): self.curFunc.body = body paramtypes = [p.typ for p in parameters] self.curFunc.typ = astnodes.FunctionType(paramtypes, returntype) self.curFunc = None self.curScope = self.curScope.parent - def actOnType(self, tok): - # Try to lookup type, in case of failure return void - pass - def actOnDesignator(self, tname, loc): - d = astnodes.Designator(tname) - d.scope = self.curScope - d.loc = loc - return d - def actOnBinop(self, lhs, op, rhs, loc): - bo = astnodes.Binop(lhs, op, rhs) - bo.loc = loc - return bo - def actOnNumber(self, num, loc): - n = astnodes.Literal(num) - n.loc = loc - return n - def actOnVariableUse(self, d, loc): - vu = astnodes.VariableUse(d) - vu.loc = loc - return vu - def actOnAssignment(self, lval, rval, loc): - a = astnodes.Assignment(lval, rval) - a.loc = loc - return a - def actOnFunctionCall(self, func, args, loc): - fc = astnodes.FunctionCall(func, args) - fc.loc = loc - return fc - def actOnIfStatement(self, cond, yes, no, loc): - i = astnodes.IfStatement(cond, yes, no) - i.loc = loc - return i + diff -r 62386bcee1ba -r 003c8a976fff python/testc3.py --- a/python/testc3.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/testc3.py Fri Jul 05 11:18:48 2013 +0200 @@ -56,28 +56,6 @@ """ - -def c3compile(src, diag): - # Structures: - builder = c3.Builder(diag) - ir = builder.build(src) - # optional optimize here - x86gen = x86.X86CodeGenSimple(diag) - ok = len(diag.diags) == 0 - if not ok: - return - print('generating x86 code') - x86gen.genBin(ir) - with open('dummydummy.asm', 'w') as f: - f.write('bits 64\n') - for a in x86gen.asm: - print(a) - f.write(str(a) + '\n') - -def do(): - diag = ppci.DiagnosticsManager() - c3compile(testsrc, diag) - class testLexer(unittest.TestCase): def testUnexpectedCharacter(self): snippet = """ var s \u6c34 """ @@ -224,6 +202,57 @@ if not ircode: self.diag.printErrors(snippet) self.assertTrue(ircode) + + @unittest.skip + def testPointerType(self): + snippet = """ + package testpointer; + var int* pa; + function void t(int a, double b) + { + *pa = 22; + } + """ + self.diag.clear() + ircode = self.builder.build(snippet) + if not ircode: + self.diag.printErrors(snippet) + self.assertTrue(ircode) + + @unittest.skip + def testComplexType(self): + snippet = """ + package testpointer; + type int my_int; + + type struct { + int x, y; + } point; + + type struct { + int mem1; + int memb2; + point P1; + } my_struct; + + type my_struct* my_sptr; + + function void t(int a, double b, my_sptr x) + { + var my_struct *msp; + + msp = x; + *pa = 22; + x->memb2 = *pa + a * b; + + mxp->P1.x = a * x->P1.y; + } + """ + self.diag.clear() + ircode = self.builder.build(snippet) + if not ircode: + self.diag.printErrors(snippet) + self.assertTrue(ircode) def test2(self): # testsrc2 is valid code: @@ -254,7 +283,6 @@ self.assertTrue(ir) if __name__ == '__main__': - do() unittest.main() diff -r 62386bcee1ba -r 003c8a976fff python/zcc.py --- a/python/zcc.py Sun Jun 30 19:00:41 2013 +0200 +++ b/python/zcc.py Fri Jul 05 11:18:48 2013 +0200 @@ -1,6 +1,6 @@ #!/usr/bin/python -import sys, os, argparse +import sys, argparse import c3, ppci, codegen import arm_cm3 import codegenarm @@ -8,7 +8,8 @@ # Parse arguments: parser = argparse.ArgumentParser(description='lcfos Compiler') -parser.add_argument('source', type=argparse.FileType('r'), help='the source file to build') +parser.add_argument('source', type=argparse.FileType('r'), \ + help='the source file to build') parser.add_argument('-d', '--dumpir', action='store_true', help="Dump IR-code") parser.add_argument('-o', '--output', help='Output file', metavar='filename') @@ -45,7 +46,6 @@ # TODO: store data if __name__ == '__main__': - args = parser.parse_args() - print(args, type(args)) - main(args) + arguments = parser.parse_args() + main(arguments)