# HG changeset patch # User Windel Bouwman # Date 1385031447 -3600 # Node ID 1c7c1e619be8e8b56d6ba1af725ac45da4ae260d # Parent d9df72971cbf0acdd9de22a217645cee1363b43d File movage diff -r d9df72971cbf -r 1c7c1e619be8 cos/bochsrc.txt --- a/cos/bochsrc.txt Fri Nov 15 13:52:32 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -display_library: x, options="gui_debug" # use GTK debugger gui -romimage: file=$BXSHARE/BIOS-bochs-latest -cpu: count=1, ips=1500000, reset_on_triple_fault=1, ignore_bad_msrs=1, msrs="msrs.def" -cpuid: mmx=1, sep=1, sse=sse4_2, aes=1, movbe=1, xsave=1 -memory: guest=128, host=256 -megs: 48 -vgaromimage: file=$BXSHARE/VGABIOS-lgpl-latest -vga: extension=vbe -floppya: image=bootdisk.img, status=inserted -ata0: enabled=1, ioaddr1=0x1f0, ioaddr2=0x3f0, irq=14 -ata1: enabled=1, ioaddr1=0x170, ioaddr2=0x370, irq=15 -ata2: enabled=0, ioaddr1=0x1e8, ioaddr2=0x3e0, irq=11 -ata3: enabled=0, ioaddr1=0x168, ioaddr2=0x360, irq=9 -boot: floppy -floppy_bootsig_check: disabled=0 -panic: action=ask -error: action=report -info: action=report -debug: action=ignore -debugger_log: - - -vga_update_interval: 300000 -keyboard_serial_delay: 250 -keyboard_paste_delay: 100000 -mouse: enabled=0 -private_colormap: enabled=0 -keyboard_mapping: enabled=0, map= -i440fxsupport: enabled=1 - -magic_break: enabled=1 - diff -r d9df72971cbf -r 1c7c1e619be8 python/asm.py --- a/python/asm.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/asm.py Thu Nov 21 11:57:27 2013 +0100 @@ -45,15 +45,16 @@ elif typ == 'STRING': val = val[1:-1] col = mo.start() - line_start - loc = SourceLocation(line, col, 0) # TODO retrieve length? + loc = SourceLocation('', line, col, 0) # TODO retrieve length? yield Token(typ, val, loc) pos = mo.end() mo = gettok(s, pos) if pos != len(s): col = pos - line_start - loc = SourceLocation(line, col, 0) + loc = SourceLocation('', line, col, 0) raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) + class Lexer: def __init__(self, src): self.tokens = tokenize(src) @@ -66,6 +67,7 @@ def Peak(self): return self.curTok + class Parser: def __init__(self): # Construct a parser given a grammar: diff -r d9df72971cbf -r 1c7c1e619be8 python/c3/__init__.py --- a/python/c3/__init__.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/__init__.py Thu Nov 21 11:57:27 2013 +0100 @@ -6,8 +6,7 @@ # Convenience imports: from .parser import Parser -from .typecheck import TypeChecker -from .analyse import Analyzer +from .analyse import Analyzer, TypeChecker from .codegenerator import CodeGenerator from .astprinter import AstPrinter from .visitor import Visitor diff -r d9df72971cbf -r 1c7c1e619be8 python/c3/analyse.py --- a/python/c3/analyse.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/analyse.py Thu Nov 21 11:57:27 2013 +0100 @@ -1,14 +1,14 @@ import logging from .visitor import Visitor from .astnodes import * -from .scope import Scope, topScope -from .typecheck import theType +from .scope import * + class Analyzer: """ Context handling is done here. Scope is attached to the correct modules. - This class checks names and references + This class checks names and references. """ def __init__(self, diag): self.diag = diag @@ -18,7 +18,7 @@ self.logger.info('Checking package {}'.format(pkg.name)) self.ok = True visitor = Visitor() - # Prepare top level scope: + # Prepare top level scope and set scope to all objects: self.scopeStack = [topScope] modScope = Scope(self.CurrentScope) self.scopeStack.append(modScope) @@ -34,7 +34,6 @@ for x in ip.declarations: modScope.addSymbol(x) visitor.visit(pkg, self.findRefs) - visitor.visit(pkg, self.sanity) return self.ok def error(self, msg, loc=None): @@ -82,12 +81,9 @@ s.addRef(None) return s else: - self.ok = False - msg = 'Cannot resolve name {0}'.format(d.tname) - self.diag.error(msg, d.loc) + self.error('Cannot resolve name {0}'.format(d.tname), d.loc) def resolveType(self, t, scope): - # TODO: what about structs? if type(t) is PointerType: t.ptype = self.resolveType(t.ptype, scope) return t @@ -131,9 +127,192 @@ elif type(sym) is DefinedType: sym.typ = self.resolveType(sym.typ, sym.scope) - def sanity(self, sym): - if type(sym) is FunctionType: +# Type checking: + +def theType(t): + """ + Recurse until a 'real' type is found + """ + if type(t) is DefinedType: + return theType(t.typ) + return t + +def equalTypes(a, b): + """ + Compare types a and b for equality. + Not equal until proven otherwise. + """ + # Recurse into named types: + a = theType(a) + b = theType(b) + + # Compare for structural equivalence: + if type(a) is type(b): + if type(a) is BaseType: + return a.name == b.name + elif type(a) is PointerType: + return equalTypes(a.ptype, b.ptype) + elif type(a) is StructureType: + if len(a.mems) != len(b.mems): + return False + for amem, bmem in zip(a.mems, b.mems): + if not equalTypes(amem.typ, bmem.typ): + return False + return True + else: + raise Exception('Type compare for {} not implemented'.format(type(a))) + return False + +def canCast(fromT, toT): + fromT = theType(fromT) + toT = theType(toT) + if isinstance(fromT, PointerType) and isinstance(toT, PointerType): + return True + elif fromT is intType and isinstance(toT, PointerType): + return True + return False + +def expectRval(s): + # TODO: solve this better + s.expect_rvalue = True + +class TypeChecker: + def __init__(self, diag): + self.diag = diag + + def error(self, msg, loc): + """ + Wrapper that registers the message and marks the result invalid + """ + self.diag.error(msg, loc) + self.ok = False + + def checkPackage(self, pkg): + self.ok = True + visitor = Visitor() + visitor.visit(pkg, f_post=self.check2) + return self.ok + + def check2(self, sym): + if type(sym) in [IfStatement, WhileStatement]: + if not equalTypes(sym.condition.typ, boolType): + msg = 'Condition must be of type {}'.format(boolType) + self.error(msg, sym.condition.loc) + elif type(sym) is Assignment: + l, r = sym.lval, sym.rval + if not equalTypes(l.typ, r.typ): + msg = 'Cannot assign {} to {}'.format(r.typ, l.typ) + self.error(msg, sym.loc) + if not l.lvalue: + self.error('No valid lvalue {}'.format(l), l.loc) + #if sym.rval.lvalue: + # self.error('Right hand side must be an rvalue', sym.rval.loc) + expectRval(sym.rval) + elif type(sym) is ReturnStatement: pass - elif type(sym) is Function: + elif type(sym) is FunctionCall: + # Check arguments: + ngiv = len(sym.args) + ptypes = sym.proc.typ.parametertypes + nreq = len(ptypes) + if ngiv != nreq: + self.error('Function {2}: {0} arguments required, {1} given'.format(nreq, ngiv, sym.proc.name), sym.loc) + else: + for a, at in zip(sym.args, ptypes): + expectRval(a) + if not equalTypes(a.typ, at): + self.error('Got {0}, expected {1}'.format(a.typ, at), a.loc) + # determine return type: + sym.typ = sym.proc.typ.returntype + elif type(sym) is VariableUse: + sym.lvalue = True + if isinstance(sym.target, Variable): + sym.typ = sym.target.typ + else: + print('warning {} has no target, defaulting to int'.format(sym)) + sym.typ = intType + elif type(sym) is Literal: + sym.lvalue = False + if type(sym.val) is int: + sym.typ = intType + elif type(sym.val) is float: + sym.typ = doubleType + elif type(sym.val) is bool: + sym.typ = boolType + else: + raise Exception('Unknown literal type'.format(sym.val)) + elif type(sym) is Unop: + if sym.op == '&': + sym.typ = PointerType(sym.a.typ) + sym.lvalue = False + else: + raise Exception('Unknown unop {0}'.format(sym.op)) + elif type(sym) is Deref: + # pointer deref + sym.lvalue = True + # check if the to be dereferenced variable is a pointer type: + ptype = theType(sym.ptr.typ) + if type(ptype) is PointerType: + sym.typ = ptype.ptype + else: + self.error('Cannot dereference non-pointer type {}'.format(ptype), sym.loc) + sym.typ = intType + elif type(sym) is FieldRef: + basetype = sym.base.typ + sym.lvalue = sym.base.lvalue + basetype = theType(basetype) + if type(basetype) is StructureType: + if basetype.hasField(sym.field): + sym.typ = basetype.fieldType(sym.field) + else: + self.error('{} does not contain field {}'.format(basetype, sym.field), sym.loc) + sym.typ = intType + else: + self.error('Cannot select field {} of non-structure type {}'.format(sym.field, basetype), sym.loc) + sym.typ = intType + elif type(sym) is Binop: + sym.lvalue = False + if sym.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']: + expectRval(sym.a) + expectRval(sym.b) + if equalTypes(sym.a.typ, sym.b.typ): + if equalTypes(sym.a.typ, intType): + sym.typ = sym.a.typ + else: + self.error('Can only add integers', sym.loc) + sym.typ = intType + else: + # assume void here? TODO: throw exception! + sym.typ = intType + self.error('Types unequal {} != {}'.format(sym.a.typ, sym.b.typ), sym.loc) + elif sym.op in ['>', '<', '==', '<=', '>=']: + expectRval(sym.a) + expectRval(sym.b) + sym.typ = boolType + if not equalTypes(sym.a.typ, sym.b.typ): + self.error('Types unequal {} != {}'.format(sym.a.typ, sym.b.typ), sym.loc) + elif sym.op in ['or', 'and']: + sym.typ = boolType + if not equalTypes(sym.a.typ, boolType): + self.error('Must be {0}'.format(boolType), sym.a.loc) + if not equalTypes(sym.b.typ, boolType): + self.error('Must be {0}'.format(boolType), sym.b.loc) + else: + raise Exception('Unknown binop {0}'.format(sym.op)) + elif isinstance(sym, Variable): + # check initial value type: + # TODO pass - + elif type(sym) is TypeCast: + if canCast(sym.a.typ, sym.to_type): + sym.typ = sym.to_type + else: + self.error('Cannot cast {} to {}'.format(sym.a.typ, sym.to_type), sym.loc) + sym.typ = intType + elif type(sym) is Constant: + if not equalTypes(sym.typ, sym.value.typ): + self.error('Cannot assign {0} to {1}'.format(sym.value.typ, sym.typ), sym.loc) + elif type(sym) in [CompoundStatement, Package, Function, FunctionType, ExpressionStatement, DefinedType]: + pass + else: + raise NotImplementedError('Unknown type check {0}'.format(sym)) diff -r d9df72971cbf -r 1c7c1e619be8 python/c3/builder.py --- a/python/c3/builder.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/builder.py Thu Nov 21 11:57:27 2013 +0100 @@ -4,13 +4,13 @@ from . astprinter import AstPrinter import glob + class Builder: """ Generates IR-code from c3 source. Reports errors to the diagnostics system """ def __init__(self, diag): - self.pack_dir = None self.logger = logging.getLogger('c3') self.diag = diag self.parser = Parser(diag) @@ -23,44 +23,29 @@ """ package provider for use when analyzing """ if pname in self.packages: return self.packages[pname] - else: - # Try to lookup package from file - fns = glob.glob('./**/{}.c3'.format(pname)) - if fns: - with open(fns[0]) as f: - src = f.read() - self.build(src) - if self.pack_dir: - fns = glob.glob('{}/{}.c3'.format(self.pack_dir, pname)) - if fns: - with open(fns[0]) as f: - src = f.read() - self.build(src) - if pname in self.packages: - return self.packages[pname] - def parse(self, src): - pkg = self.parser.parseSource(src) - if not pkg: - return + def checkSource(self, srcs, imps=[]): + """ Performs syntax and type check. """ + # Parse source: + for src in srcs: + pkg = self.parser.parseSource(src) + src.close() + if not pkg: + return + # Store for later use: + self.packages[pkg.name] = pkg - # TODO: merge the two below? - #AstPrinter().printAst(pkg) - if not self.al.analyzePackage(pkg, self): - return - if not self.tc.checkPackage(pkg): - return + for pkg in self.packages.values(): + # Only return ircode when everything is OK + # TODO: merge the two below? + if not self.al.analyzePackage(pkg, self): + return + if not self.tc.checkPackage(pkg): + return + yield pkg - # Store for later use: - self.packages[pkg.name] = pkg - return pkg - - def build(self, src, pack_dir=None): + def build(self, srcs, imps=[]): """ Create IR-code from sources """ - self.pack_dir = pack_dir - pkg = self.parse(src) + for pkg in self.checkSource(srcs, imps): + yield self.cg.gencode(pkg) - # Only return ircode when everything is OK - if pkg: - return self.cg.gencode(pkg) - diff -r d9df72971cbf -r 1c7c1e619be8 python/c3/codegenerator.py --- a/python/c3/codegenerator.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/codegenerator.py Thu Nov 21 11:57:27 2013 +0100 @@ -3,7 +3,7 @@ from . import astnodes from .scope import boolType, intType from ppci import CompilerError -from .typecheck import theType +from .analyse import theType class CodeGenerator(ir.Builder): diff -r d9df72971cbf -r 1c7c1e619be8 python/c3/lexer.py --- a/python/c3/lexer.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/lexer.py Thu Nov 21 11:57:27 2013 +0100 @@ -12,14 +12,18 @@ 'struct', 'cast', \ 'import', 'module' ] -def tokenize(s): - """ +def tokenize(input_file): + """ Tokenizer, generates an iterator that returns tokens! + Input is a file like object. + This GREAT example was taken from python re doc page! - """ - tok_spec = [ + """ + filename = input_file.name if hasattr(input_file, 'name') else '' + s = input_file.read() + tok_spec = [ ('REAL', r'\d+\.\d+'), ('HEXNUMBER', r'0x[\da-fA-F]+'), ('NUMBER', r'\d+'), @@ -32,13 +36,13 @@ ('LEESTEKEN', r'==|->|<<|>>|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|'), ('STRING', r"'.*?'") ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - incomment = False - while mo is not None: + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + incomment = False + while mo is not None: typ = mo.lastgroup val = mo.group(typ) if typ == 'NEWLINE': @@ -69,14 +73,14 @@ val = float(val) elif typ == 'STRING': val = val[1:-1] - loc = SourceLocation(line, mo.start()-line_start, mo.end() - mo.start()) + loc = SourceLocation(filename, line, mo.start() - line_start, mo.end() - mo.start()) yield Token(typ, val, loc) pos = mo.end() mo = gettok(s, pos) - if pos != len(s): - col = pos - line_start - loc = SourceLocation(line, col, 1) - raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc) - loc = SourceLocation(line, 0, 0) - yield Token('END', '', loc) + if pos != len(s): + col = pos - line_start + loc = SourceLocation(filename, line, col, 1) + raise CompilerError('Unexpected character "{0}"'.format(s[pos]), loc) + loc = SourceLocation(filename, line, 0, 0) + yield Token('END', '', loc) diff -r d9df72971cbf -r 1c7c1e619be8 python/c3/typecheck.py --- a/python/c3/typecheck.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/c3/typecheck.py Thu Nov 21 11:57:27 2013 +0100 @@ -1,192 +1,4 @@ from .astnodes import * -from .scope import * from .visitor import Visitor -def theType(t): - """ - Recurse until a 'real' type is found - """ - if type(t) is DefinedType: - return theType(t.typ) - return t -def equalTypes(a, b): - """ - Compare types a and b for equality. - Not equal until proven otherwise. - """ - # Recurse into named types: - a = theType(a) - b = theType(b) - - # Compare for structural equivalence: - if type(a) is type(b): - if type(a) is BaseType: - return a.name == b.name - elif type(a) is PointerType: - return equalTypes(a.ptype, b.ptype) - elif type(a) is StructureType: - if len(a.mems) != len(b.mems): - return False - for amem, bmem in zip(a.mems, b.mems): - if not equalTypes(amem.typ, bmem.typ): - return False - return True - else: - raise Exception('Type compare for {} not implemented'.format(type(a))) - return False - -def canCast(fromT, toT): - fromT = theType(fromT) - toT = theType(toT) - if isinstance(fromT, PointerType) and isinstance(toT, PointerType): - return True - elif fromT is intType and isinstance(toT, PointerType): - return True - return False - -def expectRval(s): - # TODO: solve this better - s.expect_rvalue = True - -class TypeChecker: - def __init__(self, diag): - self.diag = diag - - def error(self, msg, loc): - """ - Wrapper that registers the message and marks the result invalid - """ - self.diag.error(msg, loc) - self.ok = False - - def checkPackage(self, pkg): - self.ok = True - visitor = Visitor() - visitor.visit(pkg, f_post=self.check2) - return self.ok - - def check2(self, sym): - if type(sym) in [IfStatement, WhileStatement]: - if not equalTypes(sym.condition.typ, boolType): - msg = 'Condition must be of type {}'.format(boolType) - self.error(msg, sym.condition.loc) - elif type(sym) is Assignment: - l, r = sym.lval, sym.rval - if not equalTypes(l.typ, r.typ): - msg = 'Cannot assign {} to {}'.format(r.typ, l.typ) - self.error(msg, sym.loc) - if not l.lvalue: - self.error('No valid lvalue {}'.format(l), l.loc) - #if sym.rval.lvalue: - # self.error('Right hand side must be an rvalue', sym.rval.loc) - expectRval(sym.rval) - elif type(sym) is ReturnStatement: - pass - elif type(sym) is FunctionCall: - # Check arguments: - ngiv = len(sym.args) - ptypes = sym.proc.typ.parametertypes - nreq = len(ptypes) - if ngiv != nreq: - self.error('Function {2}: {0} arguments required, {1} given'.format(nreq, ngiv, sym.proc.name), sym.loc) - else: - for a, at in zip(sym.args, ptypes): - expectRval(a) - if not equalTypes(a.typ, at): - self.error('Got {0}, expected {1}'.format(a.typ, at), a.loc) - # determine return type: - sym.typ = sym.proc.typ.returntype - elif type(sym) is VariableUse: - sym.lvalue = True - if isinstance(sym.target, Variable): - sym.typ = sym.target.typ - else: - print('warning {} has no target, defaulting to int'.format(sym)) - sym.typ = intType - elif type(sym) is Literal: - sym.lvalue = False - if type(sym.val) is int: - sym.typ = intType - elif type(sym.val) is float: - sym.typ = doubleType - elif type(sym.val) is bool: - sym.typ = boolType - else: - raise Exception('Unknown literal type'.format(sym.val)) - elif type(sym) is Unop: - if sym.op == '&': - sym.typ = PointerType(sym.a.typ) - sym.lvalue = False - else: - raise Exception('Unknown unop {0}'.format(sym.op)) - elif type(sym) is Deref: - # pointer deref - sym.lvalue = True - # check if the to be dereferenced variable is a pointer type: - ptype = theType(sym.ptr.typ) - if type(ptype) is PointerType: - sym.typ = ptype.ptype - else: - self.error('Cannot dereference non-pointer type {}'.format(ptype), sym.loc) - sym.typ = intType - elif type(sym) is FieldRef: - basetype = sym.base.typ - sym.lvalue = sym.base.lvalue - basetype = theType(basetype) - if type(basetype) is StructureType: - if basetype.hasField(sym.field): - sym.typ = basetype.fieldType(sym.field) - else: - self.error('{} does not contain field {}'.format(basetype, sym.field), sym.loc) - sym.typ = intType - else: - self.error('Cannot select field {} of non-structure type {}'.format(sym.field, basetype), sym.loc) - sym.typ = intType - elif type(sym) is Binop: - sym.lvalue = False - if sym.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']: - expectRval(sym.a) - expectRval(sym.b) - if equalTypes(sym.a.typ, sym.b.typ): - if equalTypes(sym.a.typ, intType): - sym.typ = sym.a.typ - else: - self.error('Can only add integers', sym.loc) - sym.typ = intType - else: - # assume void here? TODO: throw exception! - sym.typ = intType - self.error('Types unequal {} != {}'.format(sym.a.typ, sym.b.typ), sym.loc) - elif sym.op in ['>', '<', '==', '<=', '>=']: - expectRval(sym.a) - expectRval(sym.b) - sym.typ = boolType - if not equalTypes(sym.a.typ, sym.b.typ): - self.error('Types unequal {} != {}'.format(sym.a.typ, sym.b.typ), sym.loc) - elif sym.op in ['or', 'and']: - sym.typ = boolType - if not equalTypes(sym.a.typ, boolType): - self.error('Must be {0}'.format(boolType), sym.a.loc) - if not equalTypes(sym.b.typ, boolType): - self.error('Must be {0}'.format(boolType), sym.b.loc) - else: - raise Exception('Unknown binop {0}'.format(sym.op)) - elif isinstance(sym, Variable): - # check initial value type: - # TODO - pass - elif type(sym) is TypeCast: - if canCast(sym.a.typ, sym.to_type): - sym.typ = sym.to_type - else: - self.error('Cannot cast {} to {}'.format(sym.a.typ, sym.to_type), sym.loc) - sym.typ = intType - elif type(sym) is Constant: - if not equalTypes(sym.typ, sym.value.typ): - self.error('Cannot assign {0} to {1}'.format(sym.value.typ, sym.typ), sym.loc) - elif type(sym) in [CompoundStatement, Package, Function, FunctionType, ExpressionStatement, DefinedType]: - pass - else: - raise NotImplementedError('Unknown type check {0}'.format(sym)) - diff -r d9df72971cbf -r 1c7c1e619be8 python/cortexm3.py --- a/python/cortexm3.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/cortexm3.py Thu Nov 21 11:57:27 2013 +0100 @@ -6,6 +6,10 @@ from ppci import CompilerError import ir +""" + ARM target description. +""" + # TODO: encode this in DSL (domain specific language) # TBD: is this required? diff -r d9df72971cbf -r 1c7c1e619be8 python/grind.py --- a/python/grind.py Fri Nov 15 13:52:32 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ - -import cProfile -import unittest -import pstats - -if __name__ == '__main__': - suite = unittest.TestLoader().discover('.') - def runtests(): - unittest.TextTestRunner().run(suite) - #s = cProfile.run('runtests()',sort='cumtime') - s = cProfile.run('runtests()',sort='tottime') - diff -r d9df72971cbf -r 1c7c1e619be8 python/hexedit.py --- a/python/hexedit.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/hexedit.py Thu Nov 21 11:57:27 2013 +0100 @@ -1,6 +1,7 @@ #!/usr/bin/python import sys +import os from PyQt4.QtCore import * from PyQt4.QtGui import * from PyQt4 import uic @@ -167,7 +168,8 @@ class HexEditor(QMainWindow): def __init__(self): super().__init__() - uic.loadUi('hexeditor.ui', baseinstance=self) + basedir = os.path.dirname(__file__) + uic.loadUi(os.path.join(basedir, 'hexeditor.ui'), baseinstance=self) self.he = HexEdit() self.setCentralWidget(self.he) self.actionOpen.triggered.connect(self.doOpen) diff -r d9df72971cbf -r 1c7c1e619be8 python/ide.py --- a/python/ide.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/ide.py Thu Nov 21 11:57:27 2013 +0100 @@ -3,6 +3,7 @@ import sys import os import logging +import traceback from PyQt4.QtCore import * from PyQt4.QtGui import * @@ -12,11 +13,11 @@ from astviewer import AstViewer from codeedit import CodeEdit from logview import LogView as BuildOutput +from disasm import Disassembly stutil = __import__('st-util') import c3 import zcc import outstream -import traceback def handle_exception(tp, v, tb): @@ -65,51 +66,6 @@ self.sigErrorSelected.emit(err) -class DisAsmModel(QAbstractTableModel): - def __init__(self): - super().__init__() - self.outs = None - self.instructions = [] - self.headers = ['Address', 'Bytes', 'Instruction'] - self.txts = [] - self.txts.append(lambda i: '0x{:08x}'.format(i.address)) - self.txts.append(lambda i: str(i.encode())) - self.txts.append(lambda i: str(i)) - - def rowCount(self, parent): - return len(self.instructions) - - def columnCount(self, parent): - return len(self.headers) - - def data(self, index, role): - if not index.isValid(): - return - row, col = index.row(), index.column() - if role == Qt.DisplayRole: - i = self.instructions[row] - return self.txts[col](i) - - def headerData(self, section, orientation, role): - if orientation == Qt.Horizontal and role == Qt.DisplayRole: - return self.headers[section] - - def setInstructions(self, ins): - self.instructions = ins - self.modelReset.emit() - - -class Disassembly(QTableView): - def __init__(self): - super().__init__() - self.dm = DisAsmModel() - self.setModel(self.dm) - - def showPos(self, p): - for i in self.dm.instructions: - if i.address == p: - row = self.dm.instructions.index(i) - self.selectRow(row) class AboutDialog(QDialog): def __init__(self, parent=None): @@ -350,7 +306,7 @@ wd = os.path.dirname(fn) self.diag.clear() outs = outstream.TextOutputStream() - if not zcc.zcc(ce.Source, outs, self.diag, do_optimize=True, pack_dir=wd): + if not zcc.zcc(ce.Source, outs, self.diag, do_optimize=True): # Set errors: self.builderrors.setErrorList(self.diag.diags) ce.setErrors(self.diag.diags) diff -r d9df72971cbf -r 1c7c1e619be8 python/iso9660.py --- a/python/iso9660.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/iso9660.py Thu Nov 21 11:57:27 2013 +0100 @@ -1,49 +1,82 @@ +#!/usr/bin/env python import argparse -""" +__doc__ = """ ISO 9660 filesystem utility. """ -def read_vol_desc(f): - s = f.read(2048) - ty = s[0] - Id = s[1:6] - assert Id == 'CD001'.encode('ascii') - ver = s[6] - assert ver == 1 - data = s[7:] - assert len(data) == 2041 - return ty, Id, s + +class VolumeDescriptor: + @classmethod + def FromData(cls, d): + ty = d[0] + Id = d[1:6] + assert Id == 'CD001'.encode('ascii') + ver = d[6] + assert ver == 1 + cls = vol_desc_types[ty] + return cls(d) + -def parse_boot_record(sec): - boot_sys_id = sec[7:39] - boot_id = sec[39:71] - print(boot_sys_id) - print(boot_id) +vol_desc_types = {} +def vol_type(t): + def reg_func(cls): + vol_desc_types[t] = cls + return cls + return reg_func + + +@vol_type(0) +class BootRecordVolumeDescriptor(VolumeDescriptor): + def __init__(self, d): + boot_sys_id = d[7:39] + boot_id = d[39:71] + print(boot_sys_id) + print(boot_id) + -def parse_primary_volume(sec): - sys_id = sec[8:40] - vol_id = sec[40:72] - print(sys_id) - print(vol_id) +@vol_type(1) +class PrimaryVolumeDescriptor(VolumeDescriptor): + def __init__(self, d): + sys_id = d[8:40] + vol_id = d[40:72] + print(sys_id) + print(vol_id) + + +@vol_type(255) +class VolumeDescriptorTerminator(VolumeDescriptor): + def __init__(self, d): + pass + -def read_iso(f): - # System area - system = f.read(16 * 2048) - while True: - ty, Id, dat = read_vol_desc(f) - print(ty, Id) - if ty == 255: - break - elif ty == 0: - parse_boot_record(dat) - elif ty == 1: - parse_primary_volume(dat) +class ISOfs: + def __init__(self): + self.vol_descriptors = [] + + def read(self, f): + # System area: + self.system_area = f.read(16 * 2048) + while True: + d = f.read(2048) + desc = VolumeDescriptor.FromData(d) + self.vol_descriptors.append(desc) + if type(desc) is VolumeDescriptorTerminator: + break + + def dump(self): + for vd in self.vol_descriptors: + print(vd) if __name__ == '__main__': - with open('mikeos.iso', 'rb') as f: - read_iso(f) + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('filename') + args = parser.parse_args() + fs = ISOfs() + with open(args.filename, 'rb') as f: + fs.read(f) + fs.dump() diff -r d9df72971cbf -r 1c7c1e619be8 python/old/assembler.py --- a/python/old/assembler.py Fri Nov 15 13:52:32 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,354 +0,0 @@ -""" - Assembler code generation functions -""" - -from .errors import Error - -modrm = {'rax': 0, 'rbx': 1} - -# Table 3.1 of the intel manual: -# use REX.W on the table below: -regs64 = {'rax': 0,'rcx':1,'rdx':2,'rbx':3,'rsp':4,'rbp':5,'rsi':6,'rdi':7,'r8':0,'r9':1,'r10':2,'r11':3,'r12':4,'r13':5,'r14':6,'r15':7} -regs32 = {'eax': 0, 'ecx':1, 'edx':2, 'ebx': 3, 'esp': 4, 'ebp': 5, 'esi':6, 'edi':7} -regs8 = {'al':0,'cl':1,'dl':2,'bl':3,'ah':4,'ch':5,'dh':6,'bh':7} - -# Calculation of the rexb bit: -rexbit = {'rax': 0, 'rcx':0, 'rdx':0, 'rbx': 0, 'rsp': 0, 'rbp': 0, 'rsi':0, 'rdi':0,'r8':1,'r9':1,'r10':1,'r11':1,'r12':1,'r13':1,'r14':1,'r15':1} - -# Helper functions: -def imm64(x): - """ represent 64 bits integer in little endian 8 bytes""" - if x < 0: - x = x + (1 << 64) - x = x & 0xFFFFFFFFFFFFFFFF - return [ (x >> (p*8)) & 0xFF for p in range(8) ] - -def imm32(x): - """ represent 32 bits integer in little endian 4 bytes""" - if x < 0: - x = x + (1 << 32) - x = x & 0xFFFFFFFF - return [ (x >> (p*8)) & 0xFF for p in range(4) ] - -def imm8(x): - if x < 0: - x = x + (1 << 8) - x = x & 0xFF - return [ x ] - -def modrm(mod=0, rm=0, reg=0): - """ Construct the modrm byte from its components """ - assert(mod <= 3) - assert(rm <= 7) - assert(reg <= 7) - return (mod << 6) | (reg << 3) | rm - -def rex(w=0, r=0, x=0, b=0): - """ Create a REX prefix byte """ - assert(w <= 1) - assert(r <= 1) - assert(x <= 1) - assert(b <= 1) - return 0x40 | (w<<3) | (r<<2) | (x<<1) | b - -def sib(ss=0, index=0, base=0): - assert(ss <= 3) - assert(index <= 7) - assert(base <= 7) - return (ss << 6) | (index << 3) | base - -tttn = {'L':0xc,'G':0xf,'NE':0x5,'GE':0xd,'LE':0xe, 'E':0x4} - -# Actual instructions: -def nearjump(distance, condition=None): - """ jmp imm32 """ - lim = (1<<30) - if abs(distance) > lim: - Error('near jump cannot jump over more than {0} bytes'.format(lim)) - if condition: - if distance < 0: - distance -= 6 # Skip own instruction - opcode = 0x80 | tttn[condition] # Jcc imm32 - return [0x0F, opcode] + imm32(distance) - else: - if distance < 0: - distance -= 5 # Skip own instruction - return [ 0xE9 ] + imm32(distance) - -def shortjump(distance, condition=None): - """ jmp imm8 """ - lim = 118 - if abs(distance) > lim: - Error('short jump cannot jump over more than {0} bytes'.format(lim)) - if distance < 0: - distance -= 2 # Skip own instruction - if condition: - opcode = 0x70 | tttn[condition] # Jcc rel8 - else: - opcode = 0xeb # jmp rel8 - return [opcode] + imm8(distance) - -# Helper that determines jump type: -def reljump(distance): - if abs(distance) < 110: - return shortjump(distance) - else: - return nearjump(distance) - -def push(reg): - if reg in regs64: - if rexbit[reg] == 1: - return [0x41, 0x50 + regs64[reg]] - else: - return [0x50 + regs64[reg]] - else: - Error('push for {0} not implemented'.format(reg)) - -def pop(reg): - if reg in regs64: - if rexbit[reg] == 1: - rexprefix = rex(b=1) - opcode = 0x58 + regs64[reg] - return [rexprefix, opcode] - else: - opcode = 0x58 + regs64[reg] - return [ opcode ] - else: - Error('pop for {0} not implemented'.format(reg)) - -def INT(number): - opcode = 0xcd - return [opcode] + imm8(number) - -def syscall(): - return [0x0F, 0x05] - -def call(distance): - if type(distance) is int: - return [0xe8]+imm32(distance) - elif type(distance) is str and distance in regs64: - reg = distance - opcode = 0xFF # 0xFF /2 == call r/m64 - mod_rm = modrm(mod=3, reg=2, rm=regs64[reg]) - if rexbit[reg] == 1: - rexprefix = rex(b=rexbit[reg]) - return [rexprefix, opcode, mod_rm] - else: - return [opcode, mod_rm] - else: - Error('Cannot call to {0}'.format(distance)) - -def ret(): - return [ 0xc3 ] - -def increg64(reg): - assert(reg in regs64) - rexprefix = rex(w=1, b=rexbit[reg]) - opcode = 0xff - mod_rm = modrm(mod=3, rm=regs64[reg]) - return [rexprefix, opcode, mod_rm] - -def prepost8(r8, rm8): - assert(r8 in regs8) - pre = [] - if type(rm8) is list: - # TODO: merge mem access with prepost for 64 bits - if len(rm8) == 1: - base, = rm8 - if type(base) is str and base in regs64: - assert(not base in ['rbp', 'rsp', 'r12', 'r13']) - mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8]) - if rexbit[base] == 1: - pre.append(rex(b=1)) - post = [mod_rm] - else: - Error('One arg of type {0} not implemented'.format(base)) - elif len(rm8) == 2: - base, offset = rm8 - assert(type(offset) is int) - assert(base in regs64) - - if base == 'rsp' or base == 'r12': - Error('Cannot use rsp or r12 as base yet') - if rexbit[base] == 1: - pre.append( rex(b=1) ) - mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8]) - post = [mod_rm] + imm8(offset) - else: - Error('not supporting prepost8 with list len {0}'.format(len(rm8))) - else: - Error('Not supporting move with reg8 {0}'.format(r8)) - return pre, post - -def prepost(r64, rm64): - assert(r64 in regs64) - if type(rm64) is list: - if len(rm64) == 3: - base, index, disp = rm64 - assert(base in regs64) - assert(index in regs64) - assert(type(disp) is int) - # Assert that no special cases are used: - # TODO: swap base and index to avoid special cases - # TODO: exploit special cases and make better code - assert(index != 'rsp') - - rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base]) - # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8 - mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) - si_b = sib(ss=0, index=regs64[index], base=regs64[base]) - return [rexprefix], [mod_rm, si_b] + imm8(disp) - elif len(rm64) == 2: - base, offset = rm64 - assert(type(offset) is int) - if base == 'RIP': - # RIP pointer relative addressing mode! - rexprefix = rex(w=1, r=rexbit[r64]) - mod_rm = modrm(mod=0, rm=5, reg=regs64[r64]) - return [rexprefix], [mod_rm] + imm32(offset) - else: - assert(base in regs64) - - if base == 'rsp' or base == 'r12': - # extended function that uses SIB byte - rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) - # rm=4 indicates a SIB byte follows - mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) - # index=4 indicates that index is not used - si_b = sib(ss=0, index=4, base=regs64[base]) - return [rexprefix], [mod_rm, si_b] + imm8(offset) - else: - rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) - mod_rm = modrm(mod=1, rm=regs64[base], reg=regs64[r64]) - return [rexprefix], [mod_rm] + imm8(offset) - elif len(rm64) == 1: - offset = rm64[0] - if type(offset) is int: - rexprefix = rex(w=1, r=rexbit[r64]) - mod_rm = modrm(mod=0, rm=4,reg=regs64[r64]) - si_b = sib(ss=0, index=4,base=5) # 0x25 - return [rexprefix], [mod_rm, si_b] + imm32(offset) - else: - Error('Memory reference of type {0} not implemented'.format(offset)) - else: - Error('Memory reference not implemented') - elif rm64 in regs64: - rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64]) - mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64]) - return [rexprefix], [mod_rm] - -def leareg64(rega, m): - opcode = 0x8d # lea r64, m - pre, post = prepost(rega, m) - return pre + [opcode] + post - -def mov(rega, regb): - if type(regb) is int: - pre = [rex(w=1, b=rexbit[rega])] - opcode = 0xb8 + regs64[rega] - post = imm64(regb) - elif type(regb) is str: - if regb in regs64: - opcode = 0x89 # mov r/m64, r64 - pre, post = prepost(regb, rega) - elif regb in regs8: - opcode = 0x88 # mov r/m8, r8 - pre, post = prepost8(regb, rega) - else: - Error('Unknown register {0}'.format(regb)) - elif type(rega) is str: - if rega in regs64: - opcode = 0x8b # mov r64, r/m64 - pre, post = prepost(rega, regb) - else: - Error('Unknown register {0}'.format(rega)) - else: - Error('Move of this kind {0}, {1} not implemented'.format(rega, regb)) - return pre + [opcode] + post - -def xorreg64(rega, regb): - rexprefix = rex(w=1, r=rexbit[regb], b=rexbit[rega]) - opcode = 0x31 # XOR r/m64, r64 - # Alternative is 0x33 XOR r64, r/m64 - mod_rm = modrm(3, rm=regs64[rega], reg=regs64[regb]) - return [rexprefix, opcode, mod_rm] - -# integer arithmatic: -def addreg64(rega, regb): - if regb in regs64: - pre, post = prepost(regb, rega) - opcode = 0x01 # ADD r/m64, r64 - return pre + [opcode] + post - elif type(regb) is int: - if regb < 100: - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x83 # add r/m, imm8 - mod_rm = modrm(3, rm=regs64[rega], reg=0) - return [rexprefix, opcode, mod_rm]+imm8(regb) - elif regb < (1<<31): - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x81 # add r/m64, imm32 - mod_rm = modrm(3, rm=regs64[rega], reg=0) - return [rexprefix, opcode, mod_rm]+imm32(regb) - else: - Error('Constant value too large!') - else: - Error('unknown second operand!'.format(regb)) - -def subreg64(rega, regb): - if regb in regs64: - pre, post = prepost(regb, rega) - opcode = 0x29 # SUB r/m64, r64 - return pre + [opcode] + post - elif type(regb) is int: - if regb < 100: - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x83 # sub r/m, imm8 - mod_rm = modrm(3, rm=regs64[rega], reg=5) - return [rexprefix, opcode, mod_rm]+imm8(regb) - elif regb < (1<<31): - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x81 # sub r/m64, imm32 - mod_rm = modrm(3, rm=regs64[rega], reg=5) - return [rexprefix, opcode, mod_rm]+imm32(regb) - else: - Error('Constant value too large!') - - else: - Error('unknown second operand!'.format(regb)) - -def idivreg64(reg): - rexprefix = rex(w=1, b=rexbit[reg]) - opcode = 0xf7 # IDIV r/m64 - mod_rm = modrm(3, rm=regs64[reg], reg=7) - return [rexprefix, opcode, mod_rm] - -def imulreg64_rax(reg): - rexprefix = rex(w=1, b=rexbit[reg]) - opcode = 0xf7 # IMUL r/m64 - mod_rm = modrm(3, rm=regs64[reg], reg=5) - return [rexprefix, opcode, mod_rm] - -def imulreg64(rega, regb): - pre, post = prepost(rega, regb) - opcode = 0x0f # IMUL r64, r/m64 - opcode2 = 0xaf - return pre + [opcode, opcode2] + post - -def cmpreg64(rega, regb): - if regb in regs64: - pre, post = prepost(regb, rega) - opcode = 0x39 # CMP r/m64, r64 - return pre + [opcode] + post - elif type(regb) is int: - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x83 # CMP r/m64, imm8 - mod_rm = modrm(3, rm=regs64[rega], reg=7) - return [rexprefix, opcode, mod_rm] + imm8(regb) - - else: - Error('not implemented cmp64') - -# Mapping that maps string names to the right functions: -opcodes = {'mov':(mov,2), 'lea':(leareg64,2), 'int':(INT,1), 'syscall':(syscall,0)} - diff -r d9df72971cbf -r 1c7c1e619be8 python/old/modules.py --- a/python/old/modules.py Fri Nov 15 13:52:32 2013 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,193 +0,0 @@ -import struct -from .errors import Error -from .nodes import * -from .builtin import integer, real, char, boolean, void -import os.path - -""" - File format for compiled modules. - * [11] magic identifier - * [STR] mod name - * [STR] signature, a md5 signature of the module. - * [I32] size of code - * code image - * [I32] entrypoint for initcode - * imported modules - ** [I32] num of imported modules - *** [STR] name of module - *** signature of the module - *** [I32] offset in the process image where the interface symbols must be placed - * public interface - ** [I32] num of interface elements - *** [STR] proc name - *** [I32] offset in code image - *** [type] return type - *** [I32] number of parameters - **** parameter - ***** parameter kind - ***** parameter name - ***** parameter type -""" - -MAGIC = b'LCFOSMODC' - -loadedModules = [] - -def loadModule(modname): - """ returns a Module object specified by a name """ - # Check if the module was already loaded: - for mod in loadedModules: - if mod.name == modname: - return mod - - # Try to load the module from file: - srcfilename = modname + '.mod' - binfilename = modname + '.bin' - sourceExists = os.path.exists(srcfilename) - if os.path.exists(binfilename): - if sourceExists: - compileModule() - else: - return loadModuleFromFile(binfilename) - else: - Error("Cannot load module '{0}'!".format(modname)) - -def loadModuleFromFile(filename): - f = open(filename, 'rb') - magic = f.read(len(MAGIC)) - assert(magic == MAGIC) - - # Helper functions: - def readI32(): - int32, = struct.unpack('> (p*8)) & 0xFF for p in range(8) ] + +def imm32(x): + """ represent 32 bits integer in little endian 4 bytes""" + if x < 0: + x = x + (1 << 32) + x = x & 0xFFFFFFFF + return [ (x >> (p*8)) & 0xFF for p in range(4) ] + +def imm8(x): + if x < 0: + x = x + (1 << 8) + x = x & 0xFF + return [ x ] + +def modrm(mod=0, rm=0, reg=0): + """ Construct the modrm byte from its components """ + assert(mod <= 3) + assert(rm <= 7) + assert(reg <= 7) + return (mod << 6) | (reg << 3) | rm + +def rex(w=0, r=0, x=0, b=0): + """ Create a REX prefix byte """ + assert(w <= 1) + assert(r <= 1) + assert(x <= 1) + assert(b <= 1) + return 0x40 | (w<<3) | (r<<2) | (x<<1) | b + +def sib(ss=0, index=0, base=0): + assert(ss <= 3) + assert(index <= 7) + assert(base <= 7) + return (ss << 6) | (index << 3) | base + +tttn = {'L':0xc,'G':0xf,'NE':0x5,'GE':0xd,'LE':0xe, 'E':0x4} + +# Actual instructions: +def nearjump(distance, condition=None): + """ jmp imm32 """ + lim = (1<<30) + if abs(distance) > lim: + Error('near jump cannot jump over more than {0} bytes'.format(lim)) + if condition: + if distance < 0: + distance -= 6 # Skip own instruction + opcode = 0x80 | tttn[condition] # Jcc imm32 + return [0x0F, opcode] + imm32(distance) + else: + if distance < 0: + distance -= 5 # Skip own instruction + return [ 0xE9 ] + imm32(distance) + +def shortjump(distance, condition=None): + """ jmp imm8 """ + lim = 118 + if abs(distance) > lim: + Error('short jump cannot jump over more than {0} bytes'.format(lim)) + if distance < 0: + distance -= 2 # Skip own instruction + if condition: + opcode = 0x70 | tttn[condition] # Jcc rel8 + else: + opcode = 0xeb # jmp rel8 + return [opcode] + imm8(distance) + +# Helper that determines jump type: +def reljump(distance): + if abs(distance) < 110: + return shortjump(distance) + else: + return nearjump(distance) + +def push(reg): + if reg in regs64: + if rexbit[reg] == 1: + return [0x41, 0x50 + regs64[reg]] + else: + return [0x50 + regs64[reg]] + else: + Error('push for {0} not implemented'.format(reg)) + +def pop(reg): + if reg in regs64: + if rexbit[reg] == 1: + rexprefix = rex(b=1) + opcode = 0x58 + regs64[reg] + return [rexprefix, opcode] + else: + opcode = 0x58 + regs64[reg] + return [ opcode ] + else: + Error('pop for {0} not implemented'.format(reg)) + +def INT(number): + opcode = 0xcd + return [opcode] + imm8(number) + +def syscall(): + return [0x0F, 0x05] + +def call(distance): + if type(distance) is int: + return [0xe8]+imm32(distance) + elif type(distance) is str and distance in regs64: + reg = distance + opcode = 0xFF # 0xFF /2 == call r/m64 + mod_rm = modrm(mod=3, reg=2, rm=regs64[reg]) + if rexbit[reg] == 1: + rexprefix = rex(b=rexbit[reg]) + return [rexprefix, opcode, mod_rm] + else: + return [opcode, mod_rm] + else: + Error('Cannot call to {0}'.format(distance)) + +def ret(): + return [ 0xc3 ] + +def increg64(reg): + assert(reg in regs64) + rexprefix = rex(w=1, b=rexbit[reg]) + opcode = 0xff + mod_rm = modrm(mod=3, rm=regs64[reg]) + return [rexprefix, opcode, mod_rm] + +def prepost8(r8, rm8): + assert(r8 in regs8) + pre = [] + if type(rm8) is list: + # TODO: merge mem access with prepost for 64 bits + if len(rm8) == 1: + base, = rm8 + if type(base) is str and base in regs64: + assert(not base in ['rbp', 'rsp', 'r12', 'r13']) + mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8]) + if rexbit[base] == 1: + pre.append(rex(b=1)) + post = [mod_rm] + else: + Error('One arg of type {0} not implemented'.format(base)) + elif len(rm8) == 2: + base, offset = rm8 + assert(type(offset) is int) + assert(base in regs64) + + if base == 'rsp' or base == 'r12': + Error('Cannot use rsp or r12 as base yet') + if rexbit[base] == 1: + pre.append( rex(b=1) ) + mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8]) + post = [mod_rm] + imm8(offset) + else: + Error('not supporting prepost8 with list len {0}'.format(len(rm8))) + else: + Error('Not supporting move with reg8 {0}'.format(r8)) + return pre, post + +def prepost(r64, rm64): + assert(r64 in regs64) + if type(rm64) is list: + if len(rm64) == 3: + base, index, disp = rm64 + assert(base in regs64) + assert(index in regs64) + assert(type(disp) is int) + # Assert that no special cases are used: + # TODO: swap base and index to avoid special cases + # TODO: exploit special cases and make better code + assert(index != 'rsp') + + rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base]) + # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8 + mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) + si_b = sib(ss=0, index=regs64[index], base=regs64[base]) + return [rexprefix], [mod_rm, si_b] + imm8(disp) + elif len(rm64) == 2: + base, offset = rm64 + assert(type(offset) is int) + if base == 'RIP': + # RIP pointer relative addressing mode! + rexprefix = rex(w=1, r=rexbit[r64]) + mod_rm = modrm(mod=0, rm=5, reg=regs64[r64]) + return [rexprefix], [mod_rm] + imm32(offset) + else: + assert(base in regs64) + + if base == 'rsp' or base == 'r12': + # extended function that uses SIB byte + rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) + # rm=4 indicates a SIB byte follows + mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) + # index=4 indicates that index is not used + si_b = sib(ss=0, index=4, base=regs64[base]) + return [rexprefix], [mod_rm, si_b] + imm8(offset) + else: + rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) + mod_rm = modrm(mod=1, rm=regs64[base], reg=regs64[r64]) + return [rexprefix], [mod_rm] + imm8(offset) + elif len(rm64) == 1: + offset = rm64[0] + if type(offset) is int: + rexprefix = rex(w=1, r=rexbit[r64]) + mod_rm = modrm(mod=0, rm=4,reg=regs64[r64]) + si_b = sib(ss=0, index=4,base=5) # 0x25 + return [rexprefix], [mod_rm, si_b] + imm32(offset) + else: + Error('Memory reference of type {0} not implemented'.format(offset)) + else: + Error('Memory reference not implemented') + elif rm64 in regs64: + rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64]) + mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64]) + return [rexprefix], [mod_rm] + +def leareg64(rega, m): + opcode = 0x8d # lea r64, m + pre, post = prepost(rega, m) + return pre + [opcode] + post + +def mov(rega, regb): + if type(regb) is int: + pre = [rex(w=1, b=rexbit[rega])] + opcode = 0xb8 + regs64[rega] + post = imm64(regb) + elif type(regb) is str: + if regb in regs64: + opcode = 0x89 # mov r/m64, r64 + pre, post = prepost(regb, rega) + elif regb in regs8: + opcode = 0x88 # mov r/m8, r8 + pre, post = prepost8(regb, rega) + else: + Error('Unknown register {0}'.format(regb)) + elif type(rega) is str: + if rega in regs64: + opcode = 0x8b # mov r64, r/m64 + pre, post = prepost(rega, regb) + else: + Error('Unknown register {0}'.format(rega)) + else: + Error('Move of this kind {0}, {1} not implemented'.format(rega, regb)) + return pre + [opcode] + post + +def xorreg64(rega, regb): + rexprefix = rex(w=1, r=rexbit[regb], b=rexbit[rega]) + opcode = 0x31 # XOR r/m64, r64 + # Alternative is 0x33 XOR r64, r/m64 + mod_rm = modrm(3, rm=regs64[rega], reg=regs64[regb]) + return [rexprefix, opcode, mod_rm] + +# integer arithmatic: +def addreg64(rega, regb): + if regb in regs64: + pre, post = prepost(regb, rega) + opcode = 0x01 # ADD r/m64, r64 + return pre + [opcode] + post + elif type(regb) is int: + if regb < 100: + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x83 # add r/m, imm8 + mod_rm = modrm(3, rm=regs64[rega], reg=0) + return [rexprefix, opcode, mod_rm]+imm8(regb) + elif regb < (1<<31): + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x81 # add r/m64, imm32 + mod_rm = modrm(3, rm=regs64[rega], reg=0) + return [rexprefix, opcode, mod_rm]+imm32(regb) + else: + Error('Constant value too large!') + else: + Error('unknown second operand!'.format(regb)) + +def subreg64(rega, regb): + if regb in regs64: + pre, post = prepost(regb, rega) + opcode = 0x29 # SUB r/m64, r64 + return pre + [opcode] + post + elif type(regb) is int: + if regb < 100: + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x83 # sub r/m, imm8 + mod_rm = modrm(3, rm=regs64[rega], reg=5) + return [rexprefix, opcode, mod_rm]+imm8(regb) + elif regb < (1<<31): + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x81 # sub r/m64, imm32 + mod_rm = modrm(3, rm=regs64[rega], reg=5) + return [rexprefix, opcode, mod_rm]+imm32(regb) + else: + Error('Constant value too large!') + + else: + Error('unknown second operand!'.format(regb)) + +def idivreg64(reg): + rexprefix = rex(w=1, b=rexbit[reg]) + opcode = 0xf7 # IDIV r/m64 + mod_rm = modrm(3, rm=regs64[reg], reg=7) + return [rexprefix, opcode, mod_rm] + +def imulreg64_rax(reg): + rexprefix = rex(w=1, b=rexbit[reg]) + opcode = 0xf7 # IMUL r/m64 + mod_rm = modrm(3, rm=regs64[reg], reg=5) + return [rexprefix, opcode, mod_rm] + +def imulreg64(rega, regb): + pre, post = prepost(rega, regb) + opcode = 0x0f # IMUL r64, r/m64 + opcode2 = 0xaf + return pre + [opcode, opcode2] + post + +def cmpreg64(rega, regb): + if regb in regs64: + pre, post = prepost(regb, rega) + opcode = 0x39 # CMP r/m64, r64 + return pre + [opcode] + post + elif type(regb) is int: + rexprefix = rex(w=1, b=rexbit[rega]) + opcode = 0x83 # CMP r/m64, imm8 + mod_rm = modrm(3, rm=regs64[rega], reg=7) + return [rexprefix, opcode, mod_rm] + imm8(regb) + + else: + Error('not implemented cmp64') + +# Mapping that maps string names to the right functions: +opcodes = {'mov':(mov,2), 'lea':(leareg64,2), 'int':(INT,1), 'syscall':(syscall,0)} + diff -r d9df72971cbf -r 1c7c1e619be8 python/zcc.py --- a/python/zcc.py Fri Nov 15 13:52:32 2013 +0100 +++ b/python/zcc.py Thu Nov 21 11:57:27 2013 +0100 @@ -1,12 +1,16 @@ #!/usr/bin/python -import sys, argparse -import c3, ppci, codegen +import sys +import argparse +import logging + +import c3 +import ppci +import codegen import codegenarm from optimize import optimize import outstream import hexfile -import logging logformat='%(asctime)s|%(levelname)s|%(name)s|%(message)s' @@ -18,45 +22,54 @@ # Parse arguments: parser = argparse.ArgumentParser(description='lcfos Compiler') +# Input: parser.add_argument('source', type=argparse.FileType('r'), \ - help='the source file to build') + help='the source file to build', nargs="+") +parser.add_argument('-i', '--import', type=argparse.FileType('r'), \ + help='Possible import module', action='append') + parser.add_argument('--dumpir', action='store_true', help="Dump IR-code") parser.add_argument('--dumpasm', action='store_true', help="Dump ASM-code") parser.add_argument('--optimize', action='store_true', help="Optimize") -parser.add_argument('--package_dir', help="Look in this directory for packages") +parser.add_argument('--target', help="Backend selection") parser.add_argument('-o', '--output', help='Output file', metavar='filename') parser.add_argument('--hexfile', help='Output hexfile', type=argparse.FileType('w')) parser.add_argument('--log', help='Log level (INFO,DEBUG)', type=logLevel) -def zcc(src, outs, diag, dumpir=False, do_optimize=False, pack_dir=None): +def zcc(srcs, outs, diag, dumpir=False, do_optimize=False): + """ + Compile sources into output stream. + Sources is an iterable of open files. + """ logging.info('Zcc started') # Front end: c3b = c3.Builder(diag) - ircode = c3b.build(src, pack_dir=pack_dir) - if not ircode: - return + imps = [] + for ircode in c3b.build(srcs, imps): + print(ircode) + if not ircode: + return - # Optimization passes: - if do_optimize: - optimize(ircode) + # Optimization passes: + if do_optimize: + optimize(ircode) - if dumpir: - ircode.dump() + if dumpir: + ircode.dump() - # Code generation: - cg = codegenarm.ArmCodeGenerator(outs) - obj = cg.generate(ircode) + # Code generation: + cg = codegenarm.ArmCodeGenerator(outs) + obj = cg.generate(ircode) return True def main(args): logging.basicConfig(format=logformat, level=args.log) - src = args.source.read() - args.source.close() + src = args.source diag = ppci.DiagnosticsManager() outs = outstream.TextOutputStream() # Invoke compiler: - res = zcc(src, outs, diag, dumpir=args.dumpir, do_optimize=args.optimize, pack_dir=args.package_dir) + res = zcc(src, outs, diag, dumpir=args.dumpir, do_optimize=args.optimize) if not res: diag.printErrors(src) return 1 @@ -65,14 +78,10 @@ outs.dump() code_bytes = outs.sections['code'].to_bytes() - #print('bytes:', code_bytes) if args.output: output_filename = args.output - else: - output_filename = 'b.output' - - with open(output_filename, 'wb') as f: - f.write(code_bytes) + with open(output_filename, 'wb') as f: + f.write(code_bytes) if args.hexfile: logging.info('Creating hexfile') @@ -83,5 +92,6 @@ if __name__ == '__main__': arguments = parser.parse_args() + print(arguments) sys.exit(main(arguments)) diff -r d9df72971cbf -r 1c7c1e619be8 test/c3examples/burn2.c3 --- a/test/c3examples/burn2.c3 Fri Nov 15 13:52:32 2013 +0100 +++ b/test/c3examples/burn2.c3 Thu Nov 21 11:57:27 2013 +0100 @@ -22,12 +22,12 @@ return; } - /*if (pin > 15) + if (pin > 15) { return; - }*/ + } - var RCC_Type RCC; + var stm32f4xx.RCC_Type RCC; RCC = cast(0x40023800); // Enable the clock to port D: @@ -51,7 +51,7 @@ a = 0 while (a < 1000) { - a = a + 1; + a = add(a, 1); } while(true) {} diff -r d9df72971cbf -r 1c7c1e619be8 test/grind.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/grind.py Thu Nov 21 11:57:27 2013 +0100 @@ -0,0 +1,12 @@ + +import cProfile +import unittest +import pstats + +if __name__ == '__main__': + suite = unittest.TestLoader().discover('.') + def runtests(): + unittest.TextTestRunner().run(suite) + #s = cProfile.run('runtests()',sort='cumtime') + s = cProfile.run('runtests()',sort='tottime') + diff -r d9df72971cbf -r 1c7c1e619be8 test/gui/testhexedit.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/gui/testhexedit.py Thu Nov 21 11:57:27 2013 +0100 @@ -0,0 +1,32 @@ +import unittest +import hexedit +from PyQt4.QtGui import QApplication +from PyQt4.QtTest import QTest +from PyQt4.QtCore import Qt +import sys + + +class HexTest(unittest.TestCase): + def setUp(self): + self.app = QApplication(sys.argv) + self.ui = hexedit.HexEditor() + self.bv = self.ui.he.bv + # Provide some random data: + self.bv.Data = bytearray(range(10)) * 8 + b'x' + + def tearDown(self): + self.app.processEvents() + self.app.quit() + + def testOpenButton(self): + self.assertEqual(0, self.bv.CursorPosition) + #QTest.mouseClick(self.bv, Qt.LeftButton) + self.assertEqual(161, self.bv.CursorPosition) + QTest.keyClick(self.bv, Qt.Key_Left) + self.assertEqual(160, self.bv.CursorPosition) + QTest.keyClick(self.bv, Qt.Key_Up) + self.assertEqual(128, self.bv.CursorPosition) + + +if __name__ == '__main__': + unittest.main() diff -r d9df72971cbf -r 1c7c1e619be8 test/runtests.sh --- a/test/runtests.sh Fri Nov 15 13:52:32 2013 +0100 +++ b/test/runtests.sh Thu Nov 21 11:57:27 2013 +0100 @@ -1,11 +1,14 @@ #!/usr/bin/env bash -export PYTHONPATH=$PYTHONPATH:../python +export PYTHONPATH=$PYTHONPATH:`pwd`/../python if [ $1 == "loop" ]; then DIR=.. while :; do - python -m unittest + python -m unittest -v + cd gui + #python -m unittest -v + cd .. echo "Awaiting changes in $DIR" inotifywait -r -e modify $DIR done diff -r d9df72971cbf -r 1c7c1e619be8 test/testc3.py --- a/test/testc3.py Fri Nov 15 13:52:32 2013 +0100 +++ b/test/testc3.py Thu Nov 21 11:57:27 2013 +0100 @@ -1,7 +1,11 @@ import c3 -import time, ppci, x86, ir +import time +import ppci +import x86 +import ir import unittest import glob +import io testsrc = """module test; @@ -64,29 +68,30 @@ class testLexer(unittest.TestCase): def testUnexpectedCharacter(self): - snippet = """ var s \u6c34 """ + snippet = io.StringIO(""" var s \u6c34 """) with self.assertRaises(ppci.CompilerError): list(c3.lexer.tokenize(snippet)) def testBlockComment(self): - snippet = """ + snippet = io.StringIO(""" /* Demo */ var int x = 0; - """ + """) toks = ['var', 'ID', 'ID', '=', 'NUMBER', ';', 'END'] self.assertSequenceEqual([tok.typ for tok in c3.lexer.tokenize(snippet)], toks) def testBlockCommentMultiLine(self): - snippet = """ + snippet = io.StringIO(""" /* Demo bla1 bla2 */ var int x = 0; - """ + """) toks = ['var', 'ID', 'ID', '=', 'NUMBER', ';', 'END'] self.assertSequenceEqual([tok.typ for tok in c3.lexer.tokenize(snippet)], toks) + class testBuilder(unittest.TestCase): def setUp(self): self.diag = ppci.DiagnosticsManager() @@ -98,33 +103,35 @@ def expectErrors(self, snippet, rows): """ Helper to test for expected errors on rows """ - ircode = self.builder.build(snippet) + ircode = list(self.builder.build([io.StringIO(snippet)])) actualErrors = [err.row for err in self.diag.diags] if rows != actualErrors: self.diag.printErrors(snippet) self.assertSequenceEqual(rows, actualErrors) - self.assertFalse(ircode) + # self.assertFalse(all(ircode)) - def expectOK(self, snippet, pack_dir=None): - ircode = self.builder.build(snippet, pack_dir=pack_dir) + def expectOK(self, snippet): + if type(snippet) is list: + ircode = self.builder.build(snippet) + else: + ircode = self.builder.build([io.StringIO(snippet)]) if not ircode: self.diag.printErrors(snippet) - self.assertTrue(ircode) + self.assertTrue(all(ircode)) return ircode def testPackage(self): p1 = """module p1; type int A; """ - self.assertTrue(self.builder.build(p1)) p2 = """module p2; import p1; var A b; """ - self.expectOK(p2) + self.expectOK([io.StringIO(s) for s in (p1, p2)]) def testFunctArgs(self): - snippet = """ + snippet = """ module testargs; function void t2(int a, double b) { @@ -132,11 +139,11 @@ t2(2); t2(1, 1.2); } - """ - self.expectErrors(snippet, [5, 6]) + """ + self.expectErrors(snippet, [5, 6]) def testExpressions(self): - snippet = """ + snippet = """ module test; function void t(int a, double b) { @@ -147,11 +154,11 @@ c = a; c = b > 1; } - """ - self.expectErrors(snippet, [8, 9, 10]) + """ + self.expectErrors(snippet, [8, 9, 10]) def testExpression1(self): - snippet = """ + snippet = """ module testexpr1; function void t() { @@ -160,34 +167,34 @@ b = a * 2 + a * a; c = b * a - 3; } - """ - self.expectOK(snippet) + """ + self.expectOK(snippet) def testEmpty(self): - snippet = """ - module A - """ - self.expectErrors(snippet, [3]) + snippet = """ + module A + """ + self.expectErrors(snippet, [3]) def testEmpty2(self): - snippet = "" - self.expectErrors(snippet, [1]) + snippet = "" + self.expectErrors(snippet, [1]) def testRedefine(self): - snippet = """ - module test; - var int a; - var int b; - var int a; - """ - self.expectErrors(snippet, [5]) + snippet = """ + module test; + var int a; + var int b; + var int a; + """ + self.expectErrors(snippet, [5]) def testWhile(self): - snippet = """ - module tstwhile; - var int a; - function void t() - { + snippet = """ + module tstwhile; + var int a; + function void t() + { var int i = 0; while (i < 1054) { @@ -202,15 +209,15 @@ while(false) { } - } - """ - self.expectOK(snippet) + } + """ + self.expectOK(snippet) def testIf(self): snippet = """ - module tstIFF; - function void t(int b) - { + module tstIFF; + function void t(int b) + { var int a; a = 2; if (a > b) @@ -226,7 +233,7 @@ } return b; - } + } """ self.expectOK(snippet) @@ -426,8 +433,7 @@ } """ - ircode = self.expectOK(snippet) - self.assertEqual(1, len(ircode.Functions)) + self.expectOK(snippet) if __name__ == '__main__': unittest.main() diff -r d9df72971cbf -r 1c7c1e619be8 test/testx86asm.py --- a/test/testx86asm.py Fri Nov 15 13:52:32 2013 +0100 +++ b/test/testx86asm.py Thu Nov 21 11:57:27 2013 +0100 @@ -24,6 +24,7 @@ def testCall(self): assert(assembler.call('r10') == [0x41, 0xff, 0xd2]) assert(assembler.call('rcx') == [0xff, 0xd1]) + def testXOR(self): assert(assembler.xorreg64('rax', 'rax') == [0x48, 0x31, 0xc0]) assert(assembler.xorreg64('r9', 'r8') == [0x4d, 0x31, 0xc1]) diff -r d9df72971cbf -r 1c7c1e619be8 test/testzcc.py --- a/test/testzcc.py Fri Nov 15 13:52:32 2013 +0100 +++ b/test/testzcc.py Thu Nov 21 11:57:27 2013 +0100 @@ -3,27 +3,45 @@ import zcc import outstream import ppci +import io +import os class ZccTestCase(unittest.TestCase): """ Tests the compiler driver """ - def do(self, fn): - """ Compile blink.c3 """ - args = zcc.parser.parse_args([fn, '--package_dir', './c3examples/']) + def do(self, filenames): + basedir = 'c3examples' + filenames = [os.path.join(basedir, fn) for fn in filenames] + args = zcc.parser.parse_args(filenames + []) self.assertEqual(0, zcc.main(args)) - def testExamples(self): + def t2estExamples(self): """ Test all examples in the c3/examples directory """ example_filenames = glob.glob('./c3examples/*.c3') for filename in example_filenames: self.do(filename) + def testBurn(self): + self.do(['stm32f4xx.c3', 'burn.c3']) + + def testBurn2(self): + self.do(['stm32f4xx.c3','burn2.c3']) + + def testComments(self): + self.do(['comments.c3']) + + def testCast(self): + self.do(['cast.c3']) + def testSectionAddress(self): - src = "module tst; function void t2() {var int t3; t3 = 2;}" + src = """module tst; + function void t2() {var int t3; t3 = 2;} + """ + f = io.StringIO(src) diag = ppci.DiagnosticsManager() outs = outstream.TextOutputStream() - self.assertTrue(zcc.zcc(src, outs, diag)) + self.assertTrue(zcc.zcc([f], outs, diag)) code = outs.getSection('code') self.assertEqual(0x08000000, code.address) data = outs.getSection('data')