# HG changeset patch # User Windel Bouwman # Date 1405369435 -7200 # Node ID 0374c65cb437ddfe40cfdb44a902e64a8dfe8988 # Parent a7c444404df98b4ad7338859b78f154d2cdbb441 Move compiler to seperate repo diff -r a7c444404df9 -r 0374c65cb437 python/baselex.py --- a/python/baselex.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ - -import re -from ppci import Token, CompilerError, SourceLocation -from pyyacc import EOF - - -class BaseLexer: - """ Base class for a lexer. This class can be overridden to create a - lexer. This class handles the regular expression generation and - source position accounting. - """ - def __init__(self, tok_spec): - tok_re = '|'.join('(?P<{}>{})'.format(pair[0], pair[1]) for pair in tok_spec) - self.gettok = re.compile(tok_re).match - self.func_map = {pair[0]: pair[2] for pair in tok_spec} - self.filename = None - - def feed(self, txt): - """ Feeds the lexer with extra input """ - self.tokens = self.tokenize(txt) - - def tokenize(self, txt): - """ Generator that generates tokens from text - It does not yield the EOF token. - """ - self.line = 1 - self.line_start = 0 - self.pos = 0 - mo = self.gettok(txt) - while mo: - typ = mo.lastgroup - val = mo.group(typ) - column = mo.start() - self.line_start - length = mo.end() - mo.start() - loc = SourceLocation(self.filename, self.line, column, length) - func = self.func_map[typ] - if func: - res = func(typ, val) - if res: - typ, val = res - yield Token(typ, val, loc) - self.pos = mo.end() - mo = self.gettok(txt, self.pos) - if len(txt) != self.pos: - raise CompilerError('Lex fault at {}'.format(txt[self.pos:])) - - def newline(self): - """ Enters a new line """ - self.line_start = self.pos - self.line = self.line + 1 - - def next_token(self): - try: - return self.tokens.__next__() - except StopIteration: - loc = SourceLocation(self.filename, self.line, 0, 0) - return Token(EOF, EOF, loc) diff -r a7c444404df9 -r 0374c65cb437 python/burg.x --- a/python/burg.x Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ - -%tokens ':' ';' '(' ')' ',' string id number '%%' '%terminal' header - -%% - -burgdef: header '%%' directives '%%' rules { self.system.header_lines = $1.val }; - -directives: - | directives directive; - -directive: termdef; - -termdef: '%terminal' termids; - -termids: - | termids termid; - -termid: id { self.system.add_terminal($1.val) }; - -rules: - | rules rule; - -rule: id ':' tree cost string { self.system.add_rule($1.val, $3, $4, None, $5.val) }; -rule: id ':' tree cost string string { self.system.add_rule($1.val, $3, $4, $5.val, $6.val) }; - -cost: number { return $1.val }; - -tree: id { return self.system.tree($1.val) } - | id '(' tree ')' { return self.system.tree($1.val, $3) } - | id '(' tree ',' tree ')' { return self.system.tree($1.val, $3, $5) }; - diff -r a7c444404df9 -r 0374c65cb437 python/c3c.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/c3c.py Mon Jul 14 22:23:55 2014 +0200 @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +import sys +import argparse +import logging + +from ppci.buildfunctions import c3toir +import ppci.buildtasks # Include not used, but it registers build tasks. +import ppci +from zcc import logLevel + + +def make_parser(): + parser = argparse.ArgumentParser(description='c3 Compiler') + + parser.add_argument('--log', help='Log level (INFO,DEBUG,[WARN])', + type=logLevel, default='INFO') + + parser.add_argument('--target', help='target machine', default="arm") + parser.add_argument('-o', '--output', help='target machine', + type=argparse.FileType('w'), default=sys.stdout) + parser.add_argument('-i', '--include', action='append', + help='include file', default=[]) + parser.add_argument('sources', metavar='source', + help='source file', nargs='+') + return parser + + +def main(args): + # Configure some logging: + logging.getLogger().setLevel(logging.DEBUG) + ch = logging.StreamHandler() + ch.setFormatter(logging.Formatter(ppci.logformat)) + ch.setLevel(args.log) + logging.getLogger().addHandler(ch) + + res = c3toir(args.sources, args.include, args.target) + writer = ppci.irutils.Writer() + for ir_module in res: + writer.write(ir_module, args.output) + + logging.getLogger().removeHandler(ch) + return res + + +if __name__ == '__main__': + parser = make_parser() + arguments = parser.parse_args() + sys.exit(main(arguments)) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/__init__.py --- a/python/ppci/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -# File to make this directory a package. - -import sys -import os - -version = '0.0.1' - -# Assert python version: -if sys.version_info.major != 3: - print("Needs to be run in python version 3.x") - sys.exit(1) - -from .common import SourceLocation, SourceRange, Token -from .common import CompilerError, DiagnosticsManager - -logformat='%(asctime)s|%(levelname)s|%(name)s|%(message)s' - -def same_dir(full_path, filename): - return os.path.join(os.path.dirname(os.path.abspath(full_path)), filename) - - -def make_num(txt): - if txt.startswith('0x'): - return int(txt[2:], 16) - else: - return int(txt) - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/assembler.py --- a/python/ppci/assembler.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,133 +0,0 @@ - -import re -import pyyacc -from baselex import BaseLexer -from . import Token, CompilerError, SourceLocation, make_num -from .target import Target, Label - - -def bit_type(value): - assert value < (2**32) - assert value >= 0 - t = 'val32' - for n in [16, 12, 8, 5, 3]: - if value < (2**n): - t = 'val{}'.format(n) - return t - - -class AsmLexer(BaseLexer): - def __init__(self, kws): - tok_spec = [ - ('REAL', r'\d+\.\d+', lambda typ, val: (typ, float(val))), - ('HEXNUMBER', r'0x[\da-fA-F]+', self.handle_number), - ('NUMBER', r'\d+', self.handle_number), - ('ID', r'[A-Za-z][A-Za-z\d_]*', self.handle_id), - ('SKIP', r'[ \t]', None), - ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{', lambda typ, val: (val, val)), - ('STRING', r"'.*?'", lambda typ, val: (typ, val[1:-1])), - ('COMMENT', r";.*", None) - ] - super().__init__(tok_spec) - self.kws = kws - - def handle_id(self, typ, val): - if val.lower() in self.kws: - typ = val.lower() - return (typ, val) - - def handle_number(self, typ, val): - val = make_num(val) - typ = bit_type(val) - return typ, val - - -class Parser: - def add_rule(self, prod, rhs, f): - """ Helper function to add a rule, why this is required? """ - if prod == 'instruction': - def f_wrap(*args): - i = f(args) - if i: - self.emit(i) - else: - def f_wrap(*rhs): - return f(rhs) - self.g.add_production(prod, rhs, f_wrap) - - def __init__(self, kws, instruction_rules, emit): - # Construct a parser given a grammar: - tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', '=', - pyyacc.EPS, 'COMMENT', '{', '}', - pyyacc.EOF, 'val32', 'val16', 'val12', 'val8', 'val5', 'val3'] - tokens2.extend(kws) - self.kws = kws - g = pyyacc.Grammar(tokens2) - self.g = g - # Global structure of assembly line: - g.add_production('asmline', ['asmline2']) - g.add_production('asmline', ['asmline2', 'COMMENT']) - g.add_production('asmline2', ['label', 'instruction']) - g.add_production('asmline2', ['instruction']) - g.add_production('asmline2', ['label']) - g.add_production('asmline2', []) - g.add_production('label', ['ID', ':'], self.p_label) - - # Add instruction rules for the target in question: - for prod, rhs, f in instruction_rules: - self.add_rule(prod, rhs, f) - - #g.add_production('instruction', []) - g.start_symbol = 'asmline' - self.emit = emit - self.p = g.generate_parser() - # print('length of table:', len(self.p.action_table)) - - # Parser handlers: - - def p_label(self, lname, cn): - lab = Label(lname.val) - self.emit(lab) - - def parse(self, lexer): - self.p.parse(lexer) - - -class BaseAssembler: - """ Assembler base class, inherited by assemblers specific for a target """ - def __init__(self, target): - self.target = target - assert isinstance(target, Target) - - def make_parser(self): - self.parser = Parser(self.target.asm_keywords, self.target.assembler_rules, self.emit) - self.lexer = AsmLexer(self.target.asm_keywords) - - def prepare(self): - pass - - def emit(self, *args): - self.stream.emit(*args) - - # Top level interface: - def parse_line(self, line): - """ Parse line into assembly instructions """ - self.lexer.feed(line) - self.parser.parse(self.lexer) - - def assemble(self, asmsrc, stream): - """ Assemble this source snippet """ - if type(asmsrc) is str: - pass - elif hasattr(asmsrc, 'read'): - asmsrc2 = asmsrc.read() - asmsrc.close() - asmsrc = asmsrc2 - # TODO: use generic newline?? - # TODO: the bothersome newline ... - self.stream = stream - for line in asmsrc.split('\n'): - self.parse_line(line) - - def flush(self): - pass diff -r a7c444404df9 -r 0374c65cb437 python/ppci/bitfun.py --- a/python/ppci/bitfun.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - - -def rotate_right(v, n): - """ bit-wise Rotate right n times """ - mask = (2**n) - 1 - mask_bits = v & mask - return (v >> n) | (mask_bits << (32 - n)) - -def rotate_left(v, n): - assert n >= 0 - assert n < 32 - return rotate_right(v, 32 - n) - -def encode_imm32(v): - """ Bundle 32 bit value into 4 bits rotation and 8 bits value - """ - for i in range(0, 16): - v2 = rotate_left(v, i*2) - if (v2 & 0xFFFFFF00) == 0: - rotation = i - val = v2 & 0xFF - x = (rotation << 8) | val - return x - raise Exception("Invalid value {}".format(v)) - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/buildfunctions.py --- a/python/ppci/buildfunctions.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,152 +0,0 @@ - -""" - This module contains a set of handy functions to invoke compilation, - linking - and assembling. -""" - -import logging -from .target import Target -from .c3 import Builder -from .irutils import Verifier -from .codegen import CodeGenerator -from .transform import CleanPass, RemoveAddZero -from .linker import Linker -from .layout import Layout, load_layout -from .target.target_list import targets -from .outstream import BinaryOutputStream -from .objectfile import ObjectFile, load_object -from . import DiagnosticsManager -from .tasks import TaskError, TaskRunner -from .recipe import RecipeLoader - - -def fix_target(tg): - """ Try to return an instance of the Target class """ - if isinstance(tg, Target): - return tg - elif isinstance(tg, str): - if tg in targets: - return targets[tg] - raise TaskError('Invalid target {}'.format(tg)) - - -def fix_file(f): - """ Determine if argument is a file like object or make it so! """ - if hasattr(f, 'read'): - # Assume this is a file like object - return f - elif isinstance(f, str): - return open(f, 'r') - else: - raise TaskError('cannot use {} as input'.format(f)) - - -def fix_object(o): - if isinstance(o, ObjectFile): - return o - elif isinstance(o, str): - with open(o, 'r') as f: - return load_object(f) - else: - raise TaskError('Cannot use {} as objectfile'.format(o)) - - -def fix_layout(l): - if isinstance(l, Layout): - return l - elif hasattr(l, 'read'): - # Assume file handle - return load_layout(l) - elif isinstance(l, str): - with open(l, 'r') as f: - return load_layout(f) - else: - raise TaskError('Cannot use {} as layout'.format(l)) - - -def construct(buildfile, targets=[]): - recipe_loader = RecipeLoader() - try: - project = recipe_loader.load_file(buildfile) - except OSError: - raise TaskError('Could not construct {}'.format(buildfile)) - project = None - - if project: - runner = TaskRunner() - res = runner.run(project, targets) - else: - res = 1 - - return res - - -def assemble(source, target): - """ Invoke the assembler on the given source, returns an object containing - the output. """ - logger = logging.getLogger('assemble') - target = fix_target(target) - source = fix_file(source) - output = ObjectFile() - assembler = target.assembler - logger.debug('Assembling into code section') - ostream = BinaryOutputStream(output) - ostream.select_section('code') - assembler.prepare() - assembler.assemble(source, ostream) - assembler.flush() - return output - - -def c3compile(sources, includes, target): - """ Compile a set of sources for the given target """ - logger = logging.getLogger('c3c') - logger.debug('C3 compilation started') - target = fix_target(target) - sources = [fix_file(fn) for fn in sources] - includes = [fix_file(fn) for fn in includes] - output = ObjectFile() - diag = DiagnosticsManager() - c3b = Builder(diag, target) - cg = CodeGenerator(target) - - output_stream = BinaryOutputStream(output) - - for ircode in c3b.build(sources, includes): - if not ircode: - # Something went wrong, do not continue the code generation - continue - - d = {'ircode': ircode} - logger.debug('Verifying code {}'.format(ircode), extra=d) - Verifier().verify(ircode) - - # Optimization passes: - CleanPass().run(ircode) - Verifier().verify(ircode) - RemoveAddZero().run(ircode) - Verifier().verify(ircode) - CleanPass().run(ircode) - Verifier().verify(ircode) - - # Code generation: - d = {'ircode': ircode} - logger.debug('Starting code generation for {}'.format(ircode), extra=d) - - cg.generate(ircode, output_stream) - - if not c3b.ok: - diag.printErrors() - raise TaskError('Compile errors') - return output - - -def link(objects, layout, target): - """ Links the iterable of objects into one using the given layout """ - objects = list(map(fix_object, objects)) - layout = fix_layout(layout) - target = fix_target(target) - linker = Linker(target) - output_obj = linker.link(objects, layout) - return output_obj diff -r a7c444404df9 -r 0374c65cb437 python/ppci/buildtasks.py --- a/python/ppci/buildtasks.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ - -""" -Defines task classes that can compile, link etc.. -Task can depend upon one another. -""" - -from .tasks import Task, TaskError, register_task -from .buildfunctions import c3compile, link, assemble, fix_object, construct -from pyyacc import ParserException -from . import CompilerError - - -@register_task("empty") -class EmptyTask(Task): - """ Basic task that does nothing """ - def run(self): - pass - - -@register_task("echo") -class EchoTask(Task): - """ Simple task that echoes a message """ - def run(self): - message = self.arguments['message'] - print(message) - - -@register_task("property") -class Property(Task): - """ Sets a property to a value """ - def run(self): - name = self.arguments['name'] - value = self.arguments['value'] - self.target.project.set_property(name, value) - - -@register_task("build") -class ConstructTask(Task): - """ Builds another build description file (build.xml) """ - def run(self): - project = self.get_argument('file') - construct(project) - - -@register_task("assemble") -class AssembleTask(Task): - """ Task that can runs the assembler over the source and enters the - output into an object file """ - - def run(self): - target = self.get_argument('target') - source = self.relpath(self.get_argument('source')) - output_filename = self.relpath(self.get_argument('output')) - - try: - output = assemble(source, target) - except ParserException as e: - raise TaskError('Error during assembly:' + str(e)) - except CompilerError as e: - raise TaskError('Error during assembly:' + str(e)) - except OSError as e: - raise TaskError('Error:' + str(e)) - with open(output_filename, 'w') as f: - output.save(f) - self.logger.debug('Assembling finished') - - -@register_task("compile") -class C3cTask(Task): - """ Task that compiles C3 source for some target into an object file """ - def run(self): - target = self.get_argument('target') - sources = self.open_file_set(self.arguments['sources']) - output_filename = self.relpath(self.get_argument('output')) - if 'includes' in self.arguments: - includes = self.open_file_set(self.arguments['includes']) - else: - includes = [] - - output = c3compile(sources, includes, target) - # Store output: - with open(output_filename, 'w') as output_file: - output.save(output_file) - - -@register_task("link") -class LinkTask(Task): - """ Link together a collection of object files """ - def run(self): - layout = self.relpath(self.get_argument('layout')) - target = self.get_argument('target') - objects = self.open_file_set(self.get_argument('objects')) - output_filename = self.relpath(self.get_argument('output')) - - try: - output_obj = link(objects, layout, target) - except CompilerError as e: - raise TaskError(e.msg) - - # Store output: - with open(output_filename, 'w') as output_file: - output_obj.save(output_file) - - -@register_task("objcopy") -class ObjCopyTask(Task): - def run(self): - image_name = self.get_argument('imagename') - output_filename = self.relpath(self.get_argument('output')) - object_filename = self.relpath(self.get_argument('objectfile')) - - obj = fix_object(object_filename) - image = obj.get_image(image_name) - with open(output_filename, 'wb') as output_file: - output_file.write(image) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/__init__.py --- a/python/ppci/c3/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -""" This is the C3 language front end. """ - -from .parser import Parser -from .lexer import Lexer -from .codegenerator import CodeGenerator -from .visitor import Visitor -from .visitor import AstPrinter -from .builder import Builder diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/astnodes.py --- a/python/ppci/c3/astnodes.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,421 +0,0 @@ -""" -AST (abstract syntax tree) nodes for the c3 language. -The tree is build by the parser. -Then it is checked -Finally code is generated from it. -""" - - -class Node: - """ Base class of all nodes in a AST """ - pass - - -# Variables, parameters, local variables, constants and named types: -class Symbol(Node): - """ Symbol is the base class for all named things like variables, - functions, constants and types and modules """ - def __init__(self, name): - self.name = name - self.refs = [] - - def addRef(self, r): - self.refs.append(r) - - @property - def References(self): - return self.refs - - -# Modules -class Package(Symbol): - def __init__(self, name, loc): - super().__init__(name) - self.loc = loc - self.declarations = [] - self.imports = [] - - def add_declaration(self, decl): - self.declarations.append(decl) - if isinstance(decl, Function): - decl.package = self - - @property - def Types(self): - return self.innerScope.Types - - @property - def Functions(self): - return self.innerScope.Functions - - def __repr__(self): - return 'MODULE {}'.format(self.name) - - -class Type(Node): - """ Base class of all types """ - pass - - -class NamedType(Type, Symbol): - """ Some types are named, for example a user defined type (typedef) - and built in types. That is why this class derives from both Type - and Symbol. """ - def __init__(self, name): - Symbol.__init__(self, name) - - -class BaseType(NamedType): - """ Built in type """ - def __init__(self, name): - super().__init__(name) - - def __repr__(self): - return '{}'.format(self.name) - - -class FunctionType(Type): - """ Function blueprint, defines argument types and return type """ - def __init__(self, parametertypes, returntype): - self.parametertypes = parametertypes - self.returntype = returntype - - def __repr__(self): - params = ', '.join([str(v) for v in self.parametertypes]) - return '{1} f({0})'.format(params, self.returntype) - - -class PointerType(Type): - """ A type that points to data of some other type """ - def __init__(self, ptype): - assert isinstance(ptype, Type) or isinstance(ptype, Expression) - self.ptype = ptype - - def __repr__(self): - return '({}*)'.format(self.ptype) - - -class StructField: - """ Field of a struct type """ - def __init__(self, name, typ): - assert type(name) is str - self.name = name - self.typ = typ - - def __repr__(self): - return 'Member {}'.format(self.name) - - -class StructureType(Type): - """ Struct type consisting of several named members """ - def __init__(self, mems): - self.mems = mems - assert all(type(mem) is StructField for mem in mems) - - def hasField(self, name): - for mem in self.mems: - if name == mem.name: - return True - return False - - def fieldType(self, name): - return self.findField(name).typ - - def fieldOffset(self, name): - return self.findField(name).offset - - def findField(self, name): - for mem in self.mems: - if name == mem.name: - return mem - raise KeyError(name) - - def __repr__(self): - return 'STRUCT' - - -class ArrayType(Type): - """ Array type """ - def __init__(self, element_type, size): - self.element_type = element_type - self.size = size - - def __repr__(self): - return 'ARRAY {}'.format(self.size) - - -class DefinedType(NamedType): - """ A named type indicating another type """ - def __init__(self, name, typ, loc): - assert isinstance(name, str) - super().__init__(name) - self.typ = typ - self.loc = loc - - def __repr__(self): - return 'Named type {0} of type {1}'.format(self.name, self.typ) - - -class Constant(Symbol): - """ Constant definition """ - def __init__(self, name, typ, value): - super().__init__(name) - self.typ = typ - self.value = value - - def __repr__(self): - return 'CONSTANT {0} = {1}'.format(self.name, self.value) - - -class Variable(Symbol): - def __init__(self, name, typ): - super().__init__(name) - self.typ = typ - self.isLocal = False - self.isParameter = False - - def __repr__(self): - return 'Var {} [{}]'.format(self.name, self.typ) - - -class LocalVariable(Variable): - def __init__(self, name, typ): - super().__init__(name, typ) - self.isLocal = True - - -class FormalParameter(Variable): - def __init__(self, name, typ): - super().__init__(name, typ) - self.isParameter = True - - -# Procedure types -class Function(Symbol): - """ Actual implementation of a function """ - def __init__(self, name, loc): - super().__init__(name) - self.loc = loc - self.declarations = [] - - def add_declaration(self, decl): - self.declarations.append(decl) - - def __repr__(self): - return 'Func {}'.format(self.name) - - -# Operations / Expressions: -class Expression(Node): - def __init__(self, loc): - self.loc = loc - - -class Sizeof(Expression): - def __init__(self, typ, loc): - super().__init__(loc) - self.query_typ = typ - - -class Deref(Expression): - def __init__(self, ptr, loc): - super().__init__(loc) - assert isinstance(ptr, Expression) - self.ptr = ptr - - def __repr__(self): - return 'DEREF {}'.format(self.ptr) - - -class TypeCast(Expression): - def __init__(self, to_type, x, loc): - super().__init__(loc) - self.to_type = to_type - self.a = x - - def __repr__(self): - return 'TYPECAST {}'.format(self.to_type) - - -class Member(Expression): - """ Field reference of some object, can also be package selection """ - def __init__(self, base, field, loc): - super().__init__(loc) - assert isinstance(base, Expression) - assert isinstance(field, str) - self.base = base - self.field = field - - def __repr__(self): - return 'MEMBER {}.{}'.format(self.base, self.field) - - -class Index(Expression): - """ Index something, for example an array """ - def __init__(self, base, i, loc): - super().__init__(loc) - self.base = base - self.i = i - - def __repr__(self): - return 'Index {}'.format(self.i) - - -class Unop(Expression): - """ Operation on one operand """ - def __init__(self, op, a, loc): - super().__init__(loc) - assert isinstance(a, Expression) - assert isinstance(op, str) - self.a = a - self.op = op - - def __repr__(self): - return 'UNOP {}'.format(self.op) - - -class Binop(Expression): - """ Expression taking two operands and one operator """ - def __init__(self, a, op, b, loc): - super().__init__(loc) - assert isinstance(a, Expression), type(a) - assert isinstance(b, Expression) - assert isinstance(op, str) - self.a = a - self.b = b - self.op = op # Operation: '+', '-', '*', '/', 'mod' - - def __repr__(self): - return 'BINOP {}'.format(self.op) - - -class Identifier(Expression): - """ Reference to some identifier, can be anything from package, variable - function or type, any named thing! """ - def __init__(self, target, loc): - super().__init__(loc) - self.target = target - - def __repr__(self): - return 'ID {}'.format(self.target) - - -class Literal(Expression): - """ Constant value or string """ - def __init__(self, val, loc): - super().__init__(loc) - self.val = val - - def __repr__(self): - return 'LITERAL {}'.format(self.val) - - -class FunctionCall(Expression): - """ Call to a some function """ - def __init__(self, proc, args, loc): - super().__init__(loc) - self.proc = proc - self.args = args - - def __repr__(self): - return 'CALL {0} '.format(self.proc) - - -# Statements -class Statement(Node): - """ Base class of all statements """ - def __init__(self, loc): - self.loc = loc - - -class Empty(Statement): - """ Empty statement which does nothing! """ - def __init__(self): - super().__init__(None) - - def __repr__(self): - return 'NOP' - - -class Compound(Statement): - """ Statement consisting of a sequence of other statements """ - def __init__(self, statements): - super().__init__(None) - self.statements = statements - for s in self.statements: - assert isinstance(s, Statement) - - def __repr__(self): - return 'COMPOUND STATEMENT' - - -class Return(Statement): - def __init__(self, expr, loc): - super().__init__(loc) - self.expr = expr - - def __repr__(self): - return 'RETURN STATEMENT' - - -class Assignment(Statement): - def __init__(self, lval, rval, loc): - super().__init__(loc) - assert isinstance(lval, Expression) - assert isinstance(rval, Expression) - self.lval = lval - self.rval = rval - - def __repr__(self): - return 'ASSIGNMENT' - - -class ExpressionStatement(Statement): - def __init__(self, ex, loc): - super().__init__(loc) - self.ex = ex - - def __repr__(self): - return 'Epression' - - -class If(Statement): - def __init__(self, condition, truestatement, falsestatement, loc): - super().__init__(loc) - self.condition = condition - self.truestatement = truestatement - self.falsestatement = falsestatement - - def __repr__(self): - return 'IF-statement' - - -class Switch(Statement): - def __init__(self, condition, loc): - super().__init__(loc) - self.condition = condition - - def __repr__(self): - return 'Switch on {}'.format(self.condition) - - -class While(Statement): - def __init__(self, condition, statement, loc): - super().__init__(loc) - self.condition = condition - self.statement = statement - - def __repr__(self): - return 'WHILE-statement' - - -class For(Statement): - def __init__(self, init, condition, final, statement, loc): - super().__init__(loc) - self.init = init - self.condition = condition - self.final = final - self.statement = statement - - def __repr__(self): - return 'FOR-statement' diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/builder.py --- a/python/ppci/c3/builder.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -import logging -from .lexer import Lexer -from .parser import Parser -from .codegenerator import CodeGenerator -from .scope import createTopScope, Scope -from .visitor import AstPrinter, Visitor -from .astnodes import Package, Function, Identifier, Symbol - - -class C3Pass: - def __init__(self, diag): - self.diag = diag - self.logger = logging.getLogger('c3') - self.visitor = Visitor() - - def error(self, msg, loc=None): - self.pkg.ok = False - self.diag.error(msg, loc) - - def visit(self, pkg, pre, post): - self.visitor.visit(pkg, pre, post) - - -class ScopeFiller(C3Pass): - scoped_types = [Package, Function] - - def __init__(self, diag, topScope, packages): - super().__init__(diag) - self.topScope = topScope - self.packages = packages - - """ Scope is attached to the correct modules. """ - def addScope(self, pkg): - self.logger.debug('Adding scoping to package {}'.format(pkg.name)) - self.pkg = pkg - # Prepare top level scope and set scope to all objects: - self.scopeStack = [self.topScope] - modScope = Scope(self.CurrentScope) - self.scopeStack.append(modScope) - self.visit(pkg, self.enterScope, self.quitScope) - assert len(self.scopeStack) == 2 - - self.logger.debug('Resolving imports for package {}'.format(pkg.name)) - # Handle imports: - for i in pkg.imports: - if i not in self.packages: - self.error('Cannot import {}'.format(i)) - continue - pkg.scope.addSymbol(self.packages[i]) - - @property - def CurrentScope(self): - return self.scopeStack[-1] - - def addSymbol(self, sym): - if self.CurrentScope.hasSymbol(sym.name): - self.error('Redefinition of {0}'.format(sym.name), sym.loc) - else: - self.CurrentScope.addSymbol(sym) - - def enterScope(self, sym): - # Attach scope to references: - if type(sym) is Identifier: - sym.scope = self.CurrentScope - - # Add symbols to current scope: - if isinstance(sym, Symbol): - self.addSymbol(sym) - sym.scope = self.CurrentScope - - # Create subscope for items creating a scope: - if type(sym) in self.scoped_types: - newScope = Scope(self.CurrentScope) - self.scopeStack.append(newScope) - sym.innerScope = self.CurrentScope - - def quitScope(self, sym): - # Pop out of scope: - if type(sym) in self.scoped_types: - self.scopeStack.pop(-1) - - -class Builder: - """ - Generates IR-code from c3 source. - Reports errors to the diagnostics system. - """ - def __init__(self, diag, target): - self.logger = logging.getLogger('c3') - self.diag = diag - self.lexer = Lexer(diag) - self.parser = Parser(diag) - self.cg = CodeGenerator(diag) - self.topScope = createTopScope(target) # Scope with built in types - - def build(self, srcs, imps=[]): - """ Create IR-code from sources """ - self.logger.debug('Building {} source files'.format(len(srcs + imps))) - iter(srcs) # Check if srcs are iterable - iter(imps) - self.ok = True - self.pkgs = {} - - # Lexing and parsing stage (phase 1) - def doParse(src): - tokens = self.lexer.lex(src) - pkg = self.parser.parseSource(tokens) - return pkg - s_pkgs = list(map(doParse, srcs)) - i_pkgs = list(map(doParse, imps)) - all_pkgs = s_pkgs + i_pkgs - if not all(all_pkgs): - self.ok = False - self.logger.debug('Parsing failed') - return - - self.logger.debug('Parsed {} packages'.format(len(all_pkgs))) - - # Fix scopes and package refs (phase 1.5) - packages = {pkg.name: pkg for pkg in all_pkgs} - self.pkgs = packages - - scopeFiller = ScopeFiller(self.diag, self.topScope, packages) - # Fix scopes: - for pkg in all_pkgs: - scopeFiller.addScope(pkg) - if not all(pkg.ok for pkg in all_pkgs): - self.ok = False - self.logger.debug('Scope filling failed') - return - - # Generate intermediate code (phase 2) - # Only return ircode when everything is OK - for pkg in s_pkgs: - yield self.cg.gencode(pkg) - if not all(pkg.ok for pkg in all_pkgs): - self.logger.debug('Code generation failed') - self.ok = False - self.logger.debug('C3 build complete!') diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/codegenerator.py --- a/python/ppci/c3/codegenerator.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,552 +0,0 @@ -import logging -import struct -from .. import ir -from .. import irutils -from . import astnodes as ast - - -class SemanticError(Exception): - """ Error thrown when a semantic issue is observed """ - def __init__(self, msg, loc): - super().__init__() - self.msg = msg - self.loc = loc - - -class CodeGenerator: - """ - Generates intermediate (IR) code from a package. The entry function is - 'genModule'. The main task of this part is to rewrite complex control - structures, such as while and for loops into simple conditional - jump statements. Also complex conditional statements are simplified. - Such as 'and' and 'or' statements are rewritten in conditional jumps. - And structured datatypes are rewritten. - - Type checking is done in one run with code generation. - """ - def __init__(self, diag): - self.logger = logging.getLogger('c3cgen') - self.builder = irutils.Builder() - self.diag = diag - - def gencode(self, pkg): - """ Generate code for a single module """ - self.builder.prepare() - assert type(pkg) is ast.Package - self.pkg = pkg - self.intType = pkg.scope['int'] - self.boolType = pkg.scope['bool'] - self.pointerSize = 4 - self.logger.debug('Generating ir-code for {}'.format(pkg.name), - extra={'c3_ast': pkg}) - self.varMap = {} # Maps variables to storage locations. - self.builder.m = ir.Module(pkg.name) - try: - for typ in pkg.Types: - self.check_type(typ) - # Only generate function if function contains a body: - real_functions = list(filter( - lambda f: f.body, pkg.Functions)) - for v in pkg.innerScope.Variables: - v2 = ir.GlobalVariable(v.name, ir.i32) - self.varMap[v] = v2 - if not v.isLocal: - self.builder.m.add_variable(v2) - for s in real_functions: - self.gen_function(s) - except SemanticError as e: - self.error(e.msg, e.loc) - if self.pkg.ok: - return self.builder.m - - def error(self, msg, loc=None): - self.pkg.ok = False - self.diag.error(msg, loc) - - def gen_function(self, fn): - # TODO: handle arguments - f = self.builder.new_function(fn.name) - f.return_value = self.builder.newTemp() - self.builder.setFunction(f) - l2 = self.builder.newBlock() - self.builder.emit(ir.Jump(l2)) - self.builder.setBlock(l2) - # generate room for locals: - - for sym in fn.innerScope: - self.check_type(sym.typ) - if sym.isParameter: - p = ir.Parameter(sym.name, ir.i32) - variable = ir.LocalVariable(sym.name + '_copy', ir.i32) - f.addParameter(p) - f.addLocal(variable) - # Move parameter into local copy: - self.builder.emit(ir.Move(ir.Mem(variable), p)) - elif sym.isLocal: - variable = ir.LocalVariable(sym.name, ir.i32) - f.addLocal(variable) - elif isinstance(sym, ast.Variable): - variable = ir.LocalVariable(sym.name, ir.i32) - f.addLocal(variable) - else: - raise NotImplementedError('{}'.format(sym)) - self.varMap[sym] = variable - - self.gen_stmt(fn.body) - self.builder.emit(ir.Move(f.return_value, ir.Const(0))) - self.builder.emit(ir.Jump(f.epiloog)) - self.builder.setFunction(None) - - def gen_stmt(self, code): - """ Generate code for a statement """ - try: - assert isinstance(code, ast.Statement) - self.builder.setLoc(code.loc) - if type(code) is ast.Compound: - for s in code.statements: - self.gen_stmt(s) - elif type(code) is ast.Empty: - pass - elif type(code) is ast.Assignment: - self.gen_assignment_stmt(code) - elif type(code) is ast.ExpressionStatement: - self.builder.emit(ir.Exp(self.gen_expr_code(code.ex))) - elif type(code) is ast.If: - self.gen_if_stmt(code) - elif type(code) is ast.Return: - re = self.gen_expr_code(code.expr) - self.builder.emit(ir.Move(self.builder.fn.return_value, re)) - self.builder.emit(ir.Jump(self.builder.fn.epiloog)) - b = self.builder.newBlock() - self.builder.setBlock(b) - elif type(code) is ast.While: - self.gen_while(code) - elif type(code) is ast.For: - self.gen_for_stmt(code) - elif type(code) is ast.Switch: - raise NotImplementedError('Unknown stmt {}'.format(code)) - else: - raise NotImplementedError('Unknown stmt {}'.format(code)) - except SemanticError as e: - self.error(e.msg, e.loc) - - def gen_assignment_stmt(self, code): - """ Generate code for assignment statement """ - lval = self.gen_expr_code(code.lval) - rval = self.gen_expr_code(code.rval) - if not self.equal_types(code.lval.typ, code.rval.typ): - raise SemanticError('Cannot assign {} to {}' - .format(code.rval.typ, code.lval.typ), - code.loc) - if not code.lval.lvalue: - raise SemanticError('No valid lvalue {}'.format(code.lval), - code.lval.loc) - self.builder.emit(ir.Move(lval, rval)) - - def gen_if_stmt(self, code): - """ Generate code for if statement """ - true_block = self.builder.newBlock() - bbfalse = self.builder.newBlock() - te = self.builder.newBlock() - self.gen_cond_code(code.condition, true_block, bbfalse) - self.builder.setBlock(true_block) - self.gen_stmt(code.truestatement) - self.builder.emit(ir.Jump(te)) - self.builder.setBlock(bbfalse) - self.gen_stmt(code.falsestatement) - self.builder.emit(ir.Jump(te)) - self.builder.setBlock(te) - - def gen_while(self, code): - """ Generate code for while statement """ - bbdo = self.builder.newBlock() - test_block = self.builder.newBlock() - final_block = self.builder.newBlock() - self.builder.emit(ir.Jump(test_block)) - self.builder.setBlock(test_block) - self.gen_cond_code(code.condition, bbdo, final_block) - self.builder.setBlock(bbdo) - self.gen_stmt(code.statement) - self.builder.emit(ir.Jump(test_block)) - self.builder.setBlock(final_block) - - def gen_for_stmt(self, code): - """ Generate for statement code """ - bbdo = self.builder.newBlock() - test_block = self.builder.newBlock() - final_block = self.builder.newBlock() - self.gen_stmt(code.init) - self.builder.emit(ir.Jump(test_block)) - self.builder.setBlock(test_block) - self.gen_cond_code(code.condition, bbdo, final_block) - self.builder.setBlock(bbdo) - self.gen_stmt(code.statement) - self.gen_stmt(code.final) - self.builder.emit(ir.Jump(test_block)) - self.builder.setBlock(final_block) - - def gen_cond_code(self, expr, bbtrue, bbfalse): - """ Generate conditional logic. - Implement sequential logical operators. """ - if type(expr) is ast.Binop: - if expr.op == 'or': - l2 = self.builder.newBlock() - self.gen_cond_code(expr.a, bbtrue, l2) - if not self.equal_types(expr.a.typ, self.boolType): - raise SemanticError('Must be boolean', expr.a.loc) - self.builder.setBlock(l2) - self.gen_cond_code(expr.b, bbtrue, bbfalse) - if not self.equal_types(expr.b.typ, self.boolType): - raise SemanticError('Must be boolean', expr.b.loc) - elif expr.op == 'and': - l2 = self.builder.newBlock() - self.gen_cond_code(expr.a, l2, bbfalse) - if not self.equal_types(expr.a.typ, self.boolType): - self.error('Must be boolean', expr.a.loc) - self.builder.setBlock(l2) - self.gen_cond_code(expr.b, bbtrue, bbfalse) - if not self.equal_types(expr.b.typ, self.boolType): - raise SemanticError('Must be boolean', expr.b.loc) - elif expr.op in ['==', '>', '<', '!=', '<=', '>=']: - ta = self.gen_expr_code(expr.a) - tb = self.gen_expr_code(expr.b) - if not self.equal_types(expr.a.typ, expr.b.typ): - raise SemanticError('Types unequal {} != {}' - .format(expr.a.typ, expr.b.typ), - expr.loc) - self.builder.emit(ir.CJump(ta, expr.op, tb, bbtrue, bbfalse)) - else: - raise SemanticError('non-bool: {}'.format(expr.op), expr.loc) - expr.typ = self.boolType - elif type(expr) is ast.Literal: - self.gen_expr_code(expr) - if expr.val: - self.builder.emit(ir.Jump(bbtrue)) - else: - self.builder.emit(ir.Jump(bbfalse)) - else: - raise NotImplementedError('Unknown cond {}'.format(expr)) - - # Check that the condition is a boolean value: - if not self.equal_types(expr.typ, self.boolType): - self.error('Condition must be boolean', expr.loc) - - def gen_expr_code(self, expr): - """ Generate code for an expression. Return the generated ir-value """ - assert isinstance(expr, ast.Expression) - if type(expr) is ast.Binop: - expr.lvalue = False - if expr.op in ['+', '-', '*', '/', '<<', '>>', '|', '&']: - ra = self.gen_expr_code(expr.a) - rb = self.gen_expr_code(expr.b) - if self.equal_types(expr.a.typ, self.intType) and \ - self.equal_types(expr.b.typ, self.intType): - expr.typ = expr.a.typ - elif self.equal_types(expr.b.typ, self.intType) and \ - type(expr.a.typ) is ast.PointerType: - # Special case for pointer arithmatic TODO: coerce! - expr.typ = expr.a.typ - else: - raise SemanticError('Can only add integers', expr.loc) - else: - raise NotImplementedError("Cannot use equality as expressions") - return ir.Binop(ra, expr.op, rb, "op", ir.i32) - elif type(expr) is ast.Unop: - if expr.op == '&': - ra = self.gen_expr_code(expr.a) - expr.typ = ast.PointerType(expr.a.typ) - if not expr.a.lvalue: - raise SemanticError('No valid lvalue', expr.a.loc) - expr.lvalue = False - assert type(ra) is ir.Mem - return ra.e - else: - raise NotImplementedError('Unknown unop {0}'.format(expr.op)) - elif type(expr) is ast.Identifier: - # Generate code for this identifier. - tg = self.resolveSymbol(expr) - expr.kind = type(tg) - expr.typ = tg.typ - # This returns the dereferenced variable. - if isinstance(tg, ast.Variable): - expr.lvalue = True - return ir.Mem(self.varMap[tg]) - elif isinstance(tg, ast.Constant): - c_val = self.gen_expr_code(tg.value) - return self.evalConst(c_val) - else: - raise NotImplementedError(str(tg)) - elif type(expr) is ast.Deref: - # dereference pointer type: - addr = self.gen_expr_code(expr.ptr) - ptr_typ = self.the_type(expr.ptr.typ) - expr.lvalue = True - if type(ptr_typ) is ast.PointerType: - expr.typ = ptr_typ.ptype - return ir.Mem(addr) - else: - raise SemanticError('Cannot deref non-pointer', expr.loc) - elif type(expr) is ast.Member: - return self.gen_member_expr(expr) - elif type(expr) is ast.Index: - return self.gen_index_expr(expr) - elif type(expr) is ast.Literal: - return self.gen_literal_expr(expr) - elif type(expr) is ast.TypeCast: - return self.gen_type_cast(expr) - elif type(expr) is ast.Sizeof: - # The type of this expression is int: - expr.typ = self.intType - self.check_type(expr.query_typ) - type_size = self.size_of(expr.query_typ) - return ir.Const(type_size) - elif type(expr) is ast.FunctionCall: - return self.gen_function_call(expr) - else: - raise NotImplementedError('Unknown expr {}'.format(expr)) - - def gen_member_expr(self, expr): - base = self.gen_expr_code(expr.base) - expr.lvalue = expr.base.lvalue - basetype = self.the_type(expr.base.typ) - if type(basetype) is ast.StructureType: - if basetype.hasField(expr.field): - expr.typ = basetype.fieldType(expr.field) - else: - raise SemanticError('{} does not contain field {}' - .format(basetype, expr.field), - expr.loc) - else: - raise SemanticError('Cannot select {} of non-structure type {}' - .format(expr.field, basetype), expr.loc) - - assert type(base) is ir.Mem, type(base) - bt = self.the_type(expr.base.typ) - offset = ir.Const(bt.fieldOffset(expr.field)) - addr = ir.Add(base.e, offset, "mem_addr", ir.i32) - return ir.Mem(addr) - - def gen_index_expr(self, expr): - """ Array indexing """ - base = self.gen_expr_code(expr.base) - idx = self.gen_expr_code(expr.i) - base_typ = self.the_type(expr.base.typ) - if not isinstance(base_typ, ast.ArrayType): - raise SemanticError('Cannot index non-array type {}' - .format(base_typ), - expr.base.loc) - idx_type = self.the_type(expr.i.typ) - if not self.equal_types(idx_type, self.intType): - raise SemanticError('Index must be int not {}' - .format(idx_type), expr.i.loc) - assert type(base) is ir.Mem - element_type = self.the_type(base_typ.element_type) - element_size = self.size_of(element_type) - expr.typ = base_typ.element_type - expr.lvalue = True - - offset = ir.Mul(idx, ir.Const(element_size), "element_offset", ir.i32) - addr = ir.Add(base.e, offset, "element_address", ir.i32) - return ir.Mem(addr) - - def gen_literal_expr(self, expr): - """ Generate code for literal """ - expr.lvalue = False - typemap = {int: 'int', - float: 'double', - bool: 'bool', - str: 'string'} - if type(expr.val) in typemap: - expr.typ = self.pkg.scope[typemap[type(expr.val)]] - else: - raise SemanticError('Unknown literal type {}' - .format(expr.val), expr.loc) - # Construct correct const value: - if type(expr.val) is str: - cval = self.pack_string(expr.val) - return ir.Addr(ir.Const(cval)) - else: - return ir.Const(expr.val) - - def pack_string(self, txt): - """ Pack a string using 4 bytes length followed by text data """ - length = struct.pack('|<<|>>|!=|\+\+|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<|{|}|&|\^|\|', - lambda typ, val: (val, val)), - ('STRING', r'".*?"', lambda typ, val: (typ, val[1:-1])) - ] - super().__init__(tok_spec) - - def lex(self, input_file): - filename = input_file.name if hasattr(input_file, 'name') else '' - s = input_file.read() - input_file.close() - self.diag.addSource(filename, s) - self.filename = filename - return self.tokenize(s) - - def handle_comment_start(self, typ, val): - self.incomment = True - - def handle_comment_stop(self, typ, val): - self.incomment = False - - def tokenize(self, text): - """ Keeps track of the long comments """ - self.incomment = False - for token in super().tokenize(text): - if self.incomment: - pass # Wait until we are not in a comment section - else: - yield token - loc = SourceLocation(self.filename, self.line, 0, 0) - yield Token('EOF', 'EOF', loc) - - def handle_id(self, typ, val): - if val in keywords: - typ = val - return typ, val diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/parser.py --- a/python/ppci/c3/parser.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,471 +0,0 @@ -import logging -from .. import CompilerError -from .astnodes import Member, Literal, TypeCast, Unop, Binop -from .astnodes import Assignment, ExpressionStatement, Compound -from .astnodes import Return, While, If, Empty, For -from .astnodes import FunctionType, Function, FormalParameter -from .astnodes import StructureType, DefinedType, PointerType, ArrayType -from .astnodes import Constant, Variable, Sizeof -from .astnodes import StructField, Deref, Index -from .astnodes import Package -from .astnodes import Identifier -from .astnodes import FunctionCall - - -class Parser: - """ Parses sourcecode into an abstract syntax tree (AST) """ - def __init__(self, diag): - self.logger = logging.getLogger('c3') - self.diag = diag - - def parseSource(self, tokens): - self.logger.debug('Parsing source') - self.tokens = tokens - self.token = self.tokens.__next__() - try: - self.parse_package() - self.logger.debug('Parsing complete') - self.mod.ok = True # Valid until proven wrong :) - return self.mod - except CompilerError as e: - self.diag.addDiag(e) - - def Error(self, msg): - raise CompilerError(msg, self.token.loc) - - # Lexer helpers: - def Consume(self, typ): - if self.Peak == typ: - return self.NextToken() - else: - self.Error('Excected: "{0}", got "{1}"'.format(typ, self.Peak)) - - @property - def Peak(self): - return self.token.typ - - @property - def CurLoc(self): - return self.token.loc - - def hasConsumed(self, typ): - if self.Peak == typ: - self.Consume(typ) - return True - return False - - def NextToken(self): - t = self.token - if t.typ != 'EOF': - self.token = self.tokens.__next__() - return t - - def addDeclaration(self, decl): - self.currentPart.add_declaration(decl) - - def parseImport(self): - self.Consume('import') - name = self.Consume('ID').val - self.mod.imports.append(name) - self.Consume(';') - - def parse_package(self): - """ Parse a package definition """ - self.Consume('module') - name = self.Consume('ID') - self.Consume(';') - self.logger.debug('Parsing package {}'.format(name.val)) - self.mod = Package(name.val, name.loc) - self.currentPart = self.mod - while self.Peak != 'EOF': - self.parse_top_level() - self.Consume('EOF') - - def parse_top_level(self): - """ Parse toplevel declaration """ - if self.Peak == 'function': - self.parse_function_def() - elif self.Peak == 'var': - self.parse_variable_def() - # TODO handle variable initialization - elif self.Peak == 'const': - self.parseConstDef() - elif self.Peak == 'type': - self.parse_type_def() - elif self.Peak == 'import': - self.parseImport() - else: - self.Error('Expected function, var, const or type') - - def parseDesignator(self): - """ A designator designates an object with a name. """ - name = self.Consume('ID') - return Identifier(name.val, name.loc) - - def parseIdSequence(self): - ids = [self.Consume('ID')] - while self.hasConsumed(','): - ids.append(self.Consume('ID')) - return ids - - # Type system - def PostFixId(self): - pfe = self.PrimaryExpression_Id() - while self.Peak in ['.']: - if self.hasConsumed('.'): - field = self.Consume('ID') - pfe = Member(pfe, field.val, field.loc) - else: - raise Exception() - return pfe - - def PrimaryExpression_Id(self): - if self.Peak == 'ID': - return self.parseDesignator() - self.Error('Expected ID, got {0}'.format(self.Peak)) - - def parse_type_spec(self): - """ Parse type specification """ - if self.Peak == 'struct': - self.Consume('struct') - self.Consume('{') - mems = [] - while self.Peak != '}': - mem_t = self.parse_type_spec() - for i in self.parseIdSequence(): - mems.append(StructField(i.val, mem_t)) - self.Consume(';') - self.Consume('}') - theT = StructureType(mems) - elif self.Peak == 'enum': - # TODO) - raise NotImplementedError() - else: - theT = self.PostFixId() - - # Check for pointer or array suffix: - while self.Peak in ['*', '[']: - if self.hasConsumed('*'): - theT = PointerType(theT) - elif self.hasConsumed('['): - if self.Peak == ']': - size = 0 - self.Consume(']') - else: - size = self.Expression() - self.Consume(']') - theT = ArrayType(theT, size) - else: - raise Exception() - return theT - - def parse_type_def(self): - self.Consume('type') - newtype = self.parse_type_spec() - typename = self.Consume('ID') - self.logger.debug('Parsing type {}'.format(typename.val)) - self.Consume(';') - df = DefinedType(typename.val, newtype, typename.loc) - self.addDeclaration(df) - - # Variable declarations: - def parse_variable_def(self): - """ Parse variable declaration """ - self.Consume('var') - t = self.parse_type_spec() - for name in self.parseIdSequence(): - v = Variable(name.val, t) - v.loc = name.loc - self.addDeclaration(v) - self.Consume(';') - - def parseConstDef(self): - self.Consume('const') - t = self.parse_type_spec() - while True: - name = self.Consume('ID') - self.Consume('=') - val = self.Expression() - c = Constant(name.val, t, val) - self.addDeclaration(c) - c.loc = name.loc - if not self.hasConsumed(','): - break - self.Consume(';') - - def parse_function_def(self): - loc = self.Consume('function').loc - returntype = self.parse_type_spec() - fname = self.Consume('ID').val - self.logger.debug('Parsing function {}'.format(fname)) - f = Function(fname, loc) - self.addDeclaration(f) - savePart = self.currentPart - self.currentPart = f - self.Consume('(') - parameters = [] - if not self.hasConsumed(')'): - while True: - typ = self.parse_type_spec() - name = self.Consume('ID') - param = FormalParameter(name.val, typ) - param.loc = name.loc - self.addDeclaration(param) - parameters.append(param) - if not self.hasConsumed(','): - break - self.Consume(')') - paramtypes = [p.typ for p in parameters] - f.typ = FunctionType(paramtypes, returntype) - if self.Peak == ';': - self.Consume(';') - f.body = None - else: - f.body = self.parseCompound() - self.currentPart = savePart - - def parse_if(self): - loc = self.Consume('if').loc - self.Consume('(') - condition = self.Expression() - self.Consume(')') - yes = self.Statement() - no = self.Statement() if self.hasConsumed('else') else Empty() - return If(condition, yes, no, loc) - - def parse_switch(self): - loc = self.Consume('switch').loc - self.Consume('(') - condition = self.Expression() - self.Consume(')') - return Switch(condition, loc) - - def parse_while(self): - loc = self.Consume('while').loc - self.Consume('(') - condition = self.Expression() - self.Consume(')') - statements = self.Statement() - return While(condition, statements, loc) - - def parse_for(self): - loc = self.Consume('for').loc - self.Consume('(') - init = self.Statement() - self.Consume(';') - condition = self.Expression() - self.Consume(';') - final = self.Statement() - self.Consume(')') - statements = self.Statement() - return For(init, condition, final, statements, loc) - - def parseReturn(self): - loc = self.Consume('return').loc - if self.Peak == ';': - expr = Literal(0, loc) - else: - expr = self.Expression() - self.Consume(';') - return Return(expr, loc) - - def parseCompound(self): - self.Consume('{') - statements = [] - while not self.hasConsumed('}'): - statements.append(self.Statement()) - return Compound(statements) - - def Statement(self): - # Determine statement type based on the pending token: - if self.Peak == 'if': - return self.parse_if() - elif self.Peak == 'while': - return self.parse_while() - elif self.Peak == 'for': - return self.parse_for() - elif self.Peak == 'switch': - return self.parse_switch() - elif self.Peak == '{': - return self.parseCompound() - elif self.hasConsumed(';'): - return Empty() - elif self.Peak == 'var': - self.parse_variable_def() - return Empty() - elif self.Peak == 'return': - return self.parseReturn() - else: - x = self.UnaryExpression() - if self.Peak == '=': - # We enter assignment mode here. - loc = self.Consume('=').loc - rhs = self.Expression() - return Assignment(x, rhs, loc) - else: - return ExpressionStatement(x, x.loc) - - # Expression section: - # We not implement these C constructs: - # a(2), f = 2 - # and this: - # a = 2 < x : 4 ? 1; - - def Expression(self): - exp = self.LogicalAndExpression() - while self.Peak == 'or': - loc = self.Consume('or').loc - e2 = self.LogicalAndExpression() - exp = Binop(exp, 'or', e2, loc) - return exp - - def LogicalAndExpression(self): - o = self.EqualityExpression() - while self.Peak == 'and': - loc = self.Consume('and').loc - o2 = self.EqualityExpression() - o = Binop(o, 'and', o2, loc) - return o - - def EqualityExpression(self): - ee = self.SimpleExpression() - while self.Peak in ['<', '==', '>', '>=', '<=', '!=']: - op = self.Consume(self.Peak) - ee2 = self.SimpleExpression() - ee = Binop(ee, op.typ, ee2, op.loc) - return ee - - def SimpleExpression(self): - """ Shift operations before + and - ? """ - e = self.AddExpression() - while self.Peak in ['>>', '<<']: - op = self.Consume(self.Peak) - e2 = self.AddExpression() - e = Binop(e, op.typ, e2, op.loc) - return e - - def AddExpression(self): - e = self.Term() - while self.Peak in ['+', '-']: - op = self.Consume(self.Peak) - e2 = self.Term() - e = Binop(e, op.typ, e2, op.loc) - return e - - def Term(self): - t = self.BitwiseOr() - while self.Peak in ['*', '/']: - op = self.Consume(self.Peak) - t2 = self.BitwiseOr() - t = Binop(t, op.typ, t2, op.loc) - return t - - def BitwiseOr(self): - a = self.BitwiseAnd() - while self.Peak == '|': - op = self.Consume(self.Peak) - b = self.BitwiseAnd() - a = Binop(a, op.typ, b, op.loc) - return a - - def BitwiseAnd(self): - a = self.CastExpression() - while self.Peak == '&': - op = self.Consume(self.Peak) - b = self.CastExpression() - a = Binop(a, op.typ, b, op.loc) - return a - - # Domain of unary expressions: - - def CastExpression(self): - """ - the C-style type cast conflicts with '(' expr ')' - so introduce extra keyword 'cast' - """ - if self.Peak == 'cast': - loc = self.Consume('cast').loc - self.Consume('<') - t = self.parse_type_spec() - self.Consume('>') - self.Consume('(') - ce = self.Expression() - self.Consume(')') - return TypeCast(t, ce, loc) - elif self.Peak == 'sizeof': - return self.sizeof_expression() - else: - return self.UnaryExpression() - - def sizeof_expression(self): - """ Compiler internal function to determine size of a type """ - loc = self.Consume('sizeof').loc - self.Consume('(') - typ = self.parse_type_spec() - self.Consume(')') - return Sizeof(typ, loc) - - def UnaryExpression(self): - if self.Peak in ['&', '*']: - op = self.Consume(self.Peak) - ce = self.CastExpression() - if op.val == '*': - return Deref(ce, op.loc) - else: - return Unop(op.typ, ce, op.loc) - else: - return self.PostFixExpression() - - def PostFixExpression(self): - pfe = self.PrimaryExpression() - while self.Peak in ['[', '.', '->', '(', '++']: - if self.hasConsumed('['): - i = self.Expression() - self.Consume(']') - pfe = Index(pfe, i, i.loc) - elif self.hasConsumed('->'): - field = self.Consume('ID') - pfe = Deref(pfe, pfe.loc) - pfe = Member(pfe, field.val, field.loc) - elif self.hasConsumed('.'): - field = self.Consume('ID') - pfe = Member(pfe, field.val, field.loc) - elif self.Peak == '++': - loc = self.Consume('++').loc - pfe = Unop('++', pfe, loc) - elif self.hasConsumed('('): - # Function call - args = [] - if not self.hasConsumed(')'): - args.append(self.Expression()) - while self.hasConsumed(','): - args.append(self.Expression()) - self.Consume(')') - pfe = FunctionCall(pfe, args, pfe.loc) - else: - raise Exception() - return pfe - - def PrimaryExpression(self): - if self.hasConsumed('('): - e = self.Expression() - self.Consume(')') - return e - elif self.Peak == 'NUMBER': - val = self.Consume('NUMBER') - return Literal(val.val, val.loc) - elif self.Peak == 'REAL': - val = self.Consume('REAL') - return Literal(val.val, val.loc) - elif self.Peak == 'true': - val = self.Consume('true') - return Literal(True, val.loc) - elif self.Peak == 'false': - val = self.Consume('false') - return Literal(False, val.loc) - elif self.Peak == 'STRING': - val = self.Consume('STRING') - return Literal(val.val, val.loc) - elif self.Peak == 'ID': - return self.parseDesignator() - self.Error('Expected NUM, ID or (expr), got {0}'.format(self.Peak)) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/scope.py --- a/python/ppci/c3/scope.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ -from .astnodes import Constant, Variable, Function, BaseType, Symbol -from .astnodes import ArrayType, StructureType, DefinedType, PointerType -from .astnodes import StructField - - -class Scope: - """ A scope contains all symbols in a scope. It also has a parent scope, - when looking for a symbol, also the parent scopes are checked. """ - def __init__(self, parent=None): - self.symbols = {} - self.parent = parent - - def __iter__(self): - # Iterate in a deterministic manner: - return iter(self.Constants + self.Variables + self.Functions) - - @property - def Syms(self): - """ Get all the symbols defined in this scope """ - syms = self.symbols.values() - return sorted(syms, key=lambda v: v.name) - - @property - def Types(self): - return [s for s in self.Syms if isinstance(s, DefinedType)] - - @property - def Constants(self): - return [s for s in self.Syms if type(s) is Constant] - - @property - def Variables(self): - return [s for s in self.Syms if isinstance(s, Variable)] - - @property - def Functions(self): - return [s for s in self.Syms if type(s) is Function] - - def getSymbol(self, name): - if name in self.symbols: - return self.symbols[name] - # Look for symbol: - elif self.parent: - return self.parent.getSymbol(name) - else: - raise KeyError(name) - - def __getitem__(self, key): - return self.getSymbol(key) - - def hasSymbol(self, name): - if name in self.symbols: - return True - elif self.parent: - return self.parent.hasSymbol(name) - else: - return False - - def __contains__(self, name): - return self.hasSymbol(name) - - def addSymbol(self, sym): - assert sym.name not in self.symbols - assert isinstance(sym, Symbol) - self.symbols[sym.name] = sym - - def __repr__(self): - return 'Scope with {} symbols'.format(len(self.symbols)) - - -def createTopScope(target): - scope = Scope() - for tn in ['u64', 'u32', 'u16', 'u8']: - scope.addSymbol(BaseType(tn)) - # buildin types: - intType = BaseType('int') - intType.bytesize = target.byte_sizes['int'] - scope.addSymbol(intType) - scope.addSymbol(BaseType('double')) - scope.addSymbol(BaseType('void')) - scope.addSymbol(BaseType('bool')) - byteType = BaseType('byte') - byteType.bytesize = target.byte_sizes['byte'] - scope.addSymbol(byteType) - - # Construct string type from others: - ln = StructField('len', intType) - txt = StructField('txt', ArrayType(byteType, 0)) - strType = DefinedType('string', PointerType(StructureType([ln, txt])), - None) - scope.addSymbol(strType) - return scope diff -r a7c444404df9 -r 0374c65cb437 python/ppci/c3/visitor.py --- a/python/ppci/c3/visitor.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ -from .astnodes import * - - -class Visitor: - """ - Visitor that can visit all nodes in the AST - and run pre and post functions. - """ - def visit(self, node, f_pre=None, f_post=None): - self.f_pre = f_pre - self.f_post = f_post - self.do(node) - - def do(self, node): - # Run pre function: - if self.f_pre: - self.f_pre(node) - - # Descent into subnodes: - if type(node) is Package: - for decl in node.declarations: - self.do(decl) - elif type(node) is Function: - for s in node.declarations: - self.do(s) - self.do(node.typ) - if node.body: - self.do(node.body) - elif type(node) is Compound: - for s in node.statements: - self.do(s) - elif type(node) is If: - self.do(node.condition) - self.do(node.truestatement) - self.do(node.falsestatement) - elif type(node) is While: - self.do(node.condition) - self.do(node.statement) - elif type(node) is For: - self.do(node.init) - self.do(node.condition) - self.do(node.final) - self.do(node.statement) - elif type(node) is Assignment: - self.do(node.lval) - self.do(node.rval) - elif type(node) is FunctionCall: - for arg in node.args: - self.do(arg) - self.do(node.proc) - elif type(node) is Return: - self.do(node.expr) - elif type(node) is Binop: - self.do(node.a) - self.do(node.b) - elif type(node) is Unop: - self.do(node.a) - elif type(node) is ExpressionStatement: - self.do(node.ex) - elif type(node) is TypeCast: - self.do(node.a) - self.do(node.to_type) - elif type(node) is Sizeof: - self.do(node.query_typ) - elif type(node) is Member: - self.do(node.base) - elif type(node) is Index: - self.do(node.base) - self.do(node.i) - elif type(node) is Deref: - self.do(node.ptr) - elif type(node) is Constant: - self.do(node.typ) - self.do(node.value) - elif type(node) is DefinedType: - self.do(node.typ) - elif isinstance(node, Variable): - self.do(node.typ) - elif type(node) is PointerType: - self.do(node.ptype) - elif type(node) is StructureType: - for m in node.mems: - self.do(m.typ) - elif type(node) is ArrayType: - self.do(node.element_type) - self.do(node.size) - elif type(node) is FunctionType: - for pt in node.parametertypes: - self.do(pt) - self.do(node.returntype) - elif type(node) in [Identifier, Literal, Empty]: - # Those nodes do not have child nodes. - pass - else: - raise Exception('Could not visit "{0}"'.format(node)) - - # run post function - if self.f_post: - self.f_post(node) - - -class AstPrinter: - """ Prints an AST as text """ - def printAst(self, pkg, f): - self.indent = 2 - self.f = f - visitor = Visitor() - visitor.visit(pkg, self.print1, self.print2) - - def print1(self, node): - print(' ' * self.indent + str(node), file=self.f) - self.indent += 2 - - def print2(self, node): - self.indent -= 2 diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/__init__.py --- a/python/ppci/codegen/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -from .codegen import CodeGenerator diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/canon.py --- a/python/ppci/codegen/canon.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,141 +0,0 @@ -from .. import ir -from .. import irutils -from itertools import chain - -def make(function, frame): - """ - Create canonicalized version of the IR-code. This means: - - Calls out of expressions. - - Other things? - """ - # Change the tree. This modifies the IR-tree! - # Move all parameters into registers - parmoves = [] - for p in function.arguments: - pt = newTemp() - frame.parMap[p] = pt - parmoves.append(ir.Move(pt, frame.argLoc(p.num))) - function.entry.instructions = parmoves + function.entry.instructions - - for block in function.Blocks: - for stmt in block.instructions: - rewriteStmt(stmt, frame) - linearize(block) - # TODO: schedule here? - -# Visit all nodes with some function: -# TODO: rewrite into visitor. - -# Rewrite rewrites call instructions into Eseq instructions. - - -def rewriteStmt(stmt, frame): - if isinstance(stmt, ir.Jump): - pass - elif isinstance(stmt, ir.CJump): - stmt.a = rewriteExp(stmt.a, frame) - stmt.b = rewriteExp(stmt.b, frame) - elif isinstance(stmt, ir.Move): - stmt.src = rewriteExp(stmt.src, frame) - stmt.dst = rewriteExp(stmt.dst, frame) - elif isinstance(stmt, ir.Terminator): - pass - elif isinstance(stmt, ir.Exp): - stmt.e = rewriteExp(stmt.e, frame) - else: - raise NotImplementedError('STMT NI: {}'.format(stmt)) - -newTemp = irutils.NamedClassGenerator('canon_reg', ir.Temp).gen - -def rewriteExp(exp, frame): - if isinstance(exp, ir.Binop): - exp.a = rewriteExp(exp.a, frame) - exp.b = rewriteExp(exp.b, frame) - return exp - elif isinstance(exp, ir.Const): - return exp - elif isinstance(exp, ir.Temp): - return exp - elif isinstance(exp, ir.Parameter): - return frame.parMap[exp] - elif isinstance(exp, ir.LocalVariable): - offset = frame.allocVar(exp) - return ir.Add(frame.fp, ir.Const(offset), "Offset", ir.i32) - elif isinstance(exp, ir.GlobalVariable): - #frame.load_global_address(ir.label_name(exp)) - return exp - elif isinstance(exp, ir.Mem): - exp.e = rewriteExp(exp.e, frame) - return exp - elif isinstance(exp, ir.Addr): - exp.e = rewriteExp(exp.e, frame) - return exp - elif isinstance(exp, ir.Call): - exp.arguments = [rewriteExp(p, frame) for p in exp.arguments] - # Rewrite call into eseq: - t = newTemp() - return ir.Eseq(ir.Move(t, exp), t) - else: - raise NotImplementedError('NI: {}, {}'.format(exp, type(exp))) - -# The flatten functions pull out seq instructions to the sequence list. - -def flattenExp(exp): - if isinstance(exp, ir.Binop): - exp.a, sa = flattenExp(exp.a) - exp.b, sb = flattenExp(exp.b) - return exp, sa + sb - elif isinstance(exp, ir.Temp): - return exp, [] - elif isinstance(exp, ir.Const): - return exp, [] - elif isinstance(exp, ir.Mem): - exp.e, s = flattenExp(exp.e) - return exp, s - elif isinstance(exp, ir.GlobalVariable): - return exp, [] - elif isinstance(exp, ir.Addr): - exp.e, s = flattenExp(exp.e) - return exp, s - elif isinstance(exp, ir.Eseq): - s = flattenStmt(exp.stmt) - exp.e, se = flattenExp(exp.e) - return exp.e, s + se - elif isinstance(exp, ir.Call): - sp = [] - p = [] - for p_, sp_ in (flattenExp(p) for p in exp.arguments): - p.append(p_) - sp.extend(sp_) - exp.arguments = p - return exp, sp - else: - raise NotImplementedError('NI: {}'.format(exp)) - - -def flattenStmt(stmt): - if isinstance(stmt, ir.Jump): - return [stmt] - elif isinstance(stmt, ir.CJump): - stmt.a, sa = flattenExp(stmt.a) - stmt.b, sb = flattenExp(stmt.b) - return sa + sb + [stmt] - elif isinstance(stmt, ir.Move): - stmt.dst, sd = flattenExp(stmt.dst) - stmt.src, ss = flattenExp(stmt.src) - return sd + ss + [stmt] - elif isinstance(stmt, ir.Terminator): - return [stmt] - elif isinstance(stmt, ir.Exp): - stmt.e, se = flattenExp(stmt.e) - return se + [stmt] - else: - raise NotImplementedError('STMT NI: {}'.format(stmt)) - - -def linearize(block): - """ - Move seq instructions to top and flatten these in an instruction list - """ - i = list(flattenStmt(s) for s in block.instructions) - block.instructions = list(chain.from_iterable(i)) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/codegen.py --- a/python/ppci/codegen/codegen.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -from .. import ir -from ..irutils import Verifier -from ..transform import RemoveAddZero -from ..target import Target -from .. import CompilerError -from .canon import make as canonicalize -from .registerallocator import RegisterAllocator -import logging - - -class CodeGenerator: - """ Generic code generator """ - def __init__(self, target): - # TODO: schedule traces in better order. - # This is optional! - assert isinstance(target, Target), target - self.logger = logging.getLogger('codegen') - self.target = target - self.ins_sel = target.ins_sel - self.ra = RegisterAllocator() - self.verifier = Verifier() - - def generateFunc(self, irfunc, outs): - """ Generate code for one function into a frame """ - self.logger.debug('Generating code for {}'.format(irfunc.name)) - # Create a frame for this function: - frame = self.target.FrameClass(ir.label_name(irfunc)) - - # Canonicalize the intermediate language: - canonicalize(irfunc, frame) - RemoveAddZero().run(irfunc) - self.logger.debug('after canonicalize', extra={'irfunc': irfunc}) - self.verifier.verify_function(irfunc) - self.ins_sel.munchFunction(irfunc, frame) - self.logger.debug('Selected instructions', extra={'ppci_frame': frame}) - - # Do register allocation: - self.ra.allocFrame(frame) - self.logger.debug('Registers allocated, now adding final glue') - # TODO: Peep-hole here? - - # Add label and return and stack adjustment: - frame.EntryExitGlue3() - - # Materialize the register allocated instructions into a stream of - # real instructions. - self.target.lower_frame_to_stream(frame, outs) - self.logger.debug('Instructions materialized') - return frame - - def generate(self, ircode, outs): - """ Generate code into output stream """ - assert isinstance(ircode, ir.Module) - outs.select_section('data') - for global_variable in ircode.Variables: - self.target.emit_global(outs, ir.label_name(global_variable)) - outs.select_section('code') - - # Munch program into a bunch of frames. One frame per function. - # Each frame has a flat list of abstract instructions. - # Generate code for all functions: - self.frames = [self.generateFunc(f, outs) for f in ircode.Functions] - return self.frames diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/flowgraph.py --- a/python/ppci/codegen/flowgraph.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -from .graph import DiGraph, DiNode - - -class FlowGraphNode(DiNode): - """ A node in the flow graph """ - def __init__(self, g, ins): - super().__init__(g) - self.ins = ins - self.uses = set(ins.src) - self.defs = set(ins.dst) - self.live_in = set() - self.live_out = set() - - def __repr__(self): - r = '{}'.format(self.ins) - if self.uses: - r += ' uses:' + ', '.join(str(u) for u in self.uses) - if self.defs: - r += ' defs:' + ', '.join(str(d) for d in self.defs) - return r - - - -class FlowGraph(DiGraph): - def __init__(self, instrs): - """ Create a flowgraph from a list of abstract instructions """ - super().__init__() - self._map = {} - # Add nodes: - for ins in instrs: - n = FlowGraphNode(self, ins) - self._map[ins] = n - self.add_node(n) - - # Make edges: - prev = None - for ins in instrs: - n = self._map[ins] - if prev: - self.addEdge(prev, n) - if ins.jumps: - prev = None - for j in ins.jumps: - to_n = self._map[j] - self.addEdge(n, to_n) - else: - prev = n diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/graph.py --- a/python/ppci/codegen/graph.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,107 +0,0 @@ - -class Graph: - """ - Generic graph base class. - Can dump to graphiz dot format for example! - """ - def __init__(self): - self.nodes = set() - self.edges = set() - self.adj_map = {} - - def add_node(self, n): - self.nodes.add(n) - if n not in self.adj_map: - self.adj_map[n] = set() - - def delNode(self, n): - self.nodes.remove(n) - - def addEdge(self, n, m): - """ Add an edge from n to m """ - self.edges.add((n, m)) - self.edges.add((m, n)) - self.adj_map[n].add(m) - self.adj_map[m].add(n) - - def hasEdge(self, n, m): - return (n, m) in self.edges - - def delEdge(self, n, m): - self.edges.remove((n, m)) - self.edges.remove((m, n)) - - def adjecent(self, n): - """ Return all nodes with edges to n """ - return self.adj_map[n] & self.nodes - - def to_dot(self, f): - """ Generate graphviz dot representation """ - for n in self.nodes: - print(' {} [label="{}" shape=box3d];'.format(id(n), n), file=f) - for n, m in self.edges: - print(' {} -> {};'.format(id(n), id(m)), file=f) - - -class Node: - """ - Node in a graph. - """ - def __init__(self, g): - self.g = g - self.addDegree = 0 # Hack to increase degree - - @property - def Adjecent(self): - return self.g.adjecent(self) - - @property - def Degree(self): - return len(self.Adjecent) + self.addDegree - - -class DiGraph(Graph): - """ Directed graph. """ - def __init__(self): - super().__init__() - self.suc_map = {} - self.pre_map = {} - - def addEdge(self, n, m): - """ Add a directed edge from n to m """ - assert n in self.nodes - assert m in self.nodes - self.edges.add((n, m)) - self.suc_map[n].add(m) - self.pre_map[m].add(n) - self.adj_map[n].add(m) - self.adj_map[m].add(n) - - def add_node(self, n): - super().add_node(n) - if n not in self.suc_map: - self.suc_map[n] = set() - if n not in self.pre_map: - self.pre_map[n] = set() - - def hasEdge(self, n, m): - return (n, m) in self.edges - - def successors(self, n): - return self.suc_map[n] & self.nodes - - def predecessors(self, n): - return self.pre_map[n] & self.nodes - - -class DiNode(Node): - @property - def Succ(self): - return self.g.successors(self) - - @property - def Pred(self): - return self.g.predecessors(self) - - def __gt__(self, other): - return self in other.Succ diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/interferencegraph.py --- a/python/ppci/codegen/interferencegraph.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,102 +0,0 @@ -import logging -from .graph import Graph, Node - - -class InterferenceGraphNode(Node): - def __init__(self, g, varname): - super().__init__(g) - self.temps = [varname] - self.moves = set() - self.color = None - - def __repr__(self): - return '{}({})'.format(self.temps, self.color) - - def __gt__(self, other): - return str(self.temps) > str(other.temps) - -class InterferenceGraph(Graph): - """ - Interference graph. - """ - def __init__(self, flowgraph): - """ Create a new interference graph from a flowgraph """ - super().__init__() - self.logger = logging.getLogger('interferencegraph') - # Calculate liveness in CFG: - ### - # Liveness: - # in[n] = use[n] UNION (out[n] - def[n]) - # out[n] = for s in n.succ in union in[s] - ### - for n in flowgraph.nodes: - n.live_in = set() - n.live_out = set() - - # Sort flowgraph nodes backwards: - cfg_nodes = list(flowgraph.nodes) - self.logger.debug('CFG nodes: {}'.format(cfg_nodes)) - cfg_nodes.sort(reverse=True) - - # Dataflow fixed point iteration: - n_iterations = 0 - change = True - while change: - change = False - for n in cfg_nodes: - _in = n.live_in - _out = n.live_out - n.live_in = n.uses | (n.live_out - n.defs) - if n.Succ: - n.live_out = set.union(*(s.live_in for s in n.Succ)) - else: - n.live_out = set() - n.live_out = n.live_out | n.defs - change = change or (_in != n.live_in) or (_out != n.live_out) - n_iterations += 1 - - self.logger.debug('Iterations: {} * {}'.format(n_iterations, len(cfg_nodes))) - # Construct interference graph: - for n in flowgraph.nodes: - for tmp in n.live_out: - n1 = self.getNode(tmp) - for tmp2 in (n.live_out - {tmp}): - n2 = self.getNode(tmp2) - self.addEdge(n1, n2) - - def to_dot(self, f): - """ Generate graphviz dot representation """ - for n in self.nodes: - print(' {} [label="{}" shape=box3d];'.format(id(n), n), file=f) - for n, m in self.edges: - print(' {} -> {};'.format(id(n), id(m)), file=f) - - def to_txt(self): - for node in self.nodes: - print('{} interferes: {}'.format(node, node.Adjecent)) - - def getNode(self, tmp): - # Linear search - # TODO: can be improved for speed! - for n in self.nodes: - if tmp in n.temps: - return n - n = InterferenceGraphNode(self, tmp) - self.add_node(n) - return n - - def Combine(self, n, m): - """ Combine n and m into n """ - n.temps.extend(m.temps) - n.moves.update(m.moves) - # Reroute all edges: - e1 = [e for e in self.edges if e[0] is m] - e2 = [e for e in self.edges if e[1] is m] - for e in e1: - self.edges.remove(e) - self.addEdge(n, e[1]) - for e in e2: - self.edges.remove(e) - self.addEdge(n, e[0]) - # Remove node m: - self.delNode(m) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/codegen/registerallocator.py --- a/python/ppci/codegen/registerallocator.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,201 +0,0 @@ -import logging -from .flowgraph import FlowGraph -from .interferencegraph import InterferenceGraph - -# Nifty first function: -def first(x): - """ Take the first element of a collection after sorting the things """ - x = list(x) - x.sort() - return next(iter(x)) - - -class RegisterAllocator: - """ - Target independent register allocator. - - Algorithm is iterated register coalescing by Appel and George. - - Chaitin's algorithm: remove all nodes with less than K neighbours. - These nodes can be colored when added back. - - The process consists of the following steps: - - build interference graph from the instruction list - - remove low degree non move related nodes. - - (optional) coalesc registers to remove redundant moves - - (optional) spill registers - - select registers - """ - def __init__(self): - self.logger = logging.getLogger('registerallocator') - - def InitData(self, f): - self.f = f - # Register information: - self.regs = set(f.regs) - self.K = len(self.regs) - - # Move related sets: - self.coalescedMoves = set() - self.constrainedMoves = set() - self.frozenMoves = set() - self.activeMoves = set() - self.worklistMoves = set() - - def Build(self): - """ 1. Construct interference graph from instruction list """ - self.f.cfg = FlowGraph(self.f.instructions) - self.logger.debug('Constructed flowgraph', extra={'ra_cfg':self.f.cfg}) - self.f.ig = InterferenceGraph(self.f.cfg) - self.logger.debug('Constructed interferencegraph', extra={'ra_ig':self.f.ig}) - - self.Node = self.f.ig.getNode - - # Divide nodes into pre-colored and initial: - pre_tmp = list(self.f.tempMap.keys()) - self.precolored = set(self.Node(tmp) for tmp in pre_tmp) - self.workSet = set(self.f.ig.nodes - self.precolored) - - for n in self.precolored: - n.addDegree = 100 + len(self.f.ig.nodes) + self.K - - # Initialize color map: - self.color = {} - for tmp, c in self.f.tempMap.items(): - self.color[self.Node(tmp)] = c - - self.moves = [i for i in self.f.instructions if i.ismove] - for mv in self.moves: - self.Node(mv.src[0]).moves.add(mv) - self.Node(mv.dst[0]).moves.add(mv) - - def NodeMoves(self, n): - return n.moves & (self.activeMoves | self.worklistMoves) - - def MoveRelated(self, n): - return bool(self.NodeMoves(n)) - - @property - def SpillWorkSet(self): - c = lambda n: n.Degree >= self.K - return set(filter(c, self.workSet)) - - @property - def FreezeWorkSet(self): - c = lambda n: n.Degree < self.K and self.MoveRelated(n) - return set(filter(c, self.workSet)) - - @property - def SimplifyWorkSet(self): - c = lambda n: n.Degree < self.K and not self.MoveRelated(n) - return set(filter(c, self.workSet)) - - def makeWorkList(self): - """ Divide initial nodes into worklists """ - self.selectStack = [] - - # Fill initial move set: - for m in self.moves: - self.worklistMoves.add(m) - - def Simplify(self): - """ 2. Remove nodes from the graph """ - n = first(self.SimplifyWorkSet) - self.workSet.remove(n) - self.selectStack.append(n) - # Pop out of graph: - self.f.ig.delNode(n) - - def EnableMoves(self, nodes): - for n in nodes: - for m in self.NodeMoves(n): - if m in self.activeMoves: - self.activeMoves.remove(m) - self.worklistMoves.add(m) - - def Coalesc(self): - """ Coalesc conservative. """ - m = first(self.worklistMoves) - x = self.Node(m.dst[0]) - y = self.Node(m.src[0]) - u, v = (y, x) if y in self.precolored else (x, y) - self.worklistMoves.remove(m) - if u is v: - self.coalescedMoves.add(m) - elif v in self.precolored or self.f.ig.hasEdge(u, v): - self.constrainedMoves.add(m) - elif u not in self.precolored and self.Conservative(u, v): - self.coalescedMoves.add(m) - self.workSet.remove(v) - self.f.ig.Combine(u, v) - else: - self.activeMoves.add(m) - - def Conservative(self, u, v): - """ Briggs conservative criteria for coalesc """ - nodes = u.Adjecent | v.Adjecent - c = lambda n: n.Degree >= self.K - k = len(list(filter(c, nodes))) - return k < self.K - - def Freeze(self): - """ Give up coalescing on some node """ - u = first(self.FreezeWorkSet) - self.freezeMoves(u) - - def freezeMoves(self, u): - """ Freeze moves for node u """ - for m in self.NodeMoves(u): - if m in self.activeMoves: - self.activeMoves.remove(m) - else: - sekf.worklistMoves.remove(m) - self.frozenMoves.add(m) - # Check other part of the move for still being move related: - v = m.src[0] if u is m.dst[0] else m.dst[0] - - def SelectSpill(self): - raise NotImplementedError("Spill is not implemented") - - def AssignColors(self): - """ Add nodes back to the graph to color it. """ - while self.selectStack: - n = self.selectStack.pop(-1) # Start with the last added - self.f.ig.add_node(n) - takenregs = set(self.color[m] for m in n.Adjecent) - okColors = self.regs - takenregs - if okColors: - self.color[n] = first(okColors) - n.color = self.color[n] - else: - raise NotImplementedError('Spill required here!') - - def ApplyColors(self): - # Remove coalesced moves: - for mv in self.coalescedMoves: - self.f.instructions.remove(mv) - - # Use allocated registers: - lookup = lambda t: self.color[self.Node(t)] - for i in self.f.instructions: - i.src = tuple(map(lookup, i.src)) - i.dst = tuple(map(lookup, i.dst)) - - def allocFrame(self, f): - """ Do iterated register allocation for a single stack frame. """ - self.InitData(f) - self.Build() - self.makeWorkList() - while True: - if self.SimplifyWorkSet: - self.Simplify() - elif self.worklistMoves: - self.Coalesc() - elif self.FreezeWorkSet: - self.Freeze() - elif self.SpillWorkSet: - raise NotImplementedError('Spill not implemented') - else: - break # Done! - self.AssignColors() - self.ApplyColors() diff -r a7c444404df9 -r 0374c65cb437 python/ppci/common.py --- a/python/ppci/common.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -from collections import namedtuple -import logging - -""" - Error handling routines - Diagnostic utils - Source location structures -""" - -class Token: - """ - Token is used in the lexical analyzer. The lexical analyzer takes - a text and splits it into tokens. - """ - def __init__(self, typ, val, loc): - self.typ = typ - self.val = val - assert type(loc) is SourceLocation - self.loc = loc - - def __repr__(self): - return 'Token({0}, {1})'.format(self.typ, self.val) - - -class SourceLocation: - def __init__(self, filename, row, col, ln): - self.filename = filename - self.row = row - self.col = col - self.length = ln - - def __repr__(self): - return '{}, {}, {}'.format(self.filename, self.row, self.col) - - -SourceRange = namedtuple('SourceRange', ['p1', 'p2']) - - -class CompilerError(Exception): - def __init__(self, msg, loc=None): - self.msg = msg - self.loc = loc - if loc: - assert type(loc) is SourceLocation, \ - '{0} must be SourceLocation'.format(type(loc)) - self.row = loc.row - self.col = loc.col - else: - self.row = self.col = 0 - - def __repr__(self): - return '"{}"'.format(self.msg) - - -class DiagnosticsManager: - def __init__(self): - self.diags = [] - self.sources = {} - self.logger = logging.getLogger('diagnostics') - - def addSource(self, name, src): - self.logger.debug('Adding source, filename="{}"'.format(name)) - self.sources[name] = src - - def addDiag(self, d): - self.logger.error(str(d.msg)) - self.diags.append(d) - - def error(self, msg, loc): - self.addDiag(CompilerError(msg, loc)) - - def clear(self): - del self.diags[:] - self.sources.clear() - - def printErrors(self): - if len(self.diags) > 0: - print('{0} Errors'.format(len(self.diags))) - for d in self.diags: - self.printError(d) - - def printError(self, e): - def printLine(row, txt): - print(str(row) + ':' + txt) - print('==============') - if not e.loc: - print('Error: {0}'.format(e)) - else: - if e.loc.filename not in self.sources: - print('Error: {0}'.format(e)) - return - print("File: {}".format(e.loc.filename)) - source = self.sources[e.loc.filename] - lines = source.split('\n') - ro, co = e.row, e.col - prerow = ro - 2 - if prerow < 1: - prerow = 1 - afterrow = ro + 3 - if afterrow > len(lines): - afterrow = len(lines) - - # print preceding source lines: - for r in range(prerow, ro): - printLine(r, lines[r-1]) - # print source line containing error: - printLine(ro, lines[ro-1]) - print(' '*(len(str(ro)+':')+co-1) + '^ Error: {0}'.format(e.msg)) - # print trailing source line: - for r in range(ro+1, afterrow+1): - printLine(r, lines[r-1]) - print('==============') diff -r a7c444404df9 -r 0374c65cb437 python/ppci/ir.py --- a/python/ppci/ir.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,459 +0,0 @@ -""" -Intermediate representation (IR) code classes. -""" - - -def label_name(dut): - """ Returns the assembly code label name """ - if isinstance(dut, Block): - f = dut.function - return label_name(f) + '_' + dut.name - elif isinstance(dut, Function) or isinstance(dut, GlobalVariable): - return label_name(dut.module) + '_' + dut.name - elif isinstance(dut, Module): - return dut.name - else: - raise NotImplementedError(str(dut)) - - -class Typ: - def __init__(self): - pass - - -i32 = Typ() -i8 = Typ() - -class Module: - """ Container unit for variables and functions. """ - def __init__(self, name): - self.name = name - self.functions = [] - self.variables = [] - - def __repr__(self): - return 'module {0}'.format(self.name) - - def add_function(self, f): - """ Add a function to this module """ - self.functions.append(f) - f.module = self - - def add_variable(self, v): - assert type(v) is GlobalVariable - self.variables.append(v) - v.module = self - - def get_variables(self): - return self.variables - - Variables = property(get_variables) - - def get_functions(self): - return self.functions - - Functions = property(get_functions) - - def find_function(self, name): - for f in self.funcs: - if f.name == name: - return f - raise KeyError(name) - - -class Function: - """ Represents a function. """ - def __init__(self, name, module=None): - self.name = name - self.entry = Block('entry') - self.entry.function = self - self.epiloog = Block('epilog') - self.epiloog.function = self - self.epiloog.addInstruction(Terminator()) - self.return_value = Temp('{}_retval'.format(name)) - self.arguments = [] - self.localvars = [] - if module: - module.add_function(self) - - def __repr__(self): - args = ','.join(str(a) for a in self.arguments) - return 'function i32 {}({})'.format(self.name, args) - - def add_block(self, bb): - #self.bbs.append(bb) - bb.function = self - - def removeBlock(self, bb): - #self.bbs.remove(bb) - bb.function = None - - def getBlocks(self): - bbs = [self.entry] - worklist = [self.entry] - while worklist: - b = worklist.pop() - for sb in b.Successors: - if sb not in bbs: - bbs.append(sb) - worklist.append(sb) - bbs.remove(self.entry) - if self.epiloog in bbs: - bbs.remove(self.epiloog) - bbs.insert(0, self.entry) - bbs.append(self.epiloog) - return bbs - - def findBasicBlock(self, name): - for bb in self.bbs: - if bb.name == name: - return bb - raise KeyError(name) - - Blocks = property(getBlocks) - - @property - def Entry(self): - return self.entry - - def check(self): - for b in self.Blocks: - b.check() - - def addParameter(self, p): - assert type(p) is Parameter - p.num = len(self.arguments) - self.arguments.append(p) - - def addLocal(self, l): - assert type(l) is LocalVariable - self.localvars.append(l) - - -class Block: - """ - Uninterrupted sequence of instructions with a label at the start. - """ - def __init__(self, name, function=None): - self.name = name - self.function = function - self.instructions = [] - - parent = property(lambda s: s.function) - - def __repr__(self): - return '{0}:'.format(self.name) - - def addInstruction(self, i): - i.parent = self - assert not isinstance(self.LastInstruction, LastStatement) - self.instructions.append(i) - - def replaceInstruction(self, i1, i2): - idx = self.instructions.index(i1) - i1.parent = None - i1.delete() - i2.parent = self - self.instructions[idx] = i2 - - def removeInstruction(self, i): - i.parent = None - #i.delete() - self.instructions.remove(i) - - @property - def Instructions(self): - return self.instructions - - @property - def LastInstruction(self): - if not self.Empty: - return self.instructions[-1] - - @property - def Empty(self): - return len(self.instructions) == 0 - - @property - def FirstInstruction(self): - return self.instructions[0] - - def getSuccessors(self): - if not self.Empty: - return self.LastInstruction.Targets - return [] - - Successors = property(getSuccessors) - - def getPredecessors(self): - preds = [] - for bb in self.parent.Blocks: - if self in bb.Successors: - preds.append(bb) - return preds - - Predecessors = property(getPredecessors) - - def precedes(self, other): - raise NotImplementedError() - - -# Instructions: - -class Value: - """ A value has a type and a name """ - def __init__(self, name, ty): - assert isinstance(ty, Typ) - self.name = name - self.ty = ty - - -class User(Value): - """ Value that uses other values """ - def __init__(self, name, ty): - super().__init__(name, ty) - # Create a collection to store the values this value uses. - # TODO: think of better naming.. - self.uses = set() - - def add_use(self, v): - assert isinstance(v, Value) - self.uses.add(v) - - -class Expression(User): - """ Base class for an expression """ - pass - - -class Const(Expression): - """ Represents a constant value """ - def __init__(self, value): - self.value = value - - def __repr__(self): - return 'Const {}'.format(self.value) - - -class Call(Expression): - """ Call a function with some arguments """ - def __init__(self, f, arguments): - assert type(f) is str - self.f = f - self.arguments = arguments - - def __repr__(self): - args = ', '.join(str(arg) for arg in self.arguments) - return '{}({})'.format(self.f, args) - - -# Data operations -class Binop(Expression): - """ Generic binary operation """ - ops = ['+', '-', '*', '/', '|', '&', '<<', '>>'] - - def __init__(self, a, operation, b, name, ty): - super().__init__(name, ty) - assert operation in Binop.ops - #assert type(value1) is type(value2) - assert isinstance(a, Value), str(a) - assert isinstance(b, Value), str(b) - self.a = a - self.b = b - self.operation = operation - - def __repr__(self): - a, b = self.a, self.b - return '({} {} {})'.format(a, self.operation, b) - - -class Add(Binop): - """ Add a and b """ - def __init__(self, a, b, name, ty): - super().__init__(a, '+', b, name, ty) - - -def Sub(a, b): - """ Substract b from a """ - return Binop(a, '-', b) - - -def Mul(a, b, name, ty): - """ Multiply a by b """ - return Binop(a, '*', b, name, ty) - - -def Div(a, b): - """ Divide a in b pieces """ - return Binop(a, '/', b) - - -def Phi(User): - """ Imaginary phi instruction to make SSA possible. """ - def __init__(self, name, ty): - super().__init__(name, ty) - self.inputs = [] - - def add_input(self, value, block): - self.inputs.append((value, block)) - - -class Eseq(Expression): - """ Sequence of instructions where the last is an expression """ - def __init__(self, stmt, e): - self.stmt = stmt - self.e = e - - def __repr__(self): - return '({}, {})'.format(self.stmt, self.e) - - -class Alloc(Expression): - """ Allocates space on the stack """ - def __init__(self): - super().__init__() - - def __repr__(self): - return 'Alloc' - - -class Variable(Expression): - def __init__(self, name, ty): - super().__init__(name, ty) - self.name = name - - def __repr__(self): - return 'Var {}'.format(self.name) - - -class LocalVariable(Variable): - def __repr__(self): - return 'Local {}'.format(self.name) - - -class GlobalVariable(Variable): - def __repr__(self): - return 'Global {}'.format(self.name) - - -class Parameter(Variable): - def __repr__(self): - return 'Param {}'.format(self.name) - - -class Temp(Expression): - """ Temporary storage, same as register """ - def __init__(self, name): - self.name = name - - def __repr__(self): - return 'TMP_{}'.format(self.name) - - -class Mem(Expression): - """ Memory access """ - def __init__(self, e): - self.e = e - - def __repr__(self): - return '[{}]'.format(self.e) - - -class Load(Value): - """ Load a value from memory """ - def __init__(self, address, name, ty): - super().__init__(name, ty) - assert isinstance(address, Value) - self.address = address - - def __repr__(self): - return 'load {}'.format(self.address) - - -class Store: - """ Store a value into memory """ - def __init__(self, address, value): - self.address = address - - -class Addr(Expression): - """ Address of label """ - def __init__(self, e): - self.e = e - - def __repr__(self): - return '&{}'.format(self.e) - - -class Statement: - """ Base class for all instructions. """ - @property - def IsTerminator(self): - return isinstance(self, LastStatement) - - -class Move(Statement): - """ Move source to destination """ - def __init__(self, dst, src): - self.dst = dst - self.src = src - - def __repr__(self): - return '{} = {}'.format(self.dst, self.src) - - -class Exp(Statement): - def __init__(self, e): - self.e = e - - def __repr__(self): - return '{}'.format(self.e) - - -# Branching: -class LastStatement(Statement): - def changeTarget(self, old, new): - idx = self.Targets.index(old) - self.Targets[idx] = new - - -class Terminator(LastStatement): - """ Instruction that terminates the terminal block """ - def __init__(self): - self.Targets = [] - - def __repr__(self): - return 'Terminator' - - -class Jump(LastStatement): - """ Jump statement to some target location """ - def __init__(self, target): - self.Targets = [target] - - def setTarget(self, t): - self.Targets[0] = t - - target = property(lambda s: s.Targets[0], setTarget) - - def __repr__(self): - return 'JUMP {}'.format(self.target.name) - - -class CJump(LastStatement): - """ Conditional jump to true or false labels. """ - conditions = ['==', '<', '>', '>=', '<=', '!='] - - def __init__(self, a, cond, b, lab_yes, lab_no): - assert cond in CJump.conditions - self.a = a - self.cond = cond - self.b = b - self.Targets = [lab_yes, lab_no] - - lab_yes = property(lambda s: s.Targets[0]) - lab_no = property(lambda s: s.Targets[1]) - - def __repr__(self): - return 'IF {} {} {} THEN {} ELSE {}'\ - .format(self.a, self.cond, self.b, self.lab_yes, self.lab_no) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/ir2tree.py --- a/python/ppci/ir2tree.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,65 +0,0 @@ -from tree import Tree -from . import ir - -""" Create a tree from ir code. """ - -f_map = {} # Mapping from types to tree creation functions - -def register(tp): - """ Register a function for type tp """ - def reg_f(f): - f_map[tp] = f - return f - return reg_f - -@register(ir.Binop) -@register(ir.Add) -def binop_to_tree(e): - names = {'+':'ADDI32', '-':'SUBI32', '|':'ORI32', '<<':'SHLI32', - '*':'MULI32', '&':'ANDI32', '>>':'SHRI32'} - op = names[e.operation] - assert e.ty == ir.i32 - return Tree(op, makeTree(e.a), makeTree(e.b)) - -@register(ir.Temp) -def temp_to_tree(e): - t = Tree('REGI32') - t.value = e - return t - -@register(ir.GlobalVariable) -def global_address_to_tree(e): - t = Tree('GLOBALADDRESS') - t.value = ir.label_name(e) - return t - -@register(ir.Const) -def const_to_tree(e): - if type(e.value) is bytes: - t = Tree('CONSTDATA') - t.value = e.value - return t - elif type(e.value) is int: - t = Tree('CONSTI32') - t.value = e.value - return t - else: - raise Exception('{} not implemented'.format(type(e.value))) - -@register(ir.Mem) -def mem_to_tree(e): - return Tree('MEMI32', makeTree(e.e)) - -@register(ir.Addr) -def mem_to_tree(e): - return Tree('ADR', makeTree(e.e)) - -@register(ir.Call) -def call_to_tree(e): - t = Tree('CALL') - t.value = e - return t - -def makeTree(ir_node): - """ Transform an ir node into a tree usable for matching """ - return f_map[type(ir_node)](ir_node) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/irmach.py --- a/python/ppci/irmach.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ - -""" - Abstract assembly language instructions. - - This is the second intermediate representation. - - Instructions are selected and scheduled at this stage. -""" - -from .target import Instruction - - -class Frame: - """ - Activation record abstraction. This class contains a flattened - function. Instructions are selected and scheduled at this stage. - Frames differ per machine. - """ - def __init__(self, name): - self.name = name - self.instructions = [] - self.stacksize = 0 - - def __repr__(self): - return 'Frame {}'.format(self.name) - - -class AbstractInstruction: - """ - Abstract machine instruction class. This is a very simple - abstraction of machine instructions. - """ - def __init__(self, cls, ops=(), src=(), dst=(), jumps=(), others=(), ismove=False): - assert type(cls) is type or isinstance(cls, Instruction), str(cls) - self.assem = cls - self.ops = tuple(ops) - self.src = tuple(src) - self.dst = tuple(dst) - self.jumps = tuple(jumps) - self.others = tuple(others) - self.ismove = ismove - - def __gt__(self, other): - """ To make the class fit for sorting """ - return str(self) > str(other) - - def __repr__(self): - """ Substitutes source, dst and labels in the string """ - if isinstance(self.assem, Instruction): - x = str(self.assem) - else: - cn = self.assem.__name__ - x = '{}, def={}, use={}, other={}' - x = x.format(cn, self.dst, self.src, self.others) - return x diff -r a7c444404df9 -r 0374c65cb437 python/ppci/irutils.py --- a/python/ppci/irutils.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,269 +0,0 @@ - -""" - Some utilities for ir-code. -""" -import re -from . import ir - -def dumpgv(m, outf): - print('digraph G ', file=outf) - print('{', file=outf) - for f in m.Functions: - print('{} [label="{}" shape=box3d]'.format(id(f), f), file=outf) - for bb in f.Blocks: - contents = str(bb) + '\n' - contents += '\n'.join([str(i) for i in bb.Instructions]) - print('{0} [shape=note label="{1}"];' - .format(id(bb), contents), file=outf) - for successor in bb.Successors: - print('"{}" -> "{}"'.format(id(bb), id(successor)), file=outf) - - print('"{}" -> "{}" [label="entry"]' - .format(id(f), id(f.entry)), file=outf) - print('}', file=outf) - - -class Writer: - def __init__(self, extra_indent=''): - self.extra_indent = extra_indent - - def write(self, ir, f): - """ Write ir-code to file f """ - print('{}{}'.format(self.extra_indent, ir), file=f) - for v in ir.Variables: - print('{}{}'.format(self.extra_indent, v), file=f) - for function in ir.Functions: - self.write_function(function, f) - - def write_function(self, fn, f): - args = ','.join('i32 ' + str(a) for a in fn.arguments) - print('{}function i32 {}({})'.format(self.extra_indent, fn.name, args), file=f) - for bb in fn.Blocks: - print('{} {}'.format(self.extra_indent, bb), file=f) - for ins in bb.Instructions: - print('{} {}'.format(self.extra_indent, ins), file=f) - - -class IrParseException(Exception): - pass - - -class Reader: - def read(self, f): - """ Read ir code from file f """ - # Read lines from the file: - lines = [line.rstrip() for line in f] - - # Create a regular expression for the lexing part: - tok_spec = [ - ('NUMBER', r'\d+'), - ('ID', r'[A-Za-z][A-Za-z\d_]*'), - ('SKIP2', r' '), - ('SKIP1', r' '), - ('OTHER', r'[\.,=:;\-+*\[\]/\(\)]|>|<|{|}|&|\^|\|') - ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - - def tokenize(): - for line in lines: - if not line: - continue # Skip empty lines - mo = gettok(line) - first = True - while mo: - typ = mo.lastgroup - val = mo.group(typ) - if typ == 'ID': - if val in ['function', 'module']: - typ = val - yield (typ, val) - elif typ == 'OTHER': - typ = val - yield (typ, val) - elif typ in ['SKIP1', 'SKIP2']: - if first: - yield (typ, val) - elif typ == 'NUMBER': - yield (typ, int(val)) - else: - raise NotImplementedError(str(typ)) - first = False - pos = mo.end() - mo = gettok(line, pos) - if len(line) != pos: - raise IrParseException('Lex fault') - yield ('eol', 'eol') - yield ('eof', 'eof') - self.tokens = tokenize() - self.token = self.tokens.__next__() - - try: - module = self.parse_module() - return module - except IrParseException as e: - print(e) - - def next_token(self): - t = self.token - if t[0] != 'eof': - self.token = self.tokens.__next__() - return t - - @property - def Peak(self): - return self.token[0] - - def Consume(self, typ): - if self.Peak == typ: - return self.next_token() - else: - raise IrParseException('Expected "{}" got "{}"'.format(typ, self.Peak)) - - def parse_module(self): - """ Entry for recursive descent parser """ - self.Consume('module') - name = self.Consume('ID')[1] - module = ir.Module(name) - self.Consume('eol') - while self.Peak != 'eof': - if self.Peak == 'function': - module.add_function(self.parse_function()) - else: - raise IrParseException('Expected function got {}'.format(self.Peak)) - return module - - def parse_function(self): - self.Consume('function') - self.parse_type() - name = self.Consume('ID')[1] - function = ir.Function(name) - self.Consume('(') - while self.Peak != ')': - self.parse_type() - self.Consume('ID') - if self.Peak != ',': - break - else: - self.Consume(',') - self.Consume(')') - self.Consume('eol') - while self.Peak == 'SKIP1': - function.add_block(self.parse_block()) - return function - - def parse_type(self): - self.Consume('ID') - - def parse_block(self): - self.Consume('SKIP1') - name = self.Consume('ID')[1] - block = ir.Block(name) - self.Consume(':') - self.Consume('eol') - while self.Peak == 'SKIP2': - self.parse_statement() - return block - - def parse_statement(self): - self.Consume('SKIP2') - while self.Peak != 'eol': - # raise NotImplementedError() - self.next_token() - self.Consume('eol') - - -# Constructing IR: - -class NamedClassGenerator: - def __init__(self, prefix, cls): - self.prefix = prefix - self.cls = cls - - def NumGen(): - a = 0 - while True: - yield a - a = a + 1 - self.nums = NumGen() - - def gen(self, prefix=None): - if not prefix: - prefix = self.prefix - return self.cls('{0}{1}'.format(prefix, self.nums.__next__())) - - -class Builder: - """ Base class for ir code generators """ - def __init__(self): - self.prepare() - - def prepare(self): - self.newTemp = NamedClassGenerator('reg', ir.Temp).gen - self.newBlock2 = NamedClassGenerator('block', ir.Block).gen - self.bb = None - self.m = None - self.fn = None - self.loc = None - - # Helpers: - def setModule(self, m): - self.m = m - - def new_function(self, name): - f = ir.Function(name) - self.m.add_function(f) - return f - - def newBlock(self): - assert self.fn - b = self.newBlock2() - b.function = self.fn - return b - - def setFunction(self, f): - self.fn = f - self.bb = f.entry if f else None - - def setBlock(self, b): - self.bb = b - - def setLoc(self, l): - self.loc = l - - def emit(self, i): - assert isinstance(i, ir.Statement) - i.debugLoc = self.loc - if not self.bb: - raise Exception('No basic block') - self.bb.addInstruction(i) - - -class Verifier: - """ Checks an ir module for correctness """ - def verify(self, module): - """ Verifies a module for some sanity """ - assert isinstance(module, ir.Module) - for f in module.Functions: - self.verify_function(f) - - def verify_function(self, function): - for b in function.Blocks: - self.verify_block_termination(b) - - # Now we can build a dominator tree - for b in function.Blocks: - self.verify_block(b) - - def verify_block_termination(self, block): - assert not block.Empty - assert block.LastInstruction.IsTerminator - for i in block.Instructions[:-1]: - assert not isinstance(i, ir.LastStatement) - - def verify_block(self, block): - for instruction in block.Instructions: - self.verify_instruction(instruction) - - def verify_instruction(self, instruction): - pass diff -r a7c444404df9 -r 0374c65cb437 python/ppci/layout.py --- a/python/ppci/layout.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,145 +0,0 @@ -from baselex import BaseLexer -import pyyacc -from . import make_num - - -class Layout: - def __init__(self): - self.memories = [] - - def add_memory(self, memory): - self.memories.append(memory) - - def __eq__(self, other): - return self.memories == other.memories - - def __repr__(self): - return str(self.memories) - - -class Memory: - """ Specification of how a memory may look like and what it contains. """ - def __init__(self, name): - self.inputs = [] - self.name = name - self.location = 0x0 - self.size = 0x0 - - def add_input(self, inp): - assert isinstance(inp, Input) - self.inputs.append(inp) - - def __repr__(self): - return 'MEM {} loc={:08X} size={:08X}'.format(self.name, self.location, self.size) + str(self.inputs) - - def __eq__(self, other): - return str(self) == str(other) - - -class Input: - pass - - -class Section(Input): - def __init__(self, section_name): - self.section_name = section_name - - def __repr__(self): - return 'Section({})'.format(self.section_name) - - -class Align(Input): - def __init__(self, alignment): - self.alignment = alignment - - def __repr__(self): - return 'Align({})'.format(self.alignment) - - -class SymbolDefinition(Input): - def __init__(self, symbol_name): - self.symbol_name = symbol_name - - def __repr__(self): - return 'Symbol define: {}'.format(self.symbol_name) - - -class LayoutLexer(BaseLexer): - def __init__(self): - tok_spec = [ - ('HEXNUMBER', r'0x[\da-fA-F]+', self.handle_number), - ('NUMBER', r'\d+', self.handle_number), - ('ID', r'[A-Za-z][A-Za-z\d_]*', self.handle_id), - ('SKIP', r'[ \t\r\n]', None), - ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{', lambda typ, val: (val, val)), - ('STRING', r"'.*?'", lambda typ, val: (typ, val[1:-1])), - ] - super().__init__(tok_spec) - self.kws = ['MEMORY', 'ALIGN', 'LOCATION','SECTION','SIZE', 'DEFINESYMBOL'] - - def handle_id(self, typ, val): - if val in self.kws: - typ = val - return typ, val - - def handle_number(self, typ, val): - val = make_num(val) - typ = 'NUMBER' - return typ, val - - -class LayoutParser: - def __init__(self, kws): - toks = ['ID', 'NUMBER', '{', '}', '.', ':', '=', '(', ')', pyyacc.EPS, pyyacc.EOF] + kws - g = pyyacc.Grammar(toks) - g.add_production('layout', ['mem_list']) - g.add_one_or_more('mem', 'mem_list') - g.add_production('mem', ['MEMORY', 'ID', 'LOCATION', '=', 'NUMBER', 'SIZE', '=', 'NUMBER', '{', 'input_list', '}'], self.handle_mem) - g.add_one_or_more('input', 'input_list') - g.add_production('input', ['ALIGN', '(', 'NUMBER', ')'], self.handle_align) - g.add_production('input', ['SECTION', '(', 'ID', ')'], self.handle_section) - g.add_production('input', ['DEFINESYMBOL', '(', 'ID', ')'], self.handle_defsym) - - g.start_symbol = 'layout' - self.p = g.generate_parser() - - def parse(self, lexer, layout): - self.layout = layout - self.p.parse(lexer) - - def handle_mem(self, mem_tag, mem_name, loc_tag, eq1, loc, size_tag, eq2, size, lbrace, inps, rbrace): - m = Memory(mem_name.val) - m.size = size.val - m.location = loc.val - for inp in inps: - m.add_input(inp) - self.layout.add_memory(m) - - def handle_align(self, align_tag, lbrace, alignment, rbrace): - return Align(alignment.val) - - def handle_section(self, section_tag, lbrace, section_name, rbrace): - return Section(section_name.val) - - def handle_defsym(self, section_tag, lbrace, name, rbrace): - return SymbolDefinition(name.val) - - -class LayoutLoader: - def __init__(self): - self.lexer = LayoutLexer() - self.parser = LayoutParser(self.lexer.kws) - - def load_layout(self, f): - layout = Layout() - self.lexer.feed(f.read()) # TODO: perhaps the read is better in the lexer? - self.parser.parse(self.lexer, layout) - return layout - -# Single definition: -_lloader = LayoutLoader() - - -def load_layout(f): - return _lloader.load_layout(f) - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/linker.py --- a/python/ppci/linker.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ -import logging -from .objectfile import ObjectFile -from . import CompilerError -from .layout import Layout, Section, SymbolDefinition, Align - - -class Linker: - """ Merges the sections of several object files and - performs relocation """ - def __init__(self, target): - self.logger = logging.getLogger('Linker') - self.target = target - - def merge_sections(self, objs, dst): - for iobj in objs: - offsets = {} - # Merge sections: - for in_s in iobj.sections.values(): - out_s = dst.get_section(in_s.name) - # TODO: align section in other way: - while out_s.Size % 4 != 0: - out_s.add_data(bytes([0])) - - # Add new section: - offsets[in_s.name] = out_s.Size - out_s.add_data(in_s.data) - self.logger.debug('{} {}({})'.format(offsets[in_s.name], iobj, in_s.name)) - - # Merge symbols: - for sym in iobj.symbols.values(): - out_s = dst.get_section(sym.section) - value = offsets[sym.section] + sym.value - dst.add_symbol(sym.name, value, sym.section) - self.logger.debug('{} at 0x{:08X} in section {}'.format(sym.name, value, sym.section)) - - # Merge relocations: - for reloc in iobj.relocations: - offset = offsets[reloc.section] + reloc.offset - dst.add_relocation(reloc.sym, offset, reloc.typ, reloc.section) - - def layout_sections(self, dst, layout): - """ Use the given layout to place sections into memories """ - # Create sections with address: - dst.images = {} - for mem in layout.memories: - cur_addr = mem.location - output_memory = bytearray() - for memory_input in mem.inputs: - if type(memory_input) is Section: - section = dst.get_section(memory_input.section_name) - section.address = cur_addr - cur_addr += section.Size - output_memory += section.data - # TODO: align sections - elif type(memory_input) is SymbolDefinition: - dst.add_symbol(memory_input.symbol_name, cur_addr, "code") - elif type(memory_input) is Align: - while (cur_addr % memory_input.alignment) != 0: - cur_addr += 1 - output_memory += bytes([0]) - else: - print(memory_input) - dst.images[mem.name] = bytes(output_memory) - - def do_relocations(self, dst): - """ Perform the correct relocation as listed """ - for reloc in dst.relocations: - # Lookup symbol: - if reloc.sym not in dst.symbols: - raise CompilerError('Undefined reference "{}"'.format(reloc.sym)) - - sym_value = dst.get_symbol_value(reloc.sym) - section = dst.get_section(reloc.section) - - # Determine location in memory of reloc patchup position: - reloc_value = section.address + reloc.offset - - if reloc.typ in self.target.reloc_map: - f = self.target.reloc_map[reloc.typ] - f(reloc, sym_value, section, reloc_value) - else: - raise NotImplementedError('Unknown relocation type {}'.format(reloc.typ)) - - def link(self, objs, layout): - """ Link together the given object files using the layout """ - assert type(objs) is list - assert type(layout) is Layout - # Create new object file to store output: - dst = ObjectFile() - - # First merge all sections into output sections: - self.merge_sections(objs, dst) - - # Apply layout rules: - self.layout_sections(dst, layout) - - # Perform relocations: - self.do_relocations(dst) - - # Create memories for the second time - # TODO: make this nicer? - self.layout_sections(dst, layout) - - return dst diff -r a7c444404df9 -r 0374c65cb437 python/ppci/mem2reg.py --- a/python/ppci/mem2reg.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,70 +0,0 @@ -import logging -from transform import FunctionPass -from ir import * - -def isAllocPromotable(allocinst): - # Check if alloc value is only used by load and store operations. - assert type(allocinst) is Alloc - return all(type(use) in [Load, Store] for use in allocinst.value.used_by) - - -class Mem2RegPromotor(FunctionPass): - def promoteSingleBlock(self, ai): - v = ai.value - bb = ai.Block - - # Replace all loads with the value: - loads = [i for i in v.used_by if isinstance(i, Load)] - stores = [i for i in v.used_by if isinstance(i, Store)] - stores.sort(key=lambda s: s.Position) - stores.reverse() - - for load in loads: - idx = load.Position - # Search upwards: - for store in stores: - if store.Position < load.Position: - break - load.value.replaceby(store.value) - logging.debug('replaced {} with {}'.format(load, store.value)) - bb.removeInstruction(load) - - # Remove store instructions: - for store in stores: - sv = store.value - logging.debug('removing {}'.format(store)) - bb.removeInstruction(store) - #assert sv.Used - - # Remove alloca instruction: - assert not ai.value.Used, ai.value.used_by - bb.removeInstruction(ai) - - def promote(self, ai): - # Find load operations and replace them with assignments - v = ai.value - if len(ai.value.UsedInBlocks) == 1: - self.promoteSingleBlock(ai) - return - - loads = [i for i in v.used_by if isinstance(i, Load)] - stores = [i for i in v.used_by if isinstance(i, Store)] - - # Each store instruction can be removed (later). - # Instead of storing the value, we use it - # where the load would have been! - replMap = {} - for store in stores: - replMap[store] = store.value - - # for each load, track back what the defining store - # was. - for load in loads: - pass - - def onFunction(self, f): - for bb in f.BasicBlocks: - allocs = [i for i in bb.Instructions if isinstance(i, Alloc)] - for i in allocs: - if isAllocPromotable(i): - self.promote(i) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/objectfile.py --- a/python/ppci/objectfile.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,175 +0,0 @@ - -""" -Object files are used to store assembled code. Information contained -is code, symbol table and relocation information. -""" - -import json -import binascii -from . import CompilerError, make_num - -class Symbol: - def __init__(self, name, value, section): - self.name = name - self.value = value - self.section = section - - def __repr__(self): - return 'SYM {}, val={} sec={}'.format(self.name, self.value, self.section) - - def __eq__(self, other): - return (self.name, self.value, self.section) == \ - (other.name, other.value, other.section) - - -class Relocation: - """ Represents a relocation entry. A relocation always has a symbol to refer to - and a relocation type """ - def __init__(self, sym, offset, typ, section): - self.sym = sym - self.offset = offset - self.typ = typ - self.section = section - - def __repr__(self): - return 'RELOC {} off={} t={} sec={}'.format(self.sym, self.offset, self.typ, self.section) - - def __eq__(self, other): - return (self.sym, self.offset, self.typ, self.section) ==\ - (other.sym, other.offset, other.typ, other.section) - - -class Section: - def __init__(self, name): - self.name = name - self.address = 0 - self.data = bytearray() - - def add_data(self, data): - self.data += data - - @property - def Size(self): - return len(self.data) - - def __repr__(self): - return 'SECTION {}'.format(self.name) - - def __eq__(self, other): - return (self.name == other.name) and (self.address == other.address) \ - and (self.data == other.data) - - -class ObjectFile: - """ Container for sections with compiled code or data. - Also contains symbols and relocation entries """ - def __init__(self): - self.symbols = {} - self.sections = {} - self.relocations = [] - self.images = {} - - def find_symbol(self, name): - return self.symbols[name] - - def add_symbol(self, name, value, section): - if name in self.symbols: - raise CompilerError('{} already defined'.format(name)) - assert section in self.sections - sym = Symbol(name, value, section) - self.symbols[name] = sym - return sym - - def add_relocation(self, sym_name, offset, typ, section): - assert type(sym_name) is str, str(sym_name) - assert section in self.sections - # assert sym_name in self.symbols - reloc = Relocation(sym_name, offset, typ, section) - self.relocations.append(reloc) - return reloc - - def get_section(self, name): - if not name in self.sections: - self.sections[name] = Section(name) - return self.sections[name] - - def get_image(self, name): - return self.images[name] - - def get_symbol_value(self, name): - symbol = self.find_symbol(name) - section = self.get_section(symbol.section) - return symbol.value + section.address - - def __eq__(self, other): - return (self.symbols == other.symbols) and \ - (self.sections == other.sections) and \ - (self.relocations == other.relocations) - - def save(self, f): - save_object(self, f) - - -def save_object(o, f): - json.dump(serialize(o), f, indent=2, sort_keys=True) - - -def load_object(f): - return deserialize(json.load(f)) - - -def bin2asc(data): - return binascii.hexlify(data).decode('ascii') - -def asc2bin(data): - return bytearray(binascii.unhexlify(data.encode('ascii'))) - - -def serialize(x): - res = {} - if isinstance(x, ObjectFile): - res['sections'] = [] - for sname in sorted(x.sections.keys()): - s = x.sections[sname] - res['sections'].append(serialize(s)) - res['symbols'] = [] - for sname in sorted(x.symbols.keys()): - s = x.symbols[sname] - res['symbols'].append(serialize(s)) - res['relocations'] = [] - for reloc in x.relocations: - res['relocations'].append(serialize(reloc)) - res['images'] = {} - for image_name in x.images: - res['images'][image_name] = bin2asc(x.images[image_name]) - elif isinstance(x, Section): - res['name'] = x.name - res['address'] = hex(x.address) - res['data'] = bin2asc(x.data) - elif isinstance(x, Symbol): - res['name'] = x.name - res['value'] = hex(x.value) - res['section'] = x.section - elif isinstance(x, Relocation): - res['symbol'] = x.sym - res['offset'] = hex(x.offset) - res['type'] = x.typ - res['section'] = x.section - return res - - -def deserialize(d): - obj = ObjectFile() - for section in d['sections']: - so = obj.get_section(section['name']) - so.address = make_num(section['address']) - so.data = asc2bin(section['data']) - for reloc in d['relocations']: - obj.add_relocation(reloc['symbol'], make_num(reloc['offset']), - reloc['type'], reloc['section']) - for sym in d['symbols']: - obj.add_symbol(sym['name'], make_num(sym['value']), sym['section']) - for image_name in d['images']: - obj.images[image_name] = asc2bin(d['images'][image_name]) - return obj - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/optimize.py --- a/python/ppci/optimize.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ -from mem2reg import Mem2RegPromotor -from transform import CommonSubexpressionElimination, CleanPass -from transform import DeadCodeDeleter, ConstantFolder - -def optimize(ir): - return - cf = ConstantFolder() - cf.run(ir) - return - dcd = DeadCodeDeleter() - m2r = Mem2RegPromotor() - clr = CleanPass() - cse = CommonSubexpressionElimination() - dcd.run(ir) - clr.run(ir) - m2r.run(ir) - cse.run(ir) - cf.run(ir) - dcd.run(ir) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/outstream.py --- a/python/ppci/outstream.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -import logging -import binascii -from .target import Instruction, Alignment -from .objectfile import ObjectFile - -""" - The output stream is a stream of instructions that can be output - to a file or binary or hexfile. -""" - - -class OutputStream: - """ Interface to generator code with. """ - def emit(self, item): - raise NotImplementedError('Abstract base class') - - def select_section(self, sname): - raise NotImplementedError('Abstract base class') - - -class BinaryOutputStream(OutputStream): - """ Output stream that writes to object file """ - def __init__(self, obj_file): - super().__init__() - self.obj_file = obj_file - self.literal_pool = [] - - def emit(self, item): - """ Encode instruction and add symbol and relocation information """ - assert isinstance(item, Instruction), str(item) + str(type(item)) - assert self.currentSection - section = self.currentSection - address = self.currentSection.Size - b = item.encode() - syms = item.symbols() - relocs = item.relocations() - section.add_data(b) - for sym in syms: - self.obj_file.add_symbol(sym, address, section.name) - for sym, typ in relocs: - self.obj_file.add_relocation(sym, address, typ, section.name) - # Special case for align, TODO do this different? - if type(item) is Alignment: - while section.Size % item.align != 0: - section.add_data(bytes([0])) - - def select_section(self, sname): - self.currentSection = self.obj_file.get_section(sname) - - -class DummyOutputStream(OutputStream): - """ Stream that implements the bare minimum and does nothing """ - def emit(self, item): - pass - - def select_section(self, sname): - pass - - -class LoggerOutputStream(OutputStream): - """ Stream that emits instructions as text in the log """ - def __init__(self): - self.logger = logging.getLogger('LoggerOutputStream') - - def emit(self, item): - self.logger.debug(str(item)) - - def select_section(self, sname): - self.logger.debug('.section {}'.format(sname)) - - -class MasterOutputStream(OutputStream): - """ Stream that emits to multiple sub streams """ - def __init__(self, substreams=[]): - self.substreams = list(substreams) # Use copy constructor!!! - - def add_substream(self, output_stream): - self.substreams.append(output_stream) - - def emit(self, item): - for output_stream in self.substreams: - output_stream.emit(item) - - def select_section(self, sname): - for output_stream in self.substreams: - output_stream.select_section(sname) - - -def BinaryAndLoggingStream(output): - """ Create a stream object that both logs and writes to an object file """ - o2 = BinaryOutputStream(output) - o1 = LoggerOutputStream() - ostream = MasterOutputStream([o1, o2]) - return ostream diff -r a7c444404df9 -r 0374c65cb437 python/ppci/recipe.py --- a/python/ppci/recipe.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -#!/usr/bin/python3 - -import os -import xml.dom.minidom - -from .tasks import Project, Target - - -class RecipeLoader: - """ Loads a recipe into a runner from a dictionary or file """ - def load_file(self, recipe_file): - """ Loads a build configuration from file """ - recipe_dir = os.path.abspath(os.path.dirname(recipe_file)) - dom = xml.dom.minidom.parse(recipe_file) - project = self.load_project(dom) - project.set_property('basedir', recipe_dir) - return project - - def load_project(self, elem): - elem = elem.getElementsByTagName("project")[0] - name = elem.getAttribute('name') - project = Project(name) - if elem.hasAttribute('default'): - project.default = elem.getAttribute('default') - else: - project.default = None - - for pe in elem.getElementsByTagName("property"): - name = pe.getAttribute('name') - value = pe.getAttribute('value') - project.set_property(name, value) - for te in elem.getElementsByTagName("target"): - name = te.getAttribute('name') - target = Target(name, project) - if te.hasAttribute('depends'): - dependencies = te.getAttribute('depends').split(',') - for dep in dependencies: - target.add_dependency(dep) - # print(name) - project.add_target(target) - for cn in te.childNodes: - # print(cn, type(cn)) - if type(cn) is xml.dom.minidom.Element: - task_name = cn.tagName - task_props = {} - for i in range(cn.attributes.length): - atr = cn.attributes.item(i) - #print(atr, atr.name, atr.value) - task_props[atr.name] = atr.value - target.add_task((task_name, task_props)) - return project - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/report.py --- a/python/ppci/report.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ - -import logging -import io - -from . import outstream -from .c3 import AstPrinter -from . import logformat -from .irutils import Writer - -class RstFormatter(logging.Formatter): - """ Formatter that tries to create an rst document """ - def __init__(self): - super().__init__(fmt=logformat) - - def format(self, record): - s = super().format(record) - s += '\n' - if hasattr(record, 'c3_ast'): - f = io.StringIO() - print('', file=f) - print('', file=f) - print('.. code::', file=f) - print('', file=f) - AstPrinter().printAst(record.c3_ast, f) - print('', file=f) - s += '\n' + f.getvalue() - if hasattr(record, 'ircode'): - f = io.StringIO() - print('', file=f) - print('', file=f) - print('.. code::', file=f) - print('', file=f) - Writer(' ').write(record.ircode, f) - print('', file=f) - s += '\n' + f.getvalue() - if hasattr(record, 'irfunc'): - f = io.StringIO() - print('', file=f) - print('', file=f) - print('.. code::', file=f) - print('', file=f) - Writer(' ').write_function(record.irfunc, f) - print('', file=f) - s += '\n' + f.getvalue() - if hasattr(record, 'ppci_frame'): - f = io.StringIO() - frame = record.ppci_frame - print('', file=f) - print('.. code::', file=f) - print('', file=f) - print(' {}'.format(frame.name), file=f) - for i in frame.instructions: - print(' {}'.format(i),file=f) - print('', file=f) - s += '\n' + f.getvalue() - if hasattr(record, 'ra_cfg'): - f = io.StringIO() - print('', file=f) - print('', file=f) - print('.. graphviz::', file=f) - print('', file=f) - print(' digraph G {', file=f) - print(' size="8,80";', file=f) - cfg = record.ra_cfg - cfg.to_dot(f) - print(' }', file=f) - print('', file=f) - s += '\n' + f.getvalue() - if hasattr(record, 'ra_ig'): - f = io.StringIO() - print('', file=f) - print('', file=f) - print('.. graphviz::', file=f) - print('', file=f) - print(' digraph G {', file=f) - print(' ratio="compress";', file=f) - print(' size="8,80";', file=f) - ig = record.ra_ig - ig.to_dot(f) - print(' }', file=f) - print('', file=f) - s += '\n' + f.getvalue() - if hasattr(record, 'zcc_outs'): - f = io.StringIO() - print('', file=f) - print('', file=f) - print('.. code::', file=f) - print('', file=f) - outstream.OutputStreamWriter(' ').dump(record.zcc_outs, f) - print('', file=f) - s += '\n' + f.getvalue() - return s - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/__init__.py --- a/python/ppci/target/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -#!/usr/bin/env python - -from .basetarget import Nop, Instruction, Label, Target, Comment, Alignment - - -class SimpleTarget(Target): - def __init__(self): - super().__init__('SimpleTarget') diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/__init__.py --- a/python/ppci/target/arm/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,271 +0,0 @@ - -from ..basetarget import Target, Label -from ..arm.registers import R0, R1, R2, R3, R4, R5, R6, R7 -from ..arm.registers import R8, R9, R10, R11, R12, SP, LR, PC -from ..arm.registers import register_range - -from .instructions import Dcd, Mov, Mov1, Add, Add2, Sub, Orr1, Mul, Mov2 -from .instructions import Add1, Mul1 -from .instructions import Lsr1, Lsl1, And1, Sub1 -from .instructions import B, Bl, Ble, Bgt, Beq, Blt, Cmp, Cmp2 -from .instructions import Push, Pop, Str, Ldr, Ldr3, Str1, Ldr1, Adr -from .instructions import Mcr, Mrc -from .instructions import LdrPseudo -from .selector import ArmInstructionSelector -from .frame import ArmFrame -from ...assembler import BaseAssembler - - -class ArmAssembler(BaseAssembler): - def __init__(self, target): - super().__init__(target) - self.target.add_keyword('section') - self.target.add_instruction(['section', 'ID'], - lambda rhs: self.select_section(rhs[1].val)) - self.target.add_keyword('repeat') - self.target.add_keyword('endrepeat') - self.target.add_instruction(['repeat', 'imm32'], self.begin_repeat) - self.target.add_instruction(['endrepeat'], self.end_repeat) - - # Construct the parser from the given rules: - self.make_parser() - self.lit_pool = [] - self.lit_counter = 0 - self.inMacro = False - - def prepare(self): - self.inMacro = False - - def begin_repeat(self, rhs): - if self.inMacro: - raise Exception() - self.inMacro = True - self.rep_count = rhs[1] - self.recording = [] - - def end_repeat(self, rhs): - if not self.inMacro: - raise Exception() - self.inMacro = False - for rec in self.recording * self.rep_count: - self.emit(*rec) - - def emit(self, *args): - if self.inMacro: - self.recording.append(args) - else: - super().emit(*args) - - def select_section(self, name): - self.flush() - self.stream.select_section(name) - - def flush(self): - if self.inMacro: - raise Exception() - while self.lit_pool: - i = self.lit_pool.pop(0) - self.emit(i) - - def add_literal(self, v): - """ For use in the pseudo instruction LDR r0, =SOMESYM """ - # Invent some label for the literal and store it. - self.lit_counter += 1 - label_name = "_lit_{}".format(self.lit_counter) - self.lit_pool.append(Label(label_name)) - self.lit_pool.append(Dcd(v)) - return label_name - - -class ArmTarget(Target): - def __init__(self): - super().__init__('arm') - self.make_parser() - self.ins_sel = ArmInstructionSelector() - self.FrameClass = ArmFrame - self.assembler = ArmAssembler(self) - - self.add_lowering(Ldr3, lambda im: Ldr3(im.dst[0], im.others[0])) - self.add_lowering(Str1, lambda im: Str1(im.src[1], im.src[0], im.others[0])) - self.add_lowering(Ldr1, lambda im: Ldr1(im.dst[0], im.src[0], im.others[0])) - self.add_lowering(Adr, lambda im: Adr(im.dst[0], im.others[0])) - self.add_lowering(Mov2, lambda im: Mov2(im.dst[0], im.src[0])) - self.add_lowering(Cmp2, lambda im: Cmp2(im.src[0], im.src[1])) - self.add_lowering(Add1, lambda im: Add1(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(Add2, lambda im: Add2(im.dst[0], im.src[0], im.others[0])) - self.add_lowering(Sub1, lambda im: Sub1(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(Mul1, lambda im: Mul1(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(Lsr1, lambda im: Lsr1(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(And1, lambda im: And1(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(Mov1, lambda im: Mov1(im.dst[0], im.others[0])) - - def emit_global(self, outs, lname): - outs.emit(Label(lname)) - outs.emit(Dcd(0)) - - def make_parser(self): - # Assembly grammar: - self.add_keyword('r0') - self.add_keyword('r1') - self.add_keyword('r2') - self.add_keyword('r3') - self.add_keyword('r4') - self.add_keyword('r5') - self.add_keyword('r6') - self.add_keyword('r7') - self.add_keyword('r8') - self.add_keyword('r9') - self.add_keyword('r10') - self.add_keyword('r11') - self.add_keyword('r12') - self.add_keyword('sp') - self.add_keyword('lr') - self.add_keyword('pc') - - self.add_rule('reg', ['r0'], lambda rhs: R0) - self.add_rule('reg', ['r1'], lambda rhs: R1) - self.add_rule('reg', ['r2'], lambda rhs: R2) - self.add_rule('reg', ['r3'], lambda rhs: R3) - self.add_rule('reg', ['r4'], lambda rhs: R4) - self.add_rule('reg', ['r5'], lambda rhs: R5) - self.add_rule('reg', ['r6'], lambda rhs: R6) - self.add_rule('reg', ['r7'], lambda rhs: R7) - self.add_rule('reg', ['r8'], lambda rhs: R8) - self.add_rule('reg', ['r9'], lambda rhs: R9) - self.add_rule('reg', ['r10'], lambda rhs: R10) - self.add_rule('reg', ['r11'], lambda rhs: R11) - self.add_rule('reg', ['r12'], lambda rhs: R12) - self.add_rule('reg', ['sp'], lambda rhs: SP) - self.add_rule('reg', ['lr'], lambda rhs: LR) - self.add_rule('reg', ['pc'], lambda rhs: PC) - - self.add_keyword('dcd') - self.add_instruction(['dcd', 'imm32'], - lambda rhs: Dcd(rhs[1])) - - self.add_keyword('mov') - self.add_instruction(['mov', 'reg', ',', 'imm32'], - lambda rhs: Mov(rhs[1], rhs[3])) - self.add_instruction(['mov', 'reg', ',', 'reg'], - lambda rhs: Mov(rhs[1], rhs[3])) - - self.add_keyword('cmp') - self.add_instruction(['cmp', 'reg', ',', 'imm32'], - lambda rhs: Cmp(rhs[1], rhs[3])) - self.add_instruction(['cmp', 'reg', ',', 'reg'], - lambda rhs: Cmp(rhs[1], rhs[3])) - - # Arithmatic: - self.add_keyword('add') - self.add_instruction(['add', 'reg', ',', 'reg', ',', 'imm32'], - lambda rhs: Add(rhs[1], rhs[3], rhs[5])) - - self.add_instruction(['add', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: Add(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('sub') - self.add_instruction(['sub', 'reg', ',', 'reg', ',', 'imm32'], - lambda rhs: Sub(rhs[1], rhs[3], rhs[5])) - - self.add_instruction(['sub', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: Sub(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('mul') - self.add_instruction(['mul', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: Mul(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('orr') - self.add_instruction(['orr', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: Orr1(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('and') - self.add_instruction(['and', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: And1(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('lsr') - self.add_instruction(['lsr', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: Lsr1(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('lsl') - self.add_instruction(['lsl', 'reg', ',', 'reg', ',', 'reg'], - lambda rhs: Lsl1(rhs[1], rhs[3], rhs[5])) - - - # Jumping: - self.add_keyword('b') - self.add_instruction(['b', 'ID'], lambda rhs: B(rhs[1].val)) - self.add_keyword('ble') - self.add_instruction(['ble', 'ID'], lambda rhs: Ble(rhs[1].val)) - self.add_keyword('bgt') - self.add_instruction(['bgt', 'ID'], lambda rhs: Bgt(rhs[1].val)) - self.add_keyword('beq') - self.add_instruction(['beq', 'ID'], lambda rhs: Beq(rhs[1].val)) - self.add_keyword('blt') - self.add_instruction(['blt', 'ID'], lambda rhs: Blt(rhs[1].val)) - - self.add_keyword('bl') - self.add_instruction(['bl', 'ID'], lambda rhs: Bl(rhs[1].val)) - - # memory: - self.add_keyword('pop') - self.add_instruction(['pop', 'reg_list'], lambda rhs: Pop(rhs[1])) - - self.add_keyword('push') - self.add_instruction(['push', 'reg_list'], lambda rhs: Push(rhs[1])) - - self.add_keyword('ldr') - self.add_instruction(['ldr', 'reg', ',', '[', 'reg', ',', 'imm8', ']'], - lambda rhs: Ldr(rhs[1], rhs[4], rhs[6])) - - self.add_instruction(['ldr', 'reg', ',', 'ID'], - lambda rhs: Ldr(rhs[1], rhs[3].val)) - - # This is a pseudo instruction: - self.add_instruction(['ldr', 'reg', ',', '=', 'ID'], - lambda rhs: LdrPseudo(rhs[1], rhs[4].val, self.assembler.add_literal)) - - self.add_keyword('str') - self.add_instruction(['str', 'reg', ',', '[', 'reg', ',', 'imm8', ']'], - lambda rhs: Str(rhs[1], rhs[4], rhs[6])) - - self.add_instruction(['str', 'reg', ',', '[', 'reg', ',', 'reg', ']'], - lambda rhs: Str(rhs[1], rhs[4], rhs[6])) - - self.add_keyword('adr') - self.add_instruction(['adr', 'reg', ',', 'ID'], - lambda rhs: Adr(rhs[1], rhs[3].val)) - - - # Register list grammar: - self.add_rule('reg_list', ['{', 'reg_list_inner', '}'], - lambda rhs: rhs[1]) - self.add_rule('reg_list_inner', ['reg_or_range'], - lambda rhs: rhs[0]) - self.add_rule('reg_list_inner', ['reg_or_range', ',', 'reg_list_inner'], - lambda rhs: rhs[0] | rhs[2]) - self.add_rule('reg_or_range', ['reg'], lambda rhs: {rhs[0]}) - - self.add_rule('reg_or_range', ['reg', '-', 'reg'], - lambda rhs: register_range(rhs[0], rhs[2])) - - # Add MCR and MRC (co-processor) - for i in range(16): - creg = 'c{}'.format(i) - self.add_keyword(creg) - self.add_rule('coreg', [creg], i) - - for i in range(8, 16): - px = 'p{}'.format(i) - self.add_keyword(px) - # Ran into trouble when using i inside lambda function: - # When using inside lambda (as a closure), i is bound to the latest - # value (15) - self.add_rule('coproc', [px], i) - - self.add_keyword('mcr') - self.add_instruction(['mcr', 'coproc', ',', 'imm3', ',', 'reg', ',', 'coreg', ',', 'coreg', ',', 'imm3'], - lambda rhs: Mcr(rhs[1], rhs[3], rhs[5], rhs[7], rhs[9], rhs[11])) - - self.add_keyword('mrc') - self.add_instruction(['mrc', 'coproc', ',', 'imm3', ',', 'reg', ',', 'coreg', ',', 'coreg', ',', 'imm3'], - lambda rhs: Mrc(rhs[1], rhs[3], rhs[5], rhs[7], rhs[9], rhs[11])) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/arm.brg --- a/python/ppci/target/arm/arm.brg Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ - -from ppci.target.arm.instructions import Add1, Add2, Sub1, Mul1 -from ppci.target.arm.instructions import Ldr1, Ldr3, Adr -from ppci.target.arm.instructions import And1, Lsr1, Lsl1, Mov1 -from ppci.target.basetarget import LabelAddress - -%% - -%terminal ADDI32 SUBI32 MULI32 ADR -%terminal ORI32 SHLI32 SHRI32 ANDI32 -%terminal CONSTI32 CONSTDATA MEMI32 REGI32 CALL GLOBALADDRESS -%terminal MOVI32 - -%% - -reg: ADDI32(reg, reg) 2 'd = self.newTmp(); self.emit(Add1, dst=[d], src=[c0, c1]); return d' -reg: ADDI32(reg, cn) 2 'return tree.children[1].value < 256' 'd = self.newTmp(); self.emit(Add2, dst=[d], src=[c0], others=[c1]); return d' -reg: ADDI32(cn, reg) 2 'return tree.children[0].value < 256' 'd = self.newTmp(); self.emit(Add2, dst=[d], src=[c1], others=[c0]); return d' -reg: SUBI32(reg, reg) 2 'd = self.newTmp(); self.emit(Sub1, dst=[d], src=[c0, c1]); return d' -reg: MULI32(reg, reg) 2 'd = self.newTmp(); self.emit(Mul1, dst=[d], src=[c0, c1]); return d' -reg: ANDI32(reg, reg) 2 'd = self.newTmp(); self.emit(And1, dst=[d], src=[c0, c1]); return d' -reg: SHRI32(reg, reg) 2 'd = self.newTmp(); self.emit(Lsr1, dst=[d], src=[c0, c1]); return d' - -reg: MEMI32(ADDI32(reg, cn)) 2 'd = self.newTmp(); self.emit(Ldr1, dst=[d], src=[c0], others=[c1]); return d' -reg: MEMI32(reg) 2 'd = self.newTmp(); self.emit(Ldr1, dst=[d], src=[c0], others=[0]); return d' -reg: GLOBALADDRESS 21 'd = self.newTmp(); ln = self.selector.frame.add_constant(LabelAddress(tree.value)); self.emit(Ldr3, dst=[d], others=[ln]); return d' - -cn: CONSTI32 0 'return tree.value' - -reg: CONSTI32 6 'd = self.newTmp(); ln = self.selector.frame.add_constant(tree.value); self.emit(Ldr3, dst=[d], others=[ln]); return d' - -reg: CONSTI32 2 'return (type(tree.value) is int) and (tree.value < 256)' 'd = self.newTmp(); self.emit(Mov1, dst=[d], others=[tree.value]); return d' - -reg: ADR(CONSTDATA) 2 'd = self.newTmp(); ln = self.selector.frame.add_constant(tree.children[0].value); self.emit(Adr, dst=[d], others=[ln]); return d' - -reg: REGI32 1 'return tree.value' - -reg: CALL 1 'return self.selector.munchCall(tree.value)' - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/frame.py --- a/python/ppci/target/arm/frame.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,108 +0,0 @@ -from ... import ir -from ..basetarget import Label, Alignment, LabelAddress -from ...irmach import AbstractInstruction, Frame -from .instructions import Dcd, Add, Sub, Push, Pop, Mov, Db -from .registers import R0, R1, R2, R3, R4, R5, R6, R7, R8, R11, LR, PC, SP - - -class ArmFrame(Frame): - """ Arm specific frame for functions. """ - def __init__(self, name): - # We use r7 as frame pointer. - super().__init__(name) - self.regs = [R0, R1, R2, R3, R4, R5, R6, R7, R8] - self.rv = ir.Temp('special_RV') - self.p1 = ir.Temp('special_P1') - self.p2 = ir.Temp('special_P2') - self.p3 = ir.Temp('special_P3') - self.p4 = ir.Temp('special_P4') - self.fp = ir.Temp('special_FP') - # Pre-colored registers: - self.tempMap = {} - self.tempMap[self.rv] = R0 - self.tempMap[self.p1] = R1 - self.tempMap[self.p2] = R2 - self.tempMap[self.p3] = R3 - self.tempMap[self.p4] = R4 - self.tempMap[self.fp] = R11 - self.locVars = {} - self.parMap = {} - # Literal pool: - self.constants = [] - - def argLoc(self, pos): - """ - Gets the function parameter location in IR-code format. - """ - if pos == 0: - return self.p1 - elif pos == 1: - return self.p2 - elif pos == 2: - return self.p3 - elif pos == 3: - return self.p4 - else: - raise NotImplementedError('No more than 4 parameters implemented') - - def allocVar(self, lvar): - if lvar not in self.locVars: - self.locVars[lvar] = self.stacksize - self.stacksize = self.stacksize + 4 - return self.locVars[lvar] - - def add_constant(self, value): - assert type(value) in [int, bytes, LabelAddress] - lab_name = '{}_literal_{}'.format(self.name, len(self.constants)) - self.constants.append((lab_name, value)) - return lab_name - - def prologue(self): - """ Returns prologue instruction sequence """ - pre = [ - Label(self.name), # Label indication function - Push({LR, R11}) - ] - if self.stacksize > 0: - pre.append(Sub(SP, SP, self.stacksize)) # Reserve stack space - pre += [ - Mov(R11, SP) # Setup frame pointer - ] - return pre - - def epilogue(self): - """ Return epilogue sequence for a frame. Adjust frame pointer and add constant pool """ - post = [] - if self.stacksize > 0: - post.append(Add(SP, SP, self.stacksize)) - post += [ - Pop({PC, R11}), - Alignment(4) # Align at 4 bytes - ] - - # Add constant literals: - for ln, v in self.constants: - if isinstance(v, int): - post.extend([Label(ln), Dcd(v)]) - elif isinstance(v, LabelAddress): - post.extend([Label(ln), Dcd(v)]) - elif isinstance(v, bytes): - post.append(Label(ln)) - for c in v: - post.append(Db(c)) - post.append(Alignment(4)) # Align at 4 bytes - else: - raise Exception('Constant of type {} not supported'.format(v)) - return post - - def EntryExitGlue3(self): - """ - Add code for the prologue and the epilogue. Add a label, the - return instruction and the stack pointer adjustment for the frame. - """ - for index, ins in enumerate(self.prologue()): - self.instructions.insert(index, AbstractInstruction(ins)) - - # Postfix code: - for ins in self.epilogue(): - self.instructions.append(AbstractInstruction(ins)) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/instructions.py --- a/python/ppci/target/arm/instructions.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,537 +0,0 @@ - -from ..basetarget import Instruction, LabelAddress -from ...bitfun import encode_imm32 - -from .token import ArmToken -from .registers import R0, SP, ArmRegister - - - -# Instructions: - -class ArmInstruction(Instruction): - def __init__(self): - self.token = ArmToken() - - -class ConstantData(ArmInstruction): - def __init__(self, v): - super().__init__() - assert isinstance(v, int) - self.v = v - - -class Dcd(ArmInstruction): - def __init__(self, v): - super().__init__() - assert isinstance(v, int) or isinstance(v, LabelAddress) - self.v = v - - def encode(self): - if type(self.v) is int: - self.token[0:32] = self.v - else: - self.token[0:32] = 0 - return self.token.encode() - - def relocations(self): - if type(self.v) is LabelAddress: - return [(self.v.name, 'absaddr32')] - return [] - - def __repr__(self): - if type(self.v) is int: - return 'DCD {}'.format(hex(self.v)) - else: - return 'DCD ={}'.format(self.v.name) - - -class Db(ConstantData): - def encode(self): - assert self.v < 256 - return bytes([self.v]) - - def __repr__(self): - return 'DB {}'.format(hex(self.v)) - - -def Mov(*args): - if len(args) == 2: - if isinstance(args[1], int): - return Mov1(*args) - elif isinstance(args[1], ArmRegister): - return Mov2(*args) - raise Exception() - - -class Mov1(ArmInstruction): - """ Mov Rd, imm16 """ - def __init__(self, reg, imm): - super().__init__() - assert type(imm) is int - self.reg = reg - self.imm = imm - - def encode(self): - self.token[0:12] = encode_imm32(self.imm) - self.token.Rd = self.reg.num - self.token[16:20] = 0 - self.token[20] = 0 # Set flags - self.token[21:28] = 0b0011101 - self.token.cond = AL - return self.token.encode() - - def __repr__(self): - return 'Mov {}, {}'.format(self.reg, self.imm) - - -class Mov2(ArmInstruction): - def __init__(self, rd, rm): - super().__init__() - self.rd = rd - self.rm = rm - - def encode(self): - self.token[0:4] = self.rm.num - self.token[4:12] = 0 - self.token[12:16] = self.rd.num - self.token[16:20] = 0 - self.token.S = 0 - self.token[21:28] = 0xD - self.token.cond = AL - return self.token.encode() - - def __repr__(self): - return 'MOV {}, {}'.format(self.rd, self.rm) - - -def Cmp(*args): - if len(args) == 2: - if isinstance(args[1], int): - return Cmp1(*args) - elif isinstance(args[1], ArmRegister): - return Cmp2(*args) - raise Exception() - - -class Cmp1(ArmInstruction): - """ CMP Rn, imm """ - def __init__(self, reg, imm): - super().__init__() - assert type(imm) is int - self.reg = reg - self.imm = imm - - def encode(self): - self.token[0:12] = encode_imm32(self.imm) - self.token.Rn = self.reg.num - self.token[20:28] = 0b00110101 - self.token.cond = AL - return self.token.encode() - - def __repr__(self): - return 'CMP {}, {}'.format(self.reg, self.imm) - - -class Cmp2(ArmInstruction): - """ CMP Rn, Rm """ - def __init__(self, rn, rm): - super().__init__() - self.rn = rn - self.rm = rm - - def encode(self): - self.token.Rn = self.rn.num - self.token.Rm = self.rm.num - self.token[7:16] = 0 - self.token[20:28] = 0b10101 - self.token.cond = AL - return self.token.encode() - - def __repr__(self): - return 'CMP {}, {}'.format(self.rn, self.rm) - - -def Add(*args): - if len(args) == 3 and isinstance(args[0], ArmRegister) and \ - isinstance(args[1], ArmRegister): - if isinstance(args[2], ArmRegister): - return Add1(args[0], args[1], args[2]) - elif isinstance(args[2], int): - return Add2(args[0], args[1], args[2]) - raise Exception() - -def Sub(*args): - if len(args) == 3 and isinstance(args[0], ArmRegister) and \ - isinstance(args[1], ArmRegister): - if isinstance(args[2], ArmRegister): - return Sub1(args[0], args[1], args[2]) - elif isinstance(args[2], int): - return Sub2(args[0], args[1], args[2]) - raise Exception() - - -def Mul(*args): - return Mul1(args[0], args[1], args[2]) - - -class Mul1(ArmInstruction): - def __init__(self, rd, rn, rm): - super().__init__() - self.rd = rd - self.rn = rn - self.rm = rm - - def encode(self): - self.token[0:4] = self.rn.num - self.token[4:8] = 0b1001 - self.token[8:12] = self.rm.num - self.token[16:20] = self.rd.num - self.token.S = 0 - self.token.cond = AL - return self.token.encode() - - -class OpRegRegReg(ArmInstruction): - """ add rd, rn, rm """ - def __init__(self, rd, rn, rm, shift=0): - super().__init__() - self.rd = rd - self.rn = rn - self.rm = rm - - def encode(self): - self.token[0:4] = self.rm.num - self.token[4] = 0 - self.token[5:7] = 0 - self.token[7:12] = 0 # Shift - self.token.Rd = self.rd.num - self.token.Rn = self.rn.num - self.token.S = 0 # Set flags - self.token[21:28] = self.opcode - self.token.cond = 0xE # Always! - return self.token.encode() - - def __repr__(self): - return '{} {}, {}, {}'.format(self.mnemonic, self.rd, self.rn, self.rm) - - -class Add1(OpRegRegReg): - mnemonic = 'ADD' - opcode = 0b0000100 - - -class Sub1(OpRegRegReg): - mnemonic = 'SUB' - opcode = 0b0000010 - - -class Orr1(OpRegRegReg): - mnemonic = 'ORR' - opcode = 0b0001100 - - -class And1(OpRegRegReg): - mnemonic = 'AND' - opcode = 0b0000000 - - -class ShiftBase(ArmInstruction): - """ ? rd, rn, rm """ - def __init__(self, rd, rn, rm): - super().__init__() - self.rd = rd - self.rn = rn - self.rm = rm - - def encode(self): - self.token[0:4] = self.rn.num - self.token[4:8] = self.opcode - self.token[8:12] = self.rm.num - self.token[12:16] = self.rd.num - self.token.S = 0 # Set flags - self.token[21:28] = 0b1101 - self.token.cond = 0xE # Always! - return self.token.encode() - - def __repr__(self): - return '{} {}, {}, {}'.format(self.mnemonic, self.rd, self.rn, self.rm) - - -class Lsr1(ShiftBase): - mnemonic = 'LSR' - opcode = 0b0011 - - -class Lsl1(ShiftBase): - mnemonic = 'LSL' - opcode = 0b0001 - - -class OpRegRegImm(ArmInstruction): - """ add rd, rn, imm12 """ - def __init__(self, rd, rn, imm): - super().__init__() - self.rd = rd - self.rn = rn - self.imm2 = encode_imm32(imm) - self.imm = imm - - def encode(self): - self.token[0:12] = self.imm2 - self.token.Rd = self.rd.num - self.token.Rn = self.rn.num - self.token.S = 0 # Set flags - self.token[21:28] = self.opcode - self.token.cond = 0xE # Always! - return self.token.encode() - - def __repr__(self): - return '{} {}, {}, {}'.format(self.mnemonic, self.rd, self.rn, self.imm) - - -class Add2(OpRegRegImm): - mnemonic = 'ADD' - opcode = 0b0010100 - - -class Sub2(OpRegRegImm): - mnemonic = 'SUB' - opcode = 0b0010010 - - - -# Branches: - -class BranchBaseRoot(ArmInstruction): - def __init__(self, target): - super().__init__() - self.target = target - - def encode(self): - self.token.cond = self.cond - self.token[24:28] = self.opcode - return self.token.encode() - - def relocations(self): - return [(self.target, 'b_imm24')] - - def __repr__(self): - mnemonic = self.__class__.__name__ - return '{} {}'.format(mnemonic, self.target) - - -EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL = range(15) - -class BranchBase(BranchBaseRoot): - opcode = 0b1010 - -class BranchLinkBase(BranchBaseRoot): - opcode = 0b1011 - -class Bl(BranchLinkBase): - cond = AL - -class B(BranchBase): - cond = AL - -class Beq(BranchBase): - cond = EQ - -class Bgt(BranchBase): - cond = GT - -class Bge(BranchBase): - cond = GE - -class Ble(BranchBase): - cond = LE - -class Blt(BranchBase): - cond = LT - -class Bne(BranchBase): - cond = NE - -# Memory: - -def reg_list_to_mask(reg_list): - mask = 0 - for reg in reg_list: - mask |= (1 << reg.num) - return mask - - -class Push(ArmInstruction): - def __init__(self, register_set): - super().__init__() - self.reg_list = register_set - - def encode(self): - self.token.cond = AL - self.token[16:28] = 0b100100101101 - reg_list = 0 - self.token[0:16] = reg_list_to_mask(self.reg_list) - return self.token.encode() - - def __repr__(self): - return 'PUSH {}'.format(self.reg_list) - - -class Pop(ArmInstruction): - def __init__(self, register_set): - super().__init__() - self.reg_list = register_set - - def encode(self): - self.token.cond = AL - self.token[16:28] = 0b100010111101 - self.token[0:16] = reg_list_to_mask(self.reg_list) - return self.token.encode() - - def __repr__(self): - return 'POP {}'.format(self.reg_list) - - -def Ldr(*args): - """ Convenience function that creates the correct instruction """ - if len(args) == 3: - if isinstance(args[1], ArmRegister): - return Ldr1(*args) - elif len(args) == 2: - if isinstance(args[1], ArmRegister): - return Ldr1(args[0], args[1], 0) - elif isinstance(args[1], str): - return Ldr3(*args) - raise Exception() - - -def LdrPseudo(rt, lab, add_lit): - """ Ldr rt, =lab ==> ldr rt, [pc, offset in litpool] ... dcd lab """ - lit_lbl = add_lit(LabelAddress(lab)) - return Ldr(rt, lit_lbl) - -def Str(*args): - if len(args) == 3 and isinstance(args[1], ArmRegister): - return Str1(*args) - elif len(args) == 2 and isinstance(args[1], ArmRegister): - return Str1(args[0], args[1], 0) - raise Exception() - - -class LdrStrBase(ArmInstruction): - def __init__(self, rt, rn, offset): - super().__init__() - self.rt = rt - self.rn = rn - self.offset = offset - - def encode(self): - self.token.cond = AL - self.token.Rn = self.rn.num - self.token[25:28] = self.opcode - self.token[20] = self.bit20 - self.token[12:16] = self.rt.num - self.token[24] = 1 # Index - if self.offset >= 0: - self.token[23] = 1 # U == 1 'add' - self.token[0:12] = self.offset - else: - self.token[23] = 0 - self.token[0:12] = -self.offset - return self.token.encode() - - def __repr__(self): - return '{} {}, [{}, {}]'.format(self.mnemonic, self.rt, self.rn, - hex(self.offset)) - - -class Str1(LdrStrBase): - opcode = 0b010 - bit20 = 0 - mnemonic = 'STR' - - -class Ldr1(LdrStrBase): - opcode = 0b010 - bit20 = 1 - mnemonic = 'LDR' - - -class Adr(ArmInstruction): - def __init__(self, rd, label): - super().__init__() - self.rd = rd - self.label = label - - def __repr__(self): - return 'ADR {}, {}'.format(self.rd, self.label) - - def relocations(self): - return [(self.label, 'adr_imm12')] - - def encode(self): - self.token.cond = AL - self.token[0:12] = 0 # Filled by linker - self.token[12:16] = self.rd.num - self.token[16:20] = 0b1111 - self.token[25] = 1 - return self.token.encode() - - -class Ldr3(ArmInstruction): - """ Load PC relative constant value - LDR rt, label - encoding A1 - """ - def __init__(self, rt, label): - super().__init__() - self.rt = rt - self.label = label - - def __repr__(self): - return 'LDR {}, {}'.format(self.rt, self.label) - - def relocations(self): - return [(self.label, 'ldr_imm12')] - - def encode(self): - self.token.cond = AL - self.token[0:12] = 0 # Filled by linker - self.token[12:16] = self.rt.num - self.token[16:23] = 0b0011111 - self.token[24:28] = 0b0101 - return self.token.encode() - - -class McrBase(ArmInstruction): - """ Mov arm register to coprocessor register """ - def __init__(self, coproc, opc1, rt, crn, crm, opc2): - super().__init__() - self.coproc = coproc - self.opc1 = opc1 - self.rt = rt - self.crn = crn - self.crm = crm - self.opc2 = opc2 - - def encode(self): - self.token[0:4] = self.crm - self.token[4] = 1 - self.token[5:8] = self.opc2 - self.token[8:12] = self.coproc - self.token[12:16] = self.rt.num - self.token[16:20] = self.crn - self.token[20] = self.b20 - self.token[21:24] = self.opc1 - self.token[24:28] = 0b1110 - self.token.cond = AL - return self.token.encode() - - -class Mcr(McrBase): - b20 = 0 - - -class Mrc(McrBase): - b20 = 1 diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/registers.py --- a/python/ppci/target/arm/registers.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ - -from ..basetarget import Register - -class ArmRegister(Register): - def __init__(self, num, name): - super().__init__(name) - self.num = num - - def __repr__(self): - return self.name - - -class Reg8Op(ArmRegister): - pass - - -def get_register(n): - for x in registers: - if x.num == n: - return x - raise Exception('No register found with this number') - -def register_range(a, b): - """ Return set of registers from a to b """ - assert a.num < b.num - return {get_register(n) for n in range(a.num, b.num + 1)} - - -R0 = Reg8Op(0, 'r0') -R1 = Reg8Op(1, 'r1') -R2 = Reg8Op(2, 'r2') -R3 = Reg8Op(3, 'r3') -R4 = Reg8Op(4, 'r4') -R5 = Reg8Op(5, 'r5') -R6 = Reg8Op(6, 'r6') -R7 = Reg8Op(7, 'r7') -R8 = ArmRegister(8, 'r8') -R9 = ArmRegister(9, 'r9') -R10 = ArmRegister(10, 'r10') -R11 = ArmRegister(11, 'r11') -R12 = ArmRegister(12, 'r12') - -# Other registers: -# TODO -SP = ArmRegister(13, 'sp') -LR = ArmRegister(14, 'lr') -PC = ArmRegister(15, 'pc') - -registers = [R0, R1, R2, R3, R4, R5, R6, R7, SP, LR, PC] diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/selector.py --- a/python/ppci/target/arm/selector.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -from ... import ir, same_dir -from ppci.irmach import AbstractInstruction as makeIns -from ppci.ir2tree import makeTree -from .instructions import Str1, Mov2 -from .instructions import B, Bl, Blt, Bgt, Beq, Bne, Cmp2, Ble, Bge -import pyburg -from ..basetarget import Nop -from ..instructionselector import InstructionSelector - -# Import BURG spec for arm: -spec_file = same_dir(__file__, 'arm.brg') -arm_matcher = pyburg.load_as_module(spec_file) - - -class ArmMatcher(arm_matcher.Matcher): - """ Matcher that derives from a burg spec generated matcher """ - def __init__(self, selector): - super().__init__() - self.newTmp = selector.newTmp - self.emit = selector.emit - self.selector = selector - - -class ArmInstructionSelector(InstructionSelector): - """ Instruction selector for the arm architecture """ - def __init__(self): - super().__init__() - self.matcher = ArmMatcher(self) - - def munchExpr(self, e): - # Use BURG system here: - t = makeTree(e) - return self.matcher.gen(t) - - def munchCall(self, e): - """ Generate code for call sequence """ - # Move arguments into proper locations: - reguses = [] - for i, a in enumerate(e.arguments): - loc = self.frame.argLoc(i) - m = ir.Move(loc, a) - self.munchStm(m) - if isinstance(loc, ir.Temp): - reguses.append(loc) - self.emit(Bl(e.f), src=reguses, dst=[self.frame.rv]) - d = self.newTmp() - self.move(d, self.frame.rv) - return d - - def munchStm(self, s): - if isinstance(s, ir.Terminator): - pass - elif isinstance(s, ir.Move) and isinstance(s.dst, ir.Mem) and \ - isinstance(s.dst.e, ir.Binop) and s.dst.e.operation == '+' and \ - isinstance(s.dst.e.b, ir.Const): - a = self.munchExpr(s.dst.e.a) - val = self.munchExpr(s.src) - c = s.dst.e.b.value - self.emit(Str1, others=[c], src=[a, val]) - elif isinstance(s, ir.Move) and isinstance(s.dst, ir.Mem): - memloc = self.munchExpr(s.dst.e) - val = self.munchExpr(s.src) - self.emit(Str1, others=[0], src=[memloc, val]) - elif isinstance(s, ir.Move) and isinstance(s.dst, ir.Temp): - val = self.munchExpr(s.src) - dreg = s.dst - self.move(dreg, val) - elif isinstance(s, ir.Exp): - # Generate expression code and discard the result. - x = self.munchExpr(s.e) - self.emit(Nop(), src=[x]) - elif isinstance(s, ir.Jump): - tgt = self.targets[s.target] - self.emit(B(ir.label_name(s.target)), jumps=[tgt]) - elif isinstance(s, ir.CJump): - a = self.munchExpr(s.a) - b = self.munchExpr(s.b) - self.emit(Cmp2, src=[a, b]) - ntgt = self.targets[s.lab_no] - ytgt = self.targets[s.lab_yes] - jmp_ins = makeIns(B(ir.label_name(s.lab_no)), jumps=[ntgt]) - opnames = {'<': Blt, '>':Bgt, '==':Beq, '!=':Bne, '>=':Bge} - op = opnames[s.cond](ir.label_name(s.lab_yes)) - self.emit(op, jumps=[ytgt, jmp_ins]) # Explicitely add fallthrough - self.emit2(jmp_ins) - else: - raise NotImplementedError('Stmt --> {}'.format(s)) - - def move(self, dst, src): - self.emit(Mov2, src=[src], dst=[dst], ismove=True) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/arm/token.py --- a/python/ppci/target/arm/token.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,16 +0,0 @@ - -from ..token import Token, u32, bit_range - - -class ArmToken(Token): - def __init__(self): - super().__init__(32) - - cond = bit_range(28, 32) - S = bit_range(20, 21) - Rd = bit_range(12, 16) - Rn = bit_range(16, 20) - Rm = bit_range(0, 4) - - def encode(self): - return u32(self.bit_value) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/basetarget.py --- a/python/ppci/target/basetarget.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,284 +0,0 @@ -import types -from ppci import CompilerError -from ..bitfun import encode_imm32 -import struct - -""" - Base classes for defining a target -""" - -class Instruction: - """ Base instruction class """ - def encode(self): - return bytes() - - def relocations(self): - return [] - - def symbols(self): - return [] - - def literals(self, add_literal): - pass - - -class Nop(Instruction): - """ Instruction that does nothing and has zero size """ - def encode(self): - return bytes() - - def __repr__(self): - return 'NOP' - - -class PseudoInstruction(Instruction): - pass - - -class Label(PseudoInstruction): - def __init__(self, name): - self.name = name - - def __repr__(self): - return '{}:'.format(self.name) - - def symbols(self): - return [self.name] - - -class Comment(PseudoInstruction): - def __init__(self, txt): - self.txt = txt - - def encode(self): - return bytes() - - def __repr__(self): - return '; {}'.format(self.txt) - - -class Alignment(PseudoInstruction): - def __init__(self, a): - self.align = a - - def __repr__(self): - return 'ALIGN({})'.format(self.align) - - def encode(self): - pad = [] - # TODO - address = 0 - while (address % self.align) != 0: - address += 1 - pad.append(0) - return bytes(pad) - - -class Register: - def __init__(self, name): - self.name = name - - def __gt__(self, other): - return self.num > other.num - - -class LabelAddress: - def __init__(self, name): - self.name = name - - -class Target: - def __init__(self, name, desc=''): - self.name = name - self.desc = desc - self.registers = [] - self.byte_sizes = {'int' : 4} # For front end! - self.byte_sizes['byte'] = 1 - - # For lowering: - self.lower_functions = {} - - # For assembler: - self.assembler_rules = [] - self.asm_keywords = [] - - self.generate_base_rules() - self.reloc_map = reloc_map # TODO: make this target specific. - - def generate_base_rules(self): - # Base rules for constants: - self.add_rule('imm32', ['val32'], lambda x: x[0].val) - self.add_rule('imm32', ['imm16'], lambda x: x[0]) - - self.add_rule('imm16', ['val16'], lambda x: x[0].val) - self.add_rule('imm16', ['imm12'], lambda x: x[0]) - - self.add_rule('imm12', ['val12'], lambda x: x[0].val) - self.add_rule('imm12', ['imm8'], lambda x: x[0]) - - self.add_rule('imm8', ['val8'], lambda x: x[0].val) - self.add_rule('imm8', ['imm5'], lambda x: x[0]) - - self.add_rule('imm5', ['val5'], lambda x: x[0].val) - self.add_rule('imm5', ['imm3'], lambda x: x[0]) - - self.add_rule('imm3', ['val3'], lambda x: x[0].val) - - def add_keyword(self, kw): - self.asm_keywords.append(kw) - - def add_instruction(self, rhs, f): - self.add_rule('instruction', rhs, f) - - def add_rule(self, lhs, rhs, f): - if type(f) is int: - f2 = lambda x: f - else: - f2 = f - assert type(f2) in [types.FunctionType, types.MethodType] - self.assembler_rules.append((lhs, rhs, f2)) - - def lower_frame_to_stream(self, frame, outs): - """ Lower instructions from frame to output stream """ - for im in frame.instructions: - if isinstance(im.assem, Instruction): - outs.emit(im.assem) - else: - # TODO assert isinstance(Abs - ins = self.lower_functions[im.assem](im) - outs.emit(ins) - - def add_lowering(self, cls, f): - """ Add a function to the table of lowering options for this target """ - self.lower_functions[cls] = f - - def add_reloc(self, name, f): - self.reloc_map[name] = f - - - -def align(x, m): - while ((x % m) != 0): - x = x + 1 - return x - -def wrap_negative(x, bits): - b = struct.unpack('> 2 - section.data[reloc.offset] = rel8 - - -@reloc('wrap_new11') -def apply_wrap_new11(reloc, sym_value, section, reloc_value): - offset = sym_value - (align(reloc_value, 2) + 4) - assert offset in range(-2048, 2046, 2) - imm11 = wrap_negative(offset >> 1, 11) - section.data[reloc.offset] = (imm11 & 0xff) - section.data[reloc.offset + 1] |= (imm11 >> 8) & 0x7 - - -@reloc('rel8') -def apply_rel8(reloc, sym_value, section, reloc_value): - assert sym_value % 2 == 0 - offset = sym_value - (align(reloc_value, 2) + 4) - assert offset in range(-256, 254, 2), str(offset) + str(reloc) - imm8 = wrap_negative(offset >> 1, 8) - section.data[reloc.offset] = imm8 - - -@reloc('bl_imm11_imm10') -def apply_bl_imm11(reloc, sym_value, section, reloc_value): - assert sym_value % 2 == 0 - offset = sym_value - (align(reloc_value, 2) + 4) - assert offset in range(-16777216, 16777214, 2), str(offset) - imm32 = wrap_negative(offset >> 1, 32) - imm11 = imm32 & 0x7FF - imm10 = (imm32 >> 11) & 0x3FF - s = (imm32 >> 24) & 0x1 - section.data[reloc.offset + 2] = imm11 & 0xFF - section.data[reloc.offset + 3] |= (imm11 >> 8) & 0x7 - section.data[reloc.offset] = imm10 & 0xff - section.data[reloc.offset + 1] |= ((imm10 >> 8) & 0x3) | (s << 2) - -@reloc('b_imm11_imm6') -def apply_b_imm11_imm6(reloc, sym_value, section, reloc_value): - assert sym_value % 2 == 0 - offset = sym_value - (align(reloc_value, 2) + 4) - assert offset in range(-1048576, 1048574, 2), str(offset) - imm32 = wrap_negative(offset >> 1, 32) - imm11 = imm32 & 0x7FF - imm6 = (imm32 >> 11) & 0x3F - s = (imm32 >> 24) & 0x1 - section.data[reloc.offset + 2] = imm11 & 0xFF - section.data[reloc.offset + 3] |= (imm11 >> 8) & 0x7 - section.data[reloc.offset] |= imm6 - section.data[reloc.offset + 1] |= (s << 2) - -# ARM reloc!! -# TODO: move to target classes??? -@reloc('b_imm24') -def apply_b_imm24(reloc, sym_value, section, reloc_value): - assert sym_value % 4 == 0 - assert reloc_value % 4 == 0 - offset = (sym_value - (reloc_value + 8)) - rel24 = wrap_negative(offset >> 2, 24) - section.data[reloc.offset+2] = (rel24 >> 16) & 0xFF - section.data[reloc.offset+1] = (rel24 >> 8) & 0xFF - section.data[reloc.offset+0] = rel24 & 0xFF - - -@reloc('ldr_imm12') -def apply_ldr_imm12(reloc, sym_value, section, reloc_value): - assert sym_value % 4 == 0 - assert reloc_value % 4 == 0 - offset = (sym_value - (reloc_value + 8)) - U = 1 - if offset < 0: - offset = -offset - U = 0 - assert offset < 4096, str(sym) + str(section) + str(reloc) - section.data[reloc.offset+2] |= (U << 7) - section.data[reloc.offset+1] |= (offset >> 8) & 0xF - section.data[reloc.offset+0] = offset & 0xFF - -@reloc('adr_imm12') -def apply_adr_imm12(reloc, sym_value, section, reloc_value): - assert sym_value % 4 == 0 - assert reloc_value % 4 == 0 - offset = (sym_value - (reloc_value + 8)) - U = 2 - if offset < 0: - offset = -offset - U = 1 - assert offset < 4096 - offset = encode_imm32(offset) - section.data[reloc.offset+2] |= (U << 6) - section.data[reloc.offset+1] |= (offset >> 8) & 0xF - section.data[reloc.offset+0] = offset & 0xFF - -@reloc('absaddr32') -def apply_absaddr32(reloc, sym_value, section, reloc_value): - assert sym_value % 4 == 0 - assert reloc_value % 4 == 0 - offset = sym_value - section.data[reloc.offset+3] = (offset >> 24) & 0xFF - section.data[reloc.offset+2] = (offset >> 16) & 0xFF - section.data[reloc.offset+1] = (offset >> 8) & 0xFF - section.data[reloc.offset+0] = offset & 0xFF diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/instructionselector.py --- a/python/ppci/target/instructionselector.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -from ppci import ir -from ppci import irmach -from ppci.irmach import AbstractInstruction as makeIns -from .basetarget import Label - -def genTemps(): - n = 900 - while True: - yield ir.Temp('t{}'.format(n)) - n = n + 1 - - -class InstructionSelector: - """ - Base instruction selector. This class must be overridden by - backends. - """ - def __init__(self): - self.temps = genTemps() - - def newTmp(self): - return self.temps.__next__() - - def munchFunction(self, f, frame): - # Entry point for instruction selection - assert isinstance(f, ir.Function) - self.targets = {} - # Enter a frame per function: - self.frame = frame - # First define labels: - for bb in f.Blocks: - itgt = makeIns(Label(ir.label_name(bb))) - self.targets[bb] = itgt - # Generate code for all blocks: - for bb in f.Blocks: - self.emit2(self.targets[bb]) - for i in bb.Instructions: - self.munchStm(i) - self.munchStm(ir.Move(self.frame.rv, f.return_value)) - - def move(self, dst, src): - raise NotImplementedError('Not target implemented') - - def emit(self, *args, **kwargs): - """ Abstract instruction emitter """ - i = makeIns(*args, **kwargs) - return self.emit2(i) - - def emit2(self, i): - self.frame.instructions.append(i) - return i - - def munchStm(self, s): - """ Implement this in the target specific back-end """ - raise NotImplementedError() - - def munchExpr(self, e): - """ Implement this in the target specific back-end """ - raise NotImplementedError() diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/msp430/__init__.py --- a/python/ppci/target/msp430/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ - -# Stub to make this a package - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/msp430/instructions.py --- a/python/ppci/target/msp430/instructions.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,211 +0,0 @@ - -from ..basetarget import Register, Instruction, Target -from ..token import Token, u16, bit_range -from .registers import Msp430Register - - -class Msp430Token(Token): - def __init__(self): - super().__init__(16) - - condition = bit_range(10, 13) - opcode = bit_range(12, 16) - register = bit_range(0, 4) - destination = bit_range(0, 4) - source = bit_range(8, 12) - bw = bit_range(6, 7) # TODO: actually a single bit! - Ad = bit_range(7, 8) # TODO: actually a single bit! - As = bit_range(4, 6) - - def encode(self): - return u16(self.bit_value) - -REGISTER_MODE = 1 -SYMBOLIC_MODE = 3 -ABSOLUTE_MODE = 4 -#TODO: add more modes! -IMMEDIATE_MODE = 7 - -class Msp430Operand: - pass - -class Msp430DestinationOperand(Msp430Operand): - def __init__(self, param): - if isinstance(param, Msp430Register): - self.reg = param.num - self.Ad = 0 - else: - raise Exception() - - -class Msp430SourceOperand(Msp430Operand): - def __init__(self, param): - if isinstance(param, Msp430Register): - self.reg = param.num - self.As = 0 - self.extra_bytes = bytes() - elif isinstance(param, int): - self.reg = 0 - self.As = 3 - self.extra_bytes = u16(param) - else: - raise Exception() - - -class Msp430Instruction(Instruction): - b = 0 - def __init__(self): - self.token = Msp430Token() - - -class Reti(Msp430Instruction): - def encode(self): - self.token[0:16] = 0x1300 - return self.token.encode() - - -######################### -# Jump instructions: -######################### - -class JumpInstruction(Msp430Instruction): - def __init__(self, target): - super().__init__() - self.target = target - - def encode(self): - self.token.condition = self.condition - self.token.offset = 0 - self.token[13] = 1 - return self.token.encode() - - def relocations(self): - return [(self.target, 'msp_reloc')] - - -class Jnz(JumpInstruction): - condition = 0 - - -class Jz(JumpInstruction): - condition = 1 - - -class Jnc(JumpInstruction): - condition = 2 - - -class Jc(JumpInstruction): - condition = 3 - - -class Jn(JumpInstruction): - condition = 4 - - -class Jge(JumpInstruction): - condition = 5 - - -class Jl(JumpInstruction): - condition = 6 - - -class Jmp(JumpInstruction): - condition = 7 - - -######################### -# Single operand arithmatic: -######################### - - -class OneOpArith(Msp430Instruction): - def __init__(self, op1): - self.op1 = op1 - - def encode(self): - # TODO: - bits[15:10] = '00100' - h1 = (self.opcode << 4) - return pack_ins(h1) - - -def oneOpIns(mne, opc): - """ Helper function to define a one operand arithmetic instruction """ - members = {'opcode': opc} - ins_cls = type(mne + '_ins', (OneOpArith,), members) - - -oneOpIns('rrc', 0) -oneOpIns('swpb', 1) -oneOpIns('rra', 2) -oneOpIns('sxt', 3) -oneOpIns('push', 4) -oneOpIns('call', 5) - - -######################### -# Two operand arithmatic instructions: -######################### - - -class TwoOpArith(Msp430Instruction): - def __init__(self, src, dst): - super().__init__() - self.src = Msp430SourceOperand(src) - self.dst = Msp430DestinationOperand(dst) - - def encode(self): - """ - Smart things have been done by MSP430 designers. - As (2 bits) is the source addressing mode selector. - Ad (1 bit) is the destination adressing mode selector. - For the source there are 7 different addressing mode. - For the destination there are 4. - The trick is to use also the register to distuingish the - different modes. - """ - # TODO: Make memory also possible - self.token.bw = self.b # When b=1, the operation is byte mode - self.token.As = self.src.As - self.token.Ad = self.dst.Ad - self.token.destination = self.dst.reg - self.token.source = self.src.reg - self.token.opcode = self.opcode - return self.token.encode() + self.src.extra_bytes - - -def twoOpIns(mne, opc): - """ Helper function to define a two operand arithmetic instruction """ - members = {'opcode': opc} - ins_cls = type(mne + '_ins', (TwoOpArith,), members) - - -class Mov(TwoOpArith): - """ Moves the source to the destination """ - opcode = 4 - - -# This is equivalent to the helper function twoOpIns: -class Add(TwoOpArith): - """ Adds the source to the destination """ - mnemonic = 'add' - opcode = 5 - - -twoOpIns('addc', 6) -twoOpIns('subc', 7) -twoOpIns('sub', 8) - - -class Cmp(TwoOpArith): - opcode = 9 - - -twoOpIns('dadd', 10) -twoOpIns('bit', 11) -twoOpIns('bic', 12) -twoOpIns('bis', 13) -twoOpIns('xor', 14) -twoOpIns('and', 15) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/msp430/msp430.py --- a/python/ppci/target/msp430/msp430.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ -import struct -import types -from ..basetarget import Register, Instruction, Target -from ppci import CompilerError -from .registers import r10, r11, r12, r13, r14, r15 -from .instructions import Reti, Mov, Add -from ...assembler import BaseAssembler - - -# Create the target class (singleton): - -class Msp430Assembler(BaseAssembler): - def __init__(self, target): - super().__init__(target) - self.make_parser() - - -class Msp430Target(Target): - def __init__(self): - super().__init__('msp430') - - # Registers: - self.add_keyword('r10') - self.add_keyword('r11') - self.add_keyword('r12') - self.add_keyword('r13') - self.add_keyword('r14') - self.add_keyword('r15') - self.add_rule('reg', ['r10'], lambda rhs: r10) - self.add_rule('reg', ['r11'], lambda rhs: r11) - self.add_rule('reg', ['r12'], lambda rhs: r12) - self.add_rule('reg', ['r13'], lambda rhs: r13) - self.add_rule('reg', ['r14'], lambda rhs: r14) - self.add_rule('reg', ['r15'], lambda rhs: r15) - - # Instructions rules: - self.add_keyword('mov') - self.add_instruction(['mov', 'reg', ',', 'reg'], - lambda rhs: Mov(rhs[1], rhs[3])) - self.add_instruction(['mov', 'imm16', ',', 'reg'], - lambda rhs: Mov(rhs[1], rhs[3])) - - self.add_keyword('add') - self.add_instruction(['add', 'reg', ',', 'reg'], - lambda rhs: Add(rhs[1], rhs[3])) - - self.add_keyword('reti') - self.add_instruction(['reti'], lambda rhs: Reti()) - - self.assembler = Msp430Assembler(self) - - self.registers.append(r10) - self.registers.append(r11) - self.registers.append(r12) - self.registers.append(r13) - self.registers.append(r14) - self.registers.append(r15) - - - -# Target description for the MSP430 processor - - - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/msp430/registers.py --- a/python/ppci/target/msp430/registers.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - -from ..basetarget import Register - -class Msp430Register(Register): - def __init__(self, num, name): - super().__init__(name) - self.num = num - -# 8 bit registers: -PCB = Msp430Register(0, 'r0') -rpc = PCB -r10 = Msp430Register(10, 'r10') -r11 = Msp430Register(11, 'r11') -r12 = Msp430Register(12, 'r12') -r13 = Msp430Register(13, 'r13') -r14 = Msp430Register(14, 'r14') -r15 = Msp430Register(15, 'r15') - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/target_list.py --- a/python/ppci/target/target_list.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,19 +0,0 @@ - -""" - Contains a list of instantiated targets. -""" - -from .arm import ArmTarget -from .thumb import ThumbTarget -from .msp430.msp430 import Msp430Target -from .x86.target_x86 import X86Target - -# Instance: -arm_target = ArmTarget() -thumb_target = ThumbTarget() -x86target = X86Target() -msp430target = Msp430Target() - -target_list = [arm_target, thumb_target, x86target, msp430target] -targets = {t.name: t for t in target_list} -targetnames = list(targets.keys()) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/__init__.py --- a/python/ppci/target/thumb/__init__.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - - -from .armtarget import ThumbTarget - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/arm.brg --- a/python/ppci/target/thumb/arm.brg Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ - -from ppci.target.thumb.instructions import Orr, Lsl, Str2, Ldr2, Ldr3 -from ppci.target.thumb.instructions import B, Bl, Bgt, Blt, Beq, Bne -from ppci.target.thumb.instructions import Mov2, Mov3, Sub3 -from ppci.target.thumb.instructions import Add3, Sub, Cmp, Sub2, Add2, Mul - -%% - -%terminal ADDI32 SUBI32 MULI32 -%terminal ORI32 SHLI32 -%terminal CONSTI32 MEMI32 REGI32 CALL -%terminal MOVI32 -%terminal GLOBALADDRESS CONSTDATA - -%% - - -reg: ADDI32(reg, reg) 2 'd = self.newTmp(); self.emit(Add3, dst=[d], src=[c0, c1]); return d' -reg: SUBI32(reg, reg) 2 'd = self.newTmp(); self.emit(Sub3, dst=[d], src=[c0, c1]); return d' -reg: ORI32(reg, reg) 2 'd = self.newTmp(); self.selector.move(d, c0); self.emit(Orr, dst=[], src=[d, c1]); return d' -reg: SHLI32(reg, reg) 2 'd = self.newTmp(); self.selector.move(d, c0); self.emit(Lsl, dst=[], src=[d, c1]); return d' -reg: MULI32(reg, reg) 2 'd = self.newTmp(); self.selector.move(d, c0); self.emit(Mul, dst=[d], src=[c1, d]); return d' - -reg: CONSTI32 3 'd = self.newTmp(); ln = self.selector.frame.addConstant(tree.value); self.emit(Ldr3, dst=[d], others=[ln]); return d' -reg: MEMI32(reg) 4 'd = self.newTmp(); self.emit(Ldr2, dst=[d], src=[c0], others=[0]); return d' -reg: REGI32 1 'return tree.value' -reg: CALL 1 'return self.selector.munchCall(tree.value)' - - -stmt: MOVI32(MEMI32(addr), reg) 3 'self.emit(Str2, src=[c0, c1])' - -addr: reg 0 '' diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/arminstructionselector.py --- a/python/ppci/target/thumb/arminstructionselector.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ -from ... import ir, same_dir -from ppci.irmach import AbstractInstruction as makeIns -from ppci.ir2tree import makeTree -import pyburg -from ..basetarget import Nop -from ..instructionselector import InstructionSelector -from .instructions import Orr, Lsl, Str2, Ldr2, Ldr3 -from .instructions import B, Bl, Bgt, Blt, Beq, Bne -from .instructions import Mov2, Mov3 -from .instructions import Cmp, Sub2, Mul - -# Import BURG spec for arm: -spec_file = same_dir(__file__, 'arm.brg') -arm_matcher = pyburg.load_as_module(spec_file) - - -class ArmMatcher(arm_matcher.Matcher): - """ Matcher that derives from a burg spec generated matcher """ - def __init__(self, selector): - super().__init__() - self.newTmp = selector.newTmp - self.emit = selector.emit - self.selector = selector - - -class ArmInstructionSelector(InstructionSelector): - """ Instruction selector for the arm architecture """ - def __init__(self): - super().__init__() - self.matcher = ArmMatcher(self) - - def munchExpr(self, e): - # Use BURG system here: - t = makeTree(e) - return self.matcher.gen(t) - - def munchCall(self, e): - """ Generate code for call sequence """ - # Move arguments into proper locations: - reguses = [] - for i, a in enumerate(e.arguments): - loc = self.frame.argLoc(i) - m = ir.Move(loc, a) - self.munchStm(m) - if isinstance(loc, ir.Temp): - reguses.append(loc) - self.emit(Bl(e.f), src=reguses, dst=[self.frame.rv]) - d = self.newTmp() - self.move(d, self.frame.rv) - return d - - def munchStm(self, s): - if isinstance(s, ir.Terminator): - pass - elif isinstance(s, ir.Move) and isinstance(s.dst, ir.Mem) and \ - isinstance(s.dst.e, ir.Binop) and s.dst.e.operation == '+' and \ - isinstance(s.dst.e.b, ir.Const): - a = self.munchExpr(s.dst.e.a) - val = self.munchExpr(s.src) - c = s.dst.e.b.value - self.emit(Str2, others=[c], src=[a, val]) - elif isinstance(s, ir.Move) and isinstance(s.dst, ir.Mem): - memloc = self.munchExpr(s.dst.e) - val = self.munchExpr(s.src) - self.emit(Str2, others=[0], src=[memloc, val]) - elif isinstance(s, ir.Move) and isinstance(s.dst, ir.Temp): - val = self.munchExpr(s.src) - dreg = s.dst - self.move(dreg, val) - elif isinstance(s, ir.Exp): - # Generate expression code and discard the result. - x = self.munchExpr(s.e) - self.emit(Nop(), src=[x]) - elif isinstance(s, ir.Jump): - tgt = self.targets[s.target] - self.emit(B(ir.label_name(s.target)), jumps=[tgt]) - elif isinstance(s, ir.CJump): - a = self.munchExpr(s.a) - b = self.munchExpr(s.b) - self.emit(Cmp, src=[a, b]) - ntgt = self.targets[s.lab_no] - ytgt = self.targets[s.lab_yes] - jmp_ins = makeIns(B(ir.label_name(s.lab_no)), jumps=[ntgt]) - opnames = {'<': Blt, '>':Bgt, '==':Beq, '!=':Bne} - op = opnames[s.cond](ir.label_name(s.lab_yes)) - self.emit(op, jumps=[ytgt, jmp_ins]) # Explicitely add fallthrough - self.emit2(jmp_ins) - else: - raise NotImplementedError('Stmt --> {}'.format(s)) - - def move(self, dst, src): - self.emit(Mov2, src=[src], dst=[dst], ismove=True) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/armtarget.py --- a/python/ppci/target/thumb/armtarget.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,169 +0,0 @@ -import struct -from ..basetarget import Register, Instruction, Target, Label, Alignment -from .instructions import Add2, Sub, Sub3, Add3, Cmp, Lsl, Orr, Add, Cmp2, Sub2, Add2, Mul, And -from .instructions import Dcd, Pop, Push, Yield, Mov2, Mov3 -from .instructions import B, Bl, Bne, Beq, Blt, Bgt -from .instructions import Ldr, Str2, Ldr2, Str1, Ldr1, Ldr3 - -from .frame import ArmFrame -from .arminstructionselector import ArmInstructionSelector -from ..arm.registers import R0, R1, R2, R3, R4, R5, R6, R7, SP, LR, PC -from ..arm.registers import register_range -from ...assembler import BaseAssembler - - -""" ARM target description. """ - -# TODO: encode this in DSL (domain specific language) -# TBD: is this required? -# TODO: make a difference between armv7 and armv5? - - -class ThumbAssembler(BaseAssembler): - def __init__(self, target): - super().__init__(target) - self.make_parser() - - -class ThumbTarget(Target): - def __init__(self): - super().__init__('thumb') - self.ins_sel = ArmInstructionSelector() - self.FrameClass = ArmFrame - self.add_rules() - - # Add lowering options: - self.add_lowering(Str2, lambda im: Str2(im.src[1], im.src[0], im.others[0])) - self.add_lowering(Ldr2, lambda im: Ldr2(im.dst[0], im.src[0], im.others[0])) - self.add_lowering(Ldr3, lambda im: Ldr3(im.dst[0], im.others[0])) - self.add_lowering(Mov3, lambda im: Mov3(im.dst[0], im.others[0])) - self.add_lowering(Add2, lambda im: Add2(im.dst[0], im.src[0], im.others[0])) - self.add_lowering(Sub2, lambda im: Sub2(im.dst[0], im.src[0], im.others[0])) - self.add_lowering(Mov2, lambda im: Mov2(im.dst[0], im.src[0])) - self.add_lowering(Add3, lambda im: Add3(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(Sub3, lambda im: Sub3(im.dst[0], im.src[0], im.src[1])) - self.add_lowering(Mul, lambda im: Mul(im.src[0], im.dst[0])) - self.add_lowering(And, lambda im: And(im.src[0], im.src[1])) - self.add_lowering(Orr, lambda im: Orr(im.src[0], im.src[1])) - self.add_lowering(Lsl, lambda im: Lsl(im.src[0], im.src[1])) - self.add_lowering(Cmp, lambda im: Cmp(im.src[0], im.src[1])) - - self.assembler = ThumbAssembler(self) - - def add_rules(self): - - # Add instructions: - self.add_keyword('dcd') - self.add_instruction(['dcd', 'imm32'], lambda rhs: Dcd(rhs[1])) - - self.add_keyword('mov') - self.add_instruction(['mov', 'reg8', ',', 'reg8'], - lambda rhs: Mov2(rhs[1], rhs[3])) - - self.add_instruction(['mov', 'reg8', ',', 'imm8'], - lambda rhs: Mov3(rhs[1], rhs[3])) - - self.add_keyword('add') - self.add_instruction(['add', 'reg8', ',', 'reg8', ',', 'imm3'], - lambda rhs: Add2(rhs[1], rhs[3], rhs[5])) - - self.add_instruction(['add', 'reg8', ',', 'reg8', ',', 'reg8'], - lambda rhs: Add3(rhs[1], rhs[3], rhs[5])) - - self.add_keyword('sub') - self.add_instruction(['sub', 'reg8', ',', 'reg8', ',', 'imm3'], - lambda rhs: Sub(rhs[1], rhs[3], rhs[5])) - - self.add_instruction(['sub', 'sp', ',', 'sp', ',', 'imm8'], - lambda rhs: Sub(SP, SP, rhs[5])) - - self.add_instruction(['add', 'sp', ',', 'sp', ',', 'imm8'], - lambda rhs: Add(SP, SP, rhs[5])) - - self.add_keyword('cmp') - self.add_instruction(['cmp', 'reg8', ',', 'reg8'], - lambda rhs: Cmp(rhs[1], rhs[3])) - self.add_instruction(['cmp', 'reg8', ',', 'imm8'], - lambda rhs: Cmp2(rhs[1], rhs[3])) - - self.add_keyword('lsl') - self.add_instruction(['lsl', 'reg8', ',', 'reg8'], - lambda rhs: Lsl(rhs[1], rhs[3])) - - self.add_keyword('str') - self.add_instruction(['str', 'reg8', ',', '[', 'reg8', '+', 'imm5', ']'], - lambda rhs: Str2(rhs[1], rhs[4], rhs[6])) - - self.add_keyword('ldr') - self.add_instruction(['ldr', 'reg8', ',', '[', 'reg8', '+', 'imm5', ']'], - lambda rhs: Ldr2(rhs[1], rhs[4], rhs[6])) - - self.add_instruction(['str', 'reg8', ',', '[', 'sp', '+', 'imm8', ']'], - lambda rhs: Str1(rhs[1], rhs[6])) - - self.add_instruction(['ldr', 'reg8', ',', '[', 'sp', '+', 'imm8', ']'], - lambda rhs: Ldr1(rhs[1], rhs[6])) - - self.add_keyword('pop') - self.add_instruction(['pop', 'reg_list'], lambda rhs: Pop(rhs[1])) - self.add_keyword('push') - self.add_instruction(['push', 'reg_list'], lambda rhs: Push(rhs[1])) - - self.add_keyword('yield') - self.add_instruction(['yield'], lambda rhs: Yield()) - - self.add_keyword('b') - self.add_keyword('bl') - self.add_instruction(['b', 'ID'], lambda rhs: B(rhs[1].val)) - self.add_instruction(['bl', 'ID'], lambda rhs: Bl(rhs[1].val)) - self.add_keyword('beq') - self.add_keyword('bne') - self.add_keyword('blt') - self.add_keyword('bgt') - self.add_instruction(['beq', 'ID'], lambda rhs: Beq(rhs[1].val)) - self.add_instruction(['bne', 'ID'], lambda rhs: Bne(rhs[1].val)) - self.add_instruction(['blt', 'ID'], lambda rhs: Blt(rhs[1].val)) - self.add_instruction(['bgt', 'ID'], lambda rhs: Bgt(rhs[1].val)) - - self.add_keyword('align') - self.add_instruction(['align', 'imm8'], lambda rhs: Alignment(rhs[1])) - - self.add_instruction(['ldr', 'reg8', ',', 'ID'], - lambda rhs: Ldr(rhs[1], rhs[3].val)) - - # Additional rules: - - # Register list grammar: - self.add_rule('reg_list', ['{', 'reg_list_inner', '}'], - lambda rhs: rhs[1]) - self.add_rule('reg_list_inner', ['reg_or_range'], - lambda rhs: rhs[0]) - self.add_rule('reg_list_inner', ['reg_or_range', ',', 'reg_list_inner'], - lambda rhs: rhs[0] | rhs[2]) - self.add_rule('reg_or_range', ['reg8'], lambda rhs: {rhs[0]}) - self.add_rule('reg_or_range', ['lr'], lambda rhs: {LR}) - self.add_rule('reg_or_range', ['pc'], lambda rhs: {PC}) - - self.add_rule('reg_or_range', ['reg8', '-', 'reg8'], - lambda rhs: register_range(rhs[0], rhs[2])) - - self.add_keyword('r0') - self.add_keyword('r1') - self.add_keyword('r2') - self.add_keyword('r3') - self.add_keyword('r4') - self.add_keyword('r5') - self.add_keyword('r6') - self.add_keyword('r7') - self.add_keyword('sp') - self.add_keyword('lr') - self.add_keyword('pc') - self.add_rule('reg8', ['r0'], lambda rhs: R0) - self.add_rule('reg8', ['r1'], lambda rhs: R1) - self.add_rule('reg8', ['r2'], lambda rhs: R2) - self.add_rule('reg8', ['r3'], lambda rhs: R3) - self.add_rule('reg8', ['r4'], lambda rhs: R4) - self.add_rule('reg8', ['r5'], lambda rhs: R5) - self.add_rule('reg8', ['r6'], lambda rhs: R6) - self.add_rule('reg8', ['r7'], lambda rhs: R7) - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/armtoken.py --- a/python/ppci/target/thumb/armtoken.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ - -from ..token import Token, u16, bit_range - -class ThumbToken(Token): - def __init__(self): - super().__init__(16) - - rd = bit_range(0, 3) - - def encode(self): - return u16(self.bit_value) - diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/frame.py --- a/python/ppci/target/thumb/frame.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -from ... import ir -from ..basetarget import Label, Alignment -from ...irmach import AbstractInstruction, Frame -from .instructions import Dcd, AddSp, SubSp, Push, Pop, Mov2 -from ..arm.registers import R0, R1, R2, R3, R4, R5, R6, R7, LR, PC, SP - - -class ArmFrame(Frame): - """ Arm specific frame for functions. """ - def __init__(self, name): - # We use r7 as frame pointer. - super().__init__(name) - self.regs = [R0, R1, R2, R3, R4, R5, R6] - self.rv = ir.Temp('special_RV') - self.p1 = ir.Temp('special_P1') - self.p2 = ir.Temp('special_P2') - self.p3 = ir.Temp('special_P3') - self.p4 = ir.Temp('special_P4') - self.fp = ir.Temp('special_FP') - # Pre-colored registers: - self.tempMap = {} - self.tempMap[self.rv] = R0 - self.tempMap[self.p1] = R1 - self.tempMap[self.p2] = R2 - self.tempMap[self.p3] = R3 - self.tempMap[self.p4] = R4 - self.tempMap[self.fp] = R7 - self.locVars = {} - self.parMap = {} - # Literal pool: - self.constants = [] - - def argLoc(self, pos): - """ - Gets the function parameter location in IR-code format. - """ - if pos == 0: - return self.p1 - elif pos == 1: - return self.p2 - elif pos == 2: - return self.p3 - elif pos == 3: - return self.p4 - else: - raise NotImplementedError('No more than 4 parameters implemented') - - def allocVar(self, lvar): - if lvar not in self.locVars: - self.locVars[lvar] = self.stacksize - self.stacksize = self.stacksize + 4 - return self.locVars[lvar] - - def addConstant(self, value): - lab_name = '{}_literal_{}'.format(self.name, len(self.constants)) - self.constants.append((lab_name, value)) - return lab_name - - def prologue(self): - """ Returns prologue instruction sequence """ - pre = [ - Label(self.name), # Label indication function - Push({LR, R7}) - ] - if self.stacksize > 0: - pre.append(SubSp(self.stacksize)) # Reserve stack space - pre += [ - Mov2(R7, SP) # Setup frame pointer - ] - return pre - - def epilogue(self): - """ Return epilogue sequence for a frame. Adjust frame pointer and add constant pool """ - post = [] - if self.stacksize > 0: - post.append(AddSp(self.stacksize)) - post += [ - Pop({PC, R7}), - Alignment(4) # Align at 4 bytes - ] - # Add constant literals: - for ln, v in self.constants: - post.extend([Label(ln), Dcd(v)]) - return post - - def EntryExitGlue3(self): - """ - Add code for the prologue and the epilogue. Add a label, the - return instruction and the stack pointer adjustment for the frame. - """ - for index, ins in enumerate(self.prologue()): - self.instructions.insert(index, AbstractInstruction(ins)) - - # Postfix code: - for ins in self.epilogue(): - self.instructions.append(AbstractInstruction(ins)) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/thumb/instructions.py --- a/python/ppci/target/thumb/instructions.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,489 +0,0 @@ -from ..basetarget import Register, Instruction, Target, Label -from ..token import u16, u32 -from .armtoken import ThumbToken -from ..arm.registers import R0, ArmRegister, SP - - -# Instructions: - -class ThumbInstruction(Instruction): - pass - - -class Dcd(ThumbInstruction): - def __init__(self, expr): - if isinstance(expr, int): - self.expr = expr - self.label = None - else: - raise NotImplementedError() - - def encode(self): - return u32(self.expr) - - def relocations(self): - return [] - - def __repr__(self): - return 'DCD 0x{0:X}'.format(self.expr) - - -class nop_ins(ThumbInstruction): - def encode(self): - return bytes() - - def __repr__(self): - return 'NOP' - - -# Memory related - -class LS_imm5_base(ThumbInstruction): - """ ??? Rt, [Rn, imm5] """ - def __init__(self, rt, rn, imm5): - assert imm5 % 4 == 0 - self.imm5 = imm5 >> 2 - self.rn = rn - self.rt = rt - assert self.rn.num < 8 - assert self.rt.num < 8 - self.token = ThumbToken() - - def encode(self): - Rn = self.rn.num - Rt = self.rt.num - imm5 = self.imm5 - self.token[0:3] = Rt - self.token[3:6] = Rn - self.token[6:11] = imm5 - self.token[11:16] = self.opcode - return self.token.encode() - - def __repr__(self): - mnemonic = "???" - return '{} {}, [{}, {}]'.format(mnemonic, self.rt, self.rn, self.imm5) - - -class Str2(LS_imm5_base): - opcode = 0xC - - -class Ldr2(LS_imm5_base): - opcode = 0xD - - -class ls_sp_base_imm8(ThumbInstruction): - def __init__(self, rt, offset): - self.rt = rt - self.offset = offset - - def encode(self): - rt = self.rt.num - assert rt < 8 - imm8 = self.offset >> 2 - assert imm8 < 256 - h = (self.opcode << 8) | (rt << 8) | imm8 - return u16(h) - - def __repr__(self): - mnemonic = self.__class__.__name__ - return '{} {}, [sp,#{}]'.format(mnemonic, self.rt, self.offset) - - -def Ldr(*args): - if len(args) == 2 and isinstance(args[0], ArmRegister) \ - and isinstance(args[1], str): - return Ldr3(*args) - else: - raise Exception() - - -class Ldr3(ThumbInstruction): - """ ldr Rt, LABEL, load value from pc relative position """ - def __init__(self, rt, label): - self.rt = rt - self.label = label - - def relocations(self): - return [(self.label, 'lit_add_8')] - - def encode(self): - rt = self.rt.num - assert rt < 8 - imm8 = 0 - h = (0x9 << 11) | (rt << 8) | imm8 - return u16(h) - - def __repr__(self): - return 'LDR {}, {}'.format(self.rt, self.label) - - -class Ldr1(ls_sp_base_imm8): - """ ldr Rt, [SP, imm8] """ - opcode = 0x98 - - -class Str1(ls_sp_base_imm8): - """ str Rt, [SP, imm8] """ - opcode = 0x90 - - -class Mov3(ThumbInstruction): - """ mov Rd, imm8, move immediate value into register """ - opcode = 4 # 00100 Rd(3) imm8 - def __init__(self, rd, imm): - assert imm < 256 - self.imm = imm - self.rd = rd - self.token = ThumbToken() - - def encode(self): - rd = self.rd.num - self.token[8:11] = rd - self.token[0:8] = self.imm - self.token[11:16] = self.opcode - return self.token.encode() - - def __repr__(self): - return 'MOV {}, {}'.format(self.rd, self.imm) - - -# Arithmatics: - - -class regregimm3_base(ThumbInstruction): - def __init__(self, rd, rn, imm3): - self.rd = rd - self.rn = rn - assert imm3 < 8 - self.imm3 = imm3 - self.token = ThumbToken() - - def encode(self): - rd = self.rd.num - self.token[0:3] = rd - self.token[3:6] = self.rn.num - self.token[6:9] = self.imm3 - self.token[9:16] = self.opcode - return self.token.encode() - - def __repr__(self): - mnemonic = self.__class__.__name__ - return '{} {}, {}, {}'.format(mnemonic, self.rd, self.rn, self.imm3) - - - -class Add2(regregimm3_base): - """ add Rd, Rn, imm3 """ - opcode = 0b0001110 - - -class Sub2(regregimm3_base): - """ sub Rd, Rn, imm3 """ - opcode = 0b0001111 - - -def Sub(*args): - if len(args) == 3 and args[0] is SP and args[1] is SP and \ - isinstance(args[2], int) and args[2] < 256: - return SubSp(args[2]) - elif len(args) == 3 and isinstance(args[0], ArmRegister) and \ - isinstance(args[1], ArmRegister) and isinstance(args[2], int) and \ - args[2] < 8: - return Sub2(args[0], args[1], args[2]) - else: - raise Exception() - - -def Add(*args): - if len(args) == 3 and args[0] is SP and args[1] is SP and \ - isinstance(args[2], int) and args[2] < 256: - return AddSp(args[2]) - elif len(args) == 3 and isinstance(args[0], ArmRegister) and \ - isinstance(args[1], ArmRegister) and isinstance(args[2], int) and \ - args[2] < 8: - return Add2(args[0], args[1], args[2]) - else: - raise Exception() - - -class regregreg_base(ThumbInstruction): - """ ??? Rd, Rn, Rm """ - def __init__(self, rd, rn, rm): - self.rd = rd - self.rn = rn - self.rm = rm - - def encode(self): - at = ThumbToken() - at.rd = self.rd.num - rn = self.rn.num - rm = self.rm.num - at[3:6] = rn - at[6:9] = rm - at[9:16] = self.opcode - return at.encode() - - def __repr__(self): - return '{} {}, {}, {}'.format(self.mnemonic, self.rd, self.rn, self.rm) - - -class Add3(regregreg_base): - mnemonic = 'ADD' - opcode = 0b0001100 - - -class Sub3(regregreg_base): - mnemonic = 'SUB' - opcode = 0b0001101 - - -class Mov2(ThumbInstruction): - """ mov rd, rm """ - mnemonic = 'MOV' - def __init__(self, rd, rm): - self.rd = rd - self.rm = rm - - def encode(self): - at = ThumbToken() - at.rd = self.rd.num & 0x7 - D = (self.rd.num >> 3) & 0x1 - Rm = self.rm.num - opcode = 0b01000110 - at[8:16] = opcode - at[3:7] = Rm - at[7] = D - return at.encode() - - def __repr__(self): - return '{} {}, {}'.format(self.mnemonic, self.rd, self.rm) - - -class Mul(ThumbInstruction): - """ mul Rn, Rdm """ - mnemonic = 'MUL' - def __init__(self, rn, rdm): - self.rn = rn - self.rdm = rdm - - def encode(self): - at = ThumbToken() - rn = self.rn.num - at.rd = self.rdm.num - opcode = 0b0100001101 - #h = (opcode << 6) | (rn << 3) | rdm - at[6:16] = opcode - at[3:6] = rn - return at.encode() - - def __repr__(self): - return '{} {}, {}'.format(self.mnemonic, self.rn, self.rdm) - - -class regreg_base(ThumbInstruction): - """ ??? Rdn, Rm """ - def __init__(self, rdn, rm): - self.rdn = rdn - self.rm = rm - - def encode(self): - at = ThumbToken() - at.rd = self.rdn.num - rm = self.rm.num - at[3:6] = rm - at[6:16] = self.opcode - return at.encode() - - def __repr__(self): - mnemonic = self.__class__.__name__ - return '{} {}, {}'.format(mnemonic, self.rdn, self.rm) - - -class movregreg_ins(regreg_base): - """ mov Rd, Rm (reg8 operands) """ - opcode = 0 - - -class And(regreg_base): - opcode = 0b0100000000 - - -class Orr(regreg_base): - opcode = 0b0100001100 - - -class Cmp(regreg_base): - opcode = 0b0100001010 - - -class Lsl(regreg_base): - opcode = 0b0100000010 - - -class Cmp2(ThumbInstruction): - """ cmp Rn, imm8 """ - opcode = 5 # 00101 - def __init__(self, rn, imm): - self.rn = rn - self.imm = imm - - def encode(self): - at = ThumbToken() - at[0:8] = self.imm - at[8:11] = self.rn.num - at[11:16] = self.opcode - return at.encode() - - -# Jumping: - -class jumpBase_ins(ThumbInstruction): - def __init__(self, target_label): - assert type(target_label) is str - self.target = target_label - self.offset = 0 - - def __repr__(self): - mnemonic = self.__class__.__name__ - return '{} {}'.format(mnemonic, self.target) - - -class B(jumpBase_ins): - def encode(self): - h = (0b11100 << 11) | 0 - # | 1 # 1 to enable thumb mode - return u16(h) - - def relocations(self): - return [(self.target, 'wrap_new11')] - -class Bl(jumpBase_ins): - def encode(self): - imm11 = 0 - imm10 = 0 - j1 = 1 # TODO: what do these mean? - j2 = 1 - s = 0 - h1 = (0b11110 << 11) | (s << 10) | imm10 - h2 = (0b1101 << 12) | (j1 << 13) | (j2 << 11) | imm11 - return u16(h1) + u16(h2) - - def relocations(self): - return [(self.target, 'bl_imm11_imm10')] - - -class cond_base_ins(jumpBase_ins): - def encode(self): - imm8 = 0 - h = (0b1101 << 12) | (self.cond << 8) | imm8 - return u16(h) - - def relocations(self): - return [(self.target, 'rel8')] - - -class cond_base_ins_long(jumpBase_ins): - """ Encoding T3 """ - def encode(self): - j1 = 1 # TODO: what do these mean? - j2 = 1 - h1 = (0b11110 << 11) | (self.cond << 6) - h2 = (0b1101 << 12) | (j1 << 13) | (j2 << 11) - return u16(h1) + u16(h2) - - def relocations(self): - return [(self.target, 'b_imm11_imm6')] - - -class Beq(cond_base_ins): - cond = 0 - - -class Bne(cond_base_ins): - cond = 1 - - -class Blt(cond_base_ins): - cond = 0b1011 - - -class Bgt(cond_base_ins): - cond = 0b1100 - - -class Push(ThumbInstruction): - def __init__(self, regs): - assert type(regs) is set - self.regs = regs - - def __repr__(self): - return 'Push {{{}}}'.format(self.regs) - - def encode(self): - at = ThumbToken() - for n in register_numbers(self.regs): - if n < 8: - at[n] = 1 - elif n == 14: - at[8] = 1 - else: - raise NotImplementedError('not implemented for {}'.format(n)) - at[9:16] = 0x5a - return at.encode() - - - -def register_numbers(regs): - for r in regs: - yield r.num - -class Pop(ThumbInstruction): - def __init__(self, regs): - assert type(regs) is set - self.regs = regs - self.token = ThumbToken() - - def __repr__(self): - return 'Pop {{{}}}'.format(self.regs) - - def encode(self): - for n in register_numbers(self.regs): - if n < 8: - self.token[n] = 1 - elif n == 15: - self.token[8] = 1 - else: - raise NotImplementedError('not implemented for this register') - self.token[9:16] = 0x5E - return self.token.encode() - - - -class Yield(ThumbInstruction): - def encode(self): - return u16(0xbf10) - -# misc: - -# add/sub SP: -class addspsp_base(ThumbInstruction): - def __init__(self, imm7): - self.imm7 = imm7 - assert self.imm7 % 4 == 0 - self.imm7 >>= 2 - - def encode(self): - return u16((self.opcode << 7) | self.imm7) - - def __repr__(self): - mnemonic = self.__class__.__name__ - return '{} sp, sp, {}'.format(mnemonic, self.imm7 << 2) - - -class AddSp(addspsp_base): - opcode = 0b101100000 - - -class SubSp(addspsp_base): - opcode = 0b101100001 diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/token.py --- a/python/ppci/target/token.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ - -import struct - - -def u16(h): - return struct.pack('> (p*8)) & 0xFF for p in range(8) ] - -def imm32(x): - """ represent 32 bits integer in little endian 4 bytes""" - if x < 0: - x = x + (1 << 32) - x = x & 0xFFFFFFFF - return [ (x >> (p*8)) & 0xFF for p in range(4) ] - - -def imm8(x): - if x < 0: - x = x + (1 << 8) - x = x & 0xFF - return [ x ] - - -class ModRmToken(Token): - """ Construct the modrm byte from its components """ - def __init__(self, mod=0, rm=0, reg=0): - super().__init__(8) - assert(mod <= 3) - assert(rm <= 7) - assert(reg <= 7) - self.mod = mod - self.rm = rm - self.reg = reg - - mod = bit_range(6, 8) - rm = bit_range(0, 3) - reg = bit_range(3, 6) - - def encode(self): - return u8(self.bit_value) - - -class RexToken(Token): - """ Create a REX prefix byte """ - def __init__(self, w=0, r=0, x=0, b=0): - super().__init__(8) - assert(w <= 1) - assert(r <= 1) - assert(x <= 1) - assert(b <= 1) - self.w = w - self.r = r - self.x = x - self.b = b - self.set_bit(6, 1) - - w = bit_range(3, 4) - r = bit_range(2, 3) - x = bit_range(1, 2) - b = bit_range(0, 1) - - def encode(self): - return u8(self.bit_value) - - -def sib(ss=0, index=0, base=0): - assert(ss <= 3) - assert(index <= 7) - assert(base <= 7) - return (ss << 6) | (index << 3) | base - -tttn = {'L':0xc,'G':0xf,'NE':0x5,'GE':0xd,'LE':0xe, 'E':0x4} - -# Actual instructions: -def nearjump(distance, condition=None): - """ jmp imm32 """ - lim = (1<<30) - if abs(distance) > lim: - Error('near jump cannot jump over more than {0} bytes'.format(lim)) - if condition: - if distance < 0: - distance -= 6 # Skip own instruction - opcode = 0x80 | tttn[condition] # Jcc imm32 - return [0x0F, opcode] + imm32(distance) - else: - if distance < 0: - distance -= 5 # Skip own instruction - return [ 0xE9 ] + imm32(distance) - -def shortjump(distance, condition=None): - """ jmp imm8 """ - lim = 118 - if abs(distance) > lim: - Error('short jump cannot jump over more than {0} bytes'.format(lim)) - if distance < 0: - distance -= 2 # Skip own instruction - if condition: - opcode = 0x70 | tttn[condition] # Jcc rel8 - else: - opcode = 0xeb # jmp rel8 - return [opcode] + imm8(distance) - -# Helper that determines jump type: -def reljump(distance): - if abs(distance) < 110: - return shortjump(distance) - else: - return nearjump(distance) - - -class Push(Instruction): - def __init__(self, reg): - assert(reg in regs64), str(reg) - self.reg = reg - - def encode(self): - code = [] - if self.reg.rexbit == 1: - code.append(0x41) - code.append(0x50 + self.reg.regbits) - return bytes(code) - - -class Pop(Instruction): - def __init__(self, reg): - assert(reg in regs64), str(reg) - self.reg = reg - - def encode(self): - code = [] - if self.reg.rexbit == 1: - code.append(0x41) - code.append(0x58 + self.reg.regbits) - return bytes(code) - - -def pop(reg): - if reg in regs64: - if rexbit[reg] == 1: - rexprefix = rex(b=1) - opcode = 0x58 + regs64[reg] - return [rexprefix, opcode] - else: - opcode = 0x58 + regs64[reg] - return [ opcode ] - else: - Error('pop for {0} not implemented'.format(reg)) - -def INT(number): - opcode = 0xcd - return [opcode] + imm8(number) - -def syscall(): - return [0x0F, 0x05] - -def call(distance): - if type(distance) is int: - return [0xe8]+imm32(distance) - elif type(distance) is str and distance in regs64: - reg = distance - opcode = 0xFF # 0xFF /2 == call r/m64 - mod_rm = modrm(mod=3, reg=2, rm=regs64[reg]) - if rexbit[reg] == 1: - rexprefix = rex(b=rexbit[reg]) - return [rexprefix, opcode, mod_rm] - else: - return [opcode, mod_rm] - else: - Error('Cannot call to {0}'.format(distance)) - - -class Ret(Instruction): - def __init__(self): - pass - - def encode(self): - return [ 0xc3 ] - - -class Inc(Instruction): - def __init__(self, reg): - assert(reg in regs64), str(reg) - self.rex = RexToken(w=1, b=reg.rexbit) - self.opcode = 0xff - self.mod_rm = ModRmToken(mod=3, rm=reg.regbits) - - def encode(self): - code = bytes([self.opcode]) - return self.rex.encode() + code + self.mod_rm.encode() - - -def prepost8(r8, rm8): - assert(r8 in regs8) - pre = [] - if type(rm8) is list: - # TODO: merge mem access with prepost for 64 bits - if len(rm8) == 1: - base, = rm8 - if type(base) is str and base in regs64: - assert(not base in ['rbp', 'rsp', 'r12', 'r13']) - mod_rm = modrm(mod=0, rm=regs64[base], reg=regs8[r8]) - if rexbit[base] == 1: - pre.append(rex(b=1)) - post = [mod_rm] - else: - Error('One arg of type {0} not implemented'.format(base)) - elif len(rm8) == 2: - base, offset = rm8 - assert(type(offset) is int) - assert(base in regs64) - - if base == 'rsp' or base == 'r12': - Error('Cannot use rsp or r12 as base yet') - if rexbit[base] == 1: - pre.append( rex(b=1) ) - mod_rm = modrm(mod=1, rm=regs64[base], reg=regs8[r8]) - post = [mod_rm] + imm8(offset) - else: - Error('not supporting prepost8 with list len {0}'.format(len(rm8))) - else: - Error('Not supporting move with reg8 {0}'.format(r8)) - return pre, post - -def prepost(r64, rm64): - assert(r64 in regs64) - if type(rm64) is list: - if len(rm64) == 3: - base, index, disp = rm64 - assert(base in regs64) - assert(index in regs64) - assert(type(disp) is int) - # Assert that no special cases are used: - # TODO: swap base and index to avoid special cases - # TODO: exploit special cases and make better code - assert(index != 'rsp') - - rexprefix = rex(w=1, r=rexbit[r64], x=rexbit[index], b=rexbit[base]) - # mod=1 and rm=4 indicates a SIB byte: [--][--]+imm8 - mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) - si_b = sib(ss=0, index=regs64[index], base=regs64[base]) - return [rexprefix], [mod_rm, si_b] + imm8(disp) - elif len(rm64) == 2: - base, offset = rm64 - assert(type(offset) is int) - if base == 'RIP': - # RIP pointer relative addressing mode! - rexprefix = rex(w=1, r=rexbit[r64]) - mod_rm = modrm(mod=0, rm=5, reg=regs64[r64]) - return [rexprefix], [mod_rm] + imm32(offset) - else: - assert(base in regs64) - - if base == 'rsp' or base == 'r12': - # extended function that uses SIB byte - rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) - # rm=4 indicates a SIB byte follows - mod_rm = modrm(mod=1, rm=4, reg=regs64[r64]) - # index=4 indicates that index is not used - si_b = sib(ss=0, index=4, base=regs64[base]) - return [rexprefix], [mod_rm, si_b] + imm8(offset) - else: - rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[base]) - mod_rm = modrm(mod=1, rm=regs64[base], reg=regs64[r64]) - return [rexprefix], [mod_rm] + imm8(offset) - elif len(rm64) == 1: - offset = rm64[0] - if type(offset) is int: - rexprefix = rex(w=1, r=rexbit[r64]) - mod_rm = modrm(mod=0, rm=4,reg=regs64[r64]) - si_b = sib(ss=0, index=4,base=5) # 0x25 - return [rexprefix], [mod_rm, si_b] + imm32(offset) - else: - Error('Memory reference of type {0} not implemented'.format(offset)) - else: - Error('Memory reference not implemented') - elif rm64 in regs64: - rexprefix = rex(w=1, r=rexbit[r64], b=rexbit[rm64]) - mod_rm = modrm(3, rm=regs64[rm64], reg=regs64[r64]) - return [rexprefix], [mod_rm] - - -def leareg64(rega, m): - opcode = 0x8d # lea r64, m - pre, post = prepost(rega, m) - return pre + [opcode] + post - - -class Mov1(Instruction): - """ Mov r64 to r64 """ - def __init__(self, dst, src): - assert src in regs64, str(src) - assert dst in regs64, str(dst) - self.rex = RexToken(w=1, r=dst.rexbit, b=src.rexbit) - self.mod_rm = ModRmToken(mod=3, rm=dst.regbits, reg=src.regbits) - - def encode(self): - opcode = 0x89 # mov r/m64, r64 - code = bytes([opcode]) - return self.rex.encode() + code + self.mod_rm.encode() - - -def Mov(dst, src): - if type(src) is int: - pre = [rex(w=1, b=rexbit[rega])] - opcode = 0xb8 + regs64[rega] - post = imm64(regb) - elif type(src) is X86Register: - return Mov1(dst, src) - elif type(src) is str: - if rega in regs64: - opcode = 0x8b # mov r64, r/m64 - pre, post = prepost(rega, regb) - else: - raise Exception('Unknown register {0}'.format(rega)) - else: - raise Exception('Move of this kind {0}, {1} not implemented'.format(rega, regb)) - return pre + [opcode] + post - - -def Xor(rega, regb): - return Xor1(rega, regb) - - -class Xor1(Instruction): - def __init__(self, a, b): - self.rex = RexToken(w=1, r=b.rexbit, b=a.rexbit) - self.mod_rm = ModRmToken(mod=3, rm=a.regbits, reg=b.regbits) - - def encode(self): - opcode = 0x31 # XOR r/m64, r64 - # Alternative is 0x33 XOR r64, r/m64 - code = bytes([opcode]) - return self.rex.encode() + code + self.mod_rm.encode() - - -# integer arithmatic: -def addreg64(rega, regb): - if regb in regs64: - pre, post = prepost(regb, rega) - opcode = 0x01 # ADD r/m64, r64 - return pre + [opcode] + post - elif type(regb) is int: - if regb < 100: - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x83 # add r/m, imm8 - mod_rm = modrm(3, rm=regs64[rega], reg=0) - return [rexprefix, opcode, mod_rm]+imm8(regb) - elif regb < (1<<31): - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x81 # add r/m64, imm32 - mod_rm = modrm(3, rm=regs64[rega], reg=0) - return [rexprefix, opcode, mod_rm]+imm32(regb) - else: - Error('Constant value too large!') - else: - Error('unknown second operand!'.format(regb)) - -def subreg64(rega, regb): - if regb in regs64: - pre, post = prepost(regb, rega) - opcode = 0x29 # SUB r/m64, r64 - return pre + [opcode] + post - elif type(regb) is int: - if regb < 100: - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x83 # sub r/m, imm8 - mod_rm = modrm(3, rm=regs64[rega], reg=5) - return [rexprefix, opcode, mod_rm]+imm8(regb) - elif regb < (1<<31): - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x81 # sub r/m64, imm32 - mod_rm = modrm(3, rm=regs64[rega], reg=5) - return [rexprefix, opcode, mod_rm]+imm32(regb) - else: - Error('Constant value too large!') - - else: - Error('unknown second operand!'.format(regb)) - -def idivreg64(reg): - rexprefix = rex(w=1, b=rexbit[reg]) - opcode = 0xf7 # IDIV r/m64 - mod_rm = modrm(3, rm=regs64[reg], reg=7) - return [rexprefix, opcode, mod_rm] - -def imulreg64_rax(reg): - rexprefix = rex(w=1, b=rexbit[reg]) - opcode = 0xf7 # IMUL r/m64 - mod_rm = modrm(3, rm=regs64[reg], reg=5) - return [rexprefix, opcode, mod_rm] - -def imulreg64(rega, regb): - pre, post = prepost(rega, regb) - opcode = 0x0f # IMUL r64, r/m64 - opcode2 = 0xaf - return pre + [opcode, opcode2] + post - - -def cmpreg64(rega, regb): - if regb in regs64: - pre, post = prepost(regb, rega) - opcode = 0x39 # CMP r/m64, r64 - return pre + [opcode] + post - elif type(regb) is int: - rexprefix = rex(w=1, b=rexbit[rega]) - opcode = 0x83 # CMP r/m64, imm8 - mod_rm = modrm(3, rm=regs64[rega], reg=7) - return [rexprefix, opcode, mod_rm] + imm8(regb) - else: - Error('not implemented cmp64') diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/x86/registers.py --- a/python/ppci/target/x86/registers.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -""" - Contains register definitions for x86 target. -""" -from ..basetarget import Register - - -class X86Register(Register): - def __init__(self, num, name): - super().__init__(name) - self.num = num - - def __repr__(self): - return 'x86reg {}'.format(self.name) - - @property - def rexbit(self): - return (self.num >> 3) & 0x1 - - @property - def regbits(self): - return self.num & 0x7 - -# Calculation of the rexb bit: -# rexbit = {'rax': 0, 'rcx':0, 'rdx':0, 'rbx': 0, 'rsp': 0, 'rbp': 0, 'rsi':0, -# 'rdi':0,'r8':1,'r9':1,'r10':1,'r11':1,'r12':1,'r13':1,'r14':1,'r15':1} - -# regs64 = {'rax': 0,'rcx':1,'rdx':2,'rbx':3,'rsp':4,'rbp':5,'rsi':6,'rdi':7, -# 'r8':0,'r9':1,'r10':2,'r11':3,'r12':4,'r13':5,'r14':6,'r15':7} -# regs32 = {'eax': 0, 'ecx':1, 'edx':2, 'ebx': 3, 'esp': 4, 'ebp': 5, 'esi':6, -# 'edi':7} -# regs8 = {'al':0,'cl':1,'dl':2,'bl':3,'ah':4,'ch':5,'dh':6,'bh':7} -rax = X86Register(0, 'rax') -rcx = X86Register(1, 'rcx') -rdx = X86Register(2, 'rdx') -rbx = X86Register(3, 'rbx') -rsp = X86Register(4, 'rsp') -rbp = X86Register(5, 'rbp') -rsi = X86Register(6, 'rsi') -rdi = X86Register(7, 'rdi') - -r8 = X86Register(8, 'r8') -r9 = X86Register(9, 'r9') -r10 = X86Register(10, 'r10') -r11 = X86Register(11, 'r11') -r12 = X86Register(12, 'r12') -r13 = X86Register(13, 'r13') -r14 = X86Register(14, 'r14') -r15 = X86Register(15, 'r15') - -low_regs = {rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi} - -regs64 = {r8, r9, r10, r11, r12, r13, r14, r15} | low_regs diff -r a7c444404df9 -r 0374c65cb437 python/ppci/target/x86/target_x86.py --- a/python/ppci/target/x86/target_x86.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,59 +0,0 @@ -from ..basetarget import Target -from ...assembler import BaseAssembler -from .registers import rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi -from .registers import r8, r9, r10, r11, r12, r13, r14, r15, regs64 -from .instructions import Mov, Inc, Xor, Push, Pop - - -class X86Assembler(BaseAssembler): - def __init__(self, target): - super().__init__(target) - self.make_parser() - - -class X86Target(Target): - """ x86 target containing assembler, linker""" - def __init__(self): - super().__init__('x86') - - for reg in regs64: - self.add_keyword(reg.name) - - self.add_rule('reg', ['rax'], lambda rhs: rax) - self.add_rule('reg', ['rcx'], lambda rhs: rcx) - self.add_rule('reg', ['rdx'], lambda rhs: rdx) - self.add_rule('reg', ['rbx'], lambda rhs: rbx) - self.add_rule('reg', ['rsp'], lambda rhs: rsp) - self.add_rule('reg', ['rbp'], lambda rhs: rbp) - self.add_rule('reg', ['rsi'], lambda rhs: rsi) - self.add_rule('reg', ['rdi'], lambda rhs: rdi) - self.add_rule('reg', ['r8'], lambda rhs: r8) - self.add_rule('reg', ['r9'], lambda rhs: r9) - self.add_rule('reg', ['r10'], lambda rhs: r10) - self.add_rule('reg', ['r11'], lambda rhs: r11) - self.add_rule('reg', ['r12'], lambda rhs: r12) - self.add_rule('reg', ['r13'], lambda rhs: r13) - self.add_rule('reg', ['r14'], lambda rhs: r14) - self.add_rule('reg', ['r15'], lambda rhs: r15) - - self.add_keyword('mov') - self.add_instruction(['mov', 'reg', ',', 'reg'], - lambda rhs: Mov(rhs[1], rhs[3])) - - self.add_keyword('xor') - self.add_instruction(['xor', 'reg', ',', 'reg'], - lambda rhs: Xor(rhs[1], rhs[3])) - - self.add_keyword('inc') - self.add_instruction(['inc', 'reg'], - lambda rhs: Inc(rhs[1])) - - self.add_keyword('push') - self.add_instruction(['push', 'reg'], - lambda rhs: Push(rhs[1])) - - self.add_keyword('pop') - self.add_instruction(['pop', 'reg'], - lambda rhs: Pop(rhs[1])) - - self.assembler = X86Assembler(self) diff -r a7c444404df9 -r 0374c65cb437 python/ppci/tasks.py --- a/python/ppci/tasks.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,198 +0,0 @@ -""" - This module defines tasks and a runner for these tasks. Tasks can - have dependencies and it can be determined if they need to be run. -""" - -import logging -import re -import os -import glob - - -task_map = {} -def register_task(name): - """ Decorator that registers a task class """ - def f(cls): - task_map[name] = cls - return cls - return f - - -class TaskError(Exception): - """ When a task fails, this exception is raised """ - def __init__(self, msg): - self.msg = msg - - -class Project: - """ A project contains a set of named targets that can depend upon - eachother """ - def __init__(self, name): - self.name = name - self.targets = {} - self.properties = {} - self.macro_regex = re.compile('\$\{([^\}]+)\}') - - def set_property(self, name, value): - self.properties[name] = value - - def get_property(self, name): - if name not in self.properties: - raise TaskError('Property "{}" not found'.format(name)) - return self.properties[name] - - def add_target(self, t): - if t.name in self.targets: - raise TaskError("Duplicate target '{}'".format(t.name)) - self.targets[t.name] = t - - def get_target(self, target_name): - if target_name not in self.targets: - raise TaskError('target "{}" not found'.format(target_name)) - return self.targets[target_name] - - def expand_macros(self, txt): - """ Replace all macros in txt with the correct properties """ - while True: - mo = self.macro_regex.search(txt) - if not mo: - break - propname = mo.group(1) - propval = self.get_property(propname) - txt = txt[:mo.start()] + propval + txt[mo.end():] - return txt - - def dfs(self, target_name, state): - state.add(target_name) - target = self.get_target(target_name) - for dep in target.dependencies: - if dep in state: - raise TaskError('Dependency loop detected {} -> {}' - .format(target_name, dep)) - self.dfs(dep, state) - - def check_target(self, target_name): - state = set() - self.dfs(target_name, state) - - def dependencies(self, target_name): - assert type(target_name) is str - target = self.get_target(target_name) - cdst = list(self.dependencies(dep) for dep in target.dependencies) - cdst.append(target.dependencies) - return set.union(*cdst) - - -class Target: - """ Defines a target that has a name and a list of tasks to execute """ - def __init__(self, name, project): - self.name = name - self.project = project - self.tasks = [] - self.dependencies = set() - - def add_task(self, task): - self.tasks.append(task) - - def add_dependency(self, target_name): - """ Add another task as a dependency for this task """ - self.dependencies.add(target_name) - - def __gt__(self, other): - return other.name in self.project.dependencies(self.name) - - def __repr__(self): - return 'Target "{}"'.format(self.name) - - -class Task: - """ Task that can run, and depend on other tasks """ - def __init__(self, target, kwargs, sub_elements=[]): - self.logger = logging.getLogger('task') - self.target = target - self.name = self.__class__.__name__ - self.arguments = kwargs - self.subs = sub_elements - - def get_argument(self, name): - if name not in self.arguments: - raise TaskError('attribute "{}" not specified'.format(name)) - return self.arguments[name] - - def get_property(self, name): - return self.target.project.get_property(name) - - def relpath(self, filename): - basedir = self.get_property('basedir') - return os.path.join(basedir, filename) - - def open_file_set(self, s): - """ Creates a list of open file handles. s can be one of these: - - A string like "a.c3" - - A string like "*.c3" - - A string like "a.c3;src/*.c3" - """ - assert type(s) is str - fns = [] - for part in s.split(';'): - fns += glob.glob(self.relpath(part)) - return fns - - def run(self): - raise NotImplementedError("Implement this abstract method!") - - def __repr__(self): - return 'Task "{}"'.format(self.name) - - -class TaskRunner: - """ Basic task runner that can run some tasks in sequence """ - def __init__(self): - self.logger = logging.getLogger('taskrunner') - - def run(self, project, targets=[]): - """ Try to run a project """ - # Determine what targets to run: - if targets: - target_list = targets - else: - if project.default: - target_list = [project.default] - else: - target_list = [] - - try: - if not target_list: - self.logger.info('Done!') - return 0 - - # Check for loops: - for target in target_list: - project.check_target(target) - - # Calculate all dependencies: - target_list = set.union(*[project.dependencies(t) for t in target_list]).union(set(target_list)) - # Lookup actual targets: - target_list = [project.get_target(target_name) for target_name in target_list] - target_list.sort() - - self.logger.info('Target sequence: {}'.format(target_list)) - - # Run tasks: - for target in target_list: - self.logger.info('Target {}'.format(target.name)) - for task in target.tasks: - if type(task) is tuple: - tname, props = task - for arg in props: - props[arg] = project.expand_macros(props[arg]) - task = task_map[tname](target, props) - self.logger.info('Running {}'.format(task)) - task.run() - else: - raise Exception() - self.logger.info('Done!') - except TaskError as e: - self.logger.error(str(e.msg)) - return 1 - return 0 diff -r a7c444404df9 -r 0374c65cb437 python/ppci/transform.py --- a/python/ppci/transform.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,224 +0,0 @@ -""" - Transformation to optimize IR-code -""" - -import logging -from . import ir -# Standard passes: - -class FunctionPass: - def __init__(self): - self.logger = logging.getLogger(str(self.__class__.__name__)) - - def run(self, ir_module): - """ Main entry point for the pass """ - self.logger.debug('Running pass {}'.format(self.__class__.__name__)) - self.prepare() - if isinstance(ir_module, ir.Module): - for f in ir_module.Functions: - self.onFunction(f) - elif isinstance(ir_module, ir.Function): - self.onFunction(ir_module) - else: - raise Exception() - - def onFunction(self, f): - """ Override this virtual method """ - raise NotImplementedError() - - def prepare(self): - pass - - -class BasicBlockPass(FunctionPass): - def onFunction(self, f): - for bb in f.Blocks: - self.onBasicBlock(bb) - - def onBasicBlock(self, bb): - """ Override this virtual method """ - raise NotImplementedError() - - -class InstructionPass(BasicBlockPass): - def onBasicBlock(self, bb): - for ins in iter(bb.Instructions): - self.onInstruction(ins) - - def onInstruction(self, ins): - """ Override this virtual method """ - raise NotImplementedError() - - -class BasePass(BasicBlockPass): - def onBasicBlock(self, bb): - pass - - -# Usefull transforms: -class ConstantFolder(BasePass): - def __init__(self): - super().__init__() - self.ops = {} - self.ops['+'] = lambda x, y: x + y - self.ops['-'] = lambda x, y: x - y - self.ops['*'] = lambda x, y: x * y - self.ops['<<'] = lambda x, y: x << y - - def postExpr(self, expr): - if type(i) is BinaryOperator and i.operation in self.ops.keys() and type(i.a) is Const and type(i.b) is Const: - vr = self.ops[i.operation](i.a.value, i.b.value) - return Const(vr) - else: - return expr - - -class DeadCodeDeleter(BasicBlockPass): - def onBasicBlock(self, bb): - def instructionUsed(ins): - if not type(ins) in [ImmLoad, BinaryOperator]: - return True - if len(ins.defs) == 0: - # In case this instruction does not define any - # variables, assume it is usefull. - return True - return any(d.Used for d in ins.defs) - - change = True - while change: - change = False - for i in bb.Instructions: - if instructionUsed(i): - continue - bb.removeInstruction(i) - change = True - - -class CommonSubexpressionElimination(BasicBlockPass): - def onBasicBlock(self, bb): - constMap = {} - to_remove = [] - for i in bb.Instructions: - if isinstance(i, ImmLoad): - if i.value in constMap: - t_new = constMap[i.value] - t_old = i.target - logging.debug('Replacing {} with {}'.format(t_old, t_new)) - t_old.replaceby(t_new) - to_remove.append(i) - else: - constMap[i.value] = i.target - elif isinstance(i, BinaryOperator): - k = (i.value1, i.operation, i.value2) - if k in constMap: - t_old = i.result - t_new = constMap[k] - logging.debug('Replacing {} with {}'.format(t_old, t_new)) - t_old.replaceby(t_new) - to_remove.append(i) - else: - constMap[k] = i.result - for i in to_remove: - self.logger.debug('removing {}'.format(i)) - bb.removeInstruction(i) - - -child_nodes = {} -child_nodes[ir.Binop] = ['a', 'b'] -child_nodes[ir.Add] = ['a', 'b'] -child_nodes[ir.Const] = [] -child_nodes[ir.Temp] = [] -child_nodes[ir.Exp] = ['e'] -child_nodes[ir.Mem] = ['e'] -child_nodes[ir.Addr] = ['e'] -child_nodes[ir.LocalVariable] = [] -child_nodes[ir.GlobalVariable] = [] -child_nodes[ir.Parameter] = [] -child_nodes[ir.Jump] = [] -child_nodes[ir.Terminator] = [] -child_nodes[ir.Call] = ['arguments'] -child_nodes[ir.CJump] = ['a', 'b'] -child_nodes[ir.Move] = ['src', 'dst'] - - -def apply_function(x, f): - """ Recursively apply function """ - # Handle list: - if type(x) is list: - for i in range(len(x)): - x[i] = apply_function(x[i], f) - return x - - # Normal node: - for child in child_nodes[type(x)]: - v = getattr(x, child) - v = apply_function(v, f) - assert not (v is None) - setattr(x, child, v) - # Apply function! - return f(x) - - -class ExpressionFixer(InstructionPass): - def onInstruction(self, i): - apply_function(i, self.grok) - - -class RemoveAddZero(ExpressionFixer): - def grok(self, v): - if type(v) is ir.Binop: - if v.operation == '+': - if type(v.b) is ir.Const and v.b.value == 0: - self.logger.debug('Folding {} to {}'.format(v, v.a)) - return v.a - elif v.operation == '*': - if type(v.b) is ir.Const and v.b.value == 1: - self.logger.debug('Multiple 1 {} to {}'.format(v, v.a)) - return v.a - return v - - -class CleanPass(FunctionPass): - def onFunction(self, f): - self.remove_empty_blocks(f) - self.remove_one_preds(f) - - def remove_empty_blocks(self, f): - """ Remove empty basic blocks from function. """ - # If a block only contains a branch, it can be removed: - empty = lambda b: type(b.FirstInstruction) is ir.Jump - empty_blocks = list(filter(empty, f.Blocks)) - for b in empty_blocks: - # Update predecessors - preds = b.Predecessors - if b not in preds + [f.entry]: - # Do not remove if preceeded by itself - tgt = b.LastInstruction.target - for pred in preds: - pred.LastInstruction.changeTarget(b, tgt) - self.logger.debug('Removing empty block: {}'.format(b)) - f.removeBlock(b) - - def remove_one_preds(self, f): - """ Remove basic blocks with only one predecessor """ - change = True - while change: - change = False - for block in f.Blocks: - preds = block.Predecessors - if len(preds) == 1 and block not in preds and type(preds[0].LastInstruction) is ir.Jump and block is not f.epiloog: - self.glue_blocks(preds[0], block, f) - change = True - - def glue_blocks(self, block1, block2, f): - """ Glue two blocks together into the first block """ - self.logger.debug('Merging {} and {}'.format(block1.name, block2.name)) - - # Remove the last jump: - block1.removeInstruction(block1.LastInstruction) - - # Copy all instructions to block1: - for instruction in block2.Instructions: - block1.addInstruction(instruction) - # This does not work somehow: - #block2.parent.removeBlock(block2) diff -r a7c444404df9 -r 0374c65cb437 python/pyburg.py --- a/python/pyburg.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,343 +0,0 @@ -#!/usr/bin/python - -""" -Bottom up rewrite generator ---------------------------- - -This script takes as input a description of patterns and outputs a -matcher class that can match trees given the patterns. - -Patterns are specified as follows:: - - reg -> ADDI32(reg, reg) 2 (. add NT0 NT1 .) - reg -> MULI32(reg, reg) 3 (. .) - -or a multiply add:: - - reg -> ADDI32(MULI32(reg, reg), reg) 4 (. muladd $1, $2, $3 .) - -The general specification pattern is:: - - [result] -> [tree] [cost] [template code] - -Trees ------ - -A tree is described using parenthesis notation. For example a node X with -three child nodes is described as: - - X(a, b, b) - -Trees can be nested: - - X(Y(a, a), a) - -The 'a' in the example above indicates an open connection to a next tree -pattern. - - -In the example above 'reg' is a non-terminal. ADDI32 is a terminal. non-terminals -cannot have child nodes. A special case occurs in this case: - - reg -> rc - -where 'rc' is a non-terminal. This is an example of a chain rule. Chain rules -can be used to allow several variants of non-terminals. - -The generated matcher uses dynamic programming to find the best match of the -tree. This strategy consists of two steps: - - - label: During this phase the given tree is traversed in a bottom up way. - each node is labelled with a possible matching rule and the corresponding cost. - - select: In this step, the tree is traversed again, selecting at each point - the cheapest way to get to the goal. - -""" - -import sys -import os -import io -import types -import argparse -from ppci import Token, SourceLocation -from pyyacc import ParserException -import yacc -import baselex -from tree import Tree - -# Generate parser on the fly: -spec_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'burg.x') -burg_parser = yacc.load_as_module(spec_file) - - -class BurgLexer(baselex.BaseLexer): - def __init__(self): - tok_spec = [ - ('id', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), - ('kw', r'%[A-Za-z][A-Za-z\d_]*', lambda typ, val: (val, val)), - ('number', r'\d+', lambda typ, val: (typ, int(val))), - ('STRING', r"'[^']*'", lambda typ, val: ('string', val[1:-1])), - ('OTHER', r'[:;\|\(\),]', lambda typ, val: (val, val)), - ('SKIP', r'[ ]', None) - ] - super().__init__(tok_spec) - - def tokenize(self, txt): - lines = txt.split('\n') - header_lines = [] - section = 0 - for line in lines: - loc = SourceLocation(self.filename, 0, 0, 0) - line = line.strip() - if not line: - continue # Skip empty lines - elif line == '%%': - section += 1 - if section == 1: - yield Token('header', header_lines, loc) - yield Token('%%', '%%', loc) - else: - if section == 0: - header_lines.append(line) - else: - # we could use yield from below, but python 3.2 does not work then: - for tk in super().tokenize(line): - yield tk - - -class Rule: - """ A rewrite rule. Specifies a tree that can be rewritten into a result - at a specific cost """ - def __init__(self, non_term, tree, cost, acceptance, template): - self.non_term = non_term - self.tree = tree - self.cost = cost - self.acceptance = acceptance - self.template = template - self.nr = 0 - - def __repr__(self): - return '{} -> {} ${}'.format(self.non_term, self.tree, self.cost) - - -class Symbol: - def __init__(self, name): - self.name = name - - -class Term(Symbol): - pass - - -class Nonterm(Symbol): - def __init__(self, name): - super().__init__(name) - self.chain_rules = [] - - -class BurgSystem: - def __init__(self): - self.rules = [] - self.symbols = {} - self.goal = None - - def symType(self, t): - return (s.name for s in self.symbols.values() if type(s) is t) - - terminals = property(lambda s: s.symType(Term)) - - non_terminals = property(lambda s: s.symType(Nonterm)) - - def add_rule(self, non_term, tree, cost, acceptance, template): - template = template.strip() - if not template: - template = 'pass' - rule = Rule(non_term, tree, cost, acceptance, template) - if len(tree.children) == 0 and tree.name not in self.terminals: - self.non_term(tree.name).chain_rules.append(rule) - self.non_term(rule.non_term) - self.rules.append(rule) - rule.nr = len(self.rules) - - def non_term(self, name): - if name in self.terminals: - raise BurgError('Cannot redefine terminal') - if not self.goal: - self.goal = name - return self.install(name, Nonterm) - - def tree(self, name, *args): - return Tree(name, *args) - - def install(self, name, t): - assert type(name) is str - if name in self.symbols: - assert type(self.symbols[name]) is t - else: - self.symbols[name] = t(name) - return self.symbols[name] - - def add_terminal(self, terminal): - self.install(terminal, Term) - - -class BurgError(Exception): - pass - - -class BurgParser(burg_parser.Parser): - """ Derived from automatically generated parser """ - def parse(self, l): - self.system = BurgSystem() - super().parse(l) - return self.system - - -class BurgGenerator: - def print(self, *args): - """ Print helper function that prints to output file """ - print(*args, file=self.output_file) - - def generate(self, system, output_file): - """ Generate script that implements the burg spec """ - self.output_file = output_file - self.system = system - - self.print('#!/usr/bin/python') - self.print('from tree import Tree, BaseMatcher, State') - for header in self.system.header_lines: - self.print(header) - self.print() - self.print('class Matcher(BaseMatcher):') - self.print(' def __init__(self):') - self.print(' self.kid_functions = {}') - self.print(' self.nts_map = {}') - self.print(' self.pat_f = {}') - for rule in self.system.rules: - kids, dummy = self.compute_kids(rule.tree, 't') - rule.num_nts = len(dummy) - lf = 'lambda t: [{}]'.format(', '.join(kids), rule) - pf = 'self.P{}'.format(rule.nr) - self.print(' # {}: {}'.format(rule.nr, rule)) - self.print(' self.kid_functions[{}] = {}'.format(rule.nr, lf)) - self.print(' self.nts_map[{}] = {}'.format(rule.nr, dummy)) - self.print(' self.pat_f[{}] = {}'.format(rule.nr, pf)) - self.print() - for rule in self.system.rules: - if rule.num_nts > 0: - args = ', '.join('c{}'.format(x) for x in range(rule.num_nts)) - args = ', ' + args - else: - args = '' - # Create template function: - self.print(' def P{}(self, tree{}):'.format(rule.nr, args)) - template = rule.template - for t in template.split(';'): - self.print(' {}'.format(t.strip())) - # Create acceptance function: - if rule.acceptance: - self.print(' def A{}(self, tree):'.format(rule.nr)) - for t in rule.acceptance.split(';'): - self.print(' {}'.format(t.strip())) - self.emit_state() - self.print(' def gen(self, tree):') - self.print(' self.burm_label(tree)') - self.print(' if not tree.state.has_goal("{}"):'.format(self.system.goal)) - self.print(' raise Exception("Tree {} not covered".format(tree))') - self.print(' return self.apply_rules(tree, "{}")'.format(self.system.goal)) - - def emit_record(self, rule, state_var): - # TODO: check for rules fullfilled (by not using 999999) - acc = '' - if rule.acceptance: - acc = ' and self.A{}(tree)'.format(rule.nr) - self.print(' nts = self.nts({})'.format(rule.nr)) - self.print(' kids = self.kids(tree, {})'.format(rule.nr)) - self.print(' if all(x.state.has_goal(y) for x, y in zip(kids, nts)){}:'.format(acc)) - self.print(' c = sum(x.state.get_cost(y) for x, y in zip(kids, nts)) + {}'.format(rule.cost)) - self.print(' tree.state.set_cost("{}", c, {})'.format(rule.non_term, rule.nr)) - for cr in self.system.symbols[rule.non_term].chain_rules: - self.print(' # Chain rule: {}'.format(cr)) - self.print(' tree.state.set_cost("{}", c + {}, {})'.format(cr.non_term, cr.cost, cr.nr)) - - def emit_state(self): - """ Emit a function that assigns a new state to a node """ - self.print(' def burm_state(self, tree):') - self.print(' tree.state = State()') - for term in self.system.terminals: - self.emitcase(term) - self.print() - - def emitcase(self, term): - rules = [rule for rule in self.system.rules if rule.tree.name == term] - for rule in rules: - condition = self.emittest(rule.tree, 'tree') - self.print(' if {}:'.format(condition)) - self.emit_record(rule, 'state') - - def compute_kids(self, t, root_name): - """ Compute of a pattern the blanks that must be provided from below in the tree """ - if t.name in self.system.non_terminals: - return [root_name], [t.name] - else: - k = [] - nts = [] - for i, c in enumerate(t.children): - pfx = root_name + '.children[{}]'.format(i) - kf, dummy = self.compute_kids(c, pfx) - nts.extend(dummy) - k.extend(kf) - return k, nts - - - def emittest(self, tree, prefix): - """ Generate condition for a tree pattern """ - ct = (c for c in tree.children if c.name not in self.system.non_terminals) - child_tests = (self.emittest(c, prefix + '.children[{}]'.format(i)) for i, c in enumerate(ct)) - child_tests = ('({})'.format(ct) for ct in child_tests) - child_tests = ' and '.join(child_tests) - child_tests = ' and ' + child_tests if child_tests else '' - tst = '{}.name == "{}"'.format(prefix, tree.name) - return tst + child_tests - - -def make_argument_parser(): - """ Constructs an argument parser """ - parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler') - parser.add_argument('source', type=argparse.FileType('r'), \ - help='the parser specification') - parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ - default=sys.stdout) - return parser - - -def load_as_module(filename): - """ Load a parser spec file, generate LR tables and create module """ - ob = io.StringIO() - args = argparse.Namespace(source=open(filename), output=ob) - main(args) - - matcher_mod = types.ModuleType('generated_matcher') - exec(ob.getvalue(), matcher_mod.__dict__) - return matcher_mod - - -def main(args): - src = args.source.read() - args.source.close() - - # Parse specification into burgsystem: - l = BurgLexer() - p = BurgParser() - l.feed(src) - burg_system = p.parse(l) - - # Generate matcher: - generator = BurgGenerator() - generator.generate(burg_system, args.output) - - -if __name__ == '__main__': - # Parse arguments: - args = make_argument_parser().parse_args() - main(args) diff -r a7c444404df9 -r 0374c65cb437 python/pyyacc.py --- a/python/pyyacc.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,414 +0,0 @@ -""" - Parser generator script -""" - -from ppci import Token - -EPS = 'EPS' -EOF = 'EOF' - - -class ParserGenerationException(Exception): - """ Raised when something goes wrong during parser generation """ - pass - - -class ParserException(Exception): - """ Raised during a failure in the parsing process """ - pass - - -class Action: - def __repr__(self): - return 'Action' - - def __eq__(self, other): - return str(self) == str(other) - - -class Shift(Action): - def __init__(self, to_state): - self.to_state = to_state - - def __repr__(self): - return 'Shift({})'.format(self.to_state) - - -class Reduce(Action): - def __init__(self, rule): - self.rule = rule - - def __repr__(self): - return 'Reduce({})'.format(self.rule) - - -class Accept(Reduce): - def __repr__(self): - return 'Accept({})'.format(self.rule) - - -def print_grammar(g): - """ Pretty print a grammar """ - print(g) - for production in g.productions: - print(production) - - -def calculate_first_sets(grammar): - """ - Calculate first sets for each grammar symbol - This is a dictionary which maps each grammar symbol - to a set of terminals that can be encountered first - when looking for the symbol. - """ - first = {} - nullable = {} - for terminal in grammar.terminals | {EOF, EPS}: - first[terminal] = set([terminal]) - nullable[terminal] = False - for nt in grammar.nonterminals: - first[nt] = set() - nullable[nt] = False - while True: - some_change = False - for rule in grammar.productions: - # Check for null-ability: - if all(nullable[beta] for beta in rule.symbols): - if not nullable[rule.name]: - nullable[rule.name] = True - some_change = True - # Update first sets: - for beta in rule.symbols: - if not nullable[beta]: - if first[beta] - first[rule.name]: - first[rule.name] |= first[beta] - some_change = True - break - if not some_change: - break - return first - - -class Grammar: - """ Defines a grammar of a language """ - def __init__(self, terminals): - self.terminals = set(terminals) - self.nonterminals = set() - self.productions = [] - self._first = None # Cached first set - self.start_symbol = None - - def __repr__(self): - return 'Grammar with {} rules'.format(len(self.productions)) - - def add_production(self, name, symbols, f=None): - """ Add a production rule to the grammar """ - production = Production(name, symbols, f) - self.productions.append(production) - if name in self.terminals: - raise ParserGenerationException("Cannot redefine terminal {0}".format(name)) - self.nonterminals.add(name) - self._first = None # Invalidate cached version - - def add_one_or_more(self, element_nonterm, list_nonterm): - """ Helper to add the rule - lst: elem - lst: lst elem - """ - def a(el): - return [el] - def b(ls, el): - ls.append(el) - return ls - self.add_production(list_nonterm, [element_nonterm], a) - self.add_production(list_nonterm, [list_nonterm, element_nonterm], b) - - def productionsForName(self, name): - """ Retrieve all productions for a non terminal """ - return [p for p in self.productions if p.name == name] - - @property - def Symbols(self): - """ Get all the symbols defined by this grammar """ - return self.nonterminals | self.terminals - - @property - def first(self): - """ - The first set is a mapping from a grammar symbol to a set of - set of all terminal symbols that can be the first terminal when - looking for the grammar symbol - """ - if not self._first: - self._first = calculate_first_sets(self) - return self._first - - def closure(self, itemset): - """ Expand itemset by using epsilon moves """ - worklist = list(itemset) - def addIt(itm): - if not itm in itemset: - itemset.add(itm) - worklist.append(itm) - def first2(itm): - # When using the first sets, create a copy: - f = set(self.first[itm.NextNext]) - if EPS in f: - f.discard(EPS) - f.add(itm.look_ahead) - return f - # Start of algorithm: - while worklist: - item = worklist.pop(0) - if not item.IsShift: - continue - if not (item.Next in self.nonterminals): - continue - C = item.Next - for add_p in self.productionsForName(C): - for b in first2(item): - addIt(Item(add_p, 0, b)) - return frozenset(itemset) - - def initialItemSet(self): - """ Calculates the initial item set """ - iis = set() - for p in self.productionsForName(self.start_symbol): - iis.add(Item(p, 0, EOF)) - return self.closure(iis) - - def nextItemSet(self, itemset, symbol): - """ - Determines the next itemset for the current set and a symbol - This is the goto procedure - """ - next_set = set() - for item in itemset: - if item.can_shift_over(symbol): - next_set.add(item.shifted()) - return self.closure(next_set) - - def genCanonicalSet(self, iis): - states = [] - worklist = [] - transitions = {} - def addSt(s): - if not (s in states): - worklist.append(s) - states.append(s) - addSt(iis) - while len(worklist) > 0: - itemset = worklist.pop(0) - for symbol in self.Symbols: - nis = self.nextItemSet(itemset, symbol) - if not nis: - continue - addSt(nis) - transitions[(states.index(itemset), symbol)] = states.index(nis) - return states, transitions - - def checkSymbols(self): - """ Checks no symbols are undefined """ - for production in self.productions: - for symbol in production.symbols: - if symbol not in self.Symbols: - raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) - - def generate_parser(self): - """ Generates a parser from the grammar """ - action_table, goto_table = self.generate_tables() - p = LRParser(action_table, goto_table, self.start_symbol) - p.grammar = self - return p - - def generate_tables(self): - """ Generate parsing tables """ - if not self.start_symbol: - self.start_symbol = self.productions[0].name - self.checkSymbols() - action_table = {} - goto_table = {} - iis = self.initialItemSet() - - # First generate all item sets by using the nextItemset function: - states, transitions = self.genCanonicalSet(iis) - - def setAction(state, t, action): - assert isinstance(action, Action) - key = (state, t) - assert type(state) is int - assert type(t) is str - if key in action_table: - action2 = action_table[key] - if action != action2: - if (type(action2) is Reduce) and (type(action) is Shift): - # Automatically resolve and do the shift action! - # Simple, but almost always what you want!! - action_table[key] = action - elif isinstance(action2, Shift) and isinstance(action, Reduce): - pass - else: - a1 = str(action) - a2 = str(action2) - raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2)) - else: - action_table[key] = action - - # Fill action table: - for state in states: - # Detect conflicts: - for item in state: - if item.IsShift and item.Next in self.terminals: - # Rule 1, a shift item: - nextstate = transitions[(states.index(state), item.Next)] - setAction(states.index(state), item.Next, Shift(nextstate)) - if item.IsReduce: - if item.production.name == self.start_symbol and item.look_ahead == EOF: - # Rule 3: accept: - act = Accept(self.productions.index(item.production)) - else: - # Rule 2, reduce item: - act = Reduce(self.productions.index(item.production)) - setAction(states.index(state), item.look_ahead, act) - for nt in self.nonterminals: - key = (states.index(state), nt) - if key in transitions: - goto_table[key] = transitions[key] - return action_table, goto_table - - -class Production: - """ Production rule for a grammar """ - def __init__(self, name, symbols, f): - self.name = name - self.symbols = symbols - self.f = f - - def __repr__(self): - action = ' ' + str(self.f) if self.f else '' - return '{0} -> {1}'.format(self.name, self.symbols) + action - - -class Item: - """ - Represents a partially parsed item - It has a production it is looking for, a position - in this production called the 'dot' and a look ahead - symbol that must follow this item. - """ - def __init__(self, production, dotpos, look_ahead): - self.production = production - self.dotpos = dotpos - assert self.dotpos <= len(self.production.symbols) - self.look_ahead = look_ahead - self._is_shift = self.dotpos < len(self.production.symbols) - self.IsShift = self._is_shift - if self.IsShift: - self.Next = self.production.symbols[self.dotpos] - self._data = (self.production, self.dotpos, self.look_ahead) - self._hash = self._data.__hash__() - - def __eq__(self, other): - if type(other) is type(self): - return self._data == other._data - return False - - def __hash__(self): - return self._hash - - @property - def IsReduce(self): - """ Check if this item has the dot at the end """ - return not self._is_shift - - def can_shift_over(self, symbol): - """ Determines if this item can shift over the given symbol """ - return self._is_shift and self.Next == symbol - - def shifted(self): - """ Creates a new item that is shifted one position """ - return Item(self.production, self.dotpos + 1, self.look_ahead) - - @property - def NextNext(self): - """ Gets the symbol after the next symbol, or EPS if at the end """ - if self.dotpos + 1 >= len(self.production.symbols): - return EPS - else: - return self.production.symbols[self.dotpos + 1] - - def __repr__(self): - prod = self.production - predot = ' '.join(prod.symbols[0:self.dotpos]) - postdot = ' '.join(prod.symbols[self.dotpos:]) - name = prod.name - args = (name, predot, postdot, self.look_ahead) - return '[{0} -> {1} . {2} -> {3}]'.format(*args) - - -class LRParser: - """ LR parser automata. This class takes goto and action table - and can then process a sequence of tokens. - """ - def __init__(self, action_table, goto_table, start_symbol): - self.action_table = action_table - self.goto_table = goto_table - self.start_symbol = start_symbol - - def parse(self, lexer): - """ Parse an iterable with tokens """ - assert hasattr(lexer, 'next_token'), '{0} is no lexer'.format(type(lexer)) - stack = [0] - r_data_stack = [] - look_ahead = lexer.next_token() - assert type(look_ahead) is Token - # TODO: exit on this condition: - while stack != [0, self.start_symbol, 0]: - state = stack[-1] # top of stack - key = (state, look_ahead.typ) - if not key in self.action_table: - raise ParserException('Error parsing at character {0}'.format(look_ahead)) - action = self.action_table[key] - if type(action) is Reduce: - f_args = [] - prod = self.grammar.productions[action.rule] - for s in prod.symbols: - stack.pop() - stack.pop() - f_args.append(r_data_stack.pop()) - f_args.reverse() - r_data = None - if prod.f: - r_data = prod.f(*f_args) - state = stack[-1] - stack.append(prod.name) - stack.append(self.goto_table[(state, prod.name)]) - r_data_stack.append(r_data) - elif type(action) is Shift: - stack.append(look_ahead.typ) - stack.append(action.to_state) - r_data_stack.append(look_ahead) - look_ahead = lexer.next_token() - assert type(look_ahead) is Token - elif type(action) is Accept: - # Pop last rule data off the stack: - f_args = [] - param = self.grammar.productions[action.rule] - for s in param.symbols: - stack.pop() - stack.pop() - f_args.append(r_data_stack.pop()) - f_args.reverse() - if param.f: - ret_val = param.f(*f_args) - else: - ret_val = None - # Break out! - stack.append(param.name) - stack.append(0) - break - # At exit, the stack must be 1 long - # TODO: fix that this holds: - #assert stack == [0, self.start_symbol, 0] - return ret_val - diff -r a7c444404df9 -r 0374c65cb437 python/tree.py --- a/python/tree.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ - -class Tree: - """ Tree node with a name and possibly some child nodes """ - def __init__(self, name, *args): - self.name = name - self.value = None - self.children = args - - def __repr__(self): - if self.children: - ch = ', '.join(str(c) for c in self.children) - return '{}({})'.format(self.name, ch) - else: - return '{}'.format(self.name) - - -class State: - """ State used to label tree nodes """ - def __init__(self): - self.labels = {} - - def has_goal(self, goal): - return goal in self.labels - - def get_cost(self, goal): - return self.labels[goal][0] - - def get_rule(self, goal): - return self.labels[goal][1] - - def set_cost(self, goal, cost, rule): - if self.has_goal(goal): - if self.get_cost(goal) > cost: - self.labels[goal] = (cost, rule) - else: - self.labels[goal] = (cost, rule) - - -class BaseMatcher: - """ Base class for matcher objects. """ - def kids(self, tree, rule): - return self.kid_functions[rule](tree) - - def nts(self, rule): - return self.nts_map[rule] - - def burm_label(self, tree): - """ Label all nodes in the tree bottom up """ - for c in tree.children: - self.burm_label(c) - self.burm_state(tree) - - def apply_rules(self, tree, goal): - rule = tree.state.get_rule(goal) - results = [self.apply_rules(kid_tree, kid_goal) - for kid_tree, kid_goal in zip(self.kids(tree, rule), self.nts(rule))] - return self.pat_f[rule](tree, *results) diff -r a7c444404df9 -r 0374c65cb437 python/yacc.py --- a/python/yacc.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,277 +0,0 @@ -#!/usr/bin/python - -""" -Parser generator utility. This script can generate a python script from a -grammar description. - -Invoke the script on a grammar specification file: - -.. code:: - - $ ./yacc.py test.x -o test_parser.py - -And use the generated parser by deriving a user class: - - -.. code:: - - import test_parser - class MyParser(test_parser.Parser): - pass - p = MyParser() - p.parse() - - -Alternatively you can load the parser on the fly: - -.. code:: - - import yacc - parser_mod = yacc.load_as_module('mygrammar.x') - class MyParser(parser_mod.Parser): - pass - p = MyParser() - p.parse() - -""" - -import argparse -import re -import sys -import datetime -import types -import io -import logging -from pyyacc import Grammar -from baselex import BaseLexer -from ppci import Token, SourceLocation - - -class XaccLexer(BaseLexer): - def __init__(self): - tok_spec = [ - ('ID', r'[A-Za-z][A-Za-z\d_]*', lambda typ, val: (typ, val)), - ('STRING', r"'[^']*'", lambda typ, val: ('ID', val[1:-1])), - ('BRACEDCODE', r"\{[^\}]*\}", lambda typ, val: (typ, val)), - ('OTHER', r'[:;\|]', lambda typ, val: (val, val)), - ('SKIP', r'[ ]', None) - ] - super().__init__(tok_spec) - - def tokenize(self, txt): - lines = txt.split('\n') - section = 0 - for line in lines: - line = line.strip() - loc = SourceLocation(self.filename, 0, 0, 0) - if not line: - continue # Skip empty lines - if line == '%%': - section += 1 - yield Token('%%', '%%', loc) - continue - if section == 0: - if line.startswith('%tokens'): - yield Token('%tokens', '%tokens', loc) - for tk in super().tokenize(line[7:]): - yield tk - else: - yield Token('HEADER', line, loc) - elif section == 1: - for tk in super().tokenize(line): - yield tk - - -class ParseError(Exception): - pass - - -class XaccParser: - """ Implements a recursive descent parser to parse grammar rules. - We could have made an generated parser, but that would yield a chicken - egg issue. - """ - def __init__(self): - pass - - def prepare_peak(self, lexer): - self.lexer = lexer - self.look_ahead = self.lexer.next_token() - - @property - def Peak(self): - """ Sneak peak to the next token in line """ - return self.look_ahead.typ - - def next_token(self): - """ Take the next token """ - token = self.look_ahead - self.look_ahead = self.lexer.next_token() - return token - - def consume(self, typ): - """ Eat next token of type typ or raise an exception """ - if self.Peak == typ: - return self.next_token() - else: - raise ParseError('Expected {}, but got {}'.format(typ, self.Peak)) - - def has_consumed(self, typ): - """ Consume typ if possible and return true if so """ - if self.Peak == typ: - self.consume(typ) - return True - return False - - def parse_grammar(self, lexer): - """ Entry parse function into recursive descent parser """ - self.prepare_peak(lexer) - # parse header - self.headers = [] - terminals = [] - while self.Peak in ['HEADER', '%tokens']: - if self.Peak == '%tokens': - self.consume('%tokens') - while self.Peak == 'ID': - terminals.append(self.consume('ID').val) - else: - self.headers.append(self.consume('HEADER').val) - self.consume('%%') - self.grammar = Grammar(terminals) - while self.Peak != 'EOF': - self.parse_rule() - return self.grammar - - def parse_symbol(self): - return self.consume('ID').val - - def parse_rhs(self): - """ Parse the right hand side of a rule definition """ - symbols = [] - while self.Peak not in [';', 'BRACEDCODE', '|']: - symbols.append(self.parse_symbol()) - if self.Peak == 'BRACEDCODE': - action = self.consume('BRACEDCODE').val - action = action[1:-1].strip() - else: - action = None - return symbols, action - - def parse_rule(self): - """ Parse a rule definition """ - p = self.parse_symbol() - self.consume(':') - symbols, action = self.parse_rhs() - self.grammar.add_production(p, symbols, action) - while self.has_consumed('|'): - symbols, action = self.parse_rhs() - self.grammar.add_production(p, symbols, action) - self.consume(';') - - -class XaccGenerator: - """ Generator that writes generated parser to file """ - def __init__(self): - self.logger = logging.getLogger('yacc') - - def generate(self, grammar, headers, output_file): - self.output_file = output_file - self.grammar = grammar - self.headers = headers - self.logger.info('Generating parser for grammar {}'.format(grammar)) - self.action_table, self.goto_table = grammar.generate_tables() - self.generate_python_script() - - def print(self, *args): - """ Print helper function that prints to output file """ - print(*args, file=self.output_file) - - def generate_python_script(self): - """ Generate python script with the parser table """ - self.print('#!/usr/bin/python') - stamp = datetime.datetime.now().ctime() - self.print('""" Automatically generated by xacc on {} """'.format(stamp)) - self.print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar') - self.print('from ppci import Token') - self.print('') - for h in self.headers: - print(h, file=output_file) - self.print('') - self.print('class Parser(LRParser):') - self.print(' def __init__(self):') - # Generate rules: - self.print(' self.start_symbol = "{}"'.format(self.grammar.start_symbol)) - self.print(' self.grammar = Grammar({})'.format(self.grammar.terminals)) - for rule_number, rule in enumerate(self.grammar.productions): - rule.f_name = 'action_{}_{}'.format(rule.name, rule_number) - self.print(' self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name)) - # Fill action table: - self.print(' self.action_table = {}') - for state in self.action_table: - action = self.action_table[state] - self.print(' self.action_table[{}] = {}'.format(state, action)) - self.print('') - - # Fill goto table: - self.print(' self.goto_table = {}') - for state_number in self.goto_table: - to = self.goto_table[state_number] - self.print(' self.goto_table[{}] = {}'.format(state_number, to)) - self.print('') - - # Generate a function for each action: - for rule in self.grammar.productions: - num_symbols = len(rule.symbols) - args = ', '.join('arg{}'.format(n + 1) for n in range(num_symbols)) - self.print(' def {}(self, {}):'.format(rule.f_name, args)) - if rule.f == None: - semantics = 'pass' - else: - semantics = str(rule.f) - if semantics.strip() == '': - semantics = 'pass' - for n in range(num_symbols): - semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1)) - self.print(' {}'.format(semantics)) - self.print('') - - -def make_argument_parser(): - # Parse arguments: - parser = argparse.ArgumentParser(description='xacc compiler compiler') - parser.add_argument('source', type=argparse.FileType('r'), \ - help='the parser specification') - parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ - default=sys.stdout) - return parser - - -def load_as_module(filename): - """ Load a parser spec file, generate LR tables and create module """ - ob = io.StringIO() - args = argparse.Namespace(source=open(filename), output=ob) - main(args) - - parser_mod = types.ModuleType('generated_parser') - exec(ob.getvalue(), parser_mod.__dict__) - return parser_mod - - -def main(args): - src = args.source.read() - args.source.close() - - # Construction of generator parts: - lexer = XaccLexer() - parser = XaccParser() - generator = XaccGenerator() - - # Sequence source through the generator parts: - lexer.feed(src) - grammar = parser.parse_grammar(lexer) - generator.generate(grammar, parser.headers, args.output) - - -if __name__ == '__main__': - args = make_argument_parser().parse_args() - main(args) diff -r a7c444404df9 -r 0374c65cb437 python/zcc.py --- a/python/zcc.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -#!/usr/bin/env python - -import sys -import argparse -import logging - -from ppci.report import RstFormatter -from ppci.buildfunctions import construct -import ppci.buildtasks # Include not used, but it registers build tasks. -import ppci - - -def logLevel(s): - """ Converts a string to a valid logging level """ - numeric_level = getattr(logging, s.upper(), None) - if not isinstance(numeric_level, int): - raise ValueError('Invalid log level: {}'.format(s)) - return numeric_level - - -def make_parser(): - parser = argparse.ArgumentParser(description='lcfos Compiler') - - parser.add_argument('--log', help='Log level (INFO,DEBUG,[WARN])', - type=logLevel, default='INFO') - parser.add_argument('--report', - help='Specify a file to write the compile report to', - type=argparse.FileType('w')) - parser.add_argument('--buildfile', - help='use buildfile, otherwise build.xml is the default', - default='build.xml') - - parser.add_argument('targets', metavar='target', nargs='*') - return parser - - -def main(args): - # Configure some logging: - logging.getLogger().setLevel(logging.DEBUG) - ch = logging.StreamHandler() - ch.setFormatter(logging.Formatter(ppci.logformat)) - ch.setLevel(args.log) - logging.getLogger().addHandler(ch) - - if args.report: - fh = logging.StreamHandler(args.report) - fh.setFormatter(RstFormatter()) - logging.getLogger().addHandler(fh) - - res = construct(args.buildfile, args.targets) - - if args.report: - logging.getLogger().removeHandler(fh) - args.report.close() - - logging.getLogger().removeHandler(ch) - return res - - -if __name__ == '__main__': - parser = make_parser() - arguments = parser.parse_args() - sys.exit(main(arguments)) diff -r a7c444404df9 -r 0374c65cb437 test/testarmasm.py --- a/test/testarmasm.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,168 +0,0 @@ -import unittest -import io -from ppci.outstream import BinaryOutputStream -from ppci.objectfile import ObjectFile -from testasm import AsmTestCaseBase -from ppci.target.target_list import arm_target -from ppci.layout import load_layout - - -class ArmAssemblerTestCase(AsmTestCaseBase): - """ ARM-mode (not thumb-mode) instruction assembly test case """ - def setUp(self): - self.target = arm_target - self.obj = ObjectFile() - self.ostream = BinaryOutputStream(self.obj) - self.ostream.select_section('code') - self.assembler = arm_target.assembler - self.assembler.prepare() - - def testMovImm(self): - self.feed('mov r4, 100') - self.check('6440a0e3') - - def testMovImm2(self): - self.feed('mov sp, 0x6000') - self.check('06daa0e3') - - def testMovReg(self): - self.feed('mov r3, sp') - self.feed('mov pc, lr') - self.feed('mov pc, r2') - self.feed('mov sp, r4') - self.feed('mov r5, r6') - self.check('0d30a0e1 0ef0a0e1 02f0a0e1 04d0a0e1 0650a0e1') - - def testAdd2(self): - self.feed('add r12, r11, 300') - self.check('4bcf8be2') - - def testAdd1(self): - self.feed('add r9, r7, r2') - self.check('029087e0') - - def testSub1(self): - self.feed('sub r5, r6, r2') - self.check('025046e0') - - def testSub2(self): - self.feed('sub r0, r1, 0x80000001') - self.check('060141e2') - - def testAnd1(self): - self.feed('and r9, r0, r2') - self.feed('and r4, r8, r6') - self.check('029000e0 064008e0') - - def testOrr1(self): - self.feed('orr r8, r7, r6') - self.check('068087e1') - - def testLsl(self): - self.feed('lsl r11, r5, r3') - self.feed('lsl r4, r8, r6') - self.check('15b3a0e1 1846a0e1') - - def testLsr(self): - self.feed('lsr r9, r0, r2') - self.feed('lsr r4, r8, r6') - self.check('3092a0e1 3846a0e1') - - def testBranches(self): - self.feed("""b sjakie - ble sjakie - bgt sjakie - beq sjakie - bl sjakie - sjakie: - b sjakie - ble sjakie - bgt sjakie - beq sjakie - bl sjakie""") - self.check('030000ea 020000da 010000ca 0000000a ffffffeb feffffea \ - fdffffda fcffffca fbffff0a faffffeb') - - def testPush(self): - self.feed('push {r11,r5,r4,lr}') - self.check('30482de9') - - def testPop(self): - self.feed('pop {r4,r5,r6}') - self.check('7000bde8') - - def testStr(self): - self.feed('str r9, [r2, 33]') - self.check('219082e5') - - def testLdr(self): - self.feed('ldr r5, [r3, 87]') - self.check('575093e5') - - def testLdrLabel(self): - self.feed('ldr r5, lab1') - self.feed('ldr r11, lab1') - self.feed('ldr r10, lab1') - self.feed('lab1:') - self.feed('dcd 0x12345566') - self.check('04509fe5 00b09fe5 04a01fe5 66553412') - - def testAdr(self): - self.feed('adr r5, cval') - self.feed('adr r9, cval') - self.feed('adr r8, cval') - self.feed('cval:') - self.feed('adr r11, cval') - self.feed('adr r12, cval') - self.feed('adr r1, cval') - self.check('04508fe2 00908fe2 04804fe2 08b04fe2 0cc04fe2 10104fe2') - - def testLdrLabelAddress(self): - self.feed('ldr r8, =a') - self.feed('a:') - self.check('04801fe5 04000000') - - def testLdrLabelAddressAt10000(self): - """ Link code at 0x10000 and check if symbol was correctly patched """ - self.feed('ldr r8, =a') - self.feed('a:') - spec = """ - MEMORY flash LOCATION=0x10000 SIZE=0x10000 { - SECTION(code) - } - """ - layout = load_layout(io.StringIO(spec)) - self.check('04801fe5 04000100', layout) - - def testCmp(self): - self.feed('cmp r4, r11') - self.feed('cmp r5, 0x50000') - self.check('0b0054e1 050855e3') - - def testSequence1(self): - self.feed('sub r4,r5,23') - self.feed('blt x') - self.feed('x:') - self.feed('mul r4,r5,r2') - self.check('174045e2 ffffffba 950204e0') - - def testMcr(self): - """ Test move coprocessor register from arm register """ - self.feed('mcr p15, 0, r1, c2, c0, 0') - self.feed('mcr p14, 0, r1, c8, c7, 0') - self.check('101f02ee 171e08ee') - - def testMrc(self): - self.feed('mrc p15, 0, r1, c2, c0, 0') - self.feed('mrc p14, 0, r1, c8, c7, 0') - self.check('101f12ee 171e18ee') - - def testRepeat(self): - self.feed('repeat 0x5') - self.feed('dcd 0x11') - self.feed('endrepeat') - self.check('11000000 11000000 11000000 11000000 11000000') - - -if __name__ == '__main__': - unittest.main() diff -r a7c444404df9 -r 0374c65cb437 test/testasm.py --- a/test/testasm.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ -#!/usr/bin/python - -import unittest -from ppci import CompilerError -from ppci.assembler import AsmLexer -from ppci.objectfile import ObjectFile -from ppci.outstream import BinaryOutputStream -from ppci.target.basetarget import Label -from ppci.buildfunctions import link -from ppci.layout import Layout - - -class AssemblerLexingCase(unittest.TestCase): - """ Tests the assemblers lexer """ - - def setUp(self): - self.lexer = AsmLexer([]) - - def do(self, asmline, toks): - output = [] - self.lexer.feed(asmline) - while 'EOF' not in output: - output.append(self.lexer.next_token().typ) - self.assertSequenceEqual(toks, output) - - def testLex0(self): - """ Check if the lexer is OK """ - asmline = 'mov rax, rbx ' - toks = ['ID', 'ID', ',', 'ID', 'EOF'] - self.do(asmline, toks) - - def testLex1(self): - """ Test if lexer correctly maps some tokens """ - asmline = 'lab1: mov rax, rbx ' - toks = ['ID', ':', 'ID', 'ID', ',', 'ID', 'EOF'] - self.do(asmline, toks) - - def testLex2(self): - """ Test if lexer correctly maps some tokens """ - asmline, toks = 'mov 3.13 0xC 13', ['ID', 'REAL', 'val5', 'val5', 'EOF'] - self.do(asmline, toks) - - def testLex3(self): - """ Test if lexer fails on a token that is invalid """ - asmline = '0z4: mov rax, rbx $ ' - with self.assertRaises(CompilerError): - self.do(asmline, []) - - -class OustreamTestCase(unittest.TestCase): - def test1(self): - obj = ObjectFile() - o = BinaryOutputStream(obj) - o.select_section('.text') - o.emit(Label('a')) - self.assertSequenceEqual(bytes(), obj.get_section('.text').data) - - -class AsmTestCaseBase(unittest.TestCase): - """ Base testcase for assembly """ - def feed(self, line): - self.assembler.assemble(line, self.ostream) - - def check(self, hexstr, layout=Layout()): - self.assembler.flush() - self.obj = link([self.obj], layout, self.target) - data = bytes(self.obj.get_section('code').data) - self.assertSequenceEqual(bytes.fromhex(hexstr), data) - - -if __name__ == '__main__': - unittest.main() diff -r a7c444404df9 -r 0374c65cb437 test/testc3.py --- a/test/testc3.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,649 +0,0 @@ -import unittest -import logging -import io -from ppci.c3 import Builder, Lexer -from ppci.target import SimpleTarget -import ppci - - -class testLexer(unittest.TestCase): - def setUp(self): - diag = ppci.DiagnosticsManager() - self.l = Lexer(diag) - - def testUnexpectedCharacter(self): - snippet = io.StringIO(""" var s \u6c34 """) - with self.assertRaises(ppci.CompilerError): - list(self.l.lex(snippet)) - - def check(self, snippet, toks): - toks2 = list(tok.typ for tok in self.l.lex(io.StringIO(snippet))) - self.assertSequenceEqual(toks, toks2) - - def testBlockComment(self): - snippet = """ - /* Demo */ - var int x = 0; - """ - toks = ['var', 'ID', 'ID', '=', 'NUMBER', ';', 'EOF'] - self.check(snippet, toks) - - def testBlockCommentMultiLine(self): - snippet = """ - /* Demo - bla1 - bla2 - */ - var int x = 0; - """ - toks = ['var', 'ID', 'ID', '=', 'NUMBER', ';', 'EOF'] - self.check(snippet, toks) - - -class testBuilder(unittest.TestCase): - def setUp(self): - self.diag = ppci.DiagnosticsManager() - self.builder = Builder(self.diag, SimpleTarget()) - self.diag.clear() - # Add a null logging handler to disable warning log messages: - nh = logging.NullHandler() - logging.getLogger().addHandler(nh) - - def makeFileList(self, snippet): - """ Try to make a list with opened files """ - if type(snippet) is list: - l2 = [] - for s in snippet: - if type(s) is str: - l2.append(io.StringIO(s)) - else: - l2.append(s) - return l2 - else: - return [io.StringIO(snippet)] - - def expectErrors(self, snippet, rows): - """ Helper to test for expected errors on rows """ - list(self.builder.build([io.StringIO(snippet)])) - actualErrors = [err.row for err in self.diag.diags] - if rows != actualErrors: - self.diag.printErrors() - self.assertSequenceEqual(rows, actualErrors) - - def expectOK(self, snippet): - """ Expect a snippet to be OK """ - ircode = list(self.builder.build(self.makeFileList(snippet))) - if len(self.diag.diags) > 0: - self.diag.printErrors() - self.assertTrue(all(ircode)) - self.assertEqual(0, len(self.diag.diags)) - return ircode - - def testPackage(self): - p1 = """module p1; - type int A; - """ - p2 = """module p2; - import p1; - var p1.A b; - """ - self.expectOK([p1, p2]) - - def testPackageMutual(self): - p1 = """module p1; - import p2; - type int A; - var p2.B b; - """ - p2 = """module p2; - import p1; - var p1.A a; - """ - self.expectOK([p1, p2]) - - def testConstant(self): - snip = """module C; - const int a = 2; - """ - self.expectOK(snip) - - @unittest.skip('Not checked yet') - def testConstantMutual(self): - snip = """module C; - const int a = b + 1; - const int b = a + 1; - function void f() - { - return b; - } - """ - self.expectOK(snip) - - def testPackageNotExists(self): - p1 = """module p1; - import p23; - """ - self.expectErrors(p1, [0]) - - def testFunctArgs(self): - snippet = """ - module testargs; - function void t2(int a, double b) - { - t2(2, 2); - t2(2); - t2(1, 1.2); - } - """ - self.expectErrors(snippet, [5, 6]) - - def testReturn(self): - snippet = """ - module testreturn; - function void t() - { - return; - } - """ - self.expectOK(snippet) - - def testReturn2(self): - snippet = """ - module testreturn; - function int t() - { - return 2; - } - """ - self.expectOK(snippet) - - def testExpressions(self): - snippet = """ - module test; - function void t(int a, double b) - { - var int a2; - var bool c; - - a2 = b * a; - c = a; - } - """ - self.expectErrors(snippet, [8, 9]) - - def testExpression1(self): - snippet = """ - module testexpr1; - function void t() - { - var int a, b, c; - a = 1; - b = a * 2 + a * a; - c = b * a - 3; - } - """ - self.expectOK(snippet) - - def testEmpty(self): - snippet = """ - module A - """ - self.expectErrors(snippet, [3]) - - def testEmpty2(self): - snippet = "" - self.expectErrors(snippet, [1]) - - def testRedefine(self): - snippet = """ - module test; - var int a; - var int b; - var int a; - """ - self.expectErrors(snippet, [5]) - - def testWhile(self): - snippet = """ - module tstwhile; - function void t() - { - var int i; - i = 0; - while (i < 1054) - { - i = i + 3; - } - } - """ - self.expectOK(snippet) - - def testWhile2(self): - snippet = """ - module tstwhile; - function void t() - { - while(true) - { - } - - while(false) - { - } - } - """ - self.expectOK(snippet) - - def testIf(self): - snippet = """ - module tstIFF; - function void t(int b) - { - var int a; - a = 2; - if (a > b) - { - if (a > 1337) - { - b = 2; - } - } - else - { - b = 1; - } - - return b; - } - """ - self.expectOK(snippet) - - def testAndCondition(self): - snippet = """ - module tst; - function void t() { - if (4 > 3 and 1 < 10) { - } - } - """ - self.expectOK(snippet) - - def testOrCondition(self): - snippet = """ - module tst; - function void t() { - if (3 > 4 or 3 < 10) { - } - } - """ - self.expectOK(snippet) - - def testNonBoolCondition(self): - snippet = """ - module tst; - function void t() { - if (3+3) { - } - } - """ - self.expectErrors(snippet, [4]) - - def testTypeDef(self): - snippet = """ - module testtypedef; - type int my_int; - function void t() - { - var my_int a; - var int b; - a = 2; - b = a + 2; - } - """ - self.expectOK(snippet) - - def testLocalVariable(self): - snippet = """ - module testlocalvar; - function void t() - { - var int a, b; - a = 2; - b = a + 2; - } - """ - self.expectOK(snippet) - - def testUnknownType(self): - snippet = """module testlocalvar; - function void t() - { - var int2 a; - } - """ - self.expectErrors(snippet, [4]) - - def testStruct1(self): - snippet = """ - module teststruct1; - function void t() - { - var struct {int x, y;} a; - a.x = 2; - a.y = a.x + 2; - } - """ - self.expectOK(snippet) - - def testStruct2(self): - """ Select struct member from non struct type """ - snippet = """ - module teststruct1; - function void t() { - var int a; - a.z = 2; - } - """ - self.expectErrors(snippet, [5]) - - def testArray(self): - snippet = """ - module testarray; - function void t() - { - var int[100] x; - var int a, b; - a = 2; - b = x[a*2+9 - a] * x[22+12]; - x[1] = x[2]; - } - """ - self.expectOK(snippet) - - def testArrayFail(self): - snippet = """ - module testarray; - function void t() - { - var bool c; - c = false; - var int[100] x; - x[1] = x[c]; - } - """ - self.expectErrors(snippet, [8]) - - def testArrayFail2(self): - snippet = """ - module testarray; - function void t() - { - var int c; - var int x; - c = x[2]; - } - """ - self.expectErrors(snippet, [7]) - - @unittest.skip('TODO') - def testArrayFail3(self): - snippet = """ - module testarray; - function void t() - { - var int c[20]; - } - """ - self.expectErrors(snippet, [7]) - - def testStructCall(self): - snippet = """ - module teststruct1; - function void t() - { - var struct {int x, y;} a; - a.x(9); - } - """ - self.expectErrors(snippet, [6]) - - def testString(self): - snippet = """ - module teststring; - function void t() - { - var string a; - a = "Hello world"; - print(a); - print("Moi"); - } - - function void print(string a) - { - } - """ - self.expectOK(snippet) - - def testSizeof1(self): - snippet = """ - module testsizeof; - - function void t() - { - var int a; - a = sizeof(int*); - } - """ - self.expectOK(snippet) - - def testSizeof2(self): - snippet = """ - module testsizeof2; - - function void t() - { - sizeof(int*) = 2; - } - """ - self.expectErrors(snippet, [6]) - - @unittest.skip('TODO: Too hard') - def testWrongVarUse(self): - snippet = """ - module testsizeof; - - function void t() - { - int a = 1; - } - """ - self.expectOK(snippet) - - def testPointerType1(self): - snippet = """ - module testpointer1; - var int* pa; - function void t() - { - var int a; - pa = &a; - *pa = 22; - a = *pa + *pa * 8; - } - """ - self.expectOK(snippet) - - def testPointerType(self): - snippet = """ - module testpointer; - var int* pa, pb; - function void t(int a, double b) - { - var int a2; - a2 = a; // parameters cannot be escaped for now.. - pa = &a2; - pb = pa; - *pa = 22; - } - """ - self.expectOK(snippet) - - def testPointerTypeInCorrect(self): - snippet = """ - module testpointerincorrect; - var int* pa; - function void t(int a, double b) - { - pa = 2; // type conflict - pa = &a; - pa = &2; // No valid lvalue - &a = pa; // No valid lvalue - **pa = 22; // Cannot deref int - } - """ - self.expectErrors(snippet, [6, 8, 9, 10]) - - def testPointerTypeIr(self): - snippet = """ - module testptr_ir; - function void t() - { - var int* a; - a = cast(40); - *a = 2; - } - """ - self.expectOK(snippet) - - def testPointerTypeIr2(self): - snippet = """ - module testptr_ir; - type struct {int x,y;}* gpio; - function void t() - { - var gpio a; - a = cast(40); - a->x = 2; - a->y = a->x - 14; - } - """ - self.expectOK(snippet) - - def testPointerArithmatic(self): - snippet = """ - module testpointerarithmatic; - function void t() - { - var int* pa; - *(pa+2) = 2; - } - """ - self.expectOK(snippet) - - def testWrongCast(self): - snippet = """ - module testptr_ir; - type struct {int x,y;}* gpio; - function void t() - { - var gpio a; - *cast(*a); - } - """ - self.expectErrors(snippet, [7]) - - def testLinkedList(self): - """ - Test if a struct can contain a field with a pointer to itself - """ - snippet = """ - module testlinkedlist; - - type struct { - int x; - list_t* next; - } list_t; - - function void t() - { - var list_t* a; - a = a->next; - } - """ - self.expectOK(snippet) - - def testInfiniteStruct(self): - """ - Test if a struct can contain a field with itself as type? - This should not be possible! - """ - snippet = """ - module testnestedstruct; - - type struct { - int x; - list_t inner; - } list_t; - - """ - self.expectErrors(snippet, [0]) - - def testMutualStructs(self): - """ - Test if two structs can contain each other! - This should not be possible! - """ - snippet = """ - module testnestedstruct; - - type struct { - int x; - B other; - } A; - - type struct { - int x; - A other; - } B; - - """ - self.expectErrors(snippet, [0]) - - def testComplexType(self): - snippet = """ - module testpointer; - type int my_int; - - type struct { - int x, y; - } point; - - type struct { - int mem1; - int memb2; - point P1; - } my_struct; - - type my_struct* my_sptr; - var int* pa; - - function void t(int a, int b, my_sptr x) - { - var my_struct *msp; - - var my_struct u, v; - var point *pt; - - pt = &msp->P1; - msp = x; - *pa = 22 + u.mem1 * v.memb2 - u.P1.x; - x->memb2 = *pa + a * b; - - msp->P1.x = a * x->P1.y; - } - """ - self.expectOK(snippet) - - -if __name__ == '__main__': - unittest.main() diff -r a7c444404df9 -r 0374c65cb437 test/testcg.py --- a/test/testcg.py Fri Jun 20 16:36:49 2014 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ -import unittest -import ppci -from ppci.codegen import CodeGenerator -from ppci import ir -from ppci.target.target_list import thumb_target -from ppci.outstream import BinaryOutputStream - - -def genTestFunction(): - m = ir.Module('tst') - f = ir.Function('tst') - m.add_function(f) - return m, f, f.entry - - -class testCodeGeneration(unittest.TestCase): - def setUp(self): - self.cg = CodeGenerator(thumb_target) - - def testFunction(self): - s = BinaryOutputStream(ppci.objectfile.ObjectFile()) - m, f, bb = genTestFunction() - bb.addInstruction(ir.Exp(ir.Const(123))) - bb.addInstruction(ir.Jump(f.epiloog)) - obj = self.cg.generate(m, s) - self.assertTrue(obj) - - -class testArmCodeGeneration(unittest.TestCase): - def testStack(self): - s = BinaryOutputStream(ppci.objectfile.ObjectFile()) - cg = CodeGenerator(thumb_target) - m, f, bb = genTestFunction() - bb.addInstruction(ir.Move(ir.Mem(ir.Const(1)), ir.Const(22))) - bb.addInstruction(ir.Jump(f.epiloog)) - cg.generate(m, s) - #s.dump() - - -if __name__ == '__main__': - unittest.main()