# HG changeset patch # User Windel Bouwman # Date 1370027204 -7200 # Node ID 37ac6c016e0f063cec382de4402f467ad1387e6d # Parent b01429a5d695fc9a68f4624f7400e531a41aa58d Expanded asm subsystem diff -r b01429a5d695 -r 37ac6c016e0f python/libasm.py --- a/python/libasm.py Wed May 29 22:36:37 2013 +0200 +++ b/python/libasm.py Fri May 31 21:06:44 2013 +0200 @@ -78,35 +78,118 @@ def Peak(self): return self.curTok -class Assembler: - def handle_ins(self, id0): - self.ins = id0 - def p_label(self, lname, cn): - self.label = lname +class ANode: + def __eq__(self, other): + return self.__repr__() == other.__repr__() + +class ALabel(ANode): + def __init__(self, name): + self.name = name + def __repr__(self): + return '{0}:'.format(self.name) + +class AInstruction(ANode): + def __init__(self, opcode, operands): + self.opcode = opcode + self.operands = operands + def __repr__(self): + ops = ', '.join(map(str, self.operands)) + return '{0} {1}'.format(self.opcode, ops) + +class AExpression(ANode): + def __add__(self, other): + return ABinop('+', self, other) + def __mul__(self, other): + return ABinop('*', self, other) +class ABinop(AExpression): + def __init__(self, op, arg1, arg2): + self.op = op + self.arg1 = arg1 + self.arg2 = arg2 + def __repr__(self): + return '{0} {1} {2}'.format(self.op, self.arg1, self.arg2) + +class AUnop(AExpression): + def __init__(self, op, arg): + self.op = op + self.arg = arg + def __repr__(self): + return '{0} {1}'.format(self.op, self.arg) + +class ASymbol(AExpression): + def __init__(self, name): + self.name = name + def __repr__(self): + return self.name + +class ANumber(AExpression): + def __init__(self, n): + self.n = n + def __repr__(self): + return '{0}'.format(self.n) + +class Assembler: def __init__(self): + self.output = [] # Construct a parser given a grammar: - g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', pyyacc.EPS]) - - g.add_production('asmline', ['label', 'instruction', 'operands']) - g.add_production('asmline', ['instruction', 'operands']) + ident = lambda x: x # Identity helper function + g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS]) + g.add_production('asmline', ['label', 'instruction']) + g.add_production('asmline', ['instruction']) + g.add_production('asmline', ['label']) g.add_production('label', ['ID', ':'], self.p_label) - g.add_production('instruction', ['ID'], self.handle_ins) - g.add_production('operands', ['operand']) - g.add_production('operands', ['operands', ',', 'operand']) - g.add_production('operand', ['expression']) - g.add_production('operand', ['[', 'expression', ']']) - g.add_production('expression', ['term']) - g.add_production('expression', ['expression', 'addop', 'term']) - g.add_production('addop', ['-']) - g.add_production('addop', ['+']) - g.add_production('term', ['factor']) - g.add_production('factor', ['ID']) - g.add_production('factor', ['NUMBER']) - # TODO: expand grammar + g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) + g.add_production('instruction', ['opcode'], self.p_ins_2) + g.add_production('opcode', ['ID'], ident) + g.add_production('operands', ['operand'], self.p_operands_1) + g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) + g.add_production('operand', ['expression'], ident) + g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) + g.add_production('expression', ['term'], ident) + g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) + g.add_production('addop', ['-'], ident) + g.add_production('addop', ['+'], ident) + g.add_production('mulop', ['*'], ident) + g.add_production('term', ['factor'], ident) + g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) + g.add_production('factor', ['ID'], self.p_symbol) + g.add_production('factor', ['NUMBER'], self.p_number) g.start_symbol = 'asmline' + self.p = g.genParser() - self.p = g.genParser() + # Parser handlers: + def p_ins_1(self, opc, ops): + ins = AInstruction(opc, ops) + self.emit(ins) + def p_ins_2(self, opc): + self.p_ins_1(opc, []) + def p_operands_1(self, op1): + return [op1] + def p_operands_2(self, ops, comma, op2): + assert type(ops) is list + ops.append(op2) + return ops + def p_mem_op(self, brace_open, exp, brace_close): + return AUnop('[]', exp) + def handle_ins(self, id0, operands): + ins = AInstruction(id0) + self.emit(ins) + def p_label(self, lname, cn): + lab = ALabel(lname) + self.emit(lab) + def p_binop(self, exp1, op, exp2): + return ABinop(op, exp1, exp2) + def p_symbol(self, name): + return ASymbol(name) + def p_number(self, n): + n = int(n) + return ANumber(n) + + # Top level: + def emit(self, a): + self.output.append(a) + def parse_line(self, line): """ Parse line into asm AST """ tokens = tokenize(line) diff -r b01429a5d695 -r 37ac6c016e0f python/pyyacc.py --- a/python/pyyacc.py Wed May 29 22:36:37 2013 +0200 +++ b/python/pyyacc.py Fri May 31 21:06:44 2013 +0200 @@ -209,7 +209,7 @@ if item.IsReduce: if item.production.name == self.start_symbol and item.look_ahead == EOF: # Rule 3: accept: - setAction(states.index(state), item.look_ahead, (ACCEPT, None)) + setAction(states.index(state), item.look_ahead, (ACCEPT, item.production)) else: # Rule 2, reduce item: setAction(states.index(state), item.look_ahead, (REDUCE, item.production)) @@ -218,7 +218,7 @@ if key in transitions: goto_table[key] = transitions[key] - return LRParser(action_table, goto_table) + return LRParser(action_table, goto_table, self.start_symbol) class Production: @@ -299,45 +299,65 @@ class LRParser: """ LR parser """ - def __init__(self, action_table, goto_table): + def __init__(self, action_table, goto_table, start_symbol): self.action_table = action_table self.goto_table = goto_table + self.start_symbol = start_symbol def parse(self, toks): """ Parse an iterable with tokens """ assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks)) stack = [0] + r_data_stack = [] try: look_ahead = toks.__next__() except StopIteration: look_ahead = Token(EOF, EOF) assert type(look_ahead) is Token - while True: + # TODO: exit on this condition: + while stack != [0, self.start_symbol, 2222]: + #print(stack) state = stack[-1] # top of stack key = (state, look_ahead.typ) if not key in self.action_table: raise ParserException('Error parsing at character {0}'.format(look_ahead)) action, param = self.action_table[key] if action == REDUCE: - #print('reduce', param) f_args = [] for s in param.symbols: stack.pop() stack.pop() - f_args.append(0) + f_args.append(r_data_stack.pop()) + f_args.reverse() + r_data = None if param.f: - param.f(*f_args) + r_data = param.f(*f_args) state = stack[-1] stack.append(param.name) stack.append(self.goto_table[(state, param.name)]) + r_data_stack.append(r_data) elif action == SHIFT: stack.append(look_ahead.typ) stack.append(param) + r_data_stack.append(look_ahead.val) try: look_ahead = toks.__next__() except StopIteration: look_ahead = Token(EOF, EOF) assert type(look_ahead) is Token elif action == ACCEPT: + # Pop last rule data off the stack: + f_args = [] + for s in param.symbols: + stack.pop() + stack.pop() + f_args.append(r_data_stack.pop()) + f_args.reverse() + if param.f: + param.f(*f_args) + # Break out! break + # At exit, the stack must be 1 long + # TODO: fix that this holds: + #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) diff -r b01429a5d695 -r 37ac6c016e0f python/testasm.py --- a/python/testasm.py Wed May 29 22:36:37 2013 +0200 +++ b/python/testasm.py Fri May 31 21:06:44 2013 +0200 @@ -3,6 +3,7 @@ import unittest import libasm import ppci +from libasm import AInstruction, ABinop, AUnop, ASymbol, ALabel, ANumber class AssemblerTestCase(unittest.TestCase): """ @@ -41,12 +42,45 @@ asmline = 'a: mov rax, [rbx + 2]' a = libasm.Assembler() a.parse_line(asmline) + output = [] + output.append(ALabel('a')) + output.append(AInstruction('mov', [ASymbol('rax'), AUnop('[]', ASymbol('rbx') + ANumber(2))])) + self.assertSequenceEqual(output, a.output) def testParse3(self): # A label must be optional: asmline = 'mov rax, 1' a = libasm.Assembler() a.parse_line(asmline) + output = [] + output.append(AInstruction('mov', [ASymbol('rax'), ANumber(1)])) + self.assertSequenceEqual(output, a.output) + + def testParse4(self): + # Test 3 operands: + asmline = 'add rax, [4*rbx + 22], rcx' + a = libasm.Assembler() + a.parse_line(asmline) + output = [] + ops = [] + ops.append(ASymbol('rax')) + ops.append(AUnop('[]', ANumber(4) * ASymbol('rbx') + ANumber(22))) + ops.append(ASymbol('rcx')) + output.append(AInstruction('add', ops)) + self.assertSequenceEqual(output, a.output) + + def testParse5(self): + # An instruction must be optional: + asmline = 'lab1:' + a = libasm.Assembler() + a.parse_line(asmline) + output = [] + output.append(ALabel('lab1')) + self.assertSequenceEqual(output, a.output) + + def testX86(self): + # TODO + pass if __name__ == '__main__': unittest.main() diff -r b01429a5d695 -r 37ac6c016e0f python/testpyy.py --- a/python/testpyy.py Wed May 29 22:36:37 2013 +0200 +++ b/python/testpyy.py Fri May 31 21:06:44 2013 +0200 @@ -48,7 +48,7 @@ g.start_symbol = 'stmt' p = g.genParser() # Ambiguous program: - tokens = genTokens(['if', 'then','if', 'then', 'ass', 'else', 'ass' ]) + tokens = genTokens(['if', 'then','if', 'then', 'ass', 'else', 'ass']) p.parse(tokens) def testUndefinedTerminal(self): @@ -104,6 +104,22 @@ tokens = genTokens(['id', 'id']) # i.e. "inc rax" p.parse(tokens) + def test_cb(self): + """ Test callback of one rule and order or parameters """ + self.cb_called = False + def cb(a, c, b): + self.cb_called = True + self.assertEqual(a, 'a') + self.assertEqual(b, 'b') + self.assertEqual(c, 'c') + g = Grammar(['a', 'b', 'c']) + g.add_production('goal', ['a', 'c', 'b'], cb) + g.start_symbol = 'goal' + p = g.genParser() + tokens = genTokens(['a', 'c', 'b']) + p.parse(tokens) + self.assertTrue(self.cb_called) + class testExpressionGrammar(unittest.TestCase): def setUp(self):