# HG changeset patch
# User Windel Bouwman
# Date 1369859797 -7200
# Node ID b01429a5d695fc9a68f4624f7400e531a41aa58d
# Parent f091e7d70996f2577acbf5478335bfb5246aca38
Fixed test

diff -r f091e7d70996 -r b01429a5d695 python/c3/builder.py
--- a/python/c3/builder.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/c3/builder.py	Wed May 29 22:36:37 2013 +0200
@@ -22,6 +22,8 @@
             return
         if not self.tc.checkPackage(pkg):
             return
+
+        # Only return ircode when everything is OK
         ircode = self.cg.gencode(pkg)
         return ircode
 
diff -r f091e7d70996 -r b01429a5d695 python/c3/lexer.py
--- a/python/c3/lexer.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/c3/lexer.py	Wed May 29 22:36:37 2013 +0200
@@ -67,5 +67,6 @@
         col = pos - line_start
         pos = line
         raise CompilerError('Unexpected character {0}'.format(s[pos]), pos)
 
-    yield Token('END', '', line)
+    loc = SourceLocation(line, 0, 0)
+    yield Token('END', '', loc)
diff -r f091e7d70996 -r b01429a5d695 python/c3/semantics.py
--- a/python/c3/semantics.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/c3/semantics.py	Wed May 29 22:36:37 2013 +0200
@@ -1,5 +1,6 @@
 from . import astnodes
 from .scope import Scope, topScope
+from ppci import CompilerError
 
 class Semantics:
     """ This class constructs the AST from parser input """
@@ -12,7 +13,7 @@
     def addSymbol(self, s):
         if self.curScope.hasSymbol(s.name):
             msg = 'Redefinition of {0}'.format(s.name)
-            self.diag.error(msg, s.loc)
+            raise CompilerError(msg, s.loc)
         else:
             self.curScope.addSymbol(s)
     def handlePackage(self, name, loc):
diff -r f091e7d70996 -r b01429a5d695 python/libasm.py
--- a/python/libasm.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/libasm.py	Wed May 29 22:36:37 2013 +0200
@@ -79,14 +79,19 @@
         return self.curTok
 
 
 class Assembler:
+    def handle_ins(self, id0):
+        self.ins = id0
+    def p_label(self, lname, cn):
+        self.label = lname
+
     def __init__(self):
         # Construct a parser given a grammar:
         g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', pyyacc.EPS])
         g.add_production('asmline', ['label', 'instruction', 'operands'])
-        g.add_production('label', ['ID', ':'])
-        g.add_production('label', [pyyacc.EPS]) # label is optional
-        g.add_production('instruction', ['ID'])
+        g.add_production('asmline', ['instruction', 'operands'])
+        g.add_production('label', ['ID', ':'], self.p_label)
+        g.add_production('instruction', ['ID'], self.handle_ins)
         g.add_production('operands', ['operand'])
         g.add_production('operands', ['operands', ',', 'operand'])
         g.add_production('operand', ['expression'])
@@ -102,22 +107,28 @@
         g.start_symbol = 'asmline'
         self.p = g.genParser()
 
+    def parse_line(self, line):
+        """ Parse line into asm AST """
+        tokens = tokenize(line)
+        self.p.parse(tokens)
+        aast = 1 # TODO
+        return aast
     def assemble(self, asmsrc):
-        lxr = Lexer(asmsrc)
-        prsr = Parser(lxr)
-        instructions = prsr.parse()
-        return instructions
+        lxr = Lexer(asmsrc)
+        prsr = Parser(lxr)
+        instructions = prsr.parse()
+        return instructions
 
     def assembleLine(self, line):
         """ Assemble a single source line.
            Do not take newlines into account """
-        tokens = tokenize(line)
-        self.p.parse(tokens)
+        aast = self.parseLine(line)
+        self.assemble_aast(aast)
 
-    def assembleAst(self, at):
+    def assemble_aast(self, at):
         """ Assemble a parsed asm line """
         pass
diff -r f091e7d70996 -r b01429a5d695 python/ppci/common.py
--- a/python/ppci/common.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/ppci/common.py	Wed May 29 22:36:37 2013 +0200
@@ -1,7 +1,16 @@
 from collections import namedtuple
 
 # Token is used in the lexical analyzer:
-Token = namedtuple('Token', 'typ val loc')
+class Token:
+    def __init__(self, typ, val, loc=None):
+        self.typ = typ
+        self.val = val
+        if loc is None:
+            loc = SourceLocation(0, 0, 0)
+        assert type(loc) is SourceLocation
+        self.loc = loc
+    def __repr__(self):
+        return 'Token({0}, {1})'.format(self.typ, self.val)
 
 class SourceLocation:
     def __init__(self, row, col, ln):
diff -r f091e7d70996 -r b01429a5d695 python/ppci/errors.py
--- a/python/ppci/errors.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/ppci/errors.py	Wed May 29 22:36:37 2013 +0200
@@ -3,10 +3,13 @@
 Diagnostic utils
 """
 
+from . import SourceLocation
+
 class CompilerError(Exception):
     def __init__(self, msg, loc):
         self.msg = msg
         self.loc = loc
+        assert type(loc) is SourceLocation, '{0} must be SourceLocation'.format(type(loc))
 
     def __repr__(self):
         return 'Compilererror {0} at row {1}'.format(self.msg, self.loc.row)
diff -r f091e7d70996 -r b01429a5d695 python/pyyacc.py
--- a/python/pyyacc.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/pyyacc.py	Wed May 29 22:36:37 2013 +0200
@@ -28,14 +28,15 @@
         self.productions = []
         self._first = None # Cached first set
 
-    def add_production(self, name, symbols):
+    def add_production(self, name, symbols, f=None):
         """ Add a production rule to the grammar """
-        production = Production(name, symbols)
+        production = Production(name, symbols, f)
         self.productions.append(production)
         if name in self.terminals:
             raise ParserGenerationException("Cannot redefine terminal {0}".format(name))
         if not name in self.nonterminals:
             self.nonterminals.append(name)
+        self._first = None # Invalidate cached version
 
     def productionsForName(self, name):
         """ Retrieve all productions for a non terminal """
@@ -156,7 +157,7 @@
         """ Checks no symbols are undefined """
         for production in self.productions:
             for symbol in production.symbols:
-                if symbol not in self.Symbols:
+                if symbol not in self.Symbols + [EPS]:
                     raise ParserGenerationException('Symbol {0} undefined'.format(symbol))
@@ -169,13 +170,31 @@
 
         # First generate all item sets by using the nextItemset function:
         states, transitions = self.genCanonicalSet(iis)
+
+        def action_str(act):
+            a, p = act
+            if a is SHIFT:
+                return 'Shift {0}'.format(0)
+            elif a is REDUCE:
+                return 'Reduce {0}'.format(p)
+            return 'Other'
 
         def setAction(state, t, action):
             key = (state, t)
             if key in action_table:
                 action2 = action_table[key]
                 if action != action2:
-                    raise ParserGenerationException('LR construction conflict')
+                    if (action2[0] == REDUCE) and (action[0] == SHIFT):
+                        # Automatically resolve and do the shift action!
+                        # Simple, but almost always what you want!!
+                        action_table[key] = action
+                    else:
+                        if (action2[0] == SHIFT) and (action[0] == REDUCE):
+                            pass
+                        else:
+                            a1 = action_str(action)
+                            a2 = action_str(action2)
+                            raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2))
             else:
                 action_table[key] = action
 
@@ -204,9 +223,10 @@
 
 class Production:
     """ Production rule for a grammar """
-    def __init__(self, name, symbols):
+    def __init__(self, name, symbols, f=None):
         self.name = name
         self.symbols = symbols
+        self.f = f
 
     def __repr__(self):
         return '{0} -> {1}'.format(self.name, self.symbols)
@@ -287,18 +307,26 @@
         """ Parse an iterable with tokens """
         assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks))
         stack = [0]
-        look_ahead = toks.__next__()
+        try:
+            look_ahead = toks.__next__()
+        except StopIteration:
+            look_ahead = Token(EOF, EOF)
         assert type(look_ahead) is Token
         while True:
             state = stack[-1]   # top of stack
             key = (state, look_ahead.typ)
             if not key in self.action_table:
-                raise Exception('Error parsing at character {0}'.format(look_ahead))
+                raise ParserException('Error parsing at character {0}'.format(look_ahead))
             action, param = self.action_table[key]
             if action == REDUCE:
+                #print('reduce', param)
+                f_args = []
                 for s in param.symbols:
                     stack.pop()
                     stack.pop()
+                    f_args.append(0)
+                if param.f:
+                    param.f(*f_args)
                 state = stack[-1]
                 stack.append(param.name)
                 stack.append(self.goto_table[(state, param.name)])
@@ -308,34 +336,8 @@
                 try:
                     look_ahead = toks.__next__()
                 except StopIteration:
-                    look_ahead = Token(EOF, EOF, 0)
+                    look_ahead = Token(EOF, EOF)
                 assert type(look_ahead) is Token
             elif action == ACCEPT:
                 break
 
-
-def testSimpleGrammar():
-    # 1. define a simple grammar:
-    g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
-    g.add_production('input', ['expression'])
-    g.add_production('expression', ['term'])
-    g.add_production('expression', ['expression', '+', 'term'])
-    g.add_production('term', ['factor'])
-    g.add_production('term', ['term', '*', 'factor'])
-    g.add_production('factor', ['(', 'expression', ')'])
-    g.add_production('factor', ['identifier'])
-    g.start_symbol = 'input'
-    # 2. define input:
-    tokens = ['identifier', '+', 'identifier', '+', 'identifier']
-    # 3. build parser:
-    p = g.genParser()
-    # 4. feed input:
-    def genTokens(lst):
-        for t in lst:
-            yield Token(t, t, 0)
-    p.parse(genTokens(tokens))
-
-
-if __name__ == '__main__':
-    testSimpleGrammar()
-
diff -r f091e7d70996 -r b01429a5d695 python/testasm.py
--- a/python/testasm.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/testasm.py	Wed May 29 22:36:37 2013 +0200
@@ -35,12 +35,18 @@
     def testParse(self):
         asmline = 'lab1: mov rax, rbx'
         a = libasm.Assembler()
-        a.assembleLine(asmline)
+        a.parse_line(asmline)
 
     def testParse2(self):
         asmline = 'a: mov rax, [rbx + 2]'
         a = libasm.Assembler()
-        a.assembleLine(asmline)
+        a.parse_line(asmline)
+
+    def testParse3(self):
+        # A label must be optional:
+        asmline = 'mov rax, 1'
+        a = libasm.Assembler()
+        a.parse_line(asmline)
 
 if __name__ == '__main__':
     unittest.main()
diff -r f091e7d70996 -r b01429a5d695 python/testc3.py
--- a/python/testc3.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/testc3.py	Wed May 29 22:36:37 2013 +0200
@@ -136,20 +136,23 @@
         }
         """
         self.diag.clear()
-        ir = self.builder.build(snippet)
+        ircode = self.builder.build(snippet)
         self.assertEqual(len(self.diag.diags), 3)
         self.assertEqual(self.diag.diags[0].loc.row, 8)
         self.assertEqual(self.diag.diags[1].loc.row, 9)
         self.assertEqual(self.diag.diags[2].loc.row, 10)
-        self.assertFalse(ir)
+        self.assertFalse(ircode)
     def testEmpty(self):
         snippet = """
        package A
        """
-        self.builder.build(snippet)
+        ircode = self.builder.build(snippet)
+        self.assertFalse(ircode)
     def testEmpty2(self):
         snippet = ""
-        self.builder.build(snippet)
+        self.diag.clear()
+        ircode = self.builder.build(snippet)
+        self.assertFalse(ircode)
     def testRedefine(self):
         snippet = """
        package test;
@@ -158,7 +161,8 @@
        var int a;
        """
         self.diag.clear()
-        self.builder.build(snippet)
+        ircode = self.builder.build(snippet)
+        self.assertFalse(ircode)
         self.assertEqual(len(self.diag.diags), 1)
         self.assertEqual(self.diag.diags[0].loc.row, 5)
     def testWhile(self):
diff -r f091e7d70996 -r b01429a5d695 python/testpyy.py
--- a/python/testpyy.py	Sun May 26 23:58:59 2013 +0200
+++ b/python/testpyy.py	Wed May 29 22:36:37 2013 +0200
@@ -1,18 +1,16 @@
 import unittest, pprint
-from pyyacc import Grammar, Item, EOF, ParserGenerationException
+from pyyacc import Grammar, Item, ParserGenerationException, ParserException, EPS, EOF
 from ppci import Token
 
 def genTokens(lst):
     for t in lst:
-        yield Token(t, t, 0)
+        yield Token(t, t)
 
 class testLR(unittest.TestCase):
-    def setUp(self):
-        pass
-
+    """ Test basic LR(1) parser generator constructs """
     def testSimpleGrammar(self):
         # 1. define a simple grammar:
-        g = Grammar(['EOF', 'identifier', '(', ')', '+', '*'])
+        g = Grammar(['identifier', '(', ')', '+', '*'])
         g.add_production('input', ['expression'])
         g.add_production('expression', ['term'])
         g.add_production('expression', ['expression', '+', 'term'])
@@ -30,7 +28,7 @@
     def testReduceReduceConflict(self):
         """ Check if a reduce-reduce conflict is detected """
         # Define a grammar with an obvious reduce-reduce conflict:
-        g = Grammar([EOF, 'id'])
+        g = Grammar(['id'])
         g.add_production('goal', ['a'])
         g.add_production('a', ['b'])
         g.add_production('a', ['c'])
@@ -40,16 +38,22 @@
         with self.assertRaises(ParserGenerationException):
             p = g.genParser()
     def testShiftReduceConflict(self):
-        g = Grammar([EOF, 'if', 'then', 'else'])
-        g.add_production('if_stmt', ['if', 'then'])
-        g.add_production('if_stmt', ['if', 'then', 'else'])
-        g.add_production('stmt', ['if_stmt', 'else'])
+        """ Must be handled automatically by doing shift """
+        g = Grammar([EOF, 'if', 'then', 'else', 'ass'])
+        # Ambiguous grammar:
+        g.add_production('if_stmt', ['if', 'then', 'stmt'])
+        g.add_production('if_stmt', ['if', 'then', 'stmt', 'else', 'stmt'])
+        g.add_production('stmt', ['if_stmt'])
+        g.add_production('stmt', ['ass'])
         g.start_symbol = 'stmt'
-        with self.assertRaises(ParserGenerationException):
-            g.genParser()
+        p = g.genParser()
+        # Ambiguous program:
+        tokens = genTokens(['if', 'then','if', 'then', 'ass', 'else', 'ass' ])
+        p.parse(tokens)
+
     def testUndefinedTerminal(self):
         """ Test correct behavior when a terminal is undefined """
-        g = Grammar([EOF, 'b'])
+        g = Grammar(['b'])
         g.add_production('goal', ['a'])
         g.add_production('a', ['b'])
         g.add_production('a', ['c'])
@@ -65,6 +69,41 @@
         g.add_production('a', ['c'])
         g.start_symbol = 'goal'
         g.genParser()
+    def testEmpty(self):
+        """ Test empty token stream """
+        g = Grammar([','])
+        g.add_production('input', [','])
+        g.start_symbol = 'input'
+        p = g.genParser()
+        tokens = genTokens([])
+        with self.assertRaises(ParserException):
+            p.parse(tokens)
+
+    def testEps(self):
+        """ Test epsilon terminal """
+        g = Grammar(['a', 'b'])
+        g.add_production('input', ['optional_a', 'b'])
+        g.add_production('optional_a', ['a'])
+        g.add_production('optional_a', [])
+        g.start_symbol = 'input'
+        p = g.genParser()
+        tokens = genTokens(['b'])
+        p.parse(tokens)
+
+    def testEps2(self):
+        g = Grammar(['id', ':'])
+        g.add_production('input', ['opt_lab', 'ins', 'op1'])
+        g.add_production('input', ['ins', 'op1'])
+        g.add_production('opt_lab', ['id', ':'])
+        g.add_production('ins', ['id'])
+        g.add_production('op1', ['id'])
+        g.start_symbol = 'input'
+        p = g.genParser()
+        tokens = genTokens(['id', ':', 'id', 'id']) # i.e. "lab_0: inc rax"
+        p.parse(tokens)
+        tokens = genTokens(['id', 'id']) # i.e. "inc rax"
+        p.parse(tokens)
+
 
 class testExpressionGrammar(unittest.TestCase):
     def setUp(self):
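
Usage sketch (not part of the changeset): a minimal illustration of how the production callbacks added to pyyacc.py above are meant to be hooked up. The grammar, the on_label name and the printed message are illustrative only; with this revision the callback still receives placeholder arguments (one zero per production symbol, see the f_args handling in parse()) rather than the matched tokens.

    from pyyacc import Grammar
    from ppci import Token

    def on_label(name_tok, colon_tok):
        # Invoked when the 'label' production is reduced; in this revision
        # both arguments are still the placeholder value 0.
        print('reduced a label:', name_tok, colon_tok)

    g = Grammar(['ID', ':'])
    g.add_production('line', ['label', 'ID'])
    g.add_production('line', ['ID'])
    g.add_production('label', ['ID', ':'], on_label)  # attach a reduce callback
    g.start_symbol = 'line'
    p = g.genParser()

    def gen_tokens(names):
        # EOF is synthesised by the parser when the stream runs out.
        for name in names:
            yield Token(name, name)

    p.parse(gen_tokens(['ID', ':', 'ID']))  # a labelled line; on_label fires on the reduce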