lcfOS
changeset 318:e84047f29c78
Add burg and yacc initial attempts
author    Windel Bouwman
date      Tue, 31 Dec 2013 12:38:15 +0100
parents   e30a77ae359b
children  8d07a4254f04
files     python/arm.pb python/asm.py python/burg.x python/ppci/irutils.py
          python/pyburg.py python/pyyacc.py python/target/arminstructionselector.py
          python/tree.py python/yacc.py python/zcc.py readme.rst
          test/expression.x test/testasm.py test/testpyy.py
diffstat  14 files changed, 568 insertions(+), 128 deletions(-)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/python/arm.pb Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,7 @@
+
+
+
+
+reg: ADD32(reg, const) { self.emit(Add, src=[$1], dst=[$c], other=[$2]) } 1
+
+
--- a/python/asm.py Sun Dec 22 15:50:59 2013 +0100 +++ b/python/asm.py Tue Dec 31 12:38:15 2013 +0100 @@ -1,19 +1,20 @@ #!/usr/bin/env python3 -import re, argparse +import re +import argparse import pyyacc from ppci import Token, CompilerError, SourceLocation from target import Target, Label from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber def tokenize(s): - """ + """ Tokenizer, generates an iterator that returns tokens! This GREAT example was taken from python re doc page! - """ - tok_spec = [ + """ + tok_spec = [ ('REAL', r'\d+\.\d+'), ('HEXNUMBER', r'0x[\da-fA-F]+'), ('NUMBER', r'\d+'), @@ -22,13 +23,13 @@ ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), ('STRING', r"'.*?'"), ('COMMENT', r";.*") - ] - tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) - gettok = re.compile(tok_re).match - line = 1 - pos = line_start = 0 - mo = gettok(s) - while mo is not None: + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + line = 1 + pos = line_start = 0 + mo = gettok(s) + while mo is not None: typ = mo.lastgroup val = mo.group(typ) if typ == 'NEWLINE': @@ -51,10 +52,11 @@ yield Token(typ, val, loc) pos = mo.end() mo = gettok(s, pos) - if pos != len(s): + if pos != len(s): col = pos - line_start loc = SourceLocation('', line, col, 0) raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) + yield Token('EOF', pyyacc.EOF) class Lexer: @@ -62,33 +64,29 @@ self.tokens = tokenize(src) self.curTok = self.tokens.__next__() - def eat(self): + def next_token(self): t = self.curTok - self.curTok = self.tokens.__next__() + if t.typ != 'EOF': + self.curTok = self.tokens.__next__() return t - @property - def Peak(self): - return self.curTok - class Parser: def __init__(self): # Construct a parser given a grammar: ident = lambda x: x # Identity helper function - g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}']) + g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}', + pyyacc.EOF]) g.add_production('asmline', ['asmline2']) g.add_production('asmline', ['asmline2', 'COMMENT']) g.add_production('asmline2', ['label', 'instruction']) g.add_production('asmline2', ['instruction']) g.add_production('asmline2', ['label']) g.add_production('asmline2', []) - g.add_production('optcomment', []) - g.add_production('optcomment', ['COMMENT']) g.add_production('label', ['ID', ':'], self.p_label) g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) g.add_production('instruction', ['opcode'], self.p_ins_2) - g.add_production('opcode', ['ID'], ident) + g.add_production('opcode', ['ID'], lambda x: x.val) g.add_production('operands', ['operand'], self.p_operands_1) g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) g.add_production('operand', ['expression'], ident) @@ -98,13 +96,13 @@ g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) g.add_production('expression', ['term'], ident) g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) - g.add_production('addop', ['-'], ident) - g.add_production('addop', ['+'], ident) - g.add_production('mulop', ['*'], ident) + g.add_production('addop', ['-'], lambda x: x.val) + g.add_production('addop', ['+'], lambda x: x.val) + g.add_production('mulop', ['*'], lambda x: x.val) g.add_production('term', ['factor'], ident) g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) - 
g.add_production('factor', ['ID'], lambda name: ASymbol(name)) - g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num))) + g.add_production('factor', ['ID'], lambda name: ASymbol(name.val)) + g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val))) g.start_symbol = 'asmline' self.p = g.genParser() @@ -112,10 +110,13 @@ def p_ins_1(self, opc, ops): ins = AInstruction(opc, ops) self.emit(ins) + def p_ins_2(self, opc): self.p_ins_1(opc, []) + def p_operands_1(self, op1): return [op1] + def p_operands_2(self, ops, comma, op2): assert type(ops) is list ops.append(op2) @@ -131,17 +132,20 @@ def p_list_op(self, brace_open, lst, brace_close): return AUnop('{}', lst) + def p_mem_op(self, brace_open, exp, brace_close): return AUnop('[]', exp) + def p_label(self, lname, cn): - lab = ALabel(lname) + lab = ALabel(lname.val) self.emit(lab) + def p_binop(self, exp1, op, exp2): return ABinop(op, exp1, exp2) - def parse(self, tokens, emitter): + def parse(self, lexer, emitter): self.emit = emitter - self.p.parse(tokens) + self.p.parse(lexer) # Pre construct parser to save time: asmParser = Parser() @@ -163,7 +167,7 @@ def parse_line(self, line): """ Parse line into asm AST """ - tokens = tokenize(line) + tokens = Lexer(line) self.p.parse(tokens, self.emit) def assemble(self, asmsrc): @@ -172,16 +176,15 @@ self.assemble_line(line) def assemble_line(self, line): - """ - Assemble a single source line. - Do not take newlines into account + """ + Assemble a single source line. + Do not take newlines into account """ self.parse_line(line) self.assemble_aast() def assemble_aast(self): """ Assemble a parsed asm line """ - # TODO if not self.target: raise CompilerError('Cannot assemble without target') while self.stack: @@ -199,8 +202,8 @@ if __name__ == '__main__': # When run as main file, try to grab command line arguments: parser = argparse.ArgumentParser(description="Assembler") - parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') + parser.add_argument('sourcefile', type=argparse.FileType('r'), + help='the source file to assemble') args = parser.parse_args() a = Assembler() obj = a.assemble(args.sourcefile.read()) -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/python/burg.x Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,15 @@
+
+from tree import Tree
+%tokens ':' ';' '(' ')' ',' template id number '%%' header
+
+%%
+
+rule: id ':' tree template cost { self.add_rule($1, $3, $4, $5) };
+
+cost: number { return number.val };
+
+tree: id { return Tree($1) }
+    | id '(' tree ')' { return Tree($1, $3) }
+    | id '(' tree ',' tree ')' { return Tree($1, $3, $5) };
+
+
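The burg.x spec above is itself written in the xacc grammar format that yacc.py (added further down in this changeset) consumes, and pyburg.py imports a module named burg_parser, so the burg front-end is presumably bootstrapped by running the xacc generator over burg.x first. A minimal sketch of that step, using the XaccLexer, XaccParser and XaccGenerator classes from yacc.py the way its main() does; the paths and the output module name burg_parser.py are assumptions (the name is taken from the import in pyburg.py):

    # Hedged sketch: generate burg_parser.py from burg.x with the xacc classes.
    # File names are assumptions; burg_parser is the module pyburg.py imports.
    from yacc import XaccLexer, XaccParser, XaccGenerator

    lexer = XaccLexer()
    lexer.feed(open('python/burg.x').read())       # lex the grammar spec
    grammar = XaccParser(lexer).parse_grammar()    # recursive descent over the spec
    with open('python/burg_parser.py', 'w') as f:
        XaccGenerator().generate(grammar, f)       # emit the table-driven parser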
--- a/python/ppci/irutils.py Sun Dec 22 15:50:59 2013 +0100
+++ b/python/ppci/irutils.py Tue Dec 31 12:38:15 2013 +0100
@@ -119,7 +119,7 @@
             return self.next_token()
         else:
             raise IrParseException('Expected "{}" got "{}"'.format(typ, self.Peak))
-    
+
     def parse_module(self):
         """ Entry for recursive descent parser """
         self.Consume('module')
@@ -132,7 +132,7 @@
         else:
             raise IrParseException('Expected function got {}'.format(self.Peak))
         return module
-    
+
     def parse_function(self):
         self.Consume('function')
         self.parse_type()
@@ -259,7 +259,7 @@
         assert block.LastInstruction.IsTerminator
         for i in block.Instructions[:-1]:
             assert not isinstance(i, ir.LastStatement)
-    
+
     def verify_block(self, block):
         for instruction in block.Instructions:
             self.verify_instruction(instruction)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/pyburg.py Tue Dec 31 12:38:15 2013 +0100 @@ -0,0 +1,102 @@ +#!/usr/bin/python + +""" Bottom up rewrite generator in python """ +import sys +import re +import argparse +from ppci import Token +import burg_parser + + +class BurgLexer: + def feed(self, txt): + tok_spec = [ + ('ID', r'[A-Za-z][A-Za-z\d_]*'), + ('STRING', r"'[^']*'"), + ('BRACEDCODE', r"\{[^\}]*\}"), + ('OTHER', r'[:;\|]'), + ('SKIP', r'[ ]') + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + + lines = txt.split('\n') + def tokenize_line(line): + """ Generator that splits up a line into tokens """ + mo = gettok(line) + pos = 0 + while mo: + typ = mo.lastgroup + val = mo.group(typ) + if typ == 'ID': + yield Token(typ, val) + elif typ == 'STRING': + typ = 'ID' + yield Token(typ, val[1:-1]) + elif typ == 'OTHER': + typ = val + yield Token(typ, val) + elif typ == 'BRACEDCODE': + yield Token(typ, val) + elif typ == 'SKIP': + pass + else: + raise NotImplementedError(str(typ)) + pos = mo.end() + mo = gettok(line, pos) + if len(line) != pos: + raise ParseError('Lex fault at {}'.format(line)) + + def tokenize(): + section = 0 + for line in lines: + line = line.strip() + if not line: + continue # Skip empty lines + if line == '%%': + section += 1 + yield Token('%%', '%%') + continue + if section == 0: + if line.startswith('%tokens'): + yield Token('%tokens', '%tokens') + yield from tokenize_line(line[7:]) + else: + yield Token('HEADER', line) + elif section == 1: + yield from tokenize_line(line) + yield Token('eof', 'eof') + self.tokens = tokenize() + self.token = self.tokens.__next__() + + def next_token(self): + t = self.token + if t.typ != 'eof': + self.token = self.tokens.__next__() + return t + + +class BurgParser(burg_parser.Parser): + """ Derive from automatically generated parser """ + def add_rule(self, *args): + print(args) + + +def main(): + # Parse arguments: + parser = argparse.ArgumentParser(description='pyburg bottom up rewrite system generator compiler compiler') + parser.add_argument('source', type=argparse.FileType('r'), \ + help='the parser specification') + parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ + default=sys.stdout) + args = parser.parse_args() + src = args.source.read() + args.source.close() + + l = BurgLexer() + p = BurgParser() + l.feed(src) + p.parse(l) + +if __name__ == '__main__': + main()
--- a/python/pyyacc.py Sun Dec 22 15:50:59 2013 +0100 +++ b/python/pyyacc.py Tue Dec 31 12:38:15 2013 +0100 @@ -2,14 +2,11 @@ Parser generator script """ -import hashlib from ppci import Token EPS = 'EPS' EOF = 'EOF' -SHIFT = 1 -REDUCE = 2 -ACCEPT = 3 + class ParserGenerationException(Exception): """ Raised when something goes wrong during parser generation """ @@ -21,6 +18,41 @@ pass +class Action: + def __repr__(self): + return 'Action' + + def __eq__(self, other): + return str(self) == str(other) + + +class Shift(Action): + def __init__(self, to_state): + self.to_state = to_state + + def __repr__(self): + return 'Shift({})'.format(self.to_state) + + +class Reduce(Action): + def __init__(self, rule): + self.rule = rule + + def __repr__(self): + return 'Reduce({})'.format(self.rule) + + +class Accept(Reduce): + def __repr__(self): + return 'Accept({})'.format(self.rule) + + +def print_grammar(g): + """ Pretty print a grammar """ + print(g) + for production in g.productions: + print(production) + class Grammar: """ Defines a grammar of a language """ def __init__(self, terminals): @@ -28,6 +60,10 @@ self.nonterminals = [] self.productions = [] self._first = None # Cached first set + self.start_symbol = None + + def __repr__(self): + return 'Grammar with {} rules'.format(len(self.productions)) def add_production(self, name, symbols, f=None): """ Add a production rule to the grammar """ @@ -50,10 +86,10 @@ @property def first(self): - """ + """ The first set is a mapping from a grammar symbol to a set of set of all terminal symbols that can be the first terminal when - looking for the grammar symbol + looking for the grammar symbol """ if not self._first: self._first = self.calcFirstSets() @@ -125,7 +161,7 @@ return self.closure(iis) def nextItemSet(self, itemset, symbol): - """ + """ Determines the next itemset for the current set and a symbol This is the goto procedure """ @@ -134,7 +170,7 @@ if item.can_shift_over(symbol): next_set.add(item.shifted()) return self.closure(next_set) - + def genCanonicalSet(self, iis): states = [] worklist = [] @@ -153,7 +189,7 @@ addSt(nis) transitions[(states.index(itemset), symbol)] = states.index(nis) return states, transitions - + def checkSymbols(self): """ Checks no symbols are undefined """ for production in self.productions: @@ -161,20 +197,16 @@ if symbol not in self.Symbols + [EPS]: raise ParserGenerationException('Symbol {0} undefined'.format(symbol)) - def getSignature(self): - m = hashlib.md5() - m.update((str(self.productions) + str(self.start_symbol)).encode('ascii')) - signature = m.hexdigest() - def genParser(self): """ Generates a parser from the grammar (using a caching algorithm) """ - signature = self.getSignature() action_table, goto_table = self.doGenerate() p = LRParser(action_table, goto_table, self.start_symbol) p.grammar = self return p def doGenerate(self): + if not self.start_symbol: + self.start_symbol = self.productions[0].name self.checkSymbols() action_table = {} goto_table = {} @@ -183,32 +215,24 @@ # First generate all item sets by using the nextItemset function: states, transitions = self.genCanonicalSet(iis) - def action_str(act): - a, p = act - if a is SHIFT: - return 'Shift {0}'.format(0) - elif a is REDUCE: - return 'Reduce {0}'.format(p) - return 'Other' - def setAction(state, t, action): + assert isinstance(action, Action) key = (state, t) assert type(state) is int assert type(t) is str if key in action_table: action2 = action_table[key] if action != action2: - if (action2[0] == REDUCE) and (action[0] == SHIFT): + 
if (type(action2) is Reduce) and (type(action) is Shift): # Automatically resolve and do the shift action! # Simple, but almost always what you want!! action_table[key] = action + elif isinstance(action2, Shift) and isinstance(action, Reduce): + pass else: - if (action2[0] == SHIFT) and (action[0] == REDUCE): - pass - else: - a1 = action_str(action) - a2 = action_str(action2) - raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2)) + a1 = str(action) + a2 = str(action2) + raise ParserGenerationException('LR construction conflict {0} vs {1}'.format(a1, a2)) else: action_table[key] = action @@ -219,14 +243,15 @@ if item.IsShift and item.Next in self.terminals: # Rule 1, a shift item: nextstate = transitions[(states.index(state), item.Next)] - setAction(states.index(state), item.Next, (SHIFT, nextstate)) + setAction(states.index(state), item.Next, Shift(nextstate)) if item.IsReduce: if item.production.name == self.start_symbol and item.look_ahead == EOF: # Rule 3: accept: - setAction(states.index(state), item.look_ahead, (ACCEPT, self.productions.index(item.production))) + act = Accept(self.productions.index(item.production)) else: # Rule 2, reduce item: - setAction(states.index(state), item.look_ahead, (REDUCE, self.productions.index(item.production))) + act = Reduce(self.productions.index(item.production)) + setAction(states.index(state), item.look_ahead, act) for nt in self.nonterminals: key = (states.index(state), nt) if key in transitions: @@ -242,12 +267,13 @@ self.f = f def __repr__(self): - return '{0} -> {1}'.format(self.name, self.symbols) + action = ' ' + str(self.f) if self.f else '' + return '{0} -> {1}'.format(self.name, self.symbols) + action class Item: - """ - Represents a partially parsed item + """ + Represents a partially parsed item It has a production it is looking for, a position in this production called the 'dot' and a look ahead symbol that must follow this item. 
@@ -311,68 +337,63 @@ class LRParser: - """ LR parser """ + """ LR parser automata """ def __init__(self, action_table, goto_table, start_symbol): self.action_table = action_table self.goto_table = goto_table self.start_symbol = start_symbol - def parse(self, toks): + def parse(self, lexer): """ Parse an iterable with tokens """ - assert hasattr(toks, '__iter__'), '{0} not iter type'.format(type(toks)) + assert hasattr(lexer, 'next_token'), '{0} is no lexer'.format(type(lexer)) stack = [0] r_data_stack = [] - try: - look_ahead = toks.__next__() - except StopIteration: - look_ahead = Token(EOF, EOF) + look_ahead = lexer.next_token() assert type(look_ahead) is Token # TODO: exit on this condition: while stack != [0, self.start_symbol, 2222]: - #print(stack) state = stack[-1] # top of stack key = (state, look_ahead.typ) if not key in self.action_table: raise ParserException('Error parsing at character {0}'.format(look_ahead)) - action, param = self.action_table[key] - if action == REDUCE: + action = self.action_table[key] + if type(action) is Reduce: f_args = [] - param = self.grammar.productions[param] - for s in param.symbols: + prod = self.grammar.productions[action.rule] + for s in prod.symbols: stack.pop() stack.pop() f_args.append(r_data_stack.pop()) f_args.reverse() r_data = None - if param.f: - r_data = param.f(*f_args) + if prod.f: + r_data = prod.f(*f_args) state = stack[-1] - stack.append(param.name) - stack.append(self.goto_table[(state, param.name)]) + stack.append(prod.name) + stack.append(self.goto_table[(state, prod.name)]) r_data_stack.append(r_data) - elif action == SHIFT: + elif type(action) is Shift: stack.append(look_ahead.typ) - stack.append(param) - r_data_stack.append(look_ahead.val) - try: - look_ahead = toks.__next__() - except StopIteration: - look_ahead = Token(EOF, EOF) + stack.append(action.to_state) + r_data_stack.append(look_ahead) + look_ahead = lexer.next_token() assert type(look_ahead) is Token - elif action == ACCEPT: + elif type(action) is Accept: # Pop last rule data off the stack: f_args = [] - param = self.grammar.productions[param] + param = self.grammar.productions[action.rule] for s in param.symbols: stack.pop() stack.pop() f_args.append(r_data_stack.pop()) f_args.reverse() if param.f: - param.f(*f_args) + ret_val = param.f(*f_args) + else: + ret_val = None # Break out! break # At exit, the stack must be 1 long # TODO: fix that this holds: #assert len(stack) == 1, 'stack {0} not totally reduce'.format(stack) - + return ret_val
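With this change LRParser.parse() no longer consumes a raw token iterator; it expects a lexer-like object with a next_token() method that returns Token objects and keeps returning an EOF token once the input is exhausted (the reworked asm.py Lexer and the genTokens helper in test/testpyy.py below follow that contract). A minimal sketch of such an adapter, assuming the two-argument Token(typ, val) constructor used elsewhere in this changeset; the class name is made up:

    # Hedged sketch of the new lexer contract expected by LRParser.parse().
    from ppci import Token
    from pyyacc import EOF

    class ListLexer:
        """ Hypothetical adapter: wrap a list of (typ, val) pairs as a lexer """
        def __init__(self, pairs):
            self.toks = [Token(t, v) for t, v in pairs] + [Token(EOF, EOF)]

        def next_token(self):
            tok = self.toks[0]
            if tok.typ != EOF:
                self.toks.pop(0)   # advance, but stick on the final EOF token
            return tok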
--- a/python/target/arminstructionselector.py Sun Dec 22 15:50:59 2013 +0100
+++ b/python/target/arminstructionselector.py Tue Dec 31 12:38:15 2013 +0100
@@ -12,9 +12,7 @@
 class ArmInstructionSelector(InstructionSelector):
     """ Instruction selector for the arm architecture """
     def munchExpr(self, e):
-        if isinstance(e, ir.Alloc):
-            return 0
-        elif isinstance(e, ir.Binop) and e.operation == '+' and \
+        if isinstance(e, ir.Binop) and e.operation == '+' and \
             isinstance(e.b, ir.Const) and e.b.value < 8:
             a = self.munchExpr(e.a)
             d = self.newTmp()
@@ -142,4 +140,4 @@
         raise NotImplementedError('Stmt --> {}'.format(s))
 
     def move(self, dst, src):
-        self.emit(Mov2, src=[src], dst=[dst])
+        self.emit(Mov2, src=[src], dst=[dst], ismove=True)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/python/tree.py Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,8 @@
+
+class Tree:
+    def __init__(self, name, *args):
+        self.name = name
+        self.children = args
+
+    def __repr__(self):
+        return 'Tree({}, {})'.format(self.name, self.children)
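The new Tree class is the data structure behind both burg.x (which starts with "from tree import Tree") and the patterns in arm.pb. A small example of what the ADD32 pattern from arm.pb looks like as a Tree value; the operand names are just labels here:

    # Hedged example: the "ADD32(reg, const)" pattern from arm.pb as a Tree.
    from tree import Tree

    pattern = Tree('ADD32', Tree('reg'), Tree('const'))
    print(pattern)
    # -> Tree(ADD32, (Tree(reg, ()), Tree(const, ())))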
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/python/yacc.py Tue Dec 31 12:38:15 2013 +0100 @@ -0,0 +1,246 @@ +#!/usr/bin/python + +""" Parser generator utility """ + +import argparse +import re +import sys +import datetime +from pyyacc import Grammar, print_grammar + + +class XaccLexer: + def __init__(self): + pass + + def feed(self, txt): + # Create a regular expression for the lexing part: + tok_spec = [ + ('ID', r'[A-Za-z][A-Za-z\d_]*'), + ('STRING', r"'[^']*'"), + ('BRACEDCODE', r"\{[^\}]*\}"), + ('OTHER', r'[:;\|]'), + ('SKIP', r'[ ]') + ] + tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) + gettok = re.compile(tok_re).match + + lines = txt.split('\n') + + def tokenize_line(line): + """ Generator that splits up a line into tokens """ + mo = gettok(line) + pos = 0 + while mo: + typ = mo.lastgroup + val = mo.group(typ) + if typ == 'ID': + yield (typ, val) + elif typ == 'STRING': + typ = 'ID' + yield (typ, val[1:-1]) + elif typ == 'OTHER': + typ = val + yield (typ, val) + elif typ == 'BRACEDCODE': + yield (typ, val) + elif typ == 'SKIP': + pass + else: + raise NotImplementedError(str(typ)) + pos = mo.end() + mo = gettok(line, pos) + if len(line) != pos: + raise ParseError('Lex fault at {}'.format(line)) + + def tokenize(): + section = 0 + for line in lines: + line = line.strip() + if not line: + continue # Skip empty lines + if line == '%%': + section += 1 + yield('%%', '%%') + continue + if section == 0: + if line.startswith('%tokens'): + yield('%tokens', '%tokens') + yield from tokenize_line(line[7:]) + else: + yield ('HEADER', line) + elif section == 1: + yield from tokenize_line(line) + yield ('eof', 'eof') + self.tokens = tokenize() + self.token = self.tokens.__next__() + + def next_token(self): + t = self.token + if t[0] != 'eof': + self.token = self.tokens.__next__() + #print(t) + return t + + +class ParseError(Exception): + pass + + +class XaccParser: + """ Implements a recursive descent parser to parse grammar rules. + We could have made an generated parser, but that would yield a chicken + egg issue. 
+ """ + def __init__(self, lexer): + self.lexer = lexer + + @property + def Peak(self): + """ Sneak peak to the next token in line """ + return self.lexer.token[0] + + def next_token(self): + """ Take the next token """ + return self.lexer.next_token() + + def consume(self, typ): + """ Eat next token of type typ or raise an exception """ + if self.Peak == typ: + return self.next_token() + else: + raise ParseError('Expected {}, but got {}'.format(typ, self.Peak)) + + def has_consumed(self, typ): + """ Consume typ if possible and return true if so """ + if self.Peak == typ: + self.consume(typ) + return True + return False + + def parse_grammar(self): + """ Entry parse function into recursive descent parser """ + # parse header + headers = [] + terminals = [] + while self.Peak in ['HEADER', '%tokens']: + if self.Peak == '%tokens': + self.consume('%tokens') + while self.Peak == 'ID': + terminals.append(self.consume('ID')[1]) + else: + headers.append(self.consume('HEADER')[1]) + self.consume('%%') + self.grammar = Grammar(terminals) + while self.Peak != 'eof': + self.parse_rule() + return self.grammar + + def parse_symbol(self): + return self.consume('ID')[1] + + def parse_rhs(self): + symbols = [] + while self.Peak not in [';', 'BRACEDCODE', '|']: + symbols.append(self.parse_symbol()) + if self.Peak == 'BRACEDCODE': + action = self.consume('BRACEDCODE')[1] + action = action[1:-1].strip() + else: + action = None + return symbols, action + + def parse_rule(self): + p = self.parse_symbol() + self.consume(':') + symbols, action = self.parse_rhs() + self.grammar.add_production(p, symbols, action) + while self.has_consumed('|'): + symbols, action = self.parse_rhs() + self.grammar.add_production(p, symbols, action) + self.consume(';') + + +class XaccGenerator: + """ Generator that writes generated parser to file """ + def __init__(self): + pass + + def generate(self, grammar, output_file): + print_grammar(grammar) + self.grammar = grammar + self.action_table, self.goto_table = grammar.doGenerate() + self.generate_python_script(output_file) + + def generate_python_script(self, f): + """ Generate python script with the parser table """ + print('#!/usr/bin/python', file=f) + stamp = datetime.datetime.now().ctime() + print('""" Automatically generated by xacc on {} """'.format(stamp), file=f) + print('from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar', file=f) + print('from ppci import Token', file=f) + print(file=f) + print('class Parser(LRParser):', file=f) + print(' def __init__(self):', file=f) + # Generate rules: + print(' self.start_symbol = "{}"'.format(self.grammar.start_symbol), file=f) + print(' self.grammar = Grammar({})'.format(self.grammar.terminals), file=f) + for rule_number, rule in enumerate(self.grammar.productions): + rule.f_name = 'action_{}_{}'.format(rule.name, rule_number) + print(' self.grammar.add_production("{}", {}, self.{})'.format(rule.name, rule.symbols, rule.f_name), file=f) + # Fill action table: + print(' self.action_table = {}', file=f) + for state in self.action_table: + action = self.action_table[state] + print(' self.action_table[{}] = {}'.format(state, action), file=f) + print('', file=f) + + # Fill goto table: + print(' self.goto_table = {}', file=f) + for gt in self.goto_table: + to = self.goto_table[gt] + print(' self.goto_table[{}] = {}'.format(gt, to), file=f) + print('', file=f) + + # Generate a function for each action: + for rule in self.grammar.productions: + M = len(rule.symbols) + args = ', '.join('arg{}'.format(n + 1) for n in 
range(M)) + print(' def {}(self, {}):'.format(rule.f_name, args), file=f) + if rule.f == None: + semantics = 'pass' + else: + semantics = str(rule.f) + if semantics.strip() == '': + semantics = 'pass' + for n in range(M): + semantics = semantics.replace('${}'.format(n + 1), 'arg{}'.format(n + 1)) + print(' {}'.format(semantics), file=f) + print('', file=f) + + +def main(): + # Parse arguments: + parser = argparse.ArgumentParser(description='xacc compiler compiler') + parser.add_argument('source', type=argparse.FileType('r'), \ + help='the parser specification') + parser.add_argument('-o', '--output', type=argparse.FileType('w'), \ + default=sys.stdout) + args = parser.parse_args() + src = args.source.read() + args.source.close() + + # Construction of generator parts: + lexer = XaccLexer() + parser = XaccParser(lexer) + generator = XaccGenerator() + + # Sequence source through the generator parts: + lexer.feed(src) + grammar = parser.parse_grammar() + generator.generate(grammar, args.output) + args.output.close() + + +if __name__ == '__main__': + main()
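generate_python_script() writes a stand-alone Python module containing an LRParser subclass with its grammar, action table, goto table and one method per rule action. The sketch below shows the shape of that output for a small expression grammar like test/expression.x further down; the concrete table entries depend on the generated LR states and are left out, so treat this as an illustration of the layout rather than actual generator output:

    #!/usr/bin/python
    """ Automatically generated by xacc on <timestamp> """
    from pyyacc import LRParser, Reduce, Shift, Accept, Production, Grammar
    from ppci import Token

    class Parser(LRParser):
        def __init__(self):
            self.start_symbol = "input"
            self.grammar = Grammar(['+', 'number', '(', ')', '*'])
            self.grammar.add_production("input", ['expression'], self.action_input_0)
            # ... one add_production() call per rule of the spec ...
            self.action_table = {}
            # ... one self.action_table[(state, terminal)] = Shift(n) / Reduce(n) / Accept(n) per entry ...
            self.goto_table = {}
            # ... one self.goto_table[(state, nonterminal)] = n per entry ...

        def action_input_0(self, arg1):
            return arg1   # "$1" from the spec, rewritten to arg1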
--- a/python/zcc.py Sun Dec 22 15:50:59 2013 +0100
+++ b/python/zcc.py Tue Dec 31 12:38:15 2013 +0100
@@ -118,7 +118,6 @@
 
 # Parse arguments:
 parser = argparse.ArgumentParser(description='lcfos Compiler')
-# Input:
 parser.add_argument('source', type=argparse.FileType('r'), \
     help='the source file to build', nargs="+")
 parser.add_argument('-i', '--imp', type=argparse.FileType('r'), \
--- a/readme.rst Sun Dec 22 15:50:59 2013 +0100
+++ b/readme.rst Tue Dec 31 12:38:15 2013 +0100
@@ -30,6 +30,13 @@
 
     python ide.py
 
+Source code
+-----------
+
+Sources are located here:
+https://bitbucket.org/windel/lcfos
+
+
 Docs
 ----
 
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/expression.x Tue Dec 31 12:38:15 2013 +0100
@@ -0,0 +1,16 @@
+
+%tokens '+' number '(' ')' '*'
+%%
+
+input: expression {return $1};
+
+expression: term { return $1 }
+          | expression '+' term {};
+
+term: factor { return $1 }
+    | term '*' factor { return $1 * $3 };
+
+factor: '(' expression ')'
+      | number {return $1.val};
+
+
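expression.x is a small calculator grammar, presumably meant as a test input for yacc.py. Written directly against the pyyacc API it corresponds roughly to the grammar below; the generated module would drive the same productions through its action and goto tables (this is a sketch, not part of the changeset):

    # Hedged sketch: the grammar that test/expression.x describes, built by hand.
    from pyyacc import Grammar

    g = Grammar(['+', 'number', '(', ')', '*'])
    g.add_production('input', ['expression'], lambda e: e)
    g.add_production('expression', ['term'], lambda t: t)
    g.add_production('expression', ['expression', '+', 'term'])   # empty action in the spec
    g.add_production('term', ['factor'], lambda f: f)
    g.add_production('term', ['term', '*', 'factor'],
                     lambda t, op, f: t * f)                      # return $1 * $3
    g.add_production('factor', ['(', 'expression', ')'])
    g.add_production('factor', ['number'], lambda n: n.val)       # return $1.val
    g.start_symbol = 'input'
    parser = g.genParser()   # parser.parse(some_lexer) should then turn "2 * 3" into 6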
--- a/test/testasm.py Sun Dec 22 15:50:59 2013 +0100
+++ b/test/testasm.py Tue Dec 31 12:38:15 2013 +0100
@@ -13,20 +13,20 @@
 
     def testLex0(self):
         """ Check if the lexer is OK """
-        asmline, toks = 'mov rax, rbx ', ['ID', 'ID', ',', 'ID']
+        asmline, toks = 'mov rax, rbx ', ['ID', 'ID', ',', 'ID', 'EOF']
         self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
 
     def testLex1(self):
         """ Test if lexer correctly maps some tokens """
-        asmline, toks = 'lab1: mov rax, rbx ', ['ID', ':', 'ID', 'ID', ',', 'ID']
-        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
-
-    def testLex1(self):
-        """ Test if lexer correctly maps some tokens """
-        asmline, toks = 'mov 3.13 0xC 13', ['ID', 'REAL', 'NUMBER', 'NUMBER']
+        asmline, toks = 'lab1: mov rax, rbx ', ['ID', ':', 'ID', 'ID', ',', 'ID', 'EOF']
         self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
 
     def testLex2(self):
+        """ Test if lexer correctly maps some tokens """
+        asmline, toks = 'mov 3.13 0xC 13', ['ID', 'REAL', 'NUMBER', 'NUMBER', 'EOF']
+        self.assertSequenceEqual([tok.typ for tok in tokenize(asmline)], toks)
+
+    def testLex3(self):
         """ Test if lexer fails on a token that is invalid """
         asmline = '0z4: mov rax, rbx $ '
         with self.assertRaises(CompilerError):
@@ -81,7 +81,7 @@
     def testParse6(self):
         # A line can be empty
         self.a.parse_line('')
-    
+
 
 class AssemblerOtherTestCase(unittest.TestCase):
     def testWithoutTarget(self):
--- a/test/testpyy.py Sun Dec 22 15:50:59 2013 +0100 +++ b/test/testpyy.py Tue Dec 31 12:38:15 2013 +0100 @@ -1,10 +1,25 @@ -import unittest, pprint -from pyyacc import Grammar, Item, ParserGenerationException, ParserException, EPS, EOF +import unittest +from pyyacc import Grammar, Item, ParserGenerationException, ParserException +from pyyacc import EPS, EOF from ppci import Token -def genTokens(lst): - for t in lst: - yield Token(t, t) + +class genTokens: + def __init__(self, lst): + def tokGen(): + for t in lst: + yield Token(t, t) + while True: + yield Token(EOF, EOF) + self.tokens = tokGen() + self.token = self.tokens.__next__() + + def next_token(self): + t = self.token + if t.typ != EOF: + self.token = self.tokens.__next__() + return t + class testLR(unittest.TestCase): """ Test basic LR(1) parser generator constructs """ @@ -25,6 +40,7 @@ p = g.genParser() # 4. feed input: p.parse(tokens) + def testReduceReduceConflict(self): """ Check if a reduce-reduce conflict is detected """ # Define a grammar with an obvious reduce-reduce conflict: @@ -37,6 +53,7 @@ g.start_symbol = 'goal' with self.assertRaises(ParserGenerationException): p = g.genParser() + def testShiftReduceConflict(self): """ Must be handled automatically by doing shift """ g = Grammar([EOF, 'if', 'then', 'else', 'ass']) @@ -60,6 +77,7 @@ g.start_symbol = 'goal' with self.assertRaises(ParserGenerationException): g.genParser() + def testRedefineTerminal(self): """ Test correct behavior when a terminal is redefined """ g = Grammar([EOF, 'b', 'c']) @@ -69,6 +87,7 @@ g.add_production('a', ['c']) g.start_symbol = 'goal' g.genParser() + def testEmpty(self): """ Test empty token stream """ g = Grammar([',']) @@ -78,7 +97,7 @@ tokens = genTokens([]) with self.assertRaises(ParserException): p.parse(tokens) - + def testEps(self): """ Test epsilon terminal """ g = Grammar(['a', 'b']) @@ -109,9 +128,9 @@ self.cb_called = False def cb(a, c, b): self.cb_called = True - self.assertEqual(a, 'a') - self.assertEqual(b, 'b') - self.assertEqual(c, 'c') + self.assertEqual(a.val, 'a') + self.assertEqual(b.val, 'b') + self.assertEqual(c.val, 'c') g = Grammar(['a', 'b', 'c']) g.add_production('goal', ['a', 'c', 'b'], cb) g.start_symbol = 'goal' @@ -147,7 +166,8 @@ # Must result in 12 sets: self.assertEqual(len(s), 24) -class testPG(unittest.TestCase): + +class testParserGenerator(unittest.TestCase): """ Tests several parts of the parser generator """ def setUp(self): g = Grammar(['(', ')']) @@ -216,5 +236,3 @@ if __name__ == '__main__': unittest.main() - -