comparison python/asm.py @ 318:e84047f29c78

Add burg and yacc initial attempts
author Windel Bouwman
date Tue, 31 Dec 2013 12:38:15 +0100
parents 084cccaa5deb
children 8d07a4254f04
comparison
equal deleted inserted replaced
317:e30a77ae359b 318:e84047f29c78
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 2
3 import re, argparse 3 import re
4 import argparse
4 import pyyacc 5 import pyyacc
5 from ppci import Token, CompilerError, SourceLocation 6 from ppci import Token, CompilerError, SourceLocation
6 from target import Target, Label 7 from target import Target, Label
7 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber 8 from asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
8 9
9 def tokenize(s): 10 def tokenize(s):
10 """ 11 """
11 Tokenizer, generates an iterator that 12 Tokenizer, generates an iterator that
12 returns tokens! 13 returns tokens!
13 14
14 This GREAT example was taken from python re doc page! 15 This GREAT example was taken from python re doc page!
15 """ 16 """
16 tok_spec = [ 17 tok_spec = [
17 ('REAL', r'\d+\.\d+'), 18 ('REAL', r'\d+\.\d+'),
18 ('HEXNUMBER', r'0x[\da-fA-F]+'), 19 ('HEXNUMBER', r'0x[\da-fA-F]+'),
19 ('NUMBER', r'\d+'), 20 ('NUMBER', r'\d+'),
20 ('ID', r'[A-Za-z][A-Za-z\d_]*'), 21 ('ID', r'[A-Za-z][A-Za-z\d_]*'),
21 ('SKIP', r'[ \t]'), 22 ('SKIP', r'[ \t]'),
22 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'), 23 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
23 ('STRING', r"'.*?'"), 24 ('STRING', r"'.*?'"),
24 ('COMMENT', r";.*") 25 ('COMMENT', r";.*")
25 ] 26 ]
26 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) 27 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
27 gettok = re.compile(tok_re).match 28 gettok = re.compile(tok_re).match
28 line = 1 29 line = 1
29 pos = line_start = 0 30 pos = line_start = 0
30 mo = gettok(s) 31 mo = gettok(s)
31 while mo is not None: 32 while mo is not None:
32 typ = mo.lastgroup 33 typ = mo.lastgroup
33 val = mo.group(typ) 34 val = mo.group(typ)
34 if typ == 'NEWLINE': 35 if typ == 'NEWLINE':
35 line_start = pos 36 line_start = pos
36 line += 1 37 line += 1
49 col = mo.start() - line_start 50 col = mo.start() - line_start
50 loc = SourceLocation('', line, col, 0) # TODO retrieve length? 51 loc = SourceLocation('', line, col, 0) # TODO retrieve length?
51 yield Token(typ, val, loc) 52 yield Token(typ, val, loc)
52 pos = mo.end() 53 pos = mo.end()
53 mo = gettok(s, pos) 54 mo = gettok(s, pos)
54 if pos != len(s): 55 if pos != len(s):
55 col = pos - line_start 56 col = pos - line_start
56 loc = SourceLocation('', line, col, 0) 57 loc = SourceLocation('', line, col, 0)
57 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) 58 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
59 yield Token('EOF', pyyacc.EOF)
58 60
59 61
60 class Lexer: 62 class Lexer:
61 def __init__(self, src): 63 def __init__(self, src):
62 self.tokens = tokenize(src) 64 self.tokens = tokenize(src)
63 self.curTok = self.tokens.__next__() 65 self.curTok = self.tokens.__next__()
64 66
65 def eat(self): 67 def next_token(self):
66 t = self.curTok 68 t = self.curTok
67 self.curTok = self.tokens.__next__() 69 if t.typ != 'EOF':
70 self.curTok = self.tokens.__next__()
68 return t 71 return t
69
70 @property
71 def Peak(self):
72 return self.curTok
73 72
74 73
75 class Parser: 74 class Parser:
76 def __init__(self): 75 def __init__(self):
77 # Construct a parser given a grammar: 76 # Construct a parser given a grammar:
78 ident = lambda x: x # Identity helper function 77 ident = lambda x: x # Identity helper function
79 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}']) 78 g = pyyacc.Grammar(['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', pyyacc.EPS, 'COMMENT', '{', '}',
79 pyyacc.EOF])
80 g.add_production('asmline', ['asmline2']) 80 g.add_production('asmline', ['asmline2'])
81 g.add_production('asmline', ['asmline2', 'COMMENT']) 81 g.add_production('asmline', ['asmline2', 'COMMENT'])
82 g.add_production('asmline2', ['label', 'instruction']) 82 g.add_production('asmline2', ['label', 'instruction'])
83 g.add_production('asmline2', ['instruction']) 83 g.add_production('asmline2', ['instruction'])
84 g.add_production('asmline2', ['label']) 84 g.add_production('asmline2', ['label'])
85 g.add_production('asmline2', []) 85 g.add_production('asmline2', [])
86 g.add_production('optcomment', [])
87 g.add_production('optcomment', ['COMMENT'])
88 g.add_production('label', ['ID', ':'], self.p_label) 86 g.add_production('label', ['ID', ':'], self.p_label)
89 g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1) 87 g.add_production('instruction', ['opcode', 'operands'], self.p_ins_1)
90 g.add_production('instruction', ['opcode'], self.p_ins_2) 88 g.add_production('instruction', ['opcode'], self.p_ins_2)
91 g.add_production('opcode', ['ID'], ident) 89 g.add_production('opcode', ['ID'], lambda x: x.val)
92 g.add_production('operands', ['operand'], self.p_operands_1) 90 g.add_production('operands', ['operand'], self.p_operands_1)
93 g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2) 91 g.add_production('operands', ['operands', ',', 'operand'], self.p_operands_2)
94 g.add_production('operand', ['expression'], ident) 92 g.add_production('operand', ['expression'], ident)
95 g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op) 93 g.add_production('operand', ['[', 'expression', ']'], self.p_mem_op)
96 g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op) 94 g.add_production('operand', ['{', 'listitems', '}'], self.p_list_op)
97 g.add_production('listitems', ['expression'], self.p_listitems_1) 95 g.add_production('listitems', ['expression'], self.p_listitems_1)
98 g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2) 96 g.add_production('listitems', ['listitems', ',', 'expression'], self.p_listitems_2)
99 g.add_production('expression', ['term'], ident) 97 g.add_production('expression', ['term'], ident)
100 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop) 98 g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
101 g.add_production('addop', ['-'], ident) 99 g.add_production('addop', ['-'], lambda x: x.val)
102 g.add_production('addop', ['+'], ident) 100 g.add_production('addop', ['+'], lambda x: x.val)
103 g.add_production('mulop', ['*'], ident) 101 g.add_production('mulop', ['*'], lambda x: x.val)
104 g.add_production('term', ['factor'], ident) 102 g.add_production('term', ['factor'], ident)
105 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop) 103 g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
106 g.add_production('factor', ['ID'], lambda name: ASymbol(name)) 104 g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
107 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num))) 105 g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
108 g.start_symbol = 'asmline' 106 g.start_symbol = 'asmline'
109 self.p = g.genParser() 107 self.p = g.genParser()
110 108
111 # Parser handlers: 109 # Parser handlers:
112 def p_ins_1(self, opc, ops): 110 def p_ins_1(self, opc, ops):
113 ins = AInstruction(opc, ops) 111 ins = AInstruction(opc, ops)
114 self.emit(ins) 112 self.emit(ins)
113
115 def p_ins_2(self, opc): 114 def p_ins_2(self, opc):
116 self.p_ins_1(opc, []) 115 self.p_ins_1(opc, [])
116
117 def p_operands_1(self, op1): 117 def p_operands_1(self, op1):
118 return [op1] 118 return [op1]
119
119 def p_operands_2(self, ops, comma, op2): 120 def p_operands_2(self, ops, comma, op2):
120 assert type(ops) is list 121 assert type(ops) is list
121 ops.append(op2) 122 ops.append(op2)
122 return ops 123 return ops
123 124
129 lis.append(li2) 130 lis.append(li2)
130 return lis 131 return lis
131 132
132 def p_list_op(self, brace_open, lst, brace_close): 133 def p_list_op(self, brace_open, lst, brace_close):
133 return AUnop('{}', lst) 134 return AUnop('{}', lst)
135
134 def p_mem_op(self, brace_open, exp, brace_close): 136 def p_mem_op(self, brace_open, exp, brace_close):
135 return AUnop('[]', exp) 137 return AUnop('[]', exp)
138
136 def p_label(self, lname, cn): 139 def p_label(self, lname, cn):
137 lab = ALabel(lname) 140 lab = ALabel(lname.val)
138 self.emit(lab) 141 self.emit(lab)
142
139 def p_binop(self, exp1, op, exp2): 143 def p_binop(self, exp1, op, exp2):
140 return ABinop(op, exp1, exp2) 144 return ABinop(op, exp1, exp2)
141 145
142 def parse(self, tokens, emitter): 146 def parse(self, lexer, emitter):
143 self.emit = emitter 147 self.emit = emitter
144 self.p.parse(tokens) 148 self.p.parse(lexer)
145 149
146 # Pre construct parser to save time: 150 # Pre construct parser to save time:
147 asmParser = Parser() 151 asmParser = Parser()
148 152
149 class Assembler: 153 class Assembler:
161 """ Emit a parsed instruction """ 165 """ Emit a parsed instruction """
162 self.stack.append(a) 166 self.stack.append(a)
163 167
164 def parse_line(self, line): 168 def parse_line(self, line):
165 """ Parse line into asm AST """ 169 """ Parse line into asm AST """
166 tokens = tokenize(line) 170 tokens = Lexer(line)
167 self.p.parse(tokens, self.emit) 171 self.p.parse(tokens, self.emit)
168 172
169 def assemble(self, asmsrc): 173 def assemble(self, asmsrc):
170 """ Assemble this source snippet """ 174 """ Assemble this source snippet """
171 for line in asmsrc.split('\n'): 175 for line in asmsrc.split('\n'):
172 self.assemble_line(line) 176 self.assemble_line(line)
173 177
174 def assemble_line(self, line): 178 def assemble_line(self, line):
175 """ 179 """
176 Assemble a single source line. 180 Assemble a single source line.
177 Do not take newlines into account 181 Do not take newlines into account
178 """ 182 """
179 self.parse_line(line) 183 self.parse_line(line)
180 self.assemble_aast() 184 self.assemble_aast()
181 185
182 def assemble_aast(self): 186 def assemble_aast(self):
183 """ Assemble a parsed asm line """ 187 """ Assemble a parsed asm line """
184 # TODO
185 if not self.target: 188 if not self.target:
186 raise CompilerError('Cannot assemble without target') 189 raise CompilerError('Cannot assemble without target')
187 while self.stack: 190 while self.stack:
188 vi = self.stack.pop(0) 191 vi = self.stack.pop(0)
189 if type(vi) is AInstruction: 192 if type(vi) is AInstruction:
197 200
198 201
199 if __name__ == '__main__': 202 if __name__ == '__main__':
200 # When run as main file, try to grab command line arguments: 203 # When run as main file, try to grab command line arguments:
201 parser = argparse.ArgumentParser(description="Assembler") 204 parser = argparse.ArgumentParser(description="Assembler")
202 parser.add_argument('sourcefile', type=argparse.FileType('r'), help='the source file to assemble') 205 parser.add_argument('sourcefile', type=argparse.FileType('r'),
206 help='the source file to assemble')
203 args = parser.parse_args() 207 args = parser.parse_args()
204 a = Assembler() 208 a = Assembler()
205 obj = a.assemble(args.sourcefile.read()) 209 obj = a.assemble(args.sourcefile.read())
206