comparison python/ppci/assembler.py @ 334:6f4753202b9a

Added more recipes
author Windel Bouwman
date Thu, 13 Feb 2014 22:02:08 +0100
parents
children d1ecc493384e
comparison
equal deleted inserted replaced
333:dcae6574c974 334:6f4753202b9a
1
2 import re
3 import pyyacc
4 from . import Token, CompilerError, SourceLocation
5 from target import Target, Label
6 from .asmnodes import ALabel, AInstruction, ABinop, AUnop, ASymbol, ANumber
7
def tokenize(s):
    """
    Turn the source text `s` into a stream of tokens.

    This is a generator. Blanks and tabs are skipped, every other lexeme is
    yielded as a Token carrying its type, its value and a SourceLocation
    with the line and column at which it starts. Values are converted on
    the way: decimal and hexadecimal literals become ints (both are typed
    'NUMBER'), reals become floats, strings lose their surrounding quotes
    and punctuation uses its own text as token type. A final 'EOF' token
    closes the stream.

    A CompilerError is raised when scanning stops before the end of `s`
    because no pattern matches at the current position.

    The technique is the tokenizer example of the python re doc page.
    """
    lexemes = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*"),
    ]
    scanner = re.compile(
        '|'.join('(?P<%s>%s)' % (name, rx) for name, rx in lexemes))

    # How the matched text of a lexeme is turned into the token value.
    # Lexeme kinds that are not listed keep their text as value.
    converters = {
        'NUMBER': int,
        'HEXNUMBER': lambda text: int(text[2:], 16),
        'REAL': float,
        'STRING': lambda text: text[1:-1],
    }

    line = 1
    line_start = 0
    pos = 0
    while True:
        mo = scanner.match(s, pos)
        if mo is None:
            break
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # The pattern list has no NEWLINE entry, so this branch is not
            # taken at present; it only does the line bookkeeping.
            line_start = pos
            line += 1
        elif typ != 'SKIP':
            if typ == 'LEESTEKEN':
                # Punctuation is its own token type
                typ = val
            elif typ in converters:
                val = converters[typ](val)
                if typ == 'HEXNUMBER':
                    typ = 'NUMBER'
            col = mo.start() - line_start
            loc = SourceLocation('', line, col, 0)  # TODO retrieve length?
            yield Token(typ, val, loc)
        pos = mo.end()

    if pos != len(s):
        # Scanning stopped early: nothing matches at position pos
        loc = SourceLocation('', line, pos - line_start, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
58
59
class Lexer:
    """ Token source with one token of lookahead, fed by tokenize() """

    def __init__(self, src):
        self.tokens = tokenize(src)
        # Fetch the first token right away, it is the lookahead
        self.curTok = next(self.tokens)

    def next_token(self):
        """
        Return the current token and advance to the following one.

        Once the 'EOF' token is current, it is returned on every call and
        the underlying token stream is not advanced any further.
        """
        current = self.curTok
        if current.typ == 'EOF':
            return current
        self.curTok = next(self.tokens)
        return current
70
71
class Parser:
    """
    Parser for one line of assembly.

    A line consists of an optional label, an optional instruction with
    comma separated operands and an optional trailing comment. Labels and
    instructions are passed to the emit callback that is given to parse(),
    operands are built from ASymbol, ANumber, ABinop and AUnop nodes.
    """

    def __init__(self):
        # Semantic actions that are shared between productions:
        ident = lambda x: x  # Identity helper function
        token_val = lambda tok: tok.val  # Value of a single token

        terminals = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                     pyyacc.EPS, 'COMMENT', '{', '}', pyyacc.EOF]
        g = pyyacc.Grammar(terminals)

        # (nonterminal, right hand side[, semantic action]), registered in
        # this order. A missing label or instruction is covered by the
        # asmline2 alternatives.
        rules = [
            ('asmline', ['asmline2']),
            ('asmline', ['asmline2', 'COMMENT']),
            ('asmline2', ['label', 'instruction']),
            ('asmline2', ['instruction']),
            ('asmline2', ['label']),
            ('asmline2', []),
            ('label', ['ID', ':'], self.p_label),
            ('instruction', ['opcode', 'operands'], self.p_ins_1),
            ('instruction', ['opcode'], self.p_ins_2),
            ('opcode', ['ID'], token_val),
            ('operands', ['operand'], self.p_operands_1),
            ('operands', ['operands', ',', 'operand'], self.p_operands_2),
            ('operand', ['expression'], ident),
            ('operand', ['[', 'expression', ']'], self.p_mem_op),
            ('operand', ['{', 'listitems', '}'], self.p_list_op),
            ('listitems', ['expression'], self.p_listitems_1),
            ('listitems', ['listitems', ',', 'expression'],
             self.p_listitems_2),
            ('expression', ['term'], ident),
            ('expression', ['expression', 'addop', 'term'], self.p_binop),
            ('addop', ['-'], token_val),
            ('addop', ['+'], token_val),
            ('mulop', ['*'], token_val),
            ('term', ['factor'], ident),
            ('term', ['term', 'mulop', 'factor'], self.p_binop),
            ('factor', ['ID'], lambda name: ASymbol(name.val)),
            ('factor', ['NUMBER'], lambda num: ANumber(int(num.val))),
        ]
        for rule in rules:
            g.add_production(*rule)
        g.start_symbol = 'asmline'
        self.p = g.genParser()

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        """ Instruction with operands: emit it """
        self.emit(AInstruction(opc, ops))

    def p_ins_2(self, opc):
        """ Instruction without operands: emit it with an empty list """
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        """ First operand: start the operand list """
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        """ Next operand: extend the operand list in place """
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        """ First item between braces: start the item list """
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        """ Next item between braces: extend the item list in place """
        assert type(lis) is list
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        """ Operand of the form { item, ... } """
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        """ Operand of the form [ expression ] """
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        """ Label definition: emit it """
        self.emit(ALabel(lname.val))

    def p_binop(self, exp1, op, exp2):
        """ Binary operation on two sub expressions """
        return ABinop(op, exp1, exp2)

    def parse(self, lexer, emitter):
        """
        Parse the tokens of lexer. Every label and instruction that is
        recognized is passed to the callable emitter.
        """
        self.emit = emitter
        self.p.parse(lexer)
149
# Pre construct the parser once when this module is loaded, so that the
# grammar does not have to be processed again each time a parser is needed:
asmParser = Parser()
152
class Assembler:
    """
    Line oriented assembler.

    A source line is parsed into ALabel and AInstruction items. Instructions
    are mapped by the target (target.mapInstruction), labels are turned into
    Label objects, and the results are emitted into the output stream when
    a stream is given.
    """

    def __init__(self, target=None, stream=None):
        self.target = target
        self.stream = stream
        self.restart()
        self.p = asmParser

    # Top level interface:
    def restart(self):
        """ Drop all items that were parsed but not yet assembled """
        self.stack = []

    def emit(self, a):
        """ Callback for the parser: queue a parsed label or instruction """
        self.stack.append(a)

    def parse_line(self, line):
        """ Parse line into asm AST, the result is queued through emit """
        tokens = Lexer(line)
        self.p.parse(tokens, self.emit)

    def assemble(self, asmsrc):
        """
        Assemble this source snippet.

        asmsrc is either a string (str subclasses included) or an object
        with read() and close() methods. Such an object is read completely
        and is always closed, also when reading fails. The text is
        assembled line by line.
        """
        if not isinstance(asmsrc, str):
            try:
                text = asmsrc.read()
            finally:
                # Do not leak the source when read() raises
                asmsrc.close()
            asmsrc = text
        for line in asmsrc.split('\n'):
            self.assemble_line(line)

    def assemble_line(self, line):
        """
            Assemble a single source line.
            Do not take newlines into account
        """
        self.parse_line(line)
        self.assemble_aast()

    def assemble_aast(self):
        """
        Assemble a parsed asm line.

        Takes the queued items in the order in which they were parsed.
        Raises CompilerError when no target is set and NotImplementedError
        for an item that is neither an AInstruction nor an ALabel.
        """
        if not self.target:
            raise CompilerError('Cannot assemble without target')
        while self.stack:
            # Oldest item first
            vi = self.stack.pop(0)
            if type(vi) is AInstruction:
                mi = self.target.mapInstruction(vi)
            elif type(vi) is ALabel:
                mi = Label(vi.name)
            else:
                raise NotImplementedError('{}'.format(vi))
            if self.stream:
                self.stream.emit(mi)