334
|
1
|
|
2 import re
|
|
3 import pyyacc
|
|
4 from . import Token, CompilerError, SourceLocation
|
342
|
5 from .target import Target, Label
|
334
|
6
|
340
|
7
|
341
|
def bit_type(value):
    """Return the smallest 'valN' token type whose N bits can hold *value*.

    *value* must be a non-negative integer below 2**32; the result is one
    of 'val3', 'val5', 'val8', 'val12', 'val16' or 'val32'.
    """
    assert value < (2**32)
    assert value >= 0
    # Walk the widths from narrow to wide and return on the first fit;
    # values of 2**16 and above fall through to the 32-bit catch-all.
    for width in (3, 5, 8, 12, 16):
        if value < (2**width):
            return 'val{}'.format(width)
    return 'val32'
|
|
16
|
|
def tokenize(s, kws):
    """
    Tokenizer, generates an iterator that returns tokens!

    This GREAT example was taken from python re doc page!

    Yields Token(typ, val, loc) for every lexeme in *s*.  An ID whose
    lowercased text appears in *kws* is re-typed to that keyword, and a
    NUMBER is re-typed to the smallest fitting 'valN' type (see bit_type).
    Ends with an EOF token.  Raises CompilerError when a character matches
    no pattern.
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),   # must precede NUMBER ('0x..' starts with a digit)
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        # This pattern was missing, which made the NEWLINE branch below
        # unreachable and turned any embedded '\n' into a CompilerError:
        ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]'),
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*")
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # Restart column counting after the newline itself (mo.end()),
            # so the first character of the next line gets column 0.
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            if typ == 'LEESTEKEN':
                typ = val              # punctuation uses its own text as token type
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]        # strip the surrounding quotes
            elif typ == 'ID':
                if val.lower() in kws:
                    typ = val.lower()  # keywords are case-insensitive
            col = mo.start() - line_start
            loc = SourceLocation('', line, col, 0)  # TODO retrieve length?
            if typ == 'NUMBER':
                typ = bit_type(val)
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # No pattern matched at 'pos': report the offending character.
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
|
|
72
|
|
73
|
|
class Lexer:
    """Thin wrapper around tokenize() keeping one token of lookahead."""

    def __init__(self, src, kws):
        self.tokens = tokenize(src, kws)
        self.curTok = next(self.tokens)

    def next_token(self):
        """Return the current token and advance, sticking at EOF forever."""
        tok = self.curTok
        if tok.typ != 'EOF':
            self.curTok = next(self.tokens)
        return tok
|
|
84
|
|
85
|
|
class Parser:
    """LR parser for a single assembly line, built on a pyyacc grammar.

    The grammar has a fixed skeleton (label / instruction / comment) plus
    target-specific instruction rules supplied by the caller.  Reduced
    instructions and labels are pushed out through the *emit* callback.

    NOTE(review): the factor/expression rules reference ASymbol, ANumber,
    AUnop and ABinop, which are not among the imports visible at the top
    of this file — presumably defined elsewhere in the package; confirm.
    """

    def add_rule(self, prod, rhs, f):
        """ Helper function to add a rule, why this is required? """
        # 'instruction' reductions are side effects (emitted immediately),
        # while every other production returns a value up the parse stack —
        # hence the two different wrappers around the user callback f.
        if prod == 'instruction':
            def f_wrap(*args):
                i = f(args)
                self.emit(i)
        else:
            def f_wrap(*rhs):
                return f(rhs)
        self.g.add_production(prod, rhs, f_wrap)

    def __init__(self, kws, instruction_rules, emit):
        """Build the grammar and generate the parse tables.

        kws: iterable of target keyword strings (become terminal tokens).
        instruction_rules: (prod, rhs, callback) triples for the target.
        emit: callable receiving each reduced instruction/label.
        """
        # Construct a parser given a grammar:
        # Terminals: punctuation, the sized number types produced by
        # bit_type(), and the target keywords.
        tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*',
                   pyyacc.EPS, 'COMMENT', '{', '}',
                   pyyacc.EOF, 'val32', 'val16', 'val12', 'val8', 'val5', 'val3']
        tokens2.extend(kws)
        self.kws = kws
        g = pyyacc.Grammar(tokens2)
        self.g = g
        # Global structure of assembly line:
        # optional label, optional instruction, optional trailing comment
        # (the empty asmline2 alternative accepts blank lines).
        g.add_production('asmline', ['asmline2'])
        g.add_production('asmline', ['asmline2', 'COMMENT'])
        g.add_production('asmline2', ['label', 'instruction'])
        g.add_production('asmline2', ['instruction'])
        g.add_production('asmline2', ['label'])
        g.add_production('asmline2', [])
        g.add_production('label', ['ID', ':'], self.p_label)
        #g.add_production('label', [])

        # Add instruction rules for the target in question:
        for prod, rhs, f in instruction_rules:
            self.add_rule(prod, rhs, f)

        #g.add_production('instruction', [])
        # Expression grammar with the usual precedence split:
        # expression (+/-) over term (*) over factor (ID / NUMBER).
        g.add_production('expression', ['term'], lambda x: x)
        g.add_production('expression', ['expression', 'addop', 'term'], self.p_binop)
        g.add_production('addop', ['-'], lambda x: x.val)
        g.add_production('addop', ['+'], lambda x: x.val)
        g.add_production('mulop', ['*'], lambda x: x.val)
        g.add_production('term', ['factor'], lambda x: x)
        g.add_production('term', ['term', 'mulop', 'factor'], self.p_binop)
        g.add_production('factor', ['ID'], lambda name: ASymbol(name.val))
        g.add_production('factor', ['NUMBER'], lambda num: ANumber(int(num.val)))
        g.start_symbol = 'asmline'
        self.emit = emit
        self.p = g.generate_parser()
        # print('length of table:', len(self.p.action_table))

    # Parser handlers:
    def p_ins_1(self, opc, ops):
        # Build an instruction node from opcode + operand list and emit it.
        ins = AInstruction(opc, ops)
        self.emit(ins)

    def p_ins_2(self, opc):
        # Instruction without operands.
        self.p_ins_1(opc, [])

    def p_operands_1(self, op1):
        # First operand starts the list.
        return [op1]

    def p_operands_2(self, ops, comma, op2):
        # Subsequent operands are appended: 'ops , op2'.
        assert type(ops) is list
        ops.append(op2)
        return ops

    def p_listitems_1(self, li1):
        # First item of a {...} register-list style construct.
        return [li1]

    def p_listitems_2(self, lis, comma, li2):
        assert type(lis) is list
        lis.append(li2)
        return lis

    def p_list_op(self, brace_open, lst, brace_close):
        # '{ items }' operand, e.g. a register list.
        return AUnop('{}', lst)

    def p_mem_op(self, brace_open, exp, brace_close):
        # '[ expression ]' memory operand.
        return AUnop('[]', exp)

    def p_label(self, lname, cn):
        # 'ID :' — emit a Label directly rather than returning a value.
        lab = Label(lname.val)
        self.emit(lab)

    def p_binop(self, exp1, op, exp2):
        return ABinop(op, exp1, exp2)

    def parse(self, lexer):
        """Parse one line of tokens; results flow out via emit()."""
        self.p.parse(lexer)
|
|
175
|
|
176
|
|
class Assembler:
    """Top-level assembler: drives the lexer/parser over a source text,
    forwarding parsed labels/instructions to an output stream."""

    def __init__(self, target):
        # Validate before use instead of after storing the attribute.
        assert isinstance(target, Target)
        self.target = target
        # Output stream; only valid while assemble() is running.
        self.stream = None
        self.parser = Parser(target.asm_keywords, target.assembler_rules, self.emit)

    def emit(self, *args):
        """Forward parsed items to the current output stream."""
        self.stream.emit(*args)

    # Top level interface:
    def parse_line(self, line):
        """ Parse line into assembly instructions """
        tokens = Lexer(line, self.target.asm_keywords)
        self.parser.parse(tokens)

    def assemble(self, asmsrc, stream):
        """ Assemble this source snippet """
        if hasattr(asmsrc, 'read'):
            # Accept an open file-like object as well as a plain string.
            asmsrc2 = asmsrc.read()
            asmsrc.close()
            asmsrc = asmsrc2
        self.stream = stream
        try:
            # splitlines() handles \n, \r\n and \r (resolves the old
            # "use generic newline" TODO) and drops the spurious empty
            # trailing line that split('\n') produced.
            for line in asmsrc.splitlines():
                self.parse_line(line)
        finally:
            # Always drop the stream, even if a parse error is raised,
            # so a failed assemble() cannot leak into the next call.
            self.stream = None
|
334
|
204
|