import re
import pyyacc
from . import Token, CompilerError, SourceLocation
from .target import Target, Label


def bit_type(value):
    """ Determine the smallest 'valN' token type that can hold value. """
    assert value < (2**32)
    assert value >= 0
    t = 'val32'
    # Scan from wide to narrow; the narrowest class that still fits wins.
    for n in [16, 12, 8, 5, 3]:
        if value < (2**n):
            t = 'val{}'.format(n)
    return t
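
# Illustration (not part of the original module): because the loop above runs
# from wide to narrow, the narrowest fitting class is the one returned, e.g.
#   bit_type(7)       -> 'val3'   (7 < 2**3)
#   bit_type(100)     -> 'val8'   (100 < 2**8)
#   bit_type(0x10000) -> 'val32'  (needs more than 16 bits)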


def tokenize(s, kws):
    """
    Tokenizer: generates an iterator that yields tokens.

    Based on the tokenizer example in the Python re module documentation.
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        # HEXNUMBER must precede NUMBER so that '0x..' is not split:
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
        # 'LEESTEKEN' is Dutch for punctuation mark:
        ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<|}|{'),
        ('STRING', r"'.*?'"),
        ('COMMENT', r";.*")
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # Currently unreachable: tok_spec has no NEWLINE pattern, since
            # the assembler feeds this tokenizer one line at a time.
            line_start = pos
            line += 1
        elif typ != 'SKIP':
            if typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            elif typ == 'ID':
                # Keywords (mnemonics, register names) become their own type:
                if val.lower() in kws:
                    typ = val.lower()
            col = mo.start() - line_start
            loc = SourceLocation('', line, col, 0)  # TODO: retrieve length?
            if typ == 'NUMBER':
                # Classify numbers by the number of bits they require:
                typ = bit_type(val)
            yield Token(typ, val, loc)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        col = pos - line_start
        loc = SourceLocation('', line, col, 0)
        raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
    yield Token('EOF', pyyacc.EOF)
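
# Example (illustrative, assuming 'add' and 'r0' are keywords in kws):
# tokenize("add r0, 5", {'add', 'r0'}) yields roughly
#   Token('add', 'add', ...), Token('r0', 'r0', ...), Token(',', ',', ...),
#   Token('val3', 5, ...), Token('EOF', ...)
# since keywords become their own token type and 5 fits in 3 bits.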


class Lexer:
    """ Wraps the token generator and keeps one token of lookahead. """
    def __init__(self, src, kws):
        self.tokens = tokenize(src, kws)
        self.curTok = self.tokens.__next__()

    def next_token(self):
        t = self.curTok
        if t.typ != 'EOF':
            self.curTok = self.tokens.__next__()
        return t
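
# Usage sketch (illustrative): the parser pulls tokens one at a time while
# the lexer keeps the upcoming token in curTok:
#   lex = Lexer('add r0, 1', {'add', 'r0'})
#   tok = lex.next_token()  # returns 'add'; lookahead advances unless at EOF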


class Parser:
    def add_rule(self, prod, rhs, f):
        """ Helper function to add a rule; wraps the semantics so that
            instruction productions emit their result, while all other
            productions return it up the parse stack. """
        if prod == 'instruction':
            def f_wrap(*args):
                i = f(args)
                if i:
                    self.emit(i)
        else:
            def f_wrap(*args):
                return f(args)
        self.g.add_production(prod, rhs, f_wrap)
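
    # Illustration (hypothetical rule, not taken from a real target): an
    # entry of instruction_rules could look like
    #   ('instruction', ['add', 'ID', ',', 'ID'], make_add)
    # where make_add(args) builds and returns an instruction object; since
    # the production is 'instruction', the wrapper emits that object.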

    def __init__(self, kws, instruction_rules, emit):
        # Construct a parser given a grammar:
        tokens2 = ['ID', 'NUMBER', ',', '[', ']', ':', '+', '-', '*', '=',
                   pyyacc.EPS, 'COMMENT', '{', '}',
                   pyyacc.EOF, 'val32', 'val16', 'val12', 'val8', 'val5',
                   'val3']
        tokens2.extend(kws)
        self.kws = kws
        g = pyyacc.Grammar(tokens2)
        self.g = g
        # Global structure of an assembly line:
        g.add_production('asmline', ['asmline2'])
        g.add_production('asmline', ['asmline2', 'COMMENT'])
        g.add_production('asmline2', ['label', 'instruction'])
        g.add_production('asmline2', ['instruction'])
        g.add_production('asmline2', ['label'])
        g.add_production('asmline2', [])
        g.add_production('label', ['ID', ':'], self.p_label)

        # Add instruction rules for the target in question:
        for prod, rhs, f in instruction_rules:
            self.add_rule(prod, rhs, f)

        # g.add_production('instruction', [])
        g.start_symbol = 'asmline'
        self.emit = emit
        self.p = g.generate_parser()
        # print('length of table:', len(self.p.action_table))
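
    # With the grammar above, each of these line forms parses (illustrative,
    # assuming 'add', 'r0' and 'r1' are target keywords):
    #   'start: add r0, r1 ; tail comment'  -> label, instruction, comment
    #   'add r0, r1'                        -> instruction only
    #   'start:'                            -> label only
    #   ''                                  -> empty line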

    # Parser handlers:

    def p_label(self, lname, cn):
        # cn is the ':' token; only the label name is used.
        lab = Label(lname.val)
        self.emit(lab)

    def parse(self, lexer):
        self.p.parse(lexer)


class BaseAssembler:
    """ Assembler base class, inherited by assemblers specific to a target """
    def __init__(self, target):
        self.target = target
        assert isinstance(target, Target)

    def make_parser(self):
        self.parser = Parser(self.target.asm_keywords,
                             self.target.assembler_rules, self.emit)

    def emit(self, *args):
        self.stream.emit(*args)

    # Top level interface:
    def parse_line(self, line):
        """ Parse a single line into assembly instructions """
        tokens = Lexer(line, self.target.asm_keywords)
        self.parser.parse(tokens)

    def assemble(self, asmsrc, stream):
        """ Assemble this source snippet """
        if isinstance(asmsrc, str):
            pass
        elif hasattr(asmsrc, 'read'):
            # A file-like object: read it in and close it.
            asmsrc2 = asmsrc.read()
            asmsrc.close()
            asmsrc = asmsrc2
        # TODO: use generic newline??
        # TODO: the bothersome newline ...
        self.stream = stream
        for line in asmsrc.split('\n'):
            self.parse_line(line)

    def flush(self):
        pass
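

# Usage sketch (hypothetical; MyTarget and MyTargetAssembler are assumptions,
# not defined in this module):
#   target = MyTarget()                # some Target subclass
#   asm = MyTargetAssembler(target)    # subclass of BaseAssembler
#   asm.make_parser()
#   asm.assemble('start: add r0, r1 ; first instruction', output_stream)
# Each emitted Label or instruction ends up on output_stream via emit().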