comparison python/libasm.py @ 191:6b2bec5653f1

Added assembler testset
author Windel Bouwman
date Sun, 26 May 2013 15:28:07 +0200
parents 5e1dd04cb61c
children f091e7d70996
comparison
equal deleted inserted replaced
190:65dda7e7e8bd 191:6b2bec5653f1
1 import collections, re 1 import re
2 import pyyacc
3 from ppci import Token, CompilerError, SourceLocation
2 4
3 # Different instruction sets: 5 # Different instruction sets:
4 class InstructionSet: 6 class InstructionSet:
5 pass 7 pass
6 8
7 class X86(InstructionSet): 9 class X86(InstructionSet):
8 pass 10 pass
9 11
10 # Generic assembler: 12 # Generic assembler:
11
12 class SourceLocation:
13 def __init__(self, x):
14 self.pos = x
15
16 class SourceRange:
17 def __init__(self, p1, p2):
18 self.p1 = p1
19 self.p2 = p2
20
21 # Token is used in the lexical analyzer:
22 Token = collections.namedtuple('Token', 'typ val row col')
23
24 keywords = ['global', 'db'] 13 keywords = ['global', 'db']
25 14
26 def tokenize(s): 15 def tokenize(s):
27 """ 16 """
28 Tokenizer, generates an iterator that 17 Tokenizer, generates an iterator that
33 tok_spec = [ 22 tok_spec = [
34 ('REAL', r'\d+\.\d+'), 23 ('REAL', r'\d+\.\d+'),
35 ('HEXNUMBER', r'0x[\da-fA-F]+'), 24 ('HEXNUMBER', r'0x[\da-fA-F]+'),
36 ('NUMBER', r'\d+'), 25 ('NUMBER', r'\d+'),
37 ('ID', r'[A-Za-z][A-Za-z\d_]*'), 26 ('ID', r'[A-Za-z][A-Za-z\d_]*'),
38 ('NEWLINE', r'\n'),
39 ('SKIP', r'[ \t]'), 27 ('SKIP', r'[ \t]'),
40 ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'), 28 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
41 ('STRING', r"'.*?'") 29 ('STRING', r"'.*?'")
42 ] 30 ]
43 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) 31 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
44 print(tok_re)
45 gettok = re.compile(tok_re).match 32 gettok = re.compile(tok_re).match
46 line = 1 33 line = 1
47 pos = line_start = 0 34 pos = line_start = 0
48 mo = gettok(s) 35 mo = gettok(s)
49 while mo is not None: 36 while mo is not None:
67 typ = 'NUMBER' 54 typ = 'NUMBER'
68 elif typ == 'REAL': 55 elif typ == 'REAL':
69 val = float(val) 56 val = float(val)
70 elif typ == 'STRING': 57 elif typ == 'STRING':
71 val = val[1:-1] 58 val = val[1:-1]
72 yield Token(typ, val, line, mo.start()-line_start) 59 col = mo.start() - line_start
60 loc = SourceLocation(line, col, 0) # TODO retrieve length?
61 yield Token(typ, val, loc)
73 pos = mo.end() 62 pos = mo.end()
74 mo = gettok(s, pos) 63 mo = gettok(s, pos)
75 if pos != len(s): 64 if pos != len(s):
76 col = pos - line_start 65 col = pos - line_start
77 raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col) 66 loc = SourceLocation(line, col, 0)
78 yield Token('END', '', line, 0) 67 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc)
79 68
80 class Lexer: 69 class Lexer:
81 def __init__(self, src): 70 def __init__(self, src):
82 self.tokens = tokenize(src) 71 self.tokens = tokenize(src)
83 self.curTok = self.tokens.__next__() 72 self.curTok = self.tokens.__next__()
87 return t 76 return t
88 @property 77 @property
89 def Peak(self): 78 def Peak(self):
90 return self.curTok 79 return self.curTok
91 80
92 class Parser: 81 class Assembler:
93 def __init__(self, lxr): 82 def __init__(self):
94 self.lxr = lxr 83 # Construct a parser given a grammar:
95 def parse(self): 84 g = pyyacc.Grammar(['ID', ',', '[', ']', ':'])
96 t = self.lxr.eat()
97 85
98 while True: 86 g.add_production('asmline', ['label', 'instruction', 'operands'])
99 ins = self.parseLine() 87 g.add_production('label', ['ID', ':'])
100 print(ins) 88 g.add_production('label', ['EPS'])
101 t = self.lxr.eat() 89 g.add_production('instruction', ['ID'])
102 def parseLine(self): 90 g.add_production('operands', ['operand'])
103 self.parseLabel() 91 g.add_production('operands', ['operands', ',', 'operand'])
104 if self.lxr.Peak == ';': 92 g.add_production('operand', ['expression'])
105 self.eatComments() 93 g.add_production('expression', ['ID'])
106 def parseLabel(self): 94 # TODO: expand grammar
107 i = self.lxr.eat() 95 g.start_symbol = 'asmline'
108 96
109 class Assembler: 97 self.p = g.genParser()
110 def assemble(self, asmsrc): 98
111 print('assembling', asmsrc) 99 def assemble(self, asmsrc):
112 lxr = Lexer(asmsrc) 100 lxr = Lexer(asmsrc)
113 prsr = Parser(lxr) 101 prsr = Parser(lxr)
114 instructions = prsr.parse() 102 instructions = prsr.parse()
115 return instructions 103 return instructions
116 104
105 def assembleLine(self, line):
106 """
107 Assemble a single source line.
108 Do not take newlines into account
109 """
110 tokens = tokenize(line)
111 self.p.parse(tokens)
112
113 def assembleAst(self, at):
114 """ Assemble a parsed asm line """
115 pass
116
117