lcfOS: comparison python/libasm.py @ 191:6b2bec5653f1
Added assembler testset
author | Windel Bouwman |
---|---|
date | Sun, 26 May 2013 15:28:07 +0200 |
parents | 5e1dd04cb61c |
children | f091e7d70996 |
190:65dda7e7e8bd | 191:6b2bec5653f1 |
---|---|
1 import collections, re | 1 import re |
2 import pyyacc | |
3 from ppci import Token, CompilerError, SourceLocation | |
2 | 4 |
3 # Different instruction sets: | 5 # Different instruction sets: |
4 class InstructionSet: | 6 class InstructionSet: |
5 pass | 7 pass |
6 | 8 |
7 class X86(InstructionSet): | 9 class X86(InstructionSet): |
8 pass | 10 pass |
9 | 11 |
10 # Generic assembler: | 12 # Generic assembler: |
11 | |
12 class SourceLocation: | |
13 def __init__(self, x): | |
14 self.pos = x | |
15 | |
16 class SourceRange: | |
17 def __init__(self, p1, p2): | |
18 self.p1 = p1 | |
19 self.p2 = p2 | |
20 | |
21 # Token is used in the lexical analyzer: | |
22 Token = collections.namedtuple('Token', 'typ val row col') | |
23 | |
24 keywords = ['global', 'db'] | 13 keywords = ['global', 'db'] |
25 | 14 |
26 def tokenize(s): | 15 def tokenize(s): |
27 """ | 16 """ |
28 Tokenizer, generates an iterator that | 17 Tokenizer, generates an iterator that |
33 tok_spec = [ | 22 tok_spec = [ |
34 ('REAL', r'\d+\.\d+'), | 23 ('REAL', r'\d+\.\d+'), |
35 ('HEXNUMBER', r'0x[\da-fA-F]+'), | 24 ('HEXNUMBER', r'0x[\da-fA-F]+'), |
36 ('NUMBER', r'\d+'), | 25 ('NUMBER', r'\d+'), |
37 ('ID', r'[A-Za-z][A-Za-z\d_]*'), | 26 ('ID', r'[A-Za-z][A-Za-z\d_]*'), |
38 ('NEWLINE', r'\n'), | |
39 ('SKIP', r'[ \t]'), | 27 ('SKIP', r'[ \t]'), |
40 ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'), | 28 ('LEESTEKEN', r':=|[\.,=:\-+*\[\]/\(\)]|>=|<=|<>|>|<'), |
41 ('STRING', r"'.*?'") | 29 ('STRING', r"'.*?'") |
42 ] | 30 ] |
43 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) | 31 tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec) |
44 print(tok_re) | |
45 gettok = re.compile(tok_re).match | 32 gettok = re.compile(tok_re).match |
46 line = 1 | 33 line = 1 |
47 pos = line_start = 0 | 34 pos = line_start = 0 |
48 mo = gettok(s) | 35 mo = gettok(s) |
49 while mo is not None: | 36 while mo is not None: |
67 typ = 'NUMBER' | 54 typ = 'NUMBER' |
68 elif typ == 'REAL': | 55 elif typ == 'REAL': |
69 val = float(val) | 56 val = float(val) |
70 elif typ == 'STRING': | 57 elif typ == 'STRING': |
71 val = val[1:-1] | 58 val = val[1:-1] |
72 yield Token(typ, val, line, mo.start()-line_start) | 59 col = mo.start() - line_start |
60 loc = SourceLocation(line, col, 0) # TODO retrieve length? | |
61 yield Token(typ, val, loc) | |
73 pos = mo.end() | 62 pos = mo.end() |
74 mo = gettok(s, pos) | 63 mo = gettok(s, pos) |
75 if pos != len(s): | 64 if pos != len(s): |
76 col = pos - line_start | 65 col = pos - line_start |
77 raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col) | 66 loc = SourceLocation(line, col, 0) |
78 yield Token('END', '', line, 0) | 67 raise CompilerError('Unexpected character {0}'.format(s[pos]), loc) |
79 | 68 |
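The tokenizer above is the classic single-regex scanner: every token type becomes a named group, the groups are joined into one alternation, and the name of the group that matched is the token type (`LEESTEKEN` is Dutch for punctuation mark). The changeset also switches `Token` to carry a `SourceLocation` instead of bare row/col fields. A minimal runnable sketch of the same technique, with stand-in namedtuples since the real `Token` and `SourceLocation` come from ppci:

```python
import collections
import re

# Stand-ins for ppci's types, shaped after how the diff constructs them:
# SourceLocation(row, col, length) and Token(typ, val, loc). Assumed here.
SourceLocation = collections.namedtuple('SourceLocation', 'row col length')
Token = collections.namedtuple('Token', 'typ val loc')

def sketch_tokenize(s):
    # Order matters: HEXNUMBER must precede NUMBER, or the leading '0'
    # of '0xFF' would be claimed by NUMBER first.
    tok_spec = [
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('SKIP', r'[ \t]'),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    pos = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup          # name of the group that matched
        if typ != 'SKIP':
            yield Token(typ, mo.group(typ), SourceLocation(1, mo.start(), 0))
        pos = mo.end()
        mo = gettok(s, pos)

for tok in sketch_tokenize('db 0xFF'):
    print(tok.typ, tok.val)         # ID db, then HEXNUMBER 0xFF
```

The elided middle of `tokenize` is where the matched group is dispatched on; the sketch compresses that to the `lastgroup` lookup.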
80 class Lexer: | 69 class Lexer: |
81 def __init__(self, src): | 70 def __init__(self, src): |
82 self.tokens = tokenize(src) | 71 self.tokens = tokenize(src) |
83 self.curTok = self.tokens.__next__() | 72 self.curTok = self.tokens.__next__() |
87 return t | 76 return t |
88 @property | 77 @property |
89 def Peak(self): | 78 def Peak(self): |
90 return self.curTok | 79 return self.curTok |
91 | 80 |
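The comparison view skips the body of `eat` (it is unchanged between the two revisions), but together with the `Peak` property the class is a one-token-lookahead wrapper over the generator. A sketch of the elided method, assuming that shape:

```python
class Lexer:
    def __init__(self, src):
        self.tokens = tokenize(src)            # generator defined above
        self.curTok = self.tokens.__next__()   # prime the lookahead

    def eat(self):
        # Presumed body of the elided lines: hand out the current token
        # and advance the lookahead to the next one.
        t = self.curTok
        self.curTok = self.tokens.__next__()
        return t

    @property
    def Peak(self):
        # Inspect the current token without consuming it.
        return self.curTok
```

Note that `__next__` raises StopIteration once the token stream runs out; the old revision ended the stream with a trailing 'END' token, and no counterpart is visible in the new column.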
92 class Parser: | 81 class Assembler: |
93 def __init__(self, lxr): | 82 def __init__(self): |
94 self.lxr = lxr | 83 # Construct a parser given a grammar: |
95 def parse(self): | 84 g = pyyacc.Grammar(['ID', ',', '[', ']', ':']) |
96 t = self.lxr.eat() | |
97 | 85 |
98 while True: | 86 g.add_production('asmline', ['label', 'instruction', 'operands']) |
99 ins = self.parseLine() | 87 g.add_production('label', ['ID', ':']) |
100 print(ins) | 88 g.add_production('label', ['EPS']) |
101 t = self.lxr.eat() | 89 g.add_production('instruction', ['ID']) |
102 def parseLine(self): | 90 g.add_production('operands', ['operand']) |
103 self.parseLabel() | 91 g.add_production('operands', ['operands', ',', 'operand']) |
104 if self.lxr.Peak == ';': | 92 g.add_production('operand', ['expression']) |
105 self.eatComments() | 93 g.add_production('expression', ['ID']) |
106 def parseLabel(self): | 94 # TODO: expand grammar |
107 i = self.lxr.eat() | 95 g.start_symbol = 'asmline' |
108 | 96 |
109 class Assembler: | 97 self.p = g.genParser() |
110 def assemble(self, asmsrc): | 98 |
111 print('assembling', asmsrc) | 99 def assemble(self, asmsrc): |
112 lxr = Lexer(asmsrc) | 100 lxr = Lexer(asmsrc) |
113 prsr = Parser(lxr) | 101 prsr = Parser(lxr) |
114 instructions = prsr.parse() | 102 instructions = prsr.parse() |
115 return instructions | 103 return instructions |
116 | 104 |
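Construction-time parser building is the other substantive change: the ad-hoc `Parser` class on the left is replaced by a grammar fed to the project's own pyyacc parser generator. A condensed sketch of that pattern, using only the calls visible above (`Grammar`, `add_production`, `start_symbol`, `genParser`); `EPS` is the empty production, which is what makes the label optional:

```python
import pyyacc   # the project's parser generator, as imported above

g = pyyacc.Grammar(['ID', ',', ':'])            # terminal symbols
g.add_production('asmline', ['label', 'instruction'])
g.add_production('label', ['ID', ':'])          # "start:" style label
g.add_production('label', ['EPS'])              # ...or no label at all
g.add_production('instruction', ['ID'])         # e.g. "nop"
g.start_symbol = 'asmline'
p = g.genParser()                               # built once, reused per line
```

Building the parser once in the constructor keeps `assembleLine` cheap: per line it only has to tokenize and call `p.parse`.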
105 def assembleLine(self, line): | |
106 """ | |
107 Assemble a single source line. | |
108 Do not take newlines into account | |
109 """ | |
110 tokens = tokenize(line) | |
111 self.p.parse(tokens) | |
112 | |
113 def assembleAst(self, at): | |
114 """ Assemble a parsed asm line """ | |
115 pass | |
116 | |
117 |
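One loose end in the right-hand column: `assemble` still instantiates the `Parser` class that this changeset deletes, so calling it would raise a NameError; `assembleLine` is the path that actually works. A hypothetical line-based variant, not part of this changeset, that routes through it:

```python
def assemble(self, asmsrc):
    # Hypothetical rewrite: the old Parser class is gone in this
    # revision, so feed each source line to assembleLine instead.
    for line in asmsrc.split('\n'):
        self.assembleLine(line)
```

`assembleAst` is still a stub; encoding a parsed line into machine code would happen there.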