Mercurial > lcfOS
comparison python/libasm.py @ 159:5e1dd04cb61c
Added attempt to assembler
author | Windel Bouwman |
---|---|
date | Fri, 08 Mar 2013 17:16:22 +0100 |
parents | |
children | 6b2bec5653f1 |
comparison
equal
deleted
inserted
replaced
158:9683a4cd848f | 159:5e1dd04cb61c |
---|---|
1 import collections, re | |
2 | |
3 # Different instruction sets: | |
class InstructionSet:
    """Common base class for the different instruction sets (no members yet)."""
6 | |
class X86(InstructionSet):
    """Instruction set named X86; currently an empty subclass of InstructionSet."""
9 | |
10 # Generic assembler: | |
11 | |
class SourceLocation:
    """A single location in the source; the given value is kept in `pos`."""

    def __init__(self, x):
        # `x` is stored unchanged; no validation or conversion is done.
        self.pos = x
15 | |
class SourceRange:
    """A pair of end points, kept as the attributes `p1` and `p2`."""

    def __init__(self, p1, p2):
        # Both end points are stored exactly as given.
        self.p1, self.p2 = p1, p2
20 | |
# Token produced by the lexical analyzer: its type tag, its value and the
# row / column it was found at.
Token = collections.namedtuple(
    typename='Token',
    field_names='typ val row col',
)

# Identifiers spelled like one of these get the keyword itself as token type.
keywords = ['global', 'db']
25 | |
def tokenize(s):
    """
    Lexical analyzer: generate the tokens found in the source text `s`.

    Yields `Token(typ, val, row, col)` tuples. Spaces, tabs and newlines
    produce no tokens; a newline only advances the row counter.

    Token types and values:

    * identifier: typ 'ID', or the identifier text itself when it is
      listed in `keywords`; val is the text
    * punctuation: typ and val are both the punctuation text (e.g. ':')
    * decimal or hexadecimal integer: typ 'NUMBER', val is an int
    * real: typ 'REAL', val is a float
    * quoted string: typ 'STRING', val is the text without the quotes

    Rows are 1-based; columns are 0-based offsets from the start of the
    row. After the last token a final `Token('END', '', row, 0)` is
    yielded.

    Raises CompilerException when a character matches no token pattern.

    Adapted from the tokenizer example in the Python `re` documentation.
    """
    # Order matters: the alternatives are tried left to right, so REAL and
    # HEXNUMBER must come before NUMBER.
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]'),
        # 'leesteken' is Dutch for punctuation mark.
        ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'"),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # The new row starts right after the newline character. Using
            # the position of the newline itself would make columns on
            # every row but the first one-based.
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Strip the '0x' prefix; hex literals are reported as
                # ordinary NUMBER tokens.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            yield Token(typ, val, line, mo.start() - line_start)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the text.
        col = pos - line_start
        # NOTE(review): CompilerException is neither defined nor imported
        # in the visible part of this file -- confirm where it comes from.
        raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col)
    yield Token('END', '', line, 0)
79 | |
class Lexer:
    """
    Token stream with one token of lookahead.

    Wraps the generator returned by `tokenize`; `curTok` always holds the
    next token that has not been consumed yet.
    """

    def __init__(self, src):
        self.tokens = tokenize(src)
        # Prime the lookahead with the first token.
        self.curTok = next(self.tokens)

    def eat(self):
        """
        Consume the current token and return it.

        The lookahead advances to the following token; StopIteration
        propagates when the underlying token stream is exhausted.
        """
        consumed, self.curTok = self.curTok, next(self.tokens)
        return consumed

    @property
    def Peak(self):
        """The current (lookahead) token, without consuming it."""
        return self.curTok
91 | |
class Parser:
    """
    Line-oriented parser on top of a lexer.

    The grammar is still a stub: a line consists of one leading token
    (handled by parseLabel), optionally followed by a ';' comment that
    runs to the end of that source row.
    """

    def __init__(self, lxr):
        # `lxr` must offer eat() and a Peak property; the tokens it hands
        # out need `typ` and `row` attributes.
        self.lxr = lxr

    def parse(self):
        """
        Parse lines until the lookahead is the 'END' token.

        Returns the list of parseLine() results, one entry per line.
        """
        parsed = []
        # Stop at the END marker instead of running the lexer dry: an
        # unconditional loop can only end when eat() raises, so it would
        # never return its result.
        while self.lxr.Peak.typ != 'END':
            parsed.append(self.parseLine())
        return parsed

    def parseLine(self):
        """Parse one line and return what parseLabel() produced for it."""
        label = self.parseLabel()
        # Compare the token *type*: a token object never equals the plain
        # string ';'.
        if self.lxr.Peak.typ == ';':
            self.eatComments()
        return label

    def parseLabel(self):
        """Consume one token and return it."""
        return self.lxr.eat()

    def eatComments(self):
        """
        Discard the rest of the current source row.

        Starts at the lookahead token (the ';') and consumes every token
        that shares its row. The row number is used because the token
        stream carries no newline tokens.
        NOTE(review): the comment text itself still has to be tokenizable.
        """
        row = self.lxr.Peak.row
        # Check for END first so that we never eat past the last token.
        while self.lxr.Peak.typ != 'END' and self.lxr.Peak.row == row:
            self.lxr.eat()
108 | |
class Assembler:
    """Entry point that pushes assembly source through Lexer and Parser."""

    def assemble(self, asmsrc):
        """
        Lex and parse the source text `asmsrc`.

        Prints a progress line first, then returns whatever
        Parser.parse() returns.
        """
        print('assembling', asmsrc)
        return Parser(Lexer(asmsrc)).parse()
116 |