view python/libasm.py @ 188:1113da536872

Used markdown
author Windel Bouwman
date Sat, 25 May 2013 14:46:12 +0200
parents 5e1dd04cb61c
children 6b2bec5653f1
line wrap: on
line source

import collections, re

# Different instruction sets:
class InstructionSet:
    """Common base class for instruction set descriptions.

    Carries no attributes or behaviour of its own; concrete instruction
    sets derive from it.
    """

class X86(InstructionSet):
    """Marker class for the x86 instruction set; adds nothing to its base."""

# Generic assembler:

class SourceLocation:
    """Holds one position value under the attribute `pos`."""

    def __init__(self, x):
        # The value is stored as given; no validation or conversion happens.
        self.pos = x

class SourceRange:
    """Pairs two end points, exposed as `p1` and `p2`."""

    def __init__(self, p1, p2):
        # NOTE(review): presumably both end points are SourceLocation
        # objects -- nothing here enforces that, confirm against callers.
        self.p1, self.p2 = p1, p2

# Token is used in the lexical analyzer:
Token = collections.namedtuple('Token', 'typ val row col')

# Identifiers found in this list are emitted with their own text as the
# token type instead of the generic 'ID' type.
keywords = ['global', 'db']


def tokenize(s):
    """
    Split the source text `s` into Token tuples, yielded one at a time.

    The approach follows the tokenizer example from the python `re`
    documentation: all token patterns are joined into one regular
    expression of named groups and matched repeatedly from the current
    position.

    Token types produced:
      * 'ID' for identifiers, or the identifier text itself when it is
        listed in `keywords`;
      * 'NUMBER' with an int value for decimal and 0x-prefixed literals;
      * 'REAL' with a float value;
      * 'STRING' with the surrounding single quotes stripped;
      * the punctuation text itself for operators and punctuation;
      * a final 'END' token with an empty value and column 0.

    `row` is 1-based, `col` is the 0-based offset from the start of the
    line.  Spaces, tabs and newlines produce no tokens.

    Raises CompilerException when a character matches none of the
    patterns.
    """
    tok_spec = [
        ('REAL', r'\d+\.\d+'),
        ('HEXNUMBER', r'0x[\da-fA-F]+'),
        ('NUMBER', r'\d+'),
        ('ID', r'[A-Za-z][A-Za-z\d_]*'),
        ('NEWLINE', r'\n'),
        ('SKIP', r'[ \t]'),
        # 'LEESTEKEN' is Dutch for punctuation mark.
        ('LEESTEKEN', r':=|[\.,=:;\-+*\[\]/\(\)]|>=|<=|<>|>|<'),
        ('STRING', r"'.*?'"),
    ]
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        val = mo.group(typ)
        if typ == 'NEWLINE':
            # The next line starts right after the newline character, so
            # columns are 0-based on every line, not only on the first.
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            if typ == 'ID':
                if val in keywords:
                    typ = val
            elif typ == 'LEESTEKEN':
                typ = val
            elif typ == 'NUMBER':
                val = int(val)
            elif typ == 'HEXNUMBER':
                # Hex literals are reported as plain numbers.
                val = int(val[2:], 16)
                typ = 'NUMBER'
            elif typ == 'REAL':
                val = float(val)
            elif typ == 'STRING':
                val = val[1:-1]
            yield Token(typ, val, line, mo.start() - line_start)
        pos = mo.end()
        mo = gettok(s, pos)
    if pos != len(s):
        # Matching stopped before the end of the text: s[pos] is a
        # character that no pattern accepts.
        col = pos - line_start
        # NOTE(review): CompilerException is neither defined nor imported
        # in the visible part of this file; confirm it is in scope,
        # otherwise this line surfaces as a NameError.
        raise CompilerException('Unexpected character {0}'.format(s[pos]), line, col)
    yield Token('END', '', line, 0)

class Lexer:
    """Token stream with a single token of lookahead.

    `tokens` is the generator returned by tokenize() and `curTok` is the
    token that has been read from it but not yet consumed.
    """

    def __init__(self, src):
        stream = tokenize(src)
        self.tokens = stream
        # Prime the lookahead with the first token of the stream.
        self.curTok = next(stream)

    def eat(self):
        """Consume and return the current token, then fetch the next one.

        StopIteration from the underlying iterator propagates when no
        further token is available; `curTok` is left unchanged then.
        """
        consumed, self.curTok = self.curTok, next(self.tokens)
        return consumed

    @property
    def Peak(self):
        """The current (not yet consumed) token."""
        return self.curTok

class Parser:
    """Skeleton line parser working on a one-token-lookahead lexer.

    The lexer object must offer `eat()` (consume and return the current
    token) and a `Peak` attribute (the current token); tokens must have
    `typ` and `row` attributes.

    NOTE(review): the grammar is still a stub -- each line is reduced to
    its first token.  Confirm the intended result type before building
    on the value returned by parse().
    """

    def __init__(self, lxr):
        self.lxr = lxr

    def parse(self):
        """Parse until the 'END' token and return the list of line results.

        The loop stops on the 'END' token rather than relying on the
        lexer running out of tokens, so the method returns normally.
        """
        instructions = []
        while self.lxr.Peak.typ != 'END':
            ins = self.parseLine()
            # Trace output retained from the original implementation.
            print(ins)
            instructions.append(ins)
        return instructions

    def parseLine(self):
        """Parse one label followed by an optional ';' comment.

        Returns the token consumed by parseLabel().
        """
        label = self.parseLabel()
        # Compare the token *type*: a Token tuple never equals a str.
        if self.lxr.Peak.typ == ';':
            self.eatComments()
        return label

    def parseLabel(self):
        """Consume one token and return it."""
        return self.lxr.eat()

    def eatComments(self):
        """Discard the ';' token and every further token on the same row.

        Tokens carry no end-of-line marker, so the row number is used to
        find where the comment stops.
        """
        row = self.lxr.eat().row
        while self.lxr.Peak.typ != 'END' and self.lxr.Peak.row == row:
            self.lxr.eat()

class Assembler:
    """Front door of the assembler: lexes and parses assembly source."""

    def assemble(self, asmsrc):
        """Run `asmsrc` through Lexer and Parser.

        Prints a trace line first, then returns whatever Parser.parse()
        returns.
        """
        print('assembling', asmsrc)
        return Parser(Lexer(asmsrc)).parse()