New compiler is running now and giving some results

2020-07-06 17:21:13 +05:30 · 2020-07-06 17:21:13 +05:30 · 703c5e467d
parent 48d72383fd
commit 703c5e467d
4 changed files with 131 additions and 107 deletions
--- a/compiler/init.py
+++ b/compiler/init.py
@ -1,8 +1,6 @@
-from tokenizer import JackTokenizer
+from engine import Engine
 import sys
 if __name__ == '__main__':
-  j = JackTokenizer(sys.argv[1], True)
+  Engine(sys.argv[1]).compileClass()
  # c = CompilationEngine(sys.argv[1])
  # c.CompileClass()
--- a/compiler/engine.py
+++ b/compiler/engine.py
@ -1,6 +1,6 @@
 from tokenizer import JackTokenizer
 from keywords import *
-from grammar import CLASS
+from grammar import CLASS,Element
 """
 New Compilation Engine
@ -9,7 +9,7 @@ class Engine:
  def __init__(self, input_file):
    self.i = 0
    self.jt = JackTokenizer(input_file, False)
-    self.file = open(self.xml_file())
+    # self.file = open(self.xml_file(input_file))
  def xml_file(self, input_file):
    return input_file + ".xml"
@ -20,13 +20,14 @@ class Engine:
    return Atom(token.value)
  def compileClass(self):
-    self.compile(grammar.CLASS)
+    self.compile(CLASS)
  def advance(self):
    self.jt.advance()
  def ZeroOrMany(self, grammarList):
-    if compile(grammarList[0]):
+    # print("ZeroOrMany")
    if self.compile(grammarList[0]):
      # We now expect the whole of it
      for e in grammarList:
        self.compile(e)
@ -39,22 +40,31 @@ class Engine:
    print(line)
  def MatchDict(self, dictionary):
    # print("MatchDict")
    xml_rows_for_lookup_terms = []
    lookup_keys = ()
    # How much to lookahead
-    lookahead = len(list(dict.keys())[0])
+    lookahead = len(list(dictionary.keys())[0])
    for _ in range(lookahead):
      xml_rows_for_lookup_terms += [self.jt.xml_row()]
      lookup_keys = lookup_keys + (self.atom(),)
      self.advance()
    grammar = dict[lookup_keys]
    # We must open this before we compile the remainder
    if isinstance(grammar, Element):
      self.open(grammar)
      grammar = grammar.grammar
    # Now we put the first X terms from the conditional
    for line in xml_rows_for_lookup_terms:
      self.write(line)
-    for e in dict[lookup_keys]:
+    return self.compile(grammar)
      self.compile(e)
  def ZeroOrOne(self, grammarTuple):
    # print("ZeroOrOne")
    if self.compile(grammarTuple[0]):
      for e in grammarTuple:
        self.compile(e)
@ -68,32 +78,39 @@ class Engine:
    current = self.atom()
    # We use `in` here to accommodate bitmasks
    if current in expected:
-      print(current)
+      print(self.jt.xml_row(), end="")
      self.advance()
      return True
    else:
-      raise Exception("Expected %s, got %s" % (expected, current))
+      return False
  def open(self, el):
    print("<%s>" % el.name)
  def close(self, el):
    print("</%s>" % el.name)
  def compile(self, thing):
    # TODO: OPEN TAGS
    if isinstance(thing, Element):
-      print("open %s" % thing.name)
+      self.open(thing)
-      grammar = thing.grammar
+      for e in thing.grammar:
        self.compile(e)
      self.close(thing)
    elif callable(thing):
      grammar = thing()
      self.compile(grammar)
    else:
      grammar = thing
-    grammarType = type(grammar)
+      grammarType = type(grammar)
-    elif grammarType == list:
+      if grammarType == list:
-      return self.ZeroOrMany(thing)
+        return self.ZeroOrMany(grammar)
-    elif grammarType == dict:
+      elif grammarType == dict:
-      return self.MatchDict(thing)
+        return self.MatchDict(grammar)
-    elif grammarType == tuple:
+      elif grammarType == tuple:
-      return self.ZeroOrOne(thing)
+        return self.ZeroOrOne(grammar)
-    elif grammarType == Atom:
+      elif grammarType == Atom:
-      return self.Atom(thing)
+        return self.Atom(grammar)
-    elif callable(thing):
+      else:
-      return self.compile(thing)
+        raise Exception("Should not have reached here")
    if isinstance(thing, Element):
      print("close %s" % thing.name)
--- a/compiler/grammar.py
+++ b/compiler/grammar.py
@ -3,7 +3,10 @@ from keywords import Atom
 """
 The grammar is defined by the following constructs:
-The top level object is called GRAMMAR, which is the grammar for a class. It is a list object.
+The top level object is called GRAMMAR, which is the grammar for a class.
 It is an instance of the Element class
 The element class contains a grammar element, which is always defined as a list
 for an element class.
 Inside this list, each element can be any of the following:
@ -18,37 +21,41 @@ a Python structure.
 """
 class Element:
  # Usually I avoid inverted boolean variable names, but this is much cleaner
  def __init__(self, name, grammar):
    assert(type(grammar)==list)
    self.name = name
    self.grammar = grammar
 TYPES = Element('type', Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER)
 CLASSVARDEC = Element('classVarDec', [
  # static|field type (, name)* ;
  Atom.STATIC | Atom.FIELD,
-  TYPES,
+  Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
  Atom.IDENTIFIER,
  [Atom.COMMA, Atom.IDENTIFIER],
  Atom.SEMICOLON
 ])
-VARDEC = Element('varDec', [Atom.VAR, TYPES, Atom.IDENTIFIER,
+VARDEC = Element('varDec', [Atom.VAR, Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER,
  [Atom.COMMA, Atom.IDENTIFIER],
  Atom.SEMICOLON
 ])
 UNARY_OP = Element('unaryOp', Atom.NOT | Atom.MINUS)
-CONSTANT = Element('KeywordConstant', Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS)
+# Since this is not a non-terminal, we can just write it as a constant
 OP = Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ
 UNARY_OP = Atom.NOT | Atom.MINUS
 CONSTANT = Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS
 """ Pseudo-element to help define subroutine declarations """
 RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
-TERM = Element('term', Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER)
+# TODO: This is missing a lot of stuff
-
+TERM = Element('term', [Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER])
 OP = Element('op', Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ)
 EXPRESSION = Element('expression', [TERM, [OP, TERM]])
-EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Atom.COMMA, EXPRESSION]))
+EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
-SUBROUTINE_CALL = Element('subroutineCall', {
+DO_STATEMENT = Element('doStatement', [{
  (Atom.IDENTIFIER, Atom.PARAN_OPEN): [
    EXPRESSIONLIST,
    Atom.PARAN_CLOSE,
@ -59,54 +66,63 @@ SUBROUTINE_CALL = Element('subroutineCall', {
    EXPRESSIONLIST,
    Atom.PARAN_CLOSE
  ]
-})
+}])
-STATEMENT = Element('statement', {
+LET_STATEMENT = Element('whileStatement', [
-  (Atom.LET): [Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)],
+  Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)])
  (Atom.IF): [
    Atom.PARAN_OPEN,
    EXPRESSION,
    Atom.PARAN_CLOSE,
    Atom.BRACE_OPEN,
    lambda: STATEMENTS,
    Atom.BRACE_CLOSE,
    # This is the tricky one
    ( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
  ],
  (Atom.WHILE): [
    Atom.PARAN_OPEN,
    EXPRESSION,
    Atom.PARAN_CLOSE,
    Atom.BRACE_OPEN,
    lambda: STATEMENTS,
    Atom.BRACE_CLOSE,
  ],
  (Atom.DO): SUBROUTINE_CALL,
  (Atom.RETURN): [(EXPRESSION), Atom.SEMICOLON]
 })
-STATEMENTS = Element('statements', [STATEMENT])
+IF_STATEMENT = Element('ifStatement', [
  Atom.PARAN_OPEN,
  EXPRESSION,
  Atom.PARAN_CLOSE,
  Atom.BRACE_OPEN,
  lambda: STATEMENTS,
  Atom.BRACE_CLOSE,
  # This is the tricky one
  ( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
 ])
 WHILE_STATEMENT = Element('whileStatement', [
  Atom.PARAN_OPEN,
  EXPRESSION,
  Atom.PARAN_CLOSE,
  Atom.BRACE_OPEN,
  lambda: STATEMENTS,
  Atom.BRACE_CLOSE,
 ])
 RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON])
 # Just a constant, since this isn't a non-terminal
 STATEMENT = {
  (Atom.LET): LET_STATEMENT,
  (Atom.IF): IF_STATEMENT,
  (Atom.WHILE): WHILE_STATEMENT,
  (Atom.DO): DO_STATEMENT,
  (Atom.RETURN): RETURN_STATEMENT
 }
 STATEMENTS = Element('statements', [[STATEMENT]])
 SUBROUTINE_BODY = Element('subroutineBody', [
  # One or more variable declarations
  # `var type varName (, varName)* ;`
-    [VARDEC],
+  Atom.BRACE_OPEN,
-    STATEMENTS
+  [VARDEC],
  STATEMENTS,
  Atom.BRACE_CLOSE
 ])
 """ Pseudo-element to help define subroutine declarations """
 RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
 # Parameter List =
 #  (
 #    (type varName) (, type varName)*
 #  )?
 # we use tuples for zero OR one of a sequence
-PARAMETER_LIST = Element('parameterList', (
+PARAMETER_LIST = Element('parameterList', [(
-  TYPES,
+  Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
  Atom.IDENTIFIER,
-  [Atom.COMMA, TYPES, Atom.IDENTIFIER]
+  [Atom.COMMA, Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER, Atom.IDENTIFIER]
-))
+)])
 SUBROUTINEDEC = Element('subroutineDec', [
  # (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
@ -117,10 +133,7 @@ SUBROUTINEDEC = Element('subroutineDec', [
  Atom.PARAN_OPEN,
  PARAMETER_LIST,
  Atom.PARAN_CLOSE,
  # Subroutine Body
  Atom.BRACE_OPEN,
  SUBROUTINE_BODY,
  Atom.BRACE_CLOSE,
 ])
 CLASS = Element('class', [
--- a/compiler/tokenizer.py
+++ b/compiler/tokenizer.py
@ -1,33 +1,29 @@
 import re
-from keywords import Keyword
+from keywords import *
 from html import escape
 from enum import Enum
 # Superclass in some sense
 class Token(Enum):
  KEYWORD = 1
  SYMBOL = 2
 class JackTokenizer:
  SYMBOL_MAP = {
-    '{': Keyword.BRACE_OPEN ,
+    '{': Symbol.BRACE_OPEN ,
-    '}': Keyword.BRACE_CLOSE ,
+    '}': Symbol.BRACE_CLOSE ,
-    '(': Keyword.PARAN_OPEN ,
+    '(': Symbol.PARAN_OPEN ,
-    ')': Keyword.PARAN_CLOSE ,
+    ')': Symbol.PARAN_CLOSE ,
-    '[': Keyword.SQUARE_OPEN ,
+    '[': Symbol.SQUARE_OPEN ,
-    ']': Keyword.SQUARE_CLOSE ,
+    ']': Symbol.SQUARE_CLOSE ,
-    '.': Keyword.DOT ,
+    '.': Symbol.DOT ,
-    ';': Keyword.SEMICOLON ,
+    ';': Symbol.SEMICOLON ,
-    '+': Keyword.PLUS ,
+    '+': Symbol.PLUS ,
-    '-': Keyword.MINUS ,
+    '-': Symbol.MINUS ,
-    '*': Keyword.MUL ,
+    '*': Symbol.MUL ,
-    '/': Keyword.DIV ,
+    '/': Symbol.DIV ,
-    '&': Keyword.AND ,
+    '&': Symbol.AND ,
-    '|': Keyword.OR ,
+    '|': Symbol.OR ,
-    '<': Keyword.LT ,
+    '<': Symbol.LT ,
-    '>': Keyword.GT ,
+    '>': Symbol.GT ,
-    '=': Keyword.EQ ,
+    '=': Symbol.EQ ,
-    '~': Keyword.NOT ,
+    '~': Symbol.NOT ,
-    ',': Keyword.COMMA,
+    ',': Symbol.COMMA,
  }
  KEYWORD_MAP = {
@ -61,16 +57,16 @@ class JackTokenizer:
    elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t):
      return JackTokenizer.SYMBOL_MAP[t]
    elif re.compile("\d+").match(t):
-      return Keyword.INTEGERCONSTANT
+      return Token.INTEGERCONSTANT
    elif re.compile("\".*\"").match(t):
-      return Keyword.STRINGCONSTANT
+      return Token.STRINGCONSTANT
    else:
      # TODO: Put an assert to ensure valid identifier
-      return Keyword.IDENTIFIER
+      return Token.IDENTIFIER
    pass
  def printable_token(self):
-    if self.tokenType() == Keyword.STRINGCONSTANT:
+    if self.tokenType() == Token.STRINGCONSTANT:
      return self.current_token()[1:-1]
    else:
      return escape(self.current_token(), True)
@ -95,7 +91,7 @@ class JackTokenizer:
  """ Returns the integer value of the current token """
  def intVal(self):
-    self.assert_type(Keyword.INTEGERCONSTANT)
+    self.assert_type(Token.INTEGERCONSTANT)
    return int(self.token)
  """ Returns a list of tokens for that line """