From 703c5e467dd540256976cbb3a2cf20b4748714a2 Mon Sep 17 00:00:00 2001
From: Nemo <me@captnemo.in>
Date: Mon, 6 Jul 2020 17:21:13 +0530
Subject: [PATCH] New compiler is running now and giving some results

---
 compiler/__init__.py  |   6 +--
 compiler/engine.py    |  67 +++++++++++++++----------
 compiler/grammar.py   | 111 +++++++++++++++++++++++-------------------
 compiler/tokenizer.py |  54 ++++++++++----------
 4 files changed, 131 insertions(+), 107 deletions(-)

diff --git a/compiler/__init__.py b/compiler/__init__.py
index 89f550f..30539c6 100644
--- a/compiler/__init__.py
+++ b/compiler/__init__.py
@@ -1,8 +1,6 @@
-from tokenizer import JackTokenizer
+from engine import Engine
 import sys
 
 if __name__ == '__main__':
-  j = JackTokenizer(sys.argv[1], True)
-  # c = CompilationEngine(sys.argv[1])
-  # c.CompileClass()
+  Engine(sys.argv[1]).compileClass()
 
diff --git a/compiler/engine.py b/compiler/engine.py
index 72b79fd..c89a5ca 100644
--- a/compiler/engine.py
+++ b/compiler/engine.py
@@ -1,6 +1,6 @@
 from tokenizer import JackTokenizer
 from keywords import *
-from grammar import CLASS
+from grammar import CLASS,Element
 
 """
 New Compilation Engine
@@ -9,7 +9,7 @@ class Engine:
   def __init__(self, input_file):
     self.i = 0
     self.jt = JackTokenizer(input_file, False)
-    self.file = open(self.xml_file())
+    # self.file = open(self.xml_file(input_file))
 
   def xml_file(self, input_file):
     return input_file + ".xml"
@@ -20,13 +20,14 @@ class Engine:
     return Atom(token.value)
 
   def compileClass(self):
-    self.compile(grammar.CLASS)
+    self.compile(CLASS)
 
   def advance(self):
     self.jt.advance()
 
   def ZeroOrMany(self, grammarList):
-    if compile(grammarList[0]):
+    # print("ZeroOrMany")
+    if self.compile(grammarList[0]):
       # We now expect the whole of it
       for e in grammarList:
         self.compile(e)
@@ -39,22 +40,31 @@ class Engine:
     print(line)
 
   def MatchDict(self, dictionary):
+    # Consume the lookahead tokens, look up the grammar they select, and compile it.
     xml_rows_for_lookup_terms = []
     lookup_keys = ()
     # How much to lookahead
-    lookahead = len(list(dict.keys())[0])
+    lookahead = len(list(dictionary.keys())[0])
     for _ in range(lookahead):
       xml_rows_for_lookup_terms += [self.jt.xml_row()]
       lookup_keys = lookup_keys + (self.atom(),)
       self.advance()
 
+    grammar = dictionary[lookup_keys]
+
+    # An Element's opening tag must be printed before its lookahead tokens
+    if isinstance(grammar, Element):
+      self.open(grammar)
+      grammar = grammar.grammar
+
+    # Emit the lookahead tokens that were consumed while building the key
     for line in xml_rows_for_lookup_terms:
       self.write(line)
 
-    for e in dict[lookup_keys]:
-      self.compile(e)
+    return self.compile(grammar)
 
   def ZeroOrOne(self, grammarTuple):
+    # print("ZeroOrOne")
     if self.compile(grammarTuple[0]):
       for e in grammarTuple:
         self.compile(e)
@@ -68,32 +78,39 @@ class Engine:
     current = self.atom()
     # We use in here to accomodate for bitmasks
     if current in expected:
-      print(current)
+      print(self.jt.xml_row(), end="")
       self.advance()
+      return True
     else:
-      raise Exception("Expected %s, got %s" % (expected, current))
+      return False
+
+  def open(self, el):
+    print("<%s>" % el.name)
+
+  def close(self, el):
+    print("</%s>" % el.name)
 
   def compile(self, thing):
     # TODO: OPEN TAGS
     if isinstance(thing, Element):
-      print("open %s" % thing.name)
-      grammar = thing.grammar
+      self.open(thing)
+      for e in thing.grammar:
+        self.compile(e)
+      self.close(thing)
     elif callable(thing):
       grammar = thing()
+      self.compile(grammar)
     else:
       grammar = thing
-    grammarType = type(grammar)
+      grammarType = type(grammar)
 
-    elif grammarType == list:
-      return self.ZeroOrMany(thing)
-    elif grammarType == dict:
-      return self.MatchDict(thing)
-    elif grammarType == tuple:
-      return self.ZeroOrOne(thing)
-    elif grammarType == Atom:
-      return self.Atom(thing)
-    elif callable(thing):
-      return self.compile(thing)
-
-    if isinstance(thing, Element):
-      print("close %s" % thing.name)
+      if grammarType == list:
+        return self.ZeroOrMany(grammar)
+      elif grammarType == dict:
+        return self.MatchDict(grammar)
+      elif grammarType == tuple:
+        return self.ZeroOrOne(grammar)
+      elif grammarType == Atom:
+        return self.Atom(grammar)
+      else:
+        raise Exception("Should not have reached here")
diff --git a/compiler/grammar.py b/compiler/grammar.py
index 9211119..ab3a6a0 100644
--- a/compiler/grammar.py
+++ b/compiler/grammar.py
@@ -3,7 +3,10 @@ from keywords import Atom
 """
 The grammar is defined by the following constructs:
 
-The top level object is called GRAMMAR, which is the grammar for a class. It is a list object.
+The top level object is called CLASS, which is the grammar for a class.
+It is an instance of the Element class.
+An Element holds a name and a grammar, and that grammar is always defined
+as a list.
 
 Inside this list, each element can be any of the following:
 
@@ -18,37 +21,41 @@ a Python structure.
 
 """
 class Element:
+  # Names a grammar list; Engine prints <name> and </name> tags for an Element it compiles
   def __init__(self, name, grammar):
+    assert isinstance(grammar, list), "Element grammar must be a list"
     self.name = name
     self.grammar = grammar
 
-TYPES = Element('type', Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER)
-
 CLASSVARDEC = Element('classVarDec', [
   # static|field type (, name)* ;
   Atom.STATIC | Atom.FIELD,
-  TYPES,
+  Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
+  Atom.IDENTIFIER,
   [Atom.COMMA, Atom.IDENTIFIER],
   Atom.SEMICOLON
 ])
 
-VARDEC = Element('varDec', [Atom.VAR, TYPES, Atom.IDENTIFIER,
+VARDEC = Element('varDec', [Atom.VAR, Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER,
   [Atom.COMMA, Atom.IDENTIFIER],
   Atom.SEMICOLON
 ])
-UNARY_OP = Element('unaryOp', Atom.NOT | Atom.MINUS)
 
-CONSTANT = Element('KeywordConstant', Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS)
+# Since this is not a non-terminal, we can just write it as a constant
+OP = Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ
+UNARY_OP = Atom.NOT | Atom.MINUS
+CONSTANT = Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS
+""" Pseudo-element to help define subroutine declarations """
+RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
 
-TERM = Element('term', Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER)
-
-OP = Element('op', Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ)
+# TODO: This is missing a lot of stuff
+TERM = Element('term', [Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER])
 
 EXPRESSION = Element('expression', [TERM, [OP, TERM]])
 
-EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Atom.COMMA, EXPRESSION]))
+EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
 
-SUBROUTINE_CALL = Element('subroutineCall', {
+DO_STATEMENT = Element('doStatement', [{
   (Atom.IDENTIFIER, Atom.PARAN_OPEN): [
     EXPRESSIONLIST,
     Atom.PARAN_CLOSE,
@@ -59,54 +66,63 @@ SUBROUTINE_CALL = Element('subroutineCall', {
     EXPRESSIONLIST,
     Atom.PARAN_CLOSE
   ]
-})
+}])
 
-STATEMENT = Element('statement', {
-  (Atom.LET): [Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)],
-  (Atom.IF): [
-    Atom.PARAN_OPEN,
-    EXPRESSION,
-    Atom.PARAN_CLOSE,
-    Atom.BRACE_OPEN,
-    lambda: STATEMENTS,
-    Atom.BRACE_CLOSE,
-    # This is the tricky one
-    ( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
-  ],
-  (Atom.WHILE): [
-    Atom.PARAN_OPEN,
-    EXPRESSION,
-    Atom.PARAN_CLOSE,
-    Atom.BRACE_OPEN,
-    lambda: STATEMENTS,
-    Atom.BRACE_CLOSE,
-  ],
-  (Atom.DO): SUBROUTINE_CALL,
-  (Atom.RETURN): [(EXPRESSION), Atom.SEMICOLON]
-})
+LET_STATEMENT = Element('letStatement', [
+  Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)])
 
-STATEMENTS = Element('statements', [STATEMENT])
+IF_STATEMENT = Element('ifStatement', [
+  Atom.PARAN_OPEN,
+  EXPRESSION,
+  Atom.PARAN_CLOSE,
+  Atom.BRACE_OPEN,
+  lambda: STATEMENTS,
+  Atom.BRACE_CLOSE,
+  # Optional else-branch (a tuple means zero-or-one); its body is a statement list too
+  ( Atom.ELSE, Atom.BRACE_OPEN, lambda: STATEMENTS, Atom.BRACE_CLOSE)
+])
+
+WHILE_STATEMENT = Element('whileStatement', [
+  Atom.PARAN_OPEN,
+  EXPRESSION,
+  Atom.PARAN_CLOSE,
+  Atom.BRACE_OPEN,
+  lambda: STATEMENTS,
+  Atom.BRACE_CLOSE,
+])
+
+RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION,), Atom.SEMICOLON])
+
+# A plain dict, since this isn't a non-terminal; keys are tuples of lookahead atoms
+STATEMENT = {
+  (Atom.LET,): LET_STATEMENT,
+  (Atom.IF,): IF_STATEMENT,
+  (Atom.WHILE,): WHILE_STATEMENT,
+  (Atom.DO,): DO_STATEMENT,
+  (Atom.RETURN,): RETURN_STATEMENT
+}
+
+STATEMENTS = Element('statements', [[STATEMENT]])
 
 SUBROUTINE_BODY = Element('subroutineBody', [
   # One or more variable declarations
   # `var type varName (, varName)* ;`
-    [VARDEC],
-    STATEMENTS
+  Atom.BRACE_OPEN,
+  [VARDEC],
+  STATEMENTS,
+  Atom.BRACE_CLOSE
 ])
 
-""" Pseudo-element to help define subroutine declarations """
-RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
-
 # Parameter List =
 #  (
 #    (type varName) (, type varName)*
 #  )?
 # we use tuples for zero OR one of a sequence
-PARAMETER_LIST = Element('parameterList', (
-  TYPES,
+PARAMETER_LIST = Element('parameterList', [(
+  Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
   Atom.IDENTIFIER,
-  [Atom.COMMA, TYPES, Atom.IDENTIFIER]
-))
+  [Atom.COMMA, Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER, Atom.IDENTIFIER]
+)])
 
 SUBROUTINEDEC = Element('subroutineDec', [
   # (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
@@ -117,10 +133,7 @@ SUBROUTINEDEC = Element('subroutineDec', [
   Atom.PARAN_OPEN,
   PARAMETER_LIST,
   Atom.PARAN_CLOSE,
-  # Subroutine Body
-  Atom.BRACE_OPEN,
   SUBROUTINE_BODY,
-  Atom.BRACE_CLOSE,
 ])
 
 CLASS = Element('class', [
diff --git a/compiler/tokenizer.py b/compiler/tokenizer.py
index c5b8641..080c982 100644
--- a/compiler/tokenizer.py
+++ b/compiler/tokenizer.py
@@ -1,33 +1,29 @@
 import re
-from keywords import Keyword
+from keywords import *
 from html import escape
 from enum import Enum
 # Superclass in some sense
-class Token(Enum):
-  KEYWORD = 1
-  SYMBOL = 2
-
 class JackTokenizer:
   SYMBOL_MAP = {
-    '{': Keyword.BRACE_OPEN ,
-    '}': Keyword.BRACE_CLOSE ,
-    '(': Keyword.PARAN_OPEN ,
-    ')': Keyword.PARAN_CLOSE ,
-    '[': Keyword.SQUARE_OPEN ,
-    ']': Keyword.SQUARE_CLOSE ,
-    '.': Keyword.DOT ,
-    ';': Keyword.SEMICOLON ,
-    '+': Keyword.PLUS ,
-    '-': Keyword.MINUS ,
-    '*': Keyword.MUL ,
-    '/': Keyword.DIV ,
-    '&': Keyword.AND ,
-    '|': Keyword.OR ,
-    '<': Keyword.LT ,
-    '>': Keyword.GT ,
-    '=': Keyword.EQ ,
-    '~': Keyword.NOT ,
-    ',': Keyword.COMMA,
+    '{': Symbol.BRACE_OPEN ,
+    '}': Symbol.BRACE_CLOSE ,
+    '(': Symbol.PARAN_OPEN ,
+    ')': Symbol.PARAN_CLOSE ,
+    '[': Symbol.SQUARE_OPEN ,
+    ']': Symbol.SQUARE_CLOSE ,
+    '.': Symbol.DOT ,
+    ';': Symbol.SEMICOLON ,
+    '+': Symbol.PLUS ,
+    '-': Symbol.MINUS ,
+    '*': Symbol.MUL ,
+    '/': Symbol.DIV ,
+    '&': Symbol.AND ,
+    '|': Symbol.OR ,
+    '<': Symbol.LT ,
+    '>': Symbol.GT ,
+    '=': Symbol.EQ ,
+    '~': Symbol.NOT ,
+    ',': Symbol.COMMA,
   }
 
   KEYWORD_MAP = {
@@ -61,16 +57,16 @@ class JackTokenizer:
     elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t):
       return JackTokenizer.SYMBOL_MAP[t]
     elif re.compile("\d+").match(t):
-      return Keyword.INTEGERCONSTANT
+      return Token.INTEGERCONSTANT
     elif re.compile("\".*\"").match(t):
-      return Keyword.STRINGCONSTANT
+      return Token.STRINGCONSTANT
     else:
       # TODO: Put an assert to ensure valid identifier
-      return Keyword.IDENTIFIER
+      return Token.IDENTIFIER
     pass
 
   def printable_token(self):
-    if self.tokenType() == Keyword.STRINGCONSTANT:
+    if self.tokenType() == Token.STRINGCONSTANT:
       return self.current_token()[1:-1]
     else:
       return escape(self.current_token(), True)
@@ -95,7 +91,7 @@ class JackTokenizer:
 
   """ Returns the integer value of the current token """
   def intVal(self):
-    self.assert_type(Keyword.INTEGERCONSTANT)
+    self.assert_type(Token.INTEGERCONSTANT)
     return int(self.token)
 
   """ Returns a list of tokens for that line """