From d209fabc9a6f86ece2bc6cc509778e6475961283 Mon Sep 17 00:00:00 2001
From: Nemo
Date: Thu, 18 Jun 2020 18:21:06 +0530
Subject: [PATCH] Work from yesterday on the CompilationEngine

- Restructured the grammar into a separate Python module today
- Doesn't cover array expressions and subroutine calls inside expressions
---
 compiler/compilation.py | 123 ++++++++++++++++++++++++++++++++++++++++
 compiler/grammar.py     | 120 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 243 insertions(+)
 create mode 100644 compiler/compilation.py
 create mode 100644 compiler/grammar.py

diff --git a/compiler/compilation.py b/compiler/compilation.py
new file mode 100644
index 0000000..832e7e8
--- /dev/null
+++ b/compiler/compilation.py
@@ -0,0 +1,123 @@
+from grammar import GRAMMAR
+from keywords import Keyword
+# Assumed import path: the tokenizer module is not part of this patch, so
+# adjust this to wherever JackTokenizer, Token and Symbol actually live.
+from tokenizer import JackTokenizer, Token, Symbol
+
+
+class CompilationEngine:
+    # KEYWORD_MAP and SYMBOL_MAP are expected to map raw tokens to Keyword /
+    # Symbol enum members; they are not defined in this file yet.
+
+    def xml_file(self, input_file):
+        return input_file + ".xml"
+
+    def k(self):
+        """ Returns the current keyword; does an implicit assert """
+        assert(self.jt.tokenType() == Token.KEYWORD)
+        return CompilationEngine.KEYWORD_MAP[self.jt.current_token()]
+
+    def v(self):
+        assert(self.type() == Token.SYMBOL or self.type() == Token.IDENTIFIER)
+        return self.jt.current_token()
+
+    def open(self, tag):
+        """ Opens one of the hierarchical tags """
+        self.file.write((self.i * " ") + "<%s>\n" % tag)
+        self.i += 2
+
+    def close(self, tag):
+        """ Closes a tag, dedenting first so it lines up with its opener """
+        self.i -= 2
+        self.file.write((self.i * " ") + "</%s>\n" % tag)
+
+    def advance(self):
+        """ Advances the Tokenizer and prints a debug statement """
+        old = self.jt.current_token()
+        self.jt.advance()
+        print("Advanced from {old} to {t} {new}".format(t=self.type(), old=old, new=self.jt.current_token()))
+
+    def type(self):
+        return self.jt.tokenType()
+
+    def s(self):
+        return CompilationEngine.SYMBOL_MAP[self.jt.symbol()]
+
+    def CompileClass(self):
+        self.open("class")
+        self.do_the_thing(Keyword.CLASS, Token.IDENTIFIER, Symbol.BRACE_OPEN)
+        while not (self.type() == Token.SYMBOL and self.v() == '}'):
+            if self.k() in [Keyword.STATIC, Keyword.FIELD]:
+                self.CompileClassVarDec()
+            elif self.k() in [Keyword.CONSTRUCTOR, Keyword.FUNCTION, Keyword.METHOD]:
+                self.CompileSubroutine()
+            else:
+                raise RuntimeError("Invalid Token")
+        assert(self.type() == Token.SYMBOL and self.v() == '}')
+        self.close("class")
+
+    def we_need_a_type(self):
+        if self.type() == Token.KEYWORD:
+            self.do_the_thing(Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN)
+        else:
+            self.do_the_thing(Token.IDENTIFIER)
+
+    def CompileClassVarDec(self):
+        # ('static' | 'field') type varName (',' varName)* ';'
+        # assumes Symbol.COMMA and Symbol.SEMICOLON exist alongside Symbol.BRACE_OPEN
+        self.open('classVarDec')
+        self.do_the_thing(Keyword.STATIC | Keyword.FIELD)
+        self.we_need_a_type()
+        self.do_the_thing(Token.IDENTIFIER, [Symbol.COMMA, Token.IDENTIFIER], Symbol.SEMICOLON)
+        self.close('classVarDec')
+        self.advance()
+
+    def write(self, klass, subklass=None):
+        """ Writes a single line (newline-terminated) to the XML, taking the indentation into account """
+        print(self.type())
+        print(klass)
+        assert(klass == self.type())
+        if klass == Token.SYMBOL:
+            assert(subklass == self.s())
+        elif klass == Token.KEYWORD:
+            assert(subklass & self.k())
+        elif klass == Token.INTEGERCONSTANT:
+            self.jt.intVal()
+        self.file.write((self.i * " ") + self.jt.xml_row())
+
+    def matches(self, T):
+        return ((isinstance(T, Symbol) and T == self.s())
+                or (isinstance(T, Keyword) and T & self.k())
+                or T == Token.INTEGERCONSTANT
+                or T == Token.STRINGCONSTANT)
+
+    def do_the_thing(self, *args):
+        for T in args:
+            # We use a list for *, which is zero or more times
+            if isinstance(T, list):
+                # keep consuming the inner sequence while its first element
+                # matches the current token
+                while self.matches(T[0]):
+                    self.do_the_thing(*T)
+                continue
+            if isinstance(T, Symbol):
+                self.write(Token.SYMBOL, T)
+            elif isinstance(T, Keyword):
+                self.write(Token.KEYWORD, T)
+            else:
+                assert(self.type() in [Token.IDENTIFIER, Token.INTEGERCONSTANT, Token.STRINGCONSTANT])
+                self.write(T)
+            self.advance()
+
+    def CompileSubroutine(self):
+        self.open('subroutineDec')
+        # TODO: walk SUBROUTINEDEC from the grammar module
+        pass
+    def CompileParameterList(self):
+        pass
+    def CompileVarDec(self):
+        pass
+    def CompileStatements(self):
+        pass
+    def CompileDo(self):
+        pass
+    def CompileLet(self):
+        pass
+    def CompileWhile(self):
+        pass
+    def CompileReturn(self):
+        pass
+    def CompileIf(self):
+        pass
+    def CompileTerm(self):
+        pass
+
+    def __init__(self, input_file):
+        self.i = 0
+        self.jt = JackTokenizer(input_file, False)
+        self.file = open(self.xml_file(input_file), 'w')
diff --git a/compiler/grammar.py b/compiler/grammar.py
new file mode 100644
index 0000000..178f441
--- /dev/null
+++ b/compiler/grammar.py
@@ -0,0 +1,120 @@
+"""
+The grammar is defined by the following constructs:
+
+The top level object is called GRAMMAR, which is the grammar for a class.
+It is a list object.
+
+Inside this list, each element can be any of the following:
+
+- a token (denoted by a Keyword enum member)
+- a bitwise mask of Keyword enum members, to denote multiple possibilities
+- another list, to denote zero or more of an inner sequence
+- a tuple, to denote zero or one of an inner sequence
+- a dict, keyed by the leading token(s), to pick between alternative
+  continuations
+
+This is basically an attempt to translate Figure 10.5 from the book into
+a Python structure.
+"""
+
+from keywords import Keyword
+
+TYPE = Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER
+
+UNARY_OP = Keyword.NOT | Keyword.MINUS
+
+TERM = Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER
+
+OP = Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ
+
+# term (op term)*
+EXPRESSION = [TERM, [OP, TERM]]
+
+# (expression (',' expression)*)?
+EXPRESSIONLIST = (EXPRESSION, [Keyword.COMMA, EXPRESSION])
+
+SUBROUTINE_CALL = {
+    (Keyword.IDENTIFIER, Keyword.PARAN_OPEN): [
+        EXPRESSIONLIST,
+        Keyword.PARAN_CLOSE,
+    ],
+    (Keyword.IDENTIFIER, Keyword.DOT): [
+        Keyword.IDENTIFIER,
+        Keyword.PARAN_OPEN,
+        EXPRESSIONLIST,
+        Keyword.PARAN_CLOSE
+    ]
+}
+
+STATEMENTS = {
+    # 'let' varName ('[' expression ']')? '=' expression ';'
+    Keyword.LET: [
+        Keyword.IDENTIFIER,
+        (Keyword.SQUARE_OPEN, EXPRESSION, Keyword.SQUARE_CLOSE),
+        Keyword.EQ,
+        EXPRESSION,
+        Keyword.SEMICOLON
+    ],
+    Keyword.IF: [
+        Keyword.PARAN_OPEN,
+        EXPRESSION,
+        Keyword.PARAN_CLOSE,
+        Keyword.BRACE_OPEN,
+        lambda: STATEMENTS,
+        Keyword.BRACE_CLOSE,
+        (Keyword.ELSE, Keyword.BRACE_OPEN, lambda: STATEMENTS, Keyword.BRACE_CLOSE)
+    ],
+    Keyword.WHILE: [
+        Keyword.PARAN_OPEN,
+        EXPRESSION,
+        Keyword.PARAN_CLOSE,
+        Keyword.BRACE_OPEN,
+        lambda: STATEMENTS,
+        Keyword.BRACE_CLOSE,
+    ],
+    # 'do' subroutineCall ';'
+    Keyword.DO: [SUBROUTINE_CALL, Keyword.SEMICOLON],
+    # 'return' expression? ';' -- the trailing comma makes this a real
+    # one-element tuple, i.e. an optional expression
+    Keyword.RETURN: [(EXPRESSION,), Keyword.SEMICOLON]
+}
+
+SUBROUTINEDEC = [
+    # (constructor | function | method) (void | type) subroutineName '(' parameterList ')'
+    # subroutineBody
+    Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD,
+    Keyword.VOID | TYPE,
+    Keyword.IDENTIFIER,
+    Keyword.PARAN_OPEN,
+    # Parameter List =
+    # (
+    #    (type varName) (',' type varName)*
+    # )?
+    # we use tuples for zero or one of a sequence
+    (
+        TYPE,
+        Keyword.IDENTIFIER,
+        [Keyword.COMMA, TYPE, Keyword.IDENTIFIER]
+    ),
+    Keyword.PARAN_CLOSE,
+    # Subroutine Body {
+    Keyword.BRACE_OPEN,
+    # Zero or more variable declarations
+    # `var type varName (',' varName)* ;`
+    [
+        Keyword.VAR,
+        TYPE,
+        Keyword.IDENTIFIER,
+        [Keyword.COMMA, Keyword.IDENTIFIER],
+        Keyword.SEMICOLON
+    ],
+    STATEMENTS,
+    Keyword.BRACE_CLOSE,
+]
+
+CLASSVARDEC = [
+    # ('static' | 'field') type varName (',' varName)* ';'
+    Keyword.STATIC | Keyword.FIELD,
+    TYPE,
+    Keyword.IDENTIFIER,
+    [Keyword.COMMA, Keyword.IDENTIFIER],
+    Keyword.SEMICOLON
+]
+
+GRAMMAR = [
+    # class className {
+    Keyword.CLASS,
+    Keyword.IDENTIFIER,
+    Keyword.BRACE_OPEN,
+    # class variable declarations (zero or more) = list
+    CLASSVARDEC,
+    # subroutine declarations (zero or more) = list
+    SUBROUTINEDEC,
+    # }
+    Keyword.BRACE_CLOSE
+]
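
Note on the bitwise masks used above: expressions like Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN, and the subklass & self.k() check in CompilationEngine.write(), only behave as "one of these" masks if Keyword is an enum.Flag. The keywords module itself is not part of this patch, so what follows is only a sketch under that assumption: a reduced Keyword enum whose member names mirror grammar.py, plus a toy walk() helper (not project code) that shows how each grammar construct kind -- mask, list, tuple, dict -- could be dispatched.

# keywords_sketch.py -- illustration only, not part of the patch
import enum


class Keyword(enum.Flag):
    # A reduced set of members, mirroring names used in grammar.py; the real
    # keywords module presumably defines the full token set.
    CLASS = enum.auto()
    INT = enum.auto()
    CHAR = enum.auto()
    BOOLEAN = enum.auto()
    IDENTIFIER = enum.auto()
    COMMA = enum.auto()
    SEMICOLON = enum.auto()
    STATIC = enum.auto()
    FIELD = enum.auto()


TYPE = Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER

# A mask answers "is this token one of these?" with a single bitwise AND
assert Keyword.INT & TYPE
assert not (Keyword.STATIC & TYPE)


def walk(construct, depth=0):
    """Toy traversal showing how each construct kind is dispatched:
    Keyword mask -> one token, list -> zero or more of a sequence,
    tuple -> zero or one of a sequence, dict -> choice on lookahead."""
    pad = "  " * depth
    if isinstance(construct, Keyword):
        print(pad + "token in %s" % construct)
    elif isinstance(construct, list):
        print(pad + "zero or more of:")
        for part in construct:
            walk(part, depth + 1)
    elif isinstance(construct, tuple):
        print(pad + "zero or one of:")
        for part in construct:
            walk(part, depth + 1)
    elif isinstance(construct, dict):
        print(pad + "choice on lookahead:")
        for lookahead, rest in construct.items():
            print(pad + "  after %s:" % (lookahead,))
            walk(rest, depth + 2)


# The optional parameter list from SUBROUTINEDEC, reproduced here
PARAMETER_LIST = (
    TYPE,
    Keyword.IDENTIFIER,
    [Keyword.COMMA, TYPE, Keyword.IDENTIFIER]
)

walk(PARAMETER_LIST)

If Keyword were a plain enum.Enum instead, the | operations in grammar.py would raise a TypeError, so Flag (or IntFlag) looks like the intended base class for it.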