From 703c5e467dd540256976cbb3a2cf20b4748714a2 Mon Sep 17 00:00:00 2001 From: Nemo Date: Mon, 6 Jul 2020 17:21:13 +0530 Subject: [PATCH] New compiler is running now and giving some results --- compiler/__init__.py | 6 +-- compiler/engine.py | 67 +++++++++++++++---------- compiler/grammar.py | 111 +++++++++++++++++++++++------------------- compiler/tokenizer.py | 54 ++++++++++---------- 4 files changed, 131 insertions(+), 107 deletions(-) diff --git a/compiler/__init__.py b/compiler/__init__.py index 89f550f..30539c6 100644 --- a/compiler/__init__.py +++ b/compiler/__init__.py @@ -1,8 +1,6 @@ -from tokenizer import JackTokenizer +from engine import Engine import sys if __name__ == '__main__': - j = JackTokenizer(sys.argv[1], True) - # c = CompilationEngine(sys.argv[1]) - # c.CompileClass() + Engine(sys.argv[1]).compileClass() diff --git a/compiler/engine.py b/compiler/engine.py index 72b79fd..c89a5ca 100644 --- a/compiler/engine.py +++ b/compiler/engine.py @@ -1,6 +1,6 @@ from tokenizer import JackTokenizer from keywords import * -from grammar import CLASS +from grammar import CLASS,Element """ New Compilation Engine @@ -9,7 +9,7 @@ class Engine: def __init__(self, input_file): self.i = 0 self.jt = JackTokenizer(input_file, False) - self.file = open(self.xml_file()) + # self.file = open(self.xml_file(input_file)) def xml_file(self, input_file): return input_file + ".xml" @@ -20,13 +20,14 @@ class Engine: return Atom(token.value) def compileClass(self): - self.compile(grammar.CLASS) + self.compile(CLASS) def advance(self): self.jt.advance() def ZeroOrMany(self, grammarList): - if compile(grammarList[0]): + # print("ZeroOrMany") + if self.compile(grammarList[0]): # We now expect the whole of it for e in grammarList: self.compile(e) @@ -39,22 +40,31 @@ class Engine: print(line) def MatchDict(self, dictionary): + # print("MatchDict") xml_rows_for_lookup_terms = [] lookup_keys = () # How much to lookahead - lookahead = len(list(dict.keys())[0]) + lookahead = len(list(dictionary.keys())[0]) for _ in range(lookahead): xml_rows_for_lookup_terms += [self.jt.xml_row()] lookup_keys = lookup_keys + (self.atom(),) self.advance() + grammar = dict[lookup_keys] + + # We must open this before we compile the remainder + if isinstance(grammar, Element): + self.open(grammar) + grammar = grammar.grammar + + # Now we put the first X terms from the conditional for line in xml_rows_for_lookup_terms: self.write(line) - for e in dict[lookup_keys]: - self.compile(e) + return self.compile(grammar) def ZeroOrOne(self, grammarTuple): + # print("ZeroOrOne") if self.compile(grammarTuple[0]): for e in grammarTuple: self.compile(e) @@ -68,32 +78,39 @@ class Engine: current = self.atom() # We use in here to accomodate for bitmasks if current in expected: - print(current) + print(self.jt.xml_row(), end="") self.advance() + return True else: - raise Exception("Expected %s, got %s" % (expected, current)) + return False + + def open(self, el): + print("<%s>" % el.name) + + def close(self, el): + print("" % el.name) def compile(self, thing): # TODO: OPEN TAGS if isinstance(thing, Element): - print("open %s" % thing.name) - grammar = thing.grammar + self.open(thing) + for e in thing.grammar: + self.compile(e) + self.close(thing) elif callable(thing): grammar = thing() + self.compile(grammar) else: grammar = thing - grammarType = type(grammar) + grammarType = type(grammar) - elif grammarType == list: - return self.ZeroOrMany(thing) - elif grammarType == dict: - return self.MatchDict(thing) - elif grammarType == tuple: - return self.ZeroOrOne(thing) - elif grammarType == Atom: - return self.Atom(thing) - elif callable(thing): - return self.compile(thing) - - if isinstance(thing, Element): - print("close %s" % thing.name) + if grammarType == list: + return self.ZeroOrMany(grammar) + elif grammarType == dict: + return self.MatchDict(grammar) + elif grammarType == tuple: + return self.ZeroOrOne(grammar) + elif grammarType == Atom: + return self.Atom(grammar) + else: + raise Exception("Should not have reached here") diff --git a/compiler/grammar.py b/compiler/grammar.py index 9211119..ab3a6a0 100644 --- a/compiler/grammar.py +++ b/compiler/grammar.py @@ -3,7 +3,10 @@ from keywords import Atom """ The grammar is defined by the following constructs: -The top level object is called GRAMMAR, which is the grammar for a class. It is a list object. +The top level object is called GRAMMAR, which is the grammar for a class. +It is a instance of the Element class +The element class contains a grammar element, which is always defined as a list +for an element class. Inside this list, each element can be any of the following: @@ -18,37 +21,41 @@ a Python structure. """ class Element: + # Usually I avoid inverted boolean variable names, but this is much cleaner def __init__(self, name, grammar): + assert(type(grammar)==list) self.name = name self.grammar = grammar -TYPES = Element('type', Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER) - CLASSVARDEC = Element('classVarDec', [ # static|field type (, name)* ; Atom.STATIC | Atom.FIELD, - TYPES, + Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, + Atom.IDENTIFIER, [Atom.COMMA, Atom.IDENTIFIER], Atom.SEMICOLON ]) -VARDEC = Element('varDec', [Atom.VAR, TYPES, Atom.IDENTIFIER, +VARDEC = Element('varDec', [Atom.VAR, Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER, [Atom.COMMA, Atom.IDENTIFIER], Atom.SEMICOLON ]) -UNARY_OP = Element('unaryOp', Atom.NOT | Atom.MINUS) -CONSTANT = Element('KeywordConstant', Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS) +# Since this is not a non-terminal, we can just write it as a constant +OP = Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ +UNARY_OP = Atom.NOT | Atom.MINUS +CONSTANT = Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS +""" Pseudo-element to help define subroutine declarations """ +RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID -TERM = Element('term', Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER) - -OP = Element('op', Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ) +# TODO: This is missing a lot of stuff +TERM = Element('term', [Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER]) EXPRESSION = Element('expression', [TERM, [OP, TERM]]) -EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Atom.COMMA, EXPRESSION])) +EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])]) -SUBROUTINE_CALL = Element('subroutineCall', { +DO_STATEMENT = Element('doStatement', [{ (Atom.IDENTIFIER, Atom.PARAN_OPEN): [ EXPRESSIONLIST, Atom.PARAN_CLOSE, @@ -59,54 +66,63 @@ SUBROUTINE_CALL = Element('subroutineCall', { EXPRESSIONLIST, Atom.PARAN_CLOSE ] -}) +}]) -STATEMENT = Element('statement', { - (Atom.LET): [Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)], - (Atom.IF): [ - Atom.PARAN_OPEN, - EXPRESSION, - Atom.PARAN_CLOSE, - Atom.BRACE_OPEN, - lambda: STATEMENTS, - Atom.BRACE_CLOSE, - # This is the tricky one - ( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE) - ], - (Atom.WHILE): [ - Atom.PARAN_OPEN, - EXPRESSION, - Atom.PARAN_CLOSE, - Atom.BRACE_OPEN, - lambda: STATEMENTS, - Atom.BRACE_CLOSE, - ], - (Atom.DO): SUBROUTINE_CALL, - (Atom.RETURN): [(EXPRESSION), Atom.SEMICOLON] -}) +LET_STATEMENT = Element('whileStatement', [ + Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)]) -STATEMENTS = Element('statements', [STATEMENT]) +IF_STATEMENT = Element('ifStatement', [ + Atom.PARAN_OPEN, + EXPRESSION, + Atom.PARAN_CLOSE, + Atom.BRACE_OPEN, + lambda: STATEMENTS, + Atom.BRACE_CLOSE, + # This is the tricky one + ( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE) +]) + +WHILE_STATEMENT = Element('whileStatement', [ + Atom.PARAN_OPEN, + EXPRESSION, + Atom.PARAN_CLOSE, + Atom.BRACE_OPEN, + lambda: STATEMENTS, + Atom.BRACE_CLOSE, +]) + +RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON]) + +# Just a constant, since this isn't a non-terminal +STATEMENT = { + (Atom.LET): LET_STATEMENT, + (Atom.IF): IF_STATEMENT, + (Atom.WHILE): WHILE_STATEMENT, + (Atom.DO): DO_STATEMENT, + (Atom.RETURN): RETURN_STATEMENT +} + +STATEMENTS = Element('statements', [[STATEMENT]]) SUBROUTINE_BODY = Element('subroutineBody', [ # One or more variable declarations # `var type varName (, varName)* ;` - [VARDEC], - STATEMENTS + Atom.BRACE_OPEN, + [VARDEC], + STATEMENTS, + Atom.BRACE_CLOSE ]) -""" Pseudo-element to help define subroutine declarations """ -RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID - # Parameter List = # ( # (type varName) (, type varName)* # )? # we use tuples for zero OR one of a sequence -PARAMETER_LIST = Element('parameterList', ( - TYPES, +PARAMETER_LIST = Element('parameterList', [( + Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER, - [Atom.COMMA, TYPES, Atom.IDENTIFIER] -)) + [Atom.COMMA, Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER, Atom.IDENTIFIER] +)]) SUBROUTINEDEC = Element('subroutineDec', [ # (constructor | function | method) (void | type) subRoutineName '(' parameterList ')' @@ -117,10 +133,7 @@ SUBROUTINEDEC = Element('subroutineDec', [ Atom.PARAN_OPEN, PARAMETER_LIST, Atom.PARAN_CLOSE, - # Subroutine Body - Atom.BRACE_OPEN, SUBROUTINE_BODY, - Atom.BRACE_CLOSE, ]) CLASS = Element('class', [ diff --git a/compiler/tokenizer.py b/compiler/tokenizer.py index c5b8641..080c982 100644 --- a/compiler/tokenizer.py +++ b/compiler/tokenizer.py @@ -1,33 +1,29 @@ import re -from keywords import Keyword +from keywords import * from html import escape from enum import Enum # Superclass in some sense -class Token(Enum): - KEYWORD = 1 - SYMBOL = 2 - class JackTokenizer: SYMBOL_MAP = { - '{': Keyword.BRACE_OPEN , - '}': Keyword.BRACE_CLOSE , - '(': Keyword.PARAN_OPEN , - ')': Keyword.PARAN_CLOSE , - '[': Keyword.SQUARE_OPEN , - ']': Keyword.SQUARE_CLOSE , - '.': Keyword.DOT , - ';': Keyword.SEMICOLON , - '+': Keyword.PLUS , - '-': Keyword.MINUS , - '*': Keyword.MUL , - '/': Keyword.DIV , - '&': Keyword.AND , - '|': Keyword.OR , - '<': Keyword.LT , - '>': Keyword.GT , - '=': Keyword.EQ , - '~': Keyword.NOT , - ',': Keyword.COMMA, + '{': Symbol.BRACE_OPEN , + '}': Symbol.BRACE_CLOSE , + '(': Symbol.PARAN_OPEN , + ')': Symbol.PARAN_CLOSE , + '[': Symbol.SQUARE_OPEN , + ']': Symbol.SQUARE_CLOSE , + '.': Symbol.DOT , + ';': Symbol.SEMICOLON , + '+': Symbol.PLUS , + '-': Symbol.MINUS , + '*': Symbol.MUL , + '/': Symbol.DIV , + '&': Symbol.AND , + '|': Symbol.OR , + '<': Symbol.LT , + '>': Symbol.GT , + '=': Symbol.EQ , + '~': Symbol.NOT , + ',': Symbol.COMMA, } KEYWORD_MAP = { @@ -61,16 +57,16 @@ class JackTokenizer: elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t): return JackTokenizer.SYMBOL_MAP[t] elif re.compile("\d+").match(t): - return Keyword.INTEGERCONSTANT + return Token.INTEGERCONSTANT elif re.compile("\".*\"").match(t): - return Keyword.STRINGCONSTANT + return Token.STRINGCONSTANT else: # TODO: Put an assert to ensure valid identifier - return Keyword.IDENTIFIER + return Token.IDENTIFIER pass def printable_token(self): - if self.tokenType() == Keyword.STRINGCONSTANT: + if self.tokenType() == Token.STRINGCONSTANT: return self.current_token()[1:-1] else: return escape(self.current_token(), True) @@ -95,7 +91,7 @@ class JackTokenizer: """ Returns the integer value of the current token """ def intVal(self): - self.assert_type(Keyword.INTEGERCONSTANT) + self.assert_type(Token.INTEGERCONSTANT) return int(self.token) """ Returns a list of tokens for that line """