From 333484ce7f1925bfa318694d89fe34109b8d98b2 Mon Sep 17 00:00:00 2001 From: Nemo Date: Wed, 24 Jun 2020 18:12:36 +0530 Subject: [PATCH] Various other WIP attempts --- compiler/engine.py | 10 +++ compiler/grammar.py | 143 +++++++++++++++++++++++-------------------- compiler/grammar.yml | 50 +++++++++++++++ 3 files changed, 137 insertions(+), 66 deletions(-) create mode 100644 compiler/engine.py create mode 100644 compiler/grammar.yml diff --git a/compiler/engine.py b/compiler/engine.py new file mode 100644 index 0000000..24feb6b --- /dev/null +++ b/compiler/engine.py @@ -0,0 +1,10 @@ +from tokenizer import JackTokenizer + +""" +New Compilation Engine +""" +class Engine: + def __init__(self, input_file): + self.i = 0 + self.jt = JackTokenizer(input_file, False) + self.file = open(input_file + ".xml", 'w') diff --git a/compiler/grammar.py b/compiler/grammar.py index 178f441..4b06791 100644 --- a/compiler/grammar.py +++ b/compiler/grammar.py @@ -1,5 +1,67 @@ from keywords import Keyword +class Element: + def __init__(name, grammar, terminal = False): + self.name = name + self.grammar = grammar + self.terminal = terminal + +CLASS = Element('class', [ + # class className { + Keyword.CLASS, + Keyword.IDENTIFIER, + Keyword.BRACE_OPEN, + # class Variable Declarations (one or more) = list + CLASSVARDEC, + # subroutine declarations (one or more) = list + SUBROUTINEDEC, + # } + Keyword.BRACE_CLOSE +]) + +CLASSVARDEC = Element('classVarDec', [ + # static|field type (, name)* ; + Keyword.STATIC | Keyword.FIELD, + TYPE, + [Keyword.COMMA, Keyword.IDENTIFIER], + Keyword.SEMICOLON +]) + +# Parameter List = +# ( +# (type varName) (, type varName)* +# )? +# we use tuples for zero OR one of a sequence +PARAMETER_LIST = Element('parameterList', ( + TYPE, + Keyword.IDENTIFIER, + [Keyword.COMMA, TYPE, Keyword.IDENTIFIER] +)) + +TYPE = Element('type', Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER, True) + +SUBROUTINE_BODY = Element('subroutineBody', [ + # One or more variable declarations + # `var type varName (, varName)* ;` + [VARDEC], + STATEMENTS +]) + +SUBROUTINEDEC = Element('subroutineDec', [ + # (constructor | function | method) (void | type) subRoutineName '(' parameterList ')' + # subroutineBody + Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD, + Keyword.VOID | TYPE, + Keyword.IDENTIFIER, + Keyword.PARAN_OPEN, + PARAMETER_LIST, + Keyword.PARAN_CLOSE, + # Subroutine Body { + Keyword.BRACE_OPEN, + SUBROUTINE_BODY, + Keyword.BRACE_CLOSE, +]) + """ The grammar is defined by the following constructs: @@ -11,25 +73,26 @@ Inside this list, each element can be any of the following: - a bitwise mask of the Keyword enum to denote multiple possibilities - Another list, to denote zero-or-more of a inner-sequence - A tuple, to denote zero-or-one of a inner-sequence +- A lambda denotes a non-terminal part of the grammar This is basically an attempt to translate Figure 10.5 from the book into a Python structure. """ -TYPE = Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER +UNARY_OP = Element('unaryOp', Keyword.NOT | Keyword.MINUS, True) -UNARY_OP = Keyword.NOT | Keyword.MINUS +CONSTANT = Element('KeywordConstant', Keyword.TRUE | Keyword.FALSE, Keyword.NULL, Keyword.THIS, True) -TERM = Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER +TERM = Element('term', Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER) -OP = Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ +OP = Element('op', Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ, True) -EXPRESSION = [TERM, [OP, TERM]] +EXPRESSION = Element('expression', [TERM, [OP, TERM]]) -EXPRESSIONLIST = (EXPRESSION, [Keyword.COMMA, EXPRESSION]) +EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Keyword.COMMA, EXPRESSION])) -SUBROUTINE_CALL = { +SUBROUTINE_CALL = Element('subroutineCall', { (Keyword.IDENTIFIER, Keyword.PARAN_OPEN): [ EXPRESSIONLIST, Keyword.PARAN_CLOSE, @@ -40,81 +103,29 @@ SUBROUTINE_CALL = { EXPRESSIONLIST, Keyword.PARAN_CLOSE ] -} +}) -STATEMENTS = { +STATEMENT = Element('statement', { (Keyword.LET): [Keyword.IDENTIFIER, (Keyword.SQUARE_OPEN, EXPRESSION, Keyword.SQUARE_CLOSE)], (Keyword.IF): [ Keyword.PARAN_OPEN, EXPRESSION, Keyword.PARAN_CLOSE, Keyword.BRACE_OPEN, - lambda:STATEMENTS, + lambda: STATEMENTS, Keyword.BRACE_CLOSE, - ( Keyword.ELSE, Keyword.BRACE_OPEN, lambda:STATEMENTS, Keyword.BRACE_CLOSE) + ( Keyword.ELSE, Keyword.BRACE_OPEN, lambda:STATEMENT, Keyword.BRACE_CLOSE) ], (Keyword.WHILE): [ Keyword.PARAN_OPEN, EXPRESSION, Keyword.PARAN_CLOSE, Keyword.BRACE_OPEN, - lambda:STATEMENTS, + lambda: STATEMENTS, Keyword.BRACE_CLOSE, ], (Keyword.DO): SUBROUTINE_CALL, (Keyword.RETURN): [(EXPRESSION), Keyword.SEMICOLON] -} +}) -SUBROUTINEDEC = [ - # (constructor | function | method) (void | type) subRoutineName '(' parameterList ')' - # subroutineBody - Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD, - Keyword.VOID | TYPE, - Keyword.IDENTIFIER, - Keyword.PARAN_OPEN, - # Parameter List = - # ( - # (type varName) (, type varName)* - # )? - # we use tuples for zero OR one of a sequence - ( - TYPE, - Keyword.IDENTIFIER, - [Keyword.COMMA, TYPE, Keyword.IDENTIFIER] - ), - Keyword.PARAN_CLOSE, - # Subroutine Body { - Keyword.BRACE_OPEN, - # One or more variable declarations - # `var type varName (, varName)* ;` - [ - Keyword.VAR, - TYPE, - Keyword.IDENTIFIER, - [Keyword.COMMA, Keyword.IDENTIFIER], - Keyword.SEMICOLON - ], - STATEMENTS, - Keyword.BRACE_CLOSE, -] - -CLASSVARDEC = [ - # static|field type (, name)* ; - Keyword.STATIC | Keyword.FIELD, - TYPE, - [Keyword.COMMA, Keyword.IDENTIFIER], - Keyword.SEMICOLON -] - -GRAMMAR = [ - # class className { - Keyword.CLASS, - Keyword.IDENTIFIER, - Keyword.BRACE_OPEN, - # class Variable Declarations (one or more) = list - CLASSVARDEC, - # subroutine declarations (one or more) = list - SUBROUTINEDEC, - # } - Keyword.BRACE_CLOSE -] +STATEMENTS = Element('statements', [STATEMENT]) diff --git a/compiler/grammar.yml b/compiler/grammar.yml new file mode 100644 index 0000000..a458524 --- /dev/null +++ b/compiler/grammar.yml @@ -0,0 +1,50 @@ +keyword: + - 'class' + - 'method' + - 'function' + - 'constructor' + - 'int' + - 'boolean' + - 'char' + - 'void' + - 'var' + - 'static' + - 'field' + - 'let' + - 'do' + - 'if' + - 'else' + - 'while' + - 'return' + - 'true' + - 'false' + - 'null' + - 'this' +symbol: + - '{' + - '}' + - '(' + - ')' + - '[' + - ']' + - '.' + - ';' + - '+' + - '-' + - '*' + - '/' + - '&' + - '|' + - '<' + - '>' + - '=' + - '~' + - ',' +# Predefined +integerConstant: null +StringConstant: null +identifier: null +className: +class: + - class + - *className