Various other WIP attempts

This commit is contained in:
Nemo 2020-06-24 18:12:36 +05:30
parent d209fabc9a
commit 333484ce7f
3 changed files with 137 additions and 66 deletions

10
compiler/engine.py Normal file
View File

@ -0,0 +1,10 @@
from tokenizer import JackTokenizer
"""
New Compilation Engine
"""
class Engine:
    """New compilation engine.

    Construction builds a ``JackTokenizer`` for ``input_file`` and opens
    ``input_file + ".xml"`` for writing. The instance owns that file handle,
    so release it with :meth:`close` or by using the engine as a context
    manager::

        with Engine(path) as engine:
            ...

    Attributes:
        i: Integer counter, initialised to 0.
        jt: The ``JackTokenizer`` created for ``input_file``.
        file: Writable handle on the ``.xml`` output file.
    """

    def __init__(self, input_file):
        """Create the tokenizer and open the output file.

        Args:
            input_file: Path of the source file, as a ``str``; the output
                path is formed by appending ``".xml"`` to it.
        """
        self.i = 0
        # NOTE(review): the meaning of the second positional argument is
        # defined by JackTokenizer, which is not visible here.
        self.jt = JackTokenizer(input_file, False)
        self.file = open(input_file + ".xml", 'w')

    def close(self):
        """Close the output file. Calling this more than once is harmless."""
        self.file.close()

    def __enter__(self):
        """Return the engine itself so it can be bound by ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file on leaving the ``with`` block.

        Returns ``None``, so an exception raised inside the block propagates.
        """
        self.close()

View File

@ -1,5 +1,67 @@
from keywords import Keyword from keywords import Keyword
class Element:
    """A named node of the grammar.

    Attributes:
        name: Label of the grammar construct (e.g. ``'class'``).
        grammar: The rule body, stored exactly as given.
        terminal: Flag stored as given; defaults to ``False``.
    """

    def __init__(self, name, grammar, terminal=False):
        """Store the three fields unchanged.

        Args:
            name: Label of the grammar construct.
            grammar: Rule body for this construct.
            terminal: Optional flag, ``False`` unless supplied.
        """
        self.name = name
        self.grammar = grammar
        self.terminal = terminal

    def __repr__(self):
        # The rule body is left out on purpose: rules may refer to each
        # other, so printing it could be arbitrarily large.
        return "{}({!r}, terminal={!r})".format(
            type(self).__name__, self.name, self.terminal
        )
# Grammar elements are declared bottom-up. Each Element(...) call runs at
# import time, so a name must already be bound when another element lists it.

# type: int | char | boolean | className
TYPE = Element('type', Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER, True)

# varDec: `var type varName (, varName)* ;`
VARDEC = Element('varDec', [
    Keyword.VAR,
    TYPE,
    Keyword.IDENTIFIER,
    [Keyword.COMMA, Keyword.IDENTIFIER],
    Keyword.SEMICOLON,
])

# classVarDec: `(static | field) type varName (, varName)* ;`
CLASSVARDEC = Element('classVarDec', [
    Keyword.STATIC | Keyword.FIELD,
    TYPE,
    # The first variable name is mandatory; only the `, varName` tail repeats.
    Keyword.IDENTIFIER,
    [Keyword.COMMA, Keyword.IDENTIFIER],
    Keyword.SEMICOLON,
])

# Parameter List =
# (
#   (type varName) (, type varName)*
# )?
# we use tuples for zero OR one of a sequence
PARAMETER_LIST = Element('parameterList', (
    TYPE,
    Keyword.IDENTIFIER,
    [Keyword.COMMA, TYPE, Keyword.IDENTIFIER],
))

SUBROUTINE_BODY = Element('subroutineBody', [
    # Zero or more variable declarations (an inner list marks repetition).
    [VARDEC],
    # STATEMENTS is bound further down the module, so the lookup is deferred
    # with a lambda, the same way the statement rules refer to it.
    lambda: STATEMENTS,
])

SUBROUTINEDEC = Element('subroutineDec', [
    # (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
    # subroutineBody
    Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD,
    # TYPE is an Element, which defines no `|` operator; combine VOID with
    # the Keyword mask it wraps instead.
    Keyword.VOID | TYPE.grammar,
    Keyword.IDENTIFIER,
    Keyword.PARAN_OPEN,
    PARAMETER_LIST,
    Keyword.PARAN_CLOSE,
    # Subroutine Body {
    Keyword.BRACE_OPEN,
    SUBROUTINE_BODY,
    Keyword.BRACE_CLOSE,
])

CLASS = Element('class', [
    # class className {
    Keyword.CLASS,
    Keyword.IDENTIFIER,
    Keyword.BRACE_OPEN,
    # class variable declarations, zero or more = inner list
    [CLASSVARDEC],
    # subroutine declarations, zero or more = inner list
    [SUBROUTINEDEC],
    # }
    Keyword.BRACE_CLOSE,
])
""" """
The grammar is defined by the following constructs: The grammar is defined by the following constructs:
@ -11,25 +73,26 @@ Inside this list, each element can be any of the following:
- a bitwise mask of the Keyword enum to denote multiple possibilities - a bitwise mask of the Keyword enum to denote multiple possibilities
- Another list, to denote zero-or-more of a inner-sequence - Another list, to denote zero-or-more of a inner-sequence
- A tuple, to denote zero-or-one of a inner-sequence - A tuple, to denote zero-or-one of a inner-sequence
- A lambda denotes a non-terminal part of the grammar
This is basically an attempt to translate Figure 10.5 from the book into This is basically an attempt to translate Figure 10.5 from the book into
a Python structure. a Python structure.
""" """
TYPE = Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER UNARY_OP = Element('unaryOp', Keyword.NOT | Keyword.MINUS, True)
UNARY_OP = Keyword.NOT | Keyword.MINUS CONSTANT = Element('KeywordConstant', Keyword.TRUE | Keyword.FALSE, Keyword.NULL, Keyword.THIS, True)
TERM = Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER TERM = Element('term', Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER)
OP = Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ OP = Element('op', Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ, True)
EXPRESSION = [TERM, [OP, TERM]] EXPRESSION = Element('expression', [TERM, [OP, TERM]])
EXPRESSIONLIST = (EXPRESSION, [Keyword.COMMA, EXPRESSION]) EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Keyword.COMMA, EXPRESSION]))
SUBROUTINE_CALL = { SUBROUTINE_CALL = Element('subroutineCall', {
(Keyword.IDENTIFIER, Keyword.PARAN_OPEN): [ (Keyword.IDENTIFIER, Keyword.PARAN_OPEN): [
EXPRESSIONLIST, EXPRESSIONLIST,
Keyword.PARAN_CLOSE, Keyword.PARAN_CLOSE,
@ -40,81 +103,29 @@ SUBROUTINE_CALL = {
EXPRESSIONLIST, EXPRESSIONLIST,
Keyword.PARAN_CLOSE Keyword.PARAN_CLOSE
] ]
} })
STATEMENTS = { STATEMENT = Element('statement', {
(Keyword.LET): [Keyword.IDENTIFIER, (Keyword.SQUARE_OPEN, EXPRESSION, Keyword.SQUARE_CLOSE)], (Keyword.LET): [Keyword.IDENTIFIER, (Keyword.SQUARE_OPEN, EXPRESSION, Keyword.SQUARE_CLOSE)],
(Keyword.IF): [ (Keyword.IF): [
Keyword.PARAN_OPEN, Keyword.PARAN_OPEN,
EXPRESSION, EXPRESSION,
Keyword.PARAN_CLOSE, Keyword.PARAN_CLOSE,
Keyword.BRACE_OPEN, Keyword.BRACE_OPEN,
lambda:STATEMENTS, lambda: STATEMENTS,
Keyword.BRACE_CLOSE, Keyword.BRACE_CLOSE,
( Keyword.ELSE, Keyword.BRACE_OPEN, lambda:STATEMENTS, Keyword.BRACE_CLOSE) ( Keyword.ELSE, Keyword.BRACE_OPEN, lambda:STATEMENT, Keyword.BRACE_CLOSE)
], ],
(Keyword.WHILE): [ (Keyword.WHILE): [
Keyword.PARAN_OPEN, Keyword.PARAN_OPEN,
EXPRESSION, EXPRESSION,
Keyword.PARAN_CLOSE, Keyword.PARAN_CLOSE,
Keyword.BRACE_OPEN, Keyword.BRACE_OPEN,
lambda:STATEMENTS, lambda: STATEMENTS,
Keyword.BRACE_CLOSE, Keyword.BRACE_CLOSE,
], ],
(Keyword.DO): SUBROUTINE_CALL, (Keyword.DO): SUBROUTINE_CALL,
(Keyword.RETURN): [(EXPRESSION), Keyword.SEMICOLON] (Keyword.RETURN): [(EXPRESSION), Keyword.SEMICOLON]
} })
SUBROUTINEDEC = [ STATEMENTS = Element('statements', [STATEMENT])
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
# subroutineBody
Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD,
Keyword.VOID | TYPE,
Keyword.IDENTIFIER,
Keyword.PARAN_OPEN,
# Parameter List =
# (
# (type varName) (, type varName)*
# )?
# we use tuples for zero OR one of a sequence
(
TYPE,
Keyword.IDENTIFIER,
[Keyword.COMMA, TYPE, Keyword.IDENTIFIER]
),
Keyword.PARAN_CLOSE,
# Subroutine Body {
Keyword.BRACE_OPEN,
# One or more variable declarations
# `var type varName (, varName)* ;`
[
Keyword.VAR,
TYPE,
Keyword.IDENTIFIER,
[Keyword.COMMA, Keyword.IDENTIFIER],
Keyword.SEMICOLON
],
STATEMENTS,
Keyword.BRACE_CLOSE,
]
CLASSVARDEC = [
    # (static | field) type varName (, varName)* ;
    Keyword.STATIC | Keyword.FIELD,
    TYPE,
    # The first variable name is mandatory; only the `, varName` tail repeats.
    Keyword.IDENTIFIER,
    [Keyword.COMMA, Keyword.IDENTIFIER],
    Keyword.SEMICOLON,
]
# Top-level rule: the token sequence of one class declaration.
GRAMMAR = [
# class className {
Keyword.CLASS,
Keyword.IDENTIFIER,
Keyword.BRACE_OPEN,
# class variable declarations: CLASSVARDEC is itself a list, so placing it
# here nests a list, which this grammar notation reads as zero-or-more
CLASSVARDEC,
# subroutine declarations: SUBROUTINEDEC is a list too, so zero-or-more again
SUBROUTINEDEC,
# }
Keyword.BRACE_CLOSE
]

50
compiler/grammar.yml Normal file
View File

@ -0,0 +1,50 @@
# Lexemes listed under the `keyword` key.
keyword:
- 'class'
- 'method'
- 'function'
- 'constructor'
- 'int'
- 'boolean'
- 'char'
- 'void'
- 'var'
- 'static'
- 'field'
- 'let'
- 'do'
- 'if'
- 'else'
- 'while'
- 'return'
- 'true'
- 'false'
- 'null'
- 'this'
# Lexemes listed under the `symbol` key; each entry is a single character.
symbol:
- '{'
- '}'
- '('
- ')'
- '['
- ']'
- '.'
- ';'
- '+'
- '-'
- '*'
- '/'
- '&'
- '|'
- '<'
- '>'
- '='
- '~'
- ','
# Predefined
# These keys enumerate no values in this file; each maps to null.
integerConstant: null
# NOTE(review): capitalised, unlike integerConstant above; confirm which
# spelling the consumer of this file looks up.
StringConstant: null
identifier: null
# The node is anchored so the alias in `class` below resolves; an alias with
# no matching anchor makes the whole document fail to load. The value stays
# null, exactly as before.
className: &className null
class:
- class
- *className