Various other WIP attempts

This commit is contained in:
Nemo 2020-06-24 18:12:36 +05:30
parent d209fabc9a
commit 333484ce7f
3 changed files with 137 additions and 66 deletions

10
compiler/engine.py Normal file
View File

@ -0,0 +1,10 @@
from tokenizer import JackTokenizer
"""
New Compilation Engine
"""
class Engine:
    """Compilation engine pairing a ``JackTokenizer`` with an XML output file.

    The output file is opened eagerly in ``__init__``. Call :meth:`close`
    when done, or use the instance as a context manager, so the handle is
    released deterministically instead of whenever the object is collected.
    """

    def __init__(self, input_file):
        """Create the tokenizer and open the output file for writing.

        Args:
            input_file: Path handed to ``JackTokenizer``; the output file is
                created at ``input_file + ".xml"``.
        """
        # Index/counter initialised to zero; not read anywhere in this class yet.
        self.i = 0
        # Build the tokenizer before opening the output so that a tokenizer
        # failure does not leave an open file handle behind.
        self.jt = JackTokenizer(input_file, False)
        self.file = open(input_file + ".xml", 'w')

    def close(self):
        """Close the output file; calling this more than once is harmless."""
        self.file.close()

    def __enter__(self):
        """Return the engine itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the output file on leaving the ``with`` block.

        Returns ``False`` so that any in-flight exception keeps propagating.
        """
        self.close()
        return False

View File

@ -1,5 +1,67 @@
from keywords import Keyword
class Element:
    """A named node of the grammar description.

    The constructor stores its arguments unchanged; no validation happens here.
    """

    def __init__(self, name, grammar, terminal=False):
        """Record the element's name, its rule and its terminal flag.

        Args:
            name: Label of the element (the definitions in this file pass
                strings such as ``'class'`` or ``'type'``).
            grammar: The rule for this element. The definitions in this file
                pass a Keyword mask, a list, a tuple or a dict.
            terminal: Flag stored as given; defaults to ``False``.
        """
        # The signature previously lacked `self`, so constructing an Element
        # failed (NameError on `self`, or TypeError when `terminal` was passed).
        self.name = name
        self.grammar = grammar
        self.terminal = terminal
# Top-level rule: the `class` keyword, an identifier, then a brace-delimited
# body made of the class-variable and subroutine declaration rules.
# NOTE(review): CLASSVARDEC and SUBROUTINEDEC are only assigned further down
# in the visible code, so evaluating this statement first would raise
# NameError unless they are defined earlier elsewhere — confirm the ordering.
# NOTE(review): the inline comments say "= list", but both names are passed
# bare rather than wrapped in a list — confirm whether repetition is intended.
CLASS = Element('class', [
# class className {
Keyword.CLASS,
Keyword.IDENTIFIER,
Keyword.BRACE_OPEN,
# class Variable Declarations (one or more) = list
CLASSVARDEC,
# subroutine declarations (one or more) = list
SUBROUTINEDEC,
# }
Keyword.BRACE_CLOSE
])
# Rule for a class-level variable declaration.
# NOTE(review): TYPE is assigned only after this statement in the visible
# code — confirm it is defined before this line runs.
# NOTE(review): there is no Keyword.IDENTIFIER between TYPE and the repeated
# (COMMA, IDENTIFIER) group, so the first variable name appears to be
# missing from the sequence — confirm against the intended grammar.
CLASSVARDEC = Element('classVarDec', [
# static|field type (, name)* ;
Keyword.STATIC | Keyword.FIELD,
TYPE,
[Keyword.COMMA, Keyword.IDENTIFIER],
Keyword.SEMICOLON
])
# Parameter List =
# (
# (type varName) (, type varName)*
# )?
# we use tuples for zero OR one of a sequence
# The inner list is the repeated `, type varName` tail.
# NOTE(review): TYPE is assigned only after this statement in the visible
# code — confirm it is defined before this line runs.
PARAMETER_LIST = Element('parameterList', (
TYPE,
Keyword.IDENTIFIER,
[Keyword.COMMA, TYPE, Keyword.IDENTIFIER]
))
# Element flagged terminal whose rule is the OR of the INT, CHAR, BOOLEAN and
# IDENTIFIER Keyword members (a mask denoting "any one of these").
# NOTE(review): this assignment comes after the rules above that already
# reference TYPE — confirm the intended definition order.
TYPE = Element('type', Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER, True)
# Rule for a subroutine body: a repeated VARDEC group followed by STATEMENTS.
# NOTE(review): VARDEC is not assigned anywhere in the visible code, and
# STATEMENTS is assigned only near the end of the file — evaluating this
# statement here would raise NameError unless both exist earlier; confirm.
# NOTE(review): the comment below says "one or more", while a nested list is
# described elsewhere in this file as zero-or-more — confirm which is meant.
SUBROUTINE_BODY = Element('subroutineBody', [
# One or more variable declarations
# `var type varName (, varName)* ;`
[VARDEC],
STATEMENTS
])
# Rule for a subroutine declaration: kind keyword, return type, name, a
# parenthesised PARAMETER_LIST, then a brace-delimited SUBROUTINE_BODY.
# NOTE(review): TYPE is an Element instance where it is assigned in this
# file, and Element defines no `__or__`/`__ror__`, so `Keyword.VOID | TYPE`
# relies entirely on Keyword accepting an Element operand — this looks like
# it would raise TypeError; confirm and consider OR-ing with TYPE.grammar.
SUBROUTINEDEC = Element('subroutineDec', [
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
# subroutineBody
Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD,
Keyword.VOID | TYPE,
Keyword.IDENTIFIER,
Keyword.PARAN_OPEN,
PARAMETER_LIST,
Keyword.PARAN_CLOSE,
# Subroutine Body {
Keyword.BRACE_OPEN,
SUBROUTINE_BODY,
Keyword.BRACE_CLOSE,
])
"""
The grammar is defined by the following constructs:
@ -11,25 +73,26 @@ Inside this list, each element can be any of the following:
- a bitwise mask of the Keyword enum to denote multiple possibilities
- Another list, to denote zero or more repetitions of an inner sequence
- A tuple, to denote zero or one occurrence of an inner sequence
- A lambda denotes a non-terminal part of the grammar
This is basically an attempt to translate Figure 10.5 from the book into
a Python structure.
"""
TYPE = Keyword.INT | Keyword.CHAR | Keyword.BOOLEAN | Keyword.IDENTIFIER
UNARY_OP = Element('unaryOp', Keyword.NOT | Keyword.MINUS, True)
UNARY_OP = Keyword.NOT | Keyword.MINUS
# Keyword constants (true, false, null, this) as one terminal element.
# The alternatives are OR-ed into a single mask: separated by commas they
# became extra positional arguments, which Element's
# (name, grammar, terminal) signature cannot accept.
CONSTANT = Element('KeywordConstant', Keyword.TRUE | Keyword.FALSE | Keyword.NULL | Keyword.THIS, True)
TERM = Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER
TERM = Element('term', Keyword.INTEGERCONSTANT | Keyword.STRINGCONSTANT | Keyword.TRUE | Keyword.FALSE | Keyword.IDENTIFIER)
OP = Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ
OP = Element('op', Keyword.PLUS | Keyword.MINUS | Keyword.MUL | Keyword.DIV | Keyword.AND | Keyword.OR | Keyword.GT | Keyword.LT | Keyword.EQ, True)
EXPRESSION = [TERM, [OP, TERM]]
EXPRESSION = Element('expression', [TERM, [OP, TERM]])
EXPRESSIONLIST = (EXPRESSION, [Keyword.COMMA, EXPRESSION])
EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Keyword.COMMA, EXPRESSION]))
SUBROUTINE_CALL = {
SUBROUTINE_CALL = Element('subroutineCall', {
(Keyword.IDENTIFIER, Keyword.PARAN_OPEN): [
EXPRESSIONLIST,
Keyword.PARAN_CLOSE,
@ -40,81 +103,29 @@ SUBROUTINE_CALL = {
EXPRESSIONLIST,
Keyword.PARAN_CLOSE
]
}
})
STATEMENTS = {
STATEMENT = Element('statement', {
(Keyword.LET): [Keyword.IDENTIFIER, (Keyword.SQUARE_OPEN, EXPRESSION, Keyword.SQUARE_CLOSE)],
(Keyword.IF): [
Keyword.PARAN_OPEN,
EXPRESSION,
Keyword.PARAN_CLOSE,
Keyword.BRACE_OPEN,
lambda:STATEMENTS,
lambda: STATEMENTS,
Keyword.BRACE_CLOSE,
( Keyword.ELSE, Keyword.BRACE_OPEN, lambda:STATEMENTS, Keyword.BRACE_CLOSE)
( Keyword.ELSE, Keyword.BRACE_OPEN, lambda:STATEMENT, Keyword.BRACE_CLOSE)
],
(Keyword.WHILE): [
Keyword.PARAN_OPEN,
EXPRESSION,
Keyword.PARAN_CLOSE,
Keyword.BRACE_OPEN,
lambda:STATEMENTS,
lambda: STATEMENTS,
Keyword.BRACE_CLOSE,
],
(Keyword.DO): SUBROUTINE_CALL,
(Keyword.RETURN): [(EXPRESSION), Keyword.SEMICOLON]
}
})
SUBROUTINEDEC = [
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
# subroutineBody
Keyword.CONSTRUCTOR | Keyword.FUNCTION | Keyword.METHOD,
Keyword.VOID | TYPE,
Keyword.IDENTIFIER,
Keyword.PARAN_OPEN,
# Parameter List =
# (
# (type varName) (, type varName)*
# )?
# we use tuples for zero OR one of a sequence
(
TYPE,
Keyword.IDENTIFIER,
[Keyword.COMMA, TYPE, Keyword.IDENTIFIER]
),
Keyword.PARAN_CLOSE,
# Subroutine Body {
Keyword.BRACE_OPEN,
# One or more variable declarations
# `var type varName (, varName)* ;`
[
Keyword.VAR,
TYPE,
Keyword.IDENTIFIER,
[Keyword.COMMA, Keyword.IDENTIFIER],
Keyword.SEMICOLON
],
STATEMENTS,
Keyword.BRACE_CLOSE,
]
CLASSVARDEC = [
# static|field type (, name)* ;
Keyword.STATIC | Keyword.FIELD,
TYPE,
[Keyword.COMMA, Keyword.IDENTIFIER],
Keyword.SEMICOLON
]
GRAMMAR = [
# class className {
Keyword.CLASS,
Keyword.IDENTIFIER,
Keyword.BRACE_OPEN,
# class Variable Declarations (one or more) = list
CLASSVARDEC,
# subroutine declarations (one or more) = list
SUBROUTINEDEC,
# }
Keyword.BRACE_CLOSE
]
STATEMENTS = Element('statements', [STATEMENT])

50
compiler/grammar.yml Normal file
View File

@ -0,0 +1,50 @@
keyword:
- 'class'
- 'method'
- 'function'
- 'constructor'
- 'int'
- 'boolean'
- 'char'
- 'void'
- 'var'
- 'static'
- 'field'
- 'let'
- 'do'
- 'if'
- 'else'
- 'while'
- 'return'
- 'true'
- 'false'
- 'null'
- 'this'
symbol:
- '{'
- '}'
- '('
- ')'
- '['
- ']'
- '.'
- ';'
- '+'
- '-'
- '*'
- '/'
- '&'
- '|'
- '<'
- '>'
- '='
- '~'
- ','
# Predefined
integerConstant: null
StringConstant: null
identifier: null
className:
class:
- class
- *className