nand2tetris/compiler/grammar.py

139 lines
3.6 KiB
Python
Raw Normal View History

2020-07-06 09:30:36 +00:00
from keywords import Atom
"""
The grammar is defined by the following constructs:
The top level object is called GRAMMAR, which is the grammar for a class. It is a list object.
Inside this list, each element can be any of the following:
- a token (denoted by a member of the Atom enum)
- a bitwise mask of Atom members, to denote multiple possibilities
- Another list, to denote zero-or-more of an inner sequence
- A tuple, to denote zero-or-one of an inner sequence
- A lambda, to denote a non-terminal part of the grammar whose lookup is
  deferred until it is called (e.g. `lambda: STATEMENTS`)
This is basically an attempt to translate Figure 10.5 from the book into
a Python structure.
"""
2020-07-06 09:06:41 +00:00
class Element:
    """A named node of the grammar.

    Attributes (stored unchanged from the constructor arguments):
        name: the rule name, e.g. 'term' or 'classVarDec'.
        grammar: the rule body -- an Atom mask, list, tuple, dict, ...
        terminal: the flag passed by the caller; defaults to False.
    """

    def __init__(self, name, grammar, terminal=False):
        self.name = name
        self.grammar = grammar
        self.terminal = terminal

    def __repr__(self):
        # The grammar body is left out on purpose: rules nest other
        # Elements and lambdas, which would make the output unreadable.
        return '{}(name={!r}, terminal={!r})'.format(
            type(self).__name__, self.name, self.terminal
        )
2020-07-06 09:30:36 +00:00
# Terminal rule: a single token that is one of INT, CHAR, BOOLEAN or IDENTIFIER.
TYPES = Element(
    'type',
    Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
    terminal=True,
)
2020-07-06 09:06:41 +00:00
CLASSVARDEC = Element('classVarDec', [
    # (static | field) type varName (, varName)* ;
    Atom.STATIC | Atom.FIELD,
    TYPES,
    # The first variable name is mandatory (same shape as VARDEC); only the
    # additional comma-separated names are zero-or-more.
    Atom.IDENTIFIER,
    [Atom.COMMA, Atom.IDENTIFIER],
    Atom.SEMICOLON,
])
2020-07-06 09:30:36 +00:00
VARDEC = Element('varDec', [
    # var type varName (, varName)* ;
    Atom.VAR,
    TYPES,
    Atom.IDENTIFIER,
    # nested list: zero or more further ", varName" pairs
    [Atom.COMMA, Atom.IDENTIFIER],
    Atom.SEMICOLON,
])
2020-07-06 09:30:36 +00:00
# Terminal rule: a single NOT or MINUS token.
UNARY_OP = Element(
    'unaryOp',
    Atom.NOT | Atom.MINUS,
    terminal=True,
)
2020-07-06 09:30:36 +00:00
# Terminal rule: one of the keyword constants TRUE, FALSE, NULL or THIS.
CONSTANT = Element(
    'KeywordConstant',
    Atom.TRUE | Atom.FALSE | Atom.NULL | Atom.THIS,
    terminal=True,
)
2020-07-06 09:30:36 +00:00
# term: integerConstant | stringConstant | keywordConstant | identifier
# All four keyword constants (TRUE, FALSE, NULL, THIS -- the same set that
# CONSTANT lists) are accepted, so that e.g. `return this;` and
# `let x = null;` have a valid term.
# NOTE(review): the remaining `term` alternatives of the Jack grammar (array
# access, subroutine call, parenthesised expression, unaryOp term) are not
# expressed here.
TERM = Element(
    'term',
    Atom.INTEGERCONSTANT
    | Atom.STRINGCONSTANT
    | Atom.TRUE
    | Atom.FALSE
    | Atom.NULL
    | Atom.THIS
    | Atom.IDENTIFIER,
)
2020-07-06 09:30:36 +00:00
# Terminal rule: a single binary-operator token.
OP = Element(
    'op',
    Atom.PLUS
    | Atom.MINUS
    | Atom.MUL
    | Atom.DIV
    | Atom.AND
    | Atom.OR
    | Atom.GT
    | Atom.LT
    | Atom.EQ,
    terminal=True,
)
2020-06-24 12:42:36 +00:00
EXPRESSION = Element('expression', [
    # term (op term)*
    TERM,
    [OP, TERM],
])
2020-07-06 09:30:36 +00:00
# (expression (, expression)*)?
# The outer tuple makes the whole list optional; the inner list repeats
# the ", expression" pair zero or more times.
EXPRESSIONLIST = Element(
    'expressionList',
    (
        EXPRESSION,
        [Atom.COMMA, EXPRESSION],
    ),
)
2020-06-24 12:42:36 +00:00
# The grammar is a dict keyed by the two leading tokens; each value is the
# sequence that follows those two tokens.
SUBROUTINE_CALL = Element('subroutineCall', {
    # identifier ( expressionList )
    (Atom.IDENTIFIER, Atom.PARAN_OPEN): [
        EXPRESSIONLIST,
        Atom.PARAN_CLOSE,
    ],
    # identifier . identifier ( expressionList )
    (Atom.IDENTIFIER, Atom.DOT): [
        Atom.IDENTIFIER,
        Atom.PARAN_OPEN,
        EXPRESSIONLIST,
        Atom.PARAN_CLOSE,
    ],
})
2020-06-24 12:42:36 +00:00
# The grammar is a dict keyed by the statement's leading keyword token; each
# value is the sequence that follows that keyword.
STATEMENT = Element('statement', {
    # let varName ([ expression ])? = expression ;
    Atom.LET: [
        Atom.IDENTIFIER,
        (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE),
        Atom.EQ,
        EXPRESSION,
        Atom.SEMICOLON,
    ],
    # if ( expression ) { statements } (else { statements })?
    Atom.IF: [
        Atom.PARAN_OPEN,
        EXPRESSION,
        Atom.PARAN_CLOSE,
        Atom.BRACE_OPEN,
        # STATEMENTS is defined after STATEMENT, so the reference is
        # deferred behind a lambda.
        lambda: STATEMENTS,
        Atom.BRACE_CLOSE,
        # This is the tricky one: the else block is optional (tuple) and
        # holds a full statement sequence, exactly like the if block.
        (Atom.ELSE, Atom.BRACE_OPEN, lambda: STATEMENTS, Atom.BRACE_CLOSE),
    ],
    # while ( expression ) { statements }
    Atom.WHILE: [
        Atom.PARAN_OPEN,
        EXPRESSION,
        Atom.PARAN_CLOSE,
        Atom.BRACE_OPEN,
        lambda: STATEMENTS,
        Atom.BRACE_CLOSE,
    ],
    # do subroutineCall ;
    Atom.DO: [SUBROUTINE_CALL, Atom.SEMICOLON],
    # return expression? ;
    # The trailing comma matters: `(EXPRESSION)` is just EXPRESSION, while
    # `(EXPRESSION,)` is the one-element tuple that marks it as optional.
    Atom.RETURN: [(EXPRESSION,), Atom.SEMICOLON],
})
2020-06-24 12:42:36 +00:00
# statements: statement*
# The outer list is the rule's sequence; the nested list is what marks
# "zero or more" (same convention as `[VARDEC]` inside SUBROUTINE_BODY).
# NOTE(review): assumes the parser treats an Element's top-level list as a
# plain sequence, as the module docstring describes -- confirm against the
# parser implementation.
STATEMENTS = Element('statements', [
    [STATEMENT],
])
2020-07-06 09:06:41 +00:00
SUBROUTINE_BODY = Element(
    'subroutineBody',
    [
        # Zero or more variable declarations (nested list), each of the form
        # `var type varName (, varName)* ;`
        [VARDEC],
        # followed by the statements of the body
        STATEMENTS,
    ],
)
""" Pseudo-element to help define subroutine declarations """
2020-07-06 09:30:36 +00:00
# A plain Atom mask (not wrapped in an Element): the TYPES tokens plus VOID.
RETURN_TYPES = (
    Atom.INT
    | Atom.CHAR
    | Atom.BOOLEAN
    | Atom.IDENTIFIER
    | Atom.VOID
)
2020-07-06 09:06:41 +00:00
# parameterList:
#   ( (type varName) (, type varName)* )?
# The outer tuple expresses "zero or one" of the whole sequence; the inner
# list expresses "zero or more" additional parameters.
PARAMETER_LIST = Element(
    'parameterList',
    (
        TYPES,
        Atom.IDENTIFIER,
        [Atom.COMMA, TYPES, Atom.IDENTIFIER],
    ),
)
SUBROUTINEDEC = Element(
    'subroutineDec',
    [
        # (constructor | function | method) (void | type) subroutineName
        Atom.CONSTRUCTOR | Atom.FUNCTION | Atom.METHOD,
        RETURN_TYPES,
        Atom.IDENTIFIER,
        # '(' parameterList ')'
        Atom.PARAN_OPEN,
        PARAMETER_LIST,
        Atom.PARAN_CLOSE,
        # '{' subroutineBody '}' -- the braces are matched here, around
        # the SUBROUTINE_BODY element rather than inside it.
        Atom.BRACE_OPEN,
        SUBROUTINE_BODY,
        Atom.BRACE_CLOSE,
    ],
)
CLASS = Element('class', [
    # class className {
    Atom.CLASS,
    Atom.IDENTIFIER,
    Atom.BRACE_OPEN,
    # class variable declarations (zero or more) = nested list
    [CLASSVARDEC],
    # subroutine declarations (zero or more) = nested list
    [SUBROUTINEDEC],
    # }
    Atom.BRACE_CLOSE,
])