New compiler is running now and giving some results

This commit is contained in:
Nemo 2020-07-06 17:21:13 +05:30
parent 48d72383fd
commit 703c5e467d
4 changed files with 131 additions and 107 deletions

View File

@ -1,8 +1,6 @@
from tokenizer import JackTokenizer
from engine import Engine
import sys
if __name__ == '__main__':
j = JackTokenizer(sys.argv[1], True)
# c = CompilationEngine(sys.argv[1])
# c.CompileClass()
Engine(sys.argv[1]).compileClass()

View File

@ -1,6 +1,6 @@
from tokenizer import JackTokenizer
from keywords import *
from grammar import CLASS
from grammar import CLASS,Element
"""
New Compilation Engine
@ -9,7 +9,7 @@ class Engine:
def __init__(self, input_file):
self.i = 0
self.jt = JackTokenizer(input_file, False)
self.file = open(self.xml_file())
# self.file = open(self.xml_file(input_file))
def xml_file(self, input_file):
    """Return the XML output file name for ``input_file``.

    The name is built by appending ".xml" to the given input file name.
    """
    suffix = ".xml"
    return input_file + suffix
@ -20,13 +20,14 @@ class Engine:
return Atom(token.value)
def compileClass(self):
self.compile(grammar.CLASS)
self.compile(CLASS)
def advance(self):
self.jt.advance()
def ZeroOrMany(self, grammarList):
if compile(grammarList[0]):
# print("ZeroOrMany")
if self.compile(grammarList[0]):
# We now expect the whole of it
for e in grammarList:
self.compile(e)
@ -39,22 +40,31 @@ class Engine:
print(line)
def MatchDict(self, dictionary):
# print("MatchDict")
xml_rows_for_lookup_terms = []
lookup_keys = ()
# How much to lookahead
lookahead = len(list(dict.keys())[0])
lookahead = len(list(dictionary.keys())[0])
for _ in range(lookahead):
xml_rows_for_lookup_terms += [self.jt.xml_row()]
lookup_keys = lookup_keys + (self.atom(),)
self.advance()
grammar = dict[lookup_keys]
# We must open this before we compile the remainder
if isinstance(grammar, Element):
self.open(grammar)
grammar = grammar.grammar
# Now we put the first X terms from the conditional
for line in xml_rows_for_lookup_terms:
self.write(line)
for e in dict[lookup_keys]:
self.compile(e)
return self.compile(grammar)
def ZeroOrOne(self, grammarTuple):
# print("ZeroOrOne")
if self.compile(grammarTuple[0]):
for e in grammarTuple:
self.compile(e)
@ -68,32 +78,39 @@ class Engine:
current = self.atom()
# We use `in` here to accommodate bitmasks
if current in expected:
print(current)
print(self.jt.xml_row(), end="")
self.advance()
return True
else:
raise Exception("Expected %s, got %s" % (expected, current))
return False
def open(self, el):
print("<%s>" % el.name)
def close(self, el):
print("</%s>" % el.name)
def compile(self, thing):
# TODO: OPEN TAGS
if isinstance(thing, Element):
print("open %s" % thing.name)
grammar = thing.grammar
self.open(thing)
for e in thing.grammar:
self.compile(e)
self.close(thing)
elif callable(thing):
grammar = thing()
self.compile(grammar)
else:
grammar = thing
grammarType = type(grammar)
elif grammarType == list:
return self.ZeroOrMany(thing)
if grammarType == list:
return self.ZeroOrMany(grammar)
elif grammarType == dict:
return self.MatchDict(thing)
return self.MatchDict(grammar)
elif grammarType == tuple:
return self.ZeroOrOne(thing)
return self.ZeroOrOne(grammar)
elif grammarType == Atom:
return self.Atom(thing)
elif callable(thing):
return self.compile(thing)
if isinstance(thing, Element):
print("close %s" % thing.name)
return self.Atom(grammar)
else:
raise Exception("Should not have reached here")

View File

@ -3,7 +3,10 @@ from keywords import Atom
"""
The grammar is defined by the following constructs:
The top level object is called GRAMMAR, which is the grammar for a class. It is a list object.
The top level object is called GRAMMAR, which is the grammar for a class.
It is an instance of the Element class.
An Element wraps a grammar, which is always defined as a list.
Inside this list, each element can be any of the following:
@ -18,37 +21,41 @@ a Python structure.
"""
class Element:
    """A named grammar element.

    ``name`` is the tag name associated with the element and ``grammar`` is
    the list of grammar constructs that make it up.

    Raises:
        TypeError: if ``grammar`` is not a list.
    """

    def __init__(self, name, grammar):
        # Raise explicitly instead of using `assert`, so the check is not
        # stripped when Python runs with -O.
        if not isinstance(grammar, list):
            raise TypeError(
                "grammar for element %r must be a list, got %s"
                % (name, type(grammar).__name__)
            )
        self.name = name
        self.grammar = grammar

    def __repr__(self):
        return "Element(%r, %r)" % (self.name, self.grammar)
TYPES = Element('type', Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER)
CLASSVARDEC = Element('classVarDec', [
# static|field type (, name)* ;
Atom.STATIC | Atom.FIELD,
TYPES,
Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
Atom.IDENTIFIER,
[Atom.COMMA, Atom.IDENTIFIER],
Atom.SEMICOLON
])
VARDEC = Element('varDec', [Atom.VAR, TYPES, Atom.IDENTIFIER,
VARDEC = Element('varDec', [Atom.VAR, Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER,
[Atom.COMMA, Atom.IDENTIFIER],
Atom.SEMICOLON
])
UNARY_OP = Element('unaryOp', Atom.NOT | Atom.MINUS)
CONSTANT = Element('KeywordConstant', Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS)
# Since this is not a non-terminal, we can just write it as a constant
OP = Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ
UNARY_OP = Atom.NOT | Atom.MINUS
CONSTANT = Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS
""" Pseudo-element to help define subroutine declarations """
RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
TERM = Element('term', Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER)
OP = Element('op', Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ)
# TODO: This is missing a lot of stuff
TERM = Element('term', [Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER])
EXPRESSION = Element('expression', [TERM, [OP, TERM]])
EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Atom.COMMA, EXPRESSION]))
EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
SUBROUTINE_CALL = Element('subroutineCall', {
DO_STATEMENT = Element('doStatement', [{
(Atom.IDENTIFIER, Atom.PARAN_OPEN): [
EXPRESSIONLIST,
Atom.PARAN_CLOSE,
@ -59,11 +66,12 @@ SUBROUTINE_CALL = Element('subroutineCall', {
EXPRESSIONLIST,
Atom.PARAN_CLOSE
]
})
}])
STATEMENT = Element('statement', {
(Atom.LET): [Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)],
(Atom.IF): [
# letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
# The leading `let` keyword is not listed here; it is the lookup key for this
# element in the STATEMENT dictionary.
LET_STATEMENT = Element('letStatement', [
    Atom.IDENTIFIER,
    # Optional array index
    (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE),
    Atom.EQ,
    EXPRESSION,
    Atom.SEMICOLON,
])
IF_STATEMENT = Element('ifStatement', [
Atom.PARAN_OPEN,
EXPRESSION,
Atom.PARAN_CLOSE,
@ -72,41 +80,49 @@ STATEMENT = Element('statement', {
Atom.BRACE_CLOSE,
# This is the tricky one
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
],
(Atom.WHILE): [
])
WHILE_STATEMENT = Element('whileStatement', [
Atom.PARAN_OPEN,
EXPRESSION,
Atom.PARAN_CLOSE,
Atom.BRACE_OPEN,
lambda: STATEMENTS,
Atom.BRACE_CLOSE,
],
(Atom.DO): SUBROUTINE_CALL,
(Atom.RETURN): [(EXPRESSION), Atom.SEMICOLON]
})
])
STATEMENTS = Element('statements', [STATEMENT])
RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON])
# Just a constant, since this isn't a non-terminal
STATEMENT = {
(Atom.LET): LET_STATEMENT,
(Atom.IF): IF_STATEMENT,
(Atom.WHILE): WHILE_STATEMENT,
(Atom.DO): DO_STATEMENT,
(Atom.RETURN): RETURN_STATEMENT
}
STATEMENTS = Element('statements', [[STATEMENT]])
SUBROUTINE_BODY = Element('subroutineBody', [
# Zero or more variable declarations
# `var type varName (, varName)* ;`
Atom.BRACE_OPEN,
[VARDEC],
STATEMENTS
STATEMENTS,
Atom.BRACE_CLOSE
])
""" Pseudo-element to help define subroutine declarations """
RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
# Parameter List =
# (
# (type varName) (, type varName)*
# )?
# we use tuples for zero OR one of a sequence
PARAMETER_LIST = Element('parameterList', (
TYPES,
PARAMETER_LIST = Element('parameterList', [(
Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
Atom.IDENTIFIER,
[Atom.COMMA, TYPES, Atom.IDENTIFIER]
))
[Atom.COMMA, Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER, Atom.IDENTIFIER]
)])
SUBROUTINEDEC = Element('subroutineDec', [
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
@ -117,10 +133,7 @@ SUBROUTINEDEC = Element('subroutineDec', [
Atom.PARAN_OPEN,
PARAMETER_LIST,
Atom.PARAN_CLOSE,
# Subroutine Body
Atom.BRACE_OPEN,
SUBROUTINE_BODY,
Atom.BRACE_CLOSE,
])
CLASS = Element('class', [

View File

@ -1,33 +1,29 @@
import re
from keywords import Keyword
from keywords import *
from html import escape
from enum import Enum
# Superclass in some sense
class Token(Enum):
KEYWORD = 1
SYMBOL = 2
class JackTokenizer:
SYMBOL_MAP = {
'{': Keyword.BRACE_OPEN ,
'}': Keyword.BRACE_CLOSE ,
'(': Keyword.PARAN_OPEN ,
')': Keyword.PARAN_CLOSE ,
'[': Keyword.SQUARE_OPEN ,
']': Keyword.SQUARE_CLOSE ,
'.': Keyword.DOT ,
';': Keyword.SEMICOLON ,
'+': Keyword.PLUS ,
'-': Keyword.MINUS ,
'*': Keyword.MUL ,
'/': Keyword.DIV ,
'&': Keyword.AND ,
'|': Keyword.OR ,
'<': Keyword.LT ,
'>': Keyword.GT ,
'=': Keyword.EQ ,
'~': Keyword.NOT ,
',': Keyword.COMMA,
'{': Symbol.BRACE_OPEN ,
'}': Symbol.BRACE_CLOSE ,
'(': Symbol.PARAN_OPEN ,
')': Symbol.PARAN_CLOSE ,
'[': Symbol.SQUARE_OPEN ,
']': Symbol.SQUARE_CLOSE ,
'.': Symbol.DOT ,
';': Symbol.SEMICOLON ,
'+': Symbol.PLUS ,
'-': Symbol.MINUS ,
'*': Symbol.MUL ,
'/': Symbol.DIV ,
'&': Symbol.AND ,
'|': Symbol.OR ,
'<': Symbol.LT ,
'>': Symbol.GT ,
'=': Symbol.EQ ,
'~': Symbol.NOT ,
',': Symbol.COMMA,
}
KEYWORD_MAP = {
@ -61,16 +57,16 @@ class JackTokenizer:
elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t):
return JackTokenizer.SYMBOL_MAP[t]
elif re.compile("\d+").match(t):
return Keyword.INTEGERCONSTANT
return Token.INTEGERCONSTANT
elif re.compile("\".*\"").match(t):
return Keyword.STRINGCONSTANT
return Token.STRINGCONSTANT
else:
# TODO: Put an assert to ensure valid identifier
return Keyword.IDENTIFIER
return Token.IDENTIFIER
pass
def printable_token(self):
if self.tokenType() == Keyword.STRINGCONSTANT:
if self.tokenType() == Token.STRINGCONSTANT:
return self.current_token()[1:-1]
else:
return escape(self.current_token(), True)
@ -95,7 +91,7 @@ class JackTokenizer:
""" Returns the integer value of the current token """
def intVal(self):
self.assert_type(Keyword.INTEGERCONSTANT)
self.assert_type(Token.INTEGERCONSTANT)
return int(self.token)
""" Returns a list of tokens for that line """