Got most of the Compilation Engine working

- Expressions aren't tested
- Some issues with xml ordering and empty tags
This commit is contained in:
Nemo 2020-07-06 21:12:00 +05:30
parent 703c5e467d
commit d25f7407da
5 changed files with 312 additions and 66 deletions

View File

@ -1,6 +1,7 @@
from tokenizer import JackTokenizer
from keywords import *
from grammar import CLASS,Element
from grammar import *
import sys
"""
New Compilation Engine
@ -9,7 +10,7 @@ class Engine:
def __init__(self, input_file):
self.i = 0
self.jt = JackTokenizer(input_file, False)
# self.file = open(self.xml_file(input_file))
self.file = open(self.xml_file(input_file), "w")
def xml_file(self, input_file):
return input_file + ".xml"
@ -25,47 +26,66 @@ class Engine:
def advance(self):
self.jt.advance()
def ZeroOrMany(self, grammarList):
# print("ZeroOrMany")
if self.compile(grammarList[0]):
def ZeroOrMany(self, grammarList, matchOnly):
# print("ZOM called")
ret = self.compile(grammarList[0], matchOnly)
if ret and matchOnly:
return True
elif ret:
# We now expect the whole of it
for e in grammarList:
self.compile(e)
# We try for another list after this
return self.ZeroOrMany(grammarList)
self.ZeroOrMany(grammarList, False)
return True
else:
return None
def write(self, line):
print(line)
def write(self, line, end = "\n"):
self.file.write(self.i*" " + line + end)
def MatchDict(self, dictionary):
# print("MatchDict")
xml_rows_for_lookup_terms = []
lookup_keys = ()
def MatchDict(self, dictionary, matchOnly):
# Easy way out
xml_rows_for_lookup_terms = [self.jt.xml_row()]
lookup_keys = (self.atom(),)
# How much to lookahead
lookahead = len(list(dictionary.keys())[0])
for _ in range(lookahead):
keys = list(dictionary.keys())
lookahead = len(keys[0])
# We don't have to move the cursor for LL0 grammar
if matchOnly:
assert(lookahead == 1)
for _ in range(lookahead-1):
self.advance()
xml_rows_for_lookup_terms += [self.jt.xml_row()]
lookup_keys = lookup_keys + (self.atom(),)
self.advance()
grammar = dict[lookup_keys]
if not lookup_keys in dictionary:
return False
grammar = el = dictionary[lookup_keys]
# We must open this before we compile the remainder
if isinstance(grammar, Element):
self.open(grammar)
self.open(el)
grammar = grammar.grammar
# Now we put the first X terms from the conditional
for line in xml_rows_for_lookup_terms:
self.write(line)
self.write(line, end="")
return self.compile(grammar)
self.advance()
for e in grammar:
self.compile(e)
def ZeroOrOne(self, grammarTuple):
# print("ZeroOrOne")
if self.compile(grammarTuple[0]):
if isinstance(el, Element):
self.close(el)
return True
def ZeroOrOne(self, grammarTuple, matchOnly):
if self.compile(grammarTuple[0], True):
for e in grammarTuple:
self.compile(e)
return True
@ -73,44 +93,59 @@ class Engine:
return None
""" Has to MATCH """
def Atom(self, atom):
def MatchAtom(self, atom, matchOnly):
expected = atom
current = self.atom()
# We use `in` here to accommodate bitmasks
if current in expected:
print(self.jt.xml_row(), end="")
match = current in expected
if match and matchOnly:
return True
elif match:
self.write(self.jt.xml_row(), end="")
self.advance()
return True
else:
# print("%s != %s" % (current, expected))
return False
def open(self, el):
print("<%s>" % el.name)
self.write("<%s>" % el.name)
self.i+=2
def close(self, el):
print("</%s>" % el.name)
self.i-=2
self.write("</%s>" % el.name)
def compile(self, thing):
"""
If you set matchOnly = true, the cursor will not move forward
if it is forced to move forward, it will instead RAISE AN ERROR
"""
def compile(self, thing, matchOnly = False):
# TODO: OPEN TAGS
if isinstance(thing, Element):
ret = False
if self.compile(thing.grammar[0], True):
self.open(thing)
for e in thing.grammar:
self.compile(e)
ret = self.compile(e)
self.close(thing)
return ret
else:
return ret
elif callable(thing):
grammar = thing()
self.compile(grammar)
return self.compile(grammar, matchOnly)
else:
grammar = thing
grammarType = type(grammar)
if grammarType == list:
return self.ZeroOrMany(grammar)
return self.ZeroOrMany(grammar, matchOnly)
elif grammarType == dict:
return self.MatchDict(grammar)
return self.MatchDict(grammar, matchOnly)
elif grammarType == tuple:
return self.ZeroOrOne(grammar)
return self.ZeroOrOne(grammar, matchOnly)
elif grammarType == Atom:
return self.Atom(grammar)
return self.MatchAtom(grammar, matchOnly)
else:
raise Exception("Should not have reached here")

View File

@ -27,6 +27,9 @@ class Element:
self.name = name
self.grammar = grammar
def __repr__(self):
return self.name
CLASSVARDEC = Element('classVarDec', [
# static|field type (, name)* ;
Atom.STATIC | Atom.FIELD,
@ -56,36 +59,41 @@ EXPRESSION = Element('expression', [TERM, [OP, TERM]])
EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
DO_STATEMENT = Element('doStatement', [{
(Atom.IDENTIFIER, Atom.PARAN_OPEN): [
(Atom.IDENTIFIER, Atom.PAREN_OPEN): [
EXPRESSIONLIST,
Atom.PARAN_CLOSE,
Atom.PAREN_CLOSE,
],
(Atom.IDENTIFIER, Atom.DOT): [
Atom.IDENTIFIER,
Atom.PARAN_OPEN,
Atom.PAREN_OPEN,
EXPRESSIONLIST,
Atom.PARAN_CLOSE
Atom.PAREN_CLOSE
]
}])
},Atom.SEMICOLON])
LET_STATEMENT = Element('whileStatement', [
Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)])
LET_STATEMENT = Element('letStatement', [
Atom.IDENTIFIER,
(Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE),
Atom.EQ,
EXPRESSION,
Atom.SEMICOLON
])
IF_STATEMENT = Element('ifStatement', [
Atom.PARAN_OPEN,
Atom.PAREN_OPEN,
EXPRESSION,
Atom.PARAN_CLOSE,
Atom.PAREN_CLOSE,
Atom.BRACE_OPEN,
lambda: STATEMENTS,
Atom.BRACE_CLOSE,
# This is the tricky one
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENTS, Atom.BRACE_CLOSE)
])
WHILE_STATEMENT = Element('whileStatement', [
Atom.PARAN_OPEN,
Atom.PAREN_OPEN,
EXPRESSION,
Atom.PARAN_CLOSE,
Atom.PAREN_CLOSE,
Atom.BRACE_OPEN,
lambda: STATEMENTS,
Atom.BRACE_CLOSE,
@ -95,11 +103,11 @@ RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON])
# Just a constant, since this isn't a non-terminal
STATEMENT = {
(Atom.LET): LET_STATEMENT,
(Atom.IF): IF_STATEMENT,
(Atom.WHILE): WHILE_STATEMENT,
(Atom.DO): DO_STATEMENT,
(Atom.RETURN): RETURN_STATEMENT
(Atom.LET,): LET_STATEMENT,
(Atom.IF,): IF_STATEMENT,
(Atom.WHILE,): WHILE_STATEMENT,
(Atom.DO,): DO_STATEMENT,
(Atom.RETURN,): RETURN_STATEMENT
}
STATEMENTS = Element('statements', [[STATEMENT]])
@ -130,9 +138,9 @@ SUBROUTINEDEC = Element('subroutineDec', [
Atom.CONSTRUCTOR | Atom.FUNCTION | Atom.METHOD,
RETURN_TYPES,
Atom.IDENTIFIER,
Atom.PARAN_OPEN,
Atom.PAREN_OPEN,
PARAMETER_LIST,
Atom.PARAN_CLOSE,
Atom.PAREN_CLOSE,
SUBROUTINE_BODY,
])

View File

@ -1,7 +1,13 @@
from enum import IntFlag,auto
class PrintableFlag(IntFlag):
def __repr__(self):
if self.name:
return self.name
return super().__str__()
""" Super class for everything """
class Atom(IntFlag):
class Atom(PrintableFlag):
# Keywords
CLASS = auto()
METHOD = auto()
@ -27,8 +33,8 @@ class Atom(IntFlag):
# Symbols Start here
BRACE_OPEN = auto()
BRACE_CLOSE = auto()
PARAN_OPEN = auto()
PARAN_CLOSE = auto()
PAREN_OPEN = auto()
PAREN_CLOSE = auto()
SQUARE_OPEN = auto()
SQUARE_CLOSE = auto()
DOT = auto()
@ -49,7 +55,7 @@ class Atom(IntFlag):
INTEGERCONSTANT = auto()
STRINGCONSTANT = auto()
class Keyword(IntFlag):
class Keyword(PrintableFlag):
CLASS = Atom.CLASS.value
METHOD = Atom.METHOD.value
FUNCTION = Atom.FUNCTION.value
@ -72,12 +78,12 @@ class Keyword(IntFlag):
NULL = Atom.NULL.value
THIS = Atom.THIS.value
class Symbol(IntFlag):
class Symbol(PrintableFlag):
# Symbols Start here
BRACE_OPEN = Atom.BRACE_OPEN.value
BRACE_CLOSE = Atom.BRACE_CLOSE.value
PARAN_OPEN = Atom.PARAN_OPEN.value
PARAN_CLOSE = Atom.PARAN_CLOSE.value
PAREN_OPEN = Atom.PAREN_OPEN.value
PAREN_CLOSE = Atom.PAREN_CLOSE.value
SQUARE_OPEN = Atom.SQUARE_OPEN.value
SQUARE_CLOSE = Atom.SQUARE_CLOSE.value
DOT = Atom.DOT.value
@ -94,7 +100,7 @@ class Symbol(IntFlag):
NOT = Atom.NOT.value
COMMA = Atom.COMMA.value
class Token(IntFlag):
class Token(PrintableFlag):
IDENTIFIER = Atom.IDENTIFIER.value
INTEGERCONSTANT = Atom.INTEGERCONSTANT.value
STRINGCONSTANT = Atom.STRINGCONSTANT.value

View File

@ -7,8 +7,8 @@ class JackTokenizer:
SYMBOL_MAP = {
'{': Symbol.BRACE_OPEN ,
'}': Symbol.BRACE_CLOSE ,
'(': Symbol.PARAN_OPEN ,
')': Symbol.PARAN_CLOSE ,
'(': Symbol.PAREN_OPEN ,
')': Symbol.PAREN_CLOSE ,
'[': Symbol.SQUARE_OPEN ,
']': Symbol.SQUARE_CLOSE ,
'.': Symbol.DOT ,

View File

@ -0,0 +1,197 @@
<class>
<keyword> class </keyword>
<identifier> Main </identifier>
<symbol> { </symbol>
<classVarDec>
<keyword> static </keyword>
<keyword> boolean </keyword>
<identifier> test </identifier>
<symbol> ; </symbol>
</classVarDec>
<subroutineDec>
<keyword> function </keyword>
<keyword> void </keyword>
<identifier> main </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<subroutineBody>
<symbol> { </symbol>
<varDec>
<keyword> var </keyword>
<identifier> SquareGame </identifier>
<identifier> game </identifier>
<symbol> ; </symbol>
</varDec>
<letStatement>
<keyword> let </keyword>
<identifier> game </identifier>
<symbol> = </symbol>
<term>
<identifier> game </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<statements>
<doStatement>
<keyword> do </keyword>
<identifier> game </identifier>
<symbol> . </symbol>
<identifier> run </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
</doStatement>
<doStatement>
<keyword> do </keyword>
<identifier> game </identifier>
<symbol> . </symbol>
<identifier> dispose </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
</doStatement>
<returnStatement>
<keyword> return </keyword>
<symbol> ; </symbol>
</returnStatement>
</statements>
<symbol> } </symbol>
</subroutineBody>
</subroutineDec>
<subroutineDec>
<keyword> function </keyword>
<keyword> void </keyword>
<identifier> test </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<subroutineBody>
<symbol> { </symbol>
<varDec>
<keyword> var </keyword>
<keyword> int </keyword>
<identifier> i </identifier>
<symbol> , </symbol>
<identifier> j </identifier>
<symbol> ; </symbol>
</varDec>
<varDec>
<keyword> var </keyword>
<identifier> String </identifier>
<identifier> s </identifier>
<symbol> ; </symbol>
</varDec>
<varDec>
<keyword> var </keyword>
<identifier> Array </identifier>
<identifier> a </identifier>
<symbol> ; </symbol>
</varDec>
<ifStatement>
<keyword> if </keyword>
<symbol> ( </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ) </symbol>
<symbol> { </symbol>
<letStatement>
<keyword> let </keyword>
<identifier> s </identifier>
<symbol> = </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<statements>
<letStatement>
<keyword> let </keyword>
<identifier> s </identifier>
<symbol> = </symbol>
<term>
<identifier> j </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<letStatement>
<keyword> let </keyword>
<identifier> a </identifier>
<symbol> [ </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ] </symbol>
<symbol> = </symbol>
<term>
<identifier> j </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
</statements>
<symbol> } </symbol>
<keyword> else </keyword>
<symbol> { </symbol>
<letStatement>
<keyword> let </keyword>
<identifier> i </identifier>
<symbol> = </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<statements>
<letStatement>
<keyword> let </keyword>
<identifier> j </identifier>
<symbol> = </symbol>
<term>
<identifier> j </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<letStatement>
<keyword> let </keyword>
<identifier> i </identifier>
<symbol> = </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
<symbol> | </symbol>
<term>
<identifier> j </identifier>
</term>
</expression>
<symbol> ; </symbol>
</letStatement>
</statements>
<symbol> } </symbol>
</ifStatement>
<statements>
<returnStatement>
<keyword> return </keyword>
<symbol> ; </symbol>
</returnStatement>
</statements>
<symbol> } </symbol>
</subroutineBody>
</subroutineDec>
<symbol> } </symbol>
</class>