Got most of the Compilation Engine working

- Expressions aren't tested
- Some issues with xml ordering and empty tags
This commit is contained in:
Nemo 2020-07-06 21:12:00 +05:30
parent 703c5e467d
commit d25f7407da
5 changed files with 312 additions and 66 deletions

View File

@ -1,6 +1,7 @@
from tokenizer import JackTokenizer from tokenizer import JackTokenizer
from keywords import * from keywords import *
from grammar import CLASS,Element from grammar import *
import sys
""" """
New Compilation Engine New Compilation Engine
@ -9,7 +10,7 @@ class Engine:
def __init__(self, input_file): def __init__(self, input_file):
self.i = 0 self.i = 0
self.jt = JackTokenizer(input_file, False) self.jt = JackTokenizer(input_file, False)
# self.file = open(self.xml_file(input_file)) self.file = open(self.xml_file(input_file), "w")
def xml_file(self, input_file): def xml_file(self, input_file):
return input_file + ".xml" return input_file + ".xml"
@ -25,47 +26,66 @@ class Engine:
def advance(self): def advance(self):
self.jt.advance() self.jt.advance()
def ZeroOrMany(self, grammarList): def ZeroOrMany(self, grammarList, matchOnly):
# print("ZeroOrMany") # print("ZOM called")
if self.compile(grammarList[0]): ret = self.compile(grammarList[0], matchOnly)
if ret and matchOnly:
return True
elif ret:
# We now expect the whole of it # We now expect the whole of it
for e in grammarList: for e in grammarList:
self.compile(e) self.compile(e)
# We try for another list after this # We try for another list after this
return self.ZeroOrMany(grammarList) self.ZeroOrMany(grammarList, False)
return True
else: else:
return None return None
def write(self, line): def write(self, line, end = "\n"):
print(line) self.file.write(self.i*" " + line + end)
def MatchDict(self, dictionary): def MatchDict(self, dictionary, matchOnly):
# print("MatchDict") # Easy way out
xml_rows_for_lookup_terms = [] xml_rows_for_lookup_terms = [self.jt.xml_row()]
lookup_keys = () lookup_keys = (self.atom(),)
# How much to lookahead # How much to lookahead
lookahead = len(list(dictionary.keys())[0]) keys = list(dictionary.keys())
for _ in range(lookahead): lookahead = len(keys[0])
# We don't have to move the cursor for LL0 grammar
if matchOnly:
assert(lookahead == 1)
for _ in range(lookahead-1):
self.advance()
xml_rows_for_lookup_terms += [self.jt.xml_row()] xml_rows_for_lookup_terms += [self.jt.xml_row()]
lookup_keys = lookup_keys + (self.atom(),) lookup_keys = lookup_keys + (self.atom(),)
self.advance()
grammar = dict[lookup_keys] if not lookup_keys in dictionary:
return False
grammar = el = dictionary[lookup_keys]
# We must open this before we compile the remainder # We must open this before we compile the remainder
if isinstance(grammar, Element): if isinstance(grammar, Element):
self.open(grammar) self.open(el)
grammar = grammar.grammar grammar = grammar.grammar
# Now we put the first X terms from the conditional # Now we put the first X terms from the conditional
for line in xml_rows_for_lookup_terms: for line in xml_rows_for_lookup_terms:
self.write(line) self.write(line, end="")
return self.compile(grammar) self.advance()
for e in grammar:
self.compile(e)
def ZeroOrOne(self, grammarTuple): if isinstance(el, Element):
# print("ZeroOrOne") self.close(el)
if self.compile(grammarTuple[0]):
return True
def ZeroOrOne(self, grammarTuple, matchOnly):
if self.compile(grammarTuple[0], True):
for e in grammarTuple: for e in grammarTuple:
self.compile(e) self.compile(e)
return True return True
@ -73,44 +93,59 @@ class Engine:
return None return None
""" Has to MATCH """ """ Has to MATCH """
def Atom(self, atom): def MatchAtom(self, atom, matchOnly):
expected = atom expected = atom
current = self.atom() current = self.atom()
# We use `in` here to accommodate bitmasks # We use `in` here to accommodate bitmasks
if current in expected: match = current in expected
print(self.jt.xml_row(), end="") if match and matchOnly:
return True
elif match:
self.write(self.jt.xml_row(), end="")
self.advance() self.advance()
return True return True
else: else:
# print("%s != %s" % (current, expected))
return False return False
def open(self, el): def open(self, el):
print("<%s>" % el.name) self.write("<%s>" % el.name)
self.i+=2
def close(self, el): def close(self, el):
print("</%s>" % el.name) self.i-=2
self.write("</%s>" % el.name)
def compile(self, thing): """
If matchOnly is True, the cursor will not move forward;
if it would be forced to move forward, it will instead RAISE AN ERROR.
"""
def compile(self, thing, matchOnly = False):
# TODO: OPEN TAGS # TODO: OPEN TAGS
if isinstance(thing, Element): if isinstance(thing, Element):
self.open(thing) ret = False
for e in thing.grammar: if self.compile(thing.grammar[0], True):
self.compile(e) self.open(thing)
self.close(thing) for e in thing.grammar:
ret = self.compile(e)
self.close(thing)
return ret
else:
return ret
elif callable(thing): elif callable(thing):
grammar = thing() grammar = thing()
self.compile(grammar) return self.compile(grammar, matchOnly)
else: else:
grammar = thing grammar = thing
grammarType = type(grammar) grammarType = type(grammar)
if grammarType == list: if grammarType == list:
return self.ZeroOrMany(grammar) return self.ZeroOrMany(grammar, matchOnly)
elif grammarType == dict: elif grammarType == dict:
return self.MatchDict(grammar) return self.MatchDict(grammar, matchOnly)
elif grammarType == tuple: elif grammarType == tuple:
return self.ZeroOrOne(grammar) return self.ZeroOrOne(grammar, matchOnly)
elif grammarType == Atom: elif grammarType == Atom:
return self.Atom(grammar) return self.MatchAtom(grammar, matchOnly)
else: else:
raise Exception("Should not have reached here") raise Exception("Should not have reached here")

View File

@ -27,6 +27,9 @@ class Element:
self.name = name self.name = name
self.grammar = grammar self.grammar = grammar
def __repr__(self):
return self.name
CLASSVARDEC = Element('classVarDec', [ CLASSVARDEC = Element('classVarDec', [
# static|field type (, name)* ; # static|field type (, name)* ;
Atom.STATIC | Atom.FIELD, Atom.STATIC | Atom.FIELD,
@ -56,36 +59,41 @@ EXPRESSION = Element('expression', [TERM, [OP, TERM]])
EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])]) EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
DO_STATEMENT = Element('doStatement', [{ DO_STATEMENT = Element('doStatement', [{
(Atom.IDENTIFIER, Atom.PARAN_OPEN): [ (Atom.IDENTIFIER, Atom.PAREN_OPEN): [
EXPRESSIONLIST, EXPRESSIONLIST,
Atom.PARAN_CLOSE, Atom.PAREN_CLOSE,
], ],
(Atom.IDENTIFIER, Atom.DOT): [ (Atom.IDENTIFIER, Atom.DOT): [
Atom.IDENTIFIER, Atom.IDENTIFIER,
Atom.PARAN_OPEN, Atom.PAREN_OPEN,
EXPRESSIONLIST, EXPRESSIONLIST,
Atom.PARAN_CLOSE Atom.PAREN_CLOSE
] ]
}]) },Atom.SEMICOLON])
LET_STATEMENT = Element('whileStatement', [ LET_STATEMENT = Element('letStatement', [
Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)]) Atom.IDENTIFIER,
(Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE),
Atom.EQ,
EXPRESSION,
Atom.SEMICOLON
])
IF_STATEMENT = Element('ifStatement', [ IF_STATEMENT = Element('ifStatement', [
Atom.PARAN_OPEN, Atom.PAREN_OPEN,
EXPRESSION, EXPRESSION,
Atom.PARAN_CLOSE, Atom.PAREN_CLOSE,
Atom.BRACE_OPEN, Atom.BRACE_OPEN,
lambda: STATEMENTS, lambda: STATEMENTS,
Atom.BRACE_CLOSE, Atom.BRACE_CLOSE,
# This is the tricky one # This is the tricky one
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE) ( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENTS, Atom.BRACE_CLOSE)
]) ])
WHILE_STATEMENT = Element('whileStatement', [ WHILE_STATEMENT = Element('whileStatement', [
Atom.PARAN_OPEN, Atom.PAREN_OPEN,
EXPRESSION, EXPRESSION,
Atom.PARAN_CLOSE, Atom.PAREN_CLOSE,
Atom.BRACE_OPEN, Atom.BRACE_OPEN,
lambda: STATEMENTS, lambda: STATEMENTS,
Atom.BRACE_CLOSE, Atom.BRACE_CLOSE,
@ -95,11 +103,11 @@ RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON])
# Just a constant, since this isn't a non-terminal # Just a constant, since this isn't a non-terminal
STATEMENT = { STATEMENT = {
(Atom.LET): LET_STATEMENT, (Atom.LET,): LET_STATEMENT,
(Atom.IF): IF_STATEMENT, (Atom.IF,): IF_STATEMENT,
(Atom.WHILE): WHILE_STATEMENT, (Atom.WHILE,): WHILE_STATEMENT,
(Atom.DO): DO_STATEMENT, (Atom.DO,): DO_STATEMENT,
(Atom.RETURN): RETURN_STATEMENT (Atom.RETURN,): RETURN_STATEMENT
} }
STATEMENTS = Element('statements', [[STATEMENT]]) STATEMENTS = Element('statements', [[STATEMENT]])
@ -130,9 +138,9 @@ SUBROUTINEDEC = Element('subroutineDec', [
Atom.CONSTRUCTOR | Atom.FUNCTION | Atom.METHOD, Atom.CONSTRUCTOR | Atom.FUNCTION | Atom.METHOD,
RETURN_TYPES, RETURN_TYPES,
Atom.IDENTIFIER, Atom.IDENTIFIER,
Atom.PARAN_OPEN, Atom.PAREN_OPEN,
PARAMETER_LIST, PARAMETER_LIST,
Atom.PARAN_CLOSE, Atom.PAREN_CLOSE,
SUBROUTINE_BODY, SUBROUTINE_BODY,
]) ])

View File

@ -1,7 +1,13 @@
from enum import IntFlag,auto from enum import IntFlag,auto
class PrintableFlag(IntFlag):
def __repr__(self):
if self.name:
return self.name
return super().__str__()
""" Super class for everything """ """ Super class for everything """
class Atom(IntFlag): class Atom(PrintableFlag):
# Keywords # Keywords
CLASS = auto() CLASS = auto()
METHOD = auto() METHOD = auto()
@ -27,8 +33,8 @@ class Atom(IntFlag):
# Symbols Start here # Symbols Start here
BRACE_OPEN = auto() BRACE_OPEN = auto()
BRACE_CLOSE = auto() BRACE_CLOSE = auto()
PARAN_OPEN = auto() PAREN_OPEN = auto()
PARAN_CLOSE = auto() PAREN_CLOSE = auto()
SQUARE_OPEN = auto() SQUARE_OPEN = auto()
SQUARE_CLOSE = auto() SQUARE_CLOSE = auto()
DOT = auto() DOT = auto()
@ -49,7 +55,7 @@ class Atom(IntFlag):
INTEGERCONSTANT = auto() INTEGERCONSTANT = auto()
STRINGCONSTANT = auto() STRINGCONSTANT = auto()
class Keyword(IntFlag): class Keyword(PrintableFlag):
CLASS = Atom.CLASS.value CLASS = Atom.CLASS.value
METHOD = Atom.METHOD.value METHOD = Atom.METHOD.value
FUNCTION = Atom.FUNCTION.value FUNCTION = Atom.FUNCTION.value
@ -72,12 +78,12 @@ class Keyword(IntFlag):
NULL = Atom.NULL.value NULL = Atom.NULL.value
THIS = Atom.THIS.value THIS = Atom.THIS.value
class Symbol(IntFlag): class Symbol(PrintableFlag):
# Symbols Start here # Symbols Start here
BRACE_OPEN = Atom.BRACE_OPEN.value BRACE_OPEN = Atom.BRACE_OPEN.value
BRACE_CLOSE = Atom.BRACE_CLOSE.value BRACE_CLOSE = Atom.BRACE_CLOSE.value
PARAN_OPEN = Atom.PARAN_OPEN.value PAREN_OPEN = Atom.PAREN_OPEN.value
PARAN_CLOSE = Atom.PARAN_CLOSE.value PAREN_CLOSE = Atom.PAREN_CLOSE.value
SQUARE_OPEN = Atom.SQUARE_OPEN.value SQUARE_OPEN = Atom.SQUARE_OPEN.value
SQUARE_CLOSE = Atom.SQUARE_CLOSE.value SQUARE_CLOSE = Atom.SQUARE_CLOSE.value
DOT = Atom.DOT.value DOT = Atom.DOT.value
@ -94,7 +100,7 @@ class Symbol(IntFlag):
NOT = Atom.NOT.value NOT = Atom.NOT.value
COMMA = Atom.COMMA.value COMMA = Atom.COMMA.value
class Token(IntFlag): class Token(PrintableFlag):
IDENTIFIER = Atom.IDENTIFIER.value IDENTIFIER = Atom.IDENTIFIER.value
INTEGERCONSTANT = Atom.INTEGERCONSTANT.value INTEGERCONSTANT = Atom.INTEGERCONSTANT.value
STRINGCONSTANT = Atom.STRINGCONSTANT.value STRINGCONSTANT = Atom.STRINGCONSTANT.value

View File

@ -7,8 +7,8 @@ class JackTokenizer:
SYMBOL_MAP = { SYMBOL_MAP = {
'{': Symbol.BRACE_OPEN , '{': Symbol.BRACE_OPEN ,
'}': Symbol.BRACE_CLOSE , '}': Symbol.BRACE_CLOSE ,
'(': Symbol.PARAN_OPEN , '(': Symbol.PAREN_OPEN ,
')': Symbol.PARAN_CLOSE , ')': Symbol.PAREN_CLOSE ,
'[': Symbol.SQUARE_OPEN , '[': Symbol.SQUARE_OPEN ,
']': Symbol.SQUARE_CLOSE , ']': Symbol.SQUARE_CLOSE ,
'.': Symbol.DOT , '.': Symbol.DOT ,

View File

@ -0,0 +1,197 @@
<class>
<keyword> class </keyword>
<identifier> Main </identifier>
<symbol> { </symbol>
<classVarDec>
<keyword> static </keyword>
<keyword> boolean </keyword>
<identifier> test </identifier>
<symbol> ; </symbol>
</classVarDec>
<subroutineDec>
<keyword> function </keyword>
<keyword> void </keyword>
<identifier> main </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<subroutineBody>
<symbol> { </symbol>
<varDec>
<keyword> var </keyword>
<identifier> SquareGame </identifier>
<identifier> game </identifier>
<symbol> ; </symbol>
</varDec>
<letStatement>
<keyword> let </keyword>
<identifier> game </identifier>
<symbol> = </symbol>
<term>
<identifier> game </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<statements>
<doStatement>
<keyword> do </keyword>
<identifier> game </identifier>
<symbol> . </symbol>
<identifier> run </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
</doStatement>
<doStatement>
<keyword> do </keyword>
<identifier> game </identifier>
<symbol> . </symbol>
<identifier> dispose </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<symbol> ; </symbol>
</doStatement>
<returnStatement>
<keyword> return </keyword>
<symbol> ; </symbol>
</returnStatement>
</statements>
<symbol> } </symbol>
</subroutineBody>
</subroutineDec>
<subroutineDec>
<keyword> function </keyword>
<keyword> void </keyword>
<identifier> test </identifier>
<symbol> ( </symbol>
<symbol> ) </symbol>
<subroutineBody>
<symbol> { </symbol>
<varDec>
<keyword> var </keyword>
<keyword> int </keyword>
<identifier> i </identifier>
<symbol> , </symbol>
<identifier> j </identifier>
<symbol> ; </symbol>
</varDec>
<varDec>
<keyword> var </keyword>
<identifier> String </identifier>
<identifier> s </identifier>
<symbol> ; </symbol>
</varDec>
<varDec>
<keyword> var </keyword>
<identifier> Array </identifier>
<identifier> a </identifier>
<symbol> ; </symbol>
</varDec>
<ifStatement>
<keyword> if </keyword>
<symbol> ( </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ) </symbol>
<symbol> { </symbol>
<letStatement>
<keyword> let </keyword>
<identifier> s </identifier>
<symbol> = </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<statements>
<letStatement>
<keyword> let </keyword>
<identifier> s </identifier>
<symbol> = </symbol>
<term>
<identifier> j </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<letStatement>
<keyword> let </keyword>
<identifier> a </identifier>
<symbol> [ </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ] </symbol>
<symbol> = </symbol>
<term>
<identifier> j </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
</statements>
<symbol> } </symbol>
<keyword> else </keyword>
<symbol> { </symbol>
<letStatement>
<keyword> let </keyword>
<identifier> i </identifier>
<symbol> = </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<statements>
<letStatement>
<keyword> let </keyword>
<identifier> j </identifier>
<symbol> = </symbol>
<term>
<identifier> j </identifier>
</term>
<expression>
</expression>
<symbol> ; </symbol>
</letStatement>
<letStatement>
<keyword> let </keyword>
<identifier> i </identifier>
<symbol> = </symbol>
<term>
<identifier> i </identifier>
</term>
<expression>
<symbol> | </symbol>
<term>
<identifier> j </identifier>
</term>
</expression>
<symbol> ; </symbol>
</letStatement>
</statements>
<symbol> } </symbol>
</ifStatement>
<statements>
<returnStatement>
<keyword> return </keyword>
<symbol> ; </symbol>
</returnStatement>
</statements>
<symbol> } </symbol>
</subroutineBody>
</subroutineDec>
<symbol> } </symbol>
</class>