New compiler is running now and giving some results
This commit is contained in:
parent
48d72383fd
commit
703c5e467d
|
@ -1,8 +1,6 @@
|
||||||
from tokenizer import JackTokenizer
|
from engine import Engine
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
j = JackTokenizer(sys.argv[1], True)
|
Engine(sys.argv[1]).compileClass()
|
||||||
# c = CompilationEngine(sys.argv[1])
|
|
||||||
# c.CompileClass()
|
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from tokenizer import JackTokenizer
|
from tokenizer import JackTokenizer
|
||||||
from keywords import *
|
from keywords import *
|
||||||
from grammar import CLASS
|
from grammar import CLASS,Element
|
||||||
|
|
||||||
"""
|
"""
|
||||||
New Compilation Engine
|
New Compilation Engine
|
||||||
|
@ -9,7 +9,7 @@ class Engine:
|
||||||
def __init__(self, input_file):
|
def __init__(self, input_file):
|
||||||
self.i = 0
|
self.i = 0
|
||||||
self.jt = JackTokenizer(input_file, False)
|
self.jt = JackTokenizer(input_file, False)
|
||||||
self.file = open(self.xml_file())
|
# self.file = open(self.xml_file(input_file))
|
||||||
|
|
||||||
def xml_file(self, input_file):
|
def xml_file(self, input_file):
|
||||||
return input_file + ".xml"
|
return input_file + ".xml"
|
||||||
|
@ -20,13 +20,14 @@ class Engine:
|
||||||
return Atom(token.value)
|
return Atom(token.value)
|
||||||
|
|
||||||
def compileClass(self):
|
def compileClass(self):
|
||||||
self.compile(grammar.CLASS)
|
self.compile(CLASS)
|
||||||
|
|
||||||
def advance(self):
|
def advance(self):
|
||||||
self.jt.advance()
|
self.jt.advance()
|
||||||
|
|
||||||
def ZeroOrMany(self, grammarList):
|
def ZeroOrMany(self, grammarList):
|
||||||
if compile(grammarList[0]):
|
# print("ZeroOrMany")
|
||||||
|
if self.compile(grammarList[0]):
|
||||||
# We now expect the whole of it
|
# We now expect the whole of it
|
||||||
for e in grammarList:
|
for e in grammarList:
|
||||||
self.compile(e)
|
self.compile(e)
|
||||||
|
@ -39,22 +40,31 @@ class Engine:
|
||||||
print(line)
|
print(line)
|
||||||
|
|
||||||
def MatchDict(self, dictionary):
|
def MatchDict(self, dictionary):
|
||||||
|
# print("MatchDict")
|
||||||
xml_rows_for_lookup_terms = []
|
xml_rows_for_lookup_terms = []
|
||||||
lookup_keys = ()
|
lookup_keys = ()
|
||||||
# How much to lookahead
|
# How much to lookahead
|
||||||
lookahead = len(list(dict.keys())[0])
|
lookahead = len(list(dictionary.keys())[0])
|
||||||
for _ in range(lookahead):
|
for _ in range(lookahead):
|
||||||
xml_rows_for_lookup_terms += [self.jt.xml_row()]
|
xml_rows_for_lookup_terms += [self.jt.xml_row()]
|
||||||
lookup_keys = lookup_keys + (self.atom(),)
|
lookup_keys = lookup_keys + (self.atom(),)
|
||||||
self.advance()
|
self.advance()
|
||||||
|
|
||||||
|
grammar = dict[lookup_keys]
|
||||||
|
|
||||||
|
# We must open this before we compile the remainder
|
||||||
|
if isinstance(grammar, Element):
|
||||||
|
self.open(grammar)
|
||||||
|
grammar = grammar.grammar
|
||||||
|
|
||||||
|
# Now we put the first X terms from the conditional
|
||||||
for line in xml_rows_for_lookup_terms:
|
for line in xml_rows_for_lookup_terms:
|
||||||
self.write(line)
|
self.write(line)
|
||||||
|
|
||||||
for e in dict[lookup_keys]:
|
return self.compile(grammar)
|
||||||
self.compile(e)
|
|
||||||
|
|
||||||
def ZeroOrOne(self, grammarTuple):
|
def ZeroOrOne(self, grammarTuple):
|
||||||
|
# print("ZeroOrOne")
|
||||||
if self.compile(grammarTuple[0]):
|
if self.compile(grammarTuple[0]):
|
||||||
for e in grammarTuple:
|
for e in grammarTuple:
|
||||||
self.compile(e)
|
self.compile(e)
|
||||||
|
@ -68,32 +78,39 @@ class Engine:
|
||||||
current = self.atom()
|
current = self.atom()
|
||||||
# We use `in` here to accommodate bitmasks
|
# We use `in` here to accommodate bitmasks
|
||||||
if current in expected:
|
if current in expected:
|
||||||
print(current)
|
print(self.jt.xml_row(), end="")
|
||||||
self.advance()
|
self.advance()
|
||||||
|
return True
|
||||||
else:
|
else:
|
||||||
raise Exception("Expected %s, got %s" % (expected, current))
|
return False
|
||||||
|
|
||||||
|
def open(self, el):
|
||||||
|
print("<%s>" % el.name)
|
||||||
|
|
||||||
|
def close(self, el):
|
||||||
|
print("</%s>" % el.name)
|
||||||
|
|
||||||
def compile(self, thing):
|
def compile(self, thing):
|
||||||
# TODO: OPEN TAGS
|
# TODO: OPEN TAGS
|
||||||
if isinstance(thing, Element):
|
if isinstance(thing, Element):
|
||||||
print("open %s" % thing.name)
|
self.open(thing)
|
||||||
grammar = thing.grammar
|
for e in thing.grammar:
|
||||||
|
self.compile(e)
|
||||||
|
self.close(thing)
|
||||||
elif callable(thing):
|
elif callable(thing):
|
||||||
grammar = thing()
|
grammar = thing()
|
||||||
|
self.compile(grammar)
|
||||||
else:
|
else:
|
||||||
grammar = thing
|
grammar = thing
|
||||||
grammarType = type(grammar)
|
grammarType = type(grammar)
|
||||||
|
|
||||||
elif grammarType == list:
|
if grammarType == list:
|
||||||
return self.ZeroOrMany(thing)
|
return self.ZeroOrMany(grammar)
|
||||||
elif grammarType == dict:
|
elif grammarType == dict:
|
||||||
return self.MatchDict(thing)
|
return self.MatchDict(grammar)
|
||||||
elif grammarType == tuple:
|
elif grammarType == tuple:
|
||||||
return self.ZeroOrOne(thing)
|
return self.ZeroOrOne(grammar)
|
||||||
elif grammarType == Atom:
|
elif grammarType == Atom:
|
||||||
return self.Atom(thing)
|
return self.Atom(grammar)
|
||||||
elif callable(thing):
|
else:
|
||||||
return self.compile(thing)
|
raise Exception("Should not have reached here")
|
||||||
|
|
||||||
if isinstance(thing, Element):
|
|
||||||
print("close %s" % thing.name)
|
|
||||||
|
|
|
@ -3,7 +3,10 @@ from keywords import Atom
|
||||||
"""
|
"""
|
||||||
The grammar is defined by the following constructs:
|
The grammar is defined by the following constructs:
|
||||||
|
|
||||||
The top level object is called GRAMMAR, which is the grammar for a class. It is a list object.
|
The top level object is called CLASS, which is the grammar for a class.
|
||||||
|
It is an instance of the Element class.
|
||||||
|
The element class contains a grammar element, which is always defined as a list
|
||||||
|
for an element class.
|
||||||
|
|
||||||
Inside this list, each element can be any of the following:
|
Inside this list, each element can be any of the following:
|
||||||
|
|
||||||
|
@ -18,37 +21,41 @@ a Python structure.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
class Element:
|
class Element:
|
||||||
|
# Usually I avoid inverted boolean variable names, but this is much cleaner
|
||||||
def __init__(self, name, grammar):
|
def __init__(self, name, grammar):
|
||||||
|
assert(type(grammar)==list)
|
||||||
self.name = name
|
self.name = name
|
||||||
self.grammar = grammar
|
self.grammar = grammar
|
||||||
|
|
||||||
TYPES = Element('type', Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER)
|
|
||||||
|
|
||||||
CLASSVARDEC = Element('classVarDec', [
|
CLASSVARDEC = Element('classVarDec', [
|
||||||
# static|field type (, name)* ;
|
# static|field type (, name)* ;
|
||||||
Atom.STATIC | Atom.FIELD,
|
Atom.STATIC | Atom.FIELD,
|
||||||
TYPES,
|
Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
|
||||||
|
Atom.IDENTIFIER,
|
||||||
[Atom.COMMA, Atom.IDENTIFIER],
|
[Atom.COMMA, Atom.IDENTIFIER],
|
||||||
Atom.SEMICOLON
|
Atom.SEMICOLON
|
||||||
])
|
])
|
||||||
|
|
||||||
VARDEC = Element('varDec', [Atom.VAR, TYPES, Atom.IDENTIFIER,
|
VARDEC = Element('varDec', [Atom.VAR, Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER,
|
||||||
[Atom.COMMA, Atom.IDENTIFIER],
|
[Atom.COMMA, Atom.IDENTIFIER],
|
||||||
Atom.SEMICOLON
|
Atom.SEMICOLON
|
||||||
])
|
])
|
||||||
UNARY_OP = Element('unaryOp', Atom.NOT | Atom.MINUS)
|
|
||||||
|
|
||||||
CONSTANT = Element('KeywordConstant', Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS)
|
# Since this is not a non-terminal, we can just write it as a constant
|
||||||
|
OP = Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ
|
||||||
|
UNARY_OP = Atom.NOT | Atom.MINUS
|
||||||
|
CONSTANT = Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS
|
||||||
|
""" Pseudo-element to help define subroutine declarations """
|
||||||
|
RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
|
||||||
|
|
||||||
TERM = Element('term', Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER)
|
# TODO: This is missing a lot of stuff
|
||||||
|
TERM = Element('term', [Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER])
|
||||||
OP = Element('op', Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ)
|
|
||||||
|
|
||||||
EXPRESSION = Element('expression', [TERM, [OP, TERM]])
|
EXPRESSION = Element('expression', [TERM, [OP, TERM]])
|
||||||
|
|
||||||
EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Atom.COMMA, EXPRESSION]))
|
EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
|
||||||
|
|
||||||
SUBROUTINE_CALL = Element('subroutineCall', {
|
DO_STATEMENT = Element('doStatement', [{
|
||||||
(Atom.IDENTIFIER, Atom.PARAN_OPEN): [
|
(Atom.IDENTIFIER, Atom.PARAN_OPEN): [
|
||||||
EXPRESSIONLIST,
|
EXPRESSIONLIST,
|
||||||
Atom.PARAN_CLOSE,
|
Atom.PARAN_CLOSE,
|
||||||
|
@ -59,54 +66,63 @@ SUBROUTINE_CALL = Element('subroutineCall', {
|
||||||
EXPRESSIONLIST,
|
EXPRESSIONLIST,
|
||||||
Atom.PARAN_CLOSE
|
Atom.PARAN_CLOSE
|
||||||
]
|
]
|
||||||
})
|
}])
|
||||||
|
|
||||||
STATEMENT = Element('statement', {
|
LET_STATEMENT = Element('whileStatement', [
|
||||||
(Atom.LET): [Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)],
|
Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)])
|
||||||
(Atom.IF): [
|
|
||||||
Atom.PARAN_OPEN,
|
|
||||||
EXPRESSION,
|
|
||||||
Atom.PARAN_CLOSE,
|
|
||||||
Atom.BRACE_OPEN,
|
|
||||||
lambda: STATEMENTS,
|
|
||||||
Atom.BRACE_CLOSE,
|
|
||||||
# This is the tricky one
|
|
||||||
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
|
|
||||||
],
|
|
||||||
(Atom.WHILE): [
|
|
||||||
Atom.PARAN_OPEN,
|
|
||||||
EXPRESSION,
|
|
||||||
Atom.PARAN_CLOSE,
|
|
||||||
Atom.BRACE_OPEN,
|
|
||||||
lambda: STATEMENTS,
|
|
||||||
Atom.BRACE_CLOSE,
|
|
||||||
],
|
|
||||||
(Atom.DO): SUBROUTINE_CALL,
|
|
||||||
(Atom.RETURN): [(EXPRESSION), Atom.SEMICOLON]
|
|
||||||
})
|
|
||||||
|
|
||||||
STATEMENTS = Element('statements', [STATEMENT])
|
IF_STATEMENT = Element('ifStatement', [
|
||||||
|
Atom.PARAN_OPEN,
|
||||||
|
EXPRESSION,
|
||||||
|
Atom.PARAN_CLOSE,
|
||||||
|
Atom.BRACE_OPEN,
|
||||||
|
lambda: STATEMENTS,
|
||||||
|
Atom.BRACE_CLOSE,
|
||||||
|
# This is the tricky one
|
||||||
|
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
|
||||||
|
])
|
||||||
|
|
||||||
|
WHILE_STATEMENT = Element('whileStatement', [
|
||||||
|
Atom.PARAN_OPEN,
|
||||||
|
EXPRESSION,
|
||||||
|
Atom.PARAN_CLOSE,
|
||||||
|
Atom.BRACE_OPEN,
|
||||||
|
lambda: STATEMENTS,
|
||||||
|
Atom.BRACE_CLOSE,
|
||||||
|
])
|
||||||
|
|
||||||
|
RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON])
|
||||||
|
|
||||||
|
# Just a constant, since this isn't a non-terminal
|
||||||
|
STATEMENT = {
|
||||||
|
(Atom.LET): LET_STATEMENT,
|
||||||
|
(Atom.IF): IF_STATEMENT,
|
||||||
|
(Atom.WHILE): WHILE_STATEMENT,
|
||||||
|
(Atom.DO): DO_STATEMENT,
|
||||||
|
(Atom.RETURN): RETURN_STATEMENT
|
||||||
|
}
|
||||||
|
|
||||||
|
STATEMENTS = Element('statements', [[STATEMENT]])
|
||||||
|
|
||||||
SUBROUTINE_BODY = Element('subroutineBody', [
|
SUBROUTINE_BODY = Element('subroutineBody', [
|
||||||
# Zero or more variable declarations
|
# Zero or more variable declarations
|
||||||
# `var type varName (, varName)* ;`
|
# `var type varName (, varName)* ;`
|
||||||
[VARDEC],
|
Atom.BRACE_OPEN,
|
||||||
STATEMENTS
|
[VARDEC],
|
||||||
|
STATEMENTS,
|
||||||
|
Atom.BRACE_CLOSE
|
||||||
])
|
])
|
||||||
|
|
||||||
""" Pseudo-element to help define subroutine declarations """
|
|
||||||
RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
|
|
||||||
|
|
||||||
# Parameter List =
|
# Parameter List =
|
||||||
# (
|
# (
|
||||||
# (type varName) (, type varName)*
|
# (type varName) (, type varName)*
|
||||||
# )?
|
# )?
|
||||||
# we use tuples for zero OR one of a sequence
|
# we use tuples for zero OR one of a sequence
|
||||||
PARAMETER_LIST = Element('parameterList', (
|
PARAMETER_LIST = Element('parameterList', [(
|
||||||
TYPES,
|
Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
|
||||||
Atom.IDENTIFIER,
|
Atom.IDENTIFIER,
|
||||||
[Atom.COMMA, TYPES, Atom.IDENTIFIER]
|
[Atom.COMMA, Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER, Atom.IDENTIFIER]
|
||||||
))
|
)])
|
||||||
|
|
||||||
SUBROUTINEDEC = Element('subroutineDec', [
|
SUBROUTINEDEC = Element('subroutineDec', [
|
||||||
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
|
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
|
||||||
|
@ -117,10 +133,7 @@ SUBROUTINEDEC = Element('subroutineDec', [
|
||||||
Atom.PARAN_OPEN,
|
Atom.PARAN_OPEN,
|
||||||
PARAMETER_LIST,
|
PARAMETER_LIST,
|
||||||
Atom.PARAN_CLOSE,
|
Atom.PARAN_CLOSE,
|
||||||
# Subroutine Body
|
|
||||||
Atom.BRACE_OPEN,
|
|
||||||
SUBROUTINE_BODY,
|
SUBROUTINE_BODY,
|
||||||
Atom.BRACE_CLOSE,
|
|
||||||
])
|
])
|
||||||
|
|
||||||
CLASS = Element('class', [
|
CLASS = Element('class', [
|
||||||
|
|
|
@ -1,33 +1,29 @@
|
||||||
import re
|
import re
|
||||||
from keywords import Keyword
|
from keywords import *
|
||||||
from html import escape
|
from html import escape
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
# Superclass in some sense
|
# Superclass in some sense
|
||||||
class Token(Enum):
|
|
||||||
KEYWORD = 1
|
|
||||||
SYMBOL = 2
|
|
||||||
|
|
||||||
class JackTokenizer:
|
class JackTokenizer:
|
||||||
SYMBOL_MAP = {
|
SYMBOL_MAP = {
|
||||||
'{': Keyword.BRACE_OPEN ,
|
'{': Symbol.BRACE_OPEN ,
|
||||||
'}': Keyword.BRACE_CLOSE ,
|
'}': Symbol.BRACE_CLOSE ,
|
||||||
'(': Keyword.PARAN_OPEN ,
|
'(': Symbol.PARAN_OPEN ,
|
||||||
')': Keyword.PARAN_CLOSE ,
|
')': Symbol.PARAN_CLOSE ,
|
||||||
'[': Keyword.SQUARE_OPEN ,
|
'[': Symbol.SQUARE_OPEN ,
|
||||||
']': Keyword.SQUARE_CLOSE ,
|
']': Symbol.SQUARE_CLOSE ,
|
||||||
'.': Keyword.DOT ,
|
'.': Symbol.DOT ,
|
||||||
';': Keyword.SEMICOLON ,
|
';': Symbol.SEMICOLON ,
|
||||||
'+': Keyword.PLUS ,
|
'+': Symbol.PLUS ,
|
||||||
'-': Keyword.MINUS ,
|
'-': Symbol.MINUS ,
|
||||||
'*': Keyword.MUL ,
|
'*': Symbol.MUL ,
|
||||||
'/': Keyword.DIV ,
|
'/': Symbol.DIV ,
|
||||||
'&': Keyword.AND ,
|
'&': Symbol.AND ,
|
||||||
'|': Keyword.OR ,
|
'|': Symbol.OR ,
|
||||||
'<': Keyword.LT ,
|
'<': Symbol.LT ,
|
||||||
'>': Keyword.GT ,
|
'>': Symbol.GT ,
|
||||||
'=': Keyword.EQ ,
|
'=': Symbol.EQ ,
|
||||||
'~': Keyword.NOT ,
|
'~': Symbol.NOT ,
|
||||||
',': Keyword.COMMA,
|
',': Symbol.COMMA,
|
||||||
}
|
}
|
||||||
|
|
||||||
KEYWORD_MAP = {
|
KEYWORD_MAP = {
|
||||||
|
@ -61,16 +57,16 @@ class JackTokenizer:
|
||||||
elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t):
|
elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t):
|
||||||
return JackTokenizer.SYMBOL_MAP[t]
|
return JackTokenizer.SYMBOL_MAP[t]
|
||||||
elif re.compile("\d+").match(t):
|
elif re.compile("\d+").match(t):
|
||||||
return Keyword.INTEGERCONSTANT
|
return Token.INTEGERCONSTANT
|
||||||
elif re.compile("\".*\"").match(t):
|
elif re.compile("\".*\"").match(t):
|
||||||
return Keyword.STRINGCONSTANT
|
return Token.STRINGCONSTANT
|
||||||
else:
|
else:
|
||||||
# TODO: Put an assert to ensure valid identifier
|
# TODO: Put an assert to ensure valid identifier
|
||||||
return Keyword.IDENTIFIER
|
return Token.IDENTIFIER
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def printable_token(self):
|
def printable_token(self):
|
||||||
if self.tokenType() == Keyword.STRINGCONSTANT:
|
if self.tokenType() == Token.STRINGCONSTANT:
|
||||||
return self.current_token()[1:-1]
|
return self.current_token()[1:-1]
|
||||||
else:
|
else:
|
||||||
return escape(self.current_token(), True)
|
return escape(self.current_token(), True)
|
||||||
|
@ -95,7 +91,7 @@ class JackTokenizer:
|
||||||
|
|
||||||
""" Returns the integer value of the current token """
|
""" Returns the integer value of the current token """
|
||||||
def intVal(self):
|
def intVal(self):
|
||||||
self.assert_type(Keyword.INTEGERCONSTANT)
|
self.assert_type(Token.INTEGERCONSTANT)
|
||||||
return int(self.token)
|
return int(self.token)
|
||||||
|
|
||||||
""" Returns a list of tokens for that line """
|
""" Returns a list of tokens for that line """
|
||||||
|
|
Loading…
Reference in New Issue