New compiler is running now and giving some results
This commit is contained in:
parent
48d72383fd
commit
703c5e467d
|
@ -1,8 +1,6 @@
|
|||
from tokenizer import JackTokenizer
|
||||
from engine import Engine
|
||||
import sys
|
||||
|
||||
if __name__ == '__main__':
|
||||
j = JackTokenizer(sys.argv[1], True)
|
||||
# c = CompilationEngine(sys.argv[1])
|
||||
# c.CompileClass()
|
||||
Engine(sys.argv[1]).compileClass()
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from tokenizer import JackTokenizer
|
||||
from keywords import *
|
||||
from grammar import CLASS
|
||||
from grammar import CLASS,Element
|
||||
|
||||
"""
|
||||
New Compilation Engine
|
||||
|
@ -9,7 +9,7 @@ class Engine:
|
|||
def __init__(self, input_file):
|
||||
self.i = 0
|
||||
self.jt = JackTokenizer(input_file, False)
|
||||
self.file = open(self.xml_file())
|
||||
# self.file = open(self.xml_file(input_file))
|
||||
|
||||
def xml_file(self, input_file):
|
||||
return input_file + ".xml"
|
||||
|
@ -20,13 +20,14 @@ class Engine:
|
|||
return Atom(token.value)
|
||||
|
||||
def compileClass(self):
|
||||
self.compile(grammar.CLASS)
|
||||
self.compile(CLASS)
|
||||
|
||||
def advance(self):
|
||||
self.jt.advance()
|
||||
|
||||
def ZeroOrMany(self, grammarList):
|
||||
if compile(grammarList[0]):
|
||||
# print("ZeroOrMany")
|
||||
if self.compile(grammarList[0]):
|
||||
# We now expect the whole of it
|
||||
for e in grammarList:
|
||||
self.compile(e)
|
||||
|
@ -39,22 +40,31 @@ class Engine:
|
|||
print(line)
|
||||
|
||||
def MatchDict(self, dictionary):
|
||||
# print("MatchDict")
|
||||
xml_rows_for_lookup_terms = []
|
||||
lookup_keys = ()
|
||||
# How much to lookahead
|
||||
lookahead = len(list(dict.keys())[0])
|
||||
lookahead = len(list(dictionary.keys())[0])
|
||||
for _ in range(lookahead):
|
||||
xml_rows_for_lookup_terms += [self.jt.xml_row()]
|
||||
lookup_keys = lookup_keys + (self.atom(),)
|
||||
self.advance()
|
||||
|
||||
grammar = dict[lookup_keys]
|
||||
|
||||
# We must open this before we compile the remainder
|
||||
if isinstance(grammar, Element):
|
||||
self.open(grammar)
|
||||
grammar = grammar.grammar
|
||||
|
||||
# Now we put the first X terms from the conditional
|
||||
for line in xml_rows_for_lookup_terms:
|
||||
self.write(line)
|
||||
|
||||
for e in dict[lookup_keys]:
|
||||
self.compile(e)
|
||||
return self.compile(grammar)
|
||||
|
||||
def ZeroOrOne(self, grammarTuple):
|
||||
# print("ZeroOrOne")
|
||||
if self.compile(grammarTuple[0]):
|
||||
for e in grammarTuple:
|
||||
self.compile(e)
|
||||
|
@ -68,32 +78,39 @@ class Engine:
|
|||
current = self.atom()
|
||||
# We use `in` here to accommodate bitmasks
|
||||
if current in expected:
|
||||
print(current)
|
||||
print(self.jt.xml_row(), end="")
|
||||
self.advance()
|
||||
return True
|
||||
else:
|
||||
raise Exception("Expected %s, got %s" % (expected, current))
|
||||
return False
|
||||
|
||||
def open(self, el):
|
||||
print("<%s>" % el.name)
|
||||
|
||||
def close(self, el):
|
||||
print("</%s>" % el.name)
|
||||
|
||||
def compile(self, thing):
|
||||
# TODO: OPEN TAGS
|
||||
if isinstance(thing, Element):
|
||||
print("open %s" % thing.name)
|
||||
grammar = thing.grammar
|
||||
self.open(thing)
|
||||
for e in thing.grammar:
|
||||
self.compile(e)
|
||||
self.close(thing)
|
||||
elif callable(thing):
|
||||
grammar = thing()
|
||||
self.compile(grammar)
|
||||
else:
|
||||
grammar = thing
|
||||
grammarType = type(grammar)
|
||||
|
||||
elif grammarType == list:
|
||||
return self.ZeroOrMany(thing)
|
||||
if grammarType == list:
|
||||
return self.ZeroOrMany(grammar)
|
||||
elif grammarType == dict:
|
||||
return self.MatchDict(thing)
|
||||
return self.MatchDict(grammar)
|
||||
elif grammarType == tuple:
|
||||
return self.ZeroOrOne(thing)
|
||||
return self.ZeroOrOne(grammar)
|
||||
elif grammarType == Atom:
|
||||
return self.Atom(thing)
|
||||
elif callable(thing):
|
||||
return self.compile(thing)
|
||||
|
||||
if isinstance(thing, Element):
|
||||
print("close %s" % thing.name)
|
||||
return self.Atom(grammar)
|
||||
else:
|
||||
raise Exception("Should not have reached here")
|
||||
|
|
|
@ -3,7 +3,10 @@ from keywords import Atom
|
|||
"""
|
||||
The grammar is defined by the following constructs:
|
||||
|
||||
The top level object is called GRAMMAR, which is the grammar for a class. It is a list object.
|
||||
The top level object is called GRAMMAR, which is the grammar for a class.
|
||||
It is an instance of the Element class.
|
||||
The element class contains a grammar element, which is always defined as a list
|
||||
for an element class.
|
||||
|
||||
Inside this list, each element can be any of the following:
|
||||
|
||||
|
@ -18,37 +21,41 @@ a Python structure.
|
|||
|
||||
"""
|
||||
class Element:
|
||||
# Usually I avoid inverted boolean variable names, but this is much cleaner
|
||||
def __init__(self, name, grammar):
|
||||
assert(type(grammar)==list)
|
||||
self.name = name
|
||||
self.grammar = grammar
|
||||
|
||||
TYPES = Element('type', Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER)
|
||||
|
||||
CLASSVARDEC = Element('classVarDec', [
|
||||
# static|field type (, name)* ;
|
||||
Atom.STATIC | Atom.FIELD,
|
||||
TYPES,
|
||||
Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
|
||||
Atom.IDENTIFIER,
|
||||
[Atom.COMMA, Atom.IDENTIFIER],
|
||||
Atom.SEMICOLON
|
||||
])
|
||||
|
||||
VARDEC = Element('varDec', [Atom.VAR, TYPES, Atom.IDENTIFIER,
|
||||
VARDEC = Element('varDec', [Atom.VAR, Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER, Atom.IDENTIFIER,
|
||||
[Atom.COMMA, Atom.IDENTIFIER],
|
||||
Atom.SEMICOLON
|
||||
])
|
||||
UNARY_OP = Element('unaryOp', Atom.NOT | Atom.MINUS)
|
||||
|
||||
CONSTANT = Element('KeywordConstant', Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS)
|
||||
# Since this is not a non-terminal, we can just write it as a constant
|
||||
OP = Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ
|
||||
UNARY_OP = Atom.NOT | Atom.MINUS
|
||||
CONSTANT = Atom.TRUE | Atom.FALSE|Atom.NULL|Atom.THIS
|
||||
""" Pseudo-element to help define subroutine declarations """
|
||||
RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
|
||||
|
||||
TERM = Element('term', Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER)
|
||||
|
||||
OP = Element('op', Atom.PLUS | Atom.MINUS | Atom.MUL | Atom.DIV | Atom.AND | Atom.OR | Atom.GT | Atom.LT | Atom.EQ)
|
||||
# TODO: This is missing a lot of stuff
|
||||
TERM = Element('term', [Atom.INTEGERCONSTANT | Atom.STRINGCONSTANT | Atom.TRUE | Atom.FALSE | Atom.IDENTIFIER])
|
||||
|
||||
EXPRESSION = Element('expression', [TERM, [OP, TERM]])
|
||||
|
||||
EXPRESSIONLIST = Element('expressionList', (EXPRESSION, [Atom.COMMA, EXPRESSION]))
|
||||
EXPRESSIONLIST = Element('expressionList', [(EXPRESSION, [Atom.COMMA, EXPRESSION])])
|
||||
|
||||
SUBROUTINE_CALL = Element('subroutineCall', {
|
||||
DO_STATEMENT = Element('doStatement', [{
|
||||
(Atom.IDENTIFIER, Atom.PARAN_OPEN): [
|
||||
EXPRESSIONLIST,
|
||||
Atom.PARAN_CLOSE,
|
||||
|
@ -59,11 +66,12 @@ SUBROUTINE_CALL = Element('subroutineCall', {
|
|||
EXPRESSIONLIST,
|
||||
Atom.PARAN_CLOSE
|
||||
]
|
||||
})
|
||||
}])
|
||||
|
||||
STATEMENT = Element('statement', {
|
||||
(Atom.LET): [Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)],
|
||||
(Atom.IF): [
|
||||
LET_STATEMENT = Element('whileStatement', [
|
||||
Atom.IDENTIFIER, (Atom.SQUARE_OPEN, EXPRESSION, Atom.SQUARE_CLOSE)])
|
||||
|
||||
IF_STATEMENT = Element('ifStatement', [
|
||||
Atom.PARAN_OPEN,
|
||||
EXPRESSION,
|
||||
Atom.PARAN_CLOSE,
|
||||
|
@ -72,41 +80,49 @@ STATEMENT = Element('statement', {
|
|||
Atom.BRACE_CLOSE,
|
||||
# This is the tricky one
|
||||
( Atom.ELSE, Atom.BRACE_OPEN, lambda:STATEMENT, Atom.BRACE_CLOSE)
|
||||
],
|
||||
(Atom.WHILE): [
|
||||
])
|
||||
|
||||
WHILE_STATEMENT = Element('whileStatement', [
|
||||
Atom.PARAN_OPEN,
|
||||
EXPRESSION,
|
||||
Atom.PARAN_CLOSE,
|
||||
Atom.BRACE_OPEN,
|
||||
lambda: STATEMENTS,
|
||||
Atom.BRACE_CLOSE,
|
||||
],
|
||||
(Atom.DO): SUBROUTINE_CALL,
|
||||
(Atom.RETURN): [(EXPRESSION), Atom.SEMICOLON]
|
||||
})
|
||||
])
|
||||
|
||||
STATEMENTS = Element('statements', [STATEMENT])
|
||||
RETURN_STATEMENT = Element('returnStatement', [(EXPRESSION), Atom.SEMICOLON])
|
||||
|
||||
# Just a constant, since this isn't a non-terminal
|
||||
STATEMENT = {
|
||||
(Atom.LET): LET_STATEMENT,
|
||||
(Atom.IF): IF_STATEMENT,
|
||||
(Atom.WHILE): WHILE_STATEMENT,
|
||||
(Atom.DO): DO_STATEMENT,
|
||||
(Atom.RETURN): RETURN_STATEMENT
|
||||
}
|
||||
|
||||
STATEMENTS = Element('statements', [[STATEMENT]])
|
||||
|
||||
SUBROUTINE_BODY = Element('subroutineBody', [
|
||||
# Zero or more variable declarations
|
||||
# `var type varName (, varName)* ;`
|
||||
Atom.BRACE_OPEN,
|
||||
[VARDEC],
|
||||
STATEMENTS
|
||||
STATEMENTS,
|
||||
Atom.BRACE_CLOSE
|
||||
])
|
||||
|
||||
""" Pseudo-element to help define subroutine declarations """
|
||||
RETURN_TYPES= Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER|Atom.VOID
|
||||
|
||||
# Parameter List =
|
||||
# (
|
||||
# (type varName) (, type varName)*
|
||||
# )?
|
||||
# we use tuples for zero OR one of a sequence
|
||||
PARAMETER_LIST = Element('parameterList', (
|
||||
TYPES,
|
||||
PARAMETER_LIST = Element('parameterList', [(
|
||||
Atom.INT | Atom.CHAR | Atom.BOOLEAN | Atom.IDENTIFIER,
|
||||
Atom.IDENTIFIER,
|
||||
[Atom.COMMA, TYPES, Atom.IDENTIFIER]
|
||||
))
|
||||
[Atom.COMMA, Atom.INT | Atom.CHAR|Atom.BOOLEAN|Atom.IDENTIFIER, Atom.IDENTIFIER]
|
||||
)])
|
||||
|
||||
SUBROUTINEDEC = Element('subroutineDec', [
|
||||
# (constructor | function | method) (void | type) subRoutineName '(' parameterList ')'
|
||||
|
@ -117,10 +133,7 @@ SUBROUTINEDEC = Element('subroutineDec', [
|
|||
Atom.PARAN_OPEN,
|
||||
PARAMETER_LIST,
|
||||
Atom.PARAN_CLOSE,
|
||||
# Subroutine Body
|
||||
Atom.BRACE_OPEN,
|
||||
SUBROUTINE_BODY,
|
||||
Atom.BRACE_CLOSE,
|
||||
])
|
||||
|
||||
CLASS = Element('class', [
|
||||
|
|
|
@ -1,33 +1,29 @@
|
|||
import re
|
||||
from keywords import Keyword
|
||||
from keywords import *
|
||||
from html import escape
|
||||
from enum import Enum
|
||||
# Superclass in some sense
|
||||
class Token(Enum):
|
||||
KEYWORD = 1
|
||||
SYMBOL = 2
|
||||
|
||||
class JackTokenizer:
|
||||
SYMBOL_MAP = {
|
||||
'{': Keyword.BRACE_OPEN ,
|
||||
'}': Keyword.BRACE_CLOSE ,
|
||||
'(': Keyword.PARAN_OPEN ,
|
||||
')': Keyword.PARAN_CLOSE ,
|
||||
'[': Keyword.SQUARE_OPEN ,
|
||||
']': Keyword.SQUARE_CLOSE ,
|
||||
'.': Keyword.DOT ,
|
||||
';': Keyword.SEMICOLON ,
|
||||
'+': Keyword.PLUS ,
|
||||
'-': Keyword.MINUS ,
|
||||
'*': Keyword.MUL ,
|
||||
'/': Keyword.DIV ,
|
||||
'&': Keyword.AND ,
|
||||
'|': Keyword.OR ,
|
||||
'<': Keyword.LT ,
|
||||
'>': Keyword.GT ,
|
||||
'=': Keyword.EQ ,
|
||||
'~': Keyword.NOT ,
|
||||
',': Keyword.COMMA,
|
||||
'{': Symbol.BRACE_OPEN ,
|
||||
'}': Symbol.BRACE_CLOSE ,
|
||||
'(': Symbol.PARAN_OPEN ,
|
||||
')': Symbol.PARAN_CLOSE ,
|
||||
'[': Symbol.SQUARE_OPEN ,
|
||||
']': Symbol.SQUARE_CLOSE ,
|
||||
'.': Symbol.DOT ,
|
||||
';': Symbol.SEMICOLON ,
|
||||
'+': Symbol.PLUS ,
|
||||
'-': Symbol.MINUS ,
|
||||
'*': Symbol.MUL ,
|
||||
'/': Symbol.DIV ,
|
||||
'&': Symbol.AND ,
|
||||
'|': Symbol.OR ,
|
||||
'<': Symbol.LT ,
|
||||
'>': Symbol.GT ,
|
||||
'=': Symbol.EQ ,
|
||||
'~': Symbol.NOT ,
|
||||
',': Symbol.COMMA,
|
||||
}
|
||||
|
||||
KEYWORD_MAP = {
|
||||
|
@ -61,16 +57,16 @@ class JackTokenizer:
|
|||
elif re.compile("(\(|\)|\[|\]|,|\+|-|;|<|>|=|~|&|{|}|\*|\/|\||\.)").match(t):
|
||||
return JackTokenizer.SYMBOL_MAP[t]
|
||||
elif re.compile("\d+").match(t):
|
||||
return Keyword.INTEGERCONSTANT
|
||||
return Token.INTEGERCONSTANT
|
||||
elif re.compile("\".*\"").match(t):
|
||||
return Keyword.STRINGCONSTANT
|
||||
return Token.STRINGCONSTANT
|
||||
else:
|
||||
# TODO: Put an assert to ensure valid identifier
|
||||
return Keyword.IDENTIFIER
|
||||
return Token.IDENTIFIER
|
||||
pass
|
||||
|
||||
def printable_token(self):
|
||||
if self.tokenType() == Keyword.STRINGCONSTANT:
|
||||
if self.tokenType() == Token.STRINGCONSTANT:
|
||||
return self.current_token()[1:-1]
|
||||
else:
|
||||
return escape(self.current_token(), True)
|
||||
|
@ -95,7 +91,7 @@ class JackTokenizer:
|
|||
|
||||
""" Returns the integer value of the current token """
|
||||
def intVal(self):
|
||||
self.assert_type(Keyword.INTEGERCONSTANT)
|
||||
self.assert_type(Token.INTEGERCONSTANT)
|
||||
return int(self.token)
|
||||
|
||||
""" Returns a list of tokens for that line """
|
||||
|
|
Loading…
Reference in New Issue