[10] Initial WIP Tokenizer
- Initial WIP. - The comment parsing is clumsy and needs a better approach. - Strings are not yet supported. - Symbols and keywords are covered.
This commit is contained in:
parent
9e40244023
commit
8dedf23832
|
@ -0,0 +1,126 @@
|
|||
from enum import Enum
|
||||
import re
|
||||
|
||||
class Token(Enum):
    """Categories a token can be classified as."""

    KEYWORD = 1
    SYMBOL = 2
    IDENTIFIER = 3
    INT_CONST = 4       # integer constant
    STRING_CONST = 5    # string constant
|
||||
|
||||
class Keyword(Enum):
    """The reserved words recognised as keywords, one member per word."""

    # Declarations
    CLASS = 1
    METHOD = 2
    FUNCTION = 3
    CONSTRUCTOR = 4

    # Types
    INT = 5
    BOOLEAN = 6
    CHAR = 7
    VOID = 8

    # Variable kinds
    VAR = 9
    STATIC = 10
    FIELD = 11

    # Statements
    LET = 12
    DO = 13
    IF = 14
    ELSE = 15
    WHILE = 16
    RETURN = 17

    # Constants
    TRUE = 18
    FALSE = 19
    NULL = 20
    THIS = 21
|
||||
|
||||
class JackAnalyzer:
    """Placeholder class; it has no behaviour yet."""

    def __init__(self):
        """Create an instance without setting any attributes."""
|
||||
|
||||
class JackTokenizer:
    """Break a Jack source file into a flat list of token strings.

    Comments are removed first: ``// ...`` up to the end of the line and
    ``/* ... */`` blocks, which may span several lines. The remaining text
    is split into string constants, single-character symbols and words
    (keywords, identifiers and integer constants).

    Attributes:
        file: the open source file that ``advance`` reads from.
        insideMultiLineComment: True while a ``/* ... */`` comment opened
            on an earlier line has not been closed yet.
        tokens: every token produced by the most recent ``advance`` call.
        token: text of the current token; nothing in this class assigns it
            yet, so it stays ``None``.
    """

    # One alternative per lexical shape. String constants come first so that
    # symbols or whitespace inside the quotes never split them.
    _TOKEN_PATTERN = re.compile(
        r'"[^"]*"?'                         # string constant (quotes kept)
        r'|[{}()\[\].,;+\-*/&|<>=~]'        # single-character symbol
        r'|[^\s"{}()\[\].,;+\-*/&|<>=~]+'   # keyword, identifier or integer
    )

    def __init__(self, filename):
        """Open *filename* for reading and reset the tokenizer state."""
        self.insideMultiLineComment = False
        self.tokens = []
        self.token = None
        self.file = open(filename, 'r')

    def close(self):
        """Close the underlying source file."""
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def tokenType(self):
        """Return the type of the current token.

        Not implemented yet: this always returns ``None``, so ``symbol``,
        ``identifier`` and ``intVal`` currently always raise.
        """
        return None

    def symbol(self):
        """Return the character which is the current token.

        Raises:
            RuntimeError: if the current token is not a SYMBOL.
        """
        if self.tokenType() != Token.SYMBOL:
            raise RuntimeError("Should only be called when tokenType is SYMBOL")
        return self.token

    def identifier(self):
        """Return the identifier which is the current token.

        Raises:
            RuntimeError: if the current token is not an IDENTIFIER.
        """
        if self.tokenType() != Token.IDENTIFIER:
            raise RuntimeError("Should only be called when tokenType is IDENTIFIER")
        return self.token

    def intVal(self):
        """Return the integer value of the current token.

        Raises:
            RuntimeError: if the current token is not an INT_CONST.
        """
        if self.tokenType() != Token.INT_CONST:
            raise RuntimeError("Should only be called when tokenType is INT_CONST")
        return int(self.token)

    def _strip_comments(self, line):
        """Return *line* without comments, updating the block-comment state.

        The line is scanned left to right so that comment markers inside a
        string constant are left alone and any number of ``/* ... */``
        comments on one line are handled.
        """
        kept = []
        pos = 0
        while pos < len(line):
            if self.insideMultiLineComment:
                end = line.find("*/", pos)
                if end == -1:
                    # The comment doesn't end on this line.
                    break
                self.insideMultiLineComment = False
                pos = end + 2
                # A comment separates tokens just like whitespace does.
                kept.append(" ")
            elif line[pos] == '"':
                close = line.find('"', pos + 1)
                # An unterminated string runs to the end of the line.
                stop = len(line) if close == -1 else close + 1
                kept.append(line[pos:stop])
                pos = stop
            elif line.startswith("//", pos):
                # Single-line comment: the rest of the line is ignored.
                break
            elif line.startswith("/*", pos):
                self.insideMultiLineComment = True
                pos += 2
            else:
                kept.append(line[pos])
                pos += 1
        return "".join(kept)

    def parse_line(self, line):
        """Return the list of tokens found on *line*.

        Blank lines and lines that are entirely comment yield ``[]``.
        String constants are returned as one token, quotes included.
        """
        code = self._strip_comments(line.strip())
        return self._TOKEN_PATTERN.findall(code)

    def advance(self):
        """Tokenize the rest of the file into ``self.tokens`` and print it."""
        self.tokens = []
        for line in self.file:
            self.tokens.extend(self.parse_line(line))

        print(self.tokens)
|
||||
|
||||
class CompilationEngine:
    """Placeholder class; it has no behaviour yet."""

    def __init__(self):
        """Create an instance without setting any attributes."""
|
||||
|
||||
if __name__ == '__main__':
    # Tokenize the sample program; advance() prints the resulting tokens.
    jt = JackTokenizer("../projects/10/Square/Square.jack")
    try:
        jt.advance()
    finally:
        # The tokenizer keeps its source file open in `file`; release it
        # even if tokenizing fails.
        jt.file.close()
|
Loading…
Reference in New Issue