diff --git a/compiler/__init__.py b/compiler/__init__.py
new file mode 100644
index 0000000..87c976c
--- /dev/null
+++ b/compiler/__init__.py
@@ -0,0 +1,126 @@
+from enum import Enum
+import re
+
+class Token(Enum):
+  """Lexical category of a single Jack token."""
+  # Words and punctuation
+  KEYWORD, SYMBOL, IDENTIFIER = 1, 2, 3
+  # Literal constants
+  INT_CONST, STRING_CONST = 4, 5
+
+class Keyword(Enum):
+  """Reserved words of the Jack language.
+
+  Members are numbered from 1 in declaration order.
+  """
+  # Class and subroutine kinds
+  CLASS, METHOD, FUNCTION, CONSTRUCTOR = 1, 2, 3, 4
+
+  # Primitive and return types
+  INT, BOOLEAN, CHAR, VOID = 5, 6, 7, 8
+
+  # Variable declarations
+  VAR, STATIC, FIELD = 9, 10, 11
+
+  # Simple statements
+  LET, DO = 12, 13
+
+  # Control flow
+  IF, ELSE, WHILE, RETURN = 14, 15, 16, 17
+
+  # Keyword constants
+  TRUE, FALSE, NULL, THIS = 18, 19, 20, 21
+
+class JackAnalyzer:
+  def __init__(self):
+    """Creates an analyzer; no state is set up."""
+
+class JackTokenizer:
+  """Splits Jack source text into a flat list of token strings.
+
+  advance() tokenizes the whole file into self.tokens. The accessor methods
+  read self.token, which stays None until a caller assigns it.
+  """
+
+  # Alternatives, tried left to right: string constant, line comment,
+  # block-comment opener, integer, word, then any other single character
+  # (a symbol, or stray input that should not be dropped silently).
+  _TOKEN_RE = re.compile(r'"[^"]*"|//.*|/\*|\d+|[A-Za-z_]\w*|\S')
+
+  def __init__(self, filename):
+    """Opens filename for reading; nothing is tokenized until advance()."""
+    self.insideMultiLineComment = False
+    self.token = None
+    self.tokens = []
+    # Closed by advance() once the file has been read.
+    self.file = open(filename, 'r')
+
+  def tokenType(self):
+    """Returns the type of the current token (still a stub: always None)."""
+    # TODO: classify self.token; until then every accessor below raises.
+    return None
+
+  def symbol(self):
+    """Returns the current token; RuntimeError unless it is a SYMBOL."""
+    if self.tokenType() != Token.SYMBOL:
+      raise RuntimeError("Should only be called when tokenType is SYMBOL")
+    return self.token
+
+  def identifier(self):
+    """Returns the current token; RuntimeError unless it is an IDENTIFIER."""
+    if self.tokenType() != Token.IDENTIFIER:
+      raise RuntimeError("Should only be called when tokenType is IDENTIFIER")
+    return self.token
+
+  def intVal(self):
+    """Returns the current token as an int; RuntimeError unless INT_CONST."""
+    if self.tokenType() != Token.INT_CONST:
+      raise RuntimeError("Should only be called when tokenType is INT_CONST")
+    return int(self.token)
+
+  def parse_line(self, line):
+    """Returns the list of tokens on line, skipping comments.
+
+    self.insideMultiLineComment carries an unterminated /* comment over to
+    the next call. String constants stay one token with their quotes kept.
+    """
+    tokens = []
+    pos = 0
+    while pos < len(line):
+      if self.insideMultiLineComment:
+        # Resume after the closing */, or give up on this line without one.
+        end = line.find("*/", pos)
+        if end == -1:
+          break
+        self.insideMultiLineComment = False
+        pos = end + 2
+        continue
+      found = self._TOKEN_RE.search(line, pos)
+      if found is None:
+        break  # only whitespace is left
+      text, pos = found.group(), found.end()
+      if text == "/*":
+        self.insideMultiLineComment = True
+      elif text.startswith("//"):
+        break  # the rest of the line is a comment
+      else:
+        tokens.append(text)
+    return tokens
+
+  def advance(self):
+    """Tokenizes the rest of the file into self.tokens, closes it, prints."""
+    self.tokens = []
+    # A repeated call finds the file closed and leaves self.tokens empty.
+    if not self.file.closed:
+      with self.file:
+        for line in self.file:
+          self.tokens += self.parse_line(line)
+    print(self.tokens)
+
+class CompilationEngine:
+  def __init__(self):
+    """Creates a compilation engine; no state is set up."""
+
+if __name__ == '__main__':  # only when executed as a script
+  jt = JackTokenizer("../projects/10/Square/Square.jack")  # path is relative to the working directory
+  jt.advance()  # tokenizes the whole file and prints the token list