Add VM translator, Jack analyzer and compiler

This commit is contained in:
2020-11-15 15:53:24 -05:00
parent f2a0b6d531
commit fb224f31ed
12 changed files with 2708 additions and 0 deletions

9
jack_compiler/Cargo.toml Normal file
View File

@@ -0,0 +1,9 @@
[package]
name = "jack_compiler"
version = "0.1.0"
authors = ["Felix Martin <mail@felixm.de>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

jack_compiler/src/code_writer.rs Normal file
View File

@@ -0,0 +1,178 @@
use crate::symbol_table::get_empty_symbol_table;
use crate::symbol_table::SymbolTable;
use crate::tokenizer::Keyword;
use crate::tokenizer::Symbol;
use crate::tokenizer::Token;
use std::fs;
/// Accumulates the VM code for one .jack file and tracks the compilation
/// context: symbol table, current class/subroutine, and label counters.
#[derive(Debug)]
pub struct Writer {
    pub table: SymbolTable,      // class- and subroutine-scope symbols
    pub code: String,            // VM output accumulated so far
    pub class_name: String,      // class currently being compiled
    pub subroutine_name: String, // subroutine currently being compiled
    pub in_constructor: bool,    // current subroutine is a constructor
    pub in_method: bool,         // current subroutine is a method
    if_label_count: usize,       // next unique id for if labels
    while_label_count: usize,    // next unique id for while labels
}
/// Builds a `Writer` in its initial state: empty output, empty symbol
/// table, no class/subroutine context, label counters at zero.
pub fn get_code_writer() -> Writer {
    Writer {
        code: String::new(),
        table: get_empty_symbol_table(),
        class_name: String::new(),
        subroutine_name: String::new(),
        in_constructor: false,
        in_method: false,
        if_label_count: 0,
        while_label_count: 0,
    }
}
/// Maps a segment keyword to its VM segment name. Fields live in the
/// `this` segment; every other supported keyword maps to the segment of
/// the same name. Panics on keywords that are not segments.
fn segment_to_string(segment: Keyword) -> String {
    use crate::tokenizer::Keyword::*;
    let name = match segment {
        Constant => "constant",
        Argument => "argument",
        Local => "local",
        Temp => "temp",
        Field => "this",
        Static => "static",
        That => "that",
        Pointer => "pointer",
        _ => panic!("Unexpected segment {:?}", segment),
    };
    name.to_string()
}
impl Writer {
    /// Flushes the accumulated VM code to `file`.
    pub fn write_to_file(&self, file: &String) {
        fs::write(file, self.code.to_string()).expect("Unable to write file");
    }

    /// Resets all per-subroutine state (symbols, flags, label counters).
    pub fn start_subroutine(&mut self) {
        self.table.remove_subroutine_symbols();
        self.in_constructor = false;
        self.in_method = false;
        self.if_label_count = 0;
        self.while_label_count = 0;
    }

    /// Resets per-class state by replacing the whole symbol table.
    pub fn start_class(&mut self) {
        self.table = get_empty_symbol_table();
    }

    /// Records `name` with its type and kind in the symbol table.
    pub fn define_symbol(&mut self, name: String, symbol_type: Token, kind: Keyword) {
        self.table.define_symbol(name, symbol_type, kind);
    }

    /// Appends one already-formatted line of VM code to the output.
    fn emit(&mut self, line: &str) {
        self.code.push_str(line);
    }

    pub fn write_push(&mut self, segment: Keyword, index: usize) {
        let line = format!("push {} {}\n", segment_to_string(segment), index);
        self.emit(&line);
    }

    pub fn write_pop(&mut self, segment: Keyword, index: usize) {
        let line = format!("pop {} {}\n", segment_to_string(segment), index);
        self.emit(&line);
    }

    /// Emits the VM instruction implementing the given operator.
    pub fn write_arithmetic(&mut self, op: Symbol) {
        use crate::tokenizer::Symbol::*;
        let line = match op {
            Plus => "add\n",
            Minus => "sub\n",
            // Multiplication and division have no VM primitive; they are
            // provided by the OS Math class.
            Mul => "call Math.multiply 2\n",
            Div => "call Math.divide 2\n",
            ExclusiveAnd => "and\n",
            ExclusiveOr => "or\n",
            Smaller => "lt\n",
            Greater => "gt\n",
            Equal => "eq\n",
            UnaryMinus => "neg\n",
            Not => "not\n",
            _ => panic!("Unsupported operator {:?}.", op),
        };
        self.emit(line);
    }

    /// Emits the `function` header followed by the constructor/method
    /// prologue that anchors the `this` pointer.
    pub fn write_function(&mut self) {
        let n_locals = self.table.get_count(Keyword::Local);
        let header = format!(
            "function {}.{} {}\n",
            self.class_name, self.subroutine_name, n_locals
        );
        self.emit(&header);
        if self.in_constructor {
            // Allocate one word per field and point `this` at the block.
            let n_fields = self.table.get_count(Keyword::Field);
            self.write_push(Keyword::Constant, n_fields);
            self.write_call(&"Memory".to_string(), &"alloc".to_string(), 1);
            self.write_pop(Keyword::Pointer, 0);
        } else if self.in_method {
            // Methods receive the receiver object as argument 0.
            self.write_push(Keyword::Argument, 0);
            self.write_pop(Keyword::Pointer, 0);
        }
    }

    pub fn write_call(&mut self, class_name: &String, subroutine_name: &String, n_args: usize) {
        let line = format!("call {}.{} {}\n", class_name, subroutine_name, n_args);
        self.emit(&line);
    }

    pub fn write_return(&mut self) {
        self.emit("return\n");
    }

    /// Pops the value on top of the stack into `var_name`'s segment slot.
    pub fn write_var_assignment(&mut self, var_name: &String) {
        let index = self.table.index_of(var_name);
        let kind = self.table.kind_of(var_name);
        self.write_pop(kind, index);
    }

    /// Pushes `var_name`'s current value onto the stack.
    pub fn write_var_read(&mut self, var_name: &String) {
        let index = self.table.index_of(var_name);
        let kind = self.table.kind_of(var_name);
        self.write_push(kind, index);
    }

    pub fn write_label(&mut self, label_name: &String) {
        let line = format!("label {}\n", label_name);
        self.emit(&line);
    }

    pub fn write_if_goto(&mut self, label_name: &String) {
        let line = format!("if-goto {}\n", label_name);
        self.emit(&line);
    }

    pub fn write_goto(&mut self, label_name: &String) {
        let line = format!("goto {}\n", label_name);
        self.emit(&line);
    }

    /// Returns fresh (IF_FALSE, IF_TRUE, IF_END) labels for one `if`.
    pub fn get_if_labels(&mut self) -> (String, String, String) {
        let n = self.if_label_count;
        self.if_label_count += 1;
        (
            format!("IF_FALSE{}", n),
            format!("IF_TRUE{}", n),
            format!("IF_END{}", n),
        )
    }

    /// Returns fresh (WHILE_EXP, WHILE_START, WHILE_END) labels for one
    /// `while` loop.
    pub fn get_while_labels(&mut self) -> (String, String, String) {
        let n = self.while_label_count;
        self.while_label_count += 1;
        (
            format!("WHILE_EXP{}", n),
            format!("WHILE_START{}", n),
            format!("WHILE_END{}", n),
        )
    }
}

53
jack_compiler/src/main.rs Normal file
View File

@@ -0,0 +1,53 @@
mod parser;
mod tokenizer;
mod symbol_table;
mod code_writer;
use std::env;
use std::fs;
use std::path::Path;
use std::ffi::OsStr;
use crate::code_writer::get_code_writer;
fn main() {
    // True when `filename` is an existing file with a `.jack` extension.
    // `map_or` avoids the panic the previous `extension().unwrap()` caused
    // for paths without any extension (e.g. `README`, directories).
    fn is_jack_file(filename: &Path) -> bool {
        filename.is_file() && filename.extension().map_or(false, |e| e == OsStr::new("jack"))
    }
    // Compiles every *.jack file directly inside `directory` (non-recursive).
    fn translate_dir(directory: &Path) {
        let paths = fs::read_dir(directory).unwrap();
        for path in paths {
            let filename = path.unwrap().path();
            if is_jack_file(&filename) {
                translate_single_file(filename.as_path())
            }
        }
    }
    // Tokenizes and compiles one file, writing the generated VM code next
    // to the input with the .jack suffix replaced by .vm.
    fn translate_single_file(input_file: &Path) {
        let mut tokens = tokenizer::tokenize_file(input_file);
        let mut writer = get_code_writer();
        println!("Compiling {:?}", input_file);
        parser::compile_class(&mut tokens, &mut writer);
        let output_file = str::replace(input_file.to_str().unwrap(), ".jack", ".vm");
        writer.write_to_file(&output_file);
    }
    // Each command-line argument is either a single .jack file or a
    // directory of them.
    let args: Vec<String> = env::args().collect();
    for arg in &args[1..] {
        let arg_path = Path::new(arg);
        println!("{:?}", arg_path);
        if is_jack_file(arg_path) {
            translate_single_file(arg_path);
        } else if arg_path.is_dir() {
            translate_dir(arg_path);
        } else {
            println!("{} is not a *.jack file or directory!", arg);
        }
    }
}

348
jack_compiler/src/parser.rs Normal file
View File

@@ -0,0 +1,348 @@
use crate::code_writer::Writer;
use crate::tokenizer::identifier;
use crate::tokenizer::int_const;
use crate::tokenizer::string_const;
use crate::tokenizer::Keyword::*;
use crate::tokenizer::Symbol::*;
use crate::tokenizer::Token::{Keyword, Symbol};
use crate::tokenizer::Tokens;
/// Compiles `class Name { classVarDec* subroutineDec* }` — the top-level
/// entry point of the parser. Emits all VM code for the class into `writer`.
pub fn compile_class(tokens: &mut Tokens, writer: &mut Writer) {
    writer.start_class();
    tokens.eat(Keyword(Class));
    // Store the class name directly; the old intermediate local plus
    // `to_string()` copied the string twice for no benefit.
    writer.class_name = tokens.eat(identifier()).to_string();
    tokens.eat(Symbol(LCurly));
    // All static/field declarations precede the subroutines.
    while tokens.is_one_of(vec![Keyword(Static), Keyword(Field)]) {
        compile_class_var_dec(tokens, writer);
    }
    while tokens.is_one_of(vec![
        Keyword(Constructor),
        Keyword(Function),
        Keyword(Method),
    ]) {
        compile_subroutine(tokens, writer);
    }
    tokens.eat(Symbol(RCurly));
}
/// Compiles `(static|field) type name (, name)* ;` and records every
/// declared name in the class-scope symbol table.
fn compile_class_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
    // `static` or `field` selects the VM segment for all names declared here.
    let kind = tokens.eat(Keyword(AnyKeyword)).to_keyword();
    // The declared type: a primitive keyword or a class identifier.
    let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
    loop {
        let name = tokens.eat(identifier()).to_string();
        writer.define_symbol(name, symbol_type.clone(), kind);
        if tokens.is_sequence(vec![Symbol(Comma), identifier()]) {
            tokens.eat(Symbol(Comma));
        } else {
            break;
        }
    }
    tokens.eat(Symbol(Semicolon));
}
fn compile_subroutine(tokens: &mut Tokens, writer: &mut Writer) {
writer.start_subroutine();
let routine_keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword();
let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
match routine_keyword {
Method => {
writer.in_method = true;
writer.define_symbol("this".to_string(), symbol_type.clone(), Argument);
},
Function => (),
Constructor => { writer.in_constructor = true; },
s => panic!("Unsupported routine type {:?}", s),
}
writer.subroutine_name = tokens.eat(identifier()).to_string();
compile_parameter_list(tokens, writer);
compile_subroutine_body(tokens, writer);
}
/// Compiles `( (type name (, type name)*)? )` and defines each parameter
/// as an `argument` symbol.
fn compile_parameter_list(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Symbol(LBrace));
    // An empty list has the closing paren immediately after the opening one.
    if tokens.is_one_of(vec![Keyword(AnyKeyword), identifier()]) {
        loop {
            let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
            let arg_name = tokens.eat(identifier()).to_string();
            writer.define_symbol(arg_name, symbol_type, Argument);
            if tokens.is(Symbol(Comma)) {
                tokens.eat(Symbol(Comma));
            } else {
                break;
            }
        }
    }
    tokens.eat(Symbol(RBrace));
}
// Compiles `{ varDec* statements }`. All `var` declarations must be parsed
// before `write_function` runs, because the emitted `function` line needs
// the final count of locals.
fn compile_subroutine_body(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Symbol(LCurly));
    while tokens.is(Keyword(Var)) {
        compile_var_dec(tokens, writer);
    }
    writer.write_function();
    compile_statements(tokens, writer);
    tokens.eat(Symbol(RCurly));
}
/// Compiles `var type name (, name)* ;` — every declared name becomes a
/// `local` symbol of the current subroutine.
fn compile_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Var));
    let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
    loop {
        let var_name = tokens.eat(identifier()).to_string();
        writer.define_symbol(var_name, symbol_type.clone(), Local);
        if tokens.is(Symbol(Comma)) {
            tokens.eat(Symbol(Comma));
        } else {
            break;
        }
    }
    tokens.eat(Symbol(Semicolon));
}
fn compile_statements(tokens: &mut Tokens, writer: &mut Writer) {
loop {
if tokens.is(Keyword(Let)) {
compile_let(tokens, writer);
} else if tokens.is(Keyword(If)) {
compile_if(tokens, writer);
} else if tokens.is(Keyword(While)) {
compile_while(tokens, writer);
} else if tokens.is(Keyword(Do)) {
compile_do(tokens, writer);
} else if tokens.is(Keyword(Return)) {
compile_return(tokens, writer);
} else {
break;
}
}
}
// Compiles `do subroutineCall ;`. Every Jack subroutine leaves exactly one
// value on the stack, so the (ignored) return value is popped into temp 0.
fn compile_do(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Do));
    compile_subroutine_call(tokens, writer);
    writer.write_pop(Temp, 0);
    tokens.eat(Symbol(Semicolon));
}
// Compiles `let name = expr ;` and `let name[expr] = expr ;`.
// NOTE(review): if the token after the name is neither `=` nor `[`, this
// silently falls through to the semicolon eat (which then panics on
// malformed input) — confirm that is the intended error path.
fn compile_let(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Let));
    let var_name = tokens.eat(identifier()).to_string();
    if tokens.is(Symbol(Equal)) {
        // Plain assignment: evaluate, then pop into the variable's slot.
        tokens.eat(Symbol(Equal));
        compile_expression(tokens, writer);
        writer.write_var_assignment(&var_name);
    } else if tokens.is(Symbol(LSquare)) {
        // Array assignment: compute base + index before evaluating the
        // right-hand side, since both use the stack.
        tokens.eat(Symbol(LSquare));
        writer.write_var_read(&var_name);
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RSquare));
        writer.write_arithmetic(Plus);
        // Address of array access is now on stack
        tokens.eat(Symbol(Equal));
        compile_expression(tokens, writer);
        // Value to assign to array is on stack
        writer.write_pop(Temp, 0); // Buffer value to assign
        writer.write_pop(Pointer, 1); // Set That to access address
        writer.write_push(Temp, 0); // Restore value to assign
        writer.write_pop(That, 0); // Do actual assignment
    }
    tokens.eat(Symbol(Semicolon));
}
/// Compiles `while ( expr ) { statements }`. Emitted layout:
///   label WHILE_EXP    <condition>  if-goto WHILE_START  goto WHILE_END
///   label WHILE_START  <body>       goto WHILE_EXP
///   label WHILE_END
fn compile_while(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(While));
    let (exp_label, start_label, end_label) = writer.get_while_labels();
    writer.write_label(&exp_label);
    tokens.eat(Symbol(LBrace));
    compile_expression(tokens, writer);
    // A true condition jumps into the body; otherwise leave the loop.
    writer.write_if_goto(&start_label);
    writer.write_goto(&end_label);
    tokens.eat(Symbol(RBrace));
    tokens.eat(Symbol(LCurly));
    writer.write_label(&start_label);
    compile_statements(tokens, writer);
    writer.write_goto(&exp_label);
    tokens.eat(Symbol(RCurly));
    writer.write_label(&end_label);
}
/// Compiles `return expr? ;`. Callers always pop a return value, so a bare
/// `return;` pushes a dummy 0 first.
fn compile_return(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Return));
    if tokens.is(Symbol(Semicolon)) {
        writer.write_push(Constant, 0);
    } else {
        compile_expression(tokens, writer);
    }
    writer.write_return();
    tokens.eat(Symbol(Semicolon));
}
// Compiles `if ( expr ) { ... } (else { ... })?`. Label scheme: a true
// condition jumps to l_true (the then-branch); otherwise control falls
// through `goto l_false`. Without an else, l_false marks the end of the
// statement; with an else, the then-branch skips it via `goto l_end`.
fn compile_if(tokens: &mut Tokens, writer: &mut Writer) {
    let (l_false, l_true, l_end) = writer.get_if_labels();
    tokens.eat(Keyword(If));
    tokens.eat(Symbol(LBrace));
    compile_expression(tokens, writer);
    tokens.eat(Symbol(RBrace));
    writer.write_if_goto(&l_true);
    writer.write_goto(&l_false);
    tokens.eat(Symbol(LCurly));
    writer.write_label(&l_true);
    compile_statements(tokens, writer);
    tokens.eat(Symbol(RCurly));
    if tokens.is(Keyword(Else)) {
        // Then-branch jumps over the else-branch; l_false starts the else.
        writer.write_goto(&l_end);
        writer.write_label(&l_false);
        tokens.eat(Keyword(Else));
        tokens.eat(Symbol(LCurly));
        compile_statements(tokens, writer);
        tokens.eat(Symbol(RCurly));
        writer.write_label(&l_end);
    } else {
        // No else: l_false doubles as the end label; l_end is unused.
        writer.write_label(&l_false);
    }
}
/// Compiles `term (op term)*`. Each operator is emitted after its right
/// operand, which yields strict left-to-right evaluation — Jack defines no
/// operator precedence.
fn compile_expression(tokens: &mut Tokens, writer: &mut Writer) {
    compile_term(tokens, writer);
    loop {
        let binary_ops = vec![
            Symbol(Plus),
            Symbol(Minus),
            Symbol(Mul),
            Symbol(Div),
            Symbol(ExclusiveAnd),
            Symbol(ExclusiveOr),
            Symbol(Smaller),
            Symbol(Greater),
            Symbol(Equal),
        ];
        if !tokens.is_one_of(binary_ops) {
            break;
        }
        let op = tokens.eat(Symbol(AnySymbol)).to_symbol();
        compile_term(tokens, writer);
        writer.write_arithmetic(op);
    }
}
// Compiles a single term. Dispatch order is significant: the two-token
// lookaheads (array access, subroutine calls) must be tested before the
// bare-identifier fallback, which would otherwise consume their leading
// identifier.
fn compile_term(tokens: &mut Tokens, writer: &mut Writer) {
    if tokens.is(int_const()) {
        // integerConstant
        let i = tokens.eat(int_const()).to_int();
        writer.write_push(Constant, i);
    } else if tokens.is(string_const()) {
        // stringConstant: allocate a String object and append one char at a
        // time; String.appendChar returns the string, keeping it on the stack.
        let s = tokens.eat(string_const()).to_string();
        let bytes = s.as_bytes();
        writer.write_push(Constant, bytes.len());
        writer.write_call(&"String".to_string(), &"new".to_string(), 1);
        for b in bytes {
            writer.write_push(Constant, (*b).into());
            writer.write_call(&"String".to_string(), &"appendChar".to_string(), 2);
        }
    } else if tokens.is(Keyword(AnyKeyword)) {
        // keywordConstant: true is -1 (all bits set, hence not 0),
        // false/null are 0, this is the current object pointer.
        let keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword();
        match keyword {
            True => {
                writer.write_push(Constant, 0);
                writer.write_arithmetic(Not);
            }
            False => writer.write_push(Constant, 0),
            Null => writer.write_push(Constant, 0),
            This => writer.write_push(Pointer, 0),
            _ => panic!("Unexpected keyword {:?}", keyword),
        }
    } else if tokens.is_sequence(vec![identifier(), Symbol(LSquare)]) {
        // arrayName[expr]: compute base + index, then read through `that`.
        let var_name = tokens.eat(identifier()).to_string();
        tokens.eat(Symbol(LSquare));
        writer.write_var_read(&var_name);
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RSquare));
        writer.write_arithmetic(Plus); // Address of array access is now on stack
        writer.write_pop(Pointer, 1); // Set That to address
        writer.write_push(That, 0); // Push value from array onto stack
    } else if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) {
        // subroutineCall foo()
        compile_subroutine_call(tokens, writer);
    } else if tokens.is_sequence(vec![identifier(), Symbol(Dot)]) {
        // subroutineCall foo.something
        compile_subroutine_call(tokens, writer);
    } else if tokens.is(Symbol(LBrace)) {
        // ( expression )
        tokens.eat(Symbol(LBrace));
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RBrace));
    } else if tokens.is_one_of(vec![Symbol(Minus), Symbol(Not)]) {
        // unaryOp term: `-` must become `neg` here, since the tokenizer
        // cannot distinguish unary from binary minus.
        let symbol = tokens.eat(Symbol(AnySymbol)).to_symbol();
        compile_term(tokens, writer);
        if symbol == Minus {
            writer.write_arithmetic(UnaryMinus);
        } else {
            writer.write_arithmetic(Not);
        }
    } else if tokens.is(identifier()) {
        // varName
        let var_name = tokens.eat(identifier()).to_string();
        writer.write_var_read(&var_name);
    } else {
        panic!("Unexpected token {:?} for compile_term", tokens.peek());
    }
}
/// Compiles `sub(args)`, `Class.sub(args)` or `obj.sub(args)`. Method calls
/// push the receiver object first, so it becomes argument 0 of the callee.
fn compile_subroutine_call(tokens: &mut Tokens, writer: &mut Writer) {
    let mut class_name = String::new();
    let mut subroutine_name = String::new();
    let mut n_args: usize = 0;
    if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) {
        // Bare `sub(...)`: an implicit method call on the current object.
        class_name = writer.class_name.to_string();
        writer.write_push(Pointer, 0); // receiver (`this`) is argument 0
        n_args = 1;
        subroutine_name = tokens.eat(identifier()).to_string();
    } else if tokens.is_sequence(vec![identifier(), Symbol(Dot), identifier()]) {
        class_name = tokens.eat(identifier()).to_string();
        tokens.eat(Symbol(Dot));
        subroutine_name = tokens.eat(identifier()).to_string();
        // A known variable before the dot means a method call on that
        // object; an unknown name is a plain function/constructor call.
        if writer.table.has_symbol(&class_name) {
            let index = writer.table.index_of(&class_name);
            let kind = writer.table.kind_of(&class_name);
            // Replace the variable name with its class (type) name.
            class_name = writer.table.get_token(&class_name).to_string();
            writer.write_push(kind, index); // receiver becomes argument 0
            n_args = 1;
        }
    }
    tokens.eat(Symbol(LBrace));
    while !tokens.is(Symbol(RBrace)) {
        compile_expression(tokens, writer);
        n_args += 1;
        if tokens.is(Symbol(Comma)) {
            tokens.eat(Symbol(Comma));
        }
    }
    writer.write_call(&class_name, &subroutine_name, n_args);
    tokens.eat(Symbol(RBrace));
}

jack_compiler/src/symbol_table.rs Normal file
View File

@@ -0,0 +1,93 @@
use std::collections::HashMap;
use crate::tokenizer::Keyword;
use crate::tokenizer::Token;
/// One entry of the symbol table.
#[derive(Debug)]
struct Symbol {
    name: String,       // also the map key in SymbolTable::fields
    symbol_type: Token, // type keyword (int/char/boolean) or class identifier
    kind: Keyword,      // Static, Field, Argument or Local
    index: usize,       // running index within the symbol's kind/segment
}
/// Symbol table for one class: class-scope (static/field) and
/// subroutine-scope (argument/local) symbols share a single map.
#[derive(Debug)]
pub struct SymbolTable {
    count: HashMap<Keyword, usize>, // next free index per kind
    fields: HashMap<String, Symbol>,
}
/// Builds an empty `SymbolTable` with the per-kind index counters for all
/// four supported kinds seeded to zero.
pub fn get_empty_symbol_table() -> SymbolTable {
    let count = vec![
        Keyword::Static,
        Keyword::Field,
        Keyword::Argument,
        Keyword::Local,
    ]
    .into_iter()
    .map(|kind| (kind, 0))
    .collect();
    SymbolTable {
        count,
        fields: HashMap::new(),
    }
}
impl SymbolTable {
    /// Kind (Static/Field/Argument/Local) of `name`; panics when unknown.
    pub fn kind_of(&self, name: &String) -> Keyword {
        self.fields
            .get(name)
            .unwrap_or_else(|| panic!("Symbol {} does not exist", name))
            .kind
    }

    /// Segment index of `name`; panics when unknown.
    pub fn index_of(&self, name: &String) -> usize {
        self.fields
            .get(name)
            .unwrap_or_else(|| panic!("Symbol {} does not exist", name))
            .index
    }

    /// Type token of `name` (keyword or class identifier); panics when unknown.
    pub fn get_token(&self, name: &String) -> Token {
        self.fields
            .get(name)
            .unwrap_or_else(|| panic!("Symbol {} does not exist", name))
            .symbol_type
            .clone()
    }

    /// Number of symbols defined with the given kind (0 for unseeded kinds).
    pub fn get_count(&self, symbol_kind: Keyword) -> usize {
        self.count.get(&symbol_kind).copied().unwrap_or(0)
    }

    /// True when `name` has been defined (any kind, any scope).
    pub fn has_symbol(&self, name: &String) -> bool {
        self.fields.contains_key(name)
    }

    /// Drops all subroutine-scope symbols (arguments and locals) and resets
    /// their index counters; class-scope symbols are kept.
    pub fn remove_subroutine_symbols(&mut self) {
        self.fields
            .retain(|_, symbol| !matches!(symbol.kind, Keyword::Argument | Keyword::Local));
        self.count.insert(Keyword::Argument, 0);
        self.count.insert(Keyword::Local, 0);
    }

    /// Inserts `name` with the next free index of its kind and advances
    /// that kind's counter. Panics on kinds that were never seeded.
    pub fn define_symbol(&mut self, name: String, symbol_type: Token, kind: Keyword) {
        let index: usize = *self.count.get(&kind).unwrap();
        self.count.insert(kind, index + 1);
        let symbol = Symbol {
            name: name.clone(),
            symbol_type,
            kind,
            index,
        };
        self.fields.insert(name, symbol);
    }
}

jack_compiler/src/tokenizer.rs Normal file
View File

@@ -0,0 +1,400 @@
use crate::tokenizer;
use std::fs;
use std::path::Path;
/// One lexical token of a Jack source file.
#[derive(Debug, Clone)]
pub enum Token {
    Keyword(Keyword),
    Symbol(Symbol),
    Identifier(String),
    IntConst(usize),
    StringConst(String),
}
/// Jack keywords plus extra variants reused by the code writer as VM
/// segment names (Constant, Argument, Local, That, Pointer, Temp).
/// `AnyKeyword` is a wildcard that matches every keyword in `equal`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    Class,
    Constructor,
    Function,
    Method,
    Field,
    Var,
    Int,
    Char,
    Boolean,
    Constant,
    Argument,
    Local,
    Static,
    That,
    Pointer,
    Temp,
    Void,
    True,
    False,
    Null,
    This,
    Let,
    Do,
    If,
    Else,
    While,
    Return,
    AnyKeyword,
}
/// Jack symbols. NOTE(review): the names `ExclusiveAnd`/`ExclusiveOr` map
/// to the characters '&' and '|', which are bitwise (inclusive) and/or —
/// the names are misleading but baked into the API. `UnaryMinus` is never
/// produced by the tokenizer; the parser uses it to request `neg`.
/// `AnySymbol` is a wildcard that matches every symbol in `equal`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    LCurly,
    RCurly,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    Dot,
    Comma,
    Semicolon,
    Plus,
    Minus,
    Mul,
    Div,
    ExclusiveAnd,
    ExclusiveOr,
    Smaller,
    Greater,
    Equal,
    UnaryMinus,
    Not,
    AnySymbol,
}
/// The token stream of one file plus a cursor for the parser.
#[derive(Debug)]
pub struct Tokens {
    pub tokens: Vec<Token>,
    index: usize, // cursor of the next token to consume
}
/// Wildcard identifier token for `equal`/`eat` matching; the payload is
/// ignored when comparing.
pub fn identifier() -> Token {
    Token::Identifier(String::new())
}
/// Wildcard integer-constant token for matching purposes.
pub fn int_const() -> Token {
    Token::IntConst(0)
}
/// Wildcard string-constant token for matching purposes.
pub fn string_const() -> Token {
    Token::StringConst(String::new())
}
/// Token pattern match: `t1` is the expected pattern, `t2` the actual
/// token. Identifier/IntConst/StringConst compare by variant only (payload
/// ignored); keywords and symbols compare by value, with `AnyKeyword` /
/// `AnySymbol` in `t1` acting as wildcards.
pub fn equal(t1: &Token, t2: &Token) -> bool {
    match (t1, t2) {
        (Token::Keyword(k1), Token::Keyword(k2)) => k1 == k2 || *k1 == Keyword::AnyKeyword,
        (Token::Symbol(s1), Token::Symbol(s2)) => s1 == s2 || *s1 == Symbol::AnySymbol,
        (Token::Identifier(_), Token::Identifier(_)) => true,
        (Token::IntConst(_), Token::IntConst(_)) => true,
        (Token::StringConst(_), Token::StringConst(_)) => true,
        _ => false,
    }
}
impl Token {
pub fn to_string(&self) -> String {
match self {
Token::Identifier(s) => s.to_string(),
Token::StringConst(s) => s.to_string(),
_ => panic!("Cannot convert {:?} to string.", self),
}
}
pub fn to_keyword(&self) -> tokenizer::Keyword {
match self {
tokenizer::Token::Keyword(k) => k.clone(),
_ => tokenizer::Keyword::AnyKeyword,
}
}
pub fn to_int(&self) -> usize {
match self {
tokenizer::Token::IntConst(i) => *i,
_ => panic!("Cannot convert {:?} to int.", self),
}
}
pub fn to_symbol(&self) -> tokenizer::Symbol {
match self {
tokenizer::Token::Symbol(s) => s.clone(),
_ => panic!("Cannot convert {:?} to symbol.", self),
}
}
}
impl Tokens {
    /// Clone of the token at the cursor, without advancing.
    pub fn peek(&self) -> Token {
        self.get_token(self.index)
    }

    /// Clone of the token at `index`; panics past the end of the stream.
    fn get_token(&self, index: usize) -> Token {
        self.tokens.get(index).unwrap().clone()
    }

    /// Consumes and returns the current token; panics unless it matches
    /// `expected_token` (Any* variants act as wildcards, see `equal`).
    pub fn eat(&mut self, expected_token: Token) -> Token {
        let actual = self.get_token(self.index);
        self.index += 1;
        if !equal(&expected_token, &actual) {
            panic!{"Expected {:?} but got {:?}.", expected_token, actual};
        }
        actual
    }

    /// Consumes and returns the current token if it matches any of the
    /// given patterns; panics otherwise.
    pub fn eat_one_of(&mut self, tokens: Vec<Token>) -> Token {
        let actual = self.get_token(self.index);
        for pattern in &tokens {
            if equal(pattern, &actual) {
                self.index += 1;
                return actual;
            }
        }
        panic!{"Expected one of {:?} but got {:?}.", tokens, actual};
    }

    /// True when the current token matches `expected_token`.
    pub fn is(&self, expected_token: Token) -> bool {
        equal(&expected_token, &self.get_token(self.index))
    }

    /// True when the current token matches any of the given patterns.
    pub fn is_one_of(&self, tokens: Vec<Token>) -> bool {
        let actual = self.get_token(self.index);
        tokens.iter().any(|pattern| equal(pattern, &actual))
    }

    /// True when the upcoming tokens match the given patterns in order
    /// (lookahead; the cursor is not moved).
    pub fn is_sequence(&self, tokens: Vec<Token>) -> bool {
        tokens
            .iter()
            .enumerate()
            .all(|(offset, pattern)| equal(pattern, &self.get_token(self.index + offset)))
    }
}
/// Reads `file` and converts it into a `Tokens` stream with the cursor at
/// the first token. Unrecognized characters are reported and skipped.
pub fn tokenize_file(file: &Path) -> Tokens {
    let mut tokens = vec![];
    let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
    let length = chars.len();
    let mut index: usize = 0;
    while index < length {
        index = eat_comment(&chars, index);
        if index >= length {
            // Fix: a comment running to the end of the file used to make
            // the `chars[index]` below panic out of bounds.
            break;
        }
        let c = chars[index];
        if c.is_whitespace() {
            index += 1;
        } else if parse_symbol(&chars, &mut tokens, index) != index {
            // A symbol token was recognized and already pushed.
            index += 1
        } else if c.is_ascii_alphabetic() || c == '_' {
            let next = parse_keyword_or_identifier(&chars, &mut tokens, index);
            if next == index {
                // Nothing consumed (e.g. a leading char the identifier
                // parser rejects); skip it so the loop always advances.
                println!("Unexpected char {:?}", c);
                index += 1;
            } else {
                index = next;
            }
        } else if c.is_ascii_digit() {
            index = parse_integer_constant(&chars, &mut tokens, index);
        } else if c == '"' {
            index = parse_string_constant(&chars, &mut tokens, index);
        } else {
            println!("Unexpected char {:?}", c);
            index += 1;
        }
    }
    Tokens { tokens, index: 0 }
}
/// Skips `//` line comments and `/* */` block comments starting at `index`
/// and returns the index of the first character after them; consecutive
/// comments are skipped recursively. Returns `index` unchanged when no
/// comment starts there.
///
/// Fixes over the previous version: all accesses are bounds-checked, so a
/// file ending in `/`, a line comment without a trailing newline, or an
/// unterminated block comment no longer panic — they consume to EOF.
fn eat_comment(chars: &Vec<char>, index: usize) -> usize {
    let start_index = index;
    let mut index = index;
    let len = chars.len();
    if index + 1 < len && chars[index] == '/' && chars[index + 1] == '/' {
        index += 2;
        while index < len && chars[index] != '\n' {
            index += 1;
        }
        if index < len {
            index += 1; // consume the terminating newline, if any
        }
    } else if index + 1 < len && chars[index] == '/' && chars[index + 1] == '*' {
        index += 2;
        while index + 1 < len && !(chars[index] == '*' && chars[index + 1] == '/') {
            index += 1;
        }
        // Skip the closing `*/`; an unterminated comment consumes to EOF.
        index = (index + 2).min(len);
    }
    if start_index != index {
        // Something was skipped: chase immediately following comments.
        return eat_comment(chars, index);
    }
    index
}
/// Tokenizes a single-character symbol at `index`. On success the symbol
/// is pushed and `index + 1` is returned; otherwise `index` is returned
/// unchanged (the caller uses this to detect whether a token was made).
fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    use Symbol::*;
    let symbol = match chars[index] {
        '{' => LCurly,
        '}' => RCurly,
        '(' => LBrace,
        ')' => RBrace,
        '[' => LSquare,
        ']' => RSquare,
        '.' => Dot,
        ',' => Comma,
        ';' => Semicolon,
        '+' => Plus,
        '-' => Minus,
        '*' => Mul,
        '/' => Div,
        '&' => ExclusiveAnd,
        '|' => ExclusiveOr,
        '<' => Smaller,
        '>' => Greater,
        '=' => Equal,
        '~' => Not,
        _ => return index,
    };
    tokens.push(Token::Symbol(symbol));
    index + 1
}
/// Tokenizes a run of ASCII digits starting at `index` into an IntConst
/// and returns the index after it; returns `index` unchanged (and pushes
/// nothing) when no digit is present.
///
/// Fix: the digit loop is now bounds-checked, so a file that ends with a
/// digit no longer panics with an out-of-bounds index.
fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    let start_index = index;
    let mut index = index;
    let mut number = String::new();
    while index < chars.len() && chars[index].is_ascii_digit() {
        number.push(chars[index]);
        index += 1;
    }
    if start_index == index {
        return index; // nothing consumed, no token emitted
    }
    let number: usize = number.parse::<usize>().unwrap();
    tokens.push(Token::IntConst(number));
    index
}
/// Tokenizes a double-quoted string constant starting at `index` (quotes
/// excluded from the payload) and returns the index after the closing
/// quote; returns `index` unchanged when no `"` is present.
///
/// Fix: the scan is bounds-checked, so an unterminated string at EOF no
/// longer panics — it consumes the rest of the file as the payload.
fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    if chars[index] != '"' {
        return index;
    }
    let mut end = index + 1;
    while end < chars.len() && chars[end] != '"' {
        end += 1;
    }
    let s: String = chars[index + 1..end].iter().collect();
    tokens.push(Token::StringConst(s));
    if end < chars.len() {
        end + 1 // skip the closing quote
    } else {
        end // unterminated string: consumed to EOF
    }
}
/// Tokenizes a keyword or identifier starting at `index` and returns the
/// index after it; returns `index` unchanged when no identifier starts
/// there.
///
/// Fixes: Jack identifiers may contain (and start with) underscores, which
/// the old `is_alphanumeric` check rejected; the continuation test is now
/// ASCII-only per the Jack grammar; and the loop is bounds-checked so a
/// file ending in an identifier no longer panics.
fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    let mut index = index;
    let mut token_string = String::new();
    // Identifiers start with a letter or underscore, never a digit.
    if !(chars[index].is_ascii_alphabetic() || chars[index] == '_') {
        return index;
    }
    token_string.push(chars[index]);
    index += 1;
    while index < chars.len() && (chars[index].is_ascii_alphanumeric() || chars[index] == '_') {
        token_string.push(chars[index]);
        index += 1;
    }
    use Keyword::*;
    // Reserved words become keyword tokens; everything else is an identifier.
    let t = match token_string.as_str() {
        "class" => Token::Keyword(Class),
        "constructor" => Token::Keyword(Constructor),
        "function" => Token::Keyword(Function),
        "method" => Token::Keyword(Method),
        "field" => Token::Keyword(Field),
        "static" => Token::Keyword(Static),
        "var" => Token::Keyword(Var),
        "int" => Token::Keyword(Int),
        "char" => Token::Keyword(Char),
        "boolean" => Token::Keyword(Boolean),
        "void" => Token::Keyword(Void),
        "true" => Token::Keyword(True),
        "false" => Token::Keyword(False),
        "null" => Token::Keyword(Null),
        "this" => Token::Keyword(This),
        "let" => Token::Keyword(Let),
        "do" => Token::Keyword(Do),
        "if" => Token::Keyword(If),
        "else" => Token::Keyword(Else),
        "while" => Token::Keyword(While),
        "return" => Token::Keyword(Return),
        s => Token::Identifier(s.to_string()),
    };
    tokens.push(t);
    index
}
/// Debug helper: prints `chars[start..stop]` as one quoted string.
#[allow(dead_code)]
fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
    let snippet: String = chars[start..stop].iter().collect();
    println!("{:?}", snippet);
}