From fb224f31edaba16fb00107f0789185e526789e0f Mon Sep 17 00:00:00 2001
From: Felix Martin
Date: Sun, 15 Nov 2020 15:53:24 -0500
Subject: [PATCH] Add VM translator, Jack analyzer and compiler

---
 jack_analyzer/Cargo.toml          |  10 +
 jack_analyzer/src/main.rs         | 131 ++++
 jack_analyzer/src/parser.rs       | 436 ++++++++++++++
 jack_analyzer/src/tokenizer.rs    | 189 ++++++
 jack_compiler/Cargo.toml          |   9 +
 jack_compiler/src/code_writer.rs  | 178 ++++++
 jack_compiler/src/main.rs         |  53 ++
 jack_compiler/src/parser.rs       | 348 ++++++++++++
 jack_compiler/src/symbol_table.rs |  93 ++++
 jack_compiler/src/tokenizer.rs    | 400 +++++++++++++
 vm_translator/Cargo.toml          |   9 +
 vm_translator/src/main.rs         | 897 ++++++++++++++++++++++++++++++
 12 files changed, 2753 insertions(+)
 create mode 100644 jack_analyzer/Cargo.toml
 create mode 100644 jack_analyzer/src/main.rs
 create mode 100644 jack_analyzer/src/parser.rs
 create mode 100644 jack_analyzer/src/tokenizer.rs
 create mode 100644 jack_compiler/Cargo.toml
 create mode 100644 jack_compiler/src/code_writer.rs
 create mode 100644 jack_compiler/src/main.rs
 create mode 100644 jack_compiler/src/parser.rs
 create mode 100644 jack_compiler/src/symbol_table.rs
 create mode 100644 jack_compiler/src/tokenizer.rs
 create mode 100644 vm_translator/Cargo.toml
 create mode 100644 vm_translator/src/main.rs

diff --git a/jack_analyzer/Cargo.toml b/jack_analyzer/Cargo.toml
new file mode 100644
index 0000000..d89fb24
--- /dev/null
+++ b/jack_analyzer/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "jack_analyzer"
+version = "0.1.0"
+authors = ["Felix Martin "]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+xmltree = "^0"
\ No newline at end of file
diff --git a/jack_analyzer/src/main.rs b/jack_analyzer/src/main.rs
new file mode 100644
index 0000000..9ee7914
--- /dev/null
+++ b/jack_analyzer/src/main.rs
@@ -0,0 +1,131 @@
+mod parser;
+mod tokenizer;
+use std::env;
+use std::ffi::OsStr;
+use std::fs;
+use std::fs::File;
+use std::path::Path;
+use xmltree;
+use xmltree::Element;
+use xmltree::EmitterConfig;
+use xmltree::XMLNode;
+
+/// Converts a token into an XML element: the tag names the token kind, the text is the token
+/// text. A text that is exactly one XML special character is replaced by its entity.
+pub fn create_node(token: &tokenizer::Token) -> xmltree::XMLNode {
+    use tokenizer::Token::*;
+    let (tag, text) = match token {
+        Keyword(text) => ("keyword", text),
+        Symbol(text) => ("symbol", text),
+        Identifier(text) => ("identifier", text),
+        IntConst(text) => ("integerConstant", text),
+        StringConst(text) => ("stringConstant", text),
+    };
+    let mut e = xmltree::Element::new(tag);
+    let text = match text.as_str() {
+        "<" => "&lt;",
+        ">" => "&gt;",
+        "\"" => "&quot;",
+        "&" => "&amp;",
+        "'" => "&apos;",
+        _ => text.as_str(),
+    };
+    let t = XMLNode::Text(text.to_string());
+    e.children.push(t);
+    return xmltree::XMLNode::Element(e);
+}
+
+fn main() {
+    /// Returns true if `filename` is an existing regular file with the extension "jack".
+    fn is_jack_file(filename: &Path) -> bool {
+        let p = Path::new(filename);
+        if p.is_file() && p.extension() == Some(OsStr::new("jack")) {
+            return true;
+        }
+        return false;
+    }
+
+    /// Analyzes every *.jack file directly inside `directory`; panics if it cannot be read.
+    fn translate_dir(directory: &Path) {
+        let paths = fs::read_dir(directory).unwrap();
+        for path in paths {
+            let filename = path.unwrap().path();
+            if is_jack_file(&filename) {
+                translate_single_file(filename.as_path())
+            }
+        }
+    }
+
+    /// Writes `root` to `file` as indented XML and then drops the XML declaration line.
+    fn write_xml(file: &String, root: xmltree::Element) {
+        let mut config = EmitterConfig::new();
+        config.perform_indent = true;
+        // With xmltree 0.1.0
+        // config.perform_escaping = true;
+        // does not work when perform_indent is also true.
+        // Therefore I am escaping manually and setting this to false.
+        config.perform_escaping = false;
+        config.normalize_empty_elements = false;
+        let f = File::create(file.to_string()).unwrap();
+        root.write_with_config(f, config).unwrap();
+        // With xmltree 0.1.0
+        // config.write_document_declaration = false;
+        // did not work so I am removing that line manually.
+        remove_first_line(&file);
+    }
+
+    /// Writes the flat token list to the input path with ".jack" replaced by "T.xml".
+    fn write_token_xml(input_file: &Path, tokens: &Vec<tokenizer::Token>) {
+        let mut root = Element::new("tokens");
+
+        for token in tokens {
+            let n = create_node(token);
+            root.children.push(n);
+        }
+
+        let input_str = input_file.to_str().unwrap();
+        let output_file = str::replace(input_str, ".jack", "T.xml");
+        write_xml(&output_file, root);
+    }
+
+    /// Writes the parse tree to the input path with ".jack" replaced by ".xml".
+    fn write_parse_tree_xml(input_file: &Path, root: xmltree::Element) {
+        let input_str = input_file.to_str().unwrap();
+        let output_file = str::replace(input_str, ".jack", ".xml");
+        write_xml(&output_file, root);
+    }
+
+    /// Rewrites `filename` without its first line; every kept line is terminated by "\n".
+    fn remove_first_line(filename: &String) {
+        let contents = fs::read_to_string(filename).expect("Unable to read file");
+        let mut lines = contents.lines();
+        let mut output = String::new();
+        lines.next(); // skipping the first line
+        for line in lines {
+            output.push_str(line);
+            output.push_str("\n");
+        }
+        fs::write(filename, output).expect("Unable to write file");
+    }
+
+    /// Tokenizes and parses one file, writing the token XML first and the parse tree second.
+    fn translate_single_file(input_file: &Path) {
+        let tokens = tokenizer::tokenize_file(input_file);
+        write_token_xml(input_file, &tokens);
+        let parse_tree = parser::parse_tokens(tokens);
+        write_parse_tree_xml(input_file, parse_tree);
+    }
+
+    let args: Vec<String> = env::args().collect();
+
+    for arg in &args[1..] {
+        let arg_path = Path::new(arg);
+        if is_jack_file(&arg_path) {
+            translate_single_file(&arg_path);
+        } else if arg_path.is_dir() {
+            translate_dir(&arg_path);
+        } else {
+            println!("{} is not a *.jack file or directory!", arg);
+        }
+    }
+}
diff --git a/jack_analyzer/src/parser.rs b/jack_analyzer/src/parser.rs
new file mode 100644
index 0000000..d0cc979
--- /dev/null
+++ b/jack_analyzer/src/parser.rs
@@ -0,0 +1,436 @@
+use crate::create_node;
+use crate::tokenizer::Token;
+use crate::tokenizer::Token::*;
+use xmltree::Element;
+
+/// Peekable iterator over the borrowed token slice that every parse function consumes.
+type Tokens<'a> = std::iter::Peekable<std::slice::Iter<'a, Token>>;
+
+/// Parses a token list into a "class" element; prints a warning if tokens are left over.
+pub fn parse_tokens(tokens: Vec<Token>) -> Element {
+    let mut tokens: Tokens = tokens.iter().peekable();
+    let mut root = Element::new("class");
+    compile_class(&mut tokens, &mut root);
+    if tokens.len() != 0 {
+        println!("Did not parse all tokens!");
+    }
+    root
+}
+
+/// Consumes one token; appends it to `tree` if it is the keyword `expected`, else prints it.
+fn eat_specific_keyword_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
+    let token = tokens.next().unwrap();
+    match token {
+        Keyword(string) if string == expected => tree.children.push(create_node(token)),
+        t => println!("Unexpected {:?} - Keyword(\"{}\")", t, expected),
+    }
+}
+
+/// Consumes one token; appends it to `tree` if it is the symbol `expected`, else prints it.
+fn eat_specific_symbol_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
+    let token = tokens.next().unwrap();
+    match token {
+        Symbol(string) if string == expected => tree.children.push(create_node(token)),
+        t => println!("Unexpected {:?} - Symbol(\"{}\")", t, expected),
+    }
+}
+
+/// Consumes one token; appends it to `tree` if it is an identifier, else prints it.
+fn eat_identifier_token(tokens: &mut Tokens, tree: &mut Element) {
+    let token = tokens.next().unwrap();
+    match token {
+        Identifier(_) => tree.children.push(create_node(token)),
+        t => println!("Unexpected {:?} - Identifier(_)", t),
+    }
+}
+
+/// Consumes one token; appends it to `tree` if it is any keyword, else prints it.
+fn eat_keyword_token(tokens: &mut Tokens, tree: &mut Element) {
+    let token = tokens.next().unwrap();
+    match token {
+        Keyword(_) => tree.children.push(create_node(token)),
+        t => println!("Unexpected {:?} - Keyword(_)", t),
+    }
+}
+
+/// Consumes a type, i.e. any keyword or an identifier, decided by peeking at the next token.
+fn eat_type_token(tokens: &mut Tokens, tree: &mut Element) {
+    let token = tokens.peek().unwrap();
+    match token {
+        Keyword(_) => eat_keyword_token(tokens, tree),
+        Identifier(_) => eat_identifier_token(tokens, tree),
+        t => println!("Unexpected {:?} - Keyword(type)/Identifier(_)", t),
+    }
+}
+
+/// Parses `class name { classVarDec* subroutineDec* }` into `tree`.
+fn compile_class(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "class", tree);
+    eat_identifier_token(tokens, tree);
+    eat_specific_symbol_token(tokens, "{", tree);
+
+    // classVarDec*
+    loop {
+        match tokens.peek().unwrap() {
+            Keyword(string) if string == "static" || string == "field" => {
+                let mut child_tree = Element::new("classVarDec");
+                compile_class_var_dec(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => break,
+        }
+    }
+
+    // subroutineDec*
+    loop {
+        match tokens.peek().unwrap() {
+            Keyword(string)
+                if string == "constructor" || string == "function" || string == "method" =>
+            {
+                let mut child_tree = Element::new("subroutineDec");
+                compile_subroutine(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => break,
+        }
+    }
+    eat_specific_symbol_token(tokens, "}", tree);
+}
+
+/// Parses `keyword type name (, name)* ;`; the caller has already seen `static` or `field`.
+fn compile_class_var_dec(tokens: &mut Tokens, tree: &mut Element) {
+    eat_keyword_token(tokens, tree);
+    eat_type_token(tokens, tree);
+    eat_identifier_token(tokens, tree);
+    loop {
+        match tokens.peek().unwrap() {
+            Symbol(s) if s == "," => {
+                eat_specific_symbol_token(tokens, ",", tree);
+                eat_identifier_token(tokens, tree);
+            }
+            _ => break,
+        }
+    }
+    eat_specific_symbol_token(tokens, ";", tree);
+}
+
+/// Parses `keyword type name ( parameterList ) subroutineBody` into `tree`.
+fn compile_subroutine(tokens: &mut Tokens, tree: &mut Element) {
+    eat_keyword_token(tokens, tree);
+    eat_type_token(tokens, tree);
+    eat_identifier_token(tokens, tree);
+    eat_specific_symbol_token(tokens, "(", tree);
+
+    let mut child_tree = Element::new("parameterList");
+    compile_parameter_list(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+    eat_specific_symbol_token(tokens, ")", tree);
+
+    let mut child_tree = Element::new("subroutineBody");
+    compile_subroutine_body(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+}
+
+/// Parses `(type name (, type name)*)?`; leaves `tree` empty when the next token is `)`.
+fn compile_parameter_list(tokens: &mut Tokens, tree: &mut Element) {
+    match tokens.peek().unwrap() {
+        Symbol(s) if s == ")" => return,
+        _ => (),
+    }
+    eat_type_token(tokens, tree);
+    eat_identifier_token(tokens, tree);
+
+    loop {
+        match tokens.peek().unwrap() {
+            Symbol(s) if s == "," => {
+                eat_specific_symbol_token(tokens, ",", tree);
+                eat_type_token(tokens, tree);
+                eat_identifier_token(tokens, tree);
+            }
+            _ => break,
+        }
+    }
+}
+
+/// Parses `{ varDec* statements }` into `tree`.
+fn compile_subroutine_body(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_symbol_token(tokens, "{", tree);
+
+    loop {
+        match tokens.peek().unwrap() {
+            Keyword(s) if s == "var" => {
+                let mut child_tree = Element::new("varDec");
+                compile_var_dec(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => break,
+        };
+    }
+
+    let mut child_tree = Element::new("statements");
+    compile_statements(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+    eat_specific_symbol_token(tokens, "}", tree);
+}
+
+/// Parses `var type name (, name)* ;` into `tree`.
+fn compile_var_dec(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "var", tree);
+    eat_type_token(tokens, tree);
+    eat_identifier_token(tokens, tree);
+
+    loop {
+        match tokens.peek().unwrap() {
+            Symbol(s) if s == "," => {
+                eat_specific_symbol_token(tokens, ",", tree);
+                eat_identifier_token(tokens, tree);
+            }
+            _ => break,
+        }
+    }
+
+    eat_specific_symbol_token(tokens, ";", tree);
+}
+
+/// Parses do/let/while/return/if statements until the next token starts none of them.
+fn compile_statements(tokens: &mut Tokens, tree: &mut Element) {
+    loop {
+        match tokens.peek().unwrap() {
+            Keyword(s) if s == "do" => {
+                let mut child_tree = Element::new("doStatement");
+                compile_do(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            Keyword(s) if s == "let" => {
+                let mut child_tree = Element::new("letStatement");
+                compile_let(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            Keyword(s) if s == "while" => {
+                let mut child_tree = Element::new("whileStatement");
+                compile_while(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            Keyword(s) if s == "return" => {
+                let mut child_tree = Element::new("returnStatement");
+                compile_return(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            Keyword(s) if s == "if" => {
+                let mut child_tree = Element::new("ifStatement");
+                compile_if(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => break,
+        }
+    }
+}
+
+/// Parses `do name subroutineCall ;` into `tree`.
+fn compile_do(tokens: &mut Tokens, mut tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "do", tree);
+    eat_identifier_token(tokens, &mut tree);
+    compile_subroutine_call(tokens, &mut tree);
+    eat_specific_symbol_token(tokens, ";", tree);
+}
+
+/// Parses `let name ([ expression ])? = expression ;` into `tree`.
+fn compile_let(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "let", tree);
+    eat_identifier_token(tokens, tree);
+
+    match tokens.peek().unwrap() {
+        Symbol(s) if s == "[" => {
+            eat_specific_symbol_token(tokens, "[", tree);
+            let mut child_tree = Element::new("expression");
+            compile_expression(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+            eat_specific_symbol_token(tokens, "]", tree);
+        }
+        _ => (),
+    }
+    eat_specific_symbol_token(tokens, "=", tree);
+    let mut child_tree = Element::new("expression");
+    compile_expression(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+    eat_specific_symbol_token(tokens, ";", tree);
+}
+
+/// Parses `while ( expression ) { statements }` into `tree`.
+fn compile_while(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "while", tree);
+    eat_specific_symbol_token(tokens, "(", tree);
+    let mut child_tree = Element::new("expression");
+    compile_expression(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+    eat_specific_symbol_token(tokens, ")", tree);
+
+    eat_specific_symbol_token(tokens, "{", tree);
+    let mut child_tree = Element::new("statements");
+    compile_statements(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+    eat_specific_symbol_token(tokens, "}", tree);
+}
+
+/// Parses `return expression? ;` into `tree`.
+fn compile_return(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "return", tree);
+    match tokens.peek().unwrap() {
+        Symbol(s) if s == ";" => (),
+        _ => {
+            let mut child_tree = Element::new("expression");
+            compile_expression(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+        }
+    }
+    eat_specific_symbol_token(tokens, ";", tree);
+}
+
+/// Parses `if ( expression ) { statements } (else { statements })?` into `tree`.
+fn compile_if(tokens: &mut Tokens, tree: &mut Element) {
+    eat_specific_keyword_token(tokens, "if", tree);
+    eat_specific_symbol_token(tokens, "(", tree);
+
+    let mut child_tree = Element::new("expression");
+    compile_expression(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+    eat_specific_symbol_token(tokens, ")", tree);
+
+    eat_specific_symbol_token(tokens, "{", tree);
+
+    let mut child_tree = Element::new("statements");
+    compile_statements(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+    eat_specific_symbol_token(tokens, "}", tree);
+
+    match tokens.peek().unwrap() {
+        Keyword(s) if s == "else" => {
+            eat_specific_keyword_token(tokens, "else", tree);
+            eat_specific_symbol_token(tokens, "{", tree);
+
+            let mut child_tree = Element::new("statements");
+            compile_statements(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+            eat_specific_symbol_token(tokens, "}", tree);
+        }
+        _ => (),
+    }
+}
+
+/// Parses `term (op term)*`, where op is a symbol whose first char is in `OPERATORS`.
+fn compile_expression(tokens: &mut Tokens, tree: &mut Element) {
+    let mut child_tree = Element::new("term");
+    compile_term(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+    loop {
+        match tokens.peek().unwrap() {
+            Symbol(s) if OPERATORS.contains(&s.chars().next().unwrap()) => {
+                eat_specific_symbol_token(tokens, s, tree);
+                let mut child_tree = Element::new("term");
+                compile_term(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => break,
+        }
+    }
+}
+
+/// Parses one term; a token that starts no known term is consumed and silently dropped.
+fn compile_term(tokens: &mut Tokens, mut tree: &mut Element) {
+    let token = tokens.next().unwrap();
+    match token {
+        Identifier(_) => match tokens.peek().unwrap() {
+            Symbol(s) if s == "[" => {
+                tree.children.push(create_node(token));
+                eat_specific_symbol_token(tokens, "[", tree);
+                let mut child_tree = Element::new("expression");
+                compile_expression(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+                eat_specific_symbol_token(tokens, "]", tree);
+            }
+            Symbol(s) if s == "(" || s == "." => {
+                // let mut child_tree = Element::new("subroutineCall");
+                tree.children.push(create_node(token));
+                compile_subroutine_call(tokens, &mut tree);
+                // tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => tree.children.push(create_node(token)),
+        },
+        IntConst(_) => tree.children.push(create_node(token)),
+        StringConst(_) => tree.children.push(create_node(token)),
+        Keyword(s) if s == "true" || s == "false" || s == "null" || s == "this" => {
+            tree.children.push(create_node(token));
+        }
+        Symbol(s) if s == "-" || s == "~" => {
+            tree.children.push(create_node(token));
+            let mut child_tree = Element::new("term");
+            compile_term(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+        }
+        Symbol(s) if s == "(" => {
+            tree.children.push(create_node(token));
+            let mut child_tree = Element::new("expression");
+            compile_expression(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+            eat_specific_symbol_token(tokens, ")", tree);
+        }
+
+        _ => (),
+    }
+}
+
+/// Parses `(expression (, expression)*)?`; leaves `tree` empty when the next token is `)`.
+fn compile_expression_list(tokens: &mut Tokens, tree: &mut Element) {
+    match tokens.peek().unwrap() {
+        Symbol(s) if s == ")" => return,
+        _ => (),
+    }
+
+    let mut child_tree = Element::new("expression");
+    compile_expression(tokens, &mut child_tree);
+    tree.children.push(xmltree::XMLNode::Element(child_tree));
+
+    loop {
+        match tokens.peek().unwrap() {
+            Symbol(s) if s == "," => {
+                eat_specific_symbol_token(tokens, ",", tree);
+                let mut child_tree = Element::new("expression");
+                compile_expression(tokens, &mut child_tree);
+                tree.children.push(xmltree::XMLNode::Element(child_tree));
+            }
+            _ => break,
+        }
+    }
+}
+
+/// Parses the call tail after a name: `( expressionList )` or `. name ( expressionList )`.
+fn compile_subroutine_call(tokens: &mut Tokens, mut tree: &mut Element) {
+    match tokens.peek().unwrap() {
+        Symbol(s) if s == "." => {
+            eat_specific_symbol_token(tokens, ".", &mut tree);
+            eat_identifier_token(tokens, &mut tree);
+            eat_specific_symbol_token(tokens, "(", &mut tree);
+            let mut child_tree = Element::new("expressionList");
+            compile_expression_list(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+            eat_specific_symbol_token(tokens, ")", &mut tree);
+        }
+        Symbol(s) if s == "(" => {
+            eat_specific_symbol_token(tokens, "(", &mut tree);
+            let mut child_tree = Element::new("expressionList");
+            compile_expression_list(tokens, &mut child_tree);
+            tree.children.push(xmltree::XMLNode::Element(child_tree));
+            eat_specific_symbol_token(tokens, ")", &mut tree);
+        }
+        _ => (),
+    }
+}
+
+/// Characters accepted as binary operators between two terms of an expression.
+const OPERATORS: &[char] = &['+', '-', '*', '/', '&', '|', '<', '>', '='];
diff --git a/jack_analyzer/src/tokenizer.rs b/jack_analyzer/src/tokenizer.rs
new file mode 100644
index 0000000..1921fac
--- /dev/null
+++ b/jack_analyzer/src/tokenizer.rs
@@ -0,0 +1,189 @@
+use std::fs;
+use std::path::Path;
+
+/// Skips `//` and `/* */` comments that start exactly at `index`; returns the index after them.
+fn eat_comment(chars: &Vec<char>, index: usize) -> usize {
+    let start_index = index;
+    let mut index = index;
+    if chars.get(index) == Some(&'/') && chars.get(index + 1) == Some(&'/') {
+        index += 2;
+        while index < chars.len() && chars[index] != '\n' {
+            index += 1;
+        }
+        index = (index + 1).min(chars.len());
+    } else if chars.get(index) == Some(&'/') && chars.get(index + 1) == Some(&'*') {
+        index += 2;
+        while index + 1 < chars.len() && !(chars[index] == '*' && chars[index + 1] == '/') {
+            index += 1;
+        }
+        index = (index + 2).min(chars.len());
+    }
+
+    if start_index != index {
+        // Something was skipped; another comment may start right here.
+        return eat_comment(chars, index);
+    }
+    return index;
+}
+
+/// Pushes a `Symbol` token if `chars[index]` is in `SYMBOLS`; returns the index after it.
+fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
+    let mut index = index;
+    let c = chars[index];
+    if SYMBOLS.contains(&c) {
+        index += 1;
+        let t = Token::Symbol(c.to_string());
+        tokens.push(t);
+    }
+    return index;
+}
+
+/// Pushes an `IntConst` token for the run of ASCII digits at `index`, if there is one.
+fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
+    let start_index = index;
+    let mut index = index;
+    let mut number = String::new();
+
+    while index < chars.len() && chars[index].is_ascii_digit() {
+        number.push(chars[index]);
+        index += 1;
+    }
+
+    if start_index == index {
+        return index;
+    }
+
+    let t = Token::IntConst(number);
+    tokens.push(t);
+
+    return index;
+}
+
+/// Pushes a `StringConst` token for the `"`-delimited text at `index`, quotes excluded.
+fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
+    let start_index = index;
+    let mut index = index;
+
+    if chars[index] != '"' {
+        return index;
+    }
+    index += 1;
+
+    while chars[index] != '"' {
+        index += 1;
+    }
+    index += 1;
+
+    let s = chars[start_index + 1..index - 1].into_iter().collect();
+    let t = Token::StringConst(s);
+    tokens.push(t);
+
+    return index;
+}
+
+/// Pushes a `Keyword` (if the word is in `KEYWORDS`) or `Identifier` token for the word at `index`.
+fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
+    let mut index = index;
+    let mut token_string = String::new();
+
+    if !(chars[index].is_ascii_alphabetic() || chars[index] == '_') {
+        return index;
+    }
+    token_string.push(chars[index]);
+    index += 1;
+
+    while index < chars.len() && (chars[index].is_alphanumeric() || chars[index] == '_') {
+        token_string.push(chars[index]);
+        index += 1;
+    }
+
+    if KEYWORDS.contains(&token_string.as_str()) {
+        let t = Token::Keyword(token_string);
+        tokens.push(t);
+    } else {
+        let t = Token::Identifier(token_string);
+        tokens.push(t);
+    }
+
+    return index;
+}
+
+/// Reads `file` and splits it into tokens, skipping whitespace and comments.
+pub fn tokenize_file(file: &Path) -> Vec<Token> {
+    let mut tokens = vec![];
+    let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
+    let length = chars.len();
+    let mut index: usize = 0;
+
+    while index < length {
+        index = eat_comment(&chars, index);
+        // A comment may run to the end of the file, leaving nothing to tokenize.
+        if index >= length {
+            break;
+        }
+        let c = chars[index];
+
+        if c.is_whitespace() {
+            index += 1;
+        } else if SYMBOLS.contains(&c) {
+            index = parse_symbol(&chars, &mut tokens, index);
+        } else if c.is_ascii_alphabetic() || c == '_' {
+            index = parse_keyword_or_identifier(&chars, &mut tokens, index);
+        } else if c.is_ascii_digit() {
+            index = parse_integer_constant(&chars, &mut tokens, index);
+        } else if c == '"' {
+            index = parse_string_constant(&chars, &mut tokens, index);
+        } else {
+            println!("Unexpected char {:?}", c);
+            index += 1;
+        }
+    }
+
+    return tokens;
+}
+
+/// A lexical token; every variant carries the token text (string constants without quotes).
+#[derive(Debug)]
+pub enum Token {
+    Keyword(String),
+    Symbol(String),
+    Identifier(String),
+    IntConst(String),
+    StringConst(String),
+}
+
+#[allow(dead_code)]
+const KEYWORDS: &[&str] = &[
+    "class",
+    "constructor",
+    "function",
+    "method",
+    "field",
+    "static",
+    "var",
+    "int",
+    "char",
+    "boolean",
+    "void",
+    "true",
+    "false",
+    "null",
+    "this",
+    "let",
+    "do",
+    "if",
+    "else",
+    "while",
+    "return",
+];
+
+const SYMBOLS: &[char] = &[
+    '{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/', '&', '|', '<', '>', '=', '~',
+];
+
+/// Debug helper: prints `chars[start..stop]` as a string.
+#[allow(dead_code)]
+fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
+    let s: String = chars[start..stop].into_iter().collect();
+    println!("{:?}", s);
+}
diff --git a/jack_compiler/Cargo.toml b/jack_compiler/Cargo.toml
new file mode 100644
index 0000000..33372d1
--- /dev/null
+++ b/jack_compiler/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "jack_compiler"
+version = "0.1.0"
+authors = ["Felix Martin "]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
\ No newline at end of file
diff --git a/jack_compiler/src/code_writer.rs b/jack_compiler/src/code_writer.rs
new file mode 100644
index 0000000..be9c696
--- 
/dev/null
+++ b/jack_compiler/src/code_writer.rs
@@ -0,0 +1,198 @@
+use crate::symbol_table::get_empty_symbol_table;
+use crate::symbol_table::SymbolTable;
+use crate::tokenizer::Keyword;
+use crate::tokenizer::Symbol;
+use crate::tokenizer::Token;
+use std::fs;
+
+/// Accumulates the VM code of one class together with the state used while generating it.
+#[derive(Debug)]
+pub struct Writer {
+    pub table: SymbolTable,
+    pub code: String,
+    pub class_name: String,
+    pub subroutine_name: String,
+    pub in_constructor: bool,
+    pub in_method: bool,
+    if_label_count: usize,
+    while_label_count: usize,
+}
+
+/// Creates a writer with an empty symbol table, empty code and zeroed label counters.
+pub fn get_code_writer() -> Writer {
+    Writer {
+        table: get_empty_symbol_table(),
+        code: String::new(),
+        class_name: String::new(),
+        subroutine_name: String::new(),
+        in_constructor: false,
+        in_method: false,
+        if_label_count: 0,
+        while_label_count: 0,
+    }
+}
+
+/// Maps a segment keyword to its VM segment name (`Field` becomes "this"); panics otherwise.
+fn segment_to_string(segment: Keyword) -> String {
+    use crate::tokenizer::Keyword::*;
+    match segment {
+        Constant => String::from("constant"),
+        Argument => String::from("argument"),
+        Local => String::from("local"),
+        Temp => String::from("temp"),
+        Field => String::from("this"),
+        Static => String::from("static"),
+        That => String::from("that"),
+        Pointer => String::from("pointer"),
+        _ => panic!("Unexpected segment {:?}", segment),
+    }
+}
+
+impl Writer {
+    /// Writes the accumulated code to `file`; panics if the write fails.
+    pub fn write_to_file(&self, file: &String) {
+        fs::write(file, self.code.to_string()).expect("Unable to write file");
+    }
+
+    /// Resets per-subroutine state: subroutine symbols, kind flags and label counters.
+    pub fn start_subroutine(&mut self) {
+        self.table.remove_subroutine_symbols();
+        self.in_constructor = false;
+        self.in_method = false;
+        self.if_label_count = 0;
+        self.while_label_count = 0;
+    }
+
+    /// Replaces the symbol table with an empty one.
+    pub fn start_class(&mut self) {
+        self.table = get_empty_symbol_table();
+    }
+
+    /// Forwards a symbol definition to the symbol table.
+    pub fn define_symbol(&mut self, name: String, symbol_type: Token, kind: Keyword) {
+        self.table.define_symbol(name, symbol_type, kind);
+    }
+
+    /// Appends `push <segment> <index>` to the code.
+    pub fn write_push(&mut self, segment: Keyword, index: usize) {
+        let segment = segment_to_string(segment);
+        let s = format!("push {} {}\n", segment, index);
+        self.code.push_str(&s);
+    }
+
+    /// Appends `pop <segment> <index>` to the code.
+    pub fn write_pop(&mut self, segment: Keyword, index: usize) {
+        let segment = segment_to_string(segment);
+        let s = format!("pop {} {}\n", segment, index);
+        self.code.push_str(&s);
+    }
+
+    /// Appends the VM command for `op` (`Mul`/`Div` become Math calls); panics on other symbols.
+    pub fn write_arithmetic(&mut self, op: Symbol) {
+        use crate::tokenizer::Symbol::*;
+        let s = match op {
+            Plus => "add\n",
+            Minus => "sub\n",
+            Mul => "call Math.multiply 2\n",
+            Div => "call Math.divide 2\n",
+            ExclusiveAnd => "and\n",
+            ExclusiveOr => "or\n",
+            Smaller => "lt\n",
+            Greater => "gt\n",
+            Equal => "eq\n",
+            UnaryMinus => "neg\n",
+            Not => "not\n",
+            _ => panic!("Unsupported operator {:?}.", op),
+        };
+        self.code.push_str(&s);
+    }
+
+    /// Appends the `function` header, then the pointer 0 setup for constructors and methods.
+    pub fn write_function(&mut self) {
+        let n_locals = self.table.get_count(Keyword::Local);
+        let s = format!(
+            "function {}.{} {}\n",
+            self.class_name, self.subroutine_name, n_locals
+        );
+        self.code.push_str(&s);
+
+        if self.in_constructor {
+            // Allocate class memory and initialize This.
+            let n_fields = self.table.get_count(Keyword::Field);
+            self.write_push(Keyword::Constant, n_fields);
+            self.write_call(&"Memory".to_string(), &"alloc".to_string(), 1);
+            self.write_pop(Keyword::Pointer, 0);
+        } else if self.in_method {
+            self.write_push(Keyword::Argument, 0);
+            self.write_pop(Keyword::Pointer, 0);
+        }
+    }
+
+    /// Appends `call <class_name>.<subroutine_name> <n_args>` to the code.
+    pub fn write_call(&mut self, class_name: &String, subroutine_name: &String, n_args: usize) {
+        let s = format!("call {}.{} {}\n", class_name, subroutine_name, n_args);
+        self.code.push_str(&s);
+    }
+
+    /// Appends `return` to the code.
+    pub fn write_return(&mut self) {
+        self.code.push_str("return\n");
+    }
+
+    /// Appends a pop into the kind and index the symbol table reports for `var_name`.
+    pub fn write_var_assignment(&mut self, var_name: &String) {
+        let index = self.table.index_of(var_name);
+        let symbol_type = self.table.kind_of(var_name);
+        // if symbol_type == Keyword::Static {
+        //     println!("{:?}", self.table);
+        //     panic!("assignment to static not supported, yet");
+        // }
+        self.write_pop(symbol_type, index);
+    }
+
+    /// Appends a push from the kind and index the symbol table reports for `var_name`.
+    pub fn write_var_read(&mut self, var_name: &String) {
+        let index = self.table.index_of(var_name);
+        let symbol_type = self.table.kind_of(var_name);
+        // if symbol_type == Keyword::Static {
+        //     panic!("read from static not supported, yet");
+        // }
+        self.write_push(symbol_type, index);
+    }
+
+    /// Appends `label <label_name>` to the code.
+    pub fn write_label(&mut self, label_name: &String) {
+        let s = format!("label {}\n", label_name);
+        self.code.push_str(&s);
+    }
+
+    /// Appends `if-goto <label_name>` to the code.
+    pub fn write_if_goto(&mut self, label_name: &String) {
+        let s = format!("if-goto {}\n", label_name);
+        self.code.push_str(&s);
+    }
+
+    /// Appends `goto <label_name>` to the code.
+    pub fn write_goto(&mut self, label_name: &String) {
+        let s = format!("goto {}\n", label_name);
+        self.code.push_str(&s);
+    }
+
+    /// Returns fresh (IF_FALSE, IF_TRUE, IF_END) labels and advances the if counter.
+    pub fn get_if_labels(&mut self) -> (String, String, String) {
+        let l1 = format!("IF_FALSE{}", self.if_label_count);
+        let l2 = format!("IF_TRUE{}", self.if_label_count);
+        let l3 = format!("IF_END{}", self.if_label_count);
+        self.if_label_count += 1;
+        return (l1, l2, l3);
+    }
+
+    /// Returns fresh (WHILE_EXP, WHILE_START, WHILE_END) labels and advances the while counter.
+    pub fn get_while_labels(&mut self) -> (String, String, String) {
+        let l1 = format!("WHILE_EXP{}", self.while_label_count);
+        let l2 = format!("WHILE_START{}", self.while_label_count);
+        let l3 = format!("WHILE_END{}", self.while_label_count);
+        self.while_label_count += 1;
+        return (l1, l2, l3);
+    }
+}
diff --git a/jack_compiler/src/main.rs b/jack_compiler/src/main.rs
new file mode 100644
index 0000000..c0688a7
--- /dev/null
+++ b/jack_compiler/src/main.rs
@@ -0,0 +1,56 @@
+mod parser;
+mod tokenizer;
+mod symbol_table;
+mod code_writer;
+use std::env;
+use std::fs;
+use std::path::Path;
+use std::ffi::OsStr;
+use crate::code_writer::get_code_writer;
+
+
+fn main() {
+    /// Returns true if `filename` is an existing regular file with the extension "jack".
+    fn is_jack_file(filename: &Path) -> bool {
+        let p = Path::new(filename);
+        if p.is_file() && p.extension() == Some(OsStr::new("jack")) {
+            return true;
+        }
+        return false;
+    }
+
+    /// Compiles every *.jack file directly inside `directory`; panics if it cannot be read.
+    fn translate_dir(directory: &Path) {
+        let paths = fs::read_dir(directory).unwrap();
+        for path in paths {
+            let filename = path.unwrap().path();
+            if is_jack_file(&filename) {
+                translate_single_file(filename.as_path())
+            }
+        }
+    }
+
+    /// Compiles one file and writes the code to the input path with ".jack" replaced by ".vm".
+    fn translate_single_file(input_file: &Path) {
+        let mut tokens = tokenizer::tokenize_file(input_file);
+        let mut writer = get_code_writer();
+        println!("Compiling {:?}", input_file);
+        parser::compile_class(&mut tokens, &mut writer);
+        let output_file = str::replace(input_file.to_str().unwrap(), ".jack", ".vm");
+        writer.write_to_file(&output_file);
+    }
+
+    let args: Vec<String> = env::args().collect();
+
+    for arg in &args[1..] {
+        let arg_path = Path::new(arg);
+        println!("{:?}", arg_path);
+        if is_jack_file(&arg_path) {
+            translate_single_file(&arg_path);
+        } else if arg_path.is_dir() {
+            translate_dir(&arg_path);
+        } else {
+            println!("{} is not a *.jack file or directory!", arg);
+        }
+    }
+}
diff --git a/jack_compiler/src/parser.rs b/jack_compiler/src/parser.rs
new file mode 100644
index 0000000..0e99468
--- /dev/null
+++ b/jack_compiler/src/parser.rs
@@ -0,0 +1,360 @@
+use crate::code_writer::Writer;
+use crate::tokenizer::identifier;
+use crate::tokenizer::int_const;
+use crate::tokenizer::string_const;
+use crate::tokenizer::Keyword::*;
+use crate::tokenizer::Symbol::*;
+use crate::tokenizer::Token::{Keyword, Symbol};
+use crate::tokenizer::Tokens;
+
+/// Compiles `class name { classVarDec* subroutineDec* }`, emitting code through `writer`.
+pub fn compile_class(tokens: &mut Tokens, writer: &mut Writer) {
+    writer.start_class();
+    tokens.eat(Keyword(Class));
+    let class_name = tokens.eat(identifier()).to_string();
+    writer.class_name = class_name.to_string();
+    tokens.eat(Symbol(LCurly));
+
+    while tokens.is_one_of(vec![Keyword(Static), Keyword(Field)]) {
+        compile_class_var_dec(tokens, writer);
+    }
+
+    while tokens.is_one_of(vec![
+        Keyword(Constructor),
+        Keyword(Function),
+        Keyword(Method),
+    ]) {
+        compile_subroutine(tokens, writer);
+    }
+
+    tokens.eat(Symbol(RCurly));
+}
+
+/// Defines one symbol per name in `kind type name (, name)* ;`, using the leading keyword as kind.
+fn compile_class_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
+    let kind = tokens.eat(Keyword(AnyKeyword)).to_keyword();
+    let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
+    let name = tokens.eat(identifier()).to_string();
+    writer.define_symbol(name, symbol_type.clone(), kind);
+
+    while tokens.is_sequence(vec![Symbol(Comma), identifier()]) {
+        tokens.eat(Symbol(Comma));
+        let name = tokens.eat(identifier()).to_string();
+        writer.define_symbol(name, symbol_type.clone(), kind);
+    }
+
+    tokens.eat(Symbol(Semicolon));
+}
+
+/// Compiles one constructor, function or method; a method first defines `this` as an argument.
+fn compile_subroutine(tokens: &mut Tokens, writer: &mut Writer) {
+    writer.start_subroutine();
+
+    let routine_keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword();
+    let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
+
+    match routine_keyword {
+        Method => {
+            writer.in_method = true;
+            writer.define_symbol("this".to_string(), symbol_type.clone(), Argument);
+        },
+        Function => (),
+        Constructor => { writer.in_constructor = true; },
+        s => panic!("Unsupported routine type {:?}", s),
+    }
+
+    writer.subroutine_name = tokens.eat(identifier()).to_string();
+    compile_parameter_list(tokens, writer);
+    compile_subroutine_body(tokens, writer);
+}
+
+/// Consumes `( (type name (, type name)*)? )` and defines every parameter as an argument.
+fn compile_parameter_list(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Symbol(LBrace));
+    if tokens.is_one_of(vec![Keyword(AnyKeyword), identifier()]) {
+        let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
+        let arg_name = tokens.eat(identifier()).to_string();
+        writer.define_symbol(arg_name, symbol_type.clone(), Argument);
+
+        while tokens.is(Symbol(Comma)) {
+            tokens.eat(Symbol(Comma));
+            let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
+            let arg_name = tokens.eat(identifier()).to_string();
+            writer.define_symbol(arg_name, symbol_type.clone(), Argument);
+        }
+    }
+    tokens.eat(Symbol(RBrace));
+}
+
+/// Compiles `{ varDec* statements }`; the function header is written once all locals are known.
+fn compile_subroutine_body(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Symbol(LCurly));
+
+    while tokens.is(Keyword(Var)) {
+        compile_var_dec(tokens, writer);
+    }
+
+    writer.write_function();
+    compile_statements(tokens, writer);
+    tokens.eat(Symbol(RCurly));
+}
+
+/// Defines one local symbol per name in `var type name (, name)* ;`.
+fn compile_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Keyword(Var));
+    let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
+    let var_name = tokens.eat(identifier()).to_string();
+    writer.define_symbol(var_name, symbol_type.clone(), Local);
+
+    while tokens.is(Symbol(Comma)) {
+        tokens.eat(Symbol(Comma));
+        let var_name = tokens.eat(identifier()).to_string();
+        writer.define_symbol(var_name, symbol_type.clone(), Local);
+    }
+
+    tokens.eat(Symbol(Semicolon));
+}
+
+/// Compiles let/if/while/do/return statements until the next token starts none of them.
+fn compile_statements(tokens: &mut Tokens, writer: &mut Writer) {
+    loop {
+        if tokens.is(Keyword(Let)) {
+            compile_let(tokens, writer);
+        } else if tokens.is(Keyword(If)) {
+            compile_if(tokens, writer);
+        } else if tokens.is(Keyword(While)) {
+            compile_while(tokens, writer);
+        } else if tokens.is(Keyword(Do)) {
+            compile_do(tokens, writer);
+        } else if tokens.is(Keyword(Return)) {
+            compile_return(tokens, writer);
+        } else {
+            break;
+        }
+    }
+}
+
+/// Compiles `do subroutineCall ;` and pops the call's return value into temp 0.
+fn compile_do(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Keyword(Do));
+    compile_subroutine_call(tokens, writer);
+    writer.write_pop(Temp, 0);
+    tokens.eat(Symbol(Semicolon));
+}
+
+/// Compiles `let name = expression ;` or `let name [ expression ] = expression ;`.
+fn compile_let(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Keyword(Let));
+    let var_name = tokens.eat(identifier()).to_string();
+
+    if tokens.is(Symbol(Equal)) {
+        tokens.eat(Symbol(Equal));
+        compile_expression(tokens, writer);
+        writer.write_var_assignment(&var_name);
+    } else if tokens.is(Symbol(LSquare)) {
+        tokens.eat(Symbol(LSquare));
+        writer.write_var_read(&var_name);
+        compile_expression(tokens, writer);
+        tokens.eat(Symbol(RSquare));
+        writer.write_arithmetic(Plus);
+        // Address of array access is now on stack
+        tokens.eat(Symbol(Equal));
+        compile_expression(tokens, writer);
+        // Value to assign to array is on stack
+        writer.write_pop(Temp, 0); // Buffer value to assign
+        writer.write_pop(Pointer, 1); // Set That to access address
+        writer.write_push(Temp, 0); // Restore value to assign
+        writer.write_pop(That, 0); // Do actual assignment
+    }
+
+    tokens.eat(Symbol(Semicolon));
+}
+
+/// Compiles `while ( expression ) { statements }` using one fresh set of while labels.
+fn compile_while(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Keyword(While));
+    let (l_while_exp, l_while_start, l_while_end) = writer.get_while_labels();
+
+    writer.write_label(&l_while_exp);
+    tokens.eat(Symbol(LBrace));
+    compile_expression(tokens, writer);
+    writer.write_if_goto(&l_while_start);
+    writer.write_goto(&l_while_end);
+    tokens.eat(Symbol(RBrace));
+
+    tokens.eat(Symbol(LCurly));
+    writer.write_label(&l_while_start);
+    compile_statements(tokens, writer);
+    writer.write_goto(&l_while_exp);
+    tokens.eat(Symbol(RCurly));
+
+    writer.write_label(&l_while_end);
+}
+
+/// Compiles `return expression? ;`; a bare `return` pushes constant 0 before returning.
+fn compile_return(tokens: &mut Tokens, writer: &mut Writer) {
+    tokens.eat(Keyword(Return));
+
+    if !tokens.is(Symbol(Semicolon)) {
+        compile_expression(tokens, writer);
+    } else {
+        writer.write_push(Constant, 0);
+    }
+    writer.write_return();
+    tokens.eat(Symbol(Semicolon));
+}
+
+/// Compiles `if ( expression ) { statements } (else { statements })?` with fresh if labels.
+fn compile_if(tokens: &mut Tokens, writer: &mut Writer) {
+    let (l_false, l_true, l_end) = writer.get_if_labels();
+    tokens.eat(Keyword(If));
+    tokens.eat(Symbol(LBrace));
+    compile_expression(tokens, writer);
+    tokens.eat(Symbol(RBrace));
+
+    writer.write_if_goto(&l_true);
+    writer.write_goto(&l_false);
+
+    tokens.eat(Symbol(LCurly));
+    writer.write_label(&l_true);
+    compile_statements(tokens, writer);
+    tokens.eat(Symbol(RCurly));
+
+    if tokens.is(Keyword(Else)) {
+        writer.write_goto(&l_end);
+        writer.write_label(&l_false);
+        tokens.eat(Keyword(Else));
+        tokens.eat(Symbol(LCurly));
+        compile_statements(tokens, writer);
+        tokens.eat(Symbol(RCurly));
+        writer.write_label(&l_end);
+    } else {
+        writer.write_label(&l_false);
+    }
+
+}
+
+fn compile_expression(tokens: &mut Tokens, writer: &mut Writer) {
+    compile_term(tokens, writer);
+
+    while tokens.is_one_of(vec![
+        Symbol(Plus),
+        Symbol(Minus),
+        Symbol(Mul),
+        Symbol(Div),
+        Symbol(ExclusiveAnd),
+        Symbol(ExclusiveOr),
+        Symbol(Smaller),
+        Symbol(Greater),
+        Symbol(Equal),
+    ]) {
+        let s = 
tokens.eat(Symbol(AnySymbol)).to_symbol(); + compile_term(tokens, writer); + writer.write_arithmetic(s); + } +} + +fn compile_term(tokens: &mut Tokens, writer: &mut Writer) { + if tokens.is(int_const()) { + // integerConstant + let i = tokens.eat(int_const()).to_int(); + writer.write_push(Constant, i); + } else if tokens.is(string_const()) { + let s = tokens.eat(string_const()).to_string(); + let bytes = s.as_bytes(); + writer.write_push(Constant, bytes.len()); + writer.write_call(&"String".to_string(), &"new".to_string(), 1); + for b in bytes { + writer.write_push(Constant, (*b).into()); + writer.write_call(&"String".to_string(), &"appendChar".to_string(), 2); + } + } else if tokens.is(Keyword(AnyKeyword)) { + // keywordConstant + let keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword(); + match keyword { + True => { + writer.write_push(Constant, 0); + writer.write_arithmetic(Not); + } + False => writer.write_push(Constant, 0), + Null => writer.write_push(Constant, 0), + This => writer.write_push(Pointer, 0), + _ => panic!("Unexpected keyword {:?}", keyword), + } + } else if tokens.is_sequence(vec![identifier(), Symbol(LSquare)]) { + // arrayName + let var_name = tokens.eat(identifier()).to_string(); + tokens.eat(Symbol(LSquare)); + writer.write_var_read(&var_name); + compile_expression(tokens, writer); + tokens.eat(Symbol(RSquare)); + writer.write_arithmetic(Plus); // Address of array access is now on stack + writer.write_pop(Pointer, 1); // Set That to address + writer.write_push(That, 0); // Push value from array onto stack + } else if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) { + // subroutineCall foo() + compile_subroutine_call(tokens, writer); + } else if tokens.is_sequence(vec![identifier(), Symbol(Dot)]) { + // subroutineCall foo.something + compile_subroutine_call(tokens, writer); + } else if tokens.is(Symbol(LBrace)) { + // ( expression ) + tokens.eat(Symbol(LBrace)); + compile_expression(tokens, writer); + tokens.eat(Symbol(RBrace)); + } 
else if tokens.is_one_of(vec![Symbol(Minus), Symbol(Not)]) { + // unaryOp term + let symbol = tokens.eat(Symbol(AnySymbol)).to_symbol(); + compile_term(tokens, writer); + if symbol == Minus { + writer.write_arithmetic(UnaryMinus); + } else { + writer.write_arithmetic(Not); + } + } else if tokens.is(identifier()) { + // varName + let var_name = tokens.eat(identifier()).to_string(); + writer.write_var_read(&var_name); + } else { + panic!("Unexpected token {:?} for compile_term", tokens.peek()); + } +} + +fn compile_subroutine_call(tokens: &mut Tokens, writer: &mut Writer) { + let mut class_name = String::new(); + let mut subroutine_name = String::new(); + let mut n_args: usize = 0; + + if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) { + // method call for 'this' + class_name = writer.class_name.to_string(); + writer.write_push(Pointer, 0); // Push This + n_args += 1; + subroutine_name = tokens.eat(identifier()).to_string(); + } else if tokens.is_sequence(vec![identifier(), Symbol(Dot), identifier()]) { + class_name = tokens.eat(identifier()).to_string(); + tokens.eat(Symbol(Dot)); + subroutine_name = tokens.eat(identifier()).to_string(); + if writer.table.has_symbol(&class_name) { + // method call for identifier 'class_name' + let index = writer.table.index_of(&class_name); + let symbol_type = writer.table.kind_of(&class_name); + class_name = writer.table.get_token(&class_name).to_string(); + writer.write_push(symbol_type, index); // Push class object + n_args += 1; + } else { // symbol not in table means function call + } + } + + tokens.eat(Symbol(LBrace)); + while !tokens.is(Symbol(RBrace)) { + n_args += 1; + compile_expression(tokens, writer); + if tokens.is(Symbol(Comma)) { + tokens.eat(Symbol(Comma)); + } + } + + writer.write_call(&class_name, &subroutine_name, n_args); + tokens.eat(Symbol(RBrace)); +} diff --git a/jack_compiler/src/symbol_table.rs b/jack_compiler/src/symbol_table.rs new file mode 100644 index 0000000..d9d13ae --- /dev/null +++ 
b/jack_compiler/src/symbol_table.rs @@ -0,0 +1,93 @@ +use std::collections::HashMap; +use crate::tokenizer::Keyword; +use crate::tokenizer::Token; + +#[derive(Debug)] +struct Symbol { + name: String, + symbol_type: Token, + kind: Keyword, + index: usize, +} + +#[derive(Debug)] +pub struct SymbolTable { + count: HashMap, + fields: HashMap, +} + +pub fn get_empty_symbol_table() -> SymbolTable { + let mut count = HashMap::new(); + count.insert(Keyword::Static, 0); + count.insert(Keyword::Field, 0); + count.insert(Keyword::Argument, 0); + count.insert(Keyword::Local, 0); + + SymbolTable { + count: count, + fields: HashMap::new(), + } +} + +impl SymbolTable { + + pub fn kind_of(&self, name: &String) -> Keyword { + match self.fields.get(name) { + Some(symbol) => symbol.kind, + None => panic!("Symbol {} does not exist", name), + } + } + + pub fn index_of(&self, name: &String) -> usize { + match self.fields.get(name) { + Some(s) => s.index, + None => panic!("Symbol {} does not exist", name), + } + } + + pub fn get_token(&self, name: &String) -> Token { + match self.fields.get(name) { + Some(s) => s.symbol_type.clone(), + None => panic!("Symbol {} does not exist", name), + } + } + + pub fn get_count(&self, symbol_kind: Keyword) -> usize { + match self.count.get(&symbol_kind) { + Some(s) => *s, + None => 0, + } + } + + pub fn has_symbol(&self, name: &String) -> bool { + self.fields.contains_key(name) + } + + pub fn remove_subroutine_symbols(&mut self) { + let mut to_remove: Vec = vec![]; + for (key, symbol) in self.fields.iter() { + match symbol.kind { + Keyword::Argument => to_remove.push(key.to_string()), + Keyword::Local => to_remove.push(key.to_string()), + _ => (), + } + } + for key in to_remove { + self.fields.remove(&key); + } + self.count.insert(Keyword::Argument, 0); + self.count.insert(Keyword::Local, 0); + } + + pub fn define_symbol(&mut self, name: String, symbol_type: Token, kind: Keyword) { + let index: usize = *self.count.get(&kind).unwrap(); + let s = Symbol 
use std::fs;
use std::path::Path;

/// A lexical token of a Jack source file.
#[derive(Debug, Clone)]
pub enum Token {
    Keyword(Keyword),
    Symbol(Symbol),
    Identifier(String),
    IntConst(usize),
    StringConst(String),
}

/// Keywords. `AnyKeyword` is a wildcard that is only meaningful on the
/// "expected" side of [`equal`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    Class,
    Constructor,
    Function,
    Method,
    Field,
    Var,
    Int,
    Char,
    Boolean,
    Constant,
    Argument,
    Local,
    Static,
    That,
    Pointer,
    Temp,
    Void,
    True,
    False,
    Null,
    This,
    Let,
    Do,
    If,
    Else,
    While,
    Return,
    AnyKeyword,
}

/// Symbols. `AnySymbol` is a wildcard that is only meaningful on the
/// "expected" side of [`equal`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    LCurly,
    RCurly,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    Dot,
    Comma,
    Semicolon,
    Plus,
    Minus,
    Mul,
    Div,
    ExclusiveAnd,
    ExclusiveOr,
    Smaller,
    Greater,
    Equal,
    UnaryMinus,
    Not,
    AnySymbol,
}

/// A token list together with a read cursor.
#[derive(Debug)]
pub struct Tokens {
    pub tokens: Vec<Token>,
    index: usize,
}

/// Pattern token that matches any identifier.
pub fn identifier() -> Token {
    Token::Identifier(String::new())
}

/// Pattern token that matches any integer constant.
pub fn int_const() -> Token {
    Token::IntConst(0)
}

/// Pattern token that matches any string constant.
pub fn string_const() -> Token {
    Token::StringConst(String::new())
}

/// Returns whether the actual token `t2` matches the expected token `t1`.
///
/// Identifiers, integer constants and string constants match on their variant
/// alone. Keywords and symbols must be equal, unless `t1` is the wildcard
/// `AnyKeyword` / `AnySymbol`. The wildcard is not honoured on the `t2` side.
pub fn equal(t1: &Token, t2: &Token) -> bool {
    match (t1, t2) {
        (Token::Keyword(expected), Token::Keyword(actual)) => {
            expected == actual || *expected == Keyword::AnyKeyword
        }
        (Token::Symbol(expected), Token::Symbol(actual)) => {
            expected == actual || *expected == Symbol::AnySymbol
        }
        (Token::Identifier(_), Token::Identifier(_))
        | (Token::IntConst(_), Token::IntConst(_))
        | (Token::StringConst(_), Token::StringConst(_)) => true,
        _ => false,
    }
}

impl Token {
    /// Returns the text of an identifier or string constant.
    ///
    /// Panics for every other token.
    pub fn to_string(&self) -> String {
        match self {
            Token::Identifier(text) | Token::StringConst(text) => text.clone(),
            _ => panic!("Cannot convert {:?} to string.", self),
        }
    }

    /// Returns the keyword of a keyword token and `AnyKeyword` for every
    /// other token.
    pub fn to_keyword(&self) -> Keyword {
        match self {
            Token::Keyword(keyword) => *keyword,
            _ => Keyword::AnyKeyword,
        }
    }

    /// Returns the value of an integer constant. Panics for every other token.
    pub fn to_int(&self) -> usize {
        match self {
            Token::IntConst(value) => *value,
            _ => panic!("Cannot convert {:?} to int.", self),
        }
    }

    /// Returns the symbol of a symbol token. Panics for every other token.
    pub fn to_symbol(&self) -> Symbol {
        match self {
            Token::Symbol(symbol) => symbol.clone(),
            _ => panic!("Cannot convert {:?} to symbol.", self),
        }
    }
}

impl Tokens {
    /// Returns a copy of the current token without consuming it.
    ///
    /// Panics at the end of the input.
    pub fn peek(&self) -> Token {
        self.get_token(self.index)
    }

    /// Returns a copy of the token at `index`, panicking with a readable
    /// message when the input ended before that position.
    fn get_token(&self, index: usize) -> Token {
        match self.tokens.get(index) {
            Some(token) => token.clone(),
            None => panic!("Unexpected end of input at token {}.", index),
        }
    }

    /// Consumes and returns the current token, which must match
    /// `expected_token` (see [`equal`]). Panics otherwise.
    pub fn eat(&mut self, expected_token: Token) -> Token {
        let token = self.get_token(self.index);
        if !equal(&expected_token, &token) {
            panic!("Expected {:?} but got {:?}.", expected_token, token);
        }
        self.index += 1;
        token
    }

    /// Consumes and returns the current token, which must match one of
    /// `tokens`. Panics otherwise.
    pub fn eat_one_of(&mut self, tokens: Vec<Token>) -> Token {
        let token = self.get_token(self.index);
        if !tokens.iter().any(|expected| equal(expected, &token)) {
            panic!("Expected one of {:?} but got {:?}.", tokens, token);
        }
        self.index += 1;
        token
    }

    /// Returns whether the current token matches `expected_token`.
    /// At the end of the input the answer is `false`.
    pub fn is(&self, expected_token: Token) -> bool {
        self.tokens
            .get(self.index)
            .map_or(false, |actual| equal(&expected_token, actual))
    }

    /// Returns whether the current token matches any of `tokens`.
    /// At the end of the input the answer is `false`.
    pub fn is_one_of(&self, tokens: Vec<Token>) -> bool {
        match self.tokens.get(self.index) {
            Some(actual) => tokens.iter().any(|expected| equal(expected, actual)),
            None => false,
        }
    }

    /// Returns whether the upcoming tokens match `tokens` position by
    /// position. A sequence that reaches past the end of the input does not
    /// match.
    pub fn is_sequence(&self, tokens: Vec<Token>) -> bool {
        tokens.iter().enumerate().all(|(offset, expected)| {
            self.tokens
                .get(self.index + offset)
                .map_or(false, |actual| equal(expected, actual))
        })
    }
}

/// Reads `file` and splits its content into Jack tokens.
///
/// Comments and whitespace are skipped. A character that cannot start any
/// token is reported on stdout and skipped.
///
/// # Panics
/// Panics when the file cannot be read, when a string constant is not
/// terminated, or when an integer constant does not fit into `usize`.
pub fn tokenize_file(file: &Path) -> Tokens {
    let source = fs::read_to_string(file)
        .unwrap_or_else(|err| panic!("Could not read {:?}: {}", file, err));
    let chars: Vec<char> = source.chars().collect();

    Tokens {
        tokens: tokenize_chars(&chars),
        index: 0,
    }
}

/// Tokenizes an in-memory character sequence.
fn tokenize_chars(chars: &[char]) -> Vec<Token> {
    let mut tokens = Vec::new();
    let mut index: usize = 0;

    while index < chars.len() {
        index = eat_comment(chars, index);
        // A comment may have been the last thing in the file.
        let c = match chars.get(index) {
            Some(&c) => c,
            None => break,
        };

        if c.is_whitespace() {
            index += 1;
            continue;
        }

        let after_symbol = parse_symbol(chars, &mut tokens, index);
        if after_symbol != index {
            // the symbol has already been added to the token list
            index = after_symbol;
        } else if c.is_ascii_alphabetic() || c == '_' {
            index = parse_keyword_or_identifier(chars, &mut tokens, index);
        } else if c.is_ascii_digit() {
            index = parse_integer_constant(chars, &mut tokens, index);
        } else if c == '"' {
            index = parse_string_constant(chars, &mut tokens, index);
        } else {
            println!("Unexpected char {:?}", c);
            index += 1;
        }
    }

    tokens
}

/// Returns the first position at or after `start` whose character does not
/// satisfy `keep`, or `chars.len()` when all remaining characters do.
fn scan_while(chars: &[char], start: usize, keep: impl Fn(char) -> bool) -> usize {
    chars[start..]
        .iter()
        .position(|&c| !keep(c))
        .map_or(chars.len(), |offset| start + offset)
}

/// Skips all directly adjacent comments starting at `index` and returns the
/// position after them; returns `index` unchanged when no comment starts
/// there.
///
/// A line comment without a final newline and an unterminated block comment
/// both extend to the end of the input instead of reading out of bounds.
fn eat_comment(chars: &[char], index: usize) -> usize {
    let mut index = index;
    loop {
        let opener = (chars.get(index).copied(), chars.get(index + 1).copied());
        match opener {
            (Some('/'), Some('/')) => {
                let line_end = scan_while(chars, index + 2, |c| c != '\n');
                // step over the newline itself when there is one
                index = (line_end + 1).min(chars.len());
            }
            (Some('/'), Some('*')) => {
                let mut cursor = index + 2;
                while cursor + 1 < chars.len()
                    && !(chars[cursor] == '*' && chars[cursor + 1] == '/')
                {
                    cursor += 1;
                }
                index = (cursor + 2).min(chars.len());
            }
            _ => return index,
        }
    }
}

/// Adds a symbol token when `chars[index]` is a symbol and returns the
/// position after it; otherwise returns `index` unchanged.
fn parse_symbol(chars: &[char], tokens: &mut Vec<Token>, index: usize) -> usize {
    use Symbol::*;

    let symbol = match chars.get(index).copied() {
        Some('{') => LCurly,
        Some('}') => RCurly,
        Some('(') => LBrace,
        Some(')') => RBrace,
        Some('[') => LSquare,
        Some(']') => RSquare,
        Some('.') => Dot,
        Some(',') => Comma,
        Some(';') => Semicolon,
        Some('+') => Plus,
        Some('-') => Minus,
        Some('*') => Mul,
        Some('/') => Div,
        Some('&') => ExclusiveAnd,
        Some('|') => ExclusiveOr,
        Some('<') => Smaller,
        Some('>') => Greater,
        Some('=') => Equal,
        Some('~') => Not,
        _ => return index,
    };

    tokens.push(Token::Symbol(symbol));
    index + 1
}

/// Adds an integer constant when digits start at `index` and returns the
/// position after them; otherwise returns `index` unchanged.
fn parse_integer_constant(chars: &[char], tokens: &mut Vec<Token>, index: usize) -> usize {
    if index >= chars.len() {
        return index;
    }
    let end = scan_while(chars, index, |c| c.is_ascii_digit());
    if end == index {
        return index;
    }

    let digits: String = chars[index..end].iter().collect();
    let number = digits
        .parse::<usize>()
        .unwrap_or_else(|err| panic!("Invalid integer constant {:?}: {}", digits, err));
    tokens.push(Token::IntConst(number));
    end
}

/// Adds a string constant when a `"` starts at `index` and returns the
/// position after the closing quote; otherwise returns `index` unchanged.
///
/// Panics when the closing quote is missing.
fn parse_string_constant(chars: &[char], tokens: &mut Vec<Token>, index: usize) -> usize {
    if chars.get(index) != Some(&'"') {
        return index;
    }

    let start = index + 1;
    let end = scan_while(chars, start, |c| c != '"');
    if end == chars.len() {
        panic!("Unterminated string constant starting at char {}.", index);
    }

    let text: String = chars[start..end].iter().collect();
    tokens.push(Token::StringConst(text));
    end + 1
}

/// Maps the text of a Jack keyword to its `Keyword`.
fn keyword_from_str(word: &str) -> Option<Keyword> {
    use Keyword::*;

    let keyword = match word {
        "class" => Class,
        "constructor" => Constructor,
        "function" => Function,
        "method" => Method,
        "field" => Field,
        "static" => Static,
        "var" => Var,
        "int" => Int,
        "char" => Char,
        "boolean" => Boolean,
        "void" => Void,
        "true" => True,
        "false" => False,
        "null" => Null,
        "this" => This,
        "let" => Let,
        "do" => Do,
        "if" => If,
        "else" => Else,
        "while" => While,
        "return" => Return,
        _ => return None,
    };
    Some(keyword)
}

/// Adds a keyword or identifier when a word starts at `index` and returns
/// the position after it; otherwise returns `index` unchanged.
///
/// A word starts with an ASCII letter or `_` and continues with letters,
/// digits or `_`.
fn parse_keyword_or_identifier(chars: &[char], tokens: &mut Vec<Token>, index: usize) -> usize {
    let starts_word = chars
        .get(index)
        .map_or(false, |c| c.is_ascii_alphabetic() || *c == '_');
    if !starts_word {
        return index;
    }

    let end = scan_while(chars, index, |c| c.is_alphanumeric() || c == '_');
    let word: String = chars[index..end].iter().collect();

    let token = match keyword_from_str(&word) {
        Some(keyword) => Token::Keyword(keyword),
        None => Token::Identifier(word),
    };
    tokens.push(token);
    end
}

/// Debug helper: prints `chars[start..stop]` as one string.
#[allow(dead_code)]
fn print_vector_slice(chars: &[char], start: usize, stop: usize) {
    let s: String = chars[start..stop].iter().collect();
    println!("{:?}", s);
}
use std::env;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::io::{self, BufRead};
use std::path::Path;
use std::ffi::OsStr;

/// One parsed line of a `.vm` file.
enum Command {
    Arithmetic(ArithmeticCommand),
    MemoryAccess(MemoryAccessCommand),
    ProgramFlow(ProgramFlowCommand),
    FunctionCalling(FunctionCallingCommand),
    /// A line that could not be parsed; carries a ready-made `// ...` comment.
    Error(String),
    /// An empty line or a line that only contains a comment.
    NoCommand,
}

enum ArithmeticCommand {
    Add,
    Sub,
    Neg,
    Eq,
    Gt,
    Lt,
    And,
    Or,
    Not,
}

struct MemoryAccessCommand {
    access_type: MemoryAccessType,
    segment: Segment,
    index: String, // TODO: might be better to use &str here?
}

#[derive(Debug)]
enum MemoryAccessType {
    Push,
    Pop,
}

#[derive(Debug)]
enum Segment {
    Argument,
    Local,
    Static,
    Constant,
    This,
    That,
    Pointer,
    Temp,
}

enum ProgramFlowCommand {
    Label(String),
    Goto(String),
    IfGoto(String),
}

enum FunctionCallingCommand {
    Function { name: String, n: String },
    Call { name: String, m: String },
    Return,
}

struct CodeGen {
    acc: String,              // accumulator for code
    comp_counter: u32,        // counter to create unique labels for comparison jump instructions
    call_counter: u32,        // counter to create unique labels for return addresses
    vm_ref: String,           // Name of VM, e.g. "Foo" for "Foo.vm" - used for push/pop static
    current_function: String, // name of the last Function command
}

/// Builds an `Error` command for `line` and reports it on stdout.
///
/// The message has the form `// <error>: '<line>'.`.
fn make_error_command(error: &str, line: &str) -> Command {
    let message = format!("// {}: '{}'.", error, line);
    println!("{}", message);
    Command::Error(message)
}

/// Parses a command that consists of a single token (arithmetic or `return`).
fn parse_single_token(tokens: &[&str], line: &str) -> Command {
    use crate::ArithmeticCommand::*;
    use crate::Command::Arithmetic;
    use crate::Command::FunctionCalling;
    use crate::FunctionCallingCommand::Return;

    match tokens[0] {
        "add" => Arithmetic(Add),
        "sub" => Arithmetic(Sub),
        "neg" => Arithmetic(Neg),
        "eq" => Arithmetic(Eq),
        "gt" => Arithmetic(Gt),
        "lt" => Arithmetic(Lt),
        "and" => Arithmetic(And),
        "or" => Arithmetic(Or),
        "not" => Arithmetic(Not),
        "return" => FunctionCalling(Return),
        _ => make_error_command("Unrecognized single token command", line),
    }
}

/// Parses a command that consists of two tokens (`label`, `goto`, `if-goto`).
fn parse_two_tokens(tokens: &[&str], line: &str) -> Command {
    use crate::Command::ProgramFlow;
    use crate::ProgramFlowCommand::*;

    let symbol = tokens[1];
    match tokens[0] {
        "label" => ProgramFlow(Label(symbol.to_string())),
        "goto" => ProgramFlow(Goto(symbol.to_string())),
        "if-goto" => ProgramFlow(IfGoto(symbol.to_string())),
        _ => make_error_command("Unrecognized two tokens command", line),
    }
}

/// Parses a command that consists of three tokens: `push`/`pop` with segment
/// and index, `function name nLocals`, or `call name nArgs`.
fn parse_three_tokens(tokens: &[&str], line: &str) -> Command {
    use crate::Command::FunctionCalling;
    use crate::Command::MemoryAccess;
    use crate::FunctionCallingCommand::*;
    use crate::MemoryAccessType::*;
    use crate::Segment::*;

    let access_type = match tokens[0] {
        "push" => Push,
        "pop" => Pop,
        "function" => {
            return FunctionCalling(Function {
                name: tokens[1].to_string(),
                n: tokens[2].to_string(),
            })
        }
        "call" => {
            return FunctionCalling(Call {
                name: tokens[1].to_string(),
                m: tokens[2].to_string(),
            })
        }
        _ => return make_error_command("Unexpected three tokens", line),
    };

    let segment = match tokens[1] {
        "argument" => Argument,
        "local" => Local,
        "static" => Static,
        "constant" => Constant,
        "this" => This,
        "that" => That,
        "pointer" => Pointer,
        "temp" => Temp,
        _ => return make_error_command("Unexpected three tokens", line),
    };

    MemoryAccess(MemoryAccessCommand {
        access_type,
        segment,
        index: tokens[2].to_string(),
    })
}

/// Parses one line of VM code.
///
/// Everything from the first `//` to the end of the line is a comment,
/// whether or not the `//` is surrounded by whitespace. Lines without any
/// token yield `NoCommand`; lines that cannot be parsed yield `Error`.
fn parse_line(line: &str) -> Command {
    let code = line.split("//").next().unwrap_or("");
    let tokens: Vec<&str> = code.split_whitespace().collect();

    match tokens.len() {
        0 => Command::NoCommand,
        1 => parse_single_token(&tokens, line),
        2 => parse_two_tokens(&tokens, line),
        3 => parse_three_tokens(&tokens, line),
        _ => make_error_command("Unexpected number of tokens", line),
    }
}

/// Parses every line of the file `filename` into a command.
///
/// Lines that cannot be read become `Error` commands.
///
/// # Panics
/// Panics when the file cannot be opened.
fn parse_file(filename: &str) -> Vec<Command> {
    let path = Path::new(filename);
    let file = File::open(path).unwrap_or_else(|_| panic!("Could not open {:?}!", path));

    io::BufReader::new(file)
        .lines()
        .map(|line| match line {
            Ok(text) => parse_line(&text),
            Err(err) => make_error_command("Error reading line", &err.to_string()),
        })
        .collect()
}

/// Appends the Hack assembly for an arithmetic/logical VM command to
/// `code_gen.acc`.
///
/// Comparison commands increment `code_gen.comp_counter` first and use the
/// new value to make their jump labels unique.
fn generate_code_arithmetic(command: &ArithmeticCommand, code_gen: &mut CodeGen) {
    /// Pops two values, applies `operator` and pushes the result.
    fn binary_operator(command: &str, operator: &str, code_gen: &mut CodeGen) {
        let s = format!(
            "\
            // {}\n\
            @SP\n\
            A = M\n\
            A = A - 1\n\
            A = A - 1\n\
            D = M\n\
            A = A + 1\n\
            D = D {} M\n\
            A = A - 1\n\
            M = D\n\
            @SP\n\
            M = M - 1\n\n\
            ",
            command, operator
        );
        code_gen.acc.push_str(&s);
    }

    /// Applies `operator` in place to the value on top of the stack.
    fn unary_operator(command: &str, operator: &str, code_gen: &mut CodeGen) {
        let s = format!(
            "\
            // {}\n\
            @SP\n\
            A = M\n\
            A = A - 1\n\
            M = {}M\n\n\
            ",
            command, operator
        );
        code_gen.acc.push_str(&s);
    }

    /// Pops two values and pushes -1 (true) or 0 (false), depending on
    /// whether their difference satisfies the jump condition `operator`.
    fn comparison_operator(command: &str, operator: &str, code_gen: &mut CodeGen) {
        code_gen.comp_counter += 1;
        let s = format!(
            "\
            // {cmd}\n\
            @SP\n\
            A = M\n\
            A = A - 1\n\
            A = A - 1\n\
            D = M\n\
            A = A + 1\n\
            D = D - M\n\
            @IF_{op}_{index}\n\
            D;{op}\n\
            @ELSE_{op}_{index}\n\
            0;JMP\n\
            (IF_{op}_{index})\n\
            D = -1\n\
            @END_{op}_{index}\n\
            0;JMP\n\
            (ELSE_{op}_{index})\n\
            D = 0\n\
            (END_{op}_{index})\n\
            @SP\n\
            A = M\n\
            A = A - 1\n\
            A = A - 1\n\
            M = D\n\
            @SP\n\
            M = M - 1\n\
            \n",
            cmd = command,
            op = operator,
            index = code_gen.comp_counter
        );
        code_gen.acc.push_str(&s);
    }

    use crate::ArithmeticCommand::*;
    match command {
        Add => binary_operator("add", "+", code_gen),
        Sub => binary_operator("sub", "-", code_gen),
        Neg => unary_operator("neg", "-", code_gen),
        Eq => comparison_operator("eq", "JEQ", code_gen),
        Gt => comparison_operator("gt", "JGT", code_gen),
        Lt => comparison_operator("lt", "JLT", code_gen),
        And => binary_operator("and", "&", code_gen),
        Or => binary_operator("or", "|", code_gen),
        Not => unary_operator("not", "!", code_gen),
    }
}
crate::ArithmeticCommand::*; + match command { + Add => binary_operator("add", "+", &mut code_gen), + Sub => binary_operator("sub", "-", &mut code_gen), + Neg => unary_operator("neg", "-", &mut code_gen), + Eq => comparison_operator("eq", "JEQ", &mut code_gen), + Gt => comparison_operator("gt", "JGT", &mut code_gen), + Lt => comparison_operator("lt", "JLT", &mut code_gen), + And => binary_operator("and", "&", &mut code_gen), + Or => binary_operator("or", "|", &mut code_gen), + Not => unary_operator("not", "!", &mut code_gen), + }; +} + +fn generate_code_memory_access(command: &MemoryAccessCommand, code_gen: &mut CodeGen) { + use crate::MemoryAccessType::*; + use crate::Segment::*; + + fn pop_regular(segment_name: &str, segment_id: &str, index: &String, code_gen: &mut CodeGen) { + let s = format!( + "\ + // pop {segment_name} {index}\n\ + @{index}\n\ + D = A\n\ + @{segment_id}\n\ + A = M\n\ + D = D + A\n\ + @R13\n\ + M = D\n\ + // ^ R13 = {segment_name} + index\n\ + @SP\n\ + A = M\n\ + A = A - 1\n\ + D = M\n\ + @SP\n\ + M = M - 1\n\ + // ^ pop into D\n\ + @R13\n\ + A = M\n\ + M = D\n\ + // ^ *R13 = D\n\n\ + ", + segment_name = segment_name, + segment_id = segment_id, + index = index + ); + code_gen.acc.push_str(&s); + } + + fn push_regular(segment_name: &str, segment_id: &str, index: &String, code_gen: &mut CodeGen) { + let s = format!( + "\ + // push {segment_name} {index}\n\ + @{index}\n\ + D = A\n\ + @{segment_id}\n\ + A = M\n\ + A = D + A\n\ + D = M\n\ + // ^ D = *({segment_id} + index)\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push D\n\n\ + ", + segment_name = segment_name, + segment_id = segment_id, + index = index + ); + code_gen.acc.push_str(&s); + } + + fn push_constant(index: &String, code_gen: &mut CodeGen) { + let s = format!( + "\ + // push constant {}\n\ + @{}\n\ + D = A\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\n\ + ", + index, index + ); + code_gen.acc.push_str(&s); + } + + fn pop_static(index: &String, code_gen: 
&mut CodeGen) { + let mut symbol = String::from(code_gen.vm_ref.as_str()); + symbol.push_str("."); + symbol.push_str(index); + + let s = format!("\ + // pop static {symbol}\n\ + @SP\n\ + A = M\n\ + A = A - 1\n\ + D = M\n\ + @SP\n\ + M = M - 1\n\ + // ^ pop into D\n\ + @{symbol}\n\ + M = D\n\ + // ^ {symbol} = D\n\ + \n", symbol=symbol); + code_gen.acc.push_str(&s); + } + + fn push_static(index: &String, code_gen: &mut CodeGen) { + let mut symbol = String::from(code_gen.vm_ref.as_str()); + symbol.push_str("."); + symbol.push_str(index); + + let s = format!("\ + // push static {symbol}\n\ + @{symbol}\n\ + D = M\n\ + // ^ D = {symbol}\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push from D\n\ + \n", symbol=symbol); + + + code_gen.acc.push_str(&s); + } + + fn pop_temp(index: &String, code_gen: &mut CodeGen) { + let s = format!( + "\ + // pop temp {index}\n\ + @{index}\n\ + D = A\n\ + @5\n\ + A = D + A\n\ + D = A\n\ + @R13\n\ + M = D\n\ + // ^ R13 = temp + index\n\ + @SP\n\ + A = M\n\ + A = A - 1\n\ + D = M\n\ + @SP\n\ + M = M - 1\n\ + // ^ pop into D\n\ + @R13\n\ + A = M\n\ + M = D\n\ + // ^ *R13 = D\n\n\ + ", + index = index + ); + code_gen.acc.push_str(&s); + } + + fn push_temp(index: &String, code_gen: &mut CodeGen) { + let s = format!( + "\ + // push temp {index}\n\ + @{index} + D = A + @5 + A = D + A + D = M + // ^ D = *(temp + index)\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push D\n\n\ + ", + index = index + ); + code_gen.acc.push_str(&s); + } + + fn push_pointer(index: &String, code_gen: &mut CodeGen) { + let segment = match index.as_str() { + "0" => "THIS", + "1" => "THAT", + _ => "INVALID" + }; + + let s = format!("\ + // push pointer {segment}\n\ + @{segment}\n\ + D = M\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push {segment}\n\n", + segment=segment + ); + code_gen.acc.push_str(&s); + } + + fn pop_pointer(index: &String, code_gen: &mut CodeGen) { + let segment = match index.as_str() { + "0" 
=> "THIS", + "1" => "THAT", + _ => "INVALID" + }; + + let s = format!("\ + // pop pointer {segment}\n\ + @SP\n\ + A = M\n\ + A = A - 1\n\ + D = M\n\ + @SP\n\ + M = M - 1\n\ + @{segment}\n\ + M = D\n\ + // ^ pop into {segment}\n\ + \n", + segment=segment + ); + code_gen.acc.push_str(&s); + } + + match command { + MemoryAccessCommand { access_type: Push, segment: Constant, index } + => push_constant(index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: Local, index } + => pop_regular("local", "LCL", index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: Local, index } + => push_regular("local", "LCL", index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: Argument, index } + => pop_regular("argument", "ARG", index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: Argument, index } + => push_regular("argument", "ARG", index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: This, index } + => pop_regular("this", "THIS", index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: This, index } + => push_regular("this", "THIS", index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: That, index } + => pop_regular("that", "THAT", index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: That, index } + => push_regular("that", "THAT", index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: Static, index } + => pop_static(index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: Static, index } + => push_static(index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: Temp, index } + => pop_temp(index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: Temp, index } + => push_temp(index, code_gen), + + MemoryAccessCommand { access_type: Pop, segment: Pointer, index } + => pop_pointer(index, code_gen), + + MemoryAccessCommand { access_type: Push, segment: Pointer, index } + => push_pointer(index, 
code_gen), + + MemoryAccessCommand { access_type, segment, index, + } => { + let s = format!( + "// warning: {:?} {:?} {} not implemented.\n\n", + access_type, segment, index + ); + code_gen.acc.push_str(&s); + } + } +} + +fn generate_code_program_flow(command: &ProgramFlowCommand, code_gen: &mut CodeGen) { + + fn get_flow_label(label: &String, code_gen: &mut CodeGen) -> std::string::String { + if code_gen.current_function != "" { + return format!("{}:{}", code_gen.current_function, label); + } + else { + return label.to_string(); + } + } + + use crate::ProgramFlowCommand::*; + match command { + Label(label) => { + let label = get_flow_label(label, code_gen); + let s = format!("// label {}\n({})\n\n", label, label); + code_gen.acc.push_str(&s); + }, + Goto(label) => { + let label = get_flow_label(label, code_gen); + let s = format!("// goto {}\n@{}\n0;JMP\n\n", label, label); + code_gen.acc.push_str(&s); + }, + IfGoto(label) => { + let label = get_flow_label(label, code_gen); + let s = format!("\ + // if-goto {label}\n\ + @SP\n\ + A = M\n\ + A = A - 1\n\ + D = M\n\ + @SP\n\ + M = M - 1\n\ + @{label}\n\ + D;JNE\n\ + ", label=label); + code_gen.acc.push_str(&s); + } + } +} + +fn generate_code_function_calling(command: &FunctionCallingCommand, code_gen: &mut CodeGen) { + use crate::FunctionCallingCommand::*; + + fn call(function_name: &String, nargs: &String, code_gen: &mut CodeGen) { + let return_label = format!("{}:return:{}", function_name, code_gen.call_counter); + code_gen.call_counter += 1; + + let s = format!( + "\ + // call {function_name} {nargs}\n\ + @{return_label}\n\ + D = A\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push return-address\n\ + @LCL\n\ + D = M\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push lcl\n\ + @ARG\n\ + D = M\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push arg\n\ + @THIS\n\ + D = M\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push this\n\ + @THAT\n\ + 
D = M\n\ + @SP\n\ + A = M\n\ + M = D\n\ + @SP\n\ + M = M + 1\n\ + // ^ push that\n\ + @SP\n\ + D = M\n\ + @{nargs}\n\ + D = D - A\n\ + @5\n\ + D = D - A\n\ + @ARG\n\ + M = D\n\ + // ^ ARG = SP - {nargs} - 5\n\ + @SP\n\ + D = M\n\ + @LCL\n\ + M = D\n\ + // ^ LCL = SP\n\ + @{function_name}\n\ + 0;JMP\n\ + // ^ goto {function_name}\n\ + ({return_label})\n\ + \n\n", + nargs=nargs, + function_name=function_name, + return_label=return_label, + ); + code_gen.acc.push_str(&s); + + } + + fn function(function_name: &String, nargs: &String, code_gen: &mut CodeGen) { + code_gen.current_function = function_name.to_string(); + + let s = format!( + "\ + // function {function_name} {nargs}\n\ + ({function_name})\n\ + @0\n\ + D = A\n\ + ", + function_name=function_name, + nargs=nargs, + ); + code_gen.acc.push_str(&s); + let nargs: u32 = nargs.parse().unwrap(); + for _ in 0..nargs { + code_gen.acc.push_str("@SP\nA = M\nM = D\n@SP\nM = M + 1\n// ^ push 0\n"); + } + let s = format!("// ^ push 0 * {nargs}\n\n", nargs=nargs); + code_gen.acc.push_str(&s); + } + + fn fnreturn(code_gen: &mut CodeGen) { + let s = format!( + "\ + // return\n\ + @LCL\n\ + D = M\n\ + @R13\n\ + M = D\n\ + // ^ R13 = FRAME = LCL\n\ + @5\n\ + D = A\n\ + @R13\n\ + A = M - D\n\ + D = M\n\ + @R14\n\ + M = D\n\ + // ^ R14 = RET = *(FRAME - 5)\n\ + @SP\n\ + A = M\n\ + A = A - 1\n\ + D = M\n\ + @SP\n\ + M = M - 1\n\ + @ARG\n\ + A = M\n\ + M = D\n\ + // ^ POP into *ARG\n\ + @ARG\n\ + D = M + 1\n\ + @SP\n\ + M = D\n\ + // ^ SP = ARG + 1\n\ + @1\n\ + D = A\n\ + @R13\n\ + A = M - D\n\ + D = M\n\ + @THAT\n\ + M = D\n\ + // ^ THAT = *(FRAME - 1)\n\ + @2\n\ + D = A\n\ + @R13\n\ + A = M - D\n\ + D = M\n\ + @THIS\n\ + M = D\n\ + // ^ THIS = *(FRAME - 2)\n\ + @3\n\ + D = A\n\ + @R13\n\ + A = M - D\n\ + D = M\n\ + @ARG\n\ + M = D\n\ + // ^ ARG = *(FRAME - 3)\n\ + @4\n\ + D = A\n\ + @R13\n\ + A = M - D\n\ + D = M\n\ + @LCL\n\ + M = D\n\ + // ^ LCL = *(FRAME - 4)\n\ + @R14\n\ + A = M\n\ + 0;JMP\n\ + // ^ goto RET\n\ + \n\n", + ); + 
code_gen.acc.push_str(&s); + } + + match command { + Call { name, m } => call(name, m, code_gen), + Function { name, n, } => function(name, n, code_gen), + Return => fnreturn(code_gen), + }; +} + +fn generate_code_error(error: &String, code_gen: &mut CodeGen) { + code_gen.acc.push_str(error); + code_gen.acc.push_str("\n"); +} + +fn generate_code(commands: &Vec, mut code_gen: &mut CodeGen) { + use crate::Command::*; + for command in commands { + match command { + Arithmetic(c) => generate_code_arithmetic(c, &mut code_gen), + MemoryAccess(c) => generate_code_memory_access(c, &mut code_gen), + ProgramFlow(c) => generate_code_program_flow(c, &mut code_gen), + FunctionCalling(c) => generate_code_function_calling(c, &mut code_gen), + Error(string) => generate_code_error(string, &mut code_gen), + NoCommand => (), + }; + } +} + +fn generate_bootstrap_code(code_gen: &mut CodeGen) { + use crate::Command::FunctionCalling; + use crate::FunctionCallingCommand::Call; + let s = format!( + "\ + // Bootstrap code\n\ + @256\n\ + D = A\n\ + @SP\n\ + M = D\n\ + // ^ SP = 256\n\n"); + code_gen.acc.push_str(&s); + let v = vec![FunctionCalling(Call { name: "Sys.init".to_string(), m: "0".to_string() })]; + generate_code(&v, code_gen); +} + +fn main() { + fn write_output_file(filename: &String, output: &String) { + let path = Path::new(&filename); + let display = path.display(); + + // Open a file in write-only mode, returns `io::Result` + let mut file = match File::create(&path) { + Err(why) => panic!("Couldn't create {}: {}", display, why.to_string()), + Ok(file) => file, + }; + + match file.write_all(output.as_bytes()) { + Err(why) => panic!("Couldn't write to {}: {}", display, why.to_string()), + Ok(_) => println!("<= {}", display), + } + } + + fn filename_to_vm_ref(filename: &String) -> String { + let p = Path::new(filename); + p.file_stem().and_then(OsStr::to_str).unwrap().to_string() + } + + fn translate_single_file(filename: &String) { + let mut code_gen = CodeGen { + acc: 
String::new(), + comp_counter: 0, + call_counter: 0, + vm_ref: filename_to_vm_ref(filename), + current_function: String::new(), + }; + let commands = parse_file(filename); + generate_code(&commands, &mut code_gen); + let filename = str::replace(filename, ".vm", ".asm"); + write_output_file(&filename, &code_gen.acc); + } + + fn translate_file(filename: &String, mut code_gen: &mut CodeGen) { + let commands = parse_file(filename); + generate_code(&commands, &mut code_gen); + } + + fn translate_dir(dirname: &String) { + println!("=> {}", dirname); + let mut code_gen = CodeGen { + acc: String::new(), + comp_counter: 0, + call_counter: 0, + vm_ref: String::new(), + current_function: String::new(), + }; + + generate_bootstrap_code(&mut code_gen); + let paths = fs::read_dir(dirname).unwrap(); + for path in paths { + let filename = path.unwrap().path().to_str().unwrap().to_string(); + if is_vm_file(&filename) { + println!(" -> {}", filename); + let vm_ref = filename_to_vm_ref(&filename); + let s = format!("// Start {}\n\n", filename); + code_gen.vm_ref = vm_ref; + code_gen.current_function = "".to_string(); + code_gen.acc.push_str(&s); + translate_file(&filename, &mut code_gen); + } + } + + let filename = format!("{}/{}.asm", dirname, filename_to_vm_ref(dirname)); + write_output_file(&filename, &code_gen.acc); + } + + fn is_vm_file(filename: &String) -> bool { + let p = Path::new(filename); + if p.is_file() && (p.extension().unwrap() == OsStr::new("vm")) { + return true; + } + return false; + } + + let args: Vec = env::args().collect(); + for arg in &args[1..] { + if is_vm_file(arg) { + translate_single_file(&arg); + } else if Path::new(arg).is_dir() { + translate_dir(&arg); + } else { + println!("{} is not a *.vm file or directory!", arg); + } + } +}