Add VM translator, Jack analyzer and compiler
This commit is contained in:
10
jack_analyzer/Cargo.toml
Normal file
10
jack_analyzer/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "jack_analyzer"
|
||||
version = "0.1.0"
|
||||
authors = ["Felix Martin <mail@felixm.de>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
xmltree = "^0"
|
||||
122
jack_analyzer/src/main.rs
Normal file
122
jack_analyzer/src/main.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
mod parser;
|
||||
mod tokenizer;
|
||||
use std::env;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use xmltree;
|
||||
use xmltree::Element;
|
||||
use xmltree::EmitterConfig;
|
||||
use xmltree::XMLNode;
|
||||
|
||||
pub fn create_node(token: &tokenizer::Token) -> xmltree::XMLNode {
|
||||
use tokenizer::Token::*;
|
||||
let (tag, text) = match token {
|
||||
Keyword(text) => ("keyword", text),
|
||||
Symbol(text) => ("symbol", text),
|
||||
Identifier(text) => ("identifier", text),
|
||||
IntConst(text) => ("integerConstant", text),
|
||||
StringConst(text) => ("stringConstant", text),
|
||||
};
|
||||
let mut e = xmltree::Element::new(tag);
|
||||
let text = match text.as_str() {
|
||||
"<" => "<",
|
||||
">" => ">",
|
||||
"\"" => """,
|
||||
"&" => "&",
|
||||
"'" => "'",
|
||||
_ => text.as_str(),
|
||||
};
|
||||
let t = XMLNode::Text(text.to_string());
|
||||
e.children.push(t);
|
||||
return xmltree::XMLNode::Element(e);
|
||||
}
|
||||
|
||||
fn main() {
    // True when `filename` is an existing file ending in `.jack`.
    // NOTE(review): `p.extension().unwrap()` panics for an existing file
    // with no extension (e.g. a `Makefile` passed as an argument) —
    // confirm whether such inputs can occur.
    fn is_jack_file(filename: &Path) -> bool {
        let p = Path::new(filename);
        if p.is_file() && (p.extension().unwrap() == OsStr::new("jack")) {
            return true;
        }
        return false;
    }

    // Translate every *.jack file found directly inside `directory`
    // (non-recursive).
    fn translate_dir(directory: &Path) {
        let paths = fs::read_dir(directory).unwrap();
        for path in paths {
            let filename = path.unwrap().path();
            if is_jack_file(&filename) {
                translate_single_file(filename.as_path())
            }
        }
    }

    // Serialize `root` as indented XML into `file`. Text is escaped
    // manually in create_node (the emitter's own escaping is off, see the
    // comments below), and the XML declaration the emitter always writes
    // on the first line is stripped again afterwards.
    fn write_xml(file: &String, root: xmltree::Element) {
        let mut config = EmitterConfig::new();
        config.perform_indent = true;
        // With xmltree 0.1.0
        // config.perform_escaping = true;
        // does not work when perform_indent is also true.
        // Therefore I am escaping manually and setting this to false.
        config.perform_escaping = false;
        config.normalize_empty_elements = false;
        let f = File::create(file.to_string()).unwrap();
        root.write_with_config(f, config).unwrap();
        // With xmltree 0.1.0
        // config.write_document_declaration = false;
        // did not work so I am removing that line manually.
        remove_first_line(&file);
    }

    // Write the flat token list as <tokens>...</tokens> into `FooT.xml`
    // next to the input file (the name the course's comparer expects).
    fn write_token_xml(input_file: &Path, tokens: &Vec<tokenizer::Token>) {
        let mut root = Element::new("tokens");

        for token in tokens {
            let n = create_node(token);
            root.children.push(n);
        }

        let input_str = input_file.to_str().unwrap();
        let output_file = str::replace(input_str, ".jack", "T.xml");
        write_xml(&output_file, root);
    }

    // Write the parse tree into `Foo.xml` next to the input file.
    fn write_parse_tree_xml(input_file: &Path, root: xmltree::Element) {
        let input_str = input_file.to_str().unwrap();
        let output_file = str::replace(input_str, ".jack", ".xml");
        write_xml(&output_file, root);
    }

    // Rewrite `filename` without its first line (used to drop the XML
    // declaration xmltree always emits).
    fn remove_first_line(filename: &String) {
        let contents = fs::read_to_string(filename).expect("Unable to read file");
        let mut lines = contents.lines();
        let mut output = String::new();
        lines.next(); // skipping the first line
        for line in lines {
            output.push_str(line);
            output.push_str("\n");
        }
        fs::write(filename, output).expect("Unable to write file");
    }

    // Tokenize one .jack file, dump the token XML, then parse the tokens
    // and dump the parse-tree XML.
    fn translate_single_file(input_file: &Path) {
        let tokens = tokenizer::tokenize_file(input_file);
        write_token_xml(input_file, &tokens);
        let parse_tree = parser::parse_tokens(tokens);
        write_parse_tree_xml(input_file, parse_tree);
    }

    // Each command-line argument is either a single .jack file or a
    // directory containing some.
    let args: Vec<String> = env::args().collect();

    for arg in &args[1..] {
        let arg_path = Path::new(arg);
        if is_jack_file(&arg_path) {
            translate_single_file(&arg_path);
        } else if arg_path.is_dir() {
            translate_dir(&arg_path);
        } else {
            println!("{} is not a *.jack file or directory!", arg);
        }
    }
}
|
||||
412
jack_analyzer/src/parser.rs
Normal file
412
jack_analyzer/src/parser.rs
Normal file
@@ -0,0 +1,412 @@
|
||||
use crate::create_node;
|
||||
use crate::tokenizer::Token;
|
||||
use crate::tokenizer::Token::*;
|
||||
use xmltree::Element;
|
||||
|
||||
type Tokens<'a> = std::iter::Peekable<std::slice::Iter<'a, Token>>;
|
||||
|
||||
pub fn parse_tokens(tokens: Vec<Token>) -> Element {
|
||||
let mut tokens: Tokens = tokens.iter().peekable();
|
||||
let mut root = Element::new("class");
|
||||
compile_class(&mut tokens, &mut root);
|
||||
if tokens.len() != 0 {
|
||||
println!("Did not parse all tokens!");
|
||||
}
|
||||
root
|
||||
}
|
||||
|
||||
fn eat_specific_keyword_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Keyword(string) if string == expected => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Keyword(\"{}\")", t, expected),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_specific_symbol_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Symbol(string) if string == expected => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Symbol(\"{}\")", t, expected),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_identifier_token(tokens: &mut Tokens, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Identifier(_) => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Identifier(_)", t),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_keyword_token(tokens: &mut Tokens, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Keyword(_) => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Keyword(_)", t),
|
||||
}
|
||||
}
|
||||
|
||||
/// Consume a type token: a keyword (int/char/boolean/void) or a class-name
/// identifier. Peeks first so the matching eater performs the consume.
/// NOTE(review): any keyword passes here, not only type keywords — confirm
/// whether stricter validation is wanted.
fn eat_type_token(tokens: &mut Tokens, tree: &mut Element) {
    let token = tokens.peek().unwrap();
    match token {
        Keyword(_) => eat_keyword_token(tokens, tree),
        Identifier(_) => eat_identifier_token(tokens, tree),
        t => println!("Unexpected {:?} - Keyword(type)/Identifier(_)", t),
    }
}
|
||||
|
||||
/// class: 'class' className '{' classVarDec* subroutineDec* '}'
fn compile_class(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "class", tree);
    eat_identifier_token(tokens, tree);
    eat_specific_symbol_token(tokens, "{", tree);

    // classVarDec*
    loop {
        match tokens.peek().unwrap() {
            Keyword(string) if string == "static" || string == "field" => {
                let mut child_tree = Element::new("classVarDec");
                compile_class_var_dec(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }

    // subroutineDec*
    loop {
        match tokens.peek().unwrap() {
            Keyword(string)
                if string == "constructor" || string == "function" || string == "method" =>
            {
                let mut child_tree = Element::new("subroutineDec");
                compile_subroutine(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }
    eat_specific_symbol_token(tokens, "}", tree);
}
|
||||
|
||||
/// classVarDec: ('static' | 'field') type varName (',' varName)* ';'
fn compile_class_var_dec(tokens: &mut Tokens, tree: &mut Element) {
    eat_keyword_token(tokens, tree); // 'static' or 'field' (checked by caller's peek)
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);
    // (',' varName)*
    loop {
        match tokens.peek().unwrap() {
            Symbol(s) if s == "," => {
                eat_specific_symbol_token(tokens, ",", tree);
                eat_identifier_token(tokens, tree);
            }
            _ => break,
        }
    }
    eat_specific_symbol_token(tokens, ";", tree);
}
|
||||
|
||||
/// subroutineDec: ('constructor' | 'function' | 'method') ('void' | type)
/// subroutineName '(' parameterList ')' subroutineBody
fn compile_subroutine(tokens: &mut Tokens, tree: &mut Element) {
    eat_keyword_token(tokens, tree); // constructor/function/method
    eat_type_token(tokens, tree); // return type ('void' counts as a keyword)
    eat_identifier_token(tokens, tree); // subroutine name
    eat_specific_symbol_token(tokens, "(", tree);

    let mut child_tree = Element::new("parameterList");
    compile_parameter_list(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    eat_specific_symbol_token(tokens, ")", tree);

    let mut child_tree = Element::new("subroutineBody");
    compile_subroutine_body(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));
}
|
||||
|
||||
/// parameterList: ((type varName) (',' type varName)*)? — the list is
/// empty when the very next token is the closing ')'.
fn compile_parameter_list(tokens: &mut Tokens, tree: &mut Element) {
    // Empty parameter list: leave the <parameterList> element empty.
    match tokens.peek().unwrap() {
        Symbol(s) if s == ")" => return,
        _ => (),
    }
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);

    // (',' type varName)*
    loop {
        match tokens.peek().unwrap() {
            Symbol(s) if s == "," => {
                eat_specific_symbol_token(tokens, ",", tree);
                eat_type_token(tokens, tree);
                eat_identifier_token(tokens, tree);
            }
            _ => break,
        }
    }
}
|
||||
|
||||
/// subroutineBody: '{' varDec* statements '}'
fn compile_subroutine_body(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_symbol_token(tokens, "{", tree);

    // varDec* — all local declarations precede the statements.
    loop {
        match tokens.peek().unwrap() {
            Keyword(s) if s == "var" => {
                let mut child_tree = Element::new("varDec");
                compile_var_dec(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        };
    }

    let mut child_tree = Element::new("statements");
    compile_statements(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    eat_specific_symbol_token(tokens, "}", tree);
}
|
||||
|
||||
/// varDec: 'var' type varName (',' varName)* ';'
fn compile_var_dec(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "var", tree);
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);

    // (',' varName)*
    loop {
        match tokens.peek().unwrap() {
            Symbol(s) if s == "," => {
                eat_specific_symbol_token(tokens, ",", tree);
                eat_identifier_token(tokens, tree);
            }
            _ => break,
        }
    }

    eat_specific_symbol_token(tokens, ";", tree);
}
|
||||
|
||||
/// statements: statement* — dispatches on the leading keyword (do / let /
/// while / return / if) and stops at the first token that does not start a
/// statement (normally the enclosing '}').
fn compile_statements(tokens: &mut Tokens, tree: &mut Element) {
    loop {
        match tokens.peek().unwrap() {
            Keyword(s) if s == "do" => {
                let mut child_tree = Element::new("doStatement");
                compile_do(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            Keyword(s) if s == "let" => {
                let mut child_tree = Element::new("letStatement");
                compile_let(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            Keyword(s) if s == "while" => {
                let mut child_tree = Element::new("whileStatement");
                compile_while(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            Keyword(s) if s == "return" => {
                let mut child_tree = Element::new("returnStatement");
                compile_return(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            Keyword(s) if s == "if" => {
                let mut child_tree = Element::new("ifStatement");
                compile_if(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }
}
|
||||
|
||||
fn compile_do(tokens: &mut Tokens, mut tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "do", tree);
|
||||
eat_identifier_token(tokens, &mut tree);
|
||||
compile_subroutine_call(tokens, &mut tree);
|
||||
eat_specific_symbol_token(tokens, ";", tree);
|
||||
}
|
||||
|
||||
/// letStatement: 'let' varName ('[' expression ']')? '=' expression ';'
fn compile_let(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "let", tree);
    eat_identifier_token(tokens, tree);

    // Optional array subscript on the assignment target.
    match tokens.peek().unwrap() {
        Symbol(s) if s == "[" => {
            eat_specific_symbol_token(tokens, "[", tree);
            let mut child_tree = Element::new("expression");
            compile_expression(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
            eat_specific_symbol_token(tokens, "]", tree);
        }
        _ => (),
    }
    eat_specific_symbol_token(tokens, "=", tree);
    let mut child_tree = Element::new("expression");
    compile_expression(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));
    eat_specific_symbol_token(tokens, ";", tree);
}
|
||||
|
||||
/// whileStatement: 'while' '(' expression ')' '{' statements '}'
fn compile_while(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "while", tree);
    eat_specific_symbol_token(tokens, "(", tree);
    let mut child_tree = Element::new("expression");
    compile_expression(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));
    eat_specific_symbol_token(tokens, ")", tree);

    eat_specific_symbol_token(tokens, "{", tree);
    let mut child_tree = Element::new("statements");
    compile_statements(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));
    eat_specific_symbol_token(tokens, "}", tree);
}
|
||||
|
||||
/// returnStatement: 'return' expression? ';'
fn compile_return(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "return", tree);
    // An immediate ';' means a bare `return`; anything else is the return
    // expression.
    match tokens.peek().unwrap() {
        Symbol(s) if s == ";" => (),
        _ => {
            let mut child_tree = Element::new("expression");
            compile_expression(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
        }
    }
    eat_specific_symbol_token(tokens, ";", tree);
}
|
||||
|
||||
/// ifStatement: 'if' '(' expression ')' '{' statements '}'
/// ('else' '{' statements '}')?
fn compile_if(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "if", tree);
    eat_specific_symbol_token(tokens, "(", tree);

    let mut child_tree = Element::new("expression");
    compile_expression(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    eat_specific_symbol_token(tokens, ")", tree);

    eat_specific_symbol_token(tokens, "{", tree);

    let mut child_tree = Element::new("statements");
    compile_statements(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    eat_specific_symbol_token(tokens, "}", tree);

    // Optional else clause.
    match tokens.peek().unwrap() {
        Keyword(s) if s == "else" => {
            eat_specific_keyword_token(tokens, "else", tree);
            eat_specific_symbol_token(tokens, "{", tree);

            let mut child_tree = Element::new("statements");
            compile_statements(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));

            eat_specific_symbol_token(tokens, "}", tree);
        }
        _ => (),
    }
}
|
||||
|
||||
/// expression: term (op term)* — `op` is any single-character symbol in
/// OPERATORS.
fn compile_expression(tokens: &mut Tokens, tree: &mut Element) {
    let mut child_tree = Element::new("term");
    compile_term(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    // (op term)*
    loop {
        match tokens.peek().unwrap() {
            Symbol(s) if OPERATORS.contains(&s.chars().next().unwrap()) => {
                eat_specific_symbol_token(tokens, s, tree);
                let mut child_tree = Element::new("term");
                compile_term(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }
}
|
||||
|
||||
/// term: integerConstant | stringConstant | keywordConstant | varName |
/// varName '[' expression ']' | subroutineCall | '(' expression ')' |
/// unaryOp term
///
/// NOTE(review): the final `_ => ()` silently drops any unexpected token
/// instead of reporting it like the eat_* helpers do — confirm this is
/// intentional.
fn compile_term(tokens: &mut Tokens, mut tree: &mut Element) {
    let token = tokens.next().unwrap();
    match token {
        // Identifier: plain variable, array access, or subroutine call —
        // decided by one token of lookahead.
        Identifier(_) => match tokens.peek().unwrap() {
            Symbol(s) if s == "[" => {
                tree.children.push(create_node(token));
                eat_specific_symbol_token(tokens, "[", tree);
                let mut child_tree = Element::new("expression");
                compile_expression(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
                eat_specific_symbol_token(tokens, "]", tree);
            }
            Symbol(s) if s == "(" || s == "." => {
                // let mut child_tree = Element::new("subroutineCall");
                tree.children.push(create_node(token));
                compile_subroutine_call(tokens, &mut tree);
                // tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => tree.children.push(create_node(token)),
        },
        IntConst(_) => tree.children.push(create_node(token)),
        StringConst(_) => tree.children.push(create_node(token)),
        // keywordConstant: true | false | null | this
        Keyword(s) if s == "true" || s == "false" || s == "null" || s == "this" => {
            tree.children.push(create_node(token));
        }
        // unaryOp term
        Symbol(s) if s == "-" || s == "~" => {
            tree.children.push(create_node(token));
            let mut child_tree = Element::new("term");
            compile_term(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
        }
        // Parenthesized sub-expression.
        Symbol(s) if s == "(" => {
            tree.children.push(create_node(token));
            let mut child_tree = Element::new("expression");
            compile_expression(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
            eat_specific_symbol_token(tokens, ")", tree);
        }

        _ => (),
    }
}
|
||||
|
||||
/// expressionList: (expression (',' expression)*)? — empty when the next
/// token is the closing ')'.
fn compile_expression_list(tokens: &mut Tokens, tree: &mut Element) {
    // Empty argument list.
    match tokens.peek().unwrap() {
        Symbol(s) if s == ")" => return,
        _ => (),
    }

    let mut child_tree = Element::new("expression");
    compile_expression(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    // (',' expression)*
    loop {
        match tokens.peek().unwrap() {
            Symbol(s) if s == "," => {
                eat_specific_symbol_token(tokens, ",", tree);
                let mut child_tree = Element::new("expression");
                compile_expression(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }
}
|
||||
|
||||
fn compile_subroutine_call(tokens: &mut Tokens, mut tree: &mut Element) {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "." => {
|
||||
eat_specific_symbol_token(tokens, ".", &mut tree);
|
||||
eat_identifier_token(tokens, &mut tree);
|
||||
eat_specific_symbol_token(tokens, "(", &mut tree);
|
||||
let mut child_tree = Element::new("expressionList");
|
||||
compile_expression_list(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, ")", &mut tree);
|
||||
}
|
||||
Symbol(s) if s == "(" => {
|
||||
eat_specific_symbol_token(tokens, "(", &mut tree);
|
||||
let mut child_tree = Element::new("expressionList");
|
||||
compile_expression_list(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, ")", &mut tree);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
/// Binary operators allowed between terms in an expression (Jack `op`).
const OPERATORS: &[char] = &['+', '-', '*', '/', '&', '|', '<', '>', '='];
|
||||
177
jack_analyzer/src/tokenizer.rs
Normal file
177
jack_analyzer/src/tokenizer.rs
Normal file
@@ -0,0 +1,177 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Skip any run of `//` line comments and `/* ... */` block comments
/// starting at `index`, returning the index of the first character after
/// them. Returns `index` unchanged when no comment starts there.
///
/// Fixes relative to the original: every access is bounds-checked, so a
/// file ending in a lone `/`, an unterminated `//` line, or an
/// unterminated `/* ...` no longer panics (they now consume to the end of
/// input). The recursion that skipped consecutive comments is replaced by
/// an equivalent loop. Takes `&[char]`; `&Vec<char>` call sites coerce.
fn eat_comment(chars: &[char], index: usize) -> usize {
    let len = chars.len();
    let mut index = index;
    loop {
        let start_index = index;
        if index + 1 < len && chars[index] == '/' && chars[index + 1] == '/' {
            // Line comment: skip to the end of the line (or of the input).
            index += 2;
            while index < len && chars[index] != '\n' {
                index += 1;
            }
            if index < len {
                index += 1; // consume the newline itself
            }
        } else if index + 1 < len && chars[index] == '/' && chars[index + 1] == '*' {
            // Block comment: skip to the closing `*/` (or end of input).
            index += 2;
            while index + 1 < len && !(chars[index] == '*' && chars[index + 1] == '/') {
                index += 1;
            }
            index = (index + 2).min(len);
        }
        // No progress means no (further) comment starts here.
        if start_index == index {
            return index;
        }
    }
}
|
||||
|
||||
fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let mut index = index;
|
||||
let c = chars[index];
|
||||
if SYMBOLS.contains(&c) {
|
||||
index += 1;
|
||||
let t = Token::Symbol(c.to_string());
|
||||
tokens.push(t);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let start_index = index;
|
||||
let mut index = index;
|
||||
let mut number = String::new();
|
||||
|
||||
while chars[index].is_ascii_digit() {
|
||||
number.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
if start_index == index {
|
||||
return index;
|
||||
}
|
||||
|
||||
let t = Token::IntConst(number);
|
||||
tokens.push(t);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let start_index = index;
|
||||
let mut index = index;
|
||||
|
||||
if chars[index] != '"' {
|
||||
return index;
|
||||
}
|
||||
index += 1;
|
||||
|
||||
while chars[index] != '"' {
|
||||
index += 1;
|
||||
}
|
||||
index += 1;
|
||||
|
||||
let s = chars[start_index + 1..index - 1].into_iter().collect();
|
||||
let t = Token::StringConst(s);
|
||||
tokens.push(t);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let mut index = index;
|
||||
let mut token_string = String::new();
|
||||
|
||||
if !chars[index].is_ascii_alphabetic() {
|
||||
return index;
|
||||
}
|
||||
token_string.push(chars[index]);
|
||||
index += 1;
|
||||
|
||||
while chars[index].is_alphanumeric() {
|
||||
token_string.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
if KEYWORDS.contains(&token_string.as_str()) {
|
||||
let t = Token::Keyword(token_string);
|
||||
tokens.push(t);
|
||||
} else {
|
||||
let t = Token::Identifier(token_string);
|
||||
tokens.push(t);
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
pub fn tokenize_file(file: &Path) -> Vec<Token> {
|
||||
let mut tokens = vec![];
|
||||
let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
|
||||
let length = chars.len();
|
||||
let mut index: usize = 0;
|
||||
|
||||
while index < length {
|
||||
index = eat_comment(&chars, index);
|
||||
let c = chars[index];
|
||||
|
||||
if c.is_whitespace() {
|
||||
index += 1;
|
||||
} else if SYMBOLS.contains(&c) {
|
||||
index = parse_symbol(&chars, &mut tokens, index);
|
||||
} else if c.is_ascii_alphabetic() {
|
||||
index = parse_keyword_or_identifier(&chars, &mut tokens, index);
|
||||
} else if c.is_ascii_digit() {
|
||||
index = parse_integer_constant(&chars, &mut tokens, index);
|
||||
} else if c == '"' {
|
||||
index = parse_string_constant(&chars, &mut tokens, index);
|
||||
} else {
|
||||
println!("Unexpected char {:?}", c);
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/// Lexical token categories of the Jack language. Each variant carries the
/// token text as scanned (IntConst keeps the digits as a string;
/// StringConst excludes the surrounding quotes).
#[derive(Debug)]
pub enum Token {
    Keyword(String),
    Symbol(String),
    Identifier(String),
    IntConst(String),
    StringConst(String),
}
|
||||
|
||||
// Reserved words of the Jack language; any other scanned word is an
// identifier.
#[allow(dead_code)]
const KEYWORDS: &[&str] = &[
    "class",
    "constructor",
    "function",
    "method",
    "field",
    "static",
    "var",
    "int",
    "char",
    "boolean",
    "void",
    "true",
    "false",
    "null",
    "this",
    "let",
    "do",
    "if",
    "else",
    "while",
    "return",
];

// Single-character symbol tokens (delimiters and operators).
const SYMBOLS: &[char] = &[
    '{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/', '&', '|', '<', '>', '=', '~',
];
|
||||
|
||||
/// Debug helper: print `chars[start..stop]` as one quoted string.
#[allow(dead_code)]
fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
    let s = chars[start..stop].iter().collect::<String>();
    println!("{:?}", s);
}
|
||||
9
jack_compiler/Cargo.toml
Normal file
9
jack_compiler/Cargo.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
[package]
|
||||
name = "jack_compiler"
|
||||
version = "0.1.0"
|
||||
authors = ["Felix Martin <mail@felixm.de>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
178
jack_compiler/src/code_writer.rs
Normal file
178
jack_compiler/src/code_writer.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
use crate::symbol_table::get_empty_symbol_table;
|
||||
use crate::symbol_table::SymbolTable;
|
||||
use crate::tokenizer::Keyword;
|
||||
use crate::tokenizer::Symbol;
|
||||
use crate::tokenizer::Token;
|
||||
use std::fs;
|
||||
|
||||
/// Accumulates the VM code generated for one .jack class, together with
/// the symbol table and the per-subroutine state the code generator needs.
#[derive(Debug)]
pub struct Writer {
    pub table: SymbolTable,      // class- and subroutine-scope symbols
    pub code: String,            // generated VM instructions, appended to
    pub class_name: String,      // name of the class being compiled
    pub subroutine_name: String, // name of the current subroutine
    pub in_constructor: bool,    // current subroutine is a constructor
    pub in_method: bool,         // current subroutine is a method
    if_label_count: usize,       // makes if-labels unique per subroutine
    while_label_count: usize,    // makes while-labels unique per subroutine
}
||||
|
||||
/// Construct a Writer with an empty symbol table, an empty code buffer,
/// and zeroed per-subroutine state.
pub fn get_code_writer() -> Writer {
    Writer {
        table: get_empty_symbol_table(),
        code: String::new(),
        class_name: String::new(),
        subroutine_name: String::new(),
        in_constructor: false,
        in_method: false,
        if_label_count: 0,
        while_label_count: 0,
    }
}
||||
|
||||
/// Map a symbol-kind keyword to its VM memory-segment name. `Field` maps
/// to "this" because object fields live in the `this` segment. Panics on
/// keywords that do not name a segment.
fn segment_to_string(segment: Keyword) -> String {
    use crate::tokenizer::Keyword::*;
    match segment {
        Constant => String::from("constant"),
        Argument => String::from("argument"),
        Local => String::from("local"),
        Temp => String::from("temp"),
        Field => String::from("this"),
        Static => String::from("static"),
        That => String::from("that"),
        Pointer => String::from("pointer"),
        _ => panic!("Unexpected segment {:?}", segment),
    }
}
|
||||
|
||||
impl Writer {
    /// Dump the accumulated VM code to `file`.
    pub fn write_to_file(&self, file: &String) {
        fs::write(file, self.code.to_string()).expect("Unable to write file");
    }

    /// Reset per-subroutine state (subroutine-scope symbols, subroutine
    /// kind flags, label counters) before compiling the next subroutine.
    pub fn start_subroutine(&mut self) {
        self.table.remove_subroutine_symbols();
        self.in_constructor = false;
        self.in_method = false;
        self.if_label_count = 0;
        self.while_label_count = 0;
    }

    /// Reset class-level state: a fresh symbol table per class.
    pub fn start_class(&mut self) {
        self.table = get_empty_symbol_table();
    }

    /// Record `name` in the symbol table with its type and kind.
    pub fn define_symbol(&mut self, name: String, symbol_type: Token, kind: Keyword) {
        self.table.define_symbol(name, symbol_type, kind);
    }

    /// Emit `push <segment> <index>`.
    pub fn write_push(&mut self, segment: Keyword, index: usize) {
        let segment = segment_to_string(segment);
        let s = format!("push {} {}\n", segment, index);
        self.code.push_str(&s);
    }

    /// Emit `pop <segment> <index>`.
    pub fn write_pop(&mut self, segment: Keyword, index: usize) {
        let segment = segment_to_string(segment);
        let s = format!("pop {} {}\n", segment, index);
        self.code.push_str(&s);
    }

    /// Emit the VM instruction for an operator; `*` and `/` have no VM
    /// opcode and become calls into the Math OS class.
    pub fn write_arithmetic(&mut self, op: Symbol) {
        use crate::tokenizer::Symbol::*;
        let s = match op {
            Plus => "add\n",
            Minus => "sub\n",
            Mul => "call Math.multiply 2\n",
            Div => "call Math.divide 2\n",
            ExclusiveAnd => "and\n",
            ExclusiveOr => "or\n",
            Smaller => "lt\n",
            Greater => "gt\n",
            Equal => "eq\n",
            UnaryMinus => "neg\n",
            Not => "not\n",
            _ => panic!("Unsupported operator {:?}.", op),
        };
        self.code.push_str(&s);
    }

    /// Emit the `function Class.sub nLocals` header plus the prologue:
    /// constructors allocate the object and point `this` at it; methods
    /// re-point `this` at argument 0.
    pub fn write_function(&mut self) {
        let n_locals = self.table.get_count(Keyword::Local);
        let s = format!(
            "function {}.{} {}\n",
            self.class_name, self.subroutine_name, n_locals
        );
        self.code.push_str(&s);

        if self.in_constructor {
            // Allocate class memory and initialize This.
            let n_fields = self.table.get_count(Keyword::Field);
            self.write_push(Keyword::Constant, n_fields);
            self.write_call(&"Memory".to_string(), &"alloc".to_string(), 1);
            self.write_pop(Keyword::Pointer, 0);
        } else if self.in_method {
            self.write_push(Keyword::Argument, 0);
            self.write_pop(Keyword::Pointer, 0);
        }
    }

    /// Emit `call Class.sub nArgs`.
    pub fn write_call(&mut self, class_name: &String, subroutine_name: &String, n_args: usize) {
        let s = format!("call {}.{} {}\n", class_name, subroutine_name, n_args);
        self.code.push_str(&s);
    }

    /// Emit `return`.
    pub fn write_return(&mut self) {
        self.code.push_str("return\n");
    }

    /// Pop the top of the stack into the named variable's segment/index.
    pub fn write_var_assignment(&mut self, var_name: &String) {
        let index = self.table.index_of(var_name);
        let symbol_type = self.table.kind_of(var_name);
        // if symbol_type == Keyword::Static {
        //     println!("{:?}", self.table);
        //     panic!("assignment to static not supported, yet");
        // }
        self.write_pop(symbol_type, index);
    }

    /// Push the named variable's current value onto the stack.
    pub fn write_var_read(&mut self, var_name: &String) {
        let index = self.table.index_of(var_name);
        let symbol_type = self.table.kind_of(var_name);
        // if symbol_type == Keyword::Static {
        //     panic!("read from static not supported, yet");
        // }
        self.write_push(symbol_type, index);
    }

    /// Emit `label <name>`.
    pub fn write_label(&mut self, label_name: &String) {
        let s = format!("label {}\n", label_name);
        self.code.push_str(&s);
    }

    /// Emit `if-goto <name>`.
    pub fn write_if_goto(&mut self, label_name: &String) {
        let s = format!("if-goto {}\n", label_name);
        self.code.push_str(&s);
    }

    /// Emit `goto <name>`.
    pub fn write_goto(&mut self, label_name: &String) {
        let s = format!("goto {}\n", label_name);
        self.code.push_str(&s);
    }

    /// Fresh (IF_FALSE, IF_TRUE, IF_END) label triple; the shared counter
    /// makes the labels unique within the current subroutine.
    pub fn get_if_labels(&mut self) -> (String, String, String) {
        let l1 = format!("IF_FALSE{}", self.if_label_count);
        let l2 = format!("IF_TRUE{}", self.if_label_count);
        let l3 = format!("IF_END{}", self.if_label_count);
        self.if_label_count += 1;
        return (l1, l2, l3);
    }

    /// Fresh (WHILE_EXP, WHILE_START, WHILE_END) label triple.
    pub fn get_while_labels(&mut self) -> (String, String, String) {
        let l1 = format!("WHILE_EXP{}", self.while_label_count);
        let l2 = format!("WHILE_START{}", self.while_label_count);
        let l3 = format!("WHILE_END{}", self.while_label_count);
        self.while_label_count += 1;
        return (l1, l2, l3);
    }
}
|
||||
53
jack_compiler/src/main.rs
Normal file
53
jack_compiler/src/main.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
mod parser;
|
||||
mod tokenizer;
|
||||
mod symbol_table;
|
||||
mod code_writer;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::ffi::OsStr;
|
||||
use crate::code_writer::get_code_writer;
|
||||
|
||||
|
||||
fn main() {
    // True when `filename` is an existing file ending in `.jack`.
    // NOTE(review): `p.extension().unwrap()` panics for an existing file
    // with no extension — confirm whether such arguments can occur.
    fn is_jack_file(filename: &Path) -> bool {
        let p = Path::new(filename);
        if p.is_file() && (p.extension().unwrap() == OsStr::new("jack")) {
            return true;
        }
        return false;
    }

    // Compile every *.jack file found directly inside `directory`
    // (non-recursive).
    fn translate_dir(directory: &Path) {
        let paths = fs::read_dir(directory).unwrap();
        for path in paths {
            let filename = path.unwrap().path();
            if is_jack_file(&filename) {
                translate_single_file(filename.as_path())
            }
        }
    }

    // Tokenize and compile one .jack file, writing Foo.vm next to Foo.jack.
    fn translate_single_file(input_file: &Path) {
        let mut tokens = tokenizer::tokenize_file(input_file);
        let mut writer = get_code_writer();
        println!("Compiling {:?}", input_file);
        parser::compile_class(&mut tokens, &mut writer);
        let output_file = str::replace(input_file.to_str().unwrap(), ".jack", ".vm");
        writer.write_to_file(&output_file);
    }

    // Each command-line argument is a .jack file or a directory of them.
    let args: Vec<String> = env::args().collect();

    for arg in &args[1..] {
        let arg_path = Path::new(arg);
        println!("{:?}", arg_path);
        if is_jack_file(&arg_path) {
            translate_single_file(&arg_path);
        } else if arg_path.is_dir() {
            translate_dir(&arg_path);
        } else {
            println!("{} is not a *.jack file or directory!", arg);
        }
    }
}
|
||||
348
jack_compiler/src/parser.rs
Normal file
348
jack_compiler/src/parser.rs
Normal file
@@ -0,0 +1,348 @@
|
||||
use crate::code_writer::Writer;
|
||||
use crate::tokenizer::identifier;
|
||||
use crate::tokenizer::int_const;
|
||||
use crate::tokenizer::string_const;
|
||||
use crate::tokenizer::Keyword::*;
|
||||
use crate::tokenizer::Symbol::*;
|
||||
use crate::tokenizer::Token::{Keyword, Symbol};
|
||||
use crate::tokenizer::Tokens;
|
||||
|
||||
pub fn compile_class(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
writer.start_class();
|
||||
tokens.eat(Keyword(Class));
|
||||
let class_name = tokens.eat(identifier()).to_string();
|
||||
writer.class_name = class_name.to_string();
|
||||
tokens.eat(Symbol(LCurly));
|
||||
|
||||
while tokens.is_one_of(vec![Keyword(Static), Keyword(Field)]) {
|
||||
compile_class_var_dec(tokens, writer);
|
||||
}
|
||||
|
||||
while tokens.is_one_of(vec![
|
||||
Keyword(Constructor),
|
||||
Keyword(Function),
|
||||
Keyword(Method),
|
||||
]) {
|
||||
compile_subroutine(tokens, writer);
|
||||
}
|
||||
|
||||
tokens.eat(Symbol(RCurly));
|
||||
}
|
||||
|
||||
fn compile_class_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
let kind = tokens.eat(Keyword(AnyKeyword)).to_keyword();
|
||||
let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
|
||||
let name = tokens.eat(identifier()).to_string();
|
||||
writer.define_symbol(name, symbol_type.clone(), kind);
|
||||
|
||||
while tokens.is_sequence(vec![Symbol(Comma), identifier()]) {
|
||||
tokens.eat(Symbol(Comma));
|
||||
let name = tokens.eat(identifier()).to_string();
|
||||
writer.define_symbol(name, symbol_type.clone(), kind);
|
||||
}
|
||||
|
||||
tokens.eat(Symbol(Semicolon));
|
||||
}
|
||||
|
||||
fn compile_subroutine(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
writer.start_subroutine();
|
||||
|
||||
let routine_keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword();
|
||||
let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
|
||||
|
||||
match routine_keyword {
|
||||
Method => {
|
||||
writer.in_method = true;
|
||||
writer.define_symbol("this".to_string(), symbol_type.clone(), Argument);
|
||||
},
|
||||
Function => (),
|
||||
Constructor => { writer.in_constructor = true; },
|
||||
s => panic!("Unsupported routine type {:?}", s),
|
||||
}
|
||||
|
||||
writer.subroutine_name = tokens.eat(identifier()).to_string();
|
||||
compile_parameter_list(tokens, writer);
|
||||
compile_subroutine_body(tokens, writer);
|
||||
}
|
||||
|
||||
fn compile_parameter_list(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
tokens.eat(Symbol(LBrace));
|
||||
if tokens.is_one_of(vec![Keyword(AnyKeyword), identifier()]) {
|
||||
let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
|
||||
let arg_name = tokens.eat(identifier()).to_string();
|
||||
writer.define_symbol(arg_name, symbol_type.clone(), Argument);
|
||||
|
||||
while tokens.is(Symbol(Comma)) {
|
||||
tokens.eat(Symbol(Comma));
|
||||
let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
|
||||
let arg_name = tokens.eat(identifier()).to_string();
|
||||
writer.define_symbol(arg_name, symbol_type.clone(), Argument);
|
||||
}
|
||||
}
|
||||
tokens.eat(Symbol(RBrace));
|
||||
}
|
||||
|
||||
fn compile_subroutine_body(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
tokens.eat(Symbol(LCurly));
|
||||
|
||||
while tokens.is(Keyword(Var)) {
|
||||
compile_var_dec(tokens, writer);
|
||||
}
|
||||
|
||||
writer.write_function();
|
||||
compile_statements(tokens, writer);
|
||||
tokens.eat(Symbol(RCurly));
|
||||
}
|
||||
|
||||
fn compile_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
tokens.eat(Keyword(Var));
|
||||
let symbol_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
|
||||
let var_name = tokens.eat(identifier()).to_string();
|
||||
writer.define_symbol(var_name, symbol_type.clone(), Local);
|
||||
|
||||
while tokens.is(Symbol(Comma)) {
|
||||
tokens.eat(Symbol(Comma));
|
||||
let var_name = tokens.eat(identifier()).to_string();
|
||||
writer.define_symbol(var_name, symbol_type.clone(), Local);
|
||||
}
|
||||
|
||||
tokens.eat(Symbol(Semicolon));
|
||||
}
|
||||
|
||||
fn compile_statements(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
loop {
|
||||
if tokens.is(Keyword(Let)) {
|
||||
compile_let(tokens, writer);
|
||||
} else if tokens.is(Keyword(If)) {
|
||||
compile_if(tokens, writer);
|
||||
} else if tokens.is(Keyword(While)) {
|
||||
compile_while(tokens, writer);
|
||||
} else if tokens.is(Keyword(Do)) {
|
||||
compile_do(tokens, writer);
|
||||
} else if tokens.is(Keyword(Return)) {
|
||||
compile_return(tokens, writer);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_do(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
tokens.eat(Keyword(Do));
|
||||
compile_subroutine_call(tokens, writer);
|
||||
writer.write_pop(Temp, 0);
|
||||
tokens.eat(Symbol(Semicolon));
|
||||
}
|
||||
|
||||
/// Compiles `'let' varName ('[' expression ']')? '=' expression ';'`.
fn compile_let(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Let));
    let var_name = tokens.eat(identifier()).to_string();

    if tokens.is(Symbol(Equal)) {
        // Plain assignment: evaluate the RHS, then pop into the variable.
        tokens.eat(Symbol(Equal));
        compile_expression(tokens, writer);
        writer.write_var_assignment(&var_name);
    } else if tokens.is(Symbol(LSquare)) {
        // Array assignment: compute base + index before evaluating the RHS.
        tokens.eat(Symbol(LSquare));
        writer.write_var_read(&var_name);
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RSquare));
        writer.write_arithmetic(Plus);
        // Address of array access is now on stack
        tokens.eat(Symbol(Equal));
        compile_expression(tokens, writer);
        // Value to assign to array is on stack
        writer.write_pop(Temp, 0); // Buffer value to assign
        writer.write_pop(Pointer, 1); // Set That to access address
        writer.write_push(Temp, 0); // Restore value to assign
        writer.write_pop(That, 0); // Do actual assignment
    }

    tokens.eat(Symbol(Semicolon));
}
|
||||
|
||||
/// Compiles `'while' '(' expression ')' '{' statements '}'` using the
/// WHILE_EXP / WHILE_START / WHILE_END label triple.
/// NOTE: LBrace/RBrace are '(' and ')' in this codebase (see the tokenizer).
fn compile_while(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(While));
    let (l_while_exp, l_while_start, l_while_end) = writer.get_while_labels();

    // The condition is re-evaluated at l_while_exp on every iteration.
    writer.write_label(&l_while_exp);
    tokens.eat(Symbol(LBrace));
    compile_expression(tokens, writer);
    // Non-zero condition jumps into the body; otherwise fall through to exit.
    writer.write_if_goto(&l_while_start);
    writer.write_goto(&l_while_end);
    tokens.eat(Symbol(RBrace));

    tokens.eat(Symbol(LCurly));
    writer.write_label(&l_while_start);
    compile_statements(tokens, writer);
    // Loop back to re-test the condition.
    writer.write_goto(&l_while_exp);
    tokens.eat(Symbol(RCurly));

    writer.write_label(&l_while_end);
}
|
||||
|
||||
fn compile_return(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
tokens.eat(Keyword(Return));
|
||||
|
||||
if !tokens.is(Symbol(Semicolon)) {
|
||||
compile_expression(tokens, writer);
|
||||
} else {
|
||||
writer.write_push(Constant, 0);
|
||||
}
|
||||
writer.write_return();
|
||||
tokens.eat(Symbol(Semicolon));
|
||||
}
|
||||
|
||||
/// Compiles `'if' '(' expression ')' '{' statements '}' ('else' '{'
/// statements '}')?` using the IF_FALSE / IF_TRUE / IF_END label triple.
fn compile_if(tokens: &mut Tokens, writer: &mut Writer) {
    let (l_false, l_true, l_end) = writer.get_if_labels();
    tokens.eat(Keyword(If));
    tokens.eat(Symbol(LBrace));
    compile_expression(tokens, writer);
    tokens.eat(Symbol(RBrace));

    // Truthy condition jumps to the then-branch, otherwise to l_false.
    writer.write_if_goto(&l_true);
    writer.write_goto(&l_false);

    tokens.eat(Symbol(LCurly));
    writer.write_label(&l_true);
    compile_statements(tokens, writer);
    tokens.eat(Symbol(RCurly));

    if tokens.is(Keyword(Else)) {
        // With an else-branch the then-branch must jump over it to l_end.
        writer.write_goto(&l_end);
        writer.write_label(&l_false);
        tokens.eat(Keyword(Else));
        tokens.eat(Symbol(LCurly));
        compile_statements(tokens, writer);
        tokens.eat(Symbol(RCurly));
        writer.write_label(&l_end);
    } else {
        // Without else, the false label is simply the end of the statement.
        writer.write_label(&l_false);
    }
}
|
||||
|
||||
fn compile_expression(tokens: &mut Tokens, writer: &mut Writer) {
|
||||
compile_term(tokens, writer);
|
||||
|
||||
while tokens.is_one_of(vec![
|
||||
Symbol(Plus),
|
||||
Symbol(Minus),
|
||||
Symbol(Mul),
|
||||
Symbol(Div),
|
||||
Symbol(ExclusiveAnd),
|
||||
Symbol(ExclusiveOr),
|
||||
Symbol(Smaller),
|
||||
Symbol(Greater),
|
||||
Symbol(Equal),
|
||||
]) {
|
||||
let s = tokens.eat(Symbol(AnySymbol)).to_symbol();
|
||||
compile_term(tokens, writer);
|
||||
writer.write_arithmetic(s);
|
||||
}
|
||||
}
|
||||
|
||||
/// Compiles a single term, leaving its value on the stack: integer, string
/// and keyword constants, array accesses, subroutine calls, parenthesised
/// expressions, unary operations, and plain variable reads. The order of
/// checks implements the term lookahead (e.g. `foo[`, `foo(`, `foo.`
/// before bare `foo`).
fn compile_term(tokens: &mut Tokens, writer: &mut Writer) {
    if tokens.is(int_const()) {
        // integerConstant
        let i = tokens.eat(int_const()).to_int();
        writer.write_push(Constant, i);
    } else if tokens.is(string_const()) {
        // stringConstant: allocate a String object, then append each byte.
        let s = tokens.eat(string_const()).to_string();
        let bytes = s.as_bytes();
        writer.write_push(Constant, bytes.len());
        writer.write_call(&"String".to_string(), &"new".to_string(), 1);
        for b in bytes {
            writer.write_push(Constant, (*b).into());
            // appendChar returns the string, keeping it on the stack.
            writer.write_call(&"String".to_string(), &"appendChar".to_string(), 2);
        }
    } else if tokens.is(Keyword(AnyKeyword)) {
        // keywordConstant: true = ~0 (-1), false/null = 0, this = pointer 0.
        let keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword();
        match keyword {
            True => {
                writer.write_push(Constant, 0);
                writer.write_arithmetic(Not);
            }
            False => writer.write_push(Constant, 0),
            Null => writer.write_push(Constant, 0),
            This => writer.write_push(Pointer, 0),
            _ => panic!("Unexpected keyword {:?}", keyword),
        }
    } else if tokens.is_sequence(vec![identifier(), Symbol(LSquare)]) {
        // arrayName
        let var_name = tokens.eat(identifier()).to_string();
        tokens.eat(Symbol(LSquare));
        writer.write_var_read(&var_name);
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RSquare));
        writer.write_arithmetic(Plus); // Address of array access is now on stack
        writer.write_pop(Pointer, 1); // Set That to address
        writer.write_push(That, 0); // Push value from array onto stack
    } else if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) {
        // subroutineCall foo()
        compile_subroutine_call(tokens, writer);
    } else if tokens.is_sequence(vec![identifier(), Symbol(Dot)]) {
        // subroutineCall foo.something
        compile_subroutine_call(tokens, writer);
    } else if tokens.is(Symbol(LBrace)) {
        // ( expression )
        tokens.eat(Symbol(LBrace));
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RBrace));
    } else if tokens.is_one_of(vec![Symbol(Minus), Symbol(Not)]) {
        // unaryOp term: Minus is mapped to the dedicated UnaryMinus symbol.
        let symbol = tokens.eat(Symbol(AnySymbol)).to_symbol();
        compile_term(tokens, writer);
        if symbol == Minus {
            writer.write_arithmetic(UnaryMinus);
        } else {
            writer.write_arithmetic(Not);
        }
    } else if tokens.is(identifier()) {
        // varName
        let var_name = tokens.eat(identifier()).to_string();
        writer.write_var_read(&var_name);
    } else {
        panic!("Unexpected token {:?} for compile_term", tokens.peek());
    }
}
|
||||
|
||||
/// Compiles `name '(' args ')'` or `receiver '.' name '(' args ')'` into a
/// VM call. For method calls the receiver object is pushed first as the
/// hidden argument 0; for `var.method()` the variable's declared type
/// becomes the called class name.
fn compile_subroutine_call(tokens: &mut Tokens, writer: &mut Writer) {
    let mut class_name = String::new();
    let mut subroutine_name = String::new();
    let mut n_args: usize = 0;

    if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) {
        // method call for 'this'
        class_name = writer.class_name.to_string();
        writer.write_push(Pointer, 0); // Push This
        n_args += 1;
        subroutine_name = tokens.eat(identifier()).to_string();
    } else if tokens.is_sequence(vec![identifier(), Symbol(Dot), identifier()]) {
        class_name = tokens.eat(identifier()).to_string();
        tokens.eat(Symbol(Dot));
        subroutine_name = tokens.eat(identifier()).to_string();
        if writer.table.has_symbol(&class_name) {
            // method call for identifier 'class_name': resolve the variable,
            // push the object, and call on the variable's declared type.
            let index = writer.table.index_of(&class_name);
            let symbol_type = writer.table.kind_of(&class_name);
            class_name = writer.table.get_token(&class_name).to_string();
            writer.write_push(symbol_type, index); // Push class object
            n_args += 1;
        } else { // symbol not in table means function call
        }
    }

    tokens.eat(Symbol(LBrace));
    // Arguments are evaluated and pushed left to right.
    while !tokens.is(Symbol(RBrace)) {
        n_args += 1;
        compile_expression(tokens, writer);
        if tokens.is(Symbol(Comma)) {
            tokens.eat(Symbol(Comma));
        }
    }

    writer.write_call(&class_name, &subroutine_name, n_args);
    tokens.eat(Symbol(RBrace));
}
|
||||
93
jack_compiler/src/symbol_table.rs
Normal file
93
jack_compiler/src/symbol_table.rs
Normal file
@@ -0,0 +1,93 @@
|
||||
use std::collections::HashMap;
|
||||
use crate::tokenizer::Keyword;
|
||||
use crate::tokenizer::Token;
|
||||
|
||||
/// A single entry of the symbol table.
#[derive(Debug)]
struct Symbol {
    // Declared identifier (lookups go through the map key; kept for Debug).
    name: String,
    // Type token: a keyword (int/char/boolean) or a class-name identifier.
    symbol_type: Token,
    // Storage kind: Static, Field, Argument, or Local.
    kind: Keyword,
    // Zero-based index within this symbol's kind.
    index: usize,
}
|
||||
|
||||
/// Tracks all visible symbols (class scope plus the current subroutine)
/// and the next free index per storage kind.
#[derive(Debug)]
pub struct SymbolTable {
    // Next free index for each kind (Static/Field/Argument/Local).
    count: HashMap<Keyword, usize>,
    // All currently defined symbols, keyed by name.
    fields: HashMap<String, Symbol>,
}
|
||||
|
||||
pub fn get_empty_symbol_table() -> SymbolTable {
|
||||
let mut count = HashMap::new();
|
||||
count.insert(Keyword::Static, 0);
|
||||
count.insert(Keyword::Field, 0);
|
||||
count.insert(Keyword::Argument, 0);
|
||||
count.insert(Keyword::Local, 0);
|
||||
|
||||
SymbolTable {
|
||||
count: count,
|
||||
fields: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
impl SymbolTable {
|
||||
|
||||
pub fn kind_of(&self, name: &String) -> Keyword {
|
||||
match self.fields.get(name) {
|
||||
Some(symbol) => symbol.kind,
|
||||
None => panic!("Symbol {} does not exist", name),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_of(&self, name: &String) -> usize {
|
||||
match self.fields.get(name) {
|
||||
Some(s) => s.index,
|
||||
None => panic!("Symbol {} does not exist", name),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_token(&self, name: &String) -> Token {
|
||||
match self.fields.get(name) {
|
||||
Some(s) => s.symbol_type.clone(),
|
||||
None => panic!("Symbol {} does not exist", name),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_count(&self, symbol_kind: Keyword) -> usize {
|
||||
match self.count.get(&symbol_kind) {
|
||||
Some(s) => *s,
|
||||
None => 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_symbol(&self, name: &String) -> bool {
|
||||
self.fields.contains_key(name)
|
||||
}
|
||||
|
||||
pub fn remove_subroutine_symbols(&mut self) {
|
||||
let mut to_remove: Vec<String> = vec![];
|
||||
for (key, symbol) in self.fields.iter() {
|
||||
match symbol.kind {
|
||||
Keyword::Argument => to_remove.push(key.to_string()),
|
||||
Keyword::Local => to_remove.push(key.to_string()),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
for key in to_remove {
|
||||
self.fields.remove(&key);
|
||||
}
|
||||
self.count.insert(Keyword::Argument, 0);
|
||||
self.count.insert(Keyword::Local, 0);
|
||||
}
|
||||
|
||||
pub fn define_symbol(&mut self, name: String, symbol_type: Token, kind: Keyword) {
|
||||
let index: usize = *self.count.get(&kind).unwrap();
|
||||
let s = Symbol {
|
||||
name: name.to_string(),
|
||||
symbol_type: symbol_type,
|
||||
kind: kind,
|
||||
index: index,
|
||||
};
|
||||
self.count.insert(kind, index + 1);
|
||||
self.fields.insert(name, s);
|
||||
}
|
||||
}
|
||||
400
jack_compiler/src/tokenizer.rs
Normal file
400
jack_compiler/src/tokenizer.rs
Normal file
@@ -0,0 +1,400 @@
|
||||
use crate::tokenizer;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// One lexical token of the Jack language. Payload-carrying variants hold
/// the lexeme; the helper constructors identifier()/int_const()/
/// string_const() build payload-ignored wildcards for use with `equal`.
#[derive(Debug, Clone)]
pub enum Token {
    Keyword(Keyword),
    Symbol(Symbol),
    Identifier(String),
    IntConst(usize),
    StringConst(String),
}
|
||||
|
||||
/// Jack keywords plus extra variants reused elsewhere in the compiler:
/// Constant/That/Pointer/Temp are VM segments used by the code writer,
/// Argument/Local/Static/Field double as symbol-table kinds, and
/// AnyKeyword is the wildcard matched by `equal`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    Class,
    Constructor,
    Function,
    Method,
    Field,
    Var,
    Int,
    Char,
    Boolean,
    // VM segment (not a Jack source keyword).
    Constant,
    Argument,
    Local,
    Static,
    // VM segment (not a Jack source keyword).
    That,
    // VM segment (not a Jack source keyword).
    Pointer,
    // VM segment (not a Jack source keyword).
    Temp,
    Void,
    True,
    False,
    Null,
    This,
    Let,
    Do,
    If,
    Else,
    While,
    Return,
    // Wildcard: matches any keyword in `equal`.
    AnyKeyword,
}
|
||||
|
||||
/// Jack symbol tokens. Naming note: LBrace/RBrace are the parentheses
/// '(' ')' and LCurly/RCurly the braces '{' '}' (see parse_symbol).
/// UnaryMinus is never produced by the tokenizer — the parser substitutes
/// it for Minus in unary position. AnySymbol is the wildcard matched by
/// `equal`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    LCurly,
    RCurly,
    LBrace,
    RBrace,
    LSquare,
    RSquare,
    Dot,
    Comma,
    Semicolon,
    Plus,
    Minus,
    Mul,
    Div,
    ExclusiveAnd,
    ExclusiveOr,
    Smaller,
    Greater,
    Equal,
    // Synthesized by the parser for unary '-'.
    UnaryMinus,
    Not,
    // Wildcard: matches any symbol in `equal`.
    AnySymbol,
}
|
||||
|
||||
/// A token stream with a cursor. `eat`, `is`, `is_one_of` and
/// `is_sequence` operate relative to `index`.
#[derive(Debug)]
pub struct Tokens {
    pub tokens: Vec<Token>,
    // Cursor of the next token to be consumed.
    index: usize,
}
|
||||
|
||||
/// Wildcard token matching any identifier (payload is ignored by `equal`).
pub fn identifier() -> Token {
    Token::Identifier(String::new())
}
|
||||
|
||||
/// Wildcard token matching any integer constant (value ignored by `equal`).
pub fn int_const() -> Token {
    Token::IntConst(0)
}
|
||||
|
||||
/// Wildcard token matching any string constant (payload ignored by `equal`).
pub fn string_const() -> Token {
    Token::StringConst(String::new())
}
|
||||
|
||||
pub fn equal(t1: &Token, t2: &Token) -> bool {
|
||||
match t1 {
|
||||
Token::Keyword(k1) => match t2 {
|
||||
Token::Keyword(k2) if k1 == k2 => true,
|
||||
Token::Keyword(_) if k1 == &Keyword::AnyKeyword => true,
|
||||
_ => false,
|
||||
},
|
||||
Token::Symbol(s1) => match t2 {
|
||||
Token::Symbol(s2) if s1 == s2 => true,
|
||||
Token::Symbol(_) if s1 == &Symbol::AnySymbol => true,
|
||||
_ => false,
|
||||
},
|
||||
Token::Identifier(_) => match t2 {
|
||||
Token::Identifier(_) => true,
|
||||
_ => false,
|
||||
},
|
||||
Token::IntConst(_) => match t2 {
|
||||
Token::IntConst(_) => true,
|
||||
_ => false,
|
||||
},
|
||||
Token::StringConst(_) => match t2 {
|
||||
Token::StringConst(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Token {
|
||||
pub fn to_string(&self) -> String {
|
||||
match self {
|
||||
Token::Identifier(s) => s.to_string(),
|
||||
Token::StringConst(s) => s.to_string(),
|
||||
_ => panic!("Cannot convert {:?} to string.", self),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_keyword(&self) -> tokenizer::Keyword {
|
||||
match self {
|
||||
tokenizer::Token::Keyword(k) => k.clone(),
|
||||
_ => tokenizer::Keyword::AnyKeyword,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_int(&self) -> usize {
|
||||
match self {
|
||||
tokenizer::Token::IntConst(i) => *i,
|
||||
_ => panic!("Cannot convert {:?} to int.", self),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_symbol(&self) -> tokenizer::Symbol {
|
||||
match self {
|
||||
tokenizer::Token::Symbol(s) => s.clone(),
|
||||
_ => panic!("Cannot convert {:?} to symbol.", self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Tokens {
|
||||
pub fn peek(&self) -> tokenizer::Token {
|
||||
let i = self.index;
|
||||
let t = self.tokens.get(i).unwrap();
|
||||
t.clone()
|
||||
}
|
||||
|
||||
fn get_token(&self, index: usize) -> tokenizer::Token {
|
||||
let t = self.tokens.get(index).unwrap();
|
||||
t.clone()
|
||||
}
|
||||
|
||||
pub fn eat(&mut self, expected_token: Token) -> tokenizer::Token {
|
||||
let i = self.index;
|
||||
let t = self.tokens.get(i).unwrap();
|
||||
self.index += 1;
|
||||
if !equal(&expected_token, t) {
|
||||
panic!{"Expected {:?} but got {:?}.", expected_token, t};
|
||||
}
|
||||
t.clone()
|
||||
}
|
||||
|
||||
pub fn eat_one_of(&mut self, tokens: Vec<Token>) -> tokenizer::Token {
|
||||
let t2 = self.get_token(self.index);
|
||||
for t1 in &tokens {
|
||||
if equal(&t1, &t2) {
|
||||
self.index += 1;
|
||||
return t2.clone();
|
||||
}
|
||||
}
|
||||
panic!{"Expected one of {:?} but got {:?}.", tokens, t2};
|
||||
}
|
||||
|
||||
pub fn is(&self, expected_token: Token) -> bool {
|
||||
let t = self.get_token(self.index);
|
||||
if equal(&expected_token, &t) {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_one_of(&self, tokens: Vec<Token>) -> bool {
|
||||
let t2 = self.get_token(self.index);
|
||||
for t1 in tokens {
|
||||
if equal(&t1, &t2) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
pub fn is_sequence(&self, tokens: Vec<Token>) -> bool {
|
||||
let mut index = self.index;
|
||||
for t1 in tokens {
|
||||
let t2 = self.get_token(index);
|
||||
if !(equal(&t1, &t2)) {
|
||||
return false;
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tokenize_file(file: &Path) -> Tokens {
|
||||
let mut tokens = vec![];
|
||||
let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
|
||||
let length = chars.len();
|
||||
let mut index: usize = 0;
|
||||
|
||||
while index < length {
|
||||
index = eat_comment(&chars, index);
|
||||
let c = chars[index];
|
||||
|
||||
if c.is_whitespace() {
|
||||
index += 1;
|
||||
} else if parse_symbol(&chars, &mut tokens, index) != index {
|
||||
// if there is a symbol it has already been added to token list.
|
||||
index += 1
|
||||
} else if c.is_ascii_alphabetic() {
|
||||
index = parse_keyword_or_identifier(&chars, &mut tokens, index);
|
||||
} else if c.is_ascii_digit() {
|
||||
index = parse_integer_constant(&chars, &mut tokens, index);
|
||||
} else if c == '"' {
|
||||
index = parse_string_constant(&chars, &mut tokens, index);
|
||||
} else {
|
||||
println!("Unexpected char {:?}", c);
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
let tokens = Tokens {
|
||||
tokens: tokens,
|
||||
index: 0,
|
||||
};
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/// Skips any number of consecutive `//` line comments and `/* */` block
/// comments starting at `index` and returns the first index past them.
/// Bounds checks guard every lookahead, so input ending inside (or right
/// at the start of) a comment no longer panics; an unterminated block
/// comment consumes the rest of the input.
fn eat_comment(chars: &Vec<char>, index: usize) -> usize {
    let len = chars.len();
    let mut index = index;
    loop {
        if index + 1 < len && chars[index] == '/' && chars[index + 1] == '/' {
            // Line comment: skip to (and past) the newline, or to EOF.
            index += 2;
            while index < len && chars[index] != '\n' {
                index += 1;
            }
            if index < len {
                index += 1;
            }
        } else if index + 1 < len && chars[index] == '/' && chars[index + 1] == '*' {
            // Block comment: skip past the closing */ when present.
            index += 2;
            while index + 1 < len && !(chars[index] == '*' && chars[index + 1] == '/') {
                index += 1;
            }
            index = (index + 2).min(len);
        } else {
            return index;
        }
    }
}
|
||||
|
||||
fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let mut index = index;
|
||||
let c = chars[index];
|
||||
|
||||
use Symbol::*;
|
||||
let s = match c {
|
||||
'{' => Some(LCurly),
|
||||
'}' => Some(RCurly),
|
||||
'(' => Some(LBrace),
|
||||
')' => Some(RBrace),
|
||||
'[' => Some(LSquare),
|
||||
']' => Some(RSquare),
|
||||
'.' => Some(Dot),
|
||||
',' => Some(Comma),
|
||||
';' => Some(Semicolon),
|
||||
'+' => Some(Plus),
|
||||
'-' => Some(Minus),
|
||||
'*' => Some(Mul),
|
||||
'/' => Some(Div),
|
||||
'&' => Some(ExclusiveAnd),
|
||||
'|' => Some(ExclusiveOr),
|
||||
'<' => Some(Smaller),
|
||||
'>' => Some(Greater),
|
||||
'=' => Some(Equal),
|
||||
'~' => Some(Not),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
match s {
|
||||
Some(s) => {
|
||||
let t = Token::Symbol(s);
|
||||
tokens.push(t);
|
||||
index += 1;
|
||||
}
|
||||
None => (),
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let start_index = index;
|
||||
let mut index = index;
|
||||
let mut number = String::new();
|
||||
|
||||
while chars[index].is_ascii_digit() {
|
||||
number.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
if start_index == index {
|
||||
return index;
|
||||
}
|
||||
|
||||
let number: usize = number.parse::<usize>().unwrap();
|
||||
let t = Token::IntConst(number);
|
||||
tokens.push(t);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let start_index = index;
|
||||
let mut index = index;
|
||||
|
||||
if chars[index] != '"' {
|
||||
return index;
|
||||
}
|
||||
index += 1;
|
||||
|
||||
while chars[index] != '"' {
|
||||
index += 1;
|
||||
}
|
||||
index += 1;
|
||||
|
||||
let s = chars[start_index + 1..index - 1].into_iter().collect();
|
||||
let t = Token::StringConst(s);
|
||||
tokens.push(t);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let mut index = index;
|
||||
let mut token_string = String::new();
|
||||
|
||||
if !chars[index].is_ascii_alphabetic() {
|
||||
return index;
|
||||
}
|
||||
token_string.push(chars[index]);
|
||||
index += 1;
|
||||
|
||||
while chars[index].is_alphanumeric() {
|
||||
token_string.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
use Keyword::*;
|
||||
let t = match token_string.as_str() {
|
||||
"class" => Token::Keyword(Class),
|
||||
"constructor" => Token::Keyword(Constructor),
|
||||
"function" => Token::Keyword(Function),
|
||||
"method" => Token::Keyword(Method),
|
||||
"field" => Token::Keyword(Field),
|
||||
"static" => Token::Keyword(Static),
|
||||
"var" => Token::Keyword(Var),
|
||||
"int" => Token::Keyword(Int),
|
||||
"char" => Token::Keyword(Char),
|
||||
"boolean" => Token::Keyword(Boolean),
|
||||
"void" => Token::Keyword(Void),
|
||||
"true" => Token::Keyword(True),
|
||||
"false" => Token::Keyword(False),
|
||||
"null" => Token::Keyword(Null),
|
||||
"this" => Token::Keyword(This),
|
||||
"let" => Token::Keyword(Let),
|
||||
"do" => Token::Keyword(Do),
|
||||
"if" => Token::Keyword(If),
|
||||
"else" => Token::Keyword(Else),
|
||||
"while" => Token::Keyword(While),
|
||||
"return" => Token::Keyword(Return),
|
||||
s => Token::Identifier(s.to_string()),
|
||||
};
|
||||
tokens.push(t);
|
||||
return index;
|
||||
}
|
||||
|
||||
/// Debug helper: prints chars[start..stop] collected into a string.
#[allow(dead_code)]
fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
    let slice: String = chars[start..stop].iter().collect();
    println!("{:?}", slice);
}
|
||||
9
vm_translator/Cargo.toml
Normal file
9
vm_translator/Cargo.toml
Normal file
@@ -0,0 +1,9 @@
|
||||
[package]
|
||||
name = "vm_translator"
|
||||
version = "0.1.0"
|
||||
authors = ["Felix Martin <mail@felixm.de>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
897
vm_translator/src/main.rs
Normal file
897
vm_translator/src/main.rs
Normal file
@@ -0,0 +1,897 @@
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::Path;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
/// One parsed line of a .vm source file, grouped into the VM language's
/// command categories.
enum Command {
    Arithmetic(ArithmeticCommand),
    MemoryAccess(MemoryAccessCommand),
    ProgramFlow(ProgramFlowCommand),
    FunctionCalling(FunctionCallingCommand),
    // Unparsable line; payload is an assembly comment describing the issue.
    Error(String),
    // Blank or comment-only line.
    NoCommand,
}
|
||||
|
||||
/// The nine VM stack arithmetic/logic commands.
enum ArithmeticCommand {
    Add,
    Sub,
    Neg,
    Eq,
    Gt,
    Lt,
    And,
    Or,
    Not,
}
|
||||
|
||||
/// A `push`/`pop` command: direction, target segment, and index operand.
struct MemoryAccessCommand {
    access_type: MemoryAccessType,
    segment: Segment,
    // Index operand kept as raw source text. TODO: might be better to use &str here?
    index: String,
}
|
||||
|
||||
/// Direction of a memory access command: push onto or pop off the stack.
#[derive(Debug)]
enum MemoryAccessType {
    Push,
    Pop,
}
|
||||
|
||||
/// The eight VM memory segments addressable by push/pop.
#[derive(Debug)]
enum Segment {
    Argument,
    Local,
    Static,
    Constant,
    This,
    That,
    Pointer,
    Temp,
}
|
||||
|
||||
/// Branching commands; each payload is the label symbol from the source.
enum ProgramFlowCommand {
    Label(String),
    Goto(String),
    IfGoto(String)
}
|
||||
|
||||
/// Function commands. Counts are kept as raw source text: `n` is the
/// local-variable count of a `function` line, `m` the argument count of a
/// `call` line (per the VM command format).
enum FunctionCallingCommand {
    Function { name: String, n: String },
    Call { name: String, m: String },
    Return,
}
|
||||
|
||||
/// Accumulates the generated Hack assembly together with the state needed
/// for unique labels and static-variable naming.
struct CodeGen {
    acc: String, // accumulator for code
    comp_counter: u32, // counter to create unique labels for comparison jump instructions
    call_counter: u32, // counter to create unique labels for return addresses
    vm_ref: String, // Name of VM, e.g. "Foo" for "Foo.vm" - used for push/pop static
    current_function: String, // name of the last Function command
}
|
||||
|
||||
fn make_error_command(error: &str, line: &String) -> Command {
|
||||
let mut s = String::new();
|
||||
s.push_str("// ");
|
||||
s.push_str(&error);
|
||||
s.push_str(": '");
|
||||
s.push_str(line);
|
||||
s.push_str("'.");
|
||||
println!("{}", s);
|
||||
Command::Error(s)
|
||||
}
|
||||
|
||||
/// Parses a one-token line: one of the nine arithmetic commands or
/// `return`. Anything else becomes an Error command.
fn parse_single_token(tokens: &Vec<&str>, line: &String) -> Command {
    use crate::ArithmeticCommand::*;
    use crate::Command::Arithmetic;
    use crate::Command::FunctionCalling;
    use crate::FunctionCallingCommand::Return;

    return match tokens[0] {
        "add" => Arithmetic(Add),
        "sub" => Arithmetic(Sub),
        "neg" => Arithmetic(Neg),
        "eq" => Arithmetic(Eq),
        "gt" => Arithmetic(Gt),
        "lt" => Arithmetic(Lt),
        "and" => Arithmetic(And),
        "or" => Arithmetic(Or),
        "not" => Arithmetic(Not),
        "return" => FunctionCalling(Return),
        _ => make_error_command("Unrecognized single token command", &line),
    };
}
|
||||
|
||||
/// Parses a two-token line: the program-flow commands label/goto/if-goto
/// with their label symbol. Anything else becomes an Error command.
fn parse_two_tokens(tokens: &Vec<&str>, line: &String) -> Command {
    use crate::ProgramFlowCommand::*;
    use crate::Command::ProgramFlow;
    return match (tokens[0], tokens[1]) {
        ("label", symbol) => ProgramFlow(Label(symbol.to_string())),
        ("goto", symbol) => ProgramFlow(Goto(symbol.to_string())),
        ("if-goto", symbol) => ProgramFlow(IfGoto(symbol.to_string())),
        _ => make_error_command("Unrecognized two tokens command", &line),
    };
}
|
||||
|
||||
/// Parses a three-token line: push/pop with a segment and index, or
/// function/call with a name and count. Anything else becomes an Error
/// command.
fn parse_three_tokens(tokens: &Vec<&str>, line: &String) -> Command {
    use crate::Command::MemoryAccess;
    use crate::Command::FunctionCalling;
    use crate::MemoryAccessType::*;
    use crate::Segment::*;
    use crate::FunctionCallingCommand::*;

    // Local helper to shorten the push/pop table below.
    fn make(access_type: MemoryAccessType, segment: Segment, index: &str) -> Command {
        return MemoryAccess(MemoryAccessCommand {
            access_type: access_type,
            segment: segment,
            index: index.to_string(),
        })
    }

    return match (tokens[0], tokens[1], tokens[2]) {
        ("push", "argument", index) => make(Push, Argument, index),
        ("pop", "argument", index) => make(Pop, Argument, index),
        ("push", "local", index) => make(Push, Local, index),
        ("pop", "local", index) => make(Pop, Local, index),
        ("push", "static", index) => make(Push, Static, index),
        ("pop", "static", index) => make(Pop, Static, index),
        ("push", "constant", index) => make(Push, Constant, index),
        ("pop", "constant", index) => make(Pop, Constant, index),
        ("push", "this", index) => make(Push, This, index),
        ("pop", "this", index) => make(Pop, This, index),
        ("push", "that", index) => make(Push, That, index),
        ("pop", "that", index) => make(Pop, That, index),
        ("push", "pointer", index) => make(Push, Pointer, index),
        ("pop", "pointer", index) => make(Pop, Pointer, index),
        ("push", "temp", index) => make(Push, Temp, index),
        ("pop", "temp", index) => make(Pop, Temp, index),
        ("function", name, narg) => FunctionCalling(Function { name: name.to_string(), n: narg.to_string() }),
        ("call", name, narg) => FunctionCalling(Call { name: name.to_string(), m: narg.to_string() }),
        _ => make_error_command("Unexpected three tokens", line)
    };
}
|
||||
|
||||
fn parse_line(line: &String) -> Command {
|
||||
let mut tokens: Vec<&str> = Vec::new();
|
||||
let mut iter = line.split_whitespace();
|
||||
|
||||
while let Some(token) = iter.next() {
|
||||
if token == "//" {
|
||||
break;
|
||||
}
|
||||
tokens.push(token);
|
||||
}
|
||||
|
||||
return match tokens.len() {
|
||||
0 => Command::NoCommand,
|
||||
1 => parse_single_token(&tokens, &line),
|
||||
2 => parse_two_tokens(&tokens, &line),
|
||||
3 => parse_three_tokens(&tokens, &line),
|
||||
_ => make_error_command("Unexpected number of tokens", &line),
|
||||
};
|
||||
}
|
||||
|
||||
fn parse_file(filename: &String) -> Vec<Command> {
|
||||
let filename = Path::new(filename);
|
||||
let mut commands: Vec<Command> = Vec::new();
|
||||
if let Ok(file) = File::open(filename) {
|
||||
for line in io::BufReader::new(file).lines() {
|
||||
let command = match line {
|
||||
Ok(ok) => parse_line(&ok),
|
||||
Err(err) => make_error_command("Error reading line", &err.to_string()),
|
||||
};
|
||||
commands.push(command);
|
||||
}
|
||||
} else {
|
||||
panic!("Could not open {:?}!", filename);
|
||||
}
|
||||
commands
|
||||
}
|
||||
|
||||
fn generate_code_arithmetic(command: &ArithmeticCommand, mut code_gen: &mut CodeGen) {
|
||||
fn binary_operator(command: &str, operator: &str, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// {}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
A = A + 1\n\
|
||||
D = D {} M\n\
|
||||
A = A - 1\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\n\
|
||||
",
|
||||
command, operator
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn unary_operator(command: &str, operator: &str, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// {}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
M = {}M\n\n\
|
||||
",
|
||||
command, operator
|
||||
);
|
||||
code_gen.acc.push_str(&s)
|
||||
}
|
||||
|
||||
fn comparison_operator(command: &str, operator: &str, code_gen: &mut CodeGen) {
|
||||
code_gen.comp_counter += 1;
|
||||
let s = format!(
|
||||
"\
|
||||
// {cmd}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
A = A + 1\n\
|
||||
D = D - M\n\
|
||||
@IF_{op}_{index}\n\
|
||||
D;{op}\n\
|
||||
@ELSE_{op}_{index}\n\
|
||||
0;JMP\n\
|
||||
(IF_{op}_{index})\n\
|
||||
D = -1\n\
|
||||
@END_{op}_{index}\n\
|
||||
0;JMP\n\
|
||||
(ELSE_{op}_{index})\n\
|
||||
D = 0\n\
|
||||
(END_{op}_{index})\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
A = A - 1\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\
|
||||
\n",
|
||||
cmd = command,
|
||||
op = operator,
|
||||
index = code_gen.comp_counter
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
use crate::ArithmeticCommand::*;
|
||||
match command {
|
||||
Add => binary_operator("add", "+", &mut code_gen),
|
||||
Sub => binary_operator("sub", "-", &mut code_gen),
|
||||
Neg => unary_operator("neg", "-", &mut code_gen),
|
||||
Eq => comparison_operator("eq", "JEQ", &mut code_gen),
|
||||
Gt => comparison_operator("gt", "JGT", &mut code_gen),
|
||||
Lt => comparison_operator("lt", "JLT", &mut code_gen),
|
||||
And => binary_operator("and", "&", &mut code_gen),
|
||||
Or => binary_operator("or", "|", &mut code_gen),
|
||||
Not => unary_operator("not", "!", &mut code_gen),
|
||||
};
|
||||
}
|
||||
|
||||
fn generate_code_memory_access(command: &MemoryAccessCommand, code_gen: &mut CodeGen) {
|
||||
use crate::MemoryAccessType::*;
|
||||
use crate::Segment::*;
|
||||
|
||||
fn pop_regular(segment_name: &str, segment_id: &str, index: &String, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// pop {segment_name} {index}\n\
|
||||
@{index}\n\
|
||||
D = A\n\
|
||||
@{segment_id}\n\
|
||||
A = M\n\
|
||||
D = D + A\n\
|
||||
@R13\n\
|
||||
M = D\n\
|
||||
// ^ R13 = {segment_name} + index\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\
|
||||
// ^ pop into D\n\
|
||||
@R13\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
// ^ *R13 = D\n\n\
|
||||
",
|
||||
segment_name = segment_name,
|
||||
segment_id = segment_id,
|
||||
index = index
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn push_regular(segment_name: &str, segment_id: &str, index: &String, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// push {segment_name} {index}\n\
|
||||
@{index}\n\
|
||||
D = A\n\
|
||||
@{segment_id}\n\
|
||||
A = M\n\
|
||||
A = D + A\n\
|
||||
D = M\n\
|
||||
// ^ D = *({segment_id} + index)\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M + 1\n\
|
||||
// ^ push D\n\n\
|
||||
",
|
||||
segment_name = segment_name,
|
||||
segment_id = segment_id,
|
||||
index = index
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn push_constant(index: &String, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// push constant {}\n\
|
||||
@{}\n\
|
||||
D = A\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M + 1\n\n\
|
||||
",
|
||||
index, index
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn pop_static(index: &String, code_gen: &mut CodeGen) {
|
||||
let mut symbol = String::from(code_gen.vm_ref.as_str());
|
||||
symbol.push_str(".");
|
||||
symbol.push_str(index);
|
||||
|
||||
let s = format!("\
|
||||
// pop static {symbol}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\
|
||||
// ^ pop into D\n\
|
||||
@{symbol}\n\
|
||||
M = D\n\
|
||||
// ^ {symbol} = D\n\
|
||||
\n", symbol=symbol);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn push_static(index: &String, code_gen: &mut CodeGen) {
|
||||
let mut symbol = String::from(code_gen.vm_ref.as_str());
|
||||
symbol.push_str(".");
|
||||
symbol.push_str(index);
|
||||
|
||||
let s = format!("\
|
||||
// push static {symbol}\n\
|
||||
@{symbol}\n\
|
||||
D = M\n\
|
||||
// ^ D = {symbol}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M + 1\n\
|
||||
// ^ push from D\n\
|
||||
\n", symbol=symbol);
|
||||
|
||||
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn pop_temp(index: &String, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// pop temp {index}\n\
|
||||
@{index}\n\
|
||||
D = A\n\
|
||||
@5\n\
|
||||
A = D + A\n\
|
||||
D = A\n\
|
||||
@R13\n\
|
||||
M = D\n\
|
||||
// ^ R13 = temp + index\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\
|
||||
// ^ pop into D\n\
|
||||
@R13\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
// ^ *R13 = D\n\n\
|
||||
",
|
||||
index = index
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn push_temp(index: &String, code_gen: &mut CodeGen) {
|
||||
let s = format!(
|
||||
"\
|
||||
// push temp {index}\n\
|
||||
@{index}
|
||||
D = A
|
||||
@5
|
||||
A = D + A
|
||||
D = M
|
||||
// ^ D = *(temp + index)\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M + 1\n\
|
||||
// ^ push D\n\n\
|
||||
",
|
||||
index = index
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn push_pointer(index: &String, code_gen: &mut CodeGen) {
|
||||
let segment = match index.as_str() {
|
||||
"0" => "THIS",
|
||||
"1" => "THAT",
|
||||
_ => "INVALID"
|
||||
};
|
||||
|
||||
let s = format!("\
|
||||
// push pointer {segment}\n\
|
||||
@{segment}\n\
|
||||
D = M\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
M = D\n\
|
||||
@SP\n\
|
||||
M = M + 1\n\
|
||||
// ^ push {segment}\n\n",
|
||||
segment=segment
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
fn pop_pointer(index: &String, code_gen: &mut CodeGen) {
|
||||
let segment = match index.as_str() {
|
||||
"0" => "THIS",
|
||||
"1" => "THAT",
|
||||
_ => "INVALID"
|
||||
};
|
||||
|
||||
let s = format!("\
|
||||
// pop pointer {segment}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\
|
||||
@{segment}\n\
|
||||
M = D\n\
|
||||
// ^ pop into {segment}\n\
|
||||
\n",
|
||||
segment=segment
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
|
||||
match command {
|
||||
MemoryAccessCommand { access_type: Push, segment: Constant, index }
|
||||
=> push_constant(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: Local, index }
|
||||
=> pop_regular("local", "LCL", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: Local, index }
|
||||
=> push_regular("local", "LCL", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: Argument, index }
|
||||
=> pop_regular("argument", "ARG", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: Argument, index }
|
||||
=> push_regular("argument", "ARG", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: This, index }
|
||||
=> pop_regular("this", "THIS", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: This, index }
|
||||
=> push_regular("this", "THIS", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: That, index }
|
||||
=> pop_regular("that", "THAT", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: That, index }
|
||||
=> push_regular("that", "THAT", index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: Static, index }
|
||||
=> pop_static(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: Static, index }
|
||||
=> push_static(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: Temp, index }
|
||||
=> pop_temp(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: Temp, index }
|
||||
=> push_temp(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Pop, segment: Pointer, index }
|
||||
=> pop_pointer(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type: Push, segment: Pointer, index }
|
||||
=> push_pointer(index, code_gen),
|
||||
|
||||
MemoryAccessCommand { access_type, segment, index,
|
||||
} => {
|
||||
let s = format!(
|
||||
"// warning: {:?} {:?} {} not implemented.\n\n",
|
||||
access_type, segment, index
|
||||
);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_code_program_flow(command: &ProgramFlowCommand, code_gen: &mut CodeGen) {
|
||||
|
||||
fn get_flow_label(label: &String, code_gen: &mut CodeGen) -> std::string::String {
|
||||
if code_gen.current_function != "" {
|
||||
return format!("{}:{}", code_gen.current_function, label);
|
||||
}
|
||||
else {
|
||||
return label.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
use crate::ProgramFlowCommand::*;
|
||||
match command {
|
||||
Label(label) => {
|
||||
let label = get_flow_label(label, code_gen);
|
||||
let s = format!("// label {}\n({})\n\n", label, label);
|
||||
code_gen.acc.push_str(&s);
|
||||
},
|
||||
Goto(label) => {
|
||||
let label = get_flow_label(label, code_gen);
|
||||
let s = format!("// goto {}\n@{}\n0;JMP\n\n", label, label);
|
||||
code_gen.acc.push_str(&s);
|
||||
},
|
||||
IfGoto(label) => {
|
||||
let label = get_flow_label(label, code_gen);
|
||||
let s = format!("\
|
||||
// if-goto {label}\n\
|
||||
@SP\n\
|
||||
A = M\n\
|
||||
A = A - 1\n\
|
||||
D = M\n\
|
||||
@SP\n\
|
||||
M = M - 1\n\
|
||||
@{label}\n\
|
||||
D;JNE\n\
|
||||
", label=label);
|
||||
code_gen.acc.push_str(&s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits Hack assembly for a function-calling command (`call`, `function`,
/// `return`) into `code_gen.acc`. The generated sequences implement the
/// standard VM calling convention: a five-word saved frame
/// (return address, LCL, ARG, THIS, THAT) below the callee's arguments.
fn generate_code_function_calling(command: &FunctionCallingCommand, code_gen: &mut CodeGen) {
    use crate::FunctionCallingCommand::*;

    // `call f n`: push the return address and the caller's LCL/ARG/THIS/THAT,
    // reposition ARG and LCL for the callee, jump to `f`, and place the
    // return label. `call_counter` makes each return label unique.
    fn call(function_name: &String, nargs: &String, code_gen: &mut CodeGen) {
        let return_label = format!("{}:return:{}", function_name, code_gen.call_counter);
        code_gen.call_counter += 1;

        let s = format!(
            "\
            // call {function_name} {nargs}\n\
            @{return_label}\n\
            D = A\n\
            @SP\n\
            A = M\n\
            M = D\n\
            @SP\n\
            M = M + 1\n\
            // ^ push return-address\n\
            @LCL\n\
            D = M\n\
            @SP\n\
            A = M\n\
            M = D\n\
            @SP\n\
            M = M + 1\n\
            // ^ push lcl\n\
            @ARG\n\
            D = M\n\
            @SP\n\
            A = M\n\
            M = D\n\
            @SP\n\
            M = M + 1\n\
            // ^ push arg\n\
            @THIS\n\
            D = M\n\
            @SP\n\
            A = M\n\
            M = D\n\
            @SP\n\
            M = M + 1\n\
            // ^ push this\n\
            @THAT\n\
            D = M\n\
            @SP\n\
            A = M\n\
            M = D\n\
            @SP\n\
            M = M + 1\n\
            // ^ push that\n\
            @SP\n\
            D = M\n\
            @{nargs}\n\
            D = D - A\n\
            @5\n\
            D = D - A\n\
            @ARG\n\
            M = D\n\
            // ^ ARG = SP - {nargs} - 5\n\
            @SP\n\
            D = M\n\
            @LCL\n\
            M = D\n\
            // ^ LCL = SP\n\
            @{function_name}\n\
            0;JMP\n\
            // ^ goto {function_name}\n\
            ({return_label})\n\
            \n\n",
            nargs=nargs,
            function_name=function_name,
            return_label=return_label,
        );
        code_gen.acc.push_str(&s);

    }

    // `function f n`: place the entry label and push `n` zeros to
    // zero-initialize the callee's local variables. Also records `f` as the
    // current function so flow labels get scoped to it.
    // NOTE(review): `nargs` here is the local-variable count of the
    // `function` command, not an argument count — confirm naming intent.
    fn function(function_name: &String, nargs: &String, code_gen: &mut CodeGen) {
        code_gen.current_function = function_name.to_string();

        let s = format!(
            "\
            // function {function_name} {nargs}\n\
            ({function_name})\n\
            @0\n\
            D = A\n\
            ",
            function_name=function_name,
            nargs=nargs,
        );
        code_gen.acc.push_str(&s);
        // D is 0 from the prologue above; push it once per local variable.
        let nargs: u32 = nargs.parse().unwrap();
        for _ in 0..nargs {
            code_gen.acc.push_str("@SP\nA = M\nM = D\n@SP\nM = M + 1\n// ^ push 0\n");
        }
        let s = format!("// ^ push 0 * {nargs}\n\n", nargs=nargs);
        code_gen.acc.push_str(&s);
    }

    // `return`: copy the return value into *ARG, restore SP/THAT/THIS/ARG/LCL
    // from the saved frame (addressed via R13 = FRAME), and jump to the
    // return address saved in R14.
    fn fnreturn(code_gen: &mut CodeGen) {
        let s = format!(
            "\
            // return\n\
            @LCL\n\
            D = M\n\
            @R13\n\
            M = D\n\
            // ^ R13 = FRAME = LCL\n\
            @5\n\
            D = A\n\
            @R13\n\
            A = M - D\n\
            D = M\n\
            @R14\n\
            M = D\n\
            // ^ R14 = RET = *(FRAME - 5)\n\
            @SP\n\
            A = M\n\
            A = A - 1\n\
            D = M\n\
            @SP\n\
            M = M - 1\n\
            @ARG\n\
            A = M\n\
            M = D\n\
            // ^ POP into *ARG\n\
            @ARG\n\
            D = M + 1\n\
            @SP\n\
            M = D\n\
            // ^ SP = ARG + 1\n\
            @1\n\
            D = A\n\
            @R13\n\
            A = M - D\n\
            D = M\n\
            @THAT\n\
            M = D\n\
            // ^ THAT = *(FRAME - 1)\n\
            @2\n\
            D = A\n\
            @R13\n\
            A = M - D\n\
            D = M\n\
            @THIS\n\
            M = D\n\
            // ^ THIS = *(FRAME - 2)\n\
            @3\n\
            D = A\n\
            @R13\n\
            A = M - D\n\
            D = M\n\
            @ARG\n\
            M = D\n\
            // ^ ARG = *(FRAME - 3)\n\
            @4\n\
            D = A\n\
            @R13\n\
            A = M - D\n\
            D = M\n\
            @LCL\n\
            M = D\n\
            // ^ LCL = *(FRAME - 4)\n\
            @R14\n\
            A = M\n\
            0;JMP\n\
            // ^ goto RET\n\
            \n\n",
        );
        code_gen.acc.push_str(&s);
    }

    match command {
        Call { name, m } => call(name, m, code_gen),
        Function { name, n, } => function(name, n, code_gen),
        Return => fnreturn(code_gen),
    };
}
|
||||
|
||||
fn generate_code_error(error: &String, code_gen: &mut CodeGen) {
|
||||
code_gen.acc.push_str(error);
|
||||
code_gen.acc.push_str("\n");
|
||||
}
|
||||
|
||||
fn generate_code(commands: &Vec<Command>, mut code_gen: &mut CodeGen) {
|
||||
use crate::Command::*;
|
||||
for command in commands {
|
||||
match command {
|
||||
Arithmetic(c) => generate_code_arithmetic(c, &mut code_gen),
|
||||
MemoryAccess(c) => generate_code_memory_access(c, &mut code_gen),
|
||||
ProgramFlow(c) => generate_code_program_flow(c, &mut code_gen),
|
||||
FunctionCalling(c) => generate_code_function_calling(c, &mut code_gen),
|
||||
Error(string) => generate_code_error(string, &mut code_gen),
|
||||
NoCommand => (),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_bootstrap_code(code_gen: &mut CodeGen) {
|
||||
use crate::Command::FunctionCalling;
|
||||
use crate::FunctionCallingCommand::Call;
|
||||
let s = format!(
|
||||
"\
|
||||
// Bootstrap code\n\
|
||||
@256\n\
|
||||
D = A\n\
|
||||
@SP\n\
|
||||
M = D\n\
|
||||
// ^ SP = 256\n\n");
|
||||
code_gen.acc.push_str(&s);
|
||||
let v = vec![FunctionCalling(Call { name: "Sys.init".to_string(), m: "0".to_string() })];
|
||||
generate_code(&v, code_gen);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
fn write_output_file(filename: &String, output: &String) {
|
||||
let path = Path::new(&filename);
|
||||
let display = path.display();
|
||||
|
||||
// Open a file in write-only mode, returns `io::Result<File>`
|
||||
let mut file = match File::create(&path) {
|
||||
Err(why) => panic!("Couldn't create {}: {}", display, why.to_string()),
|
||||
Ok(file) => file,
|
||||
};
|
||||
|
||||
match file.write_all(output.as_bytes()) {
|
||||
Err(why) => panic!("Couldn't write to {}: {}", display, why.to_string()),
|
||||
Ok(_) => println!("<= {}", display),
|
||||
}
|
||||
}
|
||||
|
||||
fn filename_to_vm_ref(filename: &String) -> String {
|
||||
let p = Path::new(filename);
|
||||
p.file_stem().and_then(OsStr::to_str).unwrap().to_string()
|
||||
}
|
||||
|
||||
fn translate_single_file(filename: &String) {
|
||||
let mut code_gen = CodeGen {
|
||||
acc: String::new(),
|
||||
comp_counter: 0,
|
||||
call_counter: 0,
|
||||
vm_ref: filename_to_vm_ref(filename),
|
||||
current_function: String::new(),
|
||||
};
|
||||
let commands = parse_file(filename);
|
||||
generate_code(&commands, &mut code_gen);
|
||||
let filename = str::replace(filename, ".vm", ".asm");
|
||||
write_output_file(&filename, &code_gen.acc);
|
||||
}
|
||||
|
||||
fn translate_file(filename: &String, mut code_gen: &mut CodeGen) {
|
||||
let commands = parse_file(filename);
|
||||
generate_code(&commands, &mut code_gen);
|
||||
}
|
||||
|
||||
fn translate_dir(dirname: &String) {
|
||||
println!("=> {}", dirname);
|
||||
let mut code_gen = CodeGen {
|
||||
acc: String::new(),
|
||||
comp_counter: 0,
|
||||
call_counter: 0,
|
||||
vm_ref: String::new(),
|
||||
current_function: String::new(),
|
||||
};
|
||||
|
||||
generate_bootstrap_code(&mut code_gen);
|
||||
let paths = fs::read_dir(dirname).unwrap();
|
||||
for path in paths {
|
||||
let filename = path.unwrap().path().to_str().unwrap().to_string();
|
||||
if is_vm_file(&filename) {
|
||||
println!(" -> {}", filename);
|
||||
let vm_ref = filename_to_vm_ref(&filename);
|
||||
let s = format!("// Start {}\n\n", filename);
|
||||
code_gen.vm_ref = vm_ref;
|
||||
code_gen.current_function = "".to_string();
|
||||
code_gen.acc.push_str(&s);
|
||||
translate_file(&filename, &mut code_gen);
|
||||
}
|
||||
}
|
||||
|
||||
let filename = format!("{}/{}.asm", dirname, filename_to_vm_ref(dirname));
|
||||
write_output_file(&filename, &code_gen.acc);
|
||||
}
|
||||
|
||||
fn is_vm_file(filename: &String) -> bool {
|
||||
let p = Path::new(filename);
|
||||
if p.is_file() && (p.extension().unwrap() == OsStr::new("vm")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
let args: Vec<String> = env::args().collect();
|
||||
for arg in &args[1..] {
|
||||
if is_vm_file(arg) {
|
||||
translate_single_file(&arg);
|
||||
} else if Path::new(arg).is_dir() {
|
||||
translate_dir(&arg);
|
||||
} else {
|
||||
println!("{} is not a *.vm file or directory!", arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user