Add VM translator, Jack analyzer and compiler

This commit is contained in:
2020-11-15 15:53:24 -05:00
parent f2a0b6d531
commit fb224f31ed
12 changed files with 2708 additions and 0 deletions

9
vm_translator/Cargo.toml Normal file
View File

@@ -0,0 +1,9 @@
[package]
name = "vm_translator"
version = "0.1.0"
authors = ["Felix Martin <mail@felixm.de>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

897
vm_translator/src/main.rs Normal file
View File

@@ -0,0 +1,897 @@
use std::env;
use std::fs;
use std::fs::File;
use std::io::Write;
use std::io::{self, BufRead};
use std::path::Path;
use std::ffi::OsStr;
/// One parsed line of a VM source file.
enum Command {
    /// A stack arithmetic / logic operation (`add`, `eq`, `not`, ...).
    Arithmetic(ArithmeticCommand),
    /// A `push` or `pop` on one of the memory segments.
    MemoryAccess(MemoryAccessCommand),
    /// A `label`, `goto` or `if-goto` command.
    ProgramFlow(ProgramFlowCommand),
    /// A `function`, `call` or `return` command.
    FunctionCalling(FunctionCallingCommand),
    /// A line that could not be parsed; the payload is the message that the
    /// code generator copies verbatim into the output.
    Error(String),
    /// A line without any tokens (blank or comment only).
    NoCommand,
}
/// The nine stack arithmetic and logic commands of the VM language.
enum ArithmeticCommand {
    /// `add`
    Add,
    /// `sub`
    Sub,
    /// `neg`
    Neg,
    /// `eq`
    Eq,
    /// `gt`
    Gt,
    /// `lt`
    Lt,
    /// `and`
    And,
    /// `or`
    Or,
    /// `not`
    Not,
}
/// A `push` or `pop` command together with its target segment and index.
struct MemoryAccessCommand {
    /// Whether the value is pushed onto or popped off the stack.
    access_type: MemoryAccessType,
    /// The memory segment named in the command.
    segment: Segment,
    /// The third token of the command, kept as text exactly as written.
    // TODO: might be better to use &str here?
    index: String,
}
/// Direction of a memory access command.
#[derive(Debug)]
enum MemoryAccessType {
    /// `push`
    Push,
    /// `pop`
    Pop,
}
/// The memory segments that `push` and `pop` can address.
#[derive(Debug)]
enum Segment {
    /// `argument`
    Argument,
    /// `local`
    Local,
    /// `static`
    Static,
    /// `constant`
    Constant,
    /// `this`
    This,
    /// `that`
    That,
    /// `pointer`
    Pointer,
    /// `temp`
    Temp,
}
/// Branching commands; each variant carries the label symbol as written in
/// the VM source.
enum ProgramFlowCommand {
    /// `label <symbol>`
    Label(String),
    /// `goto <symbol>`
    Goto(String),
    /// `if-goto <symbol>`
    IfGoto(String),
}
/// Function definition, invocation and return commands.
enum FunctionCallingCommand {
    /// `function <name> <n>`; `n` is kept as the raw third token.
    Function { name: String, n: String },
    /// `call <name> <m>`; `m` is kept as the raw third token.
    Call { name: String, m: String },
    /// `return`
    Return,
}
/// Mutable state shared by all code generation functions.
struct CodeGen {
    /// Accumulator for the generated assembly text.
    acc: String,
    /// Counter used to create unique labels for comparison jump instructions.
    comp_counter: u32,
    /// Counter used to create unique labels for return addresses.
    call_counter: u32,
    /// Name of the VM file, e.g. "Foo" for "Foo.vm"; used for push/pop static.
    vm_ref: String,
    /// Name given by the most recent `function` command.
    current_function: String,
}
fn make_error_command(error: &str, line: &String) -> Command {
let mut s = String::new();
s.push_str("// ");
s.push_str(&error);
s.push_str(": '");
s.push_str(line);
s.push_str("'.");
println!("{}", s);
Command::Error(s)
}
fn parse_single_token(tokens: &Vec<&str>, line: &String) -> Command {
use crate::ArithmeticCommand::*;
use crate::Command::Arithmetic;
use crate::Command::FunctionCalling;
use crate::FunctionCallingCommand::Return;
return match tokens[0] {
"add" => Arithmetic(Add),
"sub" => Arithmetic(Sub),
"neg" => Arithmetic(Neg),
"eq" => Arithmetic(Eq),
"gt" => Arithmetic(Gt),
"lt" => Arithmetic(Lt),
"and" => Arithmetic(And),
"or" => Arithmetic(Or),
"not" => Arithmetic(Not),
"return" => FunctionCalling(Return),
_ => make_error_command("Unrecognized single token command", &line),
};
}
fn parse_two_tokens(tokens: &Vec<&str>, line: &String) -> Command {
use crate::ProgramFlowCommand::*;
use crate::Command::ProgramFlow;
return match (tokens[0], tokens[1]) {
("label", symbol) => ProgramFlow(Label(symbol.to_string())),
("goto", symbol) => ProgramFlow(Goto(symbol.to_string())),
("if-goto", symbol) => ProgramFlow(IfGoto(symbol.to_string())),
_ => make_error_command("Unrecognized two tokens command", &line),
};
}
fn parse_three_tokens(tokens: &Vec<&str>, line: &String) -> Command {
use crate::Command::MemoryAccess;
use crate::Command::FunctionCalling;
use crate::MemoryAccessType::*;
use crate::Segment::*;
use crate::FunctionCallingCommand::*;
fn make(access_type: MemoryAccessType, segment: Segment, index: &str) -> Command {
return MemoryAccess(MemoryAccessCommand {
access_type: access_type,
segment: segment,
index: index.to_string(),
})
}
return match (tokens[0], tokens[1], tokens[2]) {
("push", "argument", index) => make(Push, Argument, index),
("pop", "argument", index) => make(Pop, Argument, index),
("push", "local", index) => make(Push, Local, index),
("pop", "local", index) => make(Pop, Local, index),
("push", "static", index) => make(Push, Static, index),
("pop", "static", index) => make(Pop, Static, index),
("push", "constant", index) => make(Push, Constant, index),
("pop", "constant", index) => make(Pop, Constant, index),
("push", "this", index) => make(Push, This, index),
("pop", "this", index) => make(Pop, This, index),
("push", "that", index) => make(Push, That, index),
("pop", "that", index) => make(Pop, That, index),
("push", "pointer", index) => make(Push, Pointer, index),
("pop", "pointer", index) => make(Pop, Pointer, index),
("push", "temp", index) => make(Push, Temp, index),
("pop", "temp", index) => make(Pop, Temp, index),
("function", name, narg) => FunctionCalling(Function { name: name.to_string(), n: narg.to_string() }),
("call", name, narg) => FunctionCalling(Call { name: name.to_string(), m: narg.to_string() }),
_ => make_error_command("Unexpected three tokens", line)
};
}
/// Parses one line of VM source into a `Command`.
///
/// Everything from the first `//` to the end of the line is treated as a
/// comment, whether or not the `//` is surrounded by whitespace. The
/// remaining text is split on whitespace and dispatched on the number of
/// tokens:
///
/// * 0 tokens: `Command::NoCommand`
/// * 1-3 tokens: the matching `parse_*` function
/// * more: an error command
///
/// Error commands always quote the complete, unmodified `line`.
fn parse_line(line: &String) -> Command {
    // Cut the comment off before tokenising so that "//comment" and
    // "push constant 7//x" are not mistaken for malformed commands.
    let code = match line.find("//") {
        Some(comment_start) => &line[..comment_start],
        None => line.as_str(),
    };
    let tokens: Vec<&str> = code.split_whitespace().collect();

    match tokens.len() {
        0 => Command::NoCommand,
        1 => parse_single_token(&tokens, line),
        2 => parse_two_tokens(&tokens, line),
        3 => parse_three_tokens(&tokens, line),
        _ => make_error_command("Unexpected number of tokens", line),
    }
}
/// Reads `filename` line by line and parses every line into a `Command`.
///
/// A line that cannot be read becomes an error command carrying the I/O
/// error text.
///
/// # Panics
///
/// Panics if the file cannot be opened.
fn parse_file(filename: &String) -> Vec<Command> {
    let path = Path::new(filename);
    let file = match File::open(path) {
        Ok(handle) => handle,
        Err(_) => panic!("Could not open {:?}!", path),
    };

    io::BufReader::new(file)
        .lines()
        .map(|line| match line {
            Ok(text) => parse_line(&text),
            Err(err) => make_error_command("Error reading line", &err.to_string()),
        })
        .collect()
}
/// Appends the assembly for one arithmetic / logic command to `code_gen.acc`.
///
/// Comparison commands increment `code_gen.comp_counter` first and use the
/// new value to make their jump labels unique.
fn generate_code_arithmetic(command: &ArithmeticCommand, code_gen: &mut CodeGen) {
    use crate::ArithmeticCommand::*;

    /// Two operands: replaces the two topmost stack values by `x <symbol> y`.
    fn binary(name: &str, symbol: &str) -> String {
        format!(
            "// {}\n\
             @SP\n\
             A = M\n\
             A = A - 1\n\
             A = A - 1\n\
             D = M\n\
             A = A + 1\n\
             D = D {} M\n\
             A = A - 1\n\
             M = D\n\
             @SP\n\
             M = M - 1\n\n",
            name, symbol
        )
    }

    /// One operand: rewrites the topmost stack value in place.
    fn unary(name: &str, symbol: &str) -> String {
        format!(
            "// {}\n\
             @SP\n\
             A = M\n\
             A = A - 1\n\
             M = {}M\n\n",
            name, symbol
        )
    }

    /// Two operands: replaces them by -1 if `x - y` satisfies `jump`, else 0.
    fn comparison(name: &str, jump: &str, serial: u32) -> String {
        // Shared suffix of the three labels, e.g. "JEQ_1".
        let tag = format!("{}_{}", jump, serial);
        format!(
            "// {name}\n\
             @SP\n\
             A = M\n\
             A = A - 1\n\
             A = A - 1\n\
             D = M\n\
             A = A + 1\n\
             D = D - M\n\
             @IF_{tag}\n\
             D;{jump}\n\
             @ELSE_{tag}\n\
             0;JMP\n\
             (IF_{tag})\n\
             D = -1\n\
             @END_{tag}\n\
             0;JMP\n\
             (ELSE_{tag})\n\
             D = 0\n\
             (END_{tag})\n\
             @SP\n\
             A = M\n\
             A = A - 1\n\
             A = A - 1\n\
             M = D\n\
             @SP\n\
             M = M - 1\n\
             \n",
            name = name,
            jump = jump,
            tag = tag
        )
    }

    let (name, symbol) = match command {
        Add => ("add", "+"),
        Sub => ("sub", "-"),
        Neg => ("neg", "-"),
        Eq => ("eq", "JEQ"),
        Gt => ("gt", "JGT"),
        Lt => ("lt", "JLT"),
        And => ("and", "&"),
        Or => ("or", "|"),
        Not => ("not", "!"),
    };

    let asm = match command {
        Add | Sub | And | Or => binary(name, symbol),
        Neg | Not => unary(name, symbol),
        Eq | Gt | Lt => {
            code_gen.comp_counter += 1;
            comparison(name, symbol, code_gen.comp_counter)
        }
    };
    code_gen.acc.push_str(&asm);
}
/// Appends the assembly for one `push` / `pop` command to `code_gen.acc`.
///
/// * `local`, `argument`, `this` and `that` are addressed through the base
///   pointers `LCL`, `ARG`, `THIS` and `THAT`.
/// * `temp` is addressed relative to the fixed address 5.
/// * `static` uses the assembler symbol `<vm_ref>.<index>`.
/// * `pointer 0` / `pointer 1` access `THIS` / `THAT` directly.
/// * `push constant` loads the index as a literal.
///
/// Commands that cannot be translated — `pop constant` and `pointer` with an
/// index other than 0 or 1 — emit a `// warning: ... not implemented.`
/// comment instead of code. Other index values are not validated and are
/// copied into the output as written.
fn generate_code_memory_access(command: &MemoryAccessCommand, code_gen: &mut CodeGen) {
    use crate::MemoryAccessType::*;
    use crate::Segment::*;

    /// Stores D on top of the stack and increments SP.
    const PUSH_D: &str = "@SP\nA = M\nM = D\n@SP\nM = M + 1\n";
    /// Loads the topmost stack value into D and decrements SP.
    const POP_D: &str = "@SP\nA = M\nA = A - 1\nD = M\n@SP\nM = M - 1\n";
    /// Stores D at the address held in R13; ends the pop sequences that
    /// first have to compute their target address.
    const STORE_D_VIA_R13: &str = "@R13\nA = M\nM = D\n// ^ *R13 = D\n\n";

    /// Pop into a segment addressed through the base pointer `segment_id`.
    fn pop_regular(segment_name: &str, segment_id: &str, index: &str) -> String {
        format!(
            "// pop {segment_name} {index}\n\
             @{index}\n\
             D = A\n\
             @{segment_id}\n\
             A = M\n\
             D = D + A\n\
             @R13\n\
             M = D\n\
             // ^ R13 = {segment_name} + index\n\
             {pop}\
             // ^ pop into D\n\
             {store}",
            segment_name = segment_name,
            segment_id = segment_id,
            index = index,
            pop = POP_D,
            store = STORE_D_VIA_R13
        )
    }

    /// Push from a segment addressed through the base pointer `segment_id`.
    fn push_regular(segment_name: &str, segment_id: &str, index: &str) -> String {
        format!(
            "// push {segment_name} {index}\n\
             @{index}\n\
             D = A\n\
             @{segment_id}\n\
             A = M\n\
             A = D + A\n\
             D = M\n\
             // ^ D = *({segment_id} + index)\n\
             {push}\
             // ^ push D\n\n",
            segment_name = segment_name,
            segment_id = segment_id,
            index = index,
            push = PUSH_D
        )
    }

    /// Push the literal value `index`.
    fn push_constant(index: &str) -> String {
        format!(
            "// push constant {index}\n\
             @{index}\n\
             D = A\n\
             {push}\n",
            index = index,
            push = PUSH_D
        )
    }

    /// Pop into the static variable `symbol`.
    fn pop_static(symbol: &str) -> String {
        format!(
            "// pop static {symbol}\n\
             {pop}\
             // ^ pop into D\n\
             @{symbol}\n\
             M = D\n\
             // ^ {symbol} = D\n\
             \n",
            symbol = symbol,
            pop = POP_D
        )
    }

    /// Push the static variable `symbol`.
    fn push_static(symbol: &str) -> String {
        format!(
            "// push static {symbol}\n\
             @{symbol}\n\
             D = M\n\
             // ^ D = {symbol}\n\
             {push}\
             // ^ push from D\n\
             \n",
            symbol = symbol,
            push = PUSH_D
        )
    }

    /// Pop into address `5 + index`.
    fn pop_temp(index: &str) -> String {
        format!(
            "// pop temp {index}\n\
             @{index}\n\
             D = A\n\
             @5\n\
             A = D + A\n\
             D = A\n\
             @R13\n\
             M = D\n\
             // ^ R13 = temp + index\n\
             {pop}\
             // ^ pop into D\n\
             {store}",
            index = index,
            pop = POP_D,
            store = STORE_D_VIA_R13
        )
    }

    /// Push from address `5 + index`.
    fn push_temp(index: &str) -> String {
        format!(
            "// push temp {index}\n\
             @{index}\n\
             D = A\n\
             @5\n\
             A = D + A\n\
             D = M\n\
             // ^ D = *(temp + index)\n\
             {push}\
             // ^ push D\n\n",
            index = index,
            push = PUSH_D
        )
    }

    /// Maps a `pointer` index onto the register it names, if it is valid.
    fn pointer_symbol(index: &str) -> Option<&'static str> {
        match index {
            "0" => Some("THIS"),
            "1" => Some("THAT"),
            _ => None,
        }
    }

    /// Push the value of the pointer register `segment`.
    fn push_pointer(segment: &str) -> String {
        format!(
            "// push pointer {segment}\n\
             @{segment}\n\
             D = M\n\
             {push}\
             // ^ push {segment}\n\n",
            segment = segment,
            push = PUSH_D
        )
    }

    /// Pop into the pointer register `segment`.
    fn pop_pointer(segment: &str) -> String {
        format!(
            "// pop pointer {segment}\n\
             {pop}\
             @{segment}\n\
             M = D\n\
             // ^ pop into {segment}\n\
             \n",
            segment = segment,
            pop = POP_D
        )
    }

    let MemoryAccessCommand { access_type, segment, index } = command;

    // Placeholder emitted for every command that has no translation.
    let unsupported = || {
        format!(
            "// warning: {:?} {:?} {} not implemented.\n\n",
            access_type, segment, index
        )
    };

    // Every (access type, segment) pair is listed explicitly so that adding a
    // variant to either enum is a compile error here rather than a silent
    // fall-through.
    let asm = match (access_type, segment) {
        (Push, Constant) => push_constant(index),
        (Pop, Constant) => unsupported(),
        (Pop, Local) => pop_regular("local", "LCL", index),
        (Push, Local) => push_regular("local", "LCL", index),
        (Pop, Argument) => pop_regular("argument", "ARG", index),
        (Push, Argument) => push_regular("argument", "ARG", index),
        (Pop, This) => pop_regular("this", "THIS", index),
        (Push, This) => push_regular("this", "THIS", index),
        (Pop, That) => pop_regular("that", "THAT", index),
        (Push, That) => push_regular("that", "THAT", index),
        (Pop, Static) => pop_static(&format!("{}.{}", code_gen.vm_ref, index)),
        (Push, Static) => push_static(&format!("{}.{}", code_gen.vm_ref, index)),
        (Pop, Temp) => pop_temp(index),
        (Push, Temp) => push_temp(index),
        // An out-of-range pointer index must not silently generate code that
        // reads or writes an unrelated symbol.
        (Pop, Pointer) => match pointer_symbol(index) {
            Some(register) => pop_pointer(register),
            None => unsupported(),
        },
        (Push, Pointer) => match pointer_symbol(index) {
            Some(register) => push_pointer(register),
            None => unsupported(),
        },
    };
    code_gen.acc.push_str(&asm);
}
/// Appends the assembly for a `label`, `goto` or `if-goto` command to
/// `code_gen.acc`.
///
/// While `code_gen.current_function` is non-empty the label is emitted as
/// `<function>:<label>`; otherwise it is used as written.
fn generate_code_program_flow(command: &ProgramFlowCommand, code_gen: &mut CodeGen) {
    use crate::ProgramFlowCommand::*;

    let raw_label = match command {
        Label(symbol) | Goto(symbol) | IfGoto(symbol) => symbol,
    };
    let label = if code_gen.current_function.is_empty() {
        raw_label.to_string()
    } else {
        format!("{}:{}", code_gen.current_function, raw_label)
    };

    let asm = match command {
        Label(_) => format!("// label {0}\n({0})\n\n", label),
        Goto(_) => format!("// goto {0}\n@{0}\n0;JMP\n\n", label),
        // Pops the topmost stack value and jumps if it is not zero.
        IfGoto(_) => format!(
            "// if-goto {label}\n\
             @SP\n\
             A = M\n\
             A = A - 1\n\
             D = M\n\
             @SP\n\
             M = M - 1\n\
             @{label}\n\
             D;JNE\n",
            label = label
        ),
    };
    code_gen.acc.push_str(&asm);
}
/// Appends the assembly for a `function`, `call` or `return` command to
/// `code_gen.acc`.
///
/// * `call` pushes a unique return address (built from
///   `code_gen.call_counter`, which is then incremented) and the caller's
///   `LCL`, `ARG`, `THIS` and `THAT`, repositions `ARG` and `LCL`, and jumps
///   to the callee.
/// * `function` records the name in `code_gen.current_function`, emits the
///   entry label and pushes one zero per declared local variable. A count
///   that is not a non-negative integer produces an error comment instead of
///   aborting the translator.
/// * `return` restores the caller's frame using `R13` and `R14` as scratch
///   registers and jumps to the saved return address.
fn generate_code_function_calling(command: &FunctionCallingCommand, code_gen: &mut CodeGen) {
    use crate::FunctionCallingCommand::*;

    /// Stores D on top of the stack and increments SP.
    const PUSH_D: &str = "@SP\nA = M\nM = D\n@SP\nM = M + 1\n";

    fn call(function_name: &str, nargs: &str, code_gen: &mut CodeGen) {
        let return_label = format!("{}:return:{}", function_name, code_gen.call_counter);
        code_gen.call_counter += 1;

        let mut asm = format!(
            "// call {function_name} {nargs}\n\
             @{return_label}\n\
             D = A\n\
             {push}\
             // ^ push return-address\n",
            function_name = function_name,
            nargs = nargs,
            return_label = return_label,
            push = PUSH_D
        );

        // Save the caller's segment pointers in the order in which `return`
        // expects to find them below the new frame.
        let saved = [("LCL", "lcl"), ("ARG", "arg"), ("THIS", "this"), ("THAT", "that")];
        for (register, note) in saved.iter() {
            asm.push_str(&format!(
                "@{register}\n\
                 D = M\n\
                 {push}\
                 // ^ push {note}\n",
                register = register,
                note = note,
                push = PUSH_D
            ));
        }

        asm.push_str(&format!(
            "@SP\n\
             D = M\n\
             @{nargs}\n\
             D = D - A\n\
             @5\n\
             D = D - A\n\
             @ARG\n\
             M = D\n\
             // ^ ARG = SP - {nargs} - 5\n\
             @SP\n\
             D = M\n\
             @LCL\n\
             M = D\n\
             // ^ LCL = SP\n\
             @{function_name}\n\
             0;JMP\n\
             // ^ goto {function_name}\n\
             ({return_label})\n\
             \n\n",
            nargs = nargs,
            function_name = function_name,
            return_label = return_label
        ));
        code_gen.acc.push_str(&asm);
    }

    fn function(function_name: &str, nargs: &str, code_gen: &mut CodeGen) {
        // Recorded even if the count below turns out to be invalid, so that
        // labels following this line are not attributed to the previous
        // function.
        code_gen.current_function = function_name.to_string();

        // The count comes straight from the VM source: report a malformed
        // value in the output instead of panicking.
        let n_locals: u32 = match nargs.parse() {
            Ok(count) => count,
            Err(_) => {
                let message = format!(
                    "// Invalid number of local variables: 'function {} {}'.",
                    function_name, nargs
                );
                println!("{}", message);
                code_gen.acc.push_str(&message);
                code_gen.acc.push('\n');
                return;
            }
        };

        let header = format!(
            "// function {function_name} {nargs}\n\
             ({function_name})\n\
             @0\n\
             D = A\n",
            function_name = function_name,
            nargs = nargs
        );
        code_gen.acc.push_str(&header);

        // D holds 0 from the header; push it once per local variable.
        for _ in 0..n_locals {
            code_gen.acc.push_str(PUSH_D);
            code_gen.acc.push_str("// ^ push 0\n");
        }
        code_gen
            .acc
            .push_str(&format!("// ^ push 0 * {}\n\n", n_locals));
    }

    fn fnreturn(code_gen: &mut CodeGen) {
        code_gen.acc.push_str(
            "// return\n\
             @LCL\n\
             D = M\n\
             @R13\n\
             M = D\n\
             // ^ R13 = FRAME = LCL\n\
             @5\n\
             D = A\n\
             @R13\n\
             A = M - D\n\
             D = M\n\
             @R14\n\
             M = D\n\
             // ^ R14 = RET = *(FRAME - 5)\n\
             @SP\n\
             A = M\n\
             A = A - 1\n\
             D = M\n\
             @SP\n\
             M = M - 1\n\
             @ARG\n\
             A = M\n\
             M = D\n\
             // ^ POP into *ARG\n\
             @ARG\n\
             D = M + 1\n\
             @SP\n\
             M = D\n\
             // ^ SP = ARG + 1\n",
        );

        // Restore the caller's pointers from the slots just below FRAME.
        let restored = [(1, "THAT"), (2, "THIS"), (3, "ARG"), (4, "LCL")];
        for (offset, register) in restored.iter() {
            code_gen.acc.push_str(&format!(
                "@{offset}\n\
                 D = A\n\
                 @R13\n\
                 A = M - D\n\
                 D = M\n\
                 @{register}\n\
                 M = D\n\
                 // ^ {register} = *(FRAME - {offset})\n",
                offset = offset,
                register = register
            ));
        }

        code_gen.acc.push_str(
            "@R14\n\
             A = M\n\
             0;JMP\n\
             // ^ goto RET\n\
             \n\n",
        );
    }

    match command {
        Call { name, m } => call(name, m, code_gen),
        Function { name, n } => function(name, n, code_gen),
        Return => fnreturn(code_gen),
    };
}
/// Copies an error message into the generated output, followed by a newline.
fn generate_code_error(error: &String, code_gen: &mut CodeGen) {
    let output = &mut code_gen.acc;
    output.push_str(error);
    output.push('\n');
}
fn generate_code(commands: &Vec<Command>, mut code_gen: &mut CodeGen) {
use crate::Command::*;
for command in commands {
match command {
Arithmetic(c) => generate_code_arithmetic(c, &mut code_gen),
MemoryAccess(c) => generate_code_memory_access(c, &mut code_gen),
ProgramFlow(c) => generate_code_program_flow(c, &mut code_gen),
FunctionCalling(c) => generate_code_function_calling(c, &mut code_gen),
Error(string) => generate_code_error(string, &mut code_gen),
NoCommand => (),
};
}
}
fn generate_bootstrap_code(code_gen: &mut CodeGen) {
use crate::Command::FunctionCalling;
use crate::FunctionCallingCommand::Call;
let s = format!(
"\
// Bootstrap code\n\
@256\n\
D = A\n\
@SP\n\
M = D\n\
// ^ SP = 256\n\n");
code_gen.acc.push_str(&s);
let v = vec![FunctionCalling(Call { name: "Sys.init".to_string(), m: "0".to_string() })];
generate_code(&v, code_gen);
}
/// Entry point: translates every command line argument.
///
/// * A `*.vm` file is translated into a sibling `*.asm` file without
///   bootstrap code.
/// * A directory is translated into `<dir>/<dir>.asm`: bootstrap code
///   followed by the translation of every `*.vm` file directly inside it, in
///   sorted path order.
/// * Anything else is reported and skipped.
fn main() {
    /// Writes `output` to `path` and reports the written file on stdout.
    ///
    /// Panics with a descriptive message if the file cannot be created or
    /// written.
    fn write_output_file(path: &Path, output: &str) {
        let display = path.display();
        let mut file = match File::create(path) {
            Err(why) => panic!("Couldn't create {}: {}", display, why),
            Ok(file) => file,
        };
        match file.write_all(output.as_bytes()) {
            Err(why) => panic!("Couldn't write to {}: {}", display, why),
            Ok(_) => println!("<= {}", display),
        }
    }

    /// File name of `path` without its extension, if it has one that is
    /// valid UTF-8.
    fn file_stem_of(path: &Path) -> Option<String> {
        path.file_stem().and_then(OsStr::to_str).map(String::from)
    }

    /// True for an existing regular file with the extension `vm`. Files
    /// without any extension are simply not VM files.
    fn is_vm_file(path: &Path) -> bool {
        path.is_file() && path.extension() == Some(OsStr::new("vm"))
    }

    /// Fresh code generator state for the VM file called `vm_ref`.
    fn new_code_gen(vm_ref: String) -> CodeGen {
        CodeGen {
            acc: String::new(),
            comp_counter: 0,
            call_counter: 0,
            vm_ref,
            current_function: String::new(),
        }
    }

    fn translate_single_file(filename: &String) {
        let path = Path::new(filename);
        let vm_ref = match file_stem_of(path) {
            Some(stem) => stem,
            None => {
                println!("Cannot derive a name from {}!", filename);
                return;
            }
        };
        let mut code_gen = new_code_gen(vm_ref);
        let commands = parse_file(filename);
        generate_code(&commands, &mut code_gen);
        // Only the extension is replaced; a ".vm" elsewhere in the path must
        // stay untouched.
        write_output_file(&path.with_extension("asm"), &code_gen.acc);
    }

    fn translate_dir(dirname: &String) {
        println!("=> {}", dirname);
        let dir = Path::new(dirname);

        // Paths such as "." or ".." have no file stem of their own; resolve
        // them to find the real directory name.
        let program_name = file_stem_of(dir)
            .or_else(|| fs::canonicalize(dir).ok().and_then(|real| file_stem_of(&real)));
        let program_name = match program_name {
            Some(name) => name,
            None => {
                println!("Cannot derive an output name from {}!", dirname);
                return;
            }
        };

        let entries = match fs::read_dir(dir) {
            Ok(entries) => entries,
            Err(why) => {
                println!("Couldn't read {}: {}", dirname, why);
                return;
            }
        };

        let mut vm_files: Vec<String> = Vec::new();
        for entry in entries {
            let path = match entry {
                Ok(entry) => entry.path(),
                Err(why) => {
                    println!("Couldn't read an entry of {}: {}", dirname, why);
                    continue;
                }
            };
            if !is_vm_file(&path) {
                continue;
            }
            match path.to_str() {
                Some(name) => vm_files.push(name.to_string()),
                None => println!("Skipping {}: path is not valid UTF-8", path.display()),
            }
        }
        // The order of directory entries is platform dependent; sorting makes
        // the generated file reproducible.
        vm_files.sort();

        let mut code_gen = new_code_gen(String::new());
        generate_bootstrap_code(&mut code_gen);
        for filename in &vm_files {
            println!(" -> {}", filename);
            // is_vm_file only accepts names with a "vm" extension, so a stem
            // is always present here.
            code_gen.vm_ref = file_stem_of(Path::new(filename)).unwrap_or_default();
            code_gen.current_function = String::new();
            code_gen.acc.push_str(&format!("// Start {}\n\n", filename));
            let commands = parse_file(filename);
            generate_code(&commands, &mut code_gen);
        }

        let output_path = dir.join(format!("{}.asm", program_name));
        write_output_file(&output_path, &code_gen.acc);
    }

    let args: Vec<String> = env::args().collect();
    for arg in args.iter().skip(1) {
        let path = Path::new(arg);
        if is_vm_file(path) {
            translate_single_file(arg);
        } else if path.is_dir() {
            translate_dir(arg);
        } else {
            println!("{} is not a *.vm file or directory!", arg);
        }
    }
}