Add VM translator, Jack analyzer and compiler

This commit is contained in:
2020-11-15 15:53:24 -05:00
parent f2a0b6d531
commit fb224f31ed
12 changed files with 2708 additions and 0 deletions

10
jack_analyzer/Cargo.toml Normal file
View File

@@ -0,0 +1,10 @@
[package]
name = "jack_analyzer"
version = "0.1.0"
authors = ["Felix Martin <mail@felixm.de>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
xmltree = "^0"

122
jack_analyzer/src/main.rs Normal file
View File

@@ -0,0 +1,122 @@
mod parser;
mod tokenizer;
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::fs::File;
use std::path::Path;
use xmltree;
use xmltree::Element;
use xmltree::EmitterConfig;
use xmltree::XMLNode;
/// Builds the XML element for a single token.
///
/// The element tag is derived from the token variant (`keyword`, `symbol`,
/// `identifier`, `integerConstant`, `stringConstant`) and its only child is a
/// text node holding the token text.
///
/// The XML writer in this file runs with `perform_escaping = false`, so the
/// text has to be escaped here. `<`, `>` and `&` are escaped wherever they
/// occur in the text (not only when they are the whole token), because an
/// unescaped occurrence inside e.g. a string constant would produce malformed
/// XML. A token consisting solely of `"` or `'` is still mapped to `&quot;` /
/// `&apos;` as before.
pub fn create_node(token: &tokenizer::Token) -> xmltree::XMLNode {
    use tokenizer::Token::*;
    let (tag, text) = match token {
        Keyword(text) => ("keyword", text),
        Symbol(text) => ("symbol", text),
        Identifier(text) => ("identifier", text),
        IntConst(text) => ("integerConstant", text),
        StringConst(text) => ("stringConstant", text),
    };

    let escaped = match text.as_str() {
        "\"" => "&quot;".to_string(),
        "'" => "&apos;".to_string(),
        raw => {
            let mut out = String::with_capacity(raw.len());
            for c in raw.chars() {
                match c {
                    '<' => out.push_str("&lt;"),
                    '>' => out.push_str("&gt;"),
                    '&' => out.push_str("&amp;"),
                    _ => out.push(c),
                }
            }
            out
        }
    };

    let mut element = xmltree::Element::new(tag);
    element.children.push(XMLNode::Text(escaped));
    xmltree::XMLNode::Element(element)
}
fn main() {
fn is_jack_file(filename: &Path) -> bool {
let p = Path::new(filename);
if p.is_file() && (p.extension().unwrap() == OsStr::new("jack")) {
return true;
}
return false;
}
fn translate_dir(directory: &Path) {
let paths = fs::read_dir(directory).unwrap();
for path in paths {
let filename = path.unwrap().path();
if is_jack_file(&filename) {
translate_single_file(filename.as_path())
}
}
}
fn write_xml(file: &String, root: xmltree::Element) {
let mut config = EmitterConfig::new();
config.perform_indent = true;
// With xmltree 0.1.0
// config.perform_escaping = true;
// does not work when perform_indent is also true.
// Therefore I am escaping manually and setting this to false.
config.perform_escaping = false;
config.normalize_empty_elements = false;
let f = File::create(file.to_string()).unwrap();
root.write_with_config(f, config).unwrap();
// With xmltree 0.1.0
// config.write_document_declaration = false;
// did not work so I am removing that line manually.
remove_first_line(&file);
}
fn write_token_xml(input_file: &Path, tokens: &Vec<tokenizer::Token>) {
let mut root = Element::new("tokens");
for token in tokens {
let n = create_node(token);
root.children.push(n);
}
let input_str = input_file.to_str().unwrap();
let output_file = str::replace(input_str, ".jack", "T.xml");
write_xml(&output_file, root);
}
fn write_parse_tree_xml(input_file: &Path, root: xmltree::Element) {
let input_str = input_file.to_str().unwrap();
let output_file = str::replace(input_str, ".jack", ".xml");
write_xml(&output_file, root);
}
fn remove_first_line(filename: &String) {
let contents = fs::read_to_string(filename).expect("Unable to read file");
let mut lines = contents.lines();
let mut output = String::new();
lines.next(); // skipping the first line
for line in lines {
output.push_str(line);
output.push_str("\n");
}
fs::write(filename, output).expect("Unable to write file");
}
fn translate_single_file(input_file: &Path) {
let tokens = tokenizer::tokenize_file(input_file);
write_token_xml(input_file, &tokens);
let parse_tree = parser::parse_tokens(tokens);
write_parse_tree_xml(input_file, parse_tree);
}
let args: Vec<String> = env::args().collect();
for arg in &args[1..] {
let arg_path = Path::new(arg);
if is_jack_file(&arg_path) {
translate_single_file(&arg_path);
} else if arg_path.is_dir() {
translate_dir(&arg_path);
} else {
println!("{} is not a *.jack file or directory!", arg);
}
}
}

412
jack_analyzer/src/parser.rs Normal file
View File

@@ -0,0 +1,412 @@
use crate::create_node;
use crate::tokenizer::Token;
use crate::tokenizer::Token::*;
use xmltree::Element;
/// Token stream used by the parser: a peekable iterator over a borrowed
/// slice of tokens, giving one token of lookahead via `peek()`.
type Tokens<'a> = std::iter::Peekable<std::slice::Iter<'a, Token>>;
/// Parses a complete token list into a `class` XML element.
///
/// Prints a notice on stdout when tokens are left over after the class has
/// been parsed. Panics (inside the helpers) if the token list ends early.
pub fn parse_tokens(tokens: Vec<Token>) -> Element {
    let mut class_element = Element::new("class");
    let mut stream: Tokens = tokens.iter().peekable();
    compile_class(&mut stream, &mut class_element);
    let remaining = stream.len();
    if remaining != 0 {
        println!("Did not parse all tokens!");
    }
    class_element
}
/// Consumes the next token and appends it to `tree` if it is the keyword
/// `expected`; otherwise reports the mismatch on stdout (the token is still
/// consumed). Panics when no token is left.
fn eat_specific_keyword_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
    let current = tokens.next().unwrap();
    let is_expected = matches!(current, Keyword(word) if word == expected);
    if is_expected {
        tree.children.push(create_node(current));
    } else {
        println!("Unexpected {:?} - Keyword(\"{}\")", current, expected);
    }
}
/// Consumes the next token and appends it to `tree` if it is the symbol
/// `expected`; otherwise reports the mismatch on stdout (the token is still
/// consumed). Panics when no token is left.
fn eat_specific_symbol_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
    let current = tokens.next().unwrap();
    let is_expected = matches!(current, Symbol(sym) if sym == expected);
    if is_expected {
        tree.children.push(create_node(current));
    } else {
        println!("Unexpected {:?} - Symbol(\"{}\")", current, expected);
    }
}
/// Consumes the next token and appends it to `tree` if it is an identifier;
/// any other token is consumed and reported on stdout. Panics when no token
/// is left.
fn eat_identifier_token(tokens: &mut Tokens, tree: &mut Element) {
    let current = tokens.next().unwrap();
    if matches!(current, Identifier(_)) {
        tree.children.push(create_node(current));
    } else {
        println!("Unexpected {:?} - Identifier(_)", current);
    }
}
/// Consumes the next token and appends it to `tree` if it is any keyword;
/// any other token is consumed and reported on stdout. Panics when no token
/// is left.
fn eat_keyword_token(tokens: &mut Tokens, tree: &mut Element) {
    let current = tokens.next().unwrap();
    if matches!(current, Keyword(_)) {
        tree.children.push(create_node(current));
    } else {
        println!("Unexpected {:?} - Keyword(_)", current);
    }
}
/// Consumes a type: either a keyword or an identifier (class name).
///
/// The upcoming token is only inspected here; consumption is delegated to the
/// keyword/identifier helpers. Any other token is reported on stdout and left
/// in the stream. Panics when no token is left.
fn eat_type_token(tokens: &mut Tokens, tree: &mut Element) {
    // Copy the `&Token` out of the peeked slot so the stream can be advanced.
    let upcoming = *tokens.peek().unwrap();
    if matches!(upcoming, Keyword(_)) {
        eat_keyword_token(tokens, tree);
    } else if matches!(upcoming, Identifier(_)) {
        eat_identifier_token(tokens, tree);
    } else {
        println!("Unexpected {:?} - Keyword(type)/Identifier(_)", upcoming);
    }
}
/// Parses `class <name> { classVarDec* subroutineDec* }` into `tree`.
///
/// Class variable declarations start with `static` or `field`; subroutine
/// declarations start with `constructor`, `function` or `method`.
fn compile_class(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "class", tree);
    eat_identifier_token(tokens, tree);
    eat_specific_symbol_token(tokens, "{", tree);

    // classVarDec*
    while matches!(tokens.peek().unwrap(), Keyword(kw) if kw == "static" || kw == "field") {
        let mut var_dec = Element::new("classVarDec");
        compile_class_var_dec(tokens, &mut var_dec);
        tree.children.push(xmltree::XMLNode::Element(var_dec));
    }

    // subroutineDec*
    while matches!(
        tokens.peek().unwrap(),
        Keyword(kw) if kw == "constructor" || kw == "function" || kw == "method"
    ) {
        let mut subroutine = Element::new("subroutineDec");
        compile_subroutine(tokens, &mut subroutine);
        tree.children.push(xmltree::XMLNode::Element(subroutine));
    }

    eat_specific_symbol_token(tokens, "}", tree);
}
/// Parses a class variable declaration: a keyword, a type, one identifier,
/// any number of `, identifier` pairs and a closing `;`.
fn compile_class_var_dec(tokens: &mut Tokens, tree: &mut Element) {
    eat_keyword_token(tokens, tree);
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);
    while matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ",") {
        eat_specific_symbol_token(tokens, ",", tree);
        eat_identifier_token(tokens, tree);
    }
    eat_specific_symbol_token(tokens, ";", tree);
}
/// Parses a subroutine declaration: kind keyword, return type, name,
/// a parenthesized `parameterList` and a `subroutineBody`.
fn compile_subroutine(tokens: &mut Tokens, tree: &mut Element) {
    // Header: kind, return type and name.
    eat_keyword_token(tokens, tree);
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);

    // Parameters, always emitted as a (possibly empty) element.
    eat_specific_symbol_token(tokens, "(", tree);
    let mut parameters = Element::new("parameterList");
    compile_parameter_list(tokens, &mut parameters);
    tree.children.push(xmltree::XMLNode::Element(parameters));
    eat_specific_symbol_token(tokens, ")", tree);

    let mut body = Element::new("subroutineBody");
    compile_subroutine_body(tokens, &mut body);
    tree.children.push(xmltree::XMLNode::Element(body));
}
/// Parses a possibly empty, comma-separated list of `type name` pairs.
/// Leaves the closing `)` in the stream for the caller.
fn compile_parameter_list(tokens: &mut Tokens, tree: &mut Element) {
    // An immediate `)` means there are no parameters at all.
    if matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ")") {
        return;
    }
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);
    while matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ",") {
        eat_specific_symbol_token(tokens, ",", tree);
        eat_type_token(tokens, tree);
        eat_identifier_token(tokens, tree);
    }
}
/// Parses `{ varDec* statements }` into `tree`.
fn compile_subroutine_body(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_symbol_token(tokens, "{", tree);

    // Local variable declarations come first, each introduced by `var`.
    while matches!(tokens.peek().unwrap(), Keyword(kw) if kw == "var") {
        let mut var_dec = Element::new("varDec");
        compile_var_dec(tokens, &mut var_dec);
        tree.children.push(xmltree::XMLNode::Element(var_dec));
    }

    let mut statements = Element::new("statements");
    compile_statements(tokens, &mut statements);
    tree.children.push(xmltree::XMLNode::Element(statements));

    eat_specific_symbol_token(tokens, "}", tree);
}
/// Parses `var type name (, name)* ;` into `tree`.
fn compile_var_dec(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "var", tree);
    eat_type_token(tokens, tree);
    eat_identifier_token(tokens, tree);
    while matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ",") {
        eat_specific_symbol_token(tokens, ",", tree);
        eat_identifier_token(tokens, tree);
    }
    eat_specific_symbol_token(tokens, ";", tree);
}
/// Parses a sequence of statements, appending one child element per
/// statement to `tree`.
///
/// Stops (without consuming) at the first token that is not one of the
/// statement keywords `do`, `let`, `while`, `return` or `if`.
fn compile_statements(tokens: &mut Tokens, tree: &mut Element) {
    loop {
        // Copy the `&Token` out of the peeked slot so the stream can be
        // handed to the statement parsers below.
        let upcoming = *tokens.peek().unwrap();
        let keyword = match upcoming {
            Keyword(kw) => kw.as_str(),
            _ => break,
        };

        let mut statement: Element;
        match keyword {
            "do" => {
                statement = Element::new("doStatement");
                compile_do(tokens, &mut statement);
            }
            "let" => {
                statement = Element::new("letStatement");
                compile_let(tokens, &mut statement);
            }
            "while" => {
                statement = Element::new("whileStatement");
                compile_while(tokens, &mut statement);
            }
            "return" => {
                statement = Element::new("returnStatement");
                compile_return(tokens, &mut statement);
            }
            "if" => {
                statement = Element::new("ifStatement");
                compile_if(tokens, &mut statement);
            }
            _ => break,
        }
        tree.children.push(xmltree::XMLNode::Element(statement));
    }
}
/// Parses `do <identifier> <subroutine call tail> ;` into `tree`.
fn compile_do(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "do", tree);
    // The leading identifier is consumed here; the call helper handles the
    // optional `.name` part and the argument list.
    eat_identifier_token(tokens, tree);
    compile_subroutine_call(tokens, tree);
    eat_specific_symbol_token(tokens, ";", tree);
}
/// Parses `let name ([ expression ])? = expression ;` into `tree`.
fn compile_let(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "let", tree);
    eat_identifier_token(tokens, tree);

    // Optional array index on the assignment target.
    if matches!(tokens.peek().unwrap(), Symbol(sym) if sym == "[") {
        eat_specific_symbol_token(tokens, "[", tree);
        let mut index_expression = Element::new("expression");
        compile_expression(tokens, &mut index_expression);
        tree.children.push(xmltree::XMLNode::Element(index_expression));
        eat_specific_symbol_token(tokens, "]", tree);
    }

    eat_specific_symbol_token(tokens, "=", tree);
    let mut value_expression = Element::new("expression");
    compile_expression(tokens, &mut value_expression);
    tree.children.push(xmltree::XMLNode::Element(value_expression));
    eat_specific_symbol_token(tokens, ";", tree);
}
/// Parses `while ( expression ) { statements }` into `tree`.
fn compile_while(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "while", tree);

    // Loop condition.
    eat_specific_symbol_token(tokens, "(", tree);
    let mut condition = Element::new("expression");
    compile_expression(tokens, &mut condition);
    tree.children.push(xmltree::XMLNode::Element(condition));
    eat_specific_symbol_token(tokens, ")", tree);

    // Loop body.
    eat_specific_symbol_token(tokens, "{", tree);
    let mut body = Element::new("statements");
    compile_statements(tokens, &mut body);
    tree.children.push(xmltree::XMLNode::Element(body));
    eat_specific_symbol_token(tokens, "}", tree);
}
/// Parses `return expression? ;` into `tree`.
fn compile_return(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "return", tree);

    // Anything other than an immediate `;` is treated as a return value.
    let returns_value = !matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ";");
    if returns_value {
        let mut value = Element::new("expression");
        compile_expression(tokens, &mut value);
        tree.children.push(xmltree::XMLNode::Element(value));
    }

    eat_specific_symbol_token(tokens, ";", tree);
}
/// Parses `if ( expression ) { statements } (else { statements })?`
/// into `tree`.
fn compile_if(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "if", tree);

    // Condition.
    eat_specific_symbol_token(tokens, "(", tree);
    let mut condition = Element::new("expression");
    compile_expression(tokens, &mut condition);
    tree.children.push(xmltree::XMLNode::Element(condition));
    eat_specific_symbol_token(tokens, ")", tree);

    // Then-branch.
    eat_specific_symbol_token(tokens, "{", tree);
    let mut then_branch = Element::new("statements");
    compile_statements(tokens, &mut then_branch);
    tree.children.push(xmltree::XMLNode::Element(then_branch));
    eat_specific_symbol_token(tokens, "}", tree);

    // Optional else-branch.
    if matches!(tokens.peek().unwrap(), Keyword(kw) if kw == "else") {
        eat_specific_keyword_token(tokens, "else", tree);
        eat_specific_symbol_token(tokens, "{", tree);
        let mut else_branch = Element::new("statements");
        compile_statements(tokens, &mut else_branch);
        tree.children.push(xmltree::XMLNode::Element(else_branch));
        eat_specific_symbol_token(tokens, "}", tree);
    }
}
/// Parses `term (op term)*` into `tree`, where `op` is any symbol whose first
/// character is listed in `OPERATORS`.
fn compile_expression(tokens: &mut Tokens, tree: &mut Element) {
    let mut first_term = Element::new("term");
    compile_term(tokens, &mut first_term);
    tree.children.push(xmltree::XMLNode::Element(first_term));

    loop {
        // Copy the `&Token` out of the peeked slot so the operator text can
        // still be used after the stream is advanced.
        let upcoming = *tokens.peek().unwrap();
        let operator = match upcoming {
            Symbol(sym) if OPERATORS.contains(&sym.chars().next().unwrap()) => sym,
            _ => break,
        };
        eat_specific_symbol_token(tokens, operator, tree);

        let mut next_term = Element::new("term");
        compile_term(tokens, &mut next_term);
        tree.children.push(xmltree::XMLNode::Element(next_term));
    }
}
/// Parses a single term into `tree`.
///
/// Handled forms, decided by the first token (which is always consumed):
/// - identifier, optionally followed by `[ expression ]` (array access) or by
///   a subroutine call tail starting with `(` or `.`;
/// - integer or string constant;
/// - keyword constant `true`, `false`, `null` or `this`;
/// - unary `-` / `~` followed by a nested term;
/// - `( expression )`.
///
/// Any other token is reported on stdout in the same format the `eat_*`
/// helpers use, instead of being dropped silently.
///
/// Panics when the token stream ends where a token is required.
fn compile_term(tokens: &mut Tokens, tree: &mut Element) {
    let token = tokens.next().unwrap();
    match token {
        Identifier(_) => {
            tree.children.push(create_node(token));
            // One token of lookahead decides between array access,
            // subroutine call and a plain variable reference.
            match tokens.peek().unwrap() {
                Symbol(s) if s == "[" => {
                    eat_specific_symbol_token(tokens, "[", tree);
                    let mut child_tree = Element::new("expression");
                    compile_expression(tokens, &mut child_tree);
                    tree.children.push(xmltree::XMLNode::Element(child_tree));
                    eat_specific_symbol_token(tokens, "]", tree);
                }
                Symbol(s) if s == "(" || s == "." => {
                    compile_subroutine_call(tokens, tree);
                }
                _ => (),
            }
        }
        IntConst(_) | StringConst(_) => tree.children.push(create_node(token)),
        Keyword(s) if s == "true" || s == "false" || s == "null" || s == "this" => {
            tree.children.push(create_node(token));
        }
        Symbol(s) if s == "-" || s == "~" => {
            tree.children.push(create_node(token));
            let mut child_tree = Element::new("term");
            compile_term(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
        }
        Symbol(s) if s == "(" => {
            tree.children.push(create_node(token));
            let mut child_tree = Element::new("expression");
            compile_expression(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
            eat_specific_symbol_token(tokens, ")", tree);
        }
        // The token has already been consumed, so at least make the
        // problem visible to the user.
        unexpected => println!("Unexpected {:?} - term", unexpected),
    }
}
/// Parses a possibly empty, comma-separated list of expressions into `tree`.
/// Leaves the closing `)` in the stream for the caller.
fn compile_expression_list(tokens: &mut Tokens, tree: &mut Element) {
    // An immediate `)` means the list is empty.
    if matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ")") {
        return;
    }

    let mut first = Element::new("expression");
    compile_expression(tokens, &mut first);
    tree.children.push(xmltree::XMLNode::Element(first));

    while matches!(tokens.peek().unwrap(), Symbol(sym) if sym == ",") {
        eat_specific_symbol_token(tokens, ",", tree);
        let mut next = Element::new("expression");
        compile_expression(tokens, &mut next);
        tree.children.push(xmltree::XMLNode::Element(next));
    }
}
/// Parses the tail of a subroutine call, after its leading identifier has
/// already been consumed by the caller.
///
/// Accepts either `. name ( expressionList )` or `( expressionList )`.
/// Does nothing if the upcoming token is neither `.` nor `(`.
fn compile_subroutine_call(tokens: &mut Tokens, tree: &mut Element) {
    // Copy the `&Token` out of the peeked slot so the stream can be advanced.
    let upcoming = *tokens.peek().unwrap();
    let qualified = match upcoming {
        Symbol(sym) if sym == "." => true,
        Symbol(sym) if sym == "(" => false,
        _ => return,
    };

    if qualified {
        eat_specific_symbol_token(tokens, ".", tree);
        eat_identifier_token(tokens, tree);
    }

    eat_specific_symbol_token(tokens, "(", tree);
    let mut arguments = Element::new("expressionList");
    compile_expression_list(tokens, &mut arguments);
    tree.children.push(xmltree::XMLNode::Element(arguments));
    eat_specific_symbol_token(tokens, ")", tree);
}
/// First characters of the symbols that `compile_expression` treats as
/// binary operators between two terms.
const OPERATORS: &[char] = &['+', '-', '*', '/', '&', '|', '<', '>', '='];

View File

@@ -0,0 +1,177 @@
use std::fs;
use std::path::Path;
/// Skips any comments that start exactly at `index` and returns the index of
/// the first character after them.
///
/// Both `// ...` line comments (terminated by `\n`) and `/* ... */` block
/// comments are recognized, and directly adjacent comments are skipped in one
/// call. Whitespace between comments is not skipped. If no comment starts at
/// `index`, `index` is returned unchanged.
///
/// The function never reads past the end of `chars`: a line comment without a
/// trailing newline and an unterminated block comment both extend to the end
/// of the input, in which case `chars.len()` is returned.
fn eat_comment(chars: &Vec<char>, index: usize) -> usize {
    let mut index = index;
    loop {
        let rest = match chars.get(index..) {
            Some(rest) => rest,
            None => return index,
        };
        if rest.starts_with(&['/', '/']) {
            index = match rest.iter().position(|&c| c == '\n') {
                // Continue right after the newline.
                Some(offset) => index + offset + 1,
                None => chars.len(),
            };
        } else if rest.starts_with(&['/', '*']) {
            // Search for the terminator only after the two opening
            // characters, so that `/*/` does not close itself.
            let terminator = rest[2..]
                .windows(2)
                .position(|pair| pair[0] == '*' && pair[1] == '/');
            index = match terminator {
                Some(offset) => index + 2 + offset + 2,
                None => chars.len(),
            };
        } else {
            return index;
        }
    }
}
/// Pushes a `Token::Symbol` for the character at `index` if it is listed in
/// `SYMBOLS` and returns the index after it; otherwise returns `index`
/// unchanged and pushes nothing.
fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    let candidate = chars[index];
    if !SYMBOLS.contains(&candidate) {
        return index;
    }
    tokens.push(Token::Symbol(candidate.to_string()));
    index + 1
}
/// Reads the run of ASCII digits starting at `index`, pushes it as a
/// `Token::IntConst` and returns the index after the last digit.
///
/// Returns `index` unchanged (and pushes nothing) when no digit starts at
/// `index`. The scan stops at the end of `chars`, so a number that is the
/// last thing in the input does not cause an out-of-bounds access.
fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    let number: String = chars
        .iter()
        .skip(index)
        .take_while(|c| c.is_ascii_digit())
        .collect();
    if number.is_empty() {
        return index;
    }
    // Only ASCII digits were collected, so byte length equals char count.
    let end = index + number.len();
    tokens.push(Token::IntConst(number));
    end
}
/// Reads a double-quoted string starting at `index`, pushes its contents
/// (without the quotes) as a `Token::StringConst` and returns the index after
/// the closing quote.
///
/// Returns `index` unchanged (and pushes nothing) when no `"` is at `index`.
/// If the closing quote is missing, the problem is reported on stdout, the
/// rest of the input becomes the string contents and `chars.len()` is
/// returned, rather than indexing past the end of `chars`.
fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    if chars.get(index) != Some(&'"') {
        return index;
    }
    let body_start = index + 1;
    let closing_quote = chars[body_start..].iter().position(|&c| c == '"');
    let (body_end, next_index) = match closing_quote {
        Some(len) => (body_start + len, body_start + len + 1),
        None => {
            println!("Unterminated string constant");
            (chars.len(), chars.len())
        }
    };
    let text: String = chars[body_start..body_end].iter().collect();
    tokens.push(Token::StringConst(text));
    next_index
}
/// Reads a word starting at `index` and pushes it as `Token::Keyword` when it
/// is listed in `KEYWORDS`, otherwise as `Token::Identifier`. Returns the
/// index after the word.
///
/// A word starts with an ASCII letter or `_` and continues with alphanumeric
/// characters or `_`, so identifiers such as `my_var` stay a single token.
/// Returns `index` unchanged (and pushes nothing) when no word starts at
/// `index`. The scan stops at the end of `chars`, so a word that is the last
/// thing in the input does not cause an out-of-bounds access.
fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
    let is_word_start = |c: char| c.is_ascii_alphabetic() || c == '_';
    let is_word_part = |c: char| c.is_alphanumeric() || c == '_';

    match chars.get(index) {
        Some(&c) if is_word_start(c) => (),
        _ => return index,
    }

    // Every valid start character is also a valid part character, so the
    // first character is picked up by the same scan.
    let word: String = chars[index..]
        .iter()
        .copied()
        .take_while(|&c| is_word_part(c))
        .collect();
    // Count chars, not bytes: continuation characters may be non-ASCII.
    let end = index + word.chars().count();

    let token = if KEYWORDS.contains(&word.as_str()) {
        Token::Keyword(word)
    } else {
        Token::Identifier(word)
    };
    tokens.push(token);
    end
}
/// Reads `file` and splits its contents into Jack tokens.
///
/// Comments and whitespace are skipped. Characters that do not start any
/// token are reported on stdout and skipped.
///
/// # Panics
///
/// Panics if the file cannot be read as UTF-8 text.
pub fn tokenize_file(file: &Path) -> Vec<Token> {
    let mut tokens = vec![];
    let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
    let length = chars.len();
    let mut index: usize = 0;
    while index < length {
        index = eat_comment(&chars, index);
        // Skipping a comment may have moved the index to the end of input.
        let c = match chars.get(index) {
            Some(&c) => c,
            None => break,
        };
        let start = index;
        if c.is_whitespace() {
            index += 1;
        } else if SYMBOLS.contains(&c) {
            index = parse_symbol(&chars, &mut tokens, index);
        } else if c.is_ascii_alphabetic() || c == '_' {
            index = parse_keyword_or_identifier(&chars, &mut tokens, index);
        } else if c.is_ascii_digit() {
            index = parse_integer_constant(&chars, &mut tokens, index);
        } else if c == '"' {
            index = parse_string_constant(&chars, &mut tokens, index);
        } else {
            println!("Unexpected char {:?}", c);
            index += 1;
        }
        // A sub-parser that rejects the character returns the index
        // unchanged; skip the character so the loop always makes progress.
        if index == start {
            println!("Unexpected char {:?}", c);
            index += 1;
        }
    }
    tokens
}
/// A lexical token produced by the tokenizer. Every variant carries the
/// token's text as it appeared in the source.
#[derive(Debug)]
pub enum Token {
    /// A word listed in `KEYWORDS`.
    Keyword(String),
    /// A single character listed in `SYMBOLS`.
    Symbol(String),
    /// A word that is not a keyword.
    Identifier(String),
    /// A run of ASCII digits, kept as text.
    IntConst(String),
    /// The contents of a double-quoted string, without the quotes.
    StringConst(String),
}
/// Words that `parse_keyword_or_identifier` classifies as keywords rather
/// than identifiers.
// NOTE(review): the constant is referenced by `parse_keyword_or_identifier`,
// so the `dead_code` allowance looks unnecessary — confirm before removing.
#[allow(dead_code)]
const KEYWORDS: &[&str] = &[
"class",
"constructor",
"function",
"method",
"field",
"static",
"var",
"int",
"char",
"boolean",
"void",
"true",
"false",
"null",
"this",
"let",
"do",
"if",
"else",
"while",
"return",
];
/// Single characters that the tokenizer emits as `Token::Symbol`.
const SYMBOLS: &[char] = &[
'{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/', '&', '|', '<', '>', '=', '~',
];
/// Debug helper: prints `chars[start..stop]` as a quoted, escaped string.
/// Panics if the range is out of bounds.
#[allow(dead_code)]
fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
    let mut text = String::new();
    for &c in &chars[start..stop] {
        text.push(c);
    }
    println!("{:?}", text);
}