Add VM translator, Jack analyzer and compiler
This commit is contained in:
10
jack_analyzer/Cargo.toml
Normal file
10
jack_analyzer/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "jack_analyzer"
|
||||
version = "0.1.0"
|
||||
authors = ["Felix Martin <mail@felixm.de>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
xmltree = "^0"
|
||||
122
jack_analyzer/src/main.rs
Normal file
122
jack_analyzer/src/main.rs
Normal file
@@ -0,0 +1,122 @@
|
||||
mod parser;
|
||||
mod tokenizer;
|
||||
use std::env;
|
||||
use std::ffi::OsStr;
|
||||
use std::fs;
|
||||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use xmltree;
|
||||
use xmltree::Element;
|
||||
use xmltree::EmitterConfig;
|
||||
use xmltree::XMLNode;
|
||||
|
||||
pub fn create_node(token: &tokenizer::Token) -> xmltree::XMLNode {
|
||||
use tokenizer::Token::*;
|
||||
let (tag, text) = match token {
|
||||
Keyword(text) => ("keyword", text),
|
||||
Symbol(text) => ("symbol", text),
|
||||
Identifier(text) => ("identifier", text),
|
||||
IntConst(text) => ("integerConstant", text),
|
||||
StringConst(text) => ("stringConstant", text),
|
||||
};
|
||||
let mut e = xmltree::Element::new(tag);
|
||||
let text = match text.as_str() {
|
||||
"<" => "<",
|
||||
">" => ">",
|
||||
"\"" => """,
|
||||
"&" => "&",
|
||||
"'" => "'",
|
||||
_ => text.as_str(),
|
||||
};
|
||||
let t = XMLNode::Text(text.to_string());
|
||||
e.children.push(t);
|
||||
return xmltree::XMLNode::Element(e);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
fn is_jack_file(filename: &Path) -> bool {
|
||||
let p = Path::new(filename);
|
||||
if p.is_file() && (p.extension().unwrap() == OsStr::new("jack")) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
fn translate_dir(directory: &Path) {
|
||||
let paths = fs::read_dir(directory).unwrap();
|
||||
for path in paths {
|
||||
let filename = path.unwrap().path();
|
||||
if is_jack_file(&filename) {
|
||||
translate_single_file(filename.as_path())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write_xml(file: &String, root: xmltree::Element) {
|
||||
let mut config = EmitterConfig::new();
|
||||
config.perform_indent = true;
|
||||
// With xmltree 0.1.0
|
||||
// config.perform_escaping = true;
|
||||
// does not work when perform_indent is also true.
|
||||
// Therefore I am escaping manually and setting this to false.
|
||||
config.perform_escaping = false;
|
||||
config.normalize_empty_elements = false;
|
||||
let f = File::create(file.to_string()).unwrap();
|
||||
root.write_with_config(f, config).unwrap();
|
||||
// With xmltree 0.1.0
|
||||
// config.write_document_declaration = false;
|
||||
// did not work so I am removing that line manually.
|
||||
remove_first_line(&file);
|
||||
}
|
||||
|
||||
fn write_token_xml(input_file: &Path, tokens: &Vec<tokenizer::Token>) {
|
||||
let mut root = Element::new("tokens");
|
||||
|
||||
for token in tokens {
|
||||
let n = create_node(token);
|
||||
root.children.push(n);
|
||||
}
|
||||
|
||||
let input_str = input_file.to_str().unwrap();
|
||||
let output_file = str::replace(input_str, ".jack", "T.xml");
|
||||
write_xml(&output_file, root);
|
||||
}
|
||||
|
||||
fn write_parse_tree_xml(input_file: &Path, root: xmltree::Element) {
|
||||
let input_str = input_file.to_str().unwrap();
|
||||
let output_file = str::replace(input_str, ".jack", ".xml");
|
||||
write_xml(&output_file, root);
|
||||
}
|
||||
|
||||
fn remove_first_line(filename: &String) {
|
||||
let contents = fs::read_to_string(filename).expect("Unable to read file");
|
||||
let mut lines = contents.lines();
|
||||
let mut output = String::new();
|
||||
lines.next(); // skipping the first line
|
||||
for line in lines {
|
||||
output.push_str(line);
|
||||
output.push_str("\n");
|
||||
}
|
||||
fs::write(filename, output).expect("Unable to write file");
|
||||
}
|
||||
|
||||
fn translate_single_file(input_file: &Path) {
|
||||
let tokens = tokenizer::tokenize_file(input_file);
|
||||
write_token_xml(input_file, &tokens);
|
||||
let parse_tree = parser::parse_tokens(tokens);
|
||||
write_parse_tree_xml(input_file, parse_tree);
|
||||
}
|
||||
|
||||
let args: Vec<String> = env::args().collect();
|
||||
|
||||
for arg in &args[1..] {
|
||||
let arg_path = Path::new(arg);
|
||||
if is_jack_file(&arg_path) {
|
||||
translate_single_file(&arg_path);
|
||||
} else if arg_path.is_dir() {
|
||||
translate_dir(&arg_path);
|
||||
} else {
|
||||
println!("{} is not a *.jack file or directory!", arg);
|
||||
}
|
||||
}
|
||||
}
|
||||
412
jack_analyzer/src/parser.rs
Normal file
412
jack_analyzer/src/parser.rs
Normal file
@@ -0,0 +1,412 @@
|
||||
use crate::create_node;
|
||||
use crate::tokenizer::Token;
|
||||
use crate::tokenizer::Token::*;
|
||||
use xmltree::Element;
|
||||
|
||||
// Peekable iterator over the token slice; shared by every parse function
// so they can look one token ahead before consuming.
type Tokens<'a> = std::iter::Peekable<std::slice::Iter<'a, Token>>;
|
||||
|
||||
pub fn parse_tokens(tokens: Vec<Token>) -> Element {
|
||||
let mut tokens: Tokens = tokens.iter().peekable();
|
||||
let mut root = Element::new("class");
|
||||
compile_class(&mut tokens, &mut root);
|
||||
if tokens.len() != 0 {
|
||||
println!("Did not parse all tokens!");
|
||||
}
|
||||
root
|
||||
}
|
||||
|
||||
fn eat_specific_keyword_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Keyword(string) if string == expected => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Keyword(\"{}\")", t, expected),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_specific_symbol_token(tokens: &mut Tokens, expected: &str, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Symbol(string) if string == expected => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Symbol(\"{}\")", t, expected),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_identifier_token(tokens: &mut Tokens, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Identifier(_) => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Identifier(_)", t),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_keyword_token(tokens: &mut Tokens, tree: &mut Element) {
|
||||
let token = tokens.next().unwrap();
|
||||
match token {
|
||||
Keyword(_) => tree.children.push(create_node(token)),
|
||||
t => println!("Unexpected {:?} - Keyword(_)", t),
|
||||
}
|
||||
}
|
||||
|
||||
fn eat_type_token(tokens: &mut Tokens, tree: &mut Element) {
|
||||
let token = tokens.peek().unwrap();
|
||||
match token {
|
||||
Keyword(_) => eat_keyword_token(tokens, tree),
|
||||
Identifier(_) => eat_identifier_token(tokens, tree),
|
||||
t => println!("Unexpected {:?} - Keyword(type)/Identifier(_)", t),
|
||||
}
|
||||
}
|
||||
|
||||
/// class: 'class' className '{' classVarDec* subroutineDec* '}'
/// Entry point of the recursive-descent parser; everything in a Jack
/// file lives inside a single class.
fn compile_class(tokens: &mut Tokens, tree: &mut Element) {
    eat_specific_keyword_token(tokens, "class", tree);
    eat_identifier_token(tokens, tree);
    eat_specific_symbol_token(tokens, "{", tree);

    // classVarDec*
    // Declarations start with 'static' or 'field'; the first other
    // token ends the declaration section.
    loop {
        match tokens.peek().unwrap() {
            Keyword(string) if string == "static" || string == "field" => {
                let mut child_tree = Element::new("classVarDec");
                compile_class_var_dec(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }

    // subroutineDec*
    // Each subroutine is introduced by one of the three keywords below.
    loop {
        match tokens.peek().unwrap() {
            Keyword(string)
                if string == "constructor" || string == "function" || string == "method" =>
            {
                let mut child_tree = Element::new("subroutineDec");
                compile_subroutine(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }
    eat_specific_symbol_token(tokens, "}", tree);
}
|
||||
|
||||
fn compile_class_var_dec(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_keyword_token(tokens, tree);
|
||||
eat_type_token(tokens, tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
loop {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "," => {
|
||||
eat_specific_symbol_token(tokens, ",", tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
eat_specific_symbol_token(tokens, ";", tree);
|
||||
}
|
||||
|
||||
fn compile_subroutine(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_keyword_token(tokens, tree);
|
||||
eat_type_token(tokens, tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
eat_specific_symbol_token(tokens, "(", tree);
|
||||
|
||||
let mut child_tree = Element::new("parameterList");
|
||||
compile_parameter_list(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
|
||||
eat_specific_symbol_token(tokens, ")", tree);
|
||||
|
||||
let mut child_tree = Element::new("subroutineBody");
|
||||
compile_subroutine_body(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
|
||||
fn compile_parameter_list(tokens: &mut Tokens, tree: &mut Element) {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == ")" => return,
|
||||
_ => (),
|
||||
}
|
||||
eat_type_token(tokens, tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
|
||||
loop {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "," => {
|
||||
eat_specific_symbol_token(tokens, ",", tree);
|
||||
eat_type_token(tokens, tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_subroutine_body(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_specific_symbol_token(tokens, "{", tree);
|
||||
|
||||
loop {
|
||||
match tokens.peek().unwrap() {
|
||||
Keyword(s) if s == "var" => {
|
||||
let mut child_tree = Element::new("varDec");
|
||||
compile_var_dec(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
_ => break,
|
||||
};
|
||||
}
|
||||
|
||||
let mut child_tree = Element::new("statements");
|
||||
compile_statements(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
|
||||
eat_specific_symbol_token(tokens, "}", tree);
|
||||
}
|
||||
|
||||
fn compile_var_dec(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "var", tree);
|
||||
eat_type_token(tokens, tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
|
||||
loop {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "," => {
|
||||
eat_specific_symbol_token(tokens, ",", tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
eat_specific_symbol_token(tokens, ";", tree);
|
||||
}
|
||||
|
||||
fn compile_statements(tokens: &mut Tokens, tree: &mut Element) {
|
||||
loop {
|
||||
match tokens.peek().unwrap() {
|
||||
Keyword(s) if s == "do" => {
|
||||
let mut child_tree = Element::new("doStatement");
|
||||
compile_do(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
Keyword(s) if s == "let" => {
|
||||
let mut child_tree = Element::new("letStatement");
|
||||
compile_let(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
Keyword(s) if s == "while" => {
|
||||
let mut child_tree = Element::new("whileStatement");
|
||||
compile_while(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
Keyword(s) if s == "return" => {
|
||||
let mut child_tree = Element::new("returnStatement");
|
||||
compile_return(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
Keyword(s) if s == "if" => {
|
||||
let mut child_tree = Element::new("ifStatement");
|
||||
compile_if(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_do(tokens: &mut Tokens, mut tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "do", tree);
|
||||
eat_identifier_token(tokens, &mut tree);
|
||||
compile_subroutine_call(tokens, &mut tree);
|
||||
eat_specific_symbol_token(tokens, ";", tree);
|
||||
}
|
||||
|
||||
fn compile_let(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "let", tree);
|
||||
eat_identifier_token(tokens, tree);
|
||||
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "[" => {
|
||||
eat_specific_symbol_token(tokens, "[", tree);
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, "]", tree);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
eat_specific_symbol_token(tokens, "=", tree);
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, ";", tree);
|
||||
}
|
||||
|
||||
fn compile_while(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "while", tree);
|
||||
eat_specific_symbol_token(tokens, "(", tree);
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, ")", tree);
|
||||
|
||||
eat_specific_symbol_token(tokens, "{", tree);
|
||||
let mut child_tree = Element::new("statements");
|
||||
compile_statements(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, "}", tree);
|
||||
}
|
||||
|
||||
fn compile_return(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "return", tree);
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == ";" => (),
|
||||
_ => {
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
}
|
||||
eat_specific_symbol_token(tokens, ";", tree);
|
||||
}
|
||||
|
||||
fn compile_if(tokens: &mut Tokens, tree: &mut Element) {
|
||||
eat_specific_keyword_token(tokens, "if", tree);
|
||||
eat_specific_symbol_token(tokens, "(", tree);
|
||||
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
|
||||
eat_specific_symbol_token(tokens, ")", tree);
|
||||
|
||||
eat_specific_symbol_token(tokens, "{", tree);
|
||||
|
||||
let mut child_tree = Element::new("statements");
|
||||
compile_statements(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
|
||||
eat_specific_symbol_token(tokens, "}", tree);
|
||||
|
||||
match tokens.peek().unwrap() {
|
||||
Keyword(s) if s == "else" => {
|
||||
eat_specific_keyword_token(tokens, "else", tree);
|
||||
eat_specific_symbol_token(tokens, "{", tree);
|
||||
|
||||
let mut child_tree = Element::new("statements");
|
||||
compile_statements(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
|
||||
eat_specific_symbol_token(tokens, "}", tree);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
/// expression: term (op term)*
/// Parses one term, then keeps folding in `op term` pairs while the
/// lookahead token is a binary operator.
fn compile_expression(tokens: &mut Tokens, tree: &mut Element) {
    let mut child_tree = Element::new("term");
    compile_term(tokens, &mut child_tree);
    tree.children.push(xmltree::XMLNode::Element(child_tree));

    loop {
        match tokens.peek().unwrap() {
            // Symbol tokens hold a single character, so testing the first
            // char against OPERATORS is equivalent to matching the string.
            Symbol(s) if OPERATORS.contains(&s.chars().next().unwrap()) => {
                eat_specific_symbol_token(tokens, s, tree);
                let mut child_tree = Element::new("term");
                compile_term(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            _ => break,
        }
    }
}
|
||||
|
||||
/// term: integerConstant | stringConstant | keywordConstant | varName |
///       varName '[' expression ']' | subroutineCall |
///       '(' expression ')' | unaryOp term
///
/// The leading token is consumed first; identifiers then need one token
/// of lookahead to distinguish plain variables, array accesses, and
/// subroutine calls.
fn compile_term(tokens: &mut Tokens, mut tree: &mut Element) {
    let token = tokens.next().unwrap();
    match token {
        Identifier(_) => match tokens.peek().unwrap() {
            // varName '[' expression ']'
            Symbol(s) if s == "[" => {
                tree.children.push(create_node(token));
                eat_specific_symbol_token(tokens, "[", tree);
                let mut child_tree = Element::new("expression");
                compile_expression(tokens, &mut child_tree);
                tree.children.push(xmltree::XMLNode::Element(child_tree));
                eat_specific_symbol_token(tokens, "]", tree);
            }
            // subroutineCall — the call's children are appended directly
            // to the term (the reference .xml files have no
            // "subroutineCall" wrapper node).
            Symbol(s) if s == "(" || s == "." => {
                // let mut child_tree = Element::new("subroutineCall");
                tree.children.push(create_node(token));
                compile_subroutine_call(tokens, &mut tree);
                // tree.children.push(xmltree::XMLNode::Element(child_tree));
            }
            // Plain varName.
            _ => tree.children.push(create_node(token)),
        },
        IntConst(_) => tree.children.push(create_node(token)),
        StringConst(_) => tree.children.push(create_node(token)),
        // keywordConstant: true | false | null | this
        Keyword(s) if s == "true" || s == "false" || s == "null" || s == "this" => {
            tree.children.push(create_node(token));
        }
        // unaryOp term
        Symbol(s) if s == "-" || s == "~" => {
            tree.children.push(create_node(token));
            let mut child_tree = Element::new("term");
            compile_term(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
        }
        // '(' expression ')'
        Symbol(s) if s == "(" => {
            tree.children.push(create_node(token));
            let mut child_tree = Element::new("expression");
            compile_expression(tokens, &mut child_tree);
            tree.children.push(xmltree::XMLNode::Element(child_tree));
            eat_specific_symbol_token(tokens, ")", tree);
        }

        // NOTE(review): any other token is silently dropped here — a
        // malformed term produces no diagnostic, unlike the eat_* helpers.
        _ => (),
    }
}
|
||||
|
||||
fn compile_expression_list(tokens: &mut Tokens, tree: &mut Element) {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == ")" => return,
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
|
||||
loop {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "," => {
|
||||
eat_specific_symbol_token(tokens, ",", tree);
|
||||
let mut child_tree = Element::new("expression");
|
||||
compile_expression(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_subroutine_call(tokens: &mut Tokens, mut tree: &mut Element) {
|
||||
match tokens.peek().unwrap() {
|
||||
Symbol(s) if s == "." => {
|
||||
eat_specific_symbol_token(tokens, ".", &mut tree);
|
||||
eat_identifier_token(tokens, &mut tree);
|
||||
eat_specific_symbol_token(tokens, "(", &mut tree);
|
||||
let mut child_tree = Element::new("expressionList");
|
||||
compile_expression_list(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, ")", &mut tree);
|
||||
}
|
||||
Symbol(s) if s == "(" => {
|
||||
eat_specific_symbol_token(tokens, "(", &mut tree);
|
||||
let mut child_tree = Element::new("expressionList");
|
||||
compile_expression_list(tokens, &mut child_tree);
|
||||
tree.children.push(xmltree::XMLNode::Element(child_tree));
|
||||
eat_specific_symbol_token(tokens, ")", &mut tree);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
// Binary operators of the Jack expression grammar; compile_expression
// uses this to decide whether another `op term` pair follows. Unary
// '-' and '~' are handled separately in compile_term.
const OPERATORS: &[char] = &['+', '-', '*', '/', '&', '|', '<', '>', '='];
|
||||
177
jack_analyzer/src/tokenizer.rs
Normal file
177
jack_analyzer/src/tokenizer.rs
Normal file
@@ -0,0 +1,177 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Skips any run of `//` line comments and `/* ... */` block comments
/// starting at `index`, returning the index of the first character after
/// them (recursing so consecutive comments are all consumed).
///
/// All scans are bounds-checked: the original indexed past the end of
/// `chars` for a line comment without a trailing newline, an unterminated
/// block comment, or a lone '/' as the last character. The parameter is
/// `&[char]` (callers passing `&Vec<char>` still coerce).
fn eat_comment(chars: &[char], index: usize) -> usize {
    let start_index = index;
    let mut index = index;
    if index + 1 < chars.len() && chars[index] == '/' && chars[index + 1] == '/' {
        index += 2;
        // Consume up to the end of the line (or end of input).
        while index < chars.len() && chars[index] != '\n' {
            index += 1;
        }
        if index < chars.len() {
            index += 1; // skip the newline itself
        }
    } else if index + 1 < chars.len() && chars[index] == '/' && chars[index + 1] == '*' {
        index += 2;
        // Scan for the closing "*/"; stop early at end of input.
        while index + 1 < chars.len() && !(chars[index] == '*' && chars[index + 1] == '/') {
            index += 1;
        }
        // Skip the closing "*/"; an unterminated comment consumes the rest.
        index = (index + 2).min(chars.len());
    }

    if start_index != index {
        // print_vector_slice(chars, start_index, index);
        return eat_comment(chars, index);
    }
    index
}
|
||||
|
||||
fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let mut index = index;
|
||||
let c = chars[index];
|
||||
if SYMBOLS.contains(&c) {
|
||||
index += 1;
|
||||
let t = Token::Symbol(c.to_string());
|
||||
tokens.push(t);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let start_index = index;
|
||||
let mut index = index;
|
||||
let mut number = String::new();
|
||||
|
||||
while chars[index].is_ascii_digit() {
|
||||
number.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
if start_index == index {
|
||||
return index;
|
||||
}
|
||||
|
||||
let t = Token::IntConst(number);
|
||||
tokens.push(t);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let start_index = index;
|
||||
let mut index = index;
|
||||
|
||||
if chars[index] != '"' {
|
||||
return index;
|
||||
}
|
||||
index += 1;
|
||||
|
||||
while chars[index] != '"' {
|
||||
index += 1;
|
||||
}
|
||||
index += 1;
|
||||
|
||||
let s = chars[start_index + 1..index - 1].into_iter().collect();
|
||||
let t = Token::StringConst(s);
|
||||
tokens.push(t);
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
||||
let mut index = index;
|
||||
let mut token_string = String::new();
|
||||
|
||||
if !chars[index].is_ascii_alphabetic() {
|
||||
return index;
|
||||
}
|
||||
token_string.push(chars[index]);
|
||||
index += 1;
|
||||
|
||||
while chars[index].is_alphanumeric() {
|
||||
token_string.push(chars[index]);
|
||||
index += 1;
|
||||
}
|
||||
|
||||
if KEYWORDS.contains(&token_string.as_str()) {
|
||||
let t = Token::Keyword(token_string);
|
||||
tokens.push(t);
|
||||
} else {
|
||||
let t = Token::Identifier(token_string);
|
||||
tokens.push(t);
|
||||
}
|
||||
|
||||
return index;
|
||||
}
|
||||
|
||||
pub fn tokenize_file(file: &Path) -> Vec<Token> {
|
||||
let mut tokens = vec![];
|
||||
let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
|
||||
let length = chars.len();
|
||||
let mut index: usize = 0;
|
||||
|
||||
while index < length {
|
||||
index = eat_comment(&chars, index);
|
||||
let c = chars[index];
|
||||
|
||||
if c.is_whitespace() {
|
||||
index += 1;
|
||||
} else if SYMBOLS.contains(&c) {
|
||||
index = parse_symbol(&chars, &mut tokens, index);
|
||||
} else if c.is_ascii_alphabetic() {
|
||||
index = parse_keyword_or_identifier(&chars, &mut tokens, index);
|
||||
} else if c.is_ascii_digit() {
|
||||
index = parse_integer_constant(&chars, &mut tokens, index);
|
||||
} else if c == '"' {
|
||||
index = parse_string_constant(&chars, &mut tokens, index);
|
||||
} else {
|
||||
println!("Unexpected char {:?}", c);
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
/// A single Jack lexical token; each variant carries the token's source
/// text. Clone/PartialEq/Eq are derived so tokens can be compared and
/// duplicated by callers and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    Keyword(String),
    Symbol(String),
    Identifier(String),
    // Integer constant, kept as its decimal source text.
    IntConst(String),
    // Contents of a string literal, without the surrounding quotes.
    StringConst(String),
}
|
||||
|
||||
// Reserved words of the Jack language; parse_keyword_or_identifier uses
// this list to classify a word as Keyword rather than Identifier.
#[allow(dead_code)]
const KEYWORDS: &[&str] = &[
    "class",
    "constructor",
    "function",
    "method",
    "field",
    "static",
    "var",
    "int",
    "char",
    "boolean",
    "void",
    "true",
    "false",
    "null",
    "this",
    "let",
    "do",
    "if",
    "else",
    "while",
    "return",
];
|
||||
|
||||
// Single characters emitted as Symbol tokens; tokenize_file also uses
// this set to decide that a symbol (rather than a word or number)
// starts at the current position.
const SYMBOLS: &[char] = &[
    '{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/', '&', '|', '<', '>', '=', '~',
];
|
||||
|
||||
// Debug helper: prints the given char range as a quoted string.
// Unused in normal builds (see the commented call in eat_comment).
#[allow(dead_code)]
fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
    let snippet: String = chars[start..stop].iter().collect();
    println!("{:?}", snippet);
}
|
||||
Reference in New Issue
Block a user