// N2T/jack_compiler/src/parser.rs
//
// 349 lines
// 12 KiB
// Rust

use crate::code_writer::Writer;
use crate::tokenizer::identifier;
use crate::tokenizer::int_const;
use crate::tokenizer::string_const;
use crate::tokenizer::Keyword::*;
use crate::tokenizer::Symbol::*;
use crate::tokenizer::Token::{Keyword, Symbol};
use crate::tokenizer::Tokens;
/// Compiles one class declaration: the `class` keyword, the class name and a
/// curly-brace-delimited body.
///
/// The body is read as zero or more `static` / `field` declarations followed
/// by zero or more `constructor` / `function` / `method` declarations. The
/// class name is stored in `writer.class_name` before the body is compiled.
pub fn compile_class(tokens: &mut Tokens, writer: &mut Writer) {
    writer.start_class();

    tokens.eat(Keyword(Class));
    writer.class_name = tokens.eat(identifier()).to_string();
    tokens.eat(Symbol(LCurly));

    // Class-level variable declarations come first.
    loop {
        let at_var_dec = tokens.is_one_of(vec![Keyword(Static), Keyword(Field)]);
        if !at_var_dec {
            break;
        }
        compile_class_var_dec(tokens, writer);
    }

    // Then every subroutine of the class.
    loop {
        let at_subroutine = tokens.is_one_of(vec![
            Keyword(Constructor),
            Keyword(Function),
            Keyword(Method),
        ]);
        if !at_subroutine {
            break;
        }
        compile_subroutine(tokens, writer);
    }

    tokens.eat(Symbol(RCurly));
}
/// Compiles one class-level variable declaration of the form
/// `<kind> <type> <name> (, <name>)* ;`.
///
/// The leading keyword is used as the symbol kind for every declared name.
/// Any keyword is accepted here; `compile_class` only calls this function
/// when the next token is `static` or `field`.
fn compile_class_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
    let kind = tokens.eat(Keyword(AnyKeyword)).to_keyword();
    let declared_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);

    // Register the first name, then one more for every `, <identifier>` pair.
    loop {
        let name = tokens.eat(identifier()).to_string();
        writer.define_symbol(name, declared_type.clone(), kind);

        if !tokens.is_sequence(vec![Symbol(Comma), identifier()]) {
            break;
        }
        tokens.eat(Symbol(Comma));
    }

    tokens.eat(Symbol(Semicolon));
}
/// Compiles one subroutine declaration: kind keyword, return type, name,
/// parameter list and body.
///
/// For a `method` the writer's `in_method` flag is set and an implicit
/// `this` symbol of kind `Argument` is defined before the declared
/// parameters; for a `constructor` the `in_constructor` flag is set.
/// These flags are never cleared here (presumably `start_subroutine`
/// resets them — confirm against `Writer`).
///
/// # Panics
///
/// Panics if the leading keyword is not `method`, `function` or
/// `constructor`.
fn compile_subroutine(tokens: &mut Tokens, writer: &mut Writer) {
    writer.start_subroutine();

    let subroutine_kind = tokens.eat(Keyword(AnyKeyword)).to_keyword();
    let return_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);

    match subroutine_kind {
        Method => {
            writer.in_method = true;
            // NOTE(review): `this` is registered with the subroutine's
            // return-type token as its type, not the enclosing class —
            // confirm this is intended.
            writer.define_symbol("this".to_string(), return_type.clone(), Argument);
        }
        Function => {}
        Constructor => {
            writer.in_constructor = true;
        }
        unsupported => panic!("Unsupported routine type {:?}", unsupported),
    }

    writer.subroutine_name = tokens.eat(identifier()).to_string();
    compile_parameter_list(tokens, writer);
    compile_subroutine_body(tokens, writer);
}
/// Compiles a parenthesised, possibly empty parameter list of the form
/// `( <type> <name> (, <type> <name>)* )`.
///
/// Every parameter is defined on the writer as a symbol of kind `Argument`.
fn compile_parameter_list(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Symbol(LBrace));

    let has_parameters = tokens.is_one_of(vec![Keyword(AnyKeyword), identifier()]);
    if has_parameters {
        // One `<type> <name>` pair per iteration, separated by commas.
        loop {
            let param_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);
            let param_name = tokens.eat(identifier()).to_string();
            writer.define_symbol(param_name, param_type.clone(), Argument);

            if !tokens.is(Symbol(Comma)) {
                break;
            }
            tokens.eat(Symbol(Comma));
        }
    }

    tokens.eat(Symbol(RBrace));
}
/// Compiles a subroutine body: `{ <var decs>* <statements> }`.
///
/// `writer.write_function()` is called only after all `var` declarations
/// have been processed and before any statement is compiled.
fn compile_subroutine_body(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Symbol(LCurly));

    // Consume every leading `var` declaration.
    loop {
        if !tokens.is(Keyword(Var)) {
            break;
        }
        compile_var_dec(tokens, writer);
    }

    writer.write_function();
    compile_statements(tokens, writer);

    tokens.eat(Symbol(RCurly));
}
/// Compiles one local variable declaration of the form
/// `var <type> <name> (, <name>)* ;`.
///
/// Every declared name is defined on the writer as a symbol of kind `Local`
/// sharing the same type token.
fn compile_var_dec(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Var));
    let declared_type = tokens.eat_one_of(vec![Keyword(AnyKeyword), identifier()]);

    // Register the first name, then one more after each comma.
    loop {
        let name = tokens.eat(identifier()).to_string();
        writer.define_symbol(name, declared_type.clone(), Local);

        if !tokens.is(Symbol(Comma)) {
            break;
        }
        tokens.eat(Symbol(Comma));
    }

    tokens.eat(Symbol(Semicolon));
}
fn compile_statements(tokens: &mut Tokens, writer: &mut Writer) {
loop {
if tokens.is(Keyword(Let)) {
compile_let(tokens, writer);
} else if tokens.is(Keyword(If)) {
compile_if(tokens, writer);
} else if tokens.is(Keyword(While)) {
compile_while(tokens, writer);
} else if tokens.is(Keyword(Do)) {
compile_do(tokens, writer);
} else if tokens.is(Keyword(Return)) {
compile_return(tokens, writer);
} else {
break;
}
}
}
/// Compiles a `do <subroutine call> ;` statement.
///
/// After the call is emitted, one value is popped into `temp 0`
/// (presumably to discard the call's result, which a `do` statement
/// does not use).
fn compile_do(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Do));

    compile_subroutine_call(tokens, writer);
    writer.write_pop(Temp, 0);

    tokens.eat(Symbol(Semicolon));
}
/// Compiles a `let` statement in one of two forms:
///
/// * `let <name> = <expression> ;` — the expression is compiled and the
///   result assigned via `writer.write_var_assignment`.
/// * `let <name> [ <expression> ] = <expression> ;` — the target address
///   (`<name>` plus the index) is computed first, then the right-hand side,
///   and the value is stored through `pointer 1` / `that 0`.
///
/// # Panics
///
/// Panics if the variable name is followed by neither `=` nor `[`.
/// Previously such input (for example `let x;`) fell through to the
/// terminating `;` and was accepted as a statement that emits nothing.
fn compile_let(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Let));
    let var_name = tokens.eat(identifier()).to_string();

    if tokens.is(Symbol(Equal)) {
        // Plain variable assignment.
        tokens.eat(Symbol(Equal));
        compile_expression(tokens, writer);
        writer.write_var_assignment(&var_name);
    } else if tokens.is(Symbol(LSquare)) {
        // Array element assignment: compute the target address first.
        tokens.eat(Symbol(LSquare));
        writer.write_var_read(&var_name);
        compile_expression(tokens, writer);
        tokens.eat(Symbol(RSquare));
        writer.write_arithmetic(Plus);
        // Address of array access is now on stack
        tokens.eat(Symbol(Equal));
        compile_expression(tokens, writer);
        // Value to assign to array is on stack
        writer.write_pop(Temp, 0); // Buffer value to assign
        writer.write_pop(Pointer, 1); // Set That to access address
        writer.write_push(Temp, 0); // Restore value to assign
        writer.write_pop(That, 0); // Do actual assignment
    } else {
        // Neither form matched: reject instead of silently emitting nothing.
        panic!("Unexpected token {:?} for compile_let", tokens.peek());
    }

    tokens.eat(Symbol(Semicolon));
}
/// Compiles `while ( <expression> ) { <statements> }`.
///
/// Emitted control flow, in order: the condition label, the condition
/// itself, an `if-goto` to the body label, a `goto` to the exit label, the
/// body label, the body statements, a `goto` back to the condition label,
/// and finally the exit label.
fn compile_while(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(While));
    let (condition_label, body_label, exit_label) = writer.get_while_labels();

    // Condition: re-evaluated on every pass through the loop.
    writer.write_label(&condition_label);
    tokens.eat(Symbol(LBrace));
    compile_expression(tokens, writer);
    writer.write_if_goto(&body_label);
    writer.write_goto(&exit_label);
    tokens.eat(Symbol(RBrace));

    // Body: ends by jumping back to the condition.
    tokens.eat(Symbol(LCurly));
    writer.write_label(&body_label);
    compile_statements(tokens, writer);
    writer.write_goto(&condition_label);
    tokens.eat(Symbol(RCurly));

    writer.write_label(&exit_label);
}
/// Compiles `return ;` or `return <expression> ;`.
///
/// When no expression is given, constant `0` is pushed so that a value is
/// always on the stack when the return instruction is written.
fn compile_return(tokens: &mut Tokens, writer: &mut Writer) {
    tokens.eat(Keyword(Return));

    let returns_nothing = tokens.is(Symbol(Semicolon));
    if returns_nothing {
        writer.write_push(Constant, 0);
    } else {
        compile_expression(tokens, writer);
    }

    writer.write_return();
    tokens.eat(Symbol(Semicolon));
}
/// Compiles `if ( <expression> ) { <statements> }` with an optional
/// `else { <statements> }`.
///
/// After the condition, an `if-goto` to the "then" label is followed by a
/// `goto` to the "else" label. With an `else` branch, the "then" block ends
/// with a `goto` to the end label, which is written after the `else` block.
/// Without one, only the "else" label is written after the "then" block.
fn compile_if(tokens: &mut Tokens, writer: &mut Writer) {
    let (else_label, then_label, end_label) = writer.get_if_labels();

    // Condition.
    tokens.eat(Keyword(If));
    tokens.eat(Symbol(LBrace));
    compile_expression(tokens, writer);
    tokens.eat(Symbol(RBrace));
    writer.write_if_goto(&then_label);
    writer.write_goto(&else_label);

    // "Then" block.
    tokens.eat(Symbol(LCurly));
    writer.write_label(&then_label);
    compile_statements(tokens, writer);
    tokens.eat(Symbol(RCurly));

    if !tokens.is(Keyword(Else)) {
        // No else branch: the "else" label simply marks the end.
        writer.write_label(&else_label);
        return;
    }

    // "Else" block: the "then" block must jump over it.
    writer.write_goto(&end_label);
    writer.write_label(&else_label);
    tokens.eat(Keyword(Else));
    tokens.eat(Symbol(LCurly));
    compile_statements(tokens, writer);
    tokens.eat(Symbol(RCurly));
    writer.write_label(&end_label);
}
/// Compiles an expression of the form `<term> (<binary op> <term>)*`.
///
/// Operators are handled strictly left to right with no precedence: each
/// operator is written right after its right-hand term has been compiled.
fn compile_expression(tokens: &mut Tokens, writer: &mut Writer) {
    compile_term(tokens, writer);

    loop {
        let at_binary_operator = tokens.is_one_of(vec![
            Symbol(Plus),
            Symbol(Minus),
            Symbol(Mul),
            Symbol(Div),
            Symbol(ExclusiveAnd),
            Symbol(ExclusiveOr),
            Symbol(Smaller),
            Symbol(Greater),
            Symbol(Equal),
        ]);
        if !at_binary_operator {
            break;
        }

        let operator = tokens.eat(Symbol(AnySymbol)).to_symbol();
        compile_term(tokens, writer);
        writer.write_arithmetic(operator);
    }
}
fn compile_term(tokens: &mut Tokens, writer: &mut Writer) {
if tokens.is(int_const()) {
// integerConstant
let i = tokens.eat(int_const()).to_int();
writer.write_push(Constant, i);
} else if tokens.is(string_const()) {
let s = tokens.eat(string_const()).to_string();
let bytes = s.as_bytes();
writer.write_push(Constant, bytes.len());
writer.write_call(&"String".to_string(), &"new".to_string(), 1);
for b in bytes {
writer.write_push(Constant, (*b).into());
writer.write_call(&"String".to_string(), &"appendChar".to_string(), 2);
}
} else if tokens.is(Keyword(AnyKeyword)) {
// keywordConstant
let keyword = tokens.eat(Keyword(AnyKeyword)).to_keyword();
match keyword {
True => {
writer.write_push(Constant, 0);
writer.write_arithmetic(Not);
}
False => writer.write_push(Constant, 0),
Null => writer.write_push(Constant, 0),
This => writer.write_push(Pointer, 0),
_ => panic!("Unexpected keyword {:?}", keyword),
}
} else if tokens.is_sequence(vec![identifier(), Symbol(LSquare)]) {
// arrayName
let var_name = tokens.eat(identifier()).to_string();
tokens.eat(Symbol(LSquare));
writer.write_var_read(&var_name);
compile_expression(tokens, writer);
tokens.eat(Symbol(RSquare));
writer.write_arithmetic(Plus); // Address of array access is now on stack
writer.write_pop(Pointer, 1); // Set That to address
writer.write_push(That, 0); // Push value from array onto stack
} else if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) {
// subroutineCall foo()
compile_subroutine_call(tokens, writer);
} else if tokens.is_sequence(vec![identifier(), Symbol(Dot)]) {
// subroutineCall foo.something
compile_subroutine_call(tokens, writer);
} else if tokens.is(Symbol(LBrace)) {
// ( expression )
tokens.eat(Symbol(LBrace));
compile_expression(tokens, writer);
tokens.eat(Symbol(RBrace));
} else if tokens.is_one_of(vec![Symbol(Minus), Symbol(Not)]) {
// unaryOp term
let symbol = tokens.eat(Symbol(AnySymbol)).to_symbol();
compile_term(tokens, writer);
if symbol == Minus {
writer.write_arithmetic(UnaryMinus);
} else {
writer.write_arithmetic(Not);
}
} else if tokens.is(identifier()) {
// varName
let var_name = tokens.eat(identifier()).to_string();
writer.write_var_read(&var_name);
} else {
panic!("Unexpected token {:?} for compile_term", tokens.peek());
}
}
/// Compiles a subroutine call and writes the corresponding call instruction.
///
/// Three call shapes are recognised:
///
/// * `name(args)` — a method call on the current object. `pointer 0` is
///   pushed as an implicit first argument and the callee's class is
///   `writer.class_name`.
/// * `var.name(args)`, where `var` is in the symbol table — a method call on
///   that variable. The variable is pushed as an implicit first argument
///   and the callee's class is the variable's declared type.
/// * `Class.name(args)`, where `Class` is not in the symbol table — a call
///   with no implicit argument.
///
/// The argument list is a strict comma-separated list of expressions: a
/// comma must be followed by another expression, and only a comma may
/// separate two expressions. Previously a missing or trailing comma
/// (`f(a b)`, `f(a,)`) was accepted.
///
/// # Panics
///
/// Panics if the upcoming tokens match none of the call shapes above.
/// Previously such input proceeded with empty class and subroutine names.
fn compile_subroutine_call(tokens: &mut Tokens, writer: &mut Writer) {
    let class_name: String;
    let subroutine_name: String;
    let mut n_args: usize = 0;

    if tokens.is_sequence(vec![identifier(), Symbol(LBrace)]) {
        // method call for 'this'
        class_name = writer.class_name.to_string();
        writer.write_push(Pointer, 0); // Push This
        n_args += 1;
        subroutine_name = tokens.eat(identifier()).to_string();
    } else if tokens.is_sequence(vec![identifier(), Symbol(Dot), identifier()]) {
        let receiver = tokens.eat(identifier()).to_string();
        tokens.eat(Symbol(Dot));
        subroutine_name = tokens.eat(identifier()).to_string();
        if writer.table.has_symbol(&receiver) {
            // method call on the variable `receiver`
            let index = writer.table.index_of(&receiver);
            let kind = writer.table.kind_of(&receiver);
            class_name = writer.table.get_token(&receiver).to_string();
            writer.write_push(kind, index); // Push class object
            n_args += 1;
        } else {
            // symbol not in table means function call on a class name
            class_name = receiver;
        }
    } else {
        panic!(
            "Unexpected token {:?} for compile_subroutine_call",
            tokens.peek()
        );
    }

    tokens.eat(Symbol(LBrace));
    if !tokens.is(Symbol(RBrace)) {
        // expression (',' expression)*
        loop {
            compile_expression(tokens, writer);
            n_args += 1;
            if !tokens.is(Symbol(Comma)) {
                break;
            }
            tokens.eat(Symbol(Comma));
        }
    }
    writer.write_call(&class_name, &subroutine_name, n_args);
    tokens.eat(Symbol(RBrace));
}