401 lines
9.6 KiB
Rust
401 lines
9.6 KiB
Rust
use crate::tokenizer;
|
|
use std::fs;
|
|
use std::path::Path;
|
|
|
|
/// A single lexical token produced by the tokenizer.
///
/// `Keyword` and `Symbol` carry their specific variant; the remaining
/// variants carry the literal text or value taken from the source.
/// Comparison with wildcard semantics is done by the free function
/// `equal`, not by `PartialEq`.
#[derive(Debug, Clone)]
pub enum Token {
    /// A reserved word (see `Keyword` for the variants).
    Keyword(Keyword),
    /// A single-character symbol such as `{` or `;` (see `Symbol`).
    Symbol(Symbol),
    /// A name: anything alphabetic-led that is not a reserved word.
    Identifier(String),
    /// An unsigned integer literal.
    IntConst(usize),
    /// A double-quoted string literal, stored without the quotes.
    StringConst(String),
}
|
|
|
|
/// Keyword tokens recognized by the tokenizer.
///
/// The string-to-variant mapping lives in `parse_keyword_or_identifier`.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Keyword {
    Class,
    Constructor,
    Function,
    Method,
    Field,
    Var,
    Int,
    Char,
    Boolean,
    // NOTE(review): `Constant`, `Argument`, `Local`, `That`, `Pointer` and
    // `Temp` are never produced by `parse_keyword_or_identifier` (unlike
    // `Static`, which "static" maps to). They look like VM memory-segment
    // names used by later compilation stages — confirm against callers
    // outside this file.
    Constant,
    Argument,
    Local,
    Static,
    That,
    Pointer,
    Temp,
    Void,
    True,
    False,
    Null,
    This,
    Let,
    Do,
    If,
    Else,
    While,
    Return,
    /// Wildcard: `equal` treats an expected `AnyKeyword` as matching any
    /// keyword token.
    AnyKeyword,
}
|
|
|
|
/// Single-character symbol tokens.
///
/// The char-to-variant mapping lives in `parse_symbol`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Symbol {
    LCurly,     // '{'
    RCurly,     // '}'
    LBrace,     // '('
    RBrace,     // ')'
    LSquare,    // '['
    RSquare,    // ']'
    Dot,        // '.'
    Comma,      // ','
    Semicolon,  // ';'
    Plus,       // '+'
    Minus,      // '-'
    Mul,        // '*'
    Div,        // '/'
    ExclusiveAnd, // '&'
    ExclusiveOr,  // '|'
    Smaller,    // '<'
    Greater,    // '>'
    Equal,      // '='
    // NOTE(review): `UnaryMinus` is never produced by `parse_symbol`
    // ('-' always maps to `Minus`); presumably a later stage rewrites
    // `Minus` into it — confirm against callers.
    UnaryMinus,
    Not,        // '~'
    /// Wildcard: `equal` treats an expected `AnySymbol` as matching any
    /// symbol token.
    AnySymbol,
}
|
|
|
|
/// A tokenized source file: the token list plus a cursor that the
/// `peek`/`eat`/`is*` methods use to walk through it.
#[derive(Debug)]
pub struct Tokens {
    // All tokens in source order.
    pub tokens: Vec<Token>,
    // Index of the next token to be inspected/consumed.
    index: usize,
}
|
|
|
|
pub fn identifier() -> Token {
|
|
Token::Identifier(String::new())
|
|
}
|
|
|
|
pub fn int_const() -> Token {
|
|
Token::IntConst(0)
|
|
}
|
|
|
|
pub fn string_const() -> Token {
|
|
Token::StringConst(String::new())
|
|
}
|
|
|
|
pub fn equal(t1: &Token, t2: &Token) -> bool {
|
|
match t1 {
|
|
Token::Keyword(k1) => match t2 {
|
|
Token::Keyword(k2) if k1 == k2 => true,
|
|
Token::Keyword(_) if k1 == &Keyword::AnyKeyword => true,
|
|
_ => false,
|
|
},
|
|
Token::Symbol(s1) => match t2 {
|
|
Token::Symbol(s2) if s1 == s2 => true,
|
|
Token::Symbol(_) if s1 == &Symbol::AnySymbol => true,
|
|
_ => false,
|
|
},
|
|
Token::Identifier(_) => match t2 {
|
|
Token::Identifier(_) => true,
|
|
_ => false,
|
|
},
|
|
Token::IntConst(_) => match t2 {
|
|
Token::IntConst(_) => true,
|
|
_ => false,
|
|
},
|
|
Token::StringConst(_) => match t2 {
|
|
Token::StringConst(_) => true,
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Token {
|
|
pub fn to_string(&self) -> String {
|
|
match self {
|
|
Token::Identifier(s) => s.to_string(),
|
|
Token::StringConst(s) => s.to_string(),
|
|
_ => panic!("Cannot convert {:?} to string.", self),
|
|
}
|
|
}
|
|
|
|
pub fn to_keyword(&self) -> tokenizer::Keyword {
|
|
match self {
|
|
tokenizer::Token::Keyword(k) => k.clone(),
|
|
_ => tokenizer::Keyword::AnyKeyword,
|
|
}
|
|
}
|
|
|
|
pub fn to_int(&self) -> usize {
|
|
match self {
|
|
tokenizer::Token::IntConst(i) => *i,
|
|
_ => panic!("Cannot convert {:?} to int.", self),
|
|
}
|
|
}
|
|
|
|
pub fn to_symbol(&self) -> tokenizer::Symbol {
|
|
match self {
|
|
tokenizer::Token::Symbol(s) => s.clone(),
|
|
_ => panic!("Cannot convert {:?} to symbol.", self),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Tokens {
|
|
pub fn peek(&self) -> tokenizer::Token {
|
|
let i = self.index;
|
|
let t = self.tokens.get(i).unwrap();
|
|
t.clone()
|
|
}
|
|
|
|
fn get_token(&self, index: usize) -> tokenizer::Token {
|
|
let t = self.tokens.get(index).unwrap();
|
|
t.clone()
|
|
}
|
|
|
|
pub fn eat(&mut self, expected_token: Token) -> tokenizer::Token {
|
|
let i = self.index;
|
|
let t = self.tokens.get(i).unwrap();
|
|
self.index += 1;
|
|
if !equal(&expected_token, t) {
|
|
panic!{"Expected {:?} but got {:?}.", expected_token, t};
|
|
}
|
|
t.clone()
|
|
}
|
|
|
|
pub fn eat_one_of(&mut self, tokens: Vec<Token>) -> tokenizer::Token {
|
|
let t2 = self.get_token(self.index);
|
|
for t1 in &tokens {
|
|
if equal(&t1, &t2) {
|
|
self.index += 1;
|
|
return t2.clone();
|
|
}
|
|
}
|
|
panic!{"Expected one of {:?} but got {:?}.", tokens, t2};
|
|
}
|
|
|
|
pub fn is(&self, expected_token: Token) -> bool {
|
|
let t = self.get_token(self.index);
|
|
if equal(&expected_token, &t) {
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
|
|
pub fn is_one_of(&self, tokens: Vec<Token>) -> bool {
|
|
let t2 = self.get_token(self.index);
|
|
for t1 in tokens {
|
|
if equal(&t1, &t2) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
pub fn is_sequence(&self, tokens: Vec<Token>) -> bool {
|
|
let mut index = self.index;
|
|
for t1 in tokens {
|
|
let t2 = self.get_token(index);
|
|
if !(equal(&t1, &t2)) {
|
|
return false;
|
|
}
|
|
index += 1;
|
|
}
|
|
true
|
|
}
|
|
}
|
|
|
|
pub fn tokenize_file(file: &Path) -> Tokens {
|
|
let mut tokens = vec![];
|
|
let chars: Vec<char> = fs::read_to_string(file).unwrap().chars().collect();
|
|
let length = chars.len();
|
|
let mut index: usize = 0;
|
|
|
|
while index < length {
|
|
index = eat_comment(&chars, index);
|
|
let c = chars[index];
|
|
|
|
if c.is_whitespace() {
|
|
index += 1;
|
|
} else if parse_symbol(&chars, &mut tokens, index) != index {
|
|
// if there is a symbol it has already been added to token list.
|
|
index += 1
|
|
} else if c.is_ascii_alphabetic() {
|
|
index = parse_keyword_or_identifier(&chars, &mut tokens, index);
|
|
} else if c.is_ascii_digit() {
|
|
index = parse_integer_constant(&chars, &mut tokens, index);
|
|
} else if c == '"' {
|
|
index = parse_string_constant(&chars, &mut tokens, index);
|
|
} else {
|
|
println!("Unexpected char {:?}", c);
|
|
index += 1;
|
|
}
|
|
}
|
|
|
|
let tokens = Tokens {
|
|
tokens: tokens,
|
|
index: 0,
|
|
};
|
|
return tokens;
|
|
}
|
|
|
|
/// Skips any run of consecutive `//` line comments and `/* */` block
/// comments starting at `index`, returning the index of the first
/// character after them (or `index` unchanged if there is no comment).
///
/// Bug fix: the original indexed `chars[index + 1]` and scanned for a
/// terminator without bounds checks, panicking on a lone trailing '/',
/// a line comment without a final newline, or an unterminated block
/// comment. Comments that run to the end of input now consume the rest
/// of it and `chars.len()` is returned instead.
fn eat_comment(chars: &Vec<char>, index: usize) -> usize {
    let len = chars.len();
    let mut index = index;

    // Iterative replacement for the original tail recursion: keep
    // eating as long as another comment starts right here.
    loop {
        if index + 1 < len && chars[index] == '/' && chars[index + 1] == '/' {
            // Line comment: skip to (and past) the newline, or to EOF.
            index += 2;
            while index < len && chars[index] != '\n' {
                index += 1;
            }
            if index < len {
                index += 1; // step past the '\n'
            }
        } else if index + 1 < len && chars[index] == '/' && chars[index + 1] == '*' {
            // Block comment: skip to the closing "*/", or to EOF.
            index += 2;
            while index + 1 < len && !(chars[index] == '*' && chars[index + 1] == '/') {
                index += 1;
            }
            // Step past "*/"; the min() caps an unterminated comment at EOF.
            index = (index + 2).min(len);
        } else {
            return index;
        }
    }
}
|
|
|
|
fn parse_symbol(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
|
let mut index = index;
|
|
let c = chars[index];
|
|
|
|
use Symbol::*;
|
|
let s = match c {
|
|
'{' => Some(LCurly),
|
|
'}' => Some(RCurly),
|
|
'(' => Some(LBrace),
|
|
')' => Some(RBrace),
|
|
'[' => Some(LSquare),
|
|
']' => Some(RSquare),
|
|
'.' => Some(Dot),
|
|
',' => Some(Comma),
|
|
';' => Some(Semicolon),
|
|
'+' => Some(Plus),
|
|
'-' => Some(Minus),
|
|
'*' => Some(Mul),
|
|
'/' => Some(Div),
|
|
'&' => Some(ExclusiveAnd),
|
|
'|' => Some(ExclusiveOr),
|
|
'<' => Some(Smaller),
|
|
'>' => Some(Greater),
|
|
'=' => Some(Equal),
|
|
'~' => Some(Not),
|
|
_ => None,
|
|
};
|
|
|
|
match s {
|
|
Some(s) => {
|
|
let t = Token::Symbol(s);
|
|
tokens.push(t);
|
|
index += 1;
|
|
}
|
|
None => (),
|
|
}
|
|
|
|
return index;
|
|
}
|
|
|
|
fn parse_integer_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
|
let start_index = index;
|
|
let mut index = index;
|
|
let mut number = String::new();
|
|
|
|
while chars[index].is_ascii_digit() {
|
|
number.push(chars[index]);
|
|
index += 1;
|
|
}
|
|
|
|
if start_index == index {
|
|
return index;
|
|
}
|
|
|
|
let number: usize = number.parse::<usize>().unwrap();
|
|
let t = Token::IntConst(number);
|
|
tokens.push(t);
|
|
|
|
return index;
|
|
}
|
|
|
|
fn parse_string_constant(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
|
let start_index = index;
|
|
let mut index = index;
|
|
|
|
if chars[index] != '"' {
|
|
return index;
|
|
}
|
|
index += 1;
|
|
|
|
while chars[index] != '"' {
|
|
index += 1;
|
|
}
|
|
index += 1;
|
|
|
|
let s = chars[start_index + 1..index - 1].into_iter().collect();
|
|
let t = Token::StringConst(s);
|
|
tokens.push(t);
|
|
|
|
return index;
|
|
}
|
|
|
|
fn parse_keyword_or_identifier(chars: &Vec<char>, tokens: &mut Vec<Token>, index: usize) -> usize {
|
|
let mut index = index;
|
|
let mut token_string = String::new();
|
|
|
|
if !chars[index].is_ascii_alphabetic() {
|
|
return index;
|
|
}
|
|
token_string.push(chars[index]);
|
|
index += 1;
|
|
|
|
while chars[index].is_alphanumeric() || chars[index] == '_' {
|
|
token_string.push(chars[index]);
|
|
index += 1;
|
|
}
|
|
|
|
use Keyword::*;
|
|
let t = match token_string.as_str() {
|
|
"class" => Token::Keyword(Class),
|
|
"constructor" => Token::Keyword(Constructor),
|
|
"function" => Token::Keyword(Function),
|
|
"method" => Token::Keyword(Method),
|
|
"field" => Token::Keyword(Field),
|
|
"static" => Token::Keyword(Static),
|
|
"var" => Token::Keyword(Var),
|
|
"int" => Token::Keyword(Int),
|
|
"char" => Token::Keyword(Char),
|
|
"boolean" => Token::Keyword(Boolean),
|
|
"void" => Token::Keyword(Void),
|
|
"true" => Token::Keyword(True),
|
|
"false" => Token::Keyword(False),
|
|
"null" => Token::Keyword(Null),
|
|
"this" => Token::Keyword(This),
|
|
"let" => Token::Keyword(Let),
|
|
"do" => Token::Keyword(Do),
|
|
"if" => Token::Keyword(If),
|
|
"else" => Token::Keyword(Else),
|
|
"while" => Token::Keyword(While),
|
|
"return" => Token::Keyword(Return),
|
|
s => Token::Identifier(s.to_string()),
|
|
};
|
|
tokens.push(t);
|
|
return index;
|
|
}
|
|
|
|
/// Debug helper: prints `chars[start..stop]` as a quoted string.
///
/// # Panics
/// Panics if `start..stop` is not a valid range into `chars`.
#[allow(dead_code)]
fn print_vector_slice(chars: &Vec<char>, start: usize, stop: usize) {
    let slice = &chars[start..stop];
    let s = slice.iter().collect::<String>();
    println!("{:?}", s);
}
|