From 5cc7255168bf0bf63b1e50cb8b2d79cc58a35148 Mon Sep 17 00:00:00 2001
From: Felix Martin
Date: Sun, 16 May 2021 13:04:55 -0400
Subject: [PATCH] Implement basic lexer

---
 .gitignore    |  1 +
 src/lexer.rs  | 16 +++++++++++-----
 src/main.rs   |  7 +++++--
 src/parser.rs | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+), 7 deletions(-)
 create mode 100644 src/parser.rs

diff --git a/.gitignore b/.gitignore
index fb1121f..765cf3b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 
 # These are backup files generated by rustfmt
 **/*.rs.bk
+.vscode
diff --git a/src/lexer.rs b/src/lexer.rs
index 50842ff..ad2a3d0 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,4 +1,4 @@
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub enum Token {
     Identifier(String),
     Boolean(bool),
@@ -6,13 +6,19 @@ pub enum Token {
     LeftRoundBracket,
     RightRoundBracket,
     Quote,
+    None,
 }
 
-type Tokens = Vec<Token>;
+impl Default for Token {
+    fn default() -> Token {
+        Token::None
+    }
+}
 
-pub fn read(code: &str) -> () {
-    let tokens = scan(code, 0, vec![]);
-    print!("{:?}", tokens);
+pub type Tokens = Vec<Token>;
+
+pub fn read(code: &str) -> Tokens {
+    scan(code, 0, vec![])
 }
 
 fn scan(code: &str, mut ix: usize, mut tokens: Tokens) -> Tokens {
diff --git a/src/main.rs b/src/main.rs
index fe16819..dce5c80 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,9 @@
 mod lexer;
+mod parser;
 
 fn main() {
-    let scm_code = "(+ a 32)";
-    lexer::read(scm_code);
+    let scm_code = "(+ a (* 32 b) c #t #f)";
+    let tokens = lexer::read(scm_code);
+    let datum = parser::parse(tokens);
+    println!("{:?}", datum);
 }
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..a73c59b
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,39 @@
+use crate::lexer::Token;
+use crate::lexer::Tokens;
+
+
+#[derive(Debug)]
+pub enum Datum {
+    Boolean(bool),
+    Number(i64),
+    Symbol(String),
+    List(Vec<Datum>),
+}
+
+pub fn parse(tokens: Tokens) -> Datum {
+    let (datum, _) = parse_datum(&tokens, 0);
+    datum
+}
+
+fn parse_datum(tokens: &Tokens, ix: usize) -> (Datum, usize) {
+    match &tokens[ix] {
+        Token::Identifier(s) => (Datum::Symbol(s.to_string()), ix + 1),
+        Token::Boolean(b) => (Datum::Boolean(*b), ix + 1),
+        Token::Number(n) => (Datum::Number(*n), ix + 1),
+        Token::LeftRoundBracket => parse_list(tokens, ix + 1),
+        _ => panic!("Unexpected token {:?}", tokens[ix])
+    }
+}
+
+pub fn parse_list(tokens: &Tokens, mut ix: usize) -> (Datum, usize) {
+    let mut datums = vec![];
+
+    // FIXME: will crash when RightRoundBracket is missing
+    while tokens[ix] != Token::RightRoundBracket {
+        let (datum, new_ix) = parse_datum(tokens, ix);
+        datums.push(datum);
+        ix = new_ix;
+    }
+
+    (Datum::List(datums), ix + 1)
+}