From 69be2cc98024c949ebe1bffd79263f19e624f9f6 Mon Sep 17 00:00:00 2001 From: Daniel Connelly Date: Sun, 26 Nov 2023 15:06:43 +0100 Subject: [PATCH] parse binary expressions --- src/analyzer.rs | 36 ++++++++--------- src/ast.rs | 40 +++++++++++++------ src/parser.rs | 102 ++++++++++++++++++++++++++++++++++++++++++------ src/scanner.rs | 4 +- src/token.rs | 11 +++++- 5 files changed, 149 insertions(+), 44 deletions(-) diff --git a/src/analyzer.rs b/src/analyzer.rs index 899ab0f..b642511 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -68,21 +68,6 @@ impl Analyzer { Analyzer { ctx } } - fn with_locals(locals: T) -> Analyzer - where - S: ToString, - T: IntoIterator, - { - let ctx = locals.into_iter().fold( - SymbolTable::default(), - |mut acc, (name, typ)| { - acc.def_local(name.to_string(), typ); - acc - }, - ); - Analyzer::with_context(ctx) - } - fn func(&mut self, f: Func) -> Result { // TODO: look for return statements when we handle return types self.ctx.push_frame(); @@ -213,6 +198,17 @@ mod test { parser::Parser::new(scanner::scan(input)) } + fn with_locals>( + locals: T, + ) -> Analyzer { + let mut ctx = SymbolTable::default(); + ctx.push_frame(); + locals + .into_iter() + .for_each(|(name, typ)| ctx.def_local(name.to_string(), typ)); + Analyzer::with_context(ctx) + } + #[test] fn test_hello() { let input = b" @@ -456,11 +452,15 @@ mod test { #[test] fn test_binary() { let input: Vec<(Analyzer, &[u8])> = vec![ - (Analyzer::with_locals(vec![("x", Type::Int)]), b"x + 7"), - (Analyzer::with_locals(vec![("x", Type::Str)]), b"x + \"s\""), - (Analyzer::with_locals(vec![("x", Type::Str)]), b"x + 7"), + (Analyzer::default(), b"14 + 7"), + (Analyzer::default(), b"\"a\" + \"b\""), + (with_locals(vec![("x", Type::Int)]), b"x + 7"), + (with_locals(vec![("x", Type::Str)]), b"x + \"s\""), + (with_locals(vec![("x", Type::Str)]), b"x + 7"), ]; let expected = vec![ + Ok(Type::Int), + Ok(Type::Str), Ok(Type::Int), Ok(Type::Str), Err(Error::InvalidOpTypes { diff --git a/src/ast.rs b/src/ast.rs index 18a1e80..25687be 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,3 +1,4 @@ +use crate::token::OpToken; use crate::types::{FnType, Resolution, Type, Typed}; use std::fmt; @@ -64,8 +65,8 @@ pub struct Call { pub resolved_type: AST::CallCargo, } -impl Call { - pub fn untyped(target: Expr, args: Vec) -> Call { +impl Call { + pub fn untyped(target: Expr, args: Vec) -> Call { Call { target: Box::new(target), args, resolved_type: () } } } @@ -113,8 +114,8 @@ pub struct Ident { pub resolution: AST::IdentCargo, } -impl Ident { - pub fn untyped>(name: S) -> Ident { +impl Ident { + pub fn untyped>(name: S) -> Ident { Ident { name: name.into(), resolution: () } } } @@ -133,6 +134,20 @@ impl Typed for TypedIdent { #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub enum Op { Add, + Sub, + Mul, + Div, +} + +impl From for Op { + fn from(value: OpToken) -> Self { + match value { + OpToken::Plus => Op::Add, + OpToken::Minus => Op::Sub, + OpToken::Star => Op::Mul, + OpToken::Slash => Op::Div, + } + } } #[derive(Debug, PartialEq, Eq, Clone)] @@ -143,6 +158,12 @@ pub struct Binary { pub cargo: AST::BinaryCargo, } +impl Binary { + pub fn untyped(op: Op, lhs: Expr, rhs: Expr) -> Binary { + Binary { op, lhs: Box::new(lhs), rhs: Box::new(rhs), cargo: () } + } +} + impl Typed for Binary { fn typ(&self) -> Type { self.cargo.clone() @@ -201,11 +222,8 @@ pub struct Param { pub resolved_type: AST::ParamCargo, } -impl Param { - pub fn untyped>( - name: S, - typ: TypeSpec, - ) -> Param { +impl Param { + pub fn untyped>(name: S, typ: TypeSpec) -> Param { Param { name: name.into(), typ, resolved_type: () } } } @@ -274,13 +292,13 @@ pub struct Func { pub resolved_type: AST::FuncCargo, } -impl Func { +impl Func { pub fn untyped>( name: S, params: Vec, body: Block, ret: TypeSpec, - ) -> Func { + ) -> Func { Func { name: name.into(), params, body, ret, resolved_type: () } } } diff --git a/src/parser.rs b/src/parser.rs index dbdb614..e801de4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ // TODO: write BNF grammar use crate::ast::{Def::*, Expr::*, Stmt::*, *}; use crate::scanner; -use crate::token::{Token, Token::*}; +use crate::token::{OpToken::*, Token::*, *}; use std::io; use std::iter; use std::result; @@ -32,15 +32,15 @@ impl Parser { self.scanner.peek().is_none() } - fn eat Option>( + fn eat Option, S: ToString>( &mut self, mut f: F, - want: String, + want: S, ) -> Result { let got = self.scanner.next().ok_or(Error::UnexpectedEOF)??; match f(&got) { Some(t) => Ok(t), - None => Err(Error::Invalid { want, got }), + None => Err(Error::Invalid { want: want.to_string(), got }), } } @@ -58,6 +58,13 @@ impl Parser { ) } + fn eat_op(&mut self) -> Result { + self.eat( + |tok| if let OpTok(op) = tok { Some(*op) } else { None }, + "op", + ) + } + fn list Result>( &mut self, mut f: F, @@ -73,7 +80,7 @@ impl Parser { } fn primary(&mut self) -> Result { - let prim = match self.scanner.next().ok_or(Error::UnexpectedEOF)?? { + match self.scanner.next().ok_or(Error::UnexpectedEOF)?? { Str(value) => Ok(StrExpr(Literal::new(value))), Int(value) => Ok(IntExpr(Literal::new(value))), IdentTok(name) => Ok(IdentExpr(Ident { name, resolution: () })), @@ -83,17 +90,45 @@ impl Parser { Ok(expr) } got => Err(Error::Invalid { want: String::from("prim"), got }), - }?; + } + } + + fn call(&mut self) -> Result { + let target = self.primary()?; if let Some(Ok(Lparen)) = self.scanner.peek() { self.eat_tok(Lparen)?; let args = self.list(|p| p.expr())?; self.eat_tok(Rparen)?; - Ok(CallExpr(Call::untyped(prim, args))) + Ok(CallExpr(Call::untyped(target, args))) } else { - Ok(prim) + Ok(target) } } + fn mul_div(&mut self) -> Result { + let mut expr = self.call()?; + while matches!(self.scanner.peek(), Some(Ok(OpTok(Star | Slash)))) { + let op = self.eat_op()?; + expr = BinaryExpr(Binary::untyped(op.into(), expr, self.call()?)); + } + Ok(expr) + } + + fn add_sub(&mut self) -> Result { + // TODO: unify this with |mul_div| and |list| + let mut expr = self.mul_div()?; + while matches!(self.scanner.peek(), Some(Ok(OpTok(Plus | Minus)))) { + let op = self.eat_op()?; + expr = + BinaryExpr(Binary::untyped(op.into(), expr, self.mul_div()?)); + } + Ok(expr) + } + + pub fn expr(&mut self) -> Result { + self.add_sub() + } + fn type_spec(&mut self) -> Result { // TODO: parse more complicated types Ok(TypeSpec::Simple(self.eat_ident()?)) @@ -141,10 +176,6 @@ impl Parser { Ok(Param::untyped(name, typ)) } - pub fn expr(&mut self) -> Result { - self.primary() - } - pub fn fn_expr(&mut self) -> Result { self.eat_tok(FnTok)?; let name = self.eat_ident()?; @@ -176,6 +207,7 @@ pub fn parse(scanner: scanner::Scanner) -> Result { #[cfg(test)] mod test { use super::*; + use crate::ast::Op::*; fn parse(input: &[u8]) -> Parser<&[u8]> { Parser { scanner: scanner::scan(input).peekable() } @@ -272,6 +304,52 @@ mod test { assert_eq!(expected, actual); } + #[test] + fn test_binary() { + let input = b"x + y + (5 + 4) + foo(7, 10) + z + ((a + b) + c)"; + let expected = BinaryExpr(Binary::untyped( + Op::Add, + BinaryExpr(Binary::untyped( + Add, + BinaryExpr(Binary::untyped( + Add, + BinaryExpr(Binary::untyped( + Add, + BinaryExpr(Binary::untyped( + Add, + IdentExpr(Ident::untyped("x")), + IdentExpr(Ident::untyped("y")), + )), + BinaryExpr(Binary::untyped( + Add, + IntExpr(Literal::new(5)), + IntExpr(Literal::new(4)), + )), + )), + CallExpr(Call::untyped( + IdentExpr(Ident::untyped("foo")), + vec![ + IntExpr(Literal::new(7)), + IntExpr(Literal::new(10)), + ], + )), + )), + IdentExpr(Ident::untyped("z")), + )), + BinaryExpr(Binary::untyped( + Add, + BinaryExpr(Binary::untyped( + Add, + IdentExpr(Ident::untyped("a")), + IdentExpr(Ident::untyped("b")), + )), + IdentExpr(Ident::untyped("c")), + )), + )); + let actual = parse(input).expr().unwrap(); + assert_eq!(expected, actual); + } + #[test] fn test_fn() { let input = b" fn hello ( world: int, all: str ) { foo(27); } "; diff --git a/src/scanner.rs b/src/scanner.rs index c1b0a4e..5a26cf9 100644 --- a/src/scanner.rs +++ b/src/scanner.rs @@ -1,4 +1,4 @@ -use crate::token::*; +use crate::token::{OpToken::*, *}; use std::io; use std::iter; use std::num; @@ -120,7 +120,7 @@ impl iter::Iterator for Scanner { use Token::*; self.skip_whitespace(); let result = self.peek()?.and_then(|b| match b { - b'+' => self.advance_emit(1, Plus), + b'+' => self.advance_emit(1, OpTok(Plus)), b'=' => self.advance_emit(1, Eq), b'(' => self.advance_emit(1, Lparen), b')' => self.advance_emit(1, Rparen), diff --git a/src/token.rs b/src/token.rs index a750f71..6886957 100644 --- a/src/token.rs +++ b/src/token.rs @@ -1,3 +1,12 @@ +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum OpToken { + Plus, + Minus, + Star, + Slash, +} + +// TODO: stop importing * everywhere and make the names sane #[derive(Debug, PartialEq, Clone)] pub enum Token { Lparen, @@ -13,5 +22,5 @@ pub enum Token { Int(i64), LetTok, FnTok, - Plus, + OpTok(OpToken), }