From 53b86f1dbe220a440f84d16ca951596508935685 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 18 Feb 2023 22:23:53 +0100 Subject: [PATCH 01/94] Use IndexMap for constants and signature --- src/compiler/ast.rs | 23 +++++++---------------- src/compiler/compiler.rs | 9 +++++---- src/compiler/iml/parselet.rs | 13 +++++++------ 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index d96efc7f..3cbabd94 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1,5 +1,5 @@ //! Compiler's internal Abstract Syntax Tree traversal -use std::collections::HashSet; +use indexmap::IndexMap; use tokay_macros::tokay_function; extern crate self as tokay; use super::*; @@ -162,13 +162,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_parselet" => { compiler.parselet_push(); - // Generic signature - let mut gen: Vec<(String, Option)> = Vec::new(); - let mut gen_names = HashSet::new(); - - // Function signature - let mut sig: Vec<(String, Option)> = Vec::new(); - let mut sig_names = HashSet::new(); + let mut gen: IndexMap> = IndexMap::new(); + let mut sig: IndexMap> = IndexMap::new(); // Traverse the AST let mut sigs = List::from(node["children"].clone()); @@ -187,15 +182,13 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { match emit { "gen" => { // check if identifier was not provided twice - if gen_names.contains(&ident) { + if gen.contains_key(&ident) { compiler.errors.push(Error::new( traverse_node_offset(node), format!("Generic '{}' already given in signature before", ident), )); continue; - } else { - gen_names.insert(ident.clone()); } compiler.set_constant(&ident, ImlValue::Generic(ident.to_string())); @@ -213,7 +206,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { None }; - gen.push((ident.to_string(), default)); + gen.insert(ident.to_string(), default); //println!("{} {} {:?}", emit.to_string(), ident, default); } 
"arg" => { @@ -241,15 +234,13 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { } // check if identifier was not provided twice - if sig_names.contains(&ident) { + if sig.contains_key(&ident) { compiler.errors.push(Error::new( traverse_node_offset(node), format!("Argument '{}' already given in signature before", ident), )); continue; - } else { - sig_names.insert(ident.clone()); } compiler.new_local(&ident); @@ -267,7 +258,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { None }; - sig.push((ident.to_string(), default)); + sig.insert(ident.to_string(), default); //println!("{} {} {:?}", emit.to_string(), ident, default); } _ => unreachable!(), diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 52696af4..3af0af00 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -6,6 +6,7 @@ use crate::error::Error; use crate::reader::*; use crate::value::{RefValue, Token}; use crate::vm::*; +use indexmap::IndexMap; use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; @@ -209,8 +210,8 @@ impl Compiler { offset: Option, name: Option, severity: Option, - gen: Option)>>, - sig: Option)>>, + gen: Option>>, + sig: Option>>, body: ImlOp, ) -> ImlValue { assert!(self.scopes.len() > 0 && matches!(self.scopes[0], Scope::Parselet { .. })); @@ -240,8 +241,8 @@ impl Compiler { } } - let signature = sig.unwrap_or(Vec::new()); - let constants = gen.unwrap_or(Vec::new()); + let constants = gen.unwrap_or(IndexMap::new()); + let signature = sig.unwrap_or(IndexMap::new()); assert!( signature.len() <= variables.len(), diff --git a/src/compiler/iml/parselet.rs b/src/compiler/iml/parselet.rs index 7782a02b..7d8c0bd2 100644 --- a/src/compiler/iml/parselet.rs +++ b/src/compiler/iml/parselet.rs @@ -1,17 +1,18 @@ //! 
Intermediate representation of a parselet use super::*; use crate::reader::Offset; +use indexmap::IndexMap; use std::collections::{HashMap, HashSet}; #[derive(Debug)] /// Intermediate parselet pub(in crate::compiler) struct ImlParselet { - pub offset: Option, // Offset of definition - pub consuming: bool, // Flag if parselet is consuming - pub severity: u8, // Capture push severity - pub name: Option, // Parselet's name from source (for debugging) - pub constants: Vec<(String, Option)>, // Constant signature with default constants; generic parselet when set. - pub signature: Vec<(String, Option)>, // Argument signature with default arguments + pub offset: Option, // Offset of definition + pub consuming: bool, // Flag if parselet is consuming + pub severity: u8, // Capture push severity + pub name: Option, // Parselet's name from source (for debugging) + pub constants: IndexMap>, // Constant signature with default constants; generic parselet when set. + pub signature: IndexMap>, // Argument signature with default arguments pub locals: usize, // Total number of local variables present (including arguments) pub begin: ImlOp, // Begin-operations pub end: ImlOp, // End-operations From 6b297f18379ad127cd9f28037844492c8fcc6474 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 18 Feb 2023 22:26:51 +0100 Subject: [PATCH 02/94] Unify naming for constants and signature --- src/compiler/ast.rs | 16 ++++++++-------- src/compiler/compiler.rs | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 3cbabd94..3a1a7886 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -162,8 +162,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_parselet" => { compiler.parselet_push(); - let mut gen: IndexMap> = IndexMap::new(); - let mut sig: IndexMap> = IndexMap::new(); + let mut constants: IndexMap> = IndexMap::new(); + let mut signature: IndexMap> = IndexMap::new(); // 
Traverse the AST let mut sigs = List::from(node["children"].clone()); @@ -182,7 +182,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { match emit { "gen" => { // check if identifier was not provided twice - if gen.contains_key(&ident) { + if constants.contains_key(&ident) { compiler.errors.push(Error::new( traverse_node_offset(node), format!("Generic '{}' already given in signature before", ident), @@ -206,7 +206,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { None }; - gen.insert(ident.to_string(), default); + constants.insert(ident.to_string(), default); //println!("{} {} {:?}", emit.to_string(), ident, default); } "arg" => { @@ -234,7 +234,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { } // check if identifier was not provided twice - if sig.contains_key(&ident) { + if signature.contains_key(&ident) { compiler.errors.push(Error::new( traverse_node_offset(node), format!("Argument '{}' already given in signature before", ident), @@ -258,7 +258,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { None }; - sig.insert(ident.to_string(), default); + signature.insert(ident.to_string(), default); //println!("{} {} {:?}", emit.to_string(), ident, default); } _ => unreachable!(), @@ -272,8 +272,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { traverse_node_offset(node), None, None, - Some(gen), - Some(sig), + Some(constants), + Some(signature), body, ); diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 3af0af00..1a19643a 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -210,8 +210,8 @@ impl Compiler { offset: Option, name: Option, severity: Option, - gen: Option>>, - sig: Option>>, + constants: Option>>, + signature: Option>>, body: ImlOp, ) -> ImlValue { assert!(self.scopes.len() > 0 && matches!(self.scopes[0], Scope::Parselet { .. 
})); @@ -241,8 +241,8 @@ impl Compiler { } } - let constants = gen.unwrap_or(IndexMap::new()); - let signature = sig.unwrap_or(IndexMap::new()); + let constants = constants.unwrap_or(IndexMap::new()); + let signature = signature.unwrap_or(IndexMap::new()); assert!( signature.len() <= variables.len(), From ae9a4b735f4d034efc836f2b9d29ed9fcc2f771d Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 19 Feb 2023 00:16:48 +0100 Subject: [PATCH 03/94] Some renamings --- src/compiler/ast.rs | 2 +- src/compiler/iml/op.rs | 37 +++++++++++++++++++------------------ src/compiler/iml/value.rs | 10 +++++----- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 3a1a7886..8acc69d7 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -191,7 +191,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { continue; } - compiler.set_constant(&ident, ImlValue::Generic(ident.to_string())); + compiler.set_constant(&ident, ImlValue::Undefined(ident.to_string())); assert!(children.len() <= 2); let default = if children.len() == 2 { diff --git a/src/compiler/iml/op.rs b/src/compiler/iml/op.rs index c9d8e146..8e72de9f 100644 --- a/src/compiler/iml/op.rs +++ b/src/compiler/iml/op.rs @@ -15,17 +15,17 @@ pub(in crate::compiler) type SharedImlOp = Rc>; /// Target of a call or load #[derive(Clone)] pub(in crate::compiler) enum ImlTarget { - Unresolved(String), // Compile-time identifier (unresolved!) 
- Generic(String), // Compile-time generic identifier - Static(ImlValue), // Compile-time static value - Local(usize), // Runtime local value - Global(usize), // Runtime global value + Unknown(String), // Compile-time unknown identifier + Undefined(String), // Compile-time declared but undefined identifier (used by generic parselets) + Static(ImlValue), // Compile-time static value + Local(usize), // Runtime local value + Global(usize), // Runtime global value } impl ImlTarget { pub fn is_consuming(&self) -> bool { match self { - Self::Unresolved(name) | Self::Generic(name) => { + Self::Unknown(name) | Self::Undefined(name) => { crate::utils::identifier_is_consumable(name) } Self::Static(value) => value.is_consuming(), @@ -35,9 +35,10 @@ impl ImlTarget { } impl std::fmt::Debug for ImlTarget { + // Manual implementation is required to avoid endless recursion here fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Unresolved(name) | Self::Generic(name) => write!(f, "{}", name), + Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), Self::Static(value) => write!(f, "{}", value), Self::Local(addr) => write!(f, "local@{}", addr), Self::Global(addr) => write!(f, "global@{}", addr), @@ -152,7 +153,7 @@ impl ImlOp { pub fn load_by_name(compiler: &mut Compiler, offset: Option, name: String) -> ImlOp { ImlOp::Load { offset, - target: ImlTarget::Unresolved(name), + target: ImlTarget::Unknown(name), } .try_resolve(compiler) } @@ -193,7 +194,7 @@ impl ImlOp { ImlOp::Call { offset, - target: ImlTarget::Unresolved(name), + target: ImlTarget::Unknown(name), args, } .try_resolve(compiler) @@ -214,11 +215,11 @@ impl ImlOp { match self { Self::Shared(op) => return op.borrow_mut().resolve(compiler), Self::Load { target, .. } | Self::Call { target, .. 
} => { - if let ImlTarget::Unresolved(name) = target { + if let ImlTarget::Unknown(name) = target { if let Some(value) = compiler.get_constant(&name) { // In case this is a generic, the value is resolved to a generic for later dispose - if matches!(value, ImlValue::Generic(_)) { - *target = ImlTarget::Generic(name.clone()); + if matches!(value, ImlValue::Undefined(_)) { + *target = ImlTarget::Undefined(name.clone()); } else { *target = ImlTarget::Static(value); } @@ -367,7 +368,7 @@ impl ImlOp { } ops.push(match target { - ImlTarget::Unresolved(name) => { + ImlTarget::Unknown(name) => { linker.errors.push(Error::new( *offset, format!("Use of unresolved symbol '{}'", name), @@ -375,8 +376,8 @@ impl ImlOp { Op::Nop } - ImlTarget::Generic(name) => { - unreachable!("Use of generic symbol '{}' may not occur", name) + ImlTarget::Undefined(name) => { + unreachable!("Use of undefined symbol '{}'", name) } ImlTarget::Static(value) => linker.push(value), ImlTarget::Local(idx) => Op::LoadFast(*idx), @@ -393,14 +394,14 @@ impl ImlOp { } match target { - ImlTarget::Unresolved(name) => { + ImlTarget::Unknown(name) => { linker.errors.push(Error::new( *offset, format!("Call to unresolved symbol '{}'", name), )); } - ImlTarget::Generic(name) => { - unreachable!("Call to generic symbol '{}' may not occur", name) + ImlTarget::Undefined(name) => { + unreachable!("Call to undefined symbol '{}' may not occur", name) } ImlTarget::Static(value) => { let idx = linker.register(value); diff --git a/src/compiler/iml/value.rs b/src/compiler/iml/value.rs index cac8860b..18a5fc60 100644 --- a/src/compiler/iml/value.rs +++ b/src/compiler/iml/value.rs @@ -7,7 +7,7 @@ use std::rc::Rc; /** Compile-time values */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Generic(String), + Undefined(String), Parselet(Rc>), Value(RefValue), } @@ -43,7 +43,7 @@ impl ImlValue { /// Check whether intermediate value represents consuming pub fn is_consuming(&self) -> bool { match self { - 
ImlValue::Generic(ident) => crate::utils::identifier_is_consumable(ident), + ImlValue::Undefined(ident) => crate::utils::identifier_is_consumable(ident), ImlValue::Parselet(parselet) => parselet.borrow().consuming, ImlValue::Value(value) => value.is_consuming(), } @@ -53,7 +53,7 @@ impl ImlValue { impl std::fmt::Debug for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Generic(s) => write!(f, "Generic({:?})", s), + Self::Undefined(s) => write!(f, "{}", s), Self::Parselet(p) => p.borrow().fmt(f), Self::Value(v) => v.borrow().fmt(f), } @@ -63,7 +63,7 @@ impl std::fmt::Debug for ImlValue { impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Generic(s) => write!(f, "{}", s), + Self::Undefined(s) => write!(f, "{}", s), Self::Parselet(p) => write!( f, "{}", @@ -77,7 +77,7 @@ impl std::fmt::Display for ImlValue { impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { - Self::Generic(_) => unreachable!(), + Self::Undefined(_) => unreachable!(), Self::Parselet(p) => { state.write_u8('p' as u8); p.borrow().hash(state); From 2483e83eeadbdfa9116a4c53d2fd5b7d69589b68 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 5 Mar 2023 23:27:31 +0100 Subject: [PATCH 04/94] temporarily disabling prelude --- src/compiler/compiler.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 5d8f9e75..bafa9f41 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -75,7 +75,7 @@ impl Compiler { }; // Compile with the default prelude - if with_prelude { + if false && with_prelude { compiler .compile_from_str(include_str!("../prelude.tok")) .unwrap(); // this should panic in case of an error! 
From 43353b84d4c54aebe0b9fdb1cef5cba130126127 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 11 Mar 2023 12:29:50 +0100 Subject: [PATCH 05/94] ImlValue::ParseletInstance? --- CHANGELOG.md | 1 - CONTRIBUTING.md | 1 + src/compiler/iml/value.rs | 72 +++++++++++++++++++++++++++++--------- src/value/iter/enumiter.rs | 13 ++++--- src/value/iter/mod.rs | 2 +- 5 files changed, 64 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2e35ff0..49c25746 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,6 @@ Released on Jan 13, 2023 - Operator `%` for modulo operation implemented - Area syntax `@(...)` for in-place reader extend (#78) - Character-class syntax changed from `[a-z]` into `Char`, `.` and `Any` substituted by `Char` (#98) - ) - Improved syntax for inline blocks and sequences (`|`-operator) - Improved list syntax - `()` the empty list diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3a66021c..f29e81b3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,4 +67,5 @@ Next are some instructions on how to create a new release. 
- Uncomment line to use `tokay-macros` version from crates.io (obviously `tokay-macros = "x.x"`) - Comment line to use local `tokay-macros` (should be `# tokay-macros = { version = "x.x", path = "macros" }`) - `git commit` this state + - `git tag` this state - `cargo publish` diff --git a/src/compiler/iml/value.rs b/src/compiler/iml/value.rs index 18a5fc60..1a6df5aa 100644 --- a/src/compiler/iml/value.rs +++ b/src/compiler/iml/value.rs @@ -2,14 +2,20 @@ use super::*; use crate::value::{Object, RefValue}; use std::cell::RefCell; +use std::collections::HashMap; use std::rc::Rc; /** Compile-time values */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Undefined(String), - Parselet(Rc>), - Value(RefValue), + Undefined(String), // Yet undefined value + Value(RefValue), // Standard value object + Parselet(Rc>), // Parselet + ParseletInstance { + // Instance of a parselet with a constants setting + parselet: Rc>, + constants: HashMap, + }, } impl ImlValue { @@ -25,7 +31,8 @@ impl ImlValue { /// and when its callable if with or without arguments. pub fn is_callable(&self, without_arguments: bool) -> bool { match self { - ImlValue::Parselet(parselet) => { + ImlValue::Value(value) => value.is_callable(without_arguments), + ImlValue::Parselet(parselet) | ImlValue::ParseletInstance { parselet, .. } => { let parselet = parselet.borrow(); if without_arguments { @@ -35,7 +42,6 @@ impl ImlValue { true } } - ImlValue::Value(value) => value.is_callable(without_arguments), _ => unreachable!(), } } @@ -44,8 +50,10 @@ impl ImlValue { pub fn is_consuming(&self) -> bool { match self { ImlValue::Undefined(ident) => crate::utils::identifier_is_consumable(ident), - ImlValue::Parselet(parselet) => parselet.borrow().consuming, ImlValue::Value(value) => value.is_consuming(), + ImlValue::Parselet(parselet) | ImlValue::ParseletInstance { parselet, .. 
} => { + parselet.borrow().consuming + } } } } @@ -54,22 +62,46 @@ impl std::fmt::Debug for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Undefined(s) => write!(f, "{}", s), - Self::Parselet(p) => p.borrow().fmt(f), Self::Value(v) => v.borrow().fmt(f), + Self::Parselet(parselet) | ImlValue::ParseletInstance { parselet, .. } => { + parselet.borrow().fmt(f) + } } } } impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Undefined(s) => write!(f, "{}", s), - Self::Parselet(p) => write!( + fn write_parselet( + f: &mut std::fmt::Formatter<'_>, + parselet: &ImlParselet, + constants: Option<&HashMap>, + ) -> std::fmt::Result { + write!( f, "{}", - p.borrow().name.as_deref().unwrap_or("") - ), + parselet.name.as_deref().unwrap_or("") + )?; + + if let Some(constants) = constants { + write!(f, "<")?; + for (name, value) in constants { + write!(f, "{}: {}", name, value)?; + } + write!(f, ">")?; + } + + Ok(()) + } + + match self { + Self::Undefined(s) => write!(f, "{}", s), Self::Value(v) => write!(f, "{}", v.repr()), + Self::Parselet(parselet) => write_parselet(f, &parselet.borrow(), None), + Self::ParseletInstance { + parselet, + constants, + } => write_parselet(f, &parselet.borrow(), Some(constants)), } } } @@ -78,14 +110,22 @@ impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { Self::Undefined(_) => unreachable!(), - Self::Parselet(p) => { - state.write_u8('p' as u8); - p.borrow().hash(state); - } Self::Value(v) => { state.write_u8('v' as u8); v.hash(state) } + Self::Parselet(parselet) => { + state.write_u8('p' as u8); + parselet.borrow().hash(state); + } + Self::ParseletInstance { + parselet, + constants, + } => { + state.write_u8('i' as u8); + parselet.borrow().hash(state); + constants.iter().collect::>().hash(state); + } } } } diff --git a/src/value/iter/enumiter.rs b/src/value/iter/enumiter.rs index 
d71dc5e0..05f314ab 100644 --- a/src/value/iter/enumiter.rs +++ b/src/value/iter/enumiter.rs @@ -17,12 +17,15 @@ impl EnumIter { /// Creates a enumerated iterator on an iterator. pub fn new(iter: RefValue) -> Iter { assert!(iter.is("iter")); - Iter::new(Box::new(Self { iter, count: BigInt::from(0) })) + Iter::new(Box::new(Self { + iter, + count: BigInt::from(0), + })) } tokay_method!("iter_enum : @iter", { if !iter.is("iter") { - return Err(Error::from("'iter' must be of type iter")) + return Err(Error::from("'iter' must be of type iter")); } Ok(RefValue::from(Self::new(iter))) @@ -45,11 +48,7 @@ impl RefValueIter for EnumIter { } fn repr(&self) -> String { - format!( - "", - self.iter.repr(), - self.count, - ) + format!("", self.iter.repr(), self.count,) } fn rev(&mut self) -> Result<(), Error> { diff --git a/src/value/iter/mod.rs b/src/value/iter/mod.rs index e8867792..a0f1fc30 100644 --- a/src/value/iter/mod.rs +++ b/src/value/iter/mod.rs @@ -3,7 +3,7 @@ pub mod iter; pub mod mapiter; pub mod methoditer; -pub use enumiter::{EnumIter}; +pub use enumiter::EnumIter; pub use iter::{Iter, RefValueIter}; pub use mapiter::MapIter; pub use methoditer::MethodIter; From fe9a1bb7e85d636535e63490e1d59758964abf21 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 13 Mar 2023 22:56:18 +0100 Subject: [PATCH 06/94] Let's use ImlValue::Parselet for instances as well Currently without usage. Still have to find a starting point. 
--- src/compiler/ast.rs | 2 +- src/compiler/compiler.rs | 5 ++- src/compiler/iml/op.rs | 5 ++- src/compiler/iml/value.rs | 80 +++++++++++++++------------------------ src/compiler/linker.rs | 14 ++++++- 5 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 34c02f35..3b56e9fd 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -306,7 +306,7 @@ fn traverse_node_static(compiler: &mut Compiler, lvalue: Option<&str>, node: &Di compiler.parselet_pop(None, None, None, None, None, ImlOp::Nop); if let Some(lvalue) = lvalue { - if let ImlValue::Parselet(parselet) = &value { + if let ImlValue::Parselet { parselet, .. } = &value { let mut parselet = parselet.borrow_mut(); parselet.name = Some(lvalue.to_string()); } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index bafa9f41..50848dc7 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -282,7 +282,10 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::Parselet(Rc::new(RefCell::new(parselet))) + ImlValue::Parselet { + parselet: Rc::new(RefCell::new(parselet)), + constants: HashMap::new(), + } } else { unreachable!(); } diff --git a/src/compiler/iml/op.rs b/src/compiler/iml/op.rs index 5bd7c05a..a819bf05 100644 --- a/src/compiler/iml/op.rs +++ b/src/compiler/iml/op.rs @@ -702,7 +702,10 @@ impl ImlOp { .. } => { match callee { - ImlValue::Parselet(parselet) => { + ImlValue::Parselet { + parselet, + constants, + } if constants.is_empty() => { match parselet.try_borrow() { // In case the parselet cannot be borrowed, it is left-recursive! 
Err(_) => Some(Consumable { diff --git a/src/compiler/iml/value.rs b/src/compiler/iml/value.rs index 1a6df5aa..4142f950 100644 --- a/src/compiler/iml/value.rs +++ b/src/compiler/iml/value.rs @@ -8,11 +8,10 @@ use std::rc::Rc; /** Compile-time values */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Undefined(String), // Yet undefined value - Value(RefValue), // Standard value object - Parselet(Rc>), // Parselet - ParseletInstance { - // Instance of a parselet with a constants setting + Undefined(String), // Yet undefined value + Value(RefValue), // Standard value object + Parselet { + // Parselet parselet: Rc>, constants: HashMap, }, @@ -32,7 +31,7 @@ impl ImlValue { pub fn is_callable(&self, without_arguments: bool) -> bool { match self { ImlValue::Value(value) => value.is_callable(without_arguments), - ImlValue::Parselet(parselet) | ImlValue::ParseletInstance { parselet, .. } => { + ImlValue::Parselet { parselet, .. } => { let parselet = parselet.borrow(); if without_arguments { @@ -51,9 +50,7 @@ impl ImlValue { match self { ImlValue::Undefined(ident) => crate::utils::identifier_is_consumable(ident), ImlValue::Value(value) => value.is_consuming(), - ImlValue::Parselet(parselet) | ImlValue::ParseletInstance { parselet, .. } => { - parselet.borrow().consuming - } + ImlValue::Parselet { parselet, .. } => parselet.borrow().consuming, } } } @@ -63,45 +60,40 @@ impl std::fmt::Debug for ImlValue { match self { Self::Undefined(s) => write!(f, "{}", s), Self::Value(v) => v.borrow().fmt(f), - Self::Parselet(parselet) | ImlValue::ParseletInstance { parselet, .. } => { - parselet.borrow().fmt(f) - } + ImlValue::Parselet { parselet, .. 
} => parselet.borrow().fmt(f), } } } impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - fn write_parselet( - f: &mut std::fmt::Formatter<'_>, - parselet: &ImlParselet, - constants: Option<&HashMap>, - ) -> std::fmt::Result { - write!( - f, - "{}", - parselet.name.as_deref().unwrap_or("") - )?; - - if let Some(constants) = constants { - write!(f, "<")?; - for (name, value) in constants { - write!(f, "{}: {}", name, value)?; - } - write!(f, ">")?; - } - - Ok(()) - } - match self { Self::Undefined(s) => write!(f, "{}", s), Self::Value(v) => write!(f, "{}", v.repr()), - Self::Parselet(parselet) => write_parselet(f, &parselet.borrow(), None), - Self::ParseletInstance { + Self::Parselet { parselet, constants, - } => write_parselet(f, &parselet.borrow(), Some(constants)), + } => { + write!( + f, + "{}", + parselet + .borrow() + .name + .as_deref() + .unwrap_or("") + )?; + + if !constants.is_empty() { + write!(f, "<")?; + for (name, value) in constants { + write!(f, "{}: {}", name, value)?; + } + write!(f, ">")?; + } + + Ok(()) + } } } } @@ -114,15 +106,11 @@ impl std::hash::Hash for ImlValue { state.write_u8('v' as u8); v.hash(state) } - Self::Parselet(parselet) => { - state.write_u8('p' as u8); - parselet.borrow().hash(state); - } - Self::ParseletInstance { + Self::Parselet { parselet, constants, } => { - state.write_u8('i' as u8); + state.write_u8('p' as u8); parselet.borrow().hash(state); constants.iter().collect::>().hash(state); } @@ -130,12 +118,6 @@ impl std::hash::Hash for ImlValue { } } -impl From for ImlValue { - fn from(parselet: ImlParselet) -> Self { - Self::Parselet(Rc::new(RefCell::new(parselet))) - } -} - impl From for ImlValue { fn from(value: RefValue) -> Self { Self::Value(value) diff --git a/src/compiler/linker.rs b/src/compiler/linker.rs index 68e02d1d..ace36454 100644 --- a/src/compiler/linker.rs +++ b/src/compiler/linker.rs @@ -77,7 +77,13 @@ impl Linker { let outer = { match 
self.statics.get_index(i).unwrap() { (_, Some(_)) => unreachable!(), // may not exist! - (ImlValue::Parselet(parselet), None) => parselet.clone(), + ( + ImlValue::Parselet { + parselet, + constants, + }, + None, + ) if constants.is_empty() => parselet.clone(), _ => { i += 1; continue; @@ -167,7 +173,11 @@ impl Linker { .into_iter() .map(|(iml, parselet)| { if let Some(mut parselet) = parselet { - if let ImlValue::Parselet(imlparselet) = iml { + if let ImlValue::Parselet { + parselet: imlparselet, + .. + } = iml + { parselet.consuming = configs .get(&imlparselet.borrow().id()) .map_or(None, |config| Some(config.leftrec)); From 2b2d89d0cf750fb9e91efa28ad9fd466ef02454a Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 19 Mar 2023 11:33:43 +0100 Subject: [PATCH 07/94] Check for invalid parselet instances --- ROADMAP.md | 2 +- src/compiler/ast.rs | 11 ++++++++++- src/compiler/iml/op.rs | 31 +++++++++++++++++++++++++++++++ src/compiler/iml/value.rs | 8 ++++---- 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 64548fa5..3b4f26c4 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -7,7 +7,7 @@ This document describes upcoming changes to achieve with a specific version. 
- [x] Implement iterators and `for...in`-syntax (#101) - [ ] Implement generic parselets (#10, #105) - [ ] New list syntax `[...]`, redefining sequence/`dict` syntax (#100) - - The character-class token syntax will be replaced by a `Char`-builtin + - The character-class token syntax was replaced by a `Char`-builtin - List definition `list = []` - Dict definition `dict = ()` - Builtins `dict` and `list` should become obsolete, so variables can take their names diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 3b56e9fd..9b4a041c 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -278,7 +278,16 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { ); //println!("parselet = {:#?}", ret); - return ret; + ret + } + + "value_generic" => { + let children = List::from(&node["children"]); + for (i, item) in children.iter().enumerate() { + println!("{}: {:?}", i, item); + } + + ImlValue::from(value!(void)) } _ => unimplemented!("unhandled value node {}", emit), diff --git a/src/compiler/iml/op.rs b/src/compiler/iml/op.rs index a819bf05..b6e609bb 100644 --- a/src/compiler/iml/op.rs +++ b/src/compiler/iml/op.rs @@ -403,6 +403,37 @@ impl ImlOp { unreachable!("Call to undefined symbol '{}' may not occur", name) } ImlTarget::Static(value) => { + if let ImlValue::Parselet { + parselet, + constants, + } = value + { + let parselet = parselet.borrow(); + + if !parselet.constants.is_empty() { + let mut required = Vec::new(); + + for (name, default) in &parselet.constants { + if default.is_none() && !constants.contains_key(name) { + required.push(name.to_string()); + } + } + + if !required.is_empty() { + linker.errors.push(Error::new( + offset.clone(), + format!( + "Missing generic configuration on call to '{}<{}>'", + value, + required.join(", ") + ), + )); + + return 0; + } + } + } + let idx = linker.register(value); match args { diff --git a/src/compiler/iml/value.rs b/src/compiler/iml/value.rs index 4142f950..4f6d85ae 100644 --- 
a/src/compiler/iml/value.rs +++ b/src/compiler/iml/value.rs @@ -8,12 +8,12 @@ use std::rc::Rc; /** Compile-time values */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Undefined(String), // Yet undefined value + Undefined(String), // Known but undefined value (used in generic parselets) Value(RefValue), // Standard value object Parselet { - // Parselet - parselet: Rc>, - constants: HashMap, + // Parselet instance + parselet: Rc>, // The parselet definition + constants: HashMap, // Optional parselet instance configuation }, } From 19c8b1caffc57d9556da7b65419d3026e4ec92f6 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 19 Mar 2023 15:09:58 +0100 Subject: [PATCH 08/94] Merge ImlTarget into ImlValue The previous implementation did not have any real advantage, except making the code more complex at this point. --- src/compiler/ast.rs | 18 ++------ src/compiler/compiler.rs | 6 +-- src/compiler/iml/op.rs | 86 ++++++++++++--------------------------- src/compiler/iml/value.rs | 42 +++++++++++-------- 4 files changed, 55 insertions(+), 97 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 9b4a041c..6af3a5b7 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -308,10 +308,7 @@ fn traverse_node_static(compiler: &mut Compiler, lvalue: Option<&str>, node: &Di value!(void).into() } // Defined parselet or value - ImlOp::Load { - target: ImlTarget::Static(value), - .. - } => { + ImlOp::Load { target: value, .. } => { compiler.parselet_pop(None, None, None, None, None, ImlOp::Nop); if let Some(lvalue) = lvalue { @@ -1233,16 +1230,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "opt" => "?", other => other, }, - if matches!( - res, - ImlOp::Load { - target: ImlTarget::Static(_), - .. - } | ImlOp::Call { - target: ImlTarget::Static(_), - .. - } - ) { + if matches!(res, ImlOp::Load { .. } | ImlOp::Call { .. 
}) { "value" } else { "sequence" @@ -1255,7 +1243,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // Modifiers on usages of Token::Char can be optimized for better efficiency if let ImlOp::Call { - target: ImlTarget::Static(ImlValue::Value(target)), + target: ImlValue::Value(target), .. } = &res { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 50848dc7..58707aac 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -104,11 +104,7 @@ impl Compiler { println!("--- Global scope ---\n{:#?}", self.scopes.last().unwrap()) } - if let ImlOp::Call { - target: ImlTarget::Static(main), - .. - } = ret - { + if let ImlOp::Call { target: main, .. } = ret { if self.debug > 1 { println!("--- Intermediate main ---\n{:#?}", main); } diff --git a/src/compiler/iml/op.rs b/src/compiler/iml/op.rs index b6e609bb..373c1e06 100644 --- a/src/compiler/iml/op.rs +++ b/src/compiler/iml/op.rs @@ -12,40 +12,6 @@ use std::rc::Rc; pub(in crate::compiler) type SharedImlOp = Rc>; -/// Target of a call or load -#[derive(Clone)] -pub(in crate::compiler) enum ImlTarget { - Unknown(String), // Compile-time unknown identifier - Undefined(String), // Compile-time declared but undefined identifier (used by generic parselets) - Static(ImlValue), // Compile-time static value - Local(usize), // Runtime local value - Global(usize), // Runtime global value -} - -impl ImlTarget { - pub fn is_consuming(&self) -> bool { - match self { - Self::Unknown(name) | Self::Undefined(name) => { - crate::utils::identifier_is_consumable(name) - } - Self::Static(value) => value.is_consuming(), - _ => false, // cannot determine! 
- } - } -} - -impl std::fmt::Debug for ImlTarget { - // Manual implementation is required to avoid endless recursion here - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), - Self::Static(value) => write!(f, "{}", value), - Self::Local(addr) => write!(f, "local@{}", addr), - Self::Global(addr) => write!(f, "global@{}", addr), - } - } -} - #[derive(Debug, Clone)] pub(in crate::compiler) enum ImlOp { Nop, // Empty operation @@ -53,12 +19,12 @@ pub(in crate::compiler) enum ImlOp { Shared(SharedImlOp), // Shared ImlOp tree can be shared from various locations during compilation Load { offset: Option, - target: ImlTarget, + target: ImlValue, //copy: bool, //enforce copy (Op::Sep) }, Call { offset: Option, - target: ImlTarget, + target: ImlValue, args: Option<(usize, bool)>, }, @@ -144,7 +110,7 @@ impl ImlOp { pub fn load(offset: Option, value: ImlValue) -> ImlOp { ImlOp::Load { offset, - target: ImlTarget::Static(value), + target: value, } } @@ -152,7 +118,7 @@ impl ImlOp { pub fn load_by_name(compiler: &mut Compiler, offset: Option, name: String) -> ImlOp { ImlOp::Load { offset, - target: ImlTarget::Unknown(name), + target: ImlValue::Unknown(name), } .try_resolve(compiler) } @@ -174,7 +140,7 @@ impl ImlOp { ImlOp::Call { offset, - target: ImlTarget::Static(value), + target: value, args, } } @@ -193,7 +159,7 @@ impl ImlOp { ImlOp::Call { offset, - target: ImlTarget::Unknown(name), + target: ImlValue::Unknown(name), args, } .try_resolve(compiler) @@ -214,21 +180,21 @@ impl ImlOp { match self { Self::Shared(op) => return op.borrow_mut().resolve(compiler), Self::Load { target, .. } | Self::Call { target, .. 
} => { - if let ImlTarget::Unknown(name) = target { + if let ImlValue::Unknown(name) = target { if let Some(value) = compiler.get_constant(&name) { // In case this is a generic, the value is resolved to a generic for later dispose if matches!(value, ImlValue::Undefined(_)) { - *target = ImlTarget::Undefined(name.clone()); + *target = ImlValue::Undefined(name.clone()); } else { - *target = ImlTarget::Static(value); + *target = value; } return true; } else if let Some(addr) = compiler.get_local(&name) { - *target = ImlTarget::Local(addr); + *target = ImlValue::Local(addr); return true; } else if let Some(addr) = compiler.get_global(&name) { - *target = ImlTarget::Global(addr); + *target = ImlValue::Global(addr); return true; } } @@ -367,7 +333,7 @@ impl ImlOp { } ops.push(match target { - ImlTarget::Unknown(name) => { + ImlValue::Unknown(name) => { linker.errors.push(Error::new( *offset, format!("Use of unresolved symbol '{}'", name), @@ -375,12 +341,12 @@ impl ImlOp { Op::Nop } - ImlTarget::Undefined(name) => { + ImlValue::Undefined(name) => { unreachable!("Use of undefined symbol '{}'", name) } - ImlTarget::Static(value) => linker.push(value), - ImlTarget::Local(idx) => Op::LoadFast(*idx), - ImlTarget::Global(idx) => Op::LoadGlobal(*idx), + ImlValue::Local(idx) => Op::LoadFast(*idx), + ImlValue::Global(idx) => Op::LoadGlobal(*idx), + value => linker.push(value), }); } ImlOp::Call { @@ -393,16 +359,19 @@ impl ImlOp { } match target { - ImlTarget::Unknown(name) => { + ImlValue::Unknown(name) => { linker.errors.push(Error::new( *offset, format!("Call to unresolved symbol '{}'", name), )); } - ImlTarget::Undefined(name) => { + ImlValue::Undefined(name) => { unreachable!("Call to undefined symbol '{}' may not occur", name) } - ImlTarget::Static(value) => { + ImlValue::Local(idx) => ops.push(Op::LoadFast(*idx)), + ImlValue::Global(idx) => ops.push(Op::LoadGlobal(*idx)), + value => { + // When value is a parselet, check for accepted constant configuration if let 
ImlValue::Parselet { parselet, constants, @@ -459,8 +428,6 @@ impl ImlOp { return ops.len() - start; } - ImlTarget::Local(idx) => ops.push(Op::LoadFast(*idx)), - ImlTarget::Global(idx) => ops.push(Op::LoadGlobal(*idx)), } match args { @@ -728,10 +695,7 @@ impl ImlOp { ) -> Option { match self { ImlOp::Shared(op) => op.borrow().finalize(visited, configs), - ImlOp::Call { - target: ImlTarget::Static(callee), - .. - } => { + ImlOp::Call { target: callee, .. } => { match callee { ImlValue::Parselet { parselet, @@ -791,7 +755,7 @@ impl ImlOp { None } } - _ => unreachable!(), + _ => None, } } ImlOp::Alt { alts } => { @@ -984,7 +948,7 @@ impl ImlOp { pub fn get_evaluable_value(&self) -> Result { if cfg!(feature = "static_expression_evaluation") { if let Self::Load { - target: ImlTarget::Static(ImlValue::Value(value)), + target: ImlValue::Value(value), .. } = self { diff --git a/src/compiler/iml/value.rs b/src/compiler/iml/value.rs index 4f6d85ae..b4b596b0 100644 --- a/src/compiler/iml/value.rs +++ b/src/compiler/iml/value.rs @@ -5,21 +5,24 @@ use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; -/** Compile-time values */ +/** Intermediate value */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Undefined(String), // Known but undefined value (used in generic parselets) - Value(RefValue), // Standard value object + Unknown(String), // Compile-time unknown identifier + Undefined(String), // Compile-time known but undefined identifier (used in generic parselets) + Value(RefValue), // Compile-time static value Parselet { - // Parselet instance + // Compile-time parselet instance parselet: Rc>, // The parselet definition constants: HashMap, // Optional parselet instance configuation }, + Local(usize), // Runtime local value + Global(usize), // Runtime global value } impl ImlValue { pub fn value(self) -> RefValue { - if let ImlValue::Value(value) = self { + if let Self::Value(value) = self { value } else { panic!("{:?} cannot be 
unwrapped", self) @@ -30,8 +33,8 @@ impl ImlValue { /// and when its callable if with or without arguments. pub fn is_callable(&self, without_arguments: bool) -> bool { match self { - ImlValue::Value(value) => value.is_callable(without_arguments), - ImlValue::Parselet { parselet, .. } => { + Self::Value(value) => value.is_callable(without_arguments), + Self::Parselet { parselet, .. } => { let parselet = parselet.borrow(); if without_arguments { @@ -41,16 +44,19 @@ impl ImlValue { true } } - _ => unreachable!(), + _ => false, } } /// Check whether intermediate value represents consuming pub fn is_consuming(&self) -> bool { match self { - ImlValue::Undefined(ident) => crate::utils::identifier_is_consumable(ident), - ImlValue::Value(value) => value.is_consuming(), - ImlValue::Parselet { parselet, .. } => parselet.borrow().consuming, + Self::Unknown(name) | Self::Undefined(name) => { + crate::utils::identifier_is_consumable(name) + } + Self::Value(value) => value.is_consuming(), + Self::Parselet { parselet, .. } => parselet.borrow().consuming, + _ => false, } } } @@ -58,9 +64,11 @@ impl ImlValue { impl std::fmt::Debug for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Undefined(s) => write!(f, "{}", s), + Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), Self::Value(v) => v.borrow().fmt(f), - ImlValue::Parselet { parselet, .. } => parselet.borrow().fmt(f), + Self::Parselet { .. 
} => write!(f, "{}", self), + Self::Local(addr) => write!(f, "local@{}", addr), + Self::Global(addr) => write!(f, "global@{}", addr), } } } @@ -68,8 +76,8 @@ impl std::fmt::Debug for ImlValue { impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Undefined(s) => write!(f, "{}", s), - Self::Value(v) => write!(f, "{}", v.repr()), + Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), + Self::Value(value) => write!(f, "{}", value.repr()), Self::Parselet { parselet, constants, @@ -94,6 +102,8 @@ impl std::fmt::Display for ImlValue { Ok(()) } + Self::Local(addr) => write!(f, "local@{}", addr), + Self::Global(addr) => write!(f, "global@{}", addr), } } } @@ -101,7 +111,6 @@ impl std::fmt::Display for ImlValue { impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { - Self::Undefined(_) => unreachable!(), Self::Value(v) => { state.write_u8('v' as u8); v.hash(state) @@ -114,6 +123,7 @@ impl std::hash::Hash for ImlValue { parselet.borrow().hash(state); constants.iter().collect::>().hash(state); } + other => unreachable!("{:?} is unhashable", other), } } } From 6f0d981cef29424629dd64a5ac5446d2bc095ae6 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 19 Mar 2023 16:15:36 +0100 Subject: [PATCH 09/94] Renaming iml-modules into appropriate filenames ...for better distinction. 
--- src/compiler/iml/{op.rs => imlop.rs} | 0 src/compiler/iml/{parselet.rs => imlparselet.rs} | 0 src/compiler/iml/{value.rs => imlvalue.rs} | 0 src/compiler/iml/mod.rs | 12 ++++++------ 4 files changed, 6 insertions(+), 6 deletions(-) rename src/compiler/iml/{op.rs => imlop.rs} (100%) rename src/compiler/iml/{parselet.rs => imlparselet.rs} (100%) rename src/compiler/iml/{value.rs => imlvalue.rs} (100%) diff --git a/src/compiler/iml/op.rs b/src/compiler/iml/imlop.rs similarity index 100% rename from src/compiler/iml/op.rs rename to src/compiler/iml/imlop.rs diff --git a/src/compiler/iml/parselet.rs b/src/compiler/iml/imlparselet.rs similarity index 100% rename from src/compiler/iml/parselet.rs rename to src/compiler/iml/imlparselet.rs diff --git a/src/compiler/iml/value.rs b/src/compiler/iml/imlvalue.rs similarity index 100% rename from src/compiler/iml/value.rs rename to src/compiler/iml/imlvalue.rs diff --git a/src/compiler/iml/mod.rs b/src/compiler/iml/mod.rs index ec4a9500..ee0c3317 100644 --- a/src/compiler/iml/mod.rs +++ b/src/compiler/iml/mod.rs @@ -1,14 +1,14 @@ //! 
Tokay intermediate code representation pub use crate::vm::*; -mod op; -mod parselet; -mod value; +mod imlop; +mod imlparselet; +mod imlvalue; use super::Linker; -pub(in crate::compiler) use op::*; -pub(in crate::compiler) use parselet::*; -pub(in crate::compiler) use value::*; +pub(in crate::compiler) use imlop::*; +pub(in crate::compiler) use imlparselet::*; +pub(in crate::compiler) use imlvalue::*; #[derive(Debug, Clone, PartialEq, PartialOrd)] pub(in crate::compiler) struct Consumable { From 9e4bb42c3342ecafb96ba4baddcff0016edffe63 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 19 Mar 2023 16:21:40 +0100 Subject: [PATCH 10/94] Rename ImlValue::value() to ImlValue::into_refvalue() --- src/compiler/ast.rs | 5 +++-- src/compiler/iml/imlvalue.rs | 2 +- src/compiler/linker.rs | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 6af3a5b7..292cb898 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -378,7 +378,7 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: "capture_index" => { let children = children.object::().unwrap(); - let index = traverse_node_value(compiler, children).value(); + let index = traverse_node_value(compiler, children).into_refvalue(); if store { if hold { @@ -866,7 +866,8 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "capture_index" => { let children = node["children"].borrow(); - let index = traverse_node_value(compiler, children.object::().unwrap()).value(); + let index = + traverse_node_value(compiler, children.object::().unwrap()).into_refvalue(); ImlOp::from(Op::LoadFastCapture(index.to_usize().unwrap())) } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index b4b596b0..361a58cf 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -21,7 +21,7 @@ pub(in crate::compiler) enum ImlValue { } impl ImlValue { - pub fn value(self) -> RefValue { + pub fn 
into_refvalue(self) -> RefValue { if let Self::Value(value) = self { value } else { diff --git a/src/compiler/linker.rs b/src/compiler/linker.rs index ace36454..08de8045 100644 --- a/src/compiler/linker.rs +++ b/src/compiler/linker.rs @@ -187,7 +187,7 @@ impl Linker { RefValue::from(parselet) } else { - iml.value() + iml.into_refvalue() } }) .collect(); From 50a5df895fca33047d6593122401975e818c7c95 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 20 Mar 2023 12:31:17 +0100 Subject: [PATCH 11/94] First traversal of value_generic/genarg Unfinished intermediate commit. --- src/compiler/ast.rs | 64 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 292cb898..7c770025 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1,5 +1,6 @@ //! Compiler's internal Abstract Syntax Tree traversal use indexmap::IndexMap; +use std::collections::HashMap; use tokay_macros::tokay_function; extern crate self as tokay; use super::*; @@ -283,10 +284,69 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_generic" => { let children = List::from(&node["children"]); - for (i, item) in children.iter().enumerate() { - println!("{}: {:?}", i, item); + let parselet = &children[0]; + + let mut by_seq = Vec::new(); + let mut by_name = HashMap::new(); + + for genarg in children[1..].iter() { + let genarg = genarg.borrow(); + let genarg = genarg.object::().unwrap(); + + let emit = genarg["emit"].borrow(); + + match emit.object::().unwrap().as_str() { + "genarg" => { + if !by_name.is_empty() { + compiler.errors.push(Error::new( + traverse_node_offset(genarg), + format!( + "Sequencial constants need to be specified before named constants." 
+ ), + )); + + continue; + } + + let param = &genarg["children"].borrow(); + let param = param.object::().unwrap(); + + by_seq.push(traverse_node_static(compiler, None, param)); + } + + "genarg_named" => { + let children = List::from(&genarg["children"]); + + let ident = children[0].borrow(); + let ident = ident.object::().unwrap(); + let ident = ident["value"].borrow(); + let ident = ident.object::().unwrap().as_str(); + + if by_name.contains_key(ident) { + compiler.errors.push(Error::new( + traverse_node_offset(genarg), + format!("Named constant '{}' provided more than once.", ident), + )); + + continue; + } + + let param = &children[1].borrow(); + let param = param.object::().unwrap(); + + by_name.insert( + ident.to_string(), + traverse_node_static(compiler, None, param), + ); + } + + other => unimplemented!("Unhandled genarg type {:?}", other), + } } + println!("by_seq = {:?}", by_seq); + println!("by_name = {:?}", by_name); + ImlValue::from(value!(void)) } From 4eff8138feed61fab6e452da3bc2e57a394442a8 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 20 Mar 2023 15:33:21 +0100 Subject: [PATCH 12/94] Substitute Option by ImlValue::Void --- src/compiler/ast.rs | 55 +++++++++++++++++---------------- src/compiler/compiler.rs | 4 +-- src/compiler/iml/imlop.rs | 4 ++- src/compiler/iml/imlparselet.rs | 12 +++---- src/compiler/iml/imlvalue.rs | 8 ++++- src/compiler/linker.rs | 7 ++--- 6 files changed, 49 insertions(+), 41 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 7c770025..97b6e4f0 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -163,8 +163,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_parselet" => { compiler.parselet_push(); - let mut constants: IndexMap> = IndexMap::new(); - let mut signature: IndexMap> = IndexMap::new(); + let mut constants: IndexMap = IndexMap::new(); + let mut signature: IndexMap = IndexMap::new(); // Traverse the AST let mut sigs = 
List::from(node["children"].clone()); @@ -195,20 +195,20 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { compiler.set_constant(&ident, ImlValue::Undefined(ident.to_string())); assert!(children.len() <= 2); - let default = if children.len() == 2 { - let default = children[1].borrow(); - let value = traverse_node_static( - compiler, - Some(&ident), - default.object::().unwrap(), - ); - Some(value) - } else { - None - }; - constants.insert(ident.to_string(), default); - //println!("{} {} {:?}", emit.to_string(), ident, default); + constants.insert( + ident.to_string(), + if children.len() == 2 { + let default = children[1].borrow(); + traverse_node_static( + compiler, + Some(&ident), + default.object::().unwrap(), + ) + } else { + ImlValue::Void + }, + ); } "arg" => { let first = ident.chars().nth(0).unwrap(); @@ -247,19 +247,20 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { compiler.new_local(&ident); assert!(children.len() <= 2); - let default = if children.len() == 2 { - let default = children[1].borrow(); - let value = traverse_node_static( - compiler, - Some(&ident), - default.object::().unwrap(), - ); - Some(value) - } else { - None - }; - signature.insert(ident.to_string(), default); + signature.insert( + ident.to_string(), + if children.len() == 2 { + let default = children[1].borrow(); + traverse_node_static( + compiler, + Some(&ident), + default.object::().unwrap(), + ) + } else { + ImlValue::Void + }, + ); //println!("{} {} {:?}", emit.to_string(), ident, default); } _ => unreachable!(), diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 58707aac..6791c3c1 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -210,8 +210,8 @@ impl Compiler { offset: Option, name: Option, severity: Option, - constants: Option>>, - signature: Option>>, + constants: Option>, + signature: Option>, body: ImlOp, ) -> ImlValue { assert!(self.scopes.len() > 0 && 
matches!(self.scopes[0], Scope::Parselet { .. })); diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 373c1e06..6976afc6 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -383,7 +383,9 @@ impl ImlOp { let mut required = Vec::new(); for (name, default) in &parselet.constants { - if default.is_none() && !constants.contains_key(name) { + if matches!(default, ImlValue::Void) + && !constants.contains_key(name) + { required.push(name.to_string()); } } diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 7d8c0bd2..f708ee68 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -7,12 +7,12 @@ use std::collections::{HashMap, HashSet}; #[derive(Debug)] /// Intermediate parselet pub(in crate::compiler) struct ImlParselet { - pub offset: Option, // Offset of definition - pub consuming: bool, // Flag if parselet is consuming - pub severity: u8, // Capture push severity - pub name: Option, // Parselet's name from source (for debugging) - pub constants: IndexMap>, // Constant signature with default constants; generic parselet when set. - pub signature: IndexMap>, // Argument signature with default arguments + pub offset: Option, // Offset of definition + pub consuming: bool, // Flag if parselet is consuming + pub severity: u8, // Capture push severity + pub name: Option, // Parselet's name from source (for debugging) + pub constants: IndexMap, // Constant signature with default constants; generic parselet when set. 
+ pub signature: IndexMap, // Argument signature with default arguments pub locals: usize, // Total number of local variables present (including arguments) pub begin: ImlOp, // Begin-operations pub end: ImlOp, // End-operations diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 361a58cf..9c3d4923 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -8,6 +8,7 @@ use std::rc::Rc; /** Intermediate value */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { + Void, // Compile-time void Unknown(String), // Compile-time unknown identifier Undefined(String), // Compile-time known but undefined identifier (used in generic parselets) Value(RefValue), // Compile-time static value @@ -39,7 +40,10 @@ impl ImlValue { if without_arguments { parselet.signature.len() == 0 - || parselet.signature.iter().all(|arg| arg.1.is_some()) + || parselet + .signature + .iter() + .all(|arg| !matches!(arg.1, Self::Void)) } else { true } @@ -64,6 +68,7 @@ impl ImlValue { impl std::fmt::Debug for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Self::Void => write!(f, "void"), Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), Self::Value(v) => v.borrow().fmt(f), Self::Parselet { .. 
} => write!(f, "{}", self), @@ -76,6 +81,7 @@ impl std::fmt::Debug for ImlValue { impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Self::Void => write!(f, "void"), Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), Self::Value(value) => write!(f, "{}", value.repr()), Self::Parselet { diff --git a/src/compiler/linker.rs b/src/compiler/linker.rs index 08de8045..7a031240 100644 --- a/src/compiler/linker.rs +++ b/src/compiler/linker.rs @@ -119,10 +119,9 @@ impl Linker { // Copy parameter name var_value.0.clone(), // Register default value, if any - if let Some(value) = &var_value.1 { - Some(self.register(value)) - } else { - None + match &var_value.1 { + ImlValue::Void => None, + value => Some(self.register(value)), }, ) }) From 0010a84dca0debd41d506114c548c2c611cd384c Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 22 Mar 2023 13:33:51 +0100 Subject: [PATCH 13/94] ImlValue::Generic & ImlParselet clean-up Introduces ImlValue::Generic to describe a generic usage. Removes constants from ImlParselet and making it part of ImlValue::Parselet. Yet without functionality. Under further consideration. 
--- src/compiler/ast.rs | 19 +++++-- src/compiler/compiler.rs | 3 +- src/compiler/iml/imlop.rs | 52 ++++++------------ src/compiler/iml/imlparselet.rs | 1 - src/compiler/iml/imlvalue.rs | 93 +++++++++++++++++++++++++++++---- 5 files changed, 115 insertions(+), 53 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 97b6e4f0..30748557 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -285,10 +285,15 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_generic" => { let children = List::from(&node["children"]); - let parselet = &children[0]; + // Traverse the target + let target = &children[0].borrow(); + let target = target.object::().unwrap(); + let target = traverse_node_static(compiler, None, target); + + // Traverse generic arguments let mut by_seq = Vec::new(); - let mut by_name = HashMap::new(); + let mut by_name = IndexMap::new(); for genarg in children[1..].iter() { let genarg = genarg.borrow(); @@ -345,10 +350,14 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { } } - println!("by_seq = {:?}", by_seq); - println!("by_name = {:?}", by_name); + let mut ret = ImlValue::Generic { + target: Box::new(target), + by_seq, + by_name, + }; - ImlValue::from(value!(void)) + ret.resolve(compiler); + ret } _ => unimplemented!("unhandled value node {}", emit), diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 6791c3c1..0b53764a 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -264,7 +264,6 @@ impl Compiler { || end.is_consuming() || body.is_consuming(), severity: severity.unwrap_or(5), // severity - constants, // constants signature, // signature locals: *locals, // Ensure that begin and end are blocks. 
@@ -280,7 +279,7 @@ impl Compiler { ImlValue::Parselet { parselet: Rc::new(RefCell::new(parselet)), - constants: HashMap::new(), + constants, } } else { unreachable!(); diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 6976afc6..f75b9310 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -176,33 +176,13 @@ impl ImlOp { shared } + /// Resolve volatile values from intermediate instructions pub(in crate::compiler) fn resolve(&mut self, compiler: &mut Compiler) -> bool { match self { Self::Shared(op) => return op.borrow_mut().resolve(compiler), - Self::Load { target, .. } | Self::Call { target, .. } => { - if let ImlValue::Unknown(name) = target { - if let Some(value) = compiler.get_constant(&name) { - // In case this is a generic, the value is resolved to a generic for later dispose - if matches!(value, ImlValue::Undefined(_)) { - *target = ImlValue::Undefined(name.clone()); - } else { - *target = value; - } - - return true; - } else if let Some(addr) = compiler.get_local(&name) { - *target = ImlValue::Local(addr); - return true; - } else if let Some(addr) = compiler.get_global(&name) { - *target = ImlValue::Global(addr); - return true; - } - } - } - _ => {} + Self::Load { target, .. } | Self::Call { target, .. } => target.resolve(compiler), + _ => false, } - - false } /// Turns ImlOp construct into a kleene (none-or-many) occurence. @@ -333,10 +313,10 @@ impl ImlOp { } ops.push(match target { - ImlValue::Unknown(name) => { + ImlValue::Unknown(_) | ImlValue::Generic { .. } => { linker.errors.push(Error::new( *offset, - format!("Use of unresolved symbol '{}'", name), + format!("Use of unresolved symbol '{}'", target), )); Op::Nop @@ -365,27 +345,29 @@ impl ImlOp { format!("Call to unresolved symbol '{}'", name), )); } + ImlValue::Generic { target, .. 
} => { + linker.errors.push(Error::new( + *offset, + format!("Call to unresolved symbol '{}'", target), + )); + } ImlValue::Undefined(name) => { - unreachable!("Call to undefined symbol '{}' may not occur", name) + unreachable!("Call to a value '{}' may not occur", name) } ImlValue::Local(idx) => ops.push(Op::LoadFast(*idx)), ImlValue::Global(idx) => ops.push(Op::LoadGlobal(*idx)), value => { // When value is a parselet, check for accepted constant configuration if let ImlValue::Parselet { - parselet, + parselet: _, constants, } = value { - let parselet = parselet.borrow(); - - if !parselet.constants.is_empty() { + if !constants.is_empty() { let mut required = Vec::new(); - for (name, default) in &parselet.constants { - if matches!(default, ImlValue::Void) - && !constants.contains_key(name) - { + for (name, default) in constants { + if matches!(default, ImlValue::Void) { required.push(name.to_string()); } } @@ -394,7 +376,7 @@ impl ImlOp { linker.errors.push(Error::new( offset.clone(), format!( - "Missing generic configuration on call to '{}<{}>'", + "On call to '{}', missing generic constants for {}", value, required.join(", ") ), diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index f708ee68..25f2fa1b 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -11,7 +11,6 @@ pub(in crate::compiler) struct ImlParselet { pub consuming: bool, // Flag if parselet is consuming pub severity: u8, // Capture push severity pub name: Option, // Parselet's name from source (for debugging) - pub constants: IndexMap, // Constant signature with default constants; generic parselet when set. 
pub signature: IndexMap, // Argument signature with default arguments pub locals: usize, // Total number of local variables present (including arguments) pub begin: ImlOp, // Begin-operations diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 9c3d4923..8c1dff6e 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -1,27 +1,74 @@ //! Intermediate value representation use super::*; use crate::value::{Object, RefValue}; +use crate::Compiler; +use indexmap::IndexMap; use std::cell::RefCell; -use std::collections::HashMap; use std::rc::Rc; -/** Intermediate value */ +/** Intermediate value + +Intermediate values are values that result during the compile process based on current information +from the syntax tree and symbol table information. + +These can be memory locations of variables, static values, functions or values whose definition is +still pending. +*/ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { Void, // Compile-time void Unknown(String), // Compile-time unknown identifier - Undefined(String), // Compile-time known but undefined identifier (used in generic parselets) - Value(RefValue), // Compile-time static value + Undefined(String), // Compile-time known but undefined identifier + Generic { + // Compile-time unknown generic value + target: Box, // The generic origin to be used + by_seq: Vec, // Constants by sequence + by_name: IndexMap, // Constants by name + }, + Value(RefValue), // Compile-time static value Parselet { // Compile-time parselet instance parselet: Rc>, // The parselet definition - constants: HashMap, // Optional parselet instance configuation + constants: IndexMap, // Optional parselet instance configuation }, - Local(usize), // Runtime local value - Global(usize), // Runtime global value + Local(usize), // Runtime local variable + Global(usize), // Runtime global variable } impl ImlValue { + pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { + match 
self { + Self::Unknown(name) => { + if let Some(value) = compiler.get_constant(&name) { + // In case this is a generic, the value is resolved to a generic for later dispose + if matches!(value, ImlValue::Undefined(_)) { + *self = ImlValue::Undefined(name.clone()); + } else { + *self = value; + } + + return true; + } else if let Some(addr) = compiler.get_local(&name) { + *self = ImlValue::Local(addr); + return true; + } else if let Some(addr) = compiler.get_global(&name) { + *self = ImlValue::Global(addr); + return true; + } + } + Self::Generic { + target, + by_seq, + by_name, + } => { + todo!(); + } + _ => {} + } + + false + } + pub fn into_refvalue(self) -> RefValue { if let Self::Value(value) = self { value @@ -71,7 +118,7 @@ impl std::fmt::Debug for ImlValue { Self::Void => write!(f, "void"), Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), Self::Value(v) => v.borrow().fmt(f), - Self::Parselet { .. } => write!(f, "{}", self), + Self::Parselet { .. } | Self::Generic { .. 
} => write!(f, "{}", self), Self::Local(addr) => write!(f, "local@{}", addr), Self::Global(addr) => write!(f, "global@{}", addr), } @@ -100,14 +147,40 @@ impl std::fmt::Display for ImlValue { if !constants.is_empty() { write!(f, "<")?; - for (name, value) in constants { - write!(f, "{}: {}", name, value)?; + for (i, (name, value)) in constants.iter().enumerate() { + if matches!(value, ImlValue::Void) { + write!(f, "{}{}", if i > 0 { ", " } else { "" }, name)?; + } else { + write!(f, "{}{}:{}", if i > 0 { ", " } else { "" }, name, value)?; + } } write!(f, ">")?; } Ok(()) } + Self::Generic { + target, + by_seq, + by_name, + } => { + write!(f, "{}<", target)?; + + let mut first = true; + + for item in by_seq { + write!(f, "{}{}", if !first { ", " } else { "" }, item)?; + first = false; + } + + for (name, item) in by_name.iter() { + write!(f, "{}{}:{}", if !first { ", " } else { "" }, name, item)?; + first = false; + } + + write!(f, ">")?; + Ok(()) + } Self::Local(addr) => write!(f, "local@{}", addr), Self::Global(addr) => write!(f, "global@{}", addr), } From d4ee3969961038b64e4be8271be71975ef3ba037 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 5 Apr 2023 00:43:11 +0200 Subject: [PATCH 14/94] wip: get clarification for data model This is yet another intermediate commit. Compiler::get() returns any ImlValue with a name. ImlOp and ImlValue will merge as well in their meaning. --- src/compiler/ast.rs | 184 ++++++++++++++++++----------------- src/compiler/compiler.rs | 81 ++++++++------- src/compiler/iml/imlop.rs | 40 +++----- src/compiler/iml/imlvalue.rs | 104 +++++++++----------- src/reader.rs | 2 +- 5 files changed, 198 insertions(+), 213 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 30748557..3feeb342 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1,6 +1,5 @@ //! 
Compiler's internal Abstract Syntax Tree traversal use indexmap::IndexMap; -use std::collections::HashMap; use tokay_macros::tokay_function; extern crate self as tokay; use super::*; @@ -182,17 +181,25 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { match emit { "gen" => { + let offset = traverse_node_offset(node); + // check if identifier was not provided twice if constants.contains_key(&ident) { compiler.errors.push(Error::new( - traverse_node_offset(node), + offset, format!("Generic '{}' already given in signature before", ident), )); continue; } - compiler.set_constant(&ident, ImlValue::Undefined(ident.to_string())); + compiler.set_constant( + &ident, + ImlValue::Generic { + offset, + name: ident.to_string(), + }, + ); assert!(children.len() <= 2); @@ -292,32 +299,21 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { let target = traverse_node_static(compiler, None, target); // Traverse generic arguments - let mut by_seq = Vec::new(); - let mut by_name = IndexMap::new(); + let mut config = Vec::new(); for genarg in children[1..].iter() { let genarg = genarg.borrow(); let genarg = genarg.object::().unwrap(); + let offset = traverse_node_offset(genarg); let emit = genarg["emit"].borrow(); match emit.object::().unwrap().as_str() { "genarg" => { - if !by_name.is_empty() { - compiler.errors.push(Error::new( - traverse_node_offset(genarg), - format!( - "Sequencial constants need to be specified before named constants." 
- ), - )); - - continue; - } - let param = &genarg["children"].borrow(); let param = param.object::().unwrap(); - by_seq.push(traverse_node_static(compiler, None, param)); + config.push((offset, None, traverse_node_static(compiler, None, param))); } "genarg_named" => { @@ -328,6 +324,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { let ident = ident["value"].borrow(); let ident = ident.object::().unwrap().as_str(); + /* if by_name.contains_key(ident) { compiler.errors.push(Error::new( traverse_node_offset(genarg), @@ -336,24 +333,26 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { continue; } + */ let param = &children[1].borrow(); let param = param.object::().unwrap(); - by_name.insert( - ident.to_string(), + config.push(( + offset, + Some(ident.to_string()), traverse_node_static(compiler, None, param), - ); + )); } other => unimplemented!("Unhandled genarg type {:?}", other), } } - let mut ret = ImlValue::Generic { + let mut ret = ImlValue::Instance { target: Box::new(target), - by_seq, - by_name, + config, + offset: traverse_node_offset(node), }; ret.resolve(compiler); @@ -473,93 +472,98 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: let name = item["value"].borrow(); let name = name.object::().unwrap().as_str(); - // Check for not assigning to a constant (at any level) - if compiler.get_constant(name).is_some() { - compiler.errors.push(Error::new( - traverse_node_offset(node), - format!("Cannot assign to constant '{}'", name), - )); - - break; - } - - // Check if identifier is valid - if let Err(mut error) = identifier_is_valid(name) { - if let Some(offset) = traverse_node_offset(node) { - error.patch_offset(offset); - } - - compiler.errors.push(error); - break; - } - - // Check if identifier is not defining a consumable - if utils::identifier_is_consumable(name) { - compiler.errors.push(Error::new( - traverse_node_offset(node), - - if &name[0..1] == "_" { - 
format!( - "The variable '{}' is invalid, only constants may start with '_'", - name - ) - } - else { - format!( - "Cannot assign variable named '{}'; Try lower-case identifier, e.g. '{}'", - name, name.to_lowercase() - ) - } - )); - - break; - } - /* Generates code for a symbol store, which means: 1. look-up local variable, and store into 2. look-up global variable, and store into 3. create local variable, and store into */ - if let Some(addr) = compiler.get_local(name) { - if store { - if hold { - ops.push(Op::StoreFastHold(addr).into()) + match compiler.get(name) { + // Known local + Some(ImlValue::Local(addr)) => { + if store { + if hold { + ops.push(Op::StoreFastHold(addr).into()) + } else { + ops.push(Op::StoreFast(addr).into()) + } } else { - ops.push(Op::StoreFast(addr).into()) + ops.push(Op::LoadFast(addr).into()) } - } else { - ops.push(Op::LoadFast(addr).into()) } - } else if let Some(addr) = compiler.get_global(name) { - if store { - if hold { - ops.push(Op::StoreGlobalHold(addr).into()) + // Known global + Some(ImlValue::Global(addr)) => { + if store { + if hold { + ops.push(Op::StoreGlobalHold(addr).into()) + } else { + ops.push(Op::StoreGlobal(addr).into()) + } } else { - ops.push(Op::StoreGlobal(addr).into()) + ops.push(Op::LoadGlobal(addr).into()) } - } else { - ops.push(Op::LoadGlobal(addr).into()) } - } else { - // When chained lvalue, name must be declared! 
- if children.len() > 1 { + // Check for not assigning to a constant (at any level) + Some(_) => { compiler.errors.push(Error::new( traverse_node_offset(node), - format!("Undeclared variable '{}', please define it first", name), + format!("Cannot assign to constant '{}'", name), )); break; } + // Undefined name + None => { + // Check if identifier is valid + if let Err(mut error) = identifier_is_valid(name) { + if let Some(offset) = traverse_node_offset(node) { + error.patch_offset(offset); + } - let addr = compiler.new_local(name); - if store { - if hold { - ops.push(Op::StoreFastHold(addr).into()) + compiler.errors.push(error); + break; + } + + // Check if identifier is not defining a consumable + if utils::identifier_is_consumable(name) { + compiler.errors.push(Error::new( + traverse_node_offset(node), + + if &name[0..1] == "_" { + format!( + "The variable '{}' is invalid, only constants may start with '_'", + name + ) + } + else { + format!( + "Cannot assign variable named '{}'; Try lower-case identifier, e.g. '{}'", + name, name.to_lowercase() + ) + } + )); + + break; + } + + // When chained lvalue, name must be declared! + if children.len() > 1 { + compiler.errors.push(Error::new( + traverse_node_offset(node), + format!("Undeclared variable '{}', please define it first", name), + )); + break; + } + + let addr = compiler.new_local(name); + if store { + if hold { + ops.push(Op::StoreFastHold(addr).into()) + } else { + ops.push(Op::StoreFast(addr).into()) + } } else { - ops.push(Op::StoreFast(addr).into()) + ops.push(Op::LoadFast(addr).into()) } - } else { - ops.push(Op::LoadFast(addr).into()) } } } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 0b53764a..9af8957f 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -324,25 +324,6 @@ impl Compiler { unreachable!("There _must_ be at least one parselet scope!"); } - /** Retrieves the address of a local variable under a given name. 
- - Returns None when the variable does not exist. */ - pub(in crate::compiler) fn get_local(&self, name: &str) -> Option { - // Retrieve local variables from next parselet scope owning variables, except global scope! - for scope in &self.scopes[..self.scopes.len() - 1] { - // Check for scope with variables - if let Scope::Parselet { variables, .. } = &scope { - if let Some(addr) = variables.get(name) { - return Some(*addr); - } - - break; - } - } - - None - } - /** Insert new local variable under given name in current scope. */ pub(in crate::compiler) fn new_local(&mut self, name: &str) -> usize { for scope in &mut self.scopes { @@ -400,19 +381,6 @@ impl Compiler { unreachable!("There _must_ be at least one parselet scope!"); } - /** Retrieve address of a global variable. */ - pub(in crate::compiler) fn get_global(&self, name: &str) -> Option { - if let Scope::Parselet { variables, .. } = self.scopes.last().unwrap() { - if let Some(addr) = variables.get(name) { - return Some(*addr); - } - - return None; - } - - unreachable!("Top-level scope is not a parselet scope"); - } - /** Set constant to name in current scope. */ pub(in crate::compiler) fn set_constant(&mut self, name: &str, mut value: ImlValue) { /* @@ -475,15 +443,46 @@ impl Compiler { unreachable!("There _must_ be at least one parselet or block scope!"); } - /** Get constant value, either from current or preceding scope, - a builtin or special. */ - pub(in crate::compiler) fn get_constant(&mut self, name: &str) -> Option { - // Check for constant in available scopes - for scope in &self.scopes { - if let Scope::Parselet { constants, .. } | Scope::Block { constants, .. } = scope { - if let Some(value) = constants.get(name) { - return Some(value.clone()); + /** Get named value, either from current or preceding scope, a builtin or special. 
*/ + pub(in crate::compiler) fn get(&mut self, name: &str) -> Option { + let mut top_parselet = true; + + for (i, scope) in self.scopes.iter().enumerate() { + match scope { + Scope::Block { constants, .. } => { + if let Some(value) = constants.get(name) { + return Some(value.clone()); + } } + Scope::Parselet { + constants, + variables, + .. + } => { + if let Some(value) = constants.get(name) { + if !top_parselet && matches!(value, ImlValue::Generic { .. }) { + continue; + } + + return Some(value.clone()); + } + + // Check for global variable + if i + 1 == self.scopes.len() { + if let Some(addr) = variables.get(name) { + return Some(ImlValue::Global(*addr)); + } + } + // Check for local variable + else if top_parselet { + if let Some(addr) = variables.get(name) { + return Some(ImlValue::Local(*addr)); + } + } + + top_parselet = false; + } + _ => {} } } @@ -505,7 +504,7 @@ impl Compiler { RefValue::from(Token::builtin("Whitespaces").unwrap()).into(), ); - return Some(self.get_constant(name).unwrap()); + return Some(self.get(name).unwrap()); } // Check for built-in token diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index f75b9310..27f2beb8 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -106,7 +106,7 @@ impl ImlOp { } } - /// Load known value + /// Load value pub fn load(offset: Option, value: ImlValue) -> ImlOp { ImlOp::Load { offset, @@ -116,11 +116,7 @@ impl ImlOp { /// Load unknown value by name pub fn load_by_name(compiler: &mut Compiler, offset: Option, name: String) -> ImlOp { - ImlOp::Load { - offset, - target: ImlValue::Unknown(name), - } - .try_resolve(compiler) + Self::load(offset.clone(), ImlValue::Name { offset, name }).try_resolve(compiler) } /// Call known value @@ -158,8 +154,8 @@ impl ImlOp { } ImlOp::Call { - offset, - target: ImlValue::Unknown(name), + offset: offset.clone(), + target: ImlValue::Name { offset, name }, args, } .try_resolve(compiler) @@ -313,19 +309,19 @@ impl ImlOp { } ops.push(match 
target { - ImlValue::Unknown(_) | ImlValue::Generic { .. } => { + ImlValue::Name { name, .. } => { linker.errors.push(Error::new( *offset, - format!("Use of unresolved symbol '{}'", target), + format!("Use of unresolved symbol '{}'", name), )); Op::Nop } - ImlValue::Undefined(name) => { - unreachable!("Use of undefined symbol '{}'", name) + ImlValue::Generic { name, .. } => { + unreachable!("Use of generic symbol '{}'", name) } - ImlValue::Local(idx) => Op::LoadFast(*idx), - ImlValue::Global(idx) => Op::LoadGlobal(*idx), + ImlValue::Local(addr) => Op::LoadFast(*addr), + ImlValue::Global(addr) => Op::LoadGlobal(*addr), value => linker.push(value), }); } @@ -339,23 +335,17 @@ impl ImlOp { } match target { - ImlValue::Unknown(name) => { + ImlValue::Name { name, .. } => { linker.errors.push(Error::new( *offset, format!("Call to unresolved symbol '{}'", name), )); } - ImlValue::Generic { target, .. } => { - linker.errors.push(Error::new( - *offset, - format!("Call to unresolved symbol '{}'", target), - )); - } - ImlValue::Undefined(name) => { - unreachable!("Call to a value '{}' may not occur", name) + ImlValue::Generic { name, .. } => { + unreachable!("Call to generic '{}' may not occur", name) } - ImlValue::Local(idx) => ops.push(Op::LoadFast(*idx)), - ImlValue::Global(idx) => ops.push(Op::LoadGlobal(*idx)), + ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), + ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), value => { // When value is a parselet, check for accepted constant configuration if let ImlValue::Parselet { diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 8c1dff6e..cbb5e482 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -1,7 +1,8 @@ //! 
Intermediate value representation use super::*; +use crate::compiler::Compiler; +use crate::reader::Offset; use crate::value::{Object, RefValue}; -use crate::Compiler; use indexmap::IndexMap; use std::cell::RefCell; use std::rc::Rc; @@ -16,15 +17,7 @@ still pending. */ #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Void, // Compile-time void - Unknown(String), // Compile-time unknown identifier - Undefined(String), // Compile-time known but undefined identifier - Generic { - // Compile-time unknown generic value - target: Box, // The generic origin to be used - by_seq: Vec, // Constants by sequence - by_name: IndexMap, // Constants by name - }, + Void, // Compile-time void Value(RefValue), // Compile-time static value Parselet { // Compile-time parselet instance @@ -33,36 +26,57 @@ pub(in crate::compiler) enum ImlValue { }, Local(usize), // Runtime local variable Global(usize), // Runtime global variable + + // Unresolved + Generic { + // Generic placeholder + offset: Option, + name: String, + }, + Name { + // Unresolved name + offset: Option, + name: String, + }, + Instance { + offset: Option, // Source offset + target: Box, // Instance target + config: Vec<(Option, Option, ImlValue)>, // Constant configuration + }, } impl ImlValue { pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { match self { - Self::Unknown(name) => { - if let Some(value) = compiler.get_constant(&name) { - // In case this is a generic, the value is resolved to a generic for later dispose - if matches!(value, ImlValue::Undefined(_)) { - *self = ImlValue::Undefined(name.clone()); - } else { - *self = value; - } - - return true; - } else if let Some(addr) = compiler.get_local(&name) { - *self = ImlValue::Local(addr); - return true; - } else if let Some(addr) = compiler.get_global(&name) { - *self = ImlValue::Global(addr); + Self::Name { name, .. 
} => { + if let Some(value) = compiler.get(&name) { + *self = value; return true; } } - Self::Generic { + /* + Self::Instance { target, - by_seq, - by_name, + .. + } if matches!(target, ImlValue::Name(_)) => { + // Try to resolve target + if target.resolve(compiler) { + // On success, try to resolve the entire instance + return self.resolve(compiler); + } + } + Self::Instance { + target: + ImlValue::Parselet { + parselet, + constants, + }, + config, + offset, } => { todo!(); } + */ _ => {} } @@ -102,7 +116,7 @@ impl ImlValue { /// Check whether intermediate value represents consuming pub fn is_consuming(&self) -> bool { match self { - Self::Unknown(name) | Self::Undefined(name) => { + Self::Name { name, .. } | Self::Generic { name, .. } => { crate::utils::identifier_is_consumable(name) } Self::Value(value) => value.is_consuming(), @@ -116,11 +130,12 @@ impl std::fmt::Debug for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Void => write!(f, "void"), - Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), Self::Value(v) => v.borrow().fmt(f), - Self::Parselet { .. } | Self::Generic { .. } => write!(f, "{}", self), + Self::Parselet { .. } => write!(f, "{}", self), Self::Local(addr) => write!(f, "local@{}", addr), Self::Global(addr) => write!(f, "global@{}", addr), + Self::Name { name, .. } | Self::Generic { name, .. } => write!(f, "{}", name), + _ => todo!(), } } } @@ -129,7 +144,7 @@ impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Void => write!(f, "void"), - Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), + Self::Name { name, .. } | Self::Generic { name, .. 
} => write!(f, "{}", name), Self::Value(value) => write!(f, "{}", value.repr()), Self::Parselet { parselet, @@ -159,30 +174,7 @@ impl std::fmt::Display for ImlValue { Ok(()) } - Self::Generic { - target, - by_seq, - by_name, - } => { - write!(f, "{}<", target)?; - - let mut first = true; - - for item in by_seq { - write!(f, "{}{}", if !first { ", " } else { "" }, item)?; - first = false; - } - - for (name, item) in by_name.iter() { - write!(f, "{}{}:{}", if !first { ", " } else { "" }, name, item)?; - first = false; - } - - write!(f, ">")?; - Ok(()) - } - Self::Local(addr) => write!(f, "local@{}", addr), - Self::Global(addr) => write!(f, "global@{}", addr), + _ => todo!(), } } } diff --git a/src/reader.rs b/src/reader.rs index fc9ef141..896b1641 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -4,7 +4,7 @@ use std::io::prelude::*; use std::io::BufReader; /// Position inside a reader, with row and column counting. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Offset { // todo: Hold source filename information as well in the future? pub offset: usize, From c12f1fcaefbcb49af8d39dc9e477d7adb3af46c6 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 16 Apr 2023 23:31:34 +0200 Subject: [PATCH 15/94] wip: ImlValue::Shared Intermediate commit with a lot of stuff todo. 
--- src/compiler/ast.rs | 5 +- src/compiler/compiler.rs | 28 +-- src/compiler/iml/imlop.rs | 219 +++--------------------- src/compiler/iml/imlparselet.rs | 1 + src/compiler/iml/imlvalue.rs | 293 +++++++++++++++++++++++++++----- src/compiler/linker.rs | 43 +---- 6 files changed, 298 insertions(+), 291 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 3feeb342..21cdb4aa 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -195,8 +195,9 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { compiler.set_constant( &ident, - ImlValue::Generic { + ImlValue::Name { offset, + generic: true, name: ident.to_string(), }, ); @@ -381,7 +382,7 @@ fn traverse_node_static(compiler: &mut Compiler, lvalue: Option<&str>, node: &Di compiler.parselet_pop(None, None, None, None, None, ImlOp::Nop); if let Some(lvalue) = lvalue { - if let ImlValue::Parselet { parselet, .. } = &value { + if let ImlValue::Parselet(parselet) = &value { let mut parselet = parselet.borrow_mut(); parselet.name = Some(lvalue.to_string()); } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 9af8957f..a49d89b2 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -50,7 +50,7 @@ pub struct Compiler { pub debug: u8, // Compiler debug mode pub(in crate::compiler) scopes: Vec, // Current compilation scopes - pub(in crate::compiler) usages: Vec, // Unresolved calls or loads + pub(in crate::compiler) usages: Vec, // Unresolved values pub(in crate::compiler) errors: Vec, // Collected errors during compilation } @@ -158,15 +158,15 @@ impl Compiler { &self.scopes[0] { // Cut out usages created inside this scope for processing - let usages: Vec = self.usages.drain(usage_start..).collect(); + let usages: Vec = self.usages.drain(usage_start..).collect(); // Afterwards, resolve and insert them again in case there where not resolved - for mut op in usages.into_iter() { - if op.resolve(self) { + for mut value in usages.into_iter() { + 
if value.resolve(self) { continue; } - self.usages.push(op); // Insert again for later resolve + self.usages.push(value); // Re-insert into usages for later resolve } } } @@ -256,6 +256,16 @@ impl Compiler { //println!("body = {:?}", body); //println!("end = {:?}", end); + // todo: Check if the available values define a useless parselet. + /* + if matches!(begin, ImlOp::Nop) + || matches!(end, ImlOp::Nop) + || matches!(body, ImlOp::Nop) + || signature.is_empty() { + return ImlValue::Void + } + */ + let parselet = ImlParselet { offset, name, @@ -264,6 +274,7 @@ impl Compiler { || end.is_consuming() || body.is_consuming(), severity: severity.unwrap_or(5), // severity + constants, // constants signature, // signature locals: *locals, // Ensure that begin and end are blocks. @@ -277,10 +288,7 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::Parselet { - parselet: Rc::new(RefCell::new(parselet)), - constants, - } + ImlValue::Parselet(Rc::new(RefCell::new(parselet))) } else { unreachable!(); } @@ -460,7 +468,7 @@ impl Compiler { .. } => { if let Some(value) = constants.get(name) { - if !top_parselet && matches!(value, ImlValue::Generic { .. }) { + if !top_parselet && matches!(value, ImlValue::Name { generic: true, .. 
}) { continue; } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 27f2beb8..1fbab907 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -4,19 +4,13 @@ use super::*; use crate::reader::Offset; use crate::utils; use crate::Compiler; -use crate::Error; use crate::{Object, RefValue}; -use std::cell::RefCell; use std::collections::{HashMap, HashSet}; -use std::rc::Rc; - -pub(in crate::compiler) type SharedImlOp = Rc>; #[derive(Debug, Clone)] pub(in crate::compiler) enum ImlOp { - Nop, // Empty operation - Op(Op), // VM Operation - Shared(SharedImlOp), // Shared ImlOp tree can be shared from various locations during compilation + Nop, // Empty operation + Op(Op), // VM Operation Load { offset: Option, target: ImlValue, @@ -116,7 +110,15 @@ impl ImlOp { /// Load unknown value by name pub fn load_by_name(compiler: &mut Compiler, offset: Option, name: String) -> ImlOp { - Self::load(offset.clone(), ImlValue::Name { offset, name }).try_resolve(compiler) + Self::load( + offset.clone(), + ImlValue::Name { + offset, + name, + generic: false, + } + .try_resolve(compiler), + ) } /// Call known value @@ -155,30 +157,14 @@ impl ImlOp { ImlOp::Call { offset: offset.clone(), - target: ImlValue::Name { offset, name }, + target: ImlValue::Name { + offset, + name, + generic: false, + } + .try_resolve(compiler), args, } - .try_resolve(compiler) - } - - /// Try to resolve immediatelly, otherwise push shared reference to compiler's unresolved ImlOp. - fn try_resolve(mut self, compiler: &mut Compiler) -> ImlOp { - if self.resolve(compiler) { - return self; - } - - let shared = ImlOp::Shared(Rc::new(RefCell::new(self))); - compiler.usages.push(shared.clone()); - shared - } - - /// Resolve volatile values from intermediate instructions - pub(in crate::compiler) fn resolve(&mut self, compiler: &mut Compiler) -> bool { - match self { - Self::Shared(op) => return op.borrow_mut().resolve(compiler), - Self::Load { target, .. 
} | Self::Call { target, .. } => target.resolve(compiler), - _ => false, - } } /// Turns ImlOp construct into a kleene (none-or-many) occurence. @@ -300,30 +286,12 @@ impl ImlOp { match self { ImlOp::Nop => {} ImlOp::Op(op) => ops.push(op.clone()), - ImlOp::Shared(op) => { - op.borrow().compile(ops, linker); - } ImlOp::Load { offset, target } => { if let Some(offset) = offset { ops.push(Op::Offset(Box::new(*offset))); } - ops.push(match target { - ImlValue::Name { name, .. } => { - linker.errors.push(Error::new( - *offset, - format!("Use of unresolved symbol '{}'", name), - )); - - Op::Nop - } - ImlValue::Generic { name, .. } => { - unreachable!("Use of generic symbol '{}'", name) - } - ImlValue::Local(addr) => Op::LoadFast(*addr), - ImlValue::Global(addr) => Op::LoadGlobal(*addr), - value => linker.push(value), - }); + ops.push(target.compile_to_load(linker)); } ImlOp::Call { offset, @@ -334,90 +302,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - match target { - ImlValue::Name { name, .. } => { - linker.errors.push(Error::new( - *offset, - format!("Call to unresolved symbol '{}'", name), - )); - } - ImlValue::Generic { name, .. 
} => { - unreachable!("Call to generic '{}' may not occur", name) - } - ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), - ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), - value => { - // When value is a parselet, check for accepted constant configuration - if let ImlValue::Parselet { - parselet: _, - constants, - } = value - { - if !constants.is_empty() { - let mut required = Vec::new(); - - for (name, default) in constants { - if matches!(default, ImlValue::Void) { - required.push(name.to_string()); - } - } - - if !required.is_empty() { - linker.errors.push(Error::new( - offset.clone(), - format!( - "On call to '{}', missing generic constants for {}", - value, - required.join(", ") - ), - )); - - return 0; - } - } - } - - let idx = linker.register(value); - - match args { - // Qualified call - Some((args, nargs)) => { - if *args == 0 && !*nargs { - ops.push(Op::CallStatic(idx)); - } else if *args > 0 && !*nargs { - ops.push(Op::CallStaticArg(Box::new((idx, *args)))); - } else { - ops.push(Op::CallStaticArgNamed(Box::new((idx, *args)))); - } - } - // Call or load - None => { - if value.is_callable(true) { - ops.push(Op::CallStatic(idx)); - } else { - ops.push(Op::LoadStatic(idx)); - } - } - } - - return ops.len() - start; - } - } - - match args { - // Qualified call - Some((args, nargs)) => { - if *args == 0 && *nargs == false { - ops.push(Op::Call); - } else if *args > 0 && *nargs == false { - ops.push(Op::CallArg(*args)); - } else { - ops.push(Op::CallArgNamed(*args)); - } - } - // Call or load - None => ops.push(Op::CallOrCopy), - } + ops.extend(target.compile_to_call(linker, *args)); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); @@ -668,70 +553,7 @@ impl ImlOp { configs: &mut HashMap, ) -> Option { match self { - ImlOp::Shared(op) => op.borrow().finalize(visited, configs), - ImlOp::Call { target: callee, .. 
} => { - match callee { - ImlValue::Parselet { - parselet, - constants, - } if constants.is_empty() => { - match parselet.try_borrow() { - // In case the parselet cannot be borrowed, it is left-recursive! - Err(_) => Some(Consumable { - leftrec: true, - nullable: false, - }), - // Otherwise dive into this parselet... - Ok(parselet) => { - // ... only if it's generally flagged to be consuming. - if !parselet.consuming { - return None; - } - - let id = parselet.id(); - - if visited.contains(&id) { - Some(Consumable { - leftrec: false, - nullable: configs[&id].nullable, - }) - } else { - visited.insert(id); - - if !configs.contains_key(&id) { - configs.insert( - id, - Consumable { - leftrec: false, - nullable: false, - }, - ); - } - - //fixme: Finalize on begin and end as well! - let ret = parselet.body.finalize(visited, configs); - - visited.remove(&id); - - ret - } - } - } - } - ImlValue::Value(callee) => { - if callee.is_consuming() { - //println!("{:?} called, which is nullable={:?}", callee, callee.is_nullable()); - Some(Consumable { - leftrec: false, - nullable: callee.is_nullable(), - }) - } else { - None - } - } - _ => None, - } - } + ImlOp::Call { target, .. } => target.finalize(visited, configs), ImlOp::Alt { alts } => { let mut leftrec = false; let mut nullable = false; @@ -848,7 +670,6 @@ impl ImlOp { // Query along ImlOp structure match self { - ImlOp::Shared(op) => op.borrow().walk(func), ImlOp::Alt { alts: items } | ImlOp::Seq { seq: items, .. 
} => { for item in items { if !item.walk(func) { diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 25f2fa1b..89d13a18 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -11,6 +11,7 @@ pub(in crate::compiler) struct ImlParselet { pub consuming: bool, // Flag if parselet is consuming pub severity: u8, // Capture push severity pub name: Option, // Parselet's name from source (for debugging) + pub constants: IndexMap, // Parselet generic signature with default configuration pub signature: IndexMap, // Argument signature with default arguments pub locals: usize, // Total number of local variables present (including arguments) pub begin: ImlOp, // Begin-operations diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index cbb5e482..025d7b47 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -2,9 +2,11 @@ use super::*; use crate::compiler::Compiler; use crate::reader::Offset; -use crate::value::{Object, RefValue}; -use indexmap::IndexMap; +use crate::value::{Object, RefValue, Value}; +use crate::Error; +use num::ToPrimitive; use std::cell::RefCell; +use std::collections::{HashMap, HashSet}; use std::rc::Rc; /** Intermediate value @@ -15,43 +17,55 @@ from the syntax tree and symbol table information. These can be memory locations of variables, static values, functions or values whose definition is still pending. 
*/ + #[derive(Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Void, // Compile-time void - Value(RefValue), // Compile-time static value - Parselet { - // Compile-time parselet instance - parselet: Rc>, // The parselet definition - constants: IndexMap, // Optional parselet instance configuation - }, - Local(usize), // Runtime local variable - Global(usize), // Runtime global variable + Void, + Shared(Rc>), + + // Resolved + Value(RefValue), // Compile-time static value + Local(usize), // Runtime local variable + Global(usize), // Runtime global variable + Parselet(Rc>), // Parselet // Unresolved - Generic { - // Generic placeholder - offset: Option, - name: String, - }, Name { // Unresolved name - offset: Option, - name: String, + offset: Option, // Source offset + generic: bool, // Generic name, to be resolved during compilation + name: String, // Identifier }, Instance { - offset: Option, // Source offset - target: Box, // Instance target + // Parselet instance + offset: Option, // Source offset + target: Box, // Instance target config: Vec<(Option, Option, ImlValue)>, // Constant configuration }, } impl ImlValue { + /// Try to resolve immediatelly, otherwise push shared reference to compiler's unresolved ImlValue. + pub fn try_resolve(mut self, compiler: &mut Compiler) -> Self { + if self.resolve(compiler) { + return self; + } + + let shared = Self::Shared(Rc::new(RefCell::new(self))); + compiler.usages.push(shared.clone()); + shared + } + + /// Resolve unresolved ImlValue. Returns true in case the provided value is (already) resolved. pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { match self { + Self::Shared(value) => value.borrow_mut().resolve(compiler), Self::Name { name, .. 
} => { if let Some(value) = compiler.get(&name) { *self = value; - return true; + true + } else { + false } } /* @@ -77,17 +91,14 @@ impl ImlValue { todo!(); } */ - _ => {} + _ => true, } - - false } pub fn into_refvalue(self) -> RefValue { - if let Self::Value(value) = self { - value - } else { - panic!("{:?} cannot be unwrapped", self) + match self { + Self::Value(value) => value, + _ => unreachable!("{:?} cannot be unwrapped", self), } } @@ -95,8 +106,9 @@ impl ImlValue { /// and when its callable if with or without arguments. pub fn is_callable(&self, without_arguments: bool) -> bool { match self { + Self::Shared(value) => value.borrow().is_callable(without_arguments), Self::Value(value) => value.is_callable(without_arguments), - Self::Parselet { parselet, .. } => { + Self::Parselet(parselet) => { let parselet = parselet.borrow(); if without_arguments { @@ -116,25 +128,221 @@ impl ImlValue { /// Check whether intermediate value represents consuming pub fn is_consuming(&self) -> bool { match self { - Self::Name { name, .. } | Self::Generic { name, .. } => { - crate::utils::identifier_is_consumable(name) - } + Self::Shared(value) => value.borrow().is_consuming(), + Self::Name { name, .. } => crate::utils::identifier_is_consumable(name), Self::Value(value) => value.is_consuming(), - Self::Parselet { parselet, .. } => parselet.borrow().consuming, + Self::Parselet(parselet) => parselet.borrow().consuming, _ => false, } } + + // Finalize... this is a work in progress... + pub fn finalize( + &self, + visited: &mut HashSet, + configs: &mut HashMap, + ) -> Option { + match self { + ImlValue::Shared(value) => value.borrow().finalize(visited, configs), + ImlValue::Parselet(parselet) => { + match parselet.try_borrow() { + // In case the parselet cannot be borrowed, it is left-recursive! + Err(_) => Some(Consumable { + leftrec: true, + nullable: false, + }), + // Otherwise dive into this parselet... + Ok(parselet) => { + // ... 
only if it's generally flagged to be consuming. + if !parselet.consuming { + return None; + } + + let id = parselet.id(); + + if visited.contains(&id) { + Some(Consumable { + leftrec: false, + nullable: configs[&id].nullable, + }) + } else { + visited.insert(id); + + if !configs.contains_key(&id) { + configs.insert( + id, + Consumable { + leftrec: false, + nullable: false, + }, + ); + } + + //fixme: Finalize on begin and end as well! + let ret = parselet.body.finalize(visited, configs); + + visited.remove(&id); + + ret + } + } + } + } + ImlValue::Value(callee) => { + if callee.is_consuming() { + //println!("{:?} called, which is nullable={:?}", callee, callee.is_nullable()); + Some(Consumable { + leftrec: false, + nullable: callee.is_nullable(), + }) + } else { + None + } + } + _ => None, + } + } + + /** Generates code for a value load. For several, oftenly used values, there exists a direct operation pendant, + which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation + is returned. */ + pub fn compile_to_load(&self, linker: &mut Linker) -> Op { + match self { + ImlValue::Shared(value) => return value.borrow().compile_to_load(linker), + ImlValue::Value(value) => match &*value.borrow() { + Value::Void => return Op::PushVoid, + Value::Null => return Op::PushNull, + Value::True => return Op::PushTrue, + Value::False => return Op::PushFalse, + Value::Int(i) => match i.to_i64() { + Some(0) => return Op::Push0, + Some(1) => return Op::Push1, + _ => {} + }, + _ => {} + }, + ImlValue::Parselet(_) => {} + ImlValue::Local(addr) => return Op::LoadFast(*addr), + ImlValue::Global(addr) => return Op::LoadGlobal(*addr), + ImlValue::Name { name, .. } => { + linker.errors.push(Error::new( + None, + format!("Use of unresolved symbol '{}'", name), + )); + + return Op::Nop; + } + _ => todo!(), + } + + Op::LoadStatic(linker.register(self)) + } + + /** Generates code for a value call. 
*/ + pub fn compile_to_call(&self, linker: &mut Linker, args: Option<(usize, bool)>) -> Vec { + let mut ops = Vec::new(); + + match self { + ImlValue::Shared(value) => return value.borrow().compile_to_call(linker, args), + ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), + ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), + ImlValue::Name { name, .. } => { + linker.errors.push(Error::new( + None, + format!("Call to unresolved symbol '{}'", name), + )); + return ops; + } + value => { + // When value is a parselet, check for accepted constant configuration + /* + if let ImlValue::Parselet { + parselet: _, + constants, + } = value + { + if !constants.is_empty() { + let mut required = Vec::new(); + + for (name, default) in constants { + if matches!(default, ImlValue::Void) { + required.push(name.to_string()); + } + } + + if !required.is_empty() { + linker.errors.push(Error::new( + offset.clone(), + format!( + "On call to '{}', missing generic constants for {}", + value, + required.join(", ") + ), + )); + + return 0; + } + } + } + */ + + let idx = linker.register(value); + + match args { + // Qualified call + Some((args, nargs)) => { + if args == 0 && !nargs { + ops.push(Op::CallStatic(idx)); + } else if args > 0 && !nargs { + ops.push(Op::CallStaticArg(Box::new((idx, args)))); + } else { + ops.push(Op::CallStaticArgNamed(Box::new((idx, args)))); + } + } + // Call or load + None => { + if value.is_callable(true) { + ops.push(Op::CallStatic(idx)); + } else { + ops.push(Op::LoadStatic(idx)); + } + } + } + + return ops; + } + _ => todo!(), + } + + match args { + // Qualified call + Some((args, nargs)) => { + if args == 0 && nargs == false { + ops.push(Op::Call); + } else if args > 0 && nargs == false { + ops.push(Op::CallArg(args)); + } else { + ops.push(Op::CallArgNamed(args)); + } + } + // Call or load + None => ops.push(Op::CallOrCopy), + } + + ops + } } impl std::fmt::Debug for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { match self { Self::Void => write!(f, "void"), + Self::Shared(value) => value.borrow().fmt(f), Self::Value(v) => v.borrow().fmt(f), Self::Parselet { .. } => write!(f, "{}", self), Self::Local(addr) => write!(f, "local@{}", addr), Self::Global(addr) => write!(f, "global@{}", addr), - Self::Name { name, .. } | Self::Generic { name, .. } => write!(f, "{}", name), + Self::Name { name, .. } => write!(f, "{}", name), _ => todo!(), } } @@ -144,12 +352,9 @@ impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Void => write!(f, "void"), - Self::Name { name, .. } | Self::Generic { name, .. } => write!(f, "{}", name), + Self::Shared(value) => value.borrow().fmt(f), Self::Value(value) => write!(f, "{}", value.repr()), - Self::Parselet { - parselet, - constants, - } => { + Self::Parselet(parselet) => { write!( f, "{}", @@ -160,6 +365,7 @@ impl std::fmt::Display for ImlValue { .unwrap_or("") )?; + /* if !constants.is_empty() { write!(f, "<")?; for (i, (name, value)) in constants.iter().enumerate() { @@ -171,9 +377,11 @@ impl std::fmt::Display for ImlValue { } write!(f, ">")?; } + */ Ok(()) } + Self::Name { name, .. 
} => write!(f, "{}", name), _ => todo!(), } } @@ -186,13 +394,10 @@ impl std::hash::Hash for ImlValue { state.write_u8('v' as u8); v.hash(state) } - Self::Parselet { - parselet, - constants, - } => { + Self::Parselet(parselet) => { state.write_u8('p' as u8); parselet.borrow().hash(state); - constants.iter().collect::>().hash(state); + //constants.iter().collect::>().hash(state); } other => unreachable!("{:?} is unhashable", other), } diff --git a/src/compiler/linker.rs b/src/compiler/linker.rs index 7a031240..e63b779f 100644 --- a/src/compiler/linker.rs +++ b/src/compiler/linker.rs @@ -2,11 +2,10 @@ use super::iml::*; use crate::value::Parselet; -use crate::vm::{Op, Program}; +use crate::vm::Program; use crate::Error; use crate::{RefValue, Value}; use indexmap::IndexMap; -use num::ToPrimitive; use std::collections::HashMap; /// The linker glues compiled intermediate program and finalized VM program together. @@ -32,34 +31,16 @@ impl Linker { In case *value* already exists inside of the current statics, the existing index will be returned, otherwiese the value is cloned and put into the statics table. */ pub fn register(&mut self, value: &ImlValue) -> usize { + if let ImlValue::Shared(value) = value { + return self.register(&*value.borrow()); + } + match self.statics.get_index_of(value) { None => self.statics.insert_full(value.clone(), None).0, Some(idx) => idx, } } - /** Generates code for a value push. For several, oftenly used values, there exists a direct operation pendant, - which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation - is returned. 
*/ - pub fn push(&mut self, value: &ImlValue) -> Op { - if let ImlValue::Value(value) = value { - match &*value.borrow() { - Value::Void => return Op::PushVoid, - Value::Null => return Op::PushNull, - Value::True => return Op::PushTrue, - Value::False => return Op::PushFalse, - Value::Int(i) => match i.to_i64() { - Some(0) => return Op::Push0, - Some(1) => return Op::Push1, - _ => {} - }, - _ => {} - } - } - - Op::LoadStatic(self.register(value)) - } - /** Turns the Linker and its intermediate values into a final VM program ready for execution. The finalization is done according to a grammar's point of view, as this is one of Tokays core features. @@ -77,13 +58,7 @@ impl Linker { let outer = { match self.statics.get_index(i).unwrap() { (_, Some(_)) => unreachable!(), // may not exist! - ( - ImlValue::Parselet { - parselet, - constants, - }, - None, - ) if constants.is_empty() => parselet.clone(), + (ImlValue::Parselet(parselet), None) => parselet.clone(), _ => { i += 1; continue; @@ -172,11 +147,7 @@ impl Linker { .into_iter() .map(|(iml, parselet)| { if let Some(mut parselet) = parselet { - if let ImlValue::Parselet { - parselet: imlparselet, - .. - } = iml - { + if let ImlValue::Parselet(imlparselet) = iml { parselet.consuming = configs .get(&imlparselet.borrow().id()) .map_or(None, |config| Some(config.leftrec)); From bef35e8e12e44c4a55aa0d8f62d6543bb82412da Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 17 Apr 2023 00:20:24 +0200 Subject: [PATCH 16/94] . 
--- src/compiler/iml/imlvalue.rs | 67 ++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 025d7b47..c80c6b02 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -387,6 +387,73 @@ impl std::fmt::Display for ImlValue { } } +/* +impl std::fmt::Display for ImlValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Void => write!(f, "void"), + Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), + Self::Value(value) => write!(f, "{}", value.repr()), + Self::Parselet { + parselet, + constants, + } => { + write!( + f, + "{}", + parselet + .borrow() + .name + .as_deref() + .unwrap_or("") + )?; + + if !constants.is_empty() { + write!(f, "<")?; + for (i, (name, value)) in constants.iter().enumerate() { + if matches!(value, ImlValue::Void) { + write!(f, "{}{}", if i > 0 { ", " } else { "" }, name)?; + } else { + write!(f, "{}{}:{}", if i > 0 { ", " } else { "" }, name, value)?; + } + } + write!(f, ">")?; + } + + Ok(()) + } + Self::Local(addr) => write!(f, "local@{}", addr), + Self::Global(addr) => write!(f, "global@{}", addr), + Self::Symbol { + name, + gen_by_seq, + gen_by_name, + } => { + write!(f, "{}", target)?; + + let mut first = true; + + for item in gen_by_seq { + write!(f, "{}{}", if !first { ", " } else { "<" }, item)?; + first = false; + } + + for (name, item) in gen_by_name.iter() { + write!(f, "{}{}:{}", if !first { ", " } else { "<" }, name, item)?; + first = false; + } + + if !first { + write!(f, ">")?; + } + + Ok(()) + } + } + } +} +*/ + impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { From 77e247f356e3b629d1140d8ae3b4b64b63dbf59c Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 17 Apr 2023 22:58:48 +0200 Subject: [PATCH 17/94] wip: Rename Linker into ImlProgram ImlProgram is a better name for the linker, to glue intermediate parts 
together into a program. --- src/compiler/compiler.rs | 2 +- src/compiler/iml/imlop.rs | 32 +++++++++++-------- src/compiler/{linker.rs => iml/imlprogram.rs} | 19 ++++++----- src/compiler/iml/imlvalue.rs | 22 +++++++------ src/compiler/iml/mod.rs | 3 +- src/compiler/mod.rs | 2 -- 6 files changed, 43 insertions(+), 37 deletions(-) rename src/compiler/{linker.rs => iml/imlprogram.rs} (90%) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index a49d89b2..7b91e63b 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -109,7 +109,7 @@ impl Compiler { println!("--- Intermediate main ---\n{:#?}", main); } - match Linker::new(main).finalize() { + match ImlProgram::new(main).compile() { Ok(program) => { if self.debug > 1 { println!("--- Finalized program ---"); diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 1fbab907..d912def1 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -280,7 +280,11 @@ impl ImlOp { } /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program - pub(in crate::compiler) fn compile(&self, ops: &mut Vec, linker: &mut Linker) -> usize { + pub(in crate::compiler) fn compile( + &self, + ops: &mut Vec, + program: &mut ImlProgram, + ) -> usize { let start = ops.len(); match self { @@ -291,7 +295,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - ops.push(target.compile_to_load(linker)); + ops.push(target.compile_to_load(program)); } ImlOp::Call { offset, @@ -302,7 +306,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - ops.extend(target.compile_to_call(linker, *args)); + ops.extend(target.compile_to_call(program, *args)); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); @@ -312,7 +316,7 @@ impl ImlOp { while let Some(item) = iter.next() { let mut alt = Vec::new(); - item.compile(&mut alt, linker); + item.compile(&mut alt, program); // When branch has more than one item, Frame it. 
if iter.len() > 0 { @@ -353,7 +357,7 @@ impl ImlOp { } ImlOp::Seq { seq, collection } => { for item in seq.iter() { - item.compile(ops, linker); + item.compile(ops, program); } // Check if the sequence exists of more than one operational instruction @@ -388,13 +392,13 @@ impl ImlOp { } // Then-part - let mut jump = then_part.compile(ops, linker) + 1; + let mut jump = then_part.compile(ops, program) + 1; if !*peek { let mut else_ops = Vec::new(); // Else-part - if else_part.compile(&mut else_ops, linker) > 0 { + if else_part.compile(&mut else_ops, program) > 0 { ops.push(Op::Forward(else_ops.len() + 1)); jump += 1; ops.extend(else_ops); @@ -419,9 +423,9 @@ impl ImlOp { let consuming: Option = None; // fixme: Currently not sure if this is an issue. let mut repeat = Vec::new(); - initial.compile(ops, linker); + initial.compile(ops, program); - if condition.compile(&mut repeat, linker) > 0 { + if condition.compile(&mut repeat, program) > 0 { if *iterator { repeat.push(Op::ForwardIfNotVoid(2)); } else { @@ -431,7 +435,7 @@ impl ImlOp { repeat.push(Op::Break); } - body.compile(&mut repeat, linker); + body.compile(&mut repeat, program); let len = repeat.len() + if consuming.is_some() { 3 } else { 2 }; ops.push(Op::Loop(len)); @@ -451,7 +455,7 @@ impl ImlOp { // DEPRECATED BELOW!!! 
ImlOp::Expect { body, msg } => { let mut expect = Vec::new(); - body.compile(&mut expect, linker); + body.compile(&mut expect, program); ops.push(Op::Frame(expect.len() + 2)); @@ -468,7 +472,7 @@ impl ImlOp { } ImlOp::Not { body } => { let mut body_ops = Vec::new(); - let body_len = body.compile(&mut body_ops, linker); + let body_len = body.compile(&mut body_ops, program); ops.push(Op::Frame(body_len + 3)); ops.extend(body_ops); ops.push(Op::Close); @@ -477,13 +481,13 @@ impl ImlOp { } ImlOp::Peek { body } => { ops.push(Op::Frame(0)); - body.compile(ops, linker); + body.compile(ops, program); ops.push(Op::Reset); ops.push(Op::Close); } ImlOp::Repeat { body, min, max } => { let mut body_ops = Vec::new(); - let body_len = body.compile(&mut body_ops, linker); + let body_len = body.compile(&mut body_ops, program); match (min, max) { (0, 0) => { diff --git a/src/compiler/linker.rs b/src/compiler/iml/imlprogram.rs similarity index 90% rename from src/compiler/linker.rs rename to src/compiler/iml/imlprogram.rs index e63b779f..f867e474 100644 --- a/src/compiler/linker.rs +++ b/src/compiler/iml/imlprogram.rs @@ -1,32 +1,31 @@ -//! The Linker merges ImlParselets into statics, which are transferred into a VM program afterwards. +//! ImlProgram glues ImlParselets, ImlOps and ImlValues together to produce a VM program. -use super::iml::*; +use super::*; use crate::value::Parselet; use crate::vm::Program; use crate::Error; -use crate::{RefValue, Value}; +use crate::RefValue; use indexmap::IndexMap; use std::collections::HashMap; -/// The linker glues compiled intermediate program and finalized VM program together. 
#[derive(Debug)] -pub(in crate::compiler) struct Linker { +pub(in crate::compiler) struct ImlProgram { statics: IndexMap>, // static values with optional final parselet replacement pub errors: Vec, // errors collected during finalization (at least these are unresolved symbols) } -impl Linker { +impl ImlProgram { pub fn new(main: ImlValue) -> Self { let mut statics = IndexMap::new(); statics.insert(main, None); - Linker { + ImlProgram { statics, errors: Vec::new(), } } - /** Registers an ImlValue in the Linker's statics map and returns its index. + /** Registers an ImlValue in the ImlProgram's statics map and returns its index. In case *value* already exists inside of the current statics, the existing index will be returned, otherwiese the value is cloned and put into the statics table. */ @@ -41,13 +40,13 @@ impl Linker { } } - /** Turns the Linker and its intermediate values into a final VM program ready for execution. + /** Turns the ImlProgram and its intermediate values into a final VM program ready for execution. The finalization is done according to a grammar's point of view, as this is one of Tokays core features. This closure algorithm runs until no more changes on any parselet configurations regarding left-recursive and nullable parselet detection occurs. */ - pub fn finalize(mut self) -> Result> { + pub fn compile(mut self) -> Result> { let mut finalize = Vec::new(); // list of consuming parselets required to be finalized // Loop until end of statics is reached diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index c80c6b02..f3d54a89 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -206,9 +206,9 @@ impl ImlValue { /** Generates code for a value load. For several, oftenly used values, there exists a direct operation pendant, which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation is returned. 
*/ - pub fn compile_to_load(&self, linker: &mut Linker) -> Op { + pub fn compile_to_load(&self, program: &mut ImlProgram) -> Op { match self { - ImlValue::Shared(value) => return value.borrow().compile_to_load(linker), + ImlValue::Shared(value) => return value.borrow().compile_to_load(program), ImlValue::Value(value) => match &*value.borrow() { Value::Void => return Op::PushVoid, Value::Null => return Op::PushNull, @@ -225,7 +225,7 @@ impl ImlValue { ImlValue::Local(addr) => return Op::LoadFast(*addr), ImlValue::Global(addr) => return Op::LoadGlobal(*addr), ImlValue::Name { name, .. } => { - linker.errors.push(Error::new( + program.errors.push(Error::new( None, format!("Use of unresolved symbol '{}'", name), )); @@ -235,19 +235,23 @@ impl ImlValue { _ => todo!(), } - Op::LoadStatic(linker.register(self)) + Op::LoadStatic(program.register(self)) } /** Generates code for a value call. */ - pub fn compile_to_call(&self, linker: &mut Linker, args: Option<(usize, bool)>) -> Vec { + pub fn compile_to_call( + &self, + program: &mut ImlProgram, + args: Option<(usize, bool)>, + ) -> Vec { let mut ops = Vec::new(); match self { - ImlValue::Shared(value) => return value.borrow().compile_to_call(linker, args), + ImlValue::Shared(value) => return value.borrow().compile_to_call(program, args), ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { name, .. 
} => { - linker.errors.push(Error::new( + program.errors.push(Error::new( None, format!("Call to unresolved symbol '{}'", name), )); @@ -271,7 +275,7 @@ impl ImlValue { } if !required.is_empty() { - linker.errors.push(Error::new( + program.errors.push(Error::new( offset.clone(), format!( "On call to '{}', missing generic constants for {}", @@ -286,7 +290,7 @@ impl ImlValue { } */ - let idx = linker.register(value); + let idx = program.register(value); match args { // Qualified call diff --git a/src/compiler/iml/mod.rs b/src/compiler/iml/mod.rs index ee0c3317..9d227574 100644 --- a/src/compiler/iml/mod.rs +++ b/src/compiler/iml/mod.rs @@ -3,11 +3,12 @@ pub use crate::vm::*; mod imlop; mod imlparselet; +mod imlprogram; mod imlvalue; -use super::Linker; pub(in crate::compiler) use imlop::*; pub(in crate::compiler) use imlparselet::*; +pub(in crate::compiler) use imlprogram::*; pub(in crate::compiler) use imlvalue::*; #[derive(Debug, Clone, PartialEq, PartialOrd)] diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index 2ac1483a..dc29a4c1 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -3,12 +3,10 @@ pub(crate) mod ast; mod compiler; mod iml; -mod linker; mod parser; use compiler::*; use iml::*; -use linker::*; use parser::*; pub(crate) use ast::identifier_is_valid; From 45a1570ebfb0816d138f9e2e2c3b9f95cb2aa546 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 17 Apr 2023 23:26:40 +0200 Subject: [PATCH 18/94] wip: Change signature of ImlOp::compile --- src/compiler/iml/imlop.rs | 24 ++++++++++++------------ src/compiler/iml/imlprogram.rs | 6 +++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index d912def1..25b20f3a 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -282,8 +282,8 @@ impl ImlOp { /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program pub(in crate::compiler) fn compile( &self, - ops: &mut Vec, program: &mut 
ImlProgram, + ops: &mut Vec, ) -> usize { let start = ops.len(); @@ -316,7 +316,7 @@ impl ImlOp { while let Some(item) = iter.next() { let mut alt = Vec::new(); - item.compile(&mut alt, program); + item.compile(program, &mut alt); // When branch has more than one item, Frame it. if iter.len() > 0 { @@ -357,7 +357,7 @@ impl ImlOp { } ImlOp::Seq { seq, collection } => { for item in seq.iter() { - item.compile(ops, program); + item.compile(program, ops); } // Check if the sequence exists of more than one operational instruction @@ -392,13 +392,13 @@ impl ImlOp { } // Then-part - let mut jump = then_part.compile(ops, program) + 1; + let mut jump = then_part.compile(program, ops) + 1; if !*peek { let mut else_ops = Vec::new(); // Else-part - if else_part.compile(&mut else_ops, program) > 0 { + if else_part.compile(program, &mut else_ops) > 0 { ops.push(Op::Forward(else_ops.len() + 1)); jump += 1; ops.extend(else_ops); @@ -423,9 +423,9 @@ impl ImlOp { let consuming: Option = None; // fixme: Currently not sure if this is an issue. let mut repeat = Vec::new(); - initial.compile(ops, program); + initial.compile(program, ops); - if condition.compile(&mut repeat, program) > 0 { + if condition.compile(program, &mut repeat) > 0 { if *iterator { repeat.push(Op::ForwardIfNotVoid(2)); } else { @@ -435,7 +435,7 @@ impl ImlOp { repeat.push(Op::Break); } - body.compile(&mut repeat, program); + body.compile(program, &mut repeat); let len = repeat.len() + if consuming.is_some() { 3 } else { 2 }; ops.push(Op::Loop(len)); @@ -455,7 +455,7 @@ impl ImlOp { // DEPRECATED BELOW!!! 
ImlOp::Expect { body, msg } => { let mut expect = Vec::new(); - body.compile(&mut expect, program); + body.compile(program, &mut expect); ops.push(Op::Frame(expect.len() + 2)); @@ -472,7 +472,7 @@ impl ImlOp { } ImlOp::Not { body } => { let mut body_ops = Vec::new(); - let body_len = body.compile(&mut body_ops, program); + let body_len = body.compile(program, &mut body_ops); ops.push(Op::Frame(body_len + 3)); ops.extend(body_ops); ops.push(Op::Close); @@ -481,13 +481,13 @@ impl ImlOp { } ImlOp::Peek { body } => { ops.push(Op::Frame(0)); - body.compile(ops, program); + body.compile(program, ops); ops.push(Op::Reset); ops.push(Op::Close); } ImlOp::Repeat { body, min, max } => { let mut body_ops = Vec::new(); - let body_len = body.compile(&mut body_ops, program); + let body_len = body.compile(program, &mut body_ops); match (min, max) { (0, 0) => { diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index f867e474..cbb1d233 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -76,9 +76,9 @@ impl ImlProgram { let mut end = Vec::new(); let mut body = Vec::new(); - parselet.begin.compile(&mut begin, &mut self); - parselet.end.compile(&mut end, &mut self); - parselet.body.compile(&mut body, &mut self); + parselet.begin.compile(&mut self, &mut begin); + parselet.end.compile(&mut self, &mut end); + parselet.body.compile(&mut self, &mut body); // Compile parselet from intermediate parselet let parselet = Parselet::new( From 19acfce885349bf6c3e35057079224773ec7026a Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 18 Apr 2023 00:36:55 +0200 Subject: [PATCH 19/94] wip: Move finalization entirely into ImlProgram --- src/compiler/iml/imlop.rs | 119 ----------------- src/compiler/iml/imlparselet.rs | 17 --- src/compiler/iml/imlprogram.rs | 218 +++++++++++++++++++++++++++++++- src/compiler/iml/imlvalue.rs | 68 ---------- src/compiler/iml/mod.rs | 6 - 5 files changed, 213 insertions(+), 215 deletions(-) diff --git 
a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 25b20f3a..85b538ea 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -5,7 +5,6 @@ use crate::reader::Offset; use crate::utils; use crate::Compiler; use crate::{Object, RefValue}; -use std::collections::{HashMap, HashSet}; #[derive(Debug, Clone)] pub(in crate::compiler) enum ImlOp { @@ -547,124 +546,6 @@ impl ImlOp { ops.len() - start } - /** Finalize ImlOp construct on a grammar's point of view. - - This function must be run inside of a closure on every parselet until no more changes occur. - */ - pub(in crate::compiler) fn finalize( - &self, - visited: &mut HashSet, - configs: &mut HashMap, - ) -> Option { - match self { - ImlOp::Call { target, .. } => target.finalize(visited, configs), - ImlOp::Alt { alts } => { - let mut leftrec = false; - let mut nullable = false; - let mut consumes = false; - - for alt in alts { - if let Some(consumable) = alt.finalize(visited, configs) { - leftrec |= consumable.leftrec; - nullable |= consumable.nullable; - consumes = true; - } - } - - if consumes { - Some(Consumable { leftrec, nullable }) - } else { - None - } - } - ImlOp::Seq { seq, .. } => { - let mut leftrec = false; - let mut nullable = true; - let mut consumes = false; - - for item in seq { - if !nullable { - break; - } - - if let Some(consumable) = item.finalize(visited, configs) { - leftrec |= consumable.leftrec; - nullable = consumable.nullable; - consumes = true; - } - } - - if consumes { - Some(Consumable { leftrec, nullable }) - } else { - None - } - } - ImlOp::If { then, else_, .. } => { - let then = then.finalize(visited, configs); - - if let Some(else_) = else_.finalize(visited, configs) { - if let Some(then) = then { - Some(Consumable { - leftrec: then.leftrec || else_.leftrec, - nullable: then.nullable || else_.nullable, - }) - } else { - Some(else_) - } - } else { - then - } - } - ImlOp::Loop { - initial, - condition, - body, - .. 
- } => { - let mut ret: Option = None; - - for part in [initial, condition, body] { - let part = part.finalize(visited, configs); - - if let Some(part) = part { - ret = if let Some(ret) = ret { - Some(Consumable { - leftrec: ret.leftrec || part.leftrec, - nullable: ret.nullable || part.nullable, - }) - } else { - Some(part) - } - } - } - - ret - } - - // DEPRECATED BELOW!!! - ImlOp::Expect { body, .. } => body.finalize(visited, configs), - ImlOp::Not { body } | ImlOp::Peek { body } => body.finalize(visited, configs), - ImlOp::Repeat { body, min, .. } => { - if let Some(consumable) = body.finalize(visited, configs) { - if *min == 0 { - Some(Consumable { - leftrec: consumable.leftrec, - nullable: true, - }) - } else { - Some(consumable) - } - } else { - None - } - } - - // default case - _ => None, - } - } - /// Generic querying function taking a closure that either walks on the tree or stops. pub fn walk(&self, func: &mut dyn FnMut(&Self) -> bool) -> bool { // Call closure on current ImlOp, break on false return diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 89d13a18..e1d25610 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -2,7 +2,6 @@ use super::*; use crate::reader::Offset; use indexmap::IndexMap; -use std::collections::{HashMap, HashSet}; #[derive(Debug)] /// Intermediate parselet @@ -24,22 +23,6 @@ impl ImlParselet { pub fn id(&self) -> usize { self as *const ImlParselet as usize } - - pub fn finalize(&self, configs: &mut HashMap) -> bool { - let mut changes = false; - let id = self.id(); - - for part in [&self.begin, &self.body, &self.end] { - if let Some(result) = part.finalize(&mut HashSet::new(), configs) { - if !configs.contains_key(&id) || configs[&id] < result { - configs.insert(id, result); - changes = true; - } - } - } - - changes - } } impl std::cmp::PartialEq for ImlParselet { diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 
cbb1d233..7de49436 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -4,13 +4,20 @@ use super::*; use crate::value::Parselet; use crate::vm::Program; use crate::Error; -use crate::RefValue; +use crate::{Object, RefValue}; use indexmap::IndexMap; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; + +#[derive(Debug, Clone, PartialEq, PartialOrd)] +struct Consumable { + pub leftrec: bool, // Flag if consumable is left-recursive + pub nullable: bool, // Flag if consumable is nullable +} #[derive(Debug)] pub(in crate::compiler) struct ImlProgram { statics: IndexMap>, // static values with optional final parselet replacement + configs: HashMap, // Consumable configuration per parselet pub errors: Vec, // errors collected during finalization (at least these are unresolved symbols) } @@ -21,6 +28,7 @@ impl ImlProgram { ImlProgram { statics, + configs: HashMap::new(), errors: Vec::new(), } } @@ -112,7 +120,6 @@ impl ImlProgram { // Now, start the closure algorithm with left-recursive and nullable configurations for all parselets // put into the finalize list. 
- let mut configs = HashMap::new(); // hash-map of static-id and consuming configuration let mut changes = true; while changes { @@ -120,7 +127,7 @@ impl ImlProgram { for parselet in &finalize { let parselet = parselet.borrow_mut(); // parselet is locked for left-recursion detection - changes |= parselet.finalize(&mut configs); + changes |= self.finalize_parselet(&*parselet); } } @@ -147,7 +154,8 @@ impl ImlProgram { .map(|(iml, parselet)| { if let Some(mut parselet) = parselet { if let ImlValue::Parselet(imlparselet) = iml { - parselet.consuming = configs + parselet.consuming = self + .configs .get(&imlparselet.borrow().id()) .map_or(None, |config| Some(config.leftrec)); @@ -169,4 +177,204 @@ impl ImlProgram { Ok(Program::new(statics)) } + + fn finalize_parselet(&mut self, parselet: &ImlParselet) -> bool { + fn finalize_value( + value: &ImlValue, + visited: &mut HashSet, + configs: &mut HashMap, + ) -> Option { + match value { + ImlValue::Shared(value) => finalize_value(&*value.borrow(), visited, configs), + ImlValue::Parselet(parselet) => { + match parselet.try_borrow() { + // In case the parselet cannot be borrowed, it is left-recursive! + Err(_) => Some(Consumable { + leftrec: true, + nullable: false, + }), + // Otherwise dive into this parselet... + Ok(parselet) => { + // ... only if it's generally flagged to be consuming. + if !parselet.consuming { + return None; + } + + let id = parselet.id(); + + if visited.contains(&id) { + Some(Consumable { + leftrec: false, + nullable: configs[&id].nullable, + }) + } else { + visited.insert(id); + + if !configs.contains_key(&id) { + configs.insert( + id, + Consumable { + leftrec: false, + nullable: false, + }, + ); + } + + //fixme: Finalize on begin and end as well! 
+ let ret = finalize_op(&parselet.body, visited, configs); + + visited.remove(&id); + + ret + } + } + } + } + ImlValue::Value(callee) => { + if callee.is_consuming() { + //println!("{:?} called, which is nullable={:?}", callee, callee.is_nullable()); + Some(Consumable { + leftrec: false, + nullable: callee.is_nullable(), + }) + } else { + None + } + } + _ => None, + } + } + + /** Finalize ImlOp construct on a grammar's point of view. + + This function must be run inside of a closure on every parselet until no more changes occur. + */ + fn finalize_op( + op: &ImlOp, + visited: &mut HashSet, + configs: &mut HashMap, + ) -> Option { + match op { + ImlOp::Call { target, .. } => finalize_value(target, visited, configs), + ImlOp::Alt { alts } => { + let mut leftrec = false; + let mut nullable = false; + let mut consumes = false; + + for alt in alts { + if let Some(consumable) = finalize_op(alt, visited, configs) { + leftrec |= consumable.leftrec; + nullable |= consumable.nullable; + consumes = true; + } + } + + if consumes { + Some(Consumable { leftrec, nullable }) + } else { + None + } + } + ImlOp::Seq { seq, .. } => { + let mut leftrec = false; + let mut nullable = true; + let mut consumes = false; + + for item in seq { + if !nullable { + break; + } + + if let Some(consumable) = finalize_op(item, visited, configs) { + leftrec |= consumable.leftrec; + nullable = consumable.nullable; + consumes = true; + } + } + + if consumes { + Some(Consumable { leftrec, nullable }) + } else { + None + } + } + ImlOp::If { then, else_, .. } => { + let then = finalize_op(then, visited, configs); + + if let Some(else_) = finalize_op(else_, visited, configs) { + if let Some(then) = then { + Some(Consumable { + leftrec: then.leftrec || else_.leftrec, + nullable: then.nullable || else_.nullable, + }) + } else { + Some(else_) + } + } else { + then + } + } + ImlOp::Loop { + initial, + condition, + body, + .. 
+ } => { + let mut ret: Option = None; + + for part in [initial, condition, body] { + let part = finalize_op(part, visited, configs); + + if let Some(part) = part { + ret = if let Some(ret) = ret { + Some(Consumable { + leftrec: ret.leftrec || part.leftrec, + nullable: ret.nullable || part.nullable, + }) + } else { + Some(part) + } + } + } + + ret + } + + // DEPRECATED BELOW!!! + ImlOp::Expect { body, .. } => finalize_op(body, visited, configs), + ImlOp::Not { body } | ImlOp::Peek { body } => finalize_op(body, visited, configs), + ImlOp::Repeat { body, min, .. } => { + if let Some(consumable) = finalize_op(body, visited, configs) { + if *min == 0 { + Some(Consumable { + leftrec: consumable.leftrec, + nullable: true, + }) + } else { + Some(consumable) + } + } else { + None + } + } + + // default case + _ => None, + } + } + + let mut changes = false; + let id = parselet.id(); + + for part in [&parselet.begin, &parselet.body, &parselet.end] { + if let Some(result) = finalize_op(part, &mut HashSet::new(), &mut self.configs) { + if !self.configs.contains_key(&id) || self.configs[&id] < result { + self.configs.insert(id, result); + changes = true; + } + } + } + + changes + } } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index f3d54a89..b5a6fc47 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -6,7 +6,6 @@ use crate::value::{Object, RefValue, Value}; use crate::Error; use num::ToPrimitive; use std::cell::RefCell; -use std::collections::{HashMap, HashSet}; use std::rc::Rc; /** Intermediate value @@ -136,73 +135,6 @@ impl ImlValue { } } - // Finalize... this is a work in progress... - pub fn finalize( - &self, - visited: &mut HashSet, - configs: &mut HashMap, - ) -> Option { - match self { - ImlValue::Shared(value) => value.borrow().finalize(visited, configs), - ImlValue::Parselet(parselet) => { - match parselet.try_borrow() { - // In case the parselet cannot be borrowed, it is left-recursive! 
- Err(_) => Some(Consumable { - leftrec: true, - nullable: false, - }), - // Otherwise dive into this parselet... - Ok(parselet) => { - // ... only if it's generally flagged to be consuming. - if !parselet.consuming { - return None; - } - - let id = parselet.id(); - - if visited.contains(&id) { - Some(Consumable { - leftrec: false, - nullable: configs[&id].nullable, - }) - } else { - visited.insert(id); - - if !configs.contains_key(&id) { - configs.insert( - id, - Consumable { - leftrec: false, - nullable: false, - }, - ); - } - - //fixme: Finalize on begin and end as well! - let ret = parselet.body.finalize(visited, configs); - - visited.remove(&id); - - ret - } - } - } - } - ImlValue::Value(callee) => { - if callee.is_consuming() { - //println!("{:?} called, which is nullable={:?}", callee, callee.is_nullable()); - Some(Consumable { - leftrec: false, - nullable: callee.is_nullable(), - }) - } else { - None - } - } - _ => None, - } - } - /** Generates code for a value load. For several, oftenly used values, there exists a direct operation pendant, which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation is returned. 
*/ diff --git a/src/compiler/iml/mod.rs b/src/compiler/iml/mod.rs index 9d227574..74af7b02 100644 --- a/src/compiler/iml/mod.rs +++ b/src/compiler/iml/mod.rs @@ -10,9 +10,3 @@ pub(in crate::compiler) use imlop::*; pub(in crate::compiler) use imlparselet::*; pub(in crate::compiler) use imlprogram::*; pub(in crate::compiler) use imlvalue::*; - -#[derive(Debug, Clone, PartialEq, PartialOrd)] -pub(in crate::compiler) struct Consumable { - pub leftrec: bool, // Flag if consumable is left-recursive - pub nullable: bool, // Flag if consumable is nullable -} From de8932e5b1a4689ef811d0ae981d0410fcb4676b Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 18 Apr 2023 18:42:31 +0200 Subject: [PATCH 20/94] wip: Separated self-contained ImlProgram::finalize() --- src/compiler/iml/imlprogram.rs | 138 +++++++++++++++++---------------- 1 file changed, 71 insertions(+), 67 deletions(-) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 7de49436..f70484d7 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -6,9 +6,11 @@ use crate::vm::Program; use crate::Error; use crate::{Object, RefValue}; use indexmap::IndexMap; +use std::cell::RefCell; use std::collections::{HashMap, HashSet}; +use std::rc::Rc; -#[derive(Debug, Clone, PartialEq, PartialOrd)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] struct Consumable { pub leftrec: bool, // Flag if consumable is left-recursive pub nullable: bool, // Flag if consumable is nullable @@ -118,29 +120,7 @@ impl ImlProgram { i += 1; } - // Now, start the closure algorithm with left-recursive and nullable configurations for all parselets - // put into the finalize list. 
- let mut changes = true; - - while changes { - changes = false; - - for parselet in &finalize { - let parselet = parselet.borrow_mut(); // parselet is locked for left-recursion detection - changes |= self.finalize_parselet(&*parselet); - } - } - - /* - for p in &finalize { - let parselet = p.borrow(); - println!( - "{} consuming={:?}", - parselet.name.as_deref().unwrap_or("(unnamed)"), - configs[&parselet.id()] - ); - } - */ + self.finalize(&finalize); // Stop on any raised error if !self.errors.is_empty() { @@ -178,7 +158,7 @@ impl ImlProgram { Ok(Program::new(statics)) } - fn finalize_parselet(&mut self, parselet: &ImlParselet) -> bool { + fn finalize(&mut self, parselets: &Vec>>) { fn finalize_value( value: &ImlValue, visited: &mut HashSet, @@ -194,40 +174,7 @@ impl ImlProgram { nullable: false, }), // Otherwise dive into this parselet... - Ok(parselet) => { - // ... only if it's generally flagged to be consuming. - if !parselet.consuming { - return None; - } - - let id = parselet.id(); - - if visited.contains(&id) { - Some(Consumable { - leftrec: false, - nullable: configs[&id].nullable, - }) - } else { - visited.insert(id); - - if !configs.contains_key(&id) { - configs.insert( - id, - Consumable { - leftrec: false, - nullable: false, - }, - ); - } - - //fixme: Finalize on begin and end as well! - let ret = finalize_op(&parselet.body, visited, configs); - - visited.remove(&id); - - ret - } - } + Ok(parselet) => finalize_parselet(&parselet, visited, configs), } } ImlValue::Value(callee) => { @@ -363,18 +310,75 @@ impl ImlProgram { } } - let mut changes = false; - let id = parselet.id(); + fn finalize_parselet( + parselet: &ImlParselet, + visited: &mut HashSet, + configs: &mut HashMap, + ) -> Option { + // ... only if it's generally flagged to be consuming. 
+ if !parselet.consuming { + return None; + } + + let id = parselet.id(); - for part in [&parselet.begin, &parselet.body, &parselet.end] { - if let Some(result) = finalize_op(part, &mut HashSet::new(), &mut self.configs) { - if !self.configs.contains_key(&id) || self.configs[&id] < result { - self.configs.insert(id, result); - changes = true; + if visited.contains(&id) { + Some(Consumable { + leftrec: false, + nullable: configs[&id].nullable, + }) + } else { + visited.insert(id); + + if !configs.contains_key(&id) { + configs.insert( + id, + Consumable { + leftrec: false, + nullable: false, + }, + ); } + + for part in [&parselet.begin, &parselet.body, &parselet.end] { + if let Some(result) = finalize_op(part, visited, configs) { + if configs[&id] < result { + configs.insert(id, result); + } + } + } + + visited.remove(&id); + Some(Consumable { + leftrec: false, + nullable: configs[&id].nullable, + }) + } + } + + // Now, start the closure algorithm with left-recursive and nullable configurations for all parselets + // put into the finalize list. 
+ let mut changes = true; + + while changes { + changes = false; + + for parselet in parselets { + let parselet = parselet.borrow_mut(); // parselet is locked for left-recursion detection + changes = finalize_parselet(&*parselet, &mut HashSet::new(), &mut self.configs) + > self.configs.get(&parselet.id()).cloned(); } } - changes + /* + for parselet in parselets { + let parselet = parselet.borrow(); + println!( + "{} consuming={:?}", + parselet.name.as_deref().unwrap_or("(unnamed)"), + self.configs[&parselet.id()] + ); + } + */ } } From 6566246cf8f41133bbed359c95f0f8b39e7d9e81 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 18 Apr 2023 20:33:41 +0200 Subject: [PATCH 21/94] ImlProgram::finalize() improvements --- src/compiler/compiler.rs | 2 +- src/compiler/iml/imlprogram.rs | 52 ++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 7b91e63b..185079d5 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -75,7 +75,7 @@ impl Compiler { }; // Compile with the default prelude - if false && with_prelude { + if with_prelude { compiler .compile_from_str(include_str!("../prelude.tok")) .unwrap(); // this should panic in case of an error! 
diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index f70484d7..5c749531 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -10,16 +10,9 @@ use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::rc::Rc; -#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] -struct Consumable { - pub leftrec: bool, // Flag if consumable is left-recursive - pub nullable: bool, // Flag if consumable is nullable -} - #[derive(Debug)] pub(in crate::compiler) struct ImlProgram { statics: IndexMap>, // static values with optional final parselet replacement - configs: HashMap, // Consumable configuration per parselet pub errors: Vec, // errors collected during finalization (at least these are unresolved symbols) } @@ -30,7 +23,6 @@ impl ImlProgram { ImlProgram { statics, - configs: HashMap::new(), errors: Vec::new(), } } @@ -120,7 +112,7 @@ impl ImlProgram { i += 1; } - self.finalize(&finalize); + let leftrec = self.finalize(finalize); // Stop on any raised error if !self.errors.is_empty() { @@ -134,10 +126,9 @@ impl ImlProgram { .map(|(iml, parselet)| { if let Some(mut parselet) = parselet { if let ImlValue::Parselet(imlparselet) = iml { - parselet.consuming = self - .configs + parselet.consuming = leftrec .get(&imlparselet.borrow().id()) - .map_or(None, |config| Some(config.leftrec)); + .map_or(None, |leftrec| Some(*leftrec)); //println!("{:?} => {:?}", imlparselet.borrow().name, parselet.consuming); } @@ -158,7 +149,25 @@ impl ImlProgram { Ok(Program::new(statics)) } - fn finalize(&mut self, parselets: &Vec>>) { + /** Internal function to finalize a program on a grammar's point of view. + + The finalization performs a closure algorithm on every parselet to detect + + - nullable parselets + - left-recursive parselets + + until no more changes occur. + + It can only be run on a previously compiled program without any unresolved usages. 
+ */ + fn finalize(&mut self, parselets: Vec>>) -> HashMap { + #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] + struct Consumable { + leftrec: bool, + nullable: bool, + } + + // Finalize ImlValue fn finalize_value( value: &ImlValue, visited: &mut HashSet, @@ -192,10 +201,7 @@ impl ImlProgram { } } - /** Finalize ImlOp construct on a grammar's point of view. - - This function must be run inside of a closure on every parselet until no more changes occur. - */ + // Finalize ImlOp fn finalize_op( op: &ImlOp, visited: &mut HashSet, @@ -310,6 +316,7 @@ impl ImlProgram { } } + // Finalize ImlParselet fn finalize_parselet( parselet: &ImlParselet, visited: &mut HashSet, @@ -359,14 +366,15 @@ impl ImlProgram { // Now, start the closure algorithm with left-recursive and nullable configurations for all parselets // put into the finalize list. let mut changes = true; + let mut configs = HashMap::new(); while changes { changes = false; - for parselet in parselets { + for parselet in &parselets { let parselet = parselet.borrow_mut(); // parselet is locked for left-recursion detection - changes = finalize_parselet(&*parselet, &mut HashSet::new(), &mut self.configs) - > self.configs.get(&parselet.id()).cloned(); + changes = finalize_parselet(&*parselet, &mut HashSet::new(), &mut configs) + > configs.get(&parselet.id()).cloned(); } } @@ -376,9 +384,11 @@ impl ImlProgram { println!( "{} consuming={:?}", parselet.name.as_deref().unwrap_or("(unnamed)"), - self.configs[&parselet.id()] + configs[&parselet.id()] ); } */ + + configs.into_iter().map(|(k, v)| (k, v.leftrec)).collect() } } From 594613642a6500dd15129acfbbf37e3ddd3d3c2b Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 19 Apr 2023 20:10:44 +0200 Subject: [PATCH 22/94] ImlValue::compile_load() & ImlValue::compile_call() --- src/compiler/iml/imlop.rs | 4 ++-- src/compiler/iml/imlvalue.rs | 40 ++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/compiler/iml/imlop.rs 
b/src/compiler/iml/imlop.rs index 85b538ea..afb62a67 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -294,7 +294,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - ops.push(target.compile_to_load(program)); + target.compile_load(program, ops); } ImlOp::Call { offset, @@ -305,7 +305,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - ops.extend(target.compile_to_call(program, *args)); + target.compile_call(program, *args, ops); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index b5a6fc47..32ab9e96 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -138,48 +138,47 @@ impl ImlValue { /** Generates code for a value load. For several, oftenly used values, there exists a direct operation pendant, which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation is returned. */ - pub fn compile_to_load(&self, program: &mut ImlProgram) -> Op { + pub fn compile_load(&self, program: &mut ImlProgram, ops: &mut Vec) { match self { - ImlValue::Shared(value) => return value.borrow().compile_to_load(program), + ImlValue::Shared(value) => return value.borrow().compile_load(program, ops), ImlValue::Value(value) => match &*value.borrow() { - Value::Void => return Op::PushVoid, - Value::Null => return Op::PushNull, - Value::True => return Op::PushTrue, - Value::False => return Op::PushFalse, + Value::Void => return ops.push(Op::PushVoid), + Value::Null => return ops.push(Op::PushNull), + Value::True => return ops.push(Op::PushTrue), + Value::False => return ops.push(Op::PushFalse), Value::Int(i) => match i.to_i64() { - Some(0) => return Op::Push0, - Some(1) => return Op::Push1, + Some(0) => return ops.push(Op::Push0), + Some(1) => return ops.push(Op::Push1), _ => {} }, _ => {} }, ImlValue::Parselet(_) => {} - ImlValue::Local(addr) => return Op::LoadFast(*addr), - 
ImlValue::Global(addr) => return Op::LoadGlobal(*addr), + ImlValue::Local(addr) => return ops.push(Op::LoadFast(*addr)), + ImlValue::Global(addr) => return ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { name, .. } => { program.errors.push(Error::new( None, format!("Use of unresolved symbol '{}'", name), )); - return Op::Nop; + return; } _ => todo!(), } - Op::LoadStatic(program.register(self)) + ops.push(Op::LoadStatic(program.register(self))) } /** Generates code for a value call. */ - pub fn compile_to_call( + pub fn compile_call( &self, program: &mut ImlProgram, args: Option<(usize, bool)>, - ) -> Vec { - let mut ops = Vec::new(); - + ops: &mut Vec, + ) { match self { - ImlValue::Shared(value) => return value.borrow().compile_to_call(program, args), + ImlValue::Shared(value) => return value.borrow().compile_call(program, args, ops), ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { name, .. } => { @@ -187,7 +186,7 @@ impl ImlValue { None, format!("Call to unresolved symbol '{}'", name), )); - return ops; + return; } value => { // When value is a parselet, check for accepted constant configuration @@ -221,7 +220,6 @@ impl ImlValue { } } */ - let idx = program.register(value); match args { @@ -245,7 +243,7 @@ impl ImlValue { } } - return ops; + return; } _ => todo!(), } @@ -264,8 +262,6 @@ impl ImlValue { // Call or load None => ops.push(Op::CallOrCopy), } - - ops } } From a5c658fce133f98c660176b75da1a5aa9d87daef Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 19 Apr 2023 20:57:13 +0200 Subject: [PATCH 23/94] ImlOp::compile_to_vec() --- src/compiler/iml/imlop.rs | 13 ++++++++----- src/compiler/iml/imlprogram.rs | 14 +++----------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index afb62a67..e0d5cb01 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -279,11 +279,14 @@ impl ImlOp 
{ } /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program - pub(in crate::compiler) fn compile( - &self, - program: &mut ImlProgram, - ops: &mut Vec, - ) -> usize { + pub fn compile_to_vec(&self, program: &mut ImlProgram) -> Vec { + let mut ops = Vec::new(); + self.compile(program, &mut ops); + ops + } + + /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program + pub fn compile(&self, program: &mut ImlProgram, ops: &mut Vec) -> usize { let start = ops.len(); match self { diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 5c749531..611fbaa6 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -74,14 +74,6 @@ impl ImlProgram { finalize.push(outer.clone()); } - let mut begin = Vec::new(); - let mut end = Vec::new(); - let mut body = Vec::new(); - - parselet.begin.compile(&mut self, &mut begin); - parselet.end.compile(&mut self, &mut end); - parselet.body.compile(&mut self, &mut body); - // Compile parselet from intermediate parselet let parselet = Parselet::new( parselet.name.clone(), @@ -103,9 +95,9 @@ impl ImlProgram { }) .collect(), parselet.locals, - begin, - end, - body, + parselet.begin.compile_to_vec(&mut self), + parselet.end.compile_to_vec(&mut self), + parselet.body.compile_to_vec(&mut self), ); *self.statics.get_index_mut(i).unwrap().1 = Some(parselet); From a48e3404a06227373e53544b98eaa992a729e77c Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 21 Apr 2023 22:38:02 +0200 Subject: [PATCH 24/94] Improving debug / ImlSharedParselet ImlSharedParselet can't run into infinite recursion when its recursively dumped. 
--- src/compiler/compiler.rs | 6 ++-- src/compiler/iml/imlop.rs | 2 +- src/compiler/iml/imlparselet.rs | 54 +++++++++++++++++++++++++++++++++ src/compiler/iml/imlprogram.rs | 4 +-- src/compiler/iml/imlvalue.rs | 35 ++++----------------- 5 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 185079d5..6442388b 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -7,9 +7,7 @@ use crate::reader::*; use crate::value::{RefValue, Token}; use crate::vm::*; use indexmap::IndexMap; -use std::cell::RefCell; use std::collections::HashMap; -use std::rc::Rc; /** Compiler symbolic scopes. @@ -75,7 +73,7 @@ impl Compiler { }; // Compile with the default prelude - if with_prelude { + if false && with_prelude { compiler .compile_from_str(include_str!("../prelude.tok")) .unwrap(); // this should panic in case of an error! @@ -288,7 +286,7 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::Parselet(Rc::new(RefCell::new(parselet))) + ImlValue::Parselet(ImlSharedParselet::new(parselet)) } else { unreachable!(); } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index e0d5cb01..22d011dc 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -218,7 +218,7 @@ impl ImlOp { ImlOp::Call { target, .. } | ImlOp::Load { target, .. } if target.is_consuming() => { - Some(format!("{:?}", target).to_string()) + Some(format!("{}", target)) } ImlOp::Seq { seq, .. 
} => { let mut txt = None; diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index e1d25610..2d41c666 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -2,6 +2,8 @@ use super::*; use crate::reader::Offset; use indexmap::IndexMap; +use std::cell::RefCell; +use std::rc::Rc; #[derive(Debug)] /// Intermediate parselet @@ -25,6 +27,16 @@ impl ImlParselet { } } +impl std::fmt::Display for ImlParselet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + self.name.as_deref().unwrap_or("") + ) + } +} + impl std::cmp::PartialEq for ImlParselet { // It satisfies to just compare the parselet's memory address for equality fn eq(&self, other: &Self) -> bool { @@ -46,3 +58,45 @@ impl std::cmp::PartialOrd for ImlParselet { self.id().partial_cmp(&other.id()) } } + +/// Shared ImlParselet +#[derive(Clone, Eq, PartialEq)] +pub(in crate::compiler) struct ImlSharedParselet(Rc>); + +impl ImlSharedParselet { + pub fn new(parselet: ImlParselet) -> Self { + Self(Rc::new(RefCell::new(parselet))) + } +} + +impl std::fmt::Debug for ImlSharedParselet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ret = self.0.try_borrow_mut().is_ok(); + + if ret { + self.0.borrow().fmt(f) + } else { + write!(f, "{}", self.0.borrow()) + } + } +} + +impl std::fmt::Display for ImlSharedParselet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0.borrow()) + } +} + +impl std::ops::Deref for ImlSharedParselet { + type Target = Rc>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::ops::DerefMut for ImlSharedParselet { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 611fbaa6..069b24de 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -6,9 +6,7 @@ use crate::vm::Program; use 
crate::Error; use crate::{Object, RefValue}; use indexmap::IndexMap; -use std::cell::RefCell; use std::collections::{HashMap, HashSet}; -use std::rc::Rc; #[derive(Debug)] pub(in crate::compiler) struct ImlProgram { @@ -152,7 +150,7 @@ impl ImlProgram { It can only be run on a previously compiled program without any unresolved usages. */ - fn finalize(&mut self, parselets: Vec>>) -> HashMap { + fn finalize(&mut self, parselets: Vec) -> HashMap { #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] struct Consumable { leftrec: bool, diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 32ab9e96..b3dea62b 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -17,16 +17,16 @@ These can be memory locations of variables, static values, functions or values w still pending. */ -#[derive(Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { Void, Shared(Rc>), // Resolved - Value(RefValue), // Compile-time static value - Local(usize), // Runtime local variable - Global(usize), // Runtime global variable - Parselet(Rc>), // Parselet + Value(RefValue), // Compile-time static value + Local(usize), // Runtime local variable + Global(usize), // Runtime global variable + Parselet(ImlSharedParselet), // Parselet // Unresolved Name { @@ -265,21 +265,6 @@ impl ImlValue { } } -impl std::fmt::Debug for ImlValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Void => write!(f, "void"), - Self::Shared(value) => value.borrow().fmt(f), - Self::Value(v) => v.borrow().fmt(f), - Self::Parselet { .. } => write!(f, "{}", self), - Self::Local(addr) => write!(f, "local@{}", addr), - Self::Global(addr) => write!(f, "global@{}", addr), - Self::Name { name, .. 
} => write!(f, "{}", name), - _ => todo!(), - } - } -} - impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -287,15 +272,7 @@ impl std::fmt::Display for ImlValue { Self::Shared(value) => value.borrow().fmt(f), Self::Value(value) => write!(f, "{}", value.repr()), Self::Parselet(parselet) => { - write!( - f, - "{}", - parselet - .borrow() - .name - .as_deref() - .unwrap_or("") - )?; + write!(f, "{}", parselet)?; /* if !constants.is_empty() { From 24cdce6766e8b7d88a8906d58de572cba4d0e794 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 24 Apr 2023 22:40:56 +0200 Subject: [PATCH 25/94] wip: Improving code --- src/compiler/iml/imlparselet.rs | 34 +++++++++++++++++++++++++++++---- src/compiler/iml/imlprogram.rs | 27 ++------------------------ 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 2d41c666..0b202baf 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -1,6 +1,7 @@ //! 
Intermediate representation of a parselet use super::*; use crate::reader::Offset; +use crate::value::Parselet; use indexmap::IndexMap; use std::cell::RefCell; use std::rc::Rc; @@ -25,6 +26,32 @@ impl ImlParselet { pub fn id(&self) -> usize { self as *const ImlParselet as usize } + + pub fn compile(&self, program: &mut ImlProgram) -> Parselet { + Parselet::new( + self.name.clone(), + None, + self.severity, + self.signature + .iter() + .map(|var_value| { + ( + // Copy parameter name + var_value.0.clone(), + // Register default value, if any + match &var_value.1 { + ImlValue::Void => None, + value => Some(program.register(value)), + }, + ) + }) + .collect(), + self.locals, + self.begin.compile_to_vec(program), + self.end.compile_to_vec(program), + self.body.compile_to_vec(program), + ) + } } impl std::fmt::Display for ImlParselet { @@ -71,12 +98,11 @@ impl ImlSharedParselet { impl std::fmt::Debug for ImlSharedParselet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let ret = self.0.try_borrow_mut().is_ok(); - - if ret { + // Avoid endless recursion in case of recursive parselets + if self.0.try_borrow_mut().is_ok() { self.0.borrow().fmt(f) } else { - write!(f, "{}", self.0.borrow()) + write!(f, "{} (recursive)", self.0.borrow()) } } } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 069b24de..0280cb86 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -72,31 +72,8 @@ impl ImlProgram { finalize.push(outer.clone()); } - // Compile parselet from intermediate parselet - let parselet = Parselet::new( - parselet.name.clone(), - None, - parselet.severity, - parselet - .signature - .iter() - .map(|var_value| { - ( - // Copy parameter name - var_value.0.clone(), - // Register default value, if any - match &var_value.1 { - ImlValue::Void => None, - value => Some(self.register(value)), - }, - ) - }) - .collect(), - parselet.locals, - parselet.begin.compile_to_vec(&mut self), - 
parselet.end.compile_to_vec(&mut self), - parselet.body.compile_to_vec(&mut self), - ); + // Compile VM parselet from intermediate parselet + let parselet = parselet.compile(&mut self); *self.statics.get_index_mut(i).unwrap().1 = Some(parselet); i += 1; From 952f0dc9c82e573efbec6c9ac5ef7b1cc40de62d Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 25 Apr 2023 10:04:21 +0200 Subject: [PATCH 26/94] wip: Clean-up, annotations and refactor --- src/compiler/ast.rs | 2 + src/compiler/iml/imlop.rs | 4 +- src/compiler/iml/imlprogram.rs | 1 + src/compiler/iml/imlvalue.rs | 69 ++++++++++++++++++---------------- 4 files changed, 42 insertions(+), 34 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 21cdb4aa..a1b104ed 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -43,6 +43,8 @@ pub(in crate::compiler) fn traverse(compiler: &mut Compiler, ast: &RefValue) -> // Extract offset positions into an Offset structure fn traverse_node_offset(node: &Dict) -> Option { + //return None; // Temporarily discard any Offset information (shortens debug output) + let offset = node .get_str("offset") .and_then(|offset| Some(offset.to_usize().unwrap())); diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 22d011dc..ced7ee92 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -297,7 +297,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - target.compile_load(program, ops); + target.compile_load(program, &offset, ops); } ImlOp::Call { offset, @@ -308,7 +308,7 @@ impl ImlOp { ops.push(Op::Offset(Box::new(*offset))); } - target.compile_call(program, *args, ops); + target.compile_call(program, *args, &offset, ops); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 0280cb86..c1f4ca9a 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -52,6 +52,7 @@ impl ImlProgram { // Loop until end of 
statics is reached let mut i = 0; + // self.statics grows inside of this while loop, therefore this condition. while i < self.statics.len() { // Pick only intermediate parselets, other static values are directly moved let outer = { diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index b3dea62b..c49e22da 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -59,7 +59,7 @@ impl ImlValue { pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { match self { Self::Shared(value) => value.borrow_mut().resolve(compiler), - Self::Name { name, .. } => { + Self::Name { name, generic, .. } if !*generic => { if let Some(value) = compiler.get(&name) { *self = value; true @@ -90,7 +90,7 @@ impl ImlValue { todo!(); } */ - _ => true, + _ => false, } } @@ -138,10 +138,16 @@ impl ImlValue { /** Generates code for a value load. For several, oftenly used values, there exists a direct operation pendant, which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation is returned. */ - pub fn compile_load(&self, program: &mut ImlProgram, ops: &mut Vec) { + pub fn compile_load( + &self, + program: &mut ImlProgram, + offset: &Option, + ops: &mut Vec, + ) { match self { - ImlValue::Shared(value) => return value.borrow().compile_load(program, ops), + ImlValue::Shared(value) => return value.borrow().compile_load(program, offset, ops), ImlValue::Value(value) => match &*value.borrow() { + // Some frequently used values have built-in push operations Value::Void => return ops.push(Op::PushVoid), Value::Null => return ops.push(Op::PushNull), Value::True => return ops.push(Op::PushTrue), @@ -158,7 +164,7 @@ impl ImlValue { ImlValue::Global(addr) => return ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { name, .. 
} => { program.errors.push(Error::new( - None, + offset.clone(), format!("Use of unresolved symbol '{}'", name), )); @@ -175,51 +181,50 @@ impl ImlValue { &self, program: &mut ImlProgram, args: Option<(usize, bool)>, + offset: &Option, ops: &mut Vec, ) { match self { - ImlValue::Shared(value) => return value.borrow().compile_call(program, args, ops), + ImlValue::Shared(value) => { + return value.borrow().compile_call(program, args, offset, ops) + } ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { name, .. } => { program.errors.push(Error::new( - None, + offset.clone(), format!("Call to unresolved symbol '{}'", name), )); return; } value => { - // When value is a parselet, check for accepted constant configuration /* - if let ImlValue::Parselet { - parselet: _, - constants, - } = value - { - if !constants.is_empty() { - let mut required = Vec::new(); - - for (name, default) in constants { - if matches!(default, ImlValue::Void) { - required.push(name.to_string()); - } - } + // When value is a parselet, check for accepted constant configuration + if let ImlValue::Parselet(parselet) = value { + let mut required = Vec::new(); + let parselet = parselet.borrow(); - if !required.is_empty() { - program.errors.push(Error::new( - offset.clone(), - format!( - "On call to '{}', missing generic constants for {}", - value, - required.join(", ") - ), - )); - - return 0; + for (name, default) in &parselet.constants { + if matches!(default, ImlValue::Void) { + required.push(name.to_string()); } } + + if !required.is_empty() { + program.errors.push(Error::new( + offset.clone(), + format!( + "Call to '{}' requires generic argument '{}'", + value, + required.join(", ") + ), + )); + + return; + } } */ + let idx = program.register(value); match args { From a78a2e2106f6fe64926abda8a151a504e12ffa80 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 26 Apr 2023 01:40:22 +0200 Subject: [PATCH 27/94] wip: 
ImlValue::compile() and ImlValue::Parselet --- src/compiler/ast.rs | 4 +- src/compiler/compiler.rs | 6 +- src/compiler/iml/imlop.rs | 12 +- src/compiler/iml/imlparselet.rs | 9 +- src/compiler/iml/imlprogram.rs | 30 +++-- src/compiler/iml/imlvalue.rs | 210 +++++++++++++++----------------- 6 files changed, 127 insertions(+), 144 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index a1b104ed..67cb672e 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -43,7 +43,7 @@ pub(in crate::compiler) fn traverse(compiler: &mut Compiler, ast: &RefValue) -> // Extract offset positions into an Offset structure fn traverse_node_offset(node: &Dict) -> Option { - //return None; // Temporarily discard any Offset information (shortens debug output) + //return None; // Temporarily discard any Offset information (shortens debug output) let offset = node .get_str("offset") @@ -384,7 +384,7 @@ fn traverse_node_static(compiler: &mut Compiler, lvalue: Option<&str>, node: &Di compiler.parselet_pop(None, None, None, None, None, ImlOp::Nop); if let Some(lvalue) = lvalue { - if let ImlValue::Parselet(parselet) = &value { + if let ImlValue::Parselet { parselet, .. } = &value { let mut parselet = parselet.borrow_mut(); parselet.name = Some(lvalue.to_string()); } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 6442388b..d23cddf8 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -272,7 +272,6 @@ impl Compiler { || end.is_consuming() || body.is_consuming(), severity: severity.unwrap_or(5), // severity - constants, // constants signature, // signature locals: *locals, // Ensure that begin and end are blocks. 
@@ -286,7 +285,10 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::Parselet(ImlSharedParselet::new(parselet)) + ImlValue::Parselet { + parselet: ImlSharedParselet::new(parselet), + constants, + } } else { unreachable!(); } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index ced7ee92..ea1f1953 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -293,22 +293,14 @@ impl ImlOp { ImlOp::Nop => {} ImlOp::Op(op) => ops.push(op.clone()), ImlOp::Load { offset, target } => { - if let Some(offset) = offset { - ops.push(Op::Offset(Box::new(*offset))); - } - - target.compile_load(program, &offset, ops); + target.compile(program, &offset, None, ops); } ImlOp::Call { offset, target, args, } => { - if let Some(offset) = offset { - ops.push(Op::Offset(Box::new(*offset))); - } - - target.compile_call(program, *args, &offset, ops); + target.compile(program, &offset, Some(*args), ops); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 0b202baf..d3c7a8c6 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -6,15 +6,14 @@ use indexmap::IndexMap; use std::cell::RefCell; use std::rc::Rc; -#[derive(Debug)] /// Intermediate parselet +#[derive(Debug)] pub(in crate::compiler) struct ImlParselet { pub offset: Option, // Offset of definition pub consuming: bool, // Flag if parselet is consuming pub severity: u8, // Capture push severity - pub name: Option, // Parselet's name from source (for debugging) - pub constants: IndexMap, // Parselet generic signature with default configuration - pub signature: IndexMap, // Argument signature with default arguments + pub name: Option, // Assigned name from source (for debugging) + pub signature: IndexMap, // Arguments signature with default values pub locals: usize, // Total number of local variables present (including arguments) pub begin: ImlOp, // Begin-operations pub end: ImlOp, 
// End-operations @@ -41,7 +40,7 @@ impl ImlParselet { // Register default value, if any match &var_value.1 { ImlValue::Void => None, - value => Some(program.register(value)), + value => Some(program.register(value).expect("Cannot register value")), }, ) }) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index c1f4ca9a..f8f62f5d 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -27,16 +27,20 @@ impl ImlProgram { /** Registers an ImlValue in the ImlProgram's statics map and returns its index. + Only resolved values can be registered. + In case *value* already exists inside of the current statics, the existing index will be returned, otherwiese the value is cloned and put into the statics table. */ - pub fn register(&mut self, value: &ImlValue) -> usize { - if let ImlValue::Shared(value) = value { - return self.register(&*value.borrow()); - } - - match self.statics.get_index_of(value) { - None => self.statics.insert_full(value.clone(), None).0, - Some(idx) => idx, + pub fn register(&mut self, value: &ImlValue) -> Result { + match value { + ImlValue::Shared(value) => self.register(&*value.borrow()), + ImlValue::Parselet { .. } | ImlValue::Value(_) => { + match self.statics.get_index_of(value) { + None => Ok(self.statics.insert_full(value.clone(), None).0), + Some(idx) => Ok(idx), + } + } + _ => Err(()), // Cannot register unresolved value } } @@ -58,7 +62,7 @@ impl ImlProgram { let outer = { match self.statics.get_index(i).unwrap() { (_, Some(_)) => unreachable!(), // may not exist! - (ImlValue::Parselet(parselet), None) => parselet.clone(), + (ImlValue::Parselet { parselet, .. }, None) => parselet.clone(), _ => { i += 1; continue; @@ -93,7 +97,11 @@ impl ImlProgram { .into_iter() .map(|(iml, parselet)| { if let Some(mut parselet) = parselet { - if let ImlValue::Parselet(imlparselet) = iml { + if let ImlValue::Parselet { + parselet: imlparselet, + .. 
+ } = iml + { parselet.consuming = leftrec .get(&imlparselet.borrow().id()) .map_or(None, |leftrec| Some(*leftrec)); @@ -143,7 +151,7 @@ impl ImlProgram { ) -> Option { match value { ImlValue::Shared(value) => finalize_value(&*value.borrow(), visited, configs), - ImlValue::Parselet(parselet) => { + ImlValue::Parselet { parselet, .. } => { match parselet.try_borrow() { // In case the parselet cannot be borrowed, it is left-recursive! Err(_) => Some(Consumable { diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index c49e22da..ce1e86c5 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -4,6 +4,7 @@ use crate::compiler::Compiler; use crate::reader::Offset; use crate::value::{Object, RefValue, Value}; use crate::Error; +use indexmap::IndexMap; use num::ToPrimitive; use std::cell::RefCell; use std::rc::Rc; @@ -23,12 +24,16 @@ pub(in crate::compiler) enum ImlValue { Shared(Rc>), // Resolved - Value(RefValue), // Compile-time static value - Local(usize), // Runtime local variable - Global(usize), // Runtime global variable - Parselet(ImlSharedParselet), // Parselet + Value(RefValue), // Compile-time static value + Local(usize), // Runtime local variable + Global(usize), // Runtime global variable + Parselet { + // Parselet + parselet: ImlSharedParselet, // Intermediate definition + constants: IndexMap, // Generic signature with default configuration + }, - // Unresolved + // Definitely unresolved Name { // Unresolved name offset: Option, // Source offset @@ -107,7 +112,7 @@ impl ImlValue { match self { Self::Shared(value) => value.borrow().is_callable(without_arguments), Self::Value(value) => value.is_callable(without_arguments), - Self::Parselet(parselet) => { + Self::Parselet { parselet, .. } => { let parselet = parselet.borrow(); if without_arguments { @@ -130,142 +135,115 @@ impl ImlValue { Self::Shared(value) => value.borrow().is_consuming(), Self::Name { name, .. 
} => crate::utils::identifier_is_consumable(name), Self::Value(value) => value.is_consuming(), - Self::Parselet(parselet) => parselet.borrow().consuming, + Self::Parselet { parselet, .. } => parselet.borrow().consuming, _ => false, } } - /** Generates code for a value load. For several, oftenly used values, there exists a direct operation pendant, - which makes storing the static value obsolete. Otherwise, *value* will be registered and a static load operation - is returned. */ - pub fn compile_load( + /// Compile a resolved intermediate value into VM code + pub fn compile( &self, program: &mut ImlProgram, offset: &Option, + call: Option>, ops: &mut Vec, ) { - match self { - ImlValue::Shared(value) => return value.borrow().compile_load(program, offset, ops), - ImlValue::Value(value) => match &*value.borrow() { - // Some frequently used values have built-in push operations - Value::Void => return ops.push(Op::PushVoid), - Value::Null => return ops.push(Op::PushNull), - Value::True => return ops.push(Op::PushTrue), - Value::False => return ops.push(Op::PushFalse), - Value::Int(i) => match i.to_i64() { - Some(0) => return ops.push(Op::Push0), - Some(1) => return ops.push(Op::Push1), - _ => {} - }, - _ => {} - }, - ImlValue::Parselet(_) => {} - ImlValue::Local(addr) => return ops.push(Op::LoadFast(*addr)), - ImlValue::Global(addr) => return ops.push(Op::LoadGlobal(*addr)), - ImlValue::Name { name, .. } => { - program.errors.push(Error::new( - offset.clone(), - format!("Use of unresolved symbol '{}'", name), - )); - - return; - } - _ => todo!(), + if let Some(offset) = offset { + ops.push(Op::Offset(Box::new(*offset))); } - ops.push(Op::LoadStatic(program.register(self))) - } - - /** Generates code for a value call. 
*/ - pub fn compile_call( - &self, - program: &mut ImlProgram, - args: Option<(usize, bool)>, - offset: &Option, - ops: &mut Vec, - ) { match self { - ImlValue::Shared(value) => { - return value.borrow().compile_call(program, args, offset, ops) + ImlValue::Shared(value) => return value.borrow().compile(program, offset, call, ops), + ImlValue::Value(value) => { + if call.is_none() { + match &*value.borrow() { + // Some frequently used values have built-in push operations + Value::Void => return ops.push(Op::PushVoid), + Value::Null => return ops.push(Op::PushNull), + Value::True => return ops.push(Op::PushTrue), + Value::False => return ops.push(Op::PushFalse), + Value::Int(i) => match i.to_i64() { + Some(0) => return ops.push(Op::Push0), + Some(1) => return ops.push(Op::Push1), + _ => {} + }, + _ => {} + } + } } ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { name, .. } => { program.errors.push(Error::new( offset.clone(), - format!("Call to unresolved symbol '{}'", name), + if call.is_some() { + format!("Call to unresolved symbol '{}'", name) + } else { + format!("Use of unresolved symbol '{}'", name) + }, )); + return; } - value => { - /* + ImlValue::Parselet { + parselet, + constants, + } => { // When value is a parselet, check for accepted constant configuration - if let ImlValue::Parselet(parselet) = value { - let mut required = Vec::new(); - let parselet = parselet.borrow(); - - for (name, default) in &parselet.constants { - if matches!(default, ImlValue::Void) { - required.push(name.to_string()); - } - } + let parselet = parselet.borrow(); + let mut required = Vec::new(); - if !required.is_empty() { - program.errors.push(Error::new( - offset.clone(), - format!( - "Call to '{}' requires generic argument '{}'", - value, - required.join(", ") - ), - )); - - return; + for (name, default) in constants { + if matches!(default, ImlValue::Void) { + required.push(name.to_string()); } } 
- */ - - let idx = program.register(value); - match args { - // Qualified call - Some((args, nargs)) => { - if args == 0 && !nargs { - ops.push(Op::CallStatic(idx)); - } else if args > 0 && !nargs { - ops.push(Op::CallStaticArg(Box::new((idx, args)))); - } else { - ops.push(Op::CallStaticArgNamed(Box::new((idx, args)))); - } - } - // Call or load - None => { - if value.is_callable(true) { - ops.push(Op::CallStatic(idx)); - } else { - ops.push(Op::LoadStatic(idx)); - } - } + if !required.is_empty() { + program.errors.push(Error::new( + offset.clone(), + format!( + "Call to '{}' requires generic argument {}", + self, + required.join(", ") + ), + )); + + return; } - - return; } _ => todo!(), } - match args { - // Qualified call - Some((args, nargs)) => { - if args == 0 && nargs == false { - ops.push(Op::Call); - } else if args > 0 && nargs == false { - ops.push(Op::CallArg(args)); - } else { - ops.push(Op::CallArgNamed(args)); + // Try to register value as static + if let Ok(idx) = program.register(self) { + match call { + // Load + None => ops.push(Op::LoadStatic(idx)), + // Call or load + Some(None) => { + if self.is_callable(true) { + ops.push(Op::CallStatic(idx)); + } else { + ops.push(Op::LoadStatic(idx)); + } } + // Call (qualified) + Some(Some((0, false))) => ops.push(Op::CallStatic(idx)), + Some(Some((args, false))) => ops.push(Op::CallStaticArg(Box::new((idx, args)))), + Some(Some((args, true))) => ops.push(Op::CallStaticArgNamed(Box::new((idx, args)))), + } + } else { + match call { + // Load (already done previously) + None => {} + // Call or load + Some(None) => ops.push(Op::CallOrCopy), + // Call (qualified) + Some(Some((0, false))) => ops.push(Op::Call), + Some(Some((args, false))) => ops.push(Op::CallArg(args)), + Some(Some((args, true))) => ops.push(Op::CallArgNamed(args)), } - // Call or load - None => ops.push(Op::CallOrCopy), } } } @@ -276,10 +254,12 @@ impl std::fmt::Display for ImlValue { Self::Void => write!(f, "void"), Self::Shared(value) => 
value.borrow().fmt(f), Self::Value(value) => write!(f, "{}", value.repr()), - Self::Parselet(parselet) => { + Self::Parselet { + parselet, + constants, + } => { write!(f, "{}", parselet)?; - /* if !constants.is_empty() { write!(f, "<")?; for (i, (name, value)) in constants.iter().enumerate() { @@ -291,7 +271,6 @@ impl std::fmt::Display for ImlValue { } write!(f, ">")?; } - */ Ok(()) } @@ -375,10 +354,13 @@ impl std::hash::Hash for ImlValue { state.write_u8('v' as u8); v.hash(state) } - Self::Parselet(parselet) => { + Self::Parselet { + parselet, + constants, + } => { state.write_u8('p' as u8); parselet.borrow().hash(state); - //constants.iter().collect::>().hash(state); + constants.iter().collect::>().hash(state); } other => unreachable!("{:?} is unhashable", other), } From 31c4a4a103b5b003de88c1519438d27b7ff19402 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 26 Apr 2023 22:37:48 +0200 Subject: [PATCH 28/94] wip: Implement ImlValue::Instance resolving --- src/compiler/ast.rs | 34 +++++--- src/compiler/compiler.rs | 2 +- src/compiler/iml/imlvalue.rs | 148 +++++++++++++++++++++++++++++------ 3 files changed, 148 insertions(+), 36 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 67cb672e..fc25c224 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -302,7 +302,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { let target = traverse_node_static(compiler, None, target); // Traverse generic arguments - let mut config = Vec::new(); + let mut args = Vec::new(); + let mut nargs = IndexMap::new(); for genarg in children[1..].iter() { let genarg = genarg.borrow(); @@ -313,10 +314,21 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { match emit.object::().unwrap().as_str() { "genarg" => { + if !nargs.is_empty() { + compiler.errors.push(Error::new( + traverse_node_offset(node), + format!( + "Sequencial generics need to be specified before named generics." 
+ ), + )); + + continue; + } + let param = &genarg["children"].borrow(); let param = param.object::().unwrap(); - config.push((offset, None, traverse_node_static(compiler, None, param))); + args.push((offset, traverse_node_static(compiler, None, param))); } "genarg_named" => { @@ -327,25 +339,22 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { let ident = ident["value"].borrow(); let ident = ident.object::().unwrap().as_str(); - /* - if by_name.contains_key(ident) { + if nargs.contains_key(ident) { compiler.errors.push(Error::new( traverse_node_offset(genarg), - format!("Named constant '{}' provided more than once.", ident), + format!("Named generic '{}' provided more than once.", ident), )); continue; } - */ let param = &children[1].borrow(); let param = param.object::().unwrap(); - config.push(( - offset, - Some(ident.to_string()), - traverse_node_static(compiler, None, param), - )); + nargs.insert( + ident.to_string(), + (offset, traverse_node_static(compiler, None, param)), + ); } other => unimplemented!("Unhandled genarg type {:?}", other), @@ -354,7 +363,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { let mut ret = ImlValue::Instance { target: Box::new(target), - config, + args, + nargs, offset: traverse_node_offset(node), }; diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index d23cddf8..d13e3a3f 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -73,7 +73,7 @@ impl Compiler { }; // Compile with the default prelude - if false && with_prelude { + if with_prelude { compiler .compile_from_str(include_str!("../prelude.tok")) .unwrap(); // this should panic in case of an error! 
diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index ce1e86c5..e81ad039 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -2,6 +2,7 @@ use super::*; use crate::compiler::Compiler; use crate::reader::Offset; +use crate::utils; use crate::value::{Object, RefValue, Value}; use crate::Error; use indexmap::IndexMap; @@ -33,7 +34,7 @@ pub(in crate::compiler) enum ImlValue { constants: IndexMap, // Generic signature with default configuration }, - // Definitely unresolved + // Unresolved Name { // Unresolved name offset: Option, // Source offset @@ -42,9 +43,10 @@ pub(in crate::compiler) enum ImlValue { }, Instance { // Parselet instance - offset: Option, // Source offset - target: Box, // Instance target - config: Vec<(Option, Option, ImlValue)>, // Constant configuration + offset: Option, // Source offset + target: Box, // Instance target + args: Vec<(Option, ImlValue)>, // Sequential generic args + nargs: IndexMap, ImlValue)>, // Named generic args }, } @@ -72,30 +74,130 @@ impl ImlValue { false } } - /* Self::Instance { + offset, target, - .. 
- } if matches!(target, ImlValue::Name(_)) => { - // Try to resolve target - if target.resolve(compiler) { - // On success, try to resolve the entire instance - return self.resolve(compiler); + args, + nargs, + } => { + let mut is_resolved = true; + + // Resolve target + if !target.resolve(compiler) { + is_resolved = false; } - } - Self::Instance { - target: - ImlValue::Parselet { + + // Resolve sequential generic args + for arg in args.iter_mut() { + if !arg.1.resolve(compiler) { + is_resolved = false; + } + } + + // Resolve named generic args + for narg in nargs.values_mut() { + if !narg.1.resolve(compiler) { + is_resolved = false; + } + } + + // When everything is resolved, turn the instance definition into a parselet + if is_resolved { + if let ImlValue::Parselet { parselet, constants, - }, - config, - offset, - } => { - todo!(); + } = &**target + { + let mut new_constants = IndexMap::new(); + + for (name, default) in constants.iter() { + // Take arguments by sequence first + let arg = if !args.is_empty() { + args.remove(0) + } + // Otherwise, take named arguments by sequence + else if let Some(narg) = nargs.shift_remove(name) { + narg + } + // Otherwise, use default + else { + (*offset, default.clone()) + }; + + // Check integrity of constant names + if let Self::Void = arg.1 { + compiler.errors.push(Error::new( + arg.0, + format!("Expecting argument for generic '{}'", name), + )); + } else if arg.1.is_consuming() { + if !utils::identifier_is_consumable(name) { + compiler.errors.push(Error::new( + arg.0, + format!( + "Cannot assign consumable {} to non-consumable generic '{}'", + arg.1, name + ) + )); + } + } else if utils::identifier_is_consumable(name) { + compiler.errors.push(Error::new( + arg.0, + format!( + "Cannot assign non-consumable {} to consumable generic '{}'", + arg.1, name + ) + )); + } + + new_constants.insert(name.clone(), arg.1); + } + + // Report any errors for unconsumed generic arguments. 
+ if !args.is_empty() { + compiler.errors.push(Error::new( + args[0].0, // report first parameter + format!( + "{} got too many generic arguments ({} in total, expected {})", + target, + constants.len() + args.len(), + constants.len() + ), + )); + } + + for (name, (offset, _)) in nargs { + if new_constants.get(name).is_some() { + compiler.errors.push(Error::new( + *offset, + format!("{} already got generic argument '{}'", target, name), + )); + } else { + compiler.errors.push(Error::new( + *offset, + format!( + "{} does not accept generic argument named '{}'", + target, name + ), + )); + } + } + + // Create new parselet from data provided + *self = Self::Parselet { + parselet: parselet.clone(), + constants: new_constants, + }; + + return true; + } else { + unimplemented!(); + } + } + + is_resolved } - */ - _ => false, + _ => true, } } @@ -212,7 +314,7 @@ impl ImlValue { return; } } - _ => todo!(), + _ => unreachable!(), } // Try to register value as static From e4700381c504d68ae1018c6a88f86e83d5dc41d6 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 28 Apr 2023 22:38:58 +0200 Subject: [PATCH 29/94] wip: ImlParseletModel This commit moves ImlValue::Parselet into ImlParselet, and parts of ImlParselet into ImlParseletModel, which is the model of the original parselet shared by several instances. ImlParselet is now the parselet instance generated from generics. This commit has not yet been tested with any generics; I just want to have it saved. 
--- src/compiler/ast.rs | 2 +- src/compiler/compiler.rs | 19 +++-- src/compiler/iml/imlop.rs | 37 +++++---- src/compiler/iml/imlparselet.rs | 97 +++++++++++++++++++---- src/compiler/iml/imlprogram.rs | 69 +++++++++-------- src/compiler/iml/imlvalue.rs | 132 +++++++++++++------------------- 6 files changed, 207 insertions(+), 149 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index fc25c224..eb5655a2 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -394,7 +394,7 @@ fn traverse_node_static(compiler: &mut Compiler, lvalue: Option<&str>, node: &Di compiler.parselet_pop(None, None, None, None, None, ImlOp::Nop); if let Some(lvalue) = lvalue { - if let ImlValue::Parselet { parselet, .. } = &value { + if let ImlValue::Parselet(parselet) = &value { let mut parselet = parselet.borrow_mut(); parselet.name = Some(lvalue.to_string()); } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index d13e3a3f..a2d3f81b 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -247,6 +247,7 @@ impl Compiler { "signature may not be longer than locals..." ); + // Ensure that begin and end are blocks. let begin = ensure_block(begin.drain(..).collect()); let end = ensure_block(end.drain(..).collect()); @@ -263,18 +264,13 @@ impl Compiler { return ImlValue::Void } */ - - let parselet = ImlParselet { - offset, - name, + let model = ImlParseletModel { consuming: *is_consuming || begin.is_consuming() || end.is_consuming() || body.is_consuming(), - severity: severity.unwrap_or(5), // severity - signature, // signature + signature, locals: *locals, - // Ensure that begin and end are blocks. 
begin, end, body, @@ -285,10 +281,13 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::Parselet { - parselet: ImlSharedParselet::new(parselet), + ImlValue::from(ImlParselet::new( + model, constants, - } + offset, + name, + severity.unwrap_or(5), + )) } else { unreachable!(); } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index ea1f1953..2436dff7 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -279,28 +279,33 @@ impl ImlOp { } /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program - pub fn compile_to_vec(&self, program: &mut ImlProgram) -> Vec { + pub fn compile_to_vec(&self, program: &mut ImlProgram, parselet: &ImlParselet) -> Vec { let mut ops = Vec::new(); - self.compile(program, &mut ops); + self.compile(program, parselet, &mut ops); ops } /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program - pub fn compile(&self, program: &mut ImlProgram, ops: &mut Vec) -> usize { + pub fn compile( + &self, + program: &mut ImlProgram, + parselet: &ImlParselet, + ops: &mut Vec, + ) -> usize { let start = ops.len(); match self { ImlOp::Nop => {} ImlOp::Op(op) => ops.push(op.clone()), ImlOp::Load { offset, target } => { - target.compile(program, &offset, None, ops); + target.compile(program, parselet, &offset, None, ops); } ImlOp::Call { offset, target, args, } => { - target.compile(program, &offset, Some(*args), ops); + target.compile(program, parselet, &offset, Some(*args), ops); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); @@ -310,7 +315,7 @@ impl ImlOp { while let Some(item) = iter.next() { let mut alt = Vec::new(); - item.compile(program, &mut alt); + item.compile(program, parselet, &mut alt); // When branch has more than one item, Frame it. 
if iter.len() > 0 { @@ -351,7 +356,7 @@ impl ImlOp { } ImlOp::Seq { seq, collection } => { for item in seq.iter() { - item.compile(program, ops); + item.compile(program, parselet, ops); } // Check if the sequence exists of more than one operational instruction @@ -386,13 +391,13 @@ impl ImlOp { } // Then-part - let mut jump = then_part.compile(program, ops) + 1; + let mut jump = then_part.compile(program, parselet, ops) + 1; if !*peek { let mut else_ops = Vec::new(); // Else-part - if else_part.compile(program, &mut else_ops) > 0 { + if else_part.compile(program, parselet, &mut else_ops) > 0 { ops.push(Op::Forward(else_ops.len() + 1)); jump += 1; ops.extend(else_ops); @@ -417,9 +422,9 @@ impl ImlOp { let consuming: Option = None; // fixme: Currently not sure if this is an issue. let mut repeat = Vec::new(); - initial.compile(program, ops); + initial.compile(program, parselet, ops); - if condition.compile(program, &mut repeat) > 0 { + if condition.compile(program, parselet, &mut repeat) > 0 { if *iterator { repeat.push(Op::ForwardIfNotVoid(2)); } else { @@ -429,7 +434,7 @@ impl ImlOp { repeat.push(Op::Break); } - body.compile(program, &mut repeat); + body.compile(program, parselet, &mut repeat); let len = repeat.len() + if consuming.is_some() { 3 } else { 2 }; ops.push(Op::Loop(len)); @@ -449,7 +454,7 @@ impl ImlOp { // DEPRECATED BELOW!!! 
ImlOp::Expect { body, msg } => { let mut expect = Vec::new(); - body.compile(program, &mut expect); + body.compile(program, parselet, &mut expect); ops.push(Op::Frame(expect.len() + 2)); @@ -466,7 +471,7 @@ impl ImlOp { } ImlOp::Not { body } => { let mut body_ops = Vec::new(); - let body_len = body.compile(program, &mut body_ops); + let body_len = body.compile(program, parselet, &mut body_ops); ops.push(Op::Frame(body_len + 3)); ops.extend(body_ops); ops.push(Op::Close); @@ -475,13 +480,13 @@ impl ImlOp { } ImlOp::Peek { body } => { ops.push(Op::Frame(0)); - body.compile(program, ops); + body.compile(program, parselet, ops); ops.push(Op::Reset); ops.push(Op::Close); } ImlOp::Repeat { body, min, max } => { let mut body_ops = Vec::new(); - let body_len = body.compile(program, &mut body_ops); + let body_len = body.compile(program, parselet, &mut body_ops); match (min, max) { (0, 0) => { diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index d3c7a8c6..523b793e 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -6,32 +6,74 @@ use indexmap::IndexMap; use std::cell::RefCell; use std::rc::Rc; -/// Intermediate parselet +// ImlParseletModel +// ---------------------------------------------------------------------------- + +/// Intermediate parselet model #[derive(Debug)] -pub(in crate::compiler) struct ImlParselet { - pub offset: Option, // Offset of definition +pub(in crate::compiler) struct ImlParseletModel { pub consuming: bool, // Flag if parselet is consuming - pub severity: u8, // Capture push severity - pub name: Option, // Assigned name from source (for debugging) pub signature: IndexMap, // Arguments signature with default values pub locals: usize, // Total number of local variables present (including arguments) - pub begin: ImlOp, // Begin-operations - pub end: ImlOp, // End-operations - pub body: ImlOp, // Operations + pub begin: ImlOp, // Begin intermediate operations + pub end: ImlOp, // End 
intermediate operations + pub body: ImlOp, // Body intermediate Operations +} + +// ImlParselet +// ---------------------------------------------------------------------------- + +/// Intermediate parselet +#[derive(Debug)] +pub(in crate::compiler) struct ImlParselet { + pub model: Rc>, // Parselet base model + pub constants: IndexMap, // Generic signature with default configuration + pub offset: Option, // Offset of definition + pub name: Option, // Assigned name from source (for debugging) + pub severity: u8, // Capture push severity } /** Representation of parselet in intermediate code. */ impl ImlParselet { + pub fn new( + model: ImlParseletModel, + constants: IndexMap, + offset: Option, + name: Option, + severity: u8, + ) -> Self { + Self { + model: Rc::new(RefCell::new(model)), + constants, + offset, + name, + severity, + } + } + + pub fn derive(&self, constants: IndexMap, offset: Option) -> Self { + Self { + model: self.model.clone(), + constants, + offset, + name: self.name.clone(), + severity: self.severity, + } + } + pub fn id(&self) -> usize { self as *const ImlParselet as usize } pub fn compile(&self, program: &mut ImlProgram) -> Parselet { + let model = self.model.borrow(); + Parselet::new( self.name.clone(), None, self.severity, - self.signature + model + .signature .iter() .map(|var_value| { ( @@ -45,10 +87,10 @@ impl ImlParselet { ) }) .collect(), - self.locals, - self.begin.compile_to_vec(program), - self.end.compile_to_vec(program), - self.body.compile_to_vec(program), + model.locals, + model.begin.compile_to_vec(program, self), + model.end.compile_to_vec(program, self), + model.body.compile_to_vec(program, self), ) } } @@ -59,7 +101,21 @@ impl std::fmt::Display for ImlParselet { f, "{}", self.name.as_deref().unwrap_or("") - ) + )?; + + if !self.constants.is_empty() { + write!(f, "<")?; + for (i, (name, value)) in self.constants.iter().enumerate() { + if matches!(value, ImlValue::Void) { + write!(f, "{}{}", if i > 0 { ", " } else { "" }, name)?; + 
} else { + write!(f, "{}{}:{}", if i > 0 { ", " } else { "" }, name, value)?; + } + } + write!(f, ">")?; + } + + Ok(()) } } @@ -74,7 +130,9 @@ impl Eq for ImlParselet {} impl std::hash::Hash for ImlParselet { fn hash(&self, state: &mut H) { - self.id().hash(state); + let model = &*self.model.borrow(); + (model as *const ImlParseletModel as usize).hash(state); + self.constants.iter().collect::>().hash(state); } } @@ -85,6 +143,15 @@ impl std::cmp::PartialOrd for ImlParselet { } } +impl From for ImlValue { + fn from(parselet: ImlParselet) -> Self { + ImlValue::Parselet(ImlSharedParselet::new(parselet)) + } +} + +// ImlSharedParselet +// ---------------------------------------------------------------------------- + /// Shared ImlParselet #[derive(Clone, Eq, PartialEq)] pub(in crate::compiler) struct ImlSharedParselet(Rc>); diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index f8f62f5d..2c6548bc 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -59,28 +59,28 @@ impl ImlProgram { // self.statics grows inside of this while loop, therefore this condition. while i < self.statics.len() { // Pick only intermediate parselets, other static values are directly moved - let outer = { - match self.statics.get_index(i).unwrap() { - (_, Some(_)) => unreachable!(), // may not exist! - (ImlValue::Parselet { parselet, .. }, None) => parselet.clone(), - _ => { - i += 1; - continue; - } + let outer = match self.statics.get_index_mut(i).unwrap() { + (_, Some(_)) => unreachable!(), // may not exist! + (ImlValue::Parselet(parselet), None) => parselet.clone(), + _ => { + i += 1; + continue; } }; + // We have to do it this ugly way because of the borrow checker... let parselet = outer.borrow(); + let model = parselet.model.borrow(); // Memoize parselets required to be finalized (needs a general rework later...) - if parselet.consuming { + if model.consuming { + //fixme... 
finalize.push(outer.clone()); } // Compile VM parselet from intermediate parselet - let parselet = parselet.compile(&mut self); + *self.statics.get_index_mut(i).unwrap().1 = Some(parselet.compile(&mut self)); - *self.statics.get_index_mut(i).unwrap().1 = Some(parselet); i += 1; } @@ -97,11 +97,7 @@ impl ImlProgram { .into_iter() .map(|(iml, parselet)| { if let Some(mut parselet) = parselet { - if let ImlValue::Parselet { - parselet: imlparselet, - .. - } = iml - { + if let ImlValue::Parselet(imlparselet) = iml { parselet.consuming = leftrec .get(&imlparselet.borrow().id()) .map_or(None, |leftrec| Some(*leftrec)); @@ -146,12 +142,15 @@ impl ImlProgram { // Finalize ImlValue fn finalize_value( value: &ImlValue, + current: &ImlParselet, visited: &mut HashSet, configs: &mut HashMap, ) -> Option { match value { - ImlValue::Shared(value) => finalize_value(&*value.borrow(), visited, configs), - ImlValue::Parselet { parselet, .. } => { + ImlValue::Shared(value) => { + finalize_value(&*value.borrow(), current, visited, configs) + } + ImlValue::Parselet(parselet) => { match parselet.try_borrow() { // In case the parselet cannot be borrowed, it is left-recursive! Err(_) => Some(Consumable { @@ -173,6 +172,11 @@ impl ImlProgram { None } } + ImlValue::Name { + name, + generic: true, + .. + } => finalize_value(&current.constants[name], current, visited, configs), _ => None, } } @@ -180,18 +184,19 @@ impl ImlProgram { // Finalize ImlOp fn finalize_op( op: &ImlOp, + current: &ImlParselet, visited: &mut HashSet, configs: &mut HashMap, ) -> Option { match op { - ImlOp::Call { target, .. } => finalize_value(target, visited, configs), + ImlOp::Call { target, .. 
} => finalize_value(target, current, visited, configs), ImlOp::Alt { alts } => { let mut leftrec = false; let mut nullable = false; let mut consumes = false; for alt in alts { - if let Some(consumable) = finalize_op(alt, visited, configs) { + if let Some(consumable) = finalize_op(alt, current, visited, configs) { leftrec |= consumable.leftrec; nullable |= consumable.nullable; consumes = true; @@ -214,7 +219,7 @@ impl ImlProgram { break; } - if let Some(consumable) = finalize_op(item, visited, configs) { + if let Some(consumable) = finalize_op(item, current, visited, configs) { leftrec |= consumable.leftrec; nullable = consumable.nullable; consumes = true; @@ -228,9 +233,9 @@ impl ImlProgram { } } ImlOp::If { then, else_, .. } => { - let then = finalize_op(then, visited, configs); + let then = finalize_op(then, current, visited, configs); - if let Some(else_) = finalize_op(else_, visited, configs) { + if let Some(else_) = finalize_op(else_, current, visited, configs) { if let Some(then) = then { Some(Consumable { leftrec: then.leftrec || else_.leftrec, @@ -252,7 +257,7 @@ impl ImlProgram { let mut ret: Option = None; for part in [initial, condition, body] { - let part = finalize_op(part, visited, configs); + let part = finalize_op(part, current, visited, configs); if let Some(part) = part { ret = if let Some(ret) = ret { @@ -270,10 +275,12 @@ impl ImlProgram { } // DEPRECATED BELOW!!! - ImlOp::Expect { body, .. } => finalize_op(body, visited, configs), - ImlOp::Not { body } | ImlOp::Peek { body } => finalize_op(body, visited, configs), + ImlOp::Expect { body, .. } => finalize_op(body, current, visited, configs), + ImlOp::Not { body } | ImlOp::Peek { body } => { + finalize_op(body, current, visited, configs) + } ImlOp::Repeat { body, min, .. 
} => { - if let Some(consumable) = finalize_op(body, visited, configs) { + if let Some(consumable) = finalize_op(body, current, visited, configs) { if *min == 0 { Some(Consumable { leftrec: consumable.leftrec, @@ -299,7 +306,9 @@ impl ImlProgram { configs: &mut HashMap, ) -> Option { // ... only if it's generally flagged to be consuming. - if !parselet.consuming { + let model = parselet.model.borrow(); + + if !model.consuming { return None; } @@ -323,8 +332,8 @@ impl ImlProgram { ); } - for part in [&parselet.begin, &parselet.body, &parselet.end] { - if let Some(result) = finalize_op(part, visited, configs) { + for part in [&model.begin, &model.body, &model.end] { + if let Some(result) = finalize_op(part, &parselet, visited, configs) { if configs[&id] < result { configs.insert(id, result); } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index e81ad039..bec64874 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -18,21 +18,18 @@ from the syntax tree and symbol table information. These can be memory locations of variables, static values, functions or values whose definition is still pending. 
*/ - #[derive(Debug, Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { Void, Shared(Rc>), - // Resolved - Value(RefValue), // Compile-time static value - Local(usize), // Runtime local variable - Global(usize), // Runtime global variable - Parselet { - // Parselet - parselet: ImlSharedParselet, // Intermediate definition - constants: IndexMap, // Generic signature with default configuration - }, + // Resolved: static + Value(RefValue), // Compile-time static value + Parselet(ImlSharedParselet), // Parselet instance + + // Resolved: dynamic + Local(usize), // Runtime local variable + Global(usize), // Runtime global variable // Unresolved Name { @@ -42,7 +39,7 @@ pub(in crate::compiler) enum ImlValue { name: String, // Identifier }, Instance { - // Parselet instance + // Parselet instance definition offset: Option, // Source offset target: Box, // Instance target args: Vec<(Option, ImlValue)>, // Sequential generic args @@ -64,16 +61,9 @@ impl ImlValue { /// Resolve unresolved ImlValue. Returns true in case the provided value is (already) resolved. pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { - match self { - Self::Shared(value) => value.borrow_mut().resolve(compiler), - Self::Name { name, generic, .. } if !*generic => { - if let Some(value) = compiler.get(&name) { - *self = value; - true - } else { - false - } - } + let resolve = match self { + Self::Shared(value) => return value.borrow_mut().resolve(compiler), + Self::Name { name, generic, .. 
} if !*generic => compiler.get(&name), Self::Instance { offset, target, @@ -103,14 +93,11 @@ impl ImlValue { // When everything is resolved, turn the instance definition into a parselet if is_resolved { - if let ImlValue::Parselet { - parselet, - constants, - } = &**target - { + if let ImlValue::Parselet(parselet) = &**target { + let parselet = parselet.borrow(); let mut new_constants = IndexMap::new(); - for (name, default) in constants.iter() { + for (name, default) in parselet.constants.iter() { // Take arguments by sequence first let arg = if !args.is_empty() { args.remove(0) @@ -160,8 +147,8 @@ impl ImlValue { format!( "{} got too many generic arguments ({} in total, expected {})", target, - constants.len() + args.len(), - constants.len() + new_constants.len() + args.len(), + new_constants.len() ), )); } @@ -183,22 +170,25 @@ impl ImlValue { } } - // Create new parselet from data provided - *self = Self::Parselet { - parselet: parselet.clone(), - constants: new_constants, - }; - - return true; + Some(ImlValue::from( + parselet.derive(new_constants, offset.clone()), + )) } else { - unimplemented!(); + unreachable!(); } + } else { + None } - - is_resolved } - _ => true, + _ => return true, + }; + + if let Some(resolve) = resolve { + *self = resolve; + return true; } + + false } pub fn into_refvalue(self) -> RefValue { @@ -214,8 +204,9 @@ impl ImlValue { match self { Self::Shared(value) => value.borrow().is_callable(without_arguments), Self::Value(value) => value.is_callable(without_arguments), - Self::Parselet { parselet, .. } => { + Self::Parselet(parselet) => { let parselet = parselet.borrow(); + let parselet = parselet.model.borrow(); if without_arguments { parselet.signature.len() == 0 @@ -235,9 +226,10 @@ impl ImlValue { pub fn is_consuming(&self) -> bool { match self { Self::Shared(value) => value.borrow().is_consuming(), - Self::Name { name, .. 
} => crate::utils::identifier_is_consumable(name), Self::Value(value) => value.is_consuming(), - Self::Parselet { parselet, .. } => parselet.borrow().consuming, + Self::Parselet(parselet) => parselet.borrow().model.borrow().consuming, + Self::Name { name, .. } => crate::utils::identifier_is_consumable(name), + Self::Instance { target, .. } => target.is_consuming(), _ => false, } } @@ -246,6 +238,7 @@ impl ImlValue { pub fn compile( &self, program: &mut ImlProgram, + parselet: &ImlParselet, offset: &Option, call: Option>, ops: &mut Vec, @@ -255,7 +248,9 @@ impl ImlValue { } match self { - ImlValue::Shared(value) => return value.borrow().compile(program, offset, call, ops), + ImlValue::Shared(value) => { + return value.borrow().compile(program, parselet, offset, call, ops) + } ImlValue::Value(value) => { if call.is_none() { match &*value.borrow() { @@ -275,7 +270,16 @@ impl ImlValue { } ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), - ImlValue::Name { name, .. } => { + ImlValue::Name { + name, + generic: true, + .. + } => return parselet.constants[name].compile(program, parselet, offset, call, ops), + ImlValue::Name { + name, + generic: false, + .. 
+ } => { program.errors.push(Error::new( offset.clone(), if call.is_some() { @@ -287,15 +291,12 @@ impl ImlValue { return; } - ImlValue::Parselet { - parselet, - constants, - } => { + ImlValue::Parselet(parselet) => { // When value is a parselet, check for accepted constant configuration let parselet = parselet.borrow(); let mut required = Vec::new(); - for (name, default) in constants { + for (name, default) in &parselet.constants { if matches!(default, ImlValue::Void) { required.push(name.to_string()); } @@ -356,26 +357,7 @@ impl std::fmt::Display for ImlValue { Self::Void => write!(f, "void"), Self::Shared(value) => value.borrow().fmt(f), Self::Value(value) => write!(f, "{}", value.repr()), - Self::Parselet { - parselet, - constants, - } => { - write!(f, "{}", parselet)?; - - if !constants.is_empty() { - write!(f, "<")?; - for (i, (name, value)) in constants.iter().enumerate() { - if matches!(value, ImlValue::Void) { - write!(f, "{}{}", if i > 0 { ", " } else { "" }, name)?; - } else { - write!(f, "{}{}:{}", if i > 0 { ", " } else { "" }, name, value)?; - } - } - write!(f, ">")?; - } - - Ok(()) - } + Self::Parselet(parselet) => write!(f, "{}", parselet), Self::Name { name, .. 
} => write!(f, "{}", name), _ => todo!(), } @@ -452,17 +434,13 @@ impl std::fmt::Display for ImlValue { impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { - Self::Value(v) => { + Self::Value(value) => { state.write_u8('v' as u8); - v.hash(state) + value.hash(state) } - Self::Parselet { - parselet, - constants, - } => { + Self::Parselet(parselet) => { state.write_u8('p' as u8); parselet.borrow().hash(state); - constants.iter().collect::>().hash(state); } other => unreachable!("{:?} is unhashable", other), } From 0e57b4cc3b27d9415149b1b9371979bc37b472d2 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 30 Apr 2023 00:23:54 +0200 Subject: [PATCH 30/94] Improved error handling --- src/compiler/iml/imlvalue.rs | 154 +++++++++++++++++++---------------- src/vm/op.rs | 1 + 2 files changed, 83 insertions(+), 72 deletions(-) diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index bec64874..40f8c2f9 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -63,19 +63,19 @@ impl ImlValue { pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { let resolve = match self { Self::Shared(value) => return value.borrow_mut().resolve(compiler), - Self::Name { name, generic, .. } if !*generic => compiler.get(&name), + Self::Name { + name, + generic: false, + .. + } => compiler.get(&name), + Self::Name { generic: true, .. 
} => return false, Self::Instance { offset, target, args, nargs, } => { - let mut is_resolved = true; - - // Resolve target - if !target.resolve(compiler) { - is_resolved = false; - } + let mut is_resolved = target.resolve(compiler); // Resolve sequential generic args for arg in args.iter_mut() { @@ -93,88 +93,98 @@ impl ImlValue { // When everything is resolved, turn the instance definition into a parselet if is_resolved { - if let ImlValue::Parselet(parselet) = &**target { - let parselet = parselet.borrow(); - let mut new_constants = IndexMap::new(); - - for (name, default) in parselet.constants.iter() { - // Take arguments by sequence first - let arg = if !args.is_empty() { - args.remove(0) - } - // Otherwise, take named arguments by sequence - else if let Some(narg) = nargs.shift_remove(name) { - narg - } - // Otherwise, use default - else { - (*offset, default.clone()) - }; + match &**target { + ImlValue::Parselet(parselet) => { + let parselet = parselet.borrow(); + let mut new_constants = IndexMap::new(); + + for (name, default) in parselet.constants.iter() { + // Take arguments by sequence first + let arg = if !args.is_empty() { + args.remove(0) + } + // Otherwise, take named arguments by sequence + else if let Some(narg) = nargs.shift_remove(name) { + narg + } + // Otherwise, use default + else { + (*offset, default.clone()) + }; - // Check integrity of constant names - if let Self::Void = arg.1 { - compiler.errors.push(Error::new( - arg.0, - format!("Expecting argument for generic '{}'", name), - )); - } else if arg.1.is_consuming() { - if !utils::identifier_is_consumable(name) { + // Check integrity of constant names + if let Self::Void = arg.1 { + compiler.errors.push(Error::new( + arg.0, + format!("Expecting argument for generic '{}'", name), + )); + } else if arg.1.is_consuming() { + if !utils::identifier_is_consumable(name) { + compiler.errors.push(Error::new( + arg.0, + format!( + "Cannot assign consumable {} to non-consumable generic '{}'", + arg.1, 
name + ) + )); + } + } else if utils::identifier_is_consumable(name) { compiler.errors.push(Error::new( arg.0, format!( - "Cannot assign consumable {} to non-consumable generic '{}'", + "Cannot assign non-consumable {} to consumable generic '{}'", arg.1, name ) )); } - } else if utils::identifier_is_consumable(name) { + + new_constants.insert(name.clone(), arg.1); + } + + // Report any errors for unconsumed generic arguments. + if !args.is_empty() { compiler.errors.push(Error::new( - arg.0, + args[0].0, // report first parameter format!( - "Cannot assign non-consumable {} to consumable generic '{}'", - arg.1, name - ) + "{} got too many generic arguments ({} in total, expected {})", + target, + new_constants.len() + args.len(), + new_constants.len() + ), )); } - new_constants.insert(name.clone(), arg.1); - } + for (name, (offset, _)) in nargs { + if new_constants.get(name).is_some() { + compiler.errors.push(Error::new( + *offset, + format!( + "{} already got generic argument '{}'", + target, name + ), + )); + } else { + compiler.errors.push(Error::new( + *offset, + format!( + "{} does not accept generic argument named '{}'", + target, name + ), + )); + } + } - // Report any errors for unconsumed generic arguments. 
- if !args.is_empty() { + Some(ImlValue::from( + parselet.derive(new_constants, offset.clone()), + )) + } + target => { compiler.errors.push(Error::new( - args[0].0, // report first parameter - format!( - "{} got too many generic arguments ({} in total, expected {})", - target, - new_constants.len() + args.len(), - new_constants.len() - ), + *offset, + format!("Cannot create instance from '{}'", target), )); + None } - - for (name, (offset, _)) in nargs { - if new_constants.get(name).is_some() { - compiler.errors.push(Error::new( - *offset, - format!("{} already got generic argument '{}'", target, name), - )); - } else { - compiler.errors.push(Error::new( - *offset, - format!( - "{} does not accept generic argument named '{}'", - target, name - ), - )); - } - } - - Some(ImlValue::from( - parselet.derive(new_constants, offset.clone()), - )) - } else { - unreachable!(); } } else { None diff --git a/src/vm/op.rs b/src/vm/op.rs index 026ab873..05b637c6 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -654,6 +654,7 @@ impl Op { Op::Sep => { let mut value = context.pop(); + // fixme: Replace by https://doc.rust-lang.org/std/rc/struct.Rc.html#method.unwrap_or_clone ? 
if Rc::strong_count(&value) > 1 { value = RefValue::from({ let inner = value.borrow(); From 462829844a630470bd667b91a02aed87aa698c06 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 30 Apr 2023 00:30:50 +0200 Subject: [PATCH 31/94] Remove warnings --- src/compiler/iml/imlparselet.rs | 1 + src/utils.rs | 1 + src/vm/op.rs | 2 -- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 523b793e..aa71d0bb 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -24,6 +24,7 @@ pub(in crate::compiler) struct ImlParseletModel { // ---------------------------------------------------------------------------- /// Intermediate parselet +#[allow(dead_code)] #[derive(Debug)] pub(in crate::compiler) struct ImlParselet { pub model: Rc>, // Parselet base model diff --git a/src/utils.rs b/src/utils.rs index a776748f..41880355 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -55,6 +55,7 @@ the code is fed to the Tokay REPL, and expected output is tested against each li mode, it is important to specify multi-line definitions with the alternative `;` delimiter, otherwise a syntax error will occur (likewise in the normal REPL). */ +#[allow(dead_code)] pub(crate) fn testcase(code: &str) { //println!("---"); diff --git a/src/vm/op.rs b/src/vm/op.rs index 05b637c6..4e588470 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -46,7 +46,6 @@ pub(crate) enum Op { Backward(usize), // Jump backward // Interrupts - Skip, // Err(Reject::Skip) Next, // Err(Reject::Next) Push, // Ok(Accept::Push) LoadPush, // Ok(Accept::Push) with value @@ -351,7 +350,6 @@ impl Op { } // Interrupts - Op::Skip => Err(Reject::Skip), // currently not used. 
Op::Next => Err(Reject::Next), Op::Push => Ok(Accept::Push(Capture::Empty)), From e952bb64239d6ba345bd0af660dad42fb6f6eb3d Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 2 May 2023 18:45:03 +0200 Subject: [PATCH 32/94] Correctly handle static value calls --- src/compiler/iml/imlop.rs | 6 ------ src/compiler/iml/imlvalue.rs | 32 +++++++++++++++++--------------- 2 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 2436dff7..1438e8ae 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -129,12 +129,6 @@ impl ImlOp { return Self::load(offset, value); } - // Early recognize call to value which is generally not call-able - if !value.is_callable(true) && !value.is_callable(false) { - // Currently not planned as final - todo!("The value {:?} is generally not callable!", value); - } - ImlOp::Call { offset, target: value, diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 40f8c2f9..eaed827e 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -257,25 +257,25 @@ impl ImlValue { ops.push(Op::Offset(Box::new(*offset))); } + let start = ops.len(); + match self { ImlValue::Shared(value) => { return value.borrow().compile(program, parselet, offset, call, ops) } ImlValue::Value(value) => { - if call.is_none() { - match &*value.borrow() { - // Some frequently used values have built-in push operations - Value::Void => return ops.push(Op::PushVoid), - Value::Null => return ops.push(Op::PushNull), - Value::True => return ops.push(Op::PushTrue), - Value::False => return ops.push(Op::PushFalse), - Value::Int(i) => match i.to_i64() { - Some(0) => return ops.push(Op::Push0), - Some(1) => return ops.push(Op::Push1), - _ => {} - }, + match &*value.borrow() { + // Some frequently used values have built-in push operations + Value::Void => ops.push(Op::PushVoid), + Value::Null => ops.push(Op::PushNull), + Value::True => 
ops.push(Op::PushTrue), + Value::False => ops.push(Op::PushFalse), + Value::Int(i) => match i.to_i64() { + Some(0) => ops.push(Op::Push0), + Some(1) => ops.push(Op::Push1), _ => {} - } + }, + _ => {} } } ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), @@ -328,8 +328,10 @@ impl ImlValue { _ => unreachable!(), } - // Try to register value as static - if let Ok(idx) = program.register(self) { + // Check if something has been pushed before. + if start == ops.len() { + let idx = program.register(self).unwrap(); + match call { // Load None => ops.push(Op::LoadStatic(idx)), From 706b3a9feaed97d9ac0860fceb8fb649373d6b7b Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 3 May 2023 01:48:33 +0200 Subject: [PATCH 33/94] ImlValue::Op() and unique parselet instances --- src/compiler/ast.rs | 42 ++++++++++++++++++++------------- src/compiler/compiler.rs | 37 ++++++++++++++++++++--------- src/compiler/iml/imlparselet.rs | 10 ++++++-- src/compiler/iml/imlprogram.rs | 2 +- src/compiler/iml/imlvalue.rs | 29 ++++++++++------------- src/reader.rs | 2 +- src/vm/op.rs | 2 +- 7 files changed, 74 insertions(+), 50 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index eb5655a2..85819b38 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -79,13 +79,17 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { // Generate a value from the given code match emit { // Literals - "value_string" => ImlValue::from(node["value"].clone()), - "value_integer" => node["value"].clone().into(), - "value_float" => node["value"].clone().into(), - "value_true" => value!(true).into(), - "value_false" => value!(false).into(), - "value_null" => value!(null).into(), - "value_void" => value!(void).into(), + "value_string" => compiler.register_static(node["value"].clone()), + "value_integer" => match node["value"].to_i64() { + Ok(0) => ImlValue::Op(Op::Push0), + Ok(1) => ImlValue::Op(Op::Push1), + _ => compiler.register_static(node["value"].clone()), 
+ }, + "value_float" => compiler.register_static(node["value"].clone()), + "value_true" => ImlValue::Op(Op::PushTrue), + "value_false" => ImlValue::Op(Op::PushFalse), + "value_null" => ImlValue::Op(Op::PushNull), + "value_void" => ImlValue::Op(Op::PushVoid), // Tokens "value_token_match" | "value_token_touch" => { @@ -99,14 +103,18 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { value = "#INVALID".to_string(); } - if emit == "value_token_match" { - RefValue::from(Token::Match(value)).into() + compiler.register_static(if emit == "value_token_match" { + RefValue::from(Token::Match(value)) } else { - RefValue::from(Token::Touch(value)).into() - } + RefValue::from(Token::Touch(value)) + }) + } + "value_token_any" => { + compiler.register_static(RefValue::from(Token::Char(CharClass::new().negate()))) + } + "value_token_anys" => { + compiler.register_static(RefValue::from(Token::Chars(CharClass::new().negate()))) } - "value_token_any" => RefValue::from(Token::Char(CharClass::new().negate())).into(), - "value_token_anys" => RefValue::from(Token::Chars(CharClass::new().negate())).into(), "value_token_ccl" | "value_token_ccls" => { let many = emit.ends_with("s"); @@ -153,11 +161,11 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { assert!(emit == "ccl"); } - if many { - RefValue::from(Token::Chars(ccl)).into() + compiler.register_static(if many { + RefValue::from(Token::Chars(ccl)) } else { - RefValue::from(Token::Char(ccl)).into() - } + RefValue::from(Token::Char(ccl)) + }) } // Parselets diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index a2d3f81b..7546696c 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -6,8 +6,7 @@ use crate::error::Error; use crate::reader::*; use crate::value::{RefValue, Token}; use crate::vm::*; -use indexmap::IndexMap; -use std::collections::HashMap; +use indexmap::{IndexMap, IndexSet}; /** Compiler symbolic scopes. 
@@ -20,8 +19,8 @@ pub(in crate::compiler) enum Scope { Parselet { // parselet-level scope (variables and constants can be defined here) usage_start: usize, // Begin of usages to resolve until when scope is closed - constants: HashMap, // Named constants symbol table - variables: HashMap, // Named variable symbol table + constants: IndexMap, // Named constants symbol table + variables: IndexMap, // Named variable symbol table temporaries: Vec, // List of unused temporary variables locals: usize, // Total amount of variables in this scope begin: Vec, // Begin operations @@ -31,7 +30,7 @@ pub(in crate::compiler) enum Scope { Block { // block level (constants can be defined here) usage_start: usize, // Begin of usages to resolve until when scope is closed - constants: HashMap, // Named constants symbol table + constants: IndexMap, // Named constants symbol table }, Loop, // loop level (allows use of break & continue) } @@ -47,9 +46,10 @@ pub struct Compiler { parser: Option, // Internal Tokay parser pub debug: u8, // Compiler debug mode - pub(in crate::compiler) scopes: Vec, // Current compilation scopes - pub(in crate::compiler) usages: Vec, // Unresolved values - pub(in crate::compiler) errors: Vec, // Collected errors during compilation + pub(in crate::compiler) statics: IndexSet, // Static values collected during compilation + pub(in crate::compiler) scopes: Vec, // Current compilation scopes + pub(in crate::compiler) usages: Vec, // Unresolved values + pub(in crate::compiler) errors: Vec, // Collected errors during compilation } impl Compiler { @@ -67,6 +67,7 @@ impl Compiler { let mut compiler = Self { parser: None, debug: 0, + statics: IndexSet::new(), scopes: Vec::new(), usages: Vec::new(), errors: Vec::new(), @@ -150,6 +151,20 @@ impl Compiler { self.compile(Reader::new(Box::new(std::io::Cursor::new(src.to_owned())))) } + /** Register a static value within a compiler instance. 
+ + This avoids that the compiler produces multiple results pointing to effectively the same values + (althought they are different objects, but the same value) + */ + pub(in crate::compiler) fn register_static(&mut self, value: RefValue) -> ImlValue { + if let Some(value) = self.statics.get(&value) { + ImlValue::Value(value.clone()) + } else { + self.statics.insert(value.clone()); + ImlValue::Value(value) + } + } + /// Tries to resolves open usages from the current scope pub(in crate::compiler) fn resolve(&mut self) { if let Scope::Parselet { usage_start, .. } | Scope::Block { usage_start, .. } = @@ -175,8 +190,8 @@ impl Compiler { 0, Scope::Parselet { usage_start: self.usages.len(), - variables: HashMap::new(), - constants: HashMap::new(), + variables: IndexMap::new(), + constants: IndexMap::new(), temporaries: Vec::new(), locals: 0, begin: Vec::new(), @@ -192,7 +207,7 @@ impl Compiler { 0, Scope::Block { usage_start: self.usages.len(), - constants: HashMap::new(), + constants: IndexMap::new(), }, ) } diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index aa71d0bb..8395444f 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -20,6 +20,12 @@ pub(in crate::compiler) struct ImlParseletModel { pub body: ImlOp, // Body intermediate Operations } +impl ImlParseletModel { + pub fn id(&self) -> usize { + self as *const ImlParseletModel as usize + } +} + // ImlParselet // ---------------------------------------------------------------------------- @@ -70,7 +76,7 @@ impl ImlParselet { let model = self.model.borrow(); Parselet::new( - self.name.clone(), + Some(format!("{}", self)), None, self.severity, model @@ -123,7 +129,7 @@ impl std::fmt::Display for ImlParselet { impl std::cmp::PartialEq for ImlParselet { // It satisfies to just compare the parselet's memory address for equality fn eq(&self, other: &Self) -> bool { - self.id() == other.id() + self.model.borrow().id() == other.model.borrow().id() && 
self.constants == other.constants } } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 2c6548bc..eb874669 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -40,7 +40,7 @@ impl ImlProgram { Some(idx) => Ok(idx), } } - _ => Err(()), // Cannot register unresolved value + _ => Err(()), // Cannot register this kind of value } } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index eaed827e..21061ab7 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -3,10 +3,10 @@ use super::*; use crate::compiler::Compiler; use crate::reader::Offset; use crate::utils; -use crate::value::{Object, RefValue, Value}; +use crate::value; +use crate::value::{Object, RefValue}; use crate::Error; use indexmap::IndexMap; -use num::ToPrimitive; use std::cell::RefCell; use std::rc::Rc; @@ -24,6 +24,7 @@ pub(in crate::compiler) enum ImlValue { Shared(Rc>), // Resolved: static + Op(Op), // Compile-time push operation (built-in static value) Value(RefValue), // Compile-time static value Parselet(ImlSharedParselet), // Parselet instance @@ -201,8 +202,15 @@ impl ImlValue { false } + /// Turn ImlValue into RefValue pub fn into_refvalue(self) -> RefValue { match self { + Self::Op(Op::PushVoid) => value!(void), + Self::Op(Op::PushNull) => value!(null), + Self::Op(Op::PushTrue) => value!(true), + Self::Op(Op::PushFalse) => value!(false), + Self::Op(Op::Push0) => value!(0), + Self::Op(Op::Push1) => value!(1), Self::Value(value) => value, _ => unreachable!("{:?} cannot be unwrapped", self), } @@ -263,21 +271,8 @@ impl ImlValue { ImlValue::Shared(value) => { return value.borrow().compile(program, parselet, offset, call, ops) } - ImlValue::Value(value) => { - match &*value.borrow() { - // Some frequently used values have built-in push operations - Value::Void => ops.push(Op::PushVoid), - Value::Null => ops.push(Op::PushNull), - Value::True => ops.push(Op::PushTrue), - Value::False => 
ops.push(Op::PushFalse), - Value::Int(i) => match i.to_i64() { - Some(0) => ops.push(Op::Push0), - Some(1) => ops.push(Op::Push1), - _ => {} - }, - _ => {} - } - } + ImlValue::Op(op) => ops.push(op.clone()), + ImlValue::Value(_) => {} ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { diff --git a/src/reader.rs b/src/reader.rs index 896b1641..55a2baab 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -4,7 +4,7 @@ use std::io::prelude::*; use std::io::BufReader; /// Position inside a reader, with row and column counting. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] pub struct Offset { // todo: Hold source filename information as well in the future? pub offset: usize, diff --git a/src/vm/op.rs b/src/vm/op.rs index 4e588470..8b1b6207 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -14,7 +14,7 @@ Atomic operations. Specifies all atomic level VM code operations to run the Tokay VM. 
*/ -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Hash, PartialEq, Eq)] pub(crate) enum Op { Nop, Offset(Box), // Source offset position for debugging From 39fe57d011e915585c20d9f30bb1a21e389002e0 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 22 May 2023 01:43:37 +0200 Subject: [PATCH 34/94] Use compiler statics' for atomics --- src/compiler/ast.rs | 19 +++++++++---------- src/compiler/compiler.rs | 13 ++++++++++++- src/compiler/iml/imlprogram.rs | 2 +- src/compiler/iml/imlvalue.rs | 29 ++++++++++++++++------------- src/value/refvalue.rs | 1 + 5 files changed, 39 insertions(+), 25 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 7bb9550a..bf8ed3f2 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -79,17 +79,17 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { // Generate a value from the given code match emit { // Literals - "value_string" => compiler.register_static(node["value"].clone()), + "value_void" => ImlValue::Value(compiler.statics[0].clone()), + "value_null" => ImlValue::Value(compiler.statics[1].clone()), + "value_true" => ImlValue::Value(compiler.statics[2].clone()), + "value_false" => ImlValue::Value(compiler.statics[3].clone()), "value_integer" => match node["value"].to_i64() { - Ok(0) => ImlValue::Op(Op::Push0), - Ok(1) => ImlValue::Op(Op::Push1), + Ok(0) => ImlValue::Value(compiler.statics[4].clone()), + Ok(1) => ImlValue::Value(compiler.statics[5].clone()), _ => compiler.register_static(node["value"].clone()), }, "value_float" => compiler.register_static(node["value"].clone()), - "value_true" => ImlValue::Op(Op::PushTrue), - "value_false" => ImlValue::Op(Op::PushFalse), - "value_null" => ImlValue::Op(Op::PushNull), - "value_void" => ImlValue::Op(Op::PushVoid), + "value_string" => compiler.register_static(node["value"].clone()), // Tokens "value_token_match" | "value_token_touch" => { @@ -468,7 +468,7 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: 
bool, hold: "capture_index" => { let children = children.object::().unwrap(); - let index = traverse_node_value(compiler, children).into_refvalue(); + let index = traverse_node_value(compiler, children).unwrap(); if store { if hold { @@ -965,8 +965,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "capture_index" => { let children = node["children"].borrow(); - let index = - traverse_node_value(compiler, children.object::().unwrap()).into_refvalue(); + let index = traverse_node_value(compiler, children.object::().unwrap()).unwrap(); ImlOp::from(Op::LoadFastCapture(index.to_usize().unwrap())) } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 0d167b7a..ead09c1e 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -4,6 +4,7 @@ use super::*; use crate::builtin::Builtin; use crate::error::Error; use crate::reader::*; +use crate::value; use crate::value::{RefValue, Token}; use crate::vm::*; use indexmap::{IndexMap, IndexSet}; @@ -45,7 +46,6 @@ won't be removed and can be accessed on later calls. 
pub struct Compiler { parser: Option, // Internal Tokay parser pub debug: u8, // Compiler debug mode - pub(in crate::compiler) statics: IndexSet, // Static values collected during compilation pub(in crate::compiler) scopes: Vec, // Current compilation scopes pub(in crate::compiler) usages: Vec, // Unresolved values @@ -73,6 +73,17 @@ impl Compiler { errors: Vec::new(), }; + for value in [ + value!(void), + value!(null), + value!(true), + value!(false), + value!(0), + value!(1), + ] { + compiler.statics.insert(value); + } + // Compile with the default prelude if with_prelude { compiler diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index eb874669..e9baf6c6 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -107,7 +107,7 @@ impl ImlProgram { RefValue::from(parselet) } else { - iml.into_refvalue() + iml.unwrap() } }) .collect(); diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 21061ab7..6db3fe0f 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -3,10 +3,10 @@ use super::*; use crate::compiler::Compiler; use crate::reader::Offset; use crate::utils; -use crate::value; -use crate::value::{Object, RefValue}; +use crate::value::{Object, RefValue, Value}; use crate::Error; use indexmap::IndexMap; +use num::ToPrimitive; use std::cell::RefCell; use std::rc::Rc; @@ -24,7 +24,6 @@ pub(in crate::compiler) enum ImlValue { Shared(Rc>), // Resolved: static - Op(Op), // Compile-time push operation (built-in static value) Value(RefValue), // Compile-time static value Parselet(ImlSharedParselet), // Parselet instance @@ -203,14 +202,8 @@ impl ImlValue { } /// Turn ImlValue into RefValue - pub fn into_refvalue(self) -> RefValue { + pub fn unwrap(self) -> RefValue { match self { - Self::Op(Op::PushVoid) => value!(void), - Self::Op(Op::PushNull) => value!(null), - Self::Op(Op::PushTrue) => value!(true), - Self::Op(Op::PushFalse) => value!(false), - 
Self::Op(Op::Push0) => value!(0), - Self::Op(Op::Push1) => value!(1), Self::Value(value) => value, _ => unreachable!("{:?} cannot be unwrapped", self), } @@ -271,8 +264,18 @@ impl ImlValue { ImlValue::Shared(value) => { return value.borrow().compile(program, parselet, offset, call, ops) } - ImlValue::Op(op) => ops.push(op.clone()), - ImlValue::Value(_) => {} + ImlValue::Value(value) => match &*value.borrow() { + Value::Void => ops.push(Op::PushVoid), + Value::Null => ops.push(Op::PushNull), + Value::True => ops.push(Op::PushTrue), + Value::False => ops.push(Op::PushFalse), + Value::Int(i) => match i.to_i32() { + Some(0) => ops.push(Op::Push0), + Some(1) => ops.push(Op::Push1), + _ => {} + }, + _ => {} + }, ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Name { @@ -320,7 +323,7 @@ impl ImlValue { return; } } - _ => unreachable!(), + _ => unreachable!("{:?}", self), } // Check if something has been pushed before. diff --git a/src/value/refvalue.rs b/src/value/refvalue.rs index 9b5fec91..822da474 100644 --- a/src/value/refvalue.rs +++ b/src/value/refvalue.rs @@ -379,6 +379,7 @@ impl Object for RefValue { impl Hash for RefValue { fn hash(&self, state: &mut H) { match &*self.borrow() { + Value::Void => state.write_u8('V' as u8), Value::Null => state.write_u8('N' as u8), Value::True => state.write_u8('T' as u8), Value::False => state.write_u8('F' as u8), From 48210d41ed6167a3add856c7bfc83af2c7a79916 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 1 Jun 2023 12:29:47 +0200 Subject: [PATCH 35/94] Drafting Repeat

and improvements - This is a first draft of Repeat

, will be improved - context.debug holds individual debug level per context - Reject::Return is not necessary anymore --- src/prelude.tok | 19 +++++++++++++++++++ src/vm/context.rs | 14 ++++++++------ src/vm/mod.rs | 9 ++++----- src/vm/op.rs | 30 +++++++++++++++--------------- 4 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/prelude.tok b/src/prelude.tok index 8529cd8a..2de9f88c 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -1,4 +1,23 @@ # Tokay default prelude +Repeat : @

min=1, max=void { + P { + res += $1 + + if !max || res.len < max { + repeat + } + } + + end { + print("Repeat", res, res.len, min) + if res.len < min { + reject + } + + res + } +} + Number : Float | Int Token : Word | Number | AsciiPunctuation diff --git a/src/vm/context.rs b/src/vm/context.rs index 2975d280..a483ace7 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -40,6 +40,7 @@ pub struct Context<'program, 'parselet, 'runtime> { pub runtime: &'runtime mut Runtime, // Overall runtime pub depth: usize, // Recursion depth + pub debug: u8, // Debug level // Positions pub stack_start: usize, // Stack start (including locals and parameters) @@ -89,6 +90,7 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { // Create Context Self { + debug: runtime.debug, program, parselet, runtime, @@ -495,22 +497,22 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { /// Run the current context with the associated parselet pub fn run(&mut self, main: bool) -> Result { - if main { - return self.run_as_main(); - } - // Debugging - if self.runtime.debug < 3 { + if self.debug < 3 { if let Ok(inspect) = std::env::var("TOKAY_INSPECT") { for name in inspect.split(" ") { if name == self.parselet.name { - self.runtime.debug = 6; + self.debug = 6; break; } } } } + if main { + return self.run_as_main(); + } + // collected results (from repeated parselet) let mut retlist = List::new(); diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 4f8e1bac..a8f4effd 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -74,12 +74,11 @@ impl From for Result { /// Representing the Err-value result on a branched run of the VM. 
#[derive(Debug, Clone)] pub enum Reject { - Next, // soft-reject, continue with next sequence - Skip, // soft-reject, skip consumed input and continue - Return, // hard-reject current parselet ('return'/'reject'-keyword) - Main, // hard-reject current parselet and exit to main scope ('escape'-keyword) + Next, // soft-reject, continue with next sequence + Skip, // soft-reject, skip consumed input and continue + Main, // hard-reject current parselet and exit to main scope ('escape'-keyword) Error(Box), //hard-reject with error message (runtime error) - // todo: Exit(u32) // stop entire program with exit code + // todo: Exit(u32) // stop entire program with exit code } impl From for Reject { diff --git a/src/vm/op.rs b/src/vm/op.rs index b38f71c1..3691ce85 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -139,10 +139,10 @@ impl Op { let op = &ops[ip]; // Debug - if context.runtime.debug == 3 { + if context.debug == 3 { context.log(&format!("{:03}:{:?}", ip, op)); - } else if context.runtime.debug > 3 { - if context.runtime.debug > 5 { + } else if context.debug > 3 { + if context.debug > 5 { // Skip any Nop-Operations if matches!(op, Op::Nop | Op::Offset(_)) { ip += 1; @@ -155,7 +155,7 @@ impl Op { dump(ops, context, ip); // Dump stack and frames - if context.runtime.debug > 4 { + if context.debug > 4 { context.log("--- Stack ---"); for i in 0..context.runtime.stack.len() { context.log(&format!(" {:03} {:?}", i, context.runtime.stack[i])); @@ -170,7 +170,7 @@ impl Op { } // Step-by-step - if context.runtime.debug > 5 { + if context.debug > 5 { let _ = io::stdin().read(&mut [0u8]).unwrap(); } } @@ -220,15 +220,12 @@ impl Op { Op::Collect => Ok(Accept::Push(context.collect( context.frame.capture_start, false, - context.runtime.debug > 5, + context.debug > 5, ))), Op::InCollect => { - let mut capture = context.collect( - context.frame.capture_start, - false, - context.runtime.debug > 5, - ); + let mut capture = + context.collect(context.frame.capture_start, false, 
context.debug > 5); if capture.get_severity() > 5 { capture.set_severity(5); @@ -369,7 +366,10 @@ impl Op { let value = context.pop(); Ok(Accept::Repeat(Some(value))) } - Op::Reject => Err(Reject::Return), + Op::Reject => { + state = Err(Reject::Next); + break; + } Op::LoadExit => { std::process::exit(context.pop().to_i64()? as i32); } @@ -388,7 +388,7 @@ impl Op { Op::CallOrCopy => { let value = context.pop(); - if false && context.runtime.debug > 3 { + if false && context.debug > 3 { println!( "CallOrCopy is_callable={:?} is_mutable={:?}", value.is_callable(true), @@ -710,7 +710,7 @@ impl Op { }; // Debug - if context.runtime.debug > 3 { + if context.debug > 3 { context.log(&format!("ip = {} state = {:?}", ip, state)); } @@ -749,7 +749,7 @@ impl Op { context.frame = context.frames.pop().unwrap(); } - if context.runtime.debug > 3 { + if context.debug > 3 { context.log(&format!("exit state = {:?}", state)); } From 7f2f893bde366d09cde88982b0da5d62e8507700 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 5 Jun 2023 19:40:58 +0200 Subject: [PATCH 36/94] Clarification of the `repeat` keyword Removing the possibility to repeat a parselet with a value. This clarifies the behavior of the `repeat`-keyword, as it is on the user on how to result of the parselet is handled. 
--- src/compiler/ast.rs | 6 ++-- src/compiler/parser.rs | 8 ----- src/compiler/tokay.tok | 2 +- src/vm/context.rs | 73 ++++++++------------------------------- src/vm/mod.rs | 13 +++---- src/vm/op.rs | 7 +--- tests/parselet_repeat.tok | 9 +++-- 7 files changed, 30 insertions(+), 88 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index bf8ed3f2..e8cb6098 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1157,7 +1157,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { let mut ops = Vec::new(); let op = match parts[1] { - "accept" | "break" | "exit" | "push" | "repeat" => { + "accept" | "break" | "exit" | "push" => { if parts[1] == "break" && !compiler.loop_check() { compiler.errors.push(Error::new( traverse_node_offset(node), @@ -1178,7 +1178,6 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "break" => Op::LoadBreak.into(), "exit" => Op::LoadExit.into(), "push" => Op::LoadPush.into(), - "repeat" => Op::LoadRepeat.into(), _ => unreachable!(), } } else { @@ -1187,7 +1186,6 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "break" => Op::Break.into(), "exit" => Op::Exit.into(), "push" => Op::Push.into(), - "repeat" => Op::Repeat.into(), _ => unreachable!(), } } @@ -1210,6 +1208,8 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "reject" => Op::Reject.into(), + "repeat" => Op::Repeat.into(), + "unary" => { let children = node["children"].borrow(); let children = children.object::().unwrap(); diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index d95d7435..3637fa4d 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -6699,14 +6699,6 @@ impl Parser { "emit" => "identifier", "value" => "_standalone_" ])), - (value!([ - "emit" => "op_mod_opt", - "children" => - (value!([ - "emit" => "identifier", - "value" => "Expression" - ])) - ])), (value!([ "emit" => "call", "children" => diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok 
index 8de51a93..7bb0815c 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -368,7 +368,7 @@ Statement : @{ 'next' _standalone_ ast("op_next") 'push' _standalone_ Expression? ast("op_push") 'reject' _standalone_ ast("op_reject") - 'repeat' _standalone_ Expression? ast("op_repeat") + 'repeat' _standalone_ ast("op_repeat") 'return' _standalone_ Expression? ast("op_accept") Expression } diff --git a/src/vm/context.rs b/src/vm/context.rs index a483ace7..544baeab 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -513,9 +513,6 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { return self.run_as_main(); } - // collected results (from repeated parselet) - let mut retlist = List::new(); - // Begin let mut ret = match self.execute(&self.parselet.begin) { Ok(Accept::Next) | Err(Reject::Skip) => Capture::Empty, @@ -523,11 +520,7 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { self.reset(Some(self.runtime.reader.tell())); capture } - Ok(Accept::Repeat(value)) => { - if let Some(value) = value { - retlist.push(value); - } - + Ok(Accept::Repeat) => { self.reset(Some(self.runtime.reader.tell())); Capture::Empty } @@ -542,11 +535,7 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { Err(Reject::Skip) => {} Ok(Accept::Next) => break ret, Ok(Accept::Push(capture)) => break capture, - Ok(Accept::Repeat(value)) => { - if let Some(value) = value { - retlist.push(value); - } - } + Ok(Accept::Repeat) => {} Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), Err(Reject::Next) if !first => break Capture::Empty, other => return other, @@ -559,32 +548,12 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { // End ret = match self.execute(&self.parselet.end) { - Ok(Accept::Next) | Err(Reject::Skip) => ret, + Ok(Accept::Next) | Err(Reject::Skip) | Ok(Accept::Repeat) => ret, Ok(Accept::Push(capture)) => capture, - Ok(Accept::Repeat(value)) 
=> { - if let Some(value) = value { - retlist.push(value); - } - - ret - } Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), other => return other, }; - // retlist has higher priority than ret - if !retlist.is_empty() { - ret = Capture::Value( - if retlist.len() > 1 { - RefValue::from(retlist) - } else { - retlist.pop().unwrap() - }, - None, - self.parselet.severity, - ); - } - Ok(Accept::Push(ret).into_push(self.parselet.severity)) } @@ -595,19 +564,15 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { */ fn run_as_main(&mut self) -> Result { // collected results - let mut retlist = List::new(); + let mut results = List::new(); // Begin match self.execute(&self.parselet.begin) { Ok(Accept::Next) | Err(Reject::Skip) | Ok(Accept::Push(Capture::Empty)) => {} Ok(Accept::Push(mut capture)) => { - retlist.push(capture.extract(&self.runtime.reader)); - } - Ok(Accept::Repeat(value)) => { - if let Some(value) = value { - retlist.push(value); - } + results.push(capture.extract(&self.runtime.reader)); } + Ok(Accept::Repeat) => {} Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), other => return other, }; @@ -622,13 +587,9 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { | Ok(Accept::Next) | Ok(Accept::Push(Capture::Empty)) => {} Ok(Accept::Push(mut capture)) => { - retlist.push(capture.extract(&self.runtime.reader)); - } - Ok(Accept::Repeat(value)) => { - if let Some(value) = value { - retlist.push(value); - } + results.push(capture.extract(&self.runtime.reader)); } + Ok(Accept::Repeat) => {} Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), other => return other, } @@ -651,24 +612,20 @@ impl<'program, 'parselet, 'runtime> Context<'program, 'parselet, 'runtime> { match self.execute(&self.parselet.end) { Ok(Accept::Next) | Err(Reject::Skip) | Ok(Accept::Push(Capture::Empty)) => {} Ok(Accept::Push(mut capture)) => { - 
retlist.push(capture.extract(&self.runtime.reader)); - } - Ok(Accept::Repeat(value)) => { - if let Some(value) = value { - retlist.push(value); - } + results.push(capture.extract(&self.runtime.reader)); } + Ok(Accept::Repeat) => {} Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), other => return other, }; - // retlist has higher priority than ret - if !retlist.is_empty() { + // results has higher priority than ret + if !results.is_empty() { Ok(Accept::Push(Capture::Value( - if retlist.len() > 1 { - RefValue::from(retlist) + if results.len() > 1 { + RefValue::from(results) } else { - retlist.pop().unwrap() + results.pop().unwrap() }, None, self.parselet.severity, diff --git a/src/vm/mod.rs b/src/vm/mod.rs index a8f4effd..f78d9d25 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -22,7 +22,7 @@ pub enum Accept { Next, // soft-accept, run next instructions at incremented ip Hold, // soft-accept, run next instruction at current ip Push(Capture), // soft-accept, push a capture (also 'push'-keyword) - Repeat(Option), // hard-accept, repeat entire parselet ('repeat'-keyword) + Repeat, // hard-accept, repeat parselet on current position ('repeat'-keyword) Return(Option), // hard-accept, return/accept entire parselet ('return/accept'-keyword) } @@ -37,13 +37,8 @@ impl Accept { } Self::Push(capture) } - Self::Repeat(value) | Self::Return(value) => { - if let Some(value) = value { - Self::Push(Capture::Value(value, None, severity)) - } else { - Self::Push(Capture::Empty) - } - } + Self::Repeat | Self::Return(None) => Self::Push(Capture::Empty), + Self::Return(Some(value)) => Self::Push(Capture::Value(value, None, severity)), } } @@ -51,7 +46,7 @@ impl Accept { pub fn into_refvalue(self) -> RefValue { match self { Self::Push(capture) => capture.get_value(), - Self::Repeat(Some(value)) | Self::Return(Some(value)) => value, + Self::Return(Some(value)) => value, _ => tokay::value!(void), } } diff --git a/src/vm/op.rs b/src/vm/op.rs index 3691ce85..b4b1f7c7 
100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -52,7 +52,6 @@ pub(crate) enum Op { Accept, // Ok(Accept::Return) LoadAccept, // Ok(Accept::Return) with value Repeat, // Ok(Accept::Repeat) - LoadRepeat, // Ok(Accept::Repeat) with value Reject, // Ok(Err::Reject) LoadExit, // Exit with errorcode Exit, // Exit with 0 @@ -361,11 +360,7 @@ impl Op { let value = context.pop(); Ok(Accept::Return(Some(value))) } - Op::Repeat => Ok(Accept::Repeat(None)), - Op::LoadRepeat => { - let value = context.pop(); - Ok(Accept::Repeat(Some(value))) - } + Op::Repeat => Ok(Accept::Repeat), Op::Reject => { state = Err(Reject::Next); break; diff --git a/tests/parselet_repeat.tok b/tests/parselet_repeat.tok index b3c4f21b..acd10473 100644 --- a/tests/parselet_repeat.tok +++ b/tests/parselet_repeat.tok @@ -1,9 +1,12 @@ P: @{ - 'a' repeat $1 + begin count = 0 + 'x' count++ repeat + end count } P + #--- -#aaaa +#xaxxx #--- -#("a", "a", "a", "a") +#(1, 3) From 68fc8b67ad7db5368c158963e687729ae8ec0f9d Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 7 Jun 2023 01:55:12 +0200 Subject: [PATCH 37/94] `Self` and `self`, first draft of `List` - implements the keywords `Self` and `self` to self-reference current parselet - implements a drafted `List` builtin to parse separated lists --- src/compiler/ast.rs | 12 ++++--- src/compiler/iml/imlop.rs | 36 +++++++++++--------- src/compiler/iml/imlparselet.rs | 8 ++--- src/compiler/iml/imlprogram.rs | 16 +++++---- src/compiler/iml/imlvalue.rs | 23 ++++++++++--- src/compiler/parser.rs | 60 +++++++++++++++++++++++++++++++++ src/compiler/tokay.tok | 4 ++- src/prelude.tok | 6 ++++ src/vm/program.rs | 2 +- 9 files changed, 132 insertions(+), 35 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index e8cb6098..cc100ede 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -16,10 +16,12 @@ pub fn identifier_is_valid(ident: &str) -> Result<(), Error> { match ident { "Char" | "Chars" | "accept" | "begin" | "break" | "continue" | 
"else" | "end" | "exit" | "expect" | "false" | "for" | "if" | "in" | "loop" | "next" | "not" | "null" | "peek" - | "push" | "reject" | "repeat" | "return" | "true" | "void" => Err(Error::new( - None, - format!("Expected identifier, found reserved word '{}'", ident), - )), + | "push" | "reject" | "repeat" | "return" | "self" | "Self" | "true" | "void" => { + Err(Error::new( + None, + format!("Expected identifier, found reserved word '{}'", ident), + )) + } _ => Ok(()), } } @@ -83,6 +85,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_null" => ImlValue::Value(compiler.statics[1].clone()), "value_true" => ImlValue::Value(compiler.statics[2].clone()), "value_false" => ImlValue::Value(compiler.statics[3].clone()), + "value_self" => ImlValue::This(false), "value_integer" => match node["value"].to_i64() { Ok(0) => ImlValue::Value(compiler.statics[4].clone()), Ok(1) => ImlValue::Value(compiler.statics[5].clone()), @@ -92,6 +95,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { "value_string" => compiler.register_static(node["value"].clone()), // Tokens + "value_token_self" => ImlValue::This(true), "value_token_match" | "value_token_touch" => { let mut value = node["value"].to_string(); diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 1438e8ae..44977f5d 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -273,9 +273,14 @@ impl ImlOp { } /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program - pub fn compile_to_vec(&self, program: &mut ImlProgram, parselet: &ImlParselet) -> Vec { + pub fn compile_to_vec( + &self, + program: &mut ImlProgram, + parselet: &ImlParselet, + this: usize, + ) -> Vec { let mut ops = Vec::new(); - self.compile(program, parselet, &mut ops); + self.compile(program, parselet, this, &mut ops); ops } @@ -284,6 +289,7 @@ impl ImlOp { &self, program: &mut ImlProgram, parselet: &ImlParselet, + this: usize, ops: &mut Vec, 
) -> usize { let start = ops.len(); @@ -292,14 +298,14 @@ impl ImlOp { ImlOp::Nop => {} ImlOp::Op(op) => ops.push(op.clone()), ImlOp::Load { offset, target } => { - target.compile(program, parselet, &offset, None, ops); + target.compile(program, parselet, this, &offset, None, ops); } ImlOp::Call { offset, target, args, } => { - target.compile(program, parselet, &offset, Some(*args), ops); + target.compile(program, parselet, this, &offset, Some(*args), ops); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); @@ -309,7 +315,7 @@ impl ImlOp { while let Some(item) = iter.next() { let mut alt = Vec::new(); - item.compile(program, parselet, &mut alt); + item.compile(program, parselet, this, &mut alt); // When branch has more than one item, Frame it. if iter.len() > 0 { @@ -350,7 +356,7 @@ impl ImlOp { } ImlOp::Seq { seq, collection } => { for item in seq.iter() { - item.compile(program, parselet, ops); + item.compile(program, parselet, this, ops); } // Check if the sequence exists of more than one operational instruction @@ -385,13 +391,13 @@ impl ImlOp { } // Then-part - let mut jump = then_part.compile(program, parselet, ops) + 1; + let mut jump = then_part.compile(program, parselet, this, ops) + 1; if !*peek { let mut else_ops = Vec::new(); // Else-part - if else_part.compile(program, parselet, &mut else_ops) > 0 { + if else_part.compile(program, parselet, this, &mut else_ops) > 0 { ops.push(Op::Forward(else_ops.len() + 1)); jump += 1; ops.extend(else_ops); @@ -416,9 +422,9 @@ impl ImlOp { let consuming: Option = None; // fixme: Currently not sure if this is an issue. 
let mut repeat = Vec::new(); - initial.compile(program, parselet, ops); + initial.compile(program, parselet, this, ops); - if condition.compile(program, parselet, &mut repeat) > 0 { + if condition.compile(program, parselet, this, &mut repeat) > 0 { if *iterator { repeat.push(Op::ForwardIfNotVoid(2)); } else { @@ -428,7 +434,7 @@ impl ImlOp { repeat.push(Op::Break); } - body.compile(program, parselet, &mut repeat); + body.compile(program, parselet, this, &mut repeat); let len = repeat.len() + if consuming.is_some() { 3 } else { 2 }; ops.push(Op::Loop(len)); @@ -448,7 +454,7 @@ impl ImlOp { // DEPRECATED BELOW!!! ImlOp::Expect { body, msg } => { let mut expect = Vec::new(); - body.compile(program, parselet, &mut expect); + body.compile(program, parselet, this, &mut expect); ops.push(Op::Frame(expect.len() + 2)); @@ -465,7 +471,7 @@ impl ImlOp { } ImlOp::Not { body } => { let mut body_ops = Vec::new(); - let body_len = body.compile(program, parselet, &mut body_ops); + let body_len = body.compile(program, parselet, this, &mut body_ops); ops.push(Op::Frame(body_len + 3)); ops.extend(body_ops); ops.push(Op::Close); @@ -474,13 +480,13 @@ impl ImlOp { } ImlOp::Peek { body } => { ops.push(Op::Frame(0)); - body.compile(program, parselet, ops); + body.compile(program, parselet, this, ops); ops.push(Op::Reset); ops.push(Op::Close); } ImlOp::Repeat { body, min, max } => { let mut body_ops = Vec::new(); - let body_len = body.compile(program, parselet, &mut body_ops); + let body_len = body.compile(program, parselet, this, &mut body_ops); match (min, max) { (0, 0) => { diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 8395444f..e7cf74a6 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -72,7 +72,7 @@ impl ImlParselet { self as *const ImlParselet as usize } - pub fn compile(&self, program: &mut ImlProgram) -> Parselet { + pub fn compile(&self, program: &mut ImlProgram, this: usize) -> Parselet { let model = 
self.model.borrow(); Parselet::new( @@ -95,9 +95,9 @@ impl ImlParselet { }) .collect(), model.locals, - model.begin.compile_to_vec(program, self), - model.end.compile_to_vec(program, self), - model.body.compile_to_vec(program, self), + model.begin.compile_to_vec(program, self, this), + model.end.compile_to_vec(program, self, this), + model.body.compile_to_vec(program, self, this), ) } } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index e9baf6c6..4772dca3 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -54,16 +54,16 @@ impl ImlProgram { let mut finalize = Vec::new(); // list of consuming parselets required to be finalized // Loop until end of statics is reached - let mut i = 0; + let mut idx = 0; // self.statics grows inside of this while loop, therefore this condition. - while i < self.statics.len() { + while idx < self.statics.len() { // Pick only intermediate parselets, other static values are directly moved - let outer = match self.statics.get_index_mut(i).unwrap() { + let outer = match self.statics.get_index_mut(idx).unwrap() { (_, Some(_)) => unreachable!(), // may not exist! (ImlValue::Parselet(parselet), None) => parselet.clone(), _ => { - i += 1; + idx += 1; continue; } }; @@ -79,9 +79,9 @@ impl ImlProgram { } // Compile VM parselet from intermediate parselet - *self.statics.get_index_mut(i).unwrap().1 = Some(parselet.compile(&mut self)); + *self.statics.get_index_mut(idx).unwrap().1 = Some(parselet.compile(&mut self, idx)); - i += 1; + idx += 1; } let leftrec = self.finalize(finalize); @@ -150,6 +150,10 @@ impl ImlProgram { ImlValue::Shared(value) => { finalize_value(&*value.borrow(), current, visited, configs) } + ImlValue::This(_) => Some(Consumable { + leftrec: true, + nullable: false, + }), ImlValue::Parselet(parselet) => { match parselet.try_borrow() { // In case the parselet cannot be borrowed, it is left-recursive! 
diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 6db3fe0f..250f63f2 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -24,6 +24,7 @@ pub(in crate::compiler) enum ImlValue { Shared(Rc>), // Resolved: static + This(bool), // current function (false) or parselet (true) Value(RefValue), // Compile-time static value Parselet(ImlSharedParselet), // Parselet instance @@ -146,7 +147,7 @@ impl ImlValue { compiler.errors.push(Error::new( args[0].0, // report first parameter format!( - "{} got too many generic arguments ({} in total, expected {})", + "{} got too many generic arguments ({} in total, expected {})", target, new_constants.len() + args.len(), new_constants.len() @@ -214,6 +215,7 @@ impl ImlValue { pub fn is_callable(&self, without_arguments: bool) -> bool { match self { Self::Shared(value) => value.borrow().is_callable(without_arguments), + Self::This(_) => true, // fixme? Self::Value(value) => value.is_callable(without_arguments), Self::Parselet(parselet) => { let parselet = parselet.borrow(); @@ -237,6 +239,7 @@ impl ImlValue { pub fn is_consuming(&self) -> bool { match self { Self::Shared(value) => value.borrow().is_consuming(), + Self::This(consuming) => *consuming, Self::Value(value) => value.is_consuming(), Self::Parselet(parselet) => parselet.borrow().model.borrow().consuming, Self::Name { name, .. 
} => crate::utils::identifier_is_consumable(name), @@ -250,6 +253,7 @@ impl ImlValue { &self, program: &mut ImlProgram, parselet: &ImlParselet, + this: usize, offset: &Option, call: Option>, ops: &mut Vec, @@ -258,11 +262,14 @@ impl ImlValue { ops.push(Op::Offset(Box::new(*offset))); } + // Remember current ops start let start = ops.len(); match self { ImlValue::Shared(value) => { - return value.borrow().compile(program, parselet, offset, call, ops) + return value + .borrow() + .compile(program, parselet, this, offset, call, ops) } ImlValue::Value(value) => match &*value.borrow() { Value::Void => ops.push(Op::PushVoid), @@ -282,7 +289,9 @@ impl ImlValue { name, generic: true, .. - } => return parselet.constants[name].compile(program, parselet, offset, call, ops), + } => { + return parselet.constants[name].compile(program, parselet, this, offset, call, ops) + } ImlValue::Name { name, generic: false, @@ -299,6 +308,7 @@ impl ImlValue { return; } + ImlValue::This(_) => {} ImlValue::Parselet(parselet) => { // When value is a parselet, check for accepted constant configuration let parselet = parselet.borrow(); @@ -328,7 +338,10 @@ impl ImlValue { // Check if something has been pushed before. if start == ops.len() { - let idx = program.register(self).unwrap(); + let idx = match self { + ImlValue::This(_) => this, + _ => program.register(self).unwrap(), + }; match call { // Load @@ -366,6 +379,8 @@ impl std::fmt::Display for ImlValue { match self { Self::Void => write!(f, "void"), Self::Shared(value) => value.borrow().fmt(f), + Self::This(true) => write!(f, "Self"), + Self::This(false) => write!(f, "self"), Self::Value(value) => write!(f, "{}", value.repr()), Self::Parselet(parselet) => write!(f, "{}", parselet), Self::Name { name, .. 
} => write!(f, "{}", name), diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 3637fa4d..11f1d763 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -3752,6 +3752,34 @@ impl Parser { ])) ])) ])) + ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_touch", + "value" => "Self" + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "ast" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "value_string", + "value" => "value_token_self" + ])) + ])) + ])) + ])) + ])) ])) ])) ])) @@ -4273,6 +4301,38 @@ impl Parser { ])) ])) ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_touch", + "value" => "self" + ])), + (value!([ + "emit" => "identifier", + "value" => "_standalone_" + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "ast" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "value_string", + "value" => "value_self" + ])) + ])) + ])) + ])) + ])) + ])), (value!([ "emit" => "sequence", "children" => diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index 7bb0815c..99f81813 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -14,7 +14,7 @@ ___ : (T_EOL _)* # optional line-breaks followed by whitespace _standalone_ : @{ # helper parselet to ensure that identifiers stand alone - # fixme: When generic parselets are available, this can be replaced by a Standalone invocation + # fixme: When generic parselets are available, this can be replaced by a Keyword

<P> invocation peek not Char _ _ } @@ -238,6 +238,7 @@ TokenLiteral : @{ 'Chars' ast("value_token_anys") 'Char' '<' Ccl '>' ast("value_token_ccl") 'Char' ast("value_token_any") + 'Self' ast("value_token_self") } TokenAtom : @{ @@ -268,6 +269,7 @@ Literal : @{ 'false' _standalone_ ast("value_false") 'void' _standalone_ ast("value_void") 'null' _standalone_ ast("value_null") + 'self' _standalone_ ast("value_self") T_String ast("value_string") T_Float T_Integer diff --git a/src/prelude.tok b/src/prelude.tok index 2de9f88c..e9faf995 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -19,5 +19,11 @@ Repeat : @<P>
min=1, max=void { } } +List : @ { + Self Separator P $1 + $3 + if empty Self Separator # allows for trailing Separator + P ($1, ) +} + Number : Float | Int Token : Word | Number | AsciiPunctuation diff --git a/src/vm/program.rs b/src/vm/program.rs index baee3dcc..012d3f73 100644 --- a/src/vm/program.rs +++ b/src/vm/program.rs @@ -53,7 +53,7 @@ impl Program { Ok(Some(value.clone())) } } - Ok(_) => Ok(None), + Ok(_) | Err(Reject::Next) => Ok(None), Err(Reject::Error(error)) => Err(*error), Err(other) => Err(Error::new(None, format!("Runtime error {:?}", other))), } From d966e2aebb5c3fb7da7fdc95b0bcaf16d24d24e4 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 7 Jun 2023 22:23:46 +0200 Subject: [PATCH 38/94] Implemented `Self` usage from todo in `tokay.tok` This fixes #31 as well already. --- src/compiler/parser.rs | 18 +++++++++--------- src/compiler/tokay.tok | 12 ++++++------ src/prelude.tok | 2 +- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 11f1d763..436cc1ca 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -3924,7 +3924,7 @@ impl Parser { (value!([ (value!([ "emit" => "identifier", - "value" => "Token1" + "value" => "Token" ])), (value!([ "emit" => "value_parselet", @@ -4049,8 +4049,8 @@ impl Parser { "emit" => "op_mod_expect", "children" => (value!([ - "emit" => "identifier", - "value" => "Token1" + "emit" => "value_token_self", + "value" => "Self" ])) ])), (value!([ @@ -4089,8 +4089,8 @@ impl Parser { "emit" => "op_mod_expect", "children" => (value!([ - "emit" => "identifier", - "value" => "Token1" + "emit" => "value_token_self", + "value" => "Self" ])) ])), (value!([ @@ -4129,8 +4129,8 @@ impl Parser { "emit" => "op_mod_expect", "children" => (value!([ - "emit" => "identifier", - "value" => "Token1" + "emit" => "value_token_self", + "value" => "Self" ])) ])), (value!([ @@ -4425,7 +4425,7 @@ impl Parser { ])), (value!([ "emit" => "identifier", - "value" => 
"Token1" + "value" => "Token" ])), (value!([ "emit" => "sequence", @@ -7379,7 +7379,7 @@ impl Parser { (value!([ (value!([ "emit" => "identifier", - "value" => "Token1" + "value" => "Token" ])), (value!([ "emit" => "identifier", diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index 99f81813..041a56e1 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -250,14 +250,14 @@ TokenAtom : @{ ParseletInstance } -Token1 : @{ # todo: Token1 can be renamed back to Token again when #31 is fixed and Self is used. +Token : @{ TokenAtom '+' ast("op_mod_pos") TokenAtom '*' ast("op_mod_kle") TokenAtom '?' ast("op_mod_opt") TokenAtom - 'peek' _standalone_ expect Token1 ast("op_mod_peek") - 'not' _standalone_ expect Token1 ast("op_mod_not") - 'expect' _standalone_ expect Token1 ast("op_mod_expect") + 'peek' _standalone_ expect Self ast("op_mod_peek") + 'not' _standalone_ expect Self ast("op_mod_not") + 'expect' _standalone_ expect Self ast("op_mod_expect") } # Expression & Flow @@ -280,7 +280,7 @@ Literal : @{ Atomic : @{ '(' _ ___ HoldExpression ___ ')' Literal - Token1 + Token 'if' _standalone_ expect HoldExpression ___ expect Statement (___ 'else' _standalone_ ___ expect Statement)? ast("op_if") 'for' _standalone_ expect Lvalue _ expect 'in' _standalone_ expect Expression ___ expect Statement ast("op_for") 'loop' _standalone_ HoldExpression ___ Block ast("op_loop") @@ -402,7 +402,7 @@ Instruction : @{ 'end' _standalone_ Sequences expect T_EOL ast("end") T_Identifier _ ':' _ { Literal _ peek T_EOL - Token1 _ peek T_EOL + Token _ peek T_EOL Sequences } expect T_EOL ast("constant") Statement T_EOL diff --git a/src/prelude.tok b/src/prelude.tok index e9faf995..ea7c12b9 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -21,7 +21,7 @@ Repeat : @

min=1, max=void { List : @ { Self Separator P $1 + $3 - if empty Self Separator # allows for trailing Separator + if empty (Self Separator) # allows for trailing Separator P ($1, ) } From bf0ca03ac455aeeb432bee0e78622482bbb36594 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 11 Jun 2023 22:23:37 +0200 Subject: [PATCH 39/94] *-deref (don't call) operator + prelude Expect This already fixes #17 --- src/compiler/ast.rs | 7 +++++++ src/compiler/parser.rs | 36 ++++++++++++++++++++++++++++++++++++ src/compiler/tokay.tok | 1 + src/prelude.tok | 16 +++++++--------- 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index cc100ede..e173b0e3 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1214,6 +1214,13 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "repeat" => Op::Repeat.into(), + "deref" => { + let children = node["children"].borrow(); + let children = children.object::().unwrap(); + + traverse_node_rvalue(compiler, children, Rvalue::Load) + } + "unary" => { let children = node["children"].borrow(); let children = children.object::().unwrap(); diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 436cc1ca..0fcfcbfc 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -4911,6 +4911,42 @@ impl Parser { ])) ])) ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_touch", + "value" => "*" + ])), + (value!([ + "emit" => "identifier", + "value" => "_" + ])), + (value!([ + "emit" => "identifier", + "value" => "Unary" + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "ast" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "value_string", + "value" => "op_deref" + ])) + ])) + ])) + ])) + ])) + ])), (value!([ "emit" => "sequence", "children" => diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok 
index 041a56e1..1a5d283e 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -299,6 +299,7 @@ Rvalue : @{ Unary : @{ '-' not '-' _ Unary ast("op_unary_neg") '!' _ Unary ast("op_unary_not") + '*' _ Unary ast("op_deref") Rvalue _ } diff --git a/src/prelude.tok b/src/prelude.tok index ea7c12b9..42b596a9 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -3,18 +3,11 @@ Repeat : @

min=1, max=void { P { res += $1 - - if !max || res.len < max { - repeat - } + if !max || res.len < max repeat } end { - print("Repeat", res, res.len, min) - if res.len < min { - reject - } - + if res.len < min reject res } } @@ -25,5 +18,10 @@ List : @ { P ($1, ) } +Expect : @<P> msg=void { + P + error(msg || "Expecting " + *P) +} + Number : Float | Int Token : Word | Number | AsciiPunctuation From 305fa23a8a4cbe175c649523acff20276c1e0a77 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 16 Jun 2023 09:27:05 +0200 Subject: [PATCH 40/94] Implement `reset`-keyword, `Not<P>`, `Peek<P>
` --- src/compiler/ast.rs | 16 +++++++++------- src/compiler/parser.rs | 32 ++++++++++++++++++++++++++++++++ src/compiler/tokay.tok | 1 + src/prelude.tok | 9 +++++++++ 4 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index e173b0e3..f976513d 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -16,7 +16,7 @@ pub fn identifier_is_valid(ident: &str) -> Result<(), Error> { match ident { "Char" | "Chars" | "accept" | "begin" | "break" | "continue" | "else" | "end" | "exit" | "expect" | "false" | "for" | "if" | "in" | "loop" | "next" | "not" | "null" | "peek" - | "push" | "reject" | "repeat" | "return" | "self" | "Self" | "true" | "void" => { + | "push" | "reject" | "repeat" | "reset" | "return" | "self" | "Self" | "true" | "void" => { Err(Error::new( None, format!("Expected identifier, found reserved word '{}'", ident), @@ -1206,6 +1206,13 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { Op::Continue.into() } + "deref" => { + let children = node["children"].borrow(); + let children = children.object::().unwrap(); + + traverse_node_rvalue(compiler, children, Rvalue::Load) + } + "next" => Op::Next.into(), "nop" => ImlOp::Nop, @@ -1214,12 +1221,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "repeat" => Op::Repeat.into(), - "deref" => { - let children = node["children"].borrow(); - let children = children.object::().unwrap(); - - traverse_node_rvalue(compiler, children, Rvalue::Load) - } + "reset" => Op::Reset.into(), "unary" => { let children = node["children"].borrow(); diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 0fcfcbfc..39810696 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -6815,6 +6815,38 @@ impl Parser { ])) ])) ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_touch", + "value" => "reset" + ])), + (value!([ + "emit" => "identifier", + "value" => 
"_standalone_" + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "ast" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "value_string", + "value" => "op_reset" + ])) + ])) + ])) + ])) + ])) + ])), (value!([ "emit" => "sequence", "children" => diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index 1a5d283e..cec4f7c9 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -372,6 +372,7 @@ Statement : @{ 'push' _standalone_ Expression? ast("op_push") 'reject' _standalone_ ast("op_reject") 'repeat' _standalone_ ast("op_repeat") + 'reset' _standalone_ ast("op_reset") 'return' _standalone_ Expression? ast("op_accept") Expression } diff --git a/src/prelude.tok b/src/prelude.tok index 42b596a9..f2573f25 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -18,6 +18,15 @@ List : @ { P ($1, ) } +Not : @

{ + P reject + accept +} + +Peek : @<P> { + P reset +} + Expect : @<P>
msg=void { P error(msg || "Expecting " + *P) From b37d399fa2fa9f9df65af9c5dced79cbd9ef79d2 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 20 Jun 2023 20:44:41 +0200 Subject: [PATCH 41/94] Some improvements on ImlValue::Instance --- src/compiler/ast.rs | 43 +++++++++++------- src/compiler/compiler.rs | 16 ++++--- src/compiler/iml/imlop.rs | 38 +++++++++------- src/compiler/iml/imlvalue.rs | 86 +++++++++++------------------------- 4 files changed, 85 insertions(+), 98 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index f976513d..03544bf7 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -39,7 +39,7 @@ pub(in crate::compiler) fn traverse(compiler: &mut Compiler, ast: &RefValue) -> } else if let Some(dict) = ast.borrow().object::() { traverse_node_rvalue(compiler, dict, Rvalue::CallOrLoad) } else { - ImlOp::load(None, ImlValue::from(RefValue::from(ast.clone()))) + ImlOp::load(compiler, None, ImlValue::from(RefValue::from(ast.clone()))) } } @@ -290,7 +290,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { } let body = body.borrow(); - let body = traverse_node_rvalue(compiler, body.object::().unwrap(), Rvalue::Load); + let body = + traverse_node_rvalue(compiler, body.object::().unwrap(), Rvalue::CallOrLoad); let ret = compiler.parselet_pop( traverse_node_offset(node), @@ -709,9 +710,11 @@ fn traverse_node_rvalue(compiler: &mut Compiler, node: &Dict, mode: Rvalue) -> I let value = traverse_node_value(compiler, node); return match mode { - Rvalue::Load => ImlOp::load(offset, value), - Rvalue::CallOrLoad => ImlOp::call(offset, value, None), - Rvalue::Call(args, nargs) => ImlOp::call(offset, value, Some((args, nargs))), + Rvalue::Load => ImlOp::load(compiler, offset, value), + Rvalue::CallOrLoad => ImlOp::call(compiler, offset, value, None), + Rvalue::Call(args, nargs) => { + ImlOp::call(compiler, offset, value, Some((args, nargs))) + } }; } @@ -870,7 +873,7 @@ fn traverse_node(compiler: &mut Compiler, 
node: &Dict) -> ImlOp { body, ); - ImlOp::call(None, main, None) + ImlOp::call(compiler, None, main, None) } _ => body, } @@ -928,6 +931,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { let ident = ident.object::().unwrap().as_str(); ops.push(ImlOp::load( + compiler, traverse_node_offset(¶m), ImlValue::from(RefValue::from(ident)), )); @@ -1232,7 +1236,11 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // Evaluate operation at compile-time if possible if let Ok(value) = res.get_evaluable_value() { if let Ok(value) = value.unary_op(parts[2]) { - return ImlOp::load(traverse_node_offset(node), ImlValue::from(value)); + return ImlOp::load( + compiler, + traverse_node_offset(node), + ImlValue::from(value), + ); } } @@ -1297,6 +1305,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { { if let Ok(value) = left.binary_op(right, parts[2]) { return ImlOp::load( + compiler, traverse_node_offset(node), ImlValue::from(value), ); @@ -1366,6 +1375,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // mod_pos on Token::Char becomes Token::Chars "pos" | "kle" => { let mut chars = ImlOp::call( + compiler, traverse_node_offset(node), ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))), None, @@ -1381,6 +1391,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // mod_not on Token::Char becomes negated Token::Char "not" => { return ImlOp::call( + compiler, traverse_node_offset(node), ImlValue::from(RefValue::from(Token::Char( ccl.clone().negate(), @@ -1475,22 +1486,20 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // Create an iter() on the iter expression let initial = ImlOp::from(vec![ traverse_node_rvalue(compiler, iter_expr, Rvalue::CallOrLoad), - ImlOp::call( - None, - compiler.get_builtin("iter").unwrap(), - Some((1, false)), - ), + { + let iter = compiler.get_builtin("iter").unwrap(); + ImlOp::call(compiler, None, iter, Some((1, false))) + }, 
ImlOp::from(Op::StoreFast(temp)), ]); // Create the condition, which calls iter_next() until void is returned let condition = ImlOp::from(vec![ ImlOp::from(Op::LoadFast(temp)), - ImlOp::call( - None, - compiler.get_builtin("iter_next").unwrap(), - Some((1, false)), - ), + { + let iter_next = compiler.get_builtin("iter_next").unwrap(); + ImlOp::call(compiler, None, iter_next, Some((1, false))) + }, traverse_node_lvalue( compiler, var, true, true, //hold for preceding loop break check ), diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index ead09c1e..48b008a4 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -433,15 +433,17 @@ impl Compiler { if name == "_" || name == "__" { self.parselet_push(); - self.parselet_mark_consuming(); + + // becomes `Value+` + let value_pos = ImlOp::call(self, None, value, None).into_positive(); + value = self.parselet_pop( None, Some("__".to_string()), Some(0), // Zero severity None, None, - // becomes `Value+` - ImlOp::call(None, value, None).into_positive(), + value_pos, ); // Remind "__" as new constant @@ -449,15 +451,17 @@ impl Compiler { // ...and then in-place "_" is defined as `_ : __?` self.parselet_push(); - self.parselet_mark_consuming(); + + // becomes `Value?` + let value_opt = ImlOp::call(self, None, value, None).into_optional(); + value = self.parselet_pop( None, Some(name.to_string()), Some(0), // Zero severity None, None, - // becomes `Value?` - ImlOp::call(None, value, None).into_optional(), + value_opt, ); // Insert "_" afterwards diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 44977f5d..89732426 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -100,7 +100,7 @@ impl ImlOp { } /// Load value - pub fn load(offset: Option, value: ImlValue) -> ImlOp { + pub fn load(_compiler: &mut Compiler, offset: Option, value: ImlValue) -> ImlOp { ImlOp::Load { offset, target: value, @@ -109,24 +109,32 @@ impl ImlOp { /// Load unknown value by name 
pub fn load_by_name(compiler: &mut Compiler, offset: Option, name: String) -> ImlOp { - Self::load( - offset.clone(), - ImlValue::Name { - offset, - name, - generic: false, - } - .try_resolve(compiler), - ) + let value = ImlValue::Name { + offset, + name, + generic: false, + } + .try_resolve(compiler); + + Self::load(compiler, offset.clone(), value) } /// Call known value - pub fn call(offset: Option, value: ImlValue, args: Option<(usize, bool)>) -> ImlOp { + pub fn call( + compiler: &mut Compiler, + offset: Option, + value: ImlValue, + args: Option<(usize, bool)>, + ) -> ImlOp { // When args is unset, and the value is not callable without arguments, - // consider this call as a load. + // consider this call is a load. if args.is_none() && !value.is_callable(true) { // Currently not planned as final - return Self::load(offset, value); + return Self::load(compiler, offset, value); + } + + if value.is_consuming() { + compiler.parselet_mark_consuming(); } ImlOp::Call { @@ -605,17 +613,15 @@ impl ImlOp { ImlOp::Call { target, .. } => { if target.is_consuming() { consuming = true; - return false; // stop further examination } } ImlOp::Op(Op::Next) => { consuming = true; - return false; // stop further examination } _ => {} } - true + !consuming }); consuming diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 250f63f2..72e8d185 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -191,12 +191,13 @@ impl ImlValue { None } } - _ => return true, + _ => return true, // anything else is considered as resolved }; if let Some(resolve) = resolve { *self = resolve; - return true; + // Recall resolve on the resolved object. + return self.resolve(compiler); } false @@ -231,6 +232,7 @@ impl ImlValue { true } } + Self::Instance { target, .. 
} => target.is_callable(without_arguments), _ => false, } } @@ -340,7 +342,8 @@ impl ImlValue { if start == ops.len() { let idx = match self { ImlValue::This(_) => this, - _ => program.register(self).unwrap(), + ImlValue::Instance { .. } => panic!("OK"), + resolved => program.register(resolved).unwrap(), }; match call { @@ -383,78 +386,43 @@ impl std::fmt::Display for ImlValue { Self::This(false) => write!(f, "self"), Self::Value(value) => write!(f, "{}", value.repr()), Self::Parselet(parselet) => write!(f, "{}", parselet), - Self::Name { name, .. } => write!(f, "{}", name), - _ => todo!(), - } - } -} - -/* -impl std::fmt::Display for ImlValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Void => write!(f, "void"), - Self::Unknown(name) | Self::Undefined(name) => write!(f, "{}", name), - Self::Value(value) => write!(f, "{}", value.repr()), - Self::Parselet { - parselet, - constants, - } => { - write!( - f, - "{}", - parselet - .borrow() - .name - .as_deref() - .unwrap_or("") - )?; - - if !constants.is_empty() { - write!(f, "<")?; - for (i, (name, value)) in constants.iter().enumerate() { - if matches!(value, ImlValue::Void) { - write!(f, "{}{}", if i > 0 { ", " } else { "" }, name)?; - } else { - write!(f, "{}{}:{}", if i > 0 { ", " } else { "" }, name, value)?; - } - } - write!(f, ">")?; - } - - Ok(()) + Self::Global(var) => write!(f, "global({})", var), + Self::Local(var) => write!(f, "local({})", var), + Self::Name { name, generic, .. } => { + write!(f, "{}{}", name, if *generic { "!" } else { "" }) } - Self::Local(addr) => write!(f, "local@{}", addr), - Self::Global(addr) => write!(f, "global@{}", addr), - Self::Symbol { - name, - gen_by_seq, - gen_by_name, + Self::Instance { + target, + args, + nargs, + .. 
} => { write!(f, "{}", target)?; + write!(f, "<")?; let mut first = true; - for item in gen_by_seq { - write!(f, "{}{}", if !first { ", " } else { "<" }, item)?; + for arg in args { + write!(f, "{}{}", if !first { ", " } else { "" }, arg.1)?; first = false; } - for (name, item) in gen_by_name.iter() { - write!(f, "{}{}:{}", if !first { ", " } else { "<" }, name, item)?; + for narg in nargs.keys() { + write!( + f, + "{}{}:{}", + if !first { ", " } else { "" }, + narg, + nargs[narg].1 + )?; first = false; } - if !first { - write!(f, ">")?; - } - - Ok(()) + write!(f, ">") } } } } -*/ impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { From d401e13a51e1145425ed3a7a55eb01f0254efbff Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 22 Jun 2023 15:08:33 +0200 Subject: [PATCH 42/94] Re-introduce unresolved ImlValue::Generic Some considering and code refactor, still intermediate as its not working. --- src/compiler/ast.rs | 3 +- src/compiler/compiler.rs | 4 -- src/compiler/iml/imlop.rs | 14 +------ src/compiler/iml/imlparselet.rs | 2 + src/compiler/iml/imlprogram.rs | 8 ++-- src/compiler/iml/imlvalue.rs | 67 +++++++++++++++++---------------- 6 files changed, 42 insertions(+), 56 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 03544bf7..ddc42607 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -209,9 +209,8 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { compiler.set_constant( &ident, - ImlValue::Name { + ImlValue::Generic { offset, - generic: true, name: ident.to_string(), }, ); diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 48b008a4..41358e8b 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -499,10 +499,6 @@ impl Compiler { .. } => { if let Some(value) = constants.get(name) { - if !top_parselet && matches!(value, ImlValue::Name { generic: true, .. 
}) { - continue; - } - return Some(value.clone()); } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 89732426..f07daf0d 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -109,12 +109,7 @@ impl ImlOp { /// Load unknown value by name pub fn load_by_name(compiler: &mut Compiler, offset: Option, name: String) -> ImlOp { - let value = ImlValue::Name { - offset, - name, - generic: false, - } - .try_resolve(compiler); + let value = ImlValue::Name { offset, name }.try_resolve(compiler); Self::load(compiler, offset.clone(), value) } @@ -158,12 +153,7 @@ impl ImlOp { ImlOp::Call { offset: offset.clone(), - target: ImlValue::Name { - offset, - name, - generic: false, - } - .try_resolve(compiler), + target: ImlValue::Name { offset, name }.try_resolve(compiler), args, } } diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index e7cf74a6..6c92f6a6 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -58,6 +58,8 @@ impl ImlParselet { } } + /// Derives a parselet from a given constants configuration. + //pub fn derive(&self, from: &IndexMap, offset: Option) -> Option { pub fn derive(&self, constants: IndexMap, offset: Option) -> Self { Self { model: self.model.clone(), diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 4772dca3..a5d73b5a 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -176,11 +176,9 @@ impl ImlProgram { None } } - ImlValue::Name { - name, - generic: true, - .. - } => finalize_value(¤t.constants[name], current, visited, configs), + ImlValue::Generic { name, .. 
} => { + finalize_value(¤t.constants[name], current, visited, configs) + } _ => None, } } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 72e8d185..cf9009a7 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -36,7 +36,11 @@ pub(in crate::compiler) enum ImlValue { Name { // Unresolved name offset: Option, // Source offset - generic: bool, // Generic name, to be resolved during compilation + name: String, // Identifier + }, + Generic { + // Unresolved generic + offset: Option, // Source offset name: String, // Identifier }, Instance { @@ -64,12 +68,7 @@ impl ImlValue { pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { let resolve = match self { Self::Shared(value) => return value.borrow_mut().resolve(compiler), - Self::Name { - name, - generic: false, - .. - } => compiler.get(&name), - Self::Name { generic: true, .. } => return false, + Self::Name { name, .. } => compiler.get(&name), Self::Instance { offset, target, @@ -196,8 +195,7 @@ impl ImlValue { if let Some(resolve) = resolve { *self = resolve; - // Recall resolve on the resolved object. - return self.resolve(compiler); + return true; } false @@ -244,17 +242,19 @@ impl ImlValue { Self::This(consuming) => *consuming, Self::Value(value) => value.is_consuming(), Self::Parselet(parselet) => parselet.borrow().model.borrow().consuming, - Self::Name { name, .. } => crate::utils::identifier_is_consumable(name), + Self::Name { name, .. } | Self::Generic { name, .. } => { + crate::utils::identifier_is_consumable(name) + } Self::Instance { target, .. 
} => target.is_consuming(), _ => false, } } - /// Compile a resolved intermediate value into VM code + /// Compile a resolved intermediate value into VM code or register it as a static pub fn compile( &self, program: &mut ImlProgram, - parselet: &ImlParselet, + current: &ImlParselet, this: usize, offset: &Option, call: Option>, @@ -271,7 +271,7 @@ impl ImlValue { ImlValue::Shared(value) => { return value .borrow() - .compile(program, parselet, this, offset, call, ops) + .compile(program, current, this, offset, call, ops) } ImlValue::Value(value) => match &*value.borrow() { Value::Void => ops.push(Op::PushVoid), @@ -287,18 +287,10 @@ impl ImlValue { }, ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), - ImlValue::Name { - name, - generic: true, - .. - } => { - return parselet.constants[name].compile(program, parselet, this, offset, call, ops) + ImlValue::Generic { name, .. } => { + return current.constants[name].compile(program, current, this, offset, call, ops) } - ImlValue::Name { - name, - generic: false, - .. - } => { + ImlValue::Name { name, .. } => { program.errors.push(Error::new( offset.clone(), if call.is_some() { @@ -312,13 +304,17 @@ impl ImlValue { } ImlValue::This(_) => {} ImlValue::Parselet(parselet) => { - // When value is a parselet, check for accepted constant configuration let parselet = parselet.borrow(); + + // Check for accepted constant configuration; + // This has to be checked here, because a parselet is not always the result + // of an ImlValue::Instance, and therefore this can only be checked up here. 
let mut required = Vec::new(); - for (name, default) in &parselet.constants { - if matches!(default, ImlValue::Void) { - required.push(name.to_string()); + for (name, value) in &parselet.constants { + match value { + ImlValue::Void => required.push(name.to_string()), + _ => {} } } @@ -326,7 +322,7 @@ impl ImlValue { program.errors.push(Error::new( offset.clone(), format!( - "Call to '{}' requires generic argument {}", + "{} requires assignment of generic argument {}", self, required.join(", ") ), @@ -342,7 +338,13 @@ impl ImlValue { if start == ops.len() { let idx = match self { ImlValue::This(_) => this, - ImlValue::Instance { .. } => panic!("OK"), + /* + ImlValue::Parselet(parselet) => { + let parselet = parselet.borrow(); + + + } + */ resolved => program.register(resolved).unwrap(), }; @@ -388,9 +390,8 @@ impl std::fmt::Display for ImlValue { Self::Parselet(parselet) => write!(f, "{}", parselet), Self::Global(var) => write!(f, "global({})", var), Self::Local(var) => write!(f, "local({})", var), - Self::Name { name, generic, .. } => { - write!(f, "{}{}", name, if *generic { "!" } else { "" }) - } + Self::Name { name, .. } => write!(f, "{}", name), + Self::Generic { name, .. } => write!(f, "{}!", name), Self::Instance { target, args, From 35861fcef0a9bb99851bcd613a3b895a381836e9 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 27 Jul 2023 00:40:02 +0200 Subject: [PATCH 43/94] Broken attempt to derive during finalization This commit crashes randomly. Committed to have this attempt documented somewhere. Need another approach or entire redesign. 
--- src/compiler/iml/imlparselet.rs | 47 ++++++++++++++++++--- src/compiler/iml/imlprogram.rs | 72 +++++++++++++++++++++------------ src/compiler/iml/imlvalue.rs | 36 +++++++++-------- 3 files changed, 108 insertions(+), 47 deletions(-) diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 6c92f6a6..7f5c4b59 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -58,16 +58,35 @@ impl ImlParselet { } } - /// Derives a parselet from a given constants configuration. - //pub fn derive(&self, from: &IndexMap, offset: Option) -> Option { - pub fn derive(&self, constants: IndexMap, offset: Option) -> Self { - Self { + /** Derives a parselet from a given namespace when required. + + The namespace defines the constant configuration of a surrounding parselet, + and extends the parselet's constant configuration, making it a derivation. + + Returns None if no derivation can be created, otherwise returns Some(Self). + */ + pub fn derive(&self, namespace: &IndexMap) -> Option { + let mut constants = self.constants.clone(); + let mut modified = false; + + for value in constants.values_mut() { + if let ImlValue::Generic { name, .. 
} = value { + *value = namespace.get(name).unwrap().clone(); + modified = true; + } + } + + if !modified { + return None; + } + + Some(ImlParselet { model: self.model.clone(), constants, - offset, + offset: self.offset.clone(), name: self.name.clone(), severity: self.severity, - } + }) } pub fn id(&self) -> usize { @@ -169,6 +188,22 @@ impl ImlSharedParselet { pub fn new(parselet: ImlParselet) -> Self { Self(Rc::new(RefCell::new(parselet))) } + + pub fn derive(&self, namespace: &IndexMap) -> Self { + if let Ok(parselet) = self.try_borrow() { + if let Some(derive) = parselet.derive(namespace) { + return Self::new(derive); + } + } + + self.clone() + } +} + +impl std::hash::Hash for ImlSharedParselet { + fn hash(&self, state: &mut H) { + self.borrow().hash(state); + } } impl std::fmt::Debug for ImlSharedParselet { diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index a5d73b5a..c711a3e9 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -34,12 +34,10 @@ impl ImlProgram { pub fn register(&mut self, value: &ImlValue) -> Result { match value { ImlValue::Shared(value) => self.register(&*value.borrow()), - ImlValue::Parselet { .. } | ImlValue::Value(_) => { - match self.statics.get_index_of(value) { - None => Ok(self.statics.insert_full(value.clone(), None).0), - Some(idx) => Ok(idx), - } - } + ImlValue::Parselet(_) | ImlValue::Value(_) => match self.statics.get_index_of(value) { + None => Ok(self.statics.insert_full(value.clone(), None).0), + Some(idx) => Ok(idx), + }, _ => Err(()), // Cannot register this kind of value } } @@ -51,7 +49,7 @@ impl ImlProgram { and nullable parselet detection occurs. 
*/ pub fn compile(mut self) -> Result> { - let mut finalize = Vec::new(); // list of consuming parselets required to be finalized + let mut finalize = HashSet::new(); // list of consuming parselets required to be finalized // Loop until end of statics is reached let mut idx = 0; @@ -75,7 +73,7 @@ impl ImlProgram { // Memoize parselets required to be finalized (needs a general rework later...) if model.consuming { //fixme... - finalize.push(outer.clone()); + finalize.insert(outer.clone()); } // Compile VM parselet from intermediate parselet @@ -132,7 +130,7 @@ impl ImlProgram { It can only be run on a previously compiled program without any unresolved usages. */ - fn finalize(&mut self, parselets: Vec) -> HashMap { + fn finalize(&mut self, parselets: HashSet) -> HashMap { #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] struct Consumable { leftrec: bool, @@ -142,27 +140,44 @@ impl ImlProgram { // Finalize ImlValue fn finalize_value( value: &ImlValue, + all: &HashSet, current: &ImlParselet, visited: &mut HashSet, configs: &mut HashMap, ) -> Option { match value { ImlValue::Shared(value) => { - finalize_value(&*value.borrow(), current, visited, configs) + finalize_value(&*value.borrow(), all, current, visited, configs) } ImlValue::This(_) => Some(Consumable { leftrec: true, nullable: false, }), ImlValue::Parselet(parselet) => { + // Try to borrow the parselet directly match parselet.try_borrow() { // In case the parselet cannot be borrowed, it is left-recursive! Err(_) => Some(Consumable { leftrec: true, nullable: false, }), - // Otherwise dive into this parselet... - Ok(parselet) => finalize_parselet(&parselet, visited, configs), + // Otherwise, further examine derive! + Ok(_) => { + // Try to derive the parselet with current constants + let derived = parselet.derive(¤t.constants); + + // The derived parselet must be in all! 
+ let parselet = all.get(&derived).unwrap(); + + match parselet.try_borrow() { + // In case the derived parselet cannot be borrowed, it is left-recursive! + Err(_) => Some(Consumable { + leftrec: true, + nullable: false, + }), + Ok(parselet) => finalize_parselet(&parselet, all, visited, configs), + } + } } } ImlValue::Value(callee) => { @@ -177,7 +192,8 @@ impl ImlProgram { } } ImlValue::Generic { name, .. } => { - finalize_value(¤t.constants[name], current, visited, configs) + // fixme: Is this still relevant??? + finalize_value(¤t.constants[name], all, current, visited, configs) } _ => None, } @@ -186,19 +202,22 @@ impl ImlProgram { // Finalize ImlOp fn finalize_op( op: &ImlOp, + all: &HashSet, current: &ImlParselet, visited: &mut HashSet, configs: &mut HashMap, ) -> Option { match op { - ImlOp::Call { target, .. } => finalize_value(target, current, visited, configs), + ImlOp::Call { target, .. } => { + finalize_value(target, all, current, visited, configs) + } ImlOp::Alt { alts } => { let mut leftrec = false; let mut nullable = false; let mut consumes = false; for alt in alts { - if let Some(consumable) = finalize_op(alt, current, visited, configs) { + if let Some(consumable) = finalize_op(alt, all, current, visited, configs) { leftrec |= consumable.leftrec; nullable |= consumable.nullable; consumes = true; @@ -221,7 +240,8 @@ impl ImlProgram { break; } - if let Some(consumable) = finalize_op(item, current, visited, configs) { + if let Some(consumable) = finalize_op(item, all, current, visited, configs) + { leftrec |= consumable.leftrec; nullable = consumable.nullable; consumes = true; @@ -235,9 +255,9 @@ impl ImlProgram { } } ImlOp::If { then, else_, .. 
} => { - let then = finalize_op(then, current, visited, configs); + let then = finalize_op(then, all, current, visited, configs); - if let Some(else_) = finalize_op(else_, current, visited, configs) { + if let Some(else_) = finalize_op(else_, all, current, visited, configs) { if let Some(then) = then { Some(Consumable { leftrec: then.leftrec || else_.leftrec, @@ -259,7 +279,7 @@ impl ImlProgram { let mut ret: Option = None; for part in [initial, condition, body] { - let part = finalize_op(part, current, visited, configs); + let part = finalize_op(part, all, current, visited, configs); if let Some(part) = part { ret = if let Some(ret) = ret { @@ -277,12 +297,12 @@ impl ImlProgram { } // DEPRECATED BELOW!!! - ImlOp::Expect { body, .. } => finalize_op(body, current, visited, configs), + ImlOp::Expect { body, .. } => finalize_op(body, all, current, visited, configs), ImlOp::Not { body } | ImlOp::Peek { body } => { - finalize_op(body, current, visited, configs) + finalize_op(body, all, current, visited, configs) } ImlOp::Repeat { body, min, .. 
} => { - if let Some(consumable) = finalize_op(body, current, visited, configs) { + if let Some(consumable) = finalize_op(body, all, current, visited, configs) { if *min == 0 { Some(Consumable { leftrec: consumable.leftrec, @@ -304,6 +324,7 @@ impl ImlProgram { // Finalize ImlParselet fn finalize_parselet( parselet: &ImlParselet, + all: &HashSet, visited: &mut HashSet, configs: &mut HashMap, ) -> Option { @@ -335,7 +356,7 @@ impl ImlProgram { } for part in [&model.begin, &model.body, &model.end] { - if let Some(result) = finalize_op(part, &parselet, visited, configs) { + if let Some(result) = finalize_op(part, all, &parselet, visited, configs) { if configs[&id] < result { configs.insert(id, result); } @@ -360,8 +381,9 @@ impl ImlProgram { for parselet in &parselets { let parselet = parselet.borrow_mut(); // parselet is locked for left-recursion detection - changes = finalize_parselet(&*parselet, &mut HashSet::new(), &mut configs) - > configs.get(&parselet.id()).cloned(); + changes = + finalize_parselet(&*parselet, &parselets, &mut HashSet::new(), &mut configs) + > configs.get(&parselet.id()).cloned(); } } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index cf9009a7..bc303308 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -96,7 +96,7 @@ impl ImlValue { match &**target { ImlValue::Parselet(parselet) => { let parselet = parselet.borrow(); - let mut new_constants = IndexMap::new(); + let mut constants = IndexMap::new(); for (name, default) in parselet.constants.iter() { // Take arguments by sequence first @@ -138,7 +138,7 @@ impl ImlValue { )); } - new_constants.insert(name.clone(), arg.1); + constants.insert(name.clone(), arg.1); } // Report any errors for unconsumed generic arguments. 
@@ -148,14 +148,14 @@ impl ImlValue { format!( "{} got too many generic arguments ({} in total, expected {})", target, - new_constants.len() + args.len(), - new_constants.len() + constants.len() + args.len(), + constants.len() ), )); } for (name, (offset, _)) in nargs { - if new_constants.get(name).is_some() { + if constants.get(name).is_some() { compiler.errors.push(Error::new( *offset, format!( @@ -174,9 +174,17 @@ impl ImlValue { } } - Some(ImlValue::from( - parselet.derive(new_constants, offset.clone()), - )) + // Make a parselet derivation from the instance definition; + // This can be the final parselet definition, but constants + // might contain Generic references as well, which are being + // resolved during compilation. + Some(ImlValue::from(ImlParselet { + model: parselet.model.clone(), + constants, + offset: parselet.offset.clone(), + name: parselet.name.clone(), + severity: parselet.severity, + })) } target => { compiler.errors.push(Error::new( @@ -338,13 +346,9 @@ impl ImlValue { if start == ops.len() { let idx = match self { ImlValue::This(_) => this, - /* - ImlValue::Parselet(parselet) => { - let parselet = parselet.borrow(); - - - } - */ + ImlValue::Parselet(parselet) => program + .register(&ImlValue::Parselet(parselet.derive(¤t.constants))) + .unwrap(), resolved => program.register(resolved).unwrap(), }; @@ -434,7 +438,7 @@ impl std::hash::Hash for ImlValue { } Self::Parselet(parselet) => { state.write_u8('p' as u8); - parselet.borrow().hash(state); + parselet.hash(state); } other => unreachable!("{:?} is unhashable", other), } From a5d6f507217f314a831f05ae6bb796d5066d5036 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 27 Jul 2023 00:43:30 +0200 Subject: [PATCH 44/94] Adding generics test examples --- g.tok | 18 ++++++++++++++++++ g2.tok | 10 ++++++++++ g3.tok | 11 +++++++++++ 3 files changed, 39 insertions(+) create mode 100644 g.tok create mode 100644 g2.tok create mode 100644 g3.tok diff --git a/g.tok b/g.tok new file mode 100644 index 
00000000..0655c92a --- /dev/null +++ b/g.tok @@ -0,0 +1,18 @@ +T: @ { + P 'x' P print(t $0) +} + +#T +print("Start") +T<'a'> +T<'b'> +T<'a'> +'\n' +Char print("Nee" $1) +#T<'a', "yo"> +#T +#T +#T +#T +#T<1,2,3> +#T<'x'>("y") diff --git a/g2.tok b/g2.tok new file mode 100644 index 00000000..e05e42e3 --- /dev/null +++ b/g2.tok @@ -0,0 +1,10 @@ +# Simple generic parselet T, serving as a template. +T: @

{ + P 'x' P print($0) +} + +# The final parselets are instanciated by its usage. +Parse_axa: T<'a'> +Parse_bxb: T<'b'> + +Parse_axa Parse_axa Parse_bxb print("matched!") diff --git a/g3.tok b/g3.tok new file mode 100644 index 00000000..52b57cc2 --- /dev/null +++ b/g3.tok @@ -0,0 +1,11 @@ +# cargo run -- -e g3.tok -- xxbbxbbxxbb + +X : @ { + A A +} + +Y : @ { + X +} + +Y<'x'> Y<'b'> From acac3e894ace7d9fa271afeeb076c5c3c2277bf5 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 28 Jul 2023 13:59:03 +0200 Subject: [PATCH 45/94] ImlParselet::is_resolved() Redesign of entire parselet handling in compiler required, to avoid mutable borrow errors that occasionally occur during hashing. Didn't recognize this problem before. --- src/compiler/iml/imlparselet.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 7f5c4b59..5fb4f2df 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -58,6 +58,14 @@ impl ImlParselet { } } + /// Checks if a ImlParselet is completely resolved, or if it has open generics + pub fn is_resolved(&self) -> bool { + !self + .constants + .values() + .any(|value| matches!(value, ImlValue::Generic { .. })) + } + /** Derives a parselet from a given namespace when required. The namespace defines the constant configuration of a surrounding parselet, @@ -66,20 +74,18 @@ impl ImlParselet { Returns None if no derivation can be created, otherwise returns Some(Self). */ pub fn derive(&self, namespace: &IndexMap) -> Option { + if self.is_resolved() { + return None; + } + let mut constants = self.constants.clone(); - let mut modified = false; for value in constants.values_mut() { - if let ImlValue::Generic { name, .. } = value { + while let ImlValue::Generic { name, .. 
} = value { *value = namespace.get(name).unwrap().clone(); - modified = true; } } - if !modified { - return None; - } - Some(ImlParselet { model: self.model.clone(), constants, From b25c621d6d78709361d46978aae6429f07c45bb2 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 5 Aug 2023 08:34:43 +0200 Subject: [PATCH 46/94] Refactor finalization, part I --- src/compiler/iml/imlprogram.rs | 113 +++++++++++++-------------------- 1 file changed, 45 insertions(+), 68 deletions(-) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index c711a3e9..28639ff2 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -1,11 +1,11 @@ -//! ImlProgram glues ImlParselets, ImlOps and ImlValues together to produce a VM program. +//! ImlProgram glues ImlParselet, ImlOp and ImlValue together to produce a VM program. use super::*; use crate::value::Parselet; use crate::vm::Program; use crate::Error; use crate::{Object, RefValue}; -use indexmap::IndexMap; +use indexmap::{indexmap, IndexMap, IndexSet}; use std::collections::{HashMap, HashSet}; #[derive(Debug)] @@ -16,11 +16,8 @@ pub(in crate::compiler) struct ImlProgram { impl ImlProgram { pub fn new(main: ImlValue) -> Self { - let mut statics = IndexMap::new(); - statics.insert(main, None); - ImlProgram { - statics, + statics: indexmap!(main => None), errors: Vec::new(), } } @@ -97,7 +94,7 @@ impl ImlProgram { if let Some(mut parselet) = parselet { if let ImlValue::Parselet(imlparselet) = iml { parselet.consuming = leftrec - .get(&imlparselet.borrow().id()) + .get(&imlparselet) .map_or(None, |leftrec| Some(*leftrec)); //println!("{:?} => {:?}", imlparselet.borrow().name, parselet.consuming); @@ -130,7 +127,10 @@ impl ImlProgram { It can only be run on a previously compiled program without any unresolved usages. 
*/ - fn finalize(&mut self, parselets: HashSet) -> HashMap { + fn finalize( + &mut self, + parselets: HashSet, + ) -> HashMap { #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] struct Consumable { leftrec: bool, @@ -142,8 +142,8 @@ impl ImlProgram { value: &ImlValue, all: &HashSet, current: &ImlParselet, - visited: &mut HashSet, - configs: &mut HashMap, + visited: &mut IndexSet, + configs: &mut HashMap, ) -> Option { match value { ImlValue::Shared(value) => { @@ -154,31 +154,13 @@ impl ImlProgram { nullable: false, }), ImlValue::Parselet(parselet) => { - // Try to borrow the parselet directly - match parselet.try_borrow() { - // In case the parselet cannot be borrowed, it is left-recursive! - Err(_) => Some(Consumable { - leftrec: true, - nullable: false, - }), - // Otherwise, further examine derive! - Ok(_) => { - // Try to derive the parselet with current constants - let derived = parselet.derive(¤t.constants); - - // The derived parselet must be in all! - let parselet = all.get(&derived).unwrap(); - - match parselet.try_borrow() { - // In case the derived parselet cannot be borrowed, it is left-recursive! - Err(_) => Some(Consumable { - leftrec: true, - nullable: false, - }), - Ok(parselet) => finalize_parselet(&parselet, all, visited, configs), - } - } - } + // Try to derive the parselet with current constants + let derived = parselet.derive(¤t.constants); + + // The derived parselet must be in all! + let parselet = all.get(&derived).unwrap(); + + finalize_parselet(&parselet, all, visited, configs) } ImlValue::Value(callee) => { if callee.is_consuming() { @@ -204,8 +186,8 @@ impl ImlProgram { op: &ImlOp, all: &HashSet, current: &ImlParselet, - visited: &mut HashSet, - configs: &mut HashMap, + visited: &mut IndexSet, + configs: &mut HashMap, ) -> Option { match op { ImlOp::Call { target, .. 
} => { @@ -323,50 +305,45 @@ impl ImlProgram { // Finalize ImlParselet fn finalize_parselet( - parselet: &ImlParselet, + current: &ImlSharedParselet, all: &HashSet, - visited: &mut HashSet, - configs: &mut HashMap, + visited: &mut IndexSet, + configs: &mut HashMap, ) -> Option { // ... only if it's generally flagged to be consuming. + let parselet = current.borrow(); let model = parselet.model.borrow(); if !model.consuming { return None; } - let id = parselet.id(); - - if visited.contains(&id) { + if let Some(idx) = visited.get_index_of(current) { Some(Consumable { - leftrec: false, - nullable: configs[&id].nullable, + leftrec: idx == 0, + nullable: configs[current].nullable, }) } else { - visited.insert(id); - - if !configs.contains_key(&id) { - configs.insert( - id, - Consumable { - leftrec: false, - nullable: false, - }, - ); - } + visited.insert(current.clone()); + configs + .entry(current.clone()) + .or_insert_with(|| Consumable { + leftrec: false, + nullable: false, + }); for part in [&model.begin, &model.body, &model.end] { if let Some(result) = finalize_op(part, all, &parselet, visited, configs) { - if configs[&id] < result { - configs.insert(id, result); + if configs[current] < result { + configs.insert(current.clone(), result); } } } - visited.remove(&id); + visited.remove(current); Some(Consumable { leftrec: false, - nullable: configs[&id].nullable, + nullable: configs[current].nullable, }) } } @@ -380,20 +357,20 @@ impl ImlProgram { changes = false; for parselet in &parselets { - let parselet = parselet.borrow_mut(); // parselet is locked for left-recursion detection - changes = - finalize_parselet(&*parselet, &parselets, &mut HashSet::new(), &mut configs) - > configs.get(&parselet.id()).cloned(); + let result = + finalize_parselet(parselet, &parselets, &mut IndexSet::new(), &mut configs); + changes = result > configs.get(parselet).cloned(); } } /* - for parselet in parselets { - let parselet = parselet.borrow(); + println!("--- final config ---"); + + 
for parselet in &parselets { println!( "{} consuming={:?}", - parselet.name.as_deref().unwrap_or("(unnamed)"), - configs[&parselet.id()] + parselet.borrow().name.as_deref().unwrap_or("(unnamed)"), + configs[&parselet] ); } */ From 733b63c048e848d89efe5761888728699bffd030 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 5 Aug 2023 08:44:49 +0200 Subject: [PATCH 47/94] Refactor finalization, part II --- src/compiler/iml/imlprogram.rs | 63 +++++++++++++++++----------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 28639ff2..957463a5 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -140,14 +140,13 @@ impl ImlProgram { // Finalize ImlValue fn finalize_value( value: &ImlValue, - all: &HashSet, current: &ImlParselet, visited: &mut IndexSet, configs: &mut HashMap, ) -> Option { match value { ImlValue::Shared(value) => { - finalize_value(&*value.borrow(), all, current, visited, configs) + finalize_value(&*value.borrow(), current, visited, configs) } ImlValue::This(_) => Some(Consumable { leftrec: true, @@ -157,10 +156,10 @@ impl ImlProgram { // Try to derive the parselet with current constants let derived = parselet.derive(¤t.constants); - // The derived parselet must be in all! - let parselet = all.get(&derived).unwrap(); + // The derived parselet's original must be in the configs! + let parselet = configs.get_key_value(&derived).unwrap().0.clone(); - finalize_parselet(&parselet, all, visited, configs) + finalize_parselet(&parselet, visited, configs) } ImlValue::Value(callee) => { if callee.is_consuming() { @@ -175,7 +174,7 @@ impl ImlProgram { } ImlValue::Generic { name, .. } => { // fixme: Is this still relevant??? 
- finalize_value(¤t.constants[name], all, current, visited, configs) + finalize_value(¤t.constants[name], current, visited, configs) } _ => None, } @@ -184,22 +183,19 @@ impl ImlProgram { // Finalize ImlOp fn finalize_op( op: &ImlOp, - all: &HashSet, current: &ImlParselet, visited: &mut IndexSet, configs: &mut HashMap, ) -> Option { match op { - ImlOp::Call { target, .. } => { - finalize_value(target, all, current, visited, configs) - } + ImlOp::Call { target, .. } => finalize_value(target, current, visited, configs), ImlOp::Alt { alts } => { let mut leftrec = false; let mut nullable = false; let mut consumes = false; for alt in alts { - if let Some(consumable) = finalize_op(alt, all, current, visited, configs) { + if let Some(consumable) = finalize_op(alt, current, visited, configs) { leftrec |= consumable.leftrec; nullable |= consumable.nullable; consumes = true; @@ -222,8 +218,7 @@ impl ImlProgram { break; } - if let Some(consumable) = finalize_op(item, all, current, visited, configs) - { + if let Some(consumable) = finalize_op(item, current, visited, configs) { leftrec |= consumable.leftrec; nullable = consumable.nullable; consumes = true; @@ -237,9 +232,9 @@ impl ImlProgram { } } ImlOp::If { then, else_, .. } => { - let then = finalize_op(then, all, current, visited, configs); + let then = finalize_op(then, current, visited, configs); - if let Some(else_) = finalize_op(else_, all, current, visited, configs) { + if let Some(else_) = finalize_op(else_, current, visited, configs) { if let Some(then) = then { Some(Consumable { leftrec: then.leftrec || else_.leftrec, @@ -261,7 +256,7 @@ impl ImlProgram { let mut ret: Option = None; for part in [initial, condition, body] { - let part = finalize_op(part, all, current, visited, configs); + let part = finalize_op(part, current, visited, configs); if let Some(part) = part { ret = if let Some(ret) = ret { @@ -279,12 +274,12 @@ impl ImlProgram { } // DEPRECATED BELOW!!! - ImlOp::Expect { body, .. 
} => finalize_op(body, all, current, visited, configs), + ImlOp::Expect { body, .. } => finalize_op(body, current, visited, configs), ImlOp::Not { body } | ImlOp::Peek { body } => { - finalize_op(body, all, current, visited, configs) + finalize_op(body, current, visited, configs) } ImlOp::Repeat { body, min, .. } => { - if let Some(consumable) = finalize_op(body, all, current, visited, configs) { + if let Some(consumable) = finalize_op(body, current, visited, configs) { if *min == 0 { Some(Consumable { leftrec: consumable.leftrec, @@ -306,7 +301,6 @@ impl ImlProgram { // Finalize ImlParselet fn finalize_parselet( current: &ImlSharedParselet, - all: &HashSet, visited: &mut IndexSet, configs: &mut HashMap, ) -> Option { @@ -319,21 +313,17 @@ impl ImlProgram { } if let Some(idx) = visited.get_index_of(current) { + // When in visited, this is a recursion Some(Consumable { - leftrec: idx == 0, + leftrec: idx == 0, // If the idx is 0, current is the seeked parselet, and is left-recursive nullable: configs[current].nullable, }) } else { + // If not already visited, add and recurse. visited.insert(current.clone()); - configs - .entry(current.clone()) - .or_insert_with(|| Consumable { - leftrec: false, - nullable: false, - }); for part in [&model.begin, &model.body, &model.end] { - if let Some(result) = finalize_op(part, all, &parselet, visited, configs) { + if let Some(result) = finalize_op(part, &parselet, visited, configs) { if configs[current] < result { configs.insert(current.clone(), result); } @@ -341,6 +331,7 @@ impl ImlProgram { } visited.remove(current); + Some(Consumable { leftrec: false, nullable: configs[current].nullable, @@ -351,14 +342,24 @@ impl ImlProgram { // Now, start the closure algorithm with left-recursive and nullable configurations for all parselets // put into the finalize list. 
let mut changes = true; - let mut configs = HashMap::new(); + let mut configs = parselets + .iter() + .map(|k| { + ( + k.clone(), + Consumable { + leftrec: false, + nullable: false, + }, + ) + }) + .collect(); while changes { changes = false; for parselet in &parselets { - let result = - finalize_parselet(parselet, &parselets, &mut IndexSet::new(), &mut configs); + let result = finalize_parselet(parselet, &mut IndexSet::new(), &mut configs); changes = result > configs.get(parselet).cloned(); } } From 3412a7e462cab4975e8dc450eac67ecda63f319a Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 5 Aug 2023 10:00:24 +0200 Subject: [PATCH 48/94] Improve Peek

--- src/prelude.tok | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prelude.tok b/src/prelude.tok index f2573f25..216cb9e2 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -24,7 +24,7 @@ Not : @

{ } Peek : @

{ - P reset + p = {P} reset accept p } Expect : @

msg=void { From e23349157d6a18dee5f1f96caaf5733b097f4df1 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 5 Aug 2023 20:53:43 +0200 Subject: [PATCH 49/94] `prelude.tok` with `Pos

, `Kle

` and `Opt

` --- src/prelude.tok | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/prelude.tok b/src/prelude.tok index 216cb9e2..75691711 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -1,17 +1,23 @@ # Tokay default prelude Repeat : @

min=1, max=void { + begin { + res = list() + } + P { res += $1 if !max || res.len < max repeat } - end { - if res.len < min reject - res - } + if !res || res.len < min reject + res } +Pos : @

{ Repeat

} +Kle : @

{ Repeat

(min=0) } +Opt : @

{ P | Void } + List : @ { Self Separator P $1 + $3 if empty (Self Separator) # allows for trailing Separator @@ -20,7 +26,7 @@ List : @ { Not : @

{ P reject - accept + Void } Peek : @

{ From adcaec660dcc10352dd5074fcec4b88e2fdc108f Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 12 Aug 2023 00:34:54 +0200 Subject: [PATCH 50/94] Bugfix for unresolved instances --- minicalc.tok | 10 ++++++++++ src/compiler/ast.rs | 5 ++--- src/compiler/iml/imlprogram.rs | 1 + src/compiler/iml/imlvalue.rs | 17 +++++++++++++---- src/vm/context.rs | 1 + 5 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 minicalc.tok diff --git a/minicalc.tok b/minicalc.tok new file mode 100644 index 00000000..55c1b8f3 --- /dev/null +++ b/minicalc.tok @@ -0,0 +1,10 @@ + +Main : @{ + Expect print("= " + $1) +} + +Expr : @{ + Int +} + +Main diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index c092b082..e14cea2e 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -373,15 +373,14 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { } } - let mut ret = ImlValue::Instance { + let ret = ImlValue::Instance { target: Box::new(target), args, nargs, offset: traverse_node_offset(node), }; - ret.resolve(compiler); - ret + ret.try_resolve(compiler) } _ => unimplemented!("unhandled value node {}", emit), diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 957463a5..9ff06a06 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -74,6 +74,7 @@ impl ImlProgram { } // Compile VM parselet from intermediate parselet + // println!("...compiling {} {:?}", idx, parselet.name); *self.statics.get_index_mut(idx).unwrap().1 = Some(parselet.compile(&mut self, idx)); idx += 1; diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index bc303308..432bdff3 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -191,11 +191,11 @@ impl ImlValue { *offset, format!("Cannot create instance from '{}'", target), )); - None + return false; } } } else { - None + return false; } } _ => return true, // anything else is considered as resolved @@ -339,7 
+339,7 @@ impl ImlValue { return; } } - _ => unreachable!("{:?}", self), + _ => unreachable!("{}", self), } // Check if something has been pushed before. @@ -391,7 +391,15 @@ impl std::fmt::Display for ImlValue { Self::This(true) => write!(f, "Self"), Self::This(false) => write!(f, "self"), Self::Value(value) => write!(f, "{}", value.repr()), - Self::Parselet(parselet) => write!(f, "{}", parselet), + Self::Parselet(parselet) => write!( + f, + "{}", + parselet + .borrow() + .name + .as_deref() + .unwrap_or("") + ), Self::Global(var) => write!(f, "global({})", var), Self::Local(var) => write!(f, "local({})", var), Self::Name { name, .. } => write!(f, "{}", name), @@ -432,6 +440,7 @@ impl std::fmt::Display for ImlValue { impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { + Self::Shared(value) => value.borrow().hash(state), Self::Value(value) => { state.write_u8('v' as u8); value.hash(state) diff --git a/src/vm/context.rs b/src/vm/context.rs index d6177399..5c9e4443 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -472,6 +472,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, pub fn run(&mut self, main: bool) -> Result { // Debugging if self.debug < 3 { + //println!("{:?}", self.parselet.name); if let Ok(inspect) = std::env::var("TOKAY_INSPECT") { for name in inspect.split(" ") { if name == self.parselet.name { From 27dce472ba4117679ba6de5a5fdbfbac865b341a Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 13 Aug 2023 00:49:22 +0200 Subject: [PATCH 51/94] Improving derivation stuff and handle Self-values - Moves all derivation and compilation stuff into ImlSharedParselet - current parselet tuple in compilation - use ImlSharedParselet in finalization --- src/compiler/iml/imlop.rs | 34 ++++---- src/compiler/iml/imlparselet.rs | 145 ++++++++++++++++---------------- src/compiler/iml/imlprogram.rs | 20 ++--- src/compiler/iml/imlvalue.rs | 33 +++++--- 4 files changed, 120 insertions(+), 112 
deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index f07daf0d..e0397f21 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -274,11 +274,10 @@ impl ImlOp { pub fn compile_to_vec( &self, program: &mut ImlProgram, - parselet: &ImlParselet, - this: usize, + current: (&ImlSharedParselet, usize), ) -> Vec { let mut ops = Vec::new(); - self.compile(program, parselet, this, &mut ops); + self.compile(program, current, &mut ops); ops } @@ -286,8 +285,7 @@ impl ImlOp { pub fn compile( &self, program: &mut ImlProgram, - parselet: &ImlParselet, - this: usize, + current: (&ImlSharedParselet, usize), ops: &mut Vec, ) -> usize { let start = ops.len(); @@ -296,14 +294,14 @@ impl ImlOp { ImlOp::Nop => {} ImlOp::Op(op) => ops.push(op.clone()), ImlOp::Load { offset, target } => { - target.compile(program, parselet, this, &offset, None, ops); + target.compile(program, current, &offset, None, ops); } ImlOp::Call { offset, target, args, } => { - target.compile(program, parselet, this, &offset, Some(*args), ops); + target.compile(program, current, &offset, Some(*args), ops); } ImlOp::Alt { alts } => { let mut ret = Vec::new(); @@ -313,7 +311,7 @@ impl ImlOp { while let Some(item) = iter.next() { let mut alt = Vec::new(); - item.compile(program, parselet, this, &mut alt); + item.compile(program, current, &mut alt); // When branch has more than one item, Frame it. 
if iter.len() > 0 { @@ -354,7 +352,7 @@ impl ImlOp { } ImlOp::Seq { seq, collection } => { for item in seq.iter() { - item.compile(program, parselet, this, ops); + item.compile(program, current, ops); } // Check if the sequence exists of more than one operational instruction @@ -389,13 +387,13 @@ impl ImlOp { } // Then-part - let mut jump = then_part.compile(program, parselet, this, ops) + 1; + let mut jump = then_part.compile(program, current, ops) + 1; if !*peek { let mut else_ops = Vec::new(); // Else-part - if else_part.compile(program, parselet, this, &mut else_ops) > 0 { + if else_part.compile(program, current, &mut else_ops) > 0 { ops.push(Op::Forward(else_ops.len() + 1)); jump += 1; ops.extend(else_ops); @@ -420,9 +418,9 @@ impl ImlOp { let consuming: Option = None; // fixme: Currently not sure if this is an issue. let mut repeat = Vec::new(); - initial.compile(program, parselet, this, ops); + initial.compile(program, current, ops); - if condition.compile(program, parselet, this, &mut repeat) > 0 { + if condition.compile(program, current, &mut repeat) > 0 { if *iterator { repeat.push(Op::ForwardIfNotVoid(2)); } else { @@ -432,7 +430,7 @@ impl ImlOp { repeat.push(Op::Break); } - body.compile(program, parselet, this, &mut repeat); + body.compile(program, current, &mut repeat); let len = repeat.len() + if consuming.is_some() { 3 } else { 2 }; ops.push(Op::Loop(len)); @@ -452,7 +450,7 @@ impl ImlOp { // DEPRECATED BELOW!!! 
ImlOp::Expect { body, msg } => { let mut expect = Vec::new(); - body.compile(program, parselet, this, &mut expect); + body.compile(program, current, &mut expect); ops.push(Op::Frame(expect.len() + 2)); @@ -469,7 +467,7 @@ impl ImlOp { } ImlOp::Not { body } => { let mut body_ops = Vec::new(); - let body_len = body.compile(program, parselet, this, &mut body_ops); + let body_len = body.compile(program, current, &mut body_ops); ops.push(Op::Frame(body_len + 3)); ops.extend(body_ops); ops.push(Op::Close); @@ -478,13 +476,13 @@ impl ImlOp { } ImlOp::Peek { body } => { ops.push(Op::Frame(0)); - body.compile(program, parselet, this, ops); + body.compile(program, current, ops); ops.push(Op::Reset); ops.push(Op::Close); } ImlOp::Repeat { body, min, max } => { let mut body_ops = Vec::new(); - let body_len = body.compile(program, parselet, this, &mut body_ops); + let body_len = body.compile(program, current, &mut body_ops); match (min, max) { (0, 0) => { diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 5fb4f2df..e729fae4 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -58,75 +58,9 @@ impl ImlParselet { } } - /// Checks if a ImlParselet is completely resolved, or if it has open generics - pub fn is_resolved(&self) -> bool { - !self - .constants - .values() - .any(|value| matches!(value, ImlValue::Generic { .. })) - } - - /** Derives a parselet from a given namespace when required. - - The namespace defines the constant configuration of a surrounding parselet, - and extends the parselet's constant configuration, making it a derivation. - - Returns None if no derivation can be created, otherwise returns Some(Self). - */ - pub fn derive(&self, namespace: &IndexMap) -> Option { - if self.is_resolved() { - return None; - } - - let mut constants = self.constants.clone(); - - for value in constants.values_mut() { - while let ImlValue::Generic { name, .. 
} = value { - *value = namespace.get(name).unwrap().clone(); - } - } - - Some(ImlParselet { - model: self.model.clone(), - constants, - offset: self.offset.clone(), - name: self.name.clone(), - severity: self.severity, - }) - } - pub fn id(&self) -> usize { self as *const ImlParselet as usize } - - pub fn compile(&self, program: &mut ImlProgram, this: usize) -> Parselet { - let model = self.model.borrow(); - - Parselet::new( - Some(format!("{}", self)), - None, - self.severity, - model - .signature - .iter() - .map(|var_value| { - ( - // Copy parameter name - var_value.0.clone(), - // Register default value, if any - match &var_value.1 { - ImlValue::Void => None, - value => Some(program.register(value).expect("Cannot register value")), - }, - ) - }) - .collect(), - model.locals, - model.begin.compile_to_vec(program, self, this), - model.end.compile_to_vec(program, self, this), - model.body.compile_to_vec(program, self, this), - ) - } } impl std::fmt::Display for ImlParselet { @@ -195,14 +129,83 @@ impl ImlSharedParselet { Self(Rc::new(RefCell::new(parselet))) } - pub fn derive(&self, namespace: &IndexMap) -> Self { - if let Ok(parselet) = self.try_borrow() { - if let Some(derive) = parselet.derive(namespace) { - return Self::new(derive); + /// Checks if a parselet is completely resolved, or if it has open generics + pub fn is_generic(&self) -> bool { + self.borrow() + .constants + .values() + .any(|value| matches!(value, ImlValue::Generic { .. } | ImlValue::This(_))) + } + + /** Derives a parselet by surrouning parselet from. + + The namespace defines the constant configuration of a surrounding parselet, + and extends the parselet's constant configuration, making it a derivation. + + Returns derived parselet in case it was derive, otherwise returns a clone of self. 
+ */ + pub fn derive(&self, from: &ImlSharedParselet) -> Self { + let mut constants = self.borrow().constants.clone(); + let mut changes = false; + + for value in constants.values_mut() { + // Replace any generics + while let ImlValue::Generic { name, .. } = value { + *value = from.borrow().constants.get(name).unwrap().clone(); + changes = true; + } + + // Replace any this + if let ImlValue::This(_) = value { + *value = ImlValue::Parselet(from.clone()); + changes = true; } } - self.clone() + if !changes { + return self.clone(); + } + + // Create derivation of this parselet + let parselet = self.borrow(); + + Self::new(ImlParselet { + model: parselet.model.clone(), + constants, + offset: parselet.offset.clone(), + name: parselet.name.clone(), + severity: parselet.severity, + }) + } + + pub fn compile(&self, program: &mut ImlProgram, this: usize) -> Parselet { + let parselet = self.borrow(); + let model = parselet.model.borrow(); + + Parselet::new( + Some(format!("{}", parselet)), + None, + parselet.severity, + model + .signature + .iter() + .map(|var_value| { + ( + // Copy parameter name + var_value.0.clone(), + // Register default value, if any + match &var_value.1 { + ImlValue::Void => None, + value => Some(program.register(value).expect("Cannot register value")), + }, + ) + }) + .collect(), + model.locals, + model.begin.compile_to_vec(program, (self, this)), + model.end.compile_to_vec(program, (self, this)), + model.body.compile_to_vec(program, (self, this)), + ) } } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 9ff06a06..cd282cde 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -54,7 +54,7 @@ impl ImlProgram { // self.statics grows inside of this while loop, therefore this condition. 
while idx < self.statics.len() { // Pick only intermediate parselets, other static values are directly moved - let outer = match self.statics.get_index_mut(idx).unwrap() { + let parselet = match self.statics.get_index_mut(idx).unwrap() { (_, Some(_)) => unreachable!(), // may not exist! (ImlValue::Parselet(parselet), None) => parselet.clone(), _ => { @@ -63,14 +63,10 @@ impl ImlProgram { } }; - // We have to do it this ugly way because of the borrow checker... - let parselet = outer.borrow(); - let model = parselet.model.borrow(); - // Memoize parselets required to be finalized (needs a general rework later...) - if model.consuming { + if parselet.borrow().model.borrow().consuming { //fixme... - finalize.insert(outer.clone()); + finalize.insert(parselet.clone()); } // Compile VM parselet from intermediate parselet @@ -141,7 +137,7 @@ impl ImlProgram { // Finalize ImlValue fn finalize_value( value: &ImlValue, - current: &ImlParselet, + current: &ImlSharedParselet, visited: &mut IndexSet, configs: &mut HashMap, ) -> Option { @@ -155,7 +151,7 @@ impl ImlProgram { }), ImlValue::Parselet(parselet) => { // Try to derive the parselet with current constants - let derived = parselet.derive(¤t.constants); + let derived = parselet.derive(current); // The derived parselet's original must be in the configs! let parselet = configs.get_key_value(&derived).unwrap().0.clone(); @@ -175,7 +171,7 @@ impl ImlProgram { } ImlValue::Generic { name, .. } => { // fixme: Is this still relevant??? 
- finalize_value(¤t.constants[name], current, visited, configs) + finalize_value(¤t.borrow().constants[name], current, visited, configs) } _ => None, } @@ -184,7 +180,7 @@ impl ImlProgram { // Finalize ImlOp fn finalize_op( op: &ImlOp, - current: &ImlParselet, + current: &ImlSharedParselet, visited: &mut IndexSet, configs: &mut HashMap, ) -> Option { @@ -324,7 +320,7 @@ impl ImlProgram { visited.insert(current.clone()); for part in [&model.begin, &model.body, &model.end] { - if let Some(result) = finalize_op(part, &parselet, visited, configs) { + if let Some(result) = finalize_op(part, current, visited, configs) { if configs[current] < result { configs.insert(current.clone(), result); } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 432bdff3..470fe816 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -24,11 +24,11 @@ pub(in crate::compiler) enum ImlValue { Shared(Rc>), // Resolved: static - This(bool), // current function (false) or parselet (true) Value(RefValue), // Compile-time static value Parselet(ImlSharedParselet), // Parselet instance // Resolved: dynamic + This(bool), // self-reference function (false) or parselet (true) Local(usize), // Runtime local variable Global(usize), // Runtime global variable @@ -262,8 +262,7 @@ impl ImlValue { pub fn compile( &self, program: &mut ImlProgram, - current: &ImlParselet, - this: usize, + current: (&ImlSharedParselet, usize), offset: &Option, call: Option>, ops: &mut Vec, @@ -277,9 +276,7 @@ impl ImlValue { match self { ImlValue::Shared(value) => { - return value - .borrow() - .compile(program, current, this, offset, call, ops) + return value.borrow().compile(program, current, offset, call, ops) } ImlValue::Value(value) => match &*value.borrow() { Value::Void => ops.push(Op::PushVoid), @@ -296,7 +293,8 @@ impl ImlValue { ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), ImlValue::Generic { 
name, .. } => { - return current.constants[name].compile(program, current, this, offset, call, ops) + return current.0.borrow().constants[name] + .compile(program, current, offset, call, ops) } ImlValue::Name { name, .. } => { program.errors.push(Error::new( @@ -345,10 +343,17 @@ impl ImlValue { // Check if something has been pushed before. if start == ops.len() { let idx = match self { - ImlValue::This(_) => this, - ImlValue::Parselet(parselet) => program - .register(&ImlValue::Parselet(parselet.derive(¤t.constants))) - .unwrap(), + ImlValue::This(_) => current.1, // use current index + ImlValue::Parselet(parselet) => { + if parselet.is_generic() { + // Otherwise, this is a generic, so create a derivation + let derive = ImlValue::Parselet(parselet.derive(current.0)); + program.register(&derive).unwrap() + } else { + // If target is resolved, just register + program.register(self).unwrap() + } + } resolved => program.register(resolved).unwrap(), }; @@ -449,6 +454,12 @@ impl std::hash::Hash for ImlValue { state.write_u8('p' as u8); parselet.hash(state); } + /* + Self::This(consumable) => { + state.write_u8('s' as u8); + consumable.hash(state); + } + */ other => unreachable!("{:?} is unhashable", other), } } From 90a2ba472a2a6e6f0d5f2fe861c00d1902397599 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 14 Aug 2023 17:54:30 +0200 Subject: [PATCH 52/94] Docstrings --- src/compiler/iml/imlparselet.rs | 39 ++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index e729fae4..d82537fc 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -9,7 +9,10 @@ use std::rc::Rc; // ImlParseletModel // ---------------------------------------------------------------------------- -/// Intermediate parselet model +/** Intermediate parselet model. 
+ +The model defines the code and local varibles of the parselet, and is shared by +several parselet configurations. */ #[derive(Debug)] pub(in crate::compiler) struct ImlParseletModel { pub consuming: bool, // Flag if parselet is consuming @@ -29,7 +32,12 @@ impl ImlParseletModel { // ImlParselet // ---------------------------------------------------------------------------- -/// Intermediate parselet +/** Intermediate parselet configuration. + +A parselet configuration is a model with as given constants definition. +The constants definition might be generic, which needs to be resolved first +before a parselet configuration is turned into a parselet. +*/ #[allow(dead_code)] #[derive(Debug)] pub(in crate::compiler) struct ImlParselet { @@ -129,7 +137,7 @@ impl ImlSharedParselet { Self(Rc::new(RefCell::new(parselet))) } - /// Checks if a parselet is completely resolved, or if it has open generics + /// Checks if an intermediate parselet is completely resolved, or if it has open generics pub fn is_generic(&self) -> bool { self.borrow() .constants @@ -137,36 +145,45 @@ impl ImlSharedParselet { .any(|value| matches!(value, ImlValue::Generic { .. } | ImlValue::This(_))) } - /** Derives a parselet by surrouning parselet from. + /** Derives an intermediate parselet by another intermediate parselet (`from`). + + The namespace defines the constant configuration of a surrounding parselet (`from`), + and extends the intermediate parselet's constant configuration, making it a derivation. + + ```tokay + A: X # intermediate generic parselet A + B: 'x' A # intermediate generic parselet B using a parselet instance of A - The namespace defines the constant configuration of a surrounding parselet, - and extends the parselet's constant configuration, making it a derivation. + B<'m'> B<'n'> # parselet instances, construct the final parselets: B<'m'>, A<'m'>, B<'n'> A<'n'> + ``` - Returns derived parselet in case it was derive, otherwise returns a clone of self. 
+ The function either returns a derived parselet in case it was derive, + otherwise it returns a clone of self. */ pub fn derive(&self, from: &ImlSharedParselet) -> Self { let mut constants = self.borrow().constants.clone(); let mut changes = false; for value in constants.values_mut() { - // Replace any generics + // Replace any generics until no more are open while let ImlValue::Generic { name, .. } = value { *value = from.borrow().constants.get(name).unwrap().clone(); changes = true; } - // Replace any this + // Replace any values of self if let ImlValue::This(_) = value { *value = ImlValue::Parselet(from.clone()); changes = true; } } + // When there is no change, there is no derivation if !changes { return self.clone(); } - // Create derivation of this parselet + // Create derivation of the inner parselet let parselet = self.borrow(); Self::new(ImlParselet { @@ -178,6 +195,8 @@ impl ImlSharedParselet { }) } + /** Compiles an intermediate parselet into a compiled VM parselet, + which is part of the provided `program` and indexed by `this`. 
*/ pub fn compile(&self, program: &mut ImlProgram, this: usize) -> Parselet { let parselet = self.borrow(); let model = parselet.model.borrow(); From 551d6efa8b0ec09ccc5df11c77ca0215f16cee6a Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 14 Aug 2023 21:57:45 +0200 Subject: [PATCH 53/94] Renamings - Renamed `ImlParselet` into `ImlParseletConfig` - Renamed `ImlSharedParselet` into `ImlParselet` --- src/compiler/compiler.rs | 2 +- src/compiler/iml/imlop.rs | 4 +-- src/compiler/iml/imlparselet.rs | 50 ++++++++++++++++----------------- src/compiler/iml/imlprogram.rs | 27 ++++++++---------- src/compiler/iml/imlvalue.rs | 10 +++---- 5 files changed, 45 insertions(+), 48 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 84745767..aa492c4d 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -312,7 +312,7 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::from(ImlParselet::new( + ImlValue::from(ImlParseletConfig::new( model, constants, offset, diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index e0397f21..2d33d722 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -274,7 +274,7 @@ impl ImlOp { pub fn compile_to_vec( &self, program: &mut ImlProgram, - current: (&ImlSharedParselet, usize), + current: (&ImlParselet, usize), ) -> Vec { let mut ops = Vec::new(); self.compile(program, current, &mut ops); @@ -285,7 +285,7 @@ impl ImlOp { pub fn compile( &self, program: &mut ImlProgram, - current: (&ImlSharedParselet, usize), + current: (&ImlParselet, usize), ops: &mut Vec, ) -> usize { let start = ops.len(); diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index d82537fc..1b005457 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -29,7 +29,7 @@ impl ImlParseletModel { } } -// ImlParselet +// ImlParseletConfig // ---------------------------------------------------------------------------- /** Intermediate 
parselet configuration. @@ -40,7 +40,7 @@ before a parselet configuration is turned into a parselet. */ #[allow(dead_code)] #[derive(Debug)] -pub(in crate::compiler) struct ImlParselet { +pub(in crate::compiler) struct ImlParseletConfig { pub model: Rc>, // Parselet base model pub constants: IndexMap, // Generic signature with default configuration pub offset: Option, // Offset of definition @@ -49,7 +49,7 @@ pub(in crate::compiler) struct ImlParselet { } /** Representation of parselet in intermediate code. */ -impl ImlParselet { +impl ImlParseletConfig { pub fn new( model: ImlParseletModel, constants: IndexMap, @@ -67,11 +67,11 @@ impl ImlParselet { } pub fn id(&self) -> usize { - self as *const ImlParselet as usize + self as *const ImlParseletConfig as usize } } -impl std::fmt::Display for ImlParselet { +impl std::fmt::Display for ImlParseletConfig { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -95,16 +95,16 @@ impl std::fmt::Display for ImlParselet { } } -impl std::cmp::PartialEq for ImlParselet { +impl std::cmp::PartialEq for ImlParseletConfig { // It satisfies to just compare the parselet's memory address for equality fn eq(&self, other: &Self) -> bool { self.model.borrow().id() == other.model.borrow().id() && self.constants == other.constants } } -impl Eq for ImlParselet {} +impl Eq for ImlParseletConfig {} -impl std::hash::Hash for ImlParselet { +impl std::hash::Hash for ImlParseletConfig { fn hash(&self, state: &mut H) { let model = &*self.model.borrow(); (model as *const ImlParseletModel as usize).hash(state); @@ -112,28 +112,28 @@ impl std::hash::Hash for ImlParselet { } } -impl std::cmp::PartialOrd for ImlParselet { +impl std::cmp::PartialOrd for ImlParseletConfig { // It satisfies to just compare the parselet's memory address for equality fn partial_cmp(&self, other: &Self) -> Option { self.id().partial_cmp(&other.id()) } } -impl From for ImlValue { - fn from(parselet: ImlParselet) -> Self { - 
ImlValue::Parselet(ImlSharedParselet::new(parselet)) +impl From for ImlValue { + fn from(parselet: ImlParseletConfig) -> Self { + ImlValue::Parselet(ImlParselet::new(parselet)) } } -// ImlSharedParselet +// ImlParselet // ---------------------------------------------------------------------------- -/// Shared ImlParselet +/// Shared ImlParseletConfig #[derive(Clone, Eq, PartialEq)] -pub(in crate::compiler) struct ImlSharedParselet(Rc>); +pub(in crate::compiler) struct ImlParselet(Rc>); -impl ImlSharedParselet { - pub fn new(parselet: ImlParselet) -> Self { +impl ImlParselet { + pub fn new(parselet: ImlParseletConfig) -> Self { Self(Rc::new(RefCell::new(parselet))) } @@ -160,7 +160,7 @@ impl ImlSharedParselet { The function either returns a derived parselet in case it was derive, otherwise it returns a clone of self. */ - pub fn derive(&self, from: &ImlSharedParselet) -> Self { + pub fn derive(&self, from: &ImlParselet) -> Self { let mut constants = self.borrow().constants.clone(); let mut changes = false; @@ -186,7 +186,7 @@ impl ImlSharedParselet { // Create derivation of the inner parselet let parselet = self.borrow(); - Self::new(ImlParselet { + Self::new(ImlParseletConfig { model: parselet.model.clone(), constants, offset: parselet.offset.clone(), @@ -228,13 +228,13 @@ impl ImlSharedParselet { } } -impl std::hash::Hash for ImlSharedParselet { +impl std::hash::Hash for ImlParselet { fn hash(&self, state: &mut H) { self.borrow().hash(state); } } -impl std::fmt::Debug for ImlSharedParselet { +impl std::fmt::Debug for ImlParselet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // Avoid endless recursion in case of recursive parselets if self.0.try_borrow_mut().is_ok() { @@ -245,21 +245,21 @@ impl std::fmt::Debug for ImlSharedParselet { } } -impl std::fmt::Display for ImlSharedParselet { +impl std::fmt::Display for ImlParselet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.0.borrow()) } } -impl 
std::ops::Deref for ImlSharedParselet { - type Target = Rc>; +impl std::ops::Deref for ImlParselet { + type Target = Rc>; fn deref(&self) -> &Self::Target { &self.0 } } -impl std::ops::DerefMut for ImlSharedParselet { +impl std::ops::DerefMut for ImlParselet { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index cd282cde..4e67aab4 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -1,4 +1,4 @@ -//! ImlProgram glues ImlParselet, ImlOp and ImlValue together to produce a VM program. +//! ImlProgram glues ImlParseletConfig, ImlOp and ImlValue together to produce a VM program. use super::*; use crate::value::Parselet; @@ -124,10 +124,7 @@ impl ImlProgram { It can only be run on a previously compiled program without any unresolved usages. */ - fn finalize( - &mut self, - parselets: HashSet, - ) -> HashMap { + fn finalize(&mut self, parselets: HashSet) -> HashMap { #[derive(Debug, Clone, Copy, PartialEq, PartialOrd)] struct Consumable { leftrec: bool, @@ -137,9 +134,9 @@ impl ImlProgram { // Finalize ImlValue fn finalize_value( value: &ImlValue, - current: &ImlSharedParselet, - visited: &mut IndexSet, - configs: &mut HashMap, + current: &ImlParselet, + visited: &mut IndexSet, + configs: &mut HashMap, ) -> Option { match value { ImlValue::Shared(value) => { @@ -180,9 +177,9 @@ impl ImlProgram { // Finalize ImlOp fn finalize_op( op: &ImlOp, - current: &ImlSharedParselet, - visited: &mut IndexSet, - configs: &mut HashMap, + current: &ImlParselet, + visited: &mut IndexSet, + configs: &mut HashMap, ) -> Option { match op { ImlOp::Call { target, .. 
} => finalize_value(target, current, visited, configs), @@ -295,11 +292,11 @@ impl ImlProgram { } } - // Finalize ImlParselet + // Finalize ImlParseletConfig fn finalize_parselet( - current: &ImlSharedParselet, - visited: &mut IndexSet, - configs: &mut HashMap, + current: &ImlParselet, + visited: &mut IndexSet, + configs: &mut HashMap, ) -> Option { // ... only if it's generally flagged to be consuming. let parselet = current.borrow(); diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 470fe816..9bf3b902 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -24,8 +24,8 @@ pub(in crate::compiler) enum ImlValue { Shared(Rc>), // Resolved: static - Value(RefValue), // Compile-time static value - Parselet(ImlSharedParselet), // Parselet instance + Value(RefValue), // Compile-time static value + Parselet(ImlParselet), // Parselet instance // Resolved: dynamic This(bool), // self-reference function (false) or parselet (true) @@ -44,7 +44,7 @@ pub(in crate::compiler) enum ImlValue { name: String, // Identifier }, Instance { - // Parselet instance definition + // Unresolved parselet instance definition offset: Option, // Source offset target: Box, // Instance target args: Vec<(Option, ImlValue)>, // Sequential generic args @@ -178,7 +178,7 @@ impl ImlValue { // This can be the final parselet definition, but constants // might contain Generic references as well, which are being // resolved during compilation. 
- Some(ImlValue::from(ImlParselet { + Some(ImlValue::from(ImlParseletConfig { model: parselet.model.clone(), constants, offset: parselet.offset.clone(), @@ -262,7 +262,7 @@ impl ImlValue { pub fn compile( &self, program: &mut ImlProgram, - current: (&ImlSharedParselet, usize), + current: (&ImlParselet, usize), offset: &Option, call: Option>, ops: &mut Vec, From a0cf9f934be65345d6ad1aa38f5f0a46632c1968 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 22 Aug 2023 10:30:10 +0200 Subject: [PATCH 54/94] Some internal revisions This is the result of some tests for debugging tokay.tok using new Expect
<P>
generic. --- src/compiler/ast.rs | 2 +- src/compiler/compiler.rs | 3 +- src/compiler/iml/imlop.rs | 4 +- src/compiler/iml/imlparselet.rs | 3 + src/prelude.tok | 4 +- src/value/token.rs | 105 ++++++++++++++++---------------- src/vm/op.rs | 31 +++++----- 7 files changed, 80 insertions(+), 72 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index e14cea2e..e8c959b5 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1218,7 +1218,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "repeat" => Op::Repeat.into(), - "reset" => Op::Reset.into(), + "reset" => Op::Reset(false).into(), "unary" => { let children = node["children"].borrow(); diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index aa492c4d..c98882ca 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -109,8 +109,9 @@ impl Compiler { return Err(self.errors.drain(..).collect()); } + assert!(self.scopes.len() == 1); + if self.debug > 1 { - assert!(self.scopes.len() == 1); println!("--- Global scope ---\n{:#?}", self.scopes.last().unwrap()) } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 2d33d722..b2472221 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -331,7 +331,7 @@ impl ImlOp { jumps.push(ret.len() - 1); } - ret.push(Op::Reset); + ret.push(Op::Reset(true)); } else { ret.extend(alt); } @@ -477,7 +477,7 @@ impl ImlOp { ImlOp::Peek { body } => { ops.push(Op::Frame(0)); body.compile(program, current, ops); - ops.push(Op::Reset); + ops.push(Op::Reset(true)); ops.push(Op::Close); } ImlOp::Repeat { body, min, max } => { diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 1b005457..7a15c7e8 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -236,12 +236,15 @@ impl std::hash::Hash for ImlParselet { impl std::fmt::Debug for ImlParselet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, 
"{}", self.0.borrow()) // Avoid endless recursion in case of recursive parselets + /* if self.0.try_borrow_mut().is_ok() { self.0.borrow().fmt(f) } else { write!(f, "{} (recursive)", self.0.borrow()) } + */ } } diff --git a/src/prelude.tok b/src/prelude.tok index 75691711..6bc07fed 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -30,12 +30,12 @@ Not : @
<P>
{ } Peek : @
<P>
{ - p = {P} reset accept p + P reset } Expect : @
<P>
msg=void { P - error(msg || "Expecting " + *P) + error(msg || "Expecting " + *P + ", but got " + repr((Token | Char))) } Number : Float | Int diff --git a/src/value/token.rs b/src/value/token.rs index b44acffe..5abbd459 100644 --- a/src/value/token.rs +++ b/src/value/token.rs @@ -1,6 +1,5 @@ //! Token callables represented by Value::Token use super::{BoxedObject, Dict, Object, RefValue}; -use crate::reader::Reader; use crate::vm::*; use charclass::{charclass, CharClass}; use num_bigint::BigInt; @@ -8,6 +7,8 @@ use num_parse::*; use tokay_macros::tokay_token; extern crate self as tokay; +// todo: The entire Token enum could be split into separate objects. + #[derive(Debug, Clone, Hash, PartialEq, PartialOrd)] pub enum Token { Void, // Matches the empty word @@ -63,15 +64,61 @@ impl Token { ident => builtin_ccl(ident), } } +} + +impl Object for Token { + fn name(&self) -> &'static str { + "token" + } + + fn repr(&self) -> String { + match self { + Token::Void => "Void".to_string(), + Token::EOF => "EOF".to_string(), + Token::Char(ccl) => format!("{:?}", ccl), + Token::Chars(ccl) => format!("{:?}+", ccl), + Token::BuiltinChar(_) | Token::BuiltinChars(_) => "\"\n".to_string(), + Token::Touch(s) => format!("'{}'", s), + Token::Match(s) => format!("''{}''", s), + } + } + + fn is_callable(&self, without_arguments: bool) -> bool { + without_arguments // Tokens don't support arguments + } + + fn is_consuming(&self) -> bool { + true // Tokens always consume! + } + + fn is_nullable(&self) -> bool { + match self { + Token::Void => true, + Token::EOF => false, + Token::Char(ccl) | Token::Chars(ccl) => ccl.len() == 0, //True shouldn't be possible here by definition! + Token::BuiltinChar(_) | Token::BuiltinChars(_) => true, + Token::Match(s) | Token::Touch(s) => s.len() == 0, //True shouldn't be possible here by definition! 
+ } + } + + fn call( + &self, + context: Option<&mut Context>, + args: Vec, + nargs: Option, + ) -> Result { + assert!(context.is_some() && args.len() == 0 && nargs.is_none()); + + let context = context.unwrap(); + let reader = &mut context.thread.reader; - pub fn read(&self, reader: &mut Reader) -> Result { match self { - Token::Void => Ok(Accept::Push(Capture::Empty)), + Token::Void => Ok(Accept::Next), Token::EOF => { - if let None = reader.peek() { - Ok(Accept::Next) - } else { + if let Some(_) = reader.peek() { Err(Reject::Next) + } else { + Ok(Accept::Next) } } Token::Char(ccl) => { @@ -172,52 +219,6 @@ impl Token { } } -impl Object for Token { - fn name(&self) -> &'static str { - "token" - } - - fn repr(&self) -> String { - match self { - Token::Void => "Void".to_string(), - Token::EOF => "EOF".to_string(), - Token::Char(ccl) => format!("{:?}", ccl), - Token::Chars(ccl) => format!("{:?}+", ccl), - Token::BuiltinChar(_) | Token::BuiltinChars(_) => "\"\n".to_string(), - Token::Touch(s) => format!("'{}'", s), - Token::Match(s) => format!("''{}''", s), - } - } - - fn is_callable(&self, without_arguments: bool) -> bool { - without_arguments // Tokens don't support arguments - } - - fn is_consuming(&self) -> bool { - true // Tokens always consume! - } - - fn is_nullable(&self) -> bool { - match self { - Token::Void => true, - Token::EOF => false, - Token::Char(ccl) | Token::Chars(ccl) => ccl.len() == 0, //True shouldn't be possible here by definition! - Token::BuiltinChar(_) | Token::BuiltinChars(_) => true, - Token::Match(s) | Token::Touch(s) => s.len() == 0, //True shouldn't be possible here by definition! 
- } - } - - fn call( - &self, - context: Option<&mut Context>, - args: Vec, - nargs: Option, - ) -> Result { - assert!(context.is_some() && args.len() == 0 && nargs.is_none()); - self.read(&mut context.unwrap().thread.reader) - } -} - impl From for RefValue { fn from(token: Token) -> Self { RefValue::from(Box::new(token) as BoxedObject) diff --git a/src/vm/op.rs b/src/vm/op.rs index 39b9f3b5..5355a0f6 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -23,7 +23,7 @@ pub(crate) enum Op { Frame(usize), // Start new frame with optional relative forward address fuse Capture, // Reset frame capture to current stack size, saving captures Extend, // Extend frame's reader to current position - Reset, // Reset frame + Reset(bool), // Reset frame, either full (true = stack+reader) or reader only (false) Close, // Close frame Collect, // Collect stack values from current frame InCollect, // Same as collect, but degrate the parselet level (5) (fixme: This is temporary!) @@ -122,17 +122,6 @@ impl Op { return Ok(Accept::Next); } - fn dump(ops: &[Op], context: &Context, ip: usize) { - for (i, op) in ops.iter().enumerate() { - context.log(&format!( - "{}{:03} {:?}", - if i == ip { ">" } else { " " }, - i, - op - )); - } - } - assert!(context.frames.len() == 0); // --------------------------------------------------------------------- @@ -157,6 +146,18 @@ impl Op { // Dump entire code context.log("--- Code ---"); + + fn dump(ops: &[Op], context: &Context, ip: usize) { + for (i, op) in ops.iter().enumerate() { + context.log(&format!( + "{}{:03} {:?}", + if i == ip { ">" } else { " " }, + i, + op + )); + } + } + dump(ops, context, ip); // Dump stack and frames @@ -211,8 +212,10 @@ impl Op { Ok(Accept::Next) } - Op::Reset => { - context.stack.truncate(context.frame.capture_start); + Op::Reset(full) => { + if *full { + context.stack.truncate(context.frame.capture_start); + } context.thread.reader.reset(context.frame.reader_start); Ok(Accept::Next) } From 
863e9aba093034c6881a7d54ee856a19a455a328 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 23 Aug 2023 22:07:18 +0200 Subject: [PATCH 55/94] Redefinition of Accept::Return Now Accept::Return uses a Capture, rather than an Option. This solves several problems, especially with lower severities. --- src/vm/accept.rs | 18 ++++++++---------- src/vm/op.rs | 8 ++------ 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/vm/accept.rs b/src/vm/accept.rs index 5530285b..b632eb7e 100644 --- a/src/vm/accept.rs +++ b/src/vm/accept.rs @@ -6,11 +6,11 @@ extern crate self as tokay; /// Representing an accepting state within the Tokay VM. #[derive(Debug, Clone)] pub enum Accept { - Next, // soft-accept, run next instructions at incremented ip - Hold, // soft-accept, run next instruction at current ip - Push(Capture), // soft-accept, push a capture (also 'push'-keyword) - Repeat, // hard-accept, repeat parselet on current position ('repeat'-keyword) - Return(Option), // hard-accept, return/accept entire parselet ('return/accept'-keyword) + Next, // soft-accept, run next instructions at incremented ip + Hold, // soft-accept, run next instruction at current ip + Repeat, // hard-accept, repeat parselet on current position ('repeat'-keyword) + Push(Capture), // soft-accept, push a capture (also 'push'-keyword) + Return(Capture), // hard-accept, return parselet ('return/accept'-keywords) } impl Accept { @@ -18,22 +18,20 @@ impl Accept { pub fn into_push(self, severity: u8) -> Accept { match self { Self::Next | Self::Hold => Self::Push(Capture::Empty), - Self::Push(mut capture) => { + Self::Push(mut capture) | Self::Return(mut capture) => { if capture.get_severity() > severity { capture.set_severity(severity); } Self::Push(capture) } - Self::Repeat | Self::Return(None) => Self::Push(Capture::Empty), - Self::Return(Some(value)) => Self::Push(Capture::Value(value, None, severity)), + Self::Repeat => Self::Push(Capture::Empty), } } // Helper function, extracts a 
contained RefValue from the Accept. pub fn into_refvalue(self) -> RefValue { match self { - Self::Push(capture) => capture.get_value(), - Self::Return(Some(value)) => value, + Self::Push(capture) | Self::Return(capture) => capture.get_value(), _ => tokay::value!(void), } } diff --git a/src/vm/op.rs b/src/vm/op.rs index 5355a0f6..2ee701d9 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -363,12 +363,8 @@ impl Op { let value = context.pop(); Ok(Accept::Push(Capture::Value(value, None, 15))) // high severity for override required here } - - Op::Accept => Ok(Accept::Return(None)), - Op::LoadAccept => { - let value = context.pop(); - Ok(Accept::Return(Some(value))) - } + Op::Accept => Ok(Accept::Return(Capture::Empty)), + Op::LoadAccept => Ok(Accept::Return(context.stack.pop().unwrap())), Op::Repeat => Ok(Accept::Repeat), Op::Reject => { state = Err(Reject::Next); From 949b11c6c07ac9fad560b2fddeff34130b471f0f Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 23 Aug 2023 22:11:19 +0200 Subject: [PATCH 56/94] First attempt of `tokay.tok` using `Expect
<P>
` This version is capable to compile and run itself using cargo run -- src/compiler/tokay.tok -- src/compiler/tokay.tok But it can't be self-contained, as `prelude.tok` must be built into Tokay itself, same as the parser. --- src/compiler/tokay.tok | 128 +++++++++++++++++++++-------------------- src/prelude.tok | 2 +- 2 files changed, 66 insertions(+), 64 deletions(-) diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index d71eeb01..b22ba10c 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -23,8 +23,8 @@ T_EOL : @{ '\n' _ '\r' '\n'? _ ';' _ - peek EOF accept # accept is used to bypass bug #64 here - peek '}' + accept Peek<'}'> + accept Peek } # Prime Tokens @@ -83,7 +83,7 @@ T_String : @{ '\\' T_EscapeSequence Char<^\\\"> EOF error("Unclosed string, expecting '\"'") - }* str_join("", $2) expect '"' + }* str_join("", $2) Expect<'"'> } T_Touch : @{ @@ -91,7 +91,7 @@ T_Touch : @{ '\\' T_EscapeSequence Char<^\\\'> EOF error("Unclosed match, expecting '\''") - }* str_join("", $2) expect '\'' + }* str_join("", $2) Expect<'\''> } T_Integer : @{ @@ -149,31 +149,31 @@ Lvalue : @{ Load : @{ Lvalue '++' ast("inplace_post_inc") Lvalue '--' ast("inplace_post_dec") - '++' expect Lvalue ast("inplace_pre_inc") - '--' expect Lvalue ast("inplace_pre_dec") + '++' Expect ast("inplace_pre_inc") + '--' Expect ast("inplace_pre_dec") Variable } # Parselet Parselet : @{ - '@' _ ParseletGenerics? _ ParseletArguments? expect Block("body") ast("value_parselet") + '@' _ ParseletGenerics? _ ParseletArguments? Expect ast("value_parselet") } ## Parselet: Generics ParseletGeneric : @{ - T_Identifier _ (':' _ expect Atomic)? ast("gen") + T_Identifier _ (':' _ Expect)? ast("gen") } ParseletGenerics : @{ - '<' _ (ParseletGeneric (',' _)?)* _ expect '>' _ + '<' _ (ParseletGeneric (',' _)?)* _ Expect<'>'> _ } ## Parselet: Arguments ParseletArgument : @{ - T_Identifier _ ('=' _ expect Expression?)? ast("arg") + T_Identifier _ ('=' _ Expect)? 
ast("arg") } ParseletArguments : @{ @@ -185,20 +185,20 @@ ParseletArguments : @{ StaticParseletInstance : T_Consumable | Parselet ParseletInstanceArgument : @{ - T_Identifier _ ':' _ expect Atomic _ ast("genarg_named") + T_Identifier _ ':' _ Expect _ ast("genarg_named") Atomic _ ast("genarg") } ParseletInstance : @{ - StaticParseletInstance '<' _ (ParseletInstanceArgument (',' _)?)+ _ expect '>' _ ast("value_generic") + StaticParseletInstance '<' _ (ParseletInstanceArgument (',' _)?)+ _ Expect<'>'> _ ast("value_generic") StaticParseletInstance } # Inline Blocks and Sequences InlineSequenceItem : @{ - T_Alias _ '=>' _ expect Expression ast("alias") - Expression '=>' _ expect Expression ast("alias") + T_Alias _ '=>' _ Expect ast("alias") + Expression '=>' _ Expect ast("alias") Expression } @@ -214,14 +214,14 @@ InlineSequence : @{ } InlineBlock : @{ - '(' _ ___ InlineSequence {___ '|' _ ___ InlineSequence}+ ___ expect ')' ast("block") - '(' _ ___ InlineSequence ___ expect ')' + '(' _ ___ InlineSequence {___ '|' _ ___ InlineSequence}+ ___ Expect<')'> ast("block") + '(' _ ___ InlineSequence ___ Expect<')'> } # Call parameters (used by calls and rvalues) CallArgument : @{ - T_Identifier _ '=' _ expect Expression ast("callarg_named") + T_Identifier _ '=' _ Expect ast("callarg_named") Expression ast("callarg") } @@ -246,7 +246,7 @@ TokenAtom : @{ InlineBlock '@' _ InlineBlock ast("area") Block - ParseletInstance '(' _ ___ CallArguments? ___ expect ')' ast("call") + ParseletInstance '(' _ ___ CallArguments? ___ Expect<')'> ast("call") ParseletInstance } @@ -255,9 +255,10 @@ Token : @{ TokenAtom '*' ast("op_mod_kle") TokenAtom '?' 
ast("op_mod_opt") TokenAtom - 'peek' _standalone_ expect Self ast("op_mod_peek") - 'not' _standalone_ expect Self ast("op_mod_not") - 'expect' _standalone_ expect Self ast("op_mod_expect") + # deprecated: + 'peek' _standalone_ Expect ast("op_mod_peek") + 'not' _standalone_ Expect ast("op_mod_not") + #'expect' _standalone_ Expect ast("op_mod_expect") } # Expression & Flow @@ -281,17 +282,17 @@ Atomic : @{ '(' _ ___ HoldExpression ___ ')' Literal Token - 'if' _standalone_ expect HoldExpression ___ expect Statement (___ 'else' _standalone_ ___ expect Statement)? ast("op_if") - 'for' _standalone_ expect Lvalue _ expect 'in' _standalone_ expect Expression ___ expect Statement ast("op_for") + 'if' _standalone_ Expect ___ Expect (___ 'else' _standalone_ ___ Expect)? ast("op_if") + 'for' _standalone_ Expect _ Expect<'in'> _standalone_ Expect ___ Expect ast("op_for") 'loop' _standalone_ HoldExpression ___ Block ast("op_loop") - 'loop' _standalone_ expect Block ast("op_loop") + 'loop' _standalone_ Expect ast("op_loop") Load } # Rvalue can be a function call or value attribute/subscript Rvalue : @{ - Rvalue '(' _ ___ CallArguments? expect ')' ast("call") + Rvalue '(' _ ___ CallArguments? 
Expect<')'> ast("call") Rvalue (Attribute | Subscript)* ast("rvalue") Atomic } @@ -304,60 +305,60 @@ Unary : @{ } MulDiv : @{ - MulDiv '*' _ expect Unary ast("op_binary_mul") - MulDiv '//' _ expect Unary ast("op_binary_divi") - MulDiv '/' _ expect Unary ast("op_binary_div") - MulDiv '%' _ expect Unary ast("op_binary_mod") + MulDiv '*' _ Expect ast("op_binary_mul") + MulDiv '//' _ Expect ast("op_binary_divi") + MulDiv '/' _ Expect ast("op_binary_div") + MulDiv '%' _ Expect ast("op_binary_mod") Unary } AddSub : @{ - AddSub '+' not '+' _ expect MulDiv ast("op_binary_add") - AddSub '-' not '-' _ expect MulDiv ast("op_binary_sub") + AddSub '+' not '+' _ Expect ast("op_binary_add") + AddSub '-' not '-' _ Expect ast("op_binary_sub") MulDiv } Comparison : @{ AddSub { - '==' _ expect AddSub ast("cmp_eq") - '!=' _ expect AddSub ast("cmp_neq") - '<=' _ expect AddSub ast("cmp_lteq") - '>=' _ expect AddSub ast("cmp_gteq") - '<' _ expect AddSub ast("cmp_lt") - '>' _ expect AddSub ast("cmp_gt") + '==' _ Expect ast("cmp_eq") + '!=' _ Expect ast("cmp_neq") + '<=' _ Expect ast("cmp_lteq") + '>=' _ Expect ast("cmp_gteq") + '<' _ Expect ast("cmp_lt") + '>' _ Expect ast("cmp_gt") }+ ast("comparison") AddSub } LogicalAnd : @{ - LogicalAnd '&&' _ expect Comparison ast("op_logical_and") + LogicalAnd '&&' _ Expect ast("op_logical_and") Comparison } LogicalOr : @{ - LogicalOr '||' _ expect LogicalAnd ast("op_logical_or") + LogicalOr '||' _ Expect ast("op_logical_or") LogicalAnd } HoldExpression : @{ - Lvalue _ '+=' _ expect HoldExpression ast("assign_add_hold") - Lvalue _ '-=' _ expect HoldExpression ast("assign_sub_hold") - Lvalue _ '*=' _ expect HoldExpression ast("assign_mul_hold") - Lvalue _ '/=' _ expect HoldExpression ast("assign_div_hold") - Lvalue _ '//=' _ expect HoldExpression ast("assign_divi_hold") - Lvalue _ '%=' _ expect HoldExpression ast("assign_mod_hold") - Lvalue _ '=' not ('>' | '=') _ expect HoldExpression ast("assign_hold") + Lvalue _ '+=' _ Expect 
ast("assign_add_hold") + Lvalue _ '-=' _ Expect ast("assign_sub_hold") + Lvalue _ '*=' _ Expect ast("assign_mul_hold") + Lvalue _ '/=' _ Expect ast("assign_div_hold") + Lvalue _ '//=' _ Expect ast("assign_divi_hold") + Lvalue _ '%=' _ Expect ast("assign_mod_hold") + Lvalue _ '=' not ('>' | '=') _ Expect ast("assign_hold") LogicalOr } Expression : @{ - Lvalue _ '+=' _ expect HoldExpression ast("assign_add") - Lvalue _ '-=' _ expect HoldExpression ast("assign_sub") - Lvalue _ '*=' _ expect HoldExpression ast("assign_mul") - Lvalue _ '/=' _ expect HoldExpression ast("assign_div") - Lvalue _ '//=' _ expect HoldExpression ast("assign_divi") - Lvalue _ '%=' _ expect HoldExpression ast("assign_mod") - Lvalue _ '=' not ('>' | '=') _ expect HoldExpression ast("assign") + Lvalue _ '+=' _ Expect ast("assign_add") + Lvalue _ '-=' _ Expect ast("assign_sub") + Lvalue _ '*=' _ Expect ast("assign_mul") + Lvalue _ '/=' _ Expect ast("assign_div") + Lvalue _ '//=' _ Expect ast("assign_divi") + Lvalue _ '%=' _ Expect ast("assign_mod") + Lvalue _ '=' not ('>' | '=') _ Expect ast("assign") LogicalOr } @@ -380,13 +381,13 @@ Statement : @{ # Blocks and Sequences Block : @ emit = "block" { - '{' _ ___ '}' ast("value_void") - '{' _ Instruction* _ expect '}' ast(emit) + #'{' _ ___ '}' ast("value_void") + '{' _ Instruction* _ Expect<'}'> ast(emit) } SequenceItem : @{ - T_Alias _ '=>' _ expect Expression ast("alias") - Expression '=>' _ expect Expression ast("alias") + T_Alias _ '=>' _ Expect ast("alias") + Expression '=>' _ Expect ast("alias") Statement } @@ -400,15 +401,15 @@ Sequences : @{ } Instruction : @{ - 'begin' _standalone_ Sequences expect T_EOL ast("begin") - 'end' _standalone_ Sequences expect T_EOL ast("end") + 'begin' _standalone_ Sequences Expect ast("begin") + 'end' _standalone_ Sequences Expect ast("end") T_Identifier _ ':' _ { Literal _ peek T_EOL Token _ peek T_EOL Sequences - } expect T_EOL ast("constant") + } Expect ast("constant") Statement T_EOL - Sequences expect T_EOL 
+ Sequences T_EOL T_EOL } @@ -421,5 +422,6 @@ Tokay : @{ Char error("Parse error, unexpected token", true) } -_ Tokay? expect EOF ast("main") -#_ Tokay? expect EOF ast_print(ast("main")) +_ Tokay? Expect ast("main") +#_ Tokay? Expect ast2rust(ast("main")) +#_ Tokay? Expect ast_print(ast("main")) diff --git a/src/prelude.tok b/src/prelude.tok index 6bc07fed..0cd4924a 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -34,7 +34,7 @@ Peek : @
<P>
{ } Expect : @<P>
msg=void { - P + accept P error(msg || "Expecting " + *P + ", but got " + repr((Token | Char))) } From 62b47230e57ec39f060e85bee681e0d8d1aefd6c Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 24 Aug 2023 21:55:15 +0200 Subject: [PATCH 57/94] Fixed whitespace issue in tokay.tok and parser.rs Change was done manually. Sequences where invalidly detected as function calls. --- src/compiler/parser.rs | 4 ---- src/compiler/tokay.tok | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index bb1b95d5..1525b15b 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -2930,10 +2930,6 @@ impl Parser { "value" => ">" ])) ])), - (value!([ - "emit" => "identifier", - "value" => "_" - ])), (value!([ "emit" => "call", "children" => diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index b22ba10c..de6bc7f5 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -190,7 +190,7 @@ ParseletInstanceArgument : @{ } ParseletInstance : @{ - StaticParseletInstance '<' _ (ParseletInstanceArgument (',' _)?)+ _ Expect<'>'> _ ast("value_generic") + StaticParseletInstance '<' _ (ParseletInstanceArgument (',' _)?)+ _ Expect<'>'> ast("value_generic") StaticParseletInstance } From ea09f24c0a75f7f360330e197b6d141abba6fc8e Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 24 Aug 2023 22:53:58 +0200 Subject: [PATCH 58/94] Built-in prelude using ast2rust (similar to parser) This could be optimized in future, but is okay for now to continue. 
--- build/Makefile | 15 + src/builtin/mod.rs | 3 +- src/compiler/compiler.rs | 8 +- src/compiler/mod.rs | 1 + src/compiler/parser.rs | 2 +- src/compiler/prelude.rs | 899 +++++++++++++++++++++++++++++++++++++++ src/main.rs | 4 +- src/utils.rs | 2 +- 8 files changed, 923 insertions(+), 11 deletions(-) create mode 100644 src/compiler/prelude.rs diff --git a/build/Makefile b/build/Makefile index 860401e9..66becc66 100644 --- a/build/Makefile +++ b/build/Makefile @@ -6,6 +6,7 @@ all: @echo "" @echo " make builtins update src/_builtins.rs from src/" @echo " make parser update src/compiler/parse.rs from src/compiler/tokay.tok" + @echo " make prelude update src/compiler/prelude.rs from src/prelude.tok" @echo "" # builtins -------------------------------------------------------------------- @@ -36,3 +37,17 @@ show-parser: reset-parser: git checkout $(PARSER) + +# prelude ---------------------------------------------------------------------- +PRELUDE=../src/compiler/prelude.rs + +prelude: $(PRELUDE) + +$(PRELUDE): .FORCE + $(ETARENEG) $@ >$@.1 && mv $@.1 $@ + +show-prelude: + $(ETARENEG) $(PRELUDE) 2>/dev/null + +reset-prelude: + git checkout $(PRELUDE) diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs index 93e90965..52234c83 100644 --- a/src/builtin/mod.rs +++ b/src/builtin/mod.rs @@ -182,7 +182,8 @@ tokay_function!("type : @value", value!(value.name()).into()); tokay_function!("debug : @level", { if let Ok(level) = level.to_usize() { if level < u8::MAX as usize { - context.unwrap().thread.debug = level as u8; + let context = context.unwrap(); + context.debug = level as u8; return Ok(Accept::Next); } } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index c98882ca..f0ee25b6 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -63,7 +63,7 @@ impl Compiler { By default, the prelude should be loaded, otherwise several standard parselets are not available. Ignoring the prelude is only useful on bootstrap currently. 
*/ - pub fn new(with_prelude: bool) -> Self { + pub fn new() -> Self { let mut compiler = Self { parser: None, debug: 0, @@ -85,11 +85,7 @@ impl Compiler { } // Compile with the default prelude - if with_prelude { - compiler - .compile_from_str(include_str!("../prelude.tok")) - .unwrap(); // this should panic in case of an error! - } + compiler.load_prelude(); // Set compiler debug level afterwards compiler.debug = if let Ok(level) = std::env::var("TOKAY_DEBUG") { diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index dc29a4c1..daa77c6e 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -4,6 +4,7 @@ pub(crate) mod ast; mod compiler; mod iml; mod parser; +mod prelude; use compiler::*; use iml::*; diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 1525b15b..899728c3 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -7680,7 +7680,7 @@ impl Parser { /*ETARENEG*/ ; - let mut compiler = Compiler::new(false); + let mut compiler = Compiler::new(); compiler.debug = 0; // unset debug always Self( diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs new file mode 100644 index 00000000..9c30fe18 --- /dev/null +++ b/src/compiler/prelude.rs @@ -0,0 +1,899 @@ +/** Tokay default prelude + +The prelude is a default stub of Tokay standard parselets, which implements +fundamental parts of the Tokay language itself. + +It's defined in src/prelude.tok and pre-compiled as an AST within the code. +*/ +use super::*; +use crate::value; + +impl Compiler { + pub(super) fn load_prelude(&mut self) { + // fixme: Make this lazy_static, so its created only once! 
+ let ast = + /*GENERATE cargo run -- "`sed 's/ast("main")/ast2rust(ast("main"), level=3)/g' compiler/tokay.tok`" -- prelude.tok */ + value!([ + "emit" => "main", + "children" => + (value!([ + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Repeat" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "arg", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "min" + ])), + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) + ])) + ])), + (value!([ + "emit" => "arg", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "max" + ])), + (value!([ + "emit" => "value_void", + "value" => "void" + ])) + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + (value!([ + "emit" => "begin", + "children" => + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "block", + "children" => + (value!([ + "emit" => "assign", + "children" => + (value!([ + (value!([ + "emit" => "lvalue", + "children" => + (value!([ + "emit" => "identifier", + "value" => "res" + ])) + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + "emit" => "identifier", + "value" => "list" + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "P" + ])), + (value!([ + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "assign_add", + "children" => + (value!([ + (value!([ + "emit" => "lvalue", + "children" => + (value!([ + "emit" => "identifier", + "value" => "res" + ])) + ])), + (value!([ + "emit" => "capture_index", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_if", + 
"children" => + (value!([ + (value!([ + "emit" => "op_logical_or", + "children" => + (value!([ + (value!([ + "emit" => "op_unary_not", + "children" => + (value!([ + "emit" => "identifier", + "value" => "max" + ])) + ])), + (value!([ + "emit" => "comparison", + "children" => + (value!([ + (value!([ + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "len" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "cmp_lt", + "children" => + (value!([ + "emit" => "identifier", + "value" => "max" + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_repeat", + "value" => "repeat" + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_if", + "children" => + (value!([ + (value!([ + "emit" => "op_logical_or", + "children" => + (value!([ + (value!([ + "emit" => "op_unary_not", + "children" => + (value!([ + "emit" => "identifier", + "value" => "res" + ])) + ])), + (value!([ + "emit" => "comparison", + "children" => + (value!([ + (value!([ + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "len" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "cmp_lt", + "children" => + (value!([ + "emit" => "identifier", + "value" => "min" + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_reject", + "value" => "reject" + ])) + ])) + ])), + (value!([ + "emit" => "identifier", + "value" => "res" + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Pos" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => 
"P" + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + "emit" => "value_generic", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Repeat" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Kle" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "value_generic", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Repeat" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "callarg_named", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "min" + ])), + (value!([ + "emit" => "value_integer", + "value" => 0 + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Opt" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Void" + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + 
])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "List" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Separator" + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_touch", + "value" => "," + ])), + (value!([ + "emit" => "identifier", + "value" => "_" + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "empty" + ])), + (value!([ + "emit" => "value_true", + "value" => "true" + ])) + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_self", + "value" => "Self" + ])), + (value!([ + "emit" => "identifier", + "value" => "Separator" + ])), + (value!([ + "emit" => "identifier", + "value" => "P" + ])), + (value!([ + "emit" => "op_binary_add", + "children" => + (value!([ + (value!([ + "emit" => "capture_index", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) + ])), + (value!([ + "emit" => "capture_index", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 3 + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_if", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "empty" + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + (value!([ + "emit" => "value_token_self", + "value" => "Self" + ])), + (value!([ + "emit" => "identifier", + "value" => "Separator" + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => 
"identifier", + "value" => "P" + ])), + (value!([ + "emit" => "list", + "children" => + (value!([ + "emit" => "capture_index", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "P" + ])), + (value!([ + "emit" => "op_reject", + "value" => "reject" + ])) + ])) + ])), + (value!([ + "emit" => "identifier", + "value" => "Void" + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "P" + ])), + (value!([ + "emit" => "op_reset", + "value" => "reset" + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "arg", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "msg" + ])), + (value!([ + "emit" 
=> "value_void", + "value" => "void" + ])) + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + (value!([ + "emit" => "op_accept", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "error" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "op_logical_or", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "msg" + ])), + (value!([ + "emit" => "op_binary_add", + "children" => + (value!([ + (value!([ + "emit" => "op_binary_add", + "children" => + (value!([ + (value!([ + "emit" => "op_binary_add", + "children" => + (value!([ + (value!([ + "emit" => "value_string", + "value" => "Expecting " + ])), + (value!([ + "emit" => "op_deref", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "value_string", + "value" => ", but got " + ])) + ])) + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "repr" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Token" + ])) + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "value_token_any", + "value" => "Char" + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Number" + ])), + (value!([ + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Float" + ])) + ])), + (value!([ + "emit" => 
"sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Int" + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Token" + ])), + (value!([ + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Word" + ])) + ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Number" + ])) + ])), + (value!([ + "emit" => "sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AsciiPunctuation" + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ]) + /*ETARENEG*/ + ; + + self.compile_from_ast(&ast) + .expect("prelude cannot be compiled!") + .expect("prelude contains no main?"); + } +} diff --git a/src/main.rs b/src/main.rs index f550fe37..2c63514a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -98,7 +98,7 @@ fn get_readers(opts: &Opts) -> Vec { // Read-Eval-Print-Loop (REPL) for Tokay fn repl(opts: &Opts) { let mut globals: Vec = Vec::new(); - let mut compiler = Compiler::new(true); + let mut compiler = Compiler::new(); // todo: Implement a completer? let mut readline = rustyline::Editor::<()>::new(); @@ -230,7 +230,7 @@ fn main() { } if let Some(program) = program { - let mut compiler = Compiler::new(true); + let mut compiler = Compiler::new(); match compiler.compile(program) { Ok(None) => {} diff --git a/src/utils.rs b/src/utils.rs index 41880355..a9be5de9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -9,7 +9,7 @@ use std::process::{Command, Stdio}; This function is mostly used internally within tests, but can also be used from outside. 
*/ pub fn run(src: &str, input: &str) -> Result, String> { - let mut compiler = Compiler::new(true); + let mut compiler = Compiler::new(); match compiler.compile_from_str(src) { Ok(Some(program)) => program From 861c583e2a770db71fc2bb4a5feb11ac7b855ba3 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 24 Aug 2023 23:00:36 +0200 Subject: [PATCH 59/94] Some code maintenance for Compiler --- src/compiler/compiler.rs | 55 ++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index f0ee25b6..107337dd 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -1,4 +1,4 @@ -//! Tokay compiler interface +//! Tokay compiler use super::*; use crate::builtin::Builtin; @@ -16,7 +16,7 @@ Parselets introduce new variable scopes. Loops introduce a new loop scope. */ #[derive(Debug)] -pub(in crate::compiler) enum Scope { +pub(super) enum Scope { Parselet { // parselet-level scope (variables and constants can be defined here) usage_start: usize, // Begin of usages to resolve until when scope is closed @@ -44,24 +44,25 @@ The compiler works in a mode so that statics, variables and constants once built won't be removed and can be accessed on later calls. 
*/ pub struct Compiler { - parser: Option, // Internal Tokay parser - pub debug: u8, // Compiler debug mode - pub(in crate::compiler) statics: IndexSet, // Static values collected during compilation - pub(in crate::compiler) scopes: Vec, // Current compilation scopes - pub(in crate::compiler) usages: Vec, // Unresolved values - pub(in crate::compiler) errors: Vec, // Collected errors during compilation + parser: Option, // Internal Tokay parser + pub debug: u8, // Compiler debug mode + pub(super) statics: IndexSet, // Static values collected during compilation + pub(super) scopes: Vec, // Current compilation scopes + pub(super) usages: Vec, // Unresolved values + pub(super) errors: Vec, // Collected errors during compilation } impl Compiler { /** Initialize a new compiler. + The compiler serves functions to compile Tokay source code into programs executable by + the Tokay VM. It uses an intermediate language representation to implement derives of + generics, statics, etc. + The compiler struct serves as some kind of helper that should be used during traversal of a Tokay program's AST. It therefore offers functions to open particular blocks and handle symbols in different levels. Parselets are created by using the parselet_pop() function with provided parameters. - - By default, the prelude should be loaded, otherwise several standard parselets are not available. - Ignoring the prelude is only useful on bootstrap currently. 
*/ pub fn new() -> Self { let mut compiler = Self { @@ -167,7 +168,7 @@ impl Compiler { This avoids that the compiler produces multiple results pointing to effectively the same values (althought they are different objects, but the same value) */ - pub(in crate::compiler) fn register_static(&mut self, value: RefValue) -> ImlValue { + pub(super) fn register_static(&mut self, value: RefValue) -> ImlValue { if let Some(value) = self.statics.get(&value) { ImlValue::Value(value.clone()) } else { @@ -177,7 +178,7 @@ impl Compiler { } /// Tries to resolves open usages from the current scope - pub(in crate::compiler) fn resolve(&mut self) { + pub(super) fn resolve(&mut self) { if let Scope::Parselet { usage_start, .. } | Scope::Block { usage_start, .. } = &self.scopes[0] { @@ -196,7 +197,7 @@ impl Compiler { } /// Push a parselet scope - pub(in crate::compiler) fn parselet_push(&mut self) { + pub(super) fn parselet_push(&mut self) { self.scopes.insert( 0, Scope::Parselet { @@ -213,7 +214,7 @@ impl Compiler { } /// Push a block scope - pub(in crate::compiler) fn block_push(&mut self) { + pub(super) fn block_push(&mut self) { self.scopes.insert( 0, Scope::Block { @@ -224,12 +225,12 @@ impl Compiler { } /// Push a loop scope - pub(in crate::compiler) fn loop_push(&mut self) { + pub(super) fn loop_push(&mut self) { self.scopes.insert(0, Scope::Loop); } /// Resolves and drops a parselet scope and creates a new parselet from it. - pub(in crate::compiler) fn parselet_pop( + pub(super) fn parselet_pop( &mut self, offset: Option, name: Option, @@ -322,20 +323,20 @@ impl Compiler { } /// Drops a block scope. - pub(in crate::compiler) fn block_pop(&mut self) { + pub(super) fn block_pop(&mut self) { assert!(self.scopes.len() > 0 && matches!(self.scopes[0], Scope::Block { .. })); self.resolve(); self.scopes.remove(0); } /// Drops a loop scope. 
- pub(in crate::compiler) fn loop_pop(&mut self) { + pub(super) fn loop_pop(&mut self) { assert!(self.scopes.len() > 0 && matches!(self.scopes[0], Scope::Loop)); self.scopes.remove(0); } /// Marks the nearest parselet scope as consuming - pub(in crate::compiler) fn parselet_mark_consuming(&mut self) { + pub(super) fn parselet_mark_consuming(&mut self) { for scope in &mut self.scopes { if let Scope::Parselet { is_consuming, .. } = scope { *is_consuming = true; @@ -347,7 +348,7 @@ impl Compiler { } /// Check if there's a loop - pub(in crate::compiler) fn loop_check(&mut self) -> bool { + pub(super) fn loop_check(&mut self) -> bool { for i in 0..self.scopes.len() { match &self.scopes[i] { Scope::Parselet { .. } => return false, @@ -360,7 +361,7 @@ impl Compiler { } /** Insert new local variable under given name in current scope. */ - pub(in crate::compiler) fn new_local(&mut self, name: &str) -> usize { + pub(super) fn new_local(&mut self, name: &str) -> usize { for scope in &mut self.scopes { // Check for scope with variables if let Scope::Parselet { @@ -382,7 +383,7 @@ impl Compiler { } /** Pop unused or create new temporary variable */ - pub(in crate::compiler) fn pop_temp(&mut self) -> usize { + pub(super) fn pop_temp(&mut self) -> usize { for scope in &mut self.scopes { // Check for scope with variables if let Scope::Parselet { @@ -404,7 +405,7 @@ impl Compiler { } /** Release temporary variable for later re-use */ - pub(in crate::compiler) fn push_temp(&mut self, addr: usize) { + pub(super) fn push_temp(&mut self, addr: usize) { for scope in &mut self.scopes { // Check for scope with variables if let Scope::Parselet { temporaries, .. } = scope { @@ -417,7 +418,7 @@ impl Compiler { } /** Set constant to name in current scope. */ - pub(in crate::compiler) fn set_constant(&mut self, name: &str, mut value: ImlValue) { + pub(super) fn set_constant(&mut self, name: &str, mut value: ImlValue) { /* Special meaning for whitespace constants names "_" and "__". 
@@ -483,7 +484,7 @@ impl Compiler { } /** Get named value, either from current or preceding scope, a builtin or special. */ - pub(in crate::compiler) fn get(&mut self, name: &str) -> Option { + pub(super) fn get(&mut self, name: &str) -> Option { let mut top_parselet = true; for (i, scope) in self.scopes.iter().enumerate() { @@ -525,7 +526,7 @@ impl Compiler { } /** Get defined builtin. */ - pub(in crate::compiler) fn get_builtin(&mut self, name: &str) -> Option { + pub(super) fn get_builtin(&mut self, name: &str) -> Option { // Check for a builtin function if let Some(builtin) = Builtin::get(name) { return Some(RefValue::from(builtin).into()); // fixme: Makes a Value into a RefValue into a Value... From fa231542c56d697960aaf0f96915c00e981379ca Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 25 Aug 2023 11:04:37 +0200 Subject: [PATCH 60/94] Improving Expect
<P>
to respect EOF as well --- src/builtin/mod.rs | 1 + src/compiler/prelude.rs | 8 ++++++++ src/compiler/tokay.tok | 2 +- src/prelude.tok | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs index 52234c83..25e72f8e 100644 --- a/src/builtin/mod.rs +++ b/src/builtin/mod.rs @@ -184,6 +184,7 @@ tokay_function!("debug : @level", { if level < u8::MAX as usize { let context = context.unwrap(); context.debug = level as u8; + //context.thread.debug = level as u8; return Ok(Accept::Next); } } diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index 9c30fe18..e9ef2f17 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -796,6 +796,14 @@ impl Compiler { "emit" => "value_token_any", "value" => "Char" ])) + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "value_string", + "value" => "end-of-file" + ])) ])) ])) ])) diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index de6bc7f5..e7f45805 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -381,7 +381,7 @@ Statement : @{ # Blocks and Sequences Block : @ emit = "block" { - #'{' _ ___ '}' ast("value_void") + '{' _ ___ '}' ast("value_void") '{' _ Instruction* _ Expect<'}'> ast(emit) } diff --git a/src/prelude.tok b/src/prelude.tok index 0cd4924a..9cb89bdd 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -35,7 +35,7 @@ Peek : @
<P>
{ Expect : @
<P>
msg=void { accept P - error(msg || "Expecting " + *P + ", but got " + repr((Token | Char))) + error(msg || "Expecting " + *P + ", but got " + repr((Token | Char | "end-of-file"))) } Number : Float | Int From b72ceb70356d53ca9a76ef2f1b876cc1c17cd299 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 27 Aug 2023 22:31:20 +0200 Subject: [PATCH 61/94] New parser.rs from tokay.tok, eliminating `expect`, `not` and `peek` --- src/compiler/ast.rs | 3 - src/compiler/iml/imlop.rs | 138 +--- src/compiler/iml/imlprogram.rs | 4 - src/compiler/parser.rs | 1400 ++++++++++++++++++++++---------- src/compiler/tokay.tok | 22 +- src/vm/op.rs | 29 +- 6 files changed, 1010 insertions(+), 586 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index e8c959b5..6fc96f18 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1404,9 +1404,6 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "pos" => res.into_positive(), "kle" => res.into_kleene(), "opt" => res.into_optional(), - "peek" => res.into_peek(), - "expect" => res.into_expect(None), - "not" => res.into_not(), _ => unreachable!(), } } diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index b2472221..4fcd4c05 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -52,22 +52,6 @@ pub(in crate::compiler) enum ImlOp { // v--- below variants are being replaced by Tokay generics as soon as they are implemented ---v // - // Expect (deprecated!) - Expect { - body: Box, - msg: Option, - }, - - // Not (deprecated!) - Not { - body: Box, - }, - - // Peek (deprecated!) - Peek { - body: Box, - }, - // Repeat (deprecated!) 
Repeat { body: Box, @@ -185,91 +169,6 @@ impl ImlOp { } } - /// Turns ImlOp construct into a peeked parser - pub fn into_peek(self) -> Self { - Self::Peek { - body: Box::new(self), - } - } - - /// Turns ImlOp construct into a negated parser - pub fn into_not(self) -> Self { - Self::Not { - body: Box::new(self), - } - } - - /// Turns ImlOp construct into an expecting parser - pub fn into_expect(self, mut msg: Option) -> Self { - // When no msg is provided, generate a message from the next consumables in range! - // This got a bit out of hand, and should be done later via something like a FIRST() attribute on parselet. - // Generally, this code becomes unnecessary when the Expect
<P>
generic is made available (see #10 for details) - if msg.is_none() { - fn get_expect(op: &ImlOp) -> Option { - match op { - ImlOp::Call { target, .. } | ImlOp::Load { target, .. } - if target.is_consuming() => - { - Some(format!("{}", target)) - } - ImlOp::Seq { seq, .. } => { - let mut txt = None; - - for item in seq { - item.walk(&mut |op| { - txt = get_expect(op); - !txt.is_some() - }); - - if txt.is_some() { - break; - } - } - - txt - } - ImlOp::Alt { alts, .. } => { - let mut all_txt = Vec::new(); - - for item in alts { - let mut txt = None; - - item.walk(&mut |op| { - txt = get_expect(op); - !txt.is_some() - }); - - if let Some(txt) = txt { - all_txt.push(txt); - } - } - - if all_txt.is_empty() { - None - } else { - Some(all_txt.join(" or ")) - } - } - _ => None, - } - } - - self.walk(&mut |op| { - msg = get_expect(op); - !msg.is_some() - }); - - if let Some(txt) = msg { - msg = Some(format!("Expecting {}", txt).to_string()) - } - } - - Self::Expect { - body: Box::new(self), - msg, - } - } - /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program pub fn compile_to_vec( &self, @@ -448,38 +347,6 @@ impl ImlOp { } } // DEPRECATED BELOW!!! 
- ImlOp::Expect { body, msg } => { - let mut expect = Vec::new(); - body.compile(program, current, &mut expect); - - ops.push(Op::Frame(expect.len() + 2)); - - ops.extend(expect); - ops.extend(vec![ - Op::Forward(2), - Op::Error(Some(if let Some(msg) = msg { - msg.clone() - } else { - format!("Expecting {:?}", body) - })), - Op::Close, - ]); - } - ImlOp::Not { body } => { - let mut body_ops = Vec::new(); - let body_len = body.compile(program, current, &mut body_ops); - ops.push(Op::Frame(body_len + 3)); - ops.extend(body_ops); - ops.push(Op::Close); - ops.push(Op::Next); - ops.push(Op::Close); - } - ImlOp::Peek { body } => { - ops.push(Op::Frame(0)); - body.compile(program, current, ops); - ops.push(Op::Reset(true)); - ops.push(Op::Close); - } ImlOp::Repeat { body, min, max } => { let mut body_ops = Vec::new(); let body_len = body.compile(program, current, &mut body_ops); @@ -584,10 +451,7 @@ impl ImlOp { true } // DEPRECATED BELOW!!! - ImlOp::Expect { body, .. } - | ImlOp::Not { body } - | ImlOp::Peek { body } - | ImlOp::Repeat { body, .. } => body.walk(func), + ImlOp::Repeat { body, .. } => body.walk(func), _ => true, } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 4e67aab4..f512b1d5 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -268,10 +268,6 @@ impl ImlProgram { } // DEPRECATED BELOW!!! - ImlOp::Expect { body, .. } => finalize_op(body, current, visited, configs), - ImlOp::Not { body } | ImlOp::Peek { body } => { - finalize_op(body, current, visited, configs) - } ImlOp::Repeat { body, min, .. 
} => { if let Some(consumable) = finalize_op(body, current, visited, configs) { if *min == 0 { diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index 899728c3..f9b54429 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -169,33 +169,53 @@ impl Parser { "children" => (value!([ (value!([ - "emit" => "op_mod_peek", + "emit" => "value_generic", "children" => (value!([ - "emit" => "op_mod_not", - "children" => - (value!([ - "emit" => "value_token_ccl", - "children" => - (value!([ - "emit" => "ccl", - "children" => + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_generic", + "children" => + (value!([ (value!([ - (value!([ - "emit" => "range", - "value" => "AZ" - ])), - (value!([ - "emit" => "char", - "value" => "_" - ])), - (value!([ - "emit" => "range", - "value" => "az" - ])) + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_ccl", + "children" => + (value!([ + "emit" => "ccl", + "children" => + (value!([ + (value!([ + "emit" => "range", + "value" => "AZ" + ])), + (value!([ + "emit" => "char", + "value" => "_" + ])), + (value!([ + "emit" => "range", + "value" => "az" + ])) + ])) + ])) + ])) ])) - ])) - ])) + ])) + ])) + ])) ])) ])), (value!([ @@ -279,29 +299,47 @@ impl Parser { ])) ])), (value!([ - "emit" => "sequence", + "emit" => "op_accept", "children" => (value!([ - (value!([ - "emit" => "op_mod_peek", - "children" => + "emit" => "value_generic", + "children" => + (value!([ (value!([ "emit" => "identifier", - "value" => "EOF" + "value" => "Peek" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "}" + ])) ])) - ])), - (value!([ - "emit" => "op_accept", - "value" => "accept" - ])) + ])) ])) ])), (value!([ - "emit" => "op_mod_peek", + "emit" => "op_accept", "children" => (value!([ - "emit" => 
"value_token_touch", - "value" => "}" + "emit" => "value_generic", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "EOF" + ])) + ])) + ])) ])) ])) ])) @@ -1298,11 +1336,21 @@ impl Parser { ])) ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "\"" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "\"" + ])) + ])) ])) ])) ])) @@ -1434,11 +1482,21 @@ impl Parser { ])) ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "'" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "'" + ])) + ])) ])) ])) ])) @@ -2269,11 +2327,21 @@ impl Parser { "value" => "++" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Lvalue" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Lvalue" + ])) + ])) ])) ])), (value!([ @@ -2305,11 +2373,21 @@ impl Parser { "value" => "--" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Lvalue" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Lvalue" + ])) + ])) ])) ])), (value!([ @@ -2388,25 +2466,35 @@ impl Parser { ])) ])), (value!([ - "emit" => "op_mod_expect", + "emit" => 
"value_generic", "children" => (value!([ - "emit" => "call", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "Block" - ])), + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => (value!([ - "emit" => "callarg", + "emit" => "call", "children" => (value!([ - "emit" => "value_string", - "value" => "body" + (value!([ + "emit" => "identifier", + "value" => "Block" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "value_string", + "value" => "body" + ])) + ])) ])) ])) - ])) + ])) ])) ])), (value!([ @@ -2475,11 +2563,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Atomic" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Atomic" + ])) + ])) ])) ])) ])) @@ -2572,11 +2670,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ">" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ">" + ])) + ])) ])) ])), (value!([ @@ -2631,15 +2739,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "op_mod_opt", - "children" => - (value!([ - "emit" => "identifier", - "value" => "Expression" - ])) + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])) ])) @@ -2787,11 +2901,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + 
"emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Atomic" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Atomic" + ])) + ])) ])) ])), (value!([ @@ -2923,11 +3047,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ">" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ">" + ])) + ])) ])) ])), (value!([ @@ -2995,11 +3129,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Expression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])), (value!([ @@ -3039,11 +3183,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Expression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])), (value!([ @@ -3115,11 +3269,21 @@ impl Parser { "value" => "___" ])), (value!([ - "emit" => "op_mod_peek", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ")" + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ")" + ])) + ])) ])) ])), (value!([ @@ -3313,11 +3477,21 @@ impl Parser { "value" 
=> "___" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ")" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ")" + ])) + ])) ])) ])), (value!([ @@ -3365,11 +3539,21 @@ impl Parser { "value" => "___" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ")" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ")" + ])) + ])) ])) ])) ])) @@ -3415,11 +3599,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Expression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])), (value!([ @@ -3873,15 +4067,25 @@ impl Parser { "value" => "___" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ")" - ])) - ])), - (value!([ - "emit" => "call", + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ")" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "call", "children" => (value!([ (value!([ @@ -4023,126 +4227,6 @@ impl Parser { (value!([ "emit" => "identifier", "value" => "TokenAtom" - ])), - (value!([ - "emit" => "sequence", - "children" => - (value!([ - (value!([ - "emit" => "value_token_touch", - "value" => "peek" - ])), - (value!([ - "emit" => "identifier", - "value" => 
"_standalone_" - ])), - (value!([ - "emit" => "op_mod_expect", - "children" => - (value!([ - "emit" => "value_token_self", - "value" => "Self" - ])) - ])), - (value!([ - "emit" => "call", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "ast" - ])), - (value!([ - "emit" => "callarg", - "children" => - (value!([ - "emit" => "value_string", - "value" => "op_mod_peek" - ])) - ])) - ])) - ])) - ])) - ])), - (value!([ - "emit" => "sequence", - "children" => - (value!([ - (value!([ - "emit" => "value_token_touch", - "value" => "not" - ])), - (value!([ - "emit" => "identifier", - "value" => "_standalone_" - ])), - (value!([ - "emit" => "op_mod_expect", - "children" => - (value!([ - "emit" => "value_token_self", - "value" => "Self" - ])) - ])), - (value!([ - "emit" => "call", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "ast" - ])), - (value!([ - "emit" => "callarg", - "children" => - (value!([ - "emit" => "value_string", - "value" => "op_mod_not" - ])) - ])) - ])) - ])) - ])) - ])), - (value!([ - "emit" => "sequence", - "children" => - (value!([ - (value!([ - "emit" => "value_token_touch", - "value" => "expect" - ])), - (value!([ - "emit" => "identifier", - "value" => "_standalone_" - ])), - (value!([ - "emit" => "op_mod_expect", - "children" => - (value!([ - "emit" => "value_token_self", - "value" => "Self" - ])) - ])), - (value!([ - "emit" => "call", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "ast" - ])), - (value!([ - "emit" => "callarg", - "children" => - (value!([ - "emit" => "value_string", - "value" => "op_mod_expect" - ])) - ])) - ])) - ])) - ])) ])) ])) ])) @@ -4431,11 +4515,21 @@ impl Parser { "value" => "_standalone_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => 
"genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -4443,11 +4537,21 @@ impl Parser { "value" => "___" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Statement" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Statement" + ])) + ])) ])) ])), (value!([ @@ -4474,11 +4578,21 @@ impl Parser { "value" => "___" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Statement" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Statement" + ])) + ])) ])) ])) ])) @@ -4517,11 +4631,21 @@ impl Parser { "value" => "_standalone_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Lvalue" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Lvalue" + ])) + ])) ])) ])), (value!([ @@ -4529,11 +4653,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "in" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "in" + ])) + ])) ])) ])), (value!([ @@ -4541,11 +4675,21 @@ impl Parser { "value" => "_standalone_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Expression" + (value!([ 
+ "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])), (value!([ @@ -4553,11 +4697,21 @@ impl Parser { "value" => "___" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Statement" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Statement" + ])) + ])) ])) ])), (value!([ @@ -4637,11 +4791,21 @@ impl Parser { "value" => "_standalone_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Block" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Block" + ])) + ])) ])) ])), (value!([ @@ -4717,11 +4881,21 @@ impl Parser { ])) ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => ")" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => ")" + ])) + ])) ])) ])), (value!([ @@ -4831,11 +5005,21 @@ impl Parser { "value" => "-" ])), (value!([ - "emit" => "op_mod_not", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "-" + (value!([ + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "-" + ])) + ])) ])) ])), (value!([ @@ -4989,11 +5173,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => 
"identifier", - "value" => "Unary" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Unary" + ])) + ])) ])) ])), (value!([ @@ -5033,11 +5227,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Unary" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Unary" + ])) + ])) ])) ])), (value!([ @@ -5077,11 +5281,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Unary" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Unary" + ])) + ])) ])) ])), (value!([ @@ -5121,11 +5335,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Unary" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Unary" + ])) + ])) ])) ])), (value!([ @@ -5185,11 +5409,21 @@ impl Parser { "value" => "+" ])), (value!([ - "emit" => "op_mod_not", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "+" + (value!([ + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "+" + ])) + ])) ])) ])), (value!([ @@ -5197,11 +5431,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => 
(value!([ - "emit" => "identifier", - "value" => "MulDiv" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "MulDiv" + ])) + ])) ])) ])), (value!([ @@ -5237,11 +5481,21 @@ impl Parser { "value" => "-" ])), (value!([ - "emit" => "op_mod_not", + "emit" => "value_generic", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "-" + (value!([ + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "-" + ])) + ])) ])) ])), (value!([ @@ -5249,11 +5503,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "MulDiv" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "MulDiv" + ])) + ])) ])) ])), (value!([ @@ -5328,11 +5592,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "AddSub" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AddSub" + ])) + ])) ])) ])), (value!([ @@ -5368,11 +5642,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "AddSub" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AddSub" + ])) + ])) ])) ])), (value!([ @@ -5408,11 +5692,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => 
"value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "AddSub" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AddSub" + ])) + ])) ])) ])), (value!([ @@ -5448,11 +5742,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "AddSub" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AddSub" + ])) + ])) ])) ])), (value!([ @@ -5488,11 +5792,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "AddSub" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AddSub" + ])) + ])) ])) ])), (value!([ @@ -5528,11 +5842,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "AddSub" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "AddSub" + ])) + ])) ])) ])), (value!([ @@ -5619,11 +5943,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Comparison" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Comparison" + ])) + ])) ])) ])), (value!([ @@ -5687,11 +6021,21 @@ impl Parser { "value" => "_" ])), (value!([ - 
"emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "LogicalAnd" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "LogicalAnd" + ])) + ])) ])) ])), (value!([ @@ -5759,11 +6103,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -5807,11 +6161,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -5855,11 +6219,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -5903,11 +6277,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => 
"HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -5951,11 +6335,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -5999,11 +6393,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6043,29 +6447,35 @@ impl Parser { "value" => "=" ])), (value!([ - "emit" => "op_mod_not", + "emit" => "value_generic", "children" => (value!([ - "emit" => "block", - "children" => - (value!([ - (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "value_token_touch", - "value" => ">" - ])) - ])), + (value!([ + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "genarg", + "children" => (value!([ - "emit" => "inline_sequence", + "emit" => "value_token_ccl", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "=" + "emit" => "ccl", + "children" => + (value!([ + (value!([ + "emit" => "char", + "value" => ">" + ])), + (value!([ + "emit" => "char", + "value" => "=" + ])) + ])) ])) ])) - ])) + ])) ])) ])), (value!([ @@ -6073,11 +6483,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + 
"children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6145,11 +6565,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6193,11 +6623,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6241,11 +6681,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6289,11 +6739,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6337,11 +6797,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => 
"HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6385,11 +6855,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6429,29 +6909,35 @@ impl Parser { "value" => "=" ])), (value!([ - "emit" => "op_mod_not", + "emit" => "value_generic", "children" => (value!([ - "emit" => "block", - "children" => - (value!([ - (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "value_token_touch", - "value" => ">" - ])) - ])), + (value!([ + "emit" => "identifier", + "value" => "Not" + ])), + (value!([ + "emit" => "genarg", + "children" => (value!([ - "emit" => "inline_sequence", + "emit" => "value_token_ccl", "children" => (value!([ - "emit" => "value_token_touch", - "value" => "=" + "emit" => "ccl", + "children" => + (value!([ + (value!([ + "emit" => "char", + "value" => ">" + ])), + (value!([ + "emit" => "char", + "value" => "=" + ])) + ])) ])) ])) - ])) + ])) ])) ])), (value!([ @@ -6459,11 +6945,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "HoldExpression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "HoldExpression" + ])) + ])) ])) ])), (value!([ @@ -6982,11 +7478,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", 
"children" => (value!([ - "emit" => "value_token_touch", - "value" => "}" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "value_token_touch", + "value" => "}" + ])) + ])) ])) ])), (value!([ @@ -7051,11 +7557,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Expression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])), (value!([ @@ -7095,11 +7611,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "Expression" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Expression" + ])) + ])) ])) ])), (value!([ @@ -7315,11 +7841,21 @@ impl Parser { "value" => "Sequences" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "T_EOL" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "T_EOL" + ])) + ])) ])) ])), (value!([ @@ -7359,11 +7895,21 @@ impl Parser { "value" => "Sequences" ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "T_EOL" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "T_EOL" + ])) + ])) ])) ])), (value!([ @@ -7423,11 +7969,21 @@ impl Parser { "value" => "_" ])), 
(value!([ - "emit" => "op_mod_peek", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "T_EOL" + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "T_EOL" + ])) + ])) ])) ])) ])) @@ -7445,11 +8001,21 @@ impl Parser { "value" => "_" ])), (value!([ - "emit" => "op_mod_peek", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "T_EOL" + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "T_EOL" + ])) + ])) ])) ])) ])) @@ -7461,11 +8027,21 @@ impl Parser { ])) ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "T_EOL" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "T_EOL" + ])) + ])) ])) ])), (value!([ @@ -7511,12 +8087,8 @@ impl Parser { "value" => "Sequences" ])), (value!([ - "emit" => "op_mod_expect", - "children" => - (value!([ - "emit" => "identifier", - "value" => "T_EOL" - ])) + "emit" => "identifier", + "value" => "T_EOL" ])) ])) ])), @@ -7648,11 +8220,21 @@ impl Parser { ])) ])), (value!([ - "emit" => "op_mod_expect", + "emit" => "value_generic", "children" => (value!([ - "emit" => "identifier", - "value" => "EOF" + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "EOF" + ])) + ])) ])) ])), (value!([ diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index e7f45805..443e57d6 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -15,7 +15,7 @@ ___ : (T_EOL _)* # optional line-breaks followed by whitespace _standalone_ 
: @{ # helper parselet to ensure that identifiers stand alone # fixme: When generic parselets are available, this can be replaced by a Keyword

invocation - peek not Char _ + Peek>> _ _ } @@ -204,7 +204,7 @@ InlineSequenceItem : @{ InlineSequence : @{ # Special case: Expression followed by "," is considered as a list with a single item (syntactic sugar) - Expression ___ ',' _ ___ peek ')' ast("list") + Expression ___ ',' _ ___ Peek<')'> ast("list") # A sequence is a list of items optionally separated by "," (InlineSequenceItem ___ (',' _)? ___)+ ast("inline_sequence") @@ -255,10 +255,6 @@ Token : @{ TokenAtom '*' ast("op_mod_kle") TokenAtom '?' ast("op_mod_opt") TokenAtom - # deprecated: - 'peek' _standalone_ Expect ast("op_mod_peek") - 'not' _standalone_ Expect ast("op_mod_not") - #'expect' _standalone_ Expect ast("op_mod_expect") } # Expression & Flow @@ -298,7 +294,7 @@ Rvalue : @{ } Unary : @{ - '-' not '-' _ Unary ast("op_unary_neg") + '-' Not<'-'> _ Unary ast("op_unary_neg") '!' _ Unary ast("op_unary_not") '*' _ Unary ast("op_deref") Rvalue _ @@ -313,8 +309,8 @@ MulDiv : @{ } AddSub : @{ - AddSub '+' not '+' _ Expect ast("op_binary_add") - AddSub '-' not '-' _ Expect ast("op_binary_sub") + AddSub '+' Not<'+'> _ Expect ast("op_binary_add") + AddSub '-' Not<'-'> _ Expect ast("op_binary_sub") MulDiv } @@ -347,7 +343,7 @@ HoldExpression : @{ Lvalue _ '/=' _ Expect ast("assign_div_hold") Lvalue _ '//=' _ Expect ast("assign_divi_hold") Lvalue _ '%=' _ Expect ast("assign_mod_hold") - Lvalue _ '=' not ('>' | '=') _ Expect ast("assign_hold") + Lvalue _ '=' Not=>> _ Expect ast("assign_hold") LogicalOr } @@ -358,7 +354,7 @@ Expression : @{ Lvalue _ '/=' _ Expect ast("assign_div") Lvalue _ '//=' _ Expect ast("assign_divi") Lvalue _ '%=' _ Expect ast("assign_mod") - Lvalue _ '=' not ('>' | '=') _ Expect ast("assign") + Lvalue _ '=' Not=>> _ Expect ast("assign") LogicalOr } @@ -404,8 +400,8 @@ Instruction : @{ 'begin' _standalone_ Sequences Expect ast("begin") 'end' _standalone_ Sequences Expect ast("end") T_Identifier _ ':' _ { - Literal _ peek T_EOL - Token _ peek T_EOL + Literal _ Peek + Token _ Peek Sequences 
} Expect ast("constant") Statement T_EOL diff --git a/src/vm/op.rs b/src/vm/op.rs index 2ee701d9..bc42118d 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -1,5 +1,4 @@ use super::*; -use crate::error::Error; use crate::reader::Offset; use crate::value; use crate::value::{Dict, List, Object, RefValue, Str, Value}; @@ -46,16 +45,15 @@ pub(crate) enum Op { Backward(usize), // Jump backward // Interrupts - Next, // Err(Reject::Next) - Push, // Ok(Accept::Push) - LoadPush, // Ok(Accept::Push) with value - Accept, // Ok(Accept::Return) - LoadAccept, // Ok(Accept::Return) with value - Repeat, // Ok(Accept::Repeat) - Reject, // Ok(Err::Reject) - LoadExit, // Exit with errorcode - Exit, // Exit with 0 - Error(Option), // Error with optional error message (otherwise its expected on stack) + Next, // Err(Reject::Next) + Push, // Ok(Accept::Push) + LoadPush, // Ok(Accept::Push) with value + Accept, // Ok(Accept::Return) + LoadAccept, // Ok(Accept::Return) with value + Repeat, // Ok(Accept::Repeat) + Reject, // Ok(Err::Reject) + LoadExit, // Exit with errorcode + Exit, // Exit with 0 // Call CallOrCopy, // Load and eventually call stack element without parameters @@ -375,15 +373,6 @@ impl Op { } Op::Exit => std::process::exit(0), - Op::Error(msg) => { - if let Some(msg) = msg { - Error::new(Some(context.frame.reader_start), msg.clone()).into() - } else { - Error::new(Some(context.frame.reader_start), context.pop().to_string()) - .into() - } - } - // Calls Op::CallOrCopy => { let value = context.pop(); From 5a592d860d5bf4a1074b74079740317b819eb0e3 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 4 Sep 2023 23:42:26 +0200 Subject: [PATCH 62/94] Add 'restrict'-mode to Compiler, improved reserved identifier detection --- src/compiler/ast.rs | 81 +++++++++++++++++++--------------------- src/compiler/compiler.rs | 2 + src/compiler/mod.rs | 2 +- src/compiler/prelude.rs | 2 + src/value/dict.rs | 5 ++- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git 
a/src/compiler/ast.rs b/src/compiler/ast.rs index 6fc96f18..b06c06b3 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -11,20 +11,14 @@ use crate::value::{Dict, List, Object, RefValue, Str, Token}; use crate::vm::*; use charclass::CharClass; -/// Checks whether identifier's name is the name of a reserved word. -pub fn identifier_is_valid(ident: &str) -> Result<(), Error> { - match ident { - "Char" | "Chars" | "accept" | "begin" | "break" | "continue" | "else" | "end" | "exit" - | "expect" | "false" | "for" | "if" | "in" | "loop" | "next" | "not" | "null" | "peek" - | "push" | "reject" | "repeat" | "reset" | "return" | "self" | "Self" | "true" | "void" => { - Err(Error::new( - None, - format!("Expected identifier, found reserved word '{}'", ident), - )) - } - _ => Ok(()), - } -} +pub static RESERVED_TOKENS: &[&'static str] = &[ + "Char", "Chars", "EOF", "Expect", "Not", "Kle", "Opt", "Peek", "Pos", "Repeat", "Self", "Void", +]; + +pub static RESERVED_KEYWORDS: &[&'static str] = &[ + "accept", "begin", "break", "continue", "else", "end", "exit", "false", "for", "if", "in", + "loop", "next", "null", "push", "reject", "repeat", "reset", "return", "self", "true", "void", +]; /// AST traversal entry pub(in crate::compiler) fn traverse(compiler: &mut Compiler, ast: &RefValue) -> ImlOp { @@ -537,13 +531,13 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: } // Undefined name None => { - // Check if identifier is valid - if let Err(mut error) = identifier_is_valid(name) { - if let Some(offset) = traverse_node_offset(node) { - error.patch_offset(offset); - } + // Check if identifier is not a reserved word + if compiler.restrict && RESERVED_KEYWORDS.contains(&name) { + compiler.errors.push(Error::new( + traverse_node_offset(node), + format!("Expected identifier, found reserved word '{}'", name), + )); - compiler.errors.push(error); break 'load; } @@ -651,23 +645,19 @@ fn traverse_node_rvalue(compiler: &mut Compiler, node: &Dict, 
mode: Rvalue) -> I let offset = traverse_node_offset(node); - // Check if identifier is valid - return if let Err(mut error) = identifier_is_valid(name) { - if let Some(offset) = offset { - error.patch_offset(offset); - } + // Check if identifier is not a reserved word + if compiler.restrict && RESERVED_KEYWORDS.contains(&name) { + compiler.errors.push(Error::new( + offset, + format!("Expected identifier, found reserved word '{}'", name), + )); + } - compiler.errors.push(error); - ImlOp::Nop - } else { - match mode { - Rvalue::Load => ImlOp::load_by_name(compiler, offset, name.to_string()), - Rvalue::CallOrLoad => { - ImlOp::call_by_name(compiler, offset, name.to_string(), None) - } - Rvalue::Call(args, nargs) => { - ImlOp::call_by_name(compiler, offset, name.to_string(), Some((args, nargs))) - } + return match mode { + Rvalue::Load => ImlOp::load_by_name(compiler, offset, name.to_string()), + Rvalue::CallOrLoad => ImlOp::call_by_name(compiler, offset, name.to_string(), None), + Rvalue::Call(args, nargs) => { + ImlOp::call_by_name(compiler, offset, name.to_string(), Some((args, nargs))) } }; } @@ -1057,12 +1047,15 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { let ident = ident["value"].borrow(); let ident = ident.object::().unwrap().as_str(); - if let Err(mut error) = identifier_is_valid(ident) { - if let Some(offset) = traverse_node_offset(node) { - error.patch_offset(offset); - } + // Disallow assignment to any reserved identifier + if compiler.restrict + && (RESERVED_KEYWORDS.contains(&ident) || RESERVED_TOKENS.contains(&ident)) + { + compiler.errors.push(Error::new( + traverse_node_offset(node), + format!("Expected identifier, found reserved word '{}'", ident), + )); - compiler.errors.push(error); return ImlOp::Nop; } @@ -1363,6 +1356,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { { let target = target.borrow(); + // TODO: The Char-modifier-stuff needs the be refactored in a separate pull request. 
if let Some(Token::Char(ccl)) = target.object::() { match parts[2] { // mod_pos on Token::Char becomes Token::Chars @@ -1373,15 +1367,16 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))), None, ); + + // mod_kle on Token::Char becomes Token::Chars.into_optional() if parts[2] == "kle" { - // mod_kle on Token::Char becomes Token::Chars.into_optional() chars = chars.into_optional(); } return chars; } - // mod_not on Token::Char becomes negated Token::Char + // mod_not on Token::Char becomes a negated Token::Char "not" => { return ImlOp::call( compiler, diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 107337dd..9f9e908d 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -46,6 +46,7 @@ won't be removed and can be accessed on later calls. pub struct Compiler { parser: Option, // Internal Tokay parser pub debug: u8, // Compiler debug mode + pub(super) restrict: bool, // Restrict assignment of reserved identifiers pub(super) statics: IndexSet, // Static values collected during compilation pub(super) scopes: Vec, // Current compilation scopes pub(super) usages: Vec, // Unresolved values @@ -68,6 +69,7 @@ impl Compiler { let mut compiler = Self { parser: None, debug: 0, + restrict: true, statics: IndexSet::new(), scopes: Vec::new(), usages: Vec::new(), diff --git a/src/compiler/mod.rs b/src/compiler/mod.rs index daa77c6e..a4e95bfc 100644 --- a/src/compiler/mod.rs +++ b/src/compiler/mod.rs @@ -10,5 +10,5 @@ use compiler::*; use iml::*; use parser::*; -pub(crate) use ast::identifier_is_valid; +pub(crate) use ast::{RESERVED_KEYWORDS, RESERVED_TOKENS}; pub use compiler::Compiler; diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index e9ef2f17..79b1776a 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -900,8 +900,10 @@ impl Compiler { /*ETARENEG*/ ; + self.restrict = false; self.compile_from_ast(&ast) .expect("prelude cannot be 
compiled!") .expect("prelude contains no main?"); + self.restrict = true; } } diff --git a/src/value/dict.rs b/src/value/dict.rs index 288701a5..70fe1503 100644 --- a/src/value/dict.rs +++ b/src/value/dict.rs @@ -41,9 +41,10 @@ impl Object for Dict { } if let Some(key) = key.object::() { - // todo: Put this into a utility function... + // check if identifier is allowed, otherwise put in "quotation marks" if !key.chars().all(|ch| ch.is_alphabetic() || ch == '_') - || crate::compiler::identifier_is_valid(key).is_err() + || crate::compiler::RESERVED_KEYWORDS.contains(&key.as_str()) + || crate::compiler::RESERVED_TOKENS.contains(&key.as_str()) { ret.push('"'); for ch in key.chars() { From afdec9b7ef63fee65dfd8eee3a8aabb2638373a5 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 20 Sep 2023 23:15:58 +0200 Subject: [PATCH 63/94] Improving implementation of Repeat - Playing with several Repeat implementations - Fixed `repeat` behavior in parselets reading beyond eof - `eof`-builtin to determine eof-state without a consumable - Improving debug by dumping reader state info --- src/_builtins.rs | 6 +- src/builtin/mod.rs | 4 + src/compiler/prelude.rs | 770 ++++++++++++++++++++-------------------- src/prelude.tok | 48 +-- src/reader.rs | 2 +- src/value/list.rs | 6 +- src/vm/context.rs | 7 +- src/vm/op.rs | 4 + 8 files changed, 426 insertions(+), 421 deletions(-) diff --git a/src/_builtins.rs b/src/_builtins.rs index 4a86f7e7..ad486dcf 100644 --- a/src/_builtins.rs +++ b/src/_builtins.rs @@ -6,7 +6,7 @@ use crate::builtin::Builtin; /*GENERATE cargo run -- _builtins.tok -- `find . 
-name "*.rs"` */ -pub static BUILTINS: [Builtin; 63] = [ +pub static BUILTINS: [Builtin; 64] = [ Builtin { name: "Float", func: crate::value::token::tokay_token_float, @@ -87,6 +87,10 @@ pub static BUILTINS: [Builtin; 63] = [ name: "dict_set_item", func: crate::value::dict::Dict::tokay_method_dict_set_item, }, + Builtin { + name: "eof", + func: crate::builtin::tokay_function_eof, + }, Builtin { name: "error", func: crate::error::tokay_function_error, diff --git a/src/builtin/mod.rs b/src/builtin/mod.rs index 25e72f8e..c2b484a6 100644 --- a/src/builtin/mod.rs +++ b/src/builtin/mod.rs @@ -212,3 +212,7 @@ tokay_function!("offset : @", { ]) .into() }); + +tokay_function!("eof : @", { + value!(context.unwrap().thread.reader.eof()).into() +}); diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index 79b1776a..d59f400c 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -23,7 +23,7 @@ impl Compiler { (value!([ (value!([ "emit" => "identifier", - "value" => "Repeat" + "value" => "Not" ])), (value!([ "emit" => "value_parselet", @@ -38,18 +38,94 @@ impl Compiler { ])) ])), (value!([ - "emit" => "arg", + "emit" => "body", "children" => (value!([ (value!([ - "emit" => "identifier", - "value" => "min" + "emit" => "sequence", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "P" + ])), + (value!([ + "emit" => "op_reject", + "value" => "reject" + ])) + ])) ])), (value!([ - "emit" => "value_integer", - "value" => 1 + "emit" => "identifier", + "value" => "Void" ])) ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Peek" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + "emit" => "sequence", + "children" => + (value!([ + 
(value!([ + "emit" => "identifier", + "value" => "P" + ])), + (value!([ + "emit" => "op_reset", + "value" => "reset" + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Expect" + ])), + (value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) ])), (value!([ "emit" => "arg", @@ -57,7 +133,7 @@ impl Compiler { (value!([ (value!([ "emit" => "identifier", - "value" => "max" + "value" => "msg" ])), (value!([ "emit" => "value_void", @@ -70,192 +146,348 @@ impl Compiler { "children" => (value!([ (value!([ - "emit" => "begin", + "emit" => "op_accept", "children" => (value!([ - "emit" => "sequence", - "children" => - (value!([ - "emit" => "block", - "children" => - (value!([ - "emit" => "assign", - "children" => - (value!([ - (value!([ - "emit" => "lvalue", - "children" => - (value!([ - "emit" => "identifier", - "value" => "res" - ])) - ])), - (value!([ - "emit" => "call", - "children" => - (value!([ - "emit" => "identifier", - "value" => "list" - ])) - ])) - ])) - ])) - ])) + "emit" => "identifier", + "value" => "P" ])) ])), (value!([ - "emit" => "sequence", + "emit" => "call", "children" => (value!([ (value!([ "emit" => "identifier", - "value" => "P" + "value" => "error" ])), (value!([ - "emit" => "block", + "emit" => "callarg", "children" => (value!([ - (value!([ - "emit" => "assign_add", - "children" => + "emit" => "op_logical_or", + "children" => + (value!([ (value!([ - (value!([ - "emit" => "lvalue", - "children" => - (value!([ - "emit" => "identifier", - "value" => "res" - ])) - ])), - (value!([ - "emit" => "capture_index", - "children" => - (value!([ - "emit" => "value_integer", - "value" => 1 - ])) - ])) - ])) - ])), - (value!([ - "emit" => "op_if", - "children" => + "emit" => "identifier", + "value" => "msg" + ])), (value!([ - (value!([ 
- "emit" => "op_logical_or", - "children" => + "emit" => "op_binary_add", + "children" => + (value!([ (value!([ - (value!([ - "emit" => "op_unary_not", - "children" => + "emit" => "op_binary_add", + "children" => + (value!([ (value!([ - "emit" => "identifier", - "value" => "max" + "emit" => "op_binary_add", + "children" => + (value!([ + (value!([ + "emit" => "value_string", + "value" => "Expecting " + ])), + (value!([ + "emit" => "op_deref", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "value_string", + "value" => ", but got " ])) - ])), - (value!([ - "emit" => "comparison", - "children" => + ])) + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ (value!([ - (value!([ - "emit" => "rvalue", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "res" - ])), + "emit" => "identifier", + "value" => "repr" + ])), + (value!([ + "emit" => "callarg", + "children" => + (value!([ + "emit" => "block", + "children" => (value!([ - "emit" => "attribute", - "children" => - (value!([ - "emit" => "value_string", - "value" => "len" - ])) + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Token" + ])) + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "value_token_any", + "value" => "Char" + ])) + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "value_string", + "value" => "end-of-file" + ])) + ])) ])) - ])) - ])), - (value!([ - "emit" => "cmp_lt", - "children" => - (value!([ - "emit" => "identifier", - "value" => "max" - ])) - ])) + ])) ])) - ])) + ])) ])) - ])), - (value!([ - "emit" => "op_repeat", - "value" => "repeat" - ])) + ])) ])) - ])) + ])) ])) ])) ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "constant", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "Repeat" + ])), + 
(value!([ + "emit" => "value_parselet", + "children" => + (value!([ + (value!([ + "emit" => "gen", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "min" ])), (value!([ - "emit" => "op_if", + "emit" => "value_integer", + "value" => 1 + ])) + ])) + ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "max" + ])), + (value!([ + "emit" => "value_void", + "value" => "void" + ])) + ])) + ])), + (value!([ + "emit" => "body", + "children" => + (value!([ + (value!([ + "emit" => "assign", "children" => (value!([ (value!([ - "emit" => "op_logical_or", + "emit" => "lvalue", "children" => (value!([ - (value!([ - "emit" => "op_unary_not", - "children" => + "emit" => "identifier", + "value" => "res" + ])) + ])), + (value!([ + "emit" => "call", + "children" => + (value!([ + "emit" => "identifier", + "value" => "list" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_loop", + "children" => + (value!([ + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "sequence", + "children" => + (value!([ (value!([ "emit" => "identifier", - "value" => "res" - ])) - ])), - (value!([ - "emit" => "comparison", - "children" => + "value" => "P" + ])), (value!([ - (value!([ - "emit" => "rvalue", - "children" => + "emit" => "block", + "children" => + (value!([ (value!([ - (value!([ - "emit" => "identifier", - "value" => "res" - ])), - (value!([ - "emit" => "attribute", - "children" => + "emit" => "assign_add", + "children" => + (value!([ (value!([ - "emit" => "value_string", - "value" => "len" + "emit" => "lvalue", + "children" => + (value!([ + "emit" => "identifier", + "value" => "res" + ])) + ])), + (value!([ + "emit" => "capture_index", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) ])) - ])) + ])) + ])), + (value!([ + "emit" => "op_if", + 
"children" => + (value!([ + (value!([ + "emit" => "op_logical_and", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "max" + ])), + (value!([ + "emit" => "comparison", + "children" => + (value!([ + (value!([ + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "len" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "cmp_eq", + "children" => + (value!([ + "emit" => "identifier", + "value" => "max" + ])) + ])) + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_accept", + "children" => + (value!([ + "emit" => "identifier", + "value" => "res" + ])) + ])) + ])) ])) - ])), - (value!([ - "emit" => "cmp_lt", - "children" => + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_if", + "children" => + (value!([ + (value!([ + "emit" => "comparison", + "children" => + (value!([ (value!([ - "emit" => "identifier", - "value" => "min" + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "len" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "cmp_lt", + "children" => + (value!([ + "emit" => "identifier", + "value" => "min" + ])) ])) - ])) + ])) + ])), + (value!([ + "emit" => "op_reject", + "value" => "reject" ])) - ])) + ])) + ])), + (value!([ + "emit" => "op_break", + "children" => + (value!([ + "emit" => "identifier", + "value" => "res" + ])) ])) - ])), - (value!([ - "emit" => "op_reject", - "value" => "reject" - ])) + ])) ])) - ])), - (value!([ - "emit" => "identifier", - "value" => "res" ])) ])) ])) @@ -579,250 +811,6 @@ impl Compiler { ])) ])) ])), - (value!([ - "emit" => "constant", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "Not" - ])), - (value!([ - "emit" => "value_parselet", - 
"children" => - (value!([ - (value!([ - "emit" => "gen", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])), - (value!([ - "emit" => "body", - "children" => - (value!([ - (value!([ - "emit" => "sequence", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "P" - ])), - (value!([ - "emit" => "op_reject", - "value" => "reject" - ])) - ])) - ])), - (value!([ - "emit" => "identifier", - "value" => "Void" - ])) - ])) - ])) - ])) - ])) - ])) - ])), - (value!([ - "emit" => "constant", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "Peek" - ])), - (value!([ - "emit" => "value_parselet", - "children" => - (value!([ - (value!([ - "emit" => "gen", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])), - (value!([ - "emit" => "body", - "children" => - (value!([ - "emit" => "sequence", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "P" - ])), - (value!([ - "emit" => "op_reset", - "value" => "reset" - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])), - (value!([ - "emit" => "constant", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "Expect" - ])), - (value!([ - "emit" => "value_parselet", - "children" => - (value!([ - (value!([ - "emit" => "gen", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])), - (value!([ - "emit" => "arg", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "msg" - ])), - (value!([ - "emit" => "value_void", - "value" => "void" - ])) - ])) - ])), - (value!([ - "emit" => "body", - "children" => - (value!([ - (value!([ - "emit" => "op_accept", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])), - (value!([ - "emit" => "call", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "error" - ])), - (value!([ - "emit" => "callarg", - "children" => - (value!([ - "emit" => 
"op_logical_or", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "msg" - ])), - (value!([ - "emit" => "op_binary_add", - "children" => - (value!([ - (value!([ - "emit" => "op_binary_add", - "children" => - (value!([ - (value!([ - "emit" => "op_binary_add", - "children" => - (value!([ - (value!([ - "emit" => "value_string", - "value" => "Expecting " - ])), - (value!([ - "emit" => "op_deref", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])) - ])) - ])), - (value!([ - "emit" => "value_string", - "value" => ", but got " - ])) - ])) - ])), - (value!([ - "emit" => "call", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "repr" - ])), - (value!([ - "emit" => "callarg", - "children" => - (value!([ - "emit" => "block", - "children" => - (value!([ - (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "identifier", - "value" => "Token" - ])) - ])), - (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "value_token_any", - "value" => "Char" - ])) - ])), - (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "value_string", - "value" => "end-of-file" - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])) - ])), (value!([ "emit" => "constant", "children" => diff --git a/src/prelude.tok b/src/prelude.tok index 9cb89bdd..b1d9ace7 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -1,29 +1,5 @@ # Tokay default prelude -Repeat : @

min=1, max=void { - begin { - res = list() - } - - P { - res += $1 - if !max || res.len < max repeat - } - - if !res || res.len < min reject - res -} - -Pos : @

{ Repeat

} -Kle : @

{ Repeat

(min=0) } -Opt : @

{ P | Void } - -List : @ { - Self Separator P $1 + $3 - if empty (Self Separator) # allows for trailing Separator - P ($1, ) -} - Not : @

{ P reject Void @@ -38,5 +14,29 @@ Expect : @

msg=void { error(msg || "Expecting " + *P + ", but got " + repr((Token | Char | "end-of-file"))) } +Repeat : @ { + res = list() + + loop { + P { + res += $1 + if max && res.len == max return res + } + + if res.len < min reject + break res + } +} + +Pos : @

{ Repeat

} +Kle : @

{ Repeat

(min=0) } +Opt : @

{ P | Void } + +List : @ { + Self Separator P $1 + $3 + if empty (Self Separator) # allows for trailing Separator + P ($1, ) +} + Number : Float | Int Token : Word | Number | AsciiPunctuation diff --git a/src/reader.rs b/src/reader.rs index 68fbcc42..79f8ed6f 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -22,7 +22,7 @@ pub struct Reader { peeked: char, // Currently peeked char offset: Offset, // Current offset start: Offset, // Offset of last commit - eof: bool, // EOF marker + pub eof: bool, // EOF marker } impl Reader { diff --git a/src/value/list.rs b/src/value/list.rs index da9ca8a7..9ae8ee95 100644 --- a/src/value/list.rs +++ b/src/value/list.rs @@ -161,7 +161,7 @@ impl List { tokay_method!("list_iadd : @list, append", { // Don't append void if append.is_void() { - return Ok(list) + return Ok(list); } // In case list is not a list, make it a list. @@ -199,7 +199,7 @@ impl List { tokay_method!("list_add : @list, append", { // Don't append void if append.is_void() { - return Ok(list) + return Ok(list); } // In case list is not a list, make it a list. @@ -227,7 +227,7 @@ impl List { tokay_method!("list_push : @list, item, index=void", { // Don't push void if item.is_void() { - return Ok(list) + return Ok(list); } // In case list is not a list, make it a list. 
diff --git a/src/vm/context.rs b/src/vm/context.rs index 5c9e4443..c46181f7 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -509,7 +509,12 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, Err(Reject::Skip) => {} Ok(Accept::Next) => break ret, Ok(Accept::Push(capture)) => break capture, - Ok(Accept::Repeat) => {} + Ok(Accept::Repeat) => { + // break on eof + if self.thread.reader.eof { + break ret; + } + } Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), Err(Reject::Next) if !first => break Capture::Empty, other => return other, diff --git a/src/vm/op.rs b/src/vm/op.rs index bc42118d..9a0af614 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -160,6 +160,10 @@ impl Op { // Dump stack and frames if context.debug > 4 { + context.log("--- Reader ---"); + context.log(&format!(" offset={:?}", context.thread.reader.tell())); + context.log(&format!(" eof={:?}", context.thread.reader.eof)); + context.log("--- Stack ---"); for i in 0..context.stack.len() { context.log(&format!(" {:03} {:?}", i, context.stack[i])); From 84e476de44b45882969f817db55491b42d0023b5 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 20 Sep 2023 23:36:58 +0200 Subject: [PATCH 64/94] Accept `void` as primitive for value!()-macro calls --- src/compiler/ast.rs | 2 +- src/compiler/prelude.rs | 8 ++------ src/prelude.tok | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index b06c06b3..e1390578 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1793,7 +1793,7 @@ tokay_function!("ast2rust : @ast, level=0", { ); } else { assert!( - ["str", "int", "float", "bool"].contains(&value.name()), + ["str", "int", "float", "bool", "void"].contains(&value.name()), "No matching Rust primitive for {} found", value.name() ); diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index d59f400c..c50959e0 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ 
-326,12 +326,8 @@ impl Compiler { ])) ])), (value!([ - "emit" => "call", - "children" => - (value!([ - "emit" => "identifier", - "value" => "list" - ])) + "emit" => "list", + "value" => void ])) ])) ])), diff --git a/src/prelude.tok b/src/prelude.tok index b1d9ace7..099c9d74 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -15,7 +15,7 @@ Expect : @

msg=void { } Repeat : @ { - res = list() + res = () loop { P { From 9e9be3dee4eb3e6f938aaaf840982d387bb31723 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 21 Sep 2023 15:25:05 +0200 Subject: [PATCH 65/94] Fixing preluded Kle --- src/compiler/prelude.rs | 24 +++++++++--------------- src/prelude.tok | 2 +- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index c50959e0..b327ff7d 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -561,29 +561,23 @@ impl Compiler { "emit" => "body", "children" => (value!([ - "emit" => "call", + "emit" => "value_generic", "children" => (value!([ (value!([ - "emit" => "value_generic", + "emit" => "identifier", + "value" => "Repeat" + ])), + (value!([ + "emit" => "genarg", "children" => (value!([ - (value!([ - "emit" => "identifier", - "value" => "Repeat" - ])), - (value!([ - "emit" => "genarg", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])) + "emit" => "identifier", + "value" => "P" ])) ])), (value!([ - "emit" => "callarg_named", + "emit" => "genarg_named", "children" => (value!([ (value!([ diff --git a/src/prelude.tok b/src/prelude.tok index 099c9d74..6175f518 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -29,7 +29,7 @@ Repeat : @ { } Pos : @

{ Repeat

} -Kle : @

{ Repeat

(min=0) } +Kle : @

{ Repeat } Opt : @

{ P | Void } List : @ { From 718936aaf9bd1662bb1f72e8fed0f35fd9f95ab5 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 21 Sep 2023 16:04:50 +0200 Subject: [PATCH 66/94] First version that uses generics for Pos/Kle/Opt --- src/compiler/ast.rs | 21 ++++++++++++++------- src/compiler/compiler.rs | 33 +++------------------------------ src/compiler/iml/imlop.rs | 27 --------------------------- src/compiler/iml/imlvalue.rs | 27 +++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 64 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index e1390578..548c57aa 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1324,8 +1324,9 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { let children = node["children"].borrow(); let children = children.object::().unwrap(); - let res = traverse_node_rvalue(compiler, children, Rvalue::CallOrLoad); + let res = traverse_node_static(compiler, None, children); + /* if !res.is_consuming() { compiler.errors.push(Error::new( traverse_node_offset(node), @@ -1394,12 +1395,18 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // Push operation position here ops.push(traverse_offset(node)); - - match parts[2] { - "pos" => res.into_positive(), - "kle" => res.into_kleene(), - "opt" => res.into_optional(), - _ => unreachable!(), + */ + + ImlOp::Call { + offset: None, + target: match parts[2] { + "pos" => res.into_positive(), + "kle" => res.into_kleene(), + "opt" => res.into_optional(), + _ => unreachable!(), + } + .try_resolve(compiler), + args: None, } } diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 9f9e908d..f544a1bd 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -435,39 +435,12 @@ impl Compiler { let mut secondary = None; if name == "_" || name == "__" { - self.parselet_push(); - - // becomes `Value+` - let value_pos = ImlOp::call(self, None, value, None).into_positive(); - - value = self.parselet_pop( - None, - 
Some("__".to_string()), - Some(0), // Zero severity - None, - None, - value_pos, - ); - - // Remind "__" as new constant + // `__` becomes `Value+` + value = value.into_positive().try_resolve(self); secondary = Some(("__", value.clone())); // ...and then in-place "_" is defined as `_ : __?` - self.parselet_push(); - - // becomes `Value?` - let value_opt = ImlOp::call(self, None, value, None).into_optional(); - - value = self.parselet_pop( - None, - Some(name.to_string()), - Some(0), // Zero severity - None, - None, - value_opt, - ); - - // Insert "_" afterwards + value = value.into_optional().try_resolve(self); } // Insert constant into next constant-holding scope diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 4fcd4c05..934f1959 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -142,33 +142,6 @@ impl ImlOp { } } - /// Turns ImlOp construct into a kleene (none-or-many) occurence. - pub fn into_kleene(self) -> Self { - Self::Repeat { - body: Box::new(self), - min: 0, - max: 0, - } - } - - /// Turns ImlOp construct into a positive (one-or-many) occurence. - pub fn into_positive(self) -> Self { - Self::Repeat { - body: Box::new(self), - min: 1, - max: 0, - } - } - - /// Turns ImlOp construct into an optional (none-or-one) occurence. 
- pub fn into_optional(self) -> Self { - Self::Repeat { - body: Box::new(self), - min: 0, - max: 1, - } - } - /// Compile ImlOp construct into Op instructions of the resulting Tokay VM program pub fn compile_to_vec( &self, diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 9bf3b902..94326afb 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -53,6 +53,33 @@ pub(in crate::compiler) enum ImlValue { } impl ImlValue { + fn into_generic(self, name: &str) -> Self { + Self::Instance { + offset: None, + target: Box::new(ImlValue::Name { + offset: None, + name: name.to_string(), + }), + args: vec![(None, self)], + nargs: IndexMap::new(), + } + } + + /// Turns ImlValue into a Kle (none-or-many) occurence. + pub fn into_kleene(self) -> Self { + self.into_generic("Kle") + } + + /// Turns ImlValue into a Pos (one-or-many) occurence. + pub fn into_positive(self) -> Self { + self.into_generic("Pos") + } + + /// Turns ImlOp construct into an optional (none-or-one) occurence. + pub fn into_optional(self) -> Self { + self.into_generic("Opt") + } + /// Try to resolve immediatelly, otherwise push shared reference to compiler's unresolved ImlValue. 
pub fn try_resolve(mut self, compiler: &mut Compiler) -> Self { if self.resolve(compiler) { From 43ff0aa6d91f308eda7f0de94569eaa6c1c4dde1 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 21 Sep 2023 22:21:26 +0200 Subject: [PATCH 67/94] ImlValue::Instance with optional severity --- src/compiler/ast.rs | 7 ++++--- src/compiler/compiler.rs | 4 ++-- src/compiler/iml/imlvalue.rs | 22 +++++----------------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 548c57aa..85e80037 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -372,6 +372,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { args, nargs, offset: traverse_node_offset(node), + severity: None, }; ret.try_resolve(compiler) @@ -1400,9 +1401,9 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { ImlOp::Call { offset: None, target: match parts[2] { - "pos" => res.into_positive(), - "kle" => res.into_kleene(), - "opt" => res.into_optional(), + "pos" => res.into_generic("Pos", None), + "kle" => res.into_generic("Kle", None), + "opt" => res.into_generic("Opt", None), _ => unreachable!(), } .try_resolve(compiler), diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index f544a1bd..5b8d9dae 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -436,11 +436,11 @@ impl Compiler { if name == "_" || name == "__" { // `__` becomes `Value+` - value = value.into_positive().try_resolve(self); + value = value.into_generic("Pos", Some(0)).try_resolve(self); secondary = Some(("__", value.clone())); // ...and then in-place "_" is defined as `_ : __?` - value = value.into_optional().try_resolve(self); + value = value.into_generic("Opt", Some(0)).try_resolve(self); } // Insert constant into next constant-holding scope diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 94326afb..b993a3ad 100644 --- a/src/compiler/iml/imlvalue.rs +++ 
b/src/compiler/iml/imlvalue.rs @@ -49,11 +49,12 @@ pub(in crate::compiler) enum ImlValue { target: Box, // Instance target args: Vec<(Option, ImlValue)>, // Sequential generic args nargs: IndexMap, ImlValue)>, // Named generic args + severity: Option, // optional desired severity }, } impl ImlValue { - fn into_generic(self, name: &str) -> Self { + pub fn into_generic(self, name: &str, severity: Option) -> Self { Self::Instance { offset: None, target: Box::new(ImlValue::Name { @@ -62,24 +63,10 @@ impl ImlValue { }), args: vec![(None, self)], nargs: IndexMap::new(), + severity, } } - /// Turns ImlValue into a Kle (none-or-many) occurence. - pub fn into_kleene(self) -> Self { - self.into_generic("Kle") - } - - /// Turns ImlValue into a Pos (one-or-many) occurence. - pub fn into_positive(self) -> Self { - self.into_generic("Pos") - } - - /// Turns ImlOp construct into an optional (none-or-one) occurence. - pub fn into_optional(self) -> Self { - self.into_generic("Opt") - } - /// Try to resolve immediatelly, otherwise push shared reference to compiler's unresolved ImlValue. 
pub fn try_resolve(mut self, compiler: &mut Compiler) -> Self { if self.resolve(compiler) { @@ -101,6 +88,7 @@ impl ImlValue { target, args, nargs, + severity, } => { let mut is_resolved = target.resolve(compiler); @@ -210,7 +198,7 @@ impl ImlValue { constants, offset: parselet.offset.clone(), name: parselet.name.clone(), - severity: parselet.severity, + severity: severity.unwrap_or(parselet.severity), })) } target => { From d6731d5223df87ff95e691eb8baaa0beb240da4e Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 22 Sep 2023 23:53:48 +0200 Subject: [PATCH 68/94] Re-enable Char-modifier-optimization --- src/compiler/ast.rs | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 85e80037..bae20004 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1349,12 +1349,10 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { } else { compiler.parselet_mark_consuming(); } + */ // Modifiers on usages of Token::Char can be optimized for better efficiency - if let ImlOp::Call { - target: ImlValue::Value(target), - .. 
- } = &res + if let ImlValue::Value(target) = &res { let target = target.borrow(); @@ -1363,31 +1361,29 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { match parts[2] { // mod_pos on Token::Char becomes Token::Chars "pos" | "kle" => { - let mut chars = ImlOp::call( - compiler, - traverse_node_offset(node), - ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))), - None, - ); + let mut chars = ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))); - // mod_kle on Token::Char becomes Token::Chars.into_optional() + // mod_kle on Token::Char becomes optional Token::Chars if parts[2] == "kle" { - chars = chars.into_optional(); + chars = chars.into_generic("Opt", None).try_resolve(compiler); } - return chars; + return ImlOp::Call{ + offset: traverse_node_offset(node), + target: chars, + args: None, + }; } // mod_not on Token::Char becomes a negated Token::Char "not" => { - return ImlOp::call( - compiler, - traverse_node_offset(node), - ImlValue::from(RefValue::from(Token::Char( + return ImlOp::Call{ + offset: traverse_node_offset(node), + target: ImlValue::from(RefValue::from(Token::Char( ccl.clone().negate(), ))), - None, - ); + args: None, + }; } _ => {} } @@ -1396,7 +1392,6 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // Push operation position here ops.push(traverse_offset(node)); - */ ImlOp::Call { offset: None, From e7bb163c1c1dbb34ef929b40eb0416a1d607699e Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 23 Sep 2023 10:54:24 +0200 Subject: [PATCH 69/94] Further improving on preluded Kle --- src/compiler/ast.rs | 13 ++++++------ src/compiler/prelude.rs | 44 +++++++++++++++++++++++++---------------- src/prelude.tok | 2 +- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index bae20004..2a6dbd75 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1352,8 +1352,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { */ // 
Modifiers on usages of Token::Char can be optimized for better efficiency - if let ImlValue::Value(target) = &res - { + if let ImlValue::Value(target) = &res { let target = target.borrow(); // TODO: The Char-modifier-stuff needs the be refactored in a separate pull request. @@ -1361,14 +1360,16 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { match parts[2] { // mod_pos on Token::Char becomes Token::Chars "pos" | "kle" => { - let mut chars = ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))); + let mut chars = + ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))); // mod_kle on Token::Char becomes optional Token::Chars if parts[2] == "kle" { - chars = chars.into_generic("Opt", None).try_resolve(compiler); + chars = + chars.into_generic("Opt", None).try_resolve(compiler); } - return ImlOp::Call{ + return ImlOp::Call { offset: traverse_node_offset(node), target: chars, args: None, @@ -1377,7 +1378,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // mod_not on Token::Char becomes a negated Token::Char "not" => { - return ImlOp::Call{ + return ImlOp::Call { offset: traverse_node_offset(node), target: ImlValue::from(RefValue::from(Token::Char( ccl.clone().negate(), diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index b327ff7d..3b6a2dce 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -561,34 +561,44 @@ impl Compiler { "emit" => "body", "children" => (value!([ - "emit" => "value_generic", + "emit" => "op_logical_or", "children" => (value!([ (value!([ - "emit" => "identifier", - "value" => "Repeat" - ])), - (value!([ - "emit" => "genarg", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])), - (value!([ - "emit" => "genarg_named", + "emit" => "value_generic", "children" => (value!([ (value!([ "emit" => "identifier", - "value" => "min" + "value" => "Repeat" + ])), + (value!([ + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + 
"value" => "P" + ])) ])), (value!([ - "emit" => "value_integer", - "value" => 0 + "emit" => "genarg_named", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "min" + ])), + (value!([ + "emit" => "value_integer", + "value" => 0 + ])) + ])) ])) ])) + ])), + (value!([ + "emit" => "value_void", + "value" => "void" ])) ])) ])) diff --git a/src/prelude.tok b/src/prelude.tok index 6175f518..2989e663 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -29,7 +29,7 @@ Repeat : @ { } Pos : @

{ Repeat

} -Kle : @

{ Repeat } +Kle : @

{ Repeat || void } Opt : @

{ P | Void } List : @ { From a5d4b3d9d8b2efcb45223057ffad56ee1c3d14fb Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 23 Sep 2023 12:40:39 +0200 Subject: [PATCH 70/94] Context::collect() should ignore severity 0 values (#118) --- src/vm/context.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vm/context.rs b/src/vm/context.rs index c46181f7..9a4fc95f 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -314,7 +314,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, let mut list = List::new(); // List collector let mut dict = Dict::new(); // Dict collector - let mut max = 0; // Maximum severity + let mut max = 1; // Maximum severity, must be at least 1 let mut idx = 0; // Keep the order for dicts // Collect any significant captures and values From 7db1984cf4eeb100717d020f6f0472d100bbbd01 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 26 Sep 2023 21:18:14 +0200 Subject: [PATCH 71/94] Redefine severity See table in src/value/README.md for details. --- src/compiler/compiler.rs | 1 + src/value/README.md | 8 ++++++++ src/value/list.rs | 6 +++--- src/value/token.rs | 2 +- src/vm/context.rs | 2 +- tests/parselet_leftrec.tok | 6 +++--- tests/token_modifiers.tok | 1 - 7 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 5bdf1d1e..35733790 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -415,6 +415,7 @@ impl Compiler { value = self.parselet_pop( None, Some("__".to_string()), + // Context::collect() should ignore severity 0 values (#118) Some(0), // Zero severity None, None, diff --git a/src/value/README.md b/src/value/README.md index 6920b93c..c2b26e01 100644 --- a/src/value/README.md +++ b/src/value/README.md @@ -30,3 +30,11 @@ This is how Tokay builtin values are converted during binary operations. 
| **str** | str | str | str | str | str | str | str | dict | list | **dict** | dict | dict | dict | dict | dict | dict | dict | dict | list | **list** | list | list | list | list | list | list | list | list | list + +# Token severity + +| Severity | Used by | +| -------- | ----------------------------------------------------------------------- | +| 0 | `_`, `__`, `Touch` | +| 5 | Any token (`Char`, `Match`, `Int`, `Float`, `Number`), parselet default | +| 10 | Any explicitly pushed value | diff --git a/src/value/list.rs b/src/value/list.rs index da9ca8a7..9ae8ee95 100644 --- a/src/value/list.rs +++ b/src/value/list.rs @@ -161,7 +161,7 @@ impl List { tokay_method!("list_iadd : @list, append", { // Don't append void if append.is_void() { - return Ok(list) + return Ok(list); } // In case list is not a list, make it a list. @@ -199,7 +199,7 @@ impl List { tokay_method!("list_add : @list, append", { // Don't append void if append.is_void() { - return Ok(list) + return Ok(list); } // In case list is not a list, make it a list. @@ -227,7 +227,7 @@ impl List { tokay_method!("list_push : @list, item, index=void", { // Don't push void if item.is_void() { - return Ok(list) + return Ok(list); } // In case list is not a list, make it a list. 
diff --git a/src/value/token.rs b/src/value/token.rs index b44acffe..d3a6abfb 100644 --- a/src/value/token.rs +++ b/src/value/token.rs @@ -158,7 +158,7 @@ impl Token { range, None, if matches!(self, Token::Touch(_)) { - 1 + 0 } else { 5 }, diff --git a/src/vm/context.rs b/src/vm/context.rs index 0e5c1451..bf8248c4 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -311,7 +311,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, let mut list = List::new(); // List collector let mut dict = Dict::new(); // Dict collector - let mut max = 0; // Maximum severity + let mut max = self.parselet.severity; // Require at least parselet severity level let mut idx = 0; // Keep the order for dicts // Collect any significant captures and values diff --git a/tests/parselet_leftrec.tok b/tests/parselet_leftrec.tok index 95703a07..584bec55 100644 --- a/tests/parselet_leftrec.tok +++ b/tests/parselet_leftrec.tok @@ -1,13 +1,13 @@ # direct1 D1: @{ - D1? ''a'' + D1? Char } 'D1' print(D1) # direct2 D2: @{ - D2 'b' - 'a' + D2 Char + Char } 'D2' print(D2) diff --git a/tests/token_modifiers.tok b/tests/token_modifiers.tok index 5365b202..bd63862d 100644 --- a/tests/token_modifiers.tok +++ b/tests/token_modifiers.tok @@ -24,7 +24,6 @@ Int #abcbcd ad #Hello, World, Beta, Test #--- -#(("a", "b"), ("a", "b"), ("a", "b")) #("b", "b", "b") #("b", ("b", "b", "b"), "b") #(("a", "b"), ("a", ("b", "b", "b")), ("b", "b"), ("a", "b"), "b") From 926b69a607ddce62af45f6d0b0869d00c123dba8 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 26 Sep 2023 21:23:42 +0200 Subject: [PATCH 72/94] Removed obsolete comment --- src/compiler/compiler.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 35733790..5bdf1d1e 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -415,7 +415,6 @@ impl Compiler { value = self.parselet_pop( None, Some("__".to_string()), - // Context::collect() should ignore 
severity 0 values (#118) Some(0), // Zero severity None, None, From 460dfa3c2fae5aa68d6382de1b39328e2ad703b1 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 27 Sep 2023 22:25:42 +0200 Subject: [PATCH 73/94] Handle line breaks within generic definitions --- src/compiler/parser.rs | 18 +++++++++++++++++- src/compiler/tokay.tok | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/compiler/parser.rs b/src/compiler/parser.rs index f9b54429..802b1a8a 100644 --- a/src/compiler/parser.rs +++ b/src/compiler/parser.rs @@ -2633,6 +2633,10 @@ impl Parser { "emit" => "identifier", "value" => "_" ])), + (value!([ + "emit" => "identifier", + "value" => "___" + ])), (value!([ "emit" => "op_mod_kle", "children" => @@ -2644,6 +2648,10 @@ impl Parser { "emit" => "identifier", "value" => "ParseletGeneric" ])), + (value!([ + "emit" => "identifier", + "value" => "___" + ])), (value!([ "emit" => "op_mod_opt", "children" => @@ -2658,6 +2666,10 @@ impl Parser { (value!([ "emit" => "identifier", "value" => "_" + ])), + (value!([ + "emit" => "identifier", + "value" => "___" ])) ])) ])) @@ -2667,7 +2679,7 @@ impl Parser { ])), (value!([ "emit" => "identifier", - "value" => "_" + "value" => "___" ])), (value!([ "emit" => "value_generic", @@ -2690,6 +2702,10 @@ impl Parser { (value!([ "emit" => "identifier", "value" => "_" + ])), + (value!([ + "emit" => "identifier", + "value" => "___" ])) ])) ])) diff --git a/src/compiler/tokay.tok b/src/compiler/tokay.tok index 443e57d6..98101941 100644 --- a/src/compiler/tokay.tok +++ b/src/compiler/tokay.tok @@ -167,7 +167,7 @@ ParseletGeneric : @{ } ParseletGenerics : @{ - '<' _ (ParseletGeneric (',' _)?)* _ Expect<'>'> _ + '<' _ ___ (ParseletGeneric ___ (',' _ ___)?)* ___ Expect<'>'> _ ___ } ## Parselet: Arguments From f7e0b5d981a58475068480061b95c9daa4b67800 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 28 Sep 2023 00:56:44 +0200 Subject: [PATCH 74/94] Fix ImlOp's consuming state and improve Op::Reset* --- 
src/compiler/ast.rs | 2 +- src/compiler/compiler.rs | 1 + src/compiler/iml/imlop.rs | 21 +++++++++++++++------ src/vm/op.rs | 20 +++++++++++++++----- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index e1390578..15ac3279 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1211,7 +1211,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { "repeat" => Op::Repeat.into(), - "reset" => Op::Reset(false).into(), + "reset" => Op::ResetReader.into(), "unary" => { let children = node["children"].borrow(); diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 9f9e908d..e72ce914 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -152,6 +152,7 @@ impl Compiler { if self.debug > 0 { ast::print(&ast); + //println!("###\n{:#?}\n###", ast); } self.compile_from_ast(&ast) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 4fcd4c05..23d490d8 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -209,12 +209,12 @@ impl ImlOp { let mut initial_fuse = None; while let Some(item) = iter.next() { - let mut alt = Vec::new(); - item.compile(program, current, &mut alt); + let alt = item.compile_to_vec(program, current); // When branch has more than one item, Frame it. 
if iter.len() > 0 { - let fuse = alt.len() + if item.is_consuming() { 3 } else { 2 }; + let consuming = item.is_consuming(); + let fuse = alt.len() + if consuming { 3 } else { 2 }; if initial_fuse.is_none() { initial_fuse = Some(fuse) // this is used for the initial frame @@ -224,13 +224,14 @@ impl ImlOp { ret.extend(alt); - if item.is_consuming() { + if consuming { // Insert Nop as location for later jump backpatch ret.push(Op::Nop); jumps.push(ret.len() - 1); + ret.push(Op::Reset); + } else { + ret.push(Op::ResetCapture); } - - ret.push(Op::Reset(true)); } else { ret.extend(alt); } @@ -457,7 +458,15 @@ impl ImlOp { } } + // Defines the ImlOp's consuming state from point of view as an ImlOp. + // The ImlOp deeply can still consume, but this is a semantic issue. + // During code-generation, this function is useful to determine whether + // the ImlOp is directly consuming or not. pub fn is_consuming(&self) -> bool { + if matches!(self, ImlOp::Loop { .. } | ImlOp::If { .. }) { + return false; + } + let mut consuming = false; self.walk(&mut |op| { diff --git a/src/vm/op.rs b/src/vm/op.rs index 9a0af614..f813f76c 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -22,7 +22,9 @@ pub(crate) enum Op { Frame(usize), // Start new frame with optional relative forward address fuse Capture, // Reset frame capture to current stack size, saving captures Extend, // Extend frame's reader to current position - Reset(bool), // Reset frame, either full (true = stack+reader) or reader only (false) + Reset, // Reset frame, stack+reader + ResetReader, // Reset reader + ResetCapture, // Reset captures Close, // Close frame Collect, // Collect stack values from current frame InCollect, // Same as collect, but degrate the parselet level (5) (fixme: This is temporary!) 
@@ -214,14 +216,22 @@ impl Op { Ok(Accept::Next) } - Op::Reset(full) => { - if *full { - context.stack.truncate(context.frame.capture_start); - } + Op::Reset => { + context.stack.truncate(context.frame.capture_start); + context.thread.reader.reset(context.frame.reader_start); + Ok(Accept::Next) + } + + Op::ResetReader => { context.thread.reader.reset(context.frame.reader_start); Ok(Accept::Next) } + Op::ResetCapture => { + context.stack.truncate(context.frame.capture_start); + Ok(Accept::Next) + } + Op::Close => { context.frame = context.frames.pop().unwrap(); Ok(Accept::Next) From 62cde51553546e50909300bdd9cc678e8d98bebe Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 28 Sep 2023 12:47:42 +0200 Subject: [PATCH 75/94] Redefined ImlOp::is_consuming() --- src/compiler/iml/imlop.rs | 93 +++++++++++++-------------------------- 1 file changed, 30 insertions(+), 63 deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 23d490d8..6071a524 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -410,82 +410,49 @@ impl ImlOp { ops.len() - start } - /// Generic querying function taking a closure that either walks on the tree or stops. - pub fn walk(&self, func: &mut dyn FnMut(&Self) -> bool) -> bool { - // Call closure on current ImlOp, break on false return - if !func(self) { - return false; - } - - // Query along ImlOp structure - match self { - ImlOp::Alt { alts: items } | ImlOp::Seq { seq: items, .. } => { - for item in items { - if !item.walk(func) { - return false; - } - } - - true - } - ImlOp::If { then, else_, .. } => { - for i in [&then, &else_] { - if !i.walk(func) { - return false; - } - } - - true - } - ImlOp::Loop { - initial, - condition, - body, - .. - } => { - for i in [&initial, &condition, &body] { - if !i.walk(func) { - return false; - } - } - - true - } - // DEPRECATED BELOW!!! - ImlOp::Repeat { body, .. 
} => body.walk(func), - - _ => true, - } - } - // Defines the ImlOp's consuming state from point of view as an ImlOp. // The ImlOp deeply can still consume, but this is a semantic issue. // During code-generation, this function is useful to determine whether // the ImlOp is directly consuming or not. pub fn is_consuming(&self) -> bool { - if matches!(self, ImlOp::Loop { .. } | ImlOp::If { .. }) { - return false; - } - - let mut consuming = false; - - self.walk(&mut |op| { + fn walk(op: &ImlOp) -> Option { + // Query along ImlOp structure match op { ImlOp::Call { target, .. } => { if target.is_consuming() { - consuming = true; + return Some(true); + } + + None + } + ImlOp::Op(Op::Next) => Some(true), + ImlOp::Loop { .. } | ImlOp::If { peek: false, .. } => Some(false), + ImlOp::Alt { alts: items } | ImlOp::Seq { seq: items, .. } => { + for item in items { + if let Some(res) = walk(item) { + return Some(res); + } } + + None } - ImlOp::Op(Op::Next) => { - consuming = true; + ImlOp::If { then, else_, .. } => { + for item in [&then, &else_] { + if let Some(res) = walk(item) { + return Some(res); + } + } + + None } - _ => {} - } + // DEPRECATED BELOW!!! + ImlOp::Repeat { body, .. } => walk(body), - !consuming - }); + _ => None, + } + } - consuming + walk(self).unwrap_or(false) } /** Returns a value to operate with or evaluate during compile-time. 
From cda5cd6da600a45834f1f20a4bdb17727b3d76e2 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 28 Sep 2023 19:13:11 +0200 Subject: [PATCH 76/94] Updating prelude, with "blur"-feature for compatiblity --- src/compiler/prelude.rs | 256 ++++++++++++++++++++++++++++++++++++---- src/prelude.tok | 28 ++++- 2 files changed, 253 insertions(+), 31 deletions(-) diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index b327ff7d..cc1c4cd4 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -309,6 +309,20 @@ impl Compiler { ])) ])) ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "blur" + ])), + (value!([ + "emit" => "value_true", + "value" => "true" + ])) + ])) + ])), (value!([ "emit" => "body", "children" => @@ -419,12 +433,8 @@ impl Compiler { ])) ])), (value!([ - "emit" => "op_accept", - "children" => - (value!([ - "emit" => "identifier", - "value" => "res" - ])) + "emit" => "op_break", + "value" => "break" ])) ])) ])) @@ -476,14 +486,142 @@ impl Compiler { ])), (value!([ "emit" => "op_break", + "value" => "break" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "op_if", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "blur" + ])), + (value!([ + "emit" => "block", + "children" => + (value!([ + "emit" => "op_if", "children" => (value!([ - "emit" => "identifier", - "value" => "res" + (value!([ + "emit" => "comparison", + "children" => + (value!([ + (value!([ + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "len" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "cmp_eq", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 0 + ])) + ])) + ])) + ])), + (value!([ + "emit" => "block", + "children" => + (value!([ + "emit" => "op_accept", + "children" => + (value!([ 
+ "emit" => "value_void", + "value" => "void" + ])) + ])) + ])), + (value!([ + "emit" => "op_if", + "children" => + (value!([ + (value!([ + "emit" => "comparison", + "children" => + (value!([ + (value!([ + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "len" + ])) + ])) + ])) + ])), + (value!([ + "emit" => "cmp_eq", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) + ])) + ])) + ])), + (value!([ + "emit" => "block", + "children" => + (value!([ + "emit" => "op_accept", + "children" => + (value!([ + "emit" => "rvalue", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "item", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 0 + ])) + ])) + ])) + ])) + ])) + ])) + ])) + ])) ])) ])) - ])) + ])) ])) + ])), + (value!([ + "emit" => "identifier", + "value" => "res" ])) ])) ])) @@ -511,6 +649,20 @@ impl Compiler { "value" => "P" ])) ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "blur" + ])), + (value!([ + "emit" => "value_true", + "value" => "true" + ])) + ])) + ])), (value!([ "emit" => "body", "children" => @@ -529,6 +681,20 @@ impl Compiler { "emit" => "identifier", "value" => "P" ])) + ])), + (value!([ + "emit" => "genarg_named", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "blur" + ])), + (value!([ + "emit" => "identifier", + "value" => "blur" + ])) + ])) ])) ])) ])) @@ -557,38 +723,76 @@ impl Compiler { "value" => "P" ])) ])), + (value!([ + "emit" => "gen", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "blur" + ])), + (value!([ + "emit" => "value_true", + "value" => "true" + ])) + ])) + ])), (value!([ "emit" => "body", "children" => (value!([ - "emit" => 
"value_generic", + "emit" => "op_logical_or", "children" => (value!([ (value!([ - "emit" => "identifier", - "value" => "Repeat" - ])), - (value!([ - "emit" => "genarg", - "children" => - (value!([ - "emit" => "identifier", - "value" => "P" - ])) - ])), - (value!([ - "emit" => "genarg_named", + "emit" => "value_generic", "children" => (value!([ (value!([ "emit" => "identifier", - "value" => "min" + "value" => "Repeat" ])), (value!([ - "emit" => "value_integer", - "value" => 0 + "emit" => "genarg", + "children" => + (value!([ + "emit" => "identifier", + "value" => "P" + ])) + ])), + (value!([ + "emit" => "genarg_named", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "min" + ])), + (value!([ + "emit" => "value_integer", + "value" => 0 + ])) + ])) + ])), + (value!([ + "emit" => "genarg_named", + "children" => + (value!([ + (value!([ + "emit" => "identifier", + "value" => "blur" + ])), + (value!([ + "emit" => "identifier", + "value" => "blur" + ])) + ])) ])) ])) + ])), + (value!([ + "emit" => "value_void", + "value" => "void" ])) ])) ])) diff --git a/src/prelude.tok b/src/prelude.tok index 6175f518..c967d3c8 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -14,22 +14,38 @@ Expect : @

msg=void { error(msg || "Expecting " + *P + ", but got " + repr((Token | Char | "end-of-file"))) } -Repeat : @ { +Repeat : @< + P, # Parselet + min: 1, # minimum occurence + max: void, # maximum occurence, void for unlimited + blur: true # result blurrying; empty list becomes void, one-item list becomes item +> { res = () loop { P { res += $1 - if max && res.len == max return res + if max && res.len == max break } if res.len < min reject - break res + break } + + if blur { + if res.len == 0 { + accept void + } + else if res.len == 1 { + accept res[0] + } + } + + res } -Pos : @

{ Repeat

} -Kle : @

{ Repeat } +Pos : @{ Repeat } +Kle : @{ Repeat || void } Opt : @

{ P | Void } List : @ { @@ -40,3 +56,5 @@ List : @ { Number : Float | Int Token : Word | Number | AsciiPunctuation + +# print("=> ", XKle<'a'>) From 92d9f5932bd2f06a16797c5231308784d5ad327e Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 28 Sep 2023 23:56:37 +0200 Subject: [PATCH 77/94] Fix preluded' Repeat to use `list_push()` instead of `+=` The `+=`-operator uses `list_extend()`, which generates different results when P returns a list. --- src/compiler/prelude.rs | 28 +++++++++++++++++++++------- src/prelude.tok | 2 +- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index cc1c4cd4..2c38e300 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -365,23 +365,37 @@ impl Compiler { "children" => (value!([ (value!([ - "emit" => "assign_add", + "emit" => "call", "children" => (value!([ (value!([ - "emit" => "lvalue", + "emit" => "rvalue", "children" => (value!([ - "emit" => "identifier", - "value" => "res" + (value!([ + "emit" => "identifier", + "value" => "res" + ])), + (value!([ + "emit" => "attribute", + "children" => + (value!([ + "emit" => "value_string", + "value" => "push" + ])) + ])) ])) ])), (value!([ - "emit" => "capture_index", + "emit" => "callarg", "children" => (value!([ - "emit" => "value_integer", - "value" => 1 + "emit" => "capture_index", + "children" => + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) ])) ])) ])) diff --git a/src/prelude.tok b/src/prelude.tok index c967d3c8..8ae9f25f 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -24,7 +24,7 @@ Repeat : @< loop { P { - res += $1 + res.push($1) if max && res.len == max break } From b5a16c5707a288d61082ade53488a294dc4aea62 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 30 Sep 2023 22:51:50 +0200 Subject: [PATCH 78/94] Modifiers with assumed severity --- src/compiler/ast.rs | 71 +++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git 
a/src/compiler/ast.rs b/src/compiler/ast.rs index 81e1920b..5851c54d 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1351,43 +1351,52 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { } */ + let mut assume_severity = None; + // Modifiers on usages of Token::Char can be optimized for better efficiency if let ImlValue::Value(target) = &res { let target = target.borrow(); // TODO: The Char-modifier-stuff needs the be refactored in a separate pull request. - if let Some(Token::Char(ccl)) = target.object::() { - match parts[2] { - // mod_pos on Token::Char becomes Token::Chars - "pos" | "kle" => { - let mut chars = - ImlValue::from(RefValue::from(Token::Chars(ccl.clone()))); - - // mod_kle on Token::Char becomes optional Token::Chars - if parts[2] == "kle" { - chars = - chars.into_generic("Opt", None).try_resolve(compiler); + match target.object::() { + Some(Token::Char(ccl)) => { + match parts[2] { + // mod_pos on Token::Char becomes Token::Chars + "pos" | "kle" => { + let mut chars = ImlValue::from(RefValue::from( + Token::Chars(ccl.clone()), + )); + + // mod_kle on Token::Char becomes optional Token::Chars + if parts[2] == "kle" { + chars = chars + .into_generic("Opt", None) + .try_resolve(compiler); + } + + return ImlOp::Call { + offset: traverse_node_offset(node), + target: chars, + args: None, + }; } - return ImlOp::Call { - offset: traverse_node_offset(node), - target: chars, - args: None, - }; - } - - // mod_not on Token::Char becomes a negated Token::Char - "not" => { - return ImlOp::Call { - offset: traverse_node_offset(node), - target: ImlValue::from(RefValue::from(Token::Char( - ccl.clone().negate(), - ))), - args: None, - }; + // mod_not on Token::Char becomes a negated Token::Char + "not" => { + return ImlOp::Call { + offset: traverse_node_offset(node), + target: ImlValue::from(RefValue::from(Token::Char( + ccl.clone().negate(), + ))), + args: None, + }; + } + _ => {} } - _ => {} } + // fixme: This is an ugly hack to keep 
severity for modified versions + Some(Token::Touch(_)) => assume_severity = Some(0), + _ => {} } } @@ -1397,9 +1406,9 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { ImlOp::Call { offset: None, target: match parts[2] { - "pos" => res.into_generic("Pos", None), - "kle" => res.into_generic("Kle", None), - "opt" => res.into_generic("Opt", None), + "pos" => res.into_generic("Pos", assume_severity), + "kle" => res.into_generic("Kle", assume_severity), + "opt" => res.into_generic("Opt", assume_severity), _ => unreachable!(), } .try_resolve(compiler), From dc8e24bc501ae7f5e6631ebe8f5726ab9728423e Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 30 Sep 2023 22:53:15 +0200 Subject: [PATCH 79/94] Improving context debug --- src/vm/context.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/vm/context.rs b/src/vm/context.rs index 24817b1d..65b3a15e 100644 --- a/src/vm/context.rs +++ b/src/vm/context.rs @@ -430,7 +430,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, // Execute VM opcodes in a context. // This function is a wrapper for Op::run() which post-processes the result. 
- fn execute(&mut self, ops: &[Op]) -> Result { + fn execute(&mut self, name: &str, ops: &[Op]) -> Result { let mut state = Op::run(ops, self); match state { @@ -462,7 +462,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, } if self.thread.debug > 3 { - self.log(&format!("final state = {:?}", state)); + self.log(&format!("{} final state = {:?}", name, state)); } state @@ -488,7 +488,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, } // Begin - let mut ret = match self.execute(&self.parselet.begin) { + let mut ret = match self.execute("begin", &self.parselet.begin) { Ok(Accept::Next) | Err(Reject::Skip) => Capture::Empty, Ok(Accept::Push(capture)) => { self.reset(Some(self.thread.reader.tell())); @@ -505,7 +505,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, // Body let mut first = true; ret = loop { - match self.execute(&self.parselet.body) { + match self.execute("body", &self.parselet.body) { Err(Reject::Skip) => {} Ok(Accept::Next) => break ret, Ok(Accept::Push(capture)) => break capture, @@ -526,14 +526,20 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, }; // End - ret = match self.execute(&self.parselet.end) { + ret = match self.execute("end", &self.parselet.end) { Ok(Accept::Next) | Err(Reject::Skip) | Ok(Accept::Repeat) => ret, Ok(Accept::Push(capture)) => capture, Ok(accept) => return Ok(accept.into_push(self.parselet.severity)), other => return other, }; - Ok(Accept::Push(ret).into_push(self.parselet.severity)) + let ret = Accept::Push(ret).into_push(self.parselet.severity); + + if self.thread.debug > 3 { + self.log(&format!("ret = {:?}", ret)); + } + + Ok(ret) } /** Run the current context as a main parselet. 
@@ -547,7 +553,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, let mut results = List::new(); // Begin - match self.execute(&self.parselet.begin) { + match self.execute("main begin", &self.parselet.begin) { Ok(Accept::Next) | Err(Reject::Skip) | Ok(Accept::Push(Capture::Empty)) => {} Ok(Accept::Push(mut capture)) => { results.push(capture.extract(&self.thread.reader)); @@ -562,7 +568,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, // Body loop { - match self.execute(&self.parselet.body) { + match self.execute("main body", &self.parselet.body) { Err(Reject::Next) | Err(Reject::Skip) | Ok(Accept::Next) @@ -604,7 +610,7 @@ impl<'program, 'reader, 'thread, 'parselet> Context<'program, 'reader, 'thread, } // End - match self.execute(&self.parselet.end) { + match self.execute("main end", &self.parselet.end) { Ok(Accept::Next) | Err(Reject::Skip) | Ok(Accept::Push(Capture::Empty)) => {} Ok(Accept::Push(mut capture)) => { results.push(capture.extract(&self.thread.reader)); From cf5ed31073aefb4d6ee216efaa80a0a1d1a7a2a5 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Tue, 3 Oct 2023 23:30:51 +0200 Subject: [PATCH 80/94] Fixing left-recursion detection, improving debug ... and tested behavior with generated parselets. Broken, still WIP. 
--- src/compiler/ast.rs | 29 ++++++++++++++--------------- src/compiler/compiler.rs | 7 ++++++- src/compiler/iml/imlop.rs | 16 +++++++++------- src/compiler/iml/imlparselet.rs | 4 ++++ src/compiler/iml/imlprogram.rs | 26 ++++++++++++++++++++------ src/compiler/iml/imlvalue.rs | 6 +++++- 6 files changed, 58 insertions(+), 30 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 5851c54d..bdfa9a34 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -373,6 +373,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { nargs, offset: traverse_node_offset(node), severity: None, + generated: false, }; ret.try_resolve(compiler) @@ -1383,13 +1384,14 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { // mod_not on Token::Char becomes a negated Token::Char "not" => { - return ImlOp::Call { - offset: traverse_node_offset(node), - target: ImlValue::from(RefValue::from(Token::Char( + return ImlOp::call( + compiler, + traverse_node_offset(node), + ImlValue::from(RefValue::from(Token::Char( ccl.clone().negate(), ))), - args: None, - }; + None, + ); } _ => {} } @@ -1400,20 +1402,17 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { } } - // Push operation position here - ops.push(traverse_offset(node)); - - ImlOp::Call { - offset: None, - target: match parts[2] { + ImlOp::call( + compiler, + traverse_node_offset(node), + match parts[2] { "pos" => res.into_generic("Pos", assume_severity), "kle" => res.into_generic("Kle", assume_severity), "opt" => res.into_generic("Opt", assume_severity), _ => unreachable!(), - } - .try_resolve(compiler), - args: None, - } + }, + None, + ) } "if" => { diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 1b8b1554..5b1003fe 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -119,7 +119,10 @@ impl Compiler { println!("--- Intermediate main ---\n{:#?}", main); } - match ImlProgram::new(main).compile() { + let mut program = 
ImlProgram::new(main); + program.debug = self.debug > 1; + + match program.compile() { Ok(program) => { if self.debug > 1 { println!("--- Finalized program ---"); @@ -151,6 +154,7 @@ impl Compiler { }; if self.debug > 0 { + println!("--- Abstract Syntax Tree ---"); ast::print(&ast); //println!("###\n{:#?}\n###", ast); } @@ -319,6 +323,7 @@ impl Compiler { offset, name, severity.unwrap_or(5), + false, )) } else { unreachable!(); diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 6174b315..3c27210a 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -84,10 +84,10 @@ impl ImlOp { } /// Load value - pub fn load(_compiler: &mut Compiler, offset: Option, value: ImlValue) -> ImlOp { + pub fn load(_compiler: &mut Compiler, offset: Option, target: ImlValue) -> ImlOp { ImlOp::Load { offset, - target: value, + target, } } @@ -102,23 +102,25 @@ impl ImlOp { pub fn call( compiler: &mut Compiler, offset: Option, - value: ImlValue, + target: ImlValue, args: Option<(usize, bool)>, ) -> ImlOp { + let target = target.try_resolve(compiler); + // When args is unset, and the value is not callable without arguments, // consider this call is a load. 
- if args.is_none() && !value.is_callable(true) { + if args.is_none() && !target.is_callable(true) { // Currently not planned as final - return Self::load(compiler, offset, value); + return Self::load(compiler, offset, target); } - if value.is_consuming() { + if target.is_consuming() { compiler.parselet_mark_consuming(); } ImlOp::Call { offset, - target: value, + target, args, } } diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 7a15c7e8..823541f6 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -46,6 +46,7 @@ pub(in crate::compiler) struct ImlParseletConfig { pub offset: Option, // Offset of definition pub name: Option, // Assigned name from source (for debugging) pub severity: u8, // Capture push severity + pub generated: bool, } /** Representation of parselet in intermediate code. */ @@ -56,6 +57,7 @@ impl ImlParseletConfig { offset: Option, name: Option, severity: u8, + generated: bool, ) -> Self { Self { model: Rc::new(RefCell::new(model)), @@ -63,6 +65,7 @@ impl ImlParseletConfig { offset, name, severity, + generated, } } @@ -192,6 +195,7 @@ impl ImlParselet { offset: parselet.offset.clone(), name: parselet.name.clone(), severity: parselet.severity, + generated: parselet.generated, }) } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index f512b1d5..8b25b883 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -12,6 +12,7 @@ use std::collections::{HashMap, HashSet}; pub(in crate::compiler) struct ImlProgram { statics: IndexMap>, // static values with optional final parselet replacement pub errors: Vec, // errors collected during finalization (at least these are unresolved symbols) + pub debug: bool, // Debug } impl ImlProgram { @@ -19,6 +20,7 @@ impl ImlProgram { ImlProgram { statics: indexmap!(main => None), errors: Vec::new(), + debug: false, } } @@ -76,6 +78,14 @@ impl ImlProgram { idx += 1; } + if self.debug { + 
println!("--- Parselets to finalize ---"); + + for (i, parselet) in finalize.iter().enumerate() { + println!("{:?} => {:#?}", i, parselet); + } + } + let leftrec = self.finalize(finalize); // Stop on any raised error @@ -302,10 +312,18 @@ impl ImlProgram { return None; } + //println!("- {}{}", ".".repeat(visited.len()), current); + if let Some(idx) = visited.get_index_of(current) { // When in visited, this is a recursion Some(Consumable { - leftrec: idx == 0, // If the idx is 0, current is the seeked parselet, and is left-recursive + // If the idx is 0, current is the seeked parselet, so it is left-recursive + leftrec: if idx == 0 && !current.borrow().generated { + configs.get_mut(current).unwrap().leftrec = true; + true + } else { + false + }, nullable: configs[current].nullable, }) } else { @@ -313,11 +331,7 @@ impl ImlProgram { visited.insert(current.clone()); for part in [&model.begin, &model.body, &model.end] { - if let Some(result) = finalize_op(part, current, visited, configs) { - if configs[current] < result { - configs.insert(current.clone(), result); - } - } + finalize_op(part, current, visited, configs); } visited.remove(current); diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index b993a3ad..aaca874a 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -50,6 +50,7 @@ pub(in crate::compiler) enum ImlValue { args: Vec<(Option, ImlValue)>, // Sequential generic args nargs: IndexMap, ImlValue)>, // Named generic args severity: Option, // optional desired severity + generated: bool, }, } @@ -64,6 +65,7 @@ impl ImlValue { args: vec![(None, self)], nargs: IndexMap::new(), severity, + generated: true, } } @@ -89,6 +91,7 @@ impl ImlValue { args, nargs, severity, + generated, } => { let mut is_resolved = target.resolve(compiler); @@ -191,7 +194,7 @@ impl ImlValue { // Make a parselet derivation from the instance definition; // This can be the final parselet definition, but constants - // might contain Generic 
references as well, which are being + // might contain generic references as well, which are being // resolved during compilation. Some(ImlValue::from(ImlParseletConfig { model: parselet.model.clone(), @@ -199,6 +202,7 @@ impl ImlValue { offset: parselet.offset.clone(), name: parselet.name.clone(), severity: severity.unwrap_or(parselet.severity), + generated: *generated, })) } target => { From c708a59cbf812fe2f2c5594bf2d83e687daa74c3 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Wed, 4 Oct 2023 19:48:08 +0200 Subject: [PATCH 81/94] Modifying parselet_leftrec.tok to fulfill current capabilities on left-recursion --- src/compiler/iml/imlop.rs | 5 +---- tests/parselet_leftrec.tok | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 3c27210a..629f5c9d 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -85,10 +85,7 @@ impl ImlOp { /// Load value pub fn load(_compiler: &mut Compiler, offset: Option, target: ImlValue) -> ImlOp { - ImlOp::Load { - offset, - target, - } + ImlOp::Load { offset, target } } /// Load unknown value by name diff --git a/tests/parselet_leftrec.tok b/tests/parselet_leftrec.tok index 584bec55..111304fd 100644 --- a/tests/parselet_leftrec.tok +++ b/tests/parselet_leftrec.tok @@ -1,28 +1,27 @@ -# direct1 +# direct D1: @{ - D1? Char + D1 Char + Char } 'D1' print(D1) -# direct2 -D2: @{ - D2 Char - Char +# indirect 1: currently not working, see issue #95 for details +I1: @{ + I1? Char } -'D2' print(D2) +'I1' print(I1) -# indirect -# currently not working, see issue #95 on this. 
+# indirect 2: currently not working, see issue #95 for details X: Y 'c' Y: Z 'b' Z: X | Y | 'a' -'I' print(Z) +'I2' print(Z) #--- -#D1aaaa -#D2abbb -#Iabbcb +#D1abbb +#I1aaaa +#I2abbcb #--- -#((("a", "a"), "a"), "a") #((("a", "b"), "b"), "b") #a +#a From d4ac089abea7f294b7c5ef370f50f5474c8720a5 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 5 Oct 2023 22:45:18 +0200 Subject: [PATCH 82/94] Removal of ImlOp::Repeat Finally remove deprecated stuff as it is replaced by Tokay internals for now. --- src/compiler/iml/imlop.rs | 69 ---------------------------------- src/compiler/iml/imlprogram.rs | 16 -------- src/vm/op.rs | 19 ++++++---- 3 files changed, 11 insertions(+), 93 deletions(-) diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 629f5c9d..59f004b6 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -49,15 +49,6 @@ pub(in crate::compiler) enum ImlOp { condition: Box, // Abort condition body: Box, // Iterating body }, - - // v--- below variants are being replaced by Tokay generics as soon as they are implemented ---v // - - // Repeat (deprecated!) - Repeat { - body: Box, - min: usize, - max: usize, - }, } impl ImlOp { @@ -319,64 +310,6 @@ impl ImlOp { ops.push(Op::Break); } } - // DEPRECATED BELOW!!! 
- ImlOp::Repeat { body, min, max } => { - let mut body_ops = Vec::new(); - let body_len = body.compile(program, current, &mut body_ops); - - match (min, max) { - (0, 0) => { - // Kleene - ops.extend(vec![ - Op::Frame(0), // The overall capture - Op::Frame(body_len + 6), // The fused capture for repetition - ]); - ops.extend(body_ops); // here comes the body - ops.extend(vec![ - Op::ForwardIfConsumed(2), // When consumed we can commit and jump backward - Op::Forward(4), // otherwise leave the loop - Op::Capture, - Op::Extend, - Op::Backward(body_len + 4), // repeat the body - Op::Close, - Op::InCollect, - Op::Close, - ]); - } - (1, 0) => { - // Positive - ops.push(Op::Frame(0)); // The overall capture - ops.extend(body_ops.clone()); // here comes the body for the first time - ops.extend(vec![ - Op::ForwardIfConsumed(2), // If nothing was consumed, then... - Op::Next, //...reject - Op::Frame(body_len + 6), // The fused capture for repetition - ]); - ops.extend(body_ops); // here comes the body again inside the repetition - ops.extend(vec![ - Op::ForwardIfConsumed(2), // When consumed we can commit and jump backward - Op::Forward(4), // otherwise leave the loop - Op::Capture, - Op::Extend, - Op::Backward(body_len + 4), // repeat the body - Op::Close, - Op::InCollect, - Op::Close, - ]); - } - (0, 1) => { - // Optional - ops.push(Op::Frame(body_len + 1)); // on error, jump to the collect - ops.extend(body_ops); - ops.push(Op::InCollect); - ops.push(Op::Close); - } - (1, 1) => {} - (_, _) => unimplemented!( - "ImlOp::Repeat construct with min/max configuration > 1 not implemented yet" - ), - }; - } } ops.len() - start @@ -417,8 +350,6 @@ impl ImlOp { None } - // DEPRECATED BELOW!!! - ImlOp::Repeat { body, .. 
} => walk(body), _ => None, } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 8b25b883..eaef4988 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -277,22 +277,6 @@ impl ImlProgram { ret } - // DEPRECATED BELOW!!! - ImlOp::Repeat { body, min, .. } => { - if let Some(consumable) = finalize_op(body, current, visited, configs) { - if *min == 0 { - Some(Consumable { - leftrec: consumable.leftrec, - nullable: true, - }) - } else { - Some(consumable) - } - } else { - None - } - } - // default case _ => None, } diff --git a/src/vm/op.rs b/src/vm/op.rs index f813f76c..92c132d5 100644 --- a/src/vm/op.rs +++ b/src/vm/op.rs @@ -20,15 +20,15 @@ pub(crate) enum Op { // Capture frames Frame(usize), // Start new frame with optional relative forward address fuse - Capture, // Reset frame capture to current stack size, saving captures + // Capture, // Reset frame capture to current stack size, saving captures Extend, // Extend frame's reader to current position Reset, // Reset frame, stack+reader ResetReader, // Reset reader ResetCapture, // Reset captures Close, // Close frame Collect, // Collect stack values from current frame - InCollect, // Same as collect, but degrate the parselet level (5) (fixme: This is temporary!) - Fuse(usize), // Set frame fuse to relative forward address + // InCollect, // Same as collect, but degrate the parselet level (5) (fixme: This is temporary!) 
+ Fuse(usize), // Set frame fuse to relative forward address // Loop frames Loop(usize), // Loop frame @@ -43,8 +43,8 @@ pub(crate) enum Op { ForwardIfConsumed(usize), // Jump forward when frame consumed input // Direct jumps - Forward(usize), // Jump forward - Backward(usize), // Jump backward + Forward(usize), // Jump forward + // Backward(usize), // Jump backward // Interrupts Next, // Err(Reject::Next) @@ -206,11 +206,12 @@ impl Op { Ok(Accept::Next) } + /* Op::Capture => { context.frame.capture_start = context.stack.len(); Ok(Accept::Next) } - + */ Op::Extend => { context.frame.reader_start = context.thread.reader.tell(); Ok(Accept::Next) @@ -243,6 +244,7 @@ impl Op { context.debug > 5, ))), + /* Op::InCollect => { let mut capture = context.collect(context.frame.capture_start, false, context.debug > 5); @@ -253,7 +255,7 @@ impl Op { Ok(Accept::Push(capture)) } - + */ Op::Fuse(addr) => { context.frame.fuse = Some(ip + *addr); Ok(Accept::Next) @@ -362,11 +364,12 @@ impl Op { Ok(Accept::Hold) } + /* Op::Backward(goto) => { ip -= goto; Ok(Accept::Hold) } - + */ // Interrupts Op::Next => Err(Reject::Next), From 307c7fcc3810b04a6eaa9538dd1783a9aa251f0d Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Mon, 9 Oct 2023 01:05:56 +0200 Subject: [PATCH 83/94] Store name with compile-time local and global This is useful for debugging and error reporting --- src/compiler/ast.rs | 4 ++-- src/compiler/compiler.rs | 10 ++++++++-- src/compiler/iml/imlvalue.rs | 26 +++++++++++++++++--------- src/main.rs | 2 +- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index bdfa9a34..a86f19ec 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -496,7 +496,7 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: 'load: loop { match compiler.get(name) { // Known local - Some(ImlValue::Local(addr)) => { + Some(ImlValue::Local { addr, .. 
}) => { if store { if hold { ops.push(Op::StoreFastHold(addr).into()) @@ -510,7 +510,7 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: break; } // Known global - Some(ImlValue::Global(addr)) => { + Some(ImlValue::Global { addr, .. }) => { if store { if hold { ops.push(Op::StoreGlobalHold(addr).into()) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 5b1003fe..6db3cac3 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -487,13 +487,19 @@ impl Compiler { // Check for global variable if i + 1 == self.scopes.len() { if let Some(addr) = variables.get(name) { - return Some(ImlValue::Global(*addr)); + return Some(ImlValue::Global { + name: name.to_string(), + addr: *addr, + }); } } // Check for local variable else if top_parselet { if let Some(addr) = variables.get(name) { - return Some(ImlValue::Local(*addr)); + return Some(ImlValue::Local { + name: name.to_string(), + addr: *addr, + }); } } diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index aaca874a..af437941 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -28,9 +28,17 @@ pub(in crate::compiler) enum ImlValue { Parselet(ImlParselet), // Parselet instance // Resolved: dynamic - This(bool), // self-reference function (false) or parselet (true) - Local(usize), // Runtime local variable - Global(usize), // Runtime global variable + This(bool), // self-reference function (false) or parselet (true) + Local { + // Runtime local variable + name: String, + addr: usize, + }, + Global { + // Runtime global variable + name: String, + addr: usize, + }, // Unresolved Name { @@ -309,8 +317,8 @@ impl ImlValue { }, _ => {} }, - ImlValue::Local(addr) => ops.push(Op::LoadFast(*addr)), - ImlValue::Global(addr) => ops.push(Op::LoadGlobal(*addr)), + ImlValue::Local { addr, .. } => ops.push(Op::LoadFast(*addr)), + ImlValue::Global { addr, .. 
} => ops.push(Op::LoadGlobal(*addr)), ImlValue::Generic { name, .. } => { return current.0.borrow().constants[name] .compile(program, current, offset, call, ops) @@ -424,10 +432,10 @@ impl std::fmt::Display for ImlValue { .as_deref() .unwrap_or("") ), - Self::Global(var) => write!(f, "global({})", var), - Self::Local(var) => write!(f, "local({})", var), - Self::Name { name, .. } => write!(f, "{}", name), - Self::Generic { name, .. } => write!(f, "{}!", name), + Self::Local { name, .. } => write!(f, "local '{}'", name), + Self::Global { name, .. } => write!(f, "global '{}'", name), + Self::Name { name, .. } => write!(f, "name '{}'", name), + Self::Generic { name, .. } => write!(f, "generic '{}'", name), Self::Instance { target, args, diff --git a/src/main.rs b/src/main.rs index 2c63514a..1abad839 100644 --- a/src/main.rs +++ b/src/main.rs @@ -287,7 +287,7 @@ fn main() { } // otherwise just work on an empty input else { - Reader::new(None, Box::new(io::Cursor::new("".clone()))) + Reader::new(None, Box::new(io::Cursor::new(""))) }, ); } From 5b165ce6a61dd64d6f24a19d2fcde15d4ea93ace Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Thu, 12 Oct 2023 22:49:38 +0200 Subject: [PATCH 84/94] Several improvements on semantic error detection - Re-enables reporting of unused symbols - Improves reporting of unresolved values that are registered - Improves reporting of invalid generic configurations This commit also showed that ImlValue::Shared is a big problem, and was a conceptual mistake, making things heavy and blurry. Therefore, a larger rework of the ImlValue is scheduled, and the Iml*-stuff in general. 
--- src/compiler/ast.rs | 110 +++++++++++-------- src/compiler/compiler.rs | 36 ++++-- src/compiler/iml/imlparselet.rs | 2 +- src/compiler/iml/imlprogram.rs | 25 ++++- src/compiler/iml/imlvalue.rs | 22 ++-- tests/parselet_args_default_static.tok | 5 + tests/parselet_args_default_var.tok | 5 + tests/parselet_generic_default_defined.tok | 9 ++ tests/parselet_generic_default_undefined.tok | 8 ++ 9 files changed, 152 insertions(+), 70 deletions(-) create mode 100644 tests/parselet_args_default_static.tok create mode 100644 tests/parselet_args_default_var.tok create mode 100644 tests/parselet_generic_default_defined.tok create mode 100644 tests/parselet_generic_default_undefined.tok diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index a86f19ec..a00bd034 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1,5 +1,5 @@ //! Compiler's internal Abstract Syntax Tree traversal -use indexmap::IndexMap; +use indexmap::{IndexMap, IndexSet}; use tokay_macros::tokay_function; extern crate self as tokay; use super::*; @@ -168,11 +168,12 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { // Parselets "value_parselet" => { - compiler.parselet_push(); - let mut constants: IndexMap = IndexMap::new(); let mut signature: IndexMap = IndexMap::new(); + let mut locals = IndexSet::new(); + let mut generics = IndexMap::new(); + // Traverse the AST let mut sigs = List::from(node["children"].clone()); let body = sigs.pop().unwrap(); @@ -185,48 +186,55 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { let emit = emit.object::().unwrap().as_str(); let children = List::from(node["children"].clone()); - let ident = children[0].borrow().object::().unwrap()["value"].to_string(); + let name = children[0].borrow().object::().unwrap()["value"].to_string(); match emit { "gen" => { let offset = traverse_node_offset(node); - // check if identifier was not provided twice - if constants.contains_key(&ident) { - 
compiler.errors.push(Error::new( - offset, - format!("Generic '{}' already given in signature before", ident), - )); + assert!(children.len() <= 2); - continue; + // Evaluate default parameter + let mut default = ImlValue::Void; + + if children.len() == 2 { + default = traverse_node_static( + compiler, + Some(&name), + children[1].borrow().object::().unwrap(), + ); + + if utils::identifier_is_consumable(&name) && !default.is_consuming() { + compiler.errors.push(Error::new( + offset, + format!( + "Generic '{}' defines consumable, but {} is not consuming", + name, default + ), + )); + } } - compiler.set_constant( - &ident, - ImlValue::Generic { + if generics + .insert( + name.to_string(), + ImlValue::Generic { + offset, + name: name.to_string(), + }, + ) + .is_some() + { + compiler.errors.push(Error::new( offset, - name: ident.to_string(), - }, - ); - - assert!(children.len() <= 2); + format!("Generic '{}' already defined in signature before", name), + )); + } - constants.insert( - ident.to_string(), - if children.len() == 2 { - let default = children[1].borrow(); - traverse_node_static( - compiler, - Some(&ident), - default.object::().unwrap(), - ) - } else { - ImlValue::Void - }, - ); + constants.insert(name.to_string(), default); } "arg" => { - let first = ident.chars().nth(0).unwrap(); + let first = name.chars().nth(0).unwrap(); // Check for correct identifier semantics if !first.is_lowercase() { @@ -236,40 +244,36 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { if first == '_' { format!( "Argument named '{}' invalid; May not start with '{}'", - ident, first + name, first ) } else { format!( "Argument named '{}' invalid; Use a name starting in lower-case, e.g. '{}{}'", - ident, &ident[0..1].to_lowercase(), &ident[1..] + name, &name[0..1].to_lowercase(), &name[1..] 
) } ) ); } - // check if identifier was not provided twice - if signature.contains_key(&ident) { + // insert and check if identifier was not defined twice + if !locals.insert(name.to_string()) { compiler.errors.push(Error::new( traverse_node_offset(node), - format!("Argument '{}' already given in signature before", ident), + format!("Argument '{}' already given in signature before", name), )); - - continue; } - compiler.new_local(&ident); - assert!(children.len() <= 2); signature.insert( - ident.to_string(), + name.to_string(), if children.len() == 2 { let default = children[1].borrow(); traverse_node_static( compiler, - Some(&ident), + Some(&name), default.object::().unwrap(), ) } else { @@ -282,6 +286,19 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { } } + // Push new parselet scope + compiler.parselet_push(); + + // Create previously collected constants + for (name, generic) in generics { + compiler.set_constant(&name, generic); + } + + // Create previously collected locals + for local in locals { + compiler.new_local(&local); + } + let body = body.borrow(); let body = traverse_node_rvalue(compiler, body.object::().unwrap(), Rvalue::CallOrLoad); @@ -494,7 +511,7 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: // This loop is only iterated in case a variable isn't known! 'load: loop { - match compiler.get(name) { + match compiler.get(traverse_node_offset(item), name) { // Known local Some(ImlValue::Local { addr, .. 
}) => { if store { @@ -613,6 +630,7 @@ Rvalue::Load generates code to just load the value, Rvalue::CallOrLoad generates code to either call the value without parameters or load it Rvalue::Call(args, nargs) generates code for a full-qualified value call */ +#[derive(Debug)] enum Rvalue { Load, // Generate code to just load the value CallOrLoad, // Generate code for a call without parameters, or load otherwise @@ -655,6 +673,8 @@ fn traverse_node_rvalue(compiler: &mut Compiler, node: &Dict, mode: Rvalue) -> I )); } + //println!("identifier = {:?}, mode = {:?}", name, mode); + return match mode { Rvalue::Load => ImlOp::load_by_name(compiler, offset, name.to_string()), Rvalue::CallOrLoad => ImlOp::call_by_name(compiler, offset, name.to_string(), None), diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index 6db3cac3..e19117b7 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -76,6 +76,7 @@ impl Compiler { errors: Vec::new(), }; + // Preload oftenly used static constants for value in [ value!(void), value!(null), @@ -101,15 +102,33 @@ impl Compiler { } /** Compile a Tokay program from an existing AST into the compiler. */ - pub fn compile_from_ast(&mut self, ast: &RefValue) -> Result, Vec> { + pub(super) fn compile_from_ast( + &mut self, + ast: &RefValue, + ) -> Result, Vec> { let ret = ast::traverse(self, &ast); + assert!(self.scopes.len() == 1); + + // TODO: This is only a short hack to report still unresolved symbols again. + // TODO: ImlValue, and especially ImlValue::Shared is a conceptual problem that generally must be revised. + // println!("self.usages = {:?}", self.usages); + for usage in self.usages.drain(..) 
{ + if let ImlValue::Shared(usage) = usage { + let usage = usage.borrow(); + if let ImlValue::Name { offset, name } = &*usage { + self.errors.push(Error::new( + offset.clone(), + format!("Use of undefined name '{}'", name), + )); + } + } + } + if !self.errors.is_empty() { return Err(self.errors.drain(..).collect()); } - assert!(self.scopes.len() == 1); - if self.debug > 1 { println!("--- Global scope ---\n{:#?}", self.scopes.last().unwrap()) } @@ -250,11 +269,6 @@ impl Compiler { self.resolve(); - // Clear any unresolved usages when reaching global scope - if self.scopes.len() == 1 { - self.usages.clear(); - } - let mut scope = self.scopes.remove(0); if let Scope::Parselet { @@ -465,7 +479,7 @@ impl Compiler { } /** Get named value, either from current or preceding scope, a builtin or special. */ - pub(super) fn get(&mut self, name: &str) -> Option { + pub(super) fn get(&mut self, offset: Option, name: &str) -> Option { let mut top_parselet = true; for (i, scope) in self.scopes.iter().enumerate() { @@ -488,6 +502,7 @@ impl Compiler { if i + 1 == self.scopes.len() { if let Some(addr) = variables.get(name) { return Some(ImlValue::Global { + offset, name: name.to_string(), addr: *addr, }); @@ -497,6 +512,7 @@ impl Compiler { else if top_parselet { if let Some(addr) = variables.get(name) { return Some(ImlValue::Local { + offset, name: name.to_string(), addr: *addr, }); @@ -527,7 +543,7 @@ impl Compiler { RefValue::from(Token::builtin("Whitespaces").unwrap()).into(), ); - return Some(self.get(name).unwrap()); + return Some(self.get(None, name).unwrap()); } // Check for built-in token diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 823541f6..bcb87367 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -219,7 +219,7 @@ impl ImlParselet { // Register default value, if any match &var_value.1 { ImlValue::Void => None, - value => Some(program.register(value).expect("Cannot register value")), + value => 
Some(program.register(value)), }, ) }) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index eaef4988..0965ac5e 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -30,15 +30,30 @@ impl ImlProgram { In case *value* already exists inside of the current statics, the existing index will be returned, otherwiese the value is cloned and put into the statics table. */ - pub fn register(&mut self, value: &ImlValue) -> Result { + pub fn register(&mut self, value: &ImlValue) -> usize { match value { - ImlValue::Shared(value) => self.register(&*value.borrow()), + ImlValue::Shared(value) => return self.register(&*value.borrow()), ImlValue::Parselet(_) | ImlValue::Value(_) => match self.statics.get_index_of(value) { - None => Ok(self.statics.insert_full(value.clone(), None).0), - Some(idx) => Ok(idx), + None => return self.statics.insert_full(value.clone(), None).0, + Some(idx) => return idx, }, - _ => Err(()), // Cannot register this kind of value + ImlValue::This(_) | ImlValue::Void => unreachable!(), + ImlValue::Local { offset, name, .. } | ImlValue::Global { offset, name, .. } => { + self.errors.push(Error::new( + offset.clone(), + format!("Variable '{}' used in static context", name), + )) + } + ImlValue::Name { offset, .. } | ImlValue::Generic { offset, .. } => { + self.errors + .push(Error::new(offset.clone(), format!("Unresolved {}", value))); + } + ImlValue::Instance { offset, .. } => self + .errors + .push(Error::new(offset.clone(), format!("Unresolved {}", value))), } + + 0 } /** Turns the ImlProgram and its intermediate values into a final VM program ready for execution. diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index af437941..72f963da 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -21,7 +21,7 @@ still pending. 
#[derive(Debug, Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { Void, - Shared(Rc>), + Shared(Rc>), // TODO: BAD SOLUTION, HAS TO BE REWORKED TO ImlRefValue OR SO... // Resolved: static Value(RefValue), // Compile-time static value @@ -31,11 +31,13 @@ pub(in crate::compiler) enum ImlValue { This(bool), // self-reference function (false) or parselet (true) Local { // Runtime local variable + offset: Option, // Source offset name: String, addr: usize, }, Global { // Runtime global variable + offset: Option, // Source offset name: String, addr: usize, }, @@ -92,7 +94,7 @@ impl ImlValue { pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { let resolve = match self { Self::Shared(value) => return value.borrow_mut().resolve(compiler), - Self::Name { name, .. } => compiler.get(&name), + Self::Name { offset, name, .. } => compiler.get(offset.clone(), &name), Self::Instance { offset, target, @@ -117,7 +119,7 @@ impl ImlValue { } } - // When everything is resolved, turn the instance definition into a parselet + // When all instance members are resolved, try to turn the instance definition into a parselet if is_resolved { match &**target { ImlValue::Parselet(parselet) => { @@ -204,14 +206,16 @@ impl ImlValue { // This can be the final parselet definition, but constants // might contain generic references as well, which are being // resolved during compilation. 
- Some(ImlValue::from(ImlParseletConfig { + let derivation = ImlValue::from(ImlParseletConfig { model: parselet.model.clone(), constants, offset: parselet.offset.clone(), name: parselet.name.clone(), severity: severity.unwrap_or(parselet.severity), generated: *generated, - })) + }); + + Some(derivation) } target => { compiler.errors.push(Error::new( @@ -375,13 +379,13 @@ impl ImlValue { if parselet.is_generic() { // Otherwise, this is a generic, so create a derivation let derive = ImlValue::Parselet(parselet.derive(current.0)); - program.register(&derive).unwrap() + program.register(&derive) } else { // If target is resolved, just register - program.register(self).unwrap() + program.register(self) } } - resolved => program.register(resolved).unwrap(), + resolved => program.register(resolved), }; match call { @@ -487,7 +491,7 @@ impl std::hash::Hash for ImlValue { consumable.hash(state); } */ - other => unreachable!("{:?} is unhashable", other), + other => unreachable!("{} is unhashable", other), } } } diff --git a/tests/parselet_args_default_static.tok b/tests/parselet_args_default_static.tok new file mode 100644 index 00000000..09e42909 --- /dev/null +++ b/tests/parselet_args_default_static.tok @@ -0,0 +1,5 @@ +f : @x=y { print("Works!", x); } +y : 42 +f +#--- +#Works! 42 diff --git a/tests/parselet_args_default_var.tok b/tests/parselet_args_default_var.tok new file mode 100644 index 00000000..9f7e8f9a --- /dev/null +++ b/tests/parselet_args_default_var.tok @@ -0,0 +1,5 @@ +f : @x=y { print("Doesn't work!", x); } +y = 42 +f +#--- +#ERR:Line 1, column 8: Variable 'y' used in static context diff --git a/tests/parselet_generic_default_defined.tok b/tests/parselet_generic_default_defined.tok new file mode 100644 index 00000000..b1354016 --- /dev/null +++ b/tests/parselet_generic_default_defined.tok @@ -0,0 +1,9 @@ +P : @ { X print("Works!") } +Y : 'x' +P +#--- +#xxx +#--- +#Works! +#Works! +#Works! 
diff --git a/tests/parselet_generic_default_undefined.tok b/tests/parselet_generic_default_undefined.tok new file mode 100644 index 00000000..2d870dea --- /dev/null +++ b/tests/parselet_generic_default_undefined.tok @@ -0,0 +1,8 @@ +P : @ { X print("Doesn't work!") } +P : @ { X print("Doesn't work as well!") } +#--- +#x +#--- +#ERR:Line 1, column 7: Generic 'X' defines consumable, but name 'y' is not consuming +#ERR:Line 1, column 10: Use of undefined name 'y' +#ERR:Line 2, column 10: Use of undefined name 'Y' From 387dada5d4db28b9d15cf1477edd9f03369c65f3 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 15 Oct 2023 09:06:26 +0200 Subject: [PATCH 85/94] Fixing generic argument requirement error reporting --- src/compiler/iml/imlprogram.rs | 11 ++++++----- tests/parselet_generic_direct.tok | 7 +++++++ 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 tests/parselet_generic_direct.tok diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 0965ac5e..610b9d59 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -93,6 +93,12 @@ impl ImlProgram { idx += 1; } + // Stop on any raised error + if !self.errors.is_empty() { + return Err(self.errors); + } + + // Finalize parselets if self.debug { println!("--- Parselets to finalize ---"); @@ -103,11 +109,6 @@ impl ImlProgram { let leftrec = self.finalize(finalize); - // Stop on any raised error - if !self.errors.is_empty() { - return Err(self.errors); - } - // Assemble all statics to be transferred into a Program let statics: Vec = self .statics diff --git a/tests/parselet_generic_direct.tok b/tests/parselet_generic_direct.tok new file mode 100644 index 00000000..e8cd67b9 --- /dev/null +++ b/tests/parselet_generic_direct.tok @@ -0,0 +1,7 @@ +P : @ { 'x' print("Works!") } +#P<'z'> +P +#--- +#xxx +#--- +#ERR:Line 3, column 1: P requires assignment of generic argument X From ff60c5f14e8f596ae39b3823ed0fc13204b42c6f Mon Sep 17 00:00:00 2001 From: 
Jan Max Meyer Date: Fri, 20 Oct 2023 00:35:35 +0200 Subject: [PATCH 86/94] Replace ImlValue::Void by ImlValue::Unset --- src/compiler/ast.rs | 4 ++-- src/compiler/iml/imlparselet.rs | 4 ++-- src/compiler/iml/imlprogram.rs | 2 +- src/compiler/iml/imlvalue.rs | 12 ++++++------ 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index a00bd034..8eb21c0d 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -195,7 +195,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { assert!(children.len() <= 2); // Evaluate default parameter - let mut default = ImlValue::Void; + let mut default = ImlValue::Unset; if children.len() == 2 { default = traverse_node_static( @@ -277,7 +277,7 @@ fn traverse_node_value(compiler: &mut Compiler, node: &Dict) -> ImlValue { default.object::().unwrap(), ) } else { - ImlValue::Void + ImlValue::Unset }, ); //println!("{} {} {:?}", emit.to_string(), ident, default); diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index bcb87367..5490979b 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -85,7 +85,7 @@ impl std::fmt::Display for ImlParseletConfig { if !self.constants.is_empty() { write!(f, "<")?; for (i, (name, value)) in self.constants.iter().enumerate() { - if matches!(value, ImlValue::Void) { + if matches!(value, ImlValue::Unset) { write!(f, "{}{}", if i > 0 { ", " } else { "" }, name)?; } else { write!(f, "{}{}:{}", if i > 0 { ", " } else { "" }, name, value)?; @@ -218,7 +218,7 @@ impl ImlParselet { var_value.0.clone(), // Register default value, if any match &var_value.1 { - ImlValue::Void => None, + ImlValue::Unset => None, value => Some(program.register(value)), }, ) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 610b9d59..386d90c2 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -37,7 +37,7 @@ impl ImlProgram { None 
=> return self.statics.insert_full(value.clone(), None).0, Some(idx) => return idx, }, - ImlValue::This(_) | ImlValue::Void => unreachable!(), + ImlValue::This(_) | ImlValue::Unset => unreachable!(), ImlValue::Local { offset, name, .. } | ImlValue::Global { offset, name, .. } => { self.errors.push(Error::new( offset.clone(), diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 72f963da..dff84f78 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -20,8 +20,8 @@ still pending. */ #[derive(Debug, Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Void, - Shared(Rc>), // TODO: BAD SOLUTION, HAS TO BE REWORKED TO ImlRefValue OR SO... + Unset, + Shared(Rc>), // Resolved: static Value(RefValue), // Compile-time static value @@ -141,7 +141,7 @@ impl ImlValue { }; // Check integrity of constant names - if let Self::Void = arg.1 { + if let Self::Unset = arg.1 { compiler.errors.push(Error::new( arg.0, format!("Expecting argument for generic '{}'", name), @@ -264,7 +264,7 @@ impl ImlValue { || parselet .signature .iter() - .all(|arg| !matches!(arg.1, Self::Void)) + .all(|arg| !matches!(arg.1, Self::Unset)) } else { true } @@ -350,7 +350,7 @@ impl ImlValue { for (name, value) in &parselet.constants { match value { - ImlValue::Void => required.push(name.to_string()), + ImlValue::Unset => required.push(name.to_string()), _ => {} } } @@ -422,7 +422,7 @@ impl ImlValue { impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Void => write!(f, "void"), + Self::Unset => write!(f, "void"), Self::Shared(value) => value.borrow().fmt(f), Self::This(true) => write!(f, "Self"), Self::This(false) => write!(f, "self"), From 6db705ef90eb40d2dac377e78befec822d63cf67 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 20 Oct 2023 00:55:58 +0200 Subject: [PATCH 87/94] Renamed ImlValue::Shared into ImlValue::Unresolved --- src/compiler/compiler.rs | 5 
+---- src/compiler/iml/imlparselet.rs | 2 +- src/compiler/iml/imlprogram.rs | 4 ++-- src/compiler/iml/imlvalue.rs | 38 ++++++++++++++------------------- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index e19117b7..e501007a 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -110,11 +110,8 @@ impl Compiler { assert!(self.scopes.len() == 1); - // TODO: This is only a short hack to report still unresolved symbols again. - // TODO: ImlValue, and especially ImlValue::Shared is a conceptual problem that generally must be revised. - // println!("self.usages = {:?}", self.usages); for usage in self.usages.drain(..) { - if let ImlValue::Shared(usage) = usage { + if let ImlValue::Unresolved(usage) = usage { let usage = usage.borrow(); if let ImlValue::Name { offset, name } = &*usage { self.errors.push(Error::new( diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 5490979b..2d6dbac3 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -131,7 +131,7 @@ impl From for ImlValue { // ImlParselet // ---------------------------------------------------------------------------- -/// Shared ImlParseletConfig +/// Unresolved ImlParseletConfig #[derive(Clone, Eq, PartialEq)] pub(in crate::compiler) struct ImlParselet(Rc>); diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 386d90c2..f4d24b74 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -32,7 +32,7 @@ impl ImlProgram { otherwiese the value is cloned and put into the statics table. 
*/ pub fn register(&mut self, value: &ImlValue) -> usize { match value { - ImlValue::Shared(value) => return self.register(&*value.borrow()), + ImlValue::Unresolved(value) => return self.register(&*value.borrow()), ImlValue::Parselet(_) | ImlValue::Value(_) => match self.statics.get_index_of(value) { None => return self.statics.insert_full(value.clone(), None).0, Some(idx) => return idx, @@ -165,7 +165,7 @@ impl ImlProgram { configs: &mut HashMap, ) -> Option { match value { - ImlValue::Shared(value) => { + ImlValue::Unresolved(value) => { finalize_value(&*value.borrow(), current, visited, configs) } ImlValue::This(_) => Some(Consumable { diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index dff84f78..3257635f 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -20,15 +20,11 @@ still pending. */ #[derive(Debug, Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { - Unset, - Shared(Rc>), - - // Resolved: static - Value(RefValue), // Compile-time static value - Parselet(ImlParselet), // Parselet instance - - // Resolved: dynamic - This(bool), // self-reference function (false) or parselet (true) + Unset, // Unset + Unresolved(Rc>), // Unresolved ImlValues are shared + Value(RefValue), // Static value + Parselet(ImlParselet), // Parselet + This(bool), // self-reference to function (false) or parselet (true) Local { // Runtime local variable offset: Option, // Source offset @@ -41,15 +37,13 @@ pub(in crate::compiler) enum ImlValue { name: String, addr: usize, }, - - // Unresolved - Name { - // Unresolved name + Generic { + // Known generic placeholder offset: Option, // Source offset name: String, // Identifier }, - Generic { - // Unresolved generic + Name { + // Unresolved name offset: Option, // Source offset name: String, // Identifier }, @@ -85,7 +79,7 @@ impl ImlValue { return self; } - let shared = Self::Shared(Rc::new(RefCell::new(self))); + let shared = Self::Unresolved(Rc::new(RefCell::new(self))); 
compiler.usages.push(shared.clone()); shared } @@ -93,7 +87,7 @@ impl ImlValue { /// Resolve unresolved ImlValue. Returns true in case the provided value is (already) resolved. pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { let resolve = match self { - Self::Shared(value) => return value.borrow_mut().resolve(compiler), + Self::Unresolved(value) => return value.borrow_mut().resolve(compiler), Self::Name { offset, name, .. } => compiler.get(offset.clone(), &name), Self::Instance { offset, @@ -252,7 +246,7 @@ impl ImlValue { /// and when its callable if with or without arguments. pub fn is_callable(&self, without_arguments: bool) -> bool { match self { - Self::Shared(value) => value.borrow().is_callable(without_arguments), + Self::Unresolved(value) => value.borrow().is_callable(without_arguments), Self::This(_) => true, // fixme? Self::Value(value) => value.is_callable(without_arguments), Self::Parselet(parselet) => { @@ -277,7 +271,7 @@ impl ImlValue { /// Check whether intermediate value represents consuming pub fn is_consuming(&self) -> bool { match self { - Self::Shared(value) => value.borrow().is_consuming(), + Self::Unresolved(value) => value.borrow().is_consuming(), Self::This(consuming) => *consuming, Self::Value(value) => value.is_consuming(), Self::Parselet(parselet) => parselet.borrow().model.borrow().consuming, @@ -306,7 +300,7 @@ impl ImlValue { let start = ops.len(); match self { - ImlValue::Shared(value) => { + ImlValue::Unresolved(value) => { return value.borrow().compile(program, current, offset, call, ops) } ImlValue::Value(value) => match &*value.borrow() { @@ -423,7 +417,7 @@ impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Unset => write!(f, "void"), - Self::Shared(value) => value.borrow().fmt(f), + Self::Unresolved(value) => value.borrow().fmt(f), Self::This(true) => write!(f, "Self"), Self::This(false) => write!(f, "self"), Self::Value(value) => write!(f, 
"{}", value.repr()), @@ -476,7 +470,7 @@ impl std::fmt::Display for ImlValue { impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { - Self::Shared(value) => value.borrow().hash(state), + Self::Unresolved(value) => value.borrow().hash(state), Self::Value(value) => { state.write_u8('v' as u8); value.hash(state) From 5633b2c80a709ddc347331db8044940b10789165 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Fri, 20 Oct 2023 23:54:51 +0200 Subject: [PATCH 88/94] Removal of dead error reporting --- src/compiler/iml/imlprogram.rs | 7 ++----- src/compiler/iml/imlvalue.rs | 12 ------------ 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index f4d24b74..b326ee4a 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -37,20 +37,17 @@ impl ImlProgram { None => return self.statics.insert_full(value.clone(), None).0, Some(idx) => return idx, }, - ImlValue::This(_) | ImlValue::Unset => unreachable!(), ImlValue::Local { offset, name, .. } | ImlValue::Global { offset, name, .. } => { self.errors.push(Error::new( offset.clone(), format!("Variable '{}' used in static context", name), )) } - ImlValue::Name { offset, .. } | ImlValue::Generic { offset, .. } => { + ImlValue::Generic { offset, .. } | ImlValue::Instance { offset, .. } => { self.errors .push(Error::new(offset.clone(), format!("Unresolved {}", value))); } - ImlValue::Instance { offset, .. } => self - .errors - .push(Error::new(offset.clone(), format!("Unresolved {}", value))), + _ => unreachable!(), } 0 diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 3257635f..119125db 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -321,18 +321,6 @@ impl ImlValue { return current.0.borrow().constants[name] .compile(program, current, offset, call, ops) } - ImlValue::Name { name, .. 
} => { - program.errors.push(Error::new( - offset.clone(), - if call.is_some() { - format!("Call to unresolved symbol '{}'", name) - } else { - format!("Use of unresolved symbol '{}'", name) - }, - )); - - return; - } ImlValue::This(_) => {} ImlValue::Parselet(parselet) => { let parselet = parselet.borrow(); From 4857a9607408212552b5979a1266fa8453427330 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 21 Oct 2023 01:26:24 +0200 Subject: [PATCH 89/94] Merge ImlValue::Local/Global into ImlValue::Variable --- src/compiler/ast.rs | 28 ++++++++++++---------------- src/compiler/compiler.rs | 19 ++++++------------- src/compiler/iml/imlprogram.rs | 10 ++++------ src/compiler/iml/imlvalue.rs | 32 ++++++++++++++++++-------------- 4 files changed, 40 insertions(+), 49 deletions(-) diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 8eb21c0d..948acbc2 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -513,33 +513,29 @@ fn traverse_node_lvalue(compiler: &mut Compiler, node: &Dict, store: bool, hold: 'load: loop { match compiler.get(traverse_node_offset(item), name) { // Known local - Some(ImlValue::Local { addr, .. }) => { + Some(ImlValue::Variable { + addr, is_global, .. + }) => { if store { if hold { - ops.push(Op::StoreFastHold(addr).into()) + if is_global { + ops.push(Op::StoreGlobalHold(addr).into()) + } else { + ops.push(Op::StoreFastHold(addr).into()) + } + } else if is_global { + ops.push(Op::StoreGlobal(addr).into()) } else { ops.push(Op::StoreFast(addr).into()) } + } else if is_global { + ops.push(Op::LoadGlobal(addr).into()) } else { ops.push(Op::LoadFast(addr).into()) } break; } - // Known global - Some(ImlValue::Global { addr, .. 
}) => { - if store { - if hold { - ops.push(Op::StoreGlobalHold(addr).into()) - } else { - ops.push(Op::StoreGlobal(addr).into()) - } - } else { - ops.push(Op::LoadGlobal(addr).into()) - } - - break; - } // Check for not assigning to a constant (at any level) Some(_) => { compiler.errors.push(Error::new( diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index e501007a..c7bfbfc8 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -495,22 +495,15 @@ impl Compiler { return Some(value.clone()); } - // Check for global variable - if i + 1 == self.scopes.len() { - if let Some(addr) = variables.get(name) { - return Some(ImlValue::Global { - offset, - name: name.to_string(), - addr: *addr, - }); - } - } - // Check for local variable - else if top_parselet { + // Check for variable + let is_global = i + 1 == self.scopes.len(); + + if is_global || top_parselet { if let Some(addr) = variables.get(name) { - return Some(ImlValue::Local { + return Some(ImlValue::Variable { offset, name: name.to_string(), + is_global, addr: *addr, }); } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index b326ee4a..5794e3f8 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -37,12 +37,10 @@ impl ImlProgram { None => return self.statics.insert_full(value.clone(), None).0, Some(idx) => return idx, }, - ImlValue::Local { offset, name, .. } | ImlValue::Global { offset, name, .. } => { - self.errors.push(Error::new( - offset.clone(), - format!("Variable '{}' used in static context", name), - )) - } + ImlValue::Variable { offset, name, .. } => self.errors.push(Error::new( + offset.clone(), + format!("Variable '{}' used in static context", name), + )), ImlValue::Generic { offset, .. } | ImlValue::Instance { offset, .. 
} => { self.errors .push(Error::new(offset.clone(), format!("Unresolved {}", value))); diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 119125db..2301abce 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -25,17 +25,12 @@ pub(in crate::compiler) enum ImlValue { Value(RefValue), // Static value Parselet(ImlParselet), // Parselet This(bool), // self-reference to function (false) or parselet (true) - Local { - // Runtime local variable + Variable { + // Runtime variable offset: Option, // Source offset - name: String, - addr: usize, - }, - Global { - // Runtime global variable - offset: Option, // Source offset - name: String, - addr: usize, + name: String, // Name + is_global: bool, // Global + addr: usize, // Address }, Generic { // Known generic placeholder @@ -315,8 +310,15 @@ impl ImlValue { }, _ => {} }, - ImlValue::Local { addr, .. } => ops.push(Op::LoadFast(*addr)), - ImlValue::Global { addr, .. } => ops.push(Op::LoadGlobal(*addr)), + ImlValue::Variable { + addr, is_global, .. + } => { + if *is_global { + ops.push(Op::LoadGlobal(*addr)) + } else { + ops.push(Op::LoadFast(*addr)) + } + } ImlValue::Generic { name, .. } => { return current.0.borrow().constants[name] .compile(program, current, offset, call, ops) @@ -418,8 +420,10 @@ impl std::fmt::Display for ImlValue { .as_deref() .unwrap_or("") ), - Self::Local { name, .. } => write!(f, "local '{}'", name), - Self::Global { name, .. } => write!(f, "global '{}'", name), + Self::Variable { + name, is_global, .. + } if *is_global => write!(f, "global '{}'", name), + Self::Variable { name, .. } => write!(f, "local '{}'", name), Self::Name { name, .. } => write!(f, "name '{}'", name), Self::Generic { name, .. 
} => write!(f, "generic '{}'", name), Self::Instance { From 6bba5d27902fe7fb4452b02825b6b3f9d733e561 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 21 Oct 2023 10:36:27 +0200 Subject: [PATCH 90/94] Some renamings and docstring fixes --- ROADMAP.md | 3 ++- src/compiler/ast.rs | 6 +++--- src/compiler/iml/imlop.rs | 8 ++++---- src/compiler/iml/imlparselet.rs | 4 ++-- src/compiler/iml/imlvalue.rs | 9 +++++---- 5 files changed, 16 insertions(+), 14 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 3b4f26c4..1d701e2f 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -5,7 +5,8 @@ This document describes upcoming changes to achieve with a specific version. ## 0.7 - [x] Implement iterators and `for...in`-syntax (#101) -- [ ] Implement generic parselets (#10, #105) +- [x] Implement generic parselets (#10, #105) +- [ ] Implement embedded parselets (#120) - [ ] New list syntax `[...]`, redefining sequence/`dict` syntax (#100) - The character-class token syntax was replaced by a `Char`-builtin - List definition `list = []` diff --git a/src/compiler/ast.rs b/src/compiler/ast.rs index 948acbc2..b23c1603 100644 --- a/src/compiler/ast.rs +++ b/src/compiler/ast.rs @@ -1524,7 +1524,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { compiler.push_temp(temp); // Give temp variable back for possible reuse. 
ImlOp::Loop { - iterator: true, + use_iterator: true, initial: Box::new(initial), condition: Box::new(condition), body: Box::new(body), @@ -1541,7 +1541,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { let body = &children[0].borrow(); ImlOp::Loop { - iterator: false, + use_iterator: false, initial: Box::new(ImlOp::Nop), condition: Box::new(ImlOp::Nop), body: Box::new(traverse_node_rvalue( @@ -1555,7 +1555,7 @@ fn traverse_node(compiler: &mut Compiler, node: &Dict) -> ImlOp { let (condition, body) = (&children[0].borrow(), &children[1].borrow()); ImlOp::Loop { - iterator: false, + use_iterator: false, initial: Box::new(ImlOp::Nop), condition: Box::new(traverse_node_rvalue( compiler, diff --git a/src/compiler/iml/imlop.rs b/src/compiler/iml/imlop.rs index 59f004b6..61e741c2 100644 --- a/src/compiler/iml/imlop.rs +++ b/src/compiler/iml/imlop.rs @@ -44,7 +44,7 @@ pub(in crate::compiler) enum ImlOp { // Loop construct Loop { - iterator: bool, // Test condition either for void (=true) or bool (=false) + use_iterator: bool, // Test condition either for void (=true) or bool (=false) initial: Box, // Initialization condition: Box, // Abort condition body: Box, // Iterating body @@ -74,7 +74,7 @@ impl ImlOp { } } - /// Load value + /// Load value; This is only a shortcut for creating an ImlOp::Load{} pub fn load(_compiler: &mut Compiler, offset: Option, target: ImlValue) -> ImlOp { ImlOp::Load { offset, target } } @@ -273,7 +273,7 @@ impl ImlOp { } } ImlOp::Loop { - iterator, + use_iterator, initial, condition, body, @@ -284,7 +284,7 @@ impl ImlOp { initial.compile(program, current, ops); if condition.compile(program, current, &mut repeat) > 0 { - if *iterator { + if *use_iterator { repeat.push(Op::ForwardIfNotVoid(2)); } else { repeat.push(Op::ForwardIfTrue(2)); diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index 2d6dbac3..ee8b3cf4 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ 
-160,8 +160,8 @@ impl ImlParselet { B<'m'> B<'n'> # parselet instances, construct the final parselets: B<'m'>, A<'m'>, B<'n'> A<'n'> ``` - The function either returns a derived parselet in case it was derive, - otherwise it returns a clone of self. + The function either returns a derived parselet in case it was derived, otherwise it returns + a clone of itself. */ pub fn derive(&self, from: &ImlParselet) -> Self { let mut constants = self.borrow().constants.clone(); diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index 2301abce..f126e19b 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -12,11 +12,12 @@ use std::rc::Rc; /** Intermediate value -Intermediate values are values that result during the compile process based on current information -from the syntax tree and symbol table information. +Intermediate values are value descriptors that result during the compile process based on current +information from the syntax tree and symbol table information.. -These can be memory locations of variables, static values, functions or values whose definition is -still pending. +These can be memory locations of variables, static values, parselets or values whose definition is +still pending. As some intermediate values consist of other intermediate values, they are being +modified and resolved during the compilation process. 
*/ #[derive(Debug, Clone, PartialEq, Eq)] pub(in crate::compiler) enum ImlValue { From 755aaed809a66f59dd592b4197a9d3023dcbd48d Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 21 Oct 2023 11:55:04 +0200 Subject: [PATCH 91/94] Renamings and restructuring - Renamed `ImlParseletConfig` into `ImlParseletInstance` - Removed `ImlParselet.is_generic()` - Moved required generic argument checking into `ImlParselet.derive()` --- src/compiler/compiler.rs | 2 +- src/compiler/iml/imlparselet.rs | 112 +++++++++++++++++--------------- src/compiler/iml/imlprogram.rs | 6 +- src/compiler/iml/imlvalue.rs | 51 ++++----------- 4 files changed, 75 insertions(+), 96 deletions(-) diff --git a/src/compiler/compiler.rs b/src/compiler/compiler.rs index c7bfbfc8..d8b964f5 100644 --- a/src/compiler/compiler.rs +++ b/src/compiler/compiler.rs @@ -328,7 +328,7 @@ impl Compiler { self.scopes.push(scope); } - ImlValue::from(ImlParseletConfig::new( + ImlValue::from(ImlParseletInstance::new( model, constants, offset, diff --git a/src/compiler/iml/imlparselet.rs b/src/compiler/iml/imlparselet.rs index ee8b3cf4..2c11c936 100644 --- a/src/compiler/iml/imlparselet.rs +++ b/src/compiler/iml/imlparselet.rs @@ -29,7 +29,7 @@ impl ImlParseletModel { } } -// ImlParseletConfig +// ImlParseletInstance // ---------------------------------------------------------------------------- /** Intermediate parselet configuration. @@ -40,7 +40,7 @@ before a parselet configuration is turned into a parselet. */ #[allow(dead_code)] #[derive(Debug)] -pub(in crate::compiler) struct ImlParseletConfig { +pub(in crate::compiler) struct ImlParseletInstance { pub model: Rc>, // Parselet base model pub constants: IndexMap, // Generic signature with default configuration pub offset: Option, // Offset of definition @@ -49,8 +49,8 @@ pub(in crate::compiler) struct ImlParseletConfig { pub generated: bool, } -/** Representation of parselet in intermediate code. 
*/ -impl ImlParseletConfig { +/** Representation of parselet instance in intermediate code. */ +impl ImlParseletInstance { pub fn new( model: ImlParseletModel, constants: IndexMap, @@ -70,11 +70,11 @@ impl ImlParseletConfig { } pub fn id(&self) -> usize { - self as *const ImlParseletConfig as usize + self as *const ImlParseletInstance as usize } } -impl std::fmt::Display for ImlParseletConfig { +impl std::fmt::Display for ImlParseletInstance { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, @@ -98,16 +98,16 @@ impl std::fmt::Display for ImlParseletConfig { } } -impl std::cmp::PartialEq for ImlParseletConfig { +impl std::cmp::PartialEq for ImlParseletInstance { // It satisfies to just compare the parselet's memory address for equality fn eq(&self, other: &Self) -> bool { self.model.borrow().id() == other.model.borrow().id() && self.constants == other.constants } } -impl Eq for ImlParseletConfig {} +impl Eq for ImlParseletInstance {} -impl std::hash::Hash for ImlParseletConfig { +impl std::hash::Hash for ImlParseletInstance { fn hash(&self, state: &mut H) { let model = &*self.model.borrow(); (model as *const ImlParseletModel as usize).hash(state); @@ -115,15 +115,15 @@ impl std::hash::Hash for ImlParseletConfig { } } -impl std::cmp::PartialOrd for ImlParseletConfig { +impl std::cmp::PartialOrd for ImlParseletInstance { // It satisfies to just compare the parselet's memory address for equality fn partial_cmp(&self, other: &Self) -> Option { self.id().partial_cmp(&other.id()) } } -impl From for ImlValue { - fn from(parselet: ImlParseletConfig) -> Self { +impl From for ImlValue { + fn from(parselet: ImlParseletInstance) -> Self { ImlValue::Parselet(ImlParselet::new(parselet)) } } @@ -131,21 +131,17 @@ impl From for ImlValue { // ImlParselet // ---------------------------------------------------------------------------- -/// Unresolved ImlParseletConfig +/// Shared ImlParseletInstance #[derive(Clone, Eq, PartialEq)] -pub(in 
crate::compiler) struct ImlParselet(Rc>); +pub(in crate::compiler) struct ImlParselet { + instance: Rc>, +} impl ImlParselet { - pub fn new(parselet: ImlParseletConfig) -> Self { - Self(Rc::new(RefCell::new(parselet))) - } - - /// Checks if an intermediate parselet is completely resolved, or if it has open generics - pub fn is_generic(&self) -> bool { - self.borrow() - .constants - .values() - .any(|value| matches!(value, ImlValue::Generic { .. } | ImlValue::This(_))) + pub fn new(parselet: ImlParseletInstance) -> Self { + Self { + instance: Rc::new(RefCell::new(parselet)), + } } /** Derives an intermediate parselet by another intermediate parselet (`from`). @@ -163,52 +159,64 @@ impl ImlParselet { The function either returns a derived parselet in case it was derived, otherwise it returns a clone of itself. */ - pub fn derive(&self, from: &ImlParselet) -> Self { - let mut constants = self.borrow().constants.clone(); + pub fn derive(&self, from: &ImlParselet) -> Result { + let instance = self.instance.borrow(); + let mut constants = instance.constants.clone(); let mut changes = false; + let mut required = Vec::new(); - for value in constants.values_mut() { + for (name, value) in constants.iter_mut() { // Replace any generics until no more are open while let ImlValue::Generic { name, .. 
} = value { *value = from.borrow().constants.get(name).unwrap().clone(); changes = true; } - // Replace any values of self - if let ImlValue::This(_) = value { - *value = ImlValue::Parselet(from.clone()); - changes = true; + match value { + ImlValue::This(_) => { + // Replace any references of self + *value = ImlValue::Parselet(from.clone()); + changes = true; + } + ImlValue::Unset => required.push(name.to_string()), + _ => {} } } + // Check for accepted constant configuration + if !required.is_empty() { + return Err(format!( + "{} requires assignment of generic argument {}", + instance.name.as_deref().unwrap_or(""), + required.join(", ") + )); + } + // When there is no change, there is no derivation if !changes { - return self.clone(); + return Ok(self.clone()); } - // Create derivation of the inner parselet - let parselet = self.borrow(); - - Self::new(ImlParseletConfig { - model: parselet.model.clone(), + Ok(Self::new(ImlParseletInstance { + model: instance.model.clone(), constants, - offset: parselet.offset.clone(), - name: parselet.name.clone(), - severity: parselet.severity, - generated: parselet.generated, - }) + offset: instance.offset.clone(), + name: instance.name.clone(), + severity: instance.severity, + generated: instance.generated, + })) } /** Compiles an intermediate parselet into a compiled VM parselet, which is part of the provided `program` and indexed by `this`. 
*/ pub fn compile(&self, program: &mut ImlProgram, this: usize) -> Parselet { - let parselet = self.borrow(); - let model = parselet.model.borrow(); + let instance = self.instance.borrow(); + let model = instance.model.borrow(); Parselet::new( - Some(format!("{}", parselet)), + Some(format!("{}", instance)), None, - parselet.severity, + instance.severity, model .signature .iter() @@ -234,13 +242,13 @@ impl ImlParselet { impl std::hash::Hash for ImlParselet { fn hash(&self, state: &mut H) { - self.borrow().hash(state); + self.instance.borrow().hash(state); } } impl std::fmt::Debug for ImlParselet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0.borrow()) + write!(f, "{}", self.instance.borrow()) // Avoid endless recursion in case of recursive parselets /* if self.0.try_borrow_mut().is_ok() { @@ -254,20 +262,20 @@ impl std::fmt::Debug for ImlParselet { impl std::fmt::Display for ImlParselet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0.borrow()) + write!(f, "{}", self.instance.borrow()) } } impl std::ops::Deref for ImlParselet { - type Target = Rc>; + type Target = Rc>; fn deref(&self) -> &Self::Target { - &self.0 + &self.instance } } impl std::ops::DerefMut for ImlParselet { fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 + &mut self.instance } } diff --git a/src/compiler/iml/imlprogram.rs b/src/compiler/iml/imlprogram.rs index 5794e3f8..732e813c 100644 --- a/src/compiler/iml/imlprogram.rs +++ b/src/compiler/iml/imlprogram.rs @@ -1,4 +1,4 @@ -//! ImlProgram glues ImlParseletConfig, ImlOp and ImlValue together to produce a VM program. +//! ImlProgram glues ImlParselet, ImlOp and ImlValue together to produce a VM program. 
use super::*; use crate::value::Parselet; @@ -169,7 +169,7 @@ impl ImlProgram { }), ImlValue::Parselet(parselet) => { // Try to derive the parselet with current constants - let derived = parselet.derive(current); + let derived = parselet.derive(current).unwrap(); // The derived parselet's original must be in the configs! let parselet = configs.get_key_value(&derived).unwrap().0.clone(); @@ -293,7 +293,7 @@ impl ImlProgram { } } - // Finalize ImlParseletConfig + // Finalize ImlParselet fn finalize_parselet( current: &ImlParselet, visited: &mut IndexSet, diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index f126e19b..b652ced4 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -196,7 +196,7 @@ impl ImlValue { // This can be the final parselet definition, but constants // might contain generic references as well, which are being // resolved during compilation. - let derivation = ImlValue::from(ImlParseletConfig { + let derivation = ImlValue::from(ImlParseletInstance { model: parselet.model.clone(), constants, offset: parselet.offset.clone(), @@ -296,6 +296,7 @@ impl ImlValue { let start = ops.len(); match self { + ImlValue::Unset => return, ImlValue::Unresolved(value) => { return value.borrow().compile(program, current, offset, call, ops) } @@ -324,35 +325,7 @@ impl ImlValue { return current.0.borrow().constants[name] .compile(program, current, offset, call, ops) } - ImlValue::This(_) => {} - ImlValue::Parselet(parselet) => { - let parselet = parselet.borrow(); - - // Check for accepted constant configuration; - // This has to be checked here, because a parselet is not always the result - // of an ImlValue::Instance, and therefore this can only be checked up here. 
- let mut required = Vec::new(); - - for (name, value) in &parselet.constants { - match value { - ImlValue::Unset => required.push(name.to_string()), - _ => {} - } - } - - if !required.is_empty() { - program.errors.push(Error::new( - offset.clone(), - format!( - "{} requires assignment of generic argument {}", - self, - required.join(", ") - ), - )); - - return; - } - } + ImlValue::This(_) | ImlValue::Parselet(_) => {} _ => unreachable!("{}", self), } @@ -360,16 +333,13 @@ impl ImlValue { if start == ops.len() { let idx = match self { ImlValue::This(_) => current.1, // use current index - ImlValue::Parselet(parselet) => { - if parselet.is_generic() { - // Otherwise, this is a generic, so create a derivation - let derive = ImlValue::Parselet(parselet.derive(current.0)); - program.register(&derive) - } else { - // If target is resolved, just register - program.register(self) + ImlValue::Parselet(parselet) => match parselet.derive(current.0) { + Ok(parselet) => program.register(&ImlValue::Parselet(parselet)), + Err(msg) => { + program.errors.push(Error::new(offset.clone(), msg)); + return; } - } + }, resolved => program.register(resolved), }; @@ -407,7 +377,7 @@ impl ImlValue { impl std::fmt::Display for ImlValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Unset => write!(f, "void"), + Self::Unset => write!(f, "unset"), Self::Unresolved(value) => value.borrow().fmt(f), Self::This(true) => write!(f, "Self"), Self::This(false) => write!(f, "self"), @@ -463,6 +433,7 @@ impl std::fmt::Display for ImlValue { impl std::hash::Hash for ImlValue { fn hash(&self, state: &mut H) { match self { + Self::Unset => state.write_u8('u' as u8), Self::Unresolved(value) => value.borrow().hash(state), Self::Value(value) => { state.write_u8('v' as u8); From d4388401d015074edfb6ba6ce56fa0360e5ca349 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sat, 21 Oct 2023 23:40:56 +0200 Subject: [PATCH 92/94] Updating docstrings and adding tests --- 
g2.tok | 10 ------- g3.tok | 11 ------- src/compiler/iml/imlvalue.rs | 35 ++++++++++++++++------ tests/parselet_generic_in_generic.tok | 14 +++++++++ g.tok => tests/parselet_generic_simple.tok | 16 ++++++++-- 5 files changed, 53 insertions(+), 33 deletions(-) delete mode 100644 g2.tok delete mode 100644 g3.tok create mode 100644 tests/parselet_generic_in_generic.tok rename g.tok => tests/parselet_generic_simple.tok (55%) diff --git a/g2.tok b/g2.tok deleted file mode 100644 index e05e42e3..00000000 --- a/g2.tok +++ /dev/null @@ -1,10 +0,0 @@ -# Simple generic parselet T, serving as a template. -T: @

{ - P 'x' P print($0) -} - -# The final parselets are instanciated by its usage. -Parse_axa: T<'a'> -Parse_bxb: T<'b'> - -Parse_axa Parse_axa Parse_bxb print("matched!") diff --git a/g3.tok b/g3.tok deleted file mode 100644 index 52b57cc2..00000000 --- a/g3.tok +++ /dev/null @@ -1,11 +0,0 @@ -# cargo run -- -e g3.tok -- xxbbxbbxxbb - -X : @ { - A A -} - -Y : @ { - X -} - -Y<'x'> Y<'b'> diff --git a/src/compiler/iml/imlvalue.rs b/src/compiler/iml/imlvalue.rs index b652ced4..c8fa9069 100644 --- a/src/compiler/iml/imlvalue.rs +++ b/src/compiler/iml/imlvalue.rs @@ -27,7 +27,7 @@ pub(in crate::compiler) enum ImlValue { Parselet(ImlParselet), // Parselet This(bool), // self-reference to function (false) or parselet (true) Variable { - // Runtime variable + // Resolved variable offset: Option, // Source offset name: String, // Name is_global: bool, // Global @@ -55,6 +55,13 @@ pub(in crate::compiler) enum ImlValue { } impl ImlValue { + /** + Helper function, which creates an instance definition from self, + by turning self into name. + + This is used internally to implement `Kle

` from `P*` syntax + during the AST traversal. + */ pub fn into_generic(self, name: &str, severity: Option) -> Self { Self::Instance { offset: None, @@ -80,7 +87,14 @@ impl ImlValue { shared } - /// Resolve unresolved ImlValue. Returns true in case the provided value is (already) resolved. + /** + In-place resolve unresolved ImlValue. + + - ImlValue::Name are being resolved by the compiler's symbol table + - ImlValue::Instance are being recursively resolved to produce an ImlValue::Parselet + + Returns true in case the provided value is (already) resolved. + */ pub fn resolve(&mut self, compiler: &mut Compiler) -> bool { let resolve = match self { Self::Unresolved(value) => return value.borrow_mut().resolve(compiler), @@ -192,11 +206,11 @@ impl ImlValue { } } - // Make a parselet derivation from the instance definition; - // This can be the final parselet definition, but constants + // Make a parselet instance from the instance definition; + // This can be the final parselet instance, but constants // might contain generic references as well, which are being - // resolved during compilation. - let derivation = ImlValue::from(ImlParseletInstance { + // resolved during further compilation and derivation. + let instance = ImlValue::from(ImlParseletInstance { model: parselet.model.clone(), constants, offset: parselet.offset.clone(), @@ -205,7 +219,7 @@ impl ImlValue { generated: *generated, }); - Some(derivation) + Some(instance) } target => { compiler.errors.push(Error::new( @@ -230,7 +244,7 @@ impl ImlValue { false } - /// Turn ImlValue into RefValue + /// Conert ImlValue into RefValue pub fn unwrap(self) -> RefValue { match self { Self::Value(value) => value, @@ -279,7 +293,10 @@ impl ImlValue { } } - /// Compile a resolved intermediate value into VM code or register it as a static + /** Compile a resolved intermediate value into VM code or register it as a static. + + The function will panic when the value is not resolved. 
+ */ pub fn compile( &self, program: &mut ImlProgram, diff --git a/tests/parselet_generic_in_generic.tok b/tests/parselet_generic_in_generic.tok new file mode 100644 index 00000000..775e400c --- /dev/null +++ b/tests/parselet_generic_in_generic.tok @@ -0,0 +1,14 @@ +X : @ { + A A +} + +Y : @ { + X +} + +Y<''x''> Y<''b''> + +#--- +#xxbbxbbxxbb +#--- +#((("x", "x"), ("b", "b")), (("x", "x"), ("b", "b"))) diff --git a/g.tok b/tests/parselet_generic_simple.tok similarity index 55% rename from g.tok rename to tests/parselet_generic_simple.tok index 0655c92a..4375910b 100644 --- a/g.tok +++ b/tests/parselet_generic_simple.tok @@ -5,10 +5,9 @@ T: @ { #T print("Start") T<'a'> -T<'b'> -T<'a'> +T<'b', "jeppie"> '\n' -Char print("Nee" $1) +Char print("Other" $1) #T<'a', "yo"> #T #T @@ -16,3 +15,14 @@ Char print("Nee" $1) #T #T<1,2,3> #T<'x'>("y") +#--- +#axaxaxabxb +#--- +#Start +#yeah axa +#Start +#Other x +#Start +#yeah axa +#Start +#jeppie bxb From d4ba7139bea392bf3ca6b738bedc82b52d083d30 Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 22 Oct 2023 10:40:03 +0200 Subject: [PATCH 93/94] Fixing `Expect` to report correct offset --- CHANGELOG.md | 12 ++++ minicalc.tok => examples/minicalc.tok | 1 + src/compiler/prelude.rs | 80 ++++++++++++++++----------- src/prelude.tok | 2 +- 4 files changed, 61 insertions(+), 34 deletions(-) rename minicalc.tok => examples/minicalc.tok (73%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 146fe406..3b758aae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ Current main branch. +- Implementation of generic parselets + - Syntax changes + - Handle parselet instances + - Handle generic values in intermediate structures + - System-defined parselets in prelude + - `Repeat` + - Implement `Pos

<P>`, `Opt<P>`, `Kle<P>` + - Compile `P+` into `Pos<P>`, `P?` into `Opt<P>`, `P*` into `Kle<P>` + - `List` + - `Peek<P>`, replaced `peek P` by `Peek<P>` + - `Not<P>`, replaced `not P` by `Not<P>` + - `Expect<P> msg=void`, replaced `expect P` by `Expect<P>
` - v0.6.4: - Main parselet operates on multiple inputs (Readers) now - Restructuring parts of VM, `Runtime` renamed into `Thread` diff --git a/minicalc.tok b/examples/minicalc.tok similarity index 73% rename from minicalc.tok rename to examples/minicalc.tok index 55c1b8f3..80c95734 100644 --- a/minicalc.tok +++ b/examples/minicalc.tok @@ -4,6 +4,7 @@ Main : @{ } Expr : @{ + Expr '+' Int $1 + $3 Int } diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index 2c38e300..e08f527c 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -51,7 +51,7 @@ impl Compiler { ])), (value!([ "emit" => "op_reject", - "value" => "reject" + "value" => void ])) ])) ])), @@ -98,7 +98,7 @@ impl Compiler { ])), (value!([ "emit" => "op_reset", - "value" => "reset" + "value" => void ])) ])) ])) @@ -137,7 +137,7 @@ impl Compiler { ])), (value!([ "emit" => "value_void", - "value" => "void" + "value" => void ])) ])) ])), @@ -216,31 +216,45 @@ impl Compiler { "emit" => "callarg", "children" => (value!([ - "emit" => "block", + "emit" => "value_generic", "children" => (value!([ (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "identifier", - "value" => "Token" - ])) - ])), - (value!([ - "emit" => "inline_sequence", - "children" => - (value!([ - "emit" => "value_token_any", - "value" => "Char" - ])) + "emit" => "identifier", + "value" => "Peek" ])), (value!([ - "emit" => "inline_sequence", + "emit" => "genarg", "children" => (value!([ - "emit" => "value_string", - "value" => "end-of-file" + "emit" => "block", + "children" => + (value!([ + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "value_token_any", + "value" => void + ])) + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "identifier", + "value" => "Token" + ])) + ])), + (value!([ + "emit" => "inline_sequence", + "children" => + (value!([ + "emit" => "value_string", + "value" => "end-of-file" + ])) + ])) + ])) 
])) ])) ])) @@ -305,7 +319,7 @@ impl Compiler { ])), (value!([ "emit" => "value_void", - "value" => "void" + "value" => void ])) ])) ])), @@ -319,7 +333,7 @@ impl Compiler { ])), (value!([ "emit" => "value_true", - "value" => "true" + "value" => void ])) ])) ])), @@ -448,7 +462,7 @@ impl Compiler { ])), (value!([ "emit" => "op_break", - "value" => "break" + "value" => void ])) ])) ])) @@ -494,13 +508,13 @@ impl Compiler { ])), (value!([ "emit" => "op_reject", - "value" => "reject" + "value" => void ])) ])) ])), (value!([ "emit" => "op_break", - "value" => "break" + "value" => void ])) ])) ])) @@ -560,7 +574,7 @@ impl Compiler { "children" => (value!([ "emit" => "value_void", - "value" => "void" + "value" => void ])) ])) ])), @@ -673,7 +687,7 @@ impl Compiler { ])), (value!([ "emit" => "value_true", - "value" => "true" + "value" => void ])) ])) ])), @@ -747,7 +761,7 @@ impl Compiler { ])), (value!([ "emit" => "value_true", - "value" => "true" + "value" => void ])) ])) ])), @@ -806,7 +820,7 @@ impl Compiler { ])), (value!([ "emit" => "value_void", - "value" => "void" + "value" => void ])) ])) ])) @@ -919,7 +933,7 @@ impl Compiler { ])), (value!([ "emit" => "value_true", - "value" => "true" + "value" => void ])) ])) ])), @@ -933,7 +947,7 @@ impl Compiler { (value!([ (value!([ "emit" => "value_token_self", - "value" => "Self" + "value" => void ])), (value!([ "emit" => "identifier", @@ -981,7 +995,7 @@ impl Compiler { (value!([ (value!([ "emit" => "value_token_self", - "value" => "Self" + "value" => void ])), (value!([ "emit" => "identifier", diff --git a/src/prelude.tok b/src/prelude.tok index 8ae9f25f..f721b873 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -11,7 +11,7 @@ Peek : @

<P>{ Expect : @<P>
msg=void { accept P - error(msg || "Expecting " + *P + ", but got " + repr((Token | Char | "end-of-file"))) + error(msg || "Expecting " + *P + ", but got " + repr(Peek<(Char | Token | "end-of-file")>)) } Repeat : @< From 03bdb0eb94730fcf5bd17a8ece474b98352d001b Mon Sep 17 00:00:00 2001 From: Jan Max Meyer Date: Sun, 22 Oct 2023 20:17:31 +0200 Subject: [PATCH 94/94] Documentation for prelude.tok and fixing bug #123 --- src/compiler/prelude.rs | 72 +++++++++++++++++++++++------------------ src/prelude.tok | 45 +++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 36 deletions(-) diff --git a/src/compiler/prelude.rs b/src/compiler/prelude.rs index e08f527c..3ef248ac 100644 --- a/src/compiler/prelude.rs +++ b/src/compiler/prelude.rs @@ -359,6 +359,24 @@ impl Compiler { ])) ])) ])), + (value!([ + "emit" => "assign", + "children" => + (value!([ + (value!([ + "emit" => "lvalue", + "children" => + (value!([ + "emit" => "identifier", + "value" => "cnt" + ])) + ])), + (value!([ + "emit" => "value_integer", + "value" => 0 + ])) + ])) + ])), (value!([ "emit" => "op_loop", "children" => @@ -414,6 +432,24 @@ impl Compiler { ])) ])) ])), + (value!([ + "emit" => "assign_add", + "children" => + (value!([ + (value!([ + "emit" => "lvalue", + "children" => + (value!([ + "emit" => "identifier", + "value" => "cnt" + ])) + ])), + (value!([ + "emit" => "value_integer", + "value" => 1 + ])) + ])) + ])), (value!([ "emit" => "op_if", "children" => @@ -431,22 +467,8 @@ impl Compiler { "children" => (value!([ (value!([ - "emit" => "rvalue", - "children" => - (value!([ - (value!([ - "emit" => "identifier", - "value" => "res" - ])), - (value!([ - "emit" => "attribute", - "children" => - (value!([ - "emit" => "value_string", - "value" => "len" - ])) - ])) - ])) + "emit" => "identifier", + "value" => "cnt" ])), (value!([ "emit" => "cmp_eq", @@ -479,22 +501,8 @@ impl Compiler { "children" => (value!([ (value!([ - "emit" => "rvalue", - "children" => - (value!([ - (value!([ - "emit" => 
"identifier", - "value" => "res" - ])), - (value!([ - "emit" => "attribute", - "children" => - (value!([ - "emit" => "value_string", - "value" => "len" - ])) - ])) - ])) + "emit" => "identifier", + "value" => "cnt" ])), (value!([ "emit" => "cmp_lt", diff --git a/src/prelude.tok b/src/prelude.tok index f721b873..a02730c8 100644 --- a/src/prelude.tok +++ b/src/prelude.tok @@ -1,19 +1,42 @@ # Tokay default prelude +# The `Not`-builtin runs its parser and returns its negated result, +# so that `P` occurence becomes rejected. +# +# Everything else becomes accepted (Void). Not : @

<P> { P reject Void } +# The `Peek`-builtin runs `P` and returns its result, but resets the reading-context afterwards. +# +# It can be used to look ahead parsing constructs, but leaving the rest of the parser back to +# its original position, to decide. +# +# Due to Tokays memorizing features, the parsing will only be done once, and is remembered. Peek : @<P> { P reset } +# The `Expect`-builtin either expects `P`, and otherwise raises a syntax error, +# reporting a useful parse error message. Expect : @<P> msg=void { accept P error(msg || "Expecting " + *P + ", but got " + repr(Peek<(Char | Token | "end-of-file")>)) } +# This is a simple programmatic sequential repetition of `P`. +# +# It allows to specify a minimum (`min`) and maximum (`max`) count of allowed repetitions +# Blurrying (`blur`) means, to turn empty list into void, and return single-item lists as the +# item, which was the default with the built-in repeat construct in Tokay < 0.7 +# +# For several reasons, repetitions can also be expressed on a specialized token-level or by +# the grammar itself using left- and right-recursive structures, resulting in left- or +# right-leaning parse trees. +# +# Used by the `Pos<P>` and `Kle<P>` modifiers. Repeat : @< P, # Parselet min: 1, # minimum occurence @@ -21,14 +44,16 @@ Repeat : @< blur: true # result blurrying; empty list becomes void, one-item list becomes item > { res = () + cnt = 0 loop { P { res.push($1) - if max && res.len == max break + cnt += 1 + if max && cnt == max break } - if res.len < min reject + if cnt < min reject break } @@ -44,17 +69,29 @@ Repeat : @< res } +# Repeats `P` one or multiple times. Pos : @{ Repeat } + +# Repeats `P` none or multiple times. Kle : @{ Repeat || void } + +# Optionally accepts `P` or nothing. Opt : @<P> { P | Void } +# Implements a recursive, separated list. +# +# `P` is parsed as part of a list, which is separated by `Separator`. +# `Separator` defaults to `(',' _)`. +# +# The `empty` config allows for a trailing `Separator` with no `P`. List : @ { Self Separator P $1 + $3 if empty (Self Separator) # allows for trailing Separator P ($1, ) } +# Parses any number, either Float or Int. Number : Float | Int -Token : Word | Number | AsciiPunctuation -# print("=> ", XKle<'a'>) +# Parse any token, which is just no whitespace in default terms. +Token : Word | Number | AsciiPunctuation