This repository has been archived by the owner on Jan 6, 2025. It is now read-only.

Merge pull request #3 from MiniaczQ/dev
Lexer update
MiniaczQ authored May 13, 2022
2 parents cc01c9b + a6391c0 commit cffe02a
Showing 13 changed files with 469 additions and 247 deletions.
2 changes: 1 addition & 1 deletion src/lexer/char_scanner.rs
@@ -82,7 +82,7 @@ impl Scannable<char> for CharScanner {
}

// Returns the current character
fn peek(&self) -> char {
fn curr(&self) -> char {
self.current
}
}
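
The only change in this file is the rename of the `Scannable` accessor from `peek` to `curr`. For orientation, here is a sketch of what the trait plausibly looks like after the rename; the actual definition lives in src/scannable.rs, which is not part of this diff, so the exact signatures are assumptions inferred from the impls in this commit.

```rust
// Sketch only (inferred, not taken from this commit): the Scannable trait
// after the peek -> curr rename, as suggested by the impls for CharScanner
// and LexemBuilder shown in this diff.
pub trait Scannable<T> {
    /// Advance the scanner to the next item.
    fn pop(&mut self);

    /// The item the scanner currently points at (previously named `peek`).
    fn curr(&self) -> T;
}
```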
81 changes: 68 additions & 13 deletions src/lexer/lexem.rs
@@ -1,4 +1,4 @@
use std::fmt::Display;
use std::{error::Error, fmt::Display};

use crate::{
lexer::{keywords::Keyword, operators::Operator},
@@ -61,22 +61,68 @@ impl Lexem {
pub struct LexemBuilder<'a> {
scanner: &'a mut CharScanner,
start: Position,
errors: &'a mut Vec<LexemError>,
}

impl<'a> LexemBuilder<'a> {
pub fn new(scanner: &'a mut CharScanner) -> Self {
let start = (&*scanner).last_pos();
Self { scanner, start }
#[derive(Debug, PartialEq, Eq)]
pub enum LexemErrorVariant {
CommentNeverEnds,
CommentTooLong,
StringNeverEnds,
StringTooLong,
IntegerPartTooBig,
DecimalPartTooBig,
IdentifierTooLong,
InvalidEscapeCharacter(char),
InvalidSequence(String),
}

impl Display for LexemErrorVariant {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LexemErrorVariant::CommentNeverEnds => f.write_str("comment never ends"),
LexemErrorVariant::CommentTooLong => f.write_str("comment too long"),
LexemErrorVariant::StringNeverEnds => f.write_str("string never ends"),
LexemErrorVariant::StringTooLong => f.write_str("string too long"),
LexemErrorVariant::IntegerPartTooBig => f.write_str("integer part too big"),
LexemErrorVariant::DecimalPartTooBig => f.write_str("decimal part too big"),
LexemErrorVariant::IdentifierTooLong => f.write_str("identifier too long"),
LexemErrorVariant::InvalidEscapeCharacter(c) => {
f.write_fmt(format_args!("invalid escape character `\\{}`", c))
}
LexemErrorVariant::InvalidSequence(s) => {
f.write_fmt(format_args!("invalid sequence `{}`", s))
}
}
}
}

#[derive(Debug)]
pub struct LexemError {
pub start: Position,
pub end: Position,
pub variant: LexemErrorVariant,
}

/// Lexem start position
pub fn get_start(&self) -> Position {
self.start
impl Display for LexemError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_fmt(format_args!(
"Error from {} to {}: {}",
self.start, self.end, self.variant
))
}
}

impl Error for LexemError {}

/// Scanner position
pub fn get_here(&self) -> Position {
self.scanner.last_pos()
impl<'a> LexemBuilder<'a> {
pub fn new(scanner: &'a mut CharScanner, errors: &'a mut Vec<LexemError>) -> Self {
let start = (&*scanner).last_pos();
Self {
scanner,
start,
errors,
}
}

/// Create a lexem
@@ -88,6 +134,15 @@ impl<'a> LexemBuilder<'a> {
pub fn bake(&self, token_type: LexemType) -> Option<Lexem> {
Some(self.bake_raw(token_type))
}

/// Reports an error that happened during building
pub fn error(&mut self, e: LexemErrorVariant) {
self.errors.push(LexemError {
start: self.start,
end: self.scanner.last_pos(),
variant: e,
});
}
}

impl<'a> Scannable<char> for LexemBuilder<'a> {
@@ -97,8 +152,8 @@ impl<'a> Scannable<char> for LexemBuilder<'a> {
}

#[inline]
fn peek(&self) -> char {
self.scanner.peek()
fn curr(&self) -> char {
self.scanner.curr()
}
}

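Taken together, the lexem.rs changes replace ad-hoc eprintln! reporting with a `Vec<LexemError>` that the `LexemBuilder` fills via `error()`, plus `Display` and `std::error::Error` impls so callers can format or propagate the collected errors. Below is a hypothetical caller-side sketch (not part of this commit) of how those errors might be reported once lexing is done.

```rust
use crate::lexer::lexem::LexemError;

// Hypothetical usage, not from this commit: the code driving the lexer owns
// the Vec<LexemError> it handed to LexemBuilder::new and can report every
// collected error through the Display impl added above.
fn report_lexem_errors(errors: &[LexemError]) {
    for error in errors {
        // Prints e.g. "Error from <start> to <end>: comment too long"
        eprintln!("{}", error);
    }
}
```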
2 changes: 1 addition & 1 deletion src/lexer/macros.rs
@@ -11,7 +11,7 @@ macro_rules! char_match {
} };
($token_builder: expr, $default: expr, $($pattern: literal, $operator: expr), +) => { {
$token_builder.pop();
match $token_builder.peek() {
match $token_builder.curr() {
$($pattern => {
$token_builder.pop();
Some($operator)
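The `char_match!` macro gets the same `peek()` to `curr()` switch. Only part of the macro is visible in this hunk, so the usage below is a guess: the `Operator::Less` and `Operator::LessEqual` variants and the `Option<Operator>` result are placeholders, not taken from this commit.

```rust
// Illustrative guess, not from this commit: a matcher for a one- or
// two-character operator built on char_match!. Based on the visible arm,
// the macro pops the first character, inspects curr(), and yields
// Some(Operator::LessEqual) when it sees '='; the fallback arm for the
// default operator is cut off in this diff.
fn match_less_or_less_equal(lb: &mut LexemBuilder) -> Option<Operator> {
    char_match!(lb, Operator::Less, '=', Operator::LessEqual)
}
```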
109 changes: 82 additions & 27 deletions src/lexer/matchers/comment.rs
@@ -1,6 +1,6 @@
use crate::{
lexer::{
lexem::{Lexem, LexemBuilder, LexemType},
lexem::{Lexem, LexemBuilder, LexemErrorVariant, LexemType},
operators::Operator,
},
scannable::Scannable,
@@ -10,30 +10,30 @@ use crate::{
/// - `/` - division
/// - `//` - single line comment
/// - `/* [...] */` - multi-line comment
pub fn match_comment_or_division(tb: &mut LexemBuilder) -> Option<Lexem> {
if tb.peek() == '/' {
tb.pop();
match tb.peek() {
'*' => return Some(complete_multi_line_comment(tb)),
'/' => return Some(complete_single_line_comment(tb)),
_ => return tb.bake(LexemType::Operator(Operator::Slash)),
pub fn match_comment_or_division(lb: &mut LexemBuilder, max: usize) -> Option<Lexem> {
if lb.curr() == '/' {
lb.pop();
match lb.curr() {
'*' => return Some(complete_multi_line_comment(lb, max)),
'/' => return Some(complete_single_line_comment(lb, max)),
_ => return lb.bake(LexemType::Operator(Operator::Slash)),
}
}
None
}

/// Completes a multi-line comment
fn complete_multi_line_comment(tb: &mut LexemBuilder) -> Lexem {
fn complete_multi_line_comment(lb: &mut LexemBuilder, max: usize) -> Lexem {
let mut content: Vec<char> = vec![];
tb.pop();
lb.pop();
loop {
match tb.peek() {
match lb.curr() {
'*' => {
tb.pop();
match tb.peek() {
lb.pop();
match lb.curr() {
'/' => {
tb.pop();
break tb.bake_raw(LexemType::Comment(content.into_iter().collect()));
lb.pop();
break lb.bake_raw(LexemType::Comment(content.into_iter().collect()));
}
c => {
content.push('*');
@@ -42,46 +42,62 @@ fn complete_multi_line_comment(tb: &mut LexemBuilder) -> Lexem {
}
}
'\x03' => {
eprintln!("Comment started at {} never ends.", tb.get_start());
let t = tb.bake_raw(LexemType::Comment(content.into_iter().collect()));
tb.pop();
lb.error(LexemErrorVariant::CommentNeverEnds);
let t = lb.bake_raw(LexemType::Comment(content.into_iter().collect()));
lb.pop();
break t;
}
c => {
content.push(c);
}
}
tb.pop();
if content.len() > max {
content.pop();
lb.error(LexemErrorVariant::CommentTooLong);
break lb.bake_raw(LexemType::Comment(content.into_iter().collect()));
}
lb.pop();
}
}

/// Completes a single-line comment
fn complete_single_line_comment(tb: &mut LexemBuilder) -> Lexem {
fn complete_single_line_comment(lb: &mut LexemBuilder, max: usize) -> Lexem {
let mut content: Vec<char> = vec![];
tb.pop();
lb.pop();
loop {
match tb.peek() {
'\n' | '\x03' => return tb.bake_raw(LexemType::Comment(content.into_iter().collect())),
match lb.curr() {
'\n' | '\x03' => break lb.bake_raw(LexemType::Comment(content.into_iter().collect())),
c => {
content.push(c);
}
}
tb.pop();
if content.len() > max {
content.pop();
lb.error(LexemErrorVariant::CommentTooLong);
break lb.bake_raw(LexemType::Comment(content.into_iter().collect()));
}
lb.pop();
}
}

#[cfg(test)]
mod tests {
use crate::lexer::{
lexem::{Lexem, LexemType},
lexem::{Lexem, LexemError, LexemErrorVariant, LexemType},
matchers::test_utils::{lexem_with, matcher_with},
operators::Operator,
};

use super::match_comment_or_division;

fn matcher(string: &'static str) -> Option<Lexem> {
matcher_with(match_comment_or_division, string)
let r = matcher_with(|lb| match_comment_or_division(lb, 32), string);
assert!(r.1.is_empty());
r.0
}

fn err_matcher(string: &'static str) -> (Option<Lexem>, Vec<LexemError>) {
matcher_with(|lb| match_comment_or_division(lb, 32), string)
}

fn comment_lexem(
@@ -110,6 +126,25 @@ mod tests {
fn com_single_multi() {
assert_eq!(matcher("//a\nb"), comment_lexem("a", (1, 1), (1, 4)));
}

#[test]
fn com_single_max_long() {
assert_eq!(
matcher("//___a___b___a___c___a___b___a___d"),
comment_lexem("___a___b___a___c___a___b___a___d", (1, 1), (1, 35))
);
}

#[test]
fn com_single_too_long() {
let (result, errors) = err_matcher("//___a___b___a___c___a___b___a___d_");
assert_eq!(
result,
comment_lexem("___a___b___a___c___a___b___a___d", (1, 1), (1, 35))
);
assert!(errors[0].variant == LexemErrorVariant::CommentTooLong);
}

#[test]
fn empty_com_single_multi() {
assert_eq!(matcher("//\n"), comment_lexem("", (1, 1), (1, 3)));
Expand All @@ -120,6 +155,24 @@ mod tests {
assert_eq!(matcher("/*ab*/"), comment_lexem("ab", (1, 1), (1, 7)));
}

#[test]
fn com_multi_max_long() {
assert_eq!(
matcher("/*___a___b___a___c___a___b___a___d*/"),
comment_lexem("___a___b___a___c___a___b___a___d", (1, 1), (1, 37))
);
}

#[test]
fn com_multi_too_long() {
let (result, errors) = err_matcher("/*___a___b___a___c___a___b___a___d_*/");
assert_eq!(
result,
comment_lexem("___a___b___a___c___a___b___a___d", (1, 1), (1, 35))
);
assert!(errors[0].variant == LexemErrorVariant::CommentTooLong);
}

#[test]
fn empty_com_multi() {
assert_eq!(matcher("/**/"), comment_lexem("", (1, 1), (1, 5)));
@@ -132,7 +185,9 @@ impl<'a> Scannable<char> for LexemBuilder<'a> {

#[test]
fn com_multi_no_end() {
assert_eq!(matcher("/*a\n"), comment_lexem("a\n", (1, 1), (2, 1)));
let (result, errors) = err_matcher("/*a\n");
assert_eq!(result, comment_lexem("a\n", (1, 1), (2, 1)));
assert!(errors[0].variant == LexemErrorVariant::CommentNeverEnds);
}

#[test]