-
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
189 additions
and
104 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
use std::{ | ||
fs::File, | ||
io::{BufRead, BufReader}, | ||
path::PathBuf, | ||
}; | ||
|
||
use anyhow::{anyhow, Result}; | ||
|
||
/// A single unit produced by the character reader.
///
/// `Debug`, `Clone` and `Copy` are derived in addition to the equality traits
/// so values can be printed in diagnostics, used with `assert_eq!`, and passed
/// around by value (every variant is a plain `char` or a unit).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharType {
    /// An ordinary character read from the current line.
    Char(char),
    /// End of the current line.
    Eol,
    /// End of the input; no more characters are available.
    Eof,
}
|
||
/// Reads a file one character at a time while tracking the position.
pub struct CharReader {
    /// Number of lines read so far; incremented each time a new line is
    /// loaded, so it is 1 while the first line is being consumed.
    pub line: usize,
    /// Number of characters already consumed from the current line.
    pub column: usize,
    /// Display form of the file path, used as the prefix of error messages.
    path: String,
    /// The line currently being consumed, or `None` when the next call has to
    /// load a new line from `reader`.
    current_line: Option<String>,
    /// Buffered handle to the underlying file.
    reader: BufReader<File>,
    /// Set once the end of the file has been reached.
    done: bool,
}
|
||
impl CharReader { | ||
pub fn new(path: PathBuf) -> Result<Self> { | ||
Ok(Self { | ||
line: 0, | ||
column: 0, | ||
path: path.display().to_string(), | ||
current_line: None, | ||
done: false, | ||
reader: BufReader::new(File::open(path)?), | ||
}) | ||
} | ||
|
||
pub fn error(&self, character: &CharType, details: Option<String>) -> anyhow::Error { | ||
let prefix = format!("{}:{}:{}", self.path, self.line, self.column); | ||
let extra = details.map_or("".to_string(), |msg| format!(": {msg}")); | ||
let token = match &character { | ||
CharType::Char(char) => format!("character `{char}`"), | ||
CharType::Eol => "EOL (end of line)".to_string(), | ||
CharType::Eof => "EOF (end of file)".to_string(), | ||
}; | ||
|
||
anyhow!(format!("{prefix}: Unexpected {token}{extra}")) | ||
} | ||
|
||
pub fn next(&mut self) -> Result<CharType> { | ||
if self.done { | ||
return Ok(CharType::Eof); | ||
} | ||
match &self.current_line { | ||
None => { | ||
let mut buffer = "".to_string(); | ||
let size = self.reader.read_line(&mut buffer)?; | ||
if size == 0 { | ||
self.done = true; | ||
return Ok(CharType::Eof); | ||
} | ||
self.current_line = Some(buffer.clone()); | ||
self.line += 1; | ||
self.column = 0; | ||
self.next() | ||
} | ||
Some(line) => match line.chars().nth(self.column) { | ||
Some(char) => match char { | ||
'\n' => { | ||
self.current_line = None; | ||
Ok(CharType::Eol) | ||
} | ||
_ => { | ||
self.column += 1; | ||
Ok(CharType::Char(char)) | ||
} | ||
}, | ||
None => { | ||
self.current_line = None; | ||
Ok(CharType::Eol) | ||
} | ||
}, | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
use std::path::PathBuf; | ||
|
||
use anyhow::Result; | ||
|
||
use crate::reader::{CharReader, CharType}; | ||
|
||
/// A lexical token produced by the tokenizer.
///
/// `Clone`, `PartialEq` and `Eq` are derived in addition to `Debug` so token
/// streams can be duplicated and compared (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A run of text, with surrounding whitespace trimmed.
    Text(String),
    /// A `#` found in the first column of a line.
    CommentMark,
    /// A `#` surrounded by single spaces (` # `) inside a line.
    HelpMark,
    /// An `=` character.
    EqualSign,
}
|
||
pub struct Tokenizer { | ||
reader: CharReader, | ||
} | ||
|
||
impl Tokenizer { | ||
pub fn new(path: PathBuf) -> Result<Self> { | ||
Ok(Self { | ||
reader: CharReader::new(path)?, | ||
}) | ||
} | ||
|
||
fn next_tokens(&mut self) -> Result<Vec<Token>> { | ||
let mut buffer = "".to_string(); | ||
loop { | ||
let char = self.reader.next()?; | ||
match char { | ||
CharType::Eof => return Ok(vec![]), | ||
CharType::Eol => { | ||
if buffer.is_empty() { | ||
continue; | ||
} | ||
return Ok(vec![Token::Text(buffer.trim().to_string())]); | ||
} | ||
CharType::Char(c) => { | ||
let mut token: Option<Token> = None; | ||
if c == '=' { | ||
token = Some(Token::EqualSign); | ||
} else if c == '#' && self.reader.column == 1 { | ||
token = Some(Token::CommentMark); | ||
} else if c == ' ' && buffer.ends_with(" #") { | ||
buffer = buffer.strip_suffix(" #").unwrap_or("").to_string(); | ||
token = Some(Token::HelpMark); | ||
} | ||
if let Some(t) = token { | ||
if buffer.is_empty() { | ||
return Ok(vec![t]); | ||
} | ||
return Ok(vec![Token::Text(buffer.trim().to_string()), t]); | ||
} | ||
buffer.push(c) | ||
} | ||
} | ||
} | ||
} | ||
|
||
// TODO: make iterator? | ||
pub fn tokenize(&mut self) -> Result<Vec<Token>> { | ||
let mut tokens: Vec<Token> = vec![]; | ||
loop { | ||
let new_tokens = self.next_tokens()?; | ||
if new_tokens.is_empty() { | ||
break; | ||
} | ||
tokens.extend(new_tokens); | ||
} | ||
Ok(tokens) | ||
} | ||
} |