Skip to content

Commit

Permalink
Adds basic tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
cuducos committed Sep 20, 2023
1 parent acd8a66 commit 8c5381c
Show file tree
Hide file tree
Showing 5 changed files with 189 additions and 104 deletions.
20 changes: 13 additions & 7 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,25 @@ use std::path::PathBuf;
use anyhow::Result;

use crate::model::{
AutoGeneratedVariable, Block, Comment, SimpleVariable, VariableWithRandomValue, Variables,
AutoGeneratedVariable, Block, Comment, SimpleVariable, VariableType, VariableWithRandomValue,
};
use crate::parser::Parser;
use crate::tokenizer::Tokenizer;

mod model;
mod parser;
mod reader;
mod tokenizer;

fn main() -> Result<()> {
if let Some(path) = args().nth(1) {
let mut parser = Parser::new(PathBuf::from(&path))?;
for block in parser.parse()? {
println!("{block}");
}

let mut tokenizer = Tokenizer::new(PathBuf::from(&path))?;
println!("{:?}", tokenizer.tokenize()?);
return Ok(());
}

Expand All @@ -34,12 +40,12 @@ fn main() -> Result<()> {
let variable6 = AutoGeneratedVariable::new("AUTO_GENERATED", "{ANSWER}-{DEFAULT_VALUE_ONE}");

let variables = vec![
Variables::Input(variable1),
Variables::Input(variable2),
Variables::Input(variable3),
Variables::Input(variable4),
Variables::Random(variable5),
Variables::AutoGenerated(variable6),
VariableType::Input(variable1),
VariableType::Input(variable2),
VariableType::Input(variable3),
VariableType::Input(variable4),
VariableType::Random(variable5),
VariableType::AutoGenerated(variable6),
];
let block = Block::new(title, description, variables);
println!("{block}");
Expand Down
27 changes: 15 additions & 12 deletions src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ impl Variable for VariableWithRandomValue {
}
}

pub enum Variables {
pub enum VariableType {
Input(SimpleVariable),
AutoGenerated(AutoGeneratedVariable),
Random(VariableWithRandomValue),
Expand All @@ -151,17 +151,17 @@ pub enum Variables {
pub struct Block {
pub title: Comment,
pub description: Option<Comment>,
pub variables: Vec<Variables>,
pub variables: Vec<VariableType>,

context: HashMap<String, String>,
}

impl Block {
pub fn new(title: Comment, description: Option<Comment>, variables: Vec<Variables>) -> Self {
pub fn new(title: Comment, description: Option<Comment>, variables: Vec<VariableType>) -> Self {
let context: HashMap<String, String> = HashMap::new();
let has_auto_generated_variables = variables
.iter()
.any(|v| matches!(v, Variables::AutoGenerated(_)));
.any(|v| matches!(v, VariableType::AutoGenerated(_)));

let mut block = Self {
title,
Expand All @@ -173,14 +173,14 @@ impl Block {
if has_auto_generated_variables {
for variable in &block.variables {
match variable {
Variables::Input(var) => block.context.insert(var.key(), var.value()),
Variables::AutoGenerated(_) => None,
Variables::Random(var) => block.context.insert(var.key(), var.value()),
VariableType::Input(var) => block.context.insert(var.key(), var.value()),
VariableType::AutoGenerated(_) => None,
VariableType::Random(var) => block.context.insert(var.key(), var.value()),
};
}

for variable in &mut block.variables {
if let Variables::AutoGenerated(var) = variable {
if let VariableType::AutoGenerated(var) = variable {
var.load_context(&block.context);
}
}
Expand All @@ -200,9 +200,9 @@ impl fmt::Display for Block {

for variable in &self.variables {
match variable {
Variables::Input(var) => lines.push(var.to_string()),
Variables::AutoGenerated(var) => lines.push(var.to_string()),
Variables::Random(var) => lines.push(var.to_string()),
VariableType::Input(var) => lines.push(var.to_string()),
VariableType::AutoGenerated(var) => lines.push(var.to_string()),
VariableType::Random(var) => lines.push(var.to_string()),
}
}

Expand Down Expand Up @@ -278,7 +278,10 @@ mod tests {
let mut variable1 = SimpleVariable::new("ANSWER", None, None);
variable1.user_input("42");
let variable2 = SimpleVariable::new("AS_TEXT", Some("fourty two"), None);
let variables = vec![Variables::Input(variable1), Variables::Input(variable2)];
let variables = vec![
VariableType::Input(variable1),
VariableType::Input(variable2),
];
let block = Block::new(title, description, variables);
let got = block.to_string();
assert_eq!(got, "# 42\n# Fourty-two\nANSWER=42\nAS_TEXT=fourty two")
Expand Down
90 changes: 5 additions & 85 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,98 +1,18 @@
use std::{
fs::File,
io::{BufRead, BufReader},
path::PathBuf,
};

use anyhow::{anyhow, Result};

use crate::model::{Block, Comment};

/// Kind of item handed out by `CharReader::next`.
#[derive(PartialEq, Eq)]
pub enum CharType {
/// A single character taken from the current line.
Char(char),
/// End of the current line.
Eol,
/// End of the file.
Eof,
}
use crate::{
model::{Block, Comment},
reader::{CharReader, CharType},
};

const CAPITAL_ASCII_LETTERS: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/// Reads a file character by character while tracking the position reached.
struct CharReader {
// Count of lines loaded so far; stays 0 until the first line is read.
line: usize,
// Count of characters already returned from the current line.
column: usize,
// Display form of the file path, used as the prefix of error messages.
path: String,
// Line currently being consumed; `None` means the next line must be loaded.
current_line: Option<String>,
reader: BufReader<File>,
// Set once the underlying reader reports the end of the file.
done: bool,
}

impl CharReader {
/// Opens `path` and returns a reader positioned before the first line.
///
/// Fails when the file cannot be opened.
fn new(path: PathBuf) -> Result<Self> {
Ok(Self {
line: 0,
column: 0,
path: path.display().to_string(),
current_line: None,
done: false,
reader: BufReader::new(File::open(path)?),
})
}

/// Builds an "Unexpected ..." error for `character`, prefixed with `path:line:column`
/// and followed by `details` when given.
fn error(&self, character: &CharType, details: Option<String>) -> anyhow::Error {
let prefix = format!("{}:{}:{}", self.path, self.line, self.column);
let extra = details.map_or("".to_string(), |msg| format!(": {msg}"));
let token = match &character {
CharType::Char(char) => format!("character `{char}`"),
CharType::Eol => "EOL (end of line)".to_string(),
CharType::Eof => "EOF (end of file)".to_string(),
};

anyhow!(format!("{prefix}: Unexpected {token}{extra}"))
}

/// Returns the next item of the file.
///
/// `Eol` is returned for a `'\n'` and also when the current line runs out without one;
/// `column` is not advanced in either case. After the end of the file every call
/// returns `Eof`.
fn next(&mut self) -> Result<CharType> {
if self.done {
return Ok(CharType::Eof);
}
match &self.current_line {
None => {
// No line is loaded: read the next one, then retry.
let mut buffer = "".to_string();
let size = self.reader.read_line(&mut buffer)?;
if size == 0 {
// Zero bytes read means the end of the file was reached.
self.done = true;
return Ok(CharType::Eof);
}
self.current_line = Some(buffer.clone());
self.line += 1;
self.column = 0;
self.next()
}
// NOTE(review): `chars().nth(..)` walks the line from its start on every call.
Some(line) => match line.chars().nth(self.column) {
Some(char) => match char {
'\n' => {
self.current_line = None;
Ok(CharType::Eol)
}
_ => {
self.column += 1;
Ok(CharType::Char(char))
}
},
None => {
// Line ended without a trailing newline.
self.current_line = None;
Ok(CharType::Eol)
}
},
}
}
}

/// Parser that pulls its input from a `CharReader`.
pub struct Parser {
// Character source the parser consumes.
reader: CharReader,
}

impl Parser {
pub fn new(path: PathBuf) -> Result<Self> {
pub fn new(path: std::path::PathBuf) -> Result<Self> {
Ok(Self {
reader: CharReader::new(path)?,
})
Expand Down
84 changes: 84 additions & 0 deletions src/reader.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use std::{
fs::File,
io::{BufRead, BufReader},
path::PathBuf,
};

use anyhow::{anyhow, Result};

/// Kind of item handed out by [`CharReader::next`].
///
/// `Debug`, `Clone` and `Copy` are derived alongside the equality traits so values can be
/// printed in diagnostics, compared with `assert_eq!`, and passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CharType {
    /// A single character taken from the current line.
    Char(char),
    /// End of the current line.
    Eol,
    /// End of the file.
    Eof,
}

/// Reads a file character by character while tracking the position reached.
pub struct CharReader {
    /// Count of lines loaded so far; stays 0 until the first line is read.
    pub line: usize,
    /// Count of characters already returned from the current line.
    pub column: usize,
    /// Display form of the file path, used as the prefix of error messages.
    path: String,
    /// Line currently being consumed; `None` means the next line must be loaded.
    current_line: Option<String>,
    reader: BufReader<File>,
    /// Set once the underlying reader reports the end of the file.
    done: bool,
    /// Byte offset into `current_line` of the next character to return.
    ///
    /// Kept next to the character-based `column` so each call can resume where the previous
    /// one stopped instead of rescanning the line from its start.
    offset: usize,
}

impl CharReader {
    /// Opens `path` and returns a reader positioned before the first line.
    ///
    /// # Errors
    ///
    /// Fails when the file cannot be opened.
    pub fn new(path: PathBuf) -> Result<Self> {
        Ok(Self {
            line: 0,
            column: 0,
            path: path.display().to_string(),
            current_line: None,
            done: false,
            reader: BufReader::new(File::open(path)?),
            offset: 0,
        })
    }

    /// Builds an "Unexpected ..." error for `character`, prefixed with `path:line:column`
    /// and followed by `details` when given.
    pub fn error(&self, character: &CharType, details: Option<String>) -> anyhow::Error {
        let prefix = format!("{}:{}:{}", self.path, self.line, self.column);
        let extra = details.map_or_else(String::new, |msg| format!(": {msg}"));
        let token = match character {
            CharType::Char(char) => format!("character `{char}`"),
            CharType::Eol => "EOL (end of line)".to_string(),
            CharType::Eof => "EOF (end of file)".to_string(),
        };

        anyhow!("{}: Unexpected {}{}", prefix, token, extra)
    }

    /// Returns the next item of the file.
    ///
    /// `Eol` is returned for a `'\n'` and also when the current line runs out without one;
    /// `column` is not advanced in either case. After the end of the file every call
    /// returns `Eof`.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error raised while reading a line.
    pub fn next(&mut self) -> Result<CharType> {
        if self.done {
            return Ok(CharType::Eof);
        }

        if self.current_line.is_none() {
            let mut buffer = String::new();
            if self.reader.read_line(&mut buffer)? == 0 {
                // Zero bytes read means the end of the file was reached.
                self.done = true;
                return Ok(CharType::Eof);
            }
            self.current_line = Some(buffer);
            self.line += 1;
            self.column = 0;
            self.offset = 0;
        }

        // Resume from the stored byte offset rather than walking the line from its start.
        let offset = self.offset;
        let upcoming = self
            .current_line
            .as_deref()
            .and_then(|line| line.get(offset..))
            .and_then(|rest| rest.chars().next());

        match upcoming {
            Some(c) if c != '\n' => {
                self.column += 1;
                self.offset += c.len_utf8();
                Ok(CharType::Char(c))
            }
            // Either a newline or a final line without one: the line is finished.
            _ => {
                self.current_line = None;
                Ok(CharType::Eol)
            }
        }
    }
}
72 changes: 72 additions & 0 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use std::path::PathBuf;

use anyhow::Result;

use crate::reader::{CharReader, CharType};

/// Lexical unit produced by the tokenizer.
///
/// Equality and `Clone` are derived so token streams can be compared and reused,
/// for example in `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Trimmed text found between marks or up to the end of a line.
    Text(String),
    /// A `#` that is the first character of a line.
    CommentMark,
    /// The ` # ` sequence (space, hash, space) found inside a line.
    HelpMark,
    /// An `=` character.
    EqualSign,
}

/// Splits the contents of a file into [`Token`]s.
pub struct Tokenizer {
    reader: CharReader,
}

impl Tokenizer {
    /// Creates a tokenizer that reads from `path`.
    ///
    /// # Errors
    ///
    /// Returns any error reported by [`CharReader::new`].
    pub fn new(path: PathBuf) -> Result<Self> {
        Ok(Self {
            reader: CharReader::new(path)?,
        })
    }

    /// Turns pending text into a [`Token::Text`], or `None` when nothing is left after trimming.
    ///
    /// Trimming before the emptiness test keeps whitespace-only text from becoming an empty
    /// `Text` token.
    fn text_token(buffer: &str) -> Option<Token> {
        let text = buffer.trim();
        (!text.is_empty()).then(|| Token::Text(text.to_string()))
    }

    /// Reads characters until at least one token is complete.
    ///
    /// Returns the pending text (if any) followed by the mark that ended it, or the text
    /// alone when a line ends. An empty vector is returned only at the end of the input.
    fn next_tokens(&mut self) -> Result<Vec<Token>> {
        let mut buffer = String::new();
        loop {
            let mark = match self.reader.next()? {
                // Defensive: flush any text still pending when the input ends.
                CharType::Eof => return Ok(Self::text_token(&buffer).into_iter().collect()),
                CharType::Eol => {
                    if let Some(text) = Self::text_token(&buffer) {
                        return Ok(vec![text]);
                    }
                    // Blank or whitespace-only line: nothing to emit, keep reading.
                    buffer.clear();
                    continue;
                }
                CharType::Char('=') => Token::EqualSign,
                // `column` was already advanced past the character, so 1 means "first on the line".
                CharType::Char('#') if self.reader.column == 1 => Token::CommentMark,
                CharType::Char(' ') if buffer.ends_with(" #") => {
                    // Drop the " #" that opened the help mark; the suffix check above
                    // guarantees the subtraction cannot underflow.
                    buffer.truncate(buffer.len() - " #".len());
                    Token::HelpMark
                }
                CharType::Char(c) => {
                    buffer.push(c);
                    continue;
                }
            };

            return Ok(Self::text_token(&buffer)
                .into_iter()
                .chain(Some(mark))
                .collect());
        }
    }

    // TODO: make iterator?
    /// Reads the whole input and returns every token in order.
    ///
    /// # Errors
    ///
    /// Propagates any error raised by the underlying reader.
    pub fn tokenize(&mut self) -> Result<Vec<Token>> {
        let mut tokens = Vec::new();
        loop {
            let batch = self.next_tokens()?;
            if batch.is_empty() {
                return Ok(tokens);
            }
            tokens.extend(batch);
        }
    }
}

0 comments on commit 8c5381c

Please sign in to comment.