[naga wgsl-in] Introduce Word enum, for classifying identifiers.
Change `Token::Word`'s payload from a `&str` to a
`front::wgsl::parse::word::Word` enum, with variants for words
reserved by the WGSL grammar like `struct`, `loop`, and so on.

Rather than doing string comparisons in `match` statements, hash
word-like tokens once at lexing time, and then let the parser's
`match` statements compare enum variants.
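
As a rough sketch of the approach (the variant set and the helper function below are illustrative assumptions, not the actual definitions in `front::wgsl::parse::word`):

// Hypothetical, pared-down shape of the new payload type: one variant
// per word reserved by the WGSL grammar, plus a catch-all for
// ordinary identifiers.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Word<'a> {
    Struct,
    Loop,
    Var,
    /// An identifier not reserved by the grammar.
    Other(&'a str),
}

// The parser's `match` statements can then compare variants instead
// of strings. (`starts_global_decl` is a made-up example, not a real
// naga function.)
fn starts_global_decl(word: Word<'_>) -> bool {
    matches!(word, Word::Struct | Word::Var)
}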
jimblandy committed Nov 28, 2024
1 parent 5051838 commit dbc2ff3
Showing 4 changed files with 190 additions and 100 deletions.
98 changes: 56 additions & 42 deletions naga/src/front/wgsl/parse/lexer.rs
@@ -1,3 +1,4 @@
+use super::word::{self, Word};
 use super::{number::consume_number, Error, ExpectedToken};
 use crate::front::wgsl::error::NumberError;
 use crate::front::wgsl::parse::directive::enable_extension::EnableExtensions;
@@ -13,7 +14,7 @@ pub enum Token<'a> {
     Paren(char),
     Attribute,
     Number(Result<Number, NumberError>),
-    Word(&'a str),
+    Word(Word<'a>),
     Operation(char),
     LogicalOperation(char),
     ShiftOperation(char),
@@ -160,7 +161,7 @@ fn consume_token(input: &str, generic: bool) -> (Token<'_>, &str) {
         }
         _ if is_word_start(cur) => {
             let (word, rest) = consume_any(input, is_word_part);
-            (Token::Word(word), rest)
+            (Token::Word(Word::Other(word)), rest)
         }
         _ => (Token::Unknown(cur), chars.as_str()),
     }
@@ -207,15 +208,22 @@ pub(in crate::front::wgsl) struct Lexer<'a> {
     last_end_offset: usize,
     #[allow(dead_code)]
     pub(in crate::front::wgsl) enable_extensions: EnableExtensions,
+
+    /// A pointer to the global read-only WGSL word table.
+    ///
+    /// We cache this here to avoid having to synchronize on the
+    /// `OnceLock` every time we want to look up an identifier.
+    word_table: &'static word::WordTable,
 }
 
 impl<'a> Lexer<'a> {
-    pub(in crate::front::wgsl) const fn new(input: &'a str) -> Self {
+    pub(in crate::front::wgsl) fn new(input: &'a str) -> Self {
         Lexer {
             input,
             source: input,
             last_end_offset: 0,
             enable_extensions: EnableExtensions::empty(),
+            word_table: word::get_table(),
         }
     }
 
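The new `word_table` field caches a reference to a lazily initialized global. `parse/word.rs` is not part of the diff shown here, so the following is only a guess at what `word::get_table` and `WordTable` might look like, using the simplified `Word` from the sketch above:

use std::collections::HashMap;
use std::sync::OnceLock;

// Assumed representation: spelling -> classified word.
pub type WordTable = HashMap<&'static str, Word<'static>>;

static TABLE: OnceLock<WordTable> = OnceLock::new();

/// Build the table on first use; every later call returns the cached
/// `&'static` reference, which the `Lexer` then stores so it never
/// touches the `OnceLock` again.
pub fn get_table() -> &'static WordTable {
    TABLE.get_or_init(|| {
        let mut table = WordTable::new();
        table.insert("struct", Word::Struct);
        table.insert("loop", Word::Loop);
        table.insert("var", Word::Var);
        table
    })
}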
@@ -289,11 +297,17 @@ impl<'a> Lexer<'a> {
     fn next_impl(&mut self, generic: bool) -> TokenSpan<'a> {
         let mut start_byte_offset = self.current_byte_offset();
         loop {
-            let (token, rest) = consume_token(self.input, generic);
+            let (mut token, rest) = consume_token(self.input, generic);
             self.input = rest;
             match token {
                 Token::Trivia => start_byte_offset = self.current_byte_offset(),
                 _ => {
+                    // Check if `token` is an id with special meaning.
+                    if let Token::Word(Word::Other(s)) = token {
+                        if let Some(known) = self.word_table.get(s) {
+                            token = Token::Word(*known);
+                        }
+                    }
                     self.last_end_offset = self.current_byte_offset();
                     return (token, self.span_from(start_byte_offset));
                 }
@@ -354,7 +368,7 @@ impl<'a> Lexer<'a> {
         &mut self,
     ) -> Result<(&'a str, Span), Error<'a>> {
         match self.next() {
-            (Token::Word(word), span) => Self::word_as_ident_with_span(word, span),
+            (Token::Word(word), span) => Self::word_as_ident_with_span(word.as_str(), span),
             other => Err(Error::Unexpected(other.1, ExpectedToken::Identifier)),
         }
     }
@@ -363,7 +377,7 @@ impl<'a> Lexer<'a> {
         &mut self,
     ) -> Result<(&'a str, Span), Error<'a>> {
         match self.peek() {
-            (Token::Word(word), span) => Self::word_as_ident_with_span(word, span),
+            (Token::Word(word), span) => Self::word_as_ident_with_span(word.as_str(), span),
             other => Err(Error::Unexpected(other.1, ExpectedToken::Identifier)),
         }
     }
@@ -397,7 +411,7 @@ impl<'a> Lexer<'a> {
         self.expect_generic_paren('<')?;
         let pair = match self.next() {
             (Token::Word(word), span) => {
-                conv::get_scalar_type(word).ok_or(Error::UnknownScalarType(span))
+                conv::get_scalar_type(word.as_str()).ok_or(Error::UnknownScalarType(span))
             }
             (_, span) => Err(Error::UnknownScalarType(span)),
         }?;
@@ -413,7 +427,7 @@
     ) -> Result<(Scalar, Span), Error<'a>> {
         self.expect_generic_paren('<')?;
         let pair = match self.next() {
-            (Token::Word(word), span) => conv::get_scalar_type(word)
+            (Token::Word(word), span) => conv::get_scalar_type(word.as_str())
                 .map(|scalar| (scalar, span))
                 .ok_or(Error::UnknownScalarType(span)),
             (_, span) => Err(Error::UnknownScalarType(span)),
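The `word.as_str()` calls in these hunks imply that a `Word` can recover its source spelling. A plausible implementation, again assuming the simplified enum from the first sketch rather than the real `word.rs`:

impl<'a> Word<'a> {
    /// Return the source spelling of this word.
    pub fn as_str(self) -> &'a str {
        match self {
            Word::Struct => "struct",
            Word::Loop => "loop",
            Word::Var => "var",
            Word::Other(s) => s,
        }
    }
}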
@@ -636,56 +650,56 @@ fn double_floats() {
             Token::Number(Ok(Number::F64(0.0625))),
             Token::Number(Ok(Number::F64(10.0))),
             Token::Number(Ok(Number::AbstractInt(10))),
-            Token::Word("l"),
+            Token::Word(Word::Other("l")),
         ],
     )
 }
 
 #[test]
 fn test_tokens() {
-    sub_test("id123_OK", &[Token::Word("id123_OK")]);
+    sub_test("id123_OK", &[Token::Word(Word::Other("id123_OK"))]);
     sub_test(
         "92No",
         &[
             Token::Number(Ok(Number::AbstractInt(92))),
-            Token::Word("No"),
+            Token::Word(Word::Other("No")),
         ],
     );
     sub_test(
         "2u3o",
         &[
             Token::Number(Ok(Number::U32(2))),
             Token::Number(Ok(Number::AbstractInt(3))),
-            Token::Word("o"),
+            Token::Word(Word::Other("o")),
         ],
     );
     sub_test(
         "2.4f44po",
         &[
             Token::Number(Ok(Number::F32(2.4))),
             Token::Number(Ok(Number::AbstractInt(44))),
-            Token::Word("po"),
+            Token::Word(Word::Other("po")),
         ],
     );
     sub_test(
         "Δέλτα réflexion Кызыл 𐰓𐰏𐰇 朝焼け سلام 검정 שָׁלוֹם गुलाबी փիրուզ",
         &[
-            Token::Word("Δέλτα"),
-            Token::Word("réflexion"),
-            Token::Word("Кызыл"),
-            Token::Word("𐰓𐰏𐰇"),
-            Token::Word("朝焼け"),
-            Token::Word("سلام"),
-            Token::Word("검정"),
-            Token::Word("שָׁלוֹם"),
-            Token::Word("गुलाबी"),
-            Token::Word("փիրուզ"),
+            Token::Word(Word::Other("Δέλτα")),
+            Token::Word(Word::Other("réflexion")),
+            Token::Word(Word::Other("Кызыл")),
+            Token::Word(Word::Other("𐰓𐰏𐰇")),
+            Token::Word(Word::Other("朝焼け")),
+            Token::Word(Word::Other("سلام")),
+            Token::Word(Word::Other("검정")),
+            Token::Word(Word::Other("שָׁלוֹם")),
+            Token::Word(Word::Other("गुलाबी")),
+            Token::Word(Word::Other("փիրուզ")),
         ],
     );
-    sub_test("æNoø", &[Token::Word("æNoø")]);
-    sub_test("No¾", &[Token::Word("No"), Token::Unknown('¾')]);
-    sub_test("No好", &[Token::Word("No好")]);
-    sub_test("_No", &[Token::Word("_No")]);
+    sub_test("æNoø", &[Token::Word(Word::Other("æNoø"))]);
+    sub_test("No¾", &[Token::Word(Word::Other("No")), Token::Unknown('¾')]);
+    sub_test("No好", &[Token::Word(Word::Other("No好"))]);
+    sub_test("_No", &[Token::Word(Word::Other("_No"))]);
     sub_test(
         "*/*/***/*//=/*****//",
         &[
@@ -705,11 +719,11 @@
             Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
             Token::Number(Ok(Number::AbstractFloat(1.0 + 0x2f as f64 / 256.0))),
             Token::Number(Ok(Number::AbstractFloat(1.125))),
-            Token::Word("h"),
+            Token::Word(Word::Other("h")),
             Token::Number(Ok(Number::AbstractFloat(1.125))),
-            Token::Word("H"),
+            Token::Word(Word::Other("H")),
             Token::Number(Ok(Number::AbstractFloat(1.125))),
-            Token::Word("lf"),
+            Token::Word(Word::Other("lf")),
         ],
     )
 }
@@ -720,37 +734,37 @@ fn test_variable_decl() {
         "@group(0 ) var< uniform> texture: texture_multisampled_2d <f32 >;",
         &[
             Token::Attribute,
-            Token::Word("group"),
+            Token::Word(Word::Other("group")),
             Token::Paren('('),
             Token::Number(Ok(Number::AbstractInt(0))),
             Token::Paren(')'),
-            Token::Word("var"),
+            Token::Word(Word::Var),
             Token::Paren('<'),
-            Token::Word("uniform"),
+            Token::Word(Word::Other("uniform")),
             Token::Paren('>'),
-            Token::Word("texture"),
+            Token::Word(Word::Other("texture")),
             Token::Separator(':'),
-            Token::Word("texture_multisampled_2d"),
+            Token::Word(Word::Other("texture_multisampled_2d")),
             Token::Paren('<'),
-            Token::Word("f32"),
+            Token::Word(Word::Other("f32")),
             Token::Paren('>'),
             Token::Separator(';'),
         ],
     );
     sub_test(
         "var<storage,read_write> buffer: array<u32>;",
         &[
-            Token::Word("var"),
+            Token::Word(Word::Var),
             Token::Paren('<'),
-            Token::Word("storage"),
+            Token::Word(Word::Other("storage")),
             Token::Separator(','),
-            Token::Word("read_write"),
+            Token::Word(Word::Other("read_write")),
             Token::Paren('>'),
-            Token::Word("buffer"),
+            Token::Word(Word::Other("buffer")),
             Token::Separator(':'),
-            Token::Word("array"),
+            Token::Word(Word::Other("array")),
             Token::Paren('<'),
-            Token::Word("u32"),
+            Token::Word(Word::Other("u32")),
             Token::Paren('>'),
             Token::Separator(';'),
         ],