Skip to content

Commit

Permalink
Vertical tabs (U+000B) are now considered to be whitespace.
Browse files Browse the repository at this point in the history
  • Loading branch information
tjol committed May 13, 2024
1 parent 38783e5 commit 9690dbc
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/tokenizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ static size_t _refill_tokenizer(kdl_tokenizer* self)

bool _kdl_is_whitespace(uint32_t c)
{
return c == 0x0009 || // Character Tabulation
return c == 0x000B || // Vertical Tab
c == 0x0009 || // Character Tabulation
c == 0x0020 || // Space
c == 0x00A0 || // No-Break Space
c == 0x1680 || // Ogham Space Mark
Expand Down
19 changes: 19 additions & 0 deletions tests/kdlv2_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,27 @@ static void test_tokenizer_identifiers(void)
kdl_destroy_tokenizer(tok);
}

/*
 * A document consisting of a single vertical tab (U+000B) must tokenize as
 * exactly one whitespace token of length 1, followed by end of input.
 */
static void test_tokenizer_whitespace(void)
{
    kdl_tokenizer* tok = kdl_create_string_tokenizer(kdl_str_from_cstr("\x0b"));
    kdl_token token;

    // First (and only) token: the vertical tab itself
    ASSERT(kdl_pop_token(tok, &token) == KDL_TOKENIZER_OK);
    ASSERT(token.type == KDL_TOKEN_WHITESPACE);
    ASSERT(token.value.len == 1);
    ASSERT(token.value.data[0] == 0xb);

    // Nothing may follow it
    ASSERT(kdl_pop_token(tok, &token) == KDL_TOKENIZER_EOF);

    kdl_destroy_tokenizer(tok);
}

// Test entry point: hands each KDLv2 tokenizer test to run_test together
// with the name it is run under.
void TEST_MAIN(void)
{
    run_test("Tokenizer: KDLv2 strings", &test_tokenizer_strings);
    run_test("Tokenizer: KDLv2 identifiers", &test_tokenizer_identifiers);
    // Each test gets its own distinct name so a failure in the whitespace
    // test is not confused with the identifiers test.
    run_test("Tokenizer: KDLv2 whitespace", &test_tokenizer_whitespace);
}

0 comments on commit 9690dbc

Please sign in to comment.