From 31fd13fe37ae48a2e48aca35334ff886f7bdd639 Mon Sep 17 00:00:00 2001 From: Arber Shabhasa Date: Wed, 3 Apr 2024 17:16:15 +0200 Subject: [PATCH] Naive solution to xml with closing tags containing whitespaces --- lib/saxy/parser/builder.ex | 5 ++++- test/saxy_test.exs | 18 ++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/saxy/parser/builder.ex b/lib/saxy/parser/builder.ex index d5c205a..2ebb5fd 100644 --- a/lib/saxy/parser/builder.ex +++ b/lib/saxy/parser/builder.ex @@ -1184,7 +1184,7 @@ defmodule Saxy.Parser.Builder do lookahead buffer, @streaming do ">" <> rest -> [open_tag | stack] = state.stack - ending_tag = binary_part(original, pos, len) + ending_tag = binary_part(original, pos, len) |> String.trim() pos = pos + len + 1 if open_tag == ending_tag do @@ -1207,6 +1207,9 @@ defmodule Saxy.Parser.Builder do char <> rest when is_ascii_name_char(char) -> close_tag_name(rest, more?, original, pos, state, len + 1) + char <> rest when is_whitespace(char) -> + close_tag_name(rest, more?, original, pos, state, len + 1) + token in unquote(utf8_binaries()) when more? 
-> halt!(close_tag_name(token, more?, original, pos, state, len)) diff --git a/test/saxy_test.exs b/test/saxy_test.exs index e24d1d6..b579494 100644 --- a/test/saxy_test.exs +++ b/test/saxy_test.exs @@ -31,11 +31,11 @@ defmodule SaxyTest do for fixture <- @fixtures do stream = stream_fixture(fixture) element_stream = Saxy.stream_events(stream) - assert [_ | _] = Enum.to_list element_stream + assert [_ | _] = Enum.to_list(element_stream) end assert_raise Saxy.ParseError, fn -> - Enum.to_list Saxy.stream_events stream_fixture "incorrect.xml" + Enum.to_list(Saxy.stream_events(stream_fixture("incorrect.xml"))) end end @@ -73,6 +73,20 @@ defmodule SaxyTest do ] end + test "parse_string/4 parses XML binary with closing tags containing whitespaces" do + data = "<foo>Some Data</foo >" + + assert {:ok, state} = parse(data, StackHandler, [], expand_entity: :keep) + + assert state == [ + end_document: {}, + end_element: "foo", + characters: "Some Data", + start_element: {"foo", []}, + start_document: [] + ] + end + test "handles trailing Unicode codepoints during streaming" do data = "𠜎𠜱𠝹𠱓" stream = for byte <- :binary.bin_to_list(data), do: <<byte>>