Skip to content

Commit

Permalink
Naive solution to xml with closing tags containing whitespaces
Browse files Browse the repository at this point in the history
  • Loading branch information
ashabhasa committed Apr 3, 2024
1 parent dce77bf commit 31fd13f
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
5 changes: 4 additions & 1 deletion lib/saxy/parser/builder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1184,7 +1184,7 @@ defmodule Saxy.Parser.Builder do
lookahead buffer, @streaming do
">" <> rest ->
[open_tag | stack] = state.stack
ending_tag = binary_part(original, pos, len)
ending_tag = binary_part(original, pos, len) |> String.trim()
pos = pos + len + 1

if open_tag == ending_tag do
Expand All @@ -1207,6 +1207,9 @@ defmodule Saxy.Parser.Builder do
char <> rest when is_ascii_name_char(char) ->
close_tag_name(rest, more?, original, pos, state, len + 1)

char <> rest when is_whitespace(char) ->
close_tag_name(rest, more?, original, pos, state, len + 1)

token in unquote(utf8_binaries()) when more? ->
halt!(close_tag_name(token, more?, original, pos, state, len))

Expand Down
18 changes: 16 additions & 2 deletions test/saxy_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ defmodule SaxyTest do
for fixture <- @fixtures do
stream = stream_fixture(fixture)
element_stream = Saxy.stream_events(stream)
assert [_ | _] = Enum.to_list element_stream
assert [_ | _] = Enum.to_list(element_stream)
end

assert_raise Saxy.ParseError, fn ->
Enum.to_list Saxy.stream_events stream_fixture "incorrect.xml"
Enum.to_list(Saxy.stream_events(stream_fixture("incorrect.xml")))
end
end

Expand Down Expand Up @@ -73,6 +73,20 @@ defmodule SaxyTest do
]
end

test "parse_string/4 parses XML binary with closing tags containing whitespaces" do
  # The closing tag carries a trailing space before ">"; parsing must still
  # succeed and report the element name without that whitespace.
  xml = "<foo>Some Data</foo >"

  # Events are listed with the last one first, matching the shape of the
  # handler state asserted below.
  # NOTE(review): presumably StackHandler prepends each event - confirm.
  expected_events = [
    end_document: {},
    end_element: "foo",
    characters: "Some Data",
    start_element: {"foo", []},
    start_document: []
  ]

  assert {:ok, events} = parse(xml, StackHandler, [], expand_entity: :keep)

  assert events == expected_events
end

test "handles trailing Unicode codepoints during streaming" do
data = "<foo>𠜎𠜱𠝹𠱓</foo>"
stream = for byte <- :binary.bin_to_list(data), do: <<byte>>
Expand Down

0 comments on commit 31fd13f

Please sign in to comment.