diff --git a/lib/tilex/blog/post_scrubber.ex b/lib/tilex/blog/post_scrubber.ex new file mode 100644 index 00000000..4639c656 --- /dev/null +++ b/lib/tilex/blog/post_scrubber.ex @@ -0,0 +1,73 @@ +defmodule Tilex.Blog.PostScrubber do + @moduledoc """ + PostScrubber is mostly a copy/pasta from the HtmlSanitizeEx library markdown_html scrubber + + The difference is below under the "additions" comment where we've added some additional customizations + + For more info on customizing scrubbers, see the docs for HtmlSanitizeEx + """ + + require HtmlSanitizeEx.Scrubber.Meta + alias HtmlSanitizeEx.Scrubber.Meta + + @valid_schemes ["http", "https", "mailto"] + + # Removes any CDATA tags before the traverser/scrubber runs. + Meta.remove_cdata_sections_before_scrub() + + Meta.strip_comments() + + Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes) + Meta.allow_tag_with_these_attributes("a", ["name", "title"]) + + Meta.allow_tag_with_this_attribute_values("a", "target", ["_blank"]) + + Meta.allow_tag_with_this_attribute_values("a", "rel", [ + "noopener", + "noreferrer" + ]) + + Meta.allow_tag_with_these_attributes("b", []) + Meta.allow_tag_with_these_attributes("blockquote", []) + Meta.allow_tag_with_these_attributes("br", []) + Meta.allow_tag_with_these_attributes("code", ["class"]) + Meta.allow_tag_with_these_attributes("del", []) + Meta.allow_tag_with_these_attributes("em", []) + Meta.allow_tag_with_these_attributes("h1", []) + Meta.allow_tag_with_these_attributes("h2", []) + Meta.allow_tag_with_these_attributes("h3", []) + Meta.allow_tag_with_these_attributes("h4", []) + Meta.allow_tag_with_these_attributes("h5", []) + Meta.allow_tag_with_these_attributes("h6", []) + Meta.allow_tag_with_these_attributes("hr", []) + Meta.allow_tag_with_these_attributes("i", []) + + Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes) + + Meta.allow_tag_with_these_attributes("img", [ + "width", + "height", + "title", + "alt" + ]) + + 
Meta.allow_tag_with_these_attributes("li", []) + Meta.allow_tag_with_these_attributes("ol", []) + Meta.allow_tag_with_these_attributes("p", []) + Meta.allow_tag_with_these_attributes("pre", []) + Meta.allow_tag_with_these_attributes("span", []) + Meta.allow_tag_with_these_attributes("strong", []) + Meta.allow_tag_with_these_attributes("table", []) + Meta.allow_tag_with_these_attributes("tbody", []) + Meta.allow_tag_with_these_attributes("td", []) + Meta.allow_tag_with_these_attributes("th", []) + Meta.allow_tag_with_these_attributes("thead", []) + Meta.allow_tag_with_these_attributes("tr", []) + Meta.allow_tag_with_these_attributes("u", []) + Meta.allow_tag_with_these_attributes("ul", []) + + # Additions + Meta.allow_tag_with_these_attributes("div", []) + + Meta.strip_everything_not_covered() +end diff --git a/lib/tilex/markdown.ex b/lib/tilex/markdown.ex index 0804f2dc..b3c3d62e 100644 --- a/lib/tilex/markdown.ex +++ b/lib/tilex/markdown.ex @@ -1,5 +1,7 @@ defmodule Tilex.Markdown do alias Tilex.Cache + alias Tilex.Blog.PostScrubber + alias HtmlSanitizeEx.Scrubber @earmark_options %Earmark.Options{ code_class_prefix: "language-", @@ -18,24 +20,22 @@ defmodule Tilex.Markdown do def to_html_live(markdown) do markdown |> Earmark.as_html!(@earmark_options) - |> HtmlSanitizeEx.html5() + |> sanitize_markdown_html() |> String.trim() end - def to_html(markdown) do - Cache.cache(markdown, fn -> - to_html_live(markdown) - end) - end + def to_html(markdown), do: Cache.cache(markdown, fn -> to_html_live(markdown) end) def to_content(markdown) do markdown |> Earmark.as_html!(@content_earmark_options) - |> HtmlSanitizeEx.html5() + |> sanitize_markdown_html() |> Floki.parse_fragment() |> case do {:ok, fragment} -> fragment |> Floki.text() |> String.trim() _error -> markdown end end + + defp sanitize_markdown_html(html), do: Scrubber.scrub(html, PostScrubber) end diff --git a/test/lib/tilex/markdown_test.exs b/test/lib/tilex/markdown_test.exs index aa0bf5b3..bd8c22f6 100644 --- 
a/test/lib/tilex/markdown_test.exs +++ b/test/lib/tilex/markdown_test.exs @@ -200,30 +200,47 @@ defmodule Lib.Tilex.MarkdownTest do input: "regular script ", html: "
\nregular script <script>alert('attack')</script>
", content: "regular script alert('attack')" + }, + %{ + snippet_description: "does not allow inputs or forms to be rendered as html", + input: "", + html: "", + content: "" + }, + %{ + snippet_description: + "allows HTML5 elements (such as fieldset and legend) and forms as part of codeblocks", + input: + "```html\n\n```", + html: + "<form name='login'>\n<fieldset>\n<legend>Email</legend>\n<input type='email' name='email' required />\n</fieldset>\n<div>\n<input type='submit' value='login' />\n<button type='button' id='validate'>\nvalidate\n</button>\n</div>\n</form>
",
+ content: "Email\n\n\n\n\n\nvalidate"
}
]
describe "to_html/1" do
- for %{input: input, html: html} <- @to_html_data do
- @input input
- @html html
+ for example <- @to_html_data do # generates one pair of tests per fixture map
+ @input Map.fetch!(example, :input) # required key: raises KeyError while compiling if a fixture omits it
+ @html Map.fetch!(example, :html) # required key, asserted the same way as :input
+ @test_context Map.get(example, :snippet_description, inspect(@input)) # optional label; defaults to the inspected input
- test "converts markdown '#{inspect(@input)}' into html live" do
+ test "converts markdown '#{@test_context}' into html live" do
assert Markdown.to_html_live(@input) == String.trim(@html)
end
- test "live and cached produce same value for '#{inspect(@input)}'" do
+ test "live and cached produce same value for '#{@test_context}'" do
assert Markdown.to_html_live(@input) == Markdown.to_html(@input)
end
end
end
describe "to_content/1" do
- for %{input: input, content: content} <- @to_html_data do
- @input input
- @content content
+ for example <- @to_html_data do # generates one test per fixture map
+ @input Map.fetch!(example, :input) # required key: raises KeyError while compiling if a fixture omits it
+ @content Map.fetch!(example, :content) # required key, asserted the same way as :input
+ @test_context Map.get(example, :snippet_description, inspect(@input)) # optional label; defaults to the inspected input
- test "gests content out of markdown '#{inspect(@input)}'" do
+ test "gets content out of markdown '#{@test_context}'" do
assert Markdown.to_content(@input) == String.trim(@content)
end
end