Skip to content

Commit

Permalink
Implement custom scrubber for Alchemy::Ingredients::Richtext
Browse files Browse the repository at this point in the history
We found that using Rails' HTML sanitizer does more than we want the
Richtext sanitization to do: It does not just remove nodes that are not
in the safelist, it also escapes some markup (especially in links).

This introduces a custom Loofah "scrubber" that only cares about the
element safelist.

The `sanitized_body` attribute is not for escaping at the view layer,
where all these safety precautions are necessary, but just for making
sure admin's don't use iframes when we don't want to.

See the following related issues and commits:
rails/rails-html-sanitizer@f3ba1a8
sparklemotion/nokogiri#3104
sparklemotion/nokogiri#969 (comment)
flavorjones/loofah#14 (comment)
  • Loading branch information
mamhoff committed Jan 19, 2024
1 parent 7ab2e6f commit b59edc6
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 6 deletions.
9 changes: 4 additions & 5 deletions app/models/alchemy/ingredients/richtext.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# frozen_string_literal: true

require "alchemy/scrubbers/safe_list"
module Alchemy
module Ingredients
# A blob of richtext
Expand Down Expand Up @@ -39,14 +40,12 @@ def custom_tinymce_config
private

def strip_content
self.stripped_body = Rails::Html::FullSanitizer.new.sanitize(value)
self.stripped_body = Rails::HTML5::FullSanitizer.new.sanitize(value)
end

def sanitize_content
self.sanitized_body = Rails::Html::SafeListSanitizer.new.sanitize(
value,
sanitizer_settings
)
scrubber = Alchemy::Scrubbers::SafeList.new(sanitizer_settings)
self.sanitized_body = Loofah.html5_fragment(value).scrub!(scrubber).to_html
end

def sanitizer_settings
Expand Down
51 changes: 51 additions & 0 deletions lib/alchemy/scrubbers/safe_list.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# frozen_string_literal: true

module Alchemy
module Scrubbers
class SafeList < Loofah::Scrubber
attr_reader :safe_tags, :safe_attributes

def initialize(config)
@direction = :top_down
@safe_tags = config[:safe_tags] || Rails::HTML::SafeListSanitizer::DEFAULT_ALLOWED_TAGS
@safe_attributes = config[:safe_attributes] || Rails::HTML::SafeListSanitizer::DEFAULT_ALLOWED_ATTRIBUTES
end

def scrub(node)
return CONTINUE if sanitize(node) == CONTINUE

node.remove
STOP
end

private

def sanitize(node)
case node.type
when Nokogiri::XML::Node::ELEMENT_NODE
if allowed_element?(node.name)
scrub_attributes(node)
return Loofah::Scrubber::CONTINUE
end
when Nokogiri::XML::Node::TEXT_NODE
return Loofah::Scrubber::CONTINUE
end
Loofah::Scrubber::STOP
end

def allowed_element?(node_name)
node_name.in?(safe_tags)
end

def scrub_attributes(node)
node.attribute_nodes.each do |attr_node|
if safe_attributes.include?(attr_node.name)
next
else
attr_node.remove
end
end
end
end
end
end
42 changes: 42 additions & 0 deletions spec/libraries/alchemy/scrubbers/safe_list_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# frozen_string_literal: true

require "rails_helper"
require "alchemy/scrubbers/safe_list"

RSpec.describe Alchemy::Scrubbers::SafeList do
let(:config) { {} }
let(:scrubber) { described_class.new(config) }
subject { Loofah.html5_fragment(html).scrub!(scrubber).to_html }

describe "#scrub" do
context "with a tag that is not allowed" do
let(:html) { "<script> console.log('oops') </script>" }

it { is_expected.to eq("") }
end

context "with an allowed tag" do
let(:html) { "<p>Some text</p>" }

it { is_expected.to eq(html) }
end

context "with an allowed attribute" do
let(:html) { "<p class=\"pretty\">Some text</p>" }

it { is_expected.to eq(html) }
end

context "with a disallowed attribute" do
let(:html) { "<p style='color: red;'>Some text</p>" }

it { is_expected.to eq("<p>Some text</p>") }
end

context "with a link with a space in the href" do
let(:html) { "<a href=\"/hello/ \">Hello!</a>" }

it { is_expected.to eq(html) }
end
end
end
14 changes: 13 additions & 1 deletion spec/models/alchemy/ingredients/richtext_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
end
let(:richtext_settings) { {} }

let(:value) { "<h1 style=\"color: red;\">Hello!</h1><p class=\"green\">Welcome to Peters Petshop.</p>" }

let(:richtext_ingredient) do
described_class.new(
element: element,
type: described_class.name,
role: "text",
value: "<h1 style=\"color: red;\">Hello!</h1><p class=\"green\">Welcome to Peters Petshop.</p>"
value: value
)
end

Expand All @@ -33,6 +35,16 @@
expect(richtext_ingredient.sanitized_body).to eq("<h1>Hello!</h1><p class=\"green\">Welcome to Peters Petshop.</p>")
end

context "sanitizing with spaces in a link" do
let(:value) { "<a href=\"/hello/ \">Hello!</a><p class=\"green\">Welcome to Peters Petshop.</p>" }

it "won't HTML escape spaces in links" do
richtext_ingredient.save

expect(richtext_ingredient.sanitized_body).to eq("<a href=\"/hello/ \">Hello!</a><p class=\"green\">Welcome to Peters Petshop.</p>")
end
end

describe "#custom_tinymce_config" do
subject { richtext_ingredient.custom_tinymce_config }

Expand Down

0 comments on commit b59edc6

Please sign in to comment.