Skip to content

Commit

Permalink
Merge pull request #72 from mlibrary/remove_punctuation_from_browse_s…
Browse files Browse the repository at this point in the history
…tring

Clean up the user-submitted author browse string
  • Loading branch information
niquerio authored Dec 14, 2022
2 parents fd21365 + 94a897c commit f79fcb2
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 1 deletion.
3 changes: 2 additions & 1 deletion catalog-browse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

require_relative "lib/catalog_solr_client"
require_relative "lib/utilities/browse_solr_client"
require_relative "lib/utilities/string_cleaner"
require_relative "lib/models/browse_list"
require_relative "lib/models/browse_list_presenter"
require_relative "lib/models/callnumber_list"
Expand All @@ -21,7 +22,7 @@

if ENV.fetch("AUTHOR_ON") == "true"
get "/author" do
author = params[:query]
author = StringCleaner.cleanup_author_browse_string(params[:query])
reference_id = params[:reference_id] || author
begin
list = AuthorList.for(direction: params[:direction], reference_id: reference_id, num_rows_to_display: 20, original_reference: author, banner_reference: params[:banner_reference])
Expand Down
43 changes: 43 additions & 0 deletions lib/utilities/string_cleaner.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Helpers for normalizing user-submitted search strings before they are used
# as browse queries.
module StringCleaner
  # Field prefixes (the part before the colon in e.g. "author:Twain") that a
  # submitted query string may start with.
  # NOTE(review): the original author was unsure whether this list is
  # complete, and an earlier TODO pointed at 00-catalog.yml as the place the
  # prefixes are defined -- confirm the two stay in sync.
  PREFIXES = %w[
    academic_discipline
    author
    call_number_starts_with
    contains
    contributor
    date
    isbn
    isn
    issn
    journal_title
    keyword
    pmid
    publication_date
    publisher
    realuth
    series
    subject
    title
    title_starts_with
  ].freeze

  # @return [Array<String>] the recognized field prefixes, without the colon
  def self.prefixes
    PREFIXES
  end

  # Removes double and single quote characters from a string.
  #
  # A broader pattern covering all punctuation and symbol classes was left
  # disabled by the original author and is kept here for reference:
  #   str.gsub(/[\p{P}\p{Sm}\p{Sc}\p{So}^`]/, "")
  #
  # @param str [String]
  # @return [String] a new string; the argument is not modified
  def self.strip_symbols(str)
    str.delete("\"'")
  end

  # Cleans a user-submitted author browse string: drops at most one leading
  # "<prefix>:" (see .prefixes) and then removes quote characters.
  #
  # The prefix is only recognized at the very start of the string (not after
  # an embedded newline), and the argument is never mutated, so frozen strings
  # and request params can be passed in safely.
  #
  # @param str [String, nil] the raw query string
  # @return [String, nil] the cleaned string, or nil when given nil
  def self.cleanup_author_browse_string(str)
    # A request without a query param hands us nil; pass it through untouched
    # rather than raising NoMethodError.
    return str if str.nil?

    prefix = prefixes.find { |candidate| str.start_with?("#{candidate}:") }
    # +1 skips the colon that follows the prefix.
    unprefixed = prefix ? str[(prefix.length + 1)..-1] : str
    strip_symbols(unprefixed)
  end
end
15 changes: 15 additions & 0 deletions spec/utilities/string_cleaner_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Specs for StringCleaner, the helper that normalizes user-submitted browse
# query strings.
describe StringCleaner do
  context ".strip_symbols" do
    it "removes certain punctuation" do
      expect(described_class.strip_symbols('"\'')).to eq("")
    end

    it "leaves other characters untouched" do
      expect(described_class.strip_symbols("O'Brien, Tim")).to eq("OBrien, Tim")
    end
  end

  context ".cleanup_author_browse_string" do
    it "removes 'author:' prefix" do
      expect(described_class.cleanup_author_browse_string('author:"Author Name"')).to eq("Author Name")
    end

    # Parentheses are not stripped; only the field prefix is removed here.
    it "removes 'isn:' prefix" do
      expect(described_class.cleanup_author_browse_string("isn:(123-456)")).to eq("(123-456)")
    end

    it "removes only the first prefix" do
      expect(described_class.cleanup_author_browse_string("author:title:Name")).to eq("title:Name")
    end

    it "returns a string without a prefix or quotes unchanged" do
      expect(described_class.cleanup_author_browse_string("Twain, Mark")).to eq("Twain, Mark")
    end
  end
end

0 comments on commit f79fcb2

Please sign in to comment.