# Helpers that tidy user-supplied query strings before they are used as
# browse references (see the "/author" route hunk quoted at the bottom).
module StringCleaner
  # Field prefixes that can precede a query, e.g. "author:Twain".
  # The original notes say these are the prefixes defined in 00-catalog.yml
  # and ask whether more exist -- TODO confirm the list is complete.
  PREFIXES = %w[
    academic_discipline
    author
    call_number_starts_with
    contains
    contributor
    date
    isbn
    isn
    issn
    journal_title
    keyword
    pmid
    publication_date
    publisher
    realuth
    series
    subject
    title
    title_starts_with
  ].freeze

  # One known prefix followed by a colon, anchored to the very start of the
  # string. `\A` is used instead of `^` so that a prefix appearing after a
  # newline inside the value is never treated as a leading field prefix.
  PREFIX_PATTERN = /\A#{Regexp.union(PREFIXES)}:/

  # @return [Array<String>] a fresh copy of the recognised field prefixes;
  #   callers may modify the returned array without affecting this module.
  def self.prefixes
    PREFIXES.dup
  end

  # Removes straight double and single quote characters.
  #
  # A broader character class was left commented out in the original and may
  # be wanted later: /[\p{P}\p{Sm}\p{Sc}\p{So}^`]/
  #
  # @param str [String, nil] text to clean; nil is treated as ""
  # @return [String] a new string, the argument is never modified
  def self.strip_symbols(str)
    str.to_s.delete(%q("'))
  end

  # Author-browse specific cleaning: drops at most one leading field prefix
  # ("author:", "title:", ...) and then strips quote characters.
  #
  # @param str [String, nil] raw query; nil (no query parameter) yields ""
  # @return [String] a new cleaned string; the argument is left untouched, so
  #   frozen strings and request params are safe to pass in
  def self.cleanup_author_browse_string(str)
    # `sub` (not `sub!`) so the caller's object is not mutated, and only the
    # first match is replaced, which keeps the "remove one prefix" behaviour.
    without_prefix = str.to_s.sub(PREFIX_PATTERN, "")
    strip_symbols(without_prefix)
  end
end

# ---------------------------------------------------------------------------
# The module above arrived in a patch that also touched two other files.
# Those hunks are reproduced here for reference only.
#
# catalog-browse.rb
#     require_relative "lib/catalog_solr_client"
#     require_relative "lib/utilities/browse_solr_client"
#   + require_relative "lib/utilities/string_cleaner"
#     require_relative "lib/models/browse_list"
#     ...
#     if ENV.fetch("AUTHOR_ON") == "true"
#       get "/author" do
#   -     author = params[:query]
#   +     author = StringCleaner.cleanup_author_browse_string(params[:query])
#         reference_id = params[:reference_id] || author
#         begin
#           list = AuthorList.for(direction: params[:direction], reference_id: reference_id,
#                                 num_rows_to_display: 20, original_reference: author,
#                                 banner_reference: params[:banner_reference])
#
# spec/utilities/string_cleaner_spec.rb
#   describe StringCleaner do
#     context ".strip_symbols" do
#       it "removes certain punctuation" do
#         expect(described_class.strip_symbols('"\'')).to eq("")
#       end
#     end
#     context ".cleanup_author_browse_string" do
#       it "removes 'author:' prefix" do
#         expect(described_class.cleanup_author_browse_string('author:"Author Name"')).to eq("Author Name")
#       end
#       it "removes 'isn(' prefix" do
#         expect(described_class.cleanup_author_browse_string("isn:(123-456)")).to eq("(123-456)")
#       end
#     end
#   end