From 436deed9d0b1283fe9241890b83160a9895d0503 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 21 Oct 2022 10:58:19 -0500 Subject: [PATCH 1/7] make sure settings hash is what elasticsearch expects --- lib/config/es_api_schemas/2.0.yml | 65 ++++++++++++++++--------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/lib/config/es_api_schemas/2.0.yml b/lib/config/es_api_schemas/2.0.yml index f400161b2..d46c63ca0 100644 --- a/lib/config/es_api_schemas/2.0.yml +++ b/lib/config/es_api_schemas/2.0.yml @@ -1,37 +1,38 @@ # compatible with Apium v2.0 settings: - analysis: - char_filter: - escapes: - type: mapping - mappings: - - " => " - - " => " - - " => " - - " => " - - " => " - - " => " - - "- => " - - "& => " - - ": => " - - "; => " - - ", => " - - ". => " - - "$ => " - - "@ => " - - "~ => " - - "\" => " - - "' => " - - "[ => " - - "] => " - normalizer: - keyword_normalized: - type: custom - char_filter: - - escapes - filter: - - asciifolding - - lowercase + settings: + analysis: + char_filter: + escapes: + type: mapping + mappings: + - " => " + - " => " + - " => " + - " => " + - " => " + - " => " + - "- => " + - "& => " + - ": => " + - "; => " + - ", => " + - ". 
=> " + - "$ => " + - "@ => " + - "~ => " + - "\" => " + - "' => " + - "[ => " + - "] => " + normalizer: + keyword_normalized: + type: custom + char_filter: + - escapes + filter: + - asciifolding + - lowercase mappings: properties: identifier: From 8a35aa24e0b8ae7042bd18c0c19b4ba7c8059bb6 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Fri, 21 Oct 2022 11:16:25 -0500 Subject: [PATCH 2/7] change where mappings are posted for es upgrade --- lib/datura/elasticsearch/index.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/datura/elasticsearch/index.rb b/lib/datura/elasticsearch/index.rb index cb8da9b84..b90c4d0a0 100644 --- a/lib/datura/elasticsearch/index.rb +++ b/lib/datura/elasticsearch/index.rb @@ -21,7 +21,7 @@ def initialize(options = nil, schema_mapping: false) @index_url = File.join(@options["es_path"], @options["es_index"]) @pretty_url = "#{@index_url}?pretty=true" - @mapping_url = File.join(@index_url, "_mapping", "_doc?pretty=true") + @mapping_url = File.join(@index_url, "_mapping?pretty=true") # yaml settings (if exist) and mappings @requested_schema = YAML.load_file(@options["es_schema"]) @@ -33,7 +33,6 @@ def initialize(options = nil, schema_mapping: false) def create json = @requested_schema["settings"].to_json puts "Creating ES index for API version #{@options["api_version"]}: #{@pretty_url}" - if json && json != "null" RestClient.put(@pretty_url, json, { content_type: :json }) { |res, req, result| if result.code == "200" @@ -77,13 +76,13 @@ def get_schema_mapping # if mapping has not already been set, get the schema and manipulate if !defined?(@schema_mapping) @schema_mapping = { - "dyanmic" => nil, # /regex|regex/ + "dynamic" => nil, # /regex|regex/ "fields" => [], # [ fields ] "nested" => {} # { field: [ nested_fields ] } } schema = get_schema[@options["es_index"]] - doc = schema["mappings"]["_doc"] + doc = schema["mappings"] doc["properties"].each do |field, value| @schema_mapping["fields"] << field if value["type"] 
== "nested" From 2c7fa5c5a02d6b2e31310ae189a2090547f6e342 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 25 Oct 2022 12:41:36 -0500 Subject: [PATCH 3/7] add headers to ES requests for authorization --- lib/datura/elasticsearch/alias.rb | 6 +++--- lib/datura/elasticsearch/data.rb | 4 ++-- lib/datura/elasticsearch/index.rb | 18 +++++++++++------- lib/datura/file_type.rb | 5 ++++- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/lib/datura/elasticsearch/alias.rb b/lib/datura/elasticsearch/alias.rb index 177ee14d1..4d6a3a118 100644 --- a/lib/datura/elasticsearch/alias.rb +++ b/lib/datura/elasticsearch/alias.rb @@ -20,7 +20,7 @@ def self.add { add: { alias: ali, index: idx } } ] } - RestClient.post(base_url, data.to_json, { content_type: :json }) { |res, req, result| + RestClient.post(base_url, data.to_json, Datura::Helpers.construct_auth_header(options).merge({ content_type: :json })) { |res, req, result| if result.code == "200" puts res puts "Successfully added alias #{ali}. Current alias list:" @@ -40,7 +40,7 @@ def self.delete url = File.join(options["es_path"], idx, "_alias", ali) - res = JSON.parse(RestClient.delete(url)) + res = JSON.parse(RestClient.delete(url, Datura::Helpers.construct_auth_header(options))) puts JSON.pretty_generate(res) list end @@ -48,7 +48,7 @@ def self.delete def self.list options = Datura::Options.new({}).all - res = RestClient.get(File.join(options["es_path"], "_aliases")) + res = RestClient.get(File.join(options["es_path"], "_aliases"), Datura::Helpers.construct_auth_header(options)) JSON.pretty_generate(JSON.parse(res)) end diff --git a/lib/datura/elasticsearch/data.rb b/lib/datura/elasticsearch/data.rb index 5deedadb1..4af171fce 100644 --- a/lib/datura/elasticsearch/data.rb +++ b/lib/datura/elasticsearch/data.rb @@ -47,7 +47,7 @@ def self.clear_all(options) if confirm == "Yes I'm sure" url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true") json = { "query" => { "match_all" => {} } } - RestClient.post(url, json.to_json, { content_type: :json }) { |res, req, result| + 
RestClient.post(url, json.to_json, Datura::Helpers.construct_auth_header(options).merge({ content_type: :json })) { |res, req, result| if result.code == "200" puts res else @@ -66,7 +66,7 @@ def self.clear_index(options) if confirmation data = self.build_clear_data(options) - RestClient.post(url, data.to_json, { content_type: :json }) { |res, req, result| + RestClient.post(url, data.to_json, Datura::Helpers.construct_auth_header(options).merge({ content_type: :json })) { |res, req, result| if result.code == "200" puts res else diff --git a/lib/datura/elasticsearch/index.rb b/lib/datura/elasticsearch/index.rb index b90c4d0a0..337f5cf04 100644 --- a/lib/datura/elasticsearch/index.rb +++ b/lib/datura/elasticsearch/index.rb @@ -1,6 +1,7 @@ require "json" require "rest-client" require "yaml" +require "base64" require_relative "./../elasticsearch.rb" @@ -25,6 +26,7 @@ def initialize(options = nil, schema_mapping: false) # yaml settings (if exist) and mappings @requested_schema = YAML.load_file(@options["es_schema"]) + @auth_header = Datura::Helpers.construct_auth_header(@options) # if requested, grab the mapping currently associated with this index # otherwise wait until after the requested schema is loaded get_schema_mapping if schema_mapping @@ -34,7 +36,7 @@ def create json = @requested_schema["settings"].to_json puts "Creating ES index for API version #{@options["api_version"]}: #{@pretty_url}" if json && json != "null" - RestClient.put(@pretty_url, json, { content_type: :json }) { |res, req, result| + RestClient.put(@pretty_url, json, @auth_header.merge({ content_type: :json })) { |res, req, result| if result.code == "200" puts res else @@ -42,7 +44,7 @@ def create end } else - RestClient.put(@pretty_url, nil) { |res, req, result| + RestClient.put(@pretty_url, nil, @auth_header) { |res, req, result| if result.code == "200" puts res else @@ -55,7 +57,7 @@ def create def delete puts "Deleting #{@options["es_index"]} via url #{@pretty_url}" - RestClient.delete(@pretty_url) { |res, req, result| + RestClient.delete(@pretty_url, 
@auth_header) { |res, req, result| if result.code != "200" raise "#{result.code} error deleting Elasticsearch index: #{res}" end @@ -63,7 +65,7 @@ def delete end def get_schema - RestClient.get(@mapping_url) { |res, req, result| + RestClient.get(@mapping_url, @auth_header) { |res, req, result| if result.code == "200" JSON.parse(res) else @@ -110,7 +112,7 @@ def set_schema json = @requested_schema["mappings"].to_json puts "Setting schema: #{@mapping_url}" - RestClient.put(@mapping_url, json, { content_type: :json }) { |res, req, result| + RestClient.put(@mapping_url, json, @auth_header.merge({ content_type: :json })) { |res, req, result| if result.code == "200" puts res else @@ -206,8 +208,9 @@ def self.clear_all(options) confirm = STDIN.gets.chomp if confirm == "Yes I'm sure" url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true") + auth_header = Datura::Helpers.construct_auth_header(options) json = { "query" => { "match_all" => {} } } - RestClient.post(url, json.to_json, { content_type: :json }) { |res, req, result| + RestClient.post(url, json.to_json, auth_header.merge({ content_type: :json })) { |res, req, result| if result.code == "200" puts res else @@ -226,7 +229,8 @@ def self.clear_index(options) if confirmation data = self.build_clear_data(options) - RestClient.post(url, data.to_json, { content_type: :json }) { |res, req, result| + auth_header = Datura::Helpers.construct_auth_header(options) + RestClient.post(url, data.to_json, auth_header.merge({content_type: :json })) { |res, req, result| if result.code == "200" puts res else diff --git a/lib/datura/file_type.rb b/lib/datura/file_type.rb index d57a5b62a..aeb0f6f46 100644 --- a/lib/datura/file_type.rb +++ b/lib/datura/file_type.rb @@ -30,6 +30,7 @@ def initialize(location, options) @out_html = File.join(output, "html") @out_iiif = File.join(output, "iiif") @out_solr = File.join(output, "solr") + @auth_header = Datura::Helpers.construct_auth_header(options) 
Datura::Helpers.make_dirs(@out_es, @out_html, @out_iiif, @out_solr) # script locations set in child classes end @@ -68,7 +69,9 @@ def post_es(es) # NOTE: If you need to do partial updates rather than replacement of doc # you will need to add _update at the end of this URL begin - RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, {:content_type => :json } ) + puts @auth_header + byebug + RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, @auth_header.merge({:content_type => :json }) ) rescue => e error = "Error transforming or posting to ES for #{self.filename(false)}: #{e}" end From 25f0a2bc1e140cfb64980c181d36866d5e10f0a8 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 25 Oct 2022 12:42:20 -0500 Subject: [PATCH 4/7] add method to construct basic auth header from options --- lib/datura/helpers.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/datura/helpers.rb b/lib/datura/helpers.rb index 831d148c3..6e64557e2 100644 --- a/lib/datura/helpers.rb +++ b/lib/datura/helpers.rb @@ -171,4 +171,12 @@ def self.should_update?(file, since_date=nil) end end + # Builds the HTTP Basic "Authorization" header hash from options["es_user"] and options["es_password"]. + # strict_encode64 is used because encode64 inserts a line feed every 60 encoded characters, which is invalid in a header value. + def self.construct_auth_header(options) + username = options["es_user"] + password = options["es_password"] + { "Authorization" => "Basic #{Base64.strict_encode64("#{username}:#{password}")}" } + end + end From 0ec26656597648c9f2061c4bda03a9e947d71521 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 25 Oct 2022 14:52:59 -0500 Subject: [PATCH 5/7] remove debugging code --- lib/datura/file_type.rb | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/datura/file_type.rb b/lib/datura/file_type.rb index aeb0f6f46..e17114837 100644 --- a/lib/datura/file_type.rb +++ b/lib/datura/file_type.rb @@ -69,8 +69,6 @@ def post_es(es) # NOTE: If you need to do partial updates rather than replacement of doc # you will need to add _update at the end of this URL begin - puts @auth_header - byebug RestClient.put("#{es.index_url}/_doc/#{id}", doc.to_json, @auth_header.merge({:content_type => :json }) ) 
rescue => e error = "Error transforming or posting to ES for #{self.filename(false)}: #{e}" From 7b90a0954fca67286217033497b298335c515972 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Tue, 25 Oct 2022 14:56:24 -0500 Subject: [PATCH 6/7] update conditional logic for status code, dynamic_templates key --- lib/datura/elasticsearch/index.rb | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/datura/elasticsearch/index.rb b/lib/datura/elasticsearch/index.rb index 337f5cf04..664d5c690 100644 --- a/lib/datura/elasticsearch/index.rb +++ b/lib/datura/elasticsearch/index.rb @@ -93,12 +93,14 @@ def get_schema_mapping end regex_pieces = [] - doc["dynamic_templates"].each do |template| - mapping = template.map { |k,v| v["match"] }.first - # dynamic fields are listed like *_k and will need - # to be converted to ^.*_k$, then combined into a mega-regex - es_match = mapping.sub("*", ".*") - regex_pieces << es_match + if doc["dynamic_templates"] + doc["dynamic_templates"].each do |template| + mapping = template.map { |k,v| v["match"] }.first + # dynamic fields are listed like *_k and will need + # to be converted to ^.*_k$, then combined into a mega-regex + es_match = mapping.sub("*", ".*") + regex_pieces << es_match + end end if !regex_pieces.empty? 
regex_joined = regex_pieces.join("|") @@ -231,7 +233,7 @@ def self.clear_index(options) data = self.build_clear_data(options) auth_header = Datura::Helpers.construct_auth_header(options) RestClient.post(url, data.to_json, auth_header.merge({content_type: :json })) { |res, req, result| - if result.code == "200" + if result.code == "200" || result.code == "201" puts res else raise "#{result.code} error when clearing index: #{res}" From f982173a634cc26bf4202bfb639902d8433739b3 Mon Sep 17 00:00:00 2001 From: William Dewey Date: Thu, 26 Jan 2023 14:24:01 -0600 Subject: [PATCH 7/7] change endpoint for delete_by_query for ES8 compatibility --- lib/datura/elasticsearch/index.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/datura/elasticsearch/index.rb b/lib/datura/elasticsearch/index.rb index 664d5c690..71582e7ac 100644 --- a/lib/datura/elasticsearch/index.rb +++ b/lib/datura/elasticsearch/index.rb @@ -209,7 +209,7 @@ def self.clear_all(options) puts "Type: 'Yes I'm sure'" confirm = STDIN.gets.chomp if confirm == "Yes I'm sure" - url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true") + url = File.join(options["es_path"], options["es_index"], "_delete_by_query?pretty=true") auth_header = Datura::Helpers.construct_auth_header(options) json = { "query" => { "match_all" => {} } } RestClient.post(url, json.to_json, auth_header.merge({ content_type: :json })) { |res, req, result| @@ -226,7 +226,7 @@ def self.clear_all(options) end def self.clear_index(options) - url = File.join(options["es_path"], options["es_index"], "_doc", "_delete_by_query?pretty=true") + url = File.join(options["es_path"], options["es_index"], "_delete_by_query?pretty=true") confirmation = self.confirm_clear(options, url) if confirmation