From dcfbbcafad5f0e5181566c71e3c7e8100f466abf Mon Sep 17 00:00:00 2001 From: Dru Sellers Date: Thu, 22 Nov 2018 12:35:35 -0600 Subject: [PATCH] Add support for user configured mapping and settings --- README.md | 43 +++++++++++++++++++++----- lib/searchyll/configuration.rb | 55 +++++++++++++++++++++++++++++++--- lib/searchyll/indexer.rb | 25 +++++++++------- 3 files changed, 100 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index b2fbb3d..a80a420 100644 --- a/README.md +++ b/README.md @@ -22,15 +22,42 @@ gems: ## Configuration -``` +```yaml elasticsearch: - url: "http://localhost:9200/" # Required. Supports auth and SSL: https://user:pass@someurl.com - # Can also read URLs stored in environment variable named - # BONSAI_URL and ELASTICSEARCH_URL. - number_of_shards: 1 # Optional. Default is 1 primary shard. - number_of_replicas: 1 # Optional. Default is 1 replica. - index_name: "jekyll" # Optional. Default is "jekyll". - default_type: "post" # Optional. Default type is "post". + url: "http://localhost:9200/" # Required. Supports auth and SSL: https://user:pass@someurl.com + # Can also read URLs stored in environment variable named + # BONSAI_URL and ELASTICSEARCH_URL. + number_of_shards: 1 # Optional. Default is 1 primary shard. + number_of_replicas: 1 # Optional. Default is 1 replica. + index_name: "jekyll" # Optional. Default is "jekyll". + default_type: "post" # Optional. Default type is "post". + custom_settings: _es_settings.yml # Optional. No default. Relative to your src folder + custom_mappings: _es_mappings.yml # Optional. No default. 
Relative to your src folder +``` + +### Custom Settings File Example + +The file's contents are used as the `settings` object of a [create index](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html) call. + +```yaml +analysis: + analyzer: + stop_analyzer: + type: stop + stopwords: _english_ +index: + number_of_shards: 1 + number_of_replicas: 0 +``` + +### Custom Mappings File Example + +The file's contents are used as the value of `mappings.[type]` in a [create index](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-create-index.html) call, where `[type]` is the configured `default_type`. + +```yaml +properties: + field1: + type: text ``` ## Development diff --git a/lib/searchyll/configuration.rb b/lib/searchyll/configuration.rb index 9cb7738..504a03f 100644 --- a/lib/searchyll/configuration.rb +++ b/lib/searchyll/configuration.rb @@ -18,12 +18,13 @@ def valid? def reasons reasons = [] - if elasticsearch_url && elasticsearch_url.empty? + + if elasticsearch_url.nil? || elasticsearch_url.empty? reasons << 'No Elasticsearch url configured' reasons << ' Looked in ENV[BONSAI_URL]' reasons << ' Looked in ENV[ELASTICSEARCH_URL]' reasons << ' Looked in _config.elasticsearch.url' - elsif ! elasticsearch_url.start_with? 'http' + elsif elasticsearch_url && ! 
elasticsearch_url.start_with?('http') reasons << "Elasticsearch url must start with 'http' or 'https'" reasons << " Current Value: #{elasticsearch_url}" reasons << " Current Source: #{elasticsearch_url_source}" @@ -46,12 +47,16 @@ def elasticsearch_url_source # Getter for the number of primary shards def elasticsearch_number_of_shards - site.config['elasticsearch']['number_of_shards'] || 1 + settings = elasticsearch_settings + + settings['index']['number_of_shards'] end # Getter for the number of replicas def elasticsearch_number_of_replicas - site.config['elasticsearch']['number_of_replicas'] || 1 + settings = elasticsearch_settings + + settings['index']['number_of_replicas'] end # Getter for the index name @@ -63,5 +68,47 @@ def elasticsearch_index_base_name def elasticsearch_default_type site.config['elasticsearch']['default_type'] || 'post' end + + # Getter for es mapping + def elasticsearch_mapping_path + site.config['elasticsearch']['custom_mappings'] + end + + # Getter for es settings + def elasticsearch_settings_path + site.config['elasticsearch']['custom_settings'] + end + + def elasticsearch_mapping + read_yaml(elasticsearch_mapping_path, nil) + end + + def elasticsearch_settings + shards = site.config['elasticsearch']['number_of_shards'] || 1 + replicas = site.config['elasticsearch']['number_of_replicas'] || 1 + read_yaml(elasticsearch_settings_path, { + 'index' => { + 'number_of_shards' => shards, + 'number_of_replicas' => replicas, + 'refresh_interval' => -1 + } + }) + end + + def read_yaml(path, default) + if path + joined_path = File.join(@site.source, path) + expanded_path = File.expand_path(joined_path) + if File.exist?(expanded_path) + content = File.read(expanded_path) + # SafeYAML comes with Jekyll + SafeYAML.load(content) + else + default + end + else + default + end + end end end diff --git a/lib/searchyll/indexer.rb b/lib/searchyll/indexer.rb index c0e1647..f1bc0cc 100644 --- a/lib/searchyll/indexer.rb +++ b/lib/searchyll/indexer.rb @@ 
-111,20 +111,23 @@ def http_start # Prepare our indexing run by creating a new index. def prepare_index - create_index = http_put("/#{elasticsearch_index_name}") - create_index.body = { - index: { - number_of_shards: configuration.elasticsearch_number_of_shards, - number_of_replicas: 0, - refresh_interval: -1 - } - }.to_json # TODO: index settings + create_index_request = http_put("/#{elasticsearch_index_name}") + payload = { + settings: configuration.elasticsearch_settings, + } + + if configuration.elasticsearch_mapping + payload['mappings'] = {} + payload['mappings'].store(configuration.elasticsearch_default_type, configuration.elasticsearch_mapping) + end + + json_payload = payload.to_json + + create_index_request.body = json_payload http_start do |http| - http.request(create_index) + http.request(create_index_request) end - - # TODO: mapping? end def http_put(path)