Skip to content

Commit

Permalink
Updated the ES 7.X transformation code to handle arbitrary type names (
Browse files Browse the repository at this point in the history
…opensearch-project#1014)

* Updated the ES 7.X transformation code to handle arbitrary type names

Signed-off-by: Chris Helma <[email protected]>

* Minor tweaks to transformer code

Signed-off-by: Chris Helma <[email protected]>

* Improved a comment

Signed-off-by: Chris Helma <[email protected]>

* Made Spotless happy

Signed-off-by: Chris Helma <[email protected]>

* Updates per PR comments and discussion

Signed-off-by: Chris Helma <[email protected]>

---------

Signed-off-by: Chris Helma <[email protected]>
  • Loading branch information
chelma authored Sep 25, 2024
1 parent 50a3156 commit eb83fa0
Show file tree
Hide file tree
Showing 19 changed files with 230 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
ports:
- '19200:9200'
volumes:
- snapshotStorage:/snapshots
- ./snapshots:/snapshots

# Sample command to kick off RFS here: https://github.com/opensearch-project/opensearch-migrations/blob/main/RFS/README.md#using-docker
reindex-from-snapshot:
Expand All @@ -31,7 +31,7 @@ services:
- AWS_SECRET_ACCESS_KEY=${secret_key}
- AWS_SESSION_TOKEN=${session_token}
volumes:
- snapshotStorage:/snapshots
- ./snapshots:/snapshots

opensearchtarget:
image: 'opensearchproject/opensearch:2.11.1'
Expand All @@ -46,7 +46,3 @@ services:
networks:
migrations:
driver: bridge

volumes:
snapshotStorage:
driver: local
Original file line number Diff line number Diff line change
Expand Up @@ -60,35 +60,66 @@ public static ObjectNode convertFlatSettingsToTree(ObjectNode flatSettings) {
}

/**
 * If the object has mappings, then we need to ensure they aren't buried underneath intermediate levels.
 *
 * Does nothing when {@code root} has no mappings key. Otherwise the value stored under the mappings key is
 * replaced, in place, with the result of {@code getMappingsFromBeneathIntermediate(ObjectNode)}.
 *
 * @param root the node whose mappings entry (if present) is rewritten in place
 * @throws IllegalArgumentException if the mappings value is neither a single-member list holding an object
 *     nor an object itself
 */
public static void removeIntermediateMappingsLevels(ObjectNode root) {
    if (!root.has(MAPPINGS_KEY_STR)) {
        return;
    }
    var val = root.get(MAPPINGS_KEY_STR);

    if (val instanceof ArrayNode) {
        /*
         * This probably came from a Snapshot
         * There should only be a single member in the list because multi-type mappings were deprecated in ES 6.X and
         * removed in ES 7.X. This list structure appears to be a holdover from previous versions of Elasticsearch.
         * The exact name of the type can be arbitrarily chosen by the user; the default is _doc. We need to extract
         * the mappings from beneath this intermediate key regardless of what it is named.
         * - [{"_doc":{"properties":{"address":{"type":"text"}}}}]
         * - [{"doc":{"properties":{"address":{"type":"text"}}}}]
         * - [{"audit_message":{"properties":{"address":{"type":"text"}}}}]
         *
         * It's not impossible that the intermediate key is not present, in which case we should just extract the mappings:
         * - [{"properties":{"address":{"type":"text"}}}]
         */
        ArrayNode mappingsList = (ArrayNode) val;
        if (mappingsList.size() > 1) {
            throw new IllegalArgumentException("Mappings list contains more than one member; this is unexpected: " + val);
        }
        if (mappingsList.size() == 0) {
            // Report the empty list accurately rather than as "more than one member"
            throw new IllegalArgumentException("Mappings list is empty; expected exactly one member: " + val);
        }

        // Validate before casting so a malformed list surfaces as IllegalArgumentException, not ClassCastException
        var onlyMember = mappingsList.get(0);
        if (!(onlyMember instanceof ObjectNode)) {
            throw new IllegalArgumentException("Mappings object is not in the expected shape: " + val);
        }

        root.set(MAPPINGS_KEY_STR, getMappingsFromBeneathIntermediate((ObjectNode) onlyMember));
    } else if (val instanceof ObjectNode) {
        /*
         * This came from somewhere else (like a REST call to the source cluster). It should be in a shape like:
         * - {"_doc":{"properties":{"address":{"type":"text"}}}}
         * - {"properties":{"address":{"type":"text"}}}
         */
        root.set(MAPPINGS_KEY_STR, getMappingsFromBeneathIntermediate((ObjectNode) val));
    } else {
        throw new IllegalArgumentException("Mappings object is not in the expected shape: " + val);
    }
}

// Extract the mappings from their single-member list, will start like:
// [{"_doc":{"properties":{"address":{"type":"text"}}}}]
public static ObjectNode getMappingsFromBeneathIntermediate(ArrayNode mappingsRoot) {
ObjectNode actualMappingsRoot = (ObjectNode) mappingsRoot.get(0);
if (actualMappingsRoot.has("_doc")) {
return (ObjectNode) actualMappingsRoot.get("_doc").deepCopy();
} else if (actualMappingsRoot.has("doc")) {
return (ObjectNode) actualMappingsRoot.get("doc").deepCopy();
} else if (actualMappingsRoot.has("audit_message")) {
return (ObjectNode) actualMappingsRoot.get("audit_message").deepCopy();
} else {
throw new IllegalArgumentException("Mappings root does not contain one of the expected keys");
/**
 * Extract the mappings from the type dict. It may be that there is no intermediate type key as well. So, the
 * input could be:
 * {"_doc":{"properties":{"address":{"type":"text"}}}}
 * {"properties":{"address":{"type":"text"}}}
 *
 * If there is a type key ('_doc', etc), the key name can be arbitrary. We need to extract the mappings from beneath
 * it regardless of what it is named.
 *
 * @param mappingsRoot the mappings object, with or without an intermediate type key
 * @return {@code mappingsRoot} itself (not a copy) when it already has a "properties" field; otherwise a deep copy
 *     of the object stored under its first field
 * @throws IllegalArgumentException if {@code mappingsRoot} has no "properties" field and either has no fields at
 *     all or its first field does not hold an object
 */
public static ObjectNode getMappingsFromBeneathIntermediate(ObjectNode mappingsRoot) {
    // No intermediate level; the mappings are already at the top
    if (mappingsRoot.has("properties")) {
        return mappingsRoot;
    }

    // The type name is arbitrary, so take whatever the first field is rather than matching on a known name
    var fieldNames = mappingsRoot.fieldNames();
    if (fieldNames.hasNext()) {
        var beneathTypeKey = mappingsRoot.get(fieldNames.next());
        if (beneathTypeKey instanceof ObjectNode) {
            return (ObjectNode) beneathTypeKey.deepCopy();
        }
    }

    // Reached for an empty object or a non-object value under the first field
    throw new IllegalArgumentException("Mappings object is not in the expected shape: " + mappingsRoot.toString());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public static class Snapshot {
public static final Snapshot SNAPSHOT_ES_5_6;
public static final Snapshot SNAPSHOT_ES_6_8;
public static final Snapshot SNAPSHOT_ES_6_8_MERGED;
public static final Snapshot SNAPSHOT_ES_7_10_BWC_CHECK;
public static final Snapshot SNAPSHOT_ES_7_10_W_SOFT;
public static final Snapshot SNAPSHOT_ES_7_10_WO_SOFT;

Expand All @@ -36,6 +37,11 @@ public static class Snapshot {
"rfs_snapshot"
);

SNAPSHOT_ES_7_10_BWC_CHECK = new Snapshot(
rfsBaseDir.resolve(Paths.get("test-resources", "snapshots", "ES_7_10_BWC_Check")),
"rfs-snapshot"
);

SNAPSHOT_ES_7_10_W_SOFT = new Snapshot(
rfsBaseDir.resolve(Paths.get("test-resources", "snapshots", "ES_7_10_Updates_Deletes_w_Soft")),
"rfs_snapshot"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package org.opensearch.migrations.bulkload.transformers;


import org.junit.jupiter.api.Test;

import org.opensearch.migrations.Version;
import org.opensearch.migrations.bulkload.common.FileSystemRepo;
import org.opensearch.migrations.bulkload.common.TestResources;
import org.opensearch.migrations.bulkload.models.GlobalMetadata;
import org.opensearch.migrations.bulkload.models.IndexMetadata;
import org.opensearch.migrations.bulkload.version_os_2_11.GlobalMetadataData_OS_2_11;
import org.opensearch.migrations.bulkload.version_os_2_11.IndexMetadataData_OS_2_11;
import org.opensearch.migrations.cluster.ClusterProviderRegistry;

import lombok.extern.slf4j.Slf4j;

import static org.junit.jupiter.api.Assertions.assertEquals;

@Slf4j
public class Transformer_ES_7_10_OS_2_11Test {

    /**
     * Reads the global metadata from the ES 7.10 BWC-check snapshot, runs it through the transformer, and compares
     * the serialized legacy templates, index templates, and component templates against fixed JSON strings.
     */
    @Test
    public void transformGlobalMetadata_AsExpected() throws Exception {
        TestResources.Snapshot bwcSnapshot = TestResources.SNAPSHOT_ES_7_10_BWC_CHECK;
        var snapshotReader = ClusterProviderRegistry.getSnapshotReader(
            Version.fromString("ES 7.10"),
            new FileSystemRepo(bwcSnapshot.dir)
        );
        var transformer = new Transformer_ES_7_10_OS_2_11(2);

        GlobalMetadata sourceMetadata = snapshotReader.getGlobalMetadata().fromRepo(bwcSnapshot.name);
        var targetMetadata = new GlobalMetadataData_OS_2_11(
            transformer.transformGlobalMetadata(sourceMetadata).toObjectNode()
        );

        // Expected serialized output for each template family
        String expectedBwcTemplates = "{\"bwc_template\":{\"order\":0,\"index_patterns\":[\"bwc_index*\"],\"settings\":{\"number_of_shards\":\"1\",\"number_of_replicas\":\"0\"},\"mappings\":[{\"arbitrary_type\":{\"properties\":{\"title\":{\"type\":\"text\"},\"content\":{\"type\":\"text\"}}}}],\"aliases\":{\"bwc_alias\":{}}}}";
        String expectedIndexTemplates = "{\"fwc_template\":{\"index_patterns\":[\"fwc_index*\"],\"template\":{\"aliases\":{\"fwc_alias\":{}}},\"composed_of\":[\"fwc_mappings\",\"fwc_settings\"]}}";
        String expectedComponentTemplates = "{\"fwc_settings\":{\"template\":{\"settings\":{\"index\":{\"number_of_shards\":\"1\",\"number_of_replicas\":\"0\"}}}},\"fwc_mappings\":{\"template\":{\"mappings\":{\"properties\":{\"title\":{\"type\":\"text\"},\"content\":{\"type\":\"text\"}}}}}}";

        assertEquals(expectedBwcTemplates, targetMetadata.getTemplates().toString());
        assertEquals(expectedIndexTemplates, targetMetadata.getIndexTemplates().toString());
        assertEquals(expectedComponentTemplates, targetMetadata.getComponentTemplates().toString());
    }

    /**
     * Reads the metadata of both indices in the ES 7.10 BWC-check snapshot ("bwc_index_1" and "fwc_index_1"), runs
     * each through the transformer, and compares the resulting raw JSON against fixed strings.
     */
    @Test
    public void transformIndexMetadata_AsExpected() throws Exception {
        TestResources.Snapshot bwcSnapshot = TestResources.SNAPSHOT_ES_7_10_BWC_CHECK;
        var snapshotReader = ClusterProviderRegistry.getSnapshotReader(
            Version.fromString("ES 7.10"),
            new FileSystemRepo(bwcSnapshot.dir)
        );
        var transformer = new Transformer_ES_7_10_OS_2_11(2);

        // Index that the inventory describes as created with a typed (pre-ES 7.0 style) template
        IndexMetadata bwcSource = snapshotReader.getIndexMetadata().fromRepo(bwcSnapshot.name, "bwc_index_1");
        IndexMetadata bwcTransformed = transformer.transformIndexMetadata(bwcSource);
        var bwcTarget = new IndexMetadataData_OS_2_11(
            bwcTransformed.getRawJson(),
            bwcTransformed.getId(),
            bwcTransformed.getName()
        );

        // Index that the inventory describes as created with typeless index/component templates
        IndexMetadata fwcSource = snapshotReader.getIndexMetadata().fromRepo(bwcSnapshot.name, "fwc_index_1");
        IndexMetadata fwcTransformed = transformer.transformIndexMetadata(fwcSource);
        var fwcTarget = new IndexMetadataData_OS_2_11(
            fwcTransformed.getRawJson(),
            fwcTransformed.getId(),
            fwcTransformed.getName()
        );

        String expectedIndexBwc = "{\"version\":3,\"mapping_version\":1,\"settings_version\":1,\"aliases_version\":1,\"routing_num_shards\":1024,\"state\":\"open\",\"settings\":{\"creation_date\":\"1727285787498\",\"number_of_replicas\":1,\"number_of_shards\":\"1\",\"provided_name\":\"bwc_index_1\",\"uuid\":\"P4PDS4fFSECIprHxpKSoiQ\",\"version\":{\"created\":\"7100299\"}},\"mappings\":{\"properties\":{\"content\":{\"type\":\"text\"},\"title\":{\"type\":\"text\"}}},\"aliases\":{\"bwc_alias\":{}},\"primary_terms\":[1],\"in_sync_allocations\":{\"0\":[\"0M-gMXkNQFC02lnWJN4BVQ\"]},\"rollover_info\":{},\"system\":false}";
        String expectedIndexFwc = "{\"version\":3,\"mapping_version\":1,\"settings_version\":1,\"aliases_version\":1,\"routing_num_shards\":1024,\"state\":\"open\",\"settings\":{\"creation_date\":\"1727285787822\",\"number_of_replicas\":1,\"number_of_shards\":\"1\",\"provided_name\":\"fwc_index_1\",\"uuid\":\"riC1gLlrR2eh_uAh9Zdpiw\",\"version\":{\"created\":\"7100299\"}},\"mappings\":{\"properties\":{\"content\":{\"type\":\"text\"},\"title\":{\"type\":\"text\"}}},\"aliases\":{\"fwc_alias\":{}},\"primary_terms\":[1],\"in_sync_allocations\":{\"0\":[\"8sKrVCEaSxy4yP3cx8kcYQ\"]},\"rollover_info\":{},\"system\":false}";

        assertEquals(expectedIndexBwc, bwcTarget.getRawJson().toString());
        assertEquals(expectedIndexFwc, fwcTarget.getRawJson().toString());
    }
}
104 changes: 104 additions & 0 deletions RFS/test-resources/inventory.md
Original file line number Diff line number Diff line change
Expand Up @@ -342,4 +342,108 @@ curl -X PUT "localhost:19200/test_updates_deletes" -H "Content-Type: application
}
}
}'
```

#### ES_7_10_BWC_Check
An Elasticsearch 7.10 snapshot repo designed to exercise backwards compatibility across a couple of different features. It contains two indices: one is created using pre-ES 7.8 templates and pre-ES 7.0 type declarations, and the other is created using forward-compatible index/component templates and no type declarations. Both indices contain a single document.

```
curl -X PUT "localhost:19200/_template/bwc_template?include_type_name=true" -H "Content-Type: application/json" -d '
{
"index_patterns": ["bwc_index*"],
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0
}
},
"mappings": {
"arbitrary_type": {
"properties": {
"title": {
"type": "text"
},
"content": {
"type": "text"
}
}
}
},
"aliases": {
"bwc_alias": {}
}
}'
curl -X PUT "localhost:19200/bwc_index_1" -H "Content-Type: application/json"
curl -X PUT "localhost:19200/bwc_alias/_doc/bwc_doc" -H "Content-Type: application/json" -d '
{
"title": "This is a doc in a backwards compatible index",
"content": "Four score and seven years ago"
}'
curl -X PUT "localhost:19200/_component_template/fwc_mappings" -H "Content-Type: application/json" -d '
{
"template": {
"mappings": {
"properties": {
"title": {
"type": "text"
},
"content": {
"type": "text"
}
}
}
}
}'
curl -X PUT "localhost:19200/_component_template/fwc_settings" -H "Content-Type: application/json" -d '
{
"template": {
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
}
}'
curl -X PUT "localhost:19200/_index_template/fwc_template" -H "Content-Type: application/json" -d '
{
"index_patterns": ["fwc_index*"],
"composed_of": [
"fwc_mappings",
"fwc_settings"
],
"template": {
"aliases": {
"fwc_alias": {}
}
}
}'
curl -X PUT "localhost:19200/fwc_index_1" -H "Content-Type: application/json"
curl -X PUT "localhost:19200/fwc_alias/_doc/bwc_doc" -H "Content-Type: application/json" -d '
{
"title": "This is a doc in a forward compatible index",
"content": "Life, the Universe, and Everything"
}'
curl -X PUT "localhost:19200/_snapshot/test_repository" -H "Content-Type: application/json" -d '{
"type": "fs",
"settings": {
"location": "/snapshots",
"compress": false
}
}'
curl -X PUT "localhost:19200/_snapshot/test_repository/rfs-snapshot" -H "Content-Type: application/json" -d '{
"indices": "bwc_index_1,fwc_index_1",
"ignore_unavailable": true,
"include_global_state": true
}'
```
1 change: 1 addition & 0 deletions RFS/test-resources/snapshots/ES_7_10_BWC_Check/index-0
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"snapshots":[{"name":"rfs-snapshot","uuid":"rNzQqxl5Qrm99ZzB44Yz0w","state":1,"index_metadata_lookup":{"rtO3hIcQREynKwRVke75sQ":"P4PDS4fFSECIprHxpKSoiQ-_na_-1-1-1","6f5_qLyfQu-MKqe5IEJGuQ":"riC1gLlrR2eh_uAh9Zdpiw-_na_-1-1-1"},"version":"7.10.2"}],"indices":{"bwc_index_1":{"id":"rtO3hIcQREynKwRVke75sQ","snapshots":["rNzQqxl5Qrm99ZzB44Yz0w"],"shard_generations":["OSYnb8w3SKmTqQ_p7m84JA"]},"fwc_index_1":{"id":"6f5_qLyfQu-MKqe5IEJGuQ","snapshots":["rNzQqxl5Qrm99ZzB44Yz0w"],"shard_generations":["73ZKrS4iTc6nKzY0E4kC1w"]}},"min_version":"7.9.0","index_metadata_identifiers":{"riC1gLlrR2eh_uAh9Zdpiw-_na_-1-1-1":"AzJBKpIBKBXpMe1tfd-g","P4PDS4fFSECIprHxpKSoiQ-_na_-1-1-1":"AjJBKpIBKBXpMe1tfd-g"}}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit eb83fa0

Please sign in to comment.