Skip to content

Commit

Permalink
Test the fast check whether we're running on aws
Browse files Browse the repository at this point in the history
- Test is skipped as it's hard to make it work alongside other tests
- Adds a new method to the library tool to inspect env variables
- Adds a runtime config to override imds endpoint for tests
  • Loading branch information
IvoDD committed Mar 15, 2024
1 parent af35596 commit 0ffa236
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 4 deletions.
6 changes: 4 additions & 2 deletions cpp/arcticdb/storage/s3/ec2_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ bool has_connection_to_ec2_imds(){
}
CURLcode res;

// We allow overriding the default 169.254.169.254 endpoint for tests.
auto imds_endpoint = ConfigsMap::instance()->get_string("EC2.TestIMDSEndpointOverride", "http://169.254.169.254");
// Suggested approach by aws docs for IMDSv2 (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html):
// curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600"
// The below libcurl options should mimic the command above.
curl_slist *headers = nullptr;
headers = curl_slist_append(headers, "X-aws-ec2-metadata-token-ttl-seconds: 21600");
curl_easy_setopt(curl, CURLOPT_URL, "http://169.254.169.254/latest/api/token");
curl_easy_setopt(curl, CURLOPT_URL, fmt::format("{}/latest/api/token", imds_endpoint).c_str());
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "PUT");
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
Expand All @@ -49,7 +51,7 @@ bool has_connection_to_ec2_imds(){
// If attempting to connect via IMDSv2 fails we want to attempt a connection to IMDSv1:
// curl http://169.254.169.254/latest/dynamic/instance-identity/document
curl_easy_reset(curl);
curl_easy_setopt(curl, CURLOPT_URL, "http://169.254.169.254/latest/dynamic/instance-identity/document");
curl_easy_setopt(curl, CURLOPT_URL, fmt::format("{}/latest/dynamic/instance-identity/document", imds_endpoint).c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback);
curl_easy_setopt(curl, CURLOPT_TIMEOUT_MS, timeout);

Expand Down
2 changes: 1 addition & 1 deletion cpp/arcticdb/storage/s3/ec2_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace arcticdb::storage::s3 {
// Write some docs?
// A faster check than aws-sdk's attempt to connect with retries to ec2 imds
// Takes no arguments and returns a bool. Presumably the result is true when the EC2 instance
// metadata service (IMDS) is reachable, i.e. the process runs inside AWS -- TODO confirm
// against the definition in ec2_utils.cpp.
bool is_running_inside_aws_fast();
}
8 changes: 8 additions & 0 deletions cpp/arcticdb/toolbox/library_tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <arcticdb/util/key_utils.hpp>
#include <arcticdb/util/variant.hpp>
#include <arcticdb/version/version_utils.hpp>
#include <cstdlib>

namespace arcticdb::toolbox::apy {

Expand Down Expand Up @@ -113,4 +114,11 @@ std::string LibraryTool::get_key_path(const VariantKey& key) {
return store_->key_path(key);
}

// Reads the environment variable `name` as seen from the C++ layer.
// Returns std::nullopt when the variable is not set; otherwise returns a copy of its value.
std::optional<std::string> LibraryTool::inspect_env_variable(std::string name){
    const char* raw_value = std::getenv(name.c_str());
    if (raw_value != nullptr) {
        return std::string(raw_value);
    }
    return std::nullopt;
}


} // namespace arcticdb::toolbox::apy
2 changes: 2 additions & 0 deletions cpp/arcticdb/toolbox/library_tool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class LibraryTool {

void clear_ref_keys();

std::optional<std::string> inspect_env_variable(std::string name);

private:
// TODO: Remove the shared_ptr and just keep the store.
// The only reason we use a shared_ptr for the store is to be able to pass it to delete_all_keys_of_type.
Expand Down
3 changes: 2 additions & 1 deletion cpp/arcticdb/toolbox/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ void register_bindings(py::module &m) {
[&](LibraryTool& lt, const VariantKey& key){
return adapt_read_df(lt.read(key));
},
"Read the most recent dataframe from the store");
"Read the most recent dataframe from the store")
.def("inspect_env_variable", &LibraryTool::inspect_env_variable);

// S3 Storage tool
using namespace arcticdb::storage::s3;
Expand Down
6 changes: 6 additions & 0 deletions python/arcticdb/storage_fixtures/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,12 @@ def __call__(self, environ, start_response):
path_info: bytes = environ.get("PATH_INFO", "")

with self.lock:
# Mock ec2 imds responses for testing
if path_info in ("/latest/dynamic/instance-identity/document", b"/latest/dynamic/instance-identity/document"):
start_response("200 OK", [("Content-Type", "text/plain")])
return [b"Something to prove imds is reachable"]

# Allow setting up a rate limit
if path_info in ("/rate_limit", b"/rate_limit"):
length = int(environ["CONTENT_LENGTH"])
body = environ["wsgi.input"].read(length).decode("ascii")
Expand Down
24 changes: 24 additions & 0 deletions python/tests/integration/arcticdb/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

from arcticdb import Arctic
from arcticdb_ext.exceptions import StorageException
from arcticdb_ext import set_config_string
from arcticdb.storage_fixtures.s3 import MotoS3StorageFixtureFactory


def test_s3_storage_failures(mock_s3_store_with_error_simulation):
Expand All @@ -24,3 +26,25 @@ def test_s3_storage_failures(mock_s3_store_with_error_simulation):

with pytest.raises(StorageException, match="Unexpected error: S3Error#17"):
lib.read(symbol_fail_read)


# TODO: To make this test run alongside other tests we'll need to:
# - Figure out how to do AWS::InitAPI multiple times in the same process. Currently we use std::call_once to ensure
#   we do this exactly once.
# - Perform cleanup after tests (unset AWS_EC2_METADATA_DISABLED after done with each test, unset the runtime config
#   "EC2.TestIMDSEndpointOverride") to make follow up tests work as expected
@pytest.mark.skip(reason="Test only works if not run along other tests.")
@pytest.mark.parametrize("run_on_aws", [True, False])
def test_s3_running_on_aws_fast_check(lib_name, s3_storage_factory, run_on_aws):
    """Check that AWS_EC2_METADATA_DISABLED reflects whether the IMDS endpoint is reachable.

    When ``run_on_aws`` is True the IMDS endpoint is overridden with the storage factory's endpoint
    and the variable is expected to be unset; otherwise it is expected to be "true".
    """
    if run_on_aws:
        # To mock running on aws we override the IMDS endpoint with moto's endpoint which will be reachable.
        set_config_string("EC2.TestIMDSEndpointOverride", s3_storage_factory.endpoint)

    lib = s3_storage_factory.create_fixture().create_version_store_factory(lib_name)()
    lib_tool = lib.library_tool()
    # We use the AWS_EC2_METADATA_DISABLED variable to verify we're disabling the EC2 Metadata check when outside of AWS
    # For some reason os.getenv can't access environment variables from the cpp layer so we use lib_tool.inspect_env_variable
    if run_on_aws:
        # Identity comparison is the idiomatic (PEP 8) way to test for None.
        assert lib_tool.inspect_env_variable("AWS_EC2_METADATA_DISABLED") is None
    else:
        assert lib_tool.inspect_env_variable("AWS_EC2_METADATA_DISABLED") == "true"

0 comments on commit 0ffa236

Please sign in to comment.