Skip to content

Commit

Permalink
-Set ca path and directory automatically
Browse files Browse the repository at this point in the history
Revoke removing assert

Remove useless ca cert path in non ssl enabled testing environment

Address PR comment

Better test

Address PR comments

Update docs/mkdocs/docs/api/arctic_uri.md

Co-authored-by: Alex Seaton <[email protected]>
  • Loading branch information
2 people authored and phoebusm committed May 29, 2024
1 parent 65cb44d commit 5dd6a64
Show file tree
Hide file tree
Showing 21 changed files with 226 additions and 271 deletions.
169 changes: 0 additions & 169 deletions cpp/CMake/AzureVcpkg.cmake

This file was deleted.

4 changes: 1 addition & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ include(PythonUtils) # Must be called before Pybind (third_party) to override it

add_subdirectory(third_party)
add_subdirectory(proto)
if(NOT ${ARCTICDB_USING_CONDA})
include(AzureVcpkg) #AzureVcpkg.cmake is from https://github.com/Azure/azure-sdk-for-cpp/blob/main/cmake-modules/AzureVcpkg.cmake, commit ada77b3
endif()


python_utils_dump_vars_if_enabled("After Pybind")
python_utils_check_include_dirs("accepted by pybind")
Expand Down
9 changes: 7 additions & 2 deletions cpp/arcticdb/storage/azure/azure_real_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,14 @@ container_client(BlobContainerClient::CreateFromConnectionString(conf.endpoint()

Azure::Storage::Blobs::BlobClientOptions RealAzureClient::get_client_options(const Config &conf) {
BlobClientOptions client_options;
if (!conf.ca_cert_path().empty()) {//WARNING: Setting ca_cert_path will force Azure sdk uses libcurl as backend support, instead of winhttp
if (!conf.ca_cert_path().empty() || !conf.ca_cert_dir().empty()) {//WARNING: Setting ca_cert_path or ca_cert_dir will force Azure sdk uses libcurl as backend support, instead of winhttp
Azure::Core::Http::CurlTransportOptions curl_transport_options;
curl_transport_options.CAInfo = conf.ca_cert_path();
if (!conf.ca_cert_path().empty()) {
curl_transport_options.CAInfo = conf.ca_cert_path();
}
if (!conf.ca_cert_dir().empty()) {
curl_transport_options.CAPath = conf.ca_cert_dir();
}
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::CurlTransport>(curl_transport_options);
}
return client_options;
Expand Down
10 changes: 8 additions & 2 deletions cpp/arcticdb/storage/azure/azure_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,16 @@ AzureStorage::AzureStorage(const LibraryPath &library_path, OpenMode mode, const
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using Real Azure storage");
azure_client_ = std::make_unique<RealAzureClient>(conf);
}
if (conf.ca_cert_path().empty())
if (conf.ca_cert_path().empty()) {
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using default CA cert path");
else
} else {
ARCTICDB_RUNTIME_DEBUG(log::storage(), "CA cert path: {}", conf.ca_cert_path());
}
if (conf.ca_cert_dir().empty()) {
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using default CA cert directory");
} else {
ARCTICDB_RUNTIME_DEBUG(log::storage(), "CA cert directory: {}", conf.ca_cert_dir());
}
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Connecting to Azure Blob Storage: {} Container: {}", conf.endpoint(), conf.container_name());

if (!conf.prefix().empty()) {
Expand Down
2 changes: 0 additions & 2 deletions cpp/arcticdb/storage/azure/azure_storage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ class AzureStorage final : public Storage {
unsigned int request_timeout_;
Azure::Storage::Blobs::UploadBlockBlobFromOptions upload_option_;
Azure::Storage::Blobs::DownloadBlobToOptions download_option_;

Azure::Storage::Blobs::BlobClientOptions get_client_options(const Config &conf);
};

inline arcticdb::proto::storage::VariantStorage pack_config(const std::string &container_name) {
Expand Down
3 changes: 2 additions & 1 deletion cpp/arcticdb/storage/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ void register_bindings(py::module& storage) {
.def(py::init<>())
.def_property("container_name", &AzureOverride::container_name, &AzureOverride::set_container_name)
.def_property("endpoint", &AzureOverride::endpoint, &AzureOverride::set_endpoint)
.def_property("ca_cert_path", &AzureOverride::ca_cert_path, &AzureOverride::set_ca_cert_path);
.def_property("ca_cert_path", &AzureOverride::ca_cert_path, &AzureOverride::set_ca_cert_path)
.def_property("ca_cert_dir", &AzureOverride::ca_cert_dir, &AzureOverride::set_ca_cert_dir);

py::class_<LmdbOverride>(storage, "LmdbOverride")
.def(py::init<>())
Expand Down
10 changes: 10 additions & 0 deletions cpp/arcticdb/storage/storage_override.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class AzureOverride {
std::string container_name_;
std::string endpoint_;
std::string ca_cert_path_;
std::string ca_cert_dir_;

public:
std::string container_name() const {
Expand All @@ -153,6 +154,14 @@ class AzureOverride {
ca_cert_path_ = ca_cert_path;
}

std::string ca_cert_dir() const {
return ca_cert_dir_;
}

void set_ca_cert_dir(std::string_view ca_cert_dir){
ca_cert_dir_ = ca_cert_dir;
}

void modify_storage_config(arcticdb::proto::storage::VariantStorage& storage) const {
if(storage.config().Is<arcticdb::proto::azure_storage::Config>()) {
arcticdb::proto::azure_storage::Config azure_storage;
Expand All @@ -161,6 +170,7 @@ class AzureOverride {
azure_storage.set_container_name(container_name_);
azure_storage.set_endpoint(endpoint_);
azure_storage.set_ca_cert_path(ca_cert_path_);
azure_storage.set_ca_cert_dir(ca_cert_dir_);

util::pack_to_any(azure_storage, *storage.mutable_config());
}
Expand Down
1 change: 1 addition & 0 deletions cpp/proto/arcticc/pb2/azure_storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ message Config {
string prefix = 5;
string ca_cert_path = 6;
bool use_mock_storage_for_testing = 7;
string ca_cert_dir = 8;
}
2 changes: 2 additions & 0 deletions cpp/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"name": "arrow",
"default-features": false
},
"azure-core-cpp",
"azure-identity-cpp",
"azure-storage-blobs-cpp",
"rocksdb",
Expand All @@ -69,6 +70,7 @@
"overrides": [
{ "name": "openssl", "version-string": "3.3.0" },
{ "name": "aws-sdk-cpp", "version": "1.11.201" },
{ "name": "azure-core-cpp", "version": "1.11.2" },
{ "name": "aws-c-s3", "version": "0.3.24" },
{ "name": "bitmagic", "version": "7.12.3" },
{ "name": "boost-algorithm", "version": "1.80.0#1" },
Expand Down
13 changes: 7 additions & 6 deletions docs/mkdocs/docs/api/arctic_uri.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ specifies the region and `region` is not set.

The Azure URI connection scheme has the form `azure://[options]`.
It is based on the Azure Connection String, with additional options for configuring ArcticDB.
Please refer to https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string for more details.
Please refer to [Azure](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string) for more details.

`options` is a string that specifies connection specific options as `<name>=<value>` pairs joined with `;` (the final key value pair should not include a trailing `;`).

Expand All @@ -41,17 +41,18 @@ Additional options specific for ArcticDB:
|---------------|---------------|
| Container | Azure container for blobs |
| Path_prefix | Path within Azure container to use for data storage |
| CA_cert_path | (Non-Windows platform only) Azure CA certificate path. If not set, default path will be used. Note: For Linux distribution, default path is set to `/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.<br><br>Default certificate path in various Linux distributions:<br>`/etc/ssl/certs/ca-certificates.crt` for Debian/Ubuntu/Gentoo etc.<br>`/etc/pki/tls/certs/ca-bundle.crt` for Fedora/RHEL 6<br>`/etc/ssl/ca-bundle.pem` for OpenSUSE<br>`/etc/pki/tls/cacert.pem` for OpenELEC<br>`/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem` for CentOS/RHEL 7<br>`/etc/ssl/cert.pem` for Alpine Linux |
| CA_cert_path | (Linux platform only) Azure CA certificate path. If not set, python ``ssl.get_default_verify_paths().cafile`` will be used. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.|
| CA_cert_dir | (Linux platform only) Azure CA certificate directory. If not set, python ``ssl.get_default_verify_paths().capath`` will be used. Certificates can only be used if corresponding hash files [exist](https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_load_verify_locations.html). If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.|

For Windows user, `CA_cert_path` cannot be set. Please set CA certificate related option on Windows setting.
For details, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate
For non-Linux platforms, neither `CA_cert_path` nor `CA_cert_dir` may be set. Please set CA certificate related options using operating system settings.
For Windows, please see [here](https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate)

Exception: Azure exceptions message always ends with `{AZURE_SDK_HTTP_STATUS_CODE}:{AZURE_SDK_REASON_PHRASE}`.

Please refer to https://github.com/Azure/azure-sdk-for-cpp/blob/24ed290815d8f9dbcd758a60fdc5b6b9205f74e0/sdk/core/azure-core/inc/azure/core/http/http_status_code.hpp for
Please refer to [azure-sdk-for-cpp](https://github.com/Azure/azure-sdk-for-cpp/blob/24ed290815d8f9dbcd758a60fdc5b6b9205f74e0/sdk/core/azure-core/inc/azure/core/http/http_status_code.hpp) for
more details of provided status codes.

Note that due to a bug in Azure C++ SDK (https://github.com/Azure/azure-sdk-for-cpp/issues/4738), Azure may not give meaningful status codes and
Note that due to a [bug](https://github.com/Azure/azure-sdk-for-cpp/issues/4738) in Azure C++ SDK, Azure may not give meaningful status codes and
reason phrases in the exception. To debug these instances, please set the environment variable `export AZURE_LOG_LEVEL` to `1` to turn on the SDK debug logging.


Expand Down
21 changes: 18 additions & 3 deletions python/arcticdb/adapters/azure_library_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import time
from typing import Optional
import ssl
import platform

from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap, LibraryConfig
Expand All @@ -26,7 +27,10 @@
@dataclass
class ParsedQuery:
Path_prefix: Optional[str] = None
CA_cert_path: str = ""
# winhttp is used as Azure backend support on Windows by default; winhttp itself maintains ca cert.
# The options should be left empty else libcurl will be used on Windows
CA_cert_path: Optional[str] = "" # CURLOPT_CAINFO in curl
CA_cert_dir: Optional[str] = "" # CURLOPT_CAPATH in curl
Container: Optional[str] = None


Expand Down Expand Up @@ -54,9 +58,16 @@ def __init__(self, uri: str, encoding_version: EncodingVersion, *args, **kwargs)
]
)
self._container = self._query_params.Container
if platform.system() == "Windows" and self._query_params.CA_cert_path:
raise ValueError(f"CA_cert_path cannot be set on Windows platform")
if platform.system() != "Linux" and (self._query_params.CA_cert_path or self._query_params.CA_cert_dir):
raise ValueError("You have provided `ca_cert_path` or `ca_cert_dir` in the URI which is only supported on Linux. " \
"Remove the setting in the connection URI and use your operating system defaults.")
self._ca_cert_path = self._query_params.CA_cert_path
self._ca_cert_dir = self._query_params.CA_cert_dir
if not self._ca_cert_path and not self._ca_cert_dir and platform.system() == "Linux":
if ssl.get_default_verify_paths().cafile is not None:
self._ca_cert_path = ssl.get_default_verify_paths().cafile
if ssl.get_default_verify_paths().capath is not None:
self._ca_cert_dir = ssl.get_default_verify_paths().capath
self._encoding_version = encoding_version

super().__init__(uri, self._encoding_version)
Expand All @@ -80,6 +91,7 @@ def config_library(self):
endpoint=self._endpoint,
with_prefix=with_prefix,
ca_cert_path=self._ca_cert_path,
ca_cert_dir=self._ca_cert_dir,
)

lib = NativeVersionStore.create_store_from_config(
Expand Down Expand Up @@ -111,6 +123,8 @@ def get_storage_override(self) -> AzureOverride:
azure_override.endpoint = self._endpoint
if self._ca_cert_path:
azure_override.ca_cert_path = self._ca_cert_path
if self._ca_cert_dir:
azure_override.ca_cert_dir = self._ca_cert_dir

storage_override = StorageOverride()
storage_override.set_azure_override(azure_override)
Expand Down Expand Up @@ -138,6 +152,7 @@ def add_library_to_env(self, env_cfg: EnvironmentConfigsMap, name: str):
endpoint=self._endpoint,
with_prefix=with_prefix,
ca_cert_path=self._ca_cert_path,
ca_cert_dir=self._ca_cert_dir,
)

@property
Expand Down
Loading

0 comments on commit 5dd6a64

Please sign in to comment.