Skip to content

Commit

Permalink
-Set ca path and directory automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
Phoebus Mak authored and phoebusm committed Apr 30, 2024
1 parent dfcba71 commit 657d396
Show file tree
Hide file tree
Showing 20 changed files with 177 additions and 261 deletions.
169 changes: 0 additions & 169 deletions cpp/CMake/AzureVcpkg.cmake

This file was deleted.

4 changes: 1 addition & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ include(PythonUtils) # Must be called before Pybind (third_party) to override it

add_subdirectory(third_party)
add_subdirectory(proto)
if(NOT ${ARCTICDB_USING_CONDA})
include(AzureVcpkg) #AzureVcpkg.cmake is from https://github.com/Azure/azure-sdk-for-cpp/blob/main/cmake-modules/AzureVcpkg.cmake, commit ada77b3
endif()


python_utils_dump_vars_if_enabled("After Pybind")
python_utils_check_include_dirs("accepted by pybind")
Expand Down
9 changes: 7 additions & 2 deletions cpp/arcticdb/storage/azure/azure_real_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,14 @@ container_client(BlobContainerClient::CreateFromConnectionString(conf.endpoint()

Azure::Storage::Blobs::BlobClientOptions RealAzureClient::get_client_options(const Config &conf) {
BlobClientOptions client_options;
if (!conf.ca_cert_path().empty()) {//WARNING: Setting ca_cert_path will force Azure sdk uses libcurl as backend support, instead of winhttp
if (!conf.ca_cert_path().empty() || !conf.ca_cert_dir().empty()) {//WARNING: Setting ca_cert_path or ca_cert_dir will force Azure sdk uses libcurl as backend support, instead of winhttp
Azure::Core::Http::CurlTransportOptions curl_transport_options;
curl_transport_options.CAInfo = conf.ca_cert_path();
if (!conf.ca_cert_path().empty()) {
curl_transport_options.CAInfo = conf.ca_cert_path();
}
if (!conf.ca_cert_dir().empty()) {
curl_transport_options.CAPath = conf.ca_cert_dir();
}
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::CurlTransport>(curl_transport_options);
}
return client_options;
Expand Down
15 changes: 15 additions & 0 deletions cpp/arcticdb/storage/azure/azure_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,10 @@ AzureStorage::AzureStorage(const LibraryPath &library_path, OpenMode mode, const
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using default CA cert path");
else
ARCTICDB_RUNTIME_DEBUG(log::storage(), "CA cert path: {}", conf.ca_cert_path());
if (conf.ca_cert_dir().empty())
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using default CA cert directory");
else
ARCTICDB_RUNTIME_DEBUG(log::storage(), "CA cert directory: {}", conf.ca_cert_dir());
ARCTICDB_RUNTIME_DEBUG(log::storage(), "Connecting to Azure Blob Storage: {} Container: {}", conf.endpoint(), conf.container_name());

if (!conf.prefix().empty()) {
Expand All @@ -363,4 +367,15 @@ AzureStorage::AzureStorage(const LibraryPath &library_path, OpenMode mode, const
download_option_.TransferOptions.Concurrency = max_connections;
}

Azure::Storage::Blobs::BlobClientOptions AzureStorage::get_client_options(const Config &conf) {
BlobClientOptions client_options;
if (!conf.ca_cert_path().empty() || !conf.ca_cert_dir().empty()) {//WARNING: Setting ca_cert_path will force Azure sdk uses libcurl as backend support, instead of winhttp
Azure::Core::Http::CurlTransportOptions curl_transport_options;
curl_transport_options.CAInfo = conf.ca_cert_path();
curl_transport_options.CAPath = conf.ca_cert_dir();
client_options.Transport.Transport = std::make_shared<Azure::Core::Http::CurlTransport>(curl_transport_options);
}
return client_options;
}

} // namespace arcticdb::storage::azure
3 changes: 2 additions & 1 deletion cpp/arcticdb/storage/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ void register_bindings(py::module& storage) {
.def(py::init<>())
.def_property("container_name", &AzureOverride::container_name, &AzureOverride::set_container_name)
.def_property("endpoint", &AzureOverride::endpoint, &AzureOverride::set_endpoint)
.def_property("ca_cert_path", &AzureOverride::ca_cert_path, &AzureOverride::set_ca_cert_path);
.def_property("ca_cert_path", &AzureOverride::ca_cert_path, &AzureOverride::set_ca_cert_path)
.def_property("ca_cert_dir", &AzureOverride::ca_cert_dir, &AzureOverride::set_ca_cert_dir);

py::class_<LmdbOverride>(storage, "LmdbOverride")
.def(py::init<>())
Expand Down
10 changes: 10 additions & 0 deletions cpp/arcticdb/storage/storage_override.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class AzureOverride {
std::string container_name_;
std::string endpoint_;
std::string ca_cert_path_;
std::string ca_cert_dir_;

public:
std::string container_name() const {
Expand All @@ -153,6 +154,14 @@ class AzureOverride {
ca_cert_path_ = ca_cert_path;
}

std::string ca_cert_dir() const {
return ca_cert_dir_;
}

void set_ca_cert_dir(std::string_view ca_cert_dir){
ca_cert_dir_ = ca_cert_dir;
}

void modify_storage_config(arcticdb::proto::storage::VariantStorage& storage) const {
if(storage.config().Is<arcticdb::proto::azure_storage::Config>()) {
arcticdb::proto::azure_storage::Config azure_storage;
Expand All @@ -161,6 +170,7 @@ class AzureOverride {
azure_storage.set_container_name(container_name_);
azure_storage.set_endpoint(endpoint_);
azure_storage.set_ca_cert_path(ca_cert_path_);
azure_storage.set_ca_cert_dir(ca_cert_dir_);

util::pack_to_any(azure_storage, *storage.mutable_config());
}
Expand Down
1 change: 1 addition & 0 deletions cpp/proto/arcticc/pb2/azure_storage.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ message Config {
string prefix = 5;
string ca_cert_path = 6;
bool use_mock_storage_for_testing = 7;
string ca_cert_dir = 8;
}
2 changes: 2 additions & 0 deletions cpp/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"name": "arrow",
"default-features": false
},
"azure-core-cpp",
"azure-identity-cpp",
"azure-storage-blobs-cpp",
"rocksdb",
Expand All @@ -69,6 +70,7 @@
"overrides": [
{ "name": "openssl", "version-string": "1.1.1n#1" },
{ "name": "aws-sdk-cpp", "version": "1.11.201" },
{ "name": "azure-core-cpp", "version": "1.11.2" },
{ "name": "aws-c-s3", "version": "0.3.24" },
{ "name": "bitmagic", "version": "7.12.3" },
{ "name": "boost-algorithm", "version": "1.80.0#1" },
Expand Down
3 changes: 2 additions & 1 deletion docs/mkdocs/docs/api/arctic_uri.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ Additional options specific for ArcticDB:
| Container | Azure container for blobs |
| Path_prefix | Path within Azure container to use for data storage |
| CA_cert_path | (Non-Windows platform only) Azure CA certificate path. If not set, default path will be used. Note: For Linux distribution, default path is set to `/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem`. If the certificate cannot be found in the provided path, an Azure exception with no meaningful error code will be thrown. For more details, please see [here](https://github.com/Azure/azure-sdk-for-cpp/issues/4738). For example, `Failed to iterate azure blobs 'C' 0:`.<br><br>Default certificate path in various Linux distributions:<br>`/etc/ssl/certs/ca-certificates.crt` for Debian/Ubuntu/Gentoo etc.<br>`/etc/pki/tls/certs/ca-bundle.crt` for Fedora/RHEL 6<br>`/etc/ssl/ca-bundle.pem` for OpenSUSE<br>`/etc/pki/tls/cacert.pem` for OpenELEC<br>`/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem` for CentOS/RHEL 7<br>`/etc/ssl/cert.pem` for Alpine Linux |
| CA_cert_dir | (Non-Windows platform only) Azure CA certificate directory. It sets option ``CURLOPT_CAPATH`` in Azure SDK's libcurl backend. If not set, python ``ssl.get_default_verify_paths().capath`` will be used. Certificates can only be used if corresponding hash files exist (https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_load_verify_locations.html) |

For Windows user, `CA_cert_path` cannot be set. Please set CA certificate related option on Windows setting.
For Windows user, `CA_cert_path` AND `CA_cert_dir` cannot be set. Please set CA certificate related option on Windows setting.
For details, you may refer to https://learn.microsoft.com/en-us/skype-sdk/sdn/articles/installing-the-trusted-root-certificate

Exception: Azure exceptions message always ends with `{AZURE_SDK_HTTP_STATUS_CODE}:{AZURE_SDK_REASON_PHRASE}`.
Expand Down
2 changes: 1 addition & 1 deletion environment_unix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ dependencies:
- prometheus-cpp
- libprotobuf < 4
- openssl
- libcurl==8.5.0 #Until https://github.com/conda-forge/curl-feedstock/issues/135 addressed and release build available
- libcurl
- bitmagic
- spdlog
- azure-core-cpp
Expand Down
21 changes: 18 additions & 3 deletions python/arcticdb/adapters/azure_library_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import time
from typing import Optional
import ssl
import platform

from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap, LibraryConfig
Expand All @@ -26,7 +27,10 @@
@dataclass
class ParsedQuery:
Path_prefix: Optional[str] = None
CA_cert_path: str = ""
# winhttp is used as Azure backend support on Winodws by default; winhttp itself mainatains ca cert.
# The options should be left empty else libcurl will be used on Windows
CA_cert_path: Optional[str] = "" # CURLOPT_CAINFO in curl
CA_cert_dir: Optional[str] = "" # CURLOPT_CAPATH in curl
Container: Optional[str] = None


Expand Down Expand Up @@ -54,9 +58,16 @@ def __init__(self, uri: str, encoding_version: EncodingVersion, *args, **kwargs)
]
)
self._container = self._query_params.Container
if platform.system() == "Windows" and self._query_params.CA_cert_path:
raise ValueError(f"CA_cert_path cannot be set on Windows platform")
if platform.system() != "Linux" and (self._query_params.CA_cert_path or self._query_params.CA_cert_dir):
raise ValueError("You have provided `ca_cert_path` or `ca_cert_dir` in the URI which is only supported on Linux. " \
"Remove the setting in the connection URI and use your operating system defaults.")
self._ca_cert_path = self._query_params.CA_cert_path
self._ca_cert_dir = self._query_params.CA_cert_dir
if not self._ca_cert_path and not self._ca_cert_dir and platform.system() == "Linux":
if ssl.get_default_verify_paths().cafile is not None:
self._ca_cert_path = ssl.get_default_verify_paths().cafile
if ssl.get_default_verify_paths().capath is not None:
self._ca_cert_dir = ssl.get_default_verify_paths().capath
self._encoding_version = encoding_version

super().__init__(uri, self._encoding_version)
Expand All @@ -80,6 +91,7 @@ def config_library(self):
endpoint=self._endpoint,
with_prefix=with_prefix,
ca_cert_path=self._ca_cert_path,
ca_cert_dir=self._ca_cert_dir,
)

lib = NativeVersionStore.create_store_from_config(
Expand Down Expand Up @@ -111,6 +123,8 @@ def get_storage_override(self) -> AzureOverride:
azure_override.endpoint = self._endpoint
if self._ca_cert_path:
azure_override.ca_cert_path = self._ca_cert_path
if self._ca_cert_dir:
azure_override.ca_cert_dir = self._ca_cert_dir

storage_override = StorageOverride()
storage_override.set_azure_override(azure_override)
Expand Down Expand Up @@ -138,6 +152,7 @@ def add_library_to_env(self, env_cfg: EnvironmentConfigsMap, name: str):
endpoint=self._endpoint,
with_prefix=with_prefix,
ca_cert_path=self._ca_cert_path,
ca_cert_dir=self._ca_cert_dir,
)

@property
Expand Down
2 changes: 1 addition & 1 deletion python/arcticdb/storage_fixtures/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class ArcticUriFields(Enum):
PASSWORD = "PASSWORD"
BUCKET = "BUCKET"
CA_PATH = "CA_PATH"
SSL = "SSL"

def __str__(self):
return self.value
Expand Down Expand Up @@ -149,7 +150,6 @@ def replace_uri_field(cls, uri: str, field: ArcticUriFields, replacement: str, s
"""start & end are these regex groups: 1=field name, 2=field value, 3=optional field separator"""
regex = cls._FIELD_REGEX[field]
match = regex.search(uri)
assert match, f"{uri} does not have {field}"
return f"{uri[:match.start(start)]}{replacement}{uri[match.end(end):]}"


Expand Down
Loading

0 comments on commit 657d396

Please sign in to comment.