Merge branch 'GAC-odbc-driver' of github.com:Bit-Quill/arrow into GAC-odbc-driver

affonsov committed Jan 2, 2024
2 parents 3d7217d + 59a015f commit 03f8c29
Showing 163 changed files with 2,366 additions and 1,810 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ruby.yml
@@ -66,7 +66,7 @@ jobs:
fail-fast: false
matrix:
ubuntu:
- 20.04
- 22.04
env:
UBUNTU: ${{ matrix.ubuntu }}
steps:
559 changes: 301 additions & 258 deletions cpp/src/arrow/filesystem/azurefs.cc

Large diffs are not rendered by default.

150 changes: 84 additions & 66 deletions cpp/src/arrow/filesystem/azurefs.h
@@ -25,90 +25,118 @@
#include "arrow/util/macros.h"
#include "arrow/util/uri.h"

namespace Azure {
namespace Core {
namespace Credentials {

namespace Azure::Core::Credentials {
class TokenCredential;
}

} // namespace Credentials
} // namespace Core
namespace Storage {

namespace Azure::Storage {
class StorageSharedKeyCredential;
}

} // namespace Storage
} // namespace Azure

namespace arrow {
namespace fs {

enum class AzureCredentialsKind : int8_t {
/// Anonymous access (no credentials used), public
Anonymous,
/// Use explicitly-provided access key pair
StorageCredentials,
/// Use ServicePrincipleCredentials
ServicePrincipleCredentials,
/// Use Sas Token to authenticate
Sas,
/// Use Connection String
ConnectionString
};
namespace Azure::Storage::Blobs {
class BlobServiceClient;
}

namespace Azure::Storage::Files::DataLake {
class DataLakeServiceClient;
}

enum class AzureBackend : bool {
/// Official Azure Remote Backend
Azure,
/// Local Simulated Storage
Azurite
namespace arrow::fs {

enum class AzureBackend {
/// \brief Official Azure Remote Backend
kAzure,
/// \brief Local Simulated Storage
kAzurite
};

/// Options for the AzureFileSystem implementation.
struct ARROW_EXPORT AzureOptions {
std::string account_dfs_url;
std::string account_blob_url;
AzureBackend backend = AzureBackend::Azure;
AzureCredentialsKind credentials_kind = AzureCredentialsKind::Anonymous;
/// \brief The backend to connect to: Azure or Azurite (for testing).
AzureBackend backend = AzureBackend::kAzure;

std::string sas_token;
std::string connection_string;
std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_credentials_provider;
std::shared_ptr<Azure::Core::Credentials::TokenCredential>
service_principle_credentials_provider;
// TODO(GH-38598): Add support for more auth methods.
// std::string connection_string;
// std::string sas_token;

/// \brief Default metadata for OpenOutputStream.
///
/// This will be ignored if non-empty metadata is passed to OpenOutputStream.
std::shared_ptr<const KeyValueMetadata> default_metadata;

private:
std::string account_blob_url_;
std::string account_dfs_url_;

enum class CredentialKind {
kAnonymous,
kStorageSharedKeyCredential,
} credential_kind_ = CredentialKind::kAnonymous;

std::shared_ptr<Azure::Storage::StorageSharedKeyCredential>
storage_shared_key_credential_;

public:
AzureOptions();
~AzureOptions();

Status ConfigureAccountKeyCredentials(const std::string& account_name,
const std::string& account_key);
Status ConfigureAccountKeyCredential(const std::string& account_name,
const std::string& account_key);

bool Equals(const AzureOptions& other) const;

const std::string& AccountBlobUrl() const { return account_blob_url_; }
const std::string& AccountDfsUrl() const { return account_dfs_url_; }

Result<std::unique_ptr<Azure::Storage::Blobs::BlobServiceClient>>
MakeBlobServiceClient() const;

Result<std::unique_ptr<Azure::Storage::Files::DataLake::DataLakeServiceClient>>
MakeDataLakeServiceClient() const;
};

/// \brief Azure-backed FileSystem implementation for ABFS and ADLS.
/// \brief FileSystem implementation backed by Azure Blob Storage (ABS) [1] and
/// Azure Data Lake Storage Gen2 (ADLS Gen2) [2].
///
/// ADLS Gen2 isn't a dedicated service or account type. It's a set of capabilities that
/// support high throughput analytic workloads, built on Azure Blob Storage. All the data
/// ingested via the ADLS Gen2 APIs is persisted as blobs in the storage account.
/// ADLS Gen2 provides filesystem semantics, file-level security, and Hadoop
/// compatibility. ADLS Gen1 exists as a separate object that will be retired on 2024-02-29
/// and new ADLS accounts use Gen2 instead.
///
/// ABFS (Azure Blob Storage - https://azure.microsoft.com/en-us/products/storage/blobs/)
/// object-based cloud storage system.
/// ADLS Gen2 and Blob APIs can operate on the same data, but there are
/// some limitations [3]. The ones that are relevant to this
/// implementation are listed here:
///
/// ADLS (Azure Data Lake Storage -
/// https://azure.microsoft.com/en-us/products/storage/data-lake-storage/)
/// is a scalable data storage system designed for big-data applications.
/// ADLS provides filesystem semantics, file-level security, and Hadoop
/// compatibility. Gen1 exists as a separate object that will retired
/// on Feb 29, 2024. New ADLS accounts will use Gen2 instead, which is
/// implemented on top of ABFS.
/// - You can't use Blob APIs and ADLS APIs to write to the same instance of a file. If
/// you write to a file by using ADLS APIs then that file's blocks won't be visible
/// to calls to the GetBlockList Blob API. The only exception is when you're
/// overwriting.
/// - When you use the ListBlobs operation without specifying a delimiter, the results
/// include both directories and blobs. If you choose to use a delimiter, use only a
/// forward slash (/) -- the only supported delimiter.
/// - If you use the DeleteBlob API to delete a directory, that directory is deleted only
/// if it's empty. This means that you can't use the Blob API to delete directories
/// recursively.
///
/// TODO: GH-18014 Complete the internal implementation
/// and review the documentation
/// [1]: https://azure.microsoft.com/en-us/products/storage/blobs
/// [2]: https://azure.microsoft.com/en-us/products/storage/data-lake-storage
/// [3]:
/// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-known-issues
class ARROW_EXPORT AzureFileSystem : public FileSystem {
private:
class Impl;
std::unique_ptr<Impl> impl_;

explicit AzureFileSystem(std::unique_ptr<Impl>&& impl);

public:
~AzureFileSystem() override = default;

static Result<std::shared_ptr<AzureFileSystem>> Make(
const AzureOptions& options, const io::IOContext& = io::default_io_context());

std::string type_name() const override { return "abfs"; }

/// Return the original Azure options when constructing the filesystem
@@ -152,16 +180,6 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem {
Result<std::shared_ptr<io::OutputStream>> OpenAppendStream(
const std::string& path,
const std::shared_ptr<const KeyValueMetadata>& metadata = {}) override;

static Result<std::shared_ptr<AzureFileSystem>> Make(
const AzureOptions& options, const io::IOContext& = io::default_io_context());

private:
AzureFileSystem(const AzureOptions& options, const io::IOContext& io_context);

class Impl;
std::unique_ptr<Impl> impl_;
};

} // namespace fs
} // namespace arrow
} // namespace arrow::fs
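
For orientation, here is a minimal usage sketch based on the declarations in this header; it is not part of the commit. The account name, key, and blob path are placeholders, and the helper macros come from arrow/status.h and arrow/result.h:

#include "arrow/filesystem/azurefs.h"
#include "arrow/result.h"
#include "arrow/status.h"

arrow::Status UseAzureFileSystem() {
  arrow::fs::AzureOptions options;
  // The default backend is kAzure; kAzurite selects the local storage emulator for tests.
  // options.backend = arrow::fs::AzureBackend::kAzurite;
  ARROW_RETURN_NOT_OK(
      options.ConfigureAccountKeyCredential("myaccount", "myaccountkey"));
  ARROW_ASSIGN_OR_RAISE(auto fs, arrow::fs::AzureFileSystem::Make(options));
  // Any FileSystem API can be used from here; GetFileInfo is just an example.
  return fs->GetFileInfo("my-container/my-blob").status();
}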
6 changes: 6 additions & 0 deletions cpp/src/arrow/filesystem/azurefs_internal.cc
@@ -23,11 +23,17 @@

namespace arrow::fs::internal {

namespace {

// TODO(GH-38772): Remove azurefs_internal.h/.cc by moving the detector to
// azurefs.cc (which contains a private copy of this helper function already).
Status ExceptionToStatus(const std::string& prefix,
const Azure::Storage::StorageException& exception) {
return Status::IOError(prefix, " Azure Error: ", exception.what());
}

} // namespace

Status HierarchicalNamespaceDetector::Init(
Azure::Storage::Files::DataLake::DataLakeServiceClient* datalake_service_client) {
datalake_service_client_ = datalake_service_client;
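
For context, ExceptionToStatus is the bridge between the Azure SDK's exception-based error reporting and Arrow's Status-based error reporting. A minimal sketch of the call-site pattern follows; the container client and the GetProperties call are illustrative, not code from this commit:

arrow::Status FetchContainerProperties(
    Azure::Storage::Blobs::BlobContainerClient& container_client) {
  try {
    container_client.GetProperties();  // any Azure SDK call that may throw
    return arrow::Status::OK();
  } catch (const Azure::Storage::StorageException& exception) {
    return ExceptionToStatus("GetProperties failed: ", exception);
  }
}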
3 changes: 0 additions & 3 deletions cpp/src/arrow/filesystem/azurefs_internal.h
@@ -25,9 +25,6 @@

namespace arrow::fs::internal {

Status ExceptionToStatus(const std::string& prefix,
const Azure::Storage::StorageException& exception);

class HierarchicalNamespaceDetector {
public:
Status Init(
