Skip to content

Commit

Permalink
feat(connector): introduce azblob sink (#18244) (#18254)
Browse files Browse the repository at this point in the history
Co-authored-by: congyi wang <[email protected]>
  • Loading branch information
github-actions[bot] and wcy-fdu authored Aug 27, 2024
1 parent 1182ece commit 97d1a56
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/connector/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ nexmark = { version = "0.2", features = ["serde"] }
num-bigint = "0.4"
opendal = { workspace = true, features = [
"executors-tokio",
"services-azblob",
"services-fs",
"services-gcs",
"services-memory",
Expand Down
131 changes: 131 additions & 0 deletions src/connector/src/sink/file_sink/azblob.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
// Copyright 2024 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashMap};

use anyhow::anyhow;
use opendal::layers::{LoggingLayer, RetryLayer};
use opendal::services::Azblob;
use opendal::Operator;
use serde::Deserialize;
use serde_with::serde_as;
use with_options::WithOptions;

use super::opendal_sink::FileSink;
use crate::sink::file_sink::opendal_sink::OpendalSinkBackend;
use crate::sink::{Result, SinkError, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT};
use crate::source::UnknownFields;
/// Connection and location options for the Azure Blob (`azblob`) file sink.
///
/// Every field is deserialized from the option key named in its `serde(rename)` attribute.
// NOTE(review): the new per-field notes below are plain `//` comments on purpose; field doc
// comments appear to be exported into the generated `with_options_sink.yaml` — confirm before
// promoting them to `///`.
#[derive(Deserialize, Debug, Clone, WithOptions)]
pub struct AzblobCommon {
// Container name; handed to the opendal `Azblob` builder via `container(..)`.
#[serde(rename = "azblob.container_name")]
pub container_name: String,
/// The directory where the sink file is located.
#[serde(rename = "azblob.path")]
pub path: String,
// Optional account name. `default` makes a missing key deserialize to `None`; when it is
// `None`, `new_azblob_sink` only logs a warning and leaves the builder value unset.
#[serde(rename = "azblob.credentials.account_name", default)]
pub account_name: Option<String>,
// Optional account key, handled the same way as `account_name`.
#[serde(rename = "azblob.credentials.account_key", default)]
pub account_key: Option<String>,
// Endpoint URL; handed to the opendal `Azblob` builder via `endpoint(..)`. Required.
#[serde(rename = "azblob.endpoint_url")]
pub endpoint_url: String,
}

/// Full set of properties for the azblob sink: the azblob-specific options plus the sink
/// `type` and any leftover options.
#[serde_as]
#[derive(Clone, Debug, Deserialize, WithOptions)]
pub struct AzblobConfig {
// Flattened, so the `azblob.*` keys live at the top level of the option map.
#[serde(flatten)]
pub common: AzblobCommon,

pub r#type: String, // checked in `from_btreemap`: must equal SINK_TYPE_APPEND_ONLY or SINK_TYPE_UPSERT

// Flattened map; presumably receives the option keys not consumed by the fields above
// (serde `flatten` into a map). Exposed through the `UnknownFields` impl.
#[serde(flatten)]
pub unknown_fields: HashMap<String, String>,
}

/// Connector name of the azblob sink; used as `OpendalSinkBackend::SINK_NAME` for `AzblobSink`.
pub const AZBLOB_SINK: &str = "azblob";

impl<S: OpendalSinkBackend> FileSink<S> {
    /// Builds an opendal [`Operator`] for Azure Blob storage from `config`.
    ///
    /// The container name and endpoint URL are always applied to the builder. The account
    /// name and account key are applied only when present; a missing value is reported with
    /// a warning and does not fail the call. The resulting operator is wrapped in a
    /// [`LoggingLayer`] and a [`RetryLayer`], both with default settings.
    ///
    /// # Errors
    /// Propagates the error from `Operator::new` when the builder cannot be turned into an
    /// operator.
    pub fn new_azblob_sink(config: AzblobConfig) -> Result<Operator> {
        let common = config.common;

        let mut builder = Azblob::default();
        builder.container(&common.container_name);
        builder.endpoint(&common.endpoint_url);

        // Credentials are optional: warn and carry on when either one is absent.
        match &common.account_name {
            Some(name) => {
                builder.account_name(name);
            }
            None => {
                tracing::warn!(
                    "account_name azblob is not set, container {}",
                    common.container_name
                );
            }
        }

        match &common.account_key {
            Some(key) => {
                builder.account_key(key);
            }
            None => {
                tracing::warn!(
                    "account_key azblob is not set, container {}",
                    common.container_name
                );
            }
        }

        Ok(Operator::new(builder)?
            .layer(LoggingLayer::default())
            .layer(RetryLayer::default())
            .finish())
    }
}

/// Marker type selecting the azblob backend; it carries no data and exists to implement
/// `OpendalSinkBackend` for use as `FileSink<AzblobSink>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AzblobSink;

impl UnknownFields for AzblobConfig {
    /// Returns an owned copy of the options collected in `unknown_fields`.
    fn unknown_fields(&self) -> HashMap<String, String> {
        HashMap::clone(&self.unknown_fields)
    }
}

impl OpendalSinkBackend for AzblobSink {
type Properties = AzblobConfig;

const SINK_NAME: &'static str = AZBLOB_SINK;

fn from_btreemap(btree_map: BTreeMap<String, String>) -> Result<Self::Properties> {
let config =
serde_json::from_value::<AzblobConfig>(serde_json::to_value(btree_map).unwrap())
.map_err(|e| SinkError::Config(anyhow!(e)))?;
if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT {
return Err(SinkError::Config(anyhow!(
"`{}` must be {}, or {}",
SINK_TYPE_OPTION,
SINK_TYPE_APPEND_ONLY,
SINK_TYPE_UPSERT
)));
}
Ok(config)
}

fn new_operator(properties: AzblobConfig) -> Result<Operator> {
FileSink::<AzblobSink>::new_azblob_sink(properties)
}

fn get_path(properties: Self::Properties) -> String {
properties.common.path
}

fn get_engine_type() -> super::opendal_sink::EngineType {
super::opendal_sink::EngineType::Azblob
}
}
1 change: 1 addition & 0 deletions src/connector/src/sink/file_sink/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod azblob;
pub mod fs;
pub mod gcs;
pub mod opendal_sink;
Expand Down
1 change: 1 addition & 0 deletions src/connector/src/sink/file_sink/opendal_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ pub enum EngineType {
Gcs,
S3,
Fs,
Azblob,
}

impl<S: OpendalSinkBackend> Sink for FileSink<S> {
Expand Down
3 changes: 3 additions & 0 deletions src/connector/src/sink/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,10 @@ macro_rules! for_all_sinks {
{ Doris, $crate::sink::doris::DorisSink },
{ Starrocks, $crate::sink::starrocks::StarrocksSink },
{ S3, $crate::sink::file_sink::opendal_sink::FileSink<$crate::sink::file_sink::s3::S3Sink>},

{ Gcs, $crate::sink::file_sink::opendal_sink::FileSink<$crate::sink::file_sink::gcs::GcsSink> },
{ Azblob, $crate::sink::file_sink::opendal_sink::FileSink<$crate::sink::file_sink::azblob::AzblobSink>},

{ Fs, $crate::sink::file_sink::opendal_sink::FileSink<FsSink> },
{ Snowflake, $crate::sink::snowflake::SnowflakeSink },
{ DeltaLake, $crate::sink::deltalake::DeltaLakeSink },
Expand Down
23 changes: 23 additions & 0 deletions src/connector/with_options_sink.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
# THIS FILE IS AUTO_GENERATED. DO NOT EDIT
# UPDATE WITH: ./risedev generate-with-options

AzblobConfig:
fields:
- name: azblob.container_name
field_type: String
required: true
- name: azblob.path
field_type: String
comments: The directory where the sink file is located.
required: true
- name: azblob.credentials.account_name
field_type: String
required: false
default: Default::default
- name: azblob.credentials.account_key
field_type: String
required: false
default: Default::default
- name: azblob.endpoint_url
field_type: String
required: true
- name: r#type
field_type: String
required: true
BigQueryConfig:
fields:
- name: bigquery.local.path
Expand Down
4 changes: 4 additions & 0 deletions src/frontend/src/handler/create_sink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,7 @@ fn bind_sink_format_desc(session: &SessionImpl, value: ConnectorSchema) -> Resul

static CONNECTORS_COMPATIBLE_FORMATS: LazyLock<HashMap<String, HashMap<Format, Vec<Encode>>>> =
LazyLock::new(|| {
use risingwave_connector::sink::file_sink::azblob::AzblobSink;
use risingwave_connector::sink::file_sink::fs::FsSink;
use risingwave_connector::sink::file_sink::gcs::GcsSink;
use risingwave_connector::sink::file_sink::opendal_sink::FileSink;
Expand Down Expand Up @@ -896,6 +897,9 @@ static CONNECTORS_COMPATIBLE_FORMATS: LazyLock<HashMap<String, HashMap<Format, V
FileSink::<GcsSink>::SINK_NAME => hashmap!(
Format::Plain => vec![Encode::Parquet],
),
FileSink::<AzblobSink>::SINK_NAME => hashmap!(
Format::Plain => vec![Encode::Parquet],
),
FileSink::<FsSink>::SINK_NAME => hashmap!(
Format::Plain => vec![Encode::Parquet],
),
Expand Down

0 comments on commit 97d1a56

Please sign in to comment.