Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into improve_regex2
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Dec 22, 2023
2 parents a47e771 + 0e62fa4 commit 190fa60
Show file tree
Hide file tree
Showing 41 changed files with 1,313 additions and 315 deletions.
80 changes: 35 additions & 45 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ aws-config = "0.55"
aws-credential-types = "0.55"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "34.0.0", features = ["avro", "crypto_expressions", "encoding_expressions", "parquet", "regex_expressions", "unicode_expressions", "compression"] }
datafusion-common = { path = "../datafusion/common" }
dirs = "4.0.0"
env_logger = "0.9"
mimalloc = { version = "0.1", default-features = false }
Expand All @@ -49,6 +50,5 @@ url = "2.2"
[dev-dependencies]
assert_cmd = "2.0"
ctor = "0.2.0"
datafusion-common = { path = "../datafusion/common" }
predicates = "3.0"
rstest = "0.17"
3 changes: 2 additions & 1 deletion datafusion-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ cargo run --example csv_sql
- [`catalog.rs`](examples/external_dependency/catalog.rs): Register the table into a custom catalog
- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider)
- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame against a local parquet file
- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3
- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and write back to s3
- [`dataframe_output.rs`](examples/dataframe_output.rs): Examples of methods which write data out from a DataFrame
- [`dataframe_in_memory.rs`](examples/dataframe_in_memory.rs): Run a query using a DataFrame against data in memory
- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results into rust structs using serde
- [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify and analyze `Expr`s
Expand Down
76 changes: 76 additions & 0 deletions datafusion-examples/examples/dataframe_output.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use datafusion::{dataframe::DataFrameWriteOptions, prelude::*};
use datafusion_common::{parsers::CompressionTypeVariant, DataFusionError};

/// This example demonstrates the various methods to write out a DataFrame to local storage.
/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example
/// using a remote object store.
///
/// The same three-row DataFrame is written four times: into a registered external
/// table, and to Parquet, gzip-compressed CSV, and JSON output locations.
///
/// # Errors
///
/// Returns the first [`DataFusionError`] raised by any step (building the DataFrame,
/// renaming the column, creating the table, or any of the writes).
#[tokio::main]
async fn main() -> Result<(), DataFusionError> {
    let ctx = SessionContext::new();

    // Build the source DataFrame and rename its column in one chain.
    // Ensure the column names and types match the target table.
    // Failures are propagated with `?` (rather than panicking via `unwrap`) so that
    // every fallible step in this function reports errors the same way.
    let df = ctx
        .sql("values ('a'), ('b'), ('c')")
        .await?
        .with_column_renamed("column1", "tablecol1")?;

    // Register the external table that `write_table` targets below.
    ctx.sql(
        "create external table
test(tablecol1 varchar)
stored as parquet
location './datafusion-examples/test_table/'",
    )
    .await?
    .collect()
    .await?;

    // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c').
    // The behavior of write_table depends on the TableProvider's implementation
    // of the insert_into method.
    // Each write consumes the DataFrame it is called on, hence the clones while
    // `df` is still needed afterwards.
    df.clone()
        .write_table("test", DataFrameWriteOptions::new())
        .await?;

    df.clone()
        .write_parquet(
            "./datafusion-examples/test_parquet/",
            DataFrameWriteOptions::new(),
            None,
        )
        .await?;

    df.clone()
        .write_csv(
            "./datafusion-examples/test_csv/",
            // DataFrameWriteOptions contains options which control how data is written
            // such as compression codec
            DataFrameWriteOptions::new().with_compression(CompressionTypeVariant::GZIP),
            None,
        )
        .await?;

    // Last use of `df`: move it instead of cloning.
    df.write_json(
        "./datafusion-examples/test_json/",
        DataFrameWriteOptions::new(),
    )
    .await?;

    Ok(())
}
Loading

0 comments on commit 190fa60

Please sign in to comment.