Merge remote-tracking branch 'upstream/main' into recluster_dis

databendlabs · Oct 19, 2023 · c65b27c · c65b27c
2 parents 59b68cc + fe7fd81
commit c65b27c
Show file tree

Hide file tree

Showing 104 changed files with 1,515 additions and 627 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -541,7 +541,7 @@ jobs:
           gh release upload ${version} sha256sums.txt --clobber
 
   benchmark:
-    needs: [create_release, linux]
+    needs: [create_release, docker_separate]
     uses: ./.github/workflows/reuse.benchmark.yml
     secrets: inherit
     with:

diff --git a/.github/workflows/reuse.benchmark.yml b/.github/workflows/reuse.benchmark.yml
@@ -39,9 +39,9 @@ env:
 
 jobs:
   local:
-    if: inputs.runner_provider == 'aws' && inputs.source == 'release'
+    if: inputs.source == 'release'
     timeout-minutes: 60
-    runs-on: [self-hosted, X64, Linux, 16c32g, "${{ inputs.runner_provider }}"]
+    runs-on: [self-hosted, X64, Linux, 16c32g, aws]
     strategy:
       matrix:
         dataset:

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/docs/doc/10-deploy/02-deploying-databend.md b/docs/doc/10-deploy/02-deploying-databend.md
@@ -306,7 +306,7 @@ bucket = "databend"
 # GCS also supports changing the endpoint URL
 # but the endpoint should be compatible with GCS's JSON API
 # default:
-# endpoint_url = "https://storage.googleapis.com/"
+# endpoint_url = "https://storage.googleapis.com"
 
 # working directory of GCS
 # default:

diff --git a/docs/doc/10-deploy/10-node-config/03-environment-variables.md b/docs/doc/10-deploy/10-node-config/03-environment-variables.md
@@ -38,6 +38,8 @@ Below is a list of available environment variables, each correspondingly mapped
 
 ## Query Environment Variables
 
-The parameters under the [query] and [storage] sections in the configuration file [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) can be configured using environment variables. 
+The parameters under the [query] and [storage] sections in the configuration file [databend-query.toml](https://github.com/datafuselabs/databend/blob/main/scripts/distribution/configs/databend-query.toml) can be configured using environment variables. The names of the environment variables are formed by combining the word QUERY or STORAGE with the corresponding parameter names using underscores. 
 
-The names of the environment variables are formed by combining the word QUERY or STORAGE with the corresponding parameter names using underscores. For example, the environment variable for the parameter **admin_api_address** under the [query] section is QUERY_ADMIN_API_ADDRESS, and the environment variable for the parameter **bucket** under the [storage.s3] section is STORAGE_S3_BUCKET.
+Databend also accepts the environment variables defined by the storage services themselves when they correspond to Databend's own environment variables. This lets you keep using the naming conventions you already know, without having to remember additional variable names, and simplifies your configuration process.
+
+For example, to set the access key ID for S3 using an environment variable, you can use either STORAGE_S3_ACCESS_KEY_ID provided by Databend or the well-known AWS_ACCESS_KEY_ID typically associated with AWS S3.
diff --git a/docs/doc/15-sql-functions/40-string-functions/split.md b/docs/doc/15-sql-functions/40-string-functions/split.md
@@ -0,0 +1,55 @@
+---
+title: SPLIT
+---
+import FunctionDescription from '@site/src/components/FunctionDescription';
+
+<FunctionDescription description="Introduced or updated: v1.2.164"/>
+
+Splits a string using a specified delimiter and returns the resulting parts as an array.
+
+See also: [SPLIT_PART](split_part.md)
+
+## Syntax
+
+```sql
+SPLIT('<input_string>', '<delimiter>')
+```
+
+## Return Type
+
+Array of strings. SPLIT returns NULL when either the input string or the delimiter is NULL.
+
+## Examples
+
+```sql
+-- Use a space as the delimiter
+-- SPLIT returns an array with two parts.
+SELECT SPLIT('Databend Cloud', ' ');
+
+split('databend cloud', ' ')|
+----------------------------+
+['Databend','Cloud']        |
+
+-- Use an empty string as the delimiter or a delimiter that does not exist in the input string
+-- SPLIT returns an array containing the entire input string as a single part.
+SELECT SPLIT('Databend Cloud', '');
+
+split('databend cloud', '')|
+---------------------------+
+['Databend Cloud']         |
+
+SELECT SPLIT('Databend Cloud', ',');
+
+split('databend cloud', ',')|
+----------------------------+
+['Databend Cloud']          |
+
+-- Use '	' (tab) as the delimiter
+-- SPLIT returns an array with timestamp, log level, and message.
+
+SELECT SPLIT('2023-10-19 15:30:45	INFO	Log message goes here', '	');
+
+split('2023-10-19 15:30:45\tinfo\tlog message goes here', '\t')|
+---------------------------------------------------------------+
+['2023-10-19 15:30:45','INFO','Log message goes here']         |
+```
diff --git a/docs/doc/15-sql-functions/40-string-functions/split_part.md b/docs/doc/15-sql-functions/40-string-functions/split_part.md
@@ -0,0 +1,67 @@
+---
+title: SPLIT_PART
+---
+import FunctionDescription from '@site/src/components/FunctionDescription';
+
+<FunctionDescription description="Introduced or updated: v1.2.164"/>
+
+Splits a string using a specified delimiter and returns the specified part.
+
+See also: [SPLIT](split.md)
+
+## Syntax
+
+```sql
+SPLIT_PART('<input_string>', '<delimiter>', <position>)
+```
+
+The *position* argument specifies which part to return. Parts are numbered starting from 1, and *position* can be a positive, negative, or zero value:
+
+- If *position* is a positive number, it returns the part at the position from the left to the right, or an empty string if it doesn't exist.
+- If *position* is a negative number, it returns the part at the position from the right to the left, or an empty string if it doesn't exist.
+- If *position* is 0, it is treated as 1, effectively returning the first part of the string.
+
+## Return Type
+
+String. SPLIT_PART returns NULL when either the input string, the delimiter, or the position is NULL.
+
+## Examples
+
+```sql
+-- Use a space as the delimiter
+-- SPLIT_PART returns a specific part.
+SELECT SPLIT_PART('Databend Cloud', ' ', 1);
+
+split_part('databend cloud', ' ', 1)|
+------------------------------------+
+Databend                            |
+
+-- Use an empty string as the delimiter or a delimiter that does not exist in the input string
+-- SPLIT_PART returns the entire input string.
+SELECT SPLIT_PART('Databend Cloud', '', 1);
+
+split_part('databend cloud', '', 1)|
+-----------------------------------+
+Databend Cloud                     |
+
+SELECT SPLIT_PART('Databend Cloud', ',', 1);
+
+split_part('databend cloud', ',', 1)|
+------------------------------------+
+Databend Cloud                      |
+
+-- Use '   ' (three spaces) as the delimiter
+-- SPLIT_PART returns individual fields.
+SELECT SPLIT_PART('2023-10-19 15:30:45   INFO   Log message goes here', '   ', 3);
+
+split_part('2023-10-19 15:30:45   info   log message goes here', '   ', 3)|
+--------------------------------------------------------------------------+
+Log message goes here                                                     |
+
+-- SPLIT_PART returns an empty string because the specified part does not exist.
+SELECT SPLIT_PART('2023-10-19 15:30:45   INFO   Log message goes here', '   ', 4);
+
+split_part('2023-10-19 15:30:45   info   log message goes here', '   ', 4)|
+--------------------------------------------------------------------------+
+                                                                          |
+```