diff --git a/.github/workflows/java-ci.yaml b/.github/workflows/java-ci.yaml
index eab1b210..1f603c0b 100644
--- a/.github/workflows/java-ci.yaml
+++ b/.github/workflows/java-ci.yaml
@@ -53,14 +53,6 @@ jobs:
with:
submodules: recursive
- - name: Cache for ccache
- uses: actions/cache@v3
- with:
- path: ~/.ccache
- key: ${{ runner.os }}-${{ matrix.metadata }}-ccache-${{ hashFiles('**/git-modules.txt') }}
- restore-keys: |
- ${{ runner.os }}-${{ matrix.metadata }}-ccache-
-
- name: Install Dependencies for Linux
if: runner.os == 'Linux'
run: |
@@ -173,12 +165,76 @@ jobs:
# wait for hive docker ready
sleep 60
+ - name: Hive test
+ run: |
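+ # run the vineyard hive connector tests against the hive docker started above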
+ pushd java/hive/test
+ ./test.sh
+ popd
+
+ - name: Spark with hive test
+ run: |
+ pushd java/hive/test
+ ./spark-hive-test.sh
+ popd
+
+ - name: Stop hive docker
+ run: |
+ pushd java/hive/docker
+ docker-compose -f docker-compose.yaml stop
+ docker-compose -f docker-compose.yaml rm -f
+ popd
+
+ - name: Start mysql container
+ run: |
+ pushd java/hive/docker/dependency/mysql
+ docker-compose -f ./mysql-compose.yaml up -d
+ popd
+
+ - name: Start vineyard server for hive distributed test
+ run: |
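+ # one vineyardd per container that mounts an IPC socket in docker-compose-distributed.yaml:
+ # metastore, hiveserver2, and the three node managers, all sharing a single etcd endpoint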
+ ./build/bin/vineyardd --socket=./build/vineyard_sock/metastore/vineyard.sock -rpc_socket_port=18880 --etcd_endpoint="0.0.0.0:2383" &
+ ./build/bin/vineyardd --socket=./build/vineyard_sock/hiveserver/vineyard.sock -rpc_socket_port=18881 --etcd_endpoint="0.0.0.0:2383" &
+ ./build/bin/vineyardd --socket=./build/vineyard_sock/0/vineyard.sock -rpc_socket_port=18882 --etcd_endpoint="0.0.0.0:2383" &
+ ./build/bin/vineyardd --socket=./build/vineyard_sock/1/vineyard.sock -rpc_socket_port=18883 --etcd_endpoint="0.0.0.0:2383" &
+ ./build/bin/vineyardd --socket=./build/vineyard_sock/2/vineyard.sock -rpc_socket_port=18884 --etcd_endpoint="0.0.0.0:2383" &
+
+ - name: Start hadoop cluster
+ run: |
+ pushd java/hive/docker
+ docker-compose -f docker-compose-distributed.yaml up -d
+ popd
+
+ # wait for hive docker ready
+ sleep 60
+
+ - name: Hive distributed test
+ run: |
+ pushd java/hive/test
+ ./distributed-test.sh
+ popd
+
+ - name: Spark with hive distributed test
+ run: |
+ pushd java/hive/test
+ ./spark-hive-distributed-test.sh
+ popd
+
- name: Setup tmate session
if: false
uses: mxschmitt/action-tmate@v3
- - name: Hive test
+ - name: Stop containers
run: |
- pushd java/hive/test
- ./test.sh
+ pushd java/hive/docker
+ docker-compose -f docker-compose-distributed.yaml stop
+ docker-compose -f docker-compose-distributed.yaml rm -f
popd
+
+ pushd java/hive/docker/dependency/mysql
+ docker-compose -f ./mysql-compose.yaml stop
+ docker-compose -f ./mysql-compose.yaml rm -f
+ popd
+
diff --git a/.gitignore b/.gitignore
index 50462dfe..7b1fb812 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,8 +27,8 @@ cmake-build-debug
*.whl
# hive sql work directory
-/java/hive/distributed/docker/mysql/conf/
-/java/hive/distributed/docker/mysql/data/
+/java/hive/docker/dependency/mysql/conf/
+/java/hive/docker/dependency/mysql/data/
# coredump
core.*.*
diff --git a/java/hive/README.rst b/java/hive/README.rst
index d0053f7e..7381a002 100644
--- a/java/hive/README.rst
+++ b/java/hive/README.rst
@@ -306,7 +306,7 @@ Build Hive Docker Image with Hadoop
### Build docker images
```bash
- cd v6d/java/hive/distributed
+ cd v6d/java/hive/docker
./build.sh
```
@@ -322,25 +322,6 @@ Build Hive Docker Image with Hadoop
# You can change the password in mysql-compose.yaml and hive-site.xml
```
-### Run hadoop & hive docker images
-```bash
- cd v6d/java/hive/docker
- docker-compose -f docker-compose-distributed.yaml up -d
-```
-
-### Create table
-```bash
- docker exec -it hive-hiveserver2 beeline -u "jdbc:hive2://hive-hiveserver2:10000" -n root
-```
-
-```sql
- -- in beeline
- drop table test_hive1;
- create table test_hive1(field int);
- insert into table test_hive1 values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
- select * from test_hive1;
-```
-
Using vineyard as storage
-----------------
@@ -366,23 +347,27 @@ Using vineyard as storage
### Copy vineyard jars to share dir
```bash
- mkdir -p ~/share
+ mkdir -p v6d/share
cd v6d/java/hive
# you can change share dir in docker-compose.yaml
- cp target/vineyard-hive-0.1-SNAPSHOT.jar ~/share
+ cp target/vineyard-hive-0.1-SNAPSHOT.jar ../../share
```
-### Create table with vineyard
+### Run hadoop & hive docker images
+```bash
+ cd v6d/java/hive/docker
+ docker-compose -f docker-compose-distributed.yaml up -d
+```
+
+### Create table
```bash
docker exec -it hive-hiveserver2 beeline -u "jdbc:hive2://hive-hiveserver2:10000" -n root
```
```sql
-- in beeline
- drop table test_vineyard;
- create table test_vineyard(field int)
- stored as Vineyard
- location "vineyard:///user/hive_remote/warehouse/test_vineyard";
- insert into table test_vineyard values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
- select * from test_vineyard;
+ drop table test_hive;
+ create table test_hive(field int);
+ insert into table test_hive values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
+ select * from test_hive;
```
diff --git a/java/hive/docker/README.rst b/java/hive/docker/README.rst
deleted file mode 100644
index 1e26814e..00000000
--- a/java/hive/docker/README.rst
+++ /dev/null
@@ -1,98 +0,0 @@
-Build Hive Docker Image with Hadoop
------------------
-
-### Prepare vineyard jars
-```bash
- # Currently, the vineyard jar cannot run directly on hive because of
- # dependency conflicts. You can run it temporarily by reverting to an
- # older version of guava (such as 14.0.1) dependent by vineyard.
- # This problem will be fixed in the future.
- mvn clean package
-```
-
-### Build docker images
-```bash
- cd v6d/java/hive/docker
- ./build.sh
-```
-
-### Create network
-```bash
- docker network create hadoop-network
-```
-
-### Start sql server for hive metastore
-```bash
- cd v6d/java/hive/docker/dependency/mysql
- docker-compose -f mysql-compose.yaml up -d
- # You can change the password in mysql-compose.yaml and hive-site.xml
-```
-
-### Run hadoop & hive docker images
-```bash
- cd v6d/java/hive/docker
- docker-compose -f docker-compose-distributed.yaml up -d
-```
-
-### Prepare tez jars
-```bash
- docker exet -it hive-metastore bash
- # in docker
- hdfs dfs -put /tez.tar.gz /
- exit
-```
-
-### Restart all services
-```bash
- cd v6d/java/hive/distributed/docker
- docker-compose -f docker-compose.yaml restart
-```
-
-### Create table
-```bash
- docker exec -it hive-hiveserver2 beeline -u "jdbc:hive2://hive-hiveserver2:10000" -n root
-```
-
-Using vineyard as storage
------------------
-
-### Run vineyardd
-```bash
- cd v6d/build
-
- # at terminal 1
- ./bin/vineyardd --socket=~/vineyard_sock/0/vineyard.sock -rpc_socket_port=9601 --etcd_endpoint="0.0.0.0:2382"
-
- # at terminal 2
- ./bin/vineyardd --socket=~/vineyard_sock/1/vineyard.sock -rpc_socket_port=9602 --etcd_endpoint="0.0.0.0:2382"
-
- # at terminal 3
- ./bin/vineyardd --socket=~/vineyard_sock/2/vineyard.sock -rpc_socket_port=9603 --etcd_endpoint="0.0.0.0:2382"
-
- # at terminal 4
- ./bin/vineyardd --socket=~/vineyard_sock/metastore/vineyard.sock -rpc_socket_port=9604 --etcd_endpoint="0.0.0.0:2382"
-
- # at terminal 5
- ./bin/vineyardd --socket=~/vineyard_sock/hiveserver/vineyard.sock -rpc_socket_port=9605 --etcd_endpoint="0.0.0.0:2382"
-```
-
-### Copy vineyard jars to share dir
-```bash
- mkdir -p ~/share
- cd v6d/java/hive
- # you can change share dir in docker-compose.yaml
- cp target/vineyard-hive-0.1-SNAPSHOT.jar ~/share
-```
-
-### Create table with vineyard
-```bash
- docker exec -it hive-hiveserver2 beeline -u "jdbc:hive2://hive-hiveserver2:10000" -n root
-```
-
-```sql
- -- in beeline
- drop table test_vineyard;
- create table test_vineyard(field int);
- insert into table test_vineyard values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
- select * from test_vineyard;
-```
\ No newline at end of file
diff --git a/java/hive/docker/build.sh b/java/hive/docker/build.sh
index 10e40b8c..5265c2f6 100755
--- a/java/hive/docker/build.sh
+++ b/java/hive/docker/build.sh
@@ -1,10 +1,11 @@
WORK_DIR=~/hive-workdir
mkdir -p "$WORK_DIR"
-find ~/hive-workdir -maxdepth 1 -mindepth 1 ! -name '*.tar.gz' -exec rm -rf {} \;
+find ~/hive-workdir -maxdepth 1 -mindepth 1 ! \( -name '*.tar.gz' -o -name '*.tgz' \) -exec rm -rf {} \;
TEZ_VERSION=${TEZ_VERSION:-"0.9.1"}
HIVE_VERSION=${HIVE_VERSION:-"2.3.9"}
+SPARK_VERSION=${SPARK_VERSION:-"3.4.1"}
if [ -f "$WORK_DIR/apache-tez-$TEZ_VERSION-bin.tar.gz" ]; then
echo "Tez exists, skipping download..."
@@ -42,13 +43,37 @@ else
fi
fi
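+# fetch the Spark binary distribution; re-runs reuse the tarball already cached in $WORK_DIR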
+if [ -f "$WORK_DIR/spark-$SPARK_VERSION-bin-hadoop3.tgz" ]; then
+ echo "Spark exists, skipping download..."
+else
+ echo "Download Spark..."
+ SPARK_URL=${SPARK_URL:-"https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop3.tgz"}
+ echo "Downloading Spark from $SPARK_URL..."
+ if ! curl --fail -L "$SPARK_URL" -o "$WORK_DIR/spark-$SPARK_VERSION-bin-hadoop3.tgz"; then
+ echo "Failed to download Spark, exiting..."
+ exit 1
+ fi
+fi
+
cp -R ./dependency/images/ "$WORK_DIR/"
cp ../target/vineyard-hive-0.1-SNAPSHOT.jar "$WORK_DIR/images/"
tar -xzf "$WORK_DIR/apache-hive-$HIVE_VERSION-bin.tar.gz" -C "$WORK_DIR/"
tar -xzf "$WORK_DIR/apache-tez-$TEZ_VERSION-bin.tar.gz" -C "$WORK_DIR/"
+tar -xzf "$WORK_DIR/spark-$SPARK_VERSION-bin-hadoop3.tgz" -C "$WORK_DIR/"
mv "$WORK_DIR/apache-hive-$HIVE_VERSION-bin" "$WORK_DIR/images/hive"
mv "$WORK_DIR/apache-tez-$TEZ_VERSION-bin" "$WORK_DIR/images/tez"
+mv "$WORK_DIR/spark-$SPARK_VERSION-bin-hadoop3" "$WORK_DIR/images/spark"
+
+network_name="hadoop-network"
+
+if [[ -z $(docker network ls --filter name=^${network_name}$ --format="{{.Name}}") ]]; then
+ echo "Docker network ${network_name} does not exist, creating it..."
+ docker network create "${network_name}"
+else
+ echo "Docker network ${network_name} already exists"
+fi
docker build \
"$WORK_DIR/images" \
diff --git a/java/hive/docker/dependency/images/Dockerfile b/java/hive/docker/dependency/images/Dockerfile
index 6307eb4b..e0a9bafc 100755
--- a/java/hive/docker/dependency/images/Dockerfile
+++ b/java/hive/docker/dependency/images/Dockerfile
@@ -24,7 +24,14 @@ COPY hive /opt/apache/hive/
COPY hive-config/ /hive-config
COPY hive-config-distributed/ /hive-config-distributed
-ENV PATH=${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${PATH}
+# prepare spark
+COPY spark /opt/apache/spark/
+ENV SPARK_HOME=/opt/apache/spark
+COPY spark-config/ /spark-config
+COPY spark-config-distributed/ /spark-config-distributed
+COPY ./vineyard-hive-0.1-SNAPSHOT.jar ${SPARK_HOME}/jars/
+
+ENV PATH=${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${SPARK_HOME}/bin:${PATH}
COPY bootstrap.sh /opt/apache/
COPY mysql-connector-java-5.1.49/mysql-connector-java-5.1.49-bin.jar ${HIVE_HOME}/lib/
@@ -35,4 +42,5 @@ RUN sudo yum -y install unzip; \
sudo rm /usr/lib64/libstdc++.so.6; \
sudo ln -s /usr/lib64/libstdc++.so.6.0.26 /usr/lib64/libstdc++.so.6; \
sudo yum -y install vim; \
- rm libstdc.so_.6.0.26.zip libstdc++.so.6.0.26;
+ rm libstdc.so_.6.0.26.zip libstdc++.so.6.0.26; \
+ sudo yum install -y net-tools
diff --git a/java/hive/docker/dependency/images/hive-config/hive-site.xml b/java/hive/docker/dependency/images/hive-config/hive-site.xml
index 68dd37c1..7ece0476 100644
--- a/java/hive/docker/dependency/images/hive-config/hive-site.xml
+++ b/java/hive/docker/dependency/images/hive-config/hive-site.xml
@@ -112,4 +112,8 @@
hive.metastore.client.connect.retry.delay
5s
+
+ hive.metastore.uris
+ thrift://metastore:9083
+
diff --git a/java/hive/docker/dependency/images/spark-config-distributed/spark-defaults.conf b/java/hive/docker/dependency/images/spark-config-distributed/spark-defaults.conf
new file mode 100644
index 00000000..75b03864
--- /dev/null
+++ b/java/hive/docker/dependency/images/spark-config-distributed/spark-defaults.conf
@@ -0,0 +1 @@
+spark.yarn.stagingDir=file:///tmp/
diff --git a/java/hive/docker/dependency/images/spark-config/spark-defaults.conf b/java/hive/docker/dependency/images/spark-config/spark-defaults.conf
new file mode 100644
index 00000000..b8c0cf29
--- /dev/null
+++ b/java/hive/docker/dependency/images/spark-config/spark-defaults.conf
@@ -0,0 +1,54 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Default system properties included when running spark-submit.
+# This is useful for setting default environmental settings.
+
+# Example:
+# spark.master spark://master:7077
+# spark.eventLog.enabled true
+# spark.eventLog.dir hdfs://namenode:8021/directory
+# spark.serializer org.apache.spark.serializer.KryoSerializer
+# spark.driver.memory 5g
+# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
+
+# spark.sql.inMemoryColumnarStorage.enableVectorizedReader=false
+# spark.sql.orc.enableNestedColumnVectorizedReader=false
+# spark.sql.orc.enableVectorizedReader=false
+# spark.sql.parquet.enableNestedColumnVectorizedReader=true
+# spark.sql.parquet.enableVectorizedReader=true
+# spark.hive.default.fileformat=Vineyard
+# spark.hive.metastore.warehouse.dir=vineyard:///opt/hive/data/warehouse
+# spark.sql.orc.impl=native
+# spark.sql.hive.convertMetastoreOrc=true
+# spark.hive.vectorized.execution.enabled=true
+# spark.hive.fetch.task.conversion=none
+# spark.hive.vectorized.use.vectorized.input.format=true
+# spark.hive.vectorized.use.row.serde.deserialize=false
+# spark.hive.vectorized.use.vector.serde.deserialize=true
+# spark.hive.vectorized.execution.reduce.enabled=true
+# spark.hive.exec.dynamic.partition.mode=nonstrict
+# spark.hive.metastore.sasl.enabled=false
+# spark.hive.server2.authentication=NOSASL
+# spark.hive.metastore.execute.setugi=false
+# spark.hive.metastore.warehouse.dir=/opt/hive/data/warehouse
+# spark.sql.warehouse.dir=/opt/hive/data/warehouse
+# spark.sql.hive.metastore.version=2.3.9
+# spark.sql.hive.metastore.jars=path
+# spark.sql.hive.metastore.jars.path=/opt/apache/hive/lib/*,/auxlib/*
+
+# spark.sql.catalogImplementation=hive
diff --git a/java/hive/docker/docker-compose-distributed.yaml b/java/hive/docker/docker-compose-distributed.yaml
index c7bca852..065ad889 100755
--- a/java/hive/docker/docker-compose-distributed.yaml
+++ b/java/hive/docker/docker-compose-distributed.yaml
@@ -16,8 +16,7 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/0/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../share:/auxlib/
healthcheck:
test: ["CMD-SHELL", "netstat -tnlp|grep :${HADOOP_YARN_RM_PORT} || exit 1"]
interval: 20s
@@ -46,8 +45,8 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/0/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../build/vineyard_sock/0/:/tmp/vineyard_sock
+ - ../../../share:/auxlib/
environment:
VINEYARD_IPC_SOCKET: /tmp/vineyard_sock/vineyard.sock
hadoop-yarn-nm-1:
@@ -73,8 +72,8 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/1/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../build/vineyard_sock/1/:/tmp/vineyard_sock
+ - ../../../share:/auxlib/
environment:
VINEYARD_IPC_SOCKET: /tmp/vineyard_sock/vineyard.sock
hadoop-yarn-nm-2:
@@ -100,8 +99,8 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/2/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../build/vineyard_sock/2/:/tmp/vineyard_sock
+ - ../../../share:/auxlib/
environment:
VINEYARD_IPC_SOCKET: /tmp/vineyard_sock/vineyard.sock
hadoop-yarn-proxyserver:
@@ -122,7 +121,7 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/share:/auxlib/
+ - ../../../share:/auxlib/
- ~/hive-test:/test/
healthcheck:
test: ["CMD-SHELL", "netstat -tnlp|grep :${HADOOP_YARN_PROXYSERVER_PORT} || exit 1"]
@@ -147,8 +146,7 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/historyserver/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../share:/auxlib/
healthcheck:
test: ["CMD-SHELL", "netstat -tnlp|grep :${HADOOP_MR_HISTORYSERVER_PORT} || exit 1"]
interval: 30s
@@ -176,8 +174,8 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/metastore/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../build/vineyard_sock/metastore/:/tmp/vineyard_sock
+ - ../../../share:/auxlib/
environment:
VINEYARD_IPC_SOCKET: /tmp/vineyard_sock/vineyard.sock
hive-hiveserver2:
@@ -203,8 +201,8 @@ services:
volumes:
- ~/hive-tmp:/tmp/
- ~/hive-user:/user/
- - ~/vineyard_sock/hiveserver/:/tmp/vineyard_sock
- - ~/share:/auxlib/
+ - ../../../build/vineyard_sock/hiveserver/:/tmp/vineyard_sock
+ - ../../../share:/auxlib/
environment:
VINEYARD_IPC_SOCKET: /tmp/vineyard_sock/vineyard.sock
diff --git a/java/hive/docker/docker-compose.yaml b/java/hive/docker/docker-compose.yaml
index c1c3ba54..6fc8a833 100644
--- a/java/hive/docker/docker-compose.yaml
+++ b/java/hive/docker/docker-compose.yaml
@@ -59,6 +59,24 @@ services:
- ../../../build/vineyard:/tmp/vineyard
- ../../../share:/opt/hive/auxlib
+ spark:
+ image: apache/hadoop_hive:v1
+ user: "root:root"
+ depends_on:
+ - metastore
+ container_name: spark
+ networks:
+ - hive
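+ # keep the container alive so the test scripts can docker-exec spark-shell into it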
+ command: "tail -f /dev/null"
+ volumes:
+ - /user/hive/warehouse:/opt/hive/data/warehouse
+ - /user/hive/warehouse:/user/hive/warehouse
+ - ../../../java/hive/conf/hive-site.xml:/opt/hive/conf/hive-site.xml
+ - ../../../build/vineyard:/tmp/vineyard
+ - ../../../share:/auxlib
+ environment:
+ VINEYARD_IPC_SOCKET: /tmp/vineyard/vineyard.sock
networks:
hive:
diff --git a/java/hive/test/distributed-test.sh b/java/hive/test/distributed-test.sh
new file mode 100755
index 00000000..d2b99676
--- /dev/null
+++ b/java/hive/test/distributed-test.sh
@@ -0,0 +1,43 @@
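+#!/bin/bash
+# Run every .q file under ./query inside the hive-hiveserver2 container via beeline,
+# then copy /tmp/out back and diff each result against its counterpart in ./expected.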
+outdir=./query/out
+if [ -d "$outdir" ]; then
+ rm -r "$outdir"
+fi
+docker cp ./query hive-hiveserver2:/tmp/
+
+for file in ./query/*; do
+ query=$(basename "$file")
+ docker exec hive-hiveserver2 beeline -u 'jdbc:hive2://localhost:10000/;transportMode=https;httpPath=cliservice' \
+ -f /tmp/query/"$query" -n root
+done
+
+docker cp hive-hiveserver2:/tmp/out ./query/
+for dir in ./query/out/*; do
+ cat "$dir"/* > "./query/out/$(basename "$dir").q.out"
+ rm -r "$dir"
+done
+
+filecount=$(find ./query/ -maxdepth 1 -name "*.q" | wc -l)
+testedcount=$(find ./query/out/ -maxdepth 1 -name "*.q.out" | wc -l)
+successcount=0
+failedcount=0
+
+for file in ./query/out/*; do
+ if [ -f "$file" ]; then
+ echo "Diff $file with expected/$(basename "$file")"
+ if diff -a "$file" ./expected/$(basename "$file"); then
+ successcount=$((successcount+1))
+ else
+ failedcount=$((failedcount+1))
+ fi
+ fi
+done
+
+echo "Total test: $filecount Success: $successcount Failed: $failedcount Skipped: $((filecount-testedcount))"
+if [ $successcount -eq $filecount ]; then
+ exit 0
+else
+ exit 1
+fi
diff --git a/java/hive/test/expected/test_all_primitive_types.q.out b/java/hive/test/expected/test_all_primitive_types.q.out
index 1c1aea94..9bb1cf81 100644
--- a/java/hive/test/expected/test_all_primitive_types.q.out
+++ b/java/hive/test/expected/test_all_primitive_types.q.out
@@ -1 +1 @@
-1,1,42,1,2.0,1.0,hello world1!,hello worl,hello worl,aGVsbG8gd29ybGQ0IQ==,2023-12-31,true,2023-12-31 23:59:59,1235.00
+1,1,42,1,2.0,1.0,hello world1!,hello world2!,hello world3! ,aGVsbG8gd29ybGQ0IQ==,2023-12-31,true,2023-12-31 23:59:59,1235.00
diff --git a/java/hive/test/query/out/test_all_primitive_types.q.out b/java/hive/test/query/out/test_all_primitive_types.q.out
deleted file mode 100644
index 1c1aea94..00000000
--- a/java/hive/test/query/out/test_all_primitive_types.q.out
+++ /dev/null
@@ -1 +0,0 @@
-1,1,42,1,2.0,1.0,hello world1!,hello worl,hello worl,aGVsbG8gd29ybGQ0IQ==,2023-12-31,true,2023-12-31 23:59:59,1235.00
diff --git a/java/hive/test/query/out/test_hive_dynamic_partition.q.out b/java/hive/test/query/out/test_hive_dynamic_partition.q.out
deleted file mode 100644
index fdfc4c8f..00000000
--- a/java/hive/test/query/out/test_hive_dynamic_partition.q.out
+++ /dev/null
@@ -1,3 +0,0 @@
-1,2,1,2017
-1,2,1,2018
-3,4,1,2018
diff --git a/java/hive/test/query/out/test_hive_static_partition.q.out b/java/hive/test/query/out/test_hive_static_partition.q.out
deleted file mode 100644
index d1719b1e..00000000
--- a/java/hive/test/query/out/test_hive_static_partition.q.out
+++ /dev/null
@@ -1,10 +0,0 @@
-1,2,114514
-999,2,666
-999,2,666
-999,2,666
-3,4,666
-999,2,666
-999,2,666
-999,2,666
-3,4,666
-1,2,114514
diff --git a/java/hive/test/query/out/test_insert.q.out b/java/hive/test/query/out/test_insert.q.out
deleted file mode 100644
index 03c9c360..00000000
--- a/java/hive/test/query/out/test_insert.q.out
+++ /dev/null
@@ -1,3 +0,0 @@
-a,1
-b,2
-c,3
diff --git a/java/hive/test/query/out/test_nested_types.q.out b/java/hive/test/query/out/test_nested_types.q.out
deleted file mode 100644
index 1b425d3d..00000000
--- a/java/hive/test/query/out/test_nested_types.q.out
+++ /dev/null
@@ -1 +0,0 @@
-421hello2world!
diff --git a/java/hive/test/query/test_all_primitive_types.q b/java/hive/test/query/test_all_primitive_types.q
index 48466b11..ecd953c6 100644
--- a/java/hive/test/query/test_all_primitive_types.q
+++ b/java/hive/test/query/test_all_primitive_types.q
@@ -7,8 +7,8 @@ create table test_all_primitive_types (
field_5 double,
field_6 float,
field_7 string,
- field_9 varchar(10),
- field_10 char(10),
+ field_9 varchar(20),
+ field_10 char(20),
field_8 binary,
field_11 date,
field_12 boolean,
@@ -32,7 +32,7 @@ insert into test_all_primitive_types select
timestamp('2023-12-31 23:59:59'),
cast(1234.56 as decimal);
-insert overwrite directory '/tmp/out/test_all_primitive_types/'
+insert overwrite directory 'file:///tmp/out/test_all_primitive_types/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
select * from test_all_primitive_types;
drop table test_all_primitive_types;
\ No newline at end of file
diff --git a/java/hive/test/query/test_hive_dynamic_partition.q b/java/hive/test/query/test_hive_dynamic_partition.q
index 155458bd..3bf15cd0 100644
--- a/java/hive/test/query/test_hive_dynamic_partition.q
+++ b/java/hive/test/query/test_hive_dynamic_partition.q
@@ -15,7 +15,7 @@ create table hive_dynamic_partition_test
)partitioned by(mounth int, year int);
insert into table hive_dynamic_partition_test partition(mounth=1, year) select src_id,dst_id,year from hive_dynamic_partition_data;
-insert overwrite directory '/tmp/out/test_hive_dynamic_partition/'
+insert overwrite directory 'file:///tmp/out/test_hive_dynamic_partition/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
select * from hive_dynamic_partition_test
order by src_id asc;
diff --git a/java/hive/test/query/test_static_partition.q b/java/hive/test/query/test_hive_static_partition.q
similarity index 91%
rename from java/hive/test/query/test_static_partition.q
rename to java/hive/test/query/test_hive_static_partition.q
index 5337ee07..9f3e28bd 100644
--- a/java/hive/test/query/test_static_partition.q
+++ b/java/hive/test/query/test_hive_static_partition.q
@@ -21,7 +21,7 @@ select * from hive_static_partition where value=666
union all
select * from hive_static_partition where value=114514;
-insert overwrite directory '/tmp/out/test_hive_static_partition/'
+insert overwrite directory 'file:///tmp/out/test_hive_static_partition/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
select * from result
order by field_1 asc;
diff --git a/java/hive/test/query/test_insert.q b/java/hive/test/query/test_insert.q
index 0c53f444..029194ad 100644
--- a/java/hive/test/query/test_insert.q
+++ b/java/hive/test/query/test_insert.q
@@ -3,7 +3,7 @@ create table hive_example(field_1 string,field_2 int);
insert into hive_example values('a', 1), ('b', 2), ('c', 3);
-insert overwrite directory '/tmp/out/test_insert/'
+insert overwrite directory 'file:///tmp/out/test_insert/'
row format delimited fields terminated by ','
select * from hive_example
order by field_1 asc;
diff --git a/java/hive/test/query/test_nested_types.q b/java/hive/test/query/test_nested_types.q
index 1366e970..8a0a8c56 100644
--- a/java/hive/test/query/test_nested_types.q
+++ b/java/hive/test/query/test_nested_types.q
@@ -13,7 +13,7 @@ insert into nested_table select
named_struct('field_1', 2,
'field_2', 'world!')));
-insert overwrite directory '/tmp/out/test_nested_types/'
+insert overwrite directory 'file:///tmp/out/test_nested_types/'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
select * from nested_table;
drop table nested_table;
\ No newline at end of file
diff --git a/java/hive/test/spark-hive-distributed-test.sh b/java/hive/test/spark-hive-distributed-test.sh
new file mode 100755
index 00000000..a78e5ff8
--- /dev/null
+++ b/java/hive/test/spark-hive-distributed-test.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
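+# Run every Scala script under ./spark-query on YARN via the hive-metastore container,
+# then copy /tmp/spark-out back and diff each result against its counterpart in ./expected.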
+
+outdir=./spark-query/spark-out
+if [ -d "$outdir" ]; then
+ rm -r "$outdir"
+fi
+
+docker exec hive-metastore sh -c 'cp /hive-config-distributed/hive-site.xml $SPARK_HOME/conf/'
+docker exec hive-metastore sh -c 'cp /spark-config-distributed/* $SPARK_HOME/conf/'
+
+docker cp ./spark-query hive-metastore:/tmp/
+
+for file in ./spark-query/*; do
+ query=$(basename "$file")
+ docker exec hive-metastore spark-shell --master yarn -i /tmp/spark-query/"$query"
+done
+
+docker cp hive-metastore:/tmp/spark-out ./spark-query/
+for dir in ./spark-query/spark-out/*; do
+ cat "$dir"/part-* > "./spark-query/spark-out/$(basename "$dir").q.out"
+ rm -r "$dir"
+done
+
+filecount=$(find ./spark-query/ -name "*.scala" | wc -l)
+testedcount=$(find ./spark-query/spark-out/ -name "*.out" | wc -l)
+successcount=0
+failedcount=0
+
+for file in ./spark-query/spark-out/*; do
+ if [ -f "$file" ]; then
+ echo "Diff $file with expected/$(basename "$file")"
+ if diff -a "$file" ./expected/$(basename "$file"); then
+ successcount=$((successcount+1))
+ else
+ failedcount=$((failedcount+1))
+ fi
+ fi
+done
+
+echo "Total test: $filecount Success: $successcount Failed: $failedcount Skipped: $((filecount-testedcount))"
+
+if [ $successcount -eq $filecount ]; then
+ exit 0
+else
+ exit 1
+fi
\ No newline at end of file
diff --git a/java/hive/test/spark-hive-test.sh b/java/hive/test/spark-hive-test.sh
new file mode 100755
index 00000000..4ce53a0b
--- /dev/null
+++ b/java/hive/test/spark-hive-test.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
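+# Run every Scala script under ./spark-query inside the standalone spark container,
+# then copy /tmp/spark-out back and diff each result against its counterpart in ./expected.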
+
+outdir=./spark-query/spark-out
+if [ -d "$outdir" ]; then
+ rm -r "$outdir"
+fi
+
+docker exec spark sh -c 'cp /hive-config/hive-site.xml $SPARK_HOME/conf/'
+docker exec spark sh -c 'cp /spark-config/* $SPARK_HOME/conf/'
+
+docker cp ./spark-query spark:/tmp/
+
+for file in ./spark-query/*; do
+ query=$(basename "$file")
+ docker exec spark spark-shell -i /tmp/spark-query/"$query"
+done
+
+docker cp spark:/tmp/spark-out ./spark-query/
+for dir in ./spark-query/spark-out/*; do
+ cat "$dir"/part-* > "./spark-query/spark-out/$(basename "$dir").q.out"
+ rm -r "$dir"
+done
+
+filecount=$(find ./spark-query/ -name "*.scala" | wc -l)
+testedcount=$(find ./spark-query/spark-out/ -name "*.out" | wc -l)
+successcount=0
+failedcount=0
+
+for file in ./spark-query/spark-out/*; do
+ if [ -f "$file" ]; then
+ echo "Diff $file with expected/$(basename "$file")"
+ if diff -a "$file" ./expected/$(basename "$file"); then
+ successcount=$((successcount+1))
+ else
+ failedcount=$((failedcount+1))
+ fi
+ fi
+done
+
+echo "Total test: $filecount Success: $successcount Failed: $failedcount Skipped: $((filecount-testedcount))"
+
+if [ $successcount -eq $filecount ]; then
+ exit 0
+else
+ exit 1
+fi
\ No newline at end of file
diff --git a/java/hive/test/spark-query/test_all_primitive_types.scala b/java/hive/test/spark-query/test_all_primitive_types.scala
new file mode 100644
index 00000000..f7d36f8b
--- /dev/null
+++ b/java/hive/test/spark-query/test_all_primitive_types.scala
@@ -0,0 +1,79 @@
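+// Exercises every primitive column type on a vineyard-backed hive table;
+// the test harness diffs the output written to /tmp/spark-out against ./expected.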
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.SparkSession
+
+val conf = new SparkConf()
+conf.setAppName("Spark on Vineyard").setMaster("yarn").set("spark.scheduler.minRegisteredResourcesRatio", "1.0")
+
+val spark = SparkSession.builder().config(conf).enableHiveSupport().getOrCreate()
+
+spark.sql(
+ """
+ |show tables;
+ |""".stripMargin).show()
+
+spark.sql(
+ """
+ |drop table if exists test_all_primitive_types;
+ |""".stripMargin)
+
+spark.sql(
+ """
+ | create table test_all_primitive_types (
+ | field_1 tinyint,
+ | field_2 smallint,
+ | field_3 bigint,
+ | field_4 int,
+ | field_5 double,
+ | field_6 float,
+ | field_7 string,
+ | field_9 varchar(20),
+ | field_10 char(20),
+ | field_8 binary,
+ | field_11 date,
+ | field_12 boolean,
+ | field_13 timestamp,
+ | field_14 decimal(6, 2)
+ | )
+ | row format serde "io.v6d.hive.ql.io.VineyardSerDe"
+ | stored as
+ | INPUTFORMAT 'io.v6d.hive.ql.io.VineyardInputFormat'
+ | OUTPUTFORMAT 'io.v6d.hive.ql.io.VineyardOutputFormat'
+ | LOCATION 'vineyard:///opt/hive/data/warehouse/spark_example'
+ |""".stripMargin).show()
+
+spark.sql(
+ """
+ | insert into test_all_primitive_types select
+ | tinyint(1),
+ | smallint(1),
+ | 42,
+ | bigint(1),
+ | double(2.0),
+ | float(1.0),
+ | 'hello world1!',
+ | 'hello world2!',
+ | 'hello world3!',
+ | cast('hello world4!' as binary),
+ | date('2023-12-31'),
+ | true,
+ | timestamp('2023-12-31 23:59:59'),
+ | cast(1234.56 as decimal);
+ |""".stripMargin).show()
+
+spark.sql(
+ """
+ | insert overwrite directory '/tmp/spark-out/test_all_primitive_types/'
+ | ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+ | select * from test_all_primitive_types;
+ |""".stripMargin).show(truncate=false)
+
+spark.sql(
+ """
+ | drop table test_all_primitive_types;
+ |""".stripMargin).show(truncate=false)
+
+System.exit(0)