diff --git a/.github/workflows/hive-base-test.yml b/.github/workflows/hive-base-test.yml
new file mode 100644
index 0000000000..f4b1523283
--- /dev/null
+++ b/.github/workflows/hive-base-test.yml
@@ -0,0 +1,188 @@
+# Copyright 2020-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Vineyard Hive base CI
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'java/hive/**'
+      - '.github/workflows/hive-base-test.yml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'java/hive/**'
+      - '.github/workflows/hive-base-test.yml'
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.event.number || github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+env:
+  CMAKE_C_COMPILER_LAUNCHER: ccache
+  CMAKE_CXX_COMPILER_LAUNCHER: ccache
+
+jobs:
+  ci:
+    runs-on: ${{ matrix.os }}
+    if: true #${{ github.repository == 'v6d-io/v6d' }}
+    strategy:
+      matrix:
+        os: [ubuntu-20.04]
+        malloc: [dlmalloc]
+        metadata: [etcd]
+    env:
+      RUNNER_ARGS: "--meta=${{ matrix.metadata }}"
+      VINEYARD_IPC_SOCKET: "/tmp/vineyard.ci.sock"
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Cache for ccache
+        uses: actions/cache@v3
+        with:
+          path: ~/.ccache
+          key: ${{ runner.os }}-${{ matrix.metadata }}-ccache-${{ hashFiles('**/git-modules.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-${{ matrix.metadata }}-ccache-
+
+      - name: Install Dependencies for Linux
+        if: runner.os == 'Linux'
+        run: |
+          export PATH=/usr/lib/ccache:$PATH
+
+          sudo apt update -y
+          sudo apt-get install -y ca-certificates \
+                                  ccache \
+                                  cmake \
+                                  doxygen \
+                                  libboost-all-dev \
+                                  libcurl4-openssl-dev \
+                                  libgflags-dev \
+                                  libgoogle-glog-dev \
+                                  libgrpc-dev \
+                                  libgrpc++-dev \
+                                  libmpich-dev \
+                                  libprotobuf-dev \
+                                  libssl-dev \
+                                  libunwind-dev \
+                                  libz-dev \
+                                  protobuf-compiler-grpc \
+                                  python3-pip \
+                                  openjdk-11-jdk \
+                                  default-jdk \
+                                  docker \
+                                  wget
+
+          # install apache-arrow
+          wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb
+          sudo apt update
+          sudo apt install -y libarrow-dev=14.0.1-1 \
+                              libarrow-dataset-dev=14.0.1-1 \
+                              libarrow-acero-dev=14.0.1-1 \
+                              libarrow-flight-dev=14.0.1-1 \
+                              libgandiva-dev=14.0.1-1 \
+                              libparquet-dev=14.0.1-1
+
+          # install python packages for codegen, and io adaptors
+          sudo pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple -U "Pygments>=2.4.1"
+          sudo pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements-setup.txt -r requirements.txt -r requirements-dev.txt
+
+          # install deps for java
+          sudo apt install -y maven
+
+      - name: Setup tmate session
+        if: false
+        uses: mxschmitt/action-tmate@v3
+
+      - name: CMake
+        run: |
+          export PATH=/usr/lib/ccache:$PATH
+          export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
+
+          mkdir build
+          pushd build
+          cmake .. -DCMAKE_BUILD_TYPE=Debug \
+                   -DBUILD_VINEYARD_JAVA=ON
+
+          if [ "${{ matrix.metadata }}" == "redis" ]; then
+            cmake .. -DBUILD_VINEYARD_SERVER_REDIS=ON
+          fi
+          popd
+
+      - name: Build Vineyard
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/local/lib64
+
+          pushd build
+          make -j`nproc`
+          sudo make install
+          popd
+
+      - name: Prepare java package
+        run: |
+          export JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64
+          mkdir share
+          pushd java
+          mvn clean package -DskipTests -e
+          cp hive/target/vineyard-hive-0.1-SNAPSHOT.jar ../share/
+          popd
+
+      - name: Start vineyard server
+        run: |
+          start_port=8000
+          end_port=9000
+          function is_port_taken() {
+            (echo >/dev/tcp/localhost/$1) >/dev/null 2>&1
+          }
+
+          for (( port=start_port; port<=end_port; port++ )); do
+            if ! is_port_taken $port; then
+              export FREE_PORT=$port
+              break
+            fi
+          done
+
+          ./build/bin/vineyardd --socket=./build/metastore/vineyard.sock -rpc_socket_port=8000 --etcd_endpoint="0.0.0.0:2382" &
+          ./build/bin/vineyardd --socket=./build/hiveserver/vineyard.sock -rpc_socket_port=8001 --etcd_endpoint="0.0.0.0:2382" &
+
+      - name: Build hive docker
+        run: |
+          # build hive docker
+          pushd java/hive/docker
+          ./build.sh
+          popd
+
+          # start hive docker
+          pushd java/hive
+          docker-compose up -d --force-recreate --remove-orphans
+          popd
+
+          # wait for hive docker ready
+          sleep 90
+
+      - name: Setup tmate session
+        if: false
+        uses: mxschmitt/action-tmate@v3
+
+      - name: Test
+        run: |
+          pushd java/hive/test
+          ./test.sh
+          popd
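
The "Start vineyard server" step above scans ports 8000-9000 with bash's /dev/tcp redirect to find a free port, then launches two vineyardd instances whose sockets are later mounted into the metastore and HiveServer2 containers; note that the loop exports FREE_PORT but the two daemons are still started on the fixed ports 8000 and 8001. A minimal standalone sketch of the probing pattern is below; the vineyardd flags are copied from the workflow, while the port range and the log redirection are illustrative assumptions.

    #!/bin/bash
    # A connect attempt via bash's /dev/tcp fails (non-zero exit) when nothing
    # is listening on the port, i.e. the port is free.
    is_port_taken() {
        (echo > /dev/tcp/localhost/"$1") >/dev/null 2>&1
    }

    free_port=""
    for (( port = 8000; port <= 9000; port++ )); do    # same range as the workflow
        if ! is_port_taken "$port"; then
            free_port=$port
            break
        fi
    done
    [ -n "$free_port" ] || { echo "no free port between 8000 and 9000" >&2; exit 1; }

    # Flags as used in the workflow; writing a log file is an illustrative addition.
    ./build/bin/vineyardd --socket=./build/metastore/vineyard.sock \
        -rpc_socket_port="$free_port" \
        --etcd_endpoint="0.0.0.0:2382" > vineyardd-metastore.log 2>&1 &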
diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml
index 43628846ad..91902c4b01 100644
--- a/.github/workflows/java-ci.yml
+++ b/.github/workflows/java-ci.yml
@@ -122,6 +122,7 @@ jobs:
           if [ "${{ matrix.metadata }}" == "redis" ]; then
             cmake .. -DBUILD_VINEYARD_SERVER_REDIS=ON
           fi
+          cd ..
 
       - name: Build
         run: |
diff --git a/.gitignore b/.gitignore
index a26798dfc8..d790cba8de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,10 @@ compile_commands.json
 cmake-build-debug
 
+# for hive
+/hivelog/
+/hive-base-workdir/
+
 # for python packaging
 /dist/
 *.egg-info/
diff --git a/java/hive/docker-compose.yml b/java/hive/docker-compose.yml
index 03c630a4a8..9c777e0720 100644
--- a/java/hive/docker-compose.yml
+++ b/java/hive/docker-compose.yml
@@ -22,9 +22,8 @@ services:
     volumes:
       - /user/hive/warehouse:/opt/hive/data/warehouse
      - /user/hive/warehouse:/user/hive/warehouse
-      - ~/performance/spark:/spark
       - ../../java/hive/conf/hive-site.xml:/opt/hive/conf/hive-site.xml
-      - ../../build/vineyard:/tmp/vineyard
+      - ../../build/metastore:/tmp/vineyard
       - ../../share:/opt/hive/auxlib
 
   hive:
@@ -52,10 +51,10 @@ services:
     volumes:
       - /user/hive/warehouse:/opt/hive/data/warehouse
       - /user/hive/warehouse:/user/hive/warehouse
-      - ~/performance/spark:/spark
       - ../../java/hive/conf/hive-site.xml:/opt/hive/conf/hive-site.xml
       - ../../java/hive/target:/opt/hive/auxlib
-      - ../../build/vineyard:/tmp/vineyard
+      - ../../build/hiveserver:/tmp/vineyard
+
 
 networks:
   hive:
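
With the volume changes above, the metastore container now mounts ../../build/metastore as /tmp/vineyard while the HiveServer2 container mounts ../../build/hiveserver, so each service talks to its own vineyardd socket started by the workflow, and the stale ~/performance/spark mount is removed. A quick way to sanity-check the mounts after bringing the stack up is sketched below; `hive` is the service name visible in this hunk, whereas `metastore` is an assumed name for the other service.

    # Bring the services up as the workflow does, then confirm that each
    # container sees the vineyard socket it is supposed to see.
    docker-compose up -d --force-recreate --remove-orphans
    docker-compose exec hive ls -l /tmp/vineyard        # expect the hiveserver socket
    docker-compose exec metastore ls -l /tmp/vineyard   # expect the metastore socket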
diff --git a/java/hive/docker/build.sh b/java/hive/docker/build.sh
index ee0a648f9e..9dd1c1076f 100755
--- a/java/hive/docker/build.sh
+++ b/java/hive/docker/build.sh
@@ -66,45 +66,62 @@ done
 SOURCE_DIR=${SOURCE_DIR:-"../../.."}
 repo=${REPO:-apache}
 
-WORK_DIR="$(mktemp -d)"
+# WORK_DIR="$(mktemp -d)"
+WORK_DIR=../../../hive-base-workdir
 #WORK_DIR="/opt/tao/hive-docker-build"
 mkdir -p "$WORK_DIR"
+find "$WORK_DIR" -maxdepth 1 -mindepth 1 ! -name '*.tar.gz' -exec rm -rf {} \;
 
 # HADOOP_VERSION=${HADOOP_VERSION:-$(mvn -f "$SOURCE_DIR/pom.xml" -q help:evaluate -Dexpression=hadoop.version -DforceStdout)}
 # TEZ_VERSION=${TEZ_VERSION:-$(mvn -f "$SOURCE_DIR/pom.xml" -q help:evaluate -Dexpression=tez.version -DforceStdout)}
 HADOOP_VERSION=${HADOOP_VERSION:-"3.3.4"}
 TEZ_VERSION=${TEZ_VERSION:-"0.9.1"}
 HIVE_VERSION=${HIVE_VERSION:-"2.3.9"}
 
-HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"}
-echo "Downloading Hadoop from $HADOOP_URL..."
-if ! curl --fail -L "$HADOOP_URL" -o "$WORK_DIR/hadoop-$HADOOP_VERSION.tar.gz"; then
-  echo "Fail to download Hadoop, exiting...."
-  exit 1
+if [ -f "$WORK_DIR/hadoop-$HADOOP_VERSION.tar.gz" ]; then
+  echo "Hadoop exists, skipping download..."
+else
+  echo "Download Hadoop..."
+  HADOOP_URL=${HADOOP_URL:-"https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"}
+  echo "Downloading Hadoop from $HADOOP_URL..."
+  if ! curl --fail -L "$HADOOP_URL" -o "$WORK_DIR/hadoop-$HADOOP_VERSION.tar.gz"; then
+    echo "Failed to download Hadoop, exiting..."
+    exit 1
+  fi
 fi
 
-TEZ_URL=${TEZ_URL:-"https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz"}
-echo "Downloading Tez from $TEZ_URL..."
-if ! curl --fail -L "$TEZ_URL" -o "$WORK_DIR/apache-tez-$TEZ_VERSION-bin.tar.gz"; then
-  echo "Failed to download Tez, exiting..."
-  exit 1
+if [ -f "$WORK_DIR/apache-tez-$TEZ_VERSION-bin.tar.gz" ]; then
+  echo "Tez exists, skipping download..."
+else
+  echo "Download Tez..."
+  TEZ_URL=${TEZ_URL:-"https://archive.apache.org/dist/tez/$TEZ_VERSION/apache-tez-$TEZ_VERSION-bin.tar.gz"}
+  echo "Downloading Tez from $TEZ_URL..."
+  if ! curl --fail -L "$TEZ_URL" -o "$WORK_DIR/apache-tez-$TEZ_VERSION-bin.tar.gz"; then
+    echo "Failed to download Tez, exiting..."
+    exit 1
+  fi
 fi
 
-if [ -n "$HIVE_VERSION" ]; then
-  HIVE_URL=${HIVE_URL:-"https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz"}
-  echo "Downloading Hive from $HIVE_URL..."
-  if ! curl --fail -L "$HIVE_URL" -o "$WORK_DIR/apache-hive-$HIVE_VERSION-bin.tar.gz"; then
-    echo "Failed to download Hive, exiting..."
-    exit 1
-  fi
-  hive_package="$WORK_DIR/apache-hive-$HIVE_VERSION-bin.tar.gz"
+if [ -f "$WORK_DIR/apache-hive-$HIVE_VERSION-bin.tar.gz" ]; then
+  echo "Hive exists, skipping download..."
 else
-  HIVE_VERSION=$(mvn -f "$SOURCE_DIR/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)
-  HIVE_TAR="$SOURCE_DIR/packaging/target/apache-hive-$HIVE_VERSION-bin.tar.gz"
-  if ls $HIVE_TAR || mvn -f $SOURCE_DIR/pom.xml clean package -DskipTests -Pdist; then
-    cp "$HIVE_TAR" "$WORK_DIR/"
+  echo "Download Hive..."
+  if [ -n "$HIVE_VERSION" ]; then
+    HIVE_URL=${HIVE_URL:-"https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz"}
+    echo "Downloading Hive from $HIVE_URL..."
+    if ! curl --fail -L "$HIVE_URL" -o "$WORK_DIR/apache-hive-$HIVE_VERSION-bin.tar.gz"; then
+      echo "Failed to download Hive, exiting..."
+      exit 1
+    fi
+    hive_package="$WORK_DIR/apache-hive-$HIVE_VERSION-bin.tar.gz"
   else
-    echo "Failed to compile Hive Project, exiting..."
-    exit 1
+    HIVE_VERSION=$(mvn -f "$SOURCE_DIR/pom.xml" -q help:evaluate -Dexpression=project.version -DforceStdout)
+    HIVE_TAR="$SOURCE_DIR/packaging/target/apache-hive-$HIVE_VERSION-bin.tar.gz"
+    if ls $HIVE_TAR || mvn -f $SOURCE_DIR/pom.xml clean package -DskipTests -Pdist; then
+      cp "$HIVE_TAR" "$WORK_DIR/"
+    else
+      echo "Failed to compile Hive Project, exiting..."
+      exit 1
+    fi
   fi
 fi
@@ -118,5 +135,5 @@ docker build \
     --build-arg "HIVE_VERSION=$HIVE_VERSION" \
     --build-arg "HADOOP_VERSION=$HADOOP_VERSION" \
     --build-arg "TEZ_VERSION=$TEZ_VERSION" \
+    --no-cache
 
-rm -r "${WORK_DIR}"
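
The three rewritten download blocks in build.sh follow one pattern: keep the tarballs in the persistent work directory (the new find command deletes everything there except *.tar.gz) and only invoke curl when the archive is not already cached. A generic helper expressing that pattern could look like the sketch below; the fetch_if_missing name and its call are illustrative, not part of the patch.

    # Download a file into $WORK_DIR unless a cached copy is already present.
    fetch_if_missing() {
        local name="$1" url="$2"
        if [ -f "$WORK_DIR/$name" ]; then
            echo "$name exists, skipping download..."
            return 0
        fi
        echo "Downloading $name from $url..."
        if ! curl --fail -L "$url" -o "$WORK_DIR/$name"; then
            echo "Failed to download $name, exiting..."
            exit 1
        fi
    }

    fetch_if_missing "hadoop-$HADOOP_VERSION.tar.gz" \
        "https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz"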
diff --git a/java/hive/test/expected/test_all_primitive_types.q.out b/java/hive/test/expected/test_all_primitive_types.q.out
new file mode 100644
index 0000000000..1c1aea94f7
--- /dev/null
+++ b/java/hive/test/expected/test_all_primitive_types.q.out
@@ -0,0 +1 @@
+1,1,42,1,2.0,1.0,hello world1!,hello worl,hello worl,aGVsbG8gd29ybGQ0IQ==,2023-12-31,true,2023-12-31 23:59:59,1235.00
diff --git a/java/hive/test/expected/test_hive_dynamic_partition.q.out b/java/hive/test/expected/test_hive_dynamic_partition.q.out
new file mode 100644
index 0000000000..fdfc4c8f54
--- /dev/null
+++ b/java/hive/test/expected/test_hive_dynamic_partition.q.out
@@ -0,0 +1,3 @@
+1,2,1,2017
+1,2,1,2018
+3,4,1,2018
diff --git a/java/hive/test/expected/test_hive_static_partition.q.out b/java/hive/test/expected/test_hive_static_partition.q.out
new file mode 100644
index 0000000000..d1719b1ed5
--- /dev/null
+++ b/java/hive/test/expected/test_hive_static_partition.q.out
@@ -0,0 +1,10 @@
+1,2,114514
+999,2,666
+999,2,666
+999,2,666
+3,4,666
+999,2,666
+999,2,666
+999,2,666
+3,4,666
+1,2,114514
diff --git a/java/hive/test/expected/test_insert.q.out b/java/hive/test/expected/test_insert.q.out
new file mode 100644
index 0000000000..03c9c3607f
--- /dev/null
+++ b/java/hive/test/expected/test_insert.q.out
@@ -0,0 +1,3 @@
+a,1
+b,2
+c,3
diff --git a/java/hive/test/expected/test_nested_types.q.out b/java/hive/test/expected/test_nested_types.q.out
new file mode 100644
index 0000000000..1b425d3db9
--- /dev/null
+++ b/java/hive/test/expected/test_nested_types.q.out
@@ -0,0 +1 @@
+421hello2world!
diff --git a/java/hive/test/query/out/test_all_primitive_types.q.out b/java/hive/test/query/out/test_all_primitive_types.q.out
new file mode 100644
index 0000000000..1c1aea94f7
--- /dev/null
+++ b/java/hive/test/query/out/test_all_primitive_types.q.out
@@ -0,0 +1 @@
+1,1,42,1,2.0,1.0,hello world1!,hello worl,hello worl,aGVsbG8gd29ybGQ0IQ==,2023-12-31,true,2023-12-31 23:59:59,1235.00
diff --git a/java/hive/test/query/out/test_hive_dynamic_partition.q.out b/java/hive/test/query/out/test_hive_dynamic_partition.q.out
new file mode 100644
index 0000000000..fdfc4c8f54
--- /dev/null
+++ b/java/hive/test/query/out/test_hive_dynamic_partition.q.out
@@ -0,0 +1,3 @@
+1,2,1,2017
+1,2,1,2018
+3,4,1,2018
diff --git a/java/hive/test/query/out/test_hive_static_partition.q.out b/java/hive/test/query/out/test_hive_static_partition.q.out
new file mode 100644
index 0000000000..d1719b1ed5
--- /dev/null
+++ b/java/hive/test/query/out/test_hive_static_partition.q.out
@@ -0,0 +1,10 @@
+1,2,114514
+999,2,666
+999,2,666
+999,2,666
+3,4,666
+999,2,666
+999,2,666
+999,2,666
+3,4,666
+1,2,114514
diff --git a/java/hive/test/query/out/test_insert.q.out b/java/hive/test/query/out/test_insert.q.out
new file mode 100644
index 0000000000..03c9c3607f
--- /dev/null
+++ b/java/hive/test/query/out/test_insert.q.out
@@ -0,0 +1,3 @@
+a,1
+b,2
+c,3
diff --git a/java/hive/test/query/out/test_nested_types.q.out b/java/hive/test/query/out/test_nested_types.q.out
new file mode 100644
index 0000000000..1b425d3db9
--- /dev/null
+++ b/java/hive/test/query/out/test_nested_types.q.out
@@ -0,0 +1 @@
+421hello2world!
diff --git a/java/hive/test/query/test_all_primitive_types.q b/java/hive/test/query/test_all_primitive_types.q
new file mode 100644
index 0000000000..48466b11f6
--- /dev/null
+++ b/java/hive/test/query/test_all_primitive_types.q
@@ -0,0 +1,38 @@
+drop table if exists test_all_primitive_types;
+create table test_all_primitive_types (
+    field_1 tinyint,
+    field_2 smallint,
+    field_3 bigint,
+    field_4 int,
+    field_5 double,
+    field_6 float,
+    field_7 string,
+    field_9 varchar(10),
+    field_10 char(10),
+    field_8 binary,
+    field_11 date,
+    field_12 boolean,
+    field_13 timestamp,
+    field_14 decimal(6, 2)
+);
+
+insert into test_all_primitive_types select
+    tinyint(1),
+    smallint(1),
+    42,
+    bigint(1),
+    double(2.0),
+    float(1.0),
+    'hello world1!',
+    'hello world2!',
+    'hello world3!',
+    cast('hello world4!' as binary),
+    date('2023-12-31'),
+    true,
+    timestamp('2023-12-31 23:59:59'),
+    cast(1234.56 as decimal);
+
+insert overwrite directory '/tmp/out/test_all_primitive_types/'
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+select * from test_all_primitive_types;
+drop table test_all_primitive_types;
\ No newline at end of file
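
Every query file ends with an `insert overwrite directory '/tmp/out/<name>/'` clause, so its result set is written as comma-delimited text inside the HiveServer2 container; test.sh later copies /tmp/out back to the host and flattens each directory into a single .q.out file. To run just one query by hand in the same way, something like the following should work; the container name and JDBC URL are taken from test.sh, and it assumes /tmp/query already exists in the container (test.sh creates it by copying the whole query directory).

    # Execute a single query file through beeline inside the 'hive' container.
    docker cp ./query/test_all_primitive_types.q hive:/tmp/query/
    docker exec hive beeline \
        -u 'jdbc:hive2://localhost:10000/;transportMode=http;httpPath=cliservice' \
        -f /tmp/query/test_all_primitive_types.q

    # Fetch the delimited result and flatten it into one file, as test.sh does.
    docker cp hive:/tmp/out/test_all_primitive_types ./
    cat ./test_all_primitive_types/* > test_all_primitive_types.q.out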
diff --git a/java/hive/test/query/test_hive_dynamic_partition.q b/java/hive/test/query/test_hive_dynamic_partition.q
new file mode 100644
index 0000000000..dd2491bd5f
--- /dev/null
+++ b/java/hive/test/query/test_hive_dynamic_partition.q
@@ -0,0 +1,23 @@
+drop table if exists hive_dynamic_partition_data;
+create table hive_dynamic_partition_data(
+    src_id int,
+    dst_id int,
+    year int)
+stored as TEXTFILE
+location "file:///tmp/hive_test/hive_dynamic_partition_data";
+insert into table hive_dynamic_partition_data values (1, 2, 2018),(3, 4, 2018),(1, 2, 2017);
+
+drop table if exists hive_dynamic_partition_test;
+create table hive_dynamic_partition_test
+(
+    src_id int,
+    dst_id int
+)partitioned by(mounth int, year int);
+insert into table hive_dynamic_partition_test partition(mounth=1, year) select src_id,dst_id,year from hive_dynamic_partition_data;
+
+insert overwrite directory '/tmp/out/test_hive_dynamic_partition/'
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+select * from hive_dynamic_partition_test;
+
+drop table hive_dynamic_partition_test;
+drop table hive_dynamic_partition_data;
\ No newline at end of file
diff --git a/java/hive/test/query/test_insert.q b/java/hive/test/query/test_insert.q
new file mode 100644
index 0000000000..04ee0e9a58
--- /dev/null
+++ b/java/hive/test/query/test_insert.q
@@ -0,0 +1,10 @@
+drop table if exists hive_example;
+create table hive_example(field_1 string,field_2 int);
+
+insert into hive_example values('a', 1), ('b', 2), ('c', 3);
+
+insert overwrite directory '/tmp/out/test_insert/'
+row format delimited fields terminated by ','
+select * from hive_example;
+
+drop table hive_example;
diff --git a/java/hive/test/query/test_nested_types.q b/java/hive/test/query/test_nested_types.q
new file mode 100644
index 0000000000..1366e970d1
--- /dev/null
+++ b/java/hive/test/query/test_nested_types.q
@@ -0,0 +1,19 @@
+drop table if exists nested_table;
+create table nested_table (
+    field_1 map<int,
+                array<struct<field_1:int,
+                             field_2:string>>>
+);
+
+insert into nested_table select
+    map(
+        42,
+        array(named_struct('field_1', 1,
+                           'field_2', 'hello'),
+              named_struct('field_1', 2,
+                           'field_2', 'world!')));
+
+insert overwrite directory '/tmp/out/test_nested_types/'
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+select * from nested_table;
+drop table nested_table;
\ No newline at end of file
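
test_hive_dynamic_partition.q fixes mounth=1 statically and lets Hive derive the year partition from the selected rows, which is why the expected output contains one row for year=2017 and two for year=2018. When iterating on the query interactively, the partition layout can be inspected before the final drop table, for example with beeline's inline-statement flag (connection string reused from test.sh):

    # Show the partitions created by the dynamic-partition insert; run this
    # before the query file drops the table again.
    docker exec hive beeline \
        -u 'jdbc:hive2://localhost:10000/;transportMode=http;httpPath=cliservice' \
        -e 'show partitions hive_dynamic_partition_test;'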
diff --git a/java/hive/test/query/test_static_partition.q b/java/hive/test/query/test_static_partition.q
new file mode 100644
index 0000000000..2b5d546cb6
--- /dev/null
+++ b/java/hive/test/query/test_static_partition.q
@@ -0,0 +1,29 @@
+drop table if exists hive_static_partition;
+
+create table hive_static_partition(
+    src_id int,
+    dst_id int
+) partitioned by (value int);
+insert into table hive_static_partition partition(value=666) values (3, 4);
+insert into table hive_static_partition partition(value=666) values (999, 2), (999, 2), (999, 2);
+insert into table hive_static_partition partition(value=114514) values (1, 2);
+
+drop table if exists result;
+create table result(
+    field_1 int,
+    field_2 int,
+    field_3 int
+);
+insert into result
+select * from hive_static_partition
+union all
+select * from hive_static_partition where value=666
+union all
+select * from hive_static_partition where value=114514;
+
+insert overwrite directory '/tmp/out/test_hive_static_partition/'
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
+select * from result;
+
+drop table hive_static_partition;
+drop table result;
diff --git a/java/hive/test/test.sh b/java/hive/test/test.sh
new file mode 100755
index 0000000000..674d8bc432
--- /dev/null
+++ b/java/hive/test/test.sh
@@ -0,0 +1,40 @@
+outdir=./query/out
+if [ -d "$outdir" ]; then
+    rm -r "$outdir"
+fi
+docker cp ./query hive:/tmp/
+
+for file in ./query/*; do
+    query=$(basename "$file")
+    docker exec hive beeline -u 'jdbc:hive2://localhost:10000/;transportMode=http;httpPath=cliservice' \
+                             -f /tmp/query/"$query"
+done
+
+docker cp hive:/tmp/out ./query/
+for dir in ./query/out/*; do
+    cat $dir/* > ./query/out/$(basename "$dir").q.out
+    rm -r $dir
+done
+
+filecount=$(find ./query/ -name "*.q" | wc -l)
+testedcount=$(find ./query/out/ -name "*.out" | wc -l)
+successcount=0
+failedcount=0
+
+for file in ./query/out/*; do
+    if [ -f "$file" ]; then
+        echo "Diff $file with expected/$(basename "$file")"
+        if diff -a "$file" ./expected/$(basename "$file"); then
+            successcount=$((successcount+1))
+        else
+            failedcount=$((failedcount+1))
+        fi
+    fi
+done
+
+echo "Total test: $filecount Success: $successcount Failed: $failedcount Skipped: $((filecount-testedcount))"
+if [ $successcount -eq $filecount ]; then
+    exit 0
+else
+    exit 1
+fi
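
test.sh only reports success when every generated .q.out byte-matches the file of the same name under expected/; outputs are keyed by the directory name used in the insert overwrite clause, which is why test_static_partition.q still produces test_hive_static_partition.q.out. When a query is changed on purpose, the baseline has to be refreshed from a manually reviewed run; a small sketch of that workflow (the loop and the copy destination are assumptions, not part of the patch):

    # Re-run the suite, inspect the differences, then promote the new outputs
    # to become the expected baselines.
    ./test.sh || true
    for f in ./query/out/*.q.out; do
        diff -a "$f" "./expected/$(basename "$f")" || true
        cp "$f" "./expected/$(basename "$f")"
    done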