Skip to content

[Enhancement] improve sql digest for massive compound predicates (backport #53207) #27783

[Enhancement] improve sql digest for massive compound predicates (backport #53207)

[Enhancement] improve sql digest for massive compound predicates (backport #53207) #27783

# Workflow header: display name, trigger, and concurrency policy.
name: CI PIPELINE - BRANCH

# Triggered when a pull request whose base branch matches 'branch*' is
# opened or receives new commits.
on:
  pull_request:
    types: [opened, synchronize]
    branches: ['branch*']

# Runs are grouped by PR number; starting a new run cancels the one that is
# still in progress for the same group.
concurrency:
  group: ${{ github.event.number }}
  cancel-in-progress: true

jobs:
# Gate job: publishes the PASS output that the BE/FE filter jobs test before running.
basic-checker:
  name: RUN CHECKER
  runs-on:
    - self-hosted
    - normal
  env:
    PR_NUMBER: ${{ github.event.number }}
    GH_TOKEN: ${{ github.token }}
  outputs:
    # Set to 'true' by the "Check" step; stays empty when that step is skipped.
    PASS: ${{ steps.check.outputs.pass }}
  steps:
    # Runs only when the PR title has no '(sync #', no 'sync' label is attached,
    # and the head ref does not both start with the base ref and contain '-sync-'.
    - id: check
      name: Check
      if: >
        !contains(github.event.pull_request.title, '(sync #') &&
        !contains(github.event.pull_request.labels.*.name, 'sync') &&
        (!startsWith(github.head_ref, github.base_ref) || !contains(github.head_ref, '-sync-'))
      run: |
        echo "pass=true" >> $GITHUB_OUTPUT

    # Takes a fresh copy of the ci-tool checkout kept on the runner, updates it,
    # and runs the blacklist script with NO_SYNC / AUTHOR in its environment.
    - id: check_blacklist
      name: Check Blacklist
      env:
        NO_SYNC: ${{ steps.check.outputs.pass }}
        AUTHOR: ${{ github.event.pull_request.user.login }}
      run: |
        rm -rf ./ci-tool && cp -rf /var/lib/ci-tool ./ci-tool && cd ci-tool && git pull >/dev/null
        ./scripts/check-blacklist.sh

    # Reuses the ci-tool copy created by the previous step.
    - id: check_issue
      name: Check Feature Issue
      run: |
        cd ci-tool
        python3 scripts/get_pr_issue.py check_issue ${GITHUB_REPOSITORY} ${PR_NUMBER}
# Path filter for backend work: reports whether the PR touches BE-related
# paths (output1) and third-party related paths (output2).
be-checker:
  name: BE FILTER
  needs: basic-checker
  if: needs.basic-checker.outputs.PASS == 'true'
  runs-on: ubuntu-latest
  outputs:
    output1: ${{ steps.be-changes-info.outputs.be }}
    output2: ${{ steps.be-changes-info.outputs.thirdparty }}
  steps:
    # Every 'thirdparty' path is also listed under 'be', so a third-party
    # change always counts as a BE change too.
    - id: changes
      uses: dorny/paths-filter@v3
      with:
        filters: |
          be:
            - 'be/**'
            - 'gensrc/**'
            - 'run-be-ut.sh'
            - 'build.sh'
            - 'thirdparty/**'
            - 'docker/dockerfiles/dev-env/dev-env.Dockerfile'
          thirdparty:
            - 'thirdparty/**'
            - 'docker/dockerfiles/dev-env/dev-env.Dockerfile'

    # Re-exports the filter results as step outputs consumed by the job outputs above.
    - id: be-changes-info
      name: BE CHECK INFO
      run: |
        echo "be=${{ steps.changes.outputs.be }}" >> $GITHUB_OUTPUT
        echo "thirdparty=${{ steps.changes.outputs.thirdparty }}" >> $GITHUB_OUTPUT
# Runs the repository's format check on the PR head with the base branch
# squash-merged into it. Only runs when the BE path filter matched.
clang-format:
  name: Clang-Format
  needs: be-checker
  if: ${{ needs.be-checker.outputs.output1 == 'true' }}
  runs-on:
    - self-hosted
    - light
  env:
    PR_NUMBER: ${{ github.event.number }}
  steps:
    # Start from an empty workspace.
    - name: clean
      run: |
        rm -rf ${{ github.workspace }}
        mkdir -p ${{ github.workspace }}

    - name: Checkout Code
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # Exposes the PR's base branch name as the step output 'branch'.
    - id: branch
      name: BRANCH INFO
      run: |
        echo ${{github.base_ref}}
        echo "branch=${{github.base_ref}}" >> $GITHUB_OUTPUT

    # Updates the base branch, fetches the PR head into "<base>-<pr number>",
    # branches 'merge_pr' off it and squash-merges the base branch in.
    # A failed merge is reported as an error annotation and fails the step.
    - name: Checkout PR
      run: |
        BRANCH=${{steps.branch.outputs.branch}}
        git config --global user.name "wanpengfei-git";
        git config --global user.email "[email protected]";
        git checkout $BRANCH;
        git pull;
        BRANCH_NAME="${BRANCH}-${PR_NUMBER}";
        git fetch origin pull/${PR_NUMBER}/head:${BRANCH_NAME};
        git checkout $BRANCH_NAME;
        git checkout -b merge_pr;
        git merge --squash --no-edit ${BRANCH} || (echo "::error::Merge conflict, please check." && exit -1);

    # /var/lib/llvm/bin is put first on PATH before the format script runs.
    - name: Run Clang-Format
      run: |
        export PATH=/var/lib/llvm/bin:$PATH
        bash build-support/check-format.sh

    # Always empty the workspace, even after a failure.
    - name: Clean ENV
      if: always()
      run: |
        rm -rf ${{ github.workspace }}/*
# Rebuilds the dev-env image once per Linux distro when third-party related
# paths changed, and publishes the resulting image cache id as an artifact.
# continue-on-error keeps a failure here from failing the whole workflow run.
thirdparty-update:
  name: Thirdparty Update
  needs: [be-checker, clang-format]
  runs-on: [self-hosted, normal]
  continue-on-error: true
  strategy:
    fail-fast: false
    matrix:
      # The matrix value is exported as `linux_distro` and becomes the artifact
      # name suffix. The "Thirdparty Info" job reads THIRDPARTY-RESULT-centos7
      # and THIRDPARTY-RESULT-ubuntu, so the values must be distro names
      # (previously `build_type: [Release, ASAN]`, which never matched).
      linux_distro: [centos7, ubuntu]
  env:
    PR_NUMBER: ${{ github.event.number }}
    BRANCH: ${{ github.base_ref }}
    REPO: ${{ github.repository }}
  steps:
    # Start from an empty workspace.
    - name: clean
      run: |
        rm -rf ${{ github.workspace }}
        mkdir -p ${{ github.workspace }}

    # Only runs when the third-party path filter matched.
    - name: Update Image (${{ matrix.linux_distro }})
      id: update-image
      if: needs.be-checker.outputs.output2 == 'true'
      env:
        linux_distro: ${{ matrix.linux_distro }}
      run: |
        cp -rf /var/lib/ci-tool ./ci-tool && cd ci-tool && git pull && source lib/init.sh
        ./bin/run-pr-update-image.sh

    # image_cache.info may not exist (e.g. when the update step was skipped),
    # hence if-no-files-found: ignore.
    - name: Upload Thirdparty Result
      uses: actions/upload-artifact@v4
      with:
        name: THIRDPARTY-RESULT-${{ matrix.linux_distro }}
        path: image_cache.info
        retention-days: 1
        overwrite: true
        if-no-files-found: ignore

    # Deletes the elastic cluster only when the update step was eligible to
    # run, then empties the workspace.
    - name: Clean ENV
      if: always()
      run: |
        if [[ "${{ needs.be-checker.outputs.output2 }}" == 'true' ]]; then
          cd ci-tool && source lib/init.sh
          ./bin/elastic-cluster.sh --delete
        fi
        rm -rf ${{ github.workspace }}/*
# Collects the per-distro image cache ids uploaded by "Thirdparty Update" and
# exposes them as job outputs.
thirdparty-info:
  name: Thirdparty Info
  needs:
    - thirdparty-update
  runs-on:
    - self-hosted
    - normal
  outputs:
    centos7_image_cache_id: ${{ steps.info.outputs.centos7_image_cache_id }}
    ubuntu_image_cache_id: ${{ steps.info.outputs.ubuntu_image_cache_id }}
  steps:
    # Fail this job explicitly when the upstream job reports 'failure'.
    - name: Check Result
      run: |
        if [[ "${{ needs.thirdparty-update.result }}" == 'failure' ]]; then
          echo "::error:: Thirdparty Update Error!"
          exit 1
        fi

    # Each matching artifact is downloaded into ./outputs/<artifact name>/.
    - name: Download Thirdparty Artifact
      uses: actions/download-artifact@v4
      with:
        pattern: THIRDPARTY-RESULT-*
        path: outputs

    # A missing info file yields an empty id rather than a failure.
    - name: Read Info
      id: info
      if: needs.thirdparty-update.result == 'success'
      run: |
        for distro in centos7 ubuntu; do
          image_cache_id=$(cat "./outputs/THIRDPARTY-RESULT-${distro}/image_cache.info" || echo "")
          echo "${distro}_image_cache_id=${image_cache_id}" >> $GITHUB_OUTPUT
        done
# Runs the backend unit tests through the ci-tool elastic-ut script.
be-ut:
  name: BE UT
  needs: [be-checker, thirdparty-info]
  runs-on:
    - self-hosted
    - normal
  timeout-minutes: 180
  env:
    PR_NUMBER: ${{ github.event.number }}
    BRANCH: ${{ github.base_ref }}
    IMAGE_CACHE_ID: ${{ needs.thirdparty-info.outputs.ubuntu_image_cache_id }}
    LINUX_DISTRO: ubuntu
  steps:
    # Exposes the PR's base branch name as the step output 'branch'.
    - id: branch
      name: BRANCH INFO
      run: |
        echo ${{github.base_ref}}
        echo "branch=${{github.base_ref}}" >> $GITHUB_OUTPUT

    # image_cache_id / image_tag are exported only when third-party related
    # paths changed in this PR.
    - id: run_ut
      name: UPDATE ECI & RUN UT
      shell: bash
      timeout-minutes: 180
      run: |
        rm -rf ./ci-tool && cp -rf /var/lib/ci-tool ./ci-tool && cd ci-tool && git pull && source lib/init.sh
        if [[ "${{ needs.be-checker.outputs.output2 }}" == 'true' ]]; then
          export image_cache_id=${IMAGE_CACHE_ID}
          export image_tag=$BRANCH-$PR_NUMBER
        fi
        ./bin/elastic-ut.sh --pr ${PR_NUMBER} --module be --branch ${{ steps.branch.outputs.branch }} --repository ${{ github.repository }} --linuxdistro ${LINUX_DISTRO}

    # Removes the instance whose id the UT step published as ECI_ID.
    - name: clean ECI
      if: always()
      run: |
        echo ${{ steps.run_ut.outputs.ECI_ID }}
        eci rm ${{ steps.run_ut.outputs.ECI_ID }}

    - name: Upload log
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: BE UT LOG
        path: ${{ steps.run_ut.outputs.BE_LOG }}
        retention-days: 1
        overwrite: true

    # Deletes the files reported by the UT step, then empties the workspace.
    - name: Clean ENV
      if: always()
      run: |
        rm -f ${{ steps.run_ut.outputs.RES_FILE }}
        rm -f ${{ steps.run_ut.outputs.RES_LOG }}
        rm -rf ${{ steps.run_ut.outputs.BE_LOG }}
        rm -rf ${{ github.workspace }}/*
# Path filter for frontend work: output1 is 'true' when the PR touches any
# FE-related path listed below.
fe-checker:
  name: FE FILTER
  needs: basic-checker
  if: needs.basic-checker.outputs.PASS == 'true'
  runs-on: ubuntu-latest
  outputs:
    output1: ${{ steps.fe-changes-info.outputs.fe }}
  steps:
    - id: changes
      uses: dorny/paths-filter@v3
      with:
        filters: |
          fe:
            - 'fe/**'
            - 'gensrc/**'
            - 'run-fe-ut.sh'
            - 'java-extensions/**'
            - 'build.sh'

    # Prints the filter result to the job log.
    - run: echo ${{ steps.changes.outputs.fe }}

    # Re-exports the filter result as the step output read by the job output above.
    - id: fe-changes-info
      name: FE CHECK INFO
      run: |
        echo "fe=${{ steps.changes.outputs.fe }}" >> $GITHUB_OUTPUT
# Runs Java checkstyle on the FE sources of the PR head with the base branch
# squash-merged in. The checkstyle steps only run when the PR changes a .java file.
fe-codestyle-check:
  name: FE Code Style Check
  needs: fe-checker
  if: needs.fe-checker.outputs.output1 == 'true'
  runs-on: ubuntu-latest
  env:
    PR_NUMBER: ${{ github.event.number }}
  steps:
    # Start from an empty workspace.
    - name: clean
      run: |
        rm -rf ${{ github.workspace }}
        mkdir -p ${{ github.workspace }}

    - id: java-file
      name: JAVA FILE
      uses: dorny/paths-filter@v3
      with:
        filters: |
          java:
            - '**.java'

    # Prints the filter result to the job log.
    - run: echo ${{ steps.java-file.outputs.java }}

    # Exposes the PR's base branch name as the step output 'branch'.
    - id: branch
      name: BRANCH INFO
      run: |
        echo ${{github.base_ref}}
        echo "branch=${{github.base_ref}}" >> $GITHUB_OUTPUT

    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # Updates the base branch, fetches the PR head into "<base>-<pr number>",
    # branches 'merge_pr' off it and squash-merges the base branch in.
    # NOTE(review): the user.email value looks like an obfuscation placeholder
    # rather than a real address - confirm against the repository copy.
    - name: Checkout PR
      run: |
        BRANCH=${{steps.branch.outputs.branch}}
        git config --global user.name "wanpengfei-git";
        git config --global user.email "[email protected]";
        git checkout $BRANCH;
        git pull;
        BRANCH_NAME="${BRANCH}-${PR_NUMBER}";
        git fetch origin pull/${PR_NUMBER}/head:${BRANCH_NAME};
        git checkout $BRANCH_NAME;
        git checkout -b merge_pr;
        git merge --squash --no-edit ${BRANCH} || (echo "::error::Merge conflict, please check." && exit -1);

    # Copies the fe/checkstyle* files to the repository root.
    - name: Copy checkstyle files
      if: ${{ steps.java-file.outputs.java == 'true' }}
      run: |
        cp fe/checkstyle* .

    # NOTE(review): the `uses:` reference below is not a valid
    # "<owner>/<repo>@<ref>" value; it appears to be an obfuscated
    # "<action name>@<version>" - restore the real reference before use.
    - name: Run java checkstyle
      if: ${{ steps.java-file.outputs.java == 'true' }}
      uses: dbelyaev/[email protected]
      with:
        workdir: "./fe"
        checkstyle_config: checkstyle.xml
        reporter: 'github-pr-check'
        github_token: ${{ secrets.GITHUB_TOKEN }}
        fail_on_error: true
        level: error
# Builds the FE module and submits a SonarCloud pull-request analysis.
# Only runs in the StarRocks/starrocks repository and when the FE path filter matched.
sonarcloud-fe-checker:
  name: FE Sonarcloud Check
  needs: fe-checker
  if: needs.fe-checker.outputs.output1 == 'true' && github.repository == 'StarRocks/starrocks'
  runs-on: ubuntu-latest
  env:
    PR_NUMBER: ${{ github.event.number }}
    CODE_PATH: ${{ github.workspace }}
    BRANCH: ${{ github.base_ref }}
    GH_TOKEN: ${{ github.token }}
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0

    # Exposes the PR's base branch name as the step output 'branch'.
    - name: BRANCH INFO
      id: branch
      run: |
        echo ${{github.base_ref}}
        echo "branch=${{github.base_ref}}" >> $GITHUB_OUTPUT

    # Updates the base branch, fetches the PR head into "<base>-<pr number>",
    # branches 'merge_pr' off it and squash-merges the base branch in.
    # The failure message carries the '::error::' prefix so it is surfaced as
    # an annotation, matching the other "Checkout PR" steps in this workflow.
    - name: Checkout PR
      run: |
        BRANCH=${{steps.branch.outputs.branch}}
        git config --global user.name "wanpengfei-git";
        git config --global user.email "[email protected]";
        git checkout $BRANCH;
        git pull;
        BRANCH_NAME="${BRANCH}-${PR_NUMBER}";
        git fetch origin pull/${PR_NUMBER}/head:${BRANCH_NAME};
        git checkout $BRANCH_NAME;
        git checkout -b merge_pr;
        git merge --squash --no-edit ${BRANCH} || (echo "::error::Merge conflict, please check." && exit -1);

    - name: Set up JDK 17
      uses: actions/setup-java@v3
      with:
        java-version: 17
        distribution: 'adopt'

    - name: Cache SonarCloud packages
      uses: actions/cache@v4
      with:
        path: ~/.sonar/cache
        key: ${{ runner.os }}-sonar
        restore-keys: ${{ runner.os }}-sonar

    - name: Cache Maven packages
      uses: actions/cache@v4
      with:
        path: ~/.m2
        key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
        restore-keys: ${{ runner.os }}-maven

    # Downloads the oras CLI, uses it to pull a prebuilt thrift 0.13.0 binary
    # archive, and links both tools into /usr/local/bin.
    - name: Install Apache Thrift 0.13
      run: |
        mkdir -p ./.setup-thrift/oras
        mkdir -p ./.setup-thrift/thrift
        curl -sLO https://github.com/deislabs/oras/releases/download/v0.7.0/oras_0.7.0_linux_amd64.tar.gz
        tar -xvzf oras_0.7.0_linux_amd64.tar.gz
        ln -sf $(pwd)/oras /usr/local/bin/oras
        oras pull ghcr.io/dodopizza/setup-thrift/binaries:v0.13.0 --media-type application/vnd.unknown.layer.v1+tar.gz
        tar zxf ./thrift.v0.13.0.tar.gz -C .
        ln -sf $(pwd)/thrift /usr/local/bin/thrift

    - name: Analyze FE
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # Needed to get PR information, if any
        # NOTE(review): this token is committed in plain text; the trailing
        # comment suggests it used to come from secrets.SONAR_TOKEN. Confirm
        # this is intentional (presumably so fork PRs, which cannot read
        # secrets, can still be analysed) and rotate the token if it is not.
        SONAR_TOKEN: f0fb4d25c03bae90c2e994c45c29c49dc86fc169  # ${{ secrets.SONAR_TOKEN }}
        # The head ref is chosen by the PR author. It is passed through the
        # environment and quoted below instead of being expanded into the
        # script text, so a crafted branch name cannot inject shell commands.
        PR_HEAD_REF: ${{ github.head_ref }}
      run: |
        thrift --version
        whereis thrift
        export STARROCKS_HOME=${{ github.workspace }}
        source env.sh
        mkdir -p thirdparty/installed/bin/
        cd thirdparty/installed/bin/ && ln -s /usr/local/bin/thrift thrift
        cd ${{ github.workspace }}/fe
        mvn -B -DskipTests verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Dsonar.projectKey=StarRocks_starrocks -Dsonar.pullrequest.key=${{ github.event.number }} -Dsonar.pullrequest.base=${{ github.base_ref }} -Dsonar.pullrequest.branch="${PR_HEAD_REF}"
# Runs the frontend unit tests through the ci-tool elastic-ut script.
fe-ut:
  name: FE UT
  needs: fe-codestyle-check
  runs-on: [self-hosted, normal]
  env:
    PR_NUMBER: ${{ github.event.number }}
  steps:
    # Start from an empty workspace.
    - name: clean
      run: |
        rm -rf ${{ github.workspace }}
        mkdir -p ${{ github.workspace }}

    # Publishes the base branch name and the lower-cased repository owner
    # (the part of "owner/repo" before the slash) as step outputs.
    - name: BRANCH INFO
      id: branch
      run: |
        echo "branch=${{github.base_ref}}" >> $GITHUB_OUTPUT
        repo="${{ github.repository }}"
        bucket_prefix=`echo ${repo%/*} | tr '[:upper:]' '[:lower:]'`
        echo "bucket_prefix=${bucket_prefix}" >> $GITHUB_OUTPUT

    - name: UPDATE ECI & RUN UT
      id: run_ut
      shell: bash
      timeout-minutes: 60
      run: |
        rm -rf ./ci-tool && cp -rf /var/lib/ci-tool ./ci-tool && cd ci-tool && git pull && source lib/init.sh
        ./bin/elastic-ut.sh --pr ${PR_NUMBER} --module fe --branch ${{steps.branch.outputs.branch}} --build Release --repository ${{ github.repository }} --linuxdistro ubuntu

    # Dumps the instance's kernel log for diagnosis, then removes the instance.
    # The step runs under `bash -e`, so the dmesg exit status is captured
    # instead of aborting the script: `eci rm` must still run when the
    # diagnostic command fails, otherwise the instance is leaked. The captured
    # status is returned at the end so a diagnostic failure is still reported.
    - name: Clean ECI
      if: always()
      run: |
        echo ${{ steps.run_ut.outputs.ECI_ID }}
        echo ">>> Dmesg info:"
        dmesg_rc=0
        eci exec ${{ steps.run_ut.outputs.ECI_ID }} bash -c "dmesg -T" || dmesg_rc=$?
        eci rm ${{ steps.run_ut.outputs.ECI_ID }}
        exit ${dmesg_rc}

    - name: Upload log
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: FE UT LOG
        path: ${{ steps.run_ut.outputs.RES_LOG }}
        retention-days: 1
        overwrite: true

    # Deletes the files and directories reported by the UT step, then empties
    # the workspace.
    - name: Clean ENV
      if: always()
      run: |
        rm -rf ${{ steps.run_ut.outputs.FE_REPORT_DIR }}
        rm -f ${{ steps.run_ut.outputs.RES_FILE }}
        rm -f ${{ steps.run_ut.outputs.RES_LOG }}
        rm -rf ${{ steps.run_ut.outputs.COV_DIR }}
        rm -rf ${{ github.workspace }}/*
# Final job: always runs after the FE and BE unit-test jobs and publishes the
# PR number and PR head SHA as artifacts.
Teardown:
  name: Teardown
  needs: [fe-ut, be-ut]
  if: always()
  runs-on: [self-hosted, normal]
  env:
    PR_NUMBER: ${{ github.event.number }}
    BRANCH: ${{ github.base_ref }}
  steps:
    # Writes the PR number and the head commit SHA (read from the event
    # payload) to small text files for the upload steps below.
    - name: upload info
      if: always()
      run: |
        echo $PR_NUMBER > pr_num.txt
        GITHUB_SHA=$(jq -r .pull_request.head.sha "$GITHUB_EVENT_PATH")
        echo ${GITHUB_SHA} > head_sha.txt

    - name: Upload the PR number
      uses: actions/upload-artifact@v4
      with:
        name: pr_num
        path: ./pr_num.txt
        retention-days: 3
        overwrite: true

    # overwrite: true matches every other upload in this workflow; without
    # it, uploading to an artifact name that already exists in the run fails.
    - name: Upload the PR HEAD REF
      uses: actions/upload-artifact@v4
      with:
        name: head_sha
        path: ./head_sha.txt
        retention-days: 3
        overwrite: true

    # Empty the workspace.
    - name: Clean
      run: |
        rm -rf ${{ github.workspace }}/*