Merge branch 'main' into pointrange_optimization

Signed-off-by: Harsha Vamsi Kalluri <[email protected]>
harshavamsi · Jul 19, 2024 · 34a1f25 · 34a1f25
2 parents bd1e468 + 77a74e2
commit 34a1f25
Show file tree

Hide file tree

Showing 79 changed files with 5,829 additions and 852 deletions.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
@@ -24,4 +24,4 @@
 
 /.github/ @peternied
 
-/MAINTAINERS.md @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah
+/MAINTAINERS.md @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gaobinlong @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah
diff --git a/.github/benchmark-configs.json b/.github/benchmark-configs.json
@@ -0,0 +1,155 @@
+{
+  "name": "Cluster and opensearch-benchmark configurations",
+  "id_1": {
+    "description": "Indexing only configuration for NYC_TAXIS workload",
+    "supported_major_versions": ["2", "3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "nyc_taxis",
+      "WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
+      "EXCLUDE_TASKS": "type:search",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_2": {
+    "description": "Indexing only configuration for HTTP_LOGS workload",
+    "supported_major_versions": ["2", "3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "http_logs",
+      "WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
+      "EXCLUDE_TASKS": "type:search",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_3": {
+    "description": "Search only test-procedure for NYC_TAXIS, uses snapshot to restore the data for OS-3.0.0",
+    "supported_major_versions": ["3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "nyc_taxis",
+      "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"nyc_taxis_1_shard\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_4": {
+    "description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-3.0.0",
+    "supported_major_versions": ["3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "http_logs",
+      "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"http_logs_1_shard\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_5": {
+    "description": "Search only test-procedure for BIG5, uses snapshot to restore the data for OS-3.0.0",
+    "supported_major_versions": ["3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "big5",
+      "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo-300\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots-300\",\"snapshot_name\":\"big5_1_shard\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_6": {
+    "description": "Search only test-procedure for NYC_TAXIS, uses snapshot to restore the data for OS-2.x",
+    "supported_major_versions": ["2"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "nyc_taxis",
+      "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots\",\"snapshot_name\":\"nyc_taxis_1_shard\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_7": {
+    "description": "Search only test-procedure for HTTP_LOGS, uses snapshot to restore the data for OS-2.x",
+    "supported_major_versions": ["2"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "http_logs",
+      "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots\",\"snapshot_name\":\"http_logs_1_shard\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_8": {
+    "description": "Search only test-procedure for BIG5, uses snapshot to restore the data for OS-2.x",
+    "supported_major_versions": ["2"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "big5",
+      "WORKLOAD_PARAMS": "{\"snapshot_repo_name\":\"benchmark-workloads-repo\",\"snapshot_bucket_name\":\"benchmark-workload-snapshots\",\"snapshot_region\":\"us-east-1\",\"snapshot_base_path\":\"workload-snapshots\",\"snapshot_name\":\"big5_1_shard\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_9": {
+    "description": "Indexing and search configuration for pmc workload",
+    "supported_major_versions": ["2", "3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "pmc",
+      "WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  },
+  "id_10": {
+    "description": "Indexing only configuration for stack-overflow workload",
+    "supported_major_versions": ["2", "3"],
+    "cluster-benchmark-configs": {
+      "SINGLE_NODE_CLUSTER": "true",
+      "MIN_DISTRIBUTION": "true",
+      "TEST_WORKLOAD": "so",
+      "WORKLOAD_PARAMS": "{\"number_of_replicas\":\"0\",\"number_of_shards\":\"1\"}",
+      "CAPTURE_NODE_STAT": "true"
+    },
+    "cluster_configuration": {
+      "size": "Single-Node",
+      "data_instance_config": "4vCPU, 32G Mem, 16G Heap"
+    }
+  }
+}
diff --git a/.github/workflows/add-performance-comment.yml b/.github/workflows/add-performance-comment.yml
@@ -0,0 +1,29 @@
+# Posts a pointer to the benchmark documentation on a pull request whenever
+# the 'Performance' label is added to it.
+name: Performance Label Action
+
+on:
+  # pull_request_target runs in the context of the base repository, so the
+  # token used below is allowed to comment on pull requests opened from forks.
+  pull_request_target:
+    types: [labeled]
+
+jobs:
+  add-comment:
+    if: github.event.label.name == 'Performance'
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+
+    steps:
+      - name: Add comment to PR
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: "Hello!\nWe have added a performance benchmark workflow that runs by adding a comment on the PR.\nPlease refer to https://github.com/opensearch-project/OpenSearch/blob/main/PERFORMANCE_BENCHMARKS.md on how to run benchmarks on pull requests."
+            })
diff --git a/.github/workflows/benchmark-pull-request.yml b/.github/workflows/benchmark-pull-request.yml
@@ -0,0 +1,178 @@
+# Runs an OpenSearch performance benchmark for a pull request when a comment
+# whose body is the JSON object {"run-benchmark-test": "<config id>"} is posted
+# on it. Valid config ids are the keys of .github/benchmark-configs.json.
+name: Run performance benchmark on pull request
+on:
+  issue_comment:
+    types: [created]
+jobs:
+  run-performance-benchmark-on-pull-request:
+    if: ${{ (github.event.issue.pull_request) && (contains(github.event.comment.body, '"run-benchmark-test"')) }}
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+      contents: read
+      issues: write
+      pull-requests: write
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v3
+      - name: Set up required env vars
+        run: |
+          echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
+          echo "REPOSITORY=${{ github.event.repository.full_name }}" >> $GITHUB_ENV
+          OPENSEARCH_VERSION=$(awk -F '=' '/^opensearch[[:space:]]*=/ {gsub(/[[:space:]]/, "", $2); print $2}' buildSrc/version.properties)
+          echo "OPENSEARCH_VERSION=$OPENSEARCH_VERSION" >> $GITHUB_ENV
+          major_version=$(echo $OPENSEARCH_VERSION | cut -d'.' -f1)
+          echo "OPENSEARCH_MAJOR_VERSION=$major_version" >> $GITHUB_ENV
+          echo "USER_TAGS=pull_request_number:${{ github.event.issue.number }},repository:OpenSearch" >> $GITHUB_ENV
+      # Parses the comment as JSON, checks the requested config id against
+      # benchmark-configs.json and the current major version, then exports the
+      # matching cluster-benchmark-configs entries as environment variables.
+      - name: Check comment format
+        id: check_comment
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const fs = require('fs');
+            const comment = context.payload.comment.body;
+            let commentJson;
+            try {
+              commentJson = JSON.parse(comment);
+            } catch (error) {
+              core.setOutput('invalid', 'true');
+              return;
+            }
+            if (!commentJson.hasOwnProperty('run-benchmark-test')) {
+              core.setOutput('invalid', 'true');
+              return;
+            }
+            const configId = commentJson['run-benchmark-test'];
+            let benchmarkConfigs;
+            try {
+              benchmarkConfigs = JSON.parse(fs.readFileSync('.github/benchmark-configs.json', 'utf8'));
+            } catch (error) {
+              core.setFailed('Failed to read benchmark-configs.json');
+              return;
+            }
+            const openSearchMajorVersion = process.env.OPENSEARCH_MAJOR_VERSION;
+            console.log('MAJOR_VERSION', openSearchMajorVersion)
+            if (!benchmarkConfigs.hasOwnProperty(configId) ||
+              !benchmarkConfigs[configId].supported_major_versions.includes(openSearchMajorVersion)) {
+              core.setOutput('invalid', 'true');
+              return;
+            }
+            const clusterBenchmarkConfigs = benchmarkConfigs[configId]['cluster-benchmark-configs'];
+            for (const [key, value] of Object.entries(clusterBenchmarkConfigs)) {
+              core.exportVariable(key, value);
+            }
+      - name: Post invalid format comment
+        if: steps.check_comment.outputs.invalid == 'true'
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{secrets.GITHUB_TOKEN}}
+          script: |
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: 'Invalid comment format or config id. Please refer to https://github.com/opensearch-project/OpenSearch/blob/main/PERFORMANCE_BENCHMARKS.md on how to run benchmarks on pull requests.'
+            })
+      - name: Fail workflow for invalid comment
+        if: steps.check_comment.outputs.invalid == 'true'
+        run: |
+          echo "Invalid comment format detected. Failing the workflow."
+          exit 1
+      # Turns the '*' line(s) of CODEOWNERS into a comma-separated list of
+      # logins with the '@' signs removed.
+      - id: get_approvers
+        run: |
+          echo "approvers=$(cat .github/CODEOWNERS | grep '^\*'  | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT
+      - uses: trstringer/manual-approval@v1
+        # Wrap both sides in commas so the commenter must match a whole login;
+        # a bare substring test would let a partial login skip the approval.
+        if: (!contains(format(',{0},', steps.get_approvers.outputs.approvers), format(',{0},', github.event.comment.user.login)))
+        with:
+          secret: ${{ github.TOKEN }}
+          approvers: ${{ steps.get_approvers.outputs.approvers }}
+          minimum-approvals: 1
+          issue-title: 'Request to approve/deny benchmark run for PR #${{ env.PR_NUMBER }}'
+          issue-body: "Please approve or deny the benchmark run for PR #${{ env.PR_NUMBER }}"
+          exclude-workflow-initiator-as-approver: false
+      - name: Get PR Details
+        id: get_pr
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const issue = context.payload.issue;
+            const prNumber = issue.number;
+            console.log(`Pull Request Number: ${prNumber}`);
+
+            const { data: pull_request } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: prNumber,
+            });
+
+            return {
+              "headRepoFullName": pull_request.head.repo.full_name,
+              "headRef": pull_request.head.ref
+            };
+      # The PR details reach the script through an environment variable instead
+      # of being interpolated into the shell source: headRef is a branch name
+      # chosen by the contributor and must never be parsed as shell code.
+      - name: Set pr details env vars
+        env:
+          PR_DETAILS: ${{ steps.get_pr.outputs.result }}
+        run: |
+          echo "$PR_DETAILS" | jq -r '.headRepoFullName'
+          echo "$PR_DETAILS" | jq -r '.headRef'
+          headRepo=$(echo "$PR_DETAILS" | jq -r '.headRepoFullName')
+          headRef=$(echo "$PR_DETAILS" | jq -r '.headRef')
+          echo "prHeadRepo=$headRepo" >> $GITHUB_ENV
+          echo "prHeadRef=$headRef" >> $GITHUB_ENV
+      - name: Checkout PR Repo
+        uses: actions/checkout@v2
+        with:
+          repository: ${{ env.prHeadRepo }}
+          ref: ${{ env.prHeadRef }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Setup Java
+        uses: actions/setup-java@v1
+        with:
+          java-version: 21
+      - name: Build and Assemble OpenSearch from PR
+        run: |
+          ./gradlew :distribution:archives:linux-tar:assemble -Dbuild.snapshot=false
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.UPLOAD_ARCHIVE_ARTIFACT_ROLE }}
+          role-session-name: publish-to-s3
+          aws-region: us-west-2
+      - name: Push to S3
+        run: |
+          aws s3 cp distribution/archives/linux-tar/build/distributions/opensearch-min-$OPENSEARCH_VERSION-linux-x64.tar.gz s3://${{ secrets.ARCHIVE_ARTIFACT_BUCKET_NAME }}/PR-$PR_NUMBER/
+          echo "DISTRIBUTION_URL=${{ secrets.ARTIFACT_BUCKET_CLOUDFRONT_URL }}/PR-$PR_NUMBER/opensearch-min-$OPENSEARCH_VERSION-linux-x64.tar.gz" >> $GITHUB_ENV
+      - name: Checkout opensearch-build repo
+        uses: actions/checkout@v4
+        with:
+          repository: opensearch-project/opensearch-build
+          ref: main
+          path: opensearch-build
+      - name: Trigger jenkins workflow to run benchmark
+        run: |
+          cat $GITHUB_ENV
+          bash opensearch-build/scripts/benchmark/benchmark-pull-request.sh ${{ secrets.JENKINS_PR_BENCHMARK_GENERIC_WEBHOOK_TOKEN }}
+      - name: Update PR with Job Url
+        uses: actions/github-script@v6
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const workflowUrl = process.env.WORKFLOW_URL;
+            github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: `The Jenkins job url is ${workflowUrl} . Final results will be published once the job is completed.`
+            })
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Print reason why parent task was cancelled ([#14604](https://github.com/opensearch-project/OpenSearch/issues/14604))
 - Add matchesPluginSystemIndexPattern to SystemIndexRegistry ([#14750](https://github.com/opensearch-project/OpenSearch/pull/14750))
 - Add Plugin interface for loading application based configuration templates (([#14659](https://github.com/opensearch-project/OpenSearch/issues/14659)))
+- Refactor remote-routing-table service in line with remote state interfaces ([#14668](https://github.com/opensearch-project/OpenSearch/pull/14668))
+- Add prefix mode verification setting for repository verification ([#14790](https://github.com/opensearch-project/OpenSearch/pull/14790))
 - [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788))
 
 ### Dependencies
@@ -74,6 +76,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Refactoring FilterPath.parse by using an iterative approach ([#14200](https://github.com/opensearch-project/OpenSearch/pull/14200))
 - Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206))
 - Update help output for _cat ([#14722](https://github.com/opensearch-project/OpenSearch/pull/14722))
+- Fix bulk upsert ignores the default_pipeline and final_pipeline when auto-created index matches the index template ([#12891](https://github.com/opensearch-project/OpenSearch/pull/12891))
+- Fix NPE in ReplicaShardAllocator ([#14385](https://github.com/opensearch-project/OpenSearch/pull/14385))
+- Fix constant_keyword field type used when creating index ([#14807](https://github.com/opensearch-project/OpenSearch/pull/14807))
 
 ### Security
 

diff --git a/MAINTAINERS.md b/MAINTAINERS.md
@@ -14,6 +14,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje
 | Charlotte Henkle         | [CEHENKLE](https://github.com/CEHENKLE)                 | Amazon      |
 | Dan Widdis               | [dbwiddis](https://github.com/dbwiddis)                 | Amazon      |
 | Daniel "dB." Doubrovkine | [dblock](https://github.com/dblock)                     | Amazon      |
+| Gao Binlong              | [gaobinlong](https://github.com/gaobinlong)             | Amazon      |
 | Gaurav Bafna             | [gbbafna](https://github.com/gbbafna)                   | Amazon      |
 | Jay Deng                 | [jed326](https://github.com/jed326)                     | Amazon      |
 | Kunal Kotwani            | [kotwanikunal](https://github.com/kotwanikunal)         | Amazon      |
Original file line number	Diff line number	Diff line change
Expand Up		@@ -24,4 +24,4 @@

		/.github/ @peternied

		/MAINTAINERS.md @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah
		/MAINTAINERS.md @anasalkouz @andrross @ashking94 @Bukhtawar @CEHENKLE @dblock @dbwiddis @gaobinlong @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @VachaShah