Skip to content

Commit

Permalink
Merge branch 'main' into method02
Browse files Browse the repository at this point in the history
Signed-off-by: Sandeep Kumawat <[email protected]>
  • Loading branch information
skumawat2025 committed May 23, 2024
2 parents c2fe637 + b3049fb commit 88e6e27
Show file tree
Hide file tree
Showing 84 changed files with 3,197 additions and 651 deletions.
4 changes: 2 additions & 2 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# CODEOWNERS manages notifications, not PR approvals
# For PR approvals see /.github/workflows/maintainer-approval.yml
# For PR approvals see /.github/workflows/maintainer-approval.yml

# Files have a single rule applied, the last match decides the owner
# If you would like to more specifically apply ownership, include existing owner in new sub fields
Expand All @@ -24,4 +24,4 @@

/.github/ @peternied

/MAINTAINERS.md @anasalkouz @andrross @Bukhtawar @CEHENKLE @dblock @dbwiddis @dreamer-89 @gbbafna @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @tlfeng @VachaShah
/MAINTAINERS.md @anasalkouz @andrross @Bukhtawar @CEHENKLE @dblock @dbwiddis @dreamer-89 @gbbafna @jed326 @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @sachinpkale @saratvemulapalli @shwetathareja @sohami @tlfeng @VachaShah
26 changes: 25 additions & 1 deletion .github/workflows/gradle-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,31 @@ jobs:
- name: Setup environment variables (PR)
if: github.event_name == 'pull_request_target'
run: |
echo "event_name=pull_request_target" >> $GITHUB_ENV
echo "branch_name=$(jq --raw-output .pull_request.base.ref $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_from_sha=$(jq --raw-output .pull_request.head.sha $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_from_clone_url=$(jq --raw-output .pull_request.head.repo.clone_url $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_to_clone_url=$(jq --raw-output .pull_request.base.repo.clone_url $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_title=$(jq --raw-output .pull_request.title $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_number=$(jq --raw-output .pull_request.number $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_owner=$(jq --raw-output .pull_request.user.login $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "pr_or_commit_description=$(jq --ascii-output .pull_request.body $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "post_merge_action=false" >> $GITHUB_ENV
# to get the PR data that can be used for post merge actions
- uses: actions/github-script@v7
if: github.event_name == 'push'
id: get_pr_data
with:
github-token: ${{secrets.GITHUB_TOKEN}}
script: |
return (
await github.rest.repos.listPullRequestsAssociatedWithCommit({
commit_sha: context.sha,
owner: context.repo.owner,
repo: context.repo.repo,
})
).data[0];
- name: Setup environment variables (Push)
if: github.event_name == 'push'
Expand All @@ -43,11 +63,15 @@ jobs:
ref_id=$(git rev-parse HEAD)
branch_name=$(git rev-parse --abbrev-ref HEAD)
echo "branch_name=$branch_name" >> $GITHUB_ENV
echo "event_name=push" >> $GITHUB_ENV
echo "pr_from_sha=$ref_id" >> $GITHUB_ENV
echo "pr_from_clone_url=$repo_url" >> $GITHUB_ENV
echo "pr_to_clone_url=$repo_url" >> $GITHUB_ENV
echo "pr_title=Push trigger $branch_name $ref_id $repo_url" >> $GITHUB_ENV
echo "pr_number=Null" >> $GITHUB_ENV
echo "pr_owner=$(jq --raw-output '.commits[0].author.username' $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo 'pr_number=${{ fromJson(steps.get_pr_data.outputs.result).number }}' >> $GITHUB_ENV
echo "pr_or_commit_description=$(jq --ascii-output .head_commit.message $GITHUB_EVENT_PATH)" >> $GITHUB_ENV
echo "post_merge_action=true" >> $GITHUB_ENV
- name: Checkout opensearch-build repo
uses: actions/checkout@v4
Expand Down
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,28 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x]
### Added
- Add latency metrics for instrumenting critical clusterManager code paths ([#12333](https://github.com/opensearch-project/OpenSearch/pull/12333))
- Add support for Azure Managed Identity in repository-azure ([#12423](https://github.com/opensearch-project/OpenSearch/issues/12423))
- Add useCompoundFile index setting ([#13478](https://github.com/opensearch-project/OpenSearch/pull/13478))
- Make outbound side of transport protocol dependent ([#13293](https://github.com/opensearch-project/OpenSearch/pull/13293))
- [Remote Store] Add dynamic cluster settings to set timeout for segments upload to Remote Store ([#13679](https://github.com/opensearch-project/OpenSearch/pull/13679))
- [Remote Store] Upload translog checkpoint as object metadata to translog.tlog([#13637](https://github.com/opensearch-project/OpenSearch/pull/13637))

### Dependencies
- Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559))
- Bump `org.gradle.test-retry` from 1.5.8 to 1.5.9 ([#13442](https://github.com/opensearch-project/OpenSearch/pull/13442))
- Bump `org.apache.commons:commons-text` from 1.11.0 to 1.12.0 ([#13557](https://github.com/opensearch-project/OpenSearch/pull/13557))
- Bump `org.hdrhistogram:HdrHistogram` from 2.1.12 to 2.2.1 ([#13556](https://github.com/opensearch-project/OpenSearch/pull/13556))
- Bump `com.gradle.enterprise` from 3.17.2 to 3.17.3 ([#13641](https://github.com/opensearch-project/OpenSearch/pull/13641))
- Bump `com.gradle.enterprise` from 3.17.2 to 3.17.4 ([#13641](https://github.com/opensearch-project/OpenSearch/pull/13641), [#13753](https://github.com/opensearch-project/OpenSearch/pull/13753))
- Bump `org.apache.hadoop:hadoop-minicluster` from 3.3.6 to 3.4.0 ([#13642](https://github.com/opensearch-project/OpenSearch/pull/13642))
- Bump `mockito` from 5.11.0 to 5.12.0 ([#13665](https://github.com/opensearch-project/OpenSearch/pull/13665))
- Bump `com.google.code.gson:gson` from 2.10.1 to 2.11.0 ([#13752](https://github.com/opensearch-project/OpenSearch/pull/13752))
- Bump `ch.qos.logback:logback-core` from 1.5.3 to 1.5.6 ([#13756](https://github.com/opensearch-project/OpenSearch/pull/13756))

### Changed
- Add ability for Boolean and date field queries to run when only doc_values are enabled ([#11650](https://github.com/opensearch-project/OpenSearch/pull/11650))
- Refactor implementations of query phase searcher, allow QueryCollectorContext to have zero collectors ([#13481](https://github.com/opensearch-project/OpenSearch/pull/13481))
- Adds support to inject telemetry instances to plugins ([#13636](https://github.com/opensearch-project/OpenSearch/pull/13636))

### Deprecated

Expand All @@ -32,6 +37,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix negative RequestStats metric issue ([#13553](https://github.com/opensearch-project/OpenSearch/pull/13553))
- Fix get field mapping API returns 404 error in mixed cluster with multiple versions ([#13624](https://github.com/opensearch-project/OpenSearch/pull/13624))
- Allow clearing `remote_store.compatibility_mode` setting ([#13646](https://github.com/opensearch-project/OpenSearch/pull/13646))
- Fix ReplicaShardBatchAllocator to batch shards without duplicates ([#13710](https://github.com/opensearch-project/OpenSearch/pull/13710))

### Security

Expand Down
1 change: 1 addition & 0 deletions MAINTAINERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This document contains a list of maintainers in this repo. See [opensearch-proje
| Dan Widdis | [dbwiddis](https://github.com/dbwiddis) | Amazon |
| Daniel "dB." Doubrovkine | [dblock](https://github.com/dblock) | Amazon |
| Gaurav Bafna | [gbbafna](https://github.com/gbbafna) | Amazon |
| Jay Deng | [jed326](https://github.com/jed326) | Amazon |
| Kunal Kotwani | [kotwanikunal](https://github.com/kotwanikunal) | Amazon |
| Marc Handalian | [mch2](https://github.com/mch2) | Amazon |
| Michael Froh | [msfroh](https://github.com/msfroh) | Amazon |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,54 +32,67 @@

package org.opensearch.ingest.attachment;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.lucene.tests.util.LuceneTestCase.SuppressFileSystems;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.tika.metadata.Metadata;
import org.opensearch.common.io.PathUtils;
import org.opensearch.common.xcontent.XContentHelper;
import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.test.OpenSearchTestCase;

import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;

/**
* Evil test-coverage cheat, we parse a bunch of docs from tika
* so that we have a nice grab-bag variety, and assert some content
* comes back and no exception.
* Parse sample tika documents and assert the contents has not changed according to previously recorded checksums.
* Uncaught changes to tika parsing could potentially pose bwc issues.
* Note: In some cases tika will access a user's locale to inform the parsing of a file.
* The checksums of these files are left empty, and we only validate that parsed content is not null.
*/
@SuppressFileSystems("ExtrasFS") // don't try to parse extraN
public class TikaDocTests extends OpenSearchTestCase {

/** some test files from tika test suite, zipped up */
/** some test files from the apache tika unit test suite with accompanying sha1 checksums */
static final String TIKA_FILES = "/org/opensearch/ingest/attachment/test/tika-files/";
static final String TIKA_CHECKSUMS = "/org/opensearch/ingest/attachment/test/.checksums";

public void testFiles() throws Exception {
Path tmp = createTempDir();
logger.debug("unzipping all tika sample files");
try (DirectoryStream<Path> stream = Files.newDirectoryStream(PathUtils.get(getClass().getResource(TIKA_FILES).toURI()))) {
for (Path doc : stream) {
String filename = doc.getFileName().toString();
TestUtil.unzip(getClass().getResourceAsStream(TIKA_FILES + filename), tmp);
}
}
public void testParseSamples() throws Exception {
String checksumJson = Files.readString(PathUtils.get(getClass().getResource(TIKA_CHECKSUMS).toURI()));
Map<String, Object> checksums = XContentHelper.convertToMap(JsonXContent.jsonXContent, checksumJson, false);
DirectoryStream<Path> stream = Files.newDirectoryStream(unzipToTemp(TIKA_FILES));

try (DirectoryStream<Path> stream = Files.newDirectoryStream(tmp)) {
for (Path doc : stream) {
logger.debug("parsing: {}", doc);
assertParseable(doc);
for (Path doc : stream) {
String parsedContent = tryParse(doc);
assertNotNull(parsedContent);
assertFalse(parsedContent.isEmpty());

String check = checksums.get(doc.getFileName().toString()).toString();
if (!check.isEmpty()) {
assertEquals(check, DigestUtils.sha1Hex(parsedContent));
}
}

stream.close();
}

void assertParseable(Path fileName) throws Exception {
try {
byte bytes[] = Files.readAllBytes(fileName);
String parsedContent = TikaImpl.parse(bytes, new Metadata(), -1);
assertNotNull(parsedContent);
assertFalse(parsedContent.isEmpty());
logger.debug("extracted content: {}", parsedContent);
} catch (Exception e) {
throw new RuntimeException("parsing of filename: " + fileName.getFileName() + " failed", e);
private Path unzipToTemp(String zipDir) throws Exception {
Path tmp = createTempDir();
DirectoryStream<Path> stream = Files.newDirectoryStream(PathUtils.get(getClass().getResource(zipDir).toURI()));

for (Path doc : stream) {
String filename = doc.getFileName().toString();
TestUtil.unzip(getClass().getResourceAsStream(zipDir + filename), tmp);
}

stream.close();
return tmp;
}

private String tryParse(Path doc) throws Exception {
byte bytes[] = Files.readAllBytes(doc);
return TikaImpl.parse(bytes, new Metadata(), -1);
}
}
Loading

0 comments on commit 88e6e27

Please sign in to comment.