diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 0ca6c823..59990902 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -9,9 +9,14 @@ jobs: strategy: fail-fast: false matrix: - opensearch_version: [ '1.0.1', '1.1.0', '1.2.4', '1.3.7', '2.0.1', '2.1.0', '2.2.1', '2.3.0', '2.4.0', '2.5.0', '2.6.0', '2.7.0', '2.8.0', '2.9.0', '2.10.0', '2.11.0' ] + opensearch_version: [ '1.0.1', '1.1.0', '1.2.4', '1.3.7', '2.0.1', '2.1.0', '2.2.1', '2.3.0', '2.4.0', '2.5.0', '2.6.0', '2.7.0', '2.8.0', '2.9.0', '2.10.0', '2.11.1' ] secured: [ "true", "false" ] - + exclude: + # https://github.com/opensearch-project/opensearch-py/issues/612 + - opensearch_version: 2.0.1 + secured: "true" + - opensearch_version: 2.1.0 + secured: "true" steps: - name: Checkout uses: actions/checkout@v3 diff --git a/CHANGELOG.md b/CHANGELOG.md index fe4d309e..092e0e3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Removed - Removed unnecessary `# -*- coding: utf-8 -*-` headers from .py files ([#615](https://github.com/opensearch-project/opensearch-py/pull/615), [#617](https://github.com/opensearch-project/opensearch-py/pull/617)) ### Fixed +- Fix KeyError when scroll return no hits ([#616](https://github.com/opensearch-project/opensearch-py/pull/616)) ### Security ## [2.4.2] diff --git a/opensearchpy/_async/helpers/actions.py b/opensearchpy/_async/helpers/actions.py index 1a013d27..03f88043 100644 --- a/opensearchpy/_async/helpers/actions.py +++ b/opensearchpy/_async/helpers/actions.py @@ -390,14 +390,17 @@ async def async_scan( scroll_id = resp.get("_scroll_id") try: - while scroll_id and resp["hits"]["hits"]: - for hit in resp["hits"]["hits"]: + while scroll_id and resp.get("hits", {}).get("hits"): + for hit in resp.get("hits", {}).get("hits", []): yield hit - # Default to 0 if the value isn't included in the response - shards_successful = resp["_shards"].get("successful", 0) - shards_skipped = resp["_shards"].get("skipped", 0) - shards_total = resp["_shards"].get("total", 0) + _shards = resp.get("_shards") + + if _shards: + # Default to 0 if the value isn't included in the response + shards_successful = _shards.get("successful", 0) + shards_skipped = _shards.get("skipped", 0) + shards_total = _shards.get("total", 0) # check if we have any errors if (shards_successful + shards_skipped) < shards_total: diff --git a/opensearchpy/helpers/actions.py b/opensearchpy/helpers/actions.py index c6c03d7e..c7f24139 100644 --- a/opensearchpy/helpers/actions.py +++ b/opensearchpy/helpers/actions.py @@ -586,14 +586,17 @@ def scan( scroll_id = resp.get("_scroll_id") try: - while scroll_id and resp["hits"]["hits"]: - for hit in resp["hits"]["hits"]: + while scroll_id and resp.get("hits", {}).get("hits"): + for hit in resp.get("hits", {}).get("hits", []): yield hit - # Default to 0 if the value isn't included in the response - shards_successful = resp["_shards"].get("successful", 0) - shards_skipped = resp["_shards"].get("skipped", 0) - shards_total = resp["_shards"].get("total", 0) + _shards = resp.get("_shards") + + if _shards: + # Default to 0 if the value isn't included in the response + shards_successful = _shards.get("successful", 0) + shards_skipped = _shards.get("skipped", 0) + shards_total = _shards.get("total", 0) # check if we have any errors if (shards_successful + shards_skipped) < shards_total: @@ -614,6 +617,7 @@ def scan( shards_total, ), ) + resp = client.scroll( body={"scroll_id": scroll_id, "scroll": scroll}, **scroll_kwargs ) diff --git a/test_opensearchpy/test_async/test_server/test_helpers/test_actions.py b/test_opensearchpy/test_async/test_server/test_helpers/test_actions.py index 158df715..c6c54df0 100644 --- a/test_opensearchpy/test_async/test_server/test_helpers/test_actions.py +++ b/test_opensearchpy/test_async/test_server/test_helpers/test_actions.py @@ -776,6 +776,34 @@ async def test_scan_auth_kwargs_favor_scroll_kwargs_option( } assert async_client.scroll.call_args[1]["sort"] == "asc" + async def test_async_scan_with_missing_hits_key( + self, async_client: Any, scan_teardown: Any + ) -> None: + with patch.object( + async_client, + "search", + return_value=MockResponse({"_scroll_id": "dummy_scroll_id", "_shards": {}}), + ): + with patch.object( + async_client, + "scroll", + return_value=MockResponse( + {"_scroll_id": "dummy_scroll_id", "_shards": {}} + ), + ): + with patch.object( + async_client, "clear_scroll", return_value=MockResponse({}) + ): + async_scan_result = [ + hit + async for hit in actions.async_scan( + async_client, query={"query": {"match_all": {}}} + ) + ] + assert ( + async_scan_result == [] + ), "Expected empty results when 'hits' key is missing" + @pytest.fixture(scope="function") # type: ignore async def reindex_setup(async_client: Any) -> Any: diff --git a/test_opensearchpy/test_helpers/test_actions.py b/test_opensearchpy/test_helpers/test_actions.py index e8bd3396..c43c7322 100644 --- a/test_opensearchpy/test_helpers/test_actions.py +++ b/test_opensearchpy/test_helpers/test_actions.py @@ -28,6 +28,7 @@ import threading import time from typing import Any +from unittest.mock import Mock import mock import pytest @@ -270,3 +271,24 @@ def test_string_actions_are_marked_as_simple_inserts(self) -> None: self.assertEqual( ('{"index":{}}', "whatever"), helpers.expand_action("whatever") ) + + +class TestScanFunction(TestCase): + @mock.patch("opensearchpy.OpenSearch.clear_scroll") + @mock.patch("opensearchpy.OpenSearch.scroll") + @mock.patch("opensearchpy.OpenSearch.search") + def test_scan_with_missing_hits_key( + self, mock_search: Mock, mock_scroll: Mock, mock_clear_scroll: Mock + ) -> None: + # Simulate a response where the 'hits' key is missing + mock_search.return_value = {"_scroll_id": "dummy_scroll_id", "_shards": {}} + + mock_scroll.side_effect = [{"_scroll_id": "dummy_scroll_id", "_shards": {}}] + + mock_clear_scroll.return_value = None + + client = OpenSearch() + + # The test should pass without raising a KeyError + scan_result = list(helpers.scan(client, query={"query": {"match_all": {}}})) + assert scan_result == [], "Expected empty results when 'hits' key is missing"