diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e5bf3f5..65a700e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -190,6 +190,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Improved CI performance of integration with unreleased OpenSearch ([#318](https://github.com/opensearch-project/opensearch-py/pull/318)) - Added k-NN guide and samples ([#449](https://github.com/opensearch-project/opensearch-py/pull/449)) - Added the ability to run tests matching a pattern to `.ci/run-tests` ([#454](https://github.com/opensearch-project/opensearch-py/pull/454)) +- Added a guide for taking snapshots ([#486](https://github.com/opensearch-project/opensearch-py/pull/429)) ### Changed - Moved security from `plugins` to `clients` ([#442](https://github.com/opensearch-project/opensearch-py/pull/442)) - Updated Security Client APIs ([#450](https://github.com/opensearch-project/opensearch-py/pull/450)) diff --git a/USER_GUIDE.md b/USER_GUIDE.md index 78aa6656..5f38b535 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -153,6 +153,7 @@ print(response) - [Search](guides/search.md) - [Point in Time](guides/point_in_time.md) - [Using a Proxy](guides/proxy.md) +- [Working with Snapshots](guides/snapshot.md) - [Index Templates](guides/index_template.md) - [Advanced Index Actions](guides/advanced_index_actions.md) - [Making Raw JSON REST Requests](guides/json.md) diff --git a/guides/snapshot.md b/guides/snapshot.md new file mode 100644 index 00000000..0630fc94 --- /dev/null +++ b/guides/snapshot.md @@ -0,0 +1,145 @@ +# Table of Contents +- [Snapshot Actions](#snapshot-actions) + - [Setup](#setup) + - [API Actions](#api-actions) + - [Create Snapshot Repository](#create-snapshot-repository) + - [Create Snapshot](#create-snapshot) + - [Verify Snapshot Repository](#verify-snapshot-repository) + - [Delete Snapshot](#delete-snapshot) + - [Restore Snapshot](#restore-snapshot) + - [Get Snapshot Status](#get-snapshot-status) + - [Clone Snapshot](#clone-snapshot) + - [Get 
Snapshot](#get-snapshot) + - [Get Repository](#get-repository) + - [Repository Analyze](#repository-analyze) + - [Cleanup](#cleanup) + +# Snapshot Actions +In this guide, we will look at some snapshot actions that allow you to manage and work with snapshots of your indices. + +A complete working sample for this guide can be found in [samples/snapshot](../samples/snapshot). + +## Setup +Let's create a client instance, and an index named `movies`: +```python +from opensearchpy import OpenSearch + +host = 'localhost' +port = 9200 +auth = ('admin', 'admin') # For testing only. Don't store credentials in code. + +client = OpenSearch( + hosts = [{'host': host, 'port': port}], + http_auth = auth, + use_ssl = True, + verify_certs = False, + ssl_show_warn = False +) + +print(client.info()) # Check server info and make sure the client is connected +client.indices.create(index='movies') +``` +## API Actions +### Create Snapshot Repository +Before taking a snapshot, you need to create a snapshot repository to store the snapshots. You can use the `create_repository` API action for this purpose. The following example creates a snapshot repository named `my_repository`: + +```python +repo_body = { + "type": "fs", # Replace 'fs' with the appropriate repository type + "settings": { + "location": "/path/to/repo", + } +} + +# Create the snapshot repository and capture the response +response = client.snapshot.create_repository(repository='my_repository', body=repo_body) + +# Print the response to see the result +print(response) +``` + +### Create Snapshot +To create a snapshot of an index, you can use the `create` method from the `snapshot` API. The following example creates a snapshot named `my_snapshot` for the movies index: + +```python +client.snapshot.create(repository='my_repository', snapshot='my_snapshot', body={"indices": "movies"}) +``` + +### Verify Snapshot Repository +The `verify_repository` API action allows you to verify a snapshot repository. 
Verifying a repository ensures that it is accessible and operational, but it does not validate the integrity of the snapshots stored within the repository. The following example verifies `my_repository`: + +```python +response = client.snapshot.verify_repository(repository='my_repository') + +# The client returns the deserialized response body, not an HTTP response +# object, so there is no `status_code` or `content` attribute to read. + +# Print the response to see the result +print(response) +``` + +### Delete Snapshot +To delete a specific snapshot, use the `delete` API action: + +```python +client.snapshot.delete(repository='my_repository', snapshot='my_snapshot') +``` +### Restore Snapshot +To restore a snapshot and recreate the indices, mappings, and data, you can use the `restore` method. The following example restores the `my_snapshot` snapshot: + +```python +response = client.snapshot.restore(repository='my_repository', snapshot='my_snapshot') +``` + +### Get Snapshot Status +To check the status of a snapshot, you can use the `status` method. + +```python +response = client.snapshot.status(repository='my_repository', snapshot='my_snapshot') +``` + +### Clone Snapshot +You can clone an existing snapshot to create a new snapshot with the same contents. The `clone` operation allows you to create multiple copies of a snapshot, which can be useful for backup retention or creating snapshots for different purposes. The following example clones a snapshot named `my_snapshot` to create a new snapshot named `my_snapshot_clone`: + +```python +client.snapshot.clone( + repository='my_repository', + snapshot='my_snapshot', + target_snapshot='my_snapshot_clone' +) +``` +### Get Snapshot +To retrieve information about a specific snapshot, you can use the `get` API action. It provides metadata such as the snapshot's status, indices included in the snapshot, and the timestamp when the snapshot was taken.
The following example retrieves information about `my_snapshot`: + +```python +response = client.snapshot.get( + repository='my_repository', + snapshot='my_snapshot' +) + +# Print the response to see the result +print(response) +``` + +### Get Repository +To retrieve information about a snapshot repository, you can use the `get_repository` API action. It provides details about the configured repository, including its type and settings. The following example retrieves information about `my_repository`: + +```python +response = client.snapshot.get_repository(repository='my_repository') +``` + +### Repository Analyze +The `repository_analyze` API action allows you to analyze a snapshot repository for correctness and performance. It checks for any inconsistencies or corruption in the repository. The following example performs a repository analysis on `my_repository`: + +```python +response = client.snapshot.repository_analyze(repository='my_repository') +``` + +## Cleanup + +Finally, let's delete the `movies` index and clean up all the snapshots and the repository: +```python +client.indices.delete(index='movies') +client.snapshot.delete(repository='my_repository', snapshot='my_snapshot') +client.snapshot.delete_repository(repository='my_repository') +``` \ No newline at end of file diff --git a/samples/snapshot/Dockerfile b/samples/snapshot/Dockerfile new file mode 100644 index 00000000..b2f22e3b --- /dev/null +++ b/samples/snapshot/Dockerfile @@ -0,0 +1,9 @@ +FROM opensearchproject/opensearch:2.11.0 + +ARG OPENSEARCH_HOME=/usr/share/opensearch +ARG UID=1000 +ARG GID=1000 + +RUN echo 'path.repo: ["/usr/share/opensearch/backups"]' >> $OPENSEARCH_HOME/config/opensearch.yml +RUN mkdir -p $OPENSEARCH_HOME/backups +RUN chown -Rv $UID:$GID $OPENSEARCH_HOME/backups diff --git a/samples/snapshot/README.md b/samples/snapshot/README.md new file mode 100644 index 00000000..ff947425 --- /dev/null +++ b/samples/snapshot/README.md @@ -0,0 +1,8 @@ +Run this sample as follows.
+ +``` +cd samples +docker run --rm -p 9200:9200 -p 9600:9600 -e "discovery.type=single-node" -it $(docker build -q snapshot ) +poetry install +poetry run python snapshot/snapshot_sample.py +``` diff --git a/samples/snapshot/snapshot_sample.py b/samples/snapshot/snapshot_sample.py new file mode 100644 index 00000000..ac512ed0 --- /dev/null +++ b/samples/snapshot/snapshot_sample.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python + +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# +# Modifications Copyright OpenSearch Contributors. See +# GitHub history for details. + +import os + +from opensearchpy import OpenSearch + +# connect to OpenSearch + +HOST = "localhost" +PORT = 9200 +auth = ( + "admin", + os.getenv("OPENSEARCH_PASSWORD", "admin"), +) # For testing only. Don't store credentials in code. + +client = OpenSearch( + hosts=[{"host": HOST, "port": PORT}], + http_auth=auth, + use_ssl=True, + verify_certs=False, + ssl_show_warn=False, +) + +# Create an index + +INDEX_NAME = "test-snapshot" +client.indices.create(index=INDEX_NAME) + +# The repository lives on the OpenSearch node, not on the client machine. +# Its location must match the `path.repo` entry configured in the Dockerfile +# for this sample. +TEMP_REPO_LOCATION = "/usr/share/opensearch/backups" + +# Define the repository body with that location +repo_body = { + "type": "fs", # Replace 'fs' with the appropriate repository type + "settings": { + "location": TEMP_REPO_LOCATION, # Replace with the desired repository location + }, +} + +REPOSITORY_NAME = "my_repository" +response = client.snapshot.create_repository(repository=REPOSITORY_NAME, body=repo_body) + +print(response) + +# Create a snapshot + +SNAPSHOT_NAME = "my_snapshot" +response = client.snapshot.create( + repository=REPOSITORY_NAME, snapshot=SNAPSHOT_NAME, body={"indices": INDEX_NAME} +) + +print(response) + +# Get Snapshot Information +
+snapshot_info = client.snapshot.get(repository=REPOSITORY_NAME, snapshot=SNAPSHOT_NAME) + +print(snapshot_info) + +# Clean up - Delete Snapshot and Repository + +client.snapshot.delete(repository=REPOSITORY_NAME, snapshot=SNAPSHOT_NAME) +client.snapshot.delete_repository(repository=REPOSITORY_NAME) + +# Clean up - Delete Index + +client.indices.delete(index=INDEX_NAME)