-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[#704] Add corruption recovery script
Problem: Currently, the only way to fix node corruption is to manually delete the node data directory and load a snapshot anew. Solution: Add a script that does all of this automatically.
- Loading branch information
1 parent
a05c86d
commit a78f852
Showing
5 changed files
with
335 additions
and
236 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
# SPDX-FileCopyrightText: 2023 Oxhead Alpha | ||
# SPDX-License-Identifier: LicenseRef-MIT-OA | ||
|
||
import os | ||
import urllib | ||
import json | ||
import shutil | ||
|
||
from tezos_baking.util import * | ||
from tezos_baking.tezos_setup_wizard import default_providers | ||
|
||
|
||
def check_node_corruption():
    """Report whether the node's systemd journal shows store corruption.

    The network name is taken from the ``NETWORK`` environment variable and
    used to build the ``tezos-node-<network>.service`` unit name; the journal
    of that unit is searched for the ``Inconsistent_store`` marker.

    Returns:
        bool: ``True`` when the marker occurs in the journal output,
        ``False`` otherwise, including when ``NETWORK`` is unset or empty
        (a notice is printed in that case).
    """
    network = os.environ.get('NETWORK')
    if not network:
        print("""
Could not find network name in environment.
Can't check node for corruption
""")
        return False

    logs = get_proc_output(f"journalctl -u tezos-node-{network}.service")
    output = logs.stdout
    # NOTE(review): get_proc_output is defined outside this file. If it
    # captures raw bytes (the subprocess default), testing a str against
    # bytes raises TypeError, so normalise to text before searching.
    if isinstance(output, bytes):
        output = output.decode("utf-8", errors="replace")
    return 'Inconsistent_store' in (output or "")
|
||
def restore_from_corruption():
    """Wipe the node data directory and re-import it from a fresh snapshot.

    Steps, in order:

    1. Read ``history_mode`` from ``config.json`` inside the directory named
       by ``TEZOS_NODE_DIR``. This must happen before the directory is
       deleted, otherwise the file no longer exists.
    2. Delete the data directory; stop if that fails.
    3. Download the snapshot listing from the first entry of
       ``default_providers`` that answers with a non-null ``"data"`` array.
    4. Sort the listing by ``block_height`` (highest first), pick an entry
       with ``extract_relevant_snapshot`` and download it with
       ``fetch_snapshot``.
    5. Run ``octez-node snapshot import`` on the downloaded path, then remove
       that path.

    The outcome is reported on stdout; nothing is returned.

    Raises:
        KeyError: if ``TEZOS_NODE_DIR`` or ``NETWORK`` is not set, or if
            ``config.json`` has no top-level ``history_mode`` key.
        OSError: if ``config.json`` cannot be opened.
        ValueError: if ``config.json`` is not valid JSON.
    """
    node_data_directory = os.environ['TEZOS_NODE_DIR']

    # Collect everything that lives in (or depends on) the data directory
    # while it still exists.
    with open(f"{node_data_directory}/config.json") as f:
        history_mode = json.load(f)["history_mode"]
    config = {
        "network": os.environ["NETWORK"],
        "history_mode": history_mode,
    }

    try:
        shutil.rmtree(node_data_directory)
    except OSError:
        print("Could not delete node data dir. Manual restoration is required")
        # Importing a snapshot on top of a half-deleted store cannot work.
        return

    snapshot_array = _fetch_snapshot_list(default_providers)
    if snapshot_array is None:
        print("Recovery from corruption failed. Manual restoration is required")
        return

    snapshot_array.sort(reverse=True, key=lambda x: x["block_height"])

    snapshot_meta = extract_relevant_snapshot(snapshot_array, config)
    snapshot_path = fetch_snapshot(snapshot_meta)

    reinstallation_result = get_proc_output(f"""
octez-node snapshot import {snapshot_path}
""")

    # The downloaded snapshot is only needed for the import itself.
    get_proc_output(f"""
rm -rf {snapshot_path}
""")

    if reinstallation_result.returncode:
        print("Recovery from corruption failed. Manual restoration is required")
    else:
        print("Recovery from corruption was successfull")


def _fetch_snapshot_list(providers):
    """Return the ``"data"`` array of the first usable provider, else ``None``.

    A provider that cannot be reached, does not return JSON, or has no
    ``"data"`` key is skipped with a notice so the next one can be tried.
    """
    # A bare ``import urllib`` does not guarantee the ``request`` submodule
    # is loaded, so import it explicitly here.
    import urllib.request

    for json_url in providers:
        try:
            with urllib.request.urlopen(json_url) as response:
                snapshot_array = json.load(response)["data"]
        except (OSError, ValueError, KeyError) as err:
            print(f"Could not fetch snapshot list from {json_url}: {err}")
            continue
        if snapshot_array is not None:
            return snapshot_array
    return None
|
||
|
||
def main():
    """Check the node for corruption and, if permitted, restore it.

    Nothing happens when ``check_node_corruption`` reports a healthy node.
    Otherwise restoration runs only if ``which octez-baking`` succeeds and
    the ``RESTORE_FROM_CORRUPTION`` environment variable is set to an
    enabling value; in the other cases an explanatory notice is printed.

    ``RESTORE_FROM_CORRUPTION`` is treated as disabled when it is unset,
    empty, or one of ``0``, ``false``, ``no``, ``off`` (case-insensitive).
    Any other value enables automatic restoration.
    """
    # Check corruption first, so a healthy node needs neither the
    # `which` lookup nor the RESTORE_FROM_CORRUPTION variable.
    if not check_node_corruption():
        return

    # A non-zero exit status from `which` means the executable is missing.
    if get_proc_output("which octez-baking").returncode:
        print("""
Node has been corrupted.
It order to restore it, you need `octez-baking` to be installed
""")
        return

    # Environment values are strings, so "0" or "false" would be truthy if
    # tested directly; parse the flag explicitly and tolerate it being unset.
    flag = os.environ.get('RESTORE_FROM_CORRUPTION', '').strip().lower()
    if flag in ('', '0', 'false', 'no', 'off'):
        print("""
Node has been corrupted.
Automatic restoration is disabled.
Manual restoration is required.
""")
        return

    restore_from_corruption()
|
||
# Run the corruption check only when executed as a script, not on import.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.