From 99f6ae1830b53f8fdfacf01bf70115736adeac72 Mon Sep 17 00:00:00 2001 From: Nicolas Ochem Date: Wed, 24 Jul 2024 09:16:35 -0700 Subject: [PATCH] Remove snapshot engine (#15) * remove snapshot engine CI is broken, and the code is stale * fix util container build --- charts/snapshotEngine/Chart.yaml | 18 - charts/snapshotEngine/README.md | 401 ------------ .../snapshotEngine/templates/configmap.yaml | 23 - charts/snapshotEngine/templates/rbac.yaml | 55 -- .../templates/snapshot-scheduler.yaml | 45 -- charts/snapshotEngine/values.yaml | 103 ---- docs/04-other-helm-charts.md | 1 - snapshotEngine/Dockerfile | 65 -- snapshotEngine/createVolumeSnapshot.yaml | 11 - snapshotEngine/entrypoint.sh | 23 - snapshotEngine/getAllSnapshotMetadata.py | 61 -- snapshotEngine/getLatestSnapshotMetadata.py | 88 --- snapshotEngine/mainJob.yaml | 258 -------- snapshotEngine/scratchVolume.yaml | 12 - snapshotEngine/snapshot-maker.sh | 382 ------------ snapshotEngine/snapshot-scheduler.sh | 70 --- snapshotEngine/snapshot-website-base/Gemfile | 13 - .../snapshot-website-base/Gemfile.lock | 71 --- snapshotEngine/snapshotMakerJob.yaml | 25 - snapshotEngine/volumeFromSnap.yaml | 16 - snapshotEngine/zip-and-upload.sh | 575 ------------------ utils/Dockerfile | 8 +- 22 files changed, 4 insertions(+), 2320 deletions(-) delete mode 100755 charts/snapshotEngine/Chart.yaml delete mode 100755 charts/snapshotEngine/README.md delete mode 100644 charts/snapshotEngine/templates/configmap.yaml delete mode 100644 charts/snapshotEngine/templates/rbac.yaml delete mode 100755 charts/snapshotEngine/templates/snapshot-scheduler.yaml delete mode 100755 charts/snapshotEngine/values.yaml delete mode 100644 snapshotEngine/Dockerfile delete mode 100644 snapshotEngine/createVolumeSnapshot.yaml delete mode 100755 snapshotEngine/entrypoint.sh delete mode 100644 snapshotEngine/getAllSnapshotMetadata.py delete mode 100644 snapshotEngine/getLatestSnapshotMetadata.py delete mode 100644 snapshotEngine/mainJob.yaml delete mode 100644 snapshotEngine/scratchVolume.yaml delete mode 100755 snapshotEngine/snapshot-maker.sh delete mode 100755 snapshotEngine/snapshot-scheduler.sh delete mode 100644 snapshotEngine/snapshot-website-base/Gemfile delete mode 100644 snapshotEngine/snapshot-website-base/Gemfile.lock delete mode 100644 snapshotEngine/snapshotMakerJob.yaml delete mode 100644 snapshotEngine/volumeFromSnap.yaml delete mode 100755 snapshotEngine/zip-and-upload.sh diff --git a/charts/snapshotEngine/Chart.yaml b/charts/snapshotEngine/Chart.yaml deleted file mode 100755 index a326dff..0000000 --- a/charts/snapshotEngine/Chart.yaml +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: v2 -name: snapshotEngine -description: Produces filesystem artifacts (tarballs and snapshots) that Tezos Nodes can use to sync quickly. - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. 
-# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.0.0 diff --git a/charts/snapshotEngine/README.md b/charts/snapshotEngine/README.md deleted file mode 100755 index e1a8981..0000000 --- a/charts/snapshotEngine/README.md +++ /dev/null @@ -1,401 +0,0 @@ -# Snapshot Engine - -A Helm chart for creating Tezos snapshots and tarballs for faster node sync, all in kubernetes, and deploy them to a bucket with a static website. - -Check out [xtz-shots.io](xtz-shots.io) for an example. - -- [Snapshot Engine](#snapshot-engine) - - [What is it?](#what-is-it) - - [Requirements](#requirements) - - [How To](#how-to) - - [Values](#values) - - [Produced files](#produced-files) - - [LZ4](#lz4) - - [JSON](#json) - - [Redirects](#redirects) - - [Components](#components) - - [Snapshot Warmer Deployment](#snapshot-warmer-deployment) - - [Snapshot Scheduler Deployment](#snapshot-scheduler-deployment) - - [Jobs](#jobs) - - [Snapshot Maker Job](#snapshot-maker-job) - - [Zip and Upload Job](#zip-and-upload-job) - - [Containers](#containers) - - [Docker & ECR](#docker--ecr) - - [Kubernetes Containers](#kubernetes-containers) - - [init-tezos-filesystem Container](#init-tezos-filesystem-container) - - [create-tezos-rolling-snapshot Container](#create-tezos-rolling-snapshot-container) - - [zip-and-upload Container](#zip-and-upload-container) - -## What is it? - -The Snapshot Engine is a Helm Chart to be deployed on a Kubernetes Cluster. It will deploy snapshottable Tezos nodes [tezos-k8s](https://github.com/tacoinfra/tezos-k8s) and produce Tezos `.rolling` snapshot files as well as a new archive and rolling finalized filesystem tarballs in LZ4 format for fast Tezos node syncing. - -## Requirements - -1. AWS EKS Cluster* -4. S3 Bucket* -6. IAM Role* with a Trust Policy scoped to the Kubernetes Service Account created by this Helm chart. -7. [OIDC Provider](https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html)* -8. [Amazon EBS CSI Driver](https://github.com/kubernetes-sigs/aws-ebs-csi-driver)* -9. [Kubernetes VolumeSnapshot CRDs, and a new Storage Class](https://aws.amazon.com/blogs/containers/using-ebs-snapshots-for-persistent-storage-with-your-eks-cluster/) - -**We run our Tezos nodes on EKS. It may be possible to deploy the Snapshot Engine on other Kubernetes Clusters at this time, but we have not tested these options.* - -**We are hoping to make the Snapshot Engine cloud-agnostic, but for now AWS is required.* - -## How To - -1. Create an S3 Bucket. - - :warning: If you want to make it available over the internet, you will need to make it a [Public Bucket](https://aws.amazon.com/premiumsupport/knowledge-center/read-access-objects-s3-bucket/) and with the following Bucket Policy. - - Replace `BUCKET_NAME` with the name of your new S3 Bucket. - - :warning: Please evaluate in accordance with your own security policy. This will open up this bucket to the internet and allow anyone to download items from it and **you will incur AWS charges**. - - ```json - { - "Version": "2012-10-17", - "Statement": [ - { - "Sid": "PublicReadGetObject", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::/*" - } - ] - } - ``` - -2. Create an IAM Role with the following statements. - - Replace `` with the ARN of your new S3 Bucket. - - :warning: Pay close attention to the seemlingly redundant final `Resource` area. - - `/` and `/*` provide permission to the root and contents of the S3 Bucket respectively. 
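   If you prefer the AWS CLI to the console, a role with these permissions can be created along the following lines. This is only a sketch: the role name and file names are placeholders, and it assumes the permissions policy below and the trust policy from step 3 have each been saved locally as a JSON file.

   ```bash
   # Placeholder names throughout. Save the permissions policy below as
   # snapshot-engine-policy.json and the trust policy from step 3 as
   # trust-policy.json before running these commands.
   aws iam create-role \
     --role-name snapshot-engine-role \
     --assume-role-policy-document file://trust-policy.json

   aws iam put-role-policy \
     --role-name snapshot-engine-role \
     --policy-name snapshot-engine-policy \
     --policy-document file://snapshot-engine-policy.json
   ```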
- - ```json - { - "Version": "2012-10-17", - "Statement": [ - { - "Action": ["ec2:CreateSnapshot"], - "Effect": "Allow", - "Resource": "*" - }, - { - "Action": ["ec2:DescribeSnapshots"], - "Effect": "Allow", - "Resource": "*" - }, - { - "Action": ["s3:*"], - "Effect": "Allow", - "Resource": [ - "ARN_OF_S3_BUCKET", - "ARN_OF_S3_BUCKET/*" - ] - } - ] - } - ``` - -3. Scope this new IAM role with a Trust Policy with the following content: - -:warning: You will need to update `SERVICE_ACCOUNT_NAMESPACE` with the name of Kubernetes namespace you will like your snapshottable Tezos nodes and Snapshot Engine chart to. - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": { - "Federated": "arn:aws:iam::ACCOUNT_ID:oidc-provider/OIDC_PROVIDER" - }, - "Action": "sts:AssumeRoleWithWebIdentity", - "Condition": { - "StringEquals": { - "OIDC_PROVIDER:sub": "system:serviceaccount:SERVICE_ACCOUNT_NAMESPACE:snapshot-engine-sa" - } - } - } - ] -} -``` - -5. Add our Helm repository. - -```bash -helm repo add tacoinfra https://tacoinfra.github.io/tezos-helm-charts/ -``` - -6. Deploy the chart feeding in the ARN of the IAM role you created above inline, or as as value in a values.yaml file. - -```bash -helm install snapshotEngine \ ---set iam_role_arn="IAM_ROLE_ARN" -``` - -OR - -```bash -cat << EOF > values.yaml -iam_role_arn: "IAM_ROLE_ARN" -EOF -helm install snapshotEngine -f values.yaml -``` - -7. Depending on the chain size (mainnet more time, testnet less time) you should have `LZ4` tarballs, and if you are deploying to a rolling node Tezos `.rolling` snapshots as well in your S3 bucket. - -:warning: Testnet artifacts may appear in as soon as 20-30 minutes or less depending on the size of the chain. Rolling mainnet artifacts will take a few hours, and mainnet archive tarballs could take up to 24 hours. - -```bash -aws s3 ls s3://mainnet.xtz-shots.io - PRE assets/ -2022-04-10 19:40:33 0 archive-tarball -2022-04-10 19:40:35 0 archive-tarball-metadata -2022-04-12 15:23:37 405077 base.json -2022-04-12 15:23:57 518 feed.xml -2022-04-12 15:23:57 11814 index.html -2022-04-04 21:13:08 3939264512 mainnet-2253544.rolling -2022-04-04 21:15:18 482 mainnet-2253544.rolling.json -2022-04-12 11:22:32 3744214343 tezos-mainnet-rolling-tarball-2274806.lz4 -2022-04-12 11:23:51 493 tezos-mainnet-rolling-tarball-2274806.lz4.json -2022-04-12 15:23:39 0 rolling -2022-04-11 12:51:53 0 rolling-metadata -2022-04-12 15:21:52 0 rolling-tarball -2022-04-12 15:21:53 0 rolling-tarball-metadata -2022-04-05 11:45:06 497 tezos-mainnet-archive-tarball-2252528.lz4.json -2022-04-05 12:16:13 353204307636 tezos-mainnet-archive-tarball-2255459.lz4 -``` - -## Values - -All parameters accepted by the chart are listed in [`values.yaml`](values.yaml), with explanatory comments. - -## Produced files - -### LZ4 - -These are tarballs of the `/var/tezos/node` directory. They are validated for block finalization, zipped, and uploaded to your S3 bucket. - -### JSON - -These are metadata files containing information about the uploaded artifact. Every artifact has its own metadata file, as well as a `base.json` containing a list of all artifacts created. - -### Redirects - -There are 6 - 0 byte files that are uploaded as redirects. These files are updated in S3 to redirect to the latest artifact for each. - -* rolling >> latest `.rolling` Tezos **rolling** snapshot file. 
-* rolling-tarball >> latest **rolling** `.lz4` tarball -* archive-tarball >> latest **archive** `.lz4` tarball -* rolling-metadata >> latest `.rolling.json` metadata file -* rolling-tarball-metadata >> latest **rolling** `.lz4.json` metadata file -* archive-tarball metadata >> latest **archive** `.lz4.json` metadata file - -## Components - -For 1 Kubernetes Namespace this Helm Chart creates - - -- 2 Kubernetes **Deployment** per history mode. (4 Total) -- Kubernetes **Role** -- Kubernetes **Rolebinding** -- Kubernetes **Service** Account -- Kubernetes **ClusterRoleBinding** -- Kubernetes **Configmap** - -### Snapshot Warmer Deployment - -This Kubernetes Deployment runs the `snapshotEngine` container located in the root of the `tezos-k8s` repository. - -The entrypoint is overridden and the `snapshot-warmer/scripts/snapshotwarmer.sh` script is provided as the container entrypoint. - -This script runs indefinitely and performs the following steps - - -1. Removes unlabeled VolumeSnapshots. VolumeSnapshots need to be labeled with their respective `history_mode` in order for them to be used later with the snapshot engine system. -2. Maintains 4 snapshots of a particular `history_mode` label. There isn't any particular reason for this, other than to keep the list of snapshots concise. "Maintains" meaning deletes the oldest snapshots once the count is over 4. -3. If there are not any snapshots in progress, then a new one is triggered named with the current time and date. -4. It waits until the snapshot is ready to use. - -We create only one snapshot at a time as having more than one in-progress slows down the snapshot process altogether. - -### Snapshot Scheduler Deployment - -A Kubernetes Deployment called the **Snapshot Scheduler** runs indefinitely triggering a new Kubernetes Job called **Snapshot Maker**. - -Snapshot Scheduler waits until the Snapshot Maker Job is gone to schedule a new job. This way there are snapshots constantly being created instead of running on a schedule. - -### Jobs - -Overview of Jobs triggered by the Snapshot Scheduler workflow. - -#### Snapshot Maker Job - -Triggered by Snapshot Scheduler Kubernetes Deployment. - -Steps this Job performs - - -1. Waits until targeted Tezos Node is ready and healthy in Kubernetes. -2. Deletes zip-and-upload job if it exists. This cleans up errored jobs and completed jobs. -3. Deletes rolling tarball restore PVC. -4. Deletes snapshot cache PVC. -5. Deletes snapshot restore PVC. -6. Waits if a snapshot is currently being taken. -7. Uses latest completed snapshot to restore to new snapshot restore PVC. -8. Creates snapshot cache volume, where files go that we don't want to be included in artifacts. -9. Creates restore volume to match size of snapshot plus 20%. -10. Triggers Zip and Upload job that creates artifacts. -11. Waits until Zip and Upload job is finished, or exits upon error. -12. Once Zip and Upload is finished the snapshot restore volume is deleted and this Job deletes itself. - -#### Zip and Upload Job - -Triggered by Snapshot Maker Kubernetes Job. - -This job initializes the Tezos storage that was restored to a new PVC, creates rolling Tezos snapshot if targeted Tezos Node is rolling history mode, then LZ4s and uploads artifacts to S3, and finally builds the xtz-shots website. - -### Containers - -Overview of containers built by Docker and stored in ECR as well as a description of the functionality of the containers in the Kubernetes Pods. - -#### Docker & ECR - -One container is used for all Kubernetes Jobs and Pods. 
The Dockerfile is located in `tezos-k8s/snapshotEngine`. - -Container is based on `jekyll` container. - -Tools installed include - - -* AWS v2 CLI -* jq -* yq -* kubectl -* jekyll (container base) -* curl -* bash - -The different functionality is accomplished by `sh` scripts located in this directory, and supplied by `args` in the deployments and jobs via `entrypoint.sh` in the same directory. - -`tezos-k8s/snapshotEngine/entrypoint.sh` - -```sh -case "$CMD" in - snapshot-scheduler) exec /snapshot-scheduler.sh "$@" ;; - snapshot-maker) exec /snapshot-maker.sh "$@" ;; - zip-and-upload) exec /zip-and-upload.sh "$@" ;; -esac -``` - -`tezos-k8s/snapshotEngine/snapshotMakerJob.yaml` - -```yaml - containers: - - name: snapshot-maker - ... - args: - - "snapshot-maker" -``` - -`tezos-k8s/snapshotEngine/mainJob.yaml` - -```yaml - containers: - ... - - name: zip-and-upload - ... - args: - - "zip-and-upload" - ... -``` - -Snapshot Maker Docker container is built and uploaded to ECR. - -#### Kubernetes Containers - -Overview of functionality of containers in Kubernetes Job Pods. - -##### init-tezos-filesystem Container - -In order for the storage to be imported successfully to a new node, the storage needs to be initialized by the `octez-node` application. - -This container performs the following steps - - -1. Chowns the history-mode-snapshot-cache-volume to 100 so subsequent containers can access files created in them. -2. Sets a trap so that we can exit this container after 2 minutes. `octez-node` does not provide exit criteria if there is an error. Around 20%-40% of the time there will be an error because the EC2 instance would normally need to be shut down before an EBS snapshot is taken. With Kubernetes this is not possible, so we time the filesystem initialization and kill it if it takes longer than 2 minutes. -3. Runs a headless Tezos RPC endpoint to initialize the storage. -4. Waits until RPC is available. -5. Writes `BLOCK_HASH`, `BLOCK_HEIGHT`, and `BLOCK_TIME` for later use to snapshot cache. - -##### create-tezos-rolling-snapshot Container - -This container only exists for a rolling history mode workflow. - -This container performs the following steps - - -1. Chowns the history-mode-snapshot-cache-volume and rolling-tarball-restore volume to 100 so subsequent containers can access files created in them. -2. Gets network name from the namespace. -3. Performs a `octez-node config init` on our restored snapshot storage. -4. Performs a `octez-node snapshot export` to create the `.rolling` file to be uploaded later. -5. Restores this new snapshot to the `rolling-tarball-restore` PVC to later create the rolling tarball. -6. Creates a file to alert the next job that the rolling snapshot is currently being created and tells it to wait. - -##### zip-and-upload Container - -This container LZ4s the rolling, and artifact filesystems into tarballs, and uploads the tarballs and `.rolling` file to the AWS S3 bucket website. Metadata is generated here and the website is built as well. - -This container performs the following steps - - -1. Downloads existing `base.json` metadata file if it exists, if not creates a new one. This contains all of the metadata for all artifacts ever created. -2. If archive artifact workflow `/var/tezos/node` is LZ4d for archive excluding sensitive files `identity.json`, and `peers.json`. -3. Archive tarball SHA256 is generated. -4. Archive tarball filesize is generated. -5. Metadata is added to `base.json` and uploaded. -6. 
Build artifact-specific metadata json file and upload it to AWS S3 Bucket. -7. Create and upload archive tarball redirect file. This forwards to the latest archive artifact. (EX. `mainnet.xtz-shots.io/archive-tarball >> mainnet.xtz-shots.io/tezos-mainnet-archive-tarball-203942.lz4`) -8. If rolling artifact workflow waits for `.rolling` snapshot to be created and restored to new PVC by previous container. -9. If rolling artifact workflow `/var/tezos/node` is LZ4d for archive excluding sensitive files `identity.json`, and `peers.json`. -10. Rolling tarball SHA256 is generated. -11. Rolling tarball filesize is generated. -12. Metadata is added to `base.json` and uploaded. -13. Build artifact-specific metadata json file and upload it to AWS S3 Bucket. -14. Create and upload rolling tarball redirect file. This forwards to the latest archive artifact. (EX. `mainnet.xtz-shots.io/rolling-tarball >> mainnet.xtz-shots.io/tezos-mainnet-rolling-tarball-203942.lz4`) -15. Upload `.rolling` file to S3 AWS bucket. -16. Generate filesize, and SHA256 sum of `.rolling` file. -17. Add metadata to `base.json` and upload. -18. Add metadata to artifact-specific json and upload. -19. Get metadata from artifact json files (curl) for web page. -20. Curls chain website page from chainWebsiteMarkdown -21. Build web page with Jekyll with curled Markdown and metadata files -22. Upload website files to S3 - -#### Rebuilding containers - -You may want to rebuild these containers instead of using the ones released as part of tezos-k8s. - -You can build and push your images to a repo of your choosing, but this is how it can be done without automation to ECR with Docker. We recommend utilizing a configuration management tool to help with container orchestration such as Terraform or Pulumi. - -```bash -# Get ECR login for Docker -aws ecr get-login-password --region YOUR_AWS_REGION | docker login --username AWS --password-stdin YOUR_ECR_URL - -# Build the image with Docker -docker build -t snapshotEngine snapshotEngine/ - -# Tag the image. 
Will be used in values.yaml -docker tag snapshotEngine:latest YOUR_ECR_URL/snapshotEngine:latest - -# Push the image to ECR -docker push YOUR_ECR_URL/snapshotEngine:latest -``` - -Then pass the URI of the image as helm values: - -```bash -helm install snapshotEngine \ ---set tezos_k8s_images.snapshotEngine="YOUR_ECR_URL/snapshotEngine:latest" -# -``` diff --git a/charts/snapshotEngine/templates/configmap.yaml b/charts/snapshotEngine/templates/configmap.yaml deleted file mode 100644 index 7e8f181..0000000 --- a/charts/snapshotEngine/templates/configmap.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v1 -data: - NAMESPACE: {{ $.Release.Namespace }} - IMAGE_NAME: {{ $.Values.tezos_k8s_images.snapshotEngine }} - TEZOS_IMAGE: {{ $.Values.images.octez }} - JEKYLL_REMOTE_THEME_REPOSITORY: {{ $.Values.jekyllRemoteThemeRepository }} - SNAPSHOT_WEBSITE_DOMAIN_NAME: {{ $.Values.snapshotWebsiteDomainName }} - SERVICE_ACCOUNT: {{ $.Values.service_account }} - RESTORED_STORAGE_INIT_TIME: {{ $.Values.restoredStorageInitTime }} - DISABLE_STORAGE_INIT_TIME: {{ $.Values.disableStorageInitTime | quote }} - NETWORK_OVERRIDE: {{ $.Values.networkOverride | default "" | quote }} - ALL_SUBDOMAINS: {{ $.Values.allSubdomains }} - ARCHIVE_SLEEP_DELAY: {{ $.Values.artifactDelay.archive }} - ROLLING_SLEEP_DELAY: {{ $.Values.artifactDelay.rolling }} - SCHEMA_URL: {{ $.Values.schemaUrl }} - S3_BUCKET: {{ $.Values.s3BucketOverride }} - CLOUD_PROVIDER: {{ $.Values.cloudProvider }} - STORAGE_CLASS: {{ $.Values.volumeSnapClass }} - NODES: {{ $.Values.nodes }} -kind: ConfigMap -metadata: - name: snapshot-configmap - namespace: {{ $.Release.Namespace }} diff --git a/charts/snapshotEngine/templates/rbac.yaml b/charts/snapshotEngine/templates/rbac.yaml deleted file mode 100644 index 6e85990..0000000 --- a/charts/snapshotEngine/templates/rbac.yaml +++ /dev/null @@ -1,55 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Values.service_account }} - namespace: {{ $.Release.Namespace }} - annotations: - eks.amazonaws.com/role-arn: {{ .Values.iam_role_arn }} ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: modify-volumesnapshots - namespace: {{ $.Release.Namespace }} -rules: - - apiGroups: ["snapshot.storage.k8s.io", "batch", ""] - resources: - - volumesnapshots - - volumesnapshotcontents - - jobs - - persistentvolumeclaims - - configmaps - - pods - verbs: - - get - - list - - delete - - create - - patch ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: snapshots-for-sa - namespace: {{ $.Release.Namespace }} -subjects: - - kind: ServiceAccount - name: {{ .Values.service_account }} -roleRef: - kind: Role - name: modify-volumesnapshots - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: {{ printf "%s-%s-service-accounts" "list-volumesnapshotcontents-for" $.Release.Namespace }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: list-volumesnapshotcontents -subjects: -- apiGroup: "" - kind: ServiceAccount - name: {{ .Values.service_account }} - namespace: {{ $.Release.Namespace }} \ No newline at end of file diff --git a/charts/snapshotEngine/templates/snapshot-scheduler.yaml b/charts/snapshotEngine/templates/snapshot-scheduler.yaml deleted file mode 100755 index d6a83df..0000000 --- a/charts/snapshotEngine/templates/snapshot-scheduler.yaml +++ /dev/null @@ -1,45 +0,0 @@ -{{ define "snapshot-scheduler.name" }} -{{- $history_mode := $.history_mode }} - 
{{- printf "%s-%s-%s" "snapshot-scheduler-for" $history_mode "node" }} -{{- end }} - -{{- range $node, $config := .Values.nodes }} -{{- if $config }} - {{- $_ := set $ "history_mode" $config.history_mode }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "snapshot-scheduler.name" $ }} - namespace: {{ $.Release.Namespace }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ include "snapshot-scheduler.name" $ }} - template: - metadata: - labels: - app: {{ include "snapshot-scheduler.name" $ }} - spec: - serviceAccountName: {{ $.Values.service_account }} - containers: - - name: {{ include "snapshot-scheduler.name" $ }} - image: {{ $.Values.tezos_k8s_images.snapshotEngine }} - imagePullPolicy: Always - args: - - "snapshot-scheduler" - env: - - name: HISTORY_MODE - value: {{ $config.history_mode }} - - name: NAMESPACE - value: {{ $.Release.Namespace }} - - name: SERVICE_ACCOUNT - value: {{ $.Values.service_account }} - - name: IMAGE_NAME - valueFrom: - configMapKeyRef: - name: snapshot-configmap - key: IMAGE_NAME ---- -{{- end }} -{{- end }} \ No newline at end of file diff --git a/charts/snapshotEngine/values.yaml b/charts/snapshotEngine/values.yaml deleted file mode 100755 index 0cd519e..0000000 --- a/charts/snapshotEngine/values.yaml +++ /dev/null @@ -1,103 +0,0 @@ -tezos_k8s_images: - snapshotEngine: ghcr.io/tacoinfra/tezos-k8s-snapshotengine:main - -# the tezos version used to run `octez-node snapshot import/export` -images: - octez: tezos/tezos:v17.1 - -# snapshotEngine containers interact with the kubernetes control -# plane to create volume snapshots. This requires a special IAM role -# so k8s can create the snapshots and volumes in the cloud. -# Specify this role below. -iam_role_arn: "" -service_account: snapshot-engine-sa - -# snapshotEngine chart does not run tezos nodes. -# It searches for separately defined tezos nodes and targets them -# by taking CSI volumeSnapshots of their volume, then restoring them -# into new pvcs and running jobs against them. -# Note that these tezos nodes should be in the same namespace. -# Specify an archive node to generate archive tarballs, a rolling node -# to generate rolling snapshots and tarballs, or both to generate all -# artifacts. -# Below, specify what volumes the snapshot engine should look for, in -# the following format: -# nodes: -# : -# history_mode: -# target_volume: -# where: -# NODE_CLASS is the node class as defined in tezos-k8s "tezos" chart. -# HISTORY_MODE is archive or rolling -# TARGET_VOLUME is the name of the pod's volume containing tezos data dir. -# -# The default configuration below will be looking for tezos nodes defined in the -# tezos chart with class names `snapshot-archive-node` and `snapshot-rolling-node`. -# If you'd like to name them differently, make sure to match the names in both the -# tezos chart and here in the snapshot engine chart. -# Also, the tezos-k8s chart names the node data volume 'var-volume' by default. -nodes: - snapshot-archive-node: - history_mode: archive - target_volume: var-volume - snapshot-rolling-node: - history_mode: rolling - target_volume: var-volume - -# snapshot website domain name -# Example: -# snapshotWebsiteDomainName: xtz-shots.io -snapshotWebsiteDomainName: "" - -# snapshotEngine generates static webpages to accompany the generated -# artifacts. They are rendered by jekyll. 
-# See an example jekyll template at: -# https://github.com/tacoinfra/xtz-shots-website/ -# 1 parameter needs to be passed: -# * jekyllRemoteThemeRepository: the github repository, in format organization/reponame@branch -# This repo@branch will be cloned into the jekyll container. This repo must contain a _config.yml -# and desired jekyll page templates. -# Example: -# jekyllRemoteThemeRepository: tacoinfra/xtz-shots-website@main -jekyllRemoteThemeRepository: "" - -# When setting up the CSI driver in Kubernetes to create a snapshottable storage class -# you will have also created and named a volumeSnapshotClass that will be used in this value. -# This value is used during the snapshot creation process. -volumeSnapClass: "" - -# The EBS snapshot of a node is restored to a new volume as a part of the artifact generation process. -# This storage must have a tezos node "turned on" to initialize the storage for proper restoration later. -# However, there are errors that occur during this initialization due to Tezos not being tolerant -# of the KILL -9 that occurs during the EBS snapshot process. -# Tezos also does not exit on error, but rather just hangs stdout on a random error. -# Therefore we limit this initialization process and pitch the job in a reasonable amount of time to account for these -# random errors. It may be necessary to increase this time for different changes, or as a particular chain -# grows in size over time. -# This time format is in unix sleep time format ex. 1s, 2m, 3h, 4d. -restoredStorageInitTime: 2m - -# We also provide the ability to disable the time limit for debugging purposes. -disableStorageInitTime: "false" - -# Used to start the snapshotable node, and for the initialization network on the restored snapshot. -# Can be useful if namespace does not begin with network name. -# Optional, defaults to beginning word of namespace before "-shots" ex "mainnet-shots" = "mainnet" -networkOverride: "" - -# List of all xtz-shots subdomains -allSubdomains: "" - -# If you find aftifacts are being created too frequently whether it be compute/storage cost reduction, or difficulty in managing -# the quantity of files generated, a sleep value can be updated here to delay the creation of artifacts. This can be expecially -# useful with small/test networks that are small in size and create every 20 minutes or so. -# Valid values include an integer followed by a single-character time unit denoting days, minutes hours seconds- d, h, m, s -# Ex: 1d, 2h, 25m, 59s, etc. 
-artifactDelay: - rolling: 0m - archive: 0m - -# URL to schema.json file to validate generated metadata against -schemaUrl: "https://tacoinfra.com/tezos-snapshot-metadata.schema.1.0.json" - -s3BucketOverride: "" diff --git a/docs/04-other-helm-charts.md b/docs/04-other-helm-charts.md index 01e18a0..5644f55 100644 --- a/docs/04-other-helm-charts.md +++ b/docs/04-other-helm-charts.md @@ -4,6 +4,5 @@ The Tezos-k8s project also maintains several other helm charts: * **[pyrometer](https://github.com/tacoinfra/tezos-k8s/tree/main/charts/pyrometer)**: a chart to deploy the [Pyrometer Tezos monitoring tool](https://gitlab.com/tezos-kiln/pyrometer) * **[tezos-faucet](https://github.com/tacoinfra/tezos-k8s/tree/main/charts/tezos-faucet)**: a faucet to dispense testnet funds -* **[snapshotEngine](https://github.com/tacoinfra/tezos-k8s/tree/main/charts/snapshotEngine)**: an utility to generate Tezos snapshots from running nodes, using Kubernetes CSI driver * **[tezos-reward-distributor](https://github.com/tacoinfra/tezos-k8s/tree/main/charts/tezos-reward-distributor)**: a chart to deploy [TRD](https://tezos-reward-distributor-organization.github.io/tezos-reward-distributor/) * **[tezos-proto-cruncher](https://github.com/tacoinfra/tezos-k8s/tree/main/charts/tezos-proto-cruncher)**: a chart to deploy a daemonset in your cluster to find vanity hashes for protocols in parallel diff --git a/snapshotEngine/Dockerfile b/snapshotEngine/Dockerfile deleted file mode 100644 index e9ef42c..0000000 --- a/snapshotEngine/Dockerfile +++ /dev/null @@ -1,65 +0,0 @@ -FROM jekyll/jekyll:4.2.0 - -ENV GLIBC_VER=2.31-r0 - -# Install python/pip -ENV PYTHONUNBUFFERED=1 -ENV KUBECTL_VERSION=v1.24.3 - -ARG TARGETARCH - -# -# Installs lz4, jq, yq, kubectl, and awscliv2, and python3 -# -RUN apk --no-cache add \ - binutils \ - curl \ - lz4 \ - 'jq<1.6-r1' \ - bash \ - && echo "Arch: ${TARGETARCH}" \ - && wget -q -O /usr/bin/yq $(wget -q -O - https://api.github.com/repos/mikefarah/yq/releases/latest \ - | jq -r --arg YQ_ARCH "yq_linux_${TARGETARCH}" '.assets[] | select(.name == $YQ_ARCH) | .browser_download_url') \ - && chmod +x /usr/bin/yq \ - && curl -LO https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/${TARGETARCH}/kubectl \ - && chmod +x ./kubectl \ - && mv ./kubectl /usr/local/bin \ - && curl -sL https://alpine-pkgs.sgerrand.com/sgerrand.rsa.pub -o /etc/apk/keys/sgerrand.rsa.pub \ - && curl -sLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VER}/glibc-${GLIBC_VER}.apk \ - && curl -sLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VER}/glibc-bin-${GLIBC_VER}.apk \ - && curl -sLO https://github.com/sgerrand/alpine-pkg-glibc/releases/download/${GLIBC_VER}/glibc-i18n-${GLIBC_VER}.apk \ - && apk add --no-cache \ - glibc-${GLIBC_VER}.apk \ - glibc-bin-${GLIBC_VER}.apk \ - glibc-i18n-${GLIBC_VER}.apk \ - && /usr/glibc-compat/bin/localedef -i en_US -f UTF-8 en_US.UTF-8 \ - && curl -sL https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -o awscliv2.zip \ - && unzip awscliv2.zip \ - && aws/install \ - && rm -rf \ - awscliv2.zip \ - aws \ - /usr/local/aws-cli/v2/*/dist/aws_completer \ - /usr/local/aws-cli/v2/*/dist/awscli/data/ac.index \ - /usr/local/aws-cli/v2/*/dist/awscli/examples \ - glibc-*.apk \ - && apk --no-cache del \ - binutils \ - && rm -rf /var/cache/apk/* \ - && apk add --update --no-cache python3-dev && ln -sf python3 /usr/bin/python \ - && python3 -m ensurepip \ - && pip3 install --no-cache-dir --upgrade pip && \ - pip3 install 
--no-cache-dir setuptools boto3 datefinder datetime pytz jsonschema==4.17.3 - -RUN chown jekyll:jekyll -R /usr/gem - -# TODO: Make file structure organized like with /scripts and /templates -# We install gemfile here to save time on website build in zip-and-upload -WORKDIR /snapshot-website-base -COPY --chown=jekyll:jekyll snapshot-website-base/Gem* / -RUN bundle install -WORKDIR / -COPY . / -RUN chown -R jekyll:jekyll /snapshot-website-base - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/snapshotEngine/createVolumeSnapshot.yaml b/snapshotEngine/createVolumeSnapshot.yaml deleted file mode 100644 index 8709975..0000000 --- a/snapshotEngine/createVolumeSnapshot.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: snapshot.storage.k8s.io/v1 -kind: VolumeSnapshot -metadata: - name: "" - namespace: "" - labels: - history_mode: rolling -spec: - volumeSnapshotClassName: "" - source: - persistentVolumeClaimName: "" diff --git a/snapshotEngine/entrypoint.sh b/snapshotEngine/entrypoint.sh deleted file mode 100755 index bbc951c..0000000 --- a/snapshotEngine/entrypoint.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh -x - -CMD="$1" -shift - -case "$CMD" in - snapshot-scheduler) exec /snapshot-scheduler.sh "$@" ;; - snapshot-maker) exec /snapshot-maker.sh "$@" ;; - zip-and-upload) exec /zip-and-upload.sh "$@" ;; -esac - -# -# As we exec above, reaching here means that we did not -# find the command we were provided. - -echo "ERROR: could not find \"$CMD\"." -echo -echo "Valid options are:" -echo " snapshot-scheduler" -echo " snapshot-maker" -echo " zip-and-upload" - -exit 1 diff --git a/snapshotEngine/getAllSnapshotMetadata.py b/snapshotEngine/getAllSnapshotMetadata.py deleted file mode 100644 index 32aa49e..0000000 --- a/snapshotEngine/getAllSnapshotMetadata.py +++ /dev/null @@ -1,61 +0,0 @@ -from genericpath import exists -import os -import urllib, json -from jsonschema import validate -from datetime import datetime - -schemaURL = os.environ["SCHEMA_URL"] -allSubDomains = os.environ["ALL_SUBDOMAINS"].split(",") -s3Endpoint = "nyc3.digitaloceanspaces.com" -filename = "tezos-snapshots.json" - -# Write empty top-level array to initialize json -artifact_metadata = [] - -urllib.request.urlretrieve(schemaURL, "schema.json") - -print("Assembling global metadata file for all subdomains:") -print(allSubDomains) - -# Get each subdomain's base.json and combine all artifacts into 1 metadata file -for subDomain in allSubDomains: - baseJsonUrl = ( - "https://" + subDomain + "-shots" + "." + s3Endpoint + "/base.json" - ) - try: - with urllib.request.urlopen(baseJsonUrl) as url: - data = json.loads(url.read().decode()) - for entry in data: - artifact_metadata.append(entry) - except urllib.error.HTTPError: - continue - -now = datetime.now() - -# Matches octez block_timestamp. -# Is ISO 8601 with military offset of Z -dt_string = now.strftime('%Y-%m-%dT%H:%M:%SZ') - -# Meta document that includes the list of storage artifacts among some other useful keys. 
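# For illustration, the assembled document has roughly this shape (values are
# hypothetical; "data" holds one entry per artifact gathered above):
#   {
#     "date_generated": "2022-04-12T15:23:37Z",
#     "org": "Oxhead Alpha",
#     "$schema": "<SCHEMA_URL>",
#     "data": [ ... ]
#   }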
-metadata_document = json.dumps({ - "date_generated": dt_string, - "org": "Oxhead Alpha", - "$schema": schemaURL, - "data": artifact_metadata, -}, indent=4) - -with open("schema.json","r") as f: - schema = f.read() - -# json.validate() returns None if successful -if not validate(json.loads(metadata_document), json.loads(schema)): - print("Metadata successfully validated against schema!") -else: - raise Exception("Metadata NOT validated against schema!") - - -# Write to file -with open(filename, "w") as json_file: - json_file.write(metadata_document) - -print(f"Done assembling global metadata file {filename}") diff --git a/snapshotEngine/getLatestSnapshotMetadata.py b/snapshotEngine/getLatestSnapshotMetadata.py deleted file mode 100644 index 6994ffc..0000000 --- a/snapshotEngine/getLatestSnapshotMetadata.py +++ /dev/null @@ -1,88 +0,0 @@ -import json -from pathlib import Path -import random -from genericpath import exists - -import pprint -pp = pprint.PrettyPrinter(indent=4) - -filename='tezos-snapshots.json' - -if exists(filename): - print("SUCCESS tezos-snapshots.json exists locally!") - with open(filename, "r") as localJson: - snapshots = json.load(localJson) -else: - print("ERROR tezos-snapshots.json does not exist locally!") - -# sort per network -snapshots_per_network = {} - -# for some reason, the first page is empty, so we initialize the map with dummy data to generate a first page -# The error is: -# Error reading file /home/nochem/workspace/xtz-shots-website/_layouts/latest_snapshots.md/latest_snapshots.md: Not a directory @ rb_sysopen - /home/nochem/workspace/xtz-shots-website/_layouts/latest_snapshots.md/latest_snapshots.md -latest_snapshots = [{"name": "example", "latest_snapshots": {}}] - -all_snapshots = [{"name": "example", "all_snapshots": {}}] - -for snapshot in snapshots['data']: - network = snapshot["chain_name"] - if network not in snapshots_per_network: - snapshots_per_network[network] = [] - snapshots_per_network[network].append(snapshot) - -for network, snapshots in snapshots_per_network.items(): - network_latest_snapshots = {} - network_snapshots = {} - - # Find a lowest version available for a given network, artifact_type, and history_mode - for (artifact_type, history_mode, path) in [("tarball", "rolling", "rolling-tarball"), ("tarball", "archive", "archive-tarball"), ("tezos-snapshot", "rolling", "rolling")]: - # List of snapshot metadata for this particular artifact type and history mode - typed_snapshots = [s for s in snapshots if s["artifact_type"] == artifact_type and s["history_mode"] == history_mode] - - # Lowest version is the top item (int) of a sorted unique list of all the versions for this particular artifact type and history mode - # newlist = [item for item in list if "value" in list] - #octez_versions = sorted(list(set([ s['tezos_version']['version']['major'] for s in typed_snapshots if 'version' in s['tezos_version'] ]))) - - octez_versions = [] - for s in typed_snapshots: - if 'version' in s['tezos_version']: - octez_versions.append(s['tezos_version']['version']['major']) - - octez_versions = sorted(list(set(octez_versions))) - - if octez_versions: - lowest_octez_version = octez_versions[0] - else: - # no metadata yet for this namespace, ignoring - continue - - network_snapshots[path] = typed_snapshots - - # Latest offered should only show oldest supported build so let's filter by the oldest supported version we found above - typed_snapshots = [d for d in typed_snapshots if 'version' in d['tezos_version'] and d['tezos_version']['version']['major'] 
== lowest_octez_version ] - - # Latest snapshot of type is the last item in typed_snapshots which we just filtered by the latest supported tezos build - network_latest_snapshots[path] = typed_snapshots[-1] - - # This becomes the list of snapshots - latest_snapshots.append( - { - "name": network, - "permalink": network + "/index.html", - "latest_snapshots": network_latest_snapshots, - } - ) - all_snapshots.append( - { - "name": network, - "permalink": network + "/list.html", - "snapshots": network_snapshots, - } - ) - -Path("_data").mkdir(parents=True, exist_ok=True) -filename = "_data/snapshot_jekyll_data.json" -with open(filename, 'w') as f: - json.dump({"latest_snapshots": latest_snapshots, "all_snapshots": all_snapshots}, f, indent=2) -print(f"Done writing structured list of snapshots for Jekyll to render webpage: {filename}") diff --git a/snapshotEngine/mainJob.yaml b/snapshotEngine/mainJob.yaml deleted file mode 100644 index 9306269..0000000 --- a/snapshotEngine/mainJob.yaml +++ /dev/null @@ -1,258 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: "zip-and-upload" - namespace: "" -spec: - template: - metadata: - labels: - app: snapshot-maker - spec: - serviceAccountName: "" - initContainers: - - name: init-tezos-filesystem - image: "" - command: ["/bin/sh"] - args: - - "-c" - - | - #!/bin/sh - - # Exit on any error - set -e - - # Give tezos user full ownership of mounted cache volume for writing metadata later - sudo chown -R tezos /"${HISTORY_MODE}"-snapshot-cache-volume - - # Set error trap in order to kill this job with the timer later - trap "exit" SIGINT SIGTERM - - # Strip network from namespace or use configmap value - NETWORK="${NETWORK_OVERRIDE:-${NAMESPACE%%-*}}" - - # Set up config for headless RPC using new restored storage - octez-node config init \ - --config-file /home/tezos/.tezos-node/config.json \ - --network "${CHAIN_NAME}" \ - --data-dir /var/tezos/node/data - - # Run headless tezos node to validate storage on restored volume - octez-node run --connections 0 --config-file /home/tezos/.tezos-node/config.json --rpc-addr=127.0.0.1:8732 & - - # Limit validation to restoredStorageInitTime. If this takes longer then there is a tezos error - # and this job is tossed. - # Tezos does not exit on error so we have to time the job. - # Configmaps can only have strings as keys, so we cant test for truthy values. - if [ "${DISABLE_STORAGE_INIT_TIME}" != "true" ]; then - printf "%s Storage init time limit has NOT been disabled. This job will be killed after %s\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${RESTORED_STORAGE_INIT_TIME}" - sleep "${RESTORED_STORAGE_INIT_TIME}" && kill -s SIGINT 1 & - else - printf "%s Storage init time limit has been disabled. WARNING - This job will run indefinitely if there is an error.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - fi - - # These loops wait on the RPC to come online and prevent log from printing same line - # over and over and over again. This prints one line and waits for the RPC to come online for a clean log. - until wget -qO- http://127.0.0.1:8732/chains/main/blocks/head/header >/dev/null 2>&1; do - printf "%s Waiting for node RPC to come online.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - until wget -qO- http://127.0.0.1:8732/chains/main/blocks/head/header >/dev/null 2>&1; do - sleep 1m # without sleep, this loop is a "busy wait". 
this sleep vastly reduces CPU usage while we wait for rpc - if wget -qO- http://127.0.0.1:8732/chains/main/blocks/head/header >/dev/null 2>&1; then - break - fi - done - done - - # If somehow we skip the above waiting loop, this kills the job if the RPC is not online. - if ! wget -qO- http://127.0.0.1:8732/chains/main/blocks/head/header >/dev/null 2>&1; then - printf "%s RPC is not online! Exiting...\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - - # Otherwise if RPC is online, then our storage is valid and we can check if the block - # is finalized and get our metadata from the RPC endpoint. - else - printf "%s Node online! Storage is initialized.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if [ "${HISTORY_MODE}" = archive ]; then - TARGET="head" - - # Tezos devs have advised us that it is safer to target HEAD~2 for rolling artifacts. - else - HEAD_BLOCK=$(wget -qO- http://127.0.0.1:8732/chains/main/blocks/head/header | sed -E 's/.*"hash":"?([^,"]*)"?.*/\1/') - TARGET="${HEAD_BLOCK}~2" - fi - - # Get BLOCK_HASH from RPC - wget -qO- http://127.0.0.1:8732/chains/main/blocks/"${TARGET}"/header | sed -E 's/.*"hash":"?([^,"]*)"?.*/\1/' > /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HASH - - # Get BLOCK_HEIGHT from RPC - wget -qO- http://127.0.0.1:8732/chains/main/blocks/"${TARGET}"/header | sed -E 's/.*"level":"?([^,"]*)"?.*/\1/' > /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HEIGHT - - # We need to check if the block is finalized for archive nodes since we aren't getting - # validation by a Tezos snapshot like our rolling tarball. We are just zipping up the data dir from an archive node. - if [ "${HISTORY_MODE}" = archive ]; then - printf "%s Checking if archive snapshot block is finalized...\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - - # Query running node and get block level - RUNNING_NODE_BLOCK_LEVEL="$(wget -qO- http://snapshot-archive-node."${NAMESPACE}".svc:8732/chains/main/blocks/head/header | sed -E 's/.*"level":"?([^,"]*)"?.*/\1/')" - printf "%s Running node snapshot-archive-node head block level is %s. \n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${RUNNING_NODE_BLOCK_LEVEL}" - - # See if this is at least 2 blocks newer than our snapshot block. If its not, toss this job. - SNAPSHOT_BLOCK_LEVEL=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HEIGHT) - printf "%s Snapshot head block level is %s. \n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${SNAPSHOT_BLOCK_LEVEL}" - if ! [ $((RUNNING_NODE_BLOCK_LEVEL - SNAPSHOT_BLOCK_LEVEL)) -ge 2 ]; then - printf "%s Running node snapshot-archive-node head block level is NOT 2 or more than our snapshot block level! Exiting...\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - fi - - # Query running node with snapshot block level to get hash - RUNNING_NODE_BLOCK_HASH=$(wget -qO- http://snapshot-archive-node."${NAMESPACE}".svc:8732/chains/main/blocks/"${SNAPSHOT_BLOCK_LEVEL}"/header | sed -E 's/.*"hash":"?([^,"]*)"?.*/\1/') - SNAPSHOT_BLOCK_HASH=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HASH) - - # Check if this matches our snapshot hash, if not toss this job. If so, then block is finalized. - printf "%s Running node snapshot-archive-node block hash for level %s is %s . 
\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${SNAPSHOT_BLOCK_LEVEL}" "${RUNNING_NODE_BLOCK_HASH}" - printf "%s Snapshot block hash for level %s is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${SNAPSHOT_BLOCK_LEVEL}" "${SNAPSHOT_BLOCK_HASH}" - if [ "${RUNNING_NODE_BLOCK_HASH}" != "${SNAPSHOT_BLOCK_HASH}" ]; then - printf "%s Running node block hash at level %s is not equal to our hash. Exiting...\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${SNAPSHOT_BLOCK_LEVEL}" - exit 1 - fi - fi - - # Get BLOCK_TIMESTAMP from RPC - wget -qO- http://127.0.0.1:8732/chains/main/blocks/head/header | sed -E 's/.*"timestamp":"?([^,"]*)"?.*/\1/' > /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_TIMESTAMP - - # Old version string - /usr/local/bin/octez-node --version > /"${HISTORY_MODE}"-snapshot-cache-volume/TEZOS_VERSION - - # Get new version object from RPC - wget -qO- http://127.0.0.1:8732/version > /"${HISTORY_MODE}"-snapshot-cache-volume/TEZOS_RPC_VERSION_INFO - - # Print variables for debug - printf "%s BLOCK_HASH is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HASH))\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - printf "%s BLOCK_HEIGHT is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HEIGHT)\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - printf "%s BLOCK_TIMESTAMP is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_TIMESTAMP)\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - printf "%s TEZOS_VERSION is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/TEZOS_VERSION)\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - printf "%s TEZOS_RPC_VERSION_INFO is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/TEZOS_RPC_VERSION_INFO)\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - - # Blow open permissions for next job to write to volume - sudo chmod -R 755 /"${HISTORY_MODE}"-snapshot-cache-volume - fi - volumeMounts: - - mountPath: /var/tezos - name: persistent-storage - - mountPath: /snapshot-cache-volume - name: snapshot-cache-volume - env: - - name: HISTORY_MODE - value: "" - envFrom: - - configMapRef: - name: snapshot-configmap - - configMapRef: - name: tezos-config - containers: - - name: create-tezos-rolling-snapshot - image: "" - command: ["/bin/sh"] - args: - - "-c" - - | - #!/bin/sh - set -e - - sudo chown -R tezos /"${HISTORY_MODE}"-snapshot-cache-volume - sudo chown -R tezos /rolling-tarball-restore - - # Strip network from namespace or use configmap value - NETWORK="${NETWORK_OVERRIDE:-${NAMESPACE%%-*}}" - - BLOCK_HEIGHT=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HEIGHT) - BLOCK_HASH=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HASH) - ROLLING_SNAPSHOT_NAME="${NAMESPACE%%-*}"-"${BLOCK_HEIGHT}" - - octez-node config init \ - --config-file /home/tezos/.tezos-node/config.json \ - --network "${CHAIN_NAME}" \ - --data-dir /var/tezos/node/data - - if [ "${HISTORY_MODE}" = rolling ]; then - octez-node snapshot export \ - --block "${BLOCK_HASH}" \ - --config-file /home/tezos/.tezos-node/config.json \ - --rolling \ - /"${HISTORY_MODE}"-snapshot-cache-volume/"${ROLLING_SNAPSHOT_NAME}".rolling - - printf "%s Restoring rolling snapshot to empty filesystem for rolling tarball...\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - - touch /rolling-tarball-restore/snapshot-import-in-progress - - octez-node snapshot import \ - /"${HISTORY_MODE}"-snapshot-cache-volume/"${ROLLING_SNAPSHOT_NAME}".rolling \ - --block "${BLOCK_HASH}" \ - --config-file /home/tezos/.tezos-node/config.json \ - --data-dir /rolling-tarball-restore/var/tezos/node/data - - # Get octez snapshot version - /usr/local/bin/octez-node snapshot info 
/"${HISTORY_MODE}"-snapshot-cache-volume/"${ROLLING_SNAPSHOT_NAME}".rolling --json > /"${HISTORY_MODE}"-snapshot-cache-volume/SNAPSHOT_HEADER - - rm /rolling-tarball-restore/snapshot-import-in-progress - else - printf "%s Skipping rolling snapshot import since this job is for an archive node.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - fi - volumeMounts: - - mountPath: /var/tezos - name: persistent-storage - - mountPath: /snapshot-cache-volume - name: snapshot-cache-volume - - mountPath: /rolling-tarball-restore - name: rolling-tarball-restore - env: - - name: HISTORY_MODE - value: "" - envFrom: - - configMapRef: - name: snapshot-configmap - - configMapRef: - name: tezos-config - - name: zip-and-upload - image: "" - imagePullPolicy: Always - args: - - "zip-and-upload" - volumeMounts: - - mountPath: /var/tezos - name: persistent-storage - readOnly: true - - mountPath: /snapshot-cache-volume - name: snapshot-cache-volume - - mountPath: /rolling-tarball-restore - name: rolling-tarball-restore - - mountPath: /aws-secrets - name: aws-secrets - - mountPath: /do-secrets - name: do-secrets - env: - - name: HISTORY_MODE - value: "" - envFrom: - - configMapRef: - name: snapshot-configmap - restartPolicy: Never - volumes: - - name: persistent-storage - persistentVolumeClaim: - claimName: "" - - name: snapshot-cache-volume - persistentVolumeClaim: - claimName: snapshot-cache-volume - - name: rolling-tarball-restore - persistentVolumeClaim: - claimName: rolling-tarball-restore - - name: aws-secrets - secret: - secretName: aws-secrets - optional: true - - name: do-secrets - secret: - secretName: do-secrets - optional: true - backoffLimit: 0 diff --git a/snapshotEngine/scratchVolume.yaml b/snapshotEngine/scratchVolume.yaml deleted file mode 100644 index 4b1affb..0000000 --- a/snapshotEngine/scratchVolume.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: snapshot-cache-volume - namespace: "" -spec: - storageClassName: do-block-storage - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi diff --git a/snapshotEngine/snapshot-maker.sh b/snapshotEngine/snapshot-maker.sh deleted file mode 100755 index c8671c0..0000000 --- a/snapshotEngine/snapshot-maker.sh +++ /dev/null @@ -1,382 +0,0 @@ -#!/bin/bash - -# Delete all volumesnapshots so they arent setting around accruing charges -kubectl delete vs -l history_mode=$HISTORY_MODE - -PERSISTENT_VOLUME_CLAIM="var-volume-snapshot-${HISTORY_MODE}-node-0" - -# For yq to work, the values resulting from the above cmds need to be exported. -# We don't export them inline because of -# https://github.com/koalaman/shellcheck/wiki/SC2155 -export HISTORY_MODE -export PERSISTENT_VOLUME_CLAIM - -yq e -i '.metadata.namespace=strenv(NAMESPACE)' createVolumeSnapshot.yaml -yq e -i '.metadata.labels.history_mode=strenv(HISTORY_MODE)' createVolumeSnapshot.yaml -yq e -i '.spec.source.persistentVolumeClaimName=strenv(PERSISTENT_VOLUME_CLAIM)' createVolumeSnapshot.yaml -yq e -i '.spec.volumeSnapshotClassName=strenv(STORAGE_CLASS)' createVolumeSnapshot.yaml - -# Returns list of snapshots with a given status -# readyToUse true/false -getSnapshotNames() { - local readyToUse="${1##readyToUse=}" - shift - if [ -z "$readyToUse" ]; then - echo "Error: No jsonpath for volumesnapshots' ready status was provided." 
- exit 1 - fi - kubectl get volumesnapshots -o jsonpath="{.items[?(.status.readyToUse==$readyToUse)].metadata.name}" --namespace "$NAMESPACE" "$@" -} - -cd / - -ZIP_AND_UPLOAD_JOB_NAME=zip-and-upload-"${HISTORY_MODE}" - -# Delete zip-and-upload job if still around -if kubectl get job "${ZIP_AND_UPLOAD_JOB_NAME}"; then - printf "%s Old zip-and-upload job exits. Attempting to delete.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if ! kubectl delete jobs "${ZIP_AND_UPLOAD_JOB_NAME}"; then - printf "%s Error deleting zip-and-upload job.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - fi - printf "%s Old zip-and-upload job deleted.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" -else - printf "%s No old zip-and-upload job detected for cleanup.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" -fi - -# Delete old PVCs if still around -if [ "${HISTORY_MODE}" = rolling ]; then - if [ "$(kubectl get pvc rolling-tarball-restore)" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - kubectl delete pvc rolling-tarball-restore - sleep 5 - fi -fi - -if [ "$(kubectl get pvc "${HISTORY_MODE}"-snapshot-cache-volume)" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - kubectl delete pvc "${HISTORY_MODE}"-snapshot-cache-volume - sleep 5 -fi - -if [ "$(kubectl get pvc "${HISTORY_MODE}"-snap-volume)" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - kubectl delete pvc "${HISTORY_MODE}"-snap-volume - sleep 5 -fi - -# Check latest artifact and sleep if its too new -# This was done because nodes sometimes OOM and jobs are restarted -# Resulting in more artifacts created than should be with -# a given sleep time. IE 3 days sleep should result in 2 artifacts in a 7 day lifecycle policy -# but if the job restarts during its sleeping time it would result in more, multiple per day even. - -SLEEP_TIME=0m - -if [ "${HISTORY_MODE}" = "archive" ]; then - SLEEP_TIME="${ARCHIVE_SLEEP_DELAY}" - if [ "${ARCHIVE_SLEEP_DELAY}" != "0m" ]; then - printf "%s artifactDelay.archive is set to %s.\n" "$(date "+%Y-%m-%d %H:%M:%S")" "${ARCHIVE_SLEEP_DELAY}" - fi -elif [ "${HISTORY_MODE}" = "rolling" ]; then - SLEEP_TIME="${ROLLING_SLEEP_DELAY}" - if [ "${ROLLING_SLEEP_DELAY}" != "0m" ]; then - printf "%s artifactDelay.rolling is set to %s.\n" "$(date "+%Y-%m-%d %H:%M:%S")" "${ROLLING_SLEEP_DELAY}" - fi -fi - -if [ "${SLEEP_TIME}" = "0m" ]; then - printf "%s artifactDelay.HISTORY_MODE was not set! 
No delay...\n" "$(date "+%Y-%m-%d %H:%M:%S")" -else - # Latest timestamp of this network's artifact of this history mode - LATEST_ARTIFACT_TIMESTAMP=$(curl https://${NAMESPACE}.nyc3.digitaloceanspaces.com/base.json | \ - jq --arg HISTORY_MODE "${HISTORY_MODE}" \ - '[.[] | select(.history_mode==$HISTORY_MODE)] | sort_by(.block_timestamp) | last | .block_timestamp' | tr -d '"') - - # If base.json doesnt exists continue - if [[ -n ${LATEST_ARTIFACT_TIMESTAMP} ]]; then - printf "%s Latest artifact timestamp is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${LATEST_ARTIFACT_TIMESTAMP}" - printf "%s Sleep time is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${SLEEP_TIME}" - - ## Now minus artifact timestamp - ARTIFACT_EPOCH_TIME=$(date -d "${LATEST_ARTIFACT_TIMESTAMP}" -D "%Y-%m-%dT%H:%M:%SZ" +%s) - - # Check if sleep time is in hours or days and convert to minutes - if [[ -n $(echo $SLEEP_TIME | grep d) ]]; then - # Converting hours to minutes - SLEEP_TIME_MINUTES=$(( ${SLEEP_TIME%?} * 24 * 60 )) - else - # if its hours already just pop off the h - SLEEP_TIME_MINUTES=$(( "${SLEEP_TIME%?}" * 60 )) - fi - - # Age of artifact in minutes - ARTIFACT_AGE=$(( ($(date +%s) - ARTIFACT_EPOCH_TIME) / 60 )) - - printf "%s Latest artifact is %s minutes old.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${ARTIFACT_AGE}" - printf "%s Our set sleep time in minutes is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${SLEEP_TIME_MINUTES}" - - # If the age is less than our sleep minutes we need to continue to sleep - if [[ ${ARTIFACT_AGE} -lt ${SLEEP_TIME_MINUTES} ]]; then - TIME_LEFT=$(( SLEEP_TIME_MINUTES - ARTIFACT_AGE )) - printf "%s We need to sleep for %s minutes.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${TIME_LEFT}" - sleep "${TIME_LEFT}"m - else - printf "%s Newest artifact is older than sleep time starting job!.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - fi - fi -fi - -# Take volume snapshot -current_date=$(date "+%Y-%m-%d-%H-%M-%S" "$@") -export SNAPSHOT_NAME="$current_date-$HISTORY_MODE-node-snapshot" -# Update volume snapshot name -yq e -i '.metadata.name=strenv(SNAPSHOT_NAME)' createVolumeSnapshot.yaml - -printf "%s Creating snapshot ${SNAPSHOT_NAME} in ${NAMESPACE}.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - -# Create snapshot -if ! kubectl apply -f createVolumeSnapshot.yaml; then - printf "%s ERROR creating volumeSnapshot ${SNAPSHOT_NAME} in ${NAMESPACE} .\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 -fi - -sleep 5 - -# Wait for snapshot to finish -until [ "$(getSnapshotNames readyToUse=true -l history_mode="${HISTORY_MODE}")" ]; do - printf "%s Snapshot in progress. \n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - until [ "$(getSnapshotNames readyToUse=true -l history_mode="${HISTORY_MODE}")" ]; do - sleep 1m # without sleep, this loop is a "busy wait". 
this sleep vastly reduces CPU usage while we wait for node - if [ "$(getSnapshotNames readyToUse=true -l history_mode="${HISTORY_MODE}")" ]; then - break - fi - done -done - -printf "%s Snapshot finished!\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - -SNAPSHOTS=$(kubectl get volumesnapshots -o jsonpath='{.items[?(.status.readyToUse==true)].metadata.name}' -l history_mode="${HISTORY_MODE}") -NEWEST_SNAPSHOT=${SNAPSHOTS##* } - -printf "%s Latest snapshot is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${NEWEST_SNAPSHOT}" - -printf "%s Creating scratch volume for artifact processing...\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - -# Set namespace for both "${HISTORY_MODE}"-snapshot-cache-volume -NAMESPACE="${NAMESPACE}" yq e -i '.metadata.namespace=strenv(NAMESPACE)' scratchVolume.yaml - -# Set storage class for sratch volume yaml -STORAGE_CLASS="${STORAGE_CLASS}" yq e -i '.spec.storageClassName=strenv(STORAGE_CLASS)' scratchVolume.yaml - -sleep 5 - -# Create "${HISTORY_MODE}"-snapshot-cache-volume -printf "%s Creating PVC ${HISTORY_MODE}-snapshot-cache-volume.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" -NAME="${HISTORY_MODE}-snapshot-cache-volume" yq e -i '.metadata.name=strenv(NAME)' scratchVolume.yaml -if ! kubectl apply -f scratchVolume.yaml -then - printf "%s Error creating persistentVolumeClaim or persistentVolume.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 -fi - -printf "%s PVC %s created.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${HISTORY_MODE}-snapshot-cache-volume" - - -if [ "${HISTORY_MODE}" = rolling ]; then - sleep 5 - # Create rolling-tarball-restore - printf "%s Creating PVC rolling-tarball-restore..\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - NAME="rolling-tarball-restore" yq e -i '.metadata.name=strenv(NAME)' scratchVolume.yaml - if ! kubectl apply -f scratchVolume.yaml - then - printf "%s Error creating persistentVolumeClaim or persistentVolume.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - fi - printf "%s PVC %s created.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "rolling-tarball-restore" -fi - -## Snapshot volume namespace -NAMESPACE="${NAMESPACE}" yq e -i '.metadata.namespace=strenv(NAMESPACE)' volumeFromSnap.yaml - -# Set storageclass for restored volume -STORAGE_CLASS="${STORAGE_CLASS}" yq e -i '.spec.storageClassName=strenv(STORAGE_CLASS)' volumeFromSnap.yaml - -## Snapshot volume name -VOLUME_NAME="${HISTORY_MODE}-snap-volume" -VOLUME_NAME="${VOLUME_NAME}" yq e -i '.metadata.name=strenv(VOLUME_NAME)' volumeFromSnap.yaml - -# Point snapshot PVC at snapshot -NEWEST_SNAPSHOT="${NEWEST_SNAPSHOT}" yq e -i '.spec.dataSource.name=strenv(NEWEST_SNAPSHOT)' volumeFromSnap.yaml - -printf "%s Calculating needed snapshot restore volume size.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" -# Set size of snap volume to snapshot size plus 20% rounded up. 
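The sizing step that follows derives the restored PVC request from the EBS snapshot's reported restoreSize: take the byte count, add 20% headroom, convert to GiB, and round up to a whole number. A minimal standalone sketch of that arithmetic, assuming a hypothetical restore size (the 107374182400-byte input below is illustrative, not a value taken from this chart):

#!/bin/bash
# Example restore size in bytes (100 GiB) -- illustrative input only.
EBS_SNAPSHOT_RESTORE_SIZE=107374182400

# bytes -> +20% headroom -> GiB -> rounded up to the next whole number
RESTORE_VOLUME_SIZE=$(awk -v b="${EBS_SNAPSHOT_RESTORE_SIZE}" \
  'BEGIN { g = b * 1.2 / 1024 / 1024 / 1024; r = int(g); if (g > r) r++; print r }')

echo "${RESTORE_VOLUME_SIZE}Gi"   # prints 120Gi for the example input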
-printf "%s Newest snapshot is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${NEWEST_SNAPSHOT}" -SNAPSHOT_CONTENT=$(kubectl get volumesnapshot -n "${NAMESPACE}" "${NEWEST_SNAPSHOT}" -o jsonpath='{.status.boundVolumeSnapshotContentName}') -printf "%s Volumesnapshotcontent for %s is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${NEWEST_SNAPSHOT}" "${SNAPSHOT_CONTENT}" -EBS_SNAPSHOT_RESTORE_SIZE=$(kubectl get volumesnapshotcontent "${SNAPSHOT_CONTENT}" -o jsonpath='{.status.restoreSize}') -printf "%s EBS Snapshot Restore Size is %s in bytes.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${EBS_SNAPSHOT_RESTORE_SIZE}" - -printf "%s EBS Snapshot size is %s.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "$(echo "${EBS_SNAPSHOT_RESTORE_SIZE}" | awk '{print $1/1024/1024/1024 "GB"}')" - -# size in bytes | + 20% | to GB | rounded up -RESTORE_VOLUME_SIZE=$(echo "${EBS_SNAPSHOT_RESTORE_SIZE}" | awk '{print $1*1.2}' | awk '{print $1/1024/1024/1024}' | awk '{print ($0-int($0)>0)?int($0)+1:int($0)}') - -printf "%s We're rounding up and adding 20%% , volume size will be %sGB.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "${RESTORE_VOLUME_SIZE}" - -RESTORE_VOLUME_SIZE="${RESTORE_VOLUME_SIZE}Gi" yq e -i '.spec.resources.requests.storage=strenv(RESTORE_VOLUME_SIZE)' volumeFromSnap.yaml - -sleep 5 - -printf "%s Creating volume from snapshot ${NEWEST_SNAPSHOT}.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" -if ! kubectl apply -f volumeFromSnap.yaml -then - printf "%s Error creating persistentVolumeClaim or persistentVolume.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 -fi - -sleep 5 - -# Delete all volumesnapshots so they arent setting around accruing charges -kubectl delete vs -l history_mode=$HISTORY_MODE - -# TODO Check for PVC -printf "%s PersistentVolumeClaim ${HISTORY_MODE}-snap-volume created successfully in namespace ${NAMESPACE}.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - -# set history mode for tezos init container -HISTORY_MODE="${HISTORY_MODE}" yq e -i '.spec.template.spec.initContainers[0].env[0].value=strenv(HISTORY_MODE)' mainJob.yaml - -# set pvc name for tezos init container -PVC="${HISTORY_MODE}-snapshot-cache-volume" -MOUNT_PATH="/${PVC}" -MOUNT_PATH="${MOUNT_PATH}" yq e -i '.spec.template.spec.initContainers[0].volumeMounts[1].mountPath=strenv(MOUNT_PATH)' mainJob.yaml -PVC="${PVC}" yq e -i '.spec.template.spec.initContainers[0].volumeMounts[1].name=strenv(PVC)' mainJob.yaml - -# set history mode for rolling snapshot container -HISTORY_MODE="${HISTORY_MODE}" yq e -i '.spec.template.spec.containers[0].env[0].value=strenv(HISTORY_MODE)' mainJob.yaml - -# set pvc name for rolling snapshot container -MOUNT_PATH="${MOUNT_PATH}" yq e -i '.spec.template.spec.containers[0].volumeMounts[1].mountPath=strenv(MOUNT_PATH)' mainJob.yaml -PVC="${PVC}" yq e -i '.spec.template.spec.containers[0].volumeMounts[1].name=strenv(PVC)' mainJob.yaml - -# set history mode for zip and upload container -HISTORY_MODE="${HISTORY_MODE}" yq e -i '.spec.template.spec.containers[1].env[0].value=strenv(HISTORY_MODE)' mainJob.yaml - -# set pvc for zip and upload -MOUNT_PATH="${MOUNT_PATH}" yq e -i '.spec.template.spec.containers[1].volumeMounts[1].mountPath=strenv(MOUNT_PATH)' mainJob.yaml -PVC="${PVC}" yq e -i '.spec.template.spec.containers[1].volumeMounts[1].name=strenv(PVC)' mainJob.yaml - -# Set new PVC Name in snapshotting job -VOLUME_NAME="${VOLUME_NAME}" yq e -i '.spec.template.spec.volumes[0].persistentVolumeClaim.claimName=strenv(VOLUME_NAME)' mainJob.yaml - -# Set image name for zip and upload -IMAGE_NAME="${IMAGE_NAME}" yq e -i 
'.spec.template.spec.containers[1].image=strenv(IMAGE_NAME)' mainJob.yaml - -## Zip job namespace -NAMESPACE="${NAMESPACE}" yq e -i '.metadata.namespace=strenv(NAMESPACE)' mainJob.yaml - -# name per node type -ZIP_AND_UPLOAD_JOB_NAME="${ZIP_AND_UPLOAD_JOB_NAME}" yq e -i '.metadata.name=strenv(ZIP_AND_UPLOAD_JOB_NAME)' mainJob.yaml - -# Tezos image gets set in values.yaml in base of submod .images.octez -TEZOS_IMAGE="${TEZOS_IMAGE}" yq e -i '.spec.template.spec.initContainers[0].image=strenv(TEZOS_IMAGE)' mainJob.yaml -TEZOS_IMAGE="${TEZOS_IMAGE}" yq e -i '.spec.template.spec.containers[0].image=strenv(TEZOS_IMAGE)' mainJob.yaml - -# target pvc for artifact processing for entire job -VOLUME_NAME="${VOLUME_NAME}" yq e -i '.spec.template.spec.volumes[0].persistentVolumeClaim.claimName=strenv(VOLUME_NAME)' mainJob.yaml -PVC="${PVC}" yq e -i '.spec.template.spec.volumes[1].persistentVolumeClaim.claimName=strenv(PVC)' mainJob.yaml -PVC="${PVC}" yq e -i '.spec.template.spec.volumes[1].name=strenv(PVC)' mainJob.yaml - -# Gets rid of rolling job-related containers and volume/mounts. -if [ "${HISTORY_MODE}" = archive ]; then - # Removes create-tezos-rolling-snapshot container from entire job - yq eval -i 'del(.spec.template.spec.containers[0])' mainJob.yaml - # Removes rolling-tarball-restore volume from entire job (second to last volume) - yq eval -i 'del(.spec.template.spec.volumes[2])' mainJob.yaml - # Removes rolling-tarball-restore volumeMount from zip-and-upload container (second to last volume mount) - yq eval -i "del(.spec.template.spec.containers[0].volumeMounts[2])" mainJob.yaml -fi - -# Service account to be used by entire zip-and-upload job. -SERVICE_ACCOUNT="${SERVICE_ACCOUNT}" yq e -i '.spec.template.spec.serviceAccountName=strenv(SERVICE_ACCOUNT)' mainJob.yaml - -sleep 10 - -# Trigger subsequent filesystem inits, snapshots, tarballs, and uploads. -if ! kubectl apply -f mainJob.yaml -then - printf "%s Error creating Zip-and-upload job.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 -fi - -sleep 20 - -# Wait for snapshotting job to complete -while [ "$(kubectl get jobs "zip-and-upload-${HISTORY_MODE}" --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ]; do - printf "%s Waiting for zip-and-upload job to complete.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - while [ "$(kubectl get jobs "zip-and-upload-${HISTORY_MODE}" --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ]; do - sleep 2m # without sleep, this loop is a "busy wait". this sleep vastly reduces CPU usage while we wait for job - if [ "$(kubectl get pod -l job-name=zip-and-upload-"${HISTORY_MODE}" --namespace="${NAMESPACE}"| grep -i -e error -e evicted -e pending)" ] || \ - [ "$(kubectl get jobs "zip-and-upload-${HISTORY_MODE}" --namespace="${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Failed")].type}')" ] ; then - printf "%s Zip-and-upload job failed. This job will end and a new snapshot will be taken.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - break 2 - fi - if ! [ "$(kubectl get jobs "zip-and-upload-${HISTORY_MODE}" --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ]; then - break - fi - done -done - -if ! 
[ "$(kubectl get jobs "zip-and-upload-${HISTORY_MODE}" --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ]; then - printf "%s Zip-and-upload job completed successfully.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - - # Delete zip-and-upload job - if kubectl get job "${ZIP_AND_UPLOAD_JOB_NAME}"; then - printf "%s Old zip-and-upload job exits. Attempting to delete.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if ! kubectl delete jobs "${ZIP_AND_UPLOAD_JOB_NAME}"; then - printf "%s Error deleting zip-and-upload job.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - fi - printf "%s Old zip-and-upload job deleted.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - else - printf "%s No old zip-and-upload job detected for cleanup.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - fi - - # Delete old PVCs - if [ "${HISTORY_MODE}" = rolling ]; then - if [ "$(kubectl get pvc rolling-tarball-restore)" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - kubectl delete pvc rolling-tarball-restore - sleep 5 - fi - fi - - if [ "$(kubectl get pvc "${HISTORY_MODE}"-snapshot-cache-volume)" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - kubectl delete pvc "${HISTORY_MODE}"-snapshot-cache-volume - sleep 5 - fi - - if [ "$(kubectl get pvc "${HISTORY_MODE}"-snap-volume)" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - kubectl delete pvc "${HISTORY_MODE}"-snap-volume - sleep 5 - fi - - # Delete all volumesnapshots so they arent setting around accruing charges - kubectl delete vs -l history_mode=$HISTORY_MODE - -fi - -printf "%s Deleting temporary snapshot volume.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" -sleep 5 -kubectl delete -f volumeFromSnap.yaml | while IFS= read -r line; do printf '%s %s\n' "$(date "+%Y-%m-%d %H:%M:%S" "$@")" "$line"; done -sleep 5 -kubectl delete job snapshot-maker --namespace "${NAMESPACE}" - diff --git a/snapshotEngine/snapshot-scheduler.sh b/snapshotEngine/snapshot-scheduler.sh deleted file mode 100755 index d8710df..0000000 --- a/snapshotEngine/snapshot-scheduler.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/sh - -cd / - -## Snapshot Namespace -NAMESPACE="${NAMESPACE}" yq e -i '.metadata.namespace=strenv(NAMESPACE)' snapshotMakerJob.yaml - -SERVICE_ACCOUNT="${SERVICE_ACCOUNT}" yq e -i '.spec.template.spec.serviceAccountName=strenv(SERVICE_ACCOUNT)' snapshotMakerJob.yaml - -#Snapshot-maker image set -IMAGE_NAME="${IMAGE_NAME}" yq e -i '.spec.template.spec.containers[0].image=strenv(IMAGE_NAME)' snapshotMakerJob.yaml - -#History mode for maker job -HISTORY_MODE="${HISTORY_MODE}" yq e -i '.spec.template.spec.containers[0].env[0].value=strenv(HISTORY_MODE)' snapshotMakerJob.yaml - -# Job for each node type -JOB_NAME=snapshot-maker-"${HISTORY_MODE}"-node -JOB_NAME="${JOB_NAME}" yq e -i '.metadata.name=strenv(JOB_NAME)' snapshotMakerJob.yaml - -while true; do - - # Pause if nodes are not ready - until [ "$(kubectl get pods -n "${NAMESPACE}" -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' -l appType=octez-node -l node_class_history_mode="${HISTORY_MODE}")" = "True" ]; do - printf "%s Tezos node is not ready for snapshot. Check node pod logs. \n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - until [ "$(kubectl get pods -n "${NAMESPACE}" -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' -l appType=octez-node -l node_class_history_mode="${HISTORY_MODE}")" = "True" ]; do - sleep 1m # without sleep, this loop is a "busy wait". 
this sleep vastly reduces CPU usage while we wait for node - if [ "$(kubectl get pods -n "${NAMESPACE}" -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' -l appType=octez-node -l node_class_history_mode="${HISTORY_MODE}")" = "True" ]; then - break - fi - done - done - - # Job exists - if [ "$(kubectl get jobs "${JOB_NAME}" --namespace "${NAMESPACE}")" ]; then - printf "%s Snapshot-maker job exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if [ "$(kubectl get jobs "${JOB_NAME}" --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ]; then - printf "%s Snapshot-maker job not complete.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if [ "$(kubectl get jobs "${JOB_NAME}" --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Failed")].status}')" = "True" ]; then - printf "%s Snapshot-maker job failed. Check Job pod logs for more information.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - fi - printf "%s Waiting for snapshot-maker job to complete.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - sleep 5 - if kubectl get pod -l job-name="${JOB_NAME}" --namespace="${NAMESPACE}"| grep -i -e error -e evicted; then - printf "%s Snapshot-maker job error. Deleting and starting new job.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if ! kubectl delete jobs "${JOB_NAME}" --namespace "${NAMESPACE}"; then - printf "%s Error deleting snapshot-maker job. Check pod logs.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - exit 1 - fi - fi - fi - else - printf "%s Snapshot-maker job does not exist.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - # If PVC exists bound with no jobs running delete the PVC - if [ "$(kubectl get pvc "${NAMESPACE}"-snap-volume -o 'jsonpath={..status.phase}' --namespace "${NAMESPACE}")" = "Bound" ]; then - printf "%s PVC Exists.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if [ "$(kubectl get jobs --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ] \ - && [ "$(kubectl get jobs --namespace "${NAMESPACE}" -o jsonpath='{.status.conditions[?(@.type=="Complete")].status}')" != "True" ]; then - printf "%s No jobs are running. Deleting PVC.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - kubectl delete pvc "${HISTORY_MODE}"-snap-volume --namespace "${NAMESPACE}" - sleep 5 - fi - fi - printf "%s Ready for new snapshot-maker job. Triggering job now.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - if ! kubectl apply -f snapshotMakerJob.yaml; then - printf "%s Error creating snapshot-maker job. 
Check pod logs for more information.\n" "$(date "+%Y-%m-%d %H:%M:%S" "$@")" - fi - sleep 5 - fi -done \ No newline at end of file diff --git a/snapshotEngine/snapshot-website-base/Gemfile b/snapshotEngine/snapshot-website-base/Gemfile deleted file mode 100644 index 56915eb..0000000 --- a/snapshotEngine/snapshot-website-base/Gemfile +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -source "https://rubygems.org" - -git_source(:github) {|repo_name| "https://github.com/#{repo_name}" } - -# gem "rails" - -gem "jekyll", "~> 4.2.2" - -gem "webrick", "~> 1.7" - -gem "jekyll-datapage-generator" \ No newline at end of file diff --git a/snapshotEngine/snapshot-website-base/Gemfile.lock b/snapshotEngine/snapshot-website-base/Gemfile.lock deleted file mode 100644 index 05803c6..0000000 --- a/snapshotEngine/snapshot-website-base/Gemfile.lock +++ /dev/null @@ -1,71 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - addressable (2.8.0) - public_suffix (>= 2.0.2, < 5.0) - colorator (1.1.0) - concurrent-ruby (1.1.10) - em-websocket (0.5.3) - eventmachine (>= 0.12.9) - http_parser.rb (~> 0) - eventmachine (1.2.7) - ffi (1.15.5) - forwardable-extended (2.6.0) - http_parser.rb (0.8.0) - i18n (1.12.0) - concurrent-ruby (~> 1.0) - jekyll (4.2.2) - addressable (~> 2.4) - colorator (~> 1.0) - em-websocket (~> 0.5) - i18n (~> 1.0) - jekyll-sass-converter (~> 2.0) - jekyll-watch (~> 2.0) - kramdown (~> 2.3) - kramdown-parser-gfm (~> 1.0) - liquid (~> 4.0) - mercenary (~> 0.4.0) - pathutil (~> 0.9) - rouge (~> 3.0) - safe_yaml (~> 1.0) - terminal-table (~> 2.0) - jekyll-datapage-generator (1.4.0) - jekyll-sass-converter (2.2.0) - sassc (> 2.0.1, < 3.0) - jekyll-watch (2.2.1) - listen (~> 3.0) - kramdown (2.4.0) - rexml - kramdown-parser-gfm (1.1.0) - kramdown (~> 2.0) - liquid (4.0.3) - listen (3.7.1) - rb-fsevent (~> 0.10, >= 0.10.3) - rb-inotify (~> 0.9, >= 0.9.10) - mercenary (0.4.0) - pathutil (0.16.2) - forwardable-extended (~> 2.6) - public_suffix (4.0.7) - rb-fsevent (0.11.1) - rb-inotify (0.10.1) - ffi (~> 1.0) - rexml (3.2.5) - rouge (3.29.0) - safe_yaml (1.0.5) - sassc (2.4.0) - ffi (~> 1.9) - terminal-table (2.0.0) - unicode-display_width (~> 1.1, >= 1.1.1) - unicode-display_width (1.8.0) - webrick (1.7.0) - -PLATFORMS - ruby - -DEPENDENCIES - jekyll (~> 4.2.2) - jekyll-datapage-generator - webrick (~> 1.7) - -BUNDLED WITH - 2.3.16 \ No newline at end of file diff --git a/snapshotEngine/snapshotMakerJob.yaml b/snapshotEngine/snapshotMakerJob.yaml deleted file mode 100644 index 0643906..0000000 --- a/snapshotEngine/snapshotMakerJob.yaml +++ /dev/null @@ -1,25 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: snapshot-maker - namespace: "" -spec: - template: - metadata: - labels: - app: snapshot-maker - spec: - serviceAccountName: "" - containers: - - name: snapshot-maker - image: "" - imagePullPolicy: Always - args: - - "snapshot-maker" - env: - - name: HISTORY_MODE - value: "" - envFrom: - - configMapRef: - name: snapshot-configmap - restartPolicy: Never diff --git a/snapshotEngine/volumeFromSnap.yaml b/snapshotEngine/volumeFromSnap.yaml deleted file mode 100644 index 1f1017c..0000000 --- a/snapshotEngine/volumeFromSnap.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: "" - namespace: "" -spec: - storageClassName: do-block-storage - dataSource: - name: "" - kind: VolumeSnapshot - apiGroup: snapshot.storage.k8s.io - accessModes: - - ReadWriteOnce - resources: - requests: - storage: "" diff --git 
a/snapshotEngine/zip-and-upload.sh b/snapshotEngine/zip-and-upload.sh deleted file mode 100755 index 72a3327..0000000 --- a/snapshotEngine/zip-and-upload.sh +++ /dev/null @@ -1,575 +0,0 @@ -#!/bin/bash - -BLOCK_HEIGHT=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HEIGHT) -BLOCK_HASH=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HASH) -BLOCK_TIMESTAMP=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_TIMESTAMP) -#TEZOS_VERSION=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/TEZOS_VERSION) -NETWORK="${NAMESPACE%%-*}" -# Export for python -export S3_BUCKET="${NAMESPACE}" -TEZOS_RPC_VERSION_INFO="$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/TEZOS_RPC_VERSION_INFO)" - -TEZOS_VERSION="$(echo "${TEZOS_RPC_VERSION_INFO}" | jq -r .version)" -TEZOS_VERSION_COMMIT_HASH="$(echo "${TEZOS_RPC_VERSION_INFO}" | jq -r .commit_info.commit_hash)" -TEZOS_VERSION_COMMIT_DATE="$(echo "${TEZOS_RPC_VERSION_INFO}" | jq -r .commit_info.commit_date)" - -# Needed for alternate cloud providers -AWS_S3_BUCKET="${NAMESPACE%-*}.${SNAPSHOT_WEBSITE_DOMAIN_NAME}" - -# Default to root, is overridden by below logic if CLOUD_PROVIDER is defined -# Used for redirect file that is always uploaded to AWS S3 -REDIRECT_ROOT="/" - -# CND Endpoint used for CDN URLs (different from command endpoint) -if [[ "${CLOUD_PROVIDER}" == "digitalocean" ]]; then - FQDN=${S3_BUCKET}.nyc3.digitaloceanspaces.com - URL="${FQDN}" - REDIRECT_ROOT="https://${FQDN}/" -else - URL="${S3_BUCKET}" -fi - -cd / - -# Sets aws command credentials depending on cloud provider -# Alias functionality outside of command blocks is not consistent -# so we opted for this. -# $1 is file name -set_aws_command_creds(){ - if [[ $1 == "aws" ]]; then - echo "AWS_ACCESS_KEY_ID=$(cat /aws-secrets/aws-access-id) \ - AWS_SECRET_ACCESS_KEY=$(cat /aws-secrets/aws-secret-key) \ - aws " - else - echo "AWS_ACCESS_KEY_ID=$(cat /do-secrets/do-spaces-access-id) \ - AWS_SECRET_ACCESS_KEY=$(cat /do-secrets/do-spaces-secret-key) \ - aws --endpoint-url https://nyc3.digitaloceanspaces.com " - fi -} - -# If block_height is not set than init container failed, exit this container -[[ -z "${BLOCK_HEIGHT}" ]] && exit 1 - -printf "%s BLOCK_HASH is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HASH))\n" "$(date "+%Y-%m-%d %H:%M:%S")" -printf "%s BLOCK_HEIGHT is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_HEIGHT)\n" "$(date "+%Y-%m-%d %H:%M:%S")" -printf "%s BLOCK_TIMESTAMP is...$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/BLOCK_TIMESTAMP)\n" "$(date "+%Y-%m-%d %H:%M:%S")" - -# -# Archive Tarball -# - -# Do not take archive tarball in rolling namespace -if [[ "${HISTORY_MODE}" = archive ]]; then - printf "%s ********************* Archive Tarball *********************\n" "$(date "+%Y-%m-%d %H:%M:%S")" - ARCHIVE_TARBALL_FILENAME=tezos-"${NETWORK}"-archive-tarball-"${BLOCK_HEIGHT}".lz4 - printf "%s Archive tarball filename is ${ARCHIVE_TARBALL_FILENAME}\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # If you upload a file bigger than 50GB, you have to do a multipart upload with a part size between 1 and 10000. - # Instead of guessing size, you can use expected-size which tells S3 how big the file is and it calculates the size for you. - # However if the file gets bigger than your expected size, the multipart upload fails because it uses a part size outside of the bounds (1-10000) - # This gets the old archive tarball size and then adds 10%. Archive tarballs dont seem to grow more than that. 
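Because the tarball built below is streamed to S3 from stdin, the aws CLI cannot infer a multipart part size on its own, so the script supplies --expected-size, estimated as the previously published artifact's filesize_bytes plus roughly 10%. A rough standalone sketch of that estimate, assuming the metadata redirect object and JSON field this script itself publishes (the URL scheme and the 1 TB fallback are placeholders mirroring the defaults used here):

#!/bin/bash
# Size hint for a streamed multipart upload: last artifact's size + ~10%, or an arbitrary default.
PREV_SIZE=$(curl -sL "https://${S3_BUCKET}/archive-tarball-metadata" | jq -r '.filesize_bytes // empty')

if [ -n "${PREV_SIZE}" ]; then
  EXPECTED_SIZE=$(awk -v b="${PREV_SIZE}" 'BEGIN { printf "%.0f\n", b * 1.1 }')
else
  EXPECTED_SIZE=1000000000000   # no prior metadata yet (e.g. a brand-new network)
fi

# Later used as: ... | lz4 | aws s3 cp - "s3://${S3_BUCKET}/${ARCHIVE_TARBALL_FILENAME}" --expected-size "${EXPECTED_SIZE}"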
- if eval "$(set_aws_command_creds)" s3 ls s3://"${S3_BUCKET}" | grep archive-tarball-metadata; then #Use last file for expected size if it exists - EXPECTED_SIZE=$(curl -L http://"${S3_BUCKET}"/archive-tarball-metadata 2>/dev/null | jq -r '.filesize_bytes' | awk '{print $1*1.1}' | awk '{print ($0-int($0)>0)?int($0)+1:int($0)}') - else - EXPECTED_SIZE=1000000000000 #1000GB Arbitrary filesize for initial value. Only used if no archive-tarball-metadata exists. IE starting up test network - fi - - # LZ4 /var/tezos/node selectively and upload to S3 - printf "%s Archive Tarball : Tarballing /var/tezos/node, LZ4ing, and uploading to S3...\n" "$(date "+%Y-%m-%d %H:%M:%S")" - tar cvf - . \ - --exclude='node/data/identity.json' \ - --exclude='node/data/lock' \ - --exclude='node/data/peers.json' \ - --exclude='./lost+found' \ - -C /var/tezos \ - | lz4 | tee >(sha256sum | awk '{print $1}' > archive-tarball.sha256) \ - | eval "$(set_aws_command_creds) s3 cp - s3://${S3_BUCKET}/${ARCHIVE_TARBALL_FILENAME} --expected-size ${EXPECTED_SIZE} --acl public-read" - - SHA256=$(cat archive-tarball.sha256) - - FILESIZE_BYTES=$(eval "$(set_aws_command_creds)" s3api head-object \ - --bucket "${S3_BUCKET}" \ - --key "${ARCHIVE_TARBALL_FILENAME}" \ - --query ContentLength \ - --output text) - FILESIZE=$(echo "${FILESIZE_BYTES}" | awk '{ suffix="KMGT"; for(i=0; $1>1024 && i < length(suffix); i++) $1/=1024; print int($1) substr(suffix, i, 1), $3; }' | xargs) - - # Check if archive-tarball exists in S3 and process redirect - if ! eval "$(set_aws_command_creds)" s3api head-object --bucket "${S3_BUCKET}" --key "${ARCHIVE_TARBALL_FILENAME}" > /dev/null; then - printf "%s Archive Tarball : Error uploading ${ARCHIVE_TARBALL_FILENAME} to S3 Bucket ${S3_BUCKET}.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Archive Tarball : Upload of ${ARCHIVE_TARBALL_FILENAME} to S3 Bucket ${S3_BUCKET} successful!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # Create archive tarball metadata json - jq -n \ - --arg BLOCK_HASH "${BLOCK_HASH}" \ - --arg BLOCK_HEIGHT "${BLOCK_HEIGHT}" \ - --arg BLOCK_TIMESTAMP "${BLOCK_TIMESTAMP}" \ - --arg ARCHIVE_TARBALL_FILENAME "${ARCHIVE_TARBALL_FILENAME}" \ - --arg URL "https://${URL}/${ARCHIVE_TARBALL_FILENAME}" \ - --arg SHA256 "${SHA256}" \ - --arg FILESIZE_BYTES "${FILESIZE_BYTES}" \ - --arg FILESIZE "${FILESIZE}" \ - --arg NETWORK "${NETWORK}" \ - --arg HISTORY_MODE "archive" \ - --arg ARTIFACT_TYPE "tarball" \ - --arg TEZOS_VERSION_COMMIT_HASH "${TEZOS_VERSION_COMMIT_HASH}" \ - --arg TEZOS_VERSION_COMMIT_DATE "${TEZOS_VERSION_COMMIT_DATE}" \ - '{ - "block_hash": $BLOCK_HASH, - "block_height": ($BLOCK_HEIGHT|fromjson), - "block_timestamp": $BLOCK_TIMESTAMP, - "filename": $ARCHIVE_TARBALL_FILENAME, - "url": $URL, - "sha256": $SHA256, - "filesize_bytes": ($FILESIZE_BYTES|fromjson), - "filesize": $FILESIZE, - "chain_name": $NETWORK, - "history_mode": $HISTORY_MODE, - "artifact_type": $ARTIFACT_TYPE, - "tezos_version": { - "implementation": "octez", - "version": "", - "commit_info": { - "commit_hash": $TEZOS_VERSION_COMMIT_HASH, - "commit_date": $TEZOS_VERSION_COMMIT_DATE - } - } - }' \ - > "${ARCHIVE_TARBALL_FILENAME}".json - - # Since version.additional_info will either be another object or "release" we just overwrite it from whatever we got above - # JQ has trouble inserting a key into a file this is the way we opted to insert it - tmp=$(mktemp) - jq --arg version "$TEZOS_VERSION" '.tezos_version.version = ($version|fromjson)' "${ARCHIVE_TARBALL_FILENAME}".json > "$tmp" && mv "$tmp" 
"${ARCHIVE_TARBALL_FILENAME}".json - - # Check metadata json exists - if [[ -s "${ARCHIVE_TARBALL_FILENAME}".json ]]; then - printf "%s Archive Tarball : ${ARCHIVE_TARBALL_FILENAME}.json created.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # Upload archive tarball metadata json - if ! eval "$(set_aws_command_creds)" s3 cp "${ARCHIVE_TARBALL_FILENAME}".json s3://"${S3_BUCKET}"/"${ARCHIVE_TARBALL_FILENAME}".json --acl public-read; then - printf "%s Archive Tarball : Error uploading ${ARCHIVE_TARBALL_FILENAME}.json to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Archive Tarball : Artifact JSON ${ARCHIVE_TARBALL_FILENAME}.json uploaded to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - else - printf "%s Archive Tarball : Error creating ${ARCHIVE_TARBALL_FILENAME}.json.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Create archive tarball redirect file - if ! touch archive-tarball; then - printf "%s Archive Tarball : Error creating ${NETWORK}-archive-tarball file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Archive Tarball : ${NETWORK}-archive-tarball created successfully.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Upload redirect file and set header for previously uploaded LZ4 File - if ! eval "$(set_aws_command_creds "aws")" s3 cp archive-tarball s3://"${AWS_S3_BUCKET}" --website-redirect "${REDIRECT_ROOT}${ARCHIVE_TARBALL_FILENAME}" --cache-control 'no-cache'; then - printf "%s Archive Tarball : Error uploading ${NETWORK}-archive-tarball. to S3\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Archive Tarball : Upload of ${NETWORK}-archive-tarball successful to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Archive Tarball json redirect file - if ! touch archive-tarball-metadata; then - printf "%s Archive Tarball : Error creating ${NETWORK}-archive-tarball-metadata file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Archive Tarball : Created ${NETWORK}-archive-tarball-metadata file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Upload archive tarball json redirect file and set header for previously uploaded archive tarball json File - if ! 
eval "$(set_aws_command_creds "aws")" s3 cp archive-tarball-metadata s3://"${AWS_S3_BUCKET}" --website-redirect "${REDIRECT_ROOT}${ARCHIVE_TARBALL_FILENAME}".json --cache-control 'no-cache'; then - printf "%s archive Tarball : Error uploading ${NETWORK}-archive-tarball-metadata file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s archive Tarball : Uploaded ${NETWORK}-archive-tarball-metadata file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - fi -else - printf "%s Archive Tarball : Not creating archive tarball since this is a rolling job.\n" "$(date "+%Y-%m-%d %H:%M:%S")" -fi - -# Rolling artifacts for rolling history mode -if [ "${HISTORY_MODE}" = rolling ]; then - - # Rolling snapshot and tarball vars - ROLLING_SNAPSHOT_FILENAME="${NETWORK}"-"${BLOCK_HEIGHT}".rolling - ROLLING_SNAPSHOT=/"${HISTORY_MODE}"-snapshot-cache-volume/"${ROLLING_SNAPSHOT_FILENAME}" - ROLLING_TARBALL_FILENAME=tezos-"${NETWORK}"-rolling-tarball-"${BLOCK_HEIGHT}".lz4 - IMPORT_IN_PROGRESS=/rolling-tarball-restore/snapshot-import-in-progress - - # Wait for rolling snapshot file - until [ -f "${ROLLING_SNAPSHOT}" ]; do - printf "%s Waiting for ${ROLLING_SNAPSHOT} to exist...\n" "$(date "+%Y-%m-%d %H:%M:%S")" - until [ -f "${ROLLING_SNAPSHOT}" ]; do - if [ -f "${ROLLING_SNAPSHOT}" ];then - break - fi - done - done - - # Errors if above loop is broken out of but for some reason rolling snapshot doesnt exist - if [ -f "${ROLLING_SNAPSHOT}" ]; then - printf "%s ${ROLLING_SNAPSHOT} exists!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s ERROR ##### ${ROLLING_SNAPSHOT} does not exist!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - sleep 10 - exit 1 - fi - - # Needs time in between checks. This is faster than the snapshot container can create and delete the import files - sleep 10s - - # Wait for rolling snapshot to import to temporary filesystem for tarball. - while [ -f "${IMPORT_IN_PROGRESS}" ]; do - printf "%s Waiting for snapshot to import...\n" "$(date "+%Y-%m-%d %H:%M:%S")" - while [ -f "${IMPORT_IN_PROGRESS}" ]; do - if ! [ -f "${IMPORT_IN_PROGRESS}" ]; then - break - fi - done - done - - # Errors if above loop is broken out of but for some reason import_in_progress_file still exists - if ! [ -f "${IMPORT_IN_PROGRESS}" ]; then - printf "%s Snapshot import finished!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s ERROR ##### Snapshot import did not finish!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - sleep 10 - exit 1 - fi - - # LZ4 /"${HISTORY_MODE}"-snapshot-cache-volume/var/tezos/node selectively and upload to S3 - printf "%s ********************* Rolling Tarball *********************\\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # If you upload a file bigger than 50GB, you have to do a multipart upload with a part size between 1 and 10000. - # Instead of guessing size, you can use expected-size which tells S3 how big the file is and it calculates the size for you. - # However if the file gets bigger than your expected size, the multipart upload fails because it uses a part size outside of the bounds (1-10000) - # This gets the old rolling tarball size and then adds 10%. rolling tarballs dont seem to grow more than that. 
- printf "%s Rolling Tarball: Getting last rolling tarball filesize for multipart upload...\n" "$(date "+%Y-%m-%d %H:%M:%S")" - if eval "$(set_aws_command_creds "aws")" s3 ls s3://"${S3_BUCKET}" | grep rolling-tarball-metadata; then #Use last file for expected size if it exists - EXPECTED_SIZE=$(curl -L http://"${S3_BUCKET}"/rolling-tarball-metadata 2>/dev/null | jq -r '.filesize_bytes' | awk '{print $1*1.1}' | awk '{print ($0-int($0)>0)?int($0)+1:int($0)}') - printf "%s Rolling Tarball: Bucket has existing artifact metadata. \n" "$(date "+%Y-%m-%d %H:%M:%S")" - printf "%s Rolling Tarball: Expected size is - %s \n" "$(date "+%Y-%m-%d %H:%M:%S")" "${EXPECTED_SIZE}" - else - EXPECTED_SIZE=100000000000 #100GB Arbitrary filesize for initial value. Only used if no rolling-tarball-metadata exists. IE starting up test network - printf "%s Rolling Tarball: No existing rolling tarball metadata... \n" "$(date "+%Y-%m-%d %H:%M:%S")" - printf "%s Rolling Tarball: Expected size set arbitrarily to %s... \n" "$(date "+%Y-%m-%d %H:%M:%S")" "${EXPECTED_SIZE}" - fi - - printf "%s Rolling Tarball : Tarballing /rolling-tarball-restore/var/tezos/node, LZ4ing, and uploading to %s S3 bucket %s.\n" "$(date "+%Y-%m-%d %H:%M:%S")" "$([[ -n ${CLOUD_PROVIDER} ]] && echo ${CLOUD_PROVIDER} || echo aws)" "${S3_BUCKET}" - tar cvf - . \ - --exclude='node/data/identity.json' \ - --exclude='node/data/lock' \ - --exclude='node/data/peers.json' \ - --exclude='./lost+found' \ - -C /rolling-tarball-restore/var/tezos \ - | lz4 | tee >(sha256sum | awk '{print $1}' > rolling-tarball.sha256) \ - | eval "$(set_aws_command_creds) s3 cp - s3://${S3_BUCKET}/${ROLLING_TARBALL_FILENAME} --expected-size ${EXPECTED_SIZE} --acl public-read" - - - SHA256=$(cat rolling-tarball.sha256) - - FILESIZE_BYTES=$(eval "$(set_aws_command_creds)" s3api head-object \ - --bucket "${S3_BUCKET}" \ - --key "${ROLLING_TARBALL_FILENAME}" \ - --query ContentLength \ - --output text) - FILESIZE=$(echo "${FILESIZE_BYTES}" | awk '{ suffix="KMGT"; for(i=0; $1>1024 && i < length(suffix); i++) $1/=1024; print int($1) substr(suffix, i, 1), $3; }' | xargs) - - # Check if rolling-tarball exists and process redirect - if ! 
eval "$(set_aws_command_creds)" s3api head-object --bucket "${S3_BUCKET}" --key "${ROLLING_TARBALL_FILENAME}" > /dev/null; then - printf "%s Rolling Tarball : Error uploading ${ROLLING_TARBALL_FILENAME} to S3 Bucket ${S3_BUCKET}.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tarball : Upload of ${ROLLING_TARBALL_FILENAME} to S3 Bucket ${S3_BUCKET} successful!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - jq -n \ - --arg BLOCK_HASH "$BLOCK_HASH" \ - --arg BLOCK_HEIGHT "$BLOCK_HEIGHT" \ - --arg BLOCK_TIMESTAMP "$BLOCK_TIMESTAMP" \ - --arg ROLLING_TARBALL_FILENAME "$ROLLING_TARBALL_FILENAME" \ - --arg URL "https://${URL}/${ROLLING_TARBALL_FILENAME}" \ - --arg SHA256 "$SHA256" \ - --arg FILESIZE_BYTES "$FILESIZE_BYTES" \ - --arg FILESIZE "$FILESIZE" \ - --arg NETWORK "$NETWORK" \ - --arg HISTORY_MODE "rolling" \ - --arg ARTIFACT_TYPE "tarball" \ - --arg TEZOS_VERSION_COMMIT_HASH "${TEZOS_VERSION_COMMIT_HASH}" \ - --arg TEZOS_VERSION_COMMIT_DATE "${TEZOS_VERSION_COMMIT_DATE}" \ - '{ - "block_hash": $BLOCK_HASH, - "block_height": ($BLOCK_HEIGHT|fromjson), - "block_timestamp": $BLOCK_TIMESTAMP, - "filename": $ROLLING_TARBALL_FILENAME, - "url": $URL, - "sha256": $SHA256, - "filesize_bytes": ($FILESIZE_BYTES|fromjson), - "filesize": $FILESIZE, - "chain_name": $NETWORK, - "history_mode": $HISTORY_MODE, - "artifact_type": $ARTIFACT_TYPE, - "tezos_version": { - "implementation": "octez", - "version": "", - "commit_info": { - "commit_hash": $TEZOS_VERSION_COMMIT_HASH, - "commit_date": $TEZOS_VERSION_COMMIT_DATE - } - } - }' \ - > "${ROLLING_TARBALL_FILENAME}".json - - # Since version.additional_info will either be another object or "release" we just overwrite it from whatever we got above - # JQ has trouble inserting a key into a file this is the way we opted to insert it - tmp=$(mktemp) - jq --arg version "$TEZOS_VERSION" '.tezos_version.version = ($version|fromjson)' "${ROLLING_TARBALL_FILENAME}".json > "$tmp" && mv "$tmp" "${ROLLING_TARBALL_FILENAME}".json - - # Check metadata exists - if [[ -s "${ROLLING_TARBALL_FILENAME}".json ]]; then - printf "%s Rolling Tarball : ${ROLLING_TARBALL_FILENAME}.json created.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # upload metadata json - if ! eval "$(set_aws_command_creds)" s3 cp "${ROLLING_TARBALL_FILENAME}".json s3://"${S3_BUCKET}"/"${ROLLING_TARBALL_FILENAME}".json --acl public-read; then - printf "%s Rolling Tarball : Error uploading ${ROLLING_TARBALL_FILENAME}.json to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tarball : Metadata JSON ${ROLLING_TARBALL_FILENAME}.json uploaded to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - else - printf "%s Rolling Tarball : Error creating ${ROLLING_TARBALL_FILENAME}.json locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Tarball redirect file - if ! touch rolling-tarball; then - printf "%s Rolling Tarball : Error creating ${NETWORK}-rolling-tarball file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tarball : Created ${NETWORK}-rolling-tarball file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Upload redirect file and set header for previously uploaded LZ4 File - if ! 
eval "$(set_aws_command_creds "aws")" s3 cp rolling-tarball s3://"${AWS_S3_BUCKET}" --website-redirect "${REDIRECT_ROOT}${ROLLING_TARBALL_FILENAME}" --cache-control 'no-cache'; then - printf "%s Rolling Tarball : Error uploading ${NETWORK}-rolling-tarball file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tarball : Uploaded ${NETWORK}-rolling-tarball file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Rolling Tarball json redirect file - if ! touch rolling-tarball-metadata; then - printf "%s Rolling Tarball : Error creating ${NETWORK}-rolling-tarball-metadata file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tarball : Created ${NETWORK}-rolling-tarball-metadata file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Upload rolling tarball json redirect file and set header for previously uploaded rolling tarball json File - if ! eval "$(set_aws_command_creds "aws")" s3 cp rolling-tarball-metadata s3://"${AWS_S3_BUCKET}" --website-redirect "${REDIRECT_ROOT}${ROLLING_TARBALL_FILENAME}".json --cache-control 'no-cache'; then - printf "%s Rolling Tarball : Error uploading ${NETWORK}-rolling-tarball-metadata file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tarball : Uploaded ${NETWORK}-rolling-tarball-metadata file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - fi - - # - # Rolling Snapshot - # - printf "%s ********************* Rolling Tezos Snapshot *********************\\n" "$(date "+%Y-%m-%d %H:%M:%S")" - # If rolling snapshot exists locally - if test -f "${ROLLING_SNAPSHOT}"; then - printf "%s ${ROLLING_SNAPSHOT} exists!\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # Upload rolling snapshot to S3 and error on failure - if ! eval "$(set_aws_command_creds)" s3 cp "${ROLLING_SNAPSHOT}" s3://"${S3_BUCKET}" --acl public-read; then - printf "%s Rolling Tezos : Error uploading ${ROLLING_SNAPSHOT} to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tezos : Successfully uploaded ${ROLLING_SNAPSHOT} to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - printf "%s Rolling Tezos : Uploading redirect...\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - FILESIZE_BYTES=$(stat -c %s "${ROLLING_SNAPSHOT}") - printf "FILESIZE_BYTES COMMAND=%s\n" "$(stat -c %s "${ROLLING_SNAPSHOT}")" - printf "FILESIZE_BYTES VARIABLE=%s\n" "${FILESIZE_BYTES}" - - FILESIZE=$(echo "${FILESIZE_BYTES}" | awk '{ suffix="KMGT"; for(i=0; $1>1024 && i < length(suffix); i++) $1/=1024; print int($1) substr(suffix, i, 1), $3; }' | xargs ) - SHA256=$(sha256sum "${ROLLING_SNAPSHOT}" | awk '{print $1}') - - TEZOS_VERSION_MAJOR="$(echo "${TEZOS_RPC_VERSION_INFO}" | jq .version.major)" - - if [[ $TEZOS_VERSION_MAJOR -lt 16 ]]; then - SNAPSHOT_VERSION=4 - else - SNAPSHOT_HEADER=$(cat /"${HISTORY_MODE}"-snapshot-cache-volume/SNAPSHOT_HEADER) - SNAPSHOT_VERSION="$(echo "${SNAPSHOT_HEADER}" | jq .snapshot_header.version)" - fi - - jq -n \ - --arg BLOCK_HASH "$BLOCK_HASH" \ - --arg BLOCK_HEIGHT "$BLOCK_HEIGHT" \ - --arg BLOCK_TIMESTAMP "$BLOCK_TIMESTAMP" \ - --arg ROLLING_SNAPSHOT_FILENAME "$ROLLING_SNAPSHOT_FILENAME" \ - --arg URL "https://${URL}/${ROLLING_SNAPSHOT_FILENAME}" \ - --arg SHA256 "$SHA256" \ - --arg FILESIZE_BYTES "$FILESIZE_BYTES" \ - --arg FILESIZE "$FILESIZE" \ - --arg NETWORK "$NETWORK" \ - --arg HISTORY_MODE "rolling" \ - --arg ARTIFACT_TYPE "tezos-snapshot" \ - --arg TEZOS_VERSION_COMMIT_HASH "${TEZOS_VERSION_COMMIT_HASH}" \ - --arg TEZOS_VERSION_COMMIT_DATE "${TEZOS_VERSION_COMMIT_DATE}" \ - --arg SNAPSHOT_VERSION "$SNAPSHOT_VERSION" \ - '{ - "block_hash": 
$BLOCK_HASH, - "block_height": ($BLOCK_HEIGHT|fromjson), - "block_timestamp": $BLOCK_TIMESTAMP, - "filename": $ROLLING_SNAPSHOT_FILENAME, - "url": $URL, - "sha256": $SHA256, - "filesize_bytes": ($FILESIZE_BYTES|fromjson), - "filesize": $FILESIZE, - "chain_name": $NETWORK, - "history_mode": $HISTORY_MODE, - "artifact_type": $ARTIFACT_TYPE, - "tezos_version":{ - "implementation": "octez", - "version": "", - "commit_info": { - "commit_hash": $TEZOS_VERSION_COMMIT_HASH, - "commit_date": $TEZOS_VERSION_COMMIT_DATE - } - }, - "snapshot_version": ($SNAPSHOT_VERSION|fromjson), - }' \ - > "${ROLLING_SNAPSHOT_FILENAME}".json - - # Since version.additional_info will either be another object or "release" we just overwrite it from whatever we got above - # JQ has trouble inserting a key into a file this is the way we opted to insert it - tmp=$(mktemp) - jq --arg version "$TEZOS_VERSION" '.tezos_version.version = ($version|fromjson)' "${ROLLING_SNAPSHOT_FILENAME}".json > "$tmp" && mv "$tmp" "${ROLLING_SNAPSHOT_FILENAME}".json - - # Check metadata json exists - if [[ -s "${ROLLING_SNAPSHOT_FILENAME}".json ]]; then - printf "%s Rolling Snapshot : ${ROLLING_SNAPSHOT_FILENAME}.json created.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - - # Upload Rolling Snapshot metadata json - if ! eval "$(set_aws_command_creds)" s3 cp "${ROLLING_SNAPSHOT_FILENAME}".json s3://"${S3_BUCKET}"/"${ROLLING_SNAPSHOT_FILENAME}".json --acl public-read; then - printf "%s Rolling Snapshot : Error uploading ${ROLLING_SNAPSHOT_FILENAME}.json to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Snapshot : Artifact JSON ${ROLLING_SNAPSHOT_FILENAME}.json uploaded to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - else - printf "%s Rolling Snapshot : Error creating ${ROLLING_SNAPSHOT_FILENAME}.json.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Rolling snapshot redirect object - touch rolling - - # Upload rolling tezos snapshot redirect object - if ! eval "$(set_aws_command_creds "aws")" s3 cp rolling s3://"${AWS_S3_BUCKET}" --website-redirect "${REDIRECT_ROOT}${ROLLING_SNAPSHOT_FILENAME}" --cache-control 'no-cache'; then - printf "%s Rolling Tezos : Error uploading redirect object for ${ROLLING_SNAPSHOT} to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling Tezos : Successfully uploaded redirect object for ${ROLLING_SNAPSHOT} to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Rolling snapshot json redirect file - if ! touch rolling-snapshot-metadata; then - printf "%s Rolling Snapshot : Error creating ${NETWORK}-rolling-snapshot-metadata file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling snapshot : Created ${NETWORK}-rolling-snapshot-metadata file locally.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Upload rolling snapshot json redirect file and set header for previously uploaded rolling snapshot json File - if ! eval "$(set_aws_command_creds "aws")" s3 cp rolling-snapshot-metadata s3://"${AWS_S3_BUCKET}" --website-redirect "${REDIRECT_ROOT}${ROLLING_SNAPSHOT_FILENAME}".json --cache-control 'no-cache'; then - printf "%s Rolling snapshot : Error uploading ${NETWORK}-rolling-snapshot-metadata file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Rolling snapshot : Uploaded ${NETWORK}-rolling-snapshot-metadata file to S3.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - fi - else - printf "%s Rolling Tezos : ${ROLLING_SNAPSHOT} does not exist. Not uploading. \n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi -else - printf "%s Skipping rolling snapshot import and export because this is an archive job. 
\n" "$(date "+%Y-%m-%d %H:%M:%S")" -fi - -if [[ -n "${SNAPSHOT_WEBSITE_DOMAIN_NAME}" ]]; then - - # Network bucket redirect - # Redirects from network.website.com to website.com/network - touch index.html - if ! eval "$(set_aws_command_creds "aws")" s3 cp index.html s3://"${AWS_S3_BUCKET}" --website-redirect https://"${SNAPSHOT_WEBSITE_DOMAIN_NAME}"/"${NETWORK}" --cache-control 'no-cache'; then - printf "%s ERROR ##### Could not upload network site redirect.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Successfully uploaded network site redirect.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Need to be in this dir for jekyll to run. - # Container-specific requirement - cd /srv/jekyll || exit - - # Copy Gemfile and Gemfile.lock to current dir - cp /snapshot-website-base/* . - - # Remote theme does not work - # Using git instead - REPO="${JEKYLL_REMOTE_THEME_REPOSITORY%@*}" - BRANCH="${JEKYLL_REMOTE_THEME_REPOSITORY#*@}" - LOCAL_DIR=monosite - git clone https://github.com/"${REPO}".git --branch "${BRANCH}" "${LOCAL_DIR}" - cp -r "${LOCAL_DIR}"/* . - rm -rf "${LOCAL_DIR}" - - # Create new base.json locally - touch base.json - echo '[]' > "base.json" - - printf "%s Building base.json... this may take a while.\n" "$(date "+%Y-%m-%d %H:%M:%S")" - eval "$(set_aws_command_creds)" s3 ls s3://"${S3_BUCKET}" | grep '\.json'| sort | awk '{print $4}' | awk -F '\\\\n' '{print $1}' | tr ' ' '\n' | grep -v -e base.json -e tezos-snapshots.json | while read ITEM; do - tmp=$(mktemp) && cp base.json "${tmp}" && jq --argjson file "$(curl -s https://"${FQDN}"/$ITEM)" '. += [$file]' "${tmp}" > base.json - done - - #Upload base.json - if ! eval "$(set_aws_command_creds)" s3 cp base.json s3://"${S3_BUCKET}"/base.json --acl public-read; then - printf "%s Upload base.json : Error uploading file base.json to S3. \n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Upload base.json : File base.json successfully uploaded to S3. \n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Create snapshot.json - # List of all snapshot metadata across all subdomains - # build site pages - python /getAllSnapshotMetadata.py - - # Fail if python raised exception (validation failure) - ret=$? - if [[ "${ret}" -ne 0 ]]; then - printf "%s Metadata did not validate sucessfully. Exiting... \n" "$(date "+%Y-%m-%d %H:%M:%S")" - sleep 20 - exit 1 - fi - - # Check if tezos-snapshots.json exists - # tezos-snapshots.json is a list of all snapshots in all buckets - if [[ ! -f tezos-snapshots.json ]]; then - printf "%s ERROR tezos-snapshots.json does not exist locally. \n" "$(date "+%Y-%m-%d %H:%M:%S")" - sleep 5 - exit 1 - fi - - # Upload tezos-snapshots.json - if ! eval "$(set_aws_command_creds "aws")" s3 cp tezos-snapshots.json s3://"${SNAPSHOT_WEBSITE_DOMAIN_NAME}"/tezos-snapshots.json; then - printf "%s Upload tezos-snapshots.json : Error uploading file tezos-snapshots.json to S3. \n" "$(date "+%Y-%m-%d %H:%M:%S")" - else - printf "%s Upload tezos-snapshots.json : File tezos-snapshots.json successfully uploaded to S3. 
\n" "$(date "+%Y-%m-%d %H:%M:%S")" - fi - - # Separate python for web page build - # Needs tezos-snapshots.json to exist before pages are built - python /getLatestSnapshotMetadata.py - - # Generate HTML from markdown and metadata - chown -R jekyll:jekyll ./* - bundle exec jekyll build --quiet - - # Upload chain page (index.html and assets) to root of website bucket - eval "$(set_aws_command_creds "aws")" s3 cp _site/ s3://"${SNAPSHOT_WEBSITE_DOMAIN_NAME}" --recursive | grep "*" - - exit 0 -fi \ No newline at end of file diff --git a/utils/Dockerfile b/utils/Dockerfile index 84e5c12..082a297 100644 --- a/utils/Dockerfile +++ b/utils/Dockerfile @@ -11,15 +11,15 @@ ENV PYTHONUNBUFFERED=1 # We re-install binutils at the end because Python execve(2)s ld(1) to # load zeromq. -RUN PIP="pip --no-cache install" \ +RUN PIP_INST="pip --no-cache install" \ APK_ADD="apk add --no-cache"; \ $APK_ADD --virtual .build-deps gcc python3-dev \ libffi-dev musl-dev make \ && $APK_ADD libsodium-dev libsecp256k1-dev gmp-dev \ && $APK_ADD zeromq-dev findmnt \ - && $PIP install base58 pynacl \ - && $PIP install mnemonic pytezos requests \ - && $PIP install pyblake2 pysodium flask \ + && $PIP_INST base58 pynacl \ + && $PIP_INST mnemonic pytezos requests \ + && $PIP_INST pyblake2 pysodium flask \ && apk del .build-deps \ && $APK_ADD jq netcat-openbsd curl binutils \ && $APK_ADD lz4