From 28063f2e964db965c375129a4f63655256f2d9af Mon Sep 17 00:00:00 2001
From: Michael Sutton
Date: Sun, 20 Aug 2023 17:11:55 +0300
Subject: [PATCH] showcase logic for estimating a timestamp from DAA score

---
 src/store.py                      |  19 +++
 src/tx_timestamp_estimation.ipynb | 187 ++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+)
 create mode 100644 src/tx_timestamp_estimation.ipynb

diff --git a/src/store.py b/src/store.py
index b0bee1a..fe9c84b 100644
--- a/src/store.py
+++ b/src/store.py
@@ -24,6 +24,9 @@ block_status_store = b'block-statuses'
 
 utxo_diff_store = b'utxo-diffs'
 utxo_diff_child_store = b'utxo-diff-children'
+highest_chain_block_index = b'highest-chain-block-index'
+chain_block_hash_by_index = b'chain-block-hash-by-index'
+chain_block_index_by_hash = b'chain-block-index-by-hash'
 
 
 class Block:
@@ -166,6 +169,22 @@ def get_raw_block(self, block_hash):
         b.ParseFromString(block_bytes)
         return b
 
+    def get_highest_chain_block_index(self):
+        index_bytes = self.db.get(self.prefix + sep + highest_chain_block_index)
+        index = int.from_bytes(index_bytes, 'little')
+        return index
+
+    def get_chain_block_hash_by_index(self, index):
+        hash_bytes = self.db.get(self.prefix + sep + chain_block_hash_by_index + sep +
+                                 index.to_bytes(8, 'big'))
+        return hash_bytes
+
+    def get_chain_block_index_by_hash(self, block_hash):
+        index_bytes = self.db.get(self.prefix + sep + chain_block_index_by_hash + sep +
+                                  block_hash)
+        index = int.from_bytes(index_bytes, 'little')
+        return index
+
     def get_header_data(self, block_hash):
         if block_hash in self.headers:
             return self.headers[block_hash]
diff --git a/src/tx_timestamp_estimation.ipynb b/src/tx_timestamp_estimation.ipynb
new file mode 100644
index 0000000..b192199
--- /dev/null
+++ b/src/tx_timestamp_estimation.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4cdceabc-1629-49e1-9e78-404baa02355c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datetime import datetime\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from store import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6a8f210d-c69b-4b62-9970-fe7ea9f31ff6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Can be downloaded from https://mega.nz/file/rOJmhLIR#5j7wko32Mh0MlsQnC9yVG6jCvPql7Isqcyvgh3kmxKk\n",
+    "# See the second-to-last cell for a way to avoid this download\n",
+    "pre_checkpoint_store = Store(r'/home/pool/data/kaspa-data-22-11-21-correct-utxo-commit')\n",
+    "# The current node's datadir\n",
+    "current_store = Store(r'/home/pool/.kaspad/kaspa-mainnet/datadir2')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c661c66d-34a9-43d0-ae43-106d0d39dd09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "Builds a sample of headers throughout the history\n",
+    "'''\n",
+    "def build_header_samples(store):\n",
+    "    # We first read the historic pruning point list. This gives\n",
+    "    # us a per pruning-point period sample throughout history (~ 1 or 2 a day)\n",
+    "    samples = store.pruning_points_chain()\n",
+    "    # Pruning points are returned from later to earlier so we reverse\n",
+    "    samples.reverse()\n",
+    "    # Get the chain index of the last block (aka the current pruning point)\n",
+    "    low = store.get_chain_block_index_by_hash(samples[-1])\n",
+    "    # Get the index of the highest chain block (aka the sink/virtual-selected-parent)\n",
+    "    high = store.get_highest_chain_block_index()\n",
+    "    # Add a few more samples from recent data (step clamped to >= 1: range() rejects a zero step)\n",
+    "    for i in range(low + (high - low) // 3, high + 1, max(1, (high - low) // 3)):\n",
+    "        samples.append(store.get_chain_block_hash_by_index(i))\n",
+    "    return [store.get_raw_header(h) for h in samples]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "51c41329-fada-4be4-9341-9e2ac4f04e48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "Estimate timestamp from DAA score by interpolating over the samples\n",
+    "'''\n",
+    "def estimate_timestamp(samples, daa_score):\n",
+    "    if daa_score < samples[0].daaScore or daa_score > samples[-1].daaScore:\n",
+    "        raise ValueError('{} is out of range'.format(daa_score))\n",
+    "    # Optimization: this search can be done with a binary search \n",
+    "    # since DAA score is monotonically increasing over the chain\n",
+    "    for lower, upper in zip(samples, samples[1:]):\n",
+    "        # Closed upper bound so the last sample's own DAA score matches; skip zero-width spans\n",
+    "        if lower.daaScore <= daa_score <= upper.daaScore and lower.daaScore < upper.daaScore:\n",
+    "            frac = (daa_score - lower.daaScore) / (upper.daaScore - lower.daaScore)\n",
+    "            interpolated_timestamp = int(lower.timeInMilliseconds + \n",
+    "                                         (upper.timeInMilliseconds - lower.timeInMilliseconds) * frac)\n",
+    "            return datetime.fromtimestamp(interpolated_timestamp // 1000)\n",
+    "    raise ValueError('no sample interval contains {}'.format(daa_score))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "4c321877-9dfc-4cb6-b59a-50dbdae1bed2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pre_checkpoint_samples = build_header_samples(pre_checkpoint_store)\n",
+    "current_samples = build_header_samples(current_store)\n",
+    "# Combine samples from both network phases\n",
+    "samples = pre_checkpoint_samples + current_samples"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ff2c73bb-daa2-4cbe-a549-2a599cb45d80",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(datetime.datetime(2021, 11, 7, 17, 27, 23),\n",
+       " datetime.datetime(2023, 8, 9, 13, 36, 32),\n",
+       " datetime.datetime(2023, 8, 9, 13, 36, 16))"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "estimate_timestamp(samples, 55), estimate_timestamp(samples, 55137666), estimate_timestamp(samples, 55137650)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ae10932b-add7-4ea9-948a-b1eca5ce39cd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 1636298787842\n",
+      "87133 1636386662010\n",
+      "176797 1636473700804\n",
+      "264837 1636560706885\n",
+      "355974 1636650005662\n",
+      "445152 1636737841327\n",
+      "536709 1636828600930\n",
+      "624635 1636912614350\n",
+      "712234 1636999362832\n",
+      "801831 1637088292662\n",
+      "890716 1637174890675\n",
+      "978396 1637260956454\n",
+      "1068387 1637349078269\n",
+      "1139626 1637418723538\n",
+      "1218320 1637495941516\n",
+      "1312860 1637609671037\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the (DAA score, timestamp) tuples from the pre-halt store.\n",
+    "# Since these values are fixed, one can simply use this list\n",
+    "for header in pre_checkpoint_samples:\n",
+    "    print(header.daaScore, header.timeInMilliseconds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d9daf3d6-6366-4a5b-9434-a01374f6b8dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Close the opened resources\n",
+    "pre_checkpoint_store.close()\n",
+    "current_store.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}