From 28063f2e964db965c375129a4f63655256f2d9af Mon Sep 17 00:00:00 2001
From: Michael Sutton <msutton@cs.huji.ac.il>
Date: Sun, 20 Aug 2023 17:11:55 +0300
Subject: [PATCH] showcase logic for estimating a timestamp from DAA score

---
 src/store.py                      |  19 +++
 src/tx_timestamp_estimation.ipynb | 187 ++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+)
 create mode 100644 src/tx_timestamp_estimation.ipynb

diff --git a/src/store.py b/src/store.py
index b0bee1a..fe9c84b 100644
--- a/src/store.py
+++ b/src/store.py
@@ -24,6 +24,9 @@
 block_status_store = b'block-statuses'
 utxo_diff_store = b'utxo-diffs'
 utxo_diff_child_store = b'utxo-diff-children'
+highest_chain_block_index = b'highest-chain-block-index'
+chain_block_hash_by_index = b'chain-block-hash-by-index'
+chain_block_index_by_hash = b'chain-block-index-by-hash'
 
 
 class Block:
@@ -166,6 +169,22 @@ def get_raw_block(self, block_hash):
 		b.ParseFromString(block_bytes)
 		return b
 
+	def get_highest_chain_block_index(self):
+		index_bytes = self.db.get(self.prefix + sep + highest_chain_block_index)
+		index = int.from_bytes(index_bytes, 'little')
+		return index
+
+	def get_chain_block_hash_by_index(self, index):
+		hash_bytes = self.db.get(self.prefix + sep + chain_block_hash_by_index + sep + 
+			   index.to_bytes(8, 'big'))
+		return hash_bytes
+
+	def get_chain_block_index_by_hash(self, block_hash):
+		index_bytes = self.db.get(self.prefix + sep + chain_block_index_by_hash + sep + 
+			   block_hash)
+		index = int.from_bytes(index_bytes, 'little')
+		return index
+
 	def get_header_data(self, block_hash):
 		if block_hash in self.headers:
 			return self.headers[block_hash]
diff --git a/src/tx_timestamp_estimation.ipynb b/src/tx_timestamp_estimation.ipynb
new file mode 100644
index 0000000..b192199
--- /dev/null
+++ b/src/tx_timestamp_estimation.ipynb
@@ -0,0 +1,187 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4cdceabc-1629-49e1-9e78-404baa02355c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datetime import datetime\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from store import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "6a8f210d-c69b-4b62-9970-fe7ea9f31ff6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Can be downloaded from https://mega.nz/file/rOJmhLIR#5j7wko32Mh0MlsQnC9yVG6jCvPql7Isqcyvgh3kmxKk\n",
+    "# See cell one before the last for avoiding this download\n",
+    "pre_checkpoint_store = Store(r'/home/pool/data/kaspa-data-22-11-21-correct-utxo-commit')\n",
+    "# The current node's datadir\n",
+    "current_store = Store(r'/home/pool/.kaspad/kaspa-mainnet/datadir2')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c661c66d-34a9-43d0-ae43-106d0d39dd09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "Builds a sample of headers throughout the history\n",
+    "'''\n",
+    "def build_header_samples(store):\n",
+    "    # We first read the historic pruning point list. This gives\n",
+    "    # us a per pruning-point period sample throughout history (~ 1 or 2 a day)\n",
+    "    samples = store.pruning_points_chain()\n",
+    "    # Pruning points are returned from later to earlier so we reverse\n",
+    "    samples.reverse()\n",
+    "    # Get the chain index of the last block (aka the current pruning point)\n",
+    "    low = store.get_chain_block_index_by_hash(samples[-1])\n",
+    "    # Get the index of the highest chain block (aka the sink/virtual-selected-parent)\n",
+    "    high = store.get_highest_chain_block_index()\n",
+    "    # Add a few more samples from recent data\n",
+    "    for i in range(low + (high - low) // 3, high + 1, (high - low) // 3):\n",
+    "        samples.append(store.get_chain_block_hash_by_index(i))\n",
+    "    return [store.get_raw_header(h) for h in samples]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "51c41329-fada-4be4-9341-9e2ac4f04e48",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "'''\n",
+    "Estimate timestamp from DAA score by interpolating over the samples\n",
+    "'''\n",
+    "def estimate_timestamp(samples, daa_score):\n",
+    "    if daa_score < samples[0].daaScore or daa_score > samples[-1].daaScore:\n",
+    "        raise '{} is out of range'.format(daa_score)\n",
+    "    # Optimization: this search can be done with a binary search \n",
+    "    # since DAA score is monotonically increasing over the chain\n",
+    "    for i in range(len(samples) - 1):\n",
+    "        current, next = samples[i], samples[i+1]\n",
+    "        if daa_score >= current.daaScore and daa_score < next.daaScore:\n",
+    "            frac = (daa_score - current.daaScore) / (next.daaScore - current.daaScore)\n",
+    "            interpolated_timestamp = int(current.timeInMilliseconds + \n",
+    "                                         (next.timeInMilliseconds - current.timeInMilliseconds) * frac)\n",
+    "            return datetime.fromtimestamp(interpolated_timestamp // 1000)\n",
+    "    raise 'unreachable'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "4c321877-9dfc-4cb6-b59a-50dbdae1bed2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pre_checkpoint_samples = build_header_samples(pre_checkpoint_store)\n",
+    "current_samples = build_header_samples(current_store)\n",
+    "# Combine samples from both network phases\n",
+    "samples = pre_checkpoint_samples + current_samples"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "ff2c73bb-daa2-4cbe-a549-2a599cb45d80",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(datetime.datetime(2021, 11, 7, 17, 27, 23),\n",
+       " datetime.datetime(2023, 8, 9, 13, 36, 32),\n",
+       " datetime.datetime(2023, 8, 9, 13, 36, 16))"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "estimate_timestamp(samples, 55), estimate_timestamp(samples, 55137666), estimate_timestamp(samples, 55137650)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "ae10932b-add7-4ea9-948a-b1eca5ce39cd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0 1636298787842\n",
+      "87133 1636386662010\n",
+      "176797 1636473700804\n",
+      "264837 1636560706885\n",
+      "355974 1636650005662\n",
+      "445152 1636737841327\n",
+      "536709 1636828600930\n",
+      "624635 1636912614350\n",
+      "712234 1636999362832\n",
+      "801831 1637088292662\n",
+      "890716 1637174890675\n",
+      "978396 1637260956454\n",
+      "1068387 1637349078269\n",
+      "1139626 1637418723538\n",
+      "1218320 1637495941516\n",
+      "1312860 1637609671037\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Print the (DAA score, timestamp) tuples from the pre-halt store.\n",
+    "# Since these values are fixed, one can simply use this list\n",
+    "for header in pre_checkpoint_samples:\n",
+    "    print(header.daaScore, header.timeInMilliseconds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "d9daf3d6-6366-4a5b-9434-a01374f6b8dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Close the opened resources\n",
+    "pre_checkpoint_store.close()\n",
+    "current_store.close()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}