Skip to content

Commit

Permalink
UDFS to convert hex strings (#1251)
Browse files Browse the repository at this point in the history
  • Loading branch information
lithium323 authored Jan 15, 2025
1 parent 7435f18 commit 261ab13
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 12 deletions.
67 changes: 58 additions & 9 deletions src/op_analytics/datapipeline/models/compute/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import duckdb
import numba
from duckdb.functional import PythonUDFType
from duckdb.typing import BLOB, INTEGER
from duckdb.functional import PythonUDFType, FunctionNullHandling
from duckdb.typing import BLOB, INTEGER, VARCHAR, UBIGINT

from op_analytics.coreutils.duckdb_inmem.client import DuckDBContext

Expand All @@ -20,6 +20,32 @@ def count_zero_bytes(x: bytes) -> int:
return count


def hex_to_lossy(x: str | None) -> int | None:
"""Assumes that "x" is a hex string with the leading "0x" prefix."""
if x is None:
return None

assert len(x) == 66

# If the string beyond the 16 right-most bytes is zeros then the conversion
# to BIGINT will be valid.
#
# NOTE (pedrod): I also attempted to use the HUGEINT return type but it resulted
# in an incorrect conversion from the python type to the duckdb type.
if x[:-16] == "0x000000000000000000000000000000000000000000000000":
return int("0x" + x[-16:], 0)

# There are non-zero bytes beyond the right-most 32 bytes.
# This means this number cannot be represented as a hugeint.
return None


def hex_to_lossless(x: str) -> str:
    """Return the base-10 string for the "0x"-prefixed hex string "x".

    The result is a string rather than a number, so the full value is kept
    regardless of its magnitude.
    """
    # Base 0 lets int() pick the radix from the "0x" prefix.
    value = int(x, 0)
    return str(value)


_UDF_LOCK = threading.Lock()


Expand All @@ -42,6 +68,24 @@ def create_python_udfs(duckdb_context: DuckDBContext):
parameters=[BLOB],
return_type=INTEGER,
)

duckdb_context.client.create_function(
"hex_to_lossy",
hex_to_lossy,
type=PythonUDFType.NATIVE,
null_handling=FunctionNullHandling.SPECIAL,
parameters=[VARCHAR],
return_type=UBIGINT,
)

duckdb_context.client.create_function(
"hex_to_lossless",
hex_to_lossless,
type=PythonUDFType.NATIVE,
parameters=[VARCHAR],
return_type=VARCHAR,
)

duckdb_context.python_udfs_ready = True


Expand All @@ -59,7 +103,7 @@ def create_duckdb_macros(duckdb_context: DuckDBContext):
CREATE OR REPLACE MACRO wei_to_gwei(a)
AS a::DECIMAL(28, 0) * 0.000000001::DECIMAL(10, 10);
CREATE OR REPLACE MACRO gwei_to_eth(a)
AS a::DECIMAL(28, 10) * 0.000000001::DECIMAL(10, 10);
Expand All @@ -74,33 +118,33 @@ def create_duckdb_macros(duckdb_context: DuckDBContext):
-- Truncate a timestamp to hour.
CREATE OR REPLACE MACRO epoch_to_hour(a) AS
date_trunc('hour', make_timestamp(a * 1000000::BIGINT));
-- Truncate a timestamp to day.
CREATE OR REPLACE MACRO epoch_to_day(a) AS
date_trunc('day', make_timestamp(a * 1000000::BIGINT));
-- Division by 16 for DECIMAL types.
CREATE OR REPLACE MACRO div16(a)
AS a * 0.0625::DECIMAL(5, 5);
-- Get the length in bytes for binary data that is encoded as a hex string.
CREATE OR REPLACE MACRO hexstr_bytelen(x)
AS CAST((length(x) - 2) / 2 AS INT);
-- Count zero bytes for binary data that is encoded as a hex string.
CREATE OR REPLACE MACRO hexstr_zero_bytes(a)
AS count_zero_bytes(unhex(substr(a, 3)));
-- Calculate calldata gas used for binary data that is encoded as a hex
-- string (can be updated by an EIP).
CREATE OR REPLACE MACRO hexstr_calldata_gas(x)
AS 16 * (hexstr_bytelen(x) - hexstr_zero_bytes(x)) + 4 * hexstr_zero_bytes(x);
--Get the method id for input data. This is the first 4 bytes, or first 10
-- string characters for binary data that is encoded as a hex string.
CREATE OR REPLACE MACRO hexstr_method_id(x)
AS substring(x,1,10);
-- Trace address depth. Examples:
-- "" -> 0
-- "0" -> 1
Expand All @@ -125,6 +169,11 @@ def create_duckdb_macros(duckdb_context: DuckDBContext):
WHEN length(a) = 1 THEN ''
ELSE a[:-1 * (1+strpos(reverse(a), ','))]
END;
-- Convert indexed event arg to address.
CREATE OR REPLACE MACRO indexed_event_arg_to_address(a)
AS concat('0x', right(a, 40));
""")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ def mock_get_chain_responses(session, summary_response, data_type):
raise NotImplementedError(f"Mock data not implemented for {data_type}")


@patch(f"{MODULE}.write")
@patch(f"{MODULE}.execute.write")
def test(mock_write):
with (
patch(f"{MODULE}.get_data", new=mock_get_data),
patch(f"{MODULE}.get_chain_responses", new=mock_get_chain_responses),
patch(f"{MODULE}.helpers.get_data", new=mock_get_data),
patch(f"{MODULE}.helpers.get_chain_responses", new=mock_get_chain_responses),
):
execute_pull()

Expand Down
47 changes: 47 additions & 0 deletions tests/op_datasets/models/compute/test_udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,3 +235,50 @@ def test_trace_address_helpers():
(3, "0,100"),
(3, "0,10"),
]


def test_conversion_from_hex_to_number():
    """Check hex_to_lossy and hex_to_lossless on 32-byte hex words.

    The first two words have non-zero digits only in the low 16 hex digits
    and are expected to convert with both functions. The last two extend
    past that and are expected to yield NULL from hex_to_lossy.
    """
    context = init_client()
    create_duckdb_macros(context)

    hex_words = [
        # The comment line below is to help count non-zero characters:
        #                                 21098765432109876543210987654321
        "0x000000000000000000000000000000000000000000000000ffffffffffffffff",
        "0x000000000000000000000000000000000000000000000000bdb713e036980000",
        "0x00000000000000000000000000000000000000000002e64dbdb713e036980000",
        "0x0000000000000000000000000000000000000000fff2e64dbdb713e036980000",
    ]

    def convert(word):
        # One query per word; the single result row is (m1, m2).
        rows = context.client.sql(f"""
            SELECT
            hex_to_lossy('{word}') as m1,
            hex_to_lossless('{word}') as m2,
            """).fetchall()
        return rows[0]

    converted = [convert(word) for word in hex_words]

    assert converted == [
        (18446744073709551615, "18446744073709551615"),
        (13670417047615963136, "13670417047615963136"),
        (None, "3505430000000000000000000"),
        (None, "79212325131150503526748651520"),
    ]

    # Wherever the lossy conversion produced a number it must agree with the
    # lossless one.
    for as_number, as_text in converted:
        if as_number is None:
            continue
        assert as_number == int(as_text)


def test_indexed_event_arg_to_address():
    """Check that the macro keeps the last 40 hex digits behind a "0x" prefix."""
    context = init_client()
    create_duckdb_macros(context)

    rows = context.client.sql("""
        SELECT
        indexed_event_arg_to_address('0x0000000000000000000000000b940efc27f8da57c8dbd6ff6540b6ef9a1af76a') as m1,
        """).fetchall()
    first_row = rows[0]

    assert first_row == ("0x0b940efc27f8da57c8dbd6ff6540b6ef9a1af76a",)

0 comments on commit 261ab13

Please sign in to comment.