From 5ae4c7816aeaf1e4b832e4f389b379570bccfcc4 Mon Sep 17 00:00:00 2001 From: Shengqi Chen Date: Sat, 7 Sep 2024 21:55:03 +0800 Subject: [PATCH] zcommon: add specialized versions of cityhash4 Specializing cityhash4 on 32-bit architectures can reduce the size of stack frames as well as instruction count. This is a tiny but useful optimization, since some callsites invoke it frequently. When specializing into 1/2/3/4-arg versions, the stack usage (in bytes) on some 32-bit arches is listed as follows: - x86: 32, 32, 32, 40 - arm-v7a: 20, 20, 28, 36 - riscv: 0, 0, 0, 16 - power: 16, 16, 16, 32 - mipsel: 8, 8, 8, 24 The same tendency applies to the count of instructions. Therefore, the 1-arg version is defined as a macro to the 2-arg one. On all 64-bit arches, the differences are negligible. See more discussion at https://github.com/openzfs/zfs/pull/16483. Acked-by: Alexander Motin Signed-off-by: Shengqi Chen --- include/cityhash.h | 8 ++++++++ lib/libzfs/libzfs.abi | 13 +++++++++++++ module/zcommon/cityhash.c | 23 +++++++++++++++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/include/cityhash.h b/include/cityhash.h index 3b2d1e84b5b3..b78b5b174ad0 100644 --- a/include/cityhash.h +++ b/include/cityhash.h @@ -32,6 +32,14 @@ extern "C" { #endif +/* + * We have 2/3-argument specialized versions of cityhash4, + * which can reduce instruction count and stack usage on some 32-bit arches. + * For 1-arg version, using cityhash2 is enough. 
+ */ +#define cityhash1(w) (cityhash2(w, 0)) +_SYS_CITYHASH_H uint64_t cityhash2(uint64_t, uint64_t); +_SYS_CITYHASH_H uint64_t cityhash3(uint64_t, uint64_t, uint64_t); _SYS_CITYHASH_H uint64_t cityhash4(uint64_t, uint64_t, uint64_t, uint64_t); #ifdef __cplusplus diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 51b29643ee0c..c3f8a7dc5569 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -153,6 +153,8 @@ + + @@ -9179,6 +9181,17 @@ + + + + + + + + + + + diff --git a/module/zcommon/cityhash.c b/module/zcommon/cityhash.c index 413a96df2cda..303d6855a376 100644 --- a/module/zcommon/cityhash.c +++ b/module/zcommon/cityhash.c @@ -49,8 +49,8 @@ cityhash_helper(uint64_t u, uint64_t v, uint64_t mul) return (b); } -uint64_t -cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) +static inline uint64_t +cityhash_impl(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) { uint64_t mul = HASH_K2 + 64; uint64_t a = w1 * HASH_K1; @@ -59,9 +59,28 @@ cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) uint64_t d = w3 * HASH_K2; return (cityhash_helper(rotate(a + b, 43) + rotate(c, 30) + d, a + rotate(b + HASH_K2, 18) + c, mul)); +} + +uint64_t +cityhash2(uint64_t w1, uint64_t w2) +{ + return (cityhash_impl(w1, w2, 0, 0)); +} +uint64_t +cityhash3(uint64_t w1, uint64_t w2, uint64_t w3) +{ + return (cityhash_impl(w1, w2, w3, 0)); +} + +uint64_t +cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4) +{ + return (cityhash_impl(w1, w2, w3, w4)); } #if defined(_KERNEL) +EXPORT_SYMBOL(cityhash2); +EXPORT_SYMBOL(cityhash3); EXPORT_SYMBOL(cityhash4); #endif