From 6ec99724bcaa22e9e66d0ec3e74cc533ddf499ae Mon Sep 17 00:00:00 2001
From: leoliu-oc
Date: Wed, 12 Jun 2024 17:12:28 +0800
Subject: [PATCH] crypto: Add support for Zhaoxin SHA algorithm

zhaoxin inclusion
category: feature

-------------------

Some Zhaoxin processors come with an integrated crypto engine (so-called
Zhaoxin ACE, Advanced Cryptography Engine) that provides instructions for
very fast cryptographic operations with the supported SHA1/SHA256
algorithms.

Signed-off-by: leoliu-oc
---
 drivers/crypto/Kconfig       |  15 ++
 drivers/crypto/Makefile      |   1 +
 drivers/crypto/padlock-sha.c |   2 +-
 drivers/crypto/zhaoxin-sha.c | 304 +++++++++++++++++++++++++++++++++++
 4 files changed, 321 insertions(+), 1 deletion(-)
 create mode 100644 drivers/crypto/zhaoxin-sha.c

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 5094407d2b3d8..624e6a0919c53 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -79,6 +79,21 @@ config CRYPTO_DEV_ZHAOXIN_AES
 	  If unsure say M. The compiled module will be
 	  called zhaoxin-aes.

+config CRYPTO_DEV_ZHAOXIN_SHA
+	tristate "Zhaoxin ACE driver for SHA1 and SHA256 algorithms"
+	depends on CRYPTO_DEV_ZHAOXIN
+	default CRYPTO_DEV_ZHAOXIN
+	select CRYPTO_HASH
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	help
+	  Use Zhaoxin ACE for SHA1/SHA256 algorithms.
+
+	  Available in Zhaoxin processors.
+
+	  If unsure say M. The compiled module will be
+	  called zhaoxin-sha.
+
 config CRYPTO_DEV_GEODE
 	tristate "Support for the Geode LX AES engine"
 	depends on X86_32 && PCI
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 407a03c1bd1c1..6bdeecdf9f2f6 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o
 obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o
 obj-$(CONFIG_CRYPTO_DEV_ZHAOXIN_AES) += zhaoxin-aes.o
+obj-$(CONFIG_CRYPTO_DEV_ZHAOXIN_SHA) += zhaoxin-sha.o
 obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/
 obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_QCOM_RNG) += qcom-rng.o
diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c
index 6865c7f1fc1a2..04858dc8b5979 100644
--- a/drivers/crypto/padlock-sha.c
+++ b/drivers/crypto/padlock-sha.c
@@ -491,7 +491,7 @@ static struct shash_alg sha256_alg_nano = {
 };

 static const struct x86_cpu_id padlock_sha_ids[] = {
-	X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
+	{ X86_VENDOR_CENTAUR, 6, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_PHE },
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
diff --git a/drivers/crypto/zhaoxin-sha.c b/drivers/crypto/zhaoxin-sha.c
new file mode 100644
index 0000000000000..5fa117a1a50e6
--- /dev/null
+++ b/drivers/crypto/zhaoxin-sha.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Zhaoxin ACE (Advanced Cryptography Engine) SHA1/SHA256 hash driver.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define DRIVER_VERSION "1.0.0"
+
+static inline void padlock_output_block(const uint32_t *src, uint32_t *dst, size_t count)
+{
+	while (count--)
+		*dst++ = swab32(*src++);
+}
+
+/*
+ * Two shash_alg instances for the hardware-implemented multi-part hash
+ * supported by Zhaoxin processors; partial blocks stay in the sha*_state buffer.
+ */
+static int padlock_sha1_init_zhaoxin(struct shash_desc *desc)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha1_state){
+		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
+	};
+
+	return 0;
+}
+
+static int padlock_sha1_update_zhaoxin(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+	struct sha1_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial, done;
+	const u8 *src;
+
+	/* The PHE requires a 128-byte output buffer aligned to PADLOCK_ALIGNMENT */
+	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __aligned(STACK_ALIGN);
+	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+
+	partial = sctx->count & 0x3f;
+	sctx->count += len;
+	done = 0;
+	src = data;
+	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
+
+	if ((partial + len) >= SHA1_BLOCK_SIZE) {
+		/* Top up the partial block held in the state buffer and hash it first */
+		if (partial) {
+			done = -partial;	/* wraps: done + block size == bytes needed to fill the block */
+			memcpy(sctx->buffer + partial, data, done + SHA1_BLOCK_SIZE);
+			src = sctx->buffer;
+			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
+				      : "+S"(src), "+D"(dst)
+				      : "a"((long)-1), "c"((unsigned long)1) : "memory");
+			done += SHA1_BLOCK_SIZE;
+			src = data + done;
+		}
+
+		/* Hash all remaining whole blocks directly from the input data */
+		if (len - done >= SHA1_BLOCK_SIZE) {
+			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
+				      : "+S"(src), "+D"(dst)
+				      : "a"((long)-1), "c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)) : "memory");
+			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
+			src = data + done;
+		}
+		partial = 0;
+	}
+	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
+	memcpy(sctx->buffer + partial, src, len - done);
+
+	return 0;
+}
+
+static int padlock_sha1_final_zhaoxin(struct shash_desc *desc, u8 *out)
+{
+	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
+	unsigned int partial, padlen;
+	__be64 bits;
+	static const u8 padding[64] = { 0x80, };
+
+	bits = cpu_to_be64(state->count << 3);
+
+	/* Pad out to 56 mod 64 */
+	partial = state->count & 0x3f;
+	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
+	padlock_sha1_update_zhaoxin(desc, padding, padlen);
+
+	/* Append length field bytes */
+	padlock_sha1_update_zhaoxin(desc, (const u8 *)&bits, sizeof(bits));
+
+	/* Swap to output */
+	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
+
+	return 0;
+}
+
+static int padlock_sha256_init_zhaoxin(struct shash_desc *desc)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+
+	*sctx = (struct sha256_state) {
+		.state = {
+			SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+			SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7
+		},
+	};
+
+	return 0;
+}
+
+static int padlock_sha256_update_zhaoxin(struct shash_desc *desc, const u8 *data, unsigned int len)
+{
+	struct sha256_state *sctx = shash_desc_ctx(desc);
+	unsigned int partial, done;
+	const u8 *src;
+
+	/* The PHE requires a 128-byte output buffer aligned to PADLOCK_ALIGNMENT */
+	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __aligned(STACK_ALIGN);
+	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
+
+	partial = sctx->count & 0x3f;
+	sctx->count += len;
+	done = 0;
+	src = data;
+	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
+
+	if ((partial + len) >= SHA256_BLOCK_SIZE) {
+
+		/* Top up the partial block held in the state buffer and hash it first */
+		if (partial) {
+			done = -partial;	/* wraps: done + block size == bytes needed to fill the block */
+			memcpy(sctx->buf + partial, data, done + SHA256_BLOCK_SIZE);
+			src = sctx->buf;
+			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
+				      : "+S"(src), "+D"(dst)
+				      : "a"((long)-1), "c"((unsigned long)1) : "memory");
+			done += SHA256_BLOCK_SIZE;
+			src = data + done;
+		}
+
+		/* Hash all remaining whole blocks directly from the input data */
+		if (len - done >= SHA256_BLOCK_SIZE) {
+			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
+				      : "+S"(src), "+D"(dst)
+				      : "a"((long)-1), "c"((unsigned long)((len - done) / SHA256_BLOCK_SIZE)) : "memory");
+			done += ((len - done) - (len - done) % SHA256_BLOCK_SIZE);
+			src = data + done;
+		}
+		partial = 0;
+	}
+	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
+	memcpy(sctx->buf + partial, src, len - done);
+
+	return 0;
+}
+
+static int padlock_sha256_final_zhaoxin(struct shash_desc *desc, u8 *out)
+{
+	struct sha256_state *state = (struct sha256_state *)shash_desc_ctx(desc);
+	unsigned int partial, padlen;
+	__be64 bits;
+	static const u8 padding[64] = { 0x80, };
+
+	bits = cpu_to_be64(state->count << 3);
+
+	/* Pad out to 56 mod 64 */
+	partial = state->count & 0x3f;
+	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
+	padlock_sha256_update_zhaoxin(desc, padding, padlen);
+
+	/* Append length field bytes */
+	padlock_sha256_update_zhaoxin(desc, (const u8 *)&bits, sizeof(bits));
+
+	/* Swap to output */
+	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
+
+	return 0;
+}
+
+static int padlock_sha_export_zhaoxin(struct shash_desc *desc, void *out)
+{
+	unsigned int statesize = crypto_shash_statesize(desc->tfm);
+	void *sctx = shash_desc_ctx(desc);
+
+	memcpy(out, sctx, statesize);
+	return 0;
+}
+
+static int padlock_sha_import_zhaoxin(struct shash_desc *desc, const void *in)
+{
+	unsigned int statesize = crypto_shash_statesize(desc->tfm);
+	void *sctx = shash_desc_ctx(desc);
+
+	memcpy(sctx, in, statesize);
+	return 0;
+}
+
+static struct shash_alg sha1_alg_zhaoxin = {
+	.digestsize	= SHA1_DIGEST_SIZE,
+	.init		= padlock_sha1_init_zhaoxin,
+	.update		= padlock_sha1_update_zhaoxin,
+	.final		= padlock_sha1_final_zhaoxin,
+	.export		= padlock_sha_export_zhaoxin,
+	.import		= padlock_sha_import_zhaoxin,
+	.descsize	= sizeof(struct sha1_state),
+	.statesize	= sizeof(struct sha1_state),
+	.base		= {
+		.cra_name		= "sha1",
+		.cra_driver_name	= "sha1-padlock-zhaoxin",
+		.cra_priority		= PADLOCK_CRA_PRIORITY,
+		.cra_blocksize		= SHA1_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static struct shash_alg sha256_alg_zhaoxin = {
+	.digestsize	= SHA256_DIGEST_SIZE,
+	.init		= padlock_sha256_init_zhaoxin,
+	.update		= padlock_sha256_update_zhaoxin,
+	.final		= padlock_sha256_final_zhaoxin,
+	.export		= padlock_sha_export_zhaoxin,
+	.import		= padlock_sha_import_zhaoxin,
+	.descsize	= sizeof(struct sha256_state),
+	.statesize	= sizeof(struct sha256_state),
+	.base		= {
+		.cra_name		= "sha256",
+		.cra_driver_name	= "sha256-padlock-zhaoxin",
+		.cra_priority		= PADLOCK_CRA_PRIORITY,
+		.cra_blocksize		= SHA256_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static const struct x86_cpu_id zhaoxin_sha_ids[] = {
+	{ X86_VENDOR_CENTAUR, 7, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_PHE },
+	{ X86_VENDOR_ZHAOXIN, 7, X86_MODEL_ANY, X86_STEPPING_ANY, X86_FEATURE_PHE },
+	{}
+};
+MODULE_DEVICE_TABLE(x86cpu, zhaoxin_sha_ids);
+
+static int __init padlock_init(void)
+{
+	int rc = -ENODEV;
+	struct shash_alg *sha1;
+	struct shash_alg *sha256;
+
+	if (!x86_match_cpu(zhaoxin_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
+		return -ENODEV;
+
+	sha1 = &sha1_alg_zhaoxin;
+	sha256 = &sha256_alg_zhaoxin;
+
+	rc = crypto_register_shash(sha1);
+	if (rc)
+		goto out;
+
+	rc = crypto_register_shash(sha256);
+	if (rc)
+		goto out_unreg1;
+
+	pr_notice("Using ACE for SHA1/SHA256 algorithms.\n");
+
+	return 0;
+
+out_unreg1:
+	crypto_unregister_shash(sha1);
+
+out:
+	pr_err("ACE SHA1/SHA256 initialization failed.\n");
+	return rc;
+}
+
+static void __exit padlock_fini(void)
+{
+	crypto_unregister_shash(&sha1_alg_zhaoxin);
+	crypto_unregister_shash(&sha256_alg_zhaoxin);
+}
+
+module_init(padlock_init);
+module_exit(padlock_fini);
+
+MODULE_DESCRIPTION("ACE SHA1/SHA256 algorithms support.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Michal Ludvig");
+MODULE_VERSION(DRIVER_VERSION);
+
+MODULE_ALIAS_CRYPTO("sha1-all");
+MODULE_ALIAS_CRYPTO("sha256-all");
+MODULE_ALIAS_CRYPTO("sha1-padlock");
+MODULE_ALIAS_CRYPTO("sha256-padlock");