diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 686e92d2663ee..02747d8eb55a8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -34,6 +34,7 @@ enum cpuid_leafs CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, CPUID_LNX_5, + CPUID_C000_0006_EAX, NR_CPUID_WORDS, }; @@ -94,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 22, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + BUILD_BUG_ON_ZERO(NCAPINTS != 23)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -120,8 +122,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 22, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 22)) + BUILD_BUG_ON_ZERO(NCAPINTS != 23)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8c1593dd2c317..31fb5ac670178 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 22 /* N 32-bit words worth of info */ +#define NCAPINTS 23 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -468,6 +468,9 @@ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 22 */ +#define X86_FEATURE_ZXPAUSE (22*32+ 0) /* ZHAOXIN ZXPAUSE */ + /* * BUG word(s) */ diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index 630891d258198..4dbb3fea67fb5 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -7,6 +7,7 @@ void __init use_tsc_delay(void); void __init use_tpause_delay(void); +void __init use_zxpause_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 88fcf08458d9c..b108e656fa5b8 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -144,6 +144,7 @@ #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 #define DISABLED_MASK21 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) +#define DISABLED_MASK22 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 621bac6b74011..390fa38bf589d 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -75,12 +75,23 @@ #define MSR_IA32_UMWAIT_CONTROL 0xe1 #define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) #define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) + +#define MSR_ZX_PAUSE_CONTROL 0x187f +#define MSR_ZX_PAUSE_CONTROL_C02_DISABLE BIT(0) +#define MSR_ZX_PAUSE_CONTROL_RESERVED BIT(1) + /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) + /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bae83810505bf..b1836840cf83a 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -26,6 +26,8 @@ #define TPAUSE_C01_STATE 1 #define TPAUSE_C02_STATE 0 +#define ZXPAUSE_C01_STATE 1 + static __always_inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { @@ -148,4 +150,23 @@ static inline void __tpause(u32 ecx, u32 edx, u32 eax) #endif } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the ZX_PAUSE_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __zxpause(u32 ecx, u32 edx, u32 eax) +{ + /* "zxpause %ecx, %edx, %eax;" */ + #ifdef CONFIG_AS_ZXPAUSE + asm volatile("zxpause %%ecx\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #else + asm volatile(".byte 0xf2, 0x0f, 0xa6, 0xd0\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #endif + } #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index e9187ddd3d1fd..76953f757f3c3 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -100,6 +100,7 @@ #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 #define REQUIRED_MASK21 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) +#define REQUIRED_MASK22 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4350f6bfc0641..e10099f4a0afc 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -25,6 +25,7 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y += umwait.o +obj-y += zxpause.o obj-$(CONFIG_PROC_FS) += proc.o obj-y += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 345f7d905db67..6c1e1606f40c6 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -109,6 +109,9 @@ static void early_init_centaur(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } + + if (cpuid_eax(0xC0000000) >= 0xC0000006) + c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 05fa4ef634902..0b38ccef64523 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -79,6 +79,8 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); } + if (cpuid_eax(0xC0000000) >= 0xC0000006) + c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); } static void init_zhaoxin(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/zxpause.c b/arch/x86/kernel/cpu/zxpause.c new file mode 100644 index 0000000000000..f3f236168871e --- /dev/null +++ b/arch/x86/kernel/cpu/zxpause.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include +#include + +#define ZXPAUSE_C02_ENABLE 0 + +#define ZXPAUSE_CTRL_VAL(max_time, c02_disable) \ + (((max_time) & MSR_ZX_PAUSE_CONTROL_TIME_MASK) | \ + ((c02_disable) & MSR_ZX_PAUSE_CONTROL_C02_DISABLE)) + +/* + * Cache ZX_PAUSE_CONTROL MSR. This is a systemwide control. By default, + * zxpause max time is 100000 in TSC-quanta and C0.2 is enabled + */ +static u32 zxpause_control_cached = ZXPAUSE_CTRL_VAL(100000, ZXPAUSE_C02_ENABLE); + +/* + * Cache the original ZX_PAUSE_CONTROL MSR value which is configured by + * hardware or BIOS before kernel boot. + */ +static u32 orig_zxpause_control_cached __ro_after_init; + +/* + * Serialize access to zxpause_control_cached and ZX_PAUSE_CONTROL MSR in + * the sysfs write functions. + */ +static DEFINE_MUTEX(zxpause_lock); + +static void zxpause_update_control_msr(void * unused) +{ + lockdep_assert_irqs_disabled(); + wrmsr(MSR_ZX_PAUSE_CONTROL, READ_ONCE(zxpause_control_cached), 0); +} + +/* + * The CPU hotplug callback sets the control MSR to the global control + * value. + * + * Disable interrupts so the read of zxpause_control_cached and the WRMSR + * are protected against a concurrent sysfs write. Otherwise the sysfs + * write could update the cached value after it had been read on this CPU + * and issue the IPI before the old value had been written. The IPI would + * interrupt, write the new value and after return from IPI the previous + * value would be written by this CPU. + * + * With interrupts disabled the upcoming CPU either sees the new control + * value or the IPI is updating this CPU to the new control value after + * interrupts have been reenabled. + */ +static int zxpause_cpu_online(unsigned int cpu) +{ + local_irq_disable(); + zxpause_update_control_msr(NULL); + local_irq_enable(); + return 0; +} + +/* + * The CPU hotplug callback sets the control MSR to the original control + * value. + */ +static int zxpause_cpu_offline(unsigned int cpu) +{ + /* + * This code is protected by the CPU hotplug already and + * orig_zxpause_control_cached is never changed after it caches + * the original control MSR value in zxpause_init(). So there + * is no race condition here. + */ + wrmsr(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached, 0); + + return 0; +} + +/* + * On resume, restore ZX_PAUSE_CONTROL MSR on the boot processor which + * is the only active CPU at this time. The MSR is set up on the APs via the + * CPU hotplug callback. + * + * This function is invoked on resume from suspend and hibernation. On + * resume from suspend the restore should be not required, but we neither + * trust the firmware nor does it matter if the same value is written + * again. + */ +static void zxpause_syscore_resume(void) +{ + zxpause_update_control_msr(NULL); +} + +static struct syscore_ops zxpause_syscore_ops = { + .resume = zxpause_syscore_resume, +}; + +/* sysfs interface */ + +/* + * When bit 0 in ZX_PAUSE_CONTROL MSR is 1, C0.2 is disabled. + * Otherwise, C0.2 is enabled. + */ +static inline bool zxpause_ctrl_c02_enabled(u32 ctrl) +{ + return !(ctrl & MSR_ZX_PAUSE_CONTROL_C02_DISABLE); +} + +static inline u32 zxpause_ctrl_max_time(u32 ctrl) +{ + return ctrl & MSR_ZX_PAUSE_CONTROL_TIME_MASK; +} + +static inline void zxpause_update_control(u32 maxtime, bool c02_enable) +{ + u32 ctrl = maxtime & MSR_ZX_PAUSE_CONTROL_TIME_MASK; + + if (!c02_enable) + ctrl |= MSR_ZX_PAUSE_CONTROL_C02_DISABLE; + + WRITE_ONCE(zxpause_control_cached, ctrl); + /* Propagate to all CPUs */ + on_each_cpu(zxpause_update_control_msr, NULL, 1); +} + +static ssize_t +enable_c02_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(zxpause_control_cached); + + return sprintf(buf, "%d\n", zxpause_ctrl_c02_enabled(ctrl)); +} + +static ssize_t enable_c02_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + bool c02_enable; + u32 ctrl; + int ret; + + ret = kstrtobool(buf, &c02_enable); + if (ret) + return ret; + + mutex_lock(&zxpause_lock); + + ctrl = READ_ONCE(zxpause_control_cached); + if (c02_enable != zxpause_ctrl_c02_enabled(ctrl)) + zxpause_update_control(ctrl, c02_enable); + + mutex_unlock(&zxpause_lock); + + return count; +} +static DEVICE_ATTR_RW(enable_c02); + +static ssize_t +max_time_show(struct device *kobj, struct device_attribute *attr, char *buf) +{ + u32 ctrl = READ_ONCE(zxpause_control_cached); + + return sprintf(buf, "%u\n", zxpause_ctrl_max_time(ctrl)); +} + +static ssize_t max_time_store(struct device *kobj, + struct device_attribute *attr, + const char *buf, size_t count) +{ + u32 max_time, ctrl; + int ret; + + ret = kstrtou32(buf, 0, &max_time); + if (ret) + return ret; + + /* bits[1:0] must be zero */ + if (max_time & ~MSR_ZX_PAUSE_CONTROL_TIME_MASK) + return -EINVAL; + + mutex_lock(&zxpause_lock); + + ctrl = READ_ONCE(zxpause_control_cached); + if (max_time != zxpause_ctrl_max_time(ctrl)) + zxpause_update_control(max_time, zxpause_ctrl_c02_enabled(ctrl)); + + mutex_unlock(&zxpause_lock); + + return count; +} +static DEVICE_ATTR_RW(max_time); + +static struct attribute *zxpause_attrs[] = { + &dev_attr_enable_c02.attr, + &dev_attr_max_time.attr, + NULL +}; + +static struct attribute_group zxpause_attr_group = { + .attrs = zxpause_attrs, + .name = "zxpause_control", +}; + +static int __init zxpause_init(void) +{ + struct device *dev; + int ret; + + if (!boot_cpu_has(X86_FEATURE_ZXPAUSE)) + return -ENODEV; + + /* + * Cache the original control MSR value before the control MSR is + * changed. This is the only place where orig_zxpause_control_cached + * is modified. + */ + rdmsrl(MSR_ZX_PAUSE_CONTROL, orig_zxpause_control_cached); + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "zxpause:online", + zxpause_cpu_online, zxpause_cpu_offline); + if (ret < 0) { + /* + * On failure, the control MSR on all CPUs has the + * original control value. + */ + return ret; + } + + register_syscore_ops(&zxpause_syscore_ops); + + /* + * Add zxpause control interface. Ignore failure, so at least the + * default values are set up in case the machine manages to boot. + */ + dev = bus_get_dev_root(&cpu_subsys); + return sysfs_create_group(&dev->kobj, &zxpause_attr_group); +} +device_initcall(zxpause_init); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e42faa792c079..0b5aa39f0db6c 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -102,6 +102,9 @@ static __init void x86_late_time_init(void) if (static_cpu_has(X86_FEATURE_WAITPKG)) use_tpause_delay(); + + if (static_cpu_has(X86_FEATURE_ZXPAUSE)) + use_zxpause_delay(); } /* diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 0e65d00e2339f..84076b82fb892 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -117,6 +117,27 @@ static void delay_halt_tpause(u64 start, u64 cycles) __tpause(TPAUSE_C02_STATE, edx, eax); } +/* +* On ZHAOXIN the ZXPAUSE instruction waits until any of: +* 1) the delta of TSC counter exceeds the value provided in EDX:EAX +* 2) global timeout in ZX_PAUSE_CONTROL is exceeded +* 3) an external interrupt occurs +*/ +static void delay_halt_zxpause(u64 unused, u64 cycles) +{ + u64 until = cycles; + u32 eax, edx; + + eax = lower_32_bits(until); + edx = upper_32_bits(until); + + /* + * Hard code the deeper (C0.1) sleep state because exit latency is + * small compared to the "microseconds" that usleep() will delay. + */ + __zxpause(ZXPAUSE_C01_STATE, edx, eax); +} + /* * On some AMD platforms, MWAITX has a configurable 32-bit timer, that * counts with TSC frequency. The input value is the number of TSC cycles @@ -183,6 +204,12 @@ void __init use_tpause_delay(void) delay_fn = delay_halt; } +void __init use_zxpause_delay(void) +{ + delay_halt_fn = delay_halt_zxpause; + delay_fn = delay_halt; +} + void use_mwaitx_delay(void) { delay_halt_fn = delay_halt_mwaitx; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 845a4023ba44e..ef45425f83165 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 23 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -443,6 +443,9 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000006, word 22 */ +#define X86_FEATURE_ZXPAUSE (22*32+ 0) /* ZHAOXIN ZXPAUSE */ + /* * BUG word(s) */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index fafe9be7a6f4f..cbf8c3ea37fe3 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -131,6 +131,8 @@ #define DISABLED_MASK18 0 #define DISABLED_MASK19 0 #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK22 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 1d111350197f3..7eddf5b9e3938 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -72,12 +72,23 @@ #define MSR_IA32_UMWAIT_CONTROL 0xe1 #define MSR_IA32_UMWAIT_CONTROL_C02_DISABLE BIT(0) #define MSR_IA32_UMWAIT_CONTROL_RESERVED BIT(1) + +#define MSR_ZX_PAUSE_CONTROL 0x187f +#define MSR_ZX_PAUSE_CONTROL_C02_DISABLE BIT(0) +#define MSR_ZX_PAUSE_CONTROL_RESERVED BIT(1) + /* * The time field is bit[31:2], but representing a 32bit value with * bit[1:0] zero. */ #define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U) +/* + * The time field is bit[31:2], but representing a 32bit value with + * bit[1:0] zero. + */ +#define MSR_ZX_PAUSE_CONTROL_TIME_MASK (~0x03U) + /* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */ #define MSR_IA32_CORE_CAPS 0x000000cf #define MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT 2 diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 7ba1726b71c7b..76953f757f3c3 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -99,6 +99,8 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK22 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 23) #endif /* _ASM_X86_REQUIRED_FEATURES_H */