Skip to content

Commit

Permalink
KVM: x86: Introduce support for Zhaoxin ZXPAUSE instruction
Browse files Browse the repository at this point in the history
zhaoxin inclusion
category: feature

--------------------

This patch introduces support for the ZXPAUSE instruction, a new addition
akin to Intel's TPAUSE. Two primary distinctions set apart ZXPAUSE from
TPAUSE:

1. ZXPAUSE utilizes a delta tsc, determined from the lesser value between
   (MSR_ZX_PAUSE_CONTROL[31:2] << 2) and the EDX:EAX input to the ZXPAUSE
   instruction, subtracted from the current tsc value.

   In contrast, TPAUSE employs a target tsc, computed from the lesser
   value between (MSR_IA32_UMWAIT_CONTROL[31:2] << 2) and the EDX:EAX
   input to the TPAUSE instruction.

2. As of now, ZXPAUSE exclusively supports the C0.1 optimization state,
   whereas TPAUSE potentially extends support to both C0.1 and C0.2.

This patch depends on QEMU support for ZXPAUSE, which we are currently
submitting to QEMU. It also requires the preceding patch in this patchset,
which adds Linux kernel support for ZXPAUSE.

The choice of the name "vmx->msr_ia32_umwait_control" is deliberate. In
patches for other Linux versions (e.g., 5.5), a
"vmx->msr_ia32_umwait_control" already exists. By sharing this variable
name with Intel, it ensures compatibility. The difference is merely
software-based and poses no real-world conflicts.

Currently, if the Guest writes to the ZXPAUSE/TPAUSE CONTROL MSR, we
simply bypass the WRMSR instruction. If the Guest attempts to use
ZXPAUSE/TPAUSE to transition the vCPU into an optimized state, it will
succeed, with the duration of the optimized state being the value passed
in EDX:EAX.

Of course, this state can be interrupted by external interrupts and other
events specified in the specification.

Signed-off-by: leoliu-oc <[email protected]>
  • Loading branch information
leoliu-oc committed Jun 14, 2024
1 parent 730ba28 commit a96fd43
Show file tree
Hide file tree
Showing 12 changed files with 142 additions and 6 deletions.
7 changes: 7 additions & 0 deletions arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,13 @@
#define MSR_VIA_RNG 0x0000110b
#define MSR_VIA_BCR2 0x00001147

/*
* Zhaoxin extended VMCS capabilities:
* bit 0: exec-cntl3 VMCS field.
*/
#define MSR_ZX_EXT_VMCS_CAPS 0x1675
#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0)

/* Transmeta defined MSRs */
#define MSR_TMTA_LONGRUN_CTRL 0x80868010
#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
Expand Down
8 changes: 8 additions & 0 deletions arch/x86/include/asm/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@
*/
#define TERTIARY_EXEC_IPI_VIRT VMCS_CONTROL_BIT(IPI_VIRT)

/*
* Definitions of Zhaoxin Tertiary Processor-Based VM-Execution Controls.
*/
#define ZX_TERTIARY_EXEC_GUEST_ZXPAUSE VMCS_CONTROL_BIT(GUEST_ZXPAUSE)


#define PIN_BASED_EXT_INTR_MASK VMCS_CONTROL_BIT(INTR_EXITING)
#define PIN_BASED_NMI_EXITING VMCS_CONTROL_BIT(NMI_EXITING)
#define PIN_BASED_VIRTUAL_NMIS VMCS_CONTROL_BIT(VIRTUAL_NMIS)
Expand Down Expand Up @@ -235,6 +241,7 @@ enum vmcs_field {
TERTIARY_VM_EXEC_CONTROL_HIGH = 0x00002035,
PID_POINTER_TABLE = 0x00002042,
PID_POINTER_TABLE_HIGH = 0x00002043,
ZXPAUSE_VMEXIT_TSC = 0x00002200,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
VMCS_LINK_POINTER = 0x00002800,
Expand Down Expand Up @@ -284,6 +291,7 @@ enum vmcs_field {
PLE_GAP = 0x00004020,
PLE_WINDOW = 0x00004022,
NOTIFY_WINDOW = 0x00004024,
ZX_TERTIARY_VM_EXEC_CONTROL = 0x00004200,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
Expand Down
5 changes: 4 additions & 1 deletion arch/x86/include/asm/vmxfeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
/*
* Defines VMX CPU feature bits
*/
#define NVMXINTS 5 /* N 32-bit words worth of info */
#define NVMXINTS 6 /* N 32-bit words worth of info */

/*
* Note: If the comment begins with a quoted string, that string is used
Expand Down Expand Up @@ -89,4 +89,7 @@

/* Tertiary Processor-Based VM-Execution Controls, word 3 */
#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */

/* Zhaoxin Tertiary Processor-Based VM-Execution Controls, word 5 */
#define VMX_FEATURE_GUEST_ZXPAUSE ( 5*32+ 0) /* zxpause instruction in guest mode */
#endif /* _ASM_X86_VMXFEATURES_H */
8 changes: 8 additions & 0 deletions arch/x86/kernel/cpu/feat_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum vmx_feature_leafs {
SECONDARY_CTLS,
TERTIARY_CTLS_LOW,
TERTIARY_CTLS_HIGH,
ZX_TERTIARY_CTLS,
NR_VMX_FEATURE_WORDS,
};

Expand Down Expand Up @@ -97,6 +98,13 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_EPT_AD);
if (c->vmx_capability[MISC_FEATURES] & VMX_F(VPID))
set_cpu_cap(c, X86_FEATURE_VPID);
/*
* Initialize Zhaoxin Tertiary Exec Control feature flags.
*/
rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &supported, &ign);
if (supported & MSR_ZX_VMCS_EXEC_CTL3)
c->vmx_capability[ZX_TERTIARY_CTLS] |= VMX_F(GUEST_ZXPAUSE);

}
#endif /* CONFIG_X86_VMX_FEATURE_NAMES */

Expand Down
12 changes: 10 additions & 2 deletions arch/x86/kvm/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,9 @@ void kvm_set_cpu_caps(void)
F(PMM) | F(PMM_EN)
);

/* Zhaoxin 0xC0000006 leaf */
kvm_cpu_cap_mask(CPUID_C000_0006_EAX, 0 /* bit0: zxpause */ | 0 /* bit1 HMAC */);

/*
* Hide RDTSCP and RDPID if either feature is reported as supported but
* probing MSR_TSC_AUX failed. This is purely a sanity check and
Expand Down Expand Up @@ -1305,17 +1308,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
/*Add support for Centaur's CPUID instruction*/
case 0xC0000000:
/*Just support up to 0xC0000004 now*/
entry->eax = min(entry->eax, 0xC0000004);
/* Extended to 0xC0000006 */
entry->eax = min(entry->eax, 0xC0000006);
break;
case 0xC0000001:
cpuid_entry_override(entry, CPUID_C000_0001_EDX);
break;
case 0xC0000006:
cpuid_entry_override(entry, CPUID_C000_0006_EAX);
break;

case 3: /* Processor serial number */
case 5: /* MONITOR/MWAIT */
case 0xC0000002:
case 0xC0000003:
case 0xC0000004:
case 0xC0000005:
default:
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
break;
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kvm/reverse_cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_8000_0021_EAX] = {0x80000021, 0, CPUID_EAX},
[CPUID_8000_0022_EAX] = {0x80000022, 0, CPUID_EAX},
[CPUID_7_2_EDX] = { 7, 2, CPUID_EDX},
[CPUID_C000_0006_EAX] = {0xc0000006, 0, CPUID_EAX},
};

/*
Expand Down
6 changes: 6 additions & 0 deletions arch/x86/kvm/vmx/capabilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct vmcs_config {
u32 pin_based_exec_ctrl;
u32 cpu_based_exec_ctrl;
u32 cpu_based_2nd_exec_ctrl;
u32 zx_cpu_based_3rd_exec_ctrl;
u64 cpu_based_3rd_exec_ctrl;
u32 vmexit_ctrl;
u32 vmentry_ctrl;
Expand Down Expand Up @@ -255,6 +256,11 @@ static inline bool cpu_has_vmx_xsaves(void)
SECONDARY_EXEC_ENABLE_XSAVES;
}

/*
 * Report whether the global vmcs_config has the Zhaoxin guest-ZXPAUSE
 * tertiary execution control bit set.
 */
static inline bool cpu_has_vmx_zxpause(void)
{
	return (vmcs_config.zx_cpu_based_3rd_exec_ctrl &
		ZX_TERTIARY_EXEC_GUEST_ZXPAUSE) != 0;
}

static inline bool cpu_has_vmx_waitpkg(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kvm/vmx/vmcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ struct vmcs_controls_shadow {
u32 pin;
u32 exec;
u32 secondary_exec;
u32 zx_tertiary_exec;
u64 tertiary_exec;
u64 zx_vmexit_tsc;
};

/*
Expand Down
67 changes: 66 additions & 1 deletion arch/x86/kvm/vmx/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ module_param(ple_window_max, uint, 0444);
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);

static u32 zx_ext_vmcs_cap;

static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);
Expand Down Expand Up @@ -2015,7 +2017,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_UMWAIT_CONTROL:
if (!msr_info->host_initiated && !vmx_has_waitpkg(vmx))
return 1;

msr_info->data = vmx->msr_ia32_umwait_control;
break;
case MSR_ZX_PAUSE_CONTROL:
if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx))
return 1;
msr_info->data = vmx->msr_ia32_umwait_control;
break;
case MSR_IA32_SPEC_CTRL:
Expand Down Expand Up @@ -2275,7 +2281,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
/* The reserved bit 1 and non-32 bit [63:32] should be zero */
if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
return 1;
vmx->msr_ia32_umwait_control = data;
break;
case MSR_ZX_PAUSE_CONTROL:
if (!msr_info->host_initiated && !vmx_guest_zxpause_enabled(vmx))
return 1;

/* The reserved bit 1 and non-32 bit [63:32] should be zero */
if (data & (BIT_ULL(1) | GENMASK_ULL(63, 32)))
return 1;
vmx->msr_ia32_umwait_control = data;
break;
case MSR_IA32_SPEC_CTRL:
Expand Down Expand Up @@ -2733,6 +2747,10 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
vmcs_conf->vmentry_ctrl = _vmentry_control;
vmcs_conf->misc = misc_msr;

/* Setup Zhaoxin exec-cntl3 VMCS field. */
if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3)
vmcs_conf->zx_cpu_based_3rd_exec_ctrl |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE;

#if IS_ENABLED(CONFIG_HYPERV)
if (enlightened_vmcs)
evmcs_sanitize_exec_ctrls(vmcs_conf);
Expand Down Expand Up @@ -4525,6 +4543,29 @@ static u64 vmx_tertiary_exec_control(struct vcpu_vmx *vmx)
return exec_control;
}

/*
 * Compute the Zhaoxin tertiary VM-execution controls for @vmx.
 *
 * The value starts from the controls recorded in vmcs_config, then
 * ZX_TERTIARY_EXEC_GUEST_ZXPAUSE is set or cleared depending on whether the
 * guest CPUID exposes X86_FEATURE_ZXPAUSE.
 *
 * Returns the 32-bit control value; callers hand it to
 * zx_tertiary_exec_controls_set().
 */
static u32 vmx_zx_tertiary_exec_control(struct vcpu_vmx *vmx)
{
	struct kvm_vcpu *vcpu = &vmx->vcpu;
	u32 exec_control = vmcs_config.zx_cpu_based_3rd_exec_ctrl;

	if (!guest_cpuid_has(vcpu, X86_FEATURE_ZXPAUSE)) {
		/* The guest was not given ZXPAUSE: keep the control off. */
		exec_control &= ~ZX_TERTIARY_EXEC_GUEST_ZXPAUSE;
	} else if (cpu_has_vmx_zxpause()) {
		exec_control |= ZX_TERTIARY_EXEC_GUEST_ZXPAUSE;
	} else {
		/*
		 * Userspace (e.g. QEMU) asked for guest ZXPAUSE but VMX does
		 * not support it.  This function runs on vCPU VMCS setup and
		 * on every guest CPUID update, so rate-limit the message to
		 * keep userspace from flooding the kernel log.
		 */
		pr_err_ratelimited("VMX not support guest_zxpause!\n");
	}

	/* enable other features here */

	return exec_control;
}

/*
* Adjust a single secondary execution control bit to intercept/allow an
* instruction in the guest. This is usually done based on whether or not a
Expand Down Expand Up @@ -4731,6 +4772,11 @@ static void init_vmcs(struct vcpu_vmx *vmx)
if (cpu_has_secondary_exec_ctrls())
secondary_exec_controls_set(vmx, vmx_secondary_exec_control(vmx));

if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) {
zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx));
zx_vmexit_tsc_controls_set(vmx, 0);
}

if (cpu_has_tertiary_exec_ctrls())
tertiary_exec_controls_set(vmx, vmx_tertiary_exec_control(vmx));

Expand Down Expand Up @@ -6260,6 +6306,13 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
else
tertiary_exec_control = 0;

if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) {
pr_err("*** Zhaoxin Specific Fields ***\n");
pr_err("Zhaoxin TertiaryExec Cntl = 0x%016x\n",
vmcs_read32(ZX_TERTIARY_VM_EXEC_CONTROL));
pr_err("ZXPAUSE Saved TSC = 0x%016llx\n", vmcs_read64(ZXPAUSE_VMEXIT_TSC));
}

pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
pr_err("*** Guest State ***\n");
Expand Down Expand Up @@ -7782,6 +7835,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmcs_set_secondary_exec_control(vmx,
vmx_secondary_exec_control(vmx));

if (zx_ext_vmcs_cap & MSR_ZX_VMCS_EXEC_CTL3) {
zx_tertiary_exec_controls_set(vmx, vmx_zx_tertiary_exec_control(vmx));
zx_vmexit_tsc_controls_set(vmx, 0);
}

if (guest_can_use(vcpu, X86_FEATURE_VMX))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
Expand Down Expand Up @@ -7932,6 +7990,9 @@ static __init void vmx_set_cpu_caps(void)

if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);

if (cpu_has_vmx_zxpause())
kvm_cpu_cap_check_and_set(X86_FEATURE_ZXPAUSE);
}

static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
Expand Down Expand Up @@ -8455,6 +8516,10 @@ static __init int hardware_setup(void)
unsigned long host_bndcfgs;
struct desc_ptr dt;
int r;
u32 ign;

/* Caches Zhaoxin extend VMCS capabilities. */
rdmsr_safe(MSR_ZX_EXT_VMCS_CAPS, &zx_ext_vmcs_cap, &ign);

store_idt(&dt);
host_idt_base = dt.address;
Expand Down
20 changes: 18 additions & 2 deletions arch/x86/kvm/vmx/vmx.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,8 +575,17 @@ static inline u8 vmx_get_rvi(void)
SECONDARY_EXEC_ENCLS_EXITING)

#define KVM_REQUIRED_VMX_TERTIARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL \
(TERTIARY_EXEC_IPI_VIRT)
#define KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL (TERTIARY_EXEC_IPI_VIRT)

#define KVM_REQUIRED_VMX_ZX_TERTIARY_VM_EXEC_CONTROL 0
#define KVM_OPTIONAL_VMX_ZX_TERTIARY_VM_EXEC_CONTROL (ZX_TERTIARY_EXEC_GUEST_ZXPAUSE)

/*
* We shouldn't rw zxpause_vmexit_tsc vmcs field in this
* way, try to use another way in the future.
*/
#define KVM_REQUIRED_VMX_ZXPAUSE_VMEXIT_TSC 0
#define KVM_OPTIONAL_VMX_ZXPAUSE_VMEXIT_TSC 1

#define BUILD_CONTROLS_SHADOW(lname, uname, bits) \
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u##bits val) \
Expand Down Expand Up @@ -610,6 +619,8 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(tertiary_exec, TERTIARY_VM_EXEC_CONTROL, 64)
BUILD_CONTROLS_SHADOW(zx_tertiary_exec, ZX_TERTIARY_VM_EXEC_CONTROL, 32)
BUILD_CONTROLS_SHADOW(zx_vmexit_tsc, ZXPAUSE_VMEXIT_TSC, 64)

/*
* VMX_REGS_LAZY_LOAD_SET - The set of registers that will be updated in the
Expand Down Expand Up @@ -712,6 +723,11 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
}

/*
 * Tell whether ZX_TERTIARY_EXEC_GUEST_ZXPAUSE is set in the Zhaoxin tertiary
 * execution controls returned by zx_tertiary_exec_controls_get() for @vmx.
 */
static inline bool vmx_guest_zxpause_enabled(struct vcpu_vmx *vmx)
{
	if (zx_tertiary_exec_controls_get(vmx) & ZX_TERTIARY_EXEC_GUEST_ZXPAUSE)
		return true;

	return false;
}

static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
{
if (!enable_ept)
Expand Down
5 changes: 5 additions & 0 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -1462,6 +1462,7 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
MSR_IA32_UMWAIT_CONTROL,
MSR_ZX_PAUSE_CONTROL,

MSR_IA32_XFD, MSR_IA32_XFD_ERR,
};
Expand Down Expand Up @@ -7149,6 +7150,10 @@ static void kvm_probe_msr_to_save(u32 msr_index)
if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
return;
break;
case MSR_ZX_PAUSE_CONTROL:
if (!kvm_cpu_cap_has(X86_FEATURE_ZXPAUSE))
return;
break;
case MSR_IA32_RTIT_CTL:
case MSR_IA32_RTIT_STATUS:
if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))
Expand Down
7 changes: 7 additions & 0 deletions tools/arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,13 @@
#define MSR_TMTA_LRTI_READOUT 0x80868018
#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a

/*
* Zhaoxin extended VMCS capabilities:
* bit 0: exec-cntl3 VMCS field.
*/
#define MSR_ZX_EXT_VMCS_CAPS 0x1675
#define MSR_ZX_VMCS_EXEC_CTL3 BIT(0)

/* Intel defined MSRs. */
#define MSR_IA32_P5_MC_ADDR 0x00000000
#define MSR_IA32_P5_MC_TYPE 0x00000001
Expand Down

0 comments on commit a96fd43

Please sign in to comment.