From 78126f4efa07b99bc7d7d9d4f134729c965c482c Mon Sep 17 00:00:00 2001 From: Antoni Boucher Date: Wed, 13 Sep 2023 21:12:59 -0400 Subject: [PATCH] Do not always enable avx2 --- src/abi.rs | 1 + src/base.rs | 18 ++++++++------- src/intrinsic/llvm.rs | 1 + src/lib.rs | 52 ++++++++++++++++++++++++++++++------------- 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/abi.rs b/src/abi.rs index 9f6e2f7ff10..813abaac793 100644 --- a/src/abi.rs +++ b/src/abi.rs @@ -3,6 +3,7 @@ use rustc_codegen_ssa::traits::{AbiBuilderMethods, BaseTypeMethods}; use rustc_data_structures::fx::FxHashSet; use rustc_middle::bug; use rustc_middle::ty::Ty; +#[cfg(feature = "master")] use rustc_session::config; use rustc_target::abi::call::{ArgAttributes, CastTarget, FnAbi, PassMode, Reg, RegKind}; diff --git a/src/base.rs b/src/base.rs index 8a75beada1a..12e38bffe59 100644 --- a/src/base.rs +++ b/src/base.rs @@ -1,6 +1,5 @@ use std::collections::HashSet; use std::env; -use std::sync::Arc; use std::time::Instant; use gccjit::{ @@ -8,8 +7,6 @@ use gccjit::{ FunctionType, GlobalKind, }; -#[cfg(feature="master")] -use gccjit::TargetInfo; use rustc_middle::dep_graph; use rustc_middle::ty::TyCtxt; #[cfg(feature="master")] @@ -22,8 +19,7 @@ use rustc_codegen_ssa::traits::DebugInfoMethods; use rustc_session::config::DebugInfo; use rustc_span::Symbol; -#[cfg(not(feature="master"))] -use crate::TargetInfo; +use crate::LockedTargetInfo; use crate::GccContext; use crate::builder::Builder; use crate::context::CodegenCx; @@ -70,7 +66,7 @@ pub fn linkage_to_gcc(linkage: Linkage) -> FunctionType { } } -pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc<TargetInfo>) -> (ModuleCodegen<GccContext>, u64) { +pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: LockedTargetInfo) -> (ModuleCodegen<GccContext>, u64) { let prof_timer = tcx.prof.generic_activity("codegen_module"); let start_time = Instant::now(); @@ -89,7 +85,7 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: 
Symbol, target_info: Arc< // the time we needed for codegenning it. let cost = time_to_codegen.as_secs() * 1_000_000_000 + time_to_codegen.subsec_nanos() as u64; - fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, Arc<TargetInfo>)) -> ModuleCodegen<GccContext> { + fn module_codegen(tcx: TyCtxt<'_>, (cgu_name, target_info): (Symbol, LockedTargetInfo)) -> ModuleCodegen<GccContext> { let cgu = tcx.codegen_unit(cgu_name); // Instantiate monomorphizations without filling out definitions yet... let context = Context::default(); @@ -111,7 +107,8 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc< // TODO(antoyo): only set on x86 platforms. context.add_command_line_option("-masm=intel"); - let features = ["64", "avxvnni", "bmi", "sse2", "avx", "avx2", "sha", "fma", "fma4", "gfni", "f16c", "aes", "bmi2", "pclmul", "rtm", + // TODO: instead of setting the features manually, set the correct -march flag. + let features = ["64", "avxvnni", "bmi", "sse2", "avx2", "sha", "fma", "fma4", "gfni", "f16c", "aes", "bmi2", "pclmul", "rtm", "vaes", "vpclmulqdq", "xsavec", ]; @@ -119,6 +116,11 @@ pub fn compile_codegen_unit(tcx: TyCtxt<'_>, cgu_name: Symbol, target_info: Arc< add_cpu_feature_flag(feature); } + // NOTE: we always enable AVX because the equivalent of llvm.x86.sse2.cmp.pd in GCC for + // SSE2 is multiple builtins, so we use the AVX __builtin_ia32_cmppd instead. + // FIXME(antoyo): use the proper builtins for llvm.x86.sse2.cmp.pd and similar. 
+ context.add_command_line_option("-mavx"); + for arg in &tcx.sess.opts.cg.llvm_args { context.add_command_line_option(arg); } diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs index f28348380d7..cb070e8267a 100644 --- a/src/intrinsic/llvm.rs +++ b/src/intrinsic/llvm.rs @@ -236,6 +236,7 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc let arg2 = builder.context.new_cast(None, arg2, arg2_type); args = vec![new_args[0], arg2].into(); }, + // These builtins are sent one more argument than needed. "__builtin_prefetch" => { let mut new_args = args.to_vec(); new_args.pop(); diff --git a/src/lib.rs b/src/lib.rs index b330f770597..a85ebfac222 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,6 +73,7 @@ mod type_of; use std::any::Any; use std::sync::Arc; +use std::sync::Mutex; #[cfg(not(feature="master"))] use std::sync::atomic::AtomicBool; #[cfg(not(feature="master"))] @@ -135,9 +136,24 @@ impl TargetInfo { } } +#[derive(Clone, Debug)] +pub struct LockedTargetInfo { + info: Arc<Mutex<TargetInfo>>, +} + +impl LockedTargetInfo { + fn cpu_supports(&self, feature: &str) -> bool { + self.info.lock().expect("lock").cpu_supports(feature) + } + + fn supports_128bit_int(&self) -> bool { + self.info.lock().expect("lock").supports_128bit_int() + } +} + #[derive(Clone)] pub struct GccCodegenBackend { - target_info: Arc<TargetInfo>, + target_info: LockedTargetInfo, } impl CodegenBackend for GccCodegenBackend { @@ -146,6 +162,17 @@ impl CodegenBackend for GccCodegenBackend { } fn init(&self, sess: &Session) { + #[cfg(feature="master")] + { + let target_cpu = target_cpu(sess); + + // Get the second TargetInfo with the correct CPU features by setting the arch. 
+ let context = Context::default(); + context.add_command_line_option(&format!("-march={}", target_cpu)); + + *self.target_info.info.lock().expect("lock") = context.get_target_info(); + } + #[cfg(feature="master")] gccjit::set_global_personality_function_name(b"rust_eh_personality\0"); if sess.lto() == Lto::Thin { @@ -161,7 +188,7 @@ impl CodegenBackend for GccCodegenBackend { let _int128_ty = check_context.new_c_type(CType::UInt128t); // NOTE: we cannot just call compile() as this would require other files than libgccjit.so. check_context.compile_to_file(gccjit::OutputKind::Assembler, temp_file.to_str().expect("path to str")); - self.target_info.supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst); + self.target_info.info.lock().expect("lock").supports_128bit_integers.store(check_context.get_last_error() == Ok(None), Ordering::SeqCst); } } @@ -217,7 +244,7 @@ impl ExtraBackendMethods for GccCodegenBackend { } fn compile_codegen_unit(&self, tcx: TyCtxt<'_>, cgu_name: Symbol) -> (ModuleCodegen<Self::Module>, u64) { - base::compile_codegen_unit(tcx, cgu_name, Arc::clone(&self.target_info)) + base::compile_codegen_unit(tcx, cgu_name, self.target_info.clone()) } fn target_machine_factory(&self, _sess: &Session, _opt_level: OptLevel, _features: &[String]) -> TargetMachineFactoryFn<Self> { @@ -306,23 +333,18 @@ impl WriteBackendMethods for GccCodegenBackend { #[no_mangle] pub fn __rustc_codegen_backend() -> Box<dyn CodegenBackend> { #[cfg(feature="master")] - let target_info = { - // Get the native arch and check whether the target supports 128-bit integers. - let context = Context::default(); - let arch = context.get_target_info().arch().unwrap(); - - // Get the second TargetInfo with the correct CPU features by setting the arch. + let info = { + // Check whether the target supports 128-bit integers. 
let context = Context::default(); - context.add_command_line_option(&format!("-march={}", arch.to_str().unwrap())); - Arc::new(context.get_target_info()) + Arc::new(Mutex::new(context.get_target_info())) }; #[cfg(not(feature="master"))] - let target_info = Arc::new(TargetInfo { + let info = Arc::new(Mutex::new(TargetInfo { supports_128bit_integers: AtomicBool::new(false), - }); + })); Box::new(GccCodegenBackend { - target_info, + target_info: LockedTargetInfo { info }, }) } @@ -356,7 +378,7 @@ pub fn target_cpu(sess: &Session) -> &str { } } -pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &Arc<TargetInfo>) -> Vec<Symbol> { +pub fn target_features(sess: &Session, allow_unstable: bool, target_info: &LockedTargetInfo) -> Vec<Symbol> { supported_target_features(sess) .iter() .filter_map(