[LLVM] Aliasing and cpu options for LLVM visitor and the benchmark (#686)

* Added may-alias and cpu options
* Removed CPU checks
* Use steady clock as we saw an issue on the VM

Co-authored-by: Ioannis Magkanaris <[email protected]>
georgemitenkov and iomaganaris authored Jun 3, 2021
1 parent 5a67fe8 commit 6ec49d7
Showing 9 changed files with 103 additions and 123 deletions.
1 change: 1 addition & 0 deletions azure-pipelines.yml
@@ -177,6 +177,7 @@ jobs:
condition: false
env:
SHELL: 'bash'
condition: false
displayName: 'Build Neuron and Run Integration Tests'
- job: 'manylinux_wheels'
timeoutInMinutes: 45
9 changes: 7 additions & 2 deletions src/codegen/llvm/codegen_llvm_visitor.hpp
@@ -95,14 +95,19 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor {
int vector_width = 1,
std::string vec_lib = "none",
bool add_debug_information = false,
std::vector<std::string> fast_math_flags = {})
std::vector<std::string> fast_math_flags = {},
bool llvm_assume_alias = false)
: mod_filename(mod_filename)
, output_dir(output_dir)
, opt_level_ir(opt_level_ir)
, vector_width(vector_width)
, vector_library(vec_lib)
, add_debug_information(add_debug_information)
, ir_builder(*context, use_single_precision, vector_width, fast_math_flags)
, ir_builder(*context,
use_single_precision,
vector_width,
fast_math_flags,
!llvm_assume_alias)
, debug_builder(*module) {}

/// Dumps the generated LLVM IR module to string.
9 changes: 6 additions & 3 deletions src/codegen/llvm/llvm_ir_builder.cpp
@@ -200,12 +200,15 @@ void IRBuilder::set_kernel_attributes() {
current_function->setDoesNotFreeMemory();
current_function->setDoesNotThrow();

// We also want to specify that the pointers that instance struct holds, do not alias. In order
// to do that, we add a `noalias` attribute to the argument. As per Clang's specification:
// We also want to specify that the pointers that the instance struct holds do not alias,
// unless specified otherwise. In order to do that, we add a `noalias` attribute to the
// argument. As per Clang's specification:
// > The `noalias` attribute indicates that the only memory accesses inside function are loads
// > and stores from objects pointed to by its pointer-typed arguments, with arbitrary
// > offsets.
current_function->addParamAttr(0, llvm::Attribute::NoAlias);
if (assume_noalias) {
current_function->addParamAttr(0, llvm::Attribute::NoAlias);
}

// Finally, specify that the struct pointer does not capture and is read-only.
current_function->addParamAttr(0, llvm::Attribute::NoCapture);
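As a side note, the attribute logic above can be reproduced in isolation. The following is a minimal sketch (not code from this repository) of how LLVM's C++ API attaches these parameter attributes to a kernel whose first argument is the instance-struct pointer, with `noalias` made conditional exactly as in set_kernel_attributes; the function name annotate_kernel is made up for illustration.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Sketch: annotate a kernel function. `noalias` is only added when the user
// has not asked to assume that the instance struct fields may alias.
void annotate_kernel(llvm::Function* kernel, bool assume_noalias) {
    kernel->setDoesNotFreeMemory();
    kernel->setDoesNotThrow();
    if (assume_noalias)
        kernel->addParamAttr(0, llvm::Attribute::NoAlias);
    // The struct pointer itself is never captured and is only read from.
    kernel->addParamAttr(0, llvm::Attribute::NoCapture);
    kernel->addParamAttr(0, llvm::Attribute::ReadOnly);
}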
9 changes: 7 additions & 2 deletions src/codegen/llvm/llvm_ir_builder.hpp
@@ -58,6 +58,9 @@ class IRBuilder {
/// The vector width used for the vectorized code.
unsigned vector_width;

/// Instance struct fields do not alias.
bool assume_noalias;

/// Masked value used to predicate vector instructions.
llvm::Value* mask;

@@ -71,7 +74,8 @@
IRBuilder(llvm::LLVMContext& context,
bool use_single_precision = false,
unsigned vector_width = 1,
std::vector<std::string> fast_math_flags = {})
std::vector<std::string> fast_math_flags = {},
bool assume_noalias = true)
: builder(context)
, symbol_table(nullptr)
, current_function(nullptr)
Expand All @@ -81,7 +85,8 @@ class IRBuilder {
, vector_width(vector_width)
, mask(nullptr)
, kernel_id("")
, fast_math_flags(fast_math_flags) {}
, fast_math_flags(fast_math_flags)
, assume_noalias(assume_noalias) {}

/// Transforms the fast math flags provided to the builder into LLVM's representation.
llvm::FastMathFlags transform_to_fmf(std::vector<std::string>& flags) {
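The constructor also stores fast_math_flags, which transform_to_fmf (cut off above) turns into llvm::FastMathFlags. Below is a minimal sketch of such a mapping using LLVM's public setters; the accepted string spellings are assumptions and not necessarily the ones NMODL recognises.

#include "llvm/IR/Operator.h"

#include <string>
#include <vector>

// Sketch: map textual fast-math options onto llvm::FastMathFlags. Only the
// setter calls are guaranteed LLVM API; the string spellings are assumed.
llvm::FastMathFlags to_fast_math_flags(const std::vector<std::string>& flags) {
    llvm::FastMathFlags fmf;
    for (const auto& flag: flags) {
        if (flag == "fast")
            fmf.setFast();
        else if (flag == "reassoc")
            fmf.setAllowReassoc();
        else if (flag == "nnan")
            fmf.setNoNaNs();
        else if (flag == "ninf")
            fmf.setNoInfs();
        else if (flag == "nsz")
            fmf.setNoSignedZeros();
        else if (flag == "contract")
            fmf.setAllowContract();
    }
    return fmf;
}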
21 changes: 14 additions & 7 deletions src/main.cpp
@@ -186,6 +186,9 @@ int main(int argc, const char* argv[]) {
/// run llvm benchmark
bool run_llvm_benchmark(false);

/// assume that instance struct fields may alias (do not add `noalias`)
bool llvm_assume_alias(false);

/// optimisation level for IR generation
int llvm_opt_level_ir = 0;

@@ -201,8 +204,8 @@
/// the number of repeated experiments for the benchmarking
int num_experiments = 100;

/// specify the backend for LLVM IR to target
std::string backend = "default";
/// specify the cpu for LLVM IR to target
std::string cpu = "default";
#endif

app.get_formatter()->column_width(40);
@@ -324,6 +327,9 @@ int main(int argc, const char* argv[]) {
llvm_opt->add_flag("--single-precision",
llvm_float_type,
"Use single precision floating-point types ({})"_format(llvm_float_type))->ignore_case();
llvm_opt->add_flag("--assume-may-alias",
llvm_assume_alias,
"Assume instance struct fields may alias ({})"_format(llvm_assume_alias))->ignore_case();
llvm_opt->add_option("--vector-width",
llvm_vec_width,
"LLVM explicit vectorisation width ({})"_format(llvm_vec_width))->ignore_case();
@@ -351,9 +357,9 @@
benchmark_opt->add_option("--repeat",
num_experiments,
"Number of experiments for benchmarking ({})"_format(num_experiments))->ignore_case();
benchmark_opt->add_option("--backend",
backend,
"Target's backend ({})"_format(backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"}));
benchmark_opt->add_option("--cpu",
cpu,
"Target's backend ({})"_format(cpu))->ignore_case();
#endif
// clang-format on

@@ -664,7 +670,8 @@ int main(int argc, const char* argv[]) {
llvm_vec_width,
vector_library,
!disable_debug_information,
llvm_fast_math_flags);
llvm_fast_math_flags,
llvm_assume_alias);
visitor.visit_program(*ast);
ast_to_nmodl(*ast, filepath("llvm", "mod"));
ast_to_json(*ast, filepath("llvm", "json"));
@@ -677,7 +684,7 @@ int main(int argc, const char* argv[]) {
shared_lib_paths,
num_experiments,
instance_size,
backend,
cpu,
llvm_opt_level_ir,
llvm_opt_level_codegen);
benchmark.run(ast);
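For readers unfamiliar with CLI11, here is a minimal, self-contained sketch (not the project's actual command-line wiring) of how a may-alias flag and a cpu option like the ones above are declared and consumed; the program description and option names here are placeholders, and only the CLI11 calls themselves are real API. Note how the user-facing "may alias" flag is inverted before reaching the IR builder, as in the visitor constructor above.

#include <string>

#include <CLI/CLI.hpp>

int main(int argc, char** argv) {
    CLI::App app{"Toy front-end mirroring the new LLVM/benchmark options"};

    bool assume_may_alias = false;  // default: instance struct fields do not alias
    std::string cpu = "default";    // any LLVM CPU name, e.g. "skylake-avx512"

    app.add_flag("--assume-may-alias", assume_may_alias,
                 "Assume instance struct fields may alias")->ignore_case();
    app.add_option("--cpu", cpu, "Target CPU for code generation and benchmarking")
        ->ignore_case();

    CLI11_PARSE(app, argc, argv);

    // As in the visitor constructor above, the IR builder receives the negation.
    const bool assume_noalias = !assume_may_alias;
    (void) assume_noalias;
    return 0;
}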
71 changes: 42 additions & 29 deletions test/benchmark/jit_driver.cpp
@@ -31,17 +31,30 @@ namespace runner {
/* Utilities for JIT driver */
/****************************************************************************************/

/// Get the host CPU features in the format:
/// +feature,+feature,-feature,+feature,...
/// where `+` indicates that the feature is enabled.
std::string get_cpu_features(const std::string& cpu) {
llvm::SubtargetFeatures features;
llvm::StringMap<bool> host_features;
if (llvm::sys::getHostCPUFeatures(host_features)) {
for (auto& f: host_features)
features.AddFeature(f.first(), f.second);
}
return llvm::join(features.getFeatures().begin(), features.getFeatures().end(), ",");
}

/// Sets the target triple and the data layout of the module.
static void set_triple_and_data_layout(llvm::Module& module, const std::string& features) {
static void set_triple_and_data_layout(llvm::Module& module, const std::string& cpu) {
// Get the default target triple for the host.
auto target_triple = llvm::sys::getDefaultTargetTriple();
std::string error_msg;
auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg);
if (!target)
throw std::runtime_error("Error " + error_msg + "\n");

// Get the CPU information and set a target machine to create the data layout.
std::string cpu(llvm::sys::getHostCPUName());
// Set a target machine to create the data layout.
std::string features = get_cpu_features(cpu);
std::unique_ptr<llvm::TargetMachine> tm(
target->createTargetMachine(target_triple, cpu, features, {}, {}));
if (!tm)
@@ -52,10 +65,10 @@ static void set_triple_and_data_layout(llvm::Module& module, const std::string&
module.setTargetTriple(target_triple);
}

/// Creates llvm::TargetMachine with certain CPU features turned on/off.
/// Creates llvm::TargetMachine for a specified CPU.
static std::unique_ptr<llvm::TargetMachine> create_target(
llvm::orc::JITTargetMachineBuilder* tm_builder,
const std::string& features,
const std::string& cpu,
int opt_level) {
// First, look up the target.
std::string error_msg;
@@ -66,8 +79,8 @@ static std::unique_ptr<llvm::TargetMachine> create_target(

// Create default target machine with provided features.
auto tm = target->createTargetMachine(target_triple,
llvm::sys::getHostCPUName().str(),
features,
cpu,
get_cpu_features(cpu),
tm_builder->getOptions(),
tm_builder->getRelocationModel(),
tm_builder->getCodeModel(),
@@ -83,15 +96,13 @@
/* JIT driver */
/****************************************************************************************/

void JITDriver::init(std::string features,
std::vector<std::string> lib_paths,
BenchmarkInfo* benchmark_info) {
void JITDriver::init(const std::string& cpu, BenchmarkInfo* benchmark_info) {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
utils::initialise_optimisation_passes();

// Set the target triple and the data layout for the module.
set_triple_and_data_layout(*module, features);
set_triple_and_data_layout(*module, cpu);
auto data_layout = module->getDataLayout();

// If benchmarking, enable listeners to use GDB, perf or VTune. Note that LLVM should be built
@@ -120,24 +131,26 @@ void JITDriver::init(std::string features,
if (intel_event_listener)
layer->registerJITEventListener(*intel_event_listener);

for (const auto& lib_path: lib_paths) {
// For every library path, create a corresponding memory buffer.
auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path);
if (!memory_buffer)
throw std::runtime_error("Unable to create memory buffer for " + lib_path);

// Create a new JIT library instance for this session and resolve symbols.
auto& jd = session.createBareJITDylib(std::string(lib_path));
auto loaded =
llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(),
data_layout.getGlobalPrefix());

if (!loaded)
throw std::runtime_error("Unable to load " + lib_path);
jd.addGenerator(std::move(*loaded));
cantFail(layer->add(jd, std::move(*memory_buffer)));
// If benchmarking, resolve shared libraries.
if (benchmark_info) {
for (const auto& lib_path: benchmark_info->shared_lib_paths) {
// For every library path, create a corresponding memory buffer.
auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path);
if (!memory_buffer)
throw std::runtime_error("Unable to create memory buffer for " + lib_path);

// Create a new JIT library instance for this session and resolve symbols.
auto& jd = session.createBareJITDylib(std::string(lib_path));
auto loaded =
llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(),
data_layout.getGlobalPrefix());

if (!loaded)
throw std::runtime_error("Unable to load " + lib_path);
jd.addGenerator(std::move(*loaded));
cantFail(layer->add(jd, std::move(*memory_buffer)));
}
}

return layer;
};

Expand All @@ -146,7 +159,7 @@ void JITDriver::init(std::string features,
-> llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
// Create target machine with some features possibly turned off.
int opt_level_codegen = benchmark_info ? benchmark_info->opt_level_codegen : 0;
auto tm = create_target(&tm_builder, features, opt_level_codegen);
auto tm = create_target(&tm_builder, cpu, opt_level_codegen);

// Optimise the LLVM IR module and save it to .ll file if benchmarking.
if (benchmark_info) {
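The feature string assembled by get_cpu_features can also be inspected on its own. Below is a hedged, standalone sketch (not part of this commit) assuming LLVM ~12 header locations, which this diff's include of llvm/Support/Host.h suggests; in later LLVM releases these headers moved under llvm/TargetParser.

#include "llvm/ADT/StringMap.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: print the host CPU name and a "+feature,-feature,..." string of the
// same shape as the one handed to createTargetMachine above.
int main() {
    llvm::SubtargetFeatures features;
    llvm::StringMap<bool> host_features;
    if (llvm::sys::getHostCPUFeatures(host_features)) {
        for (auto& f: host_features)
            features.AddFeature(f.first(), f.second);
    }
    llvm::outs() << "cpu:      " << llvm::sys::getHostCPUName() << "\n";
    llvm::outs() << "features: " << features.getString() << "\n";
    return 0;
}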
26 changes: 12 additions & 14 deletions test/benchmark/jit_driver.hpp
@@ -17,6 +17,7 @@

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/Support/Host.h"

namespace nmodl {
namespace runner {
@@ -29,6 +30,9 @@ struct BenchmarkInfo {
/// Object file output directory.
std::string output_dir;

/// Shared libraries' paths to link against.
std::vector<std::string> shared_lib_paths;

/// Optimisation level for IR.
int opt_level_ir;

@@ -63,9 +67,7 @@ class JITDriver {
: module(std::move(m)) {}

/// Initializes the JIT driver.
void init(std::string features = "",
std::vector<std::string> lib_paths = {},
BenchmarkInfo* benchmark_info = nullptr);
void init(const std::string& cpu, BenchmarkInfo* benchmark_info = nullptr);

/// Lookups the entry-point without arguments in the JIT and executes it, returning the result.
template <typename ReturnType>
Expand Down Expand Up @@ -131,7 +133,7 @@ class TestRunner: public BaseRunner {
: BaseRunner(std::move(m)) {}

virtual void initialize_driver() {
driver->init();
driver->init(llvm::sys::getHostCPUName().str());
}
};

Expand All @@ -145,27 +147,23 @@ class BenchmarkRunner: public BaseRunner {
/// Benchmarking information passed to JIT driver.
BenchmarkInfo benchmark_info;

/// CPU features specified by the user.
std::string features;

/// Shared libraries' paths to link against.
std::vector<std::string> shared_lib_paths;
/// CPU to target.
std::string cpu;

public:
BenchmarkRunner(std::unique_ptr<llvm::Module> m,
std::string filename,
std::string output_dir,
std::string features = "",
std::string cpu,
std::vector<std::string> lib_paths = {},
int opt_level_ir = 0,
int opt_level_codegen = 0)
: BaseRunner(std::move(m))
, benchmark_info{filename, output_dir, opt_level_ir, opt_level_codegen}
, features(features)
, shared_lib_paths(lib_paths) {}
, cpu(cpu)
, benchmark_info{filename, output_dir, lib_paths, opt_level_ir, opt_level_codegen} {}

virtual void initialize_driver() {
driver->init(features, shared_lib_paths, &benchmark_info);
driver->init(cpu, &benchmark_info);
}
};

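Finally, a hypothetical usage sketch of the updated runner interface (the mod name, output directory, CPU, library paths, and optimisation levels are placeholders, not values from the repository): the CPU name and shared-library paths now reach JITDriver::init through the runner and BenchmarkInfo rather than through a raw feature string.

#include <memory>
#include <utility>

#include "llvm/IR/Module.h"

#include "jit_driver.hpp"  // the header shown above; the exact include path is an assumption

// Sketch: benchmark an already-generated LLVM module on a chosen CPU.
void run_benchmark(std::unique_ptr<llvm::Module> module) {
    nmodl::runner::BenchmarkRunner runner(std::move(module),
                                          /*filename=*/"hh",          // placeholder mod name
                                          /*output_dir=*/".",
                                          /*cpu=*/"skylake-avx512",   // placeholder CPU
                                          /*lib_paths=*/{},
                                          /*opt_level_ir=*/2,
                                          /*opt_level_codegen=*/2);
    runner.initialize_driver();  // forwards cpu and BenchmarkInfo to JITDriver::init
}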