[LLVM] Aliasing and cpu options for LLVM visitor and the benchmark (#686)

* Added may-alias and cpu options
* Removed CPU checks
* Use steady clock as we saw an issue on the VM

Co-authored-by: Ioannis Magkanaris <[email protected]>
georgemitenkov and iomaganaris authored Jun 3, 2021
1 parent 5a67fe8 commit 6ec49d7
Showing 9 changed files with 103 additions and 123 deletions.
1 change: 1 addition & 0 deletions azure-pipelines.yml
@@ -177,6 +177,7 @@ jobs:
condition: false
env:
SHELL: 'bash'
condition: false
displayName: 'Build Neuron and Run Integration Tests'
- job: 'manylinux_wheels'
timeoutInMinutes: 45
9 changes: 7 additions & 2 deletions src/codegen/llvm/codegen_llvm_visitor.hpp
@@ -95,14 +95,19 @@ class CodegenLLVMVisitor: public visitor::ConstAstVisitor {
int vector_width = 1,
std::string vec_lib = "none",
bool add_debug_information = false,
std::vector<std::string> fast_math_flags = {})
std::vector<std::string> fast_math_flags = {},
bool llvm_assume_alias = false)
: mod_filename(mod_filename)
, output_dir(output_dir)
, opt_level_ir(opt_level_ir)
, vector_width(vector_width)
, vector_library(vec_lib)
, add_debug_information(add_debug_information)
, ir_builder(*context, use_single_precision, vector_width, fast_math_flags)
, ir_builder(*context,
use_single_precision,
vector_width,
fast_math_flags,
!llvm_assume_alias)
, debug_builder(*module) {}

/// Dumps the generated LLVM IR module to string.
9 changes: 6 additions & 3 deletions src/codegen/llvm/llvm_ir_builder.cpp
@@ -200,12 +200,15 @@ void IRBuilder::set_kernel_attributes() {
current_function->setDoesNotFreeMemory();
current_function->setDoesNotThrow();

// We also want to specify that the pointers that instance struct holds, do not alias. In order
// to do that, we add a `noalias` attribute to the argument. As per Clang's specification:
// We also want to specify that the pointers that the instance struct holds do not alias,
// unless specified otherwise. In order to do that, we add a `noalias` attribute to the
// argument. As per Clang's specification:
// > The `noalias` attribute indicates that the only memory accesses inside function are loads
// > and stores from objects pointed to by its pointer-typed arguments, with arbitrary
// > offsets.
current_function->addParamAttr(0, llvm::Attribute::NoAlias);
if (assume_noalias) {
current_function->addParamAttr(0, llvm::Attribute::NoAlias);
}

// Finally, specify that the struct pointer does not capture and is read-only.
current_function->addParamAttr(0, llvm::Attribute::NoCapture);
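As a side note, the attribute logic above can be reproduced in isolation. The following is a minimal sketch (not code from this repository) of how LLVM's C++ API attaches these parameter attributes to a kernel whose first argument is the instance-struct pointer, with `noalias` made conditional exactly as in set_kernel_attributes; the function name annotate_kernel is made up for illustration.

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Sketch: annotate a kernel function. `noalias` is only added when the user
// has not asked to assume that the instance struct fields may alias.
void annotate_kernel(llvm::Function* kernel, bool assume_noalias) {
    kernel->setDoesNotFreeMemory();
    kernel->setDoesNotThrow();
    if (assume_noalias)
        kernel->addParamAttr(0, llvm::Attribute::NoAlias);
    // The struct pointer itself is never captured and is only read from.
    kernel->addParamAttr(0, llvm::Attribute::NoCapture);
    kernel->addParamAttr(0, llvm::Attribute::ReadOnly);
}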
9 changes: 7 additions & 2 deletions src/codegen/llvm/llvm_ir_builder.hpp
@@ -58,6 +58,9 @@ class IRBuilder {
/// The vector width used for the vectorized code.
unsigned vector_width;

/// Instance struct fields do not alias.
bool assume_noalias;

/// Masked value used to predicate vector instructions.
llvm::Value* mask;

@@ -71,7 +74,8 @@
IRBuilder(llvm::LLVMContext& context,
bool use_single_precision = false,
unsigned vector_width = 1,
std::vector<std::string> fast_math_flags = {})
std::vector<std::string> fast_math_flags = {},
bool assume_noalias = true)
: builder(context)
, symbol_table(nullptr)
, current_function(nullptr)
Expand All @@ -81,7 +85,8 @@ class IRBuilder {
, vector_width(vector_width)
, mask(nullptr)
, kernel_id("")
, fast_math_flags(fast_math_flags) {}
, fast_math_flags(fast_math_flags)
, assume_noalias(assume_noalias) {}

/// Transforms the fast math flags provided to the builder into LLVM's representation.
llvm::FastMathFlags transform_to_fmf(std::vector<std::string>& flags) {
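The constructor also stores fast_math_flags, which transform_to_fmf (cut off above) turns into llvm::FastMathFlags. Below is a minimal sketch of such a mapping using LLVM's public setters; the accepted string spellings are assumptions and not necessarily the ones NMODL recognises.

#include "llvm/IR/Operator.h"

#include <string>
#include <vector>

// Sketch: map textual fast-math options onto llvm::FastMathFlags. Only the
// setter calls are guaranteed LLVM API; the string spellings are assumed.
llvm::FastMathFlags to_fast_math_flags(const std::vector<std::string>& flags) {
    llvm::FastMathFlags fmf;
    for (const auto& flag: flags) {
        if (flag == "fast")
            fmf.setFast();
        else if (flag == "reassoc")
            fmf.setAllowReassoc();
        else if (flag == "nnan")
            fmf.setNoNaNs();
        else if (flag == "ninf")
            fmf.setNoInfs();
        else if (flag == "nsz")
            fmf.setNoSignedZeros();
        else if (flag == "contract")
            fmf.setAllowContract();
    }
    return fmf;
}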
21 changes: 14 additions & 7 deletions src/main.cpp
@@ -186,6 +186,9 @@ int main(int argc, const char* argv[]) {
/// run llvm benchmark
bool run_llvm_benchmark(false);

/// assume that instance struct fields may alias (do not add `noalias`)
bool llvm_assume_alias(false);

/// optimisation level for IR generation
int llvm_opt_level_ir = 0;

@@ -201,8 +204,8 @@
/// the number of repeated experiments for the benchmarking
int num_experiments = 100;

/// specify the backend for LLVM IR to target
std::string backend = "default";
/// specify the cpu for LLVM IR to target
std::string cpu = "default";
#endif

app.get_formatter()->column_width(40);
@@ -324,6 +327,9 @@ int main(int argc, const char* argv[]) {
llvm_opt->add_flag("--single-precision",
llvm_float_type,
"Use single precision floating-point types ({})"_format(llvm_float_type))->ignore_case();
llvm_opt->add_flag("--assume-may-alias",
llvm_assume_alias,
"Assume instance struct fields may alias ({})"_format(llvm_assume_alias))->ignore_case();
llvm_opt->add_option("--vector-width",
llvm_vec_width,
"LLVM explicit vectorisation width ({})"_format(llvm_vec_width))->ignore_case();
@@ -351,9 +357,9 @@
benchmark_opt->add_option("--repeat",
num_experiments,
"Number of experiments for benchmarking ({})"_format(num_experiments))->ignore_case();
benchmark_opt->add_option("--backend",
backend,
"Target's backend ({})"_format(backend))->ignore_case()->check(CLI::IsMember({"avx2", "default", "sse2"}));
benchmark_opt->add_option("--cpu",
cpu,
"Target's backend ({})"_format(cpu))->ignore_case();
#endif
// clang-format on

@@ -664,7 +670,8 @@ int main(int argc, const char* argv[]) {
llvm_vec_width,
vector_library,
!disable_debug_information,
llvm_fast_math_flags);
llvm_fast_math_flags,
llvm_assume_alias);
visitor.visit_program(*ast);
ast_to_nmodl(*ast, filepath("llvm", "mod"));
ast_to_json(*ast, filepath("llvm", "json"));
@@ -677,7 +684,7 @@ int main(int argc, const char* argv[]) {
shared_lib_paths,
num_experiments,
instance_size,
backend,
cpu,
llvm_opt_level_ir,
llvm_opt_level_codegen);
benchmark.run(ast);
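For readers unfamiliar with CLI11, here is a minimal, self-contained sketch (not the project's actual command-line wiring) of how a may-alias flag and a cpu option like the ones above are declared and consumed; the program description and option names here are placeholders, and only the CLI11 calls themselves are real API. Note how the user-facing "may alias" flag is inverted before reaching the IR builder, as in the visitor constructor above.

#include <string>

#include <CLI/CLI.hpp>

int main(int argc, char** argv) {
    CLI::App app{"Toy front-end mirroring the new LLVM/benchmark options"};

    bool assume_may_alias = false;  // default: instance struct fields do not alias
    std::string cpu = "default";    // any LLVM CPU name, e.g. "skylake-avx512"

    app.add_flag("--assume-may-alias", assume_may_alias,
                 "Assume instance struct fields may alias")->ignore_case();
    app.add_option("--cpu", cpu, "Target CPU for code generation and benchmarking")
        ->ignore_case();

    CLI11_PARSE(app, argc, argv);

    // As in the visitor constructor above, the IR builder receives the negation.
    const bool assume_noalias = !assume_may_alias;
    (void) assume_noalias;
    return 0;
}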
71 changes: 42 additions & 29 deletions test/benchmark/jit_driver.cpp
@@ -31,17 +31,30 @@ namespace runner {
/* Utilities for JIT driver */
/****************************************************************************************/

/// Get the host CPU features in the format:
/// +feature,+feature,-feature,+feature,...
/// where `+` indicates that the feature is enabled.
std::string get_cpu_features(const std::string& cpu) {
llvm::SubtargetFeatures features;
llvm::StringMap<bool> host_features;
if (llvm::sys::getHostCPUFeatures(host_features)) {
for (auto& f: host_features)
features.AddFeature(f.first(), f.second);
}
return llvm::join(features.getFeatures().begin(), features.getFeatures().end(), ",");
}

/// Sets the target triple and the data layout of the module.
static void set_triple_and_data_layout(llvm::Module& module, const std::string& features) {
static void set_triple_and_data_layout(llvm::Module& module, const std::string& cpu) {
// Get the default target triple for the host.
auto target_triple = llvm::sys::getDefaultTargetTriple();
std::string error_msg;
auto* target = llvm::TargetRegistry::lookupTarget(target_triple, error_msg);
if (!target)
throw std::runtime_error("Error " + error_msg + "\n");

// Get the CPU information and set a target machine to create the data layout.
std::string cpu(llvm::sys::getHostCPUName());
// Set a target machine to create the data layout.
std::string features = get_cpu_features(cpu);
std::unique_ptr<llvm::TargetMachine> tm(
target->createTargetMachine(target_triple, cpu, features, {}, {}));
if (!tm)
@@ -52,10 +65,10 @@ static void set_triple_and_data_layout(llvm::Module& module, const std::string&
module.setTargetTriple(target_triple);
}

/// Creates llvm::TargetMachine with certain CPU features turned on/off.
/// Creates llvm::TargetMachine for a specified CPU.
static std::unique_ptr<llvm::TargetMachine> create_target(
llvm::orc::JITTargetMachineBuilder* tm_builder,
const std::string& features,
const std::string& cpu,
int opt_level) {
// First, look up the target.
std::string error_msg;
@@ -66,8 +79,8 @@ static std::unique_ptr<llvm::TargetMachine> create_target(

// Create default target machine with provided features.
auto tm = target->createTargetMachine(target_triple,
llvm::sys::getHostCPUName().str(),
features,
cpu,
get_cpu_features(cpu),
tm_builder->getOptions(),
tm_builder->getRelocationModel(),
tm_builder->getCodeModel(),
@@ -83,15 +96,13 @@
/* JIT driver */
/****************************************************************************************/

void JITDriver::init(std::string features,
std::vector<std::string> lib_paths,
BenchmarkInfo* benchmark_info) {
void JITDriver::init(const std::string& cpu, BenchmarkInfo* benchmark_info) {
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
utils::initialise_optimisation_passes();

// Set the target triple and the data layout for the module.
set_triple_and_data_layout(*module, features);
set_triple_and_data_layout(*module, cpu);
auto data_layout = module->getDataLayout();

// If benchmarking, enable listeners to use GDB, perf or VTune. Note that LLVM should be built
@@ -120,24 +131,26 @@ void JITDriver::init(std::string features,
if (intel_event_listener)
layer->registerJITEventListener(*intel_event_listener);

for (const auto& lib_path: lib_paths) {
// For every library path, create a corresponding memory buffer.
auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path);
if (!memory_buffer)
throw std::runtime_error("Unable to create memory buffer for " + lib_path);

// Create a new JIT library instance for this session and resolve symbols.
auto& jd = session.createBareJITDylib(std::string(lib_path));
auto loaded =
llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(),
data_layout.getGlobalPrefix());

if (!loaded)
throw std::runtime_error("Unable to load " + lib_path);
jd.addGenerator(std::move(*loaded));
cantFail(layer->add(jd, std::move(*memory_buffer)));
// If benchmarking, resolve shared libraries.
if (benchmark_info) {
for (const auto& lib_path: benchmark_info->shared_lib_paths) {
// For every library path, create a corresponding memory buffer.
auto memory_buffer = llvm::MemoryBuffer::getFile(lib_path);
if (!memory_buffer)
throw std::runtime_error("Unable to create memory buffer for " + lib_path);

// Create a new JIT library instance for this session and resolve symbols.
auto& jd = session.createBareJITDylib(std::string(lib_path));
auto loaded =
llvm::orc::DynamicLibrarySearchGenerator::Load(lib_path.data(),
data_layout.getGlobalPrefix());

if (!loaded)
throw std::runtime_error("Unable to load " + lib_path);
jd.addGenerator(std::move(*loaded));
cantFail(layer->add(jd, std::move(*memory_buffer)));
}
}

return layer;
};

Expand All @@ -146,7 +159,7 @@ void JITDriver::init(std::string features,
-> llvm::Expected<std::unique_ptr<llvm::orc::IRCompileLayer::IRCompiler>> {
// Create target machine with some features possibly turned off.
int opt_level_codegen = benchmark_info ? benchmark_info->opt_level_codegen : 0;
auto tm = create_target(&tm_builder, features, opt_level_codegen);
auto tm = create_target(&tm_builder, cpu, opt_level_codegen);

// Optimise the LLVM IR module and save it to .ll file if benchmarking.
if (benchmark_info) {
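The feature string assembled by get_cpu_features can also be inspected on its own. Below is a hedged, standalone sketch (not part of this commit) assuming LLVM ~12 header locations, which this diff's include of llvm/Support/Host.h suggests; in later LLVM releases these headers moved under llvm/TargetParser.

#include "llvm/ADT/StringMap.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

// Sketch: print the host CPU name and a "+feature,-feature,..." string of the
// same shape as the one handed to createTargetMachine above.
int main() {
    llvm::SubtargetFeatures features;
    llvm::StringMap<bool> host_features;
    if (llvm::sys::getHostCPUFeatures(host_features)) {
        for (auto& f: host_features)
            features.AddFeature(f.first(), f.second);
    }
    llvm::outs() << "cpu:      " << llvm::sys::getHostCPUName() << "\n";
    llvm::outs() << "features: " << features.getString() << "\n";
    return 0;
}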
26 changes: 12 additions & 14 deletions test/benchmark/jit_driver.hpp
@@ -17,6 +17,7 @@

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/Support/Host.h"

namespace nmodl {
namespace runner {
@@ -29,6 +30,9 @@ struct BenchmarkInfo {
/// Object file output directory.
std::string output_dir;

/// Shared libraries' paths to link against.
std::vector<std::string> shared_lib_paths;

/// Optimisation level for IR.
int opt_level_ir;

@@ -63,9 +67,7 @@ class JITDriver {
: module(std::move(m)) {}

/// Initializes the JIT driver.
void init(std::string features = "",
std::vector<std::string> lib_paths = {},
BenchmarkInfo* benchmark_info = nullptr);
void init(const std::string& cpu, BenchmarkInfo* benchmark_info = nullptr);

/// Lookups the entry-point without arguments in the JIT and executes it, returning the result.
template <typename ReturnType>
Expand Down Expand Up @@ -131,7 +133,7 @@ class TestRunner: public BaseRunner {
: BaseRunner(std::move(m)) {}

virtual void initialize_driver() {
driver->init();
driver->init(llvm::sys::getHostCPUName().str());
}
};

Expand All @@ -145,27 +147,23 @@ class BenchmarkRunner: public BaseRunner {
/// Benchmarking information passed to JIT driver.
BenchmarkInfo benchmark_info;

/// CPU features specified by the user.
std::string features;

/// Shared libraries' paths to link against.
std::vector<std::string> shared_lib_paths;
/// CPU to target.
std::string cpu;

public:
BenchmarkRunner(std::unique_ptr<llvm::Module> m,
std::string filename,
std::string output_dir,
std::string features = "",
std::string cpu,
std::vector<std::string> lib_paths = {},
int opt_level_ir = 0,
int opt_level_codegen = 0)
: BaseRunner(std::move(m))
, benchmark_info{filename, output_dir, opt_level_ir, opt_level_codegen}
, features(features)
, shared_lib_paths(lib_paths) {}
, cpu(cpu)
, benchmark_info{filename, output_dir, lib_paths, opt_level_ir, opt_level_codegen} {}

virtual void initialize_driver() {
driver->init(features, shared_lib_paths, &benchmark_info);
driver->init(cpu, &benchmark_info);
}
};

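Finally, a hypothetical usage sketch of the updated runner interface (the mod name, output directory, CPU, library paths, and optimisation levels are placeholders, not values from the repository): the CPU name and shared-library paths now reach JITDriver::init through the runner and BenchmarkInfo rather than through a raw feature string.

#include <memory>
#include <utility>

#include "llvm/IR/Module.h"

#include "jit_driver.hpp"  // the header shown above; the exact include path is an assumption

// Sketch: benchmark an already-generated LLVM module on a chosen CPU.
void run_benchmark(std::unique_ptr<llvm::Module> module) {
    nmodl::runner::BenchmarkRunner runner(std::move(module),
                                          /*filename=*/"hh",          // placeholder mod name
                                          /*output_dir=*/".",
                                          /*cpu=*/"skylake-avx512",   // placeholder CPU
                                          /*lib_paths=*/{},
                                          /*opt_level_ir=*/2,
                                          /*opt_level_codegen=*/2);
    runner.initialize_driver();  // forwards cpu and BenchmarkInfo to JITDriver::init
}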