Skip to content

Commit

Permalink
Support (optionally) emitting debuginfo w.r.t. LLVM source
Browse files Browse the repository at this point in the history
This change adds two new environment variables:
  - `JULIA_DUMP_IR` - when provided, this is a path that all emitted
    LLVM IR (post-optimization, just before machine code generation)
    will be saved to
  - `JULIA_DEBUGINFO` - when set to "LLVM-IR" this will run an additional
    pass on any emitted functions to rewrite their debuginfo to refer to
    the LLVM source, rather than the Julia source it was generated from

The `debugir` pass that rewrites the debuginfo is vendored from:
  https://github.com/vaivaswatha/debugir. For simplicity, this is just a
copy of the one file that we need for the pass.

Using both of these together allows `gdb` to open the dumped IR and means
you can step through LLVM IR line-by-line, print SSA values, etc. This can
be very useful for debugging segfaults, or issues in codegen.
  • Loading branch information
topolarity committed Dec 10, 2024
1 parent 29a7ce4 commit 59f5c1e
Show file tree
Hide file tree
Showing 8 changed files with 681 additions and 2 deletions.
2 changes: 1 addition & 1 deletion THIRDPARTY.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ for exceptions.

- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp and src/llvm-debugir.cpp) [UIUC]
- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
- [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
Expand Down
3 changes: 2 additions & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ RT_LLVMLINK :=
CG_LLVMLINK :=

ifeq ($(JULIACODEGEN),LLVM)
CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \
CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-debugir \
llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \
llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \
Expand Down Expand Up @@ -327,6 +327,7 @@ $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvm-codegen-shared.h
$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
$(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h
$(BUILDDIR)/llvm-debugir.o $(BUILDDIR)/llvm-debugir.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
$(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h
$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
$(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
Expand Down
18 changes: 18 additions & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10395,6 +10395,24 @@ extern "C" void jl_init_llvm(void)
#endif
#endif

// Choose what the emitted debuginfo refers to, based on the JULIA_DEBUGINFO env var.
// The value is matched case-insensitively; when the variable is unset or empty the
// execution engine's current mode is left untouched.
const char *debuginfo_mode = getenv("JULIA_DEBUGINFO"); // JULIA_DEBUGINFO="LLVM-IR" JULIA_DUMP_IR="path/to/dir"
if (debuginfo_mode) {
if (strcasecmp(debuginfo_mode, "julia-source") == 0)
jl_ExecutionEngine->get_debuginfo_mode() = jl_debuginfo_emission_mode_t::julia_source;
else if (strcasecmp(debuginfo_mode, "llvm-ir") == 0)
jl_ExecutionEngine->get_debuginfo_mode() = jl_debuginfo_emission_mode_t::llvm_ir;
// Any other non-empty value only produces a warning on stderr; the mode is not changed.
else if (strcmp(debuginfo_mode, "") != 0)
fprintf(stderr, "warning: unexpected argument to 'JULIA_DEBUGINFO' env var: \"%s\"\n", debuginfo_mode);
}

// JULIA_DUMP_IR, when set to a non-empty string, names the directory that IR dumps go to.
// The value is tilde-expanded and made absolute before it is stored on the execution engine.
// NOTE(review): a failure from make_absolute is not checked here, and nothing in this
// snippet verifies that the directory exists — confirm that is acceptable.
const char *dump_debugir_directory = getenv("JULIA_DUMP_IR");
if (dump_debugir_directory && strcmp(dump_debugir_directory, "") != 0) {
llvm::SmallString<PATH_MAX> AbsoluteFileName{};
llvm::sys::fs::expand_tilde(Twine(dump_debugir_directory), AbsoluteFileName);
llvm::sys::fs::make_absolute(AbsoluteFileName);
jl_ExecutionEngine->get_dump_debugir_directory() = AbsoluteFileName.str();
}

cl::PrintOptionValues();
}

Expand Down
72 changes: 72 additions & 0 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1314,6 +1314,39 @@ static void registerRTDyldJITObject(orc::MaterializationResponsibility &MR,
#endif

namespace {

// Build the path of the ".ll" file associated with module `M`.
//
// The result is "<dump directory><separator><name>.ll", where the directory prefix is
// only added when the execution engine's dump directory is non-empty. `<name>` is the
// sanitized name of the first function in `M` that has a definition and is not a
// "jfptr_" wrapper; if there is no such function, a process-wide counter value is used.
//
// NOTE(review): function-derived names are not made unique here, so two modules whose
// first function sanitizes to the same string map to the same path — confirm intended.
static std::string createDebugIRName(Module const &M) JL_NOTSAFEPOINT {
    std::string path = jl_ExecutionEngine->get_dump_debugir_directory();
    if (!path.empty()) {
        path += llvm::sys::path::get_separator();
    }

    std::string filename{};
    for (auto &F : M.functions()) {
        if (F.isDeclaration() || F.getName().starts_with("jfptr_"))
            continue;

        // Sanitize the filename by allowing only "[a-zA-Z0-9_\-\.]*"
        //
        // The predicate takes `unsigned char` on purpose: std::isalnum has undefined
        // behavior when handed a negative value, which a plain `char` holding a
        // non-ASCII byte (e.g. part of a UTF-8 identifier) can be.
        filename = F.getName().str();
        std::replace_if(std::begin(filename), std::end(filename), [](unsigned char ch) {
            return !(std::isalnum(ch) || ch == '_' || ch == '-' || ch == '.');
        }, '_');

        // Only the first suitable function names the file.
        break;
    }

    // If we couldn't find a name to use, just use some unique integer
    if (filename.empty()) {
        static _Atomic(uint64_t) counter{1};
        filename = std::to_string(jl_atomic_fetch_add_relaxed(&counter, 1));
    }

    path += filename;
    path += ".ll";

    return path;
}

static std::unique_ptr<TargetMachine> createTargetMachine() JL_NOTSAFEPOINT {
TargetOptions options = TargetOptions();

Expand Down Expand Up @@ -1509,6 +1542,44 @@ namespace {
JL_TIMING(LLVM_JIT, JIT_Opt);
//Run the optimization
(****PMs[PoolIdx]).run(M);

// Post-optimization hook for the optional "debug the LLVM IR" workflow:
//   debug_ir - debuginfo should be rewritten to refer to the LLVM IR text
//   dump_ir  - a dump directory was configured, so the IR text is written to disk
bool debug_ir = jl_ExecutionEngine->get_debuginfo_mode() == jl_debuginfo_emission_mode_t::llvm_ir;
bool dump_ir = !jl_ExecutionEngine->get_dump_debugir_directory().empty();
if (!M.functions().empty() && (debug_ir || dump_ir)) {

// Generate a debug filename for the emitted IR
std::string debug_name = createDebugIRName(M);

// If requested, rewrite all debuginfo to reference the LLVM IR itself
std::unique_ptr<Module> displayM;
if (debug_ir) {
// displayM is the debug-stripped 'source' that the debuginfo now refers to
displayM = debugir::createDebugInfo(M, "", debug_name);
}

// Emit the IR that was compiled
if (dump_ir) {
// NOTE(review): EC is never inspected after the stream is constructed, so a failed
// open (for example a dump directory that does not exist) is not handled here.
std::error_code EC;
raw_fd_ostream OS_dbg(debug_name, EC, sys::fs::OF_Text);
// Prefer the display module when the debuginfo was rewritten, since that is the
// text the new debuginfo refers to; otherwise print the module as-is.
if (displayM) {
displayM->print(OS_dbg, nullptr);
} else {
M.print(OS_dbg, nullptr);
}

// Emit the "instrumented" IR (unneeded unless you are debugging the debuginfo
// or running the instrumented IR in isolation)
// The `0 &&` keeps this branch compiled but disabled. Inside it, EC and OS_dbg
// shadow the outer variables of the same names.
if (0 && displayM) {
// Replace ".ll" suffix with ".dbg.ll"
debug_name.resize(debug_name.size() - 3);
debug_name += ".dbg.ll";
std::error_code EC;
raw_fd_ostream OS_dbg(debug_name, EC, sys::fs::OF_Text);
M.print(OS_dbg, nullptr);
}
}
}

assert(!verifyLLVMIR(M));
}

Expand Down Expand Up @@ -1898,6 +1969,7 @@ JuliaOJIT::JuliaOJIT()
JD(ES.createBareJITDylib("JuliaOJIT")),
ExternalJD(ES.createBareJITDylib("JuliaExternal")),
DLSymOpt(std::make_unique<DLSymOptimizer>(false)),
debuginfo_mode(jl_debuginfo_emission_mode_t::julia_source),
#ifdef JL_USE_JITLINK
MemMgr(createJITLinkMemoryManager()),
ObjectLayer(ES, *MemMgr),
Expand Down
32 changes: 32 additions & 0 deletions src/jitlayers.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,28 @@ using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
using OptimizerResultT = Expected<orc::ThreadSafeModule>;
using SharedBytesT = StringSet<MaxAlignedAllocImpl<sizeof(StringSet<>::MapEntryTy)>>;

// Selects what the debuginfo attached to emitted code refers to.
enum class jl_debuginfo_emission_mode_t {
    // Standard behavior: debuginfo refers to the Julia source.
    // Whatever debuginfo the IR carries from Julia source is preserved and emitted.
    julia_source = 0,

    // Reserved for a future `julia_ir` mode (not supported yet): debuginfo would be
    // replaced with references to the Julia SSAIR, treating that IR as the
    // 'program source', with a text copy of the IR written to
    // 'dump_debugir_directory' for use in a debugger.

    // Debuginfo refers to the LLVM IR.
    // All debuginfo is replaced with references to the LLVM IR itself, treating the
    // LLVM IR as the 'program source'. A text copy of the emitted IR goes to
    // 'dump_debugir_directory' so the IR source is available when using a debugger.
    llvm_ir = 1,
};

class JuliaOJIT {
private:
// any verification the user wants to do when adding an OwningResource to the pool
Expand Down Expand Up @@ -582,9 +604,16 @@ class JuliaOJIT {
// Accessor for the `dump_llvm_opt_stream` member; returns it by mutable reference.
jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
return dump_llvm_opt_stream;
}
// Accessor for the directory that IR dumps are written to. Returns a mutable
// reference so the value can be assigned through it; an empty string means no
// dump directory is configured.
std::string &get_dump_debugir_directory() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
return dump_debugir_directory;
}
std::string getMangledName(StringRef Name) JL_NOTSAFEPOINT;
std::string getMangledName(const GlobalValue *GV) JL_NOTSAFEPOINT;

// Accessor for the debuginfo emission mode. Returns a mutable reference so the
// mode can be assigned through it.
jl_debuginfo_emission_mode_t &get_debuginfo_mode() JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER {
return debuginfo_mode;
}

// Note that this is a potential safepoint due to jl_get_library_ and jl_dlsym calls
// but may be called from inside safe-regions due to jit compilation locks
void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER;
Expand All @@ -610,6 +639,9 @@ class JuliaOJIT {
jl_locked_stream dump_emitted_mi_name_stream;
jl_locked_stream dump_compiles_stream;
jl_locked_stream dump_llvm_opt_stream;
std::string dump_debugir_directory;

jl_debuginfo_emission_mode_t debuginfo_mode;

std::mutex llvm_printing_mutex{};
SmallVector<std::function<void()>, 0> PrintLLVMTimers;
Expand Down
9 changes: 9 additions & 0 deletions src/llvm-codegen-shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -527,3 +527,12 @@ void ConstantUses<U>::forward()
}
}
}

namespace debugir {

// Attaches debug info to M, assuming it is parsed from Directory/Filename.
// Returns a module for display in debugger devoid of any debug info.
//
// M         - module to annotate; it is modified (debug info is attached to it).
// Directory - directory part of the location the debug info should point at.
// Filename  - file part of the location the debug info should point at.
//
// The returned module is the debug-info-free text the new debug info refers to,
// so it is what should be written to Directory/Filename for a debugger to read.
std::unique_ptr<llvm::Module>
createDebugInfo(llvm::Module &M, std::string Directory, std::string Filename);

} // namespace debugir
Loading

0 comments on commit 59f5c1e

Please sign in to comment.