From 58bb414699157b41b77880e02f1d5290b72eda78 Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 23 Jul 2024 10:31:29 -0300 Subject: [PATCH 1/2] Drop klpe_ prefix for extern function symbols Signed-off-by: Marcos Paulo de Souza --- libcextract/SymbolExternalizer.cpp | 27 +++++++++++++++++---------- libcextract/SymbolExternalizer.hh | 17 +++++++++++++++++ testsuite/linux/ibt-1.c | 2 +- testsuite/linux/ibt-2.c | 2 +- testsuite/linux/ibt-typeof-1.c | 4 ++-- 5 files changed, 38 insertions(+), 14 deletions(-) diff --git a/libcextract/SymbolExternalizer.cpp b/libcextract/SymbolExternalizer.cpp index 26f04d7..aefb5f1 100644 --- a/libcextract/SymbolExternalizer.cpp +++ b/libcextract/SymbolExternalizer.cpp @@ -121,23 +121,27 @@ class ExternalizerVisitor: public RecursiveASTVisitor ExternalizationType type = sym->ExtType; if (type == ExternalizationType::STRONG) { if (!sym->Done) { + /* If we are dealing with a symbol that is a function, and we are + * extracting code with IBT set, do not change the function now with a + * prefix. This helps to reduce the number of expanded headers, and also + * helps to match the code extracted with teh original source code. + */ std::string sym_name = decl->getName().str(); + const std::string new_name = SE.Ibt ? sym_name + : EXTERNALIZED_PREFIX + sym_name; + DeclaratorDecl *new_decl = SE.Create_Externalized_Var(decl, new_name); /* If we found the first instance of the function we want to externalize, then proceed to annotate the Decl so we can later decide what to do with it. */ - const std::string new_name = EXTERNALIZED_PREFIX + sym_name; - DeclaratorDecl *new_decl = SE.Create_Externalized_Var(decl, new_name); sym->NewName = new_name; sym->OldDecl = decl; sym->NewDecl = new_decl; - SE.Log.push_back({.OldName = sym_name, - .NewName = new_name, - .Type = type}); - - sym->Done = true; sym->Wrap = !SE.Ibt; + SE.Log.push_back({.OldName = sym_name, + .NewName = new_name, + .Type = type}); } } else if (type == ExternalizationType::WEAK) { /* Now checks if this is a function or a variable delcaration. */ @@ -224,7 +228,7 @@ class ExternalizerVisitor: public RecursiveASTVisitor /* We must be careful to ensure that the reference we got is actually written cleanly, e.g. it doesn't come from a macro expansion. */ - if (sym_name == PrettyPrint::Get_Source_Text(range)) { + if (sym_name == PrettyPrint::Get_Source_Text(range) && sym->Needs_Sym_Rename()) { /* Issue a text modification. */ SE.Replace_Text(range, sym->getUseName(), 100); } @@ -594,6 +598,8 @@ VarDecl *SymbolExternalizer::Create_Externalized_Var(DeclaratorDecl *decl, const sc ); + assert(ret); + /* return node. */ return ret; } @@ -722,7 +728,7 @@ void SymbolExternalizer::Rewrite_Macros(void) if (!maybe_macro && !MacroWalker::Is_Identifier_Macro_Argument(info, id_info)) { SymbolUpdateStatus *sym = getSymbolsUpdateStatus(id_info->getName()); - if (sym) + if (sym && sym->Needs_Sym_Rename()) Replace_Text(SourceRange(tok.getLocation(), tok.getLastLoc()), sym->getUseName(), 10); } } @@ -734,7 +740,8 @@ void SymbolExternalizer::Rewrite_Macros(void) for (auto &tok_range : ranges) { // At this point, tok_range will contain a valid symbol SymbolUpdateStatus *sym = getSymbolsUpdateStatus(tok_range.first); - Replace_Text(tok_range.second, sym->getUseName(), 10); + if (sym->Needs_Sym_Rename()) + Replace_Text(tok_range.second, sym->getUseName(), 10); } } } diff --git a/libcextract/SymbolExternalizer.hh b/libcextract/SymbolExternalizer.hh index 5840b15..3507d64 100644 --- a/libcextract/SymbolExternalizer.hh +++ b/libcextract/SymbolExternalizer.hh @@ -100,6 +100,23 @@ struct SymbolUpdateStatus return OldDecl && NewDecl && FirstUse; } + /* For IBT, NewDecl and OldDecl are the same if the symbols is a function, so + don't replace text if the name didn't change. */ + inline bool Needs_Sym_Rename(void) { + if (NewDecl == nullptr) + return true; + + return NewDecl->getName() != OldDecl->getName(); + /* + llvm::outs() << "XX OldName" << "\n"; + llvm::outs() << OldDecl->getName() << "\n"; + llvm::outs() << "XX NewName print" << "\n"; + NewDecl->print(llvm::outs(), 0); + llvm::outs() << "XX NewName name" << "\n"; + llvm::outs() << NewDecl->getName() << "\n"; + */ + } + void Dump(SourceManager &SM) { llvm::outs() << "SymbolUpdateStatus at 0x" << this << '\n'; diff --git a/testsuite/linux/ibt-1.c b/testsuite/linux/ibt-1.c index bbe9ee9..6d01d8a 100644 --- a/testsuite/linux/ibt-1.c +++ b/testsuite/linux/ibt-1.c @@ -14,6 +14,6 @@ int f(void) return 0; } -/* { dg-final { scan-tree-dump "u32 klpe_crc32c|u32 \(klpe_crc32c\)" } } */ +/* { dg-final { scan-tree-dump "u32 \(crc32c\)\(" } } */ /* { dg-final { scan-tree-dump "KLP_RELOC_SYMBOL\(libcrc32c, libcrc32c, crc32c\)" } } */ /* { dg-final { scan-tree-dump-not "\(\*klpe_crc32c\)" } } */ diff --git a/testsuite/linux/ibt-2.c b/testsuite/linux/ibt-2.c index bbe9ee9..6d01d8a 100644 --- a/testsuite/linux/ibt-2.c +++ b/testsuite/linux/ibt-2.c @@ -14,6 +14,6 @@ int f(void) return 0; } -/* { dg-final { scan-tree-dump "u32 klpe_crc32c|u32 \(klpe_crc32c\)" } } */ +/* { dg-final { scan-tree-dump "u32 \(crc32c\)\(" } } */ /* { dg-final { scan-tree-dump "KLP_RELOC_SYMBOL\(libcrc32c, libcrc32c, crc32c\)" } } */ /* { dg-final { scan-tree-dump-not "\(\*klpe_crc32c\)" } } */ diff --git a/testsuite/linux/ibt-typeof-1.c b/testsuite/linux/ibt-typeof-1.c index e7ff0db..80ccb07 100644 --- a/testsuite/linux/ibt-typeof-1.c +++ b/testsuite/linux/ibt-typeof-1.c @@ -15,7 +15,7 @@ int f(void) return 0; } -/* { dg-final { scan-tree-dump "u32 klpe_crc32c|u32 \(klpe_crc32c\)" } } */ +/* { dg-final { scan-tree-dump "u32 \(crc32c\)\(" } } */ /* { dg-final { scan-tree-dump "KLP_RELOC_SYMBOL\(libcrc32c, libcrc32c, crc32c\)" } } */ -/* { dg-final { scan-tree-dump "typeof\(klpe_crc32c\)" } } */ +/* { dg-final { scan-tree-dump "typeof\(crc32c\)" } } */ /* { dg-final { scan-tree-dump-not "\(\*klpe_crc32c\)" } } */ From a6408630a80797da3e7fb97f6a045cb09597c33b Mon Sep 17 00:00:00 2001 From: Marcos Paulo de Souza Date: Tue, 23 Jul 2024 17:42:38 -0300 Subject: [PATCH 2/2] WIP: IBT externalization no prefix Signed-off-by: Marcos Paulo de Souza --- libcextract/SymbolExternalizer.cpp | 73 +++++++++++++++++++++++++++--- libcextract/SymbolExternalizer.hh | 3 ++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/libcextract/SymbolExternalizer.cpp b/libcextract/SymbolExternalizer.cpp index aefb5f1..3050bd2 100644 --- a/libcextract/SymbolExternalizer.cpp +++ b/libcextract/SymbolExternalizer.cpp @@ -207,7 +207,7 @@ class ExternalizerVisitor: public RecursiveASTVisitor * Only execute the code in the visitor if we have already externalized the * symbol */ - if (sym == nullptr || !sym->Done) + if (sym == nullptr) return VISITOR_CONTINUE; /* Get the first effective use, which means an DeclRefExpr of a decl that @@ -283,6 +283,24 @@ bool SymbolExternalizer::Drop_Static(FunctionDecl *decl) return false; } +bool SymbolExternalizer::Add_Extern(FunctionDecl *decl) +{ + if (decl->isStatic()) { + auto ids = Get_Range_Of_Identifier(decl->getSourceRange(), StringRef("static")); + assert(ids.size() > 0 && "static decl without static keyword?"); + + SourceRange static_range = ids[0].second; + Replace_Text(static_range, "extern", 10); + + /* Update the storage class. */ + decl->setStorageClass(StorageClass::SC_Extern); + + return true; + } + + return false; +} + /* ---- Delta and TextModifications class ------ */ @@ -886,13 +904,54 @@ void SymbolExternalizer::Late_Externalize(void) SE.Remove_Text(sym->OldDecl->getSourceRange(), 1000); } } else { - /* Fallback to the old method of rewriting the declaration. */ - SE.Replace_Text(sym->OldDecl->getSourceRange(), outstr.str(), 1000); + if (SE.Ibt) { + /* Get the last usage (more recent decl), and check if it was the + * first usage. If yes, drop the declartion and use the NewDecl form + * SymbolUpdateStatus. + * + * For functions, drop the declarations and change the later + * prototypes to have the extern storage type. + * + * For variables, remove all references but the first one, because + * this will also be replaced by outstr (NewDecl from + * SymbolUpdateStatus). + */ + Decl *ibt_decl = sym->OldDecl->getMostRecentDecl(); + if (FunctionDecl *func = dyn_cast(ibt_decl)) { + while (func) { + if (!func->getPreviousDecl()) { + SE.Replace_Text(func->getSourceRange(), outstr.str(), 1000); + break; + } + + if (func->doesThisDeclarationHaveABody()) { + SE.Remove_Text(func->getSourceRange(), 1000); + } else { + Add_Extern(func); + } + + func = func->getPreviousDecl(); + } + } else if (VarDecl *var = dyn_cast(ibt_decl)) { + while (var) { + if (!var->getPreviousDecl()) { + SE.Replace_Text(var->getSourceRange(), outstr.str(), 1000); + break; + } + + SE.Remove_Text(var->getSourceRange(), 1000); + var = var->getPreviousDecl(); + } + } + } else { + /* Fallback to the old method of rewriting the declaration. */ + SE.Replace_Text(sym->OldDecl->getSourceRange(), outstr.str(), 1000); - /* Emit a warning for debuging purposes for now. */ - if (AllowLateExternalization) { - std::string msg = "LateLocation of " + sym->OldDecl->getName().str() + " is invalid\n"; - DiagsClass::Emit_Warn(msg); + /* Emit a warning for debuging purposes for now. */ + if (AllowLateExternalization) { + std::string msg = "LateLocation of " + sym->OldDecl->getName().str() + " is invalid\n"; + DiagsClass::Emit_Warn(msg); + } } } } diff --git a/libcextract/SymbolExternalizer.hh b/libcextract/SymbolExternalizer.hh index 3507d64..fe421e1 100644 --- a/libcextract/SymbolExternalizer.hh +++ b/libcextract/SymbolExternalizer.hh @@ -343,6 +343,9 @@ class SymbolExternalizer /* Drop `static` keyword in decl. */ bool Drop_Static(FunctionDecl *decl); + /* Replace `static` keyword in decl with extern. */ + bool Add_Extern(FunctionDecl *decl); + /** Commit changes to the loaded source file buffer. Should NOT modify the original file, only the content that was loaded in llvm's InMemory file system. */