I have this C code:
#include <inttypes.h>
#include <stdio.h>
uint8_t count1(uint32_t x) { // Returns the number of 1 bits in x, counted by the inline assembly below.
int out; // NOTE(review): declared int, but only the 8-bit AL register is stored into it at the end of the asm block
__asm {
mov edx, [x] // edx = working copy of x
mov al, 0 // al = running count of set bits
next: cmp edx, 0 // loop until no set bits remain in edx
je done
mov cl, dl // cl = low byte of the remaining value
and cl, 1 // keep only its lowest bit
add al, cl // add that bit (0 or 1) to the count
shr edx, 1 // move the next bit into the lowest position
jmp next
done:
mov out, al // hand the count back to the C variable
}
return out; // converted to uint8_t by the return type
}
int main() {
    /* Sample input whose set bits are counted by count1(). */
    uint32_t x = 0x5789ABCD;
    uint8_t cnt = count1(x);
    /* Use the <inttypes.h> format macros so the conversion specifiers always
       match the fixed-width argument types, whatever they are typedef'd to. */
    printf("Number of 1s in 0x%" PRIX32 ": %" PRIu8 "\n", x, cnt);
    return 0;
}
I want to write a plugin for LLVM that will transform it behind the scene to something like this:
#include <inttypes.h>
#include <stdio.h>
// Desired result of the plugin: the same bit-counting routine, with a line
// marker in front of each instruction so a debugger can step through them.
uint8_t count1(uint32_t x) {
int out;
__asm {
// Declares source file number 1, which the .loc directives below refer to.
.file 1 "main.c"
// Each ".loc 1 N" associates the instruction that follows it with line N of
// file 1; N is the instruction's line in the original, untransformed file.
.loc 1 7
mov edx, [x]
.loc 1 8
mov al, 0
.loc 1 9
next: cmp edx, 0
.loc 1 10
je done
.loc 1 11
mov cl, dl
.loc 1 12
and cl, 1
.loc 1 13
add al, cl
.loc 1 14
shr edx, 1
.loc 1 15
jmp next
// The bare label gets no marker of its own; the store after it is line 17.
done:
.loc 1 17
mov out, al
}
return out;
}
int main() {
    /* Sample input whose set bits are counted by count1(). */
    uint32_t x = 0x5789ABCD;
    uint8_t cnt = count1(x);
    /* Use the <inttypes.h> format macros so the conversion specifiers always
       match the fixed-width argument types, whatever they are typedef'd to. */
    printf("Number of 1s in 0x%" PRIX32 ": %" PRIu8 "\n", x, cnt);
    return 0;
}
This will enable me to single-step through the __asm block in a debugger. Notice that the line numbers increment only by 1. That is intentional, because if the plugin modifies only the AST, the line numbers in the original file won't change.
I wrote a plugin that can detect these statements. I can compile it, and I can compile this program with clang with that plugin enabled.
But when I attempt to call Rewriter.ReplaceText(SourceRange(StartLoc, EndLoc), ModifiedAsm); the plugin segfaults.
I have tried checking whether the ranges are valid (they are). I have also checked that they don't come from a macro expansion (isMacroID() returns 0).
I have narrowed it down to the Rewriter.getRangeSize(..) function. But I have no idea what I'm doing wrong.
This is the relevant part of the plugin:
/// AST visitor that looks for MS-style inline assembly statements and replaces
/// the source range of each one through the Rewriter it was constructed with.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
public:
  /// \param Context AST context of the translation unit being visited.
  /// \param R       Rewriter to edit through; only a reference to it is kept.
  explicit MyASTVisitor(ASTContext *Context, Rewriter &R)
      : Context(Context), TheRewriter(R) {}

  /// Invoked for every statement; acts only on MSAsmStmt nodes.
  /// Always returns true so that the traversal keeps going.
  bool VisitStmt(Stmt *S) {
    auto *AsmBlock = dyn_cast<MSAsmStmt>(S);
    if (!AsmBlock)
      return true;

    // The statement's assembly string is used as the replacement text.
    const StringRef Replacement = AsmBlock->getAsmString();
    // Range running from the statement's begin location to its end location.
    const SourceRange WholeStmt(AsmBlock->getBeginLoc(),
                                AsmBlock->getEndLoc());

    // The question reports a segfault inside this call (in getRangeSize).
    // NOTE(review): Rewriter edits presumably return true on failure - confirm.
    const bool Outcome = TheRewriter.ReplaceText(WholeStmt, Replacement);
    llvm::errs() << "Replace result: " << Outcome << "\n";
    return true;
  }

private:
  ASTContext *Context;
  Rewriter &TheRewriter;
};
Here is the error:
0. Program arguments: /opt/homebrew/Cellar/llvm@16/16.0.6_1/bin/clang-16 -cc1 -triple x86_64-apple-macosx15.0.0 -Wundef-prefix=TARGET_OS_ -Werror=undef-prefix -Wdeprecated-objc-isa-usage -Werror=deprecated-objc-isa-usage -emit-obj -mrelax-all -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name testfile.c -mrelocation-model pic -pic-level 2 -mframe-pointer=all -ffp-contract=on -fno-rounding-math -funwind-tables=2 -fcompatibility-qualified-id-block-type-checking -fvisibility-inlines-hidden-static-local-var -target-cpu penryn -tune-cpu generic -mllvm -treat-scalable-fixed-error-as-warning -debug-info-kind=standalone -dwarf-version=4 -debugger-tuning=lldb -target-linker-version 1053.12 -fcoverage-compilation-dir=/Users/jurajpetras/dev/asm_debug -resource-dir /opt/homebrew/Cellar/llvm@16/16.0.6_1/lib/clang/16 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk -internal-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/local/include -internal-isystem /opt/homebrew/Cellar/llvm@16/16.0.6_1/lib/clang/16/include -internal-externc-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include -O0 -fdebug-compilation-dir=/Users/jurajpetras/dev/asm_debug -ferror-limit 19 -stack-protector 1 -fblocks -fencode-extended-block-signature -fregister-global-dtors-with-atexit -fgnuc-version=4.2.1 -fmax-type-align=16 -fcolor-diagnostics -fasm-blocks -load ./build/libasm_debug.dylib -add-plugin asm_debug -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/folders/88/yqhhhgms02vcwxdp_x2dgbp40000gn/T/testfile-0c3ab4.o -x c testfile.c
1. <eof> parser at end of file
Stack dump without symbol names (ensure you have llvm-symbolizer in your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point to it):
0 libLLVM.dylib 0x0000000110344b20 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 56
1 libLLVM.dylib 0x00000001103439a4 llvm::sys::RunSignalHandlers() + 112
2 libLLVM.dylib 0x00000001103451b4 SignalHandler(int) + 360
3 libsystem_platform.dylib 0x0000000199b08184 _sigtramp + 56
4 libasm_debug.dylib 0x00000001091de914 clang::Rewriter::getRangeSize(clang::CharSourceRange const&, clang::Rewriter::RewriteOptions) const + 212
5 libasm_debug.dylib 0x00000001091deb74 clang::Rewriter::getRangeSize(clang::SourceRange, clang::Rewriter::RewriteOptions) const + 36
6 libasm_debug.dylib 0x0000000108acc49c clang::Rewriter::ReplaceText(clang::SourceRange, llvm::StringRef) + 100
7 libasm_debug.dylib 0x0000000108acc30c (anonymous namespace)::MyASTVisitor::VisitStmt(clang::Stmt*) + 180
8 libasm_debug.dylib 0x0000000108acc22c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromStmt(clang::Stmt*) + 36
9 libasm_debug.dylib 0x0000000108acc170 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromAsmStmt(clang::AsmStmt*) + 48
10 libasm_debug.dylib 0x0000000108aa3b3c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromMSAsmStmt(clang::MSAsmStmt*) + 48
11 libasm_debug.dylib 0x0000000108aa3978 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseMSAsmStmt(clang::MSAsmStmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 80
12 libasm_debug.dylib 0x0000000108aa1184 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::dataTraverseNode(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 144
13 libasm_debug.dylib 0x0000000108a67288 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 672
14 libasm_debug.dylib 0x0000000108b4ef78 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseFunctionHelper(clang::FunctionDecl*) + 1404
15 libasm_debug.dylib 0x0000000108a61c3c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseFunctionDecl(clang::FunctionDecl*) + 128
16 libasm_debug.dylib 0x0000000108a58914 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDecl(clang::Decl*) + 2852
17 libasm_debug.dylib 0x0000000108ae473c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDeclContextHelper(clang::DeclContext*) + 216
18 libasm_debug.dylib 0x0000000108a6684c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseTranslationUnitDecl(clang::TranslationUnitDecl*) + 564
19 libasm_debug.dylib 0x0000000108a58f14 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDecl(clang::Decl*) + 4388
20 libasm_debug.dylib 0x0000000108a57d8c (anonymous namespace)::MyASTConsumer::HandleTranslationUnit(clang::ASTContext&) + 52
21 libclang-cpp.dylib 0x00000001066383a8 clang::MultiplexConsumer::HandleTranslationUnit(clang::ASTContext&) + 52
22 libclang-cpp.dylib 0x0000000104911fa8 clang::ParseAST(clang::Sema&, bool, bool) + 752
23 libclang-cpp.dylib 0x00000001065ff750 clang::FrontendAction::Execute() + 112
24 libclang-cpp.dylib 0x0000000106582da8 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) + 868
25 libclang-cpp.dylib 0x0000000106677440 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) + 524
26 clang-16 0x00000001004c2ca4 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) + 1464
27 clang-16 0x00000001004bfb50 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&) + 948
28 clang-16 0x00000001004beddc clang_main(int, char**) + 11192
29 dyld 0x0000000199750274 start + 2840
clang-16: error: unable to execute command: Segmentation fault: 11
I can read the AST however I want. I can print whatever I want. The segfault only happens when I call the Rewriter.
I am using M2 MacBook Pro and compiling like this:
clang -Xclang -load -Xclang ./build/libasm_debug.dylib -Xclang -add-plugin -Xclang asm_debug testfile.c -target x86_64-apple-macos -fasm-blocks -g -O0
The whole code can be found here: https://github.com/Hackder/asm_debug
Is my approach even correct? If so, why is it hitting a segfault?
Minimum reproducible example:
src/main.cpp:
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/Rewrite/Frontend/Rewriters.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
namespace {
/// AST visitor that looks for MS-style inline assembly statements and replaces
/// the source range of each one through the Rewriter it was constructed with.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
public:
  /// \param Context AST context of the translation unit being visited.
  /// \param R       Rewriter to edit through; only a reference to it is kept.
  explicit MyASTVisitor(ASTContext *Context, Rewriter &R)
      : Context(Context), TheRewriter(R) {}

  /// Invoked for every statement; acts only on MSAsmStmt nodes.
  /// Always returns true so that the traversal keeps going.
  bool VisitStmt(Stmt *S) {
    auto *AsmBlock = dyn_cast<MSAsmStmt>(S);
    if (!AsmBlock)
      return true;

    // The statement's assembly string is used as the replacement text.
    const StringRef Replacement = AsmBlock->getAsmString();
    // Range running from the statement's begin location to its end location.
    const SourceRange WholeStmt(AsmBlock->getBeginLoc(),
                                AsmBlock->getEndLoc());

    // The question reports a segfault inside this call (in getRangeSize).
    // NOTE(review): Rewriter edits presumably return true on failure - confirm.
    const bool Outcome = TheRewriter.ReplaceText(WholeStmt, Replacement);
    llvm::errs() << "Replace result: " << Outcome << "\n";
    return true;
  }

private:
  ASTContext *Context;
  Rewriter &TheRewriter;
};
class MyASTConsumer : public ASTConsumer {
public:
explicit MyASTConsumer(ASTContext *Context, Rewriter R)
: Visitor(Context, R) {}
virtual void HandleTranslationUnit(ASTContext &Context) override {
Visitor.TraverseDecl(Context.getTranslationUnitDecl());
}
private:
MyASTVisitor Visitor;
};
class MyPluginAction : public PluginASTAction {
protected:
std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
llvm::StringRef) override {
TheRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
return std::make_unique<MyASTConsumer>(&CI.getASTContext(),
TheRewriter);
}
bool ParseArgs(const CompilerInstance &CI,
const std::vector<std::string> &args) override {
return true;
}
private:
Rewriter TheRewriter;
};
} // namespace
// Registers MyPluginAction with the frontend plugin registry under the name
// "asm_debug", which is the name passed to -add-plugin on the command line.
static FrontendPluginRegistry::Add<MyPluginAction>
X("asm_debug", "Inject debug steps into inline assembly");
CMakeLists.txt:
cmake_minimum_required(VERSION 3.13)

# CMAKE_OSX_ARCHITECTURES has to be set before project(), because it can
# influence how the toolchain and its flags are configured.
# NOTE(review): arm64 presumably matches the architecture of the clang binary
# that loads the plugin, not the -target of the code being compiled - confirm.
set(CMAKE_OSX_ARCHITECTURES "arm64")

project(MyClangPlugin LANGUAGES CXX C)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)

find_package(Clang REQUIRED CONFIG)
find_package(LLVM REQUIRED CONFIG)
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)

# LLVM_DEFINITIONS is one space-separated string; split it into a list so each
# definition reaches the compiler as a separate argument.
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND "${LLVM_DEFINITIONS}")
add_definitions(${LLVM_DEFINITIONS_LIST})
include_directories(${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS})

# SHARED already produces a loadable library (libasm_debug.dylib on macOS), so
# no explicit "-shared" link flag is needed.
add_library(asm_debug SHARED src/main.cpp)

# Follow the RTTI setting LLVM itself was built with instead of forcing it.
if(NOT LLVM_ENABLE_RTTI)
  target_compile_options(asm_debug PRIVATE -fno-rtti)
endif()

# Prefer the shared Clang library when this LLVM install provides it, so the
# plugin does not carry a second, statically linked copy of the Clang
# libraries next to the one the host clang already has loaded. Otherwise fall
# back to the individual component libraries.
if(TARGET clang-cpp)
  target_link_libraries(asm_debug PRIVATE LLVM clang-cpp)
else()
  target_link_libraries(asm_debug PRIVATE LLVM clang)
  target_link_libraries(asm_debug PRIVATE LLVM clangSupport clangFrontend clangAST clangBasic clangRewrite)
endif()
Tested with clang 16.0.6 and 19.1.1