2

I have this C code:

#include <inttypes.h>
#include <stdio.h>

uint8_t count1(uint32_t x) {  // Counts the 1 bits in x using an MS-style __asm block.
    int out;  // receives the final count from AL
    __asm {
        mov edx, [x]  // edx = working copy of x, shifted right each iteration
        mov al, 0  // al = running count of 1 bits
next:   cmp edx, 0  // loop ends once no set bits remain
        je done
        mov cl, dl  // cl = low byte of the remaining value
        and cl, 1  // keep only bit 0
        add al, cl  // count += bit 0
        shr edx, 1  // discard the bit just examined
        jmp next
done:
        mov out, al  // store the count for the C code below
    }
    return out;
}

int main() {  // Driver: counts the 1 bits of a fixed test value and prints the result.
    uint32_t x = 0x5789ABCD;
    uint8_t cnt = count1(x);
    printf("Number of 1s in 0x%X: %hhu\n", x, cnt);
}

I want to write a plugin for LLVM that will transform it behind the scene to something like this:

#include <inttypes.h>
#include <stdio.h>

uint8_t count1(uint32_t x) {  // Same bit counter, with a .loc line marker before every instruction.
    int out;
    __asm {
        .file 1 "main.c"  // register main.c as file number 1 for the .loc directives
        .loc 1 7  // the instruction that follows maps to line 7 of file 1
        mov edx, [x]
        .loc 1 8
        mov al, 0
        .loc 1 9
next:   cmp edx, 0
        .loc 1 10
        je done
        .loc 1 11
        mov cl, dl
        .loc 1 12
        and cl, 1
        .loc 1 13
        add al, cl
        .loc 1 14
        shr edx, 1
        .loc 1 15
        jmp next
done:
        .loc 1 17  // line 16 (the done: label) has no instruction, so it is skipped
        mov out, al
    }
    return out;
}

int main() {  // Driver: counts the 1 bits of a fixed test value and prints the result.
    uint32_t x = 0x5789ABCD;
    uint8_t cnt = count1(x);
    printf("Number of 1s in 0x%X: %hhu\n", x, cnt);
}

This will enable me to single-step through the __asm block in a debugger. Notice that the line numbers increment by only 1. That is intentional, because if the plugin modifies the AST, the line numbers in the original file won't change.

I wrote a plugin that can detect these statements. I can compile it, and I can compile this program with clang with that plugin enabled.

But when I attempt to call Rewriter.ReplaceText(SourceRange(StartLoc, EndLoc), ModifiedAsm); the plugin segfaults.

I have tried checking if the ranges are valid (they are). I have tried checking if they don't come from a macro expansion (isMacroID() returns 0).

I have narrowed it down to the Rewriter.getRangeSize(..) function. But I have no idea what I'm doing wrong.

This is the relevant part of the plugin:

// AST visitor that looks for MS-style inline assembly statements (MSAsmStmt)
// and asks the Rewriter to replace their source range with the asm string.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
  public:
    // Stores the ASTContext pointer and a reference to the caller's Rewriter.
    // Neither is owned here, so both must outlive this visitor.
    explicit MyASTVisitor(ASTContext *Context, Rewriter &R)
        : Context(Context), TheRewriter(R) {}

    // Visitor hook for statements. Only MSAsmStmt nodes are acted on; every
    // other statement is ignored. Always returns true.
    bool VisitStmt(Stmt *S) {
        if (auto *Asm = dyn_cast<MSAsmStmt>(S)) {
            // Get the assembly string
            StringRef AsmString = Asm->getAsmString();

            // Source range covering the whole asm statement.
            SourceLocation StartLoc = Asm->getBeginLoc();
            SourceLocation EndLoc = Asm->getEndLoc();

            // This will segfault
            // The replacement text is the unmodified asm string for now.
            // NOTE(review): Rewriter::ReplaceText is documented to return true
            // on failure, so a printed 0 would mean the edit was accepted —
            // confirm against the Rewriter API docs.
            bool result = TheRewriter.ReplaceText(SourceRange(StartLoc, EndLoc),
                                                  AsmString);
            llvm::errs() << "Replace result: " << result << "\n";
        }
        return true;
    }

  private:
    ASTContext *Context;
    // Non-owning reference; dangles if the referenced Rewriter is destroyed first.
    Rewriter &TheRewriter;
};

Here is the error:

0.      Program arguments: /opt/homebrew/Cellar/llvm@16/16.0.6_1/bin/clang-16 -cc1 -triple x86_64-apple-macosx15.0.0 -Wundef-prefix=TARGET_OS_ -Werror=undef-prefix -Wdeprecated-objc-isa-usage -Werror=deprecated-objc-isa-usage -emit-obj -mrelax-all -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name testfile.c -mrelocation-model pic -pic-level 2 -mframe-pointer=all -ffp-contract=on -fno-rounding-math -funwind-tables=2 -fcompatibility-qualified-id-block-type-checking -fvisibility-inlines-hidden-static-local-var -target-cpu penryn -tune-cpu generic -mllvm -treat-scalable-fixed-error-as-warning -debug-info-kind=standalone -dwarf-version=4 -debugger-tuning=lldb -target-linker-version 1053.12 -fcoverage-compilation-dir=/Users/jurajpetras/dev/asm_debug -resource-dir /opt/homebrew/Cellar/llvm@16/16.0.6_1/lib/clang/16 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk -internal-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/local/include -internal-isystem /opt/homebrew/Cellar/llvm@16/16.0.6_1/lib/clang/16/include -internal-externc-isystem /Library/Developer/CommandLineTools/SDKs/MacOSX14.sdk/usr/include -O0 -fdebug-compilation-dir=/Users/jurajpetras/dev/asm_debug -ferror-limit 19 -stack-protector 1 -fblocks -fencode-extended-block-signature -fregister-global-dtors-with-atexit -fgnuc-version=4.2.1 -fmax-type-align=16 -fcolor-diagnostics -fasm-blocks -load ./build/libasm_debug.dylib -add-plugin asm_debug -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/folders/88/yqhhhgms02vcwxdp_x2dgbp40000gn/T/testfile-0c3ab4.o -x c testfile.c
1.      <eof> parser at end of file
Stack dump without symbol names (ensure you have llvm-symbolizer in your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point to it):
0  libLLVM.dylib            0x0000000110344b20 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 56
1  libLLVM.dylib            0x00000001103439a4 llvm::sys::RunSignalHandlers() + 112
2  libLLVM.dylib            0x00000001103451b4 SignalHandler(int) + 360
3  libsystem_platform.dylib 0x0000000199b08184 _sigtramp + 56
4  libasm_debug.dylib       0x00000001091de914 clang::Rewriter::getRangeSize(clang::CharSourceRange const&, clang::Rewriter::RewriteOptions) const + 212
5  libasm_debug.dylib       0x00000001091deb74 clang::Rewriter::getRangeSize(clang::SourceRange, clang::Rewriter::RewriteOptions) const + 36
6  libasm_debug.dylib       0x0000000108acc49c clang::Rewriter::ReplaceText(clang::SourceRange, llvm::StringRef) + 100
7  libasm_debug.dylib       0x0000000108acc30c (anonymous namespace)::MyASTVisitor::VisitStmt(clang::Stmt*) + 180
8  libasm_debug.dylib       0x0000000108acc22c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromStmt(clang::Stmt*) + 36
9  libasm_debug.dylib       0x0000000108acc170 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromAsmStmt(clang::AsmStmt*) + 48
10 libasm_debug.dylib       0x0000000108aa3b3c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::WalkUpFromMSAsmStmt(clang::MSAsmStmt*) + 48
11 libasm_debug.dylib       0x0000000108aa3978 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseMSAsmStmt(clang::MSAsmStmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 80
12 libasm_debug.dylib       0x0000000108aa1184 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::dataTraverseNode(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 144
13 libasm_debug.dylib       0x0000000108a67288 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseStmt(clang::Stmt*, llvm::SmallVectorImpl<llvm::PointerIntPair<clang::Stmt*, 1u, bool, llvm::PointerLikeTypeTraits<clang::Stmt*>, llvm::PointerIntPairInfo<clang::Stmt*, 1u, llvm::PointerLikeTypeTraits<clang::Stmt*>>>>*) + 672
14 libasm_debug.dylib       0x0000000108b4ef78 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseFunctionHelper(clang::FunctionDecl*) + 1404
15 libasm_debug.dylib       0x0000000108a61c3c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseFunctionDecl(clang::FunctionDecl*) + 128
16 libasm_debug.dylib       0x0000000108a58914 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDecl(clang::Decl*) + 2852
17 libasm_debug.dylib       0x0000000108ae473c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDeclContextHelper(clang::DeclContext*) + 216
18 libasm_debug.dylib       0x0000000108a6684c clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseTranslationUnitDecl(clang::TranslationUnitDecl*) + 564
19 libasm_debug.dylib       0x0000000108a58f14 clang::RecursiveASTVisitor<(anonymous namespace)::MyASTVisitor>::TraverseDecl(clang::Decl*) + 4388
20 libasm_debug.dylib       0x0000000108a57d8c (anonymous namespace)::MyASTConsumer::HandleTranslationUnit(clang::ASTContext&) + 52
21 libclang-cpp.dylib       0x00000001066383a8 clang::MultiplexConsumer::HandleTranslationUnit(clang::ASTContext&) + 52
22 libclang-cpp.dylib       0x0000000104911fa8 clang::ParseAST(clang::Sema&, bool, bool) + 752
23 libclang-cpp.dylib       0x00000001065ff750 clang::FrontendAction::Execute() + 112
24 libclang-cpp.dylib       0x0000000106582da8 clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) + 868
25 libclang-cpp.dylib       0x0000000106677440 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) + 524
26 clang-16                 0x00000001004c2ca4 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) + 1464
27 clang-16                 0x00000001004bfb50 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&) + 948
28 clang-16                 0x00000001004beddc clang_main(int, char**) + 11192
29 dyld                     0x0000000199750274 start + 2840
clang-16: error: unable to execute command: Segmentation fault: 11

I can read the AST however I want. I can print whatever I want. The segfault only happens when I call the Rewriter.

I am using M2 MacBook Pro and compiling like this:

clang -Xclang -load -Xclang ./build/libasm_debug.dylib -Xclang -add-plugin -Xclang asm_debug testfile.c -target x86_64-apple-macos -fasm-blocks -g -O0

The whole code can be found here: https://github.com/Hackder/asm_debug

Is my approach even correct? If so, why is it hitting a segfault?

Minimum reproducible example:

src/main.cpp:

#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/FrontendPluginRegistry.h"
#include "clang/Rewrite/Core/Rewriter.h"
#include "clang/Rewrite/Frontend/Rewriters.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;

namespace {

// AST visitor that looks for MS-style inline assembly statements (MSAsmStmt)
// and asks the Rewriter to replace their source range with the asm string.
class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
  public:
    // Stores the ASTContext pointer and a reference to the caller's Rewriter.
    // Neither is owned here, so both must outlive this visitor.
    explicit MyASTVisitor(ASTContext *Context, Rewriter &R)
        : Context(Context), TheRewriter(R) {}

    // Visitor hook for statements. Only MSAsmStmt nodes are acted on; every
    // other statement is ignored. Always returns true.
    bool VisitStmt(Stmt *S) {
        if (auto *Asm = dyn_cast<MSAsmStmt>(S)) {
            // Get the assembly string
            StringRef AsmString = Asm->getAsmString();

            // Source range covering the whole asm statement.
            SourceLocation StartLoc = Asm->getBeginLoc();
            SourceLocation EndLoc = Asm->getEndLoc();

            // This will segfault
            // The replacement text is the unmodified asm string for now.
            // NOTE(review): Rewriter::ReplaceText is documented to return true
            // on failure, so a printed 0 would mean the edit was accepted —
            // confirm against the Rewriter API docs.
            bool result = TheRewriter.ReplaceText(SourceRange(StartLoc, EndLoc),
                                                  AsmString);
            llvm::errs() << "Replace result: " << result << "\n";
        }
        return true;
    }

  private:
    ASTContext *Context;
    // Non-owning reference; dangles if the referenced Rewriter is destroyed first.
    Rewriter &TheRewriter;
};

// AST consumer that runs MyASTVisitor over the whole translation unit once
// parsing has finished.
class MyASTConsumer : public ASTConsumer {
  public:
    // Builds the visitor from the given context and Rewriter.
    // NOTE(review): R is received by value, while MyASTVisitor keeps a
    // Rewriter&. The visitor therefore refers to this constructor parameter,
    // which stops existing when the constructor returns.
    explicit MyASTConsumer(ASTContext *Context, Rewriter R)
        : Visitor(Context, R) {}

    // Starts the traversal at the translation unit declaration, so every
    // declaration and statement in the file is visited.
    virtual void HandleTranslationUnit(ASTContext &Context) override {
        Visitor.TraverseDecl(Context.getTranslationUnitDecl());
    }

  private:
    MyASTVisitor Visitor;
};

// Plugin action: creates the AST consumer that performs the rewriting.
class MyPluginAction : public PluginASTAction {
  protected:
    // Points the member Rewriter at the compiler instance's source manager and
    // language options, then passes it to a newly created MyASTConsumer.
    std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                   llvm::StringRef) override {
        TheRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
        return std::make_unique<MyASTConsumer>(&CI.getASTContext(),
                                               TheRewriter);
    }

    // Accepts any plugin arguments without inspecting them.
    bool ParseArgs(const CompilerInstance &CI,
                   const std::vector<std::string> &args) override {
        return true;
    }

  private:
    // Owned by the action object, so it lives only as long as the action does.
    Rewriter TheRewriter;
};

} // namespace

// Registers MyPluginAction with the frontend plugin registry under the name
// "asm_debug", together with a short description string.
static FrontendPluginRegistry::Add<MyPluginAction>
    X("asm_debug", "Inject debug steps into inline assembly");

CMakeLists.txt:

# Builds the asm_debug Clang plugin (src/main.cpp) as a shared library.
cmake_minimum_required(VERSION 3.13)

project(MyClangPlugin LANGUAGES CXX C)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 20)
# The plugin itself is built for arm64.
set(CMAKE_OSX_ARCHITECTURES "arm64")

# Locate the installed Clang package and add its headers to the include path.
find_package(Clang REQUIRED CONFIG)
include_directories(${CLANG_INCLUDE_DIRS})

# Locate the installed LLVM package and make its CMake helper modules available.
find_package(LLVM REQUIRED CONFIG)
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)

add_definitions(${LLVM_DEFINITIONS})
include_directories(${LLVM_INCLUDE_DIRS})

add_library(asm_debug SHARED src/main.cpp)
# NOTE(review): -fno-rtti is presumably there to match how LLVM/Clang were
# built — confirm for the toolchain in use.
set_target_properties(asm_debug PROPERTIES
    COMPILE_FLAGS "-fno-rtti"
    LINK_FLAGS "-shared"
)

# NOTE(review): LLVM is listed in both target_link_libraries calls below.
target_link_libraries(asm_debug PRIVATE LLVM clang)
target_link_libraries(asm_debug PRIVATE LLVM clangSupport clangFrontend clangAST clangBasic clangRewrite)

Tested with clang 16.0.6 and 19.1.1

1
  • @ScottMcPeak Thanks for the feedback, I edited the question to include the minimum reproducible example. Commented Oct 14, 2024 at 7:59

1 Answer 1

2

First bug: Passing Rewriter by value

The first bug is here:

class MyASTConsumer : public ASTConsumer {
  public:
    explicit MyASTConsumer(ASTContext *Context, Rewriter R)   // <--- bug
        : Visitor(Context, R) {}

This accepts a Rewriter by value, then passes it as a reference to the constructor of Visitor. Then, R is destroyed, leaving Visitor with a dangling reference to a destroyed object.

To fix this, change R to be a reference:

    explicit MyASTConsumer(ASTContext *Context, Rewriter &R)
                                                         ^ inserted

Second bug: Plugin action gets destroyed early

After fixing the above, the plugin works properly with -plugin, but with -add-plugin, it still crashes at ReplaceText. The cause is that the Clang plugin infrastructure creates and destroys the PluginASTAction objects seemingly haphazardly (certainly nothing about this is explained in the documentation), and in the -add-plugin case, the object used (along with its Rewriter) gets destroyed before the rest of the code runs (whereas it survives with -plugin).

The fix is to not store any data in the PluginASTAction, and instead move that data into the ASTConsumer (or somewhere else that will survive). Here is a diff relative to the original (unfixed) code that solves that problem, also incidentally removing the first bug:

@@ -44,14 +44,16 @@ class MyASTVisitor : public RecursiveASTVisitor<MyASTVisitor> {
 
 class MyASTConsumer : public ASTConsumer {
   public:
-    explicit MyASTConsumer(ASTContext *Context, Rewriter R)
-        : Visitor(Context, R) {}
+    explicit MyASTConsumer(ASTContext *Context)
+        : TheRewriter(Context->getSourceManager(), Context->getLangOpts()),
+          Visitor(Context, TheRewriter) {}
 
     virtual void HandleTranslationUnit(ASTContext &Context) override {
         Visitor.TraverseDecl(Context.getTranslationUnitDecl());
     }
 
   private:
+    Rewriter TheRewriter;
     MyASTVisitor Visitor;
 };
 
@@ -59,18 +61,13 @@ class MyPluginAction : public PluginASTAction {
   protected:
     std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                    llvm::StringRef) override {
-        TheRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
-        return std::make_unique<MyASTConsumer>(&CI.getASTContext(),
-                                               TheRewriter);
+        return std::make_unique<MyASTConsumer>(&CI.getASTContext());
     }
 
     bool ParseArgs(const CompilerInstance &CI,
                    const std::vector<std::string> &args) override {
         return true;
     }
-
-  private:
-    Rewriter TheRewriter;
 };
 
 } // namespace

Post-fixes test

With the code in the question, I get a very similar crash to that shown in the question when run on Linux. After applying the above diff, the plugin appears to work as intended (on Linux at least) when provided the test input from the question:

$ g++ -c -o main.o src/main.cpp  -I/home/scott/opt/clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04/include -std=c++17 -fPIC -fno-rtti -Wall
$ g++ -o rewrite.so main.o  -fPIC -shared
$ rm test.o
$ /home/scott/opt/clang+llvm-16.0.0-x86_64-linux-gnu-ubuntu-18.04/bin/clang -fplugin=./rewrite.so -Xclang -add-plugin -Xclang asm_debug -c test.c -fasm-blocks
Replace result: 0
$ ls test.o
test.o

This sequence confirms that test.o gets created, which is the primary difference between -plugin (which only runs the plugin) and -add-plugin (which runs the plugin along with the rest of the Clang back end).

Sign up to request clarification or add additional context in comments.

6 Comments

Thanks for spotting this, but even with this change I observe the same behavior. It still crashes with the same segfault. I retested everything (valid ranges, macro expansions, clang versions...) and as far as I can tell it behaves the same. I tried creating the rewriter right before the rewrite, but when I do so, the rewrite operation returns false, even though I checked both positions with TheRewriter.isRewritable.
@Juraj I get essentially the same crash with the original code, and a successful run with the fix. I don't see how the same crash with the same trace could persist if the fix has been properly applied.
When running clang with -plugin (as you mentioned in your answer), the plugin runs fine. However, notice that I have used -add-plugin, because I can't get clang to output anything with the -plugin option. When I run clang as you did: With -c, clang exits successfully and nothing is emitted. No object file is to be found (even with -o ./test.o). Without the -c it fails with: ld: file is empty in '/var/folders/88/yqhhhgms02vcwxdp_x2dgbp40000gn/T/hello_world-a7df91.o'. With -add-plugin, the plugin segfaults.
@Juraj Got it, I overlooked that you were using -add-plugin. There is another bug (although it is debatable whether it is in your code or Clang), and I've edited the answer to address that too.
The Rewriter is meant to be used as part of a source-to-source transform. If you want to make changes to the generated object code without first creating intermediate source code, the usual method is to make an LLVM pass. Gather the needed information from the AST during the AST consumer pass, then during the LLVM pass, use that information to make changes to the LLVM IR.
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.