LLVM的MCJIT源码分析

本文使用的LLVM版本基于LLVM 7.1.0的release包,只添加了一些便于理解的日志,没有改动功能,代码已上传到GitHub:https://github.com/tedcy/llvm7_test

编译环境使用conanio/gcc5:2.91镜像,执行项目中的build.sh即可编译

类图

RuntimeDyldImpl是RuntimeDyld以PIMPL模式持有的实现类,因此类图中省略了RuntimeDyld

类图里面有让人眼花缭乱的MemMgr和Resolver传递,实际上都是指向的RTDyldMemoryManager

classDiagram
    direction TB

    class EngineBuilder {
        +setEngineKind(kind)
        +setMCJITMemoryManager(RTDyldMemoryManager* mcjmm) MemMgr=mcjmm,Resolver=mcjmm
        +ExecutionEngine* create()
    }

    class ExecutionEngine {
        <<abstract>>
        +virtual addModule(...)
        +virtual finalizeObject()
        +virtual uint64 getFunctionAddress(name)
    }

    class MCJIT {
        +addModule(...) override
        +finalizeObject() override
        +uint64 getFunctionAddress(name) override
    }
    
    class RuntimeDyldImpl {
        <<合并RuntimeDyld>>
        +RuntimeDyldImpl(MemoryManager &MemMgr, \nJITSymbolResolver &Resolver)
        +loadObjectImpl() 加载重定位,先计算全局符号表的函数和变量,后调用processRelocationRef计算重定位的符号\n整个过程在findOrEmitSection处理段,会调用MemoryManager的allocateCodeSection和allocateDataSection分配段
        +virtual processRelocationRef()
        +resolveRelocations() 实施重定位,先调用resolveExternalSymbols处理外部符号,后调用resolveRelocationList处理内部符号
        -resolveRelocationList() 调用resolveRelocation
        -resolveExternalSymbols() 调用Resolver.lookup()
        +virtual resolveRelocation()
    }
    
    class RuntimeDyldELF {
        <<封装了各平台的重定位细节>>
        +resolveRelocation() override
        +processRelocationRef() override
    }
    
    class JITSymbolResolver {
        <<abstract>>
        +virtual lookup()
    }
    
    class LegacyJITSymbolResolver {
        <<abstract>>
        +virtual findSymbolInLogicalDylib()
        +virtual findSymbol()
        +lookup() 调用findSymbolInLogicalDylib和findSymbol
    }
    
    class LinkingSymbolResolver {
        <<代理模式,先查MCJIT,再查持有的LegacyJITSymbolResolver>>
        +findSymbolInLogicalDylib() override
        +findSymbol() override
        +LinkingSymbolResolver(MCJIT &Parent,\n LegacyJITSymbolResolver* Resolver)
        -MCJIT &ParentEngine
        -LegacyJITSymbolResolver* ClientResolver
    }
    
    class MemoryManager {
        <<abstract>>
        +virtual allocateCodeSection()
        +virtual allocateDataSection()
    }
    
    EngineBuilder *-- MCJITMemoryManager: MemMgr
    EngineBuilder *-- LegacyJITSymbolResolver: Resolver
    EngineBuilder ..> ExecutionEngine : create(),这里调用的MCJIT#58;#58;\ncreateJIT(this->MemMgr, this->Resolver)
    
    MCJIT *-- MCJITMemoryManager : MemMgr(MemMgr)
    MCJIT *-- RuntimeDyldImpl : Dyld = RuntimeDyldELF#58;#58;create\n(this->MemMgr, this->Resolver)
    MCJIT *-- LinkingSymbolResolver : Resolver(*this, Resolver)
    
    LegacyJITSymbolResolver <|-- LinkingSymbolResolver
    JITSymbolResolver <|-- LegacyJITSymbolResolver
   
    RuntimeDyldImpl *-- MemoryManager : MemMgr
    RuntimeDyldImpl *-- JITSymbolResolver : Resolver
    
    RuntimeDyldImpl <|-- RuntimeDyldELF 

    ExecutionEngine <|-- MCJIT
    RTDyldMemoryManager <|-- SectionMemoryManager
    MCJITMemoryManager <|-- RTDyldMemoryManager
    LegacyJITSymbolResolver <|-- RTDyldMemoryManager
    MemoryManager <|-- MCJITMemoryManager

demo代码

时序图展示代码在https://github.com/tedcy/llvm7_test/blob/master/demo/engine/main.cpp

核心逻辑如下(context、errMsg、fileStr等变量的定义在此省略):

// Returns the square of `val`. Declared with C linkage so the symbol keeps the
// plain, unmangled name "pow2".
extern "C" int pow2(int val) {
    const int squared = val * val;
    return squared;
}
int main() {
std::unique_ptr<llvm::Module> module =
llvm::make_unique<llvm::Module>("MyModule", context);

llvm::ExecutionEngine* ee =
llvm::EngineBuilder(move(module))
.setEngineKind(llvm::EngineKind::JIT)
.setMCJITMemoryManager(std::unique_ptr<llvm::RTDyldMemoryManager>(
new llvm::SectionMemoryManager))
.setErrorStr(&errMsg)
.setVerifyModules(true)
.create();

ee->addGlobalMapping("pow2", (uint64_t)&pow2); //添加已知符号到内存表

LLVM_ObjectCache objCache(fileStr); //fileStr是a.o的二进制Object数据
ee->setObjectCache(&objCache); //设置使用预编译的二进制Object数据,不进行JIT编译
ee->finalizeObject(); //加载Object
ee->setObjectCache(nullptr); //设置为空

uint64_t addr = ee->getFunctionAddress("pow4"); //寻找Object中的pow4函数地址

typedef int (*pow4_t)(int);
pow4_t fn = (pow4_t)addr; //函数地址转换成函数指针
auto result = fn(2); //执行函数
cout << result << endl;
return 0;
}

demo时序图

计算重定位

sequenceDiagram
main ->>+ MCJIT : addGlobalMapping(Name="pow2", Addr=&pow2)
MCJIT -> MCJIT : EEState.getGlobalAddressMap()[Name] = Addr
MCJIT ->>- main : return
main ->>+ MCJIT : setObjectCache(&objCache)
MCJIT -> MCJIT : ObjCache = NewCache
MCJIT ->>- main : return
main ->>+ MCJIT : finalizeObject()
loop each M in MCJIT.OwnedModules
MCJIT ->>+ MCJIT : generateCodeForModule(M)
MCJIT ->>+ MCJIT : 如果缓存存在,使用缓存:<br>if(ObjCache) ObjectToLoad=ObjCache->getObject(M)<br>如果缓存不存在,JIT编译:<br>if (!ObjectToLoad) ObjectToLoad=emitObject(M)<br>从Object文件创建ObjectFile:<br>LoadedObject=createObjectFile(ObjectToLoad->getMemBufferRef())
MCJIT ->>+ RuntimeDyld : Dyld = RuntimeDyldELF::create(MemMgr, Resolver)
RuntimeDyld ->>+ RuntimeDyldELF : Dyld.loadObject(LoadedObject)
RuntimeDyldELF ->>+ RuntimeDyldELF : loadObjectImpl(LoadedObject)
loop for I in [LoadedObject.symbol_begin(),LoadedObject.symbol_end()]
RuntimeDyldELF ->> RuntimeDyldELF : 处理Weak和Common符号(Weak符号只是去掉其Weak标记,Common符号在C++中用不到)
alt 计算全局符号表的函数和变量存下来<br>if (SymType == ST_Function or ST_Data)
RuntimeDyldELF ->>+ RuntimeDyldELF : findOrEmitSection(I->getSection())
RuntimeDyldELF ->>+ SectionMemoryManager : MemMgr.allocateDataSection()<br> or MemMgr.allocateCodeSection()
SectionMemoryManager ->>- RuntimeDyldELF : return
RuntimeDyldELF ->>- RuntimeDyldELF : return
RuntimeDyldELF ->> RuntimeDyldELF : GlobalSymbolTable[Name]=SymbolTableEntry(I->getSection(), SectOffset)
end
end
loop each section_iterator Si in [Obj.section_begin(), Obj.section_end()]<br>计算重定位的符号
RuntimeDyldELF ->>+ RuntimeDyldELF : findOrEmitSection(*Si)
RuntimeDyldELF ->>+ SectionMemoryManager : MemMgr.allocateDataSection()<br> or MemMgr.allocateCodeSection()
SectionMemoryManager ->>- RuntimeDyldELF : return
RuntimeDyldELF ->>- RuntimeDyldELF : return
loop each relocation_iterator I in [Si.relocation_begin(), Si.relocation_end()]
RuntimeDyldELF ->>+ RuntimeDyldELF : processRelocationRef()
alt 如果GlobalSymbolTable能找到
RuntimeDyldELF ->> RuntimeDyldELF : Value.SectionID = SymInfo.getSectionID()<br>Value.Offset = SymInfo.getOffset()<br>Value.Addend = SymInfo.getOffset() + Addend
else 如果SymType是Section
RuntimeDyldELF ->> RuntimeDyldELF : Value.SectionID = Symbol->getSection()<br>Value.Addend = Addend
end
alt 如果Arch是Triple::x86_64 且 RelType是最简单类型(例如R_X86_64_64)
RuntimeDyldELF ->>+ RuntimeDyldELF : processSimpleRelocation(...)
alt 如果Value.SymbolName不为空
RuntimeDyldELF ->>+ RuntimeDyldELF : addRelocationForSymbol(...)
alt 如果全局表GlobalSymbolTable找不到这个符号
RuntimeDyldELF ->> RuntimeDyldELF : 建立倒排索引(目标符号名为主键)<br>ExternalSymbolRelocations[Value.SymbolName].push_back(RE);
else 能找到,改下Addend
RuntimeDyldELF ->> RuntimeDyldELF : 建立倒排索引(目标SectionID为主键)<br>Relocations[Value.SectionID].push_back(RECopy);
end
RuntimeDyldELF ->>- RuntimeDyldELF : return
else 如果Value.SymbolName为空
RuntimeDyldELF ->> RuntimeDyldELF : 建立倒排索引(目标SectionID为主键)<br>Relocations[Value.SectionID].push_back(RE);
end
RuntimeDyldELF ->>- RuntimeDyldELF : return
end
RuntimeDyldELF ->>- RuntimeDyldELF : return
end
end
RuntimeDyldELF ->>- RuntimeDyldELF : return
RuntimeDyldELF ->>- RuntimeDyld : return
RuntimeDyld ->>- MCJIT : return
MCJIT ->>- MCJIT : return
MCJIT ->>- MCJIT : return
end
MCJIT ->>+ MCJIT : finalizeLoadedModules()省略,下一节展示
MCJIT ->>- MCJIT : return
MCJIT ->>- main : return
main ->>+ MCJIT : getFunctionAddress("pow4")
MCJIT ->>- main : return

应用重定位

sequenceDiagram
main ->>+ MCJIT : finalizeObject()
loop each M in MCJIT.OwnedModules
MCJIT ->>+ MCJIT : generateCodeForModule(M)省略,上一节展示
MCJIT ->>- MCJIT : return
end
MCJIT ->>+ MCJIT : finalizeLoadedModules()
MCJIT ->>+ RuntimeDyldELF : 应用重定位:resolveRelocations()
RuntimeDyldELF ->>+ RuntimeDyldELF : 应用外部符号重定位:resolveExternalSymbols()
RuntimeDyldELF ->> RuntimeDyldELF: NewSymbols = ExternalSymbolRelocations
RuntimeDyldELF ->>+ LegacyJITSymbolResolver : Resolver.lookup(NewSymbols)
loop SymName in NewSymbols
LegacyJITSymbolResolver ->> LinkingSymbolResolver : 从本地动态库找(MCJIT不支持,固定返回nullptr)<br>findSymbolInLogicalDylib(SymName)
end
LegacyJITSymbolResolver ->>- RuntimeDyldELF : return
RuntimeDyldELF ->>- RuntimeDyldELF : return
RuntimeDyldELF ->>- MCJIT : return
MCJIT ->>- MCJIT : return
MCJIT ->>- main : return

从demo日志看实际重定位流程