[PseudoProbe] Extend to skip instrumenting probe into the dests of invoke (#79919)
Previously we only skipped instrumenting a probe in the `unwind` (`KnownColdBlock`) block; this PR extends that to skip both EH flows from `invoke`, i.e. it also skips the `normal` dest. For more context: when doing call-to-invoke conversion, the block is split by the `invoke` and two extra blocks (`normal` and `unwind`) are added. With this PR, the instrumentation is the same as it was before the call-to-invoke conversion. One significant benefit is that this can help mitigate the "unstable IR" issue (https://discourse.llvm.org/t/ipo-for-linkonce-odr-functions/69404): the two versions now get the same probe instrumentation and are expected to have the same checksum. To achieve the same checksum, some tweaks are needed: - It now also skips incrementing the probe ID for a skipped probe. - The checksum is also computed based on the CFG that skips the EH edges. We observed that this fixes ~5% of mismatched samples.
This commit is contained in:
parent
e93b5f5a47
commit
b8cc3ba409
@ -79,7 +79,6 @@ static void computeEHOnlyBlocks(FunctionT &F, DenseSet<BlockT *> &EHBlocks) {
|
||||
}
|
||||
}
|
||||
|
||||
EHBlocks.clear();
|
||||
for (auto Entry : Statuses) {
|
||||
if (Entry.second == EH)
|
||||
EHBlocks.insert(Entry.first);
|
||||
|
@ -81,8 +81,17 @@ private:
|
||||
uint64_t getFunctionHash() const { return FunctionHash; }
|
||||
uint32_t getBlockId(const BasicBlock *BB) const;
|
||||
uint32_t getCallsiteId(const Instruction *Call) const;
|
||||
void computeCFGHash();
|
||||
void computeProbeIdForBlocks();
|
||||
void findUnreachableBlocks(DenseSet<BasicBlock *> &BlocksToIgnore);
|
||||
void findInvokeNormalDests(DenseSet<BasicBlock *> &InvokeNormalDests);
|
||||
void computeBlocksToIgnore(DenseSet<BasicBlock *> &BlocksToIgnore,
|
||||
DenseSet<BasicBlock *> &BlocksAndCallsToIgnore);
|
||||
void computeProbeIdForCallsites(
|
||||
const DenseSet<BasicBlock *> &BlocksAndCallsToIgnore);
|
||||
const Instruction *
|
||||
getOriginalTerminator(const BasicBlock *Head,
|
||||
const DenseSet<BasicBlock *> &BlocksToIgnore);
|
||||
void computeCFGHash(const DenseSet<BasicBlock *> &BlocksToIgnore);
|
||||
void computeProbeIdForBlocks(const DenseSet<BasicBlock *> &BlocksToIgnore);
|
||||
void computeProbeIdForCallsites();
|
||||
|
||||
Function *F;
|
||||
|
@ -173,21 +173,114 @@ SampleProfileProber::SampleProfileProber(Function &Func,
|
||||
BlockProbeIds.clear();
|
||||
CallProbeIds.clear();
|
||||
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
|
||||
computeProbeIdForBlocks();
|
||||
computeProbeIdForCallsites();
|
||||
computeCFGHash();
|
||||
|
||||
DenseSet<BasicBlock *> BlocksToIgnore;
|
||||
DenseSet<BasicBlock *> BlocksAndCallsToIgnore;
|
||||
computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore);
|
||||
|
||||
computeProbeIdForBlocks(BlocksToIgnore);
|
||||
computeProbeIdForCallsites(BlocksAndCallsToIgnore);
|
||||
computeCFGHash(BlocksToIgnore);
|
||||
}
|
||||
|
||||
// Two purposes to compute the blocks to ignore:
|
||||
// 1. Reduce the IR size.
|
||||
// 2. Make the instrumentation (checksum) stable, e.g. the frontend may
|
||||
// generate unstable IR while optimizing nounwind attribute, some versions are
|
||||
// optimized with the call-to-invoke conversion, while other versions do not.
|
||||
// This discrepancy in probe ID could cause profile mismatching issues.
|
||||
// Note that those ignored blocks are either cold blocks or new split blocks
|
||||
// whose original blocks are instrumented, so it shouldn't degrade the profile
|
||||
// quality.
|
||||
// Fills the two ignore sets consumed by the probe-ID and CFG-hash steps.
//   BlocksAndCallsToIgnore: EH-only blocks plus non-entry blocks without
//                           predecessors.
//   BlocksToIgnore:         everything in BlocksAndCallsToIgnore plus the
//                           blocks collected by findInvokeNormalDests().
void SampleProfileProber::computeBlocksToIgnore(
    DenseSet<BasicBlock *> &BlocksToIgnore,
    DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) {
  // Cold code first: EH-only blocks and blocks without predecessors. Both
  // their block probes and their callsite probes are dropped.
  computeEHOnlyBlocks(*F, BlocksAndCallsToIgnore);
  findUnreachableBlocks(BlocksAndCallsToIgnore);

  // A block ignored for callsites is ignored for block probes as well.
  for (BasicBlock *ColdBlock : BlocksAndCallsToIgnore)
    BlocksToIgnore.insert(ColdBlock);

  // Call-to-invoke conversion: keep the probe id and callsite id identical
  // before and after the block split. Only the head block keeps its block
  // probe; the normal dests are added to the block-only ignore set. Callsite
  // probes are treated differently from block probes here: the split adds no
  // extra callsite to the normal dests, so their callsites are not ignored.
  findInvokeNormalDests(BlocksToIgnore);
}
|
||||
|
||||
// Unreachable blocks and calls are always cold, ignore them.
|
||||
// Adds every block of F that is not the entry block and has no predecessors
// to BlocksToIgnore. Existing members of the set are left in place.
void SampleProfileProber::findUnreachableBlocks(
    DenseSet<BasicBlock *> &BlocksToIgnore) {
  BasicBlock *const Entry = &F->getEntryBlock();
  for (BasicBlock &Block : *F) {
    // The entry block never has predecessors but is always kept.
    if (&Block == Entry)
      continue;
    if (pred_size(&Block) != 0)
      continue;
    BlocksToIgnore.insert(&Block);
  }
}
|
||||
|
||||
// In call-to-invoke conversion, basic block can be split into multiple blocks,
|
||||
// only instrument probe in the head block, ignore the normal dests.
|
||||
// Collects, for every block of F terminated by an invoke, the invoke's normal
// destination into InvokeNormalDests. From that destination it then walks
// backwards through single-predecessor blocks, adding each predecessor that
// has exactly one successor.
void SampleProfileProber::findInvokeNormalDests(
    DenseSet<BasicBlock *> &InvokeNormalDests) {
  for (BasicBlock &Block : *F) {
    auto *Invoke = dyn_cast<InvokeInst>(Block.getTerminator());
    if (!Invoke)
      continue;

    BasicBlock *Cursor = Invoke->getNormalDest();
    InvokeNormalDests.insert(Cursor);

    // The normal dest and the try/catch block are connected by an
    // unconditional branch, so follow the unique-predecessor chain for as
    // long as that predecessor has a single successor.
    for (;;) {
      if (pred_size(Cursor) != 1)
        break;
      BasicBlock *OnlyPred = *pred_begin(Cursor);
      if (succ_size(OnlyPred) != 1)
        break;
      InvokeNormalDests.insert(OnlyPred);
      Cursor = OnlyPred;
    }
  }
}
|
||||
|
||||
// The call-to-invoke conversion splits the original block into a list of blocks,
|
||||
// we need to compute the hash using the original block's successors to keep the
|
||||
// CFG Hash consistent. For a given head block, we keep searching the
|
||||
// successor (normal dest or unconditional branch dest) to find the tail block,
|
||||
// the tail block's successors are the original block's successors.
|
||||
// Returns the terminator of the tail block reached from Head. The walk steps
// through an invoke's normal destination, and through a sole successor when
// that successor is a member of BlocksToIgnore. If neither step applies,
// Head's own terminator is returned.
const Instruction *SampleProfileProber::getOriginalTerminator(
    const BasicBlock *Head, const DenseSet<BasicBlock *> &BlocksToIgnore) {
  const Instruction *Term = Head->getTerminator();

  // An invoke ends the block only because of the split: continue with its
  // normal destination.
  if (const auto *Invoke = dyn_cast<InvokeInst>(Term))
    return getOriginalTerminator(Invoke->getNormalDest(), BlocksToIgnore);

  // More than one (or no) successor: this is the tail block.
  if (succ_size(Head) != 1)
    return Term;

  // Follow the unconditional branch only while it leads into an ignored block.
  const BasicBlock *OnlySucc = *succ_begin(Head);
  if (!BlocksToIgnore.contains(OnlySucc))
    return Term;
  return getOriginalTerminator(OnlySucc, BlocksToIgnore);
}
|
||||
|
||||
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
|
||||
// value of each BB in the CFG. The higher 32 bits record the number of edges
|
||||
// preceded by the number of indirect calls.
|
||||
// This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
|
||||
void SampleProfileProber::computeCFGHash() {
|
||||
void SampleProfileProber::computeCFGHash(
|
||||
const DenseSet<BasicBlock *> &BlocksToIgnore) {
|
||||
std::vector<uint8_t> Indexes;
|
||||
JamCRC JC;
|
||||
for (auto &BB : *F) {
|
||||
for (BasicBlock *Succ : successors(&BB)) {
|
||||
if (BlocksToIgnore.contains(&BB))
|
||||
continue;
|
||||
|
||||
auto *TI = getOriginalTerminator(&BB, BlocksToIgnore);
|
||||
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
|
||||
auto *Succ = TI->getSuccessor(I);
|
||||
auto Index = getBlockId(Succ);
|
||||
// Skip ignored blocks (zero ID) to avoid an unstable checksum.
|
||||
if (Index == 0)
|
||||
continue;
|
||||
for (int J = 0; J < 4; J++)
|
||||
Indexes.push_back((uint8_t)(Index >> (J * 8)));
|
||||
}
|
||||
@ -207,23 +300,23 @@ void SampleProfileProber::computeCFGHash() {
|
||||
<< ", Hash = " << FunctionHash << "\n");
|
||||
}
|
||||
|
||||
void SampleProfileProber::computeProbeIdForBlocks() {
|
||||
DenseSet<BasicBlock *> KnownColdBlocks;
|
||||
computeEHOnlyBlocks(*F, KnownColdBlocks);
|
||||
// Insert pseudo probe to non-cold blocks only. This will reduce IR size as
|
||||
// well as the binary size while retaining the profile quality.
|
||||
void SampleProfileProber::computeProbeIdForBlocks(
|
||||
const DenseSet<BasicBlock *> &BlocksToIgnore) {
|
||||
for (auto &BB : *F) {
|
||||
++LastProbeId;
|
||||
if (!KnownColdBlocks.contains(&BB))
|
||||
BlockProbeIds[&BB] = LastProbeId;
|
||||
if (BlocksToIgnore.contains(&BB))
|
||||
continue;
|
||||
BlockProbeIds[&BB] = ++LastProbeId;
|
||||
}
|
||||
}
|
||||
|
||||
void SampleProfileProber::computeProbeIdForCallsites() {
|
||||
void SampleProfileProber::computeProbeIdForCallsites(
|
||||
const DenseSet<BasicBlock *> &BlocksAndCallsToIgnore) {
|
||||
LLVMContext &Ctx = F->getContext();
|
||||
Module *M = F->getParent();
|
||||
|
||||
for (auto &BB : *F) {
|
||||
if (BlocksAndCallsToIgnore.contains(&BB))
|
||||
continue;
|
||||
for (auto &I : BB) {
|
||||
if (!isa<CallBase>(I))
|
||||
continue;
|
||||
|
@ -12,8 +12,8 @@
|
||||
; RUN: llvm-lto -thinlto-action=import %t3.bc -thinlto-index=%t3.index.bc -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN
|
||||
|
||||
|
||||
; CHECK-NOT: {i64 6699318081062747564, i64 4294967295, !"foo"
|
||||
; CHECK: !{i64 -2624081020897602054, i64 281479271677951, !"main"
|
||||
; CHECK-NOT: {i64 6699318081062747564, i64 [[#]], !"foo"
|
||||
; CHECK: !{i64 -2624081020897602054, i64 [[#]], !"main"
|
||||
|
||||
; WARN: warning: Pseudo-probe ignored: source module '{{.*}}' is compiled with -fpseudo-probe-for-profiling while destination module '{{.*}}' is not
|
||||
|
||||
|
@ -18,7 +18,7 @@ entry:
|
||||
to label %ret unwind label %lpad
|
||||
|
||||
ret:
|
||||
; CHECK: call void @llvm.pseudoprobe
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe
|
||||
ret void
|
||||
|
||||
lpad: ; preds = %entry
|
||||
|
155
llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll
Normal file
155
llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll
Normal file
@ -0,0 +1,155 @@
|
||||
; REQUIRES: x86_64-linux
|
||||
; RUN: opt < %s -passes=pseudo-probe -S -o - | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
$__clang_call_terminate = comdat any
|
||||
|
||||
@x = dso_local global i32 0, align 4, !dbg !0
|
||||
|
||||
; Function Attrs: mustprogress noinline nounwind uwtable
|
||||
define dso_local void @_Z3barv() #0 personality ptr @__gxx_personality_v0 !dbg !14 {
|
||||
entry:
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 1
|
||||
%0 = load volatile i32, ptr @x, align 4, !dbg !17, !tbaa !19
|
||||
%tobool = icmp ne i32 %0, 0, !dbg !17
|
||||
br i1 %tobool, label %if.then, label %if.else, !dbg !23
|
||||
|
||||
if.then: ; preds = %entry
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2
|
||||
invoke void @_Z3foov()
|
||||
to label %invoke.cont unwind label %terminate.lpad, !dbg !24
|
||||
|
||||
invoke.cont: ; preds = %if.then
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
|
||||
invoke void @_Z3bazv()
|
||||
to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26
|
||||
|
||||
invoke.cont1: ; preds = %invoke.cont
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
|
||||
br label %if.end, !dbg !27
|
||||
|
||||
if.else: ; preds = %entry
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3
|
||||
invoke void @_Z3foov()
|
||||
to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28
|
||||
|
||||
invoke.cont2: ; preds = %if.else
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %invoke.cont2, %invoke.cont1
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4
|
||||
invoke void @_Z3foov()
|
||||
to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29
|
||||
|
||||
invoke.cont3: ; preds = %if.end
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
|
||||
%1 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !19
|
||||
%tobool4 = icmp ne i32 %1, 0, !dbg !30
|
||||
br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32
|
||||
|
||||
if.then5: ; preds = %invoke.cont3
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5
|
||||
%2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19
|
||||
%inc = add nsw i32 %2, 1, !dbg !33
|
||||
store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19
|
||||
br label %if.end6, !dbg !35
|
||||
|
||||
if.end6: ; preds = %if.then5, %invoke.cont3
|
||||
; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 6
|
||||
ret void, !dbg !36
|
||||
|
||||
terminate.lpad: ; preds = %if.end, %if.else, %invoke.cont, %if.then
|
||||
; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
|
||||
%3 = landingpad { ptr, i32 }
|
||||
catch ptr null, !dbg !24
|
||||
%4 = extractvalue { ptr, i32 } %3, 0, !dbg !24
|
||||
call void @__clang_call_terminate(ptr %4) #3, !dbg !24
|
||||
unreachable, !dbg !24
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline nounwind uwtable
|
||||
define dso_local void @_Z3foov() #0 !dbg !37 {
|
||||
entry:
|
||||
ret void, !dbg !38
|
||||
}
|
||||
|
||||
declare i32 @__gxx_personality_v0(...)
|
||||
|
||||
; Function Attrs: noinline noreturn nounwind uwtable
|
||||
define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) #1 comdat {
|
||||
%2 = call ptr @__cxa_begin_catch(ptr %0) #4
|
||||
call void @_ZSt9terminatev() #3
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare ptr @__cxa_begin_catch(ptr)
|
||||
|
||||
declare void @_ZSt9terminatev()
|
||||
|
||||
; Function Attrs: mustprogress noinline nounwind uwtable
|
||||
define dso_local void @_Z3bazv() #0 !dbg !39 {
|
||||
entry:
|
||||
ret void, !dbg !40
|
||||
}
|
||||
|
||||
; CHECK: ![[#]] = !{i64 -3270123626113159616, i64 4294967295, !"_Z3bazv"}
|
||||
|
||||
attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #1 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #2 = { mustprogress noinline norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #3 = { noreturn nounwind }
|
||||
attributes #4 = { nounwind }
|
||||
|
||||
!llvm.dbg.cu = !{!2}
|
||||
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12}
|
||||
!llvm.ident = !{!13}
|
||||
|
||||
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
|
||||
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
|
||||
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
|
||||
!3 = !DIFile(filename: "test.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "a4c7b0392f3fd9c8ebb85065159dbb02")
|
||||
!4 = !{!0}
|
||||
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
|
||||
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!7 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!8 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!9 = !{i32 1, !"wchar_size", i32 4}
|
||||
!10 = !{i32 8, !"PIC Level", i32 2}
|
||||
!11 = !{i32 7, !"PIE Level", i32 2}
|
||||
!12 = !{i32 7, !"uwtable", i32 2}
|
||||
!13 = !{!"clang version 19.0.0"}
|
||||
!14 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
||||
!15 = !DISubroutineType(types: !16)
|
||||
!16 = !{null}
|
||||
!17 = !DILocation(line: 5, column: 6, scope: !18)
|
||||
!18 = distinct !DILexicalBlock(scope: !14, file: !3, line: 5, column: 6)
|
||||
!19 = !{!20, !20, i64 0}
|
||||
!20 = !{!"int", !21, i64 0}
|
||||
!21 = !{!"omnipotent char", !22, i64 0}
|
||||
!22 = !{!"Simple C++ TBAA"}
|
||||
!23 = !DILocation(line: 5, column: 6, scope: !14)
|
||||
!24 = !DILocation(line: 6, column: 5, scope: !25)
|
||||
!25 = distinct !DILexicalBlock(scope: !18, file: !3, line: 5, column: 9)
|
||||
!26 = !DILocation(line: 7, column: 5, scope: !25)
|
||||
!27 = !DILocation(line: 8, column: 3, scope: !25)
|
||||
!28 = !DILocation(line: 9, column: 5, scope: !18)
|
||||
!29 = !DILocation(line: 11, column: 3, scope: !14)
|
||||
!30 = !DILocation(line: 12, column: 6, scope: !31)
|
||||
!31 = distinct !DILexicalBlock(scope: !14, file: !3, line: 12, column: 6)
|
||||
!32 = !DILocation(line: 12, column: 6, scope: !14)
|
||||
!33 = !DILocation(line: 13, column: 5, scope: !34)
|
||||
!34 = distinct !DILexicalBlock(scope: !31, file: !3, line: 12, column: 9)
|
||||
!35 = !DILocation(line: 14, column: 5, scope: !34)
|
||||
!36 = !DILocation(line: 17, column: 1, scope: !14)
|
||||
!37 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 19, type: !15, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
||||
!38 = !DILocation(line: 19, column: 13, scope: !37)
|
||||
!39 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !3, file: !3, line: 18, type: !15, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
||||
!40 = !DILocation(line: 18, column: 13, scope: !39)
|
||||
!41 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 22, type: !42, scopeLine: 22, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
|
||||
!42 = !DISubroutineType(types: !43)
|
||||
!43 = !{!6}
|
||||
!44 = !DILocation(line: 23, column: 3, scope: !41)
|
||||
!45 = !DILocation(line: 24, column: 1, scope: !41)
|
Loading…
x
Reference in New Issue
Block a user