1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
From 8198f30f7e756e3368c3eda62ecc3d0cc62d1570 Mon Sep 17 00:00:00 2001
From: Jez Ng <jezng@fb.com>
Date: Tue, 14 Feb 2023 14:34:19 -0500
Subject: [PATCH] [lld-macho] Account for alignment in thunk insertion
algorithm
We previously neglected this, leading us to underestimate the maximum
possible branch address offset.
Fixing this should allow us to reduce `slop` to more reasonable
levels. I've lowered it to 256 for now, though I suspect we could go
lower.
Fixes https://github.com/llvm/llvm-project/issues/59259.
Reviewed By: serge-sans-paille
Differential Revision: https://reviews.llvm.org/D144029
---
lld/MachO/ConcatOutputSection.cpp | 10 +++--
lld/test/MachO/arm64-thunk-for-alignment.s | 44 ++++++++++++++++++++++
2 files changed, 51 insertions(+), 3 deletions(-)
create mode 100644 lld/test/MachO/arm64-thunk-for-alignment.s
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index cbd3a2492d25..b522bd9b289e 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -246,10 +246,14 @@ void TextOutputSection::finalize() {
// contains several branch instructions in succession, then the distance
// from the current position to the position where the thunks are inserted
// grows. So leave room for a bunch of thunks.
- unsigned slop = 1024 * thunkSize;
- while (finalIdx < endIdx && addr + size + inputs[finalIdx]->getSize() <
- isecVA + forwardBranchRange - slop)
+ unsigned slop = 256 * thunkSize;
+ while (finalIdx < endIdx) {
+ size_t expectedNewSize = alignTo(addr + size, inputs[finalIdx]->align) +
+ inputs[finalIdx]->getSize();
+ if (expectedNewSize >= isecVA + forwardBranchRange - slop)
+ break;
finalizeOne(inputs[finalIdx++]);
+ }
if (!isec->hasCallSites)
continue;
diff --git a/lld/test/MachO/arm64-thunk-for-alignment.s b/lld/test/MachO/arm64-thunk-for-alignment.s
new file mode 100644
index 000000000000..f497b81f705b
--- /dev/null
+++ b/lld/test/MachO/arm64-thunk-for-alignment.s
@@ -0,0 +1,44 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o
+# RUN: %lld -dylib -arch arm64 -lSystem -o %t/out %t/foo.o %t/bar.o
+
+# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s
+# CHECK: _bar.thunk.0
+
+## Regression test for PR59259. Previously, we neglected to check section
+## alignments when deciding when to create thunks.
+
+## If we ignore alignment, the total size of _spacer1 + _spacer2 below is just
+## under the limit at which we attempt to insert thunks between the spacers.
+## However, with alignment accounted for, their total size ends up being
+## 0x8000000, which is just above the max forward branch range, making thunk
+## insertion necessary. Thus, not accounting for alignment led to an error.
+
+#--- foo.s
+
+_foo:
+ b _bar
+
+## Size of a `b` instruction.
+.equ callSize, 4
+## Refer to `slop` in TextOutputSection::finalize().
+.equ slopSize, 12 * 256
+
+_spacer1:
+ .space 0x4000000 - slopSize - 2 * callSize - 1
+
+.subsections_via_symbols
+
+#--- bar.s
+.globl _bar
+
+.p2align 14
+_spacer2:
+ .space 0x4000000
+
+_bar:
+ ret
+
+.subsections_via_symbols
--
2.39.0.1.g6739ec1790
|