llvm7: properly patch clang for musl/ppc64/remove obsolete patches

Turns out the ppc64-musl ELFv2 patch was not being applied at all,
and there were two patches that no longer apply and are no longer
relevant (the second one reports it's already applied, the first
one patches files that don't exist).

Remove those patches, and make sure the ppc64 elfv2 patch is
applied, because otherwise llvm will emit elfv1 binaries for
ppc64-musl and we don't want that. In regards to that, there is
also a patch missing in the clang frontend, as otherwise clang
will forcibly pass `-mabi=elfv1` internally out of box and it
will not work either way, so make sure the correct ABI is
passed by clang by default on musl triplets.

Also revbump in order to make sure cross-compiling works as
expected.

[ci skip]
This commit is contained in:
q66 2019-01-18 03:09:52 +01:00 committed by Helmut Pozimski
parent 48ea32b71d
commit 03f758dbf7
6 changed files with 31 additions and 215 deletions

View File

@ -0,0 +1,30 @@
--- clang/lib/Basic/Targets/PPC.h
+++ clang/lib/Basic/Targets/PPC.h
@@ -358,7 +358,10 @@ public:
ABI = "elfv2";
} else {
resetDataLayout("E-m:e-i64:64-n32:64");
- ABI = "elfv1";
+ if (Triple.getEnvironment() == llvm::Triple::Musl)
+ ABI = "elfv2";
+ else
+ ABI = "elfv1";
}
switch (getTriple().getOS()) {
diff --git a/tools/clang/lib/Driver/ToolChains/Clang.cpp b/tools/clang/lib/Driver/ToolChains/Clang.cpp
index 8e9c4c6a..40817ec3 100644
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -1618,7 +1618,10 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
break;
}
- ABIName = "elfv1";
+ if (getToolChain().getTriple().getEnvironment() == llvm::Triple::Musl)
+ ABIName = "elfv2";
+ else
+ ABIName = "elfv1";
break;
}
case llvm::Triple::ppc64le:

View File

@ -1,74 +0,0 @@
From bc72a21666a9efc78e71b0296313f6e1449649fa Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Date: Wed, 22 Mar 2017 21:48:18 +0000
Subject: [PATCH] [AMDGPU] Fix bug 31610
Differential Revision: https://reviews.llvm.org/D31258
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298551 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/AMDGPU/AMDGPURuntimeMetadata.h | 8 ++++----
lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp | 5 ++---
2 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
index ed147ff4c43..36e7a0c42a9 100644
--- a/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
+++ b/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
@@ -40,8 +40,8 @@ namespace AMDGPU {
namespace RuntimeMD {
// Version and revision of runtime metadata
- const unsigned char MDVersion = 2;
- const unsigned char MDRevision = 0;
+ const uint32_t MDVersion = 2;
+ const uint32_t MDRevision = 0;
// Name of keys for runtime metadata.
namespace KeyName {
@@ -215,7 +215,7 @@ namespace RuntimeMD {
struct Metadata {
std::string Name;
std::string Language;
- std::vector<uint8_t> LanguageVersion;
+ std::vector<uint32_t> LanguageVersion;
std::vector<uint32_t> ReqdWorkGroupSize;
std::vector<uint32_t> WorkGroupSizeHint;
std::string VecTypeHint;
@@ -265,7 +265,7 @@ namespace RuntimeMD {
namespace Program {
// In-memory representation of program information.
struct Metadata {
- std::vector<uint8_t> MDVersionSeq;
+ std::vector<uint32_t> MDVersionSeq;
std::vector<std::string> PrintfInfo;
std::vector<Kernel::Metadata> Kernels;
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
index 0e87d750f5d..7c96898297c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
@@ -49,7 +49,6 @@ static cl::opt<bool>
CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
cl::desc("Check AMDGPU runtime metadata YAML parser"));
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
@@ -297,9 +296,9 @@ static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
auto Node = MD->getOperand(0);
if (Node->getNumOperands() > 1) {
Kernel.Language = "OpenCL C";
- uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
+ uint32_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
->getZExtValue();
- uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
+ uint32_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
->getZExtValue();
Kernel.LanguageVersion.push_back(Major);
Kernel.LanguageVersion.push_back(Minor);
--
2.12.2

View File

@ -1,140 +0,0 @@
From 25e2616626caafb896517e18cd8aa724fba2b200 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard@amd.com>
Date: Tue, 29 Nov 2016 03:41:28 +0000
Subject: [PATCH] Merging r280589:
------------------------------------------------------------------------
r280589 | nhaehnle | 2016-09-03 05:26:32 -0700 (Sat, 03 Sep 2016) | 19 lines
AMDGPU: Fix an interaction between WQM and polygon stippling
Summary:
This fixes a rare bug in polygon stippling with non-monolithic pixel shaders.
The underlying problem is as follows: the prolog part contains the polygon
stippling sequence, i.e. a kill. The main part then enables WQM based on the
_reduced_ exec mask, effectively undoing most of the polygon stippling.
Since we cannot know whether polygon stippling will be used, the main part
of a non-monolithic shader must always return to exact mode to fix this
problem.
Reviewers: arsenm, tstellarAMD, mareko
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: https://reviews.llvm.org/D23131
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_39@288105 91177308-0d34-0410-b5e6-96231b3b80d8
---
lib/Target/AMDGPU/SIInstructions.td | 1 +
lib/Target/AMDGPU/SIWholeQuadMode.cpp | 7 -----
test/CodeGen/AMDGPU/wqm.ll | 49 ++++++++++++++++++++++++++++++++---
3 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 18b7d5d..dde5f2f 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -2029,6 +2029,7 @@ def SI_RETURN : PseudoInstSI <
let hasSideEffects = 1;
let SALU = 1;
let hasNoSchedulingInfo = 1;
+ let DisableWQM = 1;
}
let Uses = [EXEC], Defs = [EXEC, VCC, M0],
diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index b200c15..1534d58 100644
--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -219,13 +219,6 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
markInstruction(MI, Flags, Worklist);
GlobalFlags |= Flags;
}
-
- if (WQMOutputs && MBB.succ_empty()) {
- // This is a prolog shader. Make sure we go back to exact mode at the end.
- Blocks[&MBB].OutNeeds = StateExact;
- Worklist.push_back(&MBB);
- GlobalFlags |= StateExact;
- }
}
return GlobalFlags;
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll
index 809a7ba..41e4264 100644
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -17,17 +17,18 @@ main_body:
;CHECK-LABEL: {{^}}test2:
;CHECK-NEXT: ; %main_body
;CHECK-NEXT: s_wqm_b64 exec, exec
-;CHECK: image_sample
;CHECK-NOT: exec
-;CHECK: _load_dword v0,
-define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
+define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
main_body:
%c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%c.2 = bitcast <4 x float> %c.1 to <4 x i32>
%c.3 = extractelement <4 x i32> %c.2, i32 0
%gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
%data = load float, float addrspace(1)* %gep
- ret float %data
+
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
+
+ ret void
}
; ... but disabled for stores (and, in this simple case, not re-enabled).
@@ -414,6 +415,46 @@ entry:
ret void
}
+; Must return to exact at the end of a non-void returning shader,
+; otherwise the EXEC mask exported by the epilog will be wrong. This is true
+; even if the shader has no kills, because a kill could have happened in a
+; previous shader fragment.
+;
+; CHECK-LABEL: {{^}}test_nonvoid_return:
+; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+;
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK-NOT: exec
+define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
+ %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex.i = bitcast <4 x float> %tex to <4 x i32>
+ %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ ret <4 x float> %dtex
+}
+
+; CHECK-LABEL: {{^}}test_nonvoid_return_unreachable:
+; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+; CHECK: s_wqm_b64 exec, exec
+;
+; CHECK: s_and_b64 exec, exec, [[LIVE]]
+; CHECK-NOT: exec
+define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
+entry:
+ %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+ %tex.i = bitcast <4 x float> %tex to <4 x i32>
+ %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+
+ %cc = icmp sgt i32 %c, 0
+ br i1 %cc, label %if, label %else
+
+if:
+ store volatile <4 x float> %dtex, <4 x float>* undef
+ unreachable
+
+else:
+ ret <4 x float> %dtex
+}
declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1

View File

@ -1,7 +1,7 @@
# Template file for 'llvm7'
pkgname=llvm7
version=7.0.1
revision=3
revision=4
wrksrc="llvm-${version}.src"
lib32disabled=yes
build_style=cmake