From feccc72c402023d29f5cf681e368a2e711ab9cb7 Mon Sep 17 00:00:00 2001 From: Daniel Kolesa Date: Mon, 17 May 2021 17:55:01 +0200 Subject: [PATCH] ispc: rebuild for llvm12 --- srcpkgs/ispc/patches/llvm12-001.patch | 57 + srcpkgs/ispc/patches/llvm12-002.patch | 923 +++++++++++ srcpkgs/ispc/patches/llvm12-003.patch | 1504 ++++++++++++++++++ srcpkgs/ispc/patches/llvm12-004.patch | 34 + srcpkgs/ispc/patches/llvm12-005.patch | 372 +++++ srcpkgs/ispc/patches/llvm12-006.patch | 126 ++ srcpkgs/ispc/patches/llvm12-007.patch | 160 ++ srcpkgs/ispc/patches/llvm12-008.patch | 149 ++ srcpkgs/ispc/patches/llvm12-009.patch | 2109 +++++++++++++++++++++++++ srcpkgs/ispc/template | 2 +- 10 files changed, 5435 insertions(+), 1 deletion(-) create mode 100644 srcpkgs/ispc/patches/llvm12-001.patch create mode 100644 srcpkgs/ispc/patches/llvm12-002.patch create mode 100644 srcpkgs/ispc/patches/llvm12-003.patch create mode 100644 srcpkgs/ispc/patches/llvm12-004.patch create mode 100644 srcpkgs/ispc/patches/llvm12-005.patch create mode 100644 srcpkgs/ispc/patches/llvm12-006.patch create mode 100644 srcpkgs/ispc/patches/llvm12-007.patch create mode 100644 srcpkgs/ispc/patches/llvm12-008.patch create mode 100644 srcpkgs/ispc/patches/llvm12-009.patch diff --git a/srcpkgs/ispc/patches/llvm12-001.patch b/srcpkgs/ispc/patches/llvm12-001.patch new file mode 100644 index 00000000000..5f2d3f5a409 --- /dev/null +++ b/srcpkgs/ispc/patches/llvm12-001.patch @@ -0,0 +1,57 @@ +From 1c0f89dccb774f216c7f6e76a99ee907a1e641cb Mon Sep 17 00:00:00 2001 +From: Dmitry Babokin +Date: Tue, 22 Dec 2020 17:03:59 -0800 +Subject: [PATCH] Fix to work with LLVM trunk after llvm/llvm-project@41c3b2713 + +--- + src/ctx.cpp | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/src/ctx.cpp b/src/ctx.cpp +index 6fbb8b9cf..977e9d222 100644 +--- a/src/ctx.cpp ++++ b/src/ctx.cpp +@@ -1473,11 +1473,15 @@ void FunctionEmitContext::AddDebugPos(llvm::Value *value, const SourcePos *pos, + llvm::Instruction *inst = llvm::dyn_cast(value); + if (inst != NULL && m->diBuilder) { + SourcePos p = pos ? *pos : currentPos; +- if (p.first_line != 0) ++ if (p.first_line != 0) { + // If first_line == 0, then we're in the middle of setting up + // the standard library or the like; don't add debug positions + // for those functions +- inst->setDebugLoc(llvm::DebugLoc::get(p.first_line, p.first_column, scope ? scope : GetDIScope())); ++ scope = scope ? 
scope : GetDIScope(); ++ llvm::DebugLoc diLoc = ++ llvm::DILocation::get(scope->getContext(), p.first_line, p.first_column, scope, nullptr, false); ++ inst->setDebugLoc(diLoc); ++ } + } + } + +@@ -1518,9 +1522,10 @@ void FunctionEmitContext::EmitVariableDebugInfo(Symbol *sym) { + llvm::DILocalVariable *var = m->diBuilder->createAutoVariable( + scope, sym->name, sym->pos.GetDIFile(), sym->pos.first_line, diType, true /* preserve through opts */); + ++ llvm::DebugLoc diLoc = ++ llvm::DILocation::get(scope->getContext(), sym->pos.first_line, sym->pos.first_column, scope, nullptr, false); + llvm::Instruction *declareInst = +- m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(), +- llvm::DebugLoc::get(sym->pos.first_line, sym->pos.first_column, scope), bblock); ++ m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(), diLoc, bblock); + AddDebugPos(declareInst, &sym->pos, scope); + } + +@@ -1535,9 +1540,10 @@ void FunctionEmitContext::EmitFunctionParameterDebugInfo(Symbol *sym, int argNum + m->diBuilder->createParameterVariable(scope, sym->name, argNum + 1, sym->pos.GetDIFile(), sym->pos.first_line, + diType, true /* preserve through opts */, flags); + ++ llvm::DebugLoc diLoc = ++ llvm::DILocation::get(scope->getContext(), sym->pos.first_line, sym->pos.first_column, scope, nullptr, false); + llvm::Instruction *declareInst = +- m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(), +- llvm::DebugLoc::get(sym->pos.first_line, sym->pos.first_column, scope), bblock); ++ m->diBuilder->insertDeclare(sym->storagePtr, var, m->diBuilder->createExpression(), diLoc, bblock); + AddDebugPos(declareInst, &sym->pos, scope); + } + diff --git a/srcpkgs/ispc/patches/llvm12-002.patch b/srcpkgs/ispc/patches/llvm12-002.patch new file mode 100644 index 00000000000..0903f30da10 --- /dev/null +++ b/srcpkgs/ispc/patches/llvm12-002.patch @@ -0,0 +1,923 @@ +From 0597a79d084c014780136da906afe21d15e982cb Mon Sep 17 00:00:00 2001 +From: DeepakRajendrakumaran +Date: Tue, 5 Jan 2021 13:53:30 -0800 +Subject: [PATCH] Remove LLVM 8.0 and LLVM 9.0. 
(#1966) + +--- + .appveyor.yml | 3 -- + .travis.yml | 7 --- + CMakeLists.txt | 12 ++---- + src/ast.cpp | 4 -- + src/ctx.cpp | 30 ------------- + src/gen/GlobalsLocalization.cpp | 4 -- + src/ispc.cpp | 20 --------- + src/ispc.h | 3 +- + src/ispc_version.h | 4 +- + src/main.cpp | 13 ------ + src/module.cpp | 27 ------------ + src/module.h | 2 - + src/opt.cpp | 64 ++-------------------------- + src/type.cpp | 10 ++--- + tests/lit-tests/1771.ispc | 1 - + tests/lit-tests/1844.ispc | 3 -- + tests/lit-tests/1926.ispc | 2 - + tests/lit-tests/cpus_x86.ispc | 2 + + tests/lit-tests/cpus_x86_llvm10.ispc | 11 ----- + tests/lit-tests/lit.cfg | 5 --- + 20 files changed, 14 insertions(+), 213 deletions(-) + delete mode 100644 tests/lit-tests/cpus_x86_llvm10.ispc + +diff --git a/.appveyor.yml b/.appveyor.yml +index 451a7b3e0..7945cca2a 100644 +--- a/.appveyor.yml ++++ b/.appveyor.yml +@@ -41,8 +41,6 @@ environment: + LLVM_VERSION: latest + - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 + LLVM_VERSION: 10.0 +- - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 +- LLVM_VERSION: 9.0 + + for: + - +@@ -62,7 +60,6 @@ for: + if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2019" ( (set generator="Visual Studio 16") & (set vsversion=2019)) + set LLVM_TAR=llvm-11.0.0-win.vs2019-Release+Asserts-x86.arm.wasm.7z + if "%LLVM_VERSION%"=="10.0" (set LLVM_TAR=llvm-10.0.1-win.vs2019-Release+Asserts-x86.arm.wasm.zip) +- if "%LLVM_VERSION%"=="9.0" (set LLVM_TAR=llvm-9.0.1-win.vs2017-Release+Asserts-x86.arm.wasm.zip) + install: + - ps: choco install --no-progress winflexbison3 wget 7zip + - cmd: |- +diff --git a/.travis.yml b/.travis.yml +index 2e96017a6..3a6b85264 100644 +--- a/.travis.yml ++++ b/.travis.yml +@@ -108,13 +108,6 @@ jobs: + - LLVM_TAR=llvm-10.0.1-ubuntu16.04-Release+Asserts-x86.arm.wasm.tar.xz + - LLVM_REPO=https://github.com/dbabokin/llvm-project + - ISPC_HOME=$TRAVIS_BUILD_DIR +- # LLVM 9.0 + Ubuntu 16.04: build, lit tests, examples (build + run), benchmarks (build + trial run) +- - <<: *my_tag +- env: +- - LLVM_VERSION=9.0 OS=Ubuntu16.04 +- - LLVM_TAR=llvm-9.0.1-ubuntu16.04-Release+Asserts-x86.arm.wasm.tar.xz +- - LLVM_REPO=https://github.com/dbabokin/llvm-project +- - ISPC_HOME=$TRAVIS_BUILD_DIR + # WASM enabled build + # LLVM 11.0 + Ubuntu 16.04: build, lit tests, examples (build), benchmarks (build + trial run) + - <<: *my_tag +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 5fa863fcf..a6b3ed251 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -241,10 +241,8 @@ if (X86_ENABLED) + avx2-i8x32 avx2-i16x16 + avx2-i32x4 avx2-i32x8 avx2-i32x16 avx2-i64x4 + avx512knl-i32x16 +- avx512skx-i32x16 avx512skx-i32x8) +- if (${LLVM_VERSION_NUMBER} VERSION_GREATER_EQUAL "10.0.0") +- list(APPEND ISPC_TARGETS avx512skx-i8x64 avx512skx-i16x32) +- endif() ++ avx512skx-i32x16 avx512skx-i32x8 ++ avx512skx-i8x64 avx512skx-i16x32) + endif() + if (ARM_ENABLED) + list(APPEND ISPC_TARGETS neon-i8x16 neon-i16x8 neon-i32x4 neon-i32x8) +@@ -259,11 +257,7 @@ if (WASM_ENABLED) + endif() + + set(CLANG_LIBRARY_LIST clangFrontend clangDriver clangSerialization clangParse clangSema clangAnalysis clangAST clangBasic clangEdit clangLex) +-set(LLVM_COMPONENTS engine ipo bitreader bitwriter instrumentation linker option) +- +-if (${LLVM_VERSION_NUMBER} VERSION_GREATER_EQUAL "10.0.0") +- list(APPEND LLVM_COMPONENTS frontendopenmp) +-endif() ++set(LLVM_COMPONENTS engine ipo bitreader bitwriter instrumentation linker option frontendopenmp) + + if (X86_ENABLED) + list(APPEND LLVM_COMPONENTS x86) +diff --git a/src/ast.cpp b/src/ast.cpp 
+index 20f75a29c..5a43c4854 100644 +--- a/src/ast.cpp ++++ b/src/ast.cpp +@@ -44,9 +44,7 @@ + #include "sym.h" + #include "util.h" + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + #include +-#endif + /////////////////////////////////////////////////////////////////////////// + // ASTNode + +@@ -62,9 +60,7 @@ void AST::AddFunction(Symbol *sym, Stmt *code) { + } + + void AST::GenerateIR() { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("GenerateIR"); +-#endif + for (unsigned int i = 0; i < functions.size(); ++i) + functions[i]->GenerateIR(); + } +diff --git a/src/ctx.cpp b/src/ctx.cpp +index 977e9d222..42c4ea09d 100644 +--- a/src/ctx.cpp ++++ b/src/ctx.cpp +@@ -309,21 +309,13 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::F + char buf[256]; + snprintf(buf, sizeof(buf), "__off_all_on_mask_%s", g->target->GetISAString()); + +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0 +- llvm::Constant *offFunc = m->module->getOrInsertFunction(buf, LLVMTypes::VoidType); +-#else // LLVM 9.0+ + llvm::FunctionCallee offFuncCallee = m->module->getOrInsertFunction(buf, LLVMTypes::VoidType); + llvm::Constant *offFunc = llvm::cast(offFuncCallee.getCallee()); +-#endif + AssertPos(currentPos, llvm::isa(offFunc)); + llvm::BasicBlock *offBB = llvm::BasicBlock::Create(*g->ctx, "entry", (llvm::Function *)offFunc, 0); + llvm::StoreInst *inst = new llvm::StoreInst(LLVMMaskAllOff, globalAllOnMaskPtr, offBB); + if (g->opt.forceAlignedMemory) { +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- inst->setAlignment(g->target->getNativeVectorAlignment()); +-#else // LLVM 10.0+ + inst->setAlignment(llvm::MaybeAlign(g->target->getNativeVectorAlignment()).valueOrOne()); +-#endif + } + llvm::ReturnInst::Create(*g->ctx, offBB); + } +@@ -2288,11 +2280,7 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const Type *type, c + #endif + + if (g->opt.forceAlignedMemory && llvm::dyn_cast(pt->getElementType())) { +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- inst->setAlignment(g->target->getNativeVectorAlignment()); +-#else // LLVM 10.0+ + inst->setAlignment(llvm::MaybeAlign(g->target->getNativeVectorAlignment()).valueOrOne()); +-#endif + } + + AddDebugPos(inst); +@@ -2431,11 +2419,7 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, + // vs the proper alignment in practice.) 
+ int align = 1; + +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- inst->setAlignment(align); +-#else // LLVM 10.0+ + inst->setAlignment(llvm::MaybeAlign(align).valueOrOne()); +-#endif + } + + AddDebugPos(inst); +@@ -2649,11 +2633,7 @@ llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *n + align = g->target->getNativeVectorAlignment(); + + if (align != 0) { +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- inst->setAlignment(align); +-#else // LLVM 10.0+ + inst->setAlignment(llvm::MaybeAlign(align).valueOrOne()); +-#endif + } + // Don't add debugging info to alloca instructions + return inst; +@@ -2926,11 +2906,7 @@ void FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr, const + llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock); + + if (g->opt.forceAlignedMemory && llvm::dyn_cast(pt->getElementType())) { +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- inst->setAlignment(g->target->getNativeVectorAlignment()); +-#else // LLVM 10.0+ + inst->setAlignment(llvm::MaybeAlign(g->target->getNativeVectorAlignment()).valueOrOne()); +-#endif + } + + #ifdef ISPC_GENX_ENABLED +@@ -3025,16 +3001,10 @@ void FunctionEmitContext::MemcpyInst(llvm::Value *dest, llvm::Value *src, llvm:: + } + if (align == NULL) + align = LLVMInt32(1); +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_8_0 +- llvm::Constant *mcFunc = +- m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType, +- LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::BoolType); +-#else // LLVM 9.0+ + llvm::FunctionCallee mcFuncCallee = + m->module->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i64", LLVMTypes::VoidType, LLVMTypes::VoidPointerType, + LLVMTypes::VoidPointerType, LLVMTypes::Int64Type, LLVMTypes::BoolType); + llvm::Constant *mcFunc = llvm::cast(mcFuncCallee.getCallee()); +-#endif + AssertPos(currentPos, mcFunc != NULL); + AssertPos(currentPos, llvm::isa(mcFunc)); + +diff --git a/src/gen/GlobalsLocalization.cpp b/src/gen/GlobalsLocalization.cpp +index 193a66289..a176e9462 100644 +--- a/src/gen/GlobalsLocalization.cpp ++++ b/src/gen/GlobalsLocalization.cpp +@@ -470,11 +470,7 @@ void GlobalsLocalization::LocalizeGlobals(LocalizationInfo &LI) { + Instruction &FirstI = *Fn->getEntryBlock().begin(); + Type *ElemTy = GV->getType()->getElementType(); + AllocaInst *Alloca = new AllocaInst(ElemTy, 0, GV->getName() + ".local", &FirstI); +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- Alloca->setAlignment(GV->getAlignment()); +-#else // LLVM 10.0+ + Alloca->setAlignment(llvm::MaybeAlign(GV->getAlignment())); +-#endif + if (!isa(GV->getInitializer())) + new StoreInst(GV->getInitializer(), Alloca, &FirstI); + +diff --git a/src/ispc.cpp b/src/ispc.cpp +index e2d0f206e..fac5233f7 100644 +--- a/src/ispc.cpp ++++ b/src/ispc.cpp +@@ -272,9 +272,7 @@ typedef enum { + CPU_Silvermont, + + CPU_ICX, +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + CPU_TGL, +-#endif + #if ISPC_LLVM_VERSION >= ISPC_LLVM_12_0 + CPU_ADL, + CPU_SPR, +@@ -365,10 +363,8 @@ class AllCPUs { + + names[CPU_ICX].push_back("icelake-server"); + names[CPU_ICX].push_back("icx"); +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + names[CPU_TGL].push_back("tigerlake"); + names[CPU_TGL].push_back("tgl"); +-#endif + #if ISPC_LLVM_VERSION >= ISPC_LLVM_12_0 + names[CPU_ADL].push_back("alderlake"); + names[CPU_ADL].push_back("adl"); +@@ -411,11 +407,9 @@ class AllCPUs { + compat[CPU_ADL] = Set(CPU_ADL, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_IvyBridge, CPU_Haswell, 
CPU_Broadwell, CPU_None); + #endif +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + compat[CPU_TGL] = + Set(CPU_TGL, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem, CPU_Silvermont, CPU_SandyBridge, + CPU_IvyBridge, CPU_Haswell, CPU_Broadwell, CPU_SKX, CPU_ICL, CPU_ICX, CPU_None); +-#endif + compat[CPU_ICX] = Set(CPU_ICX, CPU_x86_64, CPU_Bonnell, CPU_Penryn, CPU_Core2, CPU_Nehalem, CPU_Silvermont, + CPU_SandyBridge, CPU_IvyBridge, CPU_Haswell, CPU_Broadwell, CPU_SKX, CPU_ICL, CPU_None); + +@@ -556,9 +550,7 @@ Target::Target(Arch arch, const char *cpu, ISPCTarget ispc_target, bool pic, boo + #if ISPC_LLVM_VERSION >= ISPC_LLVM_12_0 + case CPU_SPR: + #endif +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + case CPU_TGL: +-#endif + case CPU_ICX: + case CPU_ICL: + case CPU_SKX: +@@ -916,7 +908,6 @@ Target::Target(Arch arch, const char *cpu, ISPCTarget ispc_target, bool pic, boo + } + break; + case ISPCTarget::avx512skx_i8x64: +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 // LLVM 10.0+ + // This target is enabled only for LLVM 10.0 and later + // because LLVM requires a number of fixes, which are + // committed to LLVM 11.0 and can be applied to 10.0, but not +@@ -937,12 +928,7 @@ Target::Target(Arch arch, const char *cpu, ISPCTarget ispc_target, bool pic, boo + this->m_hasVecPrefetch = false; + CPUfromISA = CPU_SKX; + break; +-#else +- unsupported_target = true; +- break; +-#endif + case ISPCTarget::avx512skx_i16x32: +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 // LLVM 10.0+ + // This target is enabled only for LLVM 10.0 and later + // because LLVM requires a number of fixes, which are + // committed to LLVM 11.0 and can be applied to 10.0, but not +@@ -963,10 +949,6 @@ Target::Target(Arch arch, const char *cpu, ISPCTarget ispc_target, bool pic, boo + this->m_hasVecPrefetch = false; + CPUfromISA = CPU_SKX; + break; +-#else +- unsupported_target = true; +- break; +-#endif + #ifdef ISPC_ARM_ENABLED + case ISPCTarget::neon_i8x16: + this->m_isa = Target::NEON; +@@ -1662,11 +1644,9 @@ Globals::Globals() { + isMultiTargetCompilation = false; + errorLimit = -1; + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + enableTimeTrace = false; + // set default granularity to 500. + timeTraceGranularity = 500; +-#endif + target = NULL; + ctx = new llvm::LLVMContext; + +diff --git a/src/ispc.h b/src/ispc.h +index e25d8830e..de357a21c 100644 +--- a/src/ispc.h ++++ b/src/ispc.h +@@ -695,13 +695,12 @@ struct Globals { + + /* Number of errors to show in ISPC. */ + int errorLimit; +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 ++ + /* When true, enable compile time tracing. */ + bool enableTimeTrace; + + /* When compile time tracing is enabled, set time granularity. 
*/ + int timeTraceGranularity; +-#endif + }; + + enum { +diff --git a/src/ispc_version.h b/src/ispc_version.h +index d781347fd..434b2d678 100644 +--- a/src/ispc_version.h ++++ b/src/ispc_version.h +@@ -44,13 +44,11 @@ + + #define ISPC_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100) + +-#define ISPC_LLVM_8_0 80000 +-#define ISPC_LLVM_9_0 90000 + #define ISPC_LLVM_10_0 100000 + #define ISPC_LLVM_11_0 110000 + #define ISPC_LLVM_12_0 120000 + +-#define OLDEST_SUPPORTED_LLVM ISPC_LLVM_8_0 ++#define OLDEST_SUPPORTED_LLVM ISPC_LLVM_10_0 + #define LATEST_SUPPORTED_LLVM ISPC_LLVM_12_0 + + #ifdef __ispc__xstr +diff --git a/src/main.cpp b/src/main.cpp +index 8f64330cc..5c920eabc 100644 +--- a/src/main.cpp ++++ b/src/main.cpp +@@ -55,9 +55,7 @@ + #include + #include + #include +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + #include +-#endif + + #ifdef ISPC_HOST_IS_WINDOWS + #define strcasecmp stricmp +@@ -181,11 +179,9 @@ static void lPrintVersion() { + snprintf(targetHelp, sizeof(targetHelp), "[--target-os=]\t\t\tSelect target OS. ={%s}", + g->target_registry->getSupportedOSes().c_str()); + PrintWithWordBreaks(targetHelp, 24, TerminalWidth(), stdout); +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + printf(" [--time-trace]\t\t\tTurn on time profiler. Generates JSON file based on output filename.\n"); + printf(" [--time-trace-granularity=\tMinimum time granularity (in microseconds) traced by time " + "profiler.\n"); +-#endif + printf(" [--version]\t\t\t\tPrint ispc version\n"); + #ifdef ISPC_GENX_ENABLED + printf(" [--vc-options=<\"-option1 -option2...\">]\t\t\t\tPass additional options to Vector Compiler backend\n"); +@@ -462,7 +458,6 @@ static void setCallingConv(VectorCallStatus vectorCall, Arch arch) { + } + } + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + static void writeCompileTimeFile(const char *outFileName) { + llvm::SmallString<128> jsonFileName(outFileName); + jsonFileName.append(".json"); +@@ -479,7 +474,6 @@ static void writeCompileTimeFile(const char *outFileName) { + of->keep(); + return; + } +-#endif + + static std::set ParsingPhases(char *stages, ArgErrors &errorHandler) { + constexpr int parsing_limit = 100; +@@ -791,12 +785,10 @@ int main(int Argc, char *Argv[]) { + } + } else if (!strncmp(argv[i], "--force-alignment=", 18)) { + g->forceAlignment = atoi(argv[i] + 18); +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + } else if (!strcmp(argv[i], "--time-trace")) { + g->enableTimeTrace = true; + } else if (!strncmp(argv[i], "--time-trace-granularity=", 25)) { + g->timeTraceGranularity = atoi(argv[i] + 25); +-#endif + } else if (!strcmp(argv[i], "--woff") || !strcmp(argv[i], "-woff")) { + g->disableWarnings = true; + g->emitPerfWarnings = false; +@@ -1059,7 +1051,6 @@ int main(int Argc, char *Argv[]) { + + // This needs to happen after the TargetOS is decided. 
+ setCallingConv(vectorCall, arch); +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + if (g->enableTimeTrace) { + llvm::timeTraceProfilerInitialize(g->timeTraceGranularity, "ispc"); + } +@@ -1077,9 +1068,5 @@ int main(int Argc, char *Argv[]) { + } + llvm::timeTraceProfilerCleanup(); + } +-#else +- int ret = Module::CompileAndOutput(file, arch, cpu, targets, flags, ot, outFileName, headerFileName, depsFileName, +- depsTargetName, hostStubFileName, devStubFileName); +-#endif + return ret; + } +diff --git a/src/module.cpp b/src/module.cpp +index fe7452df1..a85fec77a 100644 +--- a/src/module.cpp ++++ b/src/module.cpp +@@ -170,9 +170,7 @@ Module::Module(const char *fn) { + module->setDataLayout(g->target->getDataLayout()->getStringRepresentation()); + + if (g->generateDebuggingSymbols) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("Create Debug Data"); +-#endif + // To enable debug information on Windows, we have to let llvm know, that + // debug information should be emitted in CodeView format. + if (g->target_os == TargetOS::windows) { +@@ -226,10 +224,8 @@ extern YY_BUFFER_STATE yy_create_buffer(FILE *, int); + extern void yy_delete_buffer(YY_BUFFER_STATE); + + int Module::CompileFile() { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope CompileFileTimeScope( + "CompileFile", llvm::StringRef(filename + ("_" + std::string(g->target->GetISAString())))); +-#endif + extern void ParserInit(); + ParserInit(); + +@@ -238,18 +234,14 @@ int Module::CompileFile() { + // variable 'm' to be initialized and available (which it isn't until + // the Module constructor returns...) + { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("DefineStdlib"); +-#endif + DefineStdlib(symbolTable, g->ctx, module, g->includeStdlib); + } + + bool runPreprocessor = g->runCPP; + + if (runPreprocessor) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("Frontend parser"); +-#endif + if (!IsStdin(filename)) { + // Try to open the file first, since otherwise we crash in the + // preprocessor if the file doesn't exist. +@@ -268,9 +260,7 @@ int Module::CompileFile() { + yyparse(); + yy_delete_buffer(strbuf); + } else { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("Frontend parser"); +-#endif + // No preprocessor, just open up the file if it's not stdin.. + FILE *f = NULL; + if (IsStdin(filename)) { +@@ -297,9 +287,7 @@ int Module::CompileFile() { + + if (diBuilder) + diBuilder->finalize(); +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("Optimize"); +-#endif + if (errorCount == 0) + Optimize(module, g->opt.level); + +@@ -1243,15 +1231,8 @@ bool Module::writeObjectFileOrAssembly(llvm::TargetMachine *targetMachine, llvm: + const char *outFileName) { + // Figure out if we're generating object file or assembly output, and + // set binary output for object files +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- Assert(!g->target->isGenXTarget()); +- llvm::TargetMachine::CodeGenFileType fileType = +- (outputType == Object) ? llvm::TargetMachine::CGFT_ObjectFile : llvm::TargetMachine::CGFT_AssemblyFile; +- bool binary = (fileType == llvm::TargetMachine::CGFT_ObjectFile); +-#else // LLVM 10.0+ + llvm::CodeGenFileType fileType = (outputType == Object) ? llvm::CGFT_ObjectFile : llvm::CGFT_AssemblyFile; + bool binary = (fileType == llvm::CGFT_ObjectFile); +-#endif + + llvm::sys::fs::OpenFlags flags = binary ? 
llvm::sys::fs::F_None : llvm::sys::fs::F_Text; + +@@ -2219,11 +2200,7 @@ void Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream * + inst.setTarget(target); + inst.createSourceManager(inst.getFileManager()); + +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- clang::FrontendInputFile inputFile(infilename, clang::InputKind::Unknown); +-#else // LLVM 10.0+ + clang::FrontendInputFile inputFile(infilename, clang::InputKind()); +-#endif + + inst.InitializeSourceManager(inputFile); + +@@ -2722,9 +2699,7 @@ int Module::CompileAndOutput(const char *srcFile, Arch arch, const char *cpu, st + + m = new Module(srcFile); + if (m->CompileFile() == 0) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("Backend"); +-#endif + #ifdef ISPC_GENX_ENABLED + if (outputType == Asm || outputType == Object) { + if (g->target->isGenXTarget()) { +@@ -2855,9 +2830,7 @@ int Module::CompileAndOutput(const char *srcFile, Arch arch, const char *cpu, st + + m = new Module(srcFile); + int compileFileError = m->CompileFile(); +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope TimeScope("Backend"); +-#endif + if (compileFileError == 0) { + // Create the dispatch module, unless already created; + // in the latter case, just do the checking +diff --git a/src/module.h b/src/module.h +index 61d3cfea0..603124d50 100644 +--- a/src/module.h ++++ b/src/module.h +@@ -43,9 +43,7 @@ + + #include + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + #include +-#endif + + #ifdef ISPC_GENX_ENABLED + #include "ocl_igc_interface/igc_ocl_device_ctx.h" +diff --git a/src/opt.cpp b/src/opt.cpp +index a655a8344..5b1ac7b63 100644 +--- a/src/opt.cpp ++++ b/src/opt.cpp +@@ -66,6 +66,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -85,9 +86,6 @@ + #include + #include + #include +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 +-#include +-#endif + + #ifdef ISPC_HOST_IS_LINUX + #include +@@ -1003,10 +1001,7 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + else + align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8; + name = LLVMGetName(callInst->getArgOperand(0), "_load"); +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- llvm::Instruction *loadInst = +- new llvm::LoadInst(castPtr, name, false /* not volatile */, align, (llvm::Instruction *)NULL); +-#elif ISPC_LLVM_VERSION == ISPC_LLVM_10_0 ++#if ISPC_LLVM_VERSION == ISPC_LLVM_10_0 + llvm::Instruction *loadInst = new llvm::LoadInst(castPtr, name, false /* not volatile */, + llvm::MaybeAlign(align), (llvm::Instruction *)NULL); + #else +@@ -1043,13 +1038,8 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + align = g->target->getNativeVectorAlignment(); + else + align = callInst->getCalledFunction() == avxMaskedStore32 ? 
4 : 8; +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- llvm::StoreInst *storeInst = new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL); +- storeInst->setAlignment(align); +-#else + llvm::StoreInst *storeInst = new llvm::StoreInst(rvalue, castPtr, (llvm::Instruction *)NULL, + llvm::MaybeAlign(align).valueOrOne()); +-#endif + lCopyMetadata(storeInst, callInst); + llvm::ReplaceInstWithInst(callInst, storeInst); + +@@ -1066,9 +1056,7 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool IntrinsicsOpt::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("IntrinsicsOpt::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -1239,9 +1227,7 @@ bool InstructionSimplifyPass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool InstructionSimplifyPass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("InstructionSimplifyPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -2974,16 +2960,10 @@ static bool lImproveMaskedStore(llvm::CallInst *callInst) { + + lvalue = new llvm::BitCastInst(lvalue, ptrType, "lvalue_to_ptr_type", callInst); + lCopyMetadata(lvalue, callInst); +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- store = +- new llvm::StoreInst(rvalue, lvalue, false /* not volatile */, +- g->opt.forceAlignedMemory ? g->target->getNativeVectorAlignment() : info->align); +-#else + store = new llvm::StoreInst( + rvalue, lvalue, false /* not volatile */, + llvm::MaybeAlign(g->opt.forceAlignedMemory ? g->target->getNativeVectorAlignment() : info->align) + .valueOrOne()); +-#endif + } + if (store != NULL) { + lCopyMetadata(store, callInst); +@@ -3059,11 +3039,7 @@ static bool lImproveMaskedLoad(llvm::CallInst *callInst, llvm::BasicBlock::itera + { + llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0); + ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load", callInst); +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- load = new llvm::LoadInst(ptr, callInst->getName(), false /* not volatile */, +- g->opt.forceAlignedMemory ? g->target->getNativeVectorAlignment() : info->align, +- (llvm::Instruction *)NULL); +-#elif ISPC_LLVM_VERSION == ISPC_LLVM_10_0 ++#if ISPC_LLVM_VERSION == ISPC_LLVM_10_0 + load = new llvm::LoadInst( + ptr, callInst->getName(), false /* not volatile */, + llvm::MaybeAlign(g->opt.forceAlignedMemory ? 
g->target->getNativeVectorAlignment() : info->align) +@@ -3129,9 +3105,7 @@ bool ImproveMemoryOpsPass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool ImproveMemoryOpsPass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("ImproveMemoryOpsPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -3424,9 +3398,7 @@ llvm::Value *lGEPAndLoad(llvm::Value *basePtr, int64_t offset, int align, llvm:: + llvm::Type *type) { + llvm::Value *ptr = lGEPInst(basePtr, LLVMInt64(offset), "new_base", insertBefore); + ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(type, 0), "ptr_cast", insertBefore); +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- return new llvm::LoadInst(ptr, "gather_load", false /* not volatile */, align, insertBefore); +-#elif ISPC_LLVM_VERSION == ISPC_LLVM_10_0 ++#if ISPC_LLVM_VERSION == ISPC_LLVM_10_0 + return new llvm::LoadInst(ptr, "gather_load", false /* not volatile */, llvm::MaybeAlign(align), insertBefore); + #else // LLVM 11.0+ + return new llvm::LoadInst(llvm::dyn_cast(ptr->getType())->getPointerElementType(), ptr, +@@ -4135,9 +4107,7 @@ bool GatherCoalescePass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool GatherCoalescePass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("GatherCoalescePass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -4467,9 +4437,7 @@ bool ReplacePseudoMemoryOpsPass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool ReplacePseudoMemoryOpsPass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("ReplacePseudoMemoryOpsPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -4573,9 +4541,7 @@ bool IsCompileTimeConstantPass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool IsCompileTimeConstantPass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("IsCompileTimeConstantPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -5201,9 +5167,7 @@ bool PeepholePass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool PeepholePass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("PeepholePass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -5299,9 +5263,7 @@ bool ReplaceStdlibShiftPass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool ReplaceStdlibShiftPass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("ReplaceStdlibShiftPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -5399,9 +5361,7 @@ llvm::Instruction *FixBooleanSelectPass::fixSelect(llvm::SelectInst *sel, llvm:: + } + + bool FixBooleanSelectPass::runOnFunction(llvm::Function &F) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("FixBooleanSelectPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + + return 
modifiedAny; +@@ -5964,9 +5924,7 @@ bool GenXGatherCoalescing::runOnBasicBlock(llvm::BasicBlock &bb) { + } + + bool GenXGatherCoalescing::runOnFunction(llvm::Function &F) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("GenXGatherCoalescing::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -6113,9 +6071,7 @@ bool PromoteToPrivateMemoryPass::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool PromoteToPrivateMemoryPass::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("PromoteToPrivateMemoryPass::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -6181,9 +6137,7 @@ bool ReplaceLLVMIntrinsics::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool ReplaceLLVMIntrinsics::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("ReplaceLLVMIntrinsics::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -6278,9 +6232,7 @@ bool ReplaceUnsupportedInsts::runOnBasicBlock(llvm::BasicBlock &bb) { + + bool ReplaceUnsupportedInsts::runOnFunction(llvm::Function &F) { + +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("ReplaceUnsupportedInsts::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -6369,9 +6321,7 @@ bool CheckUnsupportedInsts::runOnBasicBlock(llvm::BasicBlock &bb) { + } + + bool CheckUnsupportedInsts::runOnFunction(llvm::Function &F) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("CheckUnsupportedInsts::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -6440,9 +6390,7 @@ bool MangleOpenCLBuiltins::runOnBasicBlock(llvm::BasicBlock &bb) { + } + + bool MangleOpenCLBuiltins::runOnFunction(llvm::Function &F) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("MangleOpenCLBuiltins::runOnFunction", F.getName()); +-#endif + bool modifiedAny = false; + for (llvm::BasicBlock &BB : F) { + modifiedAny |= runOnBasicBlock(BB); +@@ -6828,9 +6776,7 @@ bool FixAddressSpace::runOnFunction(llvm::Function &F) { + // Transformations are correct when the function is not internal. + // This is due to address space calculation algorithm. 
+ // TODO: problems can be met in case of Stack Calls +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("FixAddressSpace::runOnFunction", F.getName()); +-#endif + if (F.getLinkage() == llvm::GlobalValue::LinkageTypes::InternalLinkage) + return false; + +@@ -6854,9 +6800,7 @@ class DemotePHIs : public llvm::FunctionPass { + char DemotePHIs::ID = 0; + + bool DemotePHIs::runOnFunction(llvm::Function &F) { +-#if ISPC_LLVM_VERSION >= ISPC_LLVM_10_0 + llvm::TimeTraceScope FuncScope("DemotePHIs::runOnFunction", F.getName()); +-#endif + if (F.isDeclaration() || skipFunction(F)) + return false; + std::vector WorkList; +diff --git a/src/type.cpp b/src/type.cpp +index 688a4e874..fa0cb04db 100644 +--- a/src/type.cpp ++++ b/src/type.cpp +@@ -1822,14 +1822,10 @@ llvm::DIType *StructType::GetDIType(llvm::DIScope *scope) const { + llvm::DIFile *diFile = pos.GetDIFile(); + llvm::DINamespace *diSpace = pos.GetDINamespace(); + return m->diBuilder->createStructType(diSpace, GetString(), diFile, +- pos.first_line, // Line number +- layout->getSizeInBits(), // Size in bits +-#if ISPC_LLVM_VERSION <= ISPC_LLVM_9_0 +- layout->getAlignment() * 8, // Alignment in bits +-#else // LLVM 10.0+ ++ pos.first_line, // Line number ++ layout->getSizeInBits(), // Size in bits + layout->getAlignment().value() * 8, // Alignment in bits +-#endif +- llvm::DINode::FlagZero, // Flags ++ llvm::DINode::FlagZero, // Flags + NULL, elements); + } + +diff --git a/tests/lit-tests/1771.ispc b/tests/lit-tests/1771.ispc +index 98064d84d..4feb73dd7 100644 +--- a/tests/lit-tests/1771.ispc ++++ b/tests/lit-tests/1771.ispc +@@ -5,7 +5,6 @@ + // RUN: %{ispc} %s --target=sse4-i16x8 --emit-asm -o - | FileCheck %s -check-prefix=CHECKSSE4_I16X8 + // RUN: %{ispc} %s --target=sse4-i8x16 --emit-asm -o - | FileCheck %s -check-prefix=CHECKSSE4_I8X16 + +-// REQUIRES: LLVM_10_0+ + // REQUIRES: X86_ENABLED + + // CHECKAVX2_I32X8: {{[a-z]*xor[a-z]*}} %xmm0, %xmm0, %xmm0 +diff --git a/tests/lit-tests/1844.ispc b/tests/lit-tests/1844.ispc +index 793a8866c..189edf4c3 100644 +--- a/tests/lit-tests/1844.ispc ++++ b/tests/lit-tests/1844.ispc +@@ -6,9 +6,6 @@ + + // REQUIRES: X86_ENABLED + +-// It doesn't work for anything earlier than 10.0, but it will not be fixed, so don't run. 
+-// REQUIRES: LLVM_10_0+ +- + extern uniform unsigned int64 var_6; + extern uniform unsigned int16 var_11; + extern uniform bool arr_201 [1] [15] [20]; +diff --git a/tests/lit-tests/1926.ispc b/tests/lit-tests/1926.ispc +index 70f7cc3c2..7921c91a6 100644 +--- a/tests/lit-tests/1926.ispc ++++ b/tests/lit-tests/1926.ispc +@@ -2,8 +2,6 @@ + // RUN: cat %T/1926.o.json \ + // RUN: | FileCheck %s + +-// REQUIRES: LLVM_10_0+ +- + // CHECK: "traceEvents" + // CHECK: "detail" + export uniform int foo(uniform int a[], uniform int count) { +diff --git a/tests/lit-tests/cpus_x86.ispc b/tests/lit-tests/cpus_x86.ispc +index 516efdd4c..4d60186f6 100644 +--- a/tests/lit-tests/cpus_x86.ispc ++++ b/tests/lit-tests/cpus_x86.ispc +@@ -24,6 +24,8 @@ + //; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=icl + //; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=icelake-server + //; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=icx ++//; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=tigerlake ++//; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=tgl + + // REQUIRES: X86_ENABLED + +diff --git a/tests/lit-tests/cpus_x86_llvm10.ispc b/tests/lit-tests/cpus_x86_llvm10.ispc +deleted file mode 100644 +index ef00000e5..000000000 +--- a/tests/lit-tests/cpus_x86_llvm10.ispc ++++ /dev/null +@@ -1,11 +0,0 @@ +-// The test checks that cpu definitions (including all synonyms) are successfully consumed by compiler. +- +-//; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=tigerlake +-//; RUN: %{ispc} %s -o %t.o --nostdlib --target=sse2-i32x4 --cpu=tgl +- +-// REQUIRES: X86_ENABLED +-// REQUIRES: LLVM_10_0+ +- +-uniform int i; +- +-void foo() {} +diff --git a/tests/lit-tests/lit.cfg b/tests/lit-tests/lit.cfg +index 17016579d..045e69437 100644 +--- a/tests/lit-tests/lit.cfg ++++ b/tests/lit-tests/lit.cfg +@@ -30,11 +30,6 @@ print("Config:") + + # LLVM version + llvm_version = LooseVersion(ispc_llvm_version_number) +-if llvm_version >= LooseVersion("10.0.0"): +- print("LLVM_10_0+: YES") +- config.available_features.add("LLVM_10_0+") +-else: +- print("LLVM_10_0+: NO") + + if llvm_version >= LooseVersion("12.0.0"): + print("LLVM_12_0+: YES") diff --git a/srcpkgs/ispc/patches/llvm12-003.patch b/srcpkgs/ispc/patches/llvm12-003.patch new file mode 100644 index 00000000000..0423afc9e3f --- /dev/null +++ b/srcpkgs/ispc/patches/llvm12-003.patch @@ -0,0 +1,1504 @@ +From 1851d18b213dbad169937076176b2d5509733c76 Mon Sep 17 00:00:00 2001 +From: Deepak Rajendrakumaran +Date: Tue, 12 Jan 2021 22:15:12 -0800 +Subject: [PATCH] fixes #1821 - Removing LLVMGetName() and switching to + LLVM::Twine. 
+ +--- + src/ctx.cpp | 304 ++++++++++++++++++++++------------------------- + src/ctx.h | 72 +++++------ + src/expr.cpp | 36 +++--- + src/llvmutil.cpp | 17 --- + src/llvmutil.h | 5 - + src/opt.cpp | 103 +++++++++------- + 6 files changed, 255 insertions(+), 282 deletions(-) + +diff --git a/src/ctx.cpp b/src/ctx.cpp +index 42c4ea09d..ded524b71 100644 +--- a/src/ctx.cpp ++++ b/src/ctx.cpp +@@ -1020,8 +1020,8 @@ void FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos + llvm::BasicBlock *bbCaseImpl = NULL; + if (emitGenXHardwareMask()) { + // Create basic block with actual case implementation +- std::string bbName = bbCase->getName().str() + "_impl"; +- bbCaseImpl = CreateBasicBlock(bbName.c_str(), bbCase); ++ llvm::Twine bbName = llvm::Twine(bbCase->getName()) + "_impl"; ++ bbCaseImpl = CreateBasicBlock(bbName, bbCase); + } + #endif + +@@ -1185,7 +1185,7 @@ bool FunctionEmitContext::initLabelBBlocks(ASTNode *node, void *data) { + if (ctx->labelMap.find(ls->name) != ctx->labelMap.end()) + Error(ls->pos, "Multiple labels named \"%s\" in function.", ls->name.c_str()); + else { +- llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name.c_str()); ++ llvm::BasicBlock *bb = ctx->CreateBasicBlock(ls->name); + ctx->labelMap[ls->name] = bb; + } + return true; +@@ -1299,7 +1299,7 @@ llvm::Value *FunctionEmitContext::Any(llvm::Value *mask) { + // We can actually call either one, since both are i32s as far as + // LLVM's type system is concerned... + llvm::Function *fmm = mm[0]->function; +- return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_any")); ++ return CallInst(fmm, NULL, mask, llvm::Twine(mask->getName()) + "_any"); + } + + llvm::Value *FunctionEmitContext::All(llvm::Value *mask) { +@@ -1315,7 +1315,7 @@ llvm::Value *FunctionEmitContext::All(llvm::Value *mask) { + // We can actually call either one, since both are i32s as far as + // LLVM's type system is concerned... + llvm::Function *fmm = mm[0]->function; +- return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_all")); ++ return CallInst(fmm, NULL, mask, llvm::Twine(mask->getName()) + "_all"); + } + + llvm::Value *FunctionEmitContext::None(llvm::Value *mask) { +@@ -1331,7 +1331,7 @@ llvm::Value *FunctionEmitContext::None(llvm::Value *mask) { + // We can actually call either one, since both are i32s as far as + // LLVM's type system is concerned... + llvm::Function *fmm = mm[0]->function; +- return CallInst(fmm, NULL, mask, LLVMGetName(mask, "_none")); ++ return CallInst(fmm, NULL, mask, llvm::Twine(mask->getName()) + "_none"); + } + + llvm::Value *FunctionEmitContext::LaneMask(llvm::Value *v) { +@@ -1349,7 +1349,7 @@ llvm::Value *FunctionEmitContext::LaneMask(llvm::Value *v) { + // We can actually call either one, since both are i32s as far as + // LLVM's type system is concerned... 
+ llvm::Function *fmm = mm[0]->function; +- return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk")); ++ return CallInst(fmm, NULL, v, llvm::Twine(v->getName()) + "_movmsk"); + } + + llvm::Value *FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { +@@ -1364,11 +1364,12 @@ llvm::Value *FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2 + #else + if (g->target->getArch() == Arch::wasm32) { + llvm::Function *fmm = m->module->getFunction("__wasm_cmp_msk_eq"); +- return CallInst(fmm, NULL, {v1, v2}, LLVMGetName("wasm_cmp_msk_eq", v1, v2)); ++ return CallInst(fmm, NULL, {v1, v2}, ((llvm::Twine("wasm_cmp_msk_eq_") + v1->getName()) + "_") + v2->getName()); + } + llvm::Value *mm1 = LaneMask(v1); + llvm::Value *mm2 = LaneMask(v2); +- return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, LLVMGetName("equal", v1, v2)); ++ return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, ++ ((llvm::Twine("equal_") + v1->getName()) + "_") + v2->getName()); + #endif + } + +@@ -1392,7 +1393,7 @@ llvm::Value *FunctionEmitContext::GetStringPtr(const std::string &str) { + return new llvm::BitCastInst(lstrPtr, LLVMTypes::VoidPointerType, "str_void_ptr", bblock); + } + +-llvm::BasicBlock *FunctionEmitContext::CreateBasicBlock(const char *name, llvm::BasicBlock *insertAfter) { ++llvm::BasicBlock *FunctionEmitContext::CreateBasicBlock(const llvm::Twine &name, llvm::BasicBlock *insertAfter) { + llvm::BasicBlock *newBB = llvm::BasicBlock::Create(*g->ctx, name, llvmFunction); + if (insertAfter) + newBB->moveAfter(insertAfter); +@@ -1416,14 +1417,15 @@ llvm::Value *FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) { + + for (unsigned int i = 0; i < at->getNumElements(); ++i) { + llvm::Value *elt = ExtractInst(b, i); +- llvm::Value *sext = SwitchBoolSize(elt, LLVMTypes::BoolVectorStorageType, LLVMGetName(elt, "_to_boolvec")); ++ llvm::Value *sext = ++ SwitchBoolSize(elt, LLVMTypes::BoolVectorStorageType, llvm::Twine(elt->getName()) + "_to_boolvec"); + ret = InsertInst(ret, sext, i); + } + return ret; + } else { + // For non-array types, convert to 'LLVMTypes::BoolVectorType' if + // necessary. +- return SwitchBoolSize(b, LLVMTypes::BoolVectorType, LLVMGetName(b, "_to_boolvec")); ++ return SwitchBoolSize(b, LLVMTypes::BoolVectorType, llvm::Twine(b->getName()) + "_to_boolvec"); + } + } + +@@ -1563,7 +1565,7 @@ static int lArrayVectorWidth(llvm::Type *t) { + } + + llvm::Value *FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, +- const char *name) { ++ const llvm::Twine &name) { + if (v0 == NULL || v1 == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -1573,7 +1575,7 @@ llvm::Value *FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps in + llvm::Type *type = v0->getType(); + int arraySize = lArrayVectorWidth(type); + if (arraySize == 0) { +- llvm::Instruction *bop = llvm::BinaryOperator::Create(inst, v0, v1, name ? 
name : "", bblock); ++ llvm::Instruction *bop = llvm::BinaryOperator::Create(inst, v0, v1, name, bblock); + AddDebugPos(bop); + return bop; + } else { +@@ -1591,7 +1593,7 @@ llvm::Value *FunctionEmitContext::BinaryOperator(llvm::Instruction::BinaryOps in + } + } + +-llvm::Value *FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) { ++llvm::Value *FunctionEmitContext::NotOperator(llvm::Value *v, const llvm::Twine &name) { + if (v == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -1603,14 +1605,14 @@ llvm::Value *FunctionEmitContext::NotOperator(llvm::Value *v, const char *name) + llvm::Type *type = v->getType(); + int arraySize = lArrayVectorWidth(type); + if (arraySize == 0) { +- llvm::Instruction *binst = llvm::BinaryOperator::CreateNot(v, name ? name : "not", bblock); ++ llvm::Instruction *binst = llvm::BinaryOperator::CreateNot(v, name.isTriviallyEmpty() ? "not" : name, bblock); + AddDebugPos(binst); + return binst; + } else { + llvm::Value *ret = llvm::UndefValue::get(type); + for (int i = 0; i < arraySize; ++i) { + llvm::Value *a = ExtractInst(v, i); +- llvm::Value *op = llvm::BinaryOperator::CreateNot(a, name ? name : "not", bblock); ++ llvm::Value *op = llvm::BinaryOperator::CreateNot(a, name.isTriviallyEmpty() ? "not" : name, bblock); + AddDebugPos(op); + ret = InsertInst(ret, op, i); + } +@@ -1638,7 +1640,7 @@ static llvm::Type *lGetMatchingBoolVectorType(llvm::Type *type) { + } + + llvm::Value *FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred, +- llvm::Value *v0, llvm::Value *v1, const char *name) { ++ llvm::Value *v0, llvm::Value *v1, const llvm::Twine &name) { + if (v0 == NULL || v1 == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -1648,7 +1650,8 @@ llvm::Value *FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm + llvm::Type *type = v0->getType(); + int arraySize = lArrayVectorWidth(type); + if (arraySize == 0) { +- llvm::Instruction *ci = llvm::CmpInst::Create(inst, pred, v0, v1, name ? name : "cmp", bblock); ++ llvm::Instruction *ci = ++ llvm::CmpInst::Create(inst, pred, v0, v1, name.isTriviallyEmpty() ? "cmp" : name, bblock); + AddDebugPos(ci); + return ci; + } else { +@@ -1657,14 +1660,14 @@ llvm::Value *FunctionEmitContext::CmpInst(llvm::Instruction::OtherOps inst, llvm + for (int i = 0; i < arraySize; ++i) { + llvm::Value *a = ExtractInst(v0, i); + llvm::Value *b = ExtractInst(v1, i); +- llvm::Value *op = CmpInst(inst, pred, a, b, name); ++ llvm::Value *op = CmpInst(inst, pred, a, b, name.isTriviallyEmpty() ? 
"cmp" : name); + ret = InsertInst(ret, op, i); + } + return ret; + } + } + +-llvm::Value *FunctionEmitContext::SmearUniform(llvm::Value *value, const char *name) { ++llvm::Value *FunctionEmitContext::SmearUniform(llvm::Value *value, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -1704,21 +1707,19 @@ llvm::Value *FunctionEmitContext::SmearUniform(llvm::Value *value, const char *n + return ret; + } + +-llvm::Value *FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, const char *name) { ++llvm::Value *FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_bitcast"); +- +- llvm::Instruction *inst = new llvm::BitCastInst(value, type, name, bblock); ++ llvm::Instruction *inst = new llvm::BitCastInst( ++ value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_bitcast" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { ++llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -1728,23 +1729,19 @@ llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *n + // no-op for varying pointers; they're already vectors of ints + return value; + +- if (name == NULL) +- name = LLVMGetName(value, "_ptr2int"); + llvm::Type *type = LLVMTypes::PointerIntType; +- llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); ++ llvm::Instruction *inst = new llvm::PtrToIntInst( ++ value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_ptr2int" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, const char *name) { ++llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_ptr2int"); +- + llvm::Type *fromType = value->getType(); + if (llvm::isa(fromType)) { + // varying pointer +@@ -1752,27 +1749,26 @@ llvm::Value *FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *t + // already the right type--done + return value; + else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) +- return TruncInst(value, toType, name); ++ return TruncInst(value, toType, ++ name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_ptr2int" : name); + else { + AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits()); +- return ZExtInst(value, toType, name); ++ return ZExtInst(value, toType, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_ptr2int" : name); + } + } + +- llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock); ++ llvm::Instruction *inst = new llvm::PtrToIntInst( ++ value, toType, name.isTriviallyEmpty() ? 
llvm::Twine(value->getName()) + "_ptr2int" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Value *FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, const char *name) { ++llvm::Value *FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *toType, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_int2ptr"); +- + llvm::Type *fromType = value->getType(); + if (llvm::isa(fromType)) { + // varying pointer +@@ -1780,95 +1776,87 @@ llvm::Value *FunctionEmitContext::IntToPtrInst(llvm::Value *value, llvm::Type *t + // done + return value; + else if (fromType->getScalarSizeInBits() > toType->getScalarSizeInBits()) +- return TruncInst(value, toType, name); ++ return TruncInst(value, toType, ++ name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_int2ptr" : name); + else { + AssertPos(currentPos, fromType->getScalarSizeInBits() < toType->getScalarSizeInBits()); +- return ZExtInst(value, toType, name); ++ return ZExtInst(value, toType, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_int2ptr" : name); + } + } + +- llvm::Instruction *inst = new llvm::IntToPtrInst(value, toType, name, bblock); ++ llvm::Instruction *inst = new llvm::IntToPtrInst( ++ value, toType, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_int2ptr" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Instruction *FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, const char *name) { ++llvm::Instruction *FunctionEmitContext::TruncInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_trunc"); +- + // TODO: we should probably handle the array case as in + // e.g. BitCastInst(), but we don't currently need that functionality +- llvm::Instruction *inst = new llvm::TruncInst(value, type, name, bblock); ++ llvm::Instruction *inst = new llvm::TruncInst( ++ value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_trunc" : name, bblock); + AddDebugPos(inst); + return inst; + } + + llvm::Instruction *FunctionEmitContext::CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type, +- const char *name) { ++ const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_cast"); +- + // TODO: we should probably handle the array case as in + // e.g. BitCastInst(), but we don't currently need that functionality +- llvm::Instruction *inst = llvm::CastInst::Create(op, value, type, name, bblock); ++ llvm::Instruction *inst = llvm::CastInst::Create( ++ op, value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_cast" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Instruction *FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, const char *name) { ++llvm::Instruction *FunctionEmitContext::FPCastInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_cast"); +- + // TODO: we should probably handle the array case as in + // e.g. 
BitCastInst(), but we don't currently need that functionality +- llvm::Instruction *inst = llvm::CastInst::CreateFPCast(value, type, name, bblock); ++ llvm::Instruction *inst = llvm::CastInst::CreateFPCast( ++ value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_cast" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Instruction *FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, const char *name) { ++llvm::Instruction *FunctionEmitContext::SExtInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_sext"); +- + // TODO: we should probably handle the array case as in + // e.g. BitCastInst(), but we don't currently need that functionality +- llvm::Instruction *inst = new llvm::SExtInst(value, type, name, bblock); ++ llvm::Instruction *inst = new llvm::SExtInst( ++ value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_sext" : name, bblock); + AddDebugPos(inst); + return inst; + } + +-llvm::Instruction *FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, const char *name) { ++llvm::Instruction *FunctionEmitContext::ZExtInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name) { + if (value == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(value, "_zext"); +- + // TODO: we should probably handle the array case as in + // e.g. BitCastInst(), but we don't currently need that functionality +- llvm::Instruction *inst = new llvm::ZExtInst(value, type, name, bblock); ++ llvm::Instruction *inst = new llvm::ZExtInst( ++ value, type, name.isTriviallyEmpty() ? llvm::Twine(value->getName()) + "_zext" : name, bblock); + AddDebugPos(inst); + return inst; + } +@@ -1913,14 +1901,15 @@ llvm::Value *FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Va + scale = SmearUniform(scale); + Assert(index != NULL); + // offset = index * scale +- offset = BinaryOperator(llvm::Instruction::Mul, scale, index, LLVMGetName("mul", scale, index)); ++ offset = BinaryOperator(llvm::Instruction::Mul, scale, index, ++ ((llvm::Twine("mul_") + scale->getName()) + "_") + index->getName()); + } + + // For 64-bit targets, if we've been doing our offset calculations in + // 32 bits, we still have to convert to a 64-bit value before we + // actually add the offset to the pointer. + if (g->target->is32Bit() == false && g->opt.force32BitAddressing == true) +- offset = SExtInst(offset, LLVMTypes::Int64VectorType, LLVMGetName(offset, "_to_64")); ++ offset = SExtInst(offset, LLVMTypes::Int64VectorType, llvm::Twine(offset->getName()) + "_to_64"); + + // Smear out the pointer to be varying; either the base pointer or the + // index must be varying for this method to be called. +@@ -1929,7 +1918,7 @@ llvm::Value *FunctionEmitContext::applyVaryingGEP(llvm::Value *basePtr, llvm::Va + llvm::Value *varyingPtr = baseIsUniform ? 
SmearUniform(basePtr) : basePtr; + + // newPtr = ptr + offset +- return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, LLVMGetName(basePtr, "_offset")); ++ return BinaryOperator(llvm::Instruction::Add, varyingPtr, offset, llvm::Twine(basePtr->getName()) + "_offset"); + } + + void FunctionEmitContext::MatchIntegerTypes(llvm::Value **v0, llvm::Value **v1) { +@@ -2006,13 +1995,13 @@ llvm::Value *FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value + llvm::StructType *st = llvm::StructType::get(*g->ctx, eltTypes); + + llvm::Value *ret = llvm::UndefValue::get(st); +- ret = InsertInst(ret, ptr, 0, LLVMGetName(ret, "_slice_ptr")); +- ret = InsertInst(ret, offset, 1, LLVMGetName(ret, "_slice_offset")); ++ ret = InsertInst(ret, ptr, 0, llvm::Twine(ret->getName()) + "_slice_ptr"); ++ ret = InsertInst(ret, offset, 1, llvm::Twine(ret->getName()) + "_slice_offset"); + return ret; + } + + llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrRefType, +- const char *name) { ++ const llvm::Twine &name) { + if (basePtr == NULL || index == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -2065,8 +2054,8 @@ llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm:: + // uniform, so just emit the regular LLVM GEP instruction + llvm::Value *ind[1] = {index}; + llvm::ArrayRef arrayRef(&ind[0], &ind[1]); +- llvm::Instruction *inst = +- llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "gep", bblock); ++ llvm::Instruction *inst = llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, ++ name.isTriviallyEmpty() ? "gep" : name, bblock); + AddDebugPos(inst); + return inst; + } else +@@ -2074,7 +2063,7 @@ llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm:: + } + + llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, llvm::Value *index1, +- const Type *ptrRefType, const char *name) { ++ const Type *ptrRefType, const llvm::Twine &name) { + if (basePtr == NULL || index0 == NULL || index1 == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -2114,8 +2103,8 @@ llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm:: + // uniform, so just emit the regular LLVM GEP instruction + llvm::Value *indices[2] = {index0, index1}; + llvm::ArrayRef arrayRef(&indices[0], &indices[2]); +- llvm::Instruction *inst = +- llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "gep", bblock); ++ llvm::Instruction *inst = llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, ++ name.isTriviallyEmpty() ? "gep" : name, bblock); + AddDebugPos(inst); + return inst; + } else { +@@ -2138,7 +2127,7 @@ llvm::Value *FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm:: + } + + llvm::Value *FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int elementNum, const Type *ptrRefType, +- const char *name, const PointerType **resultPtrType) { ++ const llvm::Twine &name, const PointerType **resultPtrType) { + if (resultPtrType != NULL) + AssertPos(currentPos, ptrRefType != NULL); + +@@ -2195,8 +2184,8 @@ llvm::Value *FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int + // If the pointer is uniform, we can use the regular LLVM GEP. 
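(Aside: every hunk in this patch file applies the same conversion. The old "const char *name = NULL" sentinel plus the leaky LLVMGetName() helper become a "const llvm::Twine &name" parameter defaulting to "", with the fallback name assembled lazily from the operand's own name. A minimal sketch of the idiom follows; EmitTrunc and its signature are illustrative, not code from the patch:

    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/Instructions.h"

    // Sketch only: the fallback name is built as a Twine expression, so
    // nothing is heap-allocated unless LLVM actually materializes the
    // name. The removed LLVMGetName() called strdup() on every use.
    llvm::Instruction *EmitTrunc(llvm::Value *v, llvm::Type *t,
                                 llvm::BasicBlock *bb,
                                 const llvm::Twine &name = "") {
        return new llvm::TruncInst(
            v, t,
            name.isTriviallyEmpty() ? llvm::Twine(v->getName()) + "_trunc"
                                    : name,
            bb);
    }

The whole ternary lives inside one full expression, which is what keeps the temporary Twine valid until the instruction constructor consumes it.)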
+ llvm::Value *offsets[2] = {LLVMInt32(0), LLVMInt32(elementNum)}; + llvm::ArrayRef arrayRef(&offsets[0], &offsets[2]); +- resultPtr = +- llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, name ? name : "struct_offset", bblock); ++ resultPtr = llvm::GetElementPtrInst::Create(PTYPE(basePtr), basePtr, arrayRef, ++ name.isTriviallyEmpty() ? "struct_offset" : name, bblock); + } else { + // Otherwise do the math to find the offset and add it to the given + // varying pointers +@@ -2237,7 +2226,7 @@ llvm::Value *FunctionEmitContext::AddElementOffset(llvm::Value *fullBasePtr, int + return resultPtr; + } + +-llvm::Value *FunctionEmitContext::SwitchBoolSize(llvm::Value *value, llvm::Type *toType, const char *name) { ++llvm::Value *FunctionEmitContext::SwitchBoolSize(llvm::Value *value, llvm::Type *toType, const llvm::Twine &name) { + if ((value == NULL) || (toType == NULL)) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -2248,20 +2237,18 @@ llvm::Value *FunctionEmitContext::SwitchBoolSize(llvm::Value *value, llvm::Type + llvm::Value *newBool = value; + if (g->target->getDataLayout()->getTypeSizeInBits(fromType) > + g->target->getDataLayout()->getTypeSizeInBits(toType)) { +- if (name == NULL) +- name = LLVMGetName(value, "_switchBool"); +- newBool = TruncInst(value, toType, name); ++ newBool = ++ TruncInst(value, toType, name.isTriviallyEmpty() ? (llvm::Twine(value->getName()) + "_switchBool") : name); + } else if (g->target->getDataLayout()->getTypeSizeInBits(fromType) < + g->target->getDataLayout()->getTypeSizeInBits(toType)) { +- if (name == NULL) +- name = LLVMGetName(value, "_switchBool"); +- newBool = SExtInst(value, toType, name); ++ newBool = ++ SExtInst(value, toType, name.isTriviallyEmpty() ? (llvm::Twine(value->getName()) + "_switchBool") : name); + } + + return newBool; + } + +-llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const Type *type, const char *name) { ++llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const Type *type, const llvm::Twine &name) { + if (ptr == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -2270,13 +2257,13 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const Type *type, c + llvm::PointerType *pt = llvm::dyn_cast(ptr->getType()); + AssertPos(currentPos, pt != NULL); + +- if (name == NULL) +- name = LLVMGetName(ptr, "_load"); +- + #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0 +- llvm::LoadInst *inst = new llvm::LoadInst(pt->getPointerElementType(), ptr, name, bblock); ++ llvm::LoadInst *inst = ++ new llvm::LoadInst(pt->getPointerElementType(), ptr, ++ name.isTriviallyEmpty() ? (llvm::Twine(ptr->getName()) + "_load") : name, bblock); + #else +- llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock); ++ llvm::LoadInst *inst = ++ new llvm::LoadInst(ptr, name.isTriviallyEmpty() ? 
(llvm::Twine(ptr->getName()) + "_load") : name, bblock); + #endif + + if (g->opt.forceAlignedMemory && llvm::dyn_cast(pt->getElementType())) { +@@ -2307,8 +2294,8 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, const Type *type, c + static llvm::Value *lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr, const PointerType **ptrType) { + Assert(CastType(*ptrType) != NULL); + +- llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, LLVMGetName(ptr, "_ptr")); +- llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, LLVMGetName(ptr, "_offset")); ++ llvm::Value *slicePtr = ctx->ExtractInst(ptr, 0, llvm::Twine(ptr->getName()) + "_ptr"); ++ llvm::Value *sliceOffset = ctx->ExtractInst(ptr, 1, llvm::Twine(ptr->getName()) + "_offset"); + + // slicePtr should be a pointer to an soa-width wide array of the + // final atomic/enum/pointer type +@@ -2327,14 +2314,14 @@ static llvm::Value *lFinalSliceOffset(FunctionEmitContext *ctx, llvm::Value *ptr + slicePtr = ctx->BitCastInst(slicePtr, (*ptrType)->LLVMType(g->ctx)); + + // And finally index based on the slice offset +- return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType, LLVMGetName(slicePtr, "_final_gep")); ++ return ctx->GetElementPtrInst(slicePtr, sliceOffset, *ptrType, llvm::Twine(slicePtr->getName()) + "_final_gep"); + } + + /** Utility routine that loads from a uniform pointer to soa<> data, + returning a regular uniform (non-SOA result). + */ + llvm::Value *FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType, +- const char *name) { ++ const llvm::Twine &name) { + const Type *unifType = ptrType->GetBaseType()->GetAsUniformType(); + + const CollectionType *ct = CastType(ptrType->GetBaseType()); +@@ -2363,7 +2350,7 @@ llvm::Value *FunctionEmitContext::loadUniformFromSOA(llvm::Value *ptr, llvm::Val + } + + llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrRefType, +- const char *name, bool one_elem) { ++ const llvm::Twine &name, bool one_elem) { + if (ptr == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -2371,9 +2358,6 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, + + AssertPos(currentPos, ptrRefType != NULL && mask != NULL); + +- if (name == NULL) +- name = LLVMGetName(ptr, "_load"); +- + const PointerType *ptrType; + const Type *elType; + if (CastType(ptrRefType) != NULL) { +@@ -2393,7 +2377,8 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, + + if (ptrType->IsUniformType()) { + if (ptrType->IsSlice()) { +- return loadUniformFromSOA(ptr, mask, ptrType, name); ++ return loadUniformFromSOA(ptr, mask, ptrType, ++ name.isTriviallyEmpty() ? (llvm::Twine(ptr->getName()) + "_load") : name); + } else { + // FIXME: same issue as above load inst regarding alignment... + // +@@ -2405,11 +2390,15 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, + const AtomicType *atomicType = CastType(ptrType->GetBaseType()); + + #if ISPC_LLVM_VERSION <= ISPC_LLVM_10_0 +- llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, false /* not volatile */, bblock); ++ llvm::LoadInst *inst = ++ new llvm::LoadInst(ptr, name.isTriviallyEmpty() ? 
(llvm::Twine(ptr->getName()) + "_load") : name, ++ false /* not volatile */, bblock); + #else // LLVM 11.0+ + llvm::PointerType *ptr_type = llvm::dyn_cast(ptr->getType()); + llvm::LoadInst *inst = +- new llvm::LoadInst(ptr_type->getPointerElementType(), ptr, name, false /* not volatile */, bblock); ++ new llvm::LoadInst(ptr_type->getPointerElementType(), ptr, ++ name.isTriviallyEmpty() ? (llvm::Twine(ptr->getName()) + "_load") : name, ++ false /* not volatile */, bblock); + #endif + + if (atomicType != NULL && atomicType->IsVaryingType()) { +@@ -2433,7 +2422,8 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, + } else { + // Otherwise we should have a varying ptr and it's time for a + // gather. +- llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), name); ++ llvm::Value *gather_result = gather(ptr, ptrType, GetFullMask(), ++ name.isTriviallyEmpty() ? (llvm::Twine(ptr->getName()) + "_load") : name); + if (!one_elem) + return gather_result; + +@@ -2453,19 +2443,19 @@ llvm::Value *FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, + // We can actually call either one, since both are i32s as far as + // LLVM's type system is concerned... + llvm::Function *fmm = mm[0]->function; +- llvm::Value *int_mask = CallInst(fmm, NULL, mask, LLVMGetName(mask, "_movmsk")); ++ llvm::Value *int_mask = CallInst(fmm, NULL, mask, llvm::Twine(mask->getName()) + "_movmsk"); + std::vector lz; + m->symbolTable->LookupFunction("__count_trailing_zeros_i64", &lz); + llvm::Function *flz = lz[0]->function; +- llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, LLVMGetName(mask, "_clz")); +- llvm::Value *elem = llvm::ExtractElementInst::Create(gather_result, elem_idx, +- LLVMGetName(gather_result, "_umasked_elem"), bblock); ++ llvm::Value *elem_idx = CallInst(flz, NULL, int_mask, llvm::Twine(mask->getName()) + "_clz"); ++ llvm::Value *elem = llvm::ExtractElementInst::Create( ++ gather_result, elem_idx, llvm::Twine(gather_result->getName()) + "_umasked_elem", bblock); + return elem; + } + } + + llvm::Value *FunctionEmitContext::gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask, +- const char *name) { ++ const llvm::Twine &name) { + // We should have a varying pointer if we get here... + AssertPos(currentPos, ptrType->IsVaryingType()); + +@@ -2602,7 +2592,8 @@ void FunctionEmitContext::addGSMetadata(llvm::Value *v, SourcePos pos) { + inst->setMetadata("last_column", md); + } + +-llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *name, int align, bool atEntryBlock) { ++llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const llvm::Twine &name, int align, ++ bool atEntryBlock) { + if (llvmType == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -2615,12 +2606,12 @@ llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *n + llvm::Instruction *retInst = allocaBlock->getTerminator(); + AssertPos(currentPos, retInst); + unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace(); +- inst = new llvm::AllocaInst(llvmType, AS, name ? name : "", retInst); ++ inst = new llvm::AllocaInst(llvmType, AS, name, retInst); + } else { + // Unless the caller overrode the default and wants it in the + // current basic block + unsigned AS = llvmFunction->getParent()->getDataLayout().getAllocaAddrSpace(); +- inst = new llvm::AllocaInst(llvmType, AS, name ? 
name : "", bblock); ++ inst = new llvm::AllocaInst(llvmType, AS, name, bblock); + } + + // If no alignment was specified but we have an array of a uniform +@@ -2639,7 +2630,8 @@ llvm::Value *FunctionEmitContext::AllocaInst(llvm::Type *llvmType, const char *n + return inst; + } + +-llvm::Value *FunctionEmitContext::AllocaInst(const Type *ptrType, const char *name, int align, bool atEntryBlock) { ++llvm::Value *FunctionEmitContext::AllocaInst(const Type *ptrType, const llvm::Twine &name, int align, ++ bool atEntryBlock) { + if (ptrType == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -3075,66 +3067,59 @@ llvm::Instruction *FunctionEmitContext::BranchInst(llvm::BasicBlock *trueBlock, + return b; + } + +-llvm::Value *FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const char *name) { ++llvm::Value *FunctionEmitContext::ExtractInst(llvm::Value *v, int elt, const llvm::Twine &name) { + if (v == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) { +- char buf[32]; +- snprintf(buf, sizeof(buf), "_extract_%d", elt); +- name = LLVMGetName(v, buf); +- } + llvm::Instruction *ei = NULL; + if (llvm::isa(v->getType())) +- ei = llvm::ExtractElementInst::Create(v, LLVMInt32(elt), name, bblock); ++ ei = llvm::ExtractElementInst::Create( ++ v, LLVMInt32(elt), ++ name.isTriviallyEmpty() ? ((llvm::Twine(v->getName()) + "_extract_") + llvm::Twine(elt)) : name, bblock); + else +- ei = llvm::ExtractValueInst::Create(v, elt, name, bblock); ++ ei = llvm::ExtractValueInst::Create( ++ v, elt, name.isTriviallyEmpty() ? ((llvm::Twine(v->getName()) + "_extract_") + llvm::Twine(elt)) : name, ++ bblock); + AddDebugPos(ei); + return ei; + } + +-llvm::Value *FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name) { ++llvm::Value *FunctionEmitContext::InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const llvm::Twine &name) { + if (v == NULL || eltVal == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) { +- char buf[32]; +- snprintf(buf, sizeof(buf), "_insert_%d", elt); +- name = LLVMGetName(v, buf); +- } +- + llvm::Instruction *ii = NULL; + if (llvm::isa(v->getType())) +- ii = llvm::InsertElementInst::Create(v, eltVal, LLVMInt32(elt), name, bblock); ++ ii = llvm::InsertElementInst::Create( ++ v, eltVal, LLVMInt32(elt), ++ name.isTriviallyEmpty() ? ((llvm::Twine(v->getName()) + "_insert_") + llvm::Twine(elt)) : name, bblock); + else +- ii = llvm::InsertValueInst::Create(v, eltVal, elt, name, bblock); ++ ii = llvm::InsertValueInst::Create( ++ v, eltVal, elt, ++ name.isTriviallyEmpty() ? ((llvm::Twine(v->getName()) + "_insert_") + llvm::Twine(elt)) : name, bblock); + AddDebugPos(ii); + return ii; + } + +-llvm::Value *FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name) { ++llvm::Value *FunctionEmitContext::ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, ++ const llvm::Twine &name) { + if (v1 == NULL || v2 == NULL || mask == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) { +- char buf[32]; +- snprintf(buf, sizeof(buf), "_shuffle"); +- name = LLVMGetName(v1, buf); +- } +- +- llvm::Instruction *ii = new llvm::ShuffleVectorInst(v1, v2, mask, name, bblock); ++ llvm::Instruction *ii = new llvm::ShuffleVectorInst( ++ v1, v2, mask, name.isTriviallyEmpty() ? 
(llvm::Twine(v1->getName()) + "_shuffle") : name, bblock); + + AddDebugPos(ii); + return ii; + } + +-llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name) { ++llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vecType, const llvm::Twine &name) { + if (v == NULL || vecType == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -3148,12 +3133,6 @@ llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vec + Assert(ty && ty->getVectorElementType() == v->getType()); + #endif + +- if (name == NULL) { +- char buf[32]; +- snprintf(buf, sizeof(buf), "_broadcast"); +- name = LLVMGetName(v, buf); +- } +- + // Generate the following sequence: + // %name_init.i = insertelement <4 x i32> undef, i32 %val, i32 0 + // %name.i = shufflevector <4 x i32> %name_init.i, <4 x i32> undef, +@@ -3163,7 +3142,7 @@ llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vec + llvm::Value *undef2 = llvm::UndefValue::get(vecType); + + // InsertElement +- llvm::Twine tw = llvm::Twine(name) + llvm::Twine("_init"); ++ llvm::Twine tw = name.isTriviallyEmpty() ? (llvm::Twine(v->getName()) + "_broadcast") : name + llvm::Twine("_init"); + llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str()); + + // ShuffleVector +@@ -3179,28 +3158,27 @@ llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vec + llvm::ConstantVector::getSplat(llvm::ElementCount::get(static_cast(ty->getNumElements()), false), + llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx))); + #endif +- llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, name); ++ llvm::Value *ret = ShuffleInst(insert, undef2, zeroVec, ++ name.isTriviallyEmpty() ? (llvm::Twine(v->getName()) + "_broadcast") : name); + + return ret; + } + +-llvm::PHINode *FunctionEmitContext::PhiNode(llvm::Type *type, int count, const char *name) { +- llvm::PHINode *pn = llvm::PHINode::Create(type, count, name ? name : "phi", bblock); ++llvm::PHINode *FunctionEmitContext::PhiNode(llvm::Type *type, int count, const llvm::Twine &name) { ++ llvm::PHINode *pn = llvm::PHINode::Create(type, count, name.isTriviallyEmpty() ? "phi" : name, bblock); + AddDebugPos(pn); + return pn; + } + + llvm::Instruction *FunctionEmitContext::SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, +- const char *name) { ++ const llvm::Twine &name) { + if (test == NULL || val0 == NULL || val1 == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + +- if (name == NULL) +- name = LLVMGetName(test, "_select"); +- +- llvm::Instruction *inst = llvm::SelectInst::Create(test, val0, val1, name, bblock); ++ llvm::Instruction *inst = llvm::SelectInst::Create( ++ test, val0, val1, name.isTriviallyEmpty() ? 
(llvm::Twine(test->getName()) + "_select") : name, bblock); + AddDebugPos(inst); + return inst; + } +@@ -3226,7 +3204,7 @@ static unsigned int lCalleeArgCount(llvm::Value *callee, const FunctionType *fun + } + + llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, +- const std::vector &args, const char *name) { ++ const std::vector &args, const llvm::Twine &name) { + if (func == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; +@@ -3258,9 +3236,9 @@ llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType + #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0 + llvm::PointerType *func_ptr_type = llvm::dyn_cast(func->getType()); + llvm::FunctionType *func_type = llvm::dyn_cast(func_ptr_type->getPointerElementType()); +- llvm::CallInst *callinst = llvm::CallInst::Create(func_type, func, argVals, name ? name : "", bblock); ++ llvm::CallInst *callinst = llvm::CallInst::Create(func_type, func, argVals, name, bblock); + #else +- llvm::CallInst *callinst = llvm::CallInst::Create(func, argVals, name ? name : "", bblock); ++ llvm::CallInst *callinst = llvm::CallInst::Create(func, argVals, name, bblock); + #endif + + // We could be dealing with a function pointer in which case this will not be a 'llvm::Function'. +@@ -3464,14 +3442,14 @@ llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType + } + + llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg, +- const char *name) { ++ const llvm::Twine &name) { + std::vector args; + args.push_back(arg); + return CallInst(func, funcType, args, name); + } + + llvm::Value *FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg0, +- llvm::Value *arg1, const char *name) { ++ llvm::Value *arg1, const llvm::Twine &name) { + std::vector args; + args.push_back(arg0); + args.push_back(arg1); +diff --git a/src/ctx.h b/src/ctx.h +index 6e5ad74ba..48b3bb826 100644 +--- a/src/ctx.h ++++ b/src/ctx.h +@@ -302,7 +302,7 @@ class FunctionEmitContext { + llvm::Value *GetStringPtr(const std::string &str); + + /** Create a new basic block with given name */ +- llvm::BasicBlock *CreateBasicBlock(const char *name, llvm::BasicBlock *insertAfter = NULL); ++ llvm::BasicBlock *CreateBasicBlock(const llvm::Twine &name, llvm::BasicBlock *insertAfter = NULL); + + /** Given a vector with element type i1, return a vector of type + LLVMTypes::BoolVectorType. This method handles the conversion for +@@ -380,33 +380,33 @@ class FunctionEmitContext { + this also handles applying the given operation to the vector + elements. */ + llvm::Value *BinaryOperator(llvm::Instruction::BinaryOps inst, llvm::Value *v0, llvm::Value *v1, +- const char *name = NULL); ++ const llvm::Twine &name = ""); + + /** Emit the "not" operator. Like BinaryOperator(), this also handles + a VectorType-based operand. */ +- llvm::Value *NotOperator(llvm::Value *v, const char *name = NULL); ++ llvm::Value *NotOperator(llvm::Value *v, const llvm::Twine &name = ""); + + /** Emit a comparison instruction. If the operands are VectorTypes, + then a value for the corresponding boolean VectorType is + returned. */ + llvm::Value *CmpInst(llvm::Instruction::OtherOps inst, llvm::CmpInst::Predicate pred, llvm::Value *v0, +- llvm::Value *v1, const char *name = NULL); ++ llvm::Value *v1, const llvm::Twine &name = ""); + + /** Given a scalar value, return a vector of the same type (or an + array, for pointer types). 
*/ +- llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL); ++ llvm::Value *SmearUniform(llvm::Value *value, const llvm::Twine &name = ""); + +- llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); +- llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL); +- llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); +- llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); ++ llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); ++ llvm::Value *PtrToIntInst(llvm::Value *value, const llvm::Twine &name = ""); ++ llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); ++ llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); + +- llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); ++ llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); + llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value, llvm::Type *type, +- const char *name = NULL); +- llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); +- llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); +- llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type, const char *name = NULL); ++ const llvm::Twine &name = ""); ++ llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); ++ llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); ++ llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type, const llvm::Twine &name = ""); + + /** Given two integer-typed values (but possibly one vector and the + other not, and or of possibly-different bit-widths), update their +@@ -426,9 +426,9 @@ class FunctionEmitContext { + pointers. The underlying type of the base pointer must be provided + via the ptrType parameter */ + llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index, const Type *ptrType, +- const char *name = NULL); ++ const llvm::Twine &name = ""); + llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0, llvm::Value *index1, const Type *ptrType, +- const char *name = NULL); ++ const llvm::Twine &name = ""); + + /** This method returns a new pointer that represents offsetting the + given base pointer to point at the given element number of the +@@ -436,26 +436,26 @@ class FunctionEmitContext { + pointer must be a pointer to a structure type. The ptrType gives + the type of the pointer, though it may be NULL if the base pointer + is uniform. */ +- llvm::Value *AddElementOffset(llvm::Value *basePtr, int elementNum, const Type *ptrType, const char *name = NULL, +- const PointerType **resultPtrType = NULL); ++ llvm::Value *AddElementOffset(llvm::Value *basePtr, int elementNum, const Type *ptrType, ++ const llvm::Twine &name = "", const PointerType **resultPtrType = NULL); + + /** Bool is stored as i8 and but represented in IR as i1 and + * . This is a helper function to match bool size at storage + * interface. 
*/ +- llvm::Value *SwitchBoolSize(llvm::Value *value, llvm::Type *toType, const char *name = NULL); ++ llvm::Value *SwitchBoolSize(llvm::Value *value, llvm::Type *toType, const llvm::Twine &name = ""); + /** Load from the memory location(s) given by lvalue, using the given + mask. The lvalue may be varying, in which case this corresponds to + a gather from the multiple memory locations given by the array of + pointer values given by the lvalue. If the lvalue is not varying, + then both the mask pointer and the type pointer may be NULL. */ +- llvm::Value *LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrType, const char *name = NULL, ++ llvm::Value *LoadInst(llvm::Value *ptr, llvm::Value *mask, const Type *ptrType, const llvm::Twine &name = "", + bool one_elem = false); + + /* Load from memory location(s) given. + * 'type' needs to be provided when storage type is different from IR type. For example, + * 'unform bool' is 'i1' in IR but stored as 'i8'. + * Otherwise leave this as NULL. */ +- llvm::Value *LoadInst(llvm::Value *ptr, const Type *type = NULL, const char *name = NULL); ++ llvm::Value *LoadInst(llvm::Value *ptr, const Type *type = NULL, const llvm::Twine &name = ""); + + /** Emits an alloca instruction to allocate stack storage for the given + type. If a non-zero alignment is specified, the object is also +@@ -463,7 +463,8 @@ class FunctionEmitContext { + instruction is added at the start of the function in the entry + basic block; if it should be added to the current basic block, then + the atEntryBlock parameter should be false. */ +- llvm::Value *AllocaInst(llvm::Type *llvmType, const char *name = NULL, int align = 0, bool atEntryBlock = true); ++ llvm::Value *AllocaInst(llvm::Type *llvmType, const llvm::Twine &name = "", int align = 0, ++ bool atEntryBlock = true); + + /** Emits an alloca instruction to allocate stack storage for the given + type. If a non-zero alignment is specified, the object is also +@@ -474,7 +475,7 @@ class FunctionEmitContext { + This implementation is preferred when possible. It is needed when + storage type is different from IR type. For example, + 'unform bool' is 'i1' in IR but stored as 'i8'. */ +- llvm::Value *AllocaInst(const Type *ptrType, const char *name = NULL, int align = 0, bool atEntryBlock = true); ++ llvm::Value *AllocaInst(const Type *ptrType, const llvm::Twine &name = "", int align = 0, bool atEntryBlock = true); + + /** Standard store instruction; for this variant, the lvalue must be a + single pointer, not a varying lvalue. +@@ -502,39 +503,41 @@ class FunctionEmitContext { + /** This convenience method maps to an llvm::ExtractElementInst if the + given value is a llvm::VectorType, and to an llvm::ExtractValueInst + otherwise. */ +- llvm::Value *ExtractInst(llvm::Value *v, int elt, const char *name = NULL); ++ llvm::Value *ExtractInst(llvm::Value *v, int elt, const llvm::Twine &name = ""); + + /** This convenience method maps to an llvm::InsertElementInst if the + given value is a llvm::VectorType, and to an llvm::InsertValueInst + otherwise. */ +- llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const char *name = NULL); ++ llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt, const llvm::Twine &name = ""); + + /** This convenience method maps to an llvm::ShuffleVectorInst. 
*/ +- llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const char *name = NULL); ++ llvm::Value *ShuffleInst(llvm::Value *v1, llvm::Value *v2, llvm::Value *mask, const llvm::Twine &name = ""); + + /** This convenience method to generate broadcast pattern. It takes a value + and a vector type. Type of the value must match element type of the + vector. */ +- llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType, const char *name = NULL); ++ llvm::Value *BroadcastValue(llvm::Value *v, llvm::Type *vecType, const llvm::Twine &name = ""); + +- llvm::PHINode *PhiNode(llvm::Type *type, int count, const char *name = NULL); +- llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, const char *name = NULL); ++ llvm::PHINode *PhiNode(llvm::Type *type, int count, const llvm::Twine &name = ""); ++ llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0, llvm::Value *val1, ++ const llvm::Twine &name = ""); + + /** Emits IR to do a function call with the given arguments. If the + function type is a varying function pointer type, its full type + must be provided in funcType. funcType can be NULL if func is a + uniform function pointer. */ + llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType, const std::vector &args, +- const char *name = NULL); ++ const llvm::Twine &name = ""); + + /** This is a convenience method that issues a call instruction to a + function that takes just a single argument. */ +- llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg, const char *name = NULL); ++ llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg, ++ const llvm::Twine &name = ""); + + /** This is a convenience method that issues a call instruction to a + function that takes two arguments. */ + llvm::Value *CallInst(llvm::Value *func, const FunctionType *funcType, llvm::Value *arg0, llvm::Value *arg1, +- const char *name = NULL); ++ const llvm::Twine &name = ""); + + /** Launch an asynchronous task to run the given function, passing it + he given argument values. 
*/ +@@ -756,9 +759,10 @@ class FunctionEmitContext { + void maskedStore(llvm::Value *value, llvm::Value *ptr, const Type *ptrType, llvm::Value *mask); + void storeUniformToSOA(llvm::Value *value, llvm::Value *ptr, llvm::Value *mask, const Type *valueType, + const PointerType *ptrType); +- llvm::Value *loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType, const char *name); ++ llvm::Value *loadUniformFromSOA(llvm::Value *ptr, llvm::Value *mask, const PointerType *ptrType, ++ const llvm::Twine &name = ""); + +- llvm::Value *gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask, const char *name); ++ llvm::Value *gather(llvm::Value *ptr, const PointerType *ptrType, llvm::Value *mask, const llvm::Twine &name = ""); + + llvm::Value *addVaryingOffsetsIfNeeded(llvm::Value *ptr, const Type *ptrType); + }; +diff --git a/src/expr.cpp b/src/expr.cpp +index 80cc3020a..6d38d1889 100644 +--- a/src/expr.cpp ++++ b/src/expr.cpp +@@ -1021,11 +1021,11 @@ static llvm::Value *lEmitNegate(Expr *arg, SourcePos pos, FunctionEmitContext *c + ctx->SetDebugPos(pos); + if (type->IsFloatType()) { + llvm::Value *zero = llvm::ConstantFP::getZeroValueForNegation(type->LLVMType(g->ctx)); +- return ctx->BinaryOperator(llvm::Instruction::FSub, zero, argVal, LLVMGetName(argVal, "_negate")); ++ return ctx->BinaryOperator(llvm::Instruction::FSub, zero, argVal, llvm::Twine(argVal->getName()) + "_negate"); + } else { + llvm::Value *zero = lLLVMConstantValue(type, g->ctx, 0.); + AssertPos(pos, type->IsIntType()); +- return ctx->BinaryOperator(llvm::Instruction::Sub, zero, argVal, LLVMGetName(argVal, "_negate")); ++ return ctx->BinaryOperator(llvm::Instruction::Sub, zero, argVal, llvm::Twine(argVal->getName()) + "_negate"); + } + } + +@@ -1047,11 +1047,11 @@ llvm::Value *UnaryExpr::GetValue(FunctionEmitContext *ctx) const { + return lEmitNegate(expr, pos, ctx); + case LogicalNot: { + llvm::Value *argVal = expr->GetValue(ctx); +- return ctx->NotOperator(argVal, LLVMGetName(argVal, "_logicalnot")); ++ return ctx->NotOperator(argVal, llvm::Twine(argVal->getName()) + "_logicalnot"); + } + case BitNot: { + llvm::Value *argVal = expr->GetValue(ctx); +- return ctx->NotOperator(argVal, LLVMGetName(argVal, "_bitnot")); ++ return ctx->NotOperator(argVal, llvm::Twine(argVal->getName()) + "_bitnot"); + } + default: + FATAL("logic error"); +@@ -1518,7 +1518,8 @@ static llvm::Value *lEmitBinaryArith(BinaryExpr::Op op, llvm::Value *value0, llv + return NULL; + } + +- return ctx->BinaryOperator(inst, value0, value1, LLVMGetName(opName, value0, value1)); ++ return ctx->BinaryOperator(inst, value0, value1, ++ (((llvm::Twine(opName) + "_") + value0->getName()) + "_") + value1->getName()); + } + } + +@@ -1563,7 +1564,7 @@ static llvm::Value *lEmitBinaryCmp(BinaryExpr::Op op, llvm::Value *e0Val, llvm:: + } + + llvm::Value *cmp = ctx->CmpInst(isFloatOp ? llvm::Instruction::FCmp : llvm::Instruction::ICmp, pred, e0Val, e1Val, +- LLVMGetName(opName, e0Val, e1Val)); ++ (((llvm::Twine(opName) + "_") + e0Val->getName()) + "_") + e1Val->getName()); + // This is a little ugly: CmpInst returns i1 values, but we use vectors + // of i32s for varying bool values; type convert the result here if + // needed. 
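(Aside: the expr.cpp hunks above build names like "add_a_b" by chaining Twine concatenations in place of the removed LLVMGetName(op, v1, v2). A Twine only references its operands, so the chain must be consumed in the same full expression, either passed straight to an instruction constructor as the patch does, or flattened with .str(). A sketch under that constraint; BinaryOpName is an illustrative helper, not part of the patch:

    #include <string>
    #include "llvm/ADT/Twine.h"

    // Equivalent of the removed LLVMGetName(op, v1, v2), without the
    // strdup() leak: the Twine tree is flattened exactly once, inside
    // the same full expression that built it.
    std::string BinaryOpName(llvm::StringRef op, llvm::StringRef a,
                             llvm::StringRef b) {
        return ((((llvm::Twine(op) + "_") + a) + "_") + b).str();
    }

Binding the unflattened chain to a named Twine variable instead would leave it referencing dead temporaries, which is why the patch inlines these expressions at each call site.)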
+@@ -4177,7 +4178,7 @@ static llvm::Value *lConvertToSlicePointer(FunctionEmitContext *ctx, llvm::Value + // offsets + llvm::Value *result = llvm::Constant::getNullValue(sliceStructType); + // And replace the pointer in the struct with the given pointer +- return ctx->InsertInst(result, ptr, 0, LLVMGetName(ptr, "_slice")); ++ return ctx->InsertInst(result, ptr, 0, llvm::Twine(ptr->getName()) + "_slice"); + } + + /** If the given array index is a compile time constant, check to see if it +@@ -4258,8 +4259,8 @@ llvm::Value *IndexExpr::GetLValue(FunctionEmitContext *ctx) const { + // Convert to a slice pointer if we're indexing into SOA data + basePtrValue = lConvertPtrToSliceIfNeeded(ctx, basePtrValue, &baseExprType); + +- llvm::Value *ptr = +- ctx->GetElementPtrInst(basePtrValue, indexValue, baseExprType, LLVMGetName(basePtrValue, "_offset")); ++ llvm::Value *ptr = ctx->GetElementPtrInst(basePtrValue, indexValue, baseExprType, ++ llvm::Twine(basePtrValue->getName()) + "_offset"); + return lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); + } + +@@ -4290,8 +4291,8 @@ llvm::Value *IndexExpr::GetLValue(FunctionEmitContext *ctx) const { + ctx->SetDebugPos(pos); + + // And do the actual indexing calculation.. +- llvm::Value *ptr = +- ctx->GetElementPtrInst(basePtr, LLVMInt32(0), indexValue, basePtrType, LLVMGetName(basePtr, "_offset")); ++ llvm::Value *ptr = ctx->GetElementPtrInst(basePtr, LLVMInt32(0), indexValue, basePtrType, ++ llvm::Twine(basePtr->getName()) + "_offset"); + return lAddVaryingOffsetsIfNeeded(ctx, ptr, GetLValueType()); + } + +@@ -4788,15 +4789,14 @@ llvm::Value *VectorMemberExpr::GetValue(FunctionEmitContext *ctx) const { + for (size_t i = 0; i < identifier.size(); ++i) { + char idStr[2] = {identifier[i], '\0'}; + llvm::Value *elementPtr = +- ctx->AddElementOffset(basePtr, indices[i], basePtrType, LLVMGetName(basePtr, idStr)); ++ ctx->AddElementOffset(basePtr, indices[i], basePtrType, llvm::Twine(basePtr->getName()) + idStr); + llvm::Value *elementValue = ctx->LoadInst(elementPtr, elementMask, elementPtrType); + +- const char *resultName = LLVMGetName(resultPtr, idStr); +- llvm::Value *ptmp = ctx->AddElementOffset(resultPtr, i, NULL, resultName); ++ llvm::Value *ptmp = ctx->AddElementOffset(resultPtr, i, NULL, llvm::Twine(resultPtr->getName()) + idStr); + ctx->StoreInst(elementValue, ptmp, elementPtrType, expr->GetType()->IsUniformType()); + } + +- return ctx->LoadInst(resultPtr, memberType, LLVMGetName(basePtr, "_swizzle")); ++ return ctx->LoadInst(resultPtr, memberType, llvm::Twine(basePtr->getName()) + "_swizzle"); + } + } + +@@ -4932,7 +4932,7 @@ llvm::Value *MemberExpr::GetValue(FunctionEmitContext *ctx) const { + + ctx->SetDebugPos(pos); + std::string suffix = std::string("_") + identifier; +- return ctx->LoadInst(lvalue, mask, lvalueType, LLVMGetName(lvalue, suffix.c_str())); ++ return ctx->LoadInst(lvalue, mask, lvalueType, llvm::Twine(lvalue->getName()) + suffix); + } + + const Type *MemberExpr::GetType() const { return NULL; } +@@ -6502,7 +6502,7 @@ static llvm::Value *lTypeConvAtomic(FunctionEmitContext *ctx, llvm::Value *exprV + // does for everyone else... + Assert(cast); + cast = ctx->SwitchBoolSize(cast, LLVMTypes::BoolVectorType->getElementType(), +- LLVMGetName(cast, "to_i_bool")); ++ llvm::Twine(cast->getName()) + "to_i_bool"); + } + } else { + // fromType->IsVaryingType()) +@@ -8246,7 +8246,7 @@ llvm::Value *NewExpr::GetValue(FunctionEmitContext *ctx) const { + // pointer of the return type and to run the code for initializers, + // if present. 
+ llvm::Type *ptrType = retType->LLVMType(g->ctx); +- ptrValue = ctx->BitCastInst(ptrValue, ptrType, LLVMGetName(ptrValue, "_cast_ptr")); ++ ptrValue = ctx->BitCastInst(ptrValue, ptrType, llvm::Twine(ptrValue->getName()) + "_cast_ptr"); + + if (initExpr != NULL) + InitSymbol(ptrValue, allocType, initExpr, ctx, pos); +diff --git a/src/llvmutil.cpp b/src/llvmutil.cpp +index 970175dbf..d0becb9f5 100644 +--- a/src/llvmutil.cpp ++++ b/src/llvmutil.cpp +@@ -1605,23 +1605,6 @@ llvm::Value *LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[] + return new llvm::ShuffleVectorInst(v1, v2, vec, "shuffle", insertBefore); + } + +-const char *LLVMGetName(llvm::Value *v, const char *s) { +- if (v == NULL) +- return s; +- std::string ret = std::string(v->getName()); +- ret += s; +- return strdup(ret.c_str()); +-} +- +-const char *LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2) { +- std::string r = op; +- r += "_"; +- r += v1->getName().str(); +- r += "_"; +- r += v2->getName().str(); +- return strdup(r.c_str()); +-} +- + #ifdef ISPC_GENX_ENABLED + bool lIsSVMLoad(llvm::Instruction *inst) { + Assert(inst); +diff --git a/src/llvmutil.h b/src/llvmutil.h +index 42cce4d83..07d089bb4 100644 +--- a/src/llvmutil.h ++++ b/src/llvmutil.h +@@ -328,11 +328,6 @@ extern llvm::Value *LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2, llvm::In + extern llvm::Value *LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[], int shufSize, + llvm::Instruction *insertBefore); + +-/** Utility routines to concat strings with the names of existing values to +- create meaningful new names for instruction values. +-*/ +-extern const char *LLVMGetName(llvm::Value *v, const char *); +-extern const char *LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2); + #ifdef ISPC_GENX_ENABLED + enum AddressSpace { Local, Global, External }; + +diff --git a/src/opt.cpp b/src/opt.cpp +index 5b1ac7b63..9d6cae058 100644 +--- a/src/opt.cpp ++++ b/src/opt.cpp +@@ -257,7 +257,7 @@ static bool lGetSourcePosFromMetadata(const llvm::Instruction *inst, SourcePos * + return true; + } + +-static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, const char *name, ++static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, const llvm::Twine &name, + llvm::Instruction *insertBefore = NULL) { + llvm::Value *args[2] = {arg0, arg1}; + llvm::ArrayRef newArgArray(&args[0], &args[2]); +@@ -265,21 +265,22 @@ static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llv + } + + static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, +- const char *name, llvm::Instruction *insertBefore = NULL) { ++ const llvm::Twine &name, llvm::Instruction *insertBefore = NULL) { + llvm::Value *args[3] = {arg0, arg1, arg2}; + llvm::ArrayRef newArgArray(&args[0], &args[3]); + return llvm::CallInst::Create(func, newArgArray, name, insertBefore); + } + + static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, +- llvm::Value *arg3, const char *name, llvm::Instruction *insertBefore = NULL) { ++ llvm::Value *arg3, const llvm::Twine &name, ++ llvm::Instruction *insertBefore = NULL) { + llvm::Value *args[4] = {arg0, arg1, arg2, arg3}; + llvm::ArrayRef newArgArray(&args[0], &args[4]); + return llvm::CallInst::Create(func, newArgArray, name, insertBefore); + } + + static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value 
*arg0, llvm::Value *arg1, llvm::Value *arg2, +- llvm::Value *arg3, llvm::Value *arg4, const char *name, ++ llvm::Value *arg3, llvm::Value *arg4, const llvm::Twine &name, + llvm::Instruction *insertBefore = NULL) { + llvm::Value *args[5] = {arg0, arg1, arg2, arg3, arg4}; + llvm::ArrayRef newArgArray(&args[0], &args[5]); +@@ -287,7 +288,7 @@ static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llv + } + + static llvm::Instruction *lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1, llvm::Value *arg2, +- llvm::Value *arg3, llvm::Value *arg4, llvm::Value *arg5, const char *name, ++ llvm::Value *arg3, llvm::Value *arg4, llvm::Value *arg5, const llvm::Twine &name, + llvm::Instruction *insertBefore = NULL) { + llvm::Value *args[6] = {arg0, arg1, arg2, arg3, arg4, arg5}; + llvm::ArrayRef newArgArray(&args[0], &args[6]); +@@ -991,23 +992,24 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + llvm::Type *returnType = callInst->getType(); + Assert(llvm::isa(returnType)); + // cast the i8 * to the appropriate type +- const char *name = LLVMGetName(callInst->getArgOperand(0), "_cast"); +- llvm::Value *castPtr = new llvm::BitCastInst(callInst->getArgOperand(0), +- llvm::PointerType::get(returnType, 0), name, callInst); ++ llvm::Value *castPtr = ++ new llvm::BitCastInst(callInst->getArgOperand(0), llvm::PointerType::get(returnType, 0), ++ llvm::Twine(callInst->getArgOperand(0)->getName()) + "_cast", callInst); + lCopyMetadata(castPtr, callInst); + int align; + if (g->opt.forceAlignedMemory) + align = g->target->getNativeVectorAlignment(); + else + align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8; +- name = LLVMGetName(callInst->getArgOperand(0), "_load"); + #if ISPC_LLVM_VERSION == ISPC_LLVM_10_0 +- llvm::Instruction *loadInst = new llvm::LoadInst(castPtr, name, false /* not volatile */, +- llvm::MaybeAlign(align), (llvm::Instruction *)NULL); ++ llvm::Instruction *loadInst = ++ new llvm::LoadInst(castPtr, llvm::Twine(callInst->getArgOperand(0)->getName()) + "_load", ++ false /* not volatile */, llvm::MaybeAlign(align), (llvm::Instruction *)NULL); + #else + llvm::Instruction *loadInst = new llvm::LoadInst( +- llvm::dyn_cast(castPtr->getType())->getPointerElementType(), castPtr, name, +- false /* not volatile */, llvm::MaybeAlign(align).valueOrOne(), (llvm::Instruction *)NULL); ++ llvm::dyn_cast(castPtr->getType())->getPointerElementType(), castPtr, ++ llvm::Twine(callInst->getArgOperand(0)->getName()) + "_load", false /* not volatile */, ++ llvm::MaybeAlign(align).valueOrOne(), (llvm::Instruction *)NULL); + #endif + lCopyMetadata(loadInst, callInst); + llvm::ReplaceInstWithInst(callInst, loadInst); +@@ -1028,9 +1030,9 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) { + // all lanes storing, so replace with a regular store + llvm::Value *rvalue = callInst->getArgOperand(2); + llvm::Type *storeType = rvalue->getType(); +- const char *name = LLVMGetName(callInst->getArgOperand(0), "_ptrcast"); +- llvm::Value *castPtr = new llvm::BitCastInst(callInst->getArgOperand(0), +- llvm::PointerType::get(storeType, 0), name, callInst); ++ llvm::Value *castPtr = ++ new llvm::BitCastInst(callInst->getArgOperand(0), llvm::PointerType::get(storeType, 0), ++ llvm::Twine(callInst->getArgOperand(0)->getName()) + "_ptrcast", callInst); + lCopyMetadata(castPtr, callInst); + + int align; +@@ -1339,8 +1341,8 @@ static llvm::Value *lGetBasePointer(llvm::Value *v, llvm::Instruction *insertBef + if (t == NULL) { + return NULL; + } else { +- return 
llvm::CastInst::Create(ci->getOpcode(), t, ci->getType()->getScalarType(), LLVMGetName(t, "_cast"), +- insertBefore); ++ return llvm::CastInst::Create(ci->getOpcode(), t, ci->getType()->getScalarType(), ++ llvm::Twine(t->getName()) + "_cast", insertBefore); + } + } + +@@ -1583,13 +1585,13 @@ static void lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, + if (co == NULL) + *constOffset = NULL; + else +- *constOffset = +- llvm::CastInst::Create(cast->getOpcode(), co, cast->getType(), LLVMGetName(co, "_cast"), insertBefore); ++ *constOffset = llvm::CastInst::Create(cast->getOpcode(), co, cast->getType(), ++ llvm::Twine(co->getName()) + "_cast", insertBefore); + if (vo == NULL) + *variableOffset = NULL; + else +- *variableOffset = +- llvm::CastInst::Create(cast->getOpcode(), vo, cast->getType(), LLVMGetName(vo, "_cast"), insertBefore); ++ *variableOffset = llvm::CastInst::Create(cast->getOpcode(), vo, cast->getType(), ++ llvm::Twine(vo->getName()) + "_cast", insertBefore); + return; + } + +@@ -1608,16 +1610,18 @@ static void lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, + else if (c1 == NULL || llvm::isa(c1)) + *constOffset = c0; + else +- *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, c0, c1, LLVMGetName("add", c0, c1), +- insertBefore); ++ *constOffset = llvm::BinaryOperator::Create( ++ llvm::Instruction::Add, c0, c1, ((llvm::Twine("add_") + c0->getName()) + "_") + c1->getName(), ++ insertBefore); + + if (v0 == NULL || llvm::isa(v0)) + *variableOffset = v1; + else if (v1 == NULL || llvm::isa(v1)) + *variableOffset = v0; + else +- *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, v0, v1, +- LLVMGetName("add", v0, v1), insertBefore); ++ *variableOffset = llvm::BinaryOperator::Create( ++ llvm::Instruction::Add, v0, v1, ((llvm::Twine("add_") + v0->getName()) + "_") + v1->getName(), ++ insertBefore); + return; + } else if (bop->getOpcode() == llvm::Instruction::Shl) { + lExtractConstantOffset(op0, &c0, &v0, insertBefore); +@@ -1633,10 +1637,12 @@ static void lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, + *constOffset = vec; + *variableOffset = NULL; + } else { +- *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Shl, c0, c1, LLVMGetName("shl", c0, c1), +- insertBefore); +- *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Shl, v0, c1, +- LLVMGetName("shl", v0, c1), insertBefore); ++ *constOffset = llvm::BinaryOperator::Create( ++ llvm::Instruction::Shl, c0, c1, ((llvm::Twine("shl_") + c0->getName()) + "_") + c1->getName(), ++ insertBefore); ++ *variableOffset = llvm::BinaryOperator::Create( ++ llvm::Instruction::Shl, v0, c1, ((llvm::Twine("shl_") + v0->getName()) + "_") + c1->getName(), ++ insertBefore); + } + return; + } else if (bop->getOpcode() == llvm::Instruction::Mul) { +@@ -1648,25 +1654,30 @@ static void lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, + // Note that the first term is a constant and the last three are + // variable. 
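(For reference, the identity this Mul branch materializes, writing op0 = c0 + v0 and op1 = c1 + v1 for the constant and variable parts returned by the two recursive lExtractConstantOffset calls:

    (c0 + v0) * (c1 + v1) = c0*c1 + (v0*c1 + c0*v1 + v0*v1)

c0*c1 is the only all-constant term and becomes *constOffset; the three mixed products are the va, vb, and vc that the code below sums into *variableOffset.)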
+ if (c0 != NULL && c1 != NULL) +- *constOffset = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, c1, LLVMGetName("mul", c0, c1), +- insertBefore); ++ *constOffset = llvm::BinaryOperator::Create( ++ llvm::Instruction::Mul, c0, c1, ((llvm::Twine("mul_") + c0->getName()) + "_") + c1->getName(), ++ insertBefore); + else + *constOffset = NULL; + + llvm::Value *va = NULL, *vb = NULL, *vc = NULL; + if (v0 != NULL && c1 != NULL) +- va = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, c1, LLVMGetName("mul", v0, c1), ++ va = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, c1, ++ ((llvm::Twine("mul_") + v0->getName()) + "_") + c1->getName(), + insertBefore); + if (c0 != NULL && v1 != NULL) +- vb = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, v1, LLVMGetName("mul", c0, v1), ++ vb = llvm::BinaryOperator::Create(llvm::Instruction::Mul, c0, v1, ++ ((llvm::Twine("mul_") + c0->getName()) + "_") + v1->getName(), + insertBefore); + if (v0 != NULL && v1 != NULL) +- vc = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, v1, LLVMGetName("mul", v0, v1), ++ vc = llvm::BinaryOperator::Create(llvm::Instruction::Mul, v0, v1, ++ ((llvm::Twine("mul_") + v0->getName()) + "_") + v1->getName(), + insertBefore); + + llvm::Value *vab = NULL; + if (va != NULL && vb != NULL) +- vab = llvm::BinaryOperator::Create(llvm::Instruction::Add, va, vb, LLVMGetName("add", va, vb), ++ vab = llvm::BinaryOperator::Create(llvm::Instruction::Add, va, vb, ++ ((llvm::Twine("add_") + va->getName()) + "_") + vb->getName(), + insertBefore); + else if (va != NULL) + vab = va; +@@ -1674,8 +1685,9 @@ static void lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset, + vab = vb; + + if (vab != NULL && vc != NULL) +- *variableOffset = llvm::BinaryOperator::Create(llvm::Instruction::Add, vab, vc, +- LLVMGetName("add", vab, vc), insertBefore); ++ *variableOffset = llvm::BinaryOperator::Create( ++ llvm::Instruction::Add, vab, vc, ((llvm::Twine("add_") + vab->getName()) + "_") + vc->getName(), ++ insertBefore); + else if (vab != NULL) + *variableOffset = vab; + else +@@ -1943,7 +1955,7 @@ static bool lOffsets32BitSafe(llvm::Value **variableOffsetPtr, llvm::Value **con + // all zeros (i.e. a ConstantAggregateZero, but just in case, + // do the more general check with lVectorIs32BitInts(). + variableOffset = new llvm::TruncInst(variableOffset, LLVMTypes::Int32VectorType, +- LLVMGetName(variableOffset, "_trunc"), insertBefore); ++ llvm::Twine(variableOffset->getName()) + "_trunc", insertBefore); + else + return false; + } +@@ -1952,7 +1964,7 @@ static bool lOffsets32BitSafe(llvm::Value **variableOffsetPtr, llvm::Value **con + if (lVectorIs32BitInts(constOffset)) { + // Truncate them so we have a 32-bit vector type for them. + constOffset = new llvm::TruncInst(constOffset, LLVMTypes::Int32VectorType, +- LLVMGetName(constOffset, "_trunc"), insertBefore); ++ llvm::Twine(constOffset->getName()) + "_trunc", insertBefore); + } else { + // FIXME: otherwise we just assume that all constant offsets + // can actually always fit into 32-bits... (This could be +@@ -1963,7 +1975,7 @@ static bool lOffsets32BitSafe(llvm::Value **variableOffsetPtr, llvm::Value **con + // llvm::ConstantFoldInstruction() doesn't seem to be doing + // enough for us in some cases if we call it from here. 
+ constOffset = new llvm::TruncInst(constOffset, LLVMTypes::Int32VectorType, +- LLVMGetName(constOffset, "_trunc"), insertBefore); ++ llvm::Twine(constOffset->getName()) + "_trunc", insertBefore); + } + } + +@@ -2012,8 +2024,8 @@ static bool lOffsets32BitSafe(llvm::Value **offsetPtr, llvm::Instruction *insert + + // Alternatively, offset could be a sequence of adds terminating + // in safe constant vectors or a SExt. +- *offsetPtr = +- new llvm::TruncInst(offset, LLVMTypes::Int32VectorType, LLVMGetName(offset, "_trunc"), insertBefore); ++ *offsetPtr = new llvm::TruncInst(offset, LLVMTypes::Int32VectorType, llvm::Twine(offset->getName()) + "_trunc", ++ insertBefore); + return true; + } else + return false; +@@ -2229,7 +2241,8 @@ static bool lGSToGSBaseOffsets(llvm::CallInst *callInst) { + } + // Cast the base pointer to a void *, since that's what the + // __pseudo_*_base_offsets_* functions want. +- basePtr = new llvm::IntToPtrInst(basePtr, LLVMTypes::VoidPointerType, LLVMGetName(basePtr, "_2void"), callInst); ++ basePtr = new llvm::IntToPtrInst(basePtr, LLVMTypes::VoidPointerType, llvm::Twine(basePtr->getName()) + "_2void", ++ callInst); + lCopyMetadata(basePtr, callInst); + llvm::Function *gatherScatterFunc = info->baseOffsetsFunc; + +@@ -2803,7 +2816,7 @@ static bool lGSToLoadStore(llvm::CallInst *callInst) { + lCopyMetadata(ptr, callInst); + Debug(pos, "Transformed gather to unaligned vector load!"); + llvm::Instruction *newCall = +- lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, LLVMGetName(ptr, "_masked_load")); ++ lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, llvm::Twine(ptr->getName()) + "_masked_load"); + lCopyMetadata(newCall, callInst); + llvm::ReplaceInstWithInst(callInst, newCall); + return true; diff --git a/srcpkgs/ispc/patches/llvm12-004.patch b/srcpkgs/ispc/patches/llvm12-004.patch new file mode 100644 index 00000000000..3a94d8443a5 --- /dev/null +++ b/srcpkgs/ispc/patches/llvm12-004.patch @@ -0,0 +1,34 @@ +From c03a5142444476a1caa5c2ba7fa2d92d7793d6f1 Mon Sep 17 00:00:00 2001 +From: Deepak Rajendrakumaran +Date: Mon, 25 Jan 2021 09:22:49 -0800 +Subject: [PATCH] Fixing build on llvm trunk. + +--- + src/ctx.cpp | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/src/ctx.cpp b/src/ctx.cpp +index ded524b71..b60e6fe84 100644 +--- a/src/ctx.cpp ++++ b/src/ctx.cpp +@@ -1020,8 +1020,7 @@ void FunctionEmitContext::EmitCaseLabel(int value, bool checkMask, SourcePos pos + llvm::BasicBlock *bbCaseImpl = NULL; + if (emitGenXHardwareMask()) { + // Create basic block with actual case implementation +- llvm::Twine bbName = llvm::Twine(bbCase->getName()) + "_impl"; +- bbCaseImpl = CreateBasicBlock(bbName, bbCase); ++ bbCaseImpl = CreateBasicBlock(llvm::Twine(bbCase->getName()) + "_impl", bbCase); + } + #endif + +@@ -3142,8 +3141,8 @@ llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vec + llvm::Value *undef2 = llvm::UndefValue::get(vecType); + + // InsertElement +- llvm::Twine tw = name.isTriviallyEmpty() ? (llvm::Twine(v->getName()) + "_broadcast") : name + llvm::Twine("_init"); +- llvm::Value *insert = InsertInst(undef1, v, 0, tw.str().c_str()); ++ llvm::Value *insert = ++ InsertInst(undef1, v, 0, name.isTriviallyEmpty() ? 
diff --git a/srcpkgs/ispc/patches/llvm12-005.patch b/srcpkgs/ispc/patches/llvm12-005.patch
new file mode 100644
index 00000000000..e3e96b00abf
--- /dev/null
+++ b/srcpkgs/ispc/patches/llvm12-005.patch
@@ -0,0 +1,372 @@
+From c1d0a51bf8416d42144de9e2bdd59825eaeff1ac Mon Sep 17 00:00:00 2001
+From: Arina Neshlyaeva
+Date: Fri, 8 Jan 2021 14:13:49 -0800
+Subject: [PATCH] LLVM 11 support for gen code
+
+---
+ src/ctx.cpp                     |  13 ++-
+ src/gen/GlobalsLocalization.cpp |   9 +-
+ src/opt.cpp                     | 143 ++++++++++++++++++++++++--------
+ 3 files changed, 124 insertions(+), 41 deletions(-)
+
+diff --git a/src/ctx.cpp b/src/ctx.cpp
+index b60e6fe84..2a72e6837 100644
+--- a/src/ctx.cpp
++++ b/src/ctx.cpp
+@@ -1,5 +1,5 @@
+ /*
+-  Copyright (c) 2010-2020, Intel Corporation
++  Copyright (c) 2010-2021, Intel Corporation
+   All rights reserved.
+ 
+   Redistribution and use in source and binary forms, with or without
+@@ -3685,8 +3685,17 @@ llvm::Value *FunctionEmitContext::GenXSimdCFPredicate(llvm::Value *value, llvm::
+     AssertPos(currentPos, llvm::isa<llvm::VectorType>(value->getType()));
+     llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(value->getType());
+     if (defaults == NULL) {
+-        defaults = llvm::ConstantVector::getSplat(value->getType()->getVectorNumElements(),
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
++        defaults = llvm::ConstantVector::getSplat(vt->getVectorNumElements(),
++                                                  llvm::Constant::getNullValue(vt->getElementType()));
++#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++        defaults = llvm::ConstantVector::getSplat({static_cast<unsigned>(vt->getNumElements()), false},
+                                                   llvm::Constant::getNullValue(vt->getElementType()));
++#else
++        defaults = llvm::ConstantVector::getSplat(
++            llvm::ElementCount::get(static_cast<unsigned>(vt->getNumElements()), false),
++            llvm::Constant::getNullValue(vt->getElementType()));
++#endif
+     }
+ 
+     auto Fn = llvm::GenXIntrinsic::getGenXDeclaration(m->module, llvm::GenXIntrinsic::genx_simdcf_predicate,
+diff --git a/src/gen/GlobalsLocalization.cpp b/src/gen/GlobalsLocalization.cpp
+index a176e9462..41f3b00e2 100644
+--- a/src/gen/GlobalsLocalization.cpp
++++ b/src/gen/GlobalsLocalization.cpp
+@@ -1,5 +1,5 @@
+ /*
+- Copyright (c) 2014, 2016-2020, Intel Corporation
++ Copyright (c) 2014, 2016-2021, Intel Corporation
+ All rights reserved.
+ 
+ Redistribution and use in source and binary forms, with or without
+@@ -53,8 +53,10 @@
+ #include
+ #include
+ #include
+-#include
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+ #include
++#endif
++#include
+ #include
+ #include
+ #include
+@@ -470,7 +472,8 @@ void GlobalsLocalization::LocalizeGlobals(LocalizationInfo &LI) {
+         Instruction &FirstI = *Fn->getEntryBlock().begin();
+         Type *ElemTy = GV->getType()->getElementType();
+         AllocaInst *Alloca = new AllocaInst(ElemTy, 0, GV->getName() + ".local", &FirstI);
+-        Alloca->setAlignment(llvm::MaybeAlign(GV->getAlignment()));
++        Alloca->setAlignment(llvm::MaybeAlign(GV->getAlignment()).valueOrOne());
++
+         if (!isa(GV->getInitializer()))
+             new StoreInst(GV->getInitializer(), Alloca, &FirstI);
+ 
+diff --git a/src/opt.cpp b/src/opt.cpp
+index d58d77f73..1789b8476 100644
+--- a/src/opt.cpp
++++ b/src/opt.cpp
+@@ -2899,15 +2899,17 @@ static llvm::Function *lGenXMaskedInt8Inst(llvm::Instruction *inst, bool isStore
+ static llvm::CallInst *lGenXStoreInst(llvm::Value *val, llvm::Value *ptr, llvm::Instruction *inst) {
+     Assert(g->target->isGenXTarget());
+     Assert(llvm::isa<llvm::VectorType>(val->getType()));
+-    Assert(llvm::isPowerOf2_32(val->getType()->getVectorNumElements()));
+-    Assert(val->getType()->getPrimitiveSizeInBits() / 8 <= 8 * OWORD);
++
++    llvm::VectorType *valVecType = llvm::dyn_cast<llvm::VectorType>(val->getType());
++    Assert(llvm::isPowerOf2_32(valVecType->getNumElements()));
++    Assert(valVecType->getPrimitiveSizeInBits() / 8 <= 8 * OWORD);
+ 
+     // The data write of svm store must have a size that is a power of two from 16 to 128
+     // bytes. However for int8 type and simd width = 8, the data write size is 8.
+     // So we use masked store function here instead of svm store which process int8 type
+     // correctly.
+-    if (val->getType()->getPrimitiveSizeInBits() / 8 < 16) {
+-        Assert(val->getType()->getScalarType() == LLVMTypes::Int8Type);
++    if (valVecType->getPrimitiveSizeInBits() / 8 < 16) {
++        Assert(valVecType->getScalarType() == LLVMTypes::Int8Type);
+         if (llvm::Function *maskedFunc = lGenXMaskedInt8Inst(inst, true))
+             return llvm::dyn_cast<llvm::CallInst>(lCallInst(maskedFunc, ptr, val, LLVMMaskAllOn, ""));
+         else {
+@@ -2923,15 +2925,17 @@ static llvm::CallInst *lGenXLoadInst(llvm::Value *ptr, llvm::Type *retType, llvm::
+     Assert(llvm::isa<llvm::VectorType>(retType));
+-    Assert(llvm::isPowerOf2_32(retType->getVectorNumElements()));
+-    Assert(retType->getPrimitiveSizeInBits());
+-    Assert(retType->getPrimitiveSizeInBits() / 8 <= 8 * OWORD);
++
++    llvm::VectorType *retVecType = llvm::dyn_cast<llvm::VectorType>(retType);
++    Assert(llvm::isPowerOf2_32(retVecType->getNumElements()));
++    Assert(retVecType->getPrimitiveSizeInBits());
++    Assert(retVecType->getPrimitiveSizeInBits() / 8 <= 8 * OWORD);
+     // The data read of svm load must have a size that is a power of two from 16 to 128
+     // bytes. However for int8 type and simd width = 8, the data read size is 8.
+     // So we use masked load function here instead of svm load which process int8 type
+     // correctly.
+-    if (retType->getPrimitiveSizeInBits() / 8 < 16) {
+-        Assert(retType->getScalarType() == LLVMTypes::Int8Type);
++    if (retVecType->getPrimitiveSizeInBits() / 8 < 16) {
++        Assert(retVecType->getScalarType() == LLVMTypes::Int8Type);
+         if (llvm::Function *maskedFunc = lGenXMaskedInt8Inst(inst, false))
+             return llvm::dyn_cast<llvm::CallInst>(lCallInst(maskedFunc, ptr, LLVMMaskAllOn, ""));
+         else {
+@@ -5622,15 +5626,24 @@ static bool lVectorizeGEPs(llvm::Value *ptr, std::vector &ptrUses, std::
+     llvm::PtrToIntInst *ptrToInt =
+         new llvm::PtrToIntInst(ptr, LLVMTypes::Int64Type, "vectorized_ptrtoint", insertBefore);
+     llvm::Instruction *addr = llvm::BinaryOperator::CreateAdd(ptrToInt, offset, "vectorized_address", insertBefore);
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++    llvm::Type *retType = llvm::FixedVectorType::get(scalar_type, reqSize / t_size);
++#else
+     llvm::Type *retType = llvm::VectorType::get(scalar_type, reqSize / t_size);
++#endif
+     llvm::Function *fn = llvm::GenXIntrinsic::getGenXDeclaration(
+         m->module, llvm::GenXIntrinsic::genx_svm_block_ld_unaligned, {retType, addr->getType()});
+     llvm::Instruction *ld = llvm::CallInst::Create(fn, {addr}, "vectorized_ld", insertBefore);
+ 
+     if (loadingPtr) {
+         // Cast int to ptr via inttoptr
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++        ld = new llvm::IntToPtrInst(ld, llvm::FixedVectorType::get(originalType, reqSize / t_size),
++                                    "vectorized_inttoptr", insertBefore);
++#else
+         ld = new llvm::IntToPtrInst(ld, llvm::VectorType::get(originalType, reqSize / t_size),
+                                     "vectorized_inttoptr", insertBefore);
++#endif
+     }
+ 
+     // Scalar extracts for all loaded elements
+@@ -6160,19 +6173,34 @@ bool ReplaceLLVMIntrinsics::runOnBasicBlock(llvm::BasicBlock &bb) {
+         llvm::Instruction *inst = &*I;
+         if (llvm::CallInst *ci = llvm::dyn_cast<llvm::CallInst>(inst)) {
+             llvm::Function *func = ci->getCalledFunction();
+-            if (func && func->getName() == "llvm.trap") {
++            if (func && func->getName().equals("llvm.trap")) {
+                 llvm::Type *argTypes[] = {LLVMTypes::Int1VectorType, LLVMTypes::Int16VectorType};
+                 // Description of parameters for genx_raw_send_noresult can be found in target-genx.ll
+                 auto Fn = llvm::GenXIntrinsic::getGenXDeclaration(
+                     m->module, llvm::GenXIntrinsic::genx_raw_send_noresult, argTypes);
+                 llvm::SmallVector Args;
+                 Args.push_back(llvm::ConstantInt::get(LLVMTypes::Int32Type, 0));
+-                Args.push_back(llvm::ConstantVector::getSplat(g->target->getNativeVectorWidth(),
+-                                                              llvm::ConstantInt::getTrue(*g->ctx)));
++                Args.push_back(llvm::ConstantVector::getSplat(
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
++                    g->target->getNativeVectorWidth(),
++#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++                    {static_cast<unsigned>(g->target->getNativeVectorWidth()), false},
++#else // LLVM 12.0+
++                    llvm::ElementCount::get(static_cast<unsigned>(g->target->getNativeVectorWidth()), false),
++#endif
++                    llvm::ConstantInt::getTrue(*g->ctx)));
++
+                 Args.push_back(llvm::ConstantInt::get(LLVMTypes::Int32Type, 39));
+                 Args.push_back(llvm::ConstantInt::get(LLVMTypes::Int32Type, 33554448));
+                 llvm::Value *zeroMask = llvm::ConstantVector::getSplat(
+-                    g->target->getNativeVectorWidth(), llvm::Constant::getNullValue(llvm::Type::getInt16Ty(*g->ctx)));
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
++                    g->target->getNativeVectorWidth(),
++#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++                    {static_cast<unsigned>(g->target->getNativeVectorWidth()), false},
++#else // LLVM 12.0+
++                    llvm::ElementCount::get(static_cast<unsigned>(g->target->getNativeVectorWidth()), false),
++#endif
++                    llvm::Constant::getNullValue(llvm::Type::getInt16Ty(*g->ctx)));
+                 Args.push_back(zeroMask);
+ 
+                 llvm::Instruction *newInst = llvm::CallInst::Create(Fn, Args, ci->getName());
+@@ -6181,7 +6209,7 @@ bool ReplaceLLVMIntrinsics::runOnBasicBlock(llvm::BasicBlock &bb) {
+                 modifiedAny = true;
+                 goto restart;
+             }
+-            } else if (func && func->getName() == "llvm.assume") {
++            } else if (func && func->getName().equals("llvm.assume")) {
+                 ci->eraseFromParent();
+                 modifiedAny = true;
+                 goto restart;
+@@ -6335,7 +6363,7 @@ bool CheckUnsupportedInsts::runOnBasicBlock(llvm::BasicBlock &bb) {
+             continue;
+         for (int i = 0; i < unsupportedFuncs.size(); i++) {
+             std::smatch match;
+-            std::string funcName = func->getName();
++            std::string funcName = func->getName().str();
+             if (std::regex_match(funcName, match, unsupportedFuncs[i])) {
+                 // We found unsupported function. Generate error and stop compilation.
+                 SourcePos pos;
+@@ -6418,12 +6446,14 @@ bool MangleOpenCLBuiltins::runOnBasicBlock(llvm::BasicBlock &bb) {
+         if (func->getName().startswith("__spirv_ocl")) {
+             std::string mangledName;
+             llvm::Type *retType = func->getReturnType();
+-            std::string funcName = func->getName();
++            std::string funcName = func->getName().str();
+             std::vector ArgTy;
+             // spirv OpenCL builtins are used for double types only
+-            Assert(retType->isVectorTy() && retType->getVectorElementType()->isDoubleTy() ||
++            Assert(retType->isVectorTy() &&
++                       llvm::dyn_cast<llvm::VectorType>(retType)->getElementType()->isDoubleTy() ||
+                    retType->isSingleValueType() && retType->isDoubleTy());
+-            if (retType->isVectorTy() && retType->getVectorElementType()->isDoubleTy()) {
++            if (retType->isVectorTy() &&
++                llvm::dyn_cast<llvm::VectorType>(retType)->getElementType()->isDoubleTy()) {
+                 ArgTy.push_back(LLVMTypes::DoubleVectorType);
+                 // _DvWIDTH suffix is used in target file to differentiate scalar
+                 // and vector versions of intrinsics. Here we remove this
+@@ -6511,8 +6541,15 @@ llvm::Value *FixAddressSpace::calculateGatherScatterAddress(llvm::Value *Ptr, ll
+ 
+     // Cast offsets to int64
+     Offsets = new llvm::ZExtInst(
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++        Offsets,
++        llvm::FixedVectorType::get(LLVMTypes::Int64Type,
++                                   llvm::dyn_cast<llvm::VectorType>(Offsets->getType())->getNumElements()),
++        "svm_offset_zext", InsertBefore);
++#else
+         Offsets, llvm::VectorType::get(LLVMTypes::Int64Type, Offsets->getType()->getVectorNumElements()),
+         "svm_offset_zext", InsertBefore);
++#endif
+ 
+     if (!llvm::isa(Ptr)) {
+         // Cast ptr to int64
+@@ -6520,13 +6557,31 @@ llvm::Value *FixAddressSpace::calculateGatherScatterAddress(llvm::Value *Ptr, ll
+ 
+         // Vectorize ptr
+         llvm::Value *undefInsertValue =
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++            llvm::UndefValue::get(llvm::FixedVectorType::get(
++                LLVMTypes::Int64Type, llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements()));
++#else
+             llvm::UndefValue::get(llvm::VectorType::get(LLVMTypes::Int64Type, addressType->getVectorNumElements()));
++#endif
+         address = llvm::InsertElementInst::Create(undefInsertValue, address, LLVMInt32(0), "svm_ptr_iei", InsertBefore);
+         llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+             addressType->getVectorNumElements(),
++#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++            {llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements(), false},
++#else
++            llvm::ElementCount::get(
++                llvm::dyn_cast<llvm::VectorType>(addressType->getNumElements(), false),
++#endif
+             llvm::Constant::getNullValue(llvm::Type::getInt32Ty(InsertBefore->getContext())));
++
+         llvm::Value *undefShuffleValue =
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++            llvm::UndefValue::get(llvm::FixedVectorType::get(
++                LLVMTypes::Int64Type, llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements()));
++#else
+             llvm::UndefValue::get(llvm::VectorType::get(LLVMTypes::Int64Type, addressType->getVectorNumElements()));
++#endif
+         address = new llvm::ShuffleVectorInst(address, undefShuffleValue, zeroVec, "svm_ptr_svi", InsertBefore);
+ 
+         // Calculate address
+@@ -6553,9 +6608,12 @@ llvm::Instruction *FixAddressSpace::processVectorLoad(llvm::LoadInst *LI) {
+     if (retType->getScalarType()->isPointerTy()) {
+         isPtrLoad = true;
+         auto scalarType = g->target->is32Bit() ? LLVMTypes::Int32Type : LLVMTypes::Int64Type;
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++        retType = llvm::FixedVectorType::get(scalarType, llvm::dyn_cast<llvm::VectorType>(retType)->getNumElements());
++#else
+         retType = llvm::VectorType::get(scalarType, retType->getVectorNumElements());
++#endif
+     }
+-
+     llvm::Instruction *res = lGenXLoadInst(ptr, retType, llvm::dyn_cast<llvm::Instruction>(LI));
+     Assert(res);
+ 
+@@ -6580,11 +6638,16 @@ llvm::Instruction *FixAddressSpace::processSVMVectorLoad(llvm::Instruction *CI)
+     ptr = new llvm::IntToPtrInst(ptr, llvm::PointerType::get(retType, 0), CI->getName() + "_inttoptr", CI);
+     llvm::Instruction *loadInst = NULL;
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+-    loadInst = new llvm::LoadInst(llvm::dyn_cast<llvm::PointerType>(ptr->getType())->getPointerElementType(), loadInst,
+-                                  CI->getName(), (llvm::Instruction *)NULL);
++    loadInst = new llvm::LoadInst(llvm::dyn_cast<llvm::PointerType>(ptr->getType())->getPointerElementType(), ptr,
++                                  CI->getName(), false /* not volatile */,
++                                  llvm::MaybeAlign(g->target->getNativeVectorAlignment()).valueOrOne(),
++                                  (llvm::Instruction *)NULL);
+ #else
+-    loadInst = new llvm::LoadInst(ptr, CI->getName(), (llvm::Instruction *)NULL);
++    loadInst = new llvm::LoadInst(ptr, CI->getName(), false,
++                                  llvm::MaybeAlign(g->target->getNativeVectorAlignment()).valueOrOne(),
++                                  (llvm::Instruction *)NULL);
+ #endif
++
+     Assert(loadInst);
+     return loadInst;
+ }
+@@ -6606,7 +6669,11 @@ llvm::Instruction *FixAddressSpace::processVectorStore(llvm::StoreInst *SI) {
+     // Note: it doesn't look like a normal case for GenX target
+     if (valType->getScalarType()->isPointerTy()) {
+         auto scalarType = g->target->is32Bit() ? LLVMTypes::Int32Type : LLVMTypes::Int64Type;
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++        valType = llvm::FixedVectorType::get(scalarType, llvm::dyn_cast<llvm::VectorType>(valType)->getNumElements());
++#else
+         valType = llvm::VectorType::get(scalarType, valType->getVectorNumElements());
++#endif
+         val = new llvm::PtrToIntInst(val, valType, "svm_st_val_ptrtoint", SI);
+     }
+ 
+@@ -6631,12 +6698,8 @@ llvm::Instruction *FixAddressSpace::processSVMVectorStore(llvm::Instruction *CI)
+     ptr = new llvm::IntToPtrInst(ptr, llvm::PointerType::get(valType, 0), CI->getName() + "_inttoptr", CI);
+ 
+     llvm::Instruction *storeInst = NULL;
+-#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+-    loadInst = new llvm::StoreInst(val, llvm::dyn_cast<llvm::PointerType>(ptr->getType())->getPointerElementType(),
+-                                   storeInst, CI->getName(), (llvm::Instruction *)NULL);
+-#else
+-    storeInst = new llvm::StoreInst(val, ptr, (llvm::Instruction *)NULL);
+-#endif
++    storeInst = new llvm::StoreInst(val, ptr, (llvm::Instruction *)NULL,
++                                    llvm::MaybeAlign(g->target->getNativeVectorAlignment()).valueOrOne());
+     Assert(storeInst);
+     return storeInst;
+ }
+@@ -6645,15 +6708,18 @@ llvm::Instruction *FixAddressSpace::createInt8WrRegion(llvm::Value *Val, llvm::V
+     int width = g->target->getVectorWidth();
+ 
+     llvm::Value *Args[8];
+-
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++    Args[0] = llvm::UndefValue::get(llvm::FixedVectorType::get(LLVMTypes::Int8Type, width * 4)); // old value
++#else
+     Args[0] = llvm::UndefValue::get(llvm::VectorType::get(LLVMTypes::Int8Type, width * 4)); // old value
+-    Args[1] = Val;                                                                          // value to store
+-    Args[2] = llvm::ConstantInt::get(LLVMTypes::Int32Type, 0);                              // vstride
+-    Args[3] = llvm::ConstantInt::get(LLVMTypes::Int32Type, width);                          // width
+-    Args[4] = llvm::ConstantInt::get(LLVMTypes::Int32Type, 4);                              // stride
+-    Args[5] = llvm::ConstantInt::get(LLVMTypes::Int16Type, 0);                              // offsets
+-    Args[6] = llvm::ConstantInt::get(LLVMTypes::Int32Type, 0);                              // parent width (ignored)
+-    Args[7] = Mask;                                                                         // mask
++#endif
++    Args[1] = Val;                                                  // value to store
++    Args[2] = llvm::ConstantInt::get(LLVMTypes::Int32Type, 0);      // vstride
++    Args[3] = llvm::ConstantInt::get(LLVMTypes::Int32Type, width);  // width
++    Args[4] = llvm::ConstantInt::get(LLVMTypes::Int32Type, 4);      // stride
++    Args[5] = llvm::ConstantInt::get(LLVMTypes::Int16Type, 0);      // offsets
++    Args[6] = llvm::ConstantInt::get(LLVMTypes::Int32Type, 0);      // parent width (ignored)
++    Args[7] = Mask;                                                 // mask
+ 
+     llvm::Type *Tys[4];
+ 
+@@ -6708,7 +6774,12 @@ llvm::Instruction *FixAddressSpace::processGatherScatterPrivate(llvm::CallInst *
+         return NULL;
+ 
+     llvm::Value *address = calculateGatherScatterAddress(ptr, offsets, CI);
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++    llvm::Type *i8VecType = llvm::FixedVectorType::get(LLVMTypes::Int8Type, width * 4);
++#else
+     llvm::Type *i8VecType = llvm::VectorType::get(LLVMTypes::Int8Type, width * 4);
++#endif
++
+     bool isInt8 = (value->getType()->getScalarType() == LLVMTypes::Int8Type);
+ 
+     Assert(address && "Bad gather/scatter address!");
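The #if ladders that dominate llvm12-005 track how llvm::ConstantVector::getSplat() changed shape: LLVM 10 and earlier took a plain element count, LLVM 11 took an ElementCount that could be brace-initialized from {N, false}, and LLVM 12 requires the llvm::ElementCount::get(N, /*Scalable=*/false) factory. A version-guarded wrapper in the same spirit, assuming ispc's ISPC_LLVM_VERSION macros are in scope (lGetSplat is hypothetical, not part of the patch):

    #include "llvm/IR/Constants.h"
    #include "llvm/Support/TypeSize.h"

    // Splat a scalar constant across 'width' lanes of a fixed-width vector,
    // hiding the getSplat() signature differences between LLVM releases.
    static llvm::Constant *lGetSplat(unsigned width, llvm::Constant *elt) {
    #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
        return llvm::ConstantVector::getSplat(width, elt);
    #elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
        return llvm::ConstantVector::getSplat({width, false}, elt);
    #else // LLVM 12.0+
        return llvm::ConstantVector::getSplat(llvm::ElementCount::get(width, false), elt);
    #endif
    }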
diff --git a/srcpkgs/ispc/patches/llvm12-006.patch b/srcpkgs/ispc/patches/llvm12-006.patch
new file mode 100644
index 00000000000..a829ea48391
--- /dev/null
+++ b/srcpkgs/ispc/patches/llvm12-006.patch
@@ -0,0 +1,126 @@
+From bb3f493d1fbd45c79e1d9dee67a0430ba313eaad Mon Sep 17 00:00:00 2001
+From: Arina Neshlyaeva
+Date: Tue, 19 Jan 2021 11:07:33 -0800
+Subject: [PATCH] Fixed ifdefs for LLVM_11
+
+---
+ src/ctx.cpp |  6 +++---
+ src/opt.cpp | 18 +++++++++---------
+ 2 files changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/src/ctx.cpp b/src/ctx.cpp
+index 2a72e6837..527a02f30 100644
+--- a/src/ctx.cpp
++++ b/src/ctx.cpp
+@@ -1691,7 +1691,7 @@ llvm::Value *FunctionEmitContext::SmearUniform(llvm::Value *value, const llvm::T
+     if (llvm::Constant *const_val = llvm::dyn_cast<llvm::Constant>(value)) {
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+         ret = llvm::ConstantVector::getSplat(g->target->getVectorWidth(), const_val);
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+         ret =
+             llvm::ConstantVector::getSplat({static_cast<unsigned>(g->target->getVectorWidth()), false}, const_val);
+ #else // LLVM 12.0+
+@@ -3148,7 +3148,7 @@ llvm::Value *FunctionEmitContext::BroadcastValue(llvm::Value *v, llvm::Type *vec
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+     llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
+         vecType->getVectorNumElements(), llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+     llvm::Constant *zeroVec =
+         llvm::ConstantVector::getSplat({static_cast<unsigned>(ty->getNumElements()), false},
+                                        llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
+@@ -3688,7 +3688,7 @@ llvm::Value *FunctionEmitContext::GenXSimdCFPredicate(llvm::Value *value, llvm::
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+         defaults = llvm::ConstantVector::getSplat(vt->getVectorNumElements(),
+                                                   llvm::Constant::getNullValue(vt->getElementType()));
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+         defaults = llvm::ConstantVector::getSplat({static_cast<unsigned>(vt->getNumElements()), false},
+                                                   llvm::Constant::getNullValue(vt->getElementType()));
+ #else
+diff --git a/src/opt.cpp b/src/opt.cpp
+index 1789b8476..3ff191a0a 100644
+--- a/src/opt.cpp
++++ b/src/opt.cpp
+@@ -1045,7 +1045,7 @@ bool IntrinsicsOpt::runOnBasicBlock(llvm::BasicBlock &bb) {
+                 align = g->target->getNativeVectorAlignment();
+             else
+                 align = callInst->getCalledFunction() == avxMaskedLoad32 ? 4 : 8;
+-#if ISPC_LLVM_VERSION == ISPC_LLVM_10_0
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+             llvm::Instruction *loadInst =
+                 new llvm::LoadInst(castPtr, llvm::Twine(callInst->getArgOperand(0)->getName()) + "_load",
+                                    false /* not volatile */, llvm::MaybeAlign(align), (llvm::Instruction *)NULL);
+@@ -1472,7 +1472,7 @@ static llvm::Value *lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offse
+             llvm::Value *zeroMask =
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+                 llvm::ConstantVector::getSplat(cv->getType()->getVectorNumElements(),
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+                 llvm::ConstantVector::getSplat(
+                     {llvm::dyn_cast<llvm::VectorType>(cv->getType())->getNumElements(), false},
+ #else // LLVM 12.0+
+@@ -1495,7 +1495,7 @@ static llvm::Value *lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offse
+             llvm::Value *zeroMask = llvm::ConstantVector::getSplat(
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+                 bop_var_type->getVectorNumElements(),
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+                 {llvm::dyn_cast<llvm::VectorType>(bop_var_type)->getNumElements(), false},
+ #else // LLVM 12.0+
+                 llvm::ElementCount::get(
+@@ -2803,7 +2803,7 @@ static bool lGSToLoadStore(llvm::CallInst *callInst) {
+         llvm::Value *zeroMask =
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+             llvm::ConstantVector::getSplat(callInst->getType()->getVectorNumElements(),
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+             llvm::ConstantVector::getSplat(
+                 {llvm::dyn_cast<llvm::VectorType>(callInst->getType())->getNumElements(), false},
+ 
+@@ -3100,7 +3100,7 @@ static bool lImproveMaskedLoad(llvm::CallInst *callInst, llvm::BasicBlock::itera
+     {
+         llvm::Type *ptrType = llvm::PointerType::get(callInst->getType(), 0);
+         ptr = new llvm::BitCastInst(ptr, ptrType, "ptr_cast_for_load", callInst);
+-#if ISPC_LLVM_VERSION == ISPC_LLVM_10_0
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+         load = new llvm::LoadInst(
+             ptr, callInst->getName(), false /* not volatile */,
+             llvm::MaybeAlign(g->opt.forceAlignedMemory ? g->target->getNativeVectorAlignment() : info->align)
+@@ -3459,7 +3459,7 @@ llvm::Value *lGEPAndLoad(llvm::Value *basePtr, int64_t offset, int align, llvm::
+                          llvm::Type *type) {
+     llvm::Value *ptr = lGEPInst(basePtr, LLVMInt64(offset), "new_base", insertBefore);
+     ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(type, 0), "ptr_cast", insertBefore);
+-#if ISPC_LLVM_VERSION == ISPC_LLVM_10_0
++#if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+     return new llvm::LoadInst(ptr, "gather_load", false /* not volatile */, llvm::MaybeAlign(align), insertBefore);
+ #else // LLVM 11.0+
+     return new llvm::LoadInst(llvm::dyn_cast<llvm::PointerType>(ptr->getType())->getPointerElementType(), ptr,
+@@ -6183,7 +6183,7 @@ bool ReplaceLLVMIntrinsics::runOnBasicBlock(llvm::BasicBlock &bb) {
+                 Args.push_back(llvm::ConstantVector::getSplat(
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+                     g->target->getNativeVectorWidth(),
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+                     {static_cast<unsigned>(g->target->getNativeVectorWidth()), false},
+ #else // LLVM 12.0+
+                     llvm::ElementCount::get(static_cast<unsigned>(g->target->getNativeVectorWidth()), false),
+@@ -6195,7 +6195,7 @@ bool ReplaceLLVMIntrinsics::runOnBasicBlock(llvm::BasicBlock &bb) {
+                 llvm::Value *zeroMask = llvm::ConstantVector::getSplat(
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+                     g->target->getNativeVectorWidth(),
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+                     {static_cast<unsigned>(g->target->getNativeVectorWidth()), false},
+ #else // LLVM 12.0+
+                     llvm::ElementCount::get(static_cast<unsigned>(g->target->getNativeVectorWidth()), false),
+@@ -6567,7 +6567,7 @@ llvm::Value *FixAddressSpace::calculateGatherScatterAddress(llvm::Value *Ptr, ll
+         llvm::Constant *zeroVec = llvm::ConstantVector::getSplat(
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+             addressType->getVectorNumElements(),
+-#elif ISPC_LLVM_VERSION == ISPC_LLVM_11_0
++#elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+             {llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements(), false},
+ #else
+             llvm::ElementCount::get(
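Besides widening the `== ISPC_LLVM_11_0` guards to `< ISPC_LLVM_12_0`, the hunks above also sit next to the other recurring LLVM 11 break: the LoadInst constructors that inferred the result type from the pointer operand were removed, so the pointee type and an explicit alignment must be spelled out. A reduced before/after sketch of that shape (lLoadFrom is a hypothetical helper; the alignment value here is arbitrary):

    #include "llvm/IR/Instructions.h"

    // Emit a load of *ptr before 'insertBefore'. From LLVM 11 on, the loaded
    // type is an explicit constructor argument and the alignment is an Align.
    static llvm::LoadInst *lLoadFrom(llvm::Value *ptr, llvm::Instruction *insertBefore) {
    #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
        return new llvm::LoadInst(ptr, "load", false /* not volatile */,
                                  llvm::MaybeAlign(4), insertBefore);
    #else // LLVM 11.0+
        llvm::Type *eltTy =
            llvm::dyn_cast<llvm::PointerType>(ptr->getType())->getPointerElementType();
        return new llvm::LoadInst(eltTy, ptr, "load", false /* not volatile */,
                                  llvm::MaybeAlign(4).valueOrOne(), insertBefore);
    #endif
    }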
diff --git a/srcpkgs/ispc/patches/llvm12-007.patch b/srcpkgs/ispc/patches/llvm12-007.patch
new file mode 100644
index 00000000000..1b975f896c7
--- /dev/null
+++ b/srcpkgs/ispc/patches/llvm12-007.patch
@@ -0,0 +1,160 @@
+From 62f5a6c37bf26fc25a7cf81868052d3472874610 Mon Sep 17 00:00:00 2001
+From: Arina Neshlyaeva
+Date: Tue, 23 Mar 2021 23:07:30 -0700
+Subject: [PATCH] Do not generate function mask when it is not required
+
+---
+ src/ctx.cpp                    | 39 ++++++++++++++-------
+ tests/lit-tests/full_mask.ispc | 63 ++++++++++++++++++++++++++++++++++
+ 2 files changed, 90 insertions(+), 12 deletions(-)
+ create mode 100644 tests/lit-tests/full_mask.ispc
+
+diff --git a/src/ctx.cpp b/src/ctx.cpp
+index 527a02f30..9e775fc5c 100644
+--- a/src/ctx.cpp
++++ b/src/ctx.cpp
+@@ -231,10 +231,18 @@ FunctionEmitContext::FunctionEmitContext(Function *func, Symbol *funSym, llvm::F
+     internalMaskPointer = AllocaInst(LLVMTypes::MaskType, "internal_mask_memory");
+     StoreInst(LLVMMaskAllOn, internalMaskPointer);
+ 
+-    functionMaskValue = LLVMMaskAllOn;
+-
+-    fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
+-    StoreInst(LLVMMaskAllOn, fullMaskPointer);
++    // If the function doesn't have __mask in parameters, there is no need to
++    // have function mask
++    if ((func->GetType()->isExported &&
++         (lf->getFunctionType()->getNumParams() == func->GetType()->GetNumParameters())) ||
++        (func->GetType()->isUnmasked) || (func->GetType()->isTask)) {
++        functionMaskValue = NULL;
++        fullMaskPointer = NULL;
++    } else {
++        functionMaskValue = LLVMMaskAllOn;
++        fullMaskPointer = AllocaInst(LLVMTypes::MaskType, "full_mask_memory");
++        StoreInst(LLVMMaskAllOn, fullMaskPointer);
++    }
+ 
+     blockEntryMask = NULL;
+     breakLanesPtr = continueLanesPtr = NULL;
+@@ -389,20 +397,26 @@ llvm::BasicBlock *FunctionEmitContext::GetCurrentBasicBlock() { return bblock; }
+ 
+ void FunctionEmitContext::SetCurrentBasicBlock(llvm::BasicBlock *bb) { bblock = bb; }
+ 
+-llvm::Value *FunctionEmitContext::GetFunctionMask() { return functionMaskValue; }
++llvm::Value *FunctionEmitContext::GetFunctionMask() { return fullMaskPointer ? functionMaskValue : LLVMMaskAllOn; }
+ 
+ llvm::Value *FunctionEmitContext::GetInternalMask() { return LoadInst(internalMaskPointer, NULL, "load_mask"); }
+ 
+ llvm::Value *FunctionEmitContext::GetFullMask() {
+-    return BinaryOperator(llvm::Instruction::And, GetInternalMask(), functionMaskValue, "internal_mask&function_mask");
++    return fullMaskPointer ? BinaryOperator(llvm::Instruction::And, GetInternalMask(), functionMaskValue,
++                                            "internal_mask&function_mask")
++                           : GetInternalMask();
+ }
+ 
+-llvm::Value *FunctionEmitContext::GetFullMaskPointer() { return fullMaskPointer; }
++llvm::Value *FunctionEmitContext::GetFullMaskPointer() {
++    return fullMaskPointer ? fullMaskPointer : internalMaskPointer;
++}
+ 
+ void FunctionEmitContext::SetFunctionMask(llvm::Value *value) {
+-    functionMaskValue = value;
+-    if (bblock != NULL)
+-        StoreInst(GetFullMask(), fullMaskPointer);
++    if (fullMaskPointer != NULL) {
++        functionMaskValue = value;
++        if (bblock != NULL)
++            StoreInst(GetFullMask(), fullMaskPointer);
++    }
+ }
+ 
+ void FunctionEmitContext::SetBlockEntryMask(llvm::Value *value) { blockEntryMask = value; }
+@@ -410,7 +424,8 @@ void FunctionEmitContext::SetInternalMask(llvm::Value *value) {
+     StoreInst(value, internalMaskPointer);
+     // kludge so that __mask returns the right value in ispc code.
+-    StoreInst(GetFullMask(), fullMaskPointer);
++    if (fullMaskPointer)
++        StoreInst(GetFullMask(), fullMaskPointer);
+ }
+ 
+ void FunctionEmitContext::SetInternalMaskAnd(llvm::Value *oldMask, llvm::Value *test) {
+@@ -1265,7 +1280,7 @@ void FunctionEmitContext::CurrentLanesReturned(Expr *expr, bool doCoherenceCheck
+     // lanes have returned
+     if (doCoherenceCheck) {
+         // if newReturnedLanes == functionMaskValue, get out of here!
+-        llvm::Value *cmp = MasksAllEqual(functionMaskValue, newReturnedLanes);
++        llvm::Value *cmp = MasksAllEqual(GetFunctionMask(), newReturnedLanes);
+         llvm::BasicBlock *bDoReturn = CreateBasicBlock("do_return");
+         llvm::BasicBlock *bNoReturn = CreateBasicBlock("no_return");
+         BranchInst(bDoReturn, bNoReturn, cmp);
+diff --git a/tests/lit-tests/full_mask.ispc b/tests/lit-tests/full_mask.ispc
+new file mode 100644
+index 000000000..ac0b0bca3
+--- /dev/null
++++ b/tests/lit-tests/full_mask.ispc
+@@ -0,0 +1,63 @@
++// RUN: %{ispc} %s -DISPC_EXPORT --emit-llvm-text -O0 --nowrap -o %t.ll
++// RUN: FileCheck --input-file=%t.ll -check-prefix=CHECK_ISPC_EXPORT %s
++
++// RUN: %{ispc} %s -DISPC_UNMASKED --emit-llvm-text -O0 --nowrap -o %t.ll
++// RUN: FileCheck --input-file=%t.ll -check-prefix=CHECK_ISPC_UNMASKED %s
++
++// RUN: %{ispc} %s -DISPC_NOQUALIF --emit-llvm-text -O0 --nowrap -o %t.ll
++// RUN: FileCheck --input-file=%t.ll -check-prefix=CHECK_ISPC_NOQUALIF %s
++
++// RUN: %{ispc} %s -DISPC_TASK --emit-llvm-text -O0 --nowrap -o %t.ll
++// RUN: FileCheck --input-file=%t.ll -check-prefix=CHECK_ISPC_TASK %s
++
++struct Parameters {
++    float *vout;
++    int param;
++};
++
++#ifdef ISPC_EXPORT
++// CHECK_ISPC_EXPORT: define void @simple_export___
++// CHECK_ISPC_EXPORT: %full_mask_memory = alloca
++// CHECK_ISPC_EXPORT: define void @simple_export(
++// CHECK_ISPC_EXPORT-NOT: %full_mask_memory = alloca
++export void simple_export(void *uniform _p) {
++    Parameters *uniform p = (Parameters * uniform) _p;
++    if (programIndex > p->param)
++        p->vout[programIndex] = programCount * programIndex;
++}
++#endif
++#ifdef ISPC_UNMASKED
++// CHECK_ISPC_UNMASKED: define void @simple_unmasked
++// CHECK_ISPC_UNMASKED-NOT: %full_mask_memory = alloca
++unmasked void simple_unmasked(void *uniform _p) {
++    Parameters *uniform p = (Parameters * uniform) _p;
++    if (programIndex > p->param)
++        p->vout[programIndex] = programCount * programIndex;
++}
++#endif
++#ifdef ISPC_NOQUALIF
++// CHECK_ISPC_NOQUALIF: define void @simple
++// CHECK_ISPC_NOQUALIF: %full_mask_memory = alloca
++void simple(void *uniform _p) {
++    Parameters *uniform p = (Parameters * uniform) _p;
++    if (programIndex > p->param)
++        p->vout[programIndex] = programCount * programIndex;
++}
++#endif
++#ifdef ISPC_TASK
++
++// CHECK_ISPC_TASK: define void @simple_task
++// CHECK_ISPC_TASK-NOT: %full_mask_memory = alloca
++// CHECK_ISPC_TASK: define void @simple_entry_point__
++// CHECK_ISPC_TASK: %full_mask_memory = alloca
++// CHECK_ISPC_TASK: define void @simple_entry_point(
++// CHECK_ISPC_TASK-NOT: %full_mask_memory = alloca
++task void simple_task(void *uniform _p) {
++    Parameters *uniform p = (Parameters * uniform) _p;
++    if (programIndex > p->param)
++        p->vout[programIndex] = programCount * programIndex;
++}
++export void simple_entry_point(void *uniform parameters, uniform int dim0, uniform int dim1, uniform int dim2) {
++    launch[dim0, dim1, dim2] simple_task(parameters);
++}
++#endif
+\ No newline at end of file
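The core of llvm12-007 is a null-object convention: fullMaskPointer stays NULL for exported, unmasked, and task functions (where all lanes are known to enter enabled), and every accessor degrades to the internal mask. A stripped-down picture of the accessor shape, for orientation only (MaskState is not the real FunctionEmitContext):

    #include "llvm/IR/Value.h"

    // When no function mask was allocated, queries fall back to the internal
    // mask or to the all-on constant, so no per-function mask storage is emitted.
    struct MaskState {
        llvm::Value *fullMaskPointer;     // NULL for export/unmasked/task functions
        llvm::Value *internalMaskPointer; // always allocated
        llvm::Value *functionMaskValue;   // meaningful only if fullMaskPointer != NULL
        llvm::Value *allOn;               // stand-in for LLVMMaskAllOn

        llvm::Value *GetFunctionMask() const {
            return fullMaskPointer ? functionMaskValue : allOn;
        }
        llvm::Value *GetFullMaskPointer() const {
            return fullMaskPointer ? fullMaskPointer : internalMaskPointer;
        }
    };

The lit test below then checks exactly this: %full_mask_memory is allocated only for the plain (unqualified) function and for the masked variant of the exported one.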
diff --git a/srcpkgs/ispc/patches/llvm12-008.patch b/srcpkgs/ispc/patches/llvm12-008.patch
new file mode 100644
index 00000000000..c8413d8b788
--- /dev/null
+++ b/srcpkgs/ispc/patches/llvm12-008.patch
@@ -0,0 +1,149 @@
+From afad14227d9204a3c8a626ca608f5b43c3218fe9 Mon Sep 17 00:00:00 2001
+From: Arina Neshlyaeva
+Date: Tue, 4 May 2021 11:43:52 -0700
+Subject: [PATCH] Fixed ISPC gen build for LLVM12
+
+---
+ src/ctx.cpp |  4 ++++
+ src/opt.cpp | 43 +++++++++++++++++++++++++++++--------------
+ 2 files changed, 33 insertions(+), 14 deletions(-)
+
+diff --git a/src/ctx.cpp b/src/ctx.cpp
+index 0c7050591..f7dc06939 100644
+--- a/src/ctx.cpp
++++ b/src/ctx.cpp
+@@ -3738,7 +3738,11 @@ llvm::Value *FunctionEmitContext::GenXSimdCFAny(llvm::Value *value) {
+ 
+ llvm::Value *FunctionEmitContext::GenXSimdCFPredicate(llvm::Value *value, llvm::Value *defaults) {
+     AssertPos(currentPos, llvm::isa<llvm::VectorType>(value->getType()));
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++    llvm::FixedVectorType *vt = llvm::dyn_cast<llvm::FixedVectorType>(value->getType());
++#else
+     llvm::VectorType *vt = llvm::dyn_cast<llvm::VectorType>(value->getType());
++#endif
+     if (defaults == NULL) {
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+         defaults = llvm::ConstantVector::getSplat(vt->getVectorNumElements(),
+diff --git a/src/opt.cpp b/src/opt.cpp
+index cea4afa23..4e33d6b1a 100644
+--- a/src/opt.cpp
++++ b/src/opt.cpp
+@@ -2916,9 +2916,13 @@ static llvm::Function *lGenXMaskedInt8Inst(llvm::Instruction *inst, bool isStore
+ 
+ static llvm::CallInst *lGenXStoreInst(llvm::Value *val, llvm::Value *ptr, llvm::Instruction *inst) {
+     Assert(g->target->isGenXTarget());
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++    Assert(llvm::isa<llvm::FixedVectorType>(val->getType()));
++    llvm::FixedVectorType *valVecType = llvm::dyn_cast<llvm::FixedVectorType>(val->getType());
++#else
+     Assert(llvm::isa<llvm::VectorType>(val->getType()));
+-
+     llvm::VectorType *valVecType = llvm::dyn_cast<llvm::VectorType>(val->getType());
++#endif
+     Assert(llvm::isPowerOf2_32(valVecType->getNumElements()));
+     Assert(valVecType->getPrimitiveSizeInBits() / 8 <= 8 * OWORD);
+ 
+@@ -2946,9 +2946,13 @@ static llvm::CallInst *lGenXLoadInst(llvm::Value *ptr, llvm::Type *retType, llvm::
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++    Assert(llvm::isa<llvm::FixedVectorType>(retType));
++    llvm::FixedVectorType *retVecType = llvm::dyn_cast<llvm::FixedVectorType>(retType);
++#else
+     Assert(llvm::isa<llvm::VectorType>(retType));
+-
+     llvm::VectorType *retVecType = llvm::dyn_cast<llvm::VectorType>(retType);
++#endif
+     Assert(llvm::isPowerOf2_32(retVecType->getNumElements()));
+     Assert(retVecType->getPrimitiveSizeInBits());
+     Assert(retVecType->getPrimitiveSizeInBits() / 8 <= 8 * OWORD);
+@@ -5485,8 +5493,13 @@ static void lCreateBlockLDUse(llvm::Instruction *currInst, std::vector(ui->getUser()))) {
+             // Collect idxs from gather and fix users
+             llvm::Value *res = llvm::UndefValue::get(gather->getType());
+-            for (unsigned i = 0, end = llvm::dyn_cast<llvm::VectorType>(res->getType())->getNumElements(); i < end;
+-                 ++i) {
++#if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
++            for (unsigned i = 0, end = llvm::dyn_cast<llvm::FixedVectorType>(res->getType())->getNumElements(); i < end;
++                 ++i)
++#else
++            for (unsigned i = 0, end = llvm::dyn_cast<llvm::VectorType>(res->getType())->getNumElements(); i < end; ++i)
++#endif
++            {
+                 // Get element via IEI
+                 res = llvm::InsertElementInst::Create(res, EEIs[ptrUse.idxs[curr_idx++]],
+                                                       llvm::ConstantInt::get(LLVMTypes::Int32Type, i),
+@@ -5522,10 +5535,11 @@ static bool lVectorizeGEPs(llvm::Value *ptr, std::vector &ptrUses, std::
+ 
+     // Adjust values for vector load
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+-    if (auto vecTy = llvm::dyn_cast<llvm::FixedVectorType>(type)) {
++    if (auto vecTy = llvm::dyn_cast<llvm::FixedVectorType>(type))
+ #else
+-    if (auto vecTy = llvm::dyn_cast<llvm::VectorType>(type)) {
++    if (auto vecTy = llvm::dyn_cast<llvm::VectorType>(type))
+ #endif
++    {
+         // Get single element size
+         t_size /= vecTy->getNumElements();
+         // Get single element type
+@@ -6477,7 +6491,7 @@ llvm::Value *FixAddressSpace::calculateGatherScatterAddress(llvm::Value *Ptr, ll
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+         Offsets,
+         llvm::FixedVectorType::get(LLVMTypes::Int64Type,
+-                                   llvm::dyn_cast<llvm::VectorType>(Offsets->getType())->getNumElements()),
++                                   llvm::dyn_cast<llvm::FixedVectorType>(Offsets->getType())->getNumElements()),
+         "svm_offset_zext", InsertBefore);
+ #else
+         Offsets, llvm::VectorType::get(LLVMTypes::Int64Type, Offsets->getType()->getVectorNumElements()),
+@@ -6492,7 +6506,7 @@ llvm::Value *FixAddressSpace::calculateGatherScatterAddress(llvm::Value *Ptr, ll
+         llvm::Value *undefInsertValue =
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+             llvm::UndefValue::get(llvm::FixedVectorType::get(
+-                LLVMTypes::Int64Type, llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements()));
++                LLVMTypes::Int64Type, llvm::dyn_cast<llvm::FixedVectorType>(addressType)->getNumElements()));
+ #else
+             llvm::UndefValue::get(llvm::VectorType::get(LLVMTypes::Int64Type, addressType->getVectorNumElements()));
+ #endif
+@@ -6501,17 +6515,16 @@ llvm::Value *FixAddressSpace::calculateGatherScatterAddress(llvm::Value *Ptr, ll
+ #if ISPC_LLVM_VERSION < ISPC_LLVM_11_0
+             addressType->getVectorNumElements(),
+ #elif ISPC_LLVM_VERSION < ISPC_LLVM_12_0
+-            {llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements(), false},
++            {llvm::dyn_cast<llvm::FixedVectorType>(addressType)->getNumElements(), false},
+ #else
+-            llvm::ElementCount::get(
+-                llvm::dyn_cast<llvm::VectorType>(addressType->getNumElements(), false),
++            llvm::ElementCount::get(llvm::dyn_cast<llvm::FixedVectorType>(addressType)->getNumElements(), false),
+ #endif
+             llvm::Constant::getNullValue(llvm::Type::getInt32Ty(InsertBefore->getContext())));
+ 
+         llvm::Value *undefShuffleValue =
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+             llvm::UndefValue::get(llvm::FixedVectorType::get(
+-                LLVMTypes::Int64Type, llvm::dyn_cast<llvm::VectorType>(addressType)->getNumElements()));
++                LLVMTypes::Int64Type, llvm::dyn_cast<llvm::FixedVectorType>(addressType)->getNumElements()));
+ #else
+             llvm::UndefValue::get(llvm::VectorType::get(LLVMTypes::Int64Type, addressType->getVectorNumElements()));
+ #endif
+         address = new llvm::ShuffleVectorInst(address, undefShuffleValue, zeroVec, "svm_ptr_svi", InsertBefore);
+@@ -6542,7 +6555,8 @@ llvm::Instruction *FixAddressSpace::processVectorLoad(llvm::LoadInst *LI) {
+         isPtrLoad = true;
+         auto scalarType = g->target->is32Bit() ? LLVMTypes::Int32Type : LLVMTypes::Int64Type;
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+-        retType = llvm::FixedVectorType::get(scalarType, llvm::dyn_cast<llvm::VectorType>(retType)->getNumElements());
++        retType =
++            llvm::FixedVectorType::get(scalarType, llvm::dyn_cast<llvm::FixedVectorType>(retType)->getNumElements());
+ #else
+         retType = llvm::VectorType::get(scalarType, retType->getVectorNumElements());
+ #endif
+@@ -6604,7 +6618,8 @@ llvm::Instruction *FixAddressSpace::processVectorStore(llvm::StoreInst *SI) {
+     if (valType->getScalarType()->isPointerTy()) {
+         auto scalarType = g->target->is32Bit() ? LLVMTypes::Int32Type : LLVMTypes::Int64Type;
+ #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
+-        valType = llvm::FixedVectorType::get(scalarType, llvm::dyn_cast<llvm::VectorType>(valType)->getNumElements());
++        valType =
++            llvm::FixedVectorType::get(scalarType, llvm::dyn_cast<llvm::FixedVectorType>(valType)->getNumElements());
+ #else
+         valType = llvm::VectorType::get(scalarType, valType->getVectorNumElements());
+ #endif
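llvm12-008 finishes the migration that llvm12-005 started: from LLVM 11 on, fixed-width vectors are llvm::FixedVectorType, and getNumElements() lives there rather than on the VectorType base class (which in LLVM 12 may also be scalable). The idiom in isolation, assuming fixed widths as ispc does (lNumElts is a hypothetical helper):

    #include "llvm/IR/DerivedTypes.h"

    // Lane count of a fixed-width vector type; a scalable vector would fail
    // the cast, which matches ispc's fixed-width assumption.
    static unsigned lNumElts(llvm::Type *t) {
    #if ISPC_LLVM_VERSION >= ISPC_LLVM_11_0
        return llvm::cast<llvm::FixedVectorType>(t)->getNumElements();
    #else
        return t->getVectorNumElements();
    #endif
    }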
diff --git a/srcpkgs/ispc/patches/llvm12-009.patch b/srcpkgs/ispc/patches/llvm12-009.patch
new file mode 100644
index 00000000000..d3ea6782858
--- /dev/null
+++ b/srcpkgs/ispc/patches/llvm12-009.patch
@@ -0,0 +1,2109 @@
+From 97b13c32132e2704a5e3e7b2eae49d854d087e3e Mon Sep 17 00:00:00 2001
+From: Arina Neshlyaeva
+Date: Tue, 4 May 2021 11:31:55 -0700
+Subject: [PATCH] Updated builtins-cm* for switch to LLVM12
+
+---
+ builtins/builtins-cm-32.ll | 234 ++++++++++++++++++-------------------
+ builtins/builtins-cm-64.ll | 232 ++++++++++++++++++------------------
+ 2 files changed, 233 insertions(+), 233 deletions(-)
+
+diff --git a/builtins/builtins-cm-32.ll b/builtins/builtins-cm-32.ll
+index 32debf32e..481bbdc48 100644
+--- a/builtins/builtins-cm-32.ll
++++ b/builtins/builtins-cm-32.ll
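llvm12-009 is large but uniform: every prebuilt __cm_intrinsic_impl_* helper in the CM builtins modules is demoted from the default external linkage (marked dso_local) to internal linkage, presumably so that these module-private helpers no longer clash with or escape into user modules under LLVM 12 and can be dead-stripped after inlining. The C++-API equivalent of the textual edit repeated below would be one call per function (a sketch, not taken from the patch):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    // Mark every CM division/remainder helper as internal, mirroring the
    // "define dso_local ..." -> "define internal ..." edits in the diff.
    static void lInternalizeCMHelpers(llvm::Module &m) {
        for (llvm::Function &f : m)
            if (f.getName().contains("__cm_intrinsic_impl_"))
                f.setLinkage(llvm::GlobalValue::InternalLinkage);
    }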
+@@ -557,7 +557,7 @@ $_ZN13VaryingWriter12WriteVecElemIPvEEvv = comdat any
+ @.str.10 = private unnamed_addr constant [5 x i8] c"%08X\00", align 1
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local signext i8 @_Z24__cm_intrinsic_impl_sdivcc(i8 signext, i8 signext) #0 {
++define internal signext i8 @_Z24__cm_intrinsic_impl_sdivcc(i8 signext, i8 signext) #0 {
+   %3 = alloca i8, align 1
+   %4 = alloca i8, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -618,7 +618,7 @@ define internal <1 x i8> @_ZN7details13__impl_divremILi1EEEu2CMvbT__cS1_S1_u2CMv
+ declare <1 x i8> @llvm.genx.rdregioni.v1i8.v1i8.i16(<1 x i8>, i32, i32, i32, i16, i32) #2
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_cS_(<1 x i8>, <1 x i8>) #0 {
++define internal <1 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_cS_(<1 x i8>, <1 x i8>) #0 {
+   %3 = alloca <1 x i8>, align 1
+   %4 = alloca <1 x i8>, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -634,7 +634,7 @@ define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_cS_(<1 x i8>, <1
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb2_cS_(<2 x i8>, <2 x i8>) #3 {
++define internal <2 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb2_cS_(<2 x i8>, <2 x i8>) #3 {
+   %3 = alloca <2 x i8>, align 2
+   %4 = alloca <2 x i8>, align 2
+   %5 = alloca <2 x i8>, align 2
+@@ -680,7 +680,7 @@ define internal <2 x i8> @_ZN7details13__impl_divremILi2EEEu2CMvbT__cS1_S1_u2CMv
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb4_cS_(<4 x i8>, <4 x i8>) #5 {
++define internal <4 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb4_cS_(<4 x i8>, <4 x i8>) #5 {
+   %3 = alloca <4 x i8>, align 4
+   %4 = alloca <4 x i8>, align 4
+   %5 = alloca <4 x i8>, align 4
+@@ -818,7 +818,7 @@ define internal <16 x i8> @_ZN7details13__impl_divremILi16EEEu2CMvbT__cS1_S1_u2C
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb32_cS_(<32 x i8>, <32 x i8>) #11 {
++define internal <32 x i8> @_Z24__cm_intrinsic_impl_sdivu2CMvb32_cS_(<32 x i8>, <32 x i8>) #11 {
+   %3 = alloca <32 x i8>, align 32
+   %4 = alloca <32 x i8>, align 32
+   %5 = alloca <32 x i8>, align 32
+@@ -864,7 +864,7 @@ define internal <32 x i8> @_ZN7details13__impl_divremILi32EEEu2CMvbT__cS1_S1_u2C
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local signext i8 @_Z24__cm_intrinsic_impl_sremcc(i8 signext, i8 signext) #0 {
++define internal signext i8 @_Z24__cm_intrinsic_impl_sremcc(i8 signext, i8 signext) #0 {
+   %3 = alloca i8, align 1
+   %4 = alloca i8, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -890,7 +890,7 @@ define dso_local signext i8 @_Z24__cm_intrinsic_impl_sremcc(i8 signext, i8 signe
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb1_cS_(<1 x i8>, <1 x i8>) #0 {
++define internal <1 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb1_cS_(<1 x i8>, <1 x i8>) #0 {
+   %3 = alloca <1 x i8>, align 1
+   %4 = alloca <1 x i8>, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -904,7 +904,7 @@ define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb1_cS_(<1 x i8>, <1
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb2_cS_(<2 x i8>, <2 x i8>) #3 {
++define internal <2 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb2_cS_(<2 x i8>, <2 x i8>) #3 {
+   %3 = alloca <2 x i8>, align 2
+   %4 = alloca <2 x i8>, align 2
+   %5 = alloca <2 x i8>, align 2
+@@ -918,7 +918,7 @@ define dso_local <2 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb2_cS_(<2 x i8>, <2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb4_cS_(<4 x i8>, <4 x i8>) #5 {
++define internal <4 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb4_cS_(<4 x i8>, <4 x i8>) #5 {
+   %3 = alloca <4 x i8>, align 4
+   %4 = alloca <4 x i8>, align 4
+   %5 = alloca <4 x i8>, align 4
+@@ -932,7 +932,7 @@ define dso_local <4 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb4_cS_(<4 x i8>, <4
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <8 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb8_cS_(<8 x i8>, <8 x i8>) #7 {
++define internal <8 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb8_cS_(<8 x i8>, <8 x i8>) #7 {
+   %3 = alloca <8 x i8>, align 8
+   %4 = alloca <8 x i8>, align 8
+   %5 = alloca <8 x i8>, align 8
+@@ -946,7 +946,7 @@ define dso_local <8 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb8_cS_(<8 x i8>, <8
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <16 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb16_cS_(<16 x i8>, <16 x i8>) #9 {
++define internal <16 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb16_cS_(<16 x i8>, <16 x i8>) #9 {
+   %3 = alloca <16 x i8>, align 16
+   %4 = alloca <16 x i8>, align 16
+   %5 = alloca <16 x i8>, align 16
+@@ -960,7 +960,7 @@ define dso_local <16 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb16_cS_(<16 x i8>,
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb32_cS_(<32 x i8>, <32 x i8>) #11 {
++define internal <32 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb32_cS_(<32 x i8>, <32 x i8>) #11 {
+   %3 = alloca <32 x i8>, align 32
+   %4 = alloca <32 x i8>, align 32
+   %5 = alloca <32 x i8>, align 32
+@@ -974,7 +974,7 @@ define dso_local <32 x i8> @_Z24__cm_intrinsic_impl_sremu2CMvb32_cS_(<32 x i8>,
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local signext i16 @_Z24__cm_intrinsic_impl_sdivss(i16 signext, i16 signext) #3 {
++define internal signext i16 @_Z24__cm_intrinsic_impl_sdivss(i16 signext, i16 signext) #3 {
+   %3 = alloca i16, align 2
+   %4 = alloca i16, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -1058,7 +1058,7 @@ define internal <1 x i16> @_ZN7details13__impl_divremILi1EEEu2CMvbT__sS1_S1_u2CM
+ declare <1 x i16> @llvm.genx.rdregioni.v1i16.v1i16.i16(<1 x i16>, i32, i32, i32, i16, i32) #2
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_sS_(<1 x i16>, <1 x i16>) #3 {
++define internal <1 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_sS_(<1 x i16>, <1 x i16>) #3 {
+   %3 = alloca <1 x i16>, align 2
+   %4 = alloca <1 x i16>, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -1074,7 +1074,7 @@ define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_sS_(<1 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb2_sS_(<2 x i16>, <2 x i16>) #5 {
++define internal <2 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb2_sS_(<2 x i16>, <2 x i16>) #5 {
+   %3 = alloca <2 x i16>, align 4
+   %4 = alloca <2 x i16>, align 4
+   %5 = alloca <2 x i16>, align 4
+@@ -1143,7 +1143,7 @@ define internal <2 x i16> @_ZN7details13__impl_divremILi2EEEu2CMvbT__sS1_S1_u2CM
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb4_sS_(<4 x i16>, <4 x i16>) #7 {
++define internal <4 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb4_sS_(<4 x i16>, <4 x i16>) #7 {
+   %3 = alloca <4 x i16>, align 8
+   %4 = alloca <4 x i16>, align 8
+   %5 = alloca <4 x i16>, align 8
+@@ -1350,7 +1350,7 @@ define internal <16 x i16> @_ZN7details13__impl_divremILi16EEEu2CMvbT__sS1_S1_u2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb32_sS_(<32 x i16>, <32 x i16>) #13 {
++define internal <32 x i16> @_Z24__cm_intrinsic_impl_sdivu2CMvb32_sS_(<32 x i16>, <32 x i16>) #13 {
+   %3 = alloca <32 x i16>, align 64
+   %4 = alloca <32 x i16>, align 64
+   %5 = alloca <32 x i16>, align 64
+@@ -1419,7 +1419,7 @@ define internal <32 x i16> @_ZN7details13__impl_divremILi32EEEu2CMvbT__sS1_S1_u2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local signext i16 @_Z24__cm_intrinsic_impl_sremss(i16 signext, i16 signext) #3 {
++define internal signext i16 @_Z24__cm_intrinsic_impl_sremss(i16 signext, i16 signext) #3 {
+   %3 = alloca i16, align 2
+   %4 = alloca i16, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -1445,7 +1445,7 @@ define dso_local signext i16 @_Z24__cm_intrinsic_impl_sremss(i16 signext, i16 si
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb1_sS_(<1 x i16>, <1 x i16>) #3 {
++define internal <1 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb1_sS_(<1 x i16>, <1 x i16>) #3 {
+   %3 = alloca <1 x i16>, align 2
+   %4 = alloca <1 x i16>, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -1459,7 +1459,7 @@ define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb1_sS_(<1 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb2_sS_(<2 x i16>, <2 x i16>) #5 {
++define internal <2 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb2_sS_(<2 x i16>, <2 x i16>) #5 {
+   %3 = alloca <2 x i16>, align 4
+   %4 = alloca <2 x i16>, align 4
+   %5 = alloca <2 x i16>, align 4
+@@ -1473,7 +1473,7 @@ define dso_local <2 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb2_sS_(<2 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb4_sS_(<4 x i16>, <4 x i16>) #7 {
++define internal <4 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb4_sS_(<4 x i16>, <4 x i16>) #7 {
+   %3 = alloca <4 x i16>, align 8
+   %4 = alloca <4 x i16>, align 8
+   %5 = alloca <4 x i16>, align 8
+@@ -1487,7 +1487,7 @@ define dso_local <4 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb4_sS_(<4 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <8 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb8_sS_(<8 x i16>, <8 x i16>) #9 {
++define internal <8 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb8_sS_(<8 x i16>, <8 x i16>) #9 {
+   %3 = alloca <8 x i16>, align 16
+   %4 = alloca <8 x i16>, align 16
+   %5 = alloca <8 x i16>, align 16
+@@ -1501,7 +1501,7 @@ define dso_local <8 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb8_sS_(<8 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <16 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb16_sS_(<16 x i16>, <16 x i16>) #11 {
++define internal <16 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb16_sS_(<16 x i16>, <16 x i16>) #11 {
+   %3 = alloca <16 x i16>, align 32
+   %4 = alloca <16 x i16>, align 32
+   %5 = alloca <16 x i16>, align 32
+@@ -1515,7 +1515,7 @@ define dso_local <16 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb16_sS_(<16 x i16>
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb32_sS_(<32 x i16>, <32 x i16>) #13 {
++define internal <32 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb32_sS_(<32 x i16>, <32 x i16>) #13 {
+   %3 = alloca <32 x i16>, align 64
+   %4 = alloca <32 x i16>, align 64
+   %5 = alloca <32 x i16>, align 64
+@@ -1529,7 +1529,7 @@ define dso_local <32 x i16> @_Z24__cm_intrinsic_impl_sremu2CMvb32_sS_(<32 x i16>
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local i32 @_Z24__cm_intrinsic_impl_sdivii(i32, i32) #14 {
++define internal i32 @_Z24__cm_intrinsic_impl_sdivii(i32, i32) #14 {
+   %3 = alloca i32, align 4
+   %4 = alloca i32, align 4
+   %5 = alloca <1 x i32>, align 4
+@@ -1730,7 +1730,7 @@ define internal <1 x i32> @_ZN7details13__impl_divremILi1EEEu2CMvbT__iS1_S1_u2CM
+ declare <1 x i32> @llvm.genx.rdregioni.v1i32.v1i32.i16(<1 x i32>, i32, i32, i32, i16, i32) #2
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_iS_(<1 x i32>, <1 x i32>) #14 {
++define internal <1 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_iS_(<1 x i32>, <1 x i32>) #14 {
+   %3 = alloca <1 x i32>, align 4
+   %4 = alloca <1 x i32>, align 4
+   %5 = alloca <1 x i32>, align 4
+@@ -1746,7 +1746,7 @@ define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb1_iS_(<1 x i32>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb2_iS_(<2 x i32>, <2 x i32>) #15 {
++define internal <2 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb2_iS_(<2 x i32>, <2 x i32>) #15 {
+   %3 = alloca <2 x i32>, align 8
+   %4 = alloca <2 x i32>, align 8
+   %5 = alloca <2 x i32>, align 8
+@@ -1932,7 +1932,7 @@ define internal <2 x i32> @_ZN7details13__impl_divremILi2EEEu2CMvbT__iS1_S1_u2CM
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb4_iS_(<4 x i32>, <4 x i32>) #16 {
++define internal <4 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb4_iS_(<4 x i32>, <4 x i32>) #16 {
+   %3 = alloca <4 x i32>, align 16
+   %4 = alloca <4 x i32>, align 16
+   %5 = alloca <4 x i32>, align 16
+@@ -2490,7 +2490,7 @@ define internal <16 x i32> @_ZN7details13__impl_divremILi16EEEu2CMvbT__iS1_S1_u2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb32_iS_(<32 x i32>, <32 x i32>) #19 {
++define internal <32 x i32> @_Z24__cm_intrinsic_impl_sdivu2CMvb32_iS_(<32 x i32>, <32 x i32>) #19 {
+   %3 = alloca <32 x i32>, align 128
+   %4 = alloca <32 x i32>, align 128
+   %5 = alloca <32 x i32>, align 128
+@@ -2676,7 +2676,7 @@ define internal <32 x i32> @_ZN7details13__impl_divremILi32EEEu2CMvbT__iS1_S1_u2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local i32 @_Z24__cm_intrinsic_impl_sremii(i32, i32) #14 {
++define internal i32 @_Z24__cm_intrinsic_impl_sremii(i32, i32) #14 {
+   %3 = alloca i32, align 4
+   %4 = alloca i32, align 4
+   %5 = alloca <1 x i32>, align 4
+@@ -2702,7 +2702,7 @@ define dso_local i32 @_Z24__cm_intrinsic_impl_sremii(i32, i32) #14 {
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb1_iS_(<1 x i32>, <1 x i32>) #14 {
++define internal <1 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb1_iS_(<1 x i32>, <1 x i32>) #14 {
+   %3 = alloca <1 x i32>, align 4
+   %4 = alloca <1 x i32>, align 4
+   %5 = alloca <1 x i32>, align 4
+@@ -2716,7 +2716,7 @@ define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb1_iS_(<1 x i32>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb2_iS_(<2 x i32>, <2 x i32>) #15 {
++define internal <2 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb2_iS_(<2 x i32>, <2 x i32>) #15 {
+   %3 = alloca <2 x i32>, align 8
+   %4 = alloca <2 x i32>, align 8
+   %5 = alloca <2 x i32>, align 8
+@@ -2730,7 +2730,7 @@ define dso_local <2 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb2_iS_(<2 x i32>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb4_iS_(<4 x i32>, <4 x i32>) #16 {
++define internal <4 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb4_iS_(<4 x i32>, <4 x i32>) #16 {
+   %3 = alloca <4 x i32>, align 16
+   %4 = alloca <4 x i32>, align 16
+   %5 = alloca <4 x i32>, align 16
+@@ -2744,7 +2744,7 @@ define dso_local <4 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb4_iS_(<4 x i32>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <8 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb8_iS_(<8 x i32>, <8 x i32>) #17 {
++define internal <8 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb8_iS_(<8 x i32>, <8 x i32>) #17 {
+   %3 = alloca <8 x i32>, align 32
+   %4 = alloca <8 x i32>, align 32
+   %5 = alloca <8 x i32>, align 32
+@@ -2758,7 +2758,7 @@ define dso_local <8 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb8_iS_(<8 x i32>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <16 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb16_iS_(<16 x i32>, <16 x i32>) #18 {
++define internal <16 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb16_iS_(<16 x i32>, <16 x i32>) #18 {
+   %3 = alloca <16 x i32>, align 64
+   %4 = alloca <16 x i32>, align 64
+   %5 = alloca <16 x i32>, align 64
+@@ -2772,7 +2772,7 @@ define dso_local <16 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb16_iS_(<16 x i32>
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb32_iS_(<32 x i32>, <32 x i32>) #19 {
++define internal <32 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb32_iS_(<32 x i32>, <32 x i32>) #19 {
+   %3 = alloca <32 x i32>, align 128
+   %4 = alloca <32 x i32>, align 128
+   %5 = alloca <32 x i32>, align 128
+@@ -2786,7 +2786,7 @@ define dso_local <32 x i32> @_Z24__cm_intrinsic_impl_sremu2CMvb32_iS_(<32 x i32>
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local zeroext i8 @_Z24__cm_intrinsic_impl_udivhh(i8 zeroext, i8 zeroext) #21 {
++define internal zeroext i8 @_Z24__cm_intrinsic_impl_udivhh(i8 zeroext, i8 zeroext) #21 {
+   %3 = alloca i8, align 1
+   %4 = alloca i8, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -2844,7 +2844,7 @@ define internal <1 x i8> @_ZN7details14__impl_udivremILi1EEEu2CMvbT__hS1_S1_u2CM
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb1_hS_(<1 x i8>, <1 x i8>) #21 {
++define internal <1 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb1_hS_(<1 x i8>, <1 x i8>) #21 {
+   %3 = alloca <1 x i8>, align 1
+   %4 = alloca <1 x i8>, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -2860,7 +2860,7 @@ define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb1_hS_(<1 x i8>, <1
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb2_hS_(<2 x i8>, <2 x i8>) #22 {
++define internal <2 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb2_hS_(<2 x i8>, <2 x i8>) #22 {
+   %3 = alloca <2 x i8>, align 2
+   %4 = alloca <2 x i8>, align 2
+   %5 = alloca <2 x i8>, align 2
+@@ -2906,7 +2906,7 @@ define internal <2 x i8> @_ZN7details14__impl_udivremILi2EEEu2CMvbT__hS1_S1_u2CM
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb4_hS_(<4 x i8>, <4 x i8>) #14 {
++define internal <4 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb4_hS_(<4 x i8>, <4 x i8>) #14 {
+   %3 = alloca <4 x i8>, align 4
+   %4 = alloca <4 x i8>, align 4
+   %5 = alloca <4 x i8>, align 4
+@@ -3044,7 +3044,7 @@ define internal <16 x i8> @_ZN7details14__impl_udivremILi16EEEu2CMvbT__hS1_S1_u2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb32_hS_(<32 x i8>, <32 x i8>) #17 {
++define internal <32 x i8> @_Z24__cm_intrinsic_impl_udivu2CMvb32_hS_(<32 x i8>, <32 x i8>) #17 {
+   %3 = alloca <32 x i8>, align 32
+   %4 = alloca <32 x i8>, align 32
+   %5 = alloca <32 x i8>, align 32
+@@ -3090,7 +3090,7 @@ define internal <32 x i8> @_ZN7details14__impl_udivremILi32EEEu2CMvbT__hS1_S1_u2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local zeroext i8 @_Z24__cm_intrinsic_impl_uremhh(i8 zeroext, i8 zeroext) #21 {
++define internal zeroext i8 @_Z24__cm_intrinsic_impl_uremhh(i8 zeroext, i8 zeroext) #21 {
+   %3 = alloca i8, align 1
+   %4 = alloca i8, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -3116,7 +3116,7 @@ define dso_local zeroext i8 @_Z24__cm_intrinsic_impl_uremhh(i8 zeroext, i8 zeroe
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb1_hS_(<1 x i8>, <1 x i8>) #21 {
++define internal <1 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb1_hS_(<1 x i8>, <1 x i8>) #21 {
+   %3 = alloca <1 x i8>, align 1
+   %4 = alloca <1 x i8>, align 1
+   %5 = alloca <1 x i8>, align 1
+@@ -3130,7 +3130,7 @@ define dso_local <1 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb1_hS_(<1 x i8>, <1
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb2_hS_(<2 x i8>, <2 x i8>) #22 {
++define internal <2 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb2_hS_(<2 x i8>, <2 x i8>) #22 {
+   %3 = alloca <2 x i8>, align 2
+   %4 = alloca <2 x i8>, align 2
+   %5 = alloca <2 x i8>, align 2
+@@ -3144,7 +3144,7 @@ define dso_local <2 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb2_hS_(<2 x i8>, <2
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb4_hS_(<4 x i8>, <4 x i8>) #14 {
++define internal <4 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb4_hS_(<4 x i8>, <4 x i8>) #14 {
+   %3 = alloca <4 x i8>, align 4
+   %4 = alloca <4 x i8>, align 4
+   %5 = alloca <4 x i8>, align 4
+@@ -3158,7 +3158,7 @@ define dso_local <4 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb4_hS_(<4 x i8>, <4
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <8 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb8_hS_(<8 x i8>, <8 x i8>) #15 {
++define internal <8 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb8_hS_(<8 x i8>, <8 x i8>) #15 {
+   %3 = alloca <8 x i8>, align 8
+   %4 = alloca <8 x i8>, align 8
+   %5 = alloca <8 x i8>, align 8
+@@ -3172,7 +3172,7 @@ define dso_local <8 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb8_hS_(<8 x i8>, <8
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <16 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb16_hS_(<16 x i8>, <16 x i8>) #16 {
++define internal <16 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb16_hS_(<16 x i8>, <16 x i8>) #16 {
+   %3 = alloca <16 x i8>, align 16
+   %4 = alloca <16 x i8>, align 16
+   %5 = alloca <16 x i8>, align 16
+@@ -3186,7 +3186,7 @@ define dso_local <16 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb16_hS_(<16 x i8>,
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb32_hS_(<32 x i8>, <32 x i8>) #17 {
++define internal <32 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb32_hS_(<32 x i8>, <32 x i8>) #17 {
+   %3 = alloca <32 x i8>, align 32
+   %4 = alloca <32 x i8>, align 32
+   %5 = alloca <32 x i8>, align 32
+@@ -3200,7 +3200,7 @@ define dso_local <32 x i8> @_Z24__cm_intrinsic_impl_uremu2CMvb32_hS_(<32 x i8>,
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local zeroext i16 @_Z24__cm_intrinsic_impl_udivtt(i16 zeroext, i16 zeroext) #22 {
++define internal zeroext i16 @_Z24__cm_intrinsic_impl_udivtt(i16 zeroext, i16 zeroext) #22 {
+   %3 = alloca i16, align 2
+   %4 = alloca i16, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -3278,7 +3278,7 @@ define internal <1 x i16> @_ZN7details14__impl_udivremILi1EEEu2CMvbT__tS1_S1_u2C
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb1_tS_(<1 x i16>, <1 x i16>) #22 {
++define internal <1 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb1_tS_(<1 x i16>, <1 x i16>) #22 {
+   %3 = alloca <1 x i16>, align 2
+   %4 = alloca <1 x i16>, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -3294,7 +3294,7 @@ define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb1_tS_(<1 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb2_tS_(<2 x i16>, <2 x i16>) #14 {
++define internal <2 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb2_tS_(<2 x i16>, <2 x i16>) #14 {
+   %3 = alloca <2 x i16>, align 4
+   %4 = alloca <2 x i16>, align 4
+   %5 = alloca <2 x i16>, align 4
+@@ -3360,7 +3360,7 @@ define internal <2 x i16> @_ZN7details14__impl_udivremILi2EEEu2CMvbT__tS1_S1_u2C
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb4_tS_(<4 x i16>, <4 x i16>) #15 {
++define internal <4 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb4_tS_(<4 x i16>, <4 x i16>) #15 {
+   %3 = alloca <4 x i16>, align 8
+   %4 = alloca <4 x i16>, align 8
+   %5 = alloca <4 x i16>, align 8
+@@ -3558,7 +3558,7 @@ define internal <16 x i16> @_ZN7details14__impl_udivremILi16EEEu2CMvbT__tS1_S1_u
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb32_tS_(<32 x i16>, <32 x i16>) #18 {
++define internal <32 x i16> @_Z24__cm_intrinsic_impl_udivu2CMvb32_tS_(<32 x i16>, <32 x i16>) #18 {
+   %3 = alloca <32 x i16>, align 64
+   %4 = alloca <32 x i16>, align 64
+   %5 = alloca <32 x i16>, align 64
+@@ -3624,7 +3624,7 @@ define internal <32 x i16> @_ZN7details14__impl_udivremILi32EEEu2CMvbT__tS1_S1_u
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local zeroext i16 @_Z24__cm_intrinsic_impl_uremtt(i16 zeroext, i16 zeroext) #22 {
++define internal zeroext i16 @_Z24__cm_intrinsic_impl_uremtt(i16 zeroext, i16 zeroext) #22 {
+   %3 = alloca i16, align 2
+   %4 = alloca i16, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -3650,7 +3650,7 @@ define dso_local zeroext i16 @_Z24__cm_intrinsic_impl_uremtt(i16 zeroext, i16 ze
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb1_tS_(<1 x i16>, <1 x i16>) #22 {
++define internal <1 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb1_tS_(<1 x i16>, <1 x i16>) #22 {
+   %3 = alloca <1 x i16>, align 2
+   %4 = alloca <1 x i16>, align 2
+   %5 = alloca <1 x i16>, align 2
+@@ -3664,7 +3664,7 @@ define dso_local <1 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb1_tS_(<1 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <2 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb2_tS_(<2 x i16>, <2 x i16>) #14 {
++define internal <2 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb2_tS_(<2 x i16>, <2 x i16>) #14 {
+   %3 = alloca <2 x i16>, align 4
+   %4 = alloca <2 x i16>, align 4
+   %5 = alloca <2 x i16>, align 4
+@@ -3678,7 +3678,7 @@ define dso_local <2 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb2_tS_(<2 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <4 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb4_tS_(<4 x i16>, <4 x i16>) #15 {
++define internal <4 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb4_tS_(<4 x i16>, <4 x i16>) #15 {
+   %3 = alloca <4 x i16>, align 8
+   %4 = alloca <4 x i16>, align 8
+   %5 = alloca <4 x i16>, align 8
+@@ -3692,7 +3692,7 @@ define dso_local <4 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb4_tS_(<4 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <8 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb8_tS_(<8 x i16>, <8 x i16>) #16 {
++define internal <8 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb8_tS_(<8 x i16>, <8 x i16>) #16 {
+   %3 = alloca <8 x i16>, align 16
+   %4 = alloca <8 x i16>, align 16
+   %5 = alloca <8 x i16>, align 16
+@@ -3706,7 +3706,7 @@ define dso_local <8 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb8_tS_(<8 x i16>, <
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <16 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb16_tS_(<16 x i16>, <16 x i16>) #17 {
++define internal <16 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb16_tS_(<16 x i16>, <16 x i16>) #17 {
+   %3 = alloca <16 x i16>, align 32
+   %4 = alloca <16 x i16>, align 32
+   %5 = alloca <16 x i16>, align 32
+@@ -3720,7 +3720,7 @@ define dso_local <16 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb16_tS_(<16 x i16>
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <32 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb32_tS_(<32 x i16>, <32 x i16>) #18 {
++define internal <32 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb32_tS_(<32 x i16>, <32 x i16>) #18 {
+   %3 = alloca <32 x i16>, align 64
+   %4 = alloca <32 x i16>, align 64
+   %5 = alloca <32 x i16>, align 64
+@@ -3734,7 +3734,7 @@ define dso_local <32 x i16> @_Z24__cm_intrinsic_impl_uremu2CMvb32_tS_(<32 x i16>
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local i32 @_Z24__cm_intrinsic_impl_udivjj(i32, i32) #14 {
++define internal i32 @_Z24__cm_intrinsic_impl_udivjj(i32, i32) #14 {
+   %3 = alloca i32, align 4
+   %4 = alloca i32, align 4
+   %5 = alloca <1 x i32>, align 4
+@@ -3892,7 +3892,7 @@ define internal <1 x i32> @_ZN7details14__impl_udivremILi1EEEu2CMvbT__jS1_S1_u2C
+ }
+ 
+ ; Function Attrs: noinline nounwind
+-define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb1_jS_(<1 x i32>, <1 x i32>) #14 {
++define internal <1 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb1_jS_(<1 x i32>, <1 x i32>) #14 {
+ 
%3 = alloca <1 x i32>, align 4 + %4 = alloca <1 x i32>, align 4 + %5 = alloca <1 x i32>, align 4 +@@ -3908,7 +3908,7 @@ define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb1_jS_(<1 x i32>, < + } + + ; Function Attrs: noinline nounwind +-define dso_local <2 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb2_jS_(<2 x i32>, <2 x i32>) #15 { ++define internal <2 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb2_jS_(<2 x i32>, <2 x i32>) #15 { + %3 = alloca <2 x i32>, align 8 + %4 = alloca <2 x i32>, align 8 + %5 = alloca <2 x i32>, align 8 +@@ -4054,7 +4054,7 @@ define internal <2 x i32> @_ZN7details14__impl_udivremILi2EEEu2CMvbT__jS1_S1_u2C + } + + ; Function Attrs: noinline nounwind +-define dso_local <4 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb4_jS_(<4 x i32>, <4 x i32>) #16 { ++define internal <4 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb4_jS_(<4 x i32>, <4 x i32>) #16 { + %3 = alloca <4 x i32>, align 16 + %4 = alloca <4 x i32>, align 16 + %5 = alloca <4 x i32>, align 16 +@@ -4492,7 +4492,7 @@ define internal <16 x i32> @_ZN7details14__impl_udivremILi16EEEu2CMvbT__jS1_S1_u + } + + ; Function Attrs: noinline nounwind +-define dso_local <32 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb32_jS_(<32 x i32>, <32 x i32>) #19 { ++define internal <32 x i32> @_Z24__cm_intrinsic_impl_udivu2CMvb32_jS_(<32 x i32>, <32 x i32>) #19 { + %3 = alloca <32 x i32>, align 128 + %4 = alloca <32 x i32>, align 128 + %5 = alloca <32 x i32>, align 128 +@@ -4638,7 +4638,7 @@ define internal <32 x i32> @_ZN7details14__impl_udivremILi32EEEu2CMvbT__jS1_S1_u + } + + ; Function Attrs: noinline nounwind +-define dso_local i32 @_Z24__cm_intrinsic_impl_uremjj(i32, i32) #14 { ++define internal i32 @_Z24__cm_intrinsic_impl_uremjj(i32, i32) #14 { + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + %5 = alloca <1 x i32>, align 4 +@@ -4664,7 +4664,7 @@ define dso_local i32 @_Z24__cm_intrinsic_impl_uremjj(i32, i32) #14 { + } + + ; Function Attrs: noinline nounwind +-define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb1_jS_(<1 x i32>, <1 x i32>) #14 { ++define internal <1 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb1_jS_(<1 x i32>, <1 x i32>) #14 { + %3 = alloca <1 x i32>, align 4 + %4 = alloca <1 x i32>, align 4 + %5 = alloca <1 x i32>, align 4 +@@ -4678,7 +4678,7 @@ define dso_local <1 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb1_jS_(<1 x i32>, < + } + + ; Function Attrs: noinline nounwind +-define dso_local <2 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb2_jS_(<2 x i32>, <2 x i32>) #15 { ++define internal <2 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb2_jS_(<2 x i32>, <2 x i32>) #15 { + %3 = alloca <2 x i32>, align 8 + %4 = alloca <2 x i32>, align 8 + %5 = alloca <2 x i32>, align 8 +@@ -4692,7 +4692,7 @@ define dso_local <2 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb2_jS_(<2 x i32>, < + } + + ; Function Attrs: noinline nounwind +-define dso_local <4 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb4_jS_(<4 x i32>, <4 x i32>) #16 { ++define internal <4 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb4_jS_(<4 x i32>, <4 x i32>) #16 { + %3 = alloca <4 x i32>, align 16 + %4 = alloca <4 x i32>, align 16 + %5 = alloca <4 x i32>, align 16 +@@ -4706,7 +4706,7 @@ define dso_local <4 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb4_jS_(<4 x i32>, < + } + + ; Function Attrs: noinline nounwind +-define dso_local <8 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb8_jS_(<8 x i32>, <8 x i32>) #17 { ++define internal <8 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb8_jS_(<8 x i32>, <8 x i32>) #17 { + %3 = alloca <8 x i32>, align 32 + %4 = alloca <8 x i32>, align 32 + %5 = 
alloca <8 x i32>, align 32 +@@ -4720,7 +4720,7 @@ define dso_local <8 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb8_jS_(<8 x i32>, < + } + + ; Function Attrs: noinline nounwind +-define dso_local <16 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb16_jS_(<16 x i32>, <16 x i32>) #18 { ++define internal <16 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb16_jS_(<16 x i32>, <16 x i32>) #18 { + %3 = alloca <16 x i32>, align 64 + %4 = alloca <16 x i32>, align 64 + %5 = alloca <16 x i32>, align 64 +@@ -4734,7 +4734,7 @@ define dso_local <16 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb16_jS_(<16 x i32> + } + + ; Function Attrs: noinline nounwind +-define dso_local <32 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb32_jS_(<32 x i32>, <32 x i32>) #19 { ++define internal <32 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb32_jS_(<32 x i32>, <32 x i32>) #19 { + %3 = alloca <32 x i32>, align 128 + %4 = alloca <32 x i32>, align 128 + %5 = alloca <32 x i32>, align 128 +@@ -4748,7 +4748,7 @@ define dso_local <32 x i32> @_Z24__cm_intrinsic_impl_uremu2CMvb32_jS_(<32 x i32> + } + + ; Function Attrs: noinline nounwind +-define dso_local void @__do_print_cm(i8*, i8*, i32, i64, i32*, i32, i32) #23 !dbg !15 { ++define internal void @__do_print_cm(i8*, i8*, i32, i64, i32*, i32, i32) #23 !dbg !15 { + %8 = alloca i8*, align 4 + %9 = alloca i8*, align 4 + %10 = alloca i32, align 4 +@@ -4988,7 +4988,7 @@ define internal void @_ZN7details16_cm_print_formatILi128EEEv15cm_surfaceindexju + } + + ; Function Attrs: noinline nounwind +-define dso_local void @__do_print_lz(i32, i8*, i32, i64, i32*, i32, i32, i32, i32) #26 !dbg !117 { ++define internal void @__do_print_lz(i32, i8*, i32, i64, i32*, i32, i32, i32, i32) #26 !dbg !117 { + %10 = alloca i32, align 4 + %11 = alloca i8*, align 4 + %12 = alloca i32, align 4 +@@ -5069,7 +5069,7 @@ define dso_local void @__do_print_lz(i32, i8*, i32, i64, i32*, i32, i32, i32, i3 + %67 = load i64, i64* %13, align 8, !dbg !159, !tbaa !19 + %68 = load <5 x i32>, <5 x i32>* %21, align 32, !dbg !160, !tbaa !7 + call void @_ZN13VaryingWriterC2ERjRPKjiyu2CMvb5_i(%class.VaryingWriter* %23, i32* dereferenceable(4) %19, i32** dereferenceable(4) %14, i32 %66, i64 %67, <5 x i32> %68), !dbg !161 +- %69 = call zeroext i1 @_ZN9PrintInfo14switchEncodingI13UniformWriter13VaryingWriterEEbNS_8EncodingET_T0_(i8 signext %62, %class.UniformWriter* byval align 32 %22, %class.VaryingWriter* byval align 32 %23), !dbg !162 ++ %69 = call zeroext i1 @_ZN9PrintInfo14switchEncodingI13UniformWriter13VaryingWriterEEbNS_8EncodingET_T0_(i8 signext %62, %class.UniformWriter* byval(%class.UniformWriter) align 32 %22, %class.VaryingWriter* byval(%class.VaryingWriter) align 32 %23), !dbg !162 + br label %70, !dbg !163 + + ;