diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 412683fd968b0..bab2b1835194e 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5763,6 +5763,8 @@ def mfp16 : Flag<["-"], "mfp16">, Group<m_wasm_Features_Group>; def mno_fp16 : Flag<["-"], "mno-fp16">, Group<m_wasm_Features_Group>; def mgc : Flag<["-"], "mgc">, Group<m_wasm_Features_Group>; def mno_gc : Flag<["-"], "mno-gc">, Group<m_wasm_Features_Group>; +def mlibcall_thread_context : Flag<["-"], "mlibcall-thread-context">, Group<m_wasm_Features_Group>; +def mno_libcall_thread_context : Flag<["-"], "mno-libcall-thread-context">, Group<m_wasm_Features_Group>; def mmultimemory : Flag<["-"], "mmultimemory">, Group<m_wasm_Features_Group>; def mno_multimemory : Flag<["-"], "mno-multimemory">, Group<m_wasm_Features_Group>; def mmultivalue : Flag<["-"], "mmultivalue">, Group<m_wasm_Features_Group>; diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index ba3ff11ad4ea9..ea5b06e4a6fde 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -123,6 +123,8 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__wasm_tail_call__"); if (HasWideArithmetic) Builder.defineMacro("__wasm_wide_arithmetic__"); + if (HasLibcallThreadContext) + Builder.defineMacro("__wasm_libcall_thread_context__"); // Note that not all wasm features appear here. 
For example, // HasCompatctImports @@ -386,6 +388,14 @@ bool WebAssemblyTargetInfo::handleTargetFeatures( HasWideArithmetic = false; continue; } + if (Feature == "+libcall-thread-context") { + HasLibcallThreadContext = true; + continue; + } + if (Feature == "-libcall-thread-context") { + HasLibcallThreadContext = false; + continue; + } Diags.Report(diag::err_opt_not_valid_with_opt) << Feature << "-target-feature"; diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 808342485cad0..9199dc9f79cd7 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -68,6 +68,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool HasExtendedConst = false; bool HasFP16 = false; bool HasGC = false; + bool HasLibcallThreadContext = false; bool HasMultiMemory = false; bool HasMultivalue = false; bool HasMutableGlobals = false; diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index e532ef0743cc2..6a1e7b6653348 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -88,6 +88,16 @@ static bool WantsPthread(const llvm::Triple &Triple, const ArgList &Args) { return WantsPthread; } +static bool WantsLibcallThreadContext(const llvm::Triple &Triple, + const ArgList &Args) { + // The feature defaults to on for WASIp3 and off elsewhere. An explicit + // -mlibcall-thread-context / -mno-libcall-thread-context always wins, so + // that the compile and link steps agree on the thread-context ABI. 
+ return Args.hasFlag(options::OPT_mlibcall_thread_context, + options::OPT_mno_libcall_thread_context, + Triple.getOS() == llvm::Triple::WASIp3); +} + void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -169,6 +179,9 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); + if (WantsLibcallThreadContext(ToolChain.getTriple(), Args)) + CmdArgs.push_back("--libcall-thread-context"); + if (WantsPthread(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); @@ -321,6 +334,11 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, options::OPT_fno_use_init_array, true)) CC1Args.push_back("-fno-use-init-array"); + if (WantsLibcallThreadContext(getTriple(), DriverArgs)) { + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+libcall-thread-context"); + } + // '-pthread' implies atomics, bulk-memory, mutable-globals, and sign-ext if (WantsPthread(getTriple(), DriverArgs)) { if (DriverArgs.hasFlag(options::OPT_mno_atomics, options::OPT_matomics, diff --git a/clang/test/Driver/wasm-features.c b/clang/test/Driver/wasm-features.c index 9e523aa5b53b6..5b6fa980854f3 100644 --- a/clang/test/Driver/wasm-features.c +++ b/clang/test/Driver/wasm-features.c @@ -118,3 +118,9 @@ // COMPACT-IMPORTS: "-target-feature" "+compact-imports" // NO-COMPACT-IMPORTS: "-target-feature" "-compact-imports" + +// RUN: %clang --target=wasm32-unknown-unknown -### %s -mlibcall-thread-context 2>&1 | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT +// RUN: %clang --target=wasm32-unknown-unknown -### %s -mno-libcall-thread-context 2>&1 | FileCheck %s -check-prefix=NO-LIBCALL-THREAD-CONTEXT + +// LIBCALL-THREAD-CONTEXT: "-target-feature" "+libcall-thread-context" +// NO-LIBCALL-THREAD-CONTEXT: "-target-feature" "-libcall-thread-context" diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c 
index 29a94aeec77a9..d7b2e2d5a279d 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -303,3 +303,15 @@ // RUN: | FileCheck -check-prefix=LINK_WALI_BASIC %s // LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" + +// `wasm32-wasip3` passes `+libcall-thread-context` by default. + +// RUN: %clang -### --target=wasm32-wasip3 --sysroot=/foo %s 2>&1 \ +// RUN: | FileCheck -check-prefix=LINK_WASIP3_LIBCALL_THREAD_CONTEXT %s +// LINK_WASIP3_LIBCALL_THREAD_CONTEXT: "-cc1" {{.*}} "-target-feature" "+libcall-thread-context" + +// `wasm32-wasip3` does not pass `+libcall-thread-context` when `-mno-libcall-thread-context` is used. + +// RUN: %clang -### --target=wasm32-wasip3 --sysroot=/foo -mno-libcall-thread-context %s 2>&1 \ +// RUN: | FileCheck -check-prefix=LINK_WASIP3_NO_LIBCALL_THREAD_CONTEXT %s +// LINK_WASIP3_NO_LIBCALL_THREAD_CONTEXT: "-cc1" {{.*}} "-target-feature" "-libcall-thread-context" diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c index 3244fa61c0a4b..7bb9429be507b 100644 --- a/clang/test/Preprocessor/wasm-target-features.c +++ b/clang/test/Preprocessor/wasm-target-features.c @@ -251,3 +251,12 @@ // RUN: | FileCheck %s -check-prefix=BLEEDING-EDGE-NO-SIMD128 // // BLEEDING-EDGE-NO-SIMD128-NOT: #define __wasm_simd128__ 1{{$}} + +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target wasm32-unknown-unknown -mlibcall-thread-context \ +// RUN: | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT +// RUN: %clang -E -dM %s -o - 2>&1 \ +// RUN: -target wasm64-unknown-unknown -mlibcall-thread-context \ +// RUN: | FileCheck %s -check-prefix=LIBCALL-THREAD-CONTEXT + +// LIBCALL-THREAD-CONTEXT: #define __wasm_libcall_thread_context__ 1{{$}} diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s new 
file mode 100644 index 0000000000000..63f3f5302966b --- /dev/null +++ b/lld/test/wasm/stack-pointer-abi.s @@ -0,0 +1,32 @@ +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t/use.o %t/use.s +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t/disallow.o %t/disallow.s +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t/start.o %t/start.s +# RUN: wasm-ld --libcall-thread-context -o %t/libcall.wasm %t/use.o %t/start.o +# RUN: obj2yaml %t/libcall.wasm | FileCheck %s --check-prefix=LIBCALL +# RUN: wasm-ld -o %t/global.wasm %t/disallow.o %t/start.o +# RUN: obj2yaml %t/global.wasm | FileCheck %s --check-prefix=GLOBAL + +#--- start.s + .globl _start +_start: + .functype _start () -> () + end_function + +#--- disallow.s +.section .custom_section.target_features,"",@ + .int8 1 + .int8 45 + .int8 22 + .ascii "libcall-thread-context" + +#--- use.s + +.section .custom_section.target_features,"",@ + .int8 1 + .int8 43 + .int8 22 + .ascii "libcall-thread-context" + +# LIBCALL: Name: __init_stack_pointer +# GLOBAL: Name: __stack_pointer diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s new file mode 100644 index 0000000000000..a5bf661b11c40 --- /dev/null +++ b/lld/test/wasm/thread-context-abi-mismatch.s @@ -0,0 +1,48 @@ +# Test that linking object files with mismatched thread context ABIs fails with an error. + +# RUN: split-file %s %t + +# Test that the presence of an import of __stack_pointer from the env module is treated +# as an indication that the global thread context ABI is being used, even if the +# libcall-thread-context feature is not disallowed. 
+ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t/start.o %t/start.s +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t/stack-pointer.o %t/stack-pointer.s +# RUN: not wasm-ld --libcall-thread-context %t/start.o %t/stack-pointer.o -o %t/fail.wasm 2>&1 | FileCheck %s + +# Test that explicitly disallowing the libcall-thread-context feature causes linking to fail +# with an error when other files use the feature. + +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t/disallow.o %t/disallow.s +# RUN: not wasm-ld --libcall-thread-context %t/start.o %t/disallow.o -o %t/fail.wasm 2>&1 | FileCheck %s + +# CHECK: error: --libcall-thread-context is disallowed by {{.*}} because it uses globals for thread context rather than library function calls. + +#--- start.s +.globl _start +_start: + .functype _start () -> () + end_function + +.section .custom_section.target_features,"",@ + .int8 1 + .int8 43 + .int8 22 + .ascii "libcall-thread-context" + +#--- stack-pointer.s +.globaltype __stack_pointer, i32 + +.globl use_stack_pointer +use_stack_pointer: + .functype use_stack_pointer () -> () + global.get __stack_pointer + drop + end_function + +#--- disallow.s +.section .custom_section.target_features,"",@ + .int8 1 + .int8 45 + .int8 22 + .ascii "libcall-thread-context" diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s new file mode 100644 index 0000000000000..4e9c547c11196 --- /dev/null +++ b/lld/test/wasm/tls-libcall.s @@ -0,0 +1,74 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o %t.wasm %t.o +# RUN: obj2yaml %t.wasm | FileCheck %s +# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS + +.globl __wasm_get_tls_base +__wasm_get_tls_base: + .functype __wasm_get_tls_base () -> (i32) + i32.const 0 + end_function + +.globl _start +_start: + .functype _start () -> (i32) 
+ call __wasm_get_tls_base + i32.const tls1@TLSREL + i32.add + i32.load 0 + call __wasm_get_tls_base + i32.const tls2@TLSREL + i32.add + i32.load 0 + i32.add + end_function + +.section .tdata.tls1,"",@ +.globl tls1 +tls1: + .int32 1 + .size tls1, 4 + +.section .tdata.tls2,"",@ +.globl tls2 +tls2: + .int32 2 + .size tls2, 4 + +.section .custom_section.target_features,"",@ + .int8 3 + .int8 43 + .int8 22 + .ascii "libcall-thread-context" + .int8 43 + .int8 11 + .ascii "bulk-memory" + .int8 43 + .int8 7 + .ascii "atomics" + + +# CHECK: GlobalNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: __init_stack_pointer +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: __init_tls_base +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: __tls_size +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: __tls_align + +# DIS-LABEL: <__wasm_init_memory>: + +# DIS-LABEL: <_start>: +# DIS-EMPTY: +# DIS-NEXT: call 4 +# DIS-NEXT: i32.const 0 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: call 4 +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: i32.add +# DIS-NEXT: end diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index 31e08e4e248a4..8408c1c6d2af3 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -35,6 +35,7 @@ class Symbol; class DefinedData; class GlobalSymbol; class DefinedFunction; +class UndefinedFunction; class DefinedGlobal; class UndefinedGlobal; class TableSymbol; @@ -65,6 +66,7 @@ struct Config { bool growableTable; bool gcSections; llvm::StringSet<> keepSections; + bool libcallThreadContext; std::optional> memoryImport; std::optional memoryExport; bool sharedMemory; @@ -252,6 +254,14 @@ struct Ctx { // Used as an address space for function pointers, with each function that // is used as a function pointer being allocated a slot. TableSymbol *indirectFunctionTable; + + // __wasm_set_tls_base + // Function used to set TLS base in libcall thread context modules. 
+ UndefinedFunction *setTLSBase; + + // __wasm_get_tls_base + // Function used to get TLS base in libcall thread context modules. + UndefinedFunction *getTLSBase; }; WasmSym sym; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 508c6b9df90bd..45ec3af5cea21 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -562,6 +562,7 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); + ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); @@ -906,6 +907,18 @@ createUndefinedGlobal(StringRef name, llvm::wasm::WasmGlobalType *type) { return sym; } +static UndefinedFunction * +createUndefinedFunction(StringRef name, std::optional importName, + std::optional importModule, + WasmSignature *signature) { + auto *sym = cast(symtab->addUndefinedFunction( + name, importName, importModule, WASM_SYMBOL_UNDEFINED, nullptr, signature, + true)); + ctx.arg.allowUndefinedSymbols.insert(sym->getName()); + sym->isUsedInRegularObj = true; + return sym; +} + static InputGlobal *createGlobal(StringRef name, bool isMutable) { llvm::wasm::WasmGlobal wasmGlobal; bool is64 = ctx.arg.is64.value_or(false); @@ -940,17 +953,26 @@ static void createSyntheticSymbols() { true}; static llvm::wasm::WasmGlobalType mutableGlobalTypeI64 = {WASM_TYPE_I64, true}; + ctx.sym.callCtors = symtab->addSyntheticFunction( "__wasm_call_ctors", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_call_ctors")); bool is64 = ctx.arg.is64.value_or(false); + auto stack_pointer_name = + ctx.arg.libcallThreadContext ? "__init_stack_pointer" : "__stack_pointer"; if (ctx.isPic) { - ctx.sym.stackPointer = - createUndefinedGlobal("__stack_pointer", ctx.arg.is64.value_or(false) - ? 
&mutableGlobalTypeI64 - : &mutableGlobalTypeI32); + if (ctx.arg.libcallThreadContext) { + ctx.sym.stackPointer = createUndefinedGlobal( + stack_pointer_name, + ctx.arg.is64.value_or(false) ? &globalTypeI64 : &globalTypeI32); + } else { + ctx.sym.stackPointer = createUndefinedGlobal(stack_pointer_name, + ctx.arg.is64.value_or(false) + ? &mutableGlobalTypeI64 + : &mutableGlobalTypeI32); + } // For PIC code, we import two global variables (__memory_base and // __table_base) from the environment and use these as the offset at // which to load our static data and function table. @@ -963,14 +985,18 @@ static void createSyntheticSymbols() { ctx.sym.tableBase->markLive(); } else { // For non-PIC code - ctx.sym.stackPointer = createGlobalVariable("__stack_pointer", true); + ctx.sym.stackPointer = + createGlobalVariable(stack_pointer_name, !ctx.arg.libcallThreadContext); ctx.sym.stackPointer->markLive(); } if (ctx.arg.sharedMemory) { // TLS symbols are all hidden/dso-local + auto tls_base_name = + ctx.arg.libcallThreadContext ? "__init_tls_base" : "__tls_base"; ctx.sym.tlsBase = - createGlobalVariable("__tls_base", true, WASM_SYMBOL_VISIBILITY_HIDDEN); + createGlobalVariable(tls_base_name, !ctx.arg.libcallThreadContext, + WASM_SYMBOL_VISIBILITY_HIDDEN); ctx.sym.tlsSize = createGlobalVariable("__tls_size", false, WASM_SYMBOL_VISIBILITY_HIDDEN); ctx.sym.tlsAlign = createGlobalVariable("__tls_align", false, @@ -979,6 +1005,19 @@ static void createSyntheticSymbols() { "__wasm_init_tls", WASM_SYMBOL_VISIBILITY_HIDDEN, make(is64 ? 
i64ArgSignature : i32ArgSignature, "__wasm_init_tls")); + if (ctx.arg.libcallThreadContext) { + ctx.sym.tlsBase->markLive(); + ctx.sym.tlsSize->markLive(); + ctx.sym.tlsAlign->markLive(); + static WasmSignature getTLSBaseSignature32{{ValType::I32}, {}}; + static WasmSignature getTLSBaseSignature64{{ValType::I64}, {}}; + ctx.sym.setTLSBase = createUndefinedFunction( + "__wasm_set_tls_base", std::nullopt, std::nullopt, + is64 ? &i64ArgSignature : &i32ArgSignature); + ctx.sym.getTLSBase = createUndefinedFunction( + "__wasm_get_tls_base", std::nullopt, std::nullopt, + is64 ? &getTLSBaseSignature64 : &getTLSBaseSignature32); + } } } diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 33ecf03176d36..cac21fa070d56 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -238,6 +238,9 @@ def page_size: JJ<"page-size=">, def initial_memory: JJ<"initial-memory=">, HelpText<"Initial size of the linear memory">; +def libcall_thread_context: FF<"libcall-thread-context">, + HelpText<"Use library calls for thread context access instead of globals.">; + def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index 9ba43ef0fae0b..74ac9aa944772 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -52,6 +52,15 @@ class SubSection { raw_string_ostream os{body}; }; +void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) { + if (ctx.arg.libcallThreadContext) { + writeU8(os, WASM_OPCODE_CALL, "call"); + writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index"); + } else { + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); + } +} } // namespace bool DylinkSection::isNeeded() const { @@ -474,11 +483,12 @@ void GlobalSection::generateRelocationCode(raw_ostream &os, bool TLS) const { if (auto *d = dyn_cast<DefinedData>(sym)) { // Get __memory_base - writeU8(os, 
WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); if (sym->isTLS()) - 
writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); - else + writeGetTLSBase(ctx, os); + else { + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, ctx.sym.memoryBase->getGlobalIndex(), "__memory_base"); + } // Add the virtual address of the data symbol writePtrConst(os, d->getVA(), is64, "offset"); @@ -519,7 +529,7 @@ void GlobalSection::writeBody() { // the correct runtime value during `__wasm_apply_global_relocs`. if (!ctx.arg.extendedConst && ctx.isPic && !sym->isTLS()) mutable_ = true; - // With multi-theadeding any TLS globals must be mutable since they get + // With multi-threading any TLS globals must be mutable since they get // set during `__wasm_apply_global_tls_relocs` if (ctx.arg.sharedMemory && sym->isTLS()) mutable_ = true; diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index 128931513b215..c3eb03adf3921 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -123,6 +123,15 @@ class Writer { llvm::SmallDenseMap segmentMap; }; +void writeSetTLSBase(const Ctx &ctx, raw_ostream &os) { + if (ctx.arg.libcallThreadContext) { + writeU8(os, WASM_OPCODE_CALL, "call"); + writeUleb128(os, ctx.sym.setTLSBase->getFunctionIndex(), "function index"); + } else { + writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); + writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); + } +} } // anonymous namespace void Writer::calculateCustomSections() { @@ -628,6 +637,21 @@ void Writer::populateTargetFeatures() { return segment->live && segment->isTLS(); }; tlsUsed = tlsUsed || llvm::any_of(file->segments, isTLS); + + // Older versions of LLVM will not disallow the `libcall-thread-context` + // feature when emitting globals for thread context, so we use the presence + // of an imported `__stack_pointer` symbol as a heuristic to detect this + // case and disallow the feature. 
+ if (!disallowed.contains("libcall-thread-context") && + ctx.arg.libcallThreadContext) { + if (llvm::any_of(file->getSymbols(), [](const auto &sym) { + return sym && sym->getName() == "__stack_pointer" && + sym->kind() == Symbol::UndefinedGlobalKind && + sym->importModule && sym->importModule == "env"; + })) { + disallowed.insert({"libcall-thread-context", std::string(fileName)}); + } + } } if (inferFeatures) @@ -649,6 +673,15 @@ void Writer::populateTargetFeatures() { "' feature must be used in order to use shared memory"); } + // Special case for `libcall-thread-context` to give a more specific error + // message + if (ctx.arg.libcallThreadContext) + if (disallowed.contains("libcall-thread-context")) + error("--libcall-thread-context is disallowed by " + + disallowed["libcall-thread-context"] + + " because it uses globals for thread context rather than library " + "function calls."); + if (tlsUsed) { for (auto feature : {"atomics", "bulk-memory"}) if (!allowed.contains(feature)) @@ -673,7 +706,9 @@ void Writer::populateTargetFeatures() { if (feature.Prefix == WASM_FEATURE_PREFIX_DISALLOWED) continue; objectFeatures.insert(feature.Name); - if (disallowed.contains(feature.Name)) + // libcall-thread-context is handled as a special case above + if (disallowed.contains(feature.Name) && + feature.Name != "libcall-thread-context") error(Twine("Target feature '") + feature.Name + "' used in " + fileName + " is disallowed by " + disallowed[feature.Name] + ". Use --no-check-features to suppress."); @@ -1349,9 +1384,9 @@ void Writer::createInitMemoryFunction() { "i32.add"); } - // When we initialize the TLS segment we also set the `__tls_base` - // global. This allows the runtime to use this static copy of the - // TLS data for the first/main thread. + // When we initialize the TLS segment we also set the TLS base. + // This allows the runtime to use this + // static copy of the TLS data for the first/main thread. 
if (ctx.arg.sharedMemory && s->isTLS()) { if (ctx.isPic) { // Cache the result of the addionion in local 0 @@ -1360,8 +1395,7 @@ void Writer::createInitMemoryFunction() { } else { writePtrConst(os, s->startVA, is64, "destination address"); } - writeU8(os, WASM_OPCODE_GLOBAL_SET, "GLOBAL_SET"); - writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); + writeSetTLSBase(ctx, os); if (ctx.isPic) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.tee"); writeUleb128(os, 1, "local 1"); @@ -1634,8 +1668,7 @@ void Writer::createInitTLSFunction() { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); writeUleb128(os, 0, "local index"); - writeU8(os, WASM_OPCODE_GLOBAL_SET, "global.set"); - writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "global index"); + writeSetTLSBase(ctx, os); // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend // op. diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 658e216992c01..f68e10704c71b 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -108,8 +108,9 @@ enum TOF { MO_MEMORY_BASE_REL, // On a symbol operand this indicates that the immediate is the symbol - // address relative the __tls_base wasm global. - // Only applicable to data symbols. + // address relative to the TLS base. This is retrieved through + // __wasm_get_tls_base() when using libcall thread context, and the __tls_base + // global otherwise. Only applicable to data symbols. 
MO_TLS_BASE_REL, // On a symbol operand this indicates that the immediate is the symbol diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 06cf468d02eba..187f99e9df07d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -37,6 +37,10 @@ def FeatureCallIndirectOverlong : SubtargetFeature<"call-indirect-overlong", "HasCallIndirectOverlong", "true", "Enable overlong encoding for call_indirect immediates">; +def FeatureLibcallThreadContext : + SubtargetFeature<"libcall-thread-context", "HasLibcallThreadContext", "true", + "Enable using library calls for managing thread context">; + def FeatureExceptionHandling : SubtargetFeature<"exception-handling", "HasExceptionHandling", "true", "Enable Wasm exception handling">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index edea99e629407..fdfb33610d18e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -233,11 +233,11 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) { // functions. It's OK to hardcode knowledge of specific symbols here; this // method is precisely there for fetching the signatures of known // Clang-provided symbols. - if (Name == "__stack_pointer" || Name == "__tls_base" || + if (Name == "__stack_pointer" || Name == "__init_stack_pointer" || + Name == "__tls_base" || Name == "__init_tls_base" || Name == "__memory_base" || Name == "__table_base" || Name == "__tls_size" || Name == "__tls_align") { - bool Mutable = - Name == "__stack_pointer" || Name == "__tls_base"; + bool Mutable = Name == "__stack_pointer" || Name == "__tls_base"; WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); WasmSym->setGlobalType(wasm::WasmGlobalType{ uint8_t(Subtarget.hasAddr64() ? 
wasm::WASM_TYPE_I64 @@ -265,6 +265,18 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) { wasm::ValType AddrType = Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32; Params.push_back(AddrType); + } else if (Name == "__wasm_get_stack_pointer" || + Name == "__wasm_get_tls_base") { + // These return a pointer, so the type follows the address width. + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + Returns.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64 + : wasm::ValType::I32); + } else if (Name == "__wasm_set_stack_pointer" || + Name == "__wasm_set_tls_base") { + // These take a pointer, so the type follows the address width. + WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); + Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64 + : wasm::ValType::I32); } else { // Function symbols WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION); WebAssembly::getLibcallSignature(Subtarget, Name, Returns, Params); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 5a1779c2c80fb..180c8446f1a39 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -135,13 +135,19 @@ bool WebAssemblyFrameLowering::needsSPForLocalFrame( any_of(MRI.use_operands(getSPReg(MF)), [](MachineOperand &MO) { return !MO.isImplicit(); }); + // With libcall thread context, we need SP in the prolog when debug + // info is present so we can allocate a local for DWARF to reference. 
+ bool NeedsSPForDebug = + MF.getFunction().getSubprogram() && + MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext(); + return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF) || - HasExplicitSPUse; + HasExplicitSPUse || NeedsSPForDebug; } // In function with EH pads, we need to make a copy of the value of -// __stack_pointer global in SP32/64 register, in order to use it when -// restoring __stack_pointer after an exception is caught. +// the stack pointer in the SP32/64 register, in order to use it when +// restoring the stack pointer after an exception is caught. 
bool WebAssemblyFrameLowering::needsPrologForEH( const MachineFunction &MF) const { auto EHType = MF.getTarget().getMCAsmInfo()->getExceptionHandlingType(); @@ -151,15 +157,16 @@ bool WebAssemblyFrameLowering::needsPrologForEH( /// Returns true if this function needs a local user-space stack pointer. /// Unlike a machine stack pointer, the wasm user stack pointer is a global -/// variable, so it is loaded into a register in the prolog. +/// variable or managed by library calls, so it is loaded +/// into a register in the prolog. bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF) const { return needsSPForLocalFrame(MF) || needsPrologForEH(MF); } /// Returns true if the local user-space stack pointer needs to be written back -/// to __stack_pointer global by this function (this is not meaningful if -/// needsSP is false). If false, the stack red zone can be used and only a local -/// SP is needed. +/// to the stack pointer global/thread context by this function (this is not +/// meaningful if needsSP is false). If false, the stack red zone can be used +/// and only a local SP is needed. 
bool WebAssemblyFrameLowering::needsSPWriteback( const MachineFunction &MF) const { auto &MFI = MF.getFrameInfo(); @@ -227,17 +234,25 @@ WebAssemblyFrameLowering::getOpcGlobSet(const MachineFunction &MF) { : WebAssembly::GLOBAL_SET_I32; } -void WebAssemblyFrameLowering::writeSPToGlobal( +void WebAssemblyFrameLowering::writeBackSP( unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator &InsertStore, const DebugLoc &DL) const { const auto *TII = MF.getSubtarget().getInstrInfo(); - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); + if (MF.getSubtarget<WebAssemblySubtarget>().hasLibcallThreadContext()) { + const char *ES = "__wasm_set_stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::CALL)) + .addExternalSymbol(SPSymbol) + .addReg(SrcReg); + } else { + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF))) - .addExternalSymbol(SPSymbol) - .addReg(SrcReg); + BuildMI(MBB, InsertStore, DL, TII->get(getOpcGlobSet(MF))) + .addExternalSymbol(SPSymbol) + .addReg(SrcReg); + } } MachineBasicBlock::iterator @@ -251,7 +266,7 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && needsSPWriteback(MF)) { DebugLoc DL = I->getDebugLoc(); - writeSPToGlobal(getSPReg(MF), MF, MBB, I, DL); + writeBackSP(getSPReg(MF), MF, MBB, I, DL); } return MBB.erase(I); } @@ -283,10 +298,17 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, if (StackSize) SPReg = MRI.createVirtualRegister(PtrRC); - const char *ES = "__stack_pointer"; - auto *SPSymbol = MF.createExternalSymbolName(ES); - BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg) - .addExternalSymbol(SPSymbol); + if (ST.hasLibcallThreadContext()) { + const char *ES = "__wasm_get_stack_pointer"; + auto *SPSymbol = 
MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CALL), SPReg) + .addExternalSymbol(SPSymbol); + } else { + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertPt, DL, TII->get(getOpcGlobGet(MF)), SPReg) + .addExternalSymbol(SPSymbol); + } bool HasBP = hasBP(MF); if (HasBP) { @@ -322,7 +344,7 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, .addReg(getSPReg(MF)); } if (StackSize && needsSPWriteback(MF)) { - writeSPToGlobal(getSPReg(MF), MF, MBB, InsertPt, DL); + writeBackSP(getSPReg(MF), MF, MBB, InsertPt, DL); } } @@ -364,7 +386,7 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, SPReg = SPFPReg; } - writeSPToGlobal(SPReg, MF, MBB, InsertPt, DL); + writeBackSP(SPReg, MF, MBB, InsertPt, DL); } bool WebAssemblyFrameLowering::isSupportedStackID( diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index 710d5173d64db..f836f4e95a93b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -23,7 +23,7 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { public: /// Size of the red zone for the user stack (leaf functions can use this much /// space below the stack pointer without writing it back to __stack_pointer - /// global). + /// global/__wasm_set_stack_pointer). // TODO: (ABI) Revisit and decide how large it should be. static const size_t RedZoneSize = 128; @@ -47,11 +47,10 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering { bool needsPrologForEH(const MachineFunction &MF) const; - /// Write SP back to __stack_pointer global. 
- void writeSPToGlobal(unsigned SrcReg, MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &InsertStore, - const DebugLoc &DL) const; + /// Write SP back to __stack_pointer global, or call __wasm_set_stack_pointer. + void writeBackSP(unsigned SrcReg, MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &InsertStore, + const DebugLoc &DL) const; // Returns the index of the WebAssembly local to which the stack object // FrameIndex in MF should be allocated, or std::nullopt. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 4a63be7ce9e34..c50adcd1fc2db 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -337,10 +337,8 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { MVT PtrVT = TLI.getPointerTy(CurDAG->getDataLayout()); switch (IntNo) { case Intrinsic::wasm_tls_base: { - MachineSDNode *TLSBase = CurDAG->getMachineNode( - GlobalGetIns, DL, PtrVT, MVT::Other, - CurDAG->getTargetExternalSymbol("__tls_base", PtrVT), - Node->getOperand(0)); + MachineSDNode *TLSBase = llvm::WebAssembly::getTLSBase( + *CurDAG, DL, Subtarget, Node->getOperand(0)); ReplaceNode(Node, TLSBase); return; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 47de46a6f7070..e069476256d96 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -2069,17 +2069,11 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op, model == GlobalValue::LocalDynamicTLSModel || (model == GlobalValue::GeneralDynamicTLSModel && getTargetMachine().shouldAssumeDSOLocal(GV))) { - // For DSO-local TLS variables we use offset from __tls_base + // For DSO-local TLS variables we use offset from __tls_base, or + // __wasm_get_tls_base() if using 
libcall thread context. MVT PtrVT = getPointerTy(DAG.getDataLayout()); - auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 - : WebAssembly::GLOBAL_GET_I32; - const char *BaseName = MF.createExternalSymbolName("__tls_base"); - - SDValue BaseAddr( - DAG.getMachineNode(GlobalGet, DL, PtrVT, - DAG.getTargetExternalSymbol(BaseName, PtrVT)), - 0); + SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0); SDValue TLSOffset = DAG.getTargetGlobalAddress( GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL); @@ -2265,14 +2259,7 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, } case Intrinsic::thread_pointer: { - MVT PtrVT = getPointerTy(DAG.getDataLayout()); - auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64 - : WebAssembly::GLOBAL_GET_I32; - const char *TlsBase = MF.createExternalSymbolName("__tls_base"); - return SDValue( - DAG.getMachineNode(GlobalGet, DL, PtrVT, - DAG.getTargetExternalSymbol(TlsBase, PtrVT)), - 0); + return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0); } } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 95b2021176b68..9f9e39aa17d3c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -53,6 +53,11 @@ def HasFP16 : def HasGC : Predicate<"Subtarget->hasGC()">, AssemblerPredicate<(all_of FeatureGC), "gc">; +def HasLibcallThreadContext : + Predicate<"Subtarget->hasLibcallThreadContext()">, + AssemblerPredicate<(all_of FeatureLibcallThreadContext), + "libcall-thread-context">; + def HasMultiMemory : Predicate<"Subtarget->hasMultiMemory()">, AssemblerPredicate<(all_of FeatureMultiMemory), "multimemory">; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp index 8ac32f939c5f2..5e370ca4ada31 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp 
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp @@ -377,8 +377,8 @@ bool WebAssemblyLateEHPrepare::removeUnnecessaryUnreachables( } // After the stack is unwound due to a thrown exception, the __stack_pointer -// global can point to an invalid address. This inserts instructions that -// restore __stack_pointer global. +// global (or the thread-context stack pointer) can point to an invalid address. +// This inserts instructions that restore the stack pointer state. bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { const auto *FrameLowering = static_cast<const WebAssemblyFrameLowering *>( MF.getSubtarget().getFrameLowering()); @@ -391,11 +391,11 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { continue; Changed = true; - // Insert __stack_pointer restoring instructions at the beginning of each EH + // Insert stack pointer restoring instructions at the beginning of each EH // pad, after the catch instruction. Here it is safe to assume that SP32 - // holds the latest value of __stack_pointer, because the only exception for - // this case is when a function uses the red zone, but that only happens - // with leaf functions, and we don't restore __stack_pointer in leaf + // holds the latest value of the stack pointer, because the only exception + // for this case is when a function uses the red zone, but that only happens + // with leaf functions, and we don't restore the stack pointer in leaf // functions anyway. auto InsertPos = MBB.begin(); // Skip EH_LABELs in the beginning of an EH pad if present. 
@@ -405,8 +405,8 @@ bool WebAssemblyLateEHPrepare::restoreStackPointer(MachineFunction &MF) { WebAssembly::isCatch(InsertPos->getOpcode()) && "catch/catch_all should be present in every EH pad at this point"); ++InsertPos; // Skip the catch instruction - FrameLowering->writeSPToGlobal(FrameLowering->getSPReg(MF), MF, MBB, - InsertPos, MBB.begin()->getDebugLoc()); + FrameLowering->writeBackSP(FrameLowering->getSPReg(MF), MF, MBB, InsertPos, + MBB.begin()->getDebugLoc()); } return Changed; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 641eef73044cd..50eef290bc5a7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -40,6 +40,14 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); + // WASIP3 implies using the libcall thread context by + // default, unless explicitly disabled. 
+ if (!FS.contains("libcall-thread-context") && !HasLibcallThreadContext && + TargetTriple.getOS() == Triple::WASIp3) { + ToggleFeature(WebAssembly::FeatureLibcallThreadContext); + HasLibcallThreadContext = true; + } + FeatureBitset Bits = getFeatureBits(); // bulk-memory implies bulk-memory-opt diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 798dea25ef5e6..5c6f4cb5b36ff 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasExtendedConst = false; bool HasFP16 = false; bool HasGC = false; + bool HasLibcallThreadContext = false; bool HasMultiMemory = false; bool HasMultivalue = false; bool HasMutableGlobals = false; @@ -116,6 +117,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool hasExtendedConst() const { return HasExtendedConst; } bool hasFP16() const { return HasFP16; } bool hasGC() const { return HasGC; } + bool hasLibcallThreadContext() const { return HasLibcallThreadContext; } bool hasMultiMemory() const { return HasMultiMemory; } bool hasMultivalue() const { return HasMultivalue; } bool hasMutableGlobals() const { return HasMutableGlobals; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index ac6c2969cecda..75485564fa210 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -292,7 +292,8 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { else if (StrippedTLS && !StrippedAtomics) stripAtomics(M); - recordFeatures(M, Features, StrippedAtomics || StrippedTLS); + recordFeatures(M, WasmTM->getTargetCPU(), Features, + StrippedAtomics || StrippedTLS); // Conservatively assume we have made some change return true; @@ 
-395,7 +396,8 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { return Stripped; } - void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) { + void recordFeatures(Module &M, StringRef CPU, const FeatureBitset &Features, + bool Stripped) { for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { if (Features[KV.Value]) { // Mark features as used @@ -407,12 +409,21 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { // Code compiled without atomics or bulk-memory may have had its atomics or // thread-local data lowered to nonatomic operations or non-thread-local // data. In that case, we mark the pseudo-feature "shared-mem" as disallowed - // to tell the linker that it would be unsafe to allow this code ot be used + // to tell the linker that it would be unsafe to allow this code to be used // in a module with shared memory. if (Stripped) { M.addModuleFlag(Module::ModFlagBehavior::Error, "wasm-feature-shared-mem", wasm::WASM_FEATURE_PREFIX_DISALLOWED); } + + // Mark libcall-thread-context as disallowed when not in use to + // prevent linking object files with incompatible threading ABIs. + // This is implicit for MVP since the feature is not supported at all. 
+ if (CPU != "mvp" && !Features[WebAssembly::FeatureLibcallThreadContext]) { + M.addModuleFlag(Module::ModFlagBehavior::Error, + "wasm-feature-libcall-thread-context", + wasm::WASM_FEATURE_PREFIX_DISALLOWED); + } } }; char CoalesceFeaturesAndStripAtomics::ID = 0; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp index 890486778e700..ac8df67fe7557 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp @@ -194,3 +194,26 @@ bool WebAssembly::canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget) { return ResultSize <= 1 || canLowerMultivalueReturn(Subtarget); } + +MachineSDNode *WebAssembly::getTLSBase(SelectionDAG &DAG, const SDLoc &DL, + const WebAssemblySubtarget *Subtarget, + SDValue Chain) { + MVT PtrVT = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32; + + unsigned Opcode; + const char *SymName; + if (Subtarget->hasLibcallThreadContext()) { + Opcode = WebAssembly::CALL; + SymName = "__wasm_get_tls_base"; + } else { + Opcode = PtrVT == MVT::i64 ? 
WebAssembly::GLOBAL_GET_I64 + : WebAssembly::GLOBAL_GET_I32; + SymName = "__tls_base"; + } + + SDValue Sym = DAG.getTargetExternalSymbol(SymName, PtrVT); + + if (Chain.getNode()) + return DAG.getMachineNode(Opcode, DL, {PtrVT, MVT::Other}, {Sym, Chain}); + return DAG.getMachineNode(Opcode, DL, PtrVT, Sym); +} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h index 046b1b5db2a79..0827791d93657 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H #define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" namespace llvm { @@ -27,6 +28,9 @@ class MCSymbolWasm; class TargetRegisterClass; class WebAssemblyFunctionInfo; class WebAssemblySubtarget; +class MachineSDNode; +class SDLoc; +class SelectionDAG; namespace WebAssembly { @@ -73,6 +77,13 @@ bool canLowerMultivalueReturn(const WebAssemblySubtarget *Subtarget); /// memory. 
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget); +/// Get the TLS base value for the current target. If using libcall thread +/// context this is a call to __wasm_get_tls_base, otherwise a global.get of +/// __tls_base. A non-null Chain is threaded through the created node. +MachineSDNode *getTLSBase(SelectionDAG &DAG, const SDLoc &DL, + const WebAssemblySubtarget *Subtarget, + SDValue Chain = SDValue()); + } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/test/CodeGen/WebAssembly/stack-abi.ll b/llvm/test/CodeGen/WebAssembly/stack-abi.ll new file mode 100644 index 0000000000000..2cd6089dd5d65 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/stack-abi.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+libcall-thread-context | FileCheck --check-prefix=LIBCALL --implicit-check-not="global.get __stack_pointer" --implicit-check-not="global.set __stack_pointer" %s +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=-libcall-thread-context | FileCheck --check-prefix=GLOBAL --implicit-check-not="call __wasm_get_stack_pointer" --implicit-check-not="call __wasm_set_stack_pointer" %s + +; Check that the stack pointer is only accessed through the mechanism selected +; by libcall-thread-context. The other mechanism is rejected across the whole +; output with --implicit-check-not; a trailing -NOT directive would only cover +; the text after the last positive match. + +declare void @force_sp_save() +define void @use_stack() { + %1 = alloca i32, align 4 + %2 = alloca ptr, align 4 + store ptr %1, ptr %2, align 4 + call void @force_sp_save() + ret void +} + +; LIBCALL-LABEL: use_stack: +; LIBCALL: call __wasm_get_stack_pointer +; LIBCALL: call __wasm_set_stack_pointer + +; GLOBAL-LABEL: use_stack: +; GLOBAL: global.get __stack_pointer +; GLOBAL: global.set __stack_pointer diff --git a/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll b/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll index 0e46b96591816..cf3b3fc5a2f4a 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-attrs.ll @@ -53,7 +53,7 @@ attributes #2 = { "target-features"="+reference-types" } ; CHECK: i32.store ; Features in function attributes: -; +atomics, +nontrapping-fptoint, 
+reference-types +; +atomics, +nontrapping-fptoint, +reference-types, -libcall-thread-context ; CHECK-LABEL: .custom_section.target_features,"",@ ; CHECK-NEXT: .int8 4 ; CHECK-NEXT: .int8 43 @@ -70,7 +70,7 @@ attributes #2 = { "target-features"="+reference-types" } ; CHECK-NEXT: .ascii "reference-types" ; Features in function attributes + features specified by -mattr= option: -; +atomics, +nontrapping-fptoint, +reference-types, +simd128 +; +atomics, +nontrapping-fptoint, +reference-types, +simd128, -libcall-thread-context ; SIMD128-LABEL: .custom_section.target_features,"",@ ; SIMD128-NEXT: .int8 5 ; SIMD128-NEXT: .int8 43 diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index 4a4973b034637..bca45ec85e0bf 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -12,9 +12,9 @@ target triple = "wasm32-unknown-unknown" ; mvp: should not contain the target features section ; MVP-NOT: .custom_section.target_features,"",@ -; generic: +call-indirect-overlong, +multivalue, +mutable-globals, +reference-types, +sign-ext +; generic: +call-indirect-overlong, +multivalue, +mutable-globals, +reference-types, +sign-ext, -libcall-thread-context ; GENERIC-LABEL: .custom_section.target_features,"",@ -; GENERIC-NEXT: .int8 8 +; GENERIC-NEXT: .int8 9 ; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 11 ; GENERIC-NEXT: .ascii "bulk-memory" @@ -24,6 +24,9 @@ target triple = "wasm32-unknown-unknown" ; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 22 ; GENERIC-NEXT: .ascii "call-indirect-overlong" +; GENERIC-NEXT: .int8 45 +; GENERIC-NEXT: .int8 22 +; GENERIC-NEXT: .ascii "libcall-thread-context" ; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 10 ; GENERIC-NEXT: .ascii "multivalue" @@ -41,9 +44,9 @@ target triple = "wasm32-unknown-unknown" ; GENERIC-NEXT: .ascii "sign-ext" ; lime1: +bulk-memory-opt, +call-indirect-overlong, +extended-const, 
+multivalue, -; +mutable-globals, +nontrapping-fptoint, +sign-ext +; +mutable-globals, +nontrapping-fptoint, +sign-ext, -libcall-thread-context ; LIME1-LABEL: .custom_section.target_features,"",@ -; LIME1-NEXT: .int8 7 +; LIME1-NEXT: .int8 8 ; LIME1-NEXT: .int8 43 ; LIME1-NEXT: .int8 15 ; LIME1-NEXT: .ascii "bulk-memory-opt" @@ -53,6 +56,9 @@ target triple = "wasm32-unknown-unknown" ; LIME1-NEXT: .int8 43 ; LIME1-NEXT: .int8 14 ; LIME1-NEXT: .ascii "extended-const" +; LIME1-NEXT: .int8 45 +; LIME1-NEXT: .int8 22 +; LIME1-NEXT: .ascii "libcall-thread-context" ; LIME1-NEXT: .int8 43 ; LIME1-NEXT: .int8 10 ; LIME1-NEXT: .ascii "multivalue" @@ -71,8 +77,9 @@ target triple = "wasm32-unknown-unknown" ; +extended-const, +fp16, +gc, +multimemory, +multivalue, ; +mutable-globals, +nontrapping-fptoint, +relaxed-simd, ; +reference-types, +simd128, +sign-ext, +tail-call +; -libcall-thread-context ; BLEEDING-EDGE-LABEL: .section .custom_section.target_features,"",@ -; BLEEDING-EDGE-NEXT: .int8 17 +; BLEEDING-EDGE-NEXT: .int8 18 ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 7 ; BLEEDING-EDGE-NEXT: .ascii "atomics" @@ -97,6 +104,9 @@ target triple = "wasm32-unknown-unknown" ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 2 ; BLEEDING-EDGE-NEXT: .ascii "gc" +; BLEEDING-EDGE-NEXT: .int8 45 +; BLEEDING-EDGE-NEXT: .int8 22 +; BLEEDING-EDGE-NEXT: .ascii "libcall-thread-context" ; BLEEDING-EDGE-NEXT: .int8 43 ; BLEEDING-EDGE-NEXT: .int8 11 ; BLEEDING-EDGE-NEXT: .ascii "multimemory" diff --git a/llvm/test/CodeGen/WebAssembly/target-features-thread-context.ll b/llvm/test/CodeGen/WebAssembly/target-features-thread-context.ll new file mode 100644 index 0000000000000..703486e1971a2 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/target-features-thread-context.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s -mtriple=wasm32-wasip3 | FileCheck %s --check-prefix=WASIP3 +; RUN: llc < %s -mtriple=wasm32-wasip3 -mattr=-libcall-thread-context | FileCheck %s 
--check-prefix=EXPLICIT-DISABLE +; RUN: llc < %s -mtriple=wasm32-wasip1 | FileCheck %s --check-prefix=WASIP1 +; RUN: llc < %s -mtriple=wasm32-wasip2 | FileCheck %s --check-prefix=WASIP2 + +; Test that wasip3 target automatically enables libcall-thread-context + +; WASIP3: .section .custom_section.target_features,"",@ +; WASIP3-NEXT: .int8 9 +; WASIP3-NEXT: .int8 43 +; WASIP3-NEXT: .int8 11 +; WASIP3-NEXT: .ascii "bulk-memory" +; WASIP3-NEXT: .int8 43 +; WASIP3-NEXT: .int8 15 +; WASIP3-NEXT: .ascii "bulk-memory-opt" +; WASIP3-NEXT: .int8 43 +; WASIP3-NEXT: .int8 22 +; WASIP3-NEXT: .ascii "call-indirect-overlong" +; WASIP3-NEXT: .int8 43 +; WASIP3-NEXT: .int8 22 +; WASIP3-NEXT: .ascii "libcall-thread-context" + +; EXPLICIT-DISABLE: .section .custom_section.target_features,"",@ +; EXPLICIT-DISABLE-NEXT: .int8 9 +; EXPLICIT-DISABLE-NEXT: .int8 43 +; EXPLICIT-DISABLE-NEXT: .int8 11 +; EXPLICIT-DISABLE-NEXT: .ascii "bulk-memory" +; EXPLICIT-DISABLE-NEXT: .int8 43 +; EXPLICIT-DISABLE-NEXT: .int8 15 +; EXPLICIT-DISABLE-NEXT: .ascii "bulk-memory-opt" +; EXPLICIT-DISABLE-NEXT: .int8 43 +; EXPLICIT-DISABLE-NEXT: .int8 22 +; EXPLICIT-DISABLE-NEXT: .ascii "call-indirect-overlong" +; EXPLICIT-DISABLE-NEXT: .int8 45 +; EXPLICIT-DISABLE-NEXT: .int8 22 +; EXPLICIT-DISABLE-NEXT: .ascii "libcall-thread-context" + +; WASIP1: .section .custom_section.target_features,"",@ +; WASIP1-NEXT: .int8 9 +; WASIP1-NEXT: .int8 43 +; WASIP1-NEXT: .int8 11 +; WASIP1-NEXT: .ascii "bulk-memory" +; WASIP1-NEXT: .int8 43 +; WASIP1-NEXT: .int8 15 +; WASIP1-NEXT: .ascii "bulk-memory-opt" +; WASIP1-NEXT: .int8 43 +; WASIP1-NEXT: .int8 22 +; WASIP1-NEXT: .ascii "call-indirect-overlong" +; WASIP1-NEXT: .int8 45 +; WASIP1-NEXT: .int8 22 +; WASIP1-NEXT: .ascii "libcall-thread-context" + +; WASIP2: .section .custom_section.target_features,"",@ +; WASIP2-NEXT: .int8 9 +; WASIP2-NEXT: .int8 43 +; WASIP2-NEXT: .int8 11 +; WASIP2-NEXT: .ascii "bulk-memory" +; WASIP2-NEXT: .int8 43 +; WASIP2-NEXT: .int8 15 +; 
WASIP2-NEXT: .ascii "bulk-memory-opt" +; WASIP2-NEXT: .int8 43 +; WASIP2-NEXT: .int8 22 +; WASIP2-NEXT: .ascii "call-indirect-overlong" +; WASIP2-NEXT: .int8 45 +; WASIP2-NEXT: .int8 22 +; WASIP2-NEXT: .ascii "libcall-thread-context" + +define void @test() { + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll index 4abe01a73aeee..39d9be00a0eb0 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-tls.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-tls.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mcpu=mvp -mattr=-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM ; RUN: llc < %s -mcpu=mvp -mattr=+bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM +; RUN: llc < %s -mcpu=mvp -mattr=+libcall-thread-context,-bulk-memory,atomics | FileCheck %s --check-prefixes NO-BULK-MEM-LIBCALL +; RUN: llc < %s -mcpu=mvp -mattr=+libcall-thread-context,bulk-memory,atomics | FileCheck %s --check-prefixes BULK-MEM-LIBCALL ; Test that the target features section contains -atomics or +atomics ; for modules that have thread local storage in their source. 
@@ -32,3 +34,34 @@ target triple = "wasm32-unknown-unknown" ; BULK-MEM-NEXT: .int8 15 ; BULK-MEM-NEXT: .ascii "bulk-memory-opt" ; BULK-MEM-NEXT: .tbss.foo,"T",@ + +; -bulk-memory,+libcall-thread-context +; NO-BULK-MEM-LIBCALL-LABEL: .custom_section.target_features,"",@ +; NO-BULK-MEM-LIBCALL-NEXT: .int8 3 +; NO-BULK-MEM-LIBCALL-NEXT: .int8 43 +; NO-BULK-MEM-LIBCALL-NEXT: .int8 7 +; NO-BULK-MEM-LIBCALL-NEXT: .ascii "atomics" +; NO-BULK-MEM-LIBCALL-NEXT: .int8 43 +; NO-BULK-MEM-LIBCALL-NEXT: .int8 22 +; NO-BULK-MEM-LIBCALL-NEXT: .ascii "libcall-thread-context" +; NO-BULK-MEM-LIBCALL-NEXT: .int8 45 +; NO-BULK-MEM-LIBCALL-NEXT: .int8 10 +; NO-BULK-MEM-LIBCALL-NEXT: .ascii "shared-mem" +; NO-BULK-MEM-LIBCALL-NEXT: .bss.foo,"",@ + +; +bulk-memory,+libcall-thread-context +; BULK-MEM-LIBCALL-LABEL: .custom_section.target_features,"",@ +; BULK-MEM-LIBCALL-NEXT: .int8 4 +; BULK-MEM-LIBCALL-NEXT: .int8 43 +; BULK-MEM-LIBCALL-NEXT: .int8 7 +; BULK-MEM-LIBCALL-NEXT: .ascii "atomics" +; BULK-MEM-LIBCALL-NEXT: .int8 43 +; BULK-MEM-LIBCALL-NEXT: .int8 11 +; BULK-MEM-LIBCALL-NEXT: .ascii "bulk-memory" +; BULK-MEM-LIBCALL-NEXT: .int8 43 +; BULK-MEM-LIBCALL-NEXT: .int8 15 +; BULK-MEM-LIBCALL-NEXT: .ascii "bulk-memory-opt" +; BULK-MEM-LIBCALL-NEXT: .int8 43 +; BULK-MEM-LIBCALL-NEXT: .int8 22 +; BULK-MEM-LIBCALL-NEXT: .ascii "libcall-thread-context" +; BULK-MEM-LIBCALL-NEXT: .tbss.foo,"T",@ diff --git a/llvm/test/CodeGen/WebAssembly/thread_pointer.ll b/llvm/test/CodeGen/WebAssembly/thread_pointer.ll index 18716988673db..1be5e7833e05c 100644 --- a/llvm/test/CodeGen/WebAssembly/thread_pointer.ll +++ b/llvm/test/CodeGen/WebAssembly/thread_pointer.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=wasm32-unknown-unknown | FileCheck %s --check-prefix=WASM32 ; RUN: llc < %s -mtriple=wasm64-unknown-unknown | FileCheck %s --check-prefix=WASM64 +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+libcall-thread-context | 
FileCheck %s --check-prefix=WASM32-LIBCALL declare ptr @llvm.thread.pointer() @@ -16,6 +17,13 @@ define ptr @thread_pointer() nounwind { ; WASM64-NEXT: # %bb.0: ; WASM64-NEXT: global.get __tls_base ; WASM64-NEXT: # fallthrough-return +; +; WASM32-LIBCALL-LABEL: thread_pointer: +; WASM32-LIBCALL: .functype thread_pointer () -> (i32) +; WASM32-LIBCALL-NEXT: # %bb.0: +; WASM32-LIBCALL-NEXT: call __wasm_get_tls_base +; WASM32-LIBCALL-NEXT: # fallthrough-return +; %1 = tail call ptr @llvm.thread.pointer() ret ptr %1 } diff --git a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll index dc0d40c7973ad..586f5234d9bc4 100644 --- a/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll +++ b/llvm/test/CodeGen/WebAssembly/tls-local-exec.ll @@ -1,13 +1,16 @@ ; Run the tests with the `localexec` TLS mode specified. ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,TLS %s ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS %s +; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+libcall-thread-context,bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS-LIBCALL %s ; Also, run the same tests without a specified TLS mode--this should still emit `localexec` code on non-Emscripten targtes which don't currently support dynamic linking. 
; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,TLS %s ; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS %s +; RUN: sed -e 's/\[\[TLS_MODE\]\]//' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+libcall-thread-context,bulk-memory,atomics -fast-isel - | FileCheck --check-prefixes=CHECK,TLS-LIBCALL %s ; Finally, when bulk memory is disabled, no TLS code should be generated. ; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=-bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,NO-TLS %s +; RUN: sed -e 's/\[\[TLS_MODE\]\]/(localexec)/' %s | llc -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+libcall-thread-context,-bulk-memory,atomics - | FileCheck --check-prefixes=CHECK,NO-TLS %s target triple = "wasm32-unknown-unknown" ; CHECK-LABEL: address_of_tls: @@ -18,6 +21,11 @@ define i32 @address_of_tls() { ; TLS-NEXT: i32.add ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) @@ -33,6 +41,11 @@ define i32 @address_of_tls_external() { ; TLS-NEXT: i32.add ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls_external@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const tls_external ; NO-TLS-NEXT: return %p = call ptr @llvm.threadlocal.address.p0(ptr @tls_external) @@ 
-48,6 +61,11 @@ define ptr @ptr_to_tls() { ; TLS-NEXT: i32.add ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const tls ; NO-TLS-NEXT: return %p = call ptr @llvm.threadlocal.address.p0(ptr @tls) @@ -63,6 +81,12 @@ define i32 @tls_load() { ; TLS-NEXT: i32.load 0 ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: i32.load 0 + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.load tls ; NO-TLS-NEXT: return @@ -80,6 +104,12 @@ define void @tls_store(i32 %x) { ; TLS-NEXT: i32.store 0 ; TLS-NEXT: return + ; TLS-LIBCALL-DAG: call __wasm_get_tls_base + ; TLS-LIBCALL-DAG: i32.const tls@TLSREL + ; TLS-LIBCALL-NEXT: i32.add + ; TLS-LIBCALL-NEXT: i32.store 0 + ; TLS-LIBCALL-NEXT: return + ; NO-TLS-NEXT: i32.const 0 ; NO-TLS-NEXT: i32.store tls ; NO-TLS-NEXT: return @@ -99,6 +129,7 @@ define i32 @tls_size() { ; CHECK: .type tls,@object ; TLS-NEXT: .section .tbss.tls,"T",@ +; TLS-LIBCALL-NEXT: .section .tbss.tls,"T",@ ; NO-TLS-NEXT: .section .bss.tls,"",@ ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: tls: