mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-28 13:28:01 +03:00
Update to LLVM 16.0.0, switch to upstream LLVM
This commit is contained in:
parent
7081b89e97
commit
fb88e1c1c9
24 changed files with 746 additions and 231 deletions
|
@ -7,9 +7,9 @@ git submodule -q update --init --depth 1 $(awk '/path/ && !/llvm/ { print $3 }'
|
||||||
|
|
||||||
# Prefer newer Clang than in base system (see also .ci/install-freebsd.sh)
|
# Prefer newer Clang than in base system (see also .ci/install-freebsd.sh)
|
||||||
# libc++ isn't in llvm* packages, so download manually
|
# libc++ isn't in llvm* packages, so download manually
|
||||||
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/llvm-project-15.0.6.src.tar.xz
|
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-project-16.0.0.src.tar.xz
|
||||||
tar xf llvm*.tar.xz
|
tar xf llvm*.tar.xz
|
||||||
export CC=clang15 CXX=clang++15
|
export CC=clang16 CXX=clang++16
|
||||||
cmake -B libcxx_build -G Ninja -S llvm*/libcxx \
|
cmake -B libcxx_build -G Ninja -S llvm*/libcxx \
|
||||||
-DLLVM_CCACHE_BUILD=ON \
|
-DLLVM_CCACHE_BUILD=ON \
|
||||||
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
|
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \
|
||||||
|
@ -20,7 +20,7 @@ export CXXFLAGS="$CXXFLAGS -nostdinc++ -isystem$PWD/libcxx_prefix/include/c++/v1
|
||||||
export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt"
|
export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt"
|
||||||
|
|
||||||
CONFIGURE_ARGS="
|
CONFIGURE_ARGS="
|
||||||
-DWITH_LLVM=OFF
|
-DWITH_LLVM=ON
|
||||||
-DUSE_SDL=OFF
|
-DUSE_SDL=OFF
|
||||||
-DUSE_PRECOMPILED_HEADERS=OFF
|
-DUSE_PRECOMPILED_HEADERS=OFF
|
||||||
-DUSE_NATIVE_INSTRUCTIONS=OFF
|
-DUSE_NATIVE_INSTRUCTIONS=OFF
|
||||||
|
|
|
@ -9,16 +9,11 @@ if [ -z "$CIRRUS_CI" ]; then
|
||||||
cd rpcs3 || exit 1
|
cd rpcs3 || exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Pull all the submodules except llvm, since it is built separately and we just download that build
|
# Pull all the submodules except llvm
|
||||||
# Note: Tried to use git submodule status, but it takes over 20 seconds
|
# Note: Tried to use git submodule status, but it takes over 20 seconds
|
||||||
# shellcheck disable=SC2046
|
# shellcheck disable=SC2046
|
||||||
git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules)
|
git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules)
|
||||||
|
|
||||||
# Download pre-compiled llvm libs
|
|
||||||
curl -sLO https://github.com/RPCS3/llvm-mirror/releases/download/custom-build/llvmlibs-linux.tar.gz
|
|
||||||
mkdir llvmlibs
|
|
||||||
tar -xzf ./llvmlibs-linux.tar.gz -C llvmlibs
|
|
||||||
|
|
||||||
mkdir build && cd build || exit 1
|
mkdir build && cd build || exit 1
|
||||||
|
|
||||||
if [ "$COMPILER" = "gcc" ]; then
|
if [ "$COMPILER" = "gcc" ]; then
|
||||||
|
@ -42,8 +37,6 @@ export CFLAGS="$CFLAGS -fuse-ld=${LINKER}"
|
||||||
|
|
||||||
cmake .. \
|
cmake .. \
|
||||||
-DCMAKE_INSTALL_PREFIX=/usr \
|
-DCMAKE_INSTALL_PREFIX=/usr \
|
||||||
-DBUILD_LLVM_SUBMODULE=OFF \
|
|
||||||
-DLLVM_DIR=llvmlibs/lib/cmake/llvm/ \
|
|
||||||
-DUSE_NATIVE_INSTRUCTIONS=OFF \
|
-DUSE_NATIVE_INSTRUCTIONS=OFF \
|
||||||
-DUSE_PRECOMPILED_HEADERS=OFF \
|
-DUSE_PRECOMPILED_HEADERS=OFF \
|
||||||
-DCMAKE_C_FLAGS="$CFLAGS" \
|
-DCMAKE_C_FLAGS="$CFLAGS" \
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
#!/bin/sh -ex
|
#!/bin/sh -ex
|
||||||
|
|
||||||
brew install -f --overwrite llvm@14 nasm ninja git p7zip create-dmg ccache
|
brew install -f --overwrite llvm@16 nasm ninja git p7zip create-dmg ccache
|
||||||
|
|
||||||
#/usr/sbin/softwareupdate --install-rosetta --agree-to-license
|
#/usr/sbin/softwareupdate --install-rosetta --agree-to-license
|
||||||
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
||||||
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@14 sdl2 glew cmake
|
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@16 sdl2 glew cmake
|
||||||
|
|
||||||
#export MACOSX_DEPLOYMENT_TARGET=12.0
|
#export MACOSX_DEPLOYMENT_TARGET=12.0
|
||||||
export CXX=clang++
|
export CXX=clang++
|
||||||
|
@ -33,7 +33,7 @@ cd ..
|
||||||
export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5"
|
export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5"
|
||||||
export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2"
|
export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2"
|
||||||
|
|
||||||
export PATH="$BREW_PATH/opt/llvm@14/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
|
export PATH="$BREW_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
|
||||||
export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib"
|
export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib"
|
||||||
export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie"
|
export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie"
|
||||||
export LIBRARY_PATH="$BREW_X64_PATH/lib"
|
export LIBRARY_PATH="$BREW_X64_PATH/lib"
|
||||||
|
|
|
@ -15,7 +15,7 @@ echo "AVVER=$AVVER" >> ../.ci/ci-vars.env
|
||||||
cd bin
|
cd bin
|
||||||
mkdir "rpcs3.app/Contents/lib/"
|
mkdir "rpcs3.app/Contents/lib/"
|
||||||
|
|
||||||
cp "/usr/local/Homebrew/opt/llvm@14/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
|
cp "/usr/local/Homebrew/opt/llvm@16/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
|
||||||
|
|
||||||
rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \
|
rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \
|
||||||
"rpcs3.app/Contents/Frameworks/QtQml.framework" \
|
"rpcs3.app/Contents/Frameworks/QtQml.framework" \
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/bin/sh -ex
|
#!/bin/sh -ex
|
||||||
|
|
||||||
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z.sha256"
|
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z.sha256"
|
||||||
curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"
|
curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"
|
||||||
|
|
|
@ -9,7 +9,7 @@ export ASSUME_ALWAYS_YES=true
|
||||||
pkg info # debug
|
pkg info # debug
|
||||||
|
|
||||||
# Prefer newer Clang than in base system (see also .ci/build-freebsd.sh)
|
# Prefer newer Clang than in base system (see also .ci/build-freebsd.sh)
|
||||||
pkg install llvm15
|
pkg install llvm16
|
||||||
|
|
||||||
# Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets)
|
# Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets)
|
||||||
pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg
|
pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg
|
||||||
|
|
|
@ -19,7 +19,7 @@ QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}"
|
||||||
QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}"
|
QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}"
|
||||||
QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}"
|
QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}"
|
||||||
QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}"
|
QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}"
|
||||||
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z'
|
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z'
|
||||||
GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z'
|
GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z'
|
||||||
VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe"
|
VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe"
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,7 @@ windows_task:
|
||||||
|
|
||||||
linux_task:
|
linux_task:
|
||||||
container:
|
container:
|
||||||
image: rpcs3/rpcs3-ci-bionic:1.6
|
image: rpcs3/rpcs3-ci-bionic:1.7
|
||||||
cpu: 4
|
cpu: 4
|
||||||
memory: 16G
|
memory: 16G
|
||||||
env:
|
env:
|
||||||
|
|
34
3rdparty/llvm.cmake
vendored
34
3rdparty/llvm.cmake
vendored
|
@ -2,7 +2,7 @@ if(WITH_LLVM)
|
||||||
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
|
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
|
||||||
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)
|
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)
|
||||||
|
|
||||||
if(BUILD_LLVM_SUBMODULE)
|
if(BUILD_LLVM)
|
||||||
message(STATUS "LLVM will be built from the submodule.")
|
message(STATUS "LLVM will be built from the submodule.")
|
||||||
|
|
||||||
set(LLVM_TARGETS_TO_BUILD "AArch64;X86")
|
set(LLVM_TARGETS_TO_BUILD "AArch64;X86")
|
||||||
|
@ -38,49 +38,33 @@ if(WITH_LLVM)
|
||||||
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
|
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
|
||||||
|
|
||||||
# now tries to find LLVM again
|
# now tries to find LLVM again
|
||||||
find_package(LLVM 13.0 CONFIG)
|
find_package(LLVM 16.0 CONFIG)
|
||||||
if(NOT LLVM_FOUND)
|
if(NOT LLVM_FOUND)
|
||||||
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
|
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
else()
|
else()
|
||||||
message(STATUS "Using prebuilt LLVM")
|
message(STATUS "Using prebuilt or system LLVM")
|
||||||
|
|
||||||
if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}")
|
if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}")
|
||||||
# change relative LLVM_DIR to be relative to the source dir
|
# change relative LLVM_DIR to be relative to the source dir
|
||||||
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
|
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
find_package(LLVM 13.0 CONFIG)
|
find_package(LLVM 16.0 CONFIG)
|
||||||
|
|
||||||
if (NOT LLVM_FOUND)
|
if (NOT LLVM_FOUND)
|
||||||
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 11)
|
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 16)
|
||||||
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 11.0. \
|
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 16. \
|
||||||
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
|
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \
|
message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \
|
||||||
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
|
Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(LLVM_LIBS LLVMMCJIT)
|
set(LLVM_LIBS LLVM)
|
||||||
|
|
||||||
if(COMPILER_X86)
|
|
||||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(COMPILER_ARM)
|
|
||||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMAArch64CodeGen LLVMAArch64AsmParser)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux")
|
|
||||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(CMAKE_SYSTEM MATCHES "Linux")
|
|
||||||
set(LLVM_LIBS ${LLVM_LIBS} LLVMPerfJITEvents)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
add_library(3rdparty_llvm INTERFACE)
|
add_library(3rdparty_llvm INTERFACE)
|
||||||
target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS})
|
target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS})
|
||||||
|
|
|
@ -111,7 +111,7 @@ git submodule update --init
|
||||||
|
|
||||||
Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
|
Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
|
||||||
|
|
||||||
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
|
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
|
||||||
|
|
||||||
If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.:
|
If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.:
|
||||||
* glslang
|
* glslang
|
||||||
|
|
|
@ -14,7 +14,7 @@ endif()
|
||||||
|
|
||||||
option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON)
|
option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON)
|
||||||
option(WITH_LLVM "Enable usage of LLVM library" ON)
|
option(WITH_LLVM "Enable usage of LLVM library" ON)
|
||||||
option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON)
|
option(BUILD_LLVM "Build LLVM from git submodule" OFF)
|
||||||
option(USE_FAUDIO "FAudio audio backend" ON)
|
option(USE_FAUDIO "FAudio audio backend" ON)
|
||||||
option(USE_LIBEVDEV "libevdev-based joystick support" ON)
|
option(USE_LIBEVDEV "libevdev-based joystick support" ON)
|
||||||
option(USE_DISCORD_RPC "Discord rich presence integration" OFF)
|
option(USE_DISCORD_RPC "Discord rich presence integration" OFF)
|
||||||
|
|
|
@ -198,6 +198,9 @@ static u8* add_jit_memory(usz size, uint align)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ensure(pointer + pos >= get_jit_memory() + Off);
|
||||||
|
ensure(pointer + pos < get_jit_memory() + Off + 0x40000000);
|
||||||
|
|
||||||
return pointer + pos;
|
return pointer + pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1319,7 +1322,10 @@ std::string jit_compiler::cpu(const std::string& _cpu)
|
||||||
m_cpu == "icelake-client" ||
|
m_cpu == "icelake-client" ||
|
||||||
m_cpu == "icelake-server" ||
|
m_cpu == "icelake-server" ||
|
||||||
m_cpu == "tigerlake" ||
|
m_cpu == "tigerlake" ||
|
||||||
m_cpu == "rocketlake")
|
m_cpu == "rocketlake" ||
|
||||||
|
m_cpu == "alderlake" ||
|
||||||
|
m_cpu == "raptorlake" ||
|
||||||
|
m_cpu == "meteorlake")
|
||||||
{
|
{
|
||||||
// Downgrade if AVX is not supported by some chips
|
// Downgrade if AVX is not supported by some chips
|
||||||
if (!utils::has_avx())
|
if (!utils::has_avx())
|
||||||
|
@ -1350,6 +1356,18 @@ std::string jit_compiler::cpu(const std::string& _cpu)
|
||||||
// Upgrade
|
// Upgrade
|
||||||
m_cpu = "znver2";
|
m_cpu = "znver2";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((m_cpu == "znver3" || m_cpu == "goldmont" || m_cpu == "alderlake" || m_cpu == "raptorlake" || m_cpu == "meteorlake") && utils::has_avx512_icl())
|
||||||
|
{
|
||||||
|
// Upgrade
|
||||||
|
m_cpu = "icelake-client";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_cpu == "goldmont" && utils::has_avx2())
|
||||||
|
{
|
||||||
|
// Upgrade
|
||||||
|
m_cpu = "alderlake";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return m_cpu;
|
return m_cpu;
|
||||||
|
@ -1362,15 +1380,13 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||||
std::string result;
|
std::string result;
|
||||||
|
|
||||||
auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context);
|
auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context);
|
||||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
null_mod->setTargetTriple(llvm::Triple::normalize(llvm::sys::getProcessTriple()));
|
||||||
// Force override triple on Apple arm64 or we'll get linking errors.
|
|
||||||
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple));
|
std::unique_ptr<llvm::RTDyldMemoryManager> mem;
|
||||||
#endif
|
|
||||||
|
|
||||||
if (_link.empty())
|
if (_link.empty())
|
||||||
{
|
{
|
||||||
std::unique_ptr<llvm::RTDyldMemoryManager> mem;
|
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
|
||||||
|
|
||||||
if (flags & 0x1)
|
if (flags & 0x1)
|
||||||
{
|
{
|
||||||
mem = std::make_unique<MemoryManager1>();
|
mem = std::make_unique<MemoryManager1>();
|
||||||
|
@ -1378,31 +1394,33 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mem = std::make_unique<MemoryManager2>();
|
mem = std::make_unique<MemoryManager2>();
|
||||||
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple));
|
#if defined(_WIN32) && defined(ARCH_X64)
|
||||||
|
null_mod->setTargetTriple(llvm::Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mem = std::make_unique<MemoryManager1>();
|
||||||
|
}
|
||||||
|
|
||||||
// Auxiliary JIT (does not use custom memory manager, only writes the objects)
|
{
|
||||||
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
|
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
|
||||||
.setErrorStr(&result)
|
.setErrorStr(&result)
|
||||||
.setEngineKind(llvm::EngineKind::JIT)
|
.setEngineKind(llvm::EngineKind::JIT)
|
||||||
.setMCJITMemoryManager(std::move(mem))
|
.setMCJITMemoryManager(std::move(mem))
|
||||||
.setOptLevel(llvm::CodeGenOpt::Aggressive)
|
.setOptLevel(llvm::CodeGenOpt::Aggressive)
|
||||||
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
|
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.setCodeModel(llvm::CodeModel::Large)
|
||||||
|
#endif
|
||||||
|
.setRelocationModel(llvm::Reloc::Model::PIC_)
|
||||||
.setMCPU(m_cpu)
|
.setMCPU(m_cpu)
|
||||||
.create());
|
.create());
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
// Primary JIT
|
|
||||||
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
|
|
||||||
.setErrorStr(&result)
|
|
||||||
.setEngineKind(llvm::EngineKind::JIT)
|
|
||||||
.setMCJITMemoryManager(std::make_unique<MemoryManager1>())
|
|
||||||
.setOptLevel(llvm::CodeGenOpt::Aggressive)
|
|
||||||
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
|
|
||||||
.setMCPU(m_cpu)
|
|
||||||
.create());
|
|
||||||
|
|
||||||
|
if (!_link.empty())
|
||||||
|
{
|
||||||
for (auto&& [name, addr] : _link)
|
for (auto&& [name, addr] : _link)
|
||||||
{
|
{
|
||||||
m_engine->updateGlobalMapping(name, addr);
|
m_engine->updateGlobalMapping(name, addr);
|
||||||
|
|
|
@ -16,54 +16,97 @@
|
||||||
<Link>
|
<Link>
|
||||||
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug'">%(AdditionalLibraryDirectories);..\llvm_build\Debug\lib</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug'">%(AdditionalLibraryDirectories);..\llvm_build\Debug\lib</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>%(AdditionalDependencies);
|
<AdditionalDependencies>%(AdditionalDependencies);
|
||||||
LLVMProfileData.lib;
|
LLVMAggressiveInstCombine.lib;
|
||||||
LLVMDebugInfoCodeView.lib;
|
LLVMAnalysis.lib;
|
||||||
LLVMDebugInfoMSF.lib;
|
LLVMAsmParser.lib;
|
||||||
LLVMInstrumentation.lib;
|
LLVMAsmPrinter.lib;
|
||||||
LLVMMCJIT.lib;
|
LLVMBinaryFormat.lib;
|
||||||
LLVMRuntimeDyld.lib;
|
LLVMBitReader.lib;
|
||||||
LLVMVectorize.lib;
|
LLVMBitstreamReader.lib;
|
||||||
LLVMX86CodeGen.lib;
|
LLVMBitWriter.lib;
|
||||||
LLVMGlobalISel.lib;
|
LLVMCFGuard.lib;
|
||||||
LLVMX86Disassembler.lib;
|
LLVMCFIVerify.lib;
|
||||||
LLVMExecutionEngine.lib;
|
LLVMCodeGen.lib;
|
||||||
LLVMAsmPrinter.lib;
|
LLVMCore.lib;
|
||||||
LLVMSelectionDAG.lib;
|
LLVMCoroutines.lib;
|
||||||
LLVMCodeGen.lib;
|
LLVMCoverage.lib;
|
||||||
LLVMScalarOpts.lib;
|
LLVMDebugInfoCodeView.lib;
|
||||||
LLVMInstCombine.lib;
|
LLVMDebuginfod.lib;
|
||||||
LLVMTransformUtils.lib;
|
LLVMDebugInfoDWARF.lib;
|
||||||
LLVMAnalysis.lib;
|
LLVMDebugInfoGSYM.lib;
|
||||||
LLVMTarget.lib;
|
LLVMDebugInfoLogicalView.lib;
|
||||||
LLVMX86Desc.lib;
|
LLVMDebugInfoMSF.lib;
|
||||||
LLVMObject.lib;
|
LLVMDebugInfoPDB.lib;
|
||||||
LLVMMCParser.lib;
|
LLVMDemangle.lib;
|
||||||
LLVMBitReader.lib;
|
LLVMDiff.lib;
|
||||||
LLVMCore.lib;
|
LLVMDlltoolDriver.lib;
|
||||||
LLVMMC.lib;
|
LLVMDWARFLinker.lib;
|
||||||
LLVMX86Info.lib;
|
LLVMDWARFLinkerParallel.lib;
|
||||||
LLVMSupport.lib;
|
LLVMDWP.lib;
|
||||||
LLVMMCDisassembler.lib;
|
LLVMExecutionEngine.lib;
|
||||||
LLVMipo.lib;
|
LLVMExegesis.lib;
|
||||||
LLVMBinaryFormat.lib;
|
LLVMExegesisX86.lib;
|
||||||
LLVMPasses.lib;
|
LLVMExtensions.lib;
|
||||||
LLVMIRReader.lib;
|
LLVMFileCheck.lib;
|
||||||
LLVMLinker.lib;
|
LLVMFrontendHLSL.lib;
|
||||||
LLVMAsmParser.lib;
|
LLVMFrontendOpenACC.lib;
|
||||||
LLVMX86AsmParser.lib;
|
LLVMFrontendOpenMP.lib;
|
||||||
LLVMDemangle.lib;
|
LLVMFuzzerCLI.lib;
|
||||||
LLVMDebugInfoDWARF.lib;
|
LLVMFuzzMutate.lib;
|
||||||
LLVMRemarks.lib;
|
LLVMGlobalISel.lib;
|
||||||
LLVMBitstreamReader.lib;
|
LLVMInstCombine.lib;
|
||||||
LLVMTextAPI.lib;
|
LLVMInstrumentation.lib;
|
||||||
LLVMCFGuard.lib;
|
LLVMIntelJITEvents.lib;
|
||||||
LLVMAggressiveInstCombine.lib;
|
LLVMInterfaceStub.lib;
|
||||||
LLVMBitWriter.lib;
|
LLVMInterpreter.lib;
|
||||||
LLVMCoroutines.lib;
|
LLVMipo.lib;
|
||||||
LLVMObjCARCOpts.lib;
|
LLVMIRPrinter.lib;
|
||||||
LLVMIntelJITEvents.lib;
|
LLVMIRReader.lib;
|
||||||
|
LLVMJITLink.lib;
|
||||||
|
LLVMLibDriver.lib;
|
||||||
|
LLVMLineEditor.lib;
|
||||||
|
LLVMLinker.lib;
|
||||||
|
LLVMLTO.lib;
|
||||||
|
LLVMMCA.lib;
|
||||||
|
LLVMMCDisassembler.lib;
|
||||||
|
LLVMMCJIT.lib;
|
||||||
|
LLVMMC.lib;
|
||||||
|
LLVMMCParser.lib;
|
||||||
|
LLVMMIRParser.lib;
|
||||||
|
LLVMObjCARCOpts.lib;
|
||||||
|
LLVMObjCopy.lib;
|
||||||
|
LLVMObject.lib;
|
||||||
|
LLVMObjectYAML.lib;
|
||||||
|
LLVMOption.lib;
|
||||||
|
LLVMOrcJIT.lib;
|
||||||
|
LLVMOrcShared.lib;
|
||||||
|
LLVMOrcTargetProcess.lib;
|
||||||
|
LLVMPasses.lib;
|
||||||
|
LLVMProfileData.lib;
|
||||||
|
LLVMRemarks.lib;
|
||||||
|
LLVMRuntimeDyld.lib;
|
||||||
|
LLVMScalarOpts.lib;
|
||||||
|
LLVMSelectionDAG.lib;
|
||||||
|
LLVMSupport.lib;
|
||||||
|
LLVMSymbolize.lib;
|
||||||
|
LLVMTableGenGlobalISel.lib;
|
||||||
|
LLVMTableGen.lib;
|
||||||
|
LLVMTarget.lib;
|
||||||
|
LLVMTargetParser.lib;
|
||||||
|
LLVMTextAPI.lib;
|
||||||
|
LLVMTransformUtils.lib;
|
||||||
|
LLVMVectorize.lib;
|
||||||
|
LLVMWindowsDriver.lib;
|
||||||
|
LLVMWindowsManifest.lib;
|
||||||
|
LLVMX86AsmParser.lib;
|
||||||
|
LLVMX86CodeGen.lib;
|
||||||
|
LLVMX86Desc.lib;
|
||||||
|
LLVMX86Disassembler.lib;
|
||||||
|
LLVMX86Info.lib;
|
||||||
|
LLVMX86TargetMCA.lib;
|
||||||
|
LLVMXRay.lib;
|
||||||
</AdditionalDependencies>
|
</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup />
|
<ItemGroup />
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -17,54 +17,97 @@
|
||||||
<OptimizeReferences>true</OptimizeReferences>
|
<OptimizeReferences>true</OptimizeReferences>
|
||||||
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release'">%(AdditionalLibraryDirectories);..\llvm_build\Release\lib</AdditionalLibraryDirectories>
|
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release'">%(AdditionalLibraryDirectories);..\llvm_build\Release\lib</AdditionalLibraryDirectories>
|
||||||
<AdditionalDependencies>%(AdditionalDependencies);
|
<AdditionalDependencies>%(AdditionalDependencies);
|
||||||
LLVMProfileData.lib;
|
LLVMAggressiveInstCombine.lib;
|
||||||
LLVMDebugInfoCodeView.lib;
|
LLVMAnalysis.lib;
|
||||||
LLVMDebugInfoMSF.lib;
|
LLVMAsmParser.lib;
|
||||||
LLVMInstrumentation.lib;
|
LLVMAsmPrinter.lib;
|
||||||
LLVMMCJIT.lib;
|
LLVMBinaryFormat.lib;
|
||||||
LLVMRuntimeDyld.lib;
|
LLVMBitReader.lib;
|
||||||
LLVMVectorize.lib;
|
LLVMBitstreamReader.lib;
|
||||||
LLVMX86CodeGen.lib;
|
LLVMBitWriter.lib;
|
||||||
LLVMGlobalISel.lib;
|
LLVMCFGuard.lib;
|
||||||
LLVMX86Disassembler.lib;
|
LLVMCFIVerify.lib;
|
||||||
LLVMExecutionEngine.lib;
|
LLVMCodeGen.lib;
|
||||||
LLVMAsmPrinter.lib;
|
LLVMCore.lib;
|
||||||
LLVMSelectionDAG.lib;
|
LLVMCoroutines.lib;
|
||||||
LLVMCodeGen.lib;
|
LLVMCoverage.lib;
|
||||||
LLVMScalarOpts.lib;
|
LLVMDebugInfoCodeView.lib;
|
||||||
LLVMInstCombine.lib;
|
LLVMDebuginfod.lib;
|
||||||
LLVMTransformUtils.lib;
|
LLVMDebugInfoDWARF.lib;
|
||||||
LLVMAnalysis.lib;
|
LLVMDebugInfoGSYM.lib;
|
||||||
LLVMTarget.lib;
|
LLVMDebugInfoLogicalView.lib;
|
||||||
LLVMX86Desc.lib;
|
LLVMDebugInfoMSF.lib;
|
||||||
LLVMObject.lib;
|
LLVMDebugInfoPDB.lib;
|
||||||
LLVMMCParser.lib;
|
LLVMDemangle.lib;
|
||||||
LLVMBitReader.lib;
|
LLVMDiff.lib;
|
||||||
LLVMCore.lib;
|
LLVMDlltoolDriver.lib;
|
||||||
LLVMMC.lib;
|
LLVMDWARFLinker.lib;
|
||||||
LLVMX86Info.lib;
|
LLVMDWARFLinkerParallel.lib;
|
||||||
LLVMSupport.lib;
|
LLVMDWP.lib;
|
||||||
LLVMMCDisassembler.lib;
|
LLVMExecutionEngine.lib;
|
||||||
LLVMipo.lib;
|
LLVMExegesis.lib;
|
||||||
LLVMBinaryFormat.lib;
|
LLVMExegesisX86.lib;
|
||||||
LLVMPasses.lib;
|
LLVMExtensions.lib;
|
||||||
LLVMIRReader.lib;
|
LLVMFileCheck.lib;
|
||||||
LLVMLinker.lib;
|
LLVMFrontendHLSL.lib;
|
||||||
LLVMAsmParser.lib;
|
LLVMFrontendOpenACC.lib;
|
||||||
LLVMX86AsmParser.lib;
|
LLVMFrontendOpenMP.lib;
|
||||||
LLVMDemangle.lib;
|
LLVMFuzzerCLI.lib;
|
||||||
LLVMDebugInfoDWARF.lib;
|
LLVMFuzzMutate.lib;
|
||||||
LLVMRemarks.lib;
|
LLVMGlobalISel.lib;
|
||||||
LLVMBitstreamReader.lib;
|
LLVMInstCombine.lib;
|
||||||
LLVMTextAPI.lib;
|
LLVMInstrumentation.lib;
|
||||||
LLVMCFGuard.lib;
|
LLVMIntelJITEvents.lib;
|
||||||
LLVMAggressiveInstCombine.lib;
|
LLVMInterfaceStub.lib;
|
||||||
LLVMBitWriter.lib;
|
LLVMInterpreter.lib;
|
||||||
LLVMCoroutines.lib;
|
LLVMipo.lib;
|
||||||
LLVMObjCARCOpts.lib;
|
LLVMIRPrinter.lib;
|
||||||
LLVMIntelJITEvents.lib;
|
LLVMIRReader.lib;
|
||||||
|
LLVMJITLink.lib;
|
||||||
|
LLVMLibDriver.lib;
|
||||||
|
LLVMLineEditor.lib;
|
||||||
|
LLVMLinker.lib;
|
||||||
|
LLVMLTO.lib;
|
||||||
|
LLVMMCA.lib;
|
||||||
|
LLVMMCDisassembler.lib;
|
||||||
|
LLVMMCJIT.lib;
|
||||||
|
LLVMMC.lib;
|
||||||
|
LLVMMCParser.lib;
|
||||||
|
LLVMMIRParser.lib;
|
||||||
|
LLVMObjCARCOpts.lib;
|
||||||
|
LLVMObjCopy.lib;
|
||||||
|
LLVMObject.lib;
|
||||||
|
LLVMObjectYAML.lib;
|
||||||
|
LLVMOption.lib;
|
||||||
|
LLVMOrcJIT.lib;
|
||||||
|
LLVMOrcShared.lib;
|
||||||
|
LLVMOrcTargetProcess.lib;
|
||||||
|
LLVMPasses.lib;
|
||||||
|
LLVMProfileData.lib;
|
||||||
|
LLVMRemarks.lib;
|
||||||
|
LLVMRuntimeDyld.lib;
|
||||||
|
LLVMScalarOpts.lib;
|
||||||
|
LLVMSelectionDAG.lib;
|
||||||
|
LLVMSupport.lib;
|
||||||
|
LLVMSymbolize.lib;
|
||||||
|
LLVMTableGenGlobalISel.lib;
|
||||||
|
LLVMTableGen.lib;
|
||||||
|
LLVMTarget.lib;
|
||||||
|
LLVMTargetParser.lib;
|
||||||
|
LLVMTextAPI.lib;
|
||||||
|
LLVMTransformUtils.lib;
|
||||||
|
LLVMVectorize.lib;
|
||||||
|
LLVMWindowsDriver.lib;
|
||||||
|
LLVMWindowsManifest.lib;
|
||||||
|
LLVMX86AsmParser.lib;
|
||||||
|
LLVMX86CodeGen.lib;
|
||||||
|
LLVMX86Desc.lib;
|
||||||
|
LLVMX86Disassembler.lib;
|
||||||
|
LLVMX86Info.lib;
|
||||||
|
LLVMX86TargetMCA.lib;
|
||||||
|
LLVMXRay.lib;
|
||||||
</AdditionalDependencies>
|
</AdditionalDependencies>
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup />
|
<ItemGroup />
|
||||||
</Project>
|
</Project>
|
||||||
|
|
2
llvm
2
llvm
|
@ -1 +1 @@
|
||||||
Subproject commit 9b52b6c39ae9f0759fbce7dd0db4b3290d6ebc56
|
Subproject commit 89d5468e9505ddb04754eadbfed526f5b6ad4cbd
|
|
@ -75,6 +75,14 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
register_intrinsic("any_select_by_bit4", [&](llvm::CallInst* ci) -> llvm::Value*
|
||||||
|
{
|
||||||
|
const auto s = bitcast<s8[16]>(m_ir->CreateShl(bitcast<u64[2]>(ci->getOperand(0)), 3));;
|
||||||
|
const auto a = bitcast<u8[16]>(ci->getOperand(1));
|
||||||
|
const auto b = bitcast<u8[16]>(ci->getOperand(2));
|
||||||
|
return m_ir->CreateSelect(m_ir->CreateICmpSLT(s, llvm::ConstantAggregateZero::get(get_type<s8[16]>())), b, a);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
|
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
|
||||||
|
@ -112,6 +120,8 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||||
cpu == "broadwell" ||
|
cpu == "broadwell" ||
|
||||||
cpu == "skylake" ||
|
cpu == "skylake" ||
|
||||||
cpu == "alderlake" ||
|
cpu == "alderlake" ||
|
||||||
|
cpu == "raptorlake" ||
|
||||||
|
cpu == "meteorlake" ||
|
||||||
cpu == "bdver2" ||
|
cpu == "bdver2" ||
|
||||||
cpu == "bdver3" ||
|
cpu == "bdver3" ||
|
||||||
cpu == "bdver4" ||
|
cpu == "bdver4" ||
|
||||||
|
@ -135,7 +145,9 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
|
||||||
// Test VNNI feature (TODO)
|
// Test VNNI feature (TODO)
|
||||||
if (cpu == "cascadelake" ||
|
if (cpu == "cascadelake" ||
|
||||||
cpu == "cooperlake" ||
|
cpu == "cooperlake" ||
|
||||||
cpu == "alderlake")
|
cpu == "alderlake" ||
|
||||||
|
cpu == "raptorlake" ||
|
||||||
|
cpu == "meteorlake")
|
||||||
{
|
{
|
||||||
m_use_vnni = true;
|
m_use_vnni = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,9 @@
|
||||||
#include "llvm/IR/IRBuilder.h"
|
#include "llvm/IR/IRBuilder.h"
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
#include "llvm/Support/KnownBits.h"
|
||||||
#include "llvm/Analysis/ConstantFolding.h"
|
#include "llvm/Analysis/ConstantFolding.h"
|
||||||
|
#include "llvm/Analysis/ValueTracking.h"
|
||||||
#include "llvm/IR/IntrinsicsX86.h"
|
#include "llvm/IR/IntrinsicsX86.h"
|
||||||
#include "llvm/IR/IntrinsicsAArch64.h"
|
#include "llvm/IR/IntrinsicsAArch64.h"
|
||||||
|
|
||||||
|
@ -59,6 +61,62 @@ concept DSLValue = requires (T& v)
|
||||||
{ v.eval(std::declval<llvm::IRBuilder<>*>()) } -> LLVMValue;
|
{ v.eval(std::declval<llvm::IRBuilder<>*>()) } -> LLVMValue;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <usz N>
|
||||||
|
struct get_int_bits
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<1>
|
||||||
|
{
|
||||||
|
using utype = bool;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<2>
|
||||||
|
{
|
||||||
|
using utype = i2;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<4>
|
||||||
|
{
|
||||||
|
using utype = i4;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<8>
|
||||||
|
{
|
||||||
|
using utype = u8;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<16>
|
||||||
|
{
|
||||||
|
using utype = u16;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<32>
|
||||||
|
{
|
||||||
|
using utype = u32;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<64>
|
||||||
|
{
|
||||||
|
using utype = u64;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct get_int_bits<128>
|
||||||
|
{
|
||||||
|
using utype = u128;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <usz Bits>
|
||||||
|
using get_int_vt = typename get_int_bits<Bits>::utype;
|
||||||
|
|
||||||
template <typename T = void>
|
template <typename T = void>
|
||||||
struct llvm_value_t
|
struct llvm_value_t
|
||||||
{
|
{
|
||||||
|
@ -3292,10 +3350,41 @@ public:
|
||||||
|
|
||||||
// Infinite-precision shift left
|
// Infinite-precision shift left
|
||||||
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
||||||
static auto inf_shl(T&& a, U&& b)
|
auto inf_shl(T&& a, U&& b)
|
||||||
{
|
{
|
||||||
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
||||||
|
|
||||||
|
if constexpr (esz == 32)
|
||||||
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psllv.d", {std::forward<T>(a), std::forward<U>(b)}});
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (esz == 16)
|
||||||
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psllv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
|
||||||
|
|
||||||
|
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
{
|
||||||
|
using t32 = value_t<u32[4]>;
|
||||||
|
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
|
||||||
|
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
|
||||||
|
auto sizeL = eval(b32 & 0xffff);
|
||||||
|
auto sizeH = eval(b32 >> 16);
|
||||||
|
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {a32, sizeL}});
|
||||||
|
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {eval(a32 & 0xffff0000), sizeH}});
|
||||||
|
return eval(bitcast<CT>((dataL & 0xffff) | dataH));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
return eval(select(b < esz, a << b, splat<CT>(0)));
|
||||||
|
|
||||||
|
/*
|
||||||
return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
||||||
{
|
{
|
||||||
static const auto M = match<CT>();
|
static const auto M = match<CT>();
|
||||||
|
@ -3314,14 +3403,46 @@ public:
|
||||||
value = nullptr;
|
value = nullptr;
|
||||||
return {};
|
return {};
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
// Infinite-precision logical shift right (unsigned)
|
// Infinite-precision logical shift right (unsigned)
|
||||||
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
||||||
static auto inf_lshr(T&& a, U&& b)
|
auto inf_lshr(T&& a, U&& b)
|
||||||
{
|
{
|
||||||
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
||||||
|
|
||||||
|
if constexpr (esz == 32)
|
||||||
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrlv.d", {std::forward<T>(a), std::forward<U>(b)}});
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (esz == 16)
|
||||||
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrlv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
|
||||||
|
|
||||||
|
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
{
|
||||||
|
using t32 = value_t<u32[4]>;
|
||||||
|
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
|
||||||
|
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
|
||||||
|
auto sizeL = eval(b32 & 0xffff);
|
||||||
|
auto sizeH = eval(b32 >> 16);
|
||||||
|
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {eval(a32 & 0xffff), sizeL}});
|
||||||
|
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {a32, sizeH}});
|
||||||
|
return eval(bitcast<CT>(dataL | (dataH & 0xffff0000)));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
return eval(select(b < esz, a >> b, splat<CT>(0)));
|
||||||
|
|
||||||
|
/*
|
||||||
return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
||||||
{
|
{
|
||||||
static const auto M = match<CT>();
|
static const auto M = match<CT>();
|
||||||
|
@ -3340,14 +3461,46 @@ public:
|
||||||
value = nullptr;
|
value = nullptr;
|
||||||
return {};
|
return {};
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
// Infinite-precision arithmetic shift right (signed)
|
// Infinite-precision arithmetic shift right (signed)
|
||||||
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
template <typename T, typename U, typename CT = llvm_common_t<T, U>>
|
||||||
static auto inf_ashr(T&& a, U&& b)
|
auto inf_ashr(T&& a, U&& b)
|
||||||
{
|
{
|
||||||
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
static constexpr u32 esz = llvm_value_t<CT>::esize;
|
||||||
|
|
||||||
|
if constexpr (esz == 32)
|
||||||
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrav.d", {std::forward<T>(a), std::forward<U>(b)}});
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (esz == 16)
|
||||||
|
{
|
||||||
|
#if defined(ARCH_X64)
|
||||||
|
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrav.w.128", {std::forward<T>(a), std::forward<U>(b)}});
|
||||||
|
|
||||||
|
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
|
||||||
|
{
|
||||||
|
using t32 = value_t<u32[4]>;
|
||||||
|
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
|
||||||
|
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
|
||||||
|
auto sizeL = eval(b32 & 0xffff);
|
||||||
|
auto sizeH = eval(b32 >> 16);
|
||||||
|
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {eval(a32 << 16), sizeL}});
|
||||||
|
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {a32, sizeH}});
|
||||||
|
return eval(bitcast<CT>((dataL >> 16) | (dataH & 0xffff0000)));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
return eval(a >> select(b > (esz - 1), splat<CT>(esz - 1), b));
|
||||||
|
|
||||||
|
/*
|
||||||
return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
|
||||||
{
|
{
|
||||||
static const auto M = match<CT>();
|
static const auto M = match<CT>();
|
||||||
|
@ -3366,6 +3519,7 @@ public:
|
||||||
value = nullptr;
|
value = nullptr;
|
||||||
return {};
|
return {};
|
||||||
});
|
});
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename... Types>
|
template <typename... Types>
|
||||||
|
@ -3567,6 +3721,18 @@ public:
|
||||||
template <typename T = v128>
|
template <typename T = v128>
|
||||||
llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE());
|
llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE());
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
llvm::KnownBits get_known_bits(T a)
|
||||||
|
{
|
||||||
|
return llvm::computeKnownBits(a.eval(m_ir), m_module->getDataLayout());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
llvm::KnownBits kbc(T value)
|
||||||
|
{
|
||||||
|
return llvm::KnownBits::makeConstant(llvm::APInt(sizeof(T) * 8, u64(value)));
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Custom intrinsic table
|
// Custom intrinsic table
|
||||||
std::unordered_map<std::string_view, std::function<llvm::Value*(llvm::CallInst*)>> m_intrinsics;
|
std::unordered_map<std::string_view, std::function<llvm::Value*(llvm::CallInst*)>> m_intrinsics;
|
||||||
|
@ -3647,6 +3813,13 @@ public:
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// (m << 3) >= 0 ? a : b
|
||||||
|
template <typename T, typename U, typename V>
|
||||||
|
static auto select_by_bit4(T&& m, U&& a, V&& b)
|
||||||
|
{
|
||||||
|
return llvm_calli<u8[16], T, U, V>{"any_select_by_bit4", {std::forward<T>(m), std::forward<U>(a), std::forward<V>(b)}};
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T>, f32[4]>>>
|
template <typename T, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T>, f32[4]>>>
|
||||||
static auto fre(T&& a)
|
static auto fre(T&& a)
|
||||||
{
|
{
|
||||||
|
|
|
@ -2130,10 +2130,10 @@ static void ppu_check(ppu_thread& ppu, u64 addr)
|
||||||
{
|
{
|
||||||
ppu.cia = ::narrow<u32>(addr);
|
ppu.cia = ::narrow<u32>(addr);
|
||||||
|
|
||||||
|
// ppu_check() shall not return directly
|
||||||
if (ppu.test_stopped())
|
if (ppu.test_stopped())
|
||||||
{
|
;
|
||||||
return;
|
ppu_escape(&ppu);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ppu_trace(u64 addr)
|
static void ppu_trace(u64 addr)
|
||||||
|
@ -3368,13 +3368,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||||
{
|
{
|
||||||
std::unordered_map<std::string, u64> link_table
|
std::unordered_map<std::string, u64> link_table
|
||||||
{
|
{
|
||||||
{ "sys_game_watchdog_start", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "sys_game_watchdog_stop", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "sys_game_watchdog_clear", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "sys_game_get_system_sw_version", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "sys_game_board_storage_read", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "sys_game_board_storage_write", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "sys_game_get_rtc_status", reinterpret_cast<u64>(ppu_execute_syscall) },
|
|
||||||
{ "__trap", reinterpret_cast<u64>(&ppu_trap) },
|
{ "__trap", reinterpret_cast<u64>(&ppu_trap) },
|
||||||
{ "__error", reinterpret_cast<u64>(&ppu_error) },
|
{ "__error", reinterpret_cast<u64>(&ppu_error) },
|
||||||
{ "__check", reinterpret_cast<u64>(&ppu_check) },
|
{ "__check", reinterpret_cast<u64>(&ppu_check) },
|
||||||
|
@ -3388,6 +3381,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
|
||||||
{ "__dcbz", reinterpret_cast<u64>(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) },
|
{ "__dcbz", reinterpret_cast<u64>(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) },
|
||||||
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
|
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
|
||||||
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
|
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
|
||||||
|
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (u64 index = 0; index < 1024; index++)
|
for (u64 index = 0; index < 1024; index++)
|
||||||
|
@ -3943,12 +3937,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
|
||||||
std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context());
|
std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context());
|
||||||
|
|
||||||
// Initialize target
|
// Initialize target
|
||||||
#if defined(__APPLE__) && defined(ARCH_ARM64)
|
|
||||||
// Force target linux on macOS arm64 to bypass some 64-bit address space linking issues
|
|
||||||
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
|
|
||||||
#else
|
|
||||||
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
||||||
#endif
|
|
||||||
_module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());
|
_module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());
|
||||||
|
|
||||||
// Initialize translator
|
// Initialize translator
|
||||||
|
@ -3978,6 +3967,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
if (g_cfg.core.ppu_debug)
|
||||||
|
{
|
||||||
|
translator.build_interpreter();
|
||||||
|
}
|
||||||
|
|
||||||
legacy::FunctionPassManager pm(_module.get());
|
legacy::FunctionPassManager pm(_module.get());
|
||||||
|
|
||||||
// Basic optimizations
|
// Basic optimizations
|
||||||
|
|
|
@ -200,7 +200,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
||||||
|
|
||||||
// Create tail call to the check function
|
// Create tail call to the check function
|
||||||
m_ir->SetInsertPoint(vcheck);
|
m_ir->SetInsertPoint(vcheck);
|
||||||
Call(GetType<void>(), "__check", m_thread, GetAddr());
|
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -604,12 +604,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
||||||
{
|
{
|
||||||
// Read, byteswap, bitcast
|
// Read, byteswap, bitcast
|
||||||
const auto int_type = m_ir->getIntNTy(size);
|
const auto int_type = m_ir->getIntNTy(size);
|
||||||
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}, true);
|
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align});
|
||||||
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read normally
|
// Read normally
|
||||||
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}, true);
|
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align});
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
|
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
|
||||||
|
@ -625,7 +625,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write
|
// Write
|
||||||
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align}, true);
|
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align});
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::CompilationError(const std::string& error)
|
void PPUTranslator::CompilationError(const std::string& error)
|
||||||
|
@ -1945,12 +1945,14 @@ void PPUTranslator::SC(ppu_opcode_t op)
|
||||||
if (index < 1024)
|
if (index < 1024)
|
||||||
{
|
{
|
||||||
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
|
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
|
||||||
|
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
|
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
|
||||||
|
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2507,6 +2509,7 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
|
||||||
RegStore(Trunc(GetAddr()), m_cia);
|
RegStore(Trunc(GetAddr()), m_cia);
|
||||||
FlushRegisters();
|
FlushRegisters();
|
||||||
Call(GetType<void>(), "__resinterp", m_thread);
|
Call(GetType<void>(), "__resinterp", m_thread);
|
||||||
|
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2649,6 +2652,7 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
|
||||||
RegStore(Trunc(GetAddr()), m_cia);
|
RegStore(Trunc(GetAddr()), m_cia);
|
||||||
FlushRegisters();
|
FlushRegisters();
|
||||||
Call(GetType<void>(), "__resinterp", m_thread);
|
Call(GetType<void>(), "__resinterp", m_thread);
|
||||||
|
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2786,11 +2790,7 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
|
||||||
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
|
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
|
||||||
const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
|
const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
|
||||||
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
|
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
|
||||||
#if LLVM_VERSION_MAJOR < 15
|
|
||||||
Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
|
|
||||||
#else
|
|
||||||
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
|
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3313,7 +3313,7 @@ void PPUTranslator::STVLX(ppu_opcode_t op)
|
||||||
const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf));
|
const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf));
|
||||||
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
|
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
|
||||||
const auto align = splat<u32>(16);
|
const auto align = splat<u32>(16);
|
||||||
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
|
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::STDBRX(ppu_opcode_t op)
|
void PPUTranslator::STDBRX(ppu_opcode_t op)
|
||||||
|
@ -3343,7 +3343,7 @@ void PPUTranslator::STVRX(ppu_opcode_t op)
|
||||||
const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16));
|
const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16));
|
||||||
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
|
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
|
||||||
const auto align = splat<u32>(16);
|
const auto align = splat<u32>(16);
|
||||||
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}});
|
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
|
||||||
}
|
}
|
||||||
|
|
||||||
void PPUTranslator::STFSUX(ppu_opcode_t op)
|
void PPUTranslator::STFSUX(ppu_opcode_t op)
|
||||||
|
@ -3524,7 +3524,7 @@ void PPUTranslator::DCBZ(ppu_opcode_t op)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Call(GetType<void>(), "llvm.memset.p0i8.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getTrue());
|
Call(GetType<void>(), "llvm.memset.p0.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getFalse());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4601,6 +4601,7 @@ void PPUTranslator::UNK(ppu_opcode_t op)
|
||||||
{
|
{
|
||||||
FlushRegisters();
|
FlushRegisters();
|
||||||
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
|
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
|
||||||
|
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4862,6 +4863,7 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
|
||||||
void PPUTranslator::Trap()
|
void PPUTranslator::Trap()
|
||||||
{
|
{
|
||||||
Call(GetType<void>(), "__trap", m_thread, GetAddr());
|
Call(GetType<void>(), "__trap", m_thread, GetAddr());
|
||||||
|
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4909,4 +4911,184 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PPUTranslator::build_interpreter()
|
||||||
|
{
|
||||||
|
#define BUILD_VEC_INST(i) { \
|
||||||
|
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), m_thread_type->getPointerTo()).getCallee()); \
|
||||||
|
std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \
|
||||||
|
std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \
|
||||||
|
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \
|
||||||
|
m_ir = &irb; \
|
||||||
|
m_thread = m_function->getArg(0); \
|
||||||
|
ppu_opcode_t op{}; \
|
||||||
|
op.vd = 0; \
|
||||||
|
op.va = 1; \
|
||||||
|
op.vb = 2; \
|
||||||
|
op.vc = 3; \
|
||||||
|
this->i(op); \
|
||||||
|
FlushRegisters(); \
|
||||||
|
m_ir->CreateRetVoid(); \
|
||||||
|
replace_intrinsics(*m_function); \
|
||||||
|
}
|
||||||
|
|
||||||
|
BUILD_VEC_INST(VADDCUW);
|
||||||
|
BUILD_VEC_INST(VADDFP);
|
||||||
|
BUILD_VEC_INST(VADDSBS);
|
||||||
|
BUILD_VEC_INST(VADDSHS);
|
||||||
|
BUILD_VEC_INST(VADDSWS);
|
||||||
|
BUILD_VEC_INST(VADDUBM);
|
||||||
|
BUILD_VEC_INST(VADDUBS);
|
||||||
|
BUILD_VEC_INST(VADDUHM);
|
||||||
|
BUILD_VEC_INST(VADDUHS);
|
||||||
|
BUILD_VEC_INST(VADDUWM);
|
||||||
|
BUILD_VEC_INST(VADDUWS);
|
||||||
|
BUILD_VEC_INST(VAND);
|
||||||
|
BUILD_VEC_INST(VANDC);
|
||||||
|
BUILD_VEC_INST(VAVGSB);
|
||||||
|
BUILD_VEC_INST(VAVGSH);
|
||||||
|
BUILD_VEC_INST(VAVGSW);
|
||||||
|
BUILD_VEC_INST(VAVGUB);
|
||||||
|
BUILD_VEC_INST(VAVGUH);
|
||||||
|
BUILD_VEC_INST(VAVGUW);
|
||||||
|
BUILD_VEC_INST(VCFSX);
|
||||||
|
BUILD_VEC_INST(VCFUX);
|
||||||
|
BUILD_VEC_INST(VCMPBFP);
|
||||||
|
BUILD_VEC_INST(VCMPBFP_);
|
||||||
|
BUILD_VEC_INST(VCMPEQFP);
|
||||||
|
BUILD_VEC_INST(VCMPEQFP_);
|
||||||
|
BUILD_VEC_INST(VCMPEQUB);
|
||||||
|
BUILD_VEC_INST(VCMPEQUB_);
|
||||||
|
BUILD_VEC_INST(VCMPEQUH);
|
||||||
|
BUILD_VEC_INST(VCMPEQUH_);
|
||||||
|
BUILD_VEC_INST(VCMPEQUW);
|
||||||
|
BUILD_VEC_INST(VCMPEQUW_);
|
||||||
|
BUILD_VEC_INST(VCMPGEFP);
|
||||||
|
BUILD_VEC_INST(VCMPGEFP_);
|
||||||
|
BUILD_VEC_INST(VCMPGTFP);
|
||||||
|
BUILD_VEC_INST(VCMPGTFP_);
|
||||||
|
BUILD_VEC_INST(VCMPGTSB);
|
||||||
|
BUILD_VEC_INST(VCMPGTSB_);
|
||||||
|
BUILD_VEC_INST(VCMPGTSH);
|
||||||
|
BUILD_VEC_INST(VCMPGTSH_);
|
||||||
|
BUILD_VEC_INST(VCMPGTSW);
|
||||||
|
BUILD_VEC_INST(VCMPGTSW_);
|
||||||
|
BUILD_VEC_INST(VCMPGTUB);
|
||||||
|
BUILD_VEC_INST(VCMPGTUB_);
|
||||||
|
BUILD_VEC_INST(VCMPGTUH);
|
||||||
|
BUILD_VEC_INST(VCMPGTUH_);
|
||||||
|
BUILD_VEC_INST(VCMPGTUW);
|
||||||
|
BUILD_VEC_INST(VCMPGTUW_);
|
||||||
|
BUILD_VEC_INST(VCTSXS);
|
||||||
|
BUILD_VEC_INST(VCTUXS);
|
||||||
|
BUILD_VEC_INST(VEXPTEFP);
|
||||||
|
BUILD_VEC_INST(VLOGEFP);
|
||||||
|
BUILD_VEC_INST(VMADDFP);
|
||||||
|
BUILD_VEC_INST(VMAXFP);
|
||||||
|
BUILD_VEC_INST(VMAXSB);
|
||||||
|
BUILD_VEC_INST(VMAXSH);
|
||||||
|
BUILD_VEC_INST(VMAXSW);
|
||||||
|
BUILD_VEC_INST(VMAXUB);
|
||||||
|
BUILD_VEC_INST(VMAXUH);
|
||||||
|
BUILD_VEC_INST(VMAXUW);
|
||||||
|
BUILD_VEC_INST(VMHADDSHS);
|
||||||
|
BUILD_VEC_INST(VMHRADDSHS);
|
||||||
|
BUILD_VEC_INST(VMINFP);
|
||||||
|
BUILD_VEC_INST(VMINSB);
|
||||||
|
BUILD_VEC_INST(VMINSH);
|
||||||
|
BUILD_VEC_INST(VMINSW);
|
||||||
|
BUILD_VEC_INST(VMINUB);
|
||||||
|
BUILD_VEC_INST(VMINUH);
|
||||||
|
BUILD_VEC_INST(VMINUW);
|
||||||
|
BUILD_VEC_INST(VMLADDUHM);
|
||||||
|
BUILD_VEC_INST(VMRGHB);
|
||||||
|
BUILD_VEC_INST(VMRGHH);
|
||||||
|
BUILD_VEC_INST(VMRGHW);
|
||||||
|
BUILD_VEC_INST(VMRGLB);
|
||||||
|
BUILD_VEC_INST(VMRGLH);
|
||||||
|
BUILD_VEC_INST(VMRGLW);
|
||||||
|
BUILD_VEC_INST(VMSUMMBM);
|
||||||
|
BUILD_VEC_INST(VMSUMSHM);
|
||||||
|
BUILD_VEC_INST(VMSUMSHS);
|
||||||
|
BUILD_VEC_INST(VMSUMUBM);
|
||||||
|
BUILD_VEC_INST(VMSUMUHM);
|
||||||
|
BUILD_VEC_INST(VMSUMUHS);
|
||||||
|
BUILD_VEC_INST(VMULESB);
|
||||||
|
BUILD_VEC_INST(VMULESH);
|
||||||
|
BUILD_VEC_INST(VMULEUB);
|
||||||
|
BUILD_VEC_INST(VMULEUH);
|
||||||
|
BUILD_VEC_INST(VMULOSB);
|
||||||
|
BUILD_VEC_INST(VMULOSH);
|
||||||
|
BUILD_VEC_INST(VMULOUB);
|
||||||
|
BUILD_VEC_INST(VMULOUH);
|
||||||
|
BUILD_VEC_INST(VNMSUBFP);
|
||||||
|
BUILD_VEC_INST(VNOR);
|
||||||
|
BUILD_VEC_INST(VOR);
|
||||||
|
BUILD_VEC_INST(VPERM);
|
||||||
|
BUILD_VEC_INST(VPKPX);
|
||||||
|
BUILD_VEC_INST(VPKSHSS);
|
||||||
|
BUILD_VEC_INST(VPKSHUS);
|
||||||
|
BUILD_VEC_INST(VPKSWSS);
|
||||||
|
BUILD_VEC_INST(VPKSWUS);
|
||||||
|
BUILD_VEC_INST(VPKUHUM);
|
||||||
|
BUILD_VEC_INST(VPKUHUS);
|
||||||
|
BUILD_VEC_INST(VPKUWUM);
|
||||||
|
BUILD_VEC_INST(VPKUWUS);
|
||||||
|
BUILD_VEC_INST(VREFP);
|
||||||
|
BUILD_VEC_INST(VRFIM);
|
||||||
|
BUILD_VEC_INST(VRFIN);
|
||||||
|
BUILD_VEC_INST(VRFIP);
|
||||||
|
BUILD_VEC_INST(VRFIZ);
|
||||||
|
BUILD_VEC_INST(VRLB);
|
||||||
|
BUILD_VEC_INST(VRLH);
|
||||||
|
BUILD_VEC_INST(VRLW);
|
||||||
|
BUILD_VEC_INST(VRSQRTEFP);
|
||||||
|
BUILD_VEC_INST(VSEL);
|
||||||
|
BUILD_VEC_INST(VSL);
|
||||||
|
BUILD_VEC_INST(VSLB);
|
||||||
|
BUILD_VEC_INST(VSLDOI);
|
||||||
|
BUILD_VEC_INST(VSLH);
|
||||||
|
BUILD_VEC_INST(VSLO);
|
||||||
|
BUILD_VEC_INST(VSLW);
|
||||||
|
BUILD_VEC_INST(VSPLTB);
|
||||||
|
BUILD_VEC_INST(VSPLTH);
|
||||||
|
BUILD_VEC_INST(VSPLTISB);
|
||||||
|
BUILD_VEC_INST(VSPLTISH);
|
||||||
|
BUILD_VEC_INST(VSPLTISW);
|
||||||
|
BUILD_VEC_INST(VSPLTW);
|
||||||
|
BUILD_VEC_INST(VSR);
|
||||||
|
BUILD_VEC_INST(VSRAB);
|
||||||
|
BUILD_VEC_INST(VSRAH);
|
||||||
|
BUILD_VEC_INST(VSRAW);
|
||||||
|
BUILD_VEC_INST(VSRB);
|
||||||
|
BUILD_VEC_INST(VSRH);
|
||||||
|
BUILD_VEC_INST(VSRO);
|
||||||
|
BUILD_VEC_INST(VSRW);
|
||||||
|
BUILD_VEC_INST(VSUBCUW);
|
||||||
|
BUILD_VEC_INST(VSUBFP);
|
||||||
|
BUILD_VEC_INST(VSUBSBS);
|
||||||
|
BUILD_VEC_INST(VSUBSHS);
|
||||||
|
BUILD_VEC_INST(VSUBSWS);
|
||||||
|
BUILD_VEC_INST(VSUBUBM);
|
||||||
|
BUILD_VEC_INST(VSUBUBS);
|
||||||
|
BUILD_VEC_INST(VSUBUHM);
|
||||||
|
BUILD_VEC_INST(VSUBUHS);
|
||||||
|
BUILD_VEC_INST(VSUBUWM);
|
||||||
|
BUILD_VEC_INST(VSUBUWS);
|
||||||
|
BUILD_VEC_INST(VSUMSWS);
|
||||||
|
BUILD_VEC_INST(VSUM2SWS);
|
||||||
|
BUILD_VEC_INST(VSUM4SBS);
|
||||||
|
BUILD_VEC_INST(VSUM4SHS);
|
||||||
|
BUILD_VEC_INST(VSUM4UBS);
|
||||||
|
BUILD_VEC_INST(VUPKHPX);
|
||||||
|
BUILD_VEC_INST(VUPKHSB);
|
||||||
|
BUILD_VEC_INST(VUPKHSH);
|
||||||
|
BUILD_VEC_INST(VUPKLPX);
|
||||||
|
BUILD_VEC_INST(VUPKLSB);
|
||||||
|
BUILD_VEC_INST(VUPKLSH);
|
||||||
|
BUILD_VEC_INST(VXOR);
|
||||||
|
#undef BUILD_VEC_INST
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -856,6 +856,8 @@ public:
|
||||||
void FCTID_(ppu_opcode_t op) { return FCTID(op); }
|
void FCTID_(ppu_opcode_t op) { return FCTID(op); }
|
||||||
void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); }
|
void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); }
|
||||||
void FCFID_(ppu_opcode_t op) { return FCFID(op); }
|
void FCFID_(ppu_opcode_t op) { return FCFID(op); }
|
||||||
|
|
||||||
|
void build_interpreter();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -3914,6 +3914,7 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
|
||||||
#if LLVM_VERSION_MAJOR < 17
|
#if LLVM_VERSION_MAJOR < 17
|
||||||
#include "llvm/ADT/Triple.h"
|
#include "llvm/ADT/Triple.h"
|
||||||
#endif
|
#endif
|
||||||
|
#include "llvm/Support/Host.h"
|
||||||
#include "llvm/IR/LegacyPassManager.h"
|
#include "llvm/IR/LegacyPassManager.h"
|
||||||
#include "llvm/IR/Verifier.h"
|
#include "llvm/IR/Verifier.h"
|
||||||
#include "llvm/IR/InlineAsm.h"
|
#include "llvm/IR/InlineAsm.h"
|
||||||
|
@ -5006,7 +5007,11 @@ public:
|
||||||
|
|
||||||
// Create LLVM module
|
// Create LLVM module
|
||||||
std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context);
|
std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context);
|
||||||
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
|
#if defined(_WIN32) && defined(ARCH_X64)
|
||||||
|
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||||
|
#else
|
||||||
|
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
||||||
|
#endif
|
||||||
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
||||||
m_module = _module.get();
|
m_module = _module.get();
|
||||||
|
|
||||||
|
@ -5227,6 +5232,7 @@ public:
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
|
|
||||||
m_ir->SetInsertPoint(label_stop);
|
m_ir->SetInsertPoint(label_stop);
|
||||||
|
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
|
|
||||||
m_ir->SetInsertPoint(label_diff);
|
m_ir->SetInsertPoint(label_diff);
|
||||||
|
@ -5681,7 +5687,11 @@ public:
|
||||||
|
|
||||||
// Create LLVM module
|
// Create LLVM module
|
||||||
std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context);
|
std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context);
|
||||||
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
|
#if defined(_WIN32) && defined(ARCH_X64)
|
||||||
|
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
|
||||||
|
#else
|
||||||
|
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
|
||||||
|
#endif
|
||||||
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
|
||||||
m_module = _module.get();
|
m_module = _module.get();
|
||||||
|
|
||||||
|
@ -5982,7 +5992,8 @@ public:
|
||||||
ncall->setTailCall();
|
ncall->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
m_ir->SetInsertPoint(_stop);
|
m_ir->SetInsertPoint(_stop);
|
||||||
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc));
|
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc), true);
|
||||||
|
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
|
||||||
m_ir->CreateRetVoid();
|
m_ir->CreateRetVoid();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6355,7 +6366,7 @@ public:
|
||||||
|
|
||||||
llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
|
llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
|
||||||
{
|
{
|
||||||
const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off), true);
|
const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off));
|
||||||
const auto shv = m_ir->CreateLShr(val, spu_channel::off_count);
|
const auto shv = m_ir->CreateLShr(val, spu_channel::off_count);
|
||||||
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
|
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
|
||||||
}
|
}
|
||||||
|
@ -6415,20 +6426,20 @@ public:
|
||||||
}
|
}
|
||||||
case MFC_Cmd:
|
case MFC_Cmd:
|
||||||
{
|
{
|
||||||
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size), true);
|
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
|
||||||
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
|
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SPU_RdInMbox:
|
case SPU_RdInMbox:
|
||||||
{
|
{
|
||||||
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox), true);
|
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox));
|
||||||
res.value = m_ir->CreateLShr(res.value, 8);
|
res.value = m_ir->CreateLShr(res.value, 8);
|
||||||
res.value = m_ir->CreateAnd(res.value, 7);
|
res.value = m_ir->CreateAnd(res.value, 7);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case SPU_RdEventStat:
|
case SPU_RdEventStat:
|
||||||
{
|
{
|
||||||
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>());
|
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events)), 32), get_type<u32>());
|
||||||
res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
|
res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -6815,7 +6826,7 @@ public:
|
||||||
if (csize > 0 && csize <= 16)
|
if (csize > 0 && csize <= 16)
|
||||||
{
|
{
|
||||||
// Generate single copy operation
|
// Generate single copy operation
|
||||||
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo()), true), m_ir->CreateBitCast(dst, vtype->getPointerTo()), true);
|
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo())), m_ir->CreateBitCast(dst, vtype->getPointerTo()));
|
||||||
}
|
}
|
||||||
else if (csize <= stride * 16 && !(csize % 32))
|
else if (csize <= stride * 16 && !(csize % 32))
|
||||||
{
|
{
|
||||||
|
@ -6826,7 +6837,7 @@ public:
|
||||||
const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i));
|
const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i));
|
||||||
if (csize - i < stride)
|
if (csize - i < stride)
|
||||||
{
|
{
|
||||||
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>()), true), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()), true);
|
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>())), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -7086,18 +7097,51 @@ public:
|
||||||
void ROTM(spu_opcode_t op)
|
void ROTM(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||||
set_vr(op.rt, inf_lshr(a, -b & 63));
|
|
||||||
|
auto minusb = eval(-b);
|
||||||
|
if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
|
||||||
|
{
|
||||||
|
minusb = eval(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
|
||||||
|
{
|
||||||
|
set_vr(op.rt, a >> (minusb & 31));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
set_vr(op.rt, inf_lshr(a, minusb & 63));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ROTMA(spu_opcode_t op)
|
void ROTMA(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb);
|
||||||
set_vr(op.rt, inf_ashr(a, -b & 63));
|
|
||||||
|
auto minusb = eval(-b);
|
||||||
|
if (auto [ok, x] = match_expr(b, -match<s32[4]>()); ok)
|
||||||
|
{
|
||||||
|
minusb = eval(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
|
||||||
|
{
|
||||||
|
set_vr(op.rt, a >> (minusb & 31));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
set_vr(op.rt, inf_ashr(a, minusb & 63));
|
||||||
}
|
}
|
||||||
|
|
||||||
void SHL(spu_opcode_t op)
|
void SHL(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||||
|
|
||||||
|
if (auto k = get_known_bits(b); (k & kbc<u32>(32)).isZero())
|
||||||
|
{
|
||||||
|
set_vr(op.rt, a << (b & 31));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
set_vr(op.rt, inf_shl(a, b & 63));
|
set_vr(op.rt, inf_shl(a, b & 63));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7110,18 +7154,51 @@ public:
|
||||||
void ROTHM(spu_opcode_t op)
|
void ROTHM(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
||||||
set_vr(op.rt, inf_lshr(a, -b & 31));
|
|
||||||
|
auto minusb = eval(-b);
|
||||||
|
if (auto [ok, x] = match_expr(b, -match<u16[8]>()); ok)
|
||||||
|
{
|
||||||
|
minusb = eval(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
|
||||||
|
{
|
||||||
|
set_vr(op.rt, a >> (minusb & 15));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
set_vr(op.rt, inf_lshr(a, minusb & 31));
|
||||||
}
|
}
|
||||||
|
|
||||||
void ROTMAH(spu_opcode_t op)
|
void ROTMAH(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb);
|
||||||
set_vr(op.rt, inf_ashr(a, -b & 31));
|
|
||||||
|
auto minusb = eval(-b);
|
||||||
|
if (auto [ok, x] = match_expr(b, -match<s16[8]>()); ok)
|
||||||
|
{
|
||||||
|
minusb = eval(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
|
||||||
|
{
|
||||||
|
set_vr(op.rt, a >> (minusb & 15));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
set_vr(op.rt, inf_ashr(a, minusb & 31));
|
||||||
}
|
}
|
||||||
|
|
||||||
void SHLH(spu_opcode_t op)
|
void SHLH(spu_opcode_t op)
|
||||||
{
|
{
|
||||||
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
||||||
|
|
||||||
|
if (auto k = get_known_bits(b); (k & kbc<u16>(16)).isZero())
|
||||||
|
{
|
||||||
|
set_vr(op.rt, a << (b & 15));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
set_vr(op.rt, inf_shl(a, b & 31));
|
set_vr(op.rt, inf_shl(a, b & 31));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8093,6 +8170,12 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (auto [ok, y] = match_expr(x, bitcast<bool[std::extent_v<VT>]>(match<get_int_vt<std::extent_v<VT>>>())); ok)
|
||||||
|
{
|
||||||
|
// Don't ruin FSMB/FSM/FSMH instructions
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra)));
|
set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra)));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -8337,9 +8420,9 @@ public:
|
||||||
const auto bx = pshufb(bs, c);
|
const auto bx = pshufb(bs, c);
|
||||||
|
|
||||||
if (perm_only)
|
if (perm_only)
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx));
|
set_vr(op.rt4, select_by_bit4(c, ax, bx));
|
||||||
else
|
else
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx) | x);
|
set_vr(op.rt4, select_by_bit4(c, ax, bx) | x);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8352,9 +8435,9 @@ public:
|
||||||
const auto ax = pshufb(as, c);
|
const auto ax = pshufb(as, c);
|
||||||
|
|
||||||
if (perm_only)
|
if (perm_only)
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b));
|
set_vr(op.rt4, select_by_bit4(c, ax, b));
|
||||||
else
|
else
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b) | x);
|
set_vr(op.rt4, select_by_bit4(c, ax, b) | x);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8371,9 +8454,9 @@ public:
|
||||||
const auto bx = pshufb(bs, c);
|
const auto bx = pshufb(bs, c);
|
||||||
|
|
||||||
if (perm_only)
|
if (perm_only)
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx));
|
set_vr(op.rt4, select_by_bit4(c, a, bx));
|
||||||
else
|
else
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx) | x);
|
set_vr(op.rt4, select_by_bit4(c, a, bx) | x);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -8401,9 +8484,9 @@ public:
|
||||||
const auto bx = pshufb(b, cr);
|
const auto bx = pshufb(b, cr);
|
||||||
|
|
||||||
if (perm_only)
|
if (perm_only)
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx));
|
set_vr(op.rt4, select_by_bit4(cr, ax, bx));
|
||||||
else
|
else
|
||||||
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx) | x);
|
set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MPYA(spu_opcode_t op)
|
void MPYA(spu_opcode_t op)
|
||||||
|
@ -9611,13 +9694,13 @@ public:
|
||||||
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
|
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
|
||||||
{
|
{
|
||||||
const auto bswapped = byteswap(data);
|
const auto bswapped = byteswap(data);
|
||||||
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
|
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto make_load_ls(value_t<u64> addr)
|
auto make_load_ls(value_t<u64> addr)
|
||||||
{
|
{
|
||||||
value_t<u8[16]> data;
|
value_t<u8[16]> data;
|
||||||
data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true);
|
data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
|
||||||
return byteswap(data);
|
return byteswap(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9839,7 +9922,7 @@ public:
|
||||||
target->addIncoming(e_addr, e_exec);
|
target->addIncoming(e_addr, e_exec);
|
||||||
m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
|
m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
|
||||||
m_ir->SetInsertPoint(d_exec);
|
m_ir->SetInsertPoint(d_exec);
|
||||||
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
|
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
|
||||||
m_ir->CreateBr(d_done);
|
m_ir->CreateBr(d_done);
|
||||||
m_ir->SetInsertPoint(d_done);
|
m_ir->SetInsertPoint(d_done);
|
||||||
m_ir->CreateBr(m_interp_bblock);
|
m_ir->CreateBr(m_interp_bblock);
|
||||||
|
@ -9890,7 +9973,7 @@ public:
|
||||||
|
|
||||||
if (op.d)
|
if (op.d)
|
||||||
{
|
{
|
||||||
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true);
|
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
|
||||||
}
|
}
|
||||||
|
|
||||||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
|
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
|
||||||
|
@ -10211,7 +10294,7 @@ public:
|
||||||
|
|
||||||
// Exit function on unexpected target
|
// Exit function on unexpected target
|
||||||
m_ir->SetInsertPoint(sw->getDefaultDest());
|
m_ir->SetInsertPoint(sw->getDefaultDest());
|
||||||
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc), true);
|
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
|
||||||
|
|
||||||
if (m_finfo && m_finfo->fn)
|
if (m_finfo && m_finfo->fn)
|
||||||
{
|
{
|
||||||
|
|
|
@ -136,7 +136,7 @@ namespace psf
|
||||||
{
|
{
|
||||||
std::string_view value{value_array, CharN};
|
std::string_view value{value_array, CharN};
|
||||||
value = value.substr(0, std::min<usz>(value.find_first_of('\0'), value.size()));
|
value = value.substr(0, std::min<usz>(value.find_first_of('\0'), value.size()));
|
||||||
return string(CharN, value, allow_truncate);
|
return string(max_size, value, allow_truncate);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make array entry
|
// Make array entry
|
||||||
|
|
|
@ -71,16 +71,4 @@ namespace utils
|
||||||
u64 _get_main_tid();
|
u64 _get_main_tid();
|
||||||
|
|
||||||
inline const u64 main_tid = _get_main_tid();
|
inline const u64 main_tid = _get_main_tid();
|
||||||
|
|
||||||
#ifdef LLVM_AVAILABLE
|
|
||||||
|
|
||||||
#if defined(ARCH_X64)
|
|
||||||
const std::string c_llvm_default_triple = "x86_64-unknown-linux-gnu";
|
|
||||||
#elif defined(ARCH_ARM64)
|
|
||||||
const std::string c_llvm_default_triple = "arm64-unknown-linux-gnu";
|
|
||||||
#else
|
|
||||||
const std::string c_llvm_default_triple = "Unimplemented!"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,7 +115,7 @@ namespace std
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__INTELLISENSE__)
|
#if defined(__INTELLISENSE__) || (defined (__clang__) && (__clang_major__ <= 16))
|
||||||
#define consteval constexpr
|
#define consteval constexpr
|
||||||
#define constinit
|
#define constinit
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue