Update 7z to 23.01

This commit is contained in:
Megamouse 2023-12-13 20:36:41 +01:00
parent 98b7642232
commit 00a80adfae
100 changed files with 11232 additions and 3906 deletions

View file

@ -26,14 +26,18 @@
<ClInclude Include="src\7zFile.h" /> <ClInclude Include="src\7zFile.h" />
<ClInclude Include="src\7zTypes.h" /> <ClInclude Include="src\7zTypes.h" />
<ClInclude Include="src\7zVersion.h" /> <ClInclude Include="src\7zVersion.h" />
<ClInclude Include="src\7zWindows.h" />
<ClInclude Include="src\Aes.h" /> <ClInclude Include="src\Aes.h" />
<ClInclude Include="src\Alloc.h" /> <ClInclude Include="src\Alloc.h" />
<ClInclude Include="src\Bcj2.h" /> <ClInclude Include="src\Bcj2.h" />
<ClInclude Include="src\Blake2.h" />
<ClInclude Include="src\Bra.h" /> <ClInclude Include="src\Bra.h" />
<ClInclude Include="src\BwtSort.h" />
<ClInclude Include="src\Compiler.h" /> <ClInclude Include="src\Compiler.h" />
<ClInclude Include="src\CpuArch.h" /> <ClInclude Include="src\CpuArch.h" />
<ClInclude Include="src\Delta.h" /> <ClInclude Include="src\Delta.h" />
<ClInclude Include="src\DllSecur.h" /> <ClInclude Include="src\DllSecur.h" />
<ClInclude Include="src\HuffEnc.h" />
<ClInclude Include="src\LzFind.h" /> <ClInclude Include="src\LzFind.h" />
<ClInclude Include="src\LzFindMt.h" /> <ClInclude Include="src\LzFindMt.h" />
<ClInclude Include="src\LzHash.h" /> <ClInclude Include="src\LzHash.h" />
@ -48,10 +52,13 @@
<ClInclude Include="src\MtDec.h" /> <ClInclude Include="src\MtDec.h" />
<ClInclude Include="src\Ppmd.h" /> <ClInclude Include="src\Ppmd.h" />
<ClInclude Include="src\Ppmd7.h" /> <ClInclude Include="src\Ppmd7.h" />
<ClInclude Include="src\Ppmd8.h" />
<ClInclude Include="src\Precomp.h" /> <ClInclude Include="src\Precomp.h" />
<ClInclude Include="src\RotateDefs.h" /> <ClInclude Include="src\RotateDefs.h" />
<ClInclude Include="src\Sha1.h" />
<ClInclude Include="src\Sha256.h" /> <ClInclude Include="src\Sha256.h" />
<ClInclude Include="src\Sort.h" /> <ClInclude Include="src\Sort.h" />
<ClInclude Include="src\SwapBytes.h" />
<ClInclude Include="src\Threads.h" /> <ClInclude Include="src\Threads.h" />
<ClInclude Include="src\Xz.h" /> <ClInclude Include="src\Xz.h" />
<ClInclude Include="src\XzCrc64.h" /> <ClInclude Include="src\XzCrc64.h" />
@ -72,14 +79,18 @@
<ClCompile Include="src\Alloc.c" /> <ClCompile Include="src\Alloc.c" />
<ClCompile Include="src\Bcj2.c" /> <ClCompile Include="src\Bcj2.c" />
<ClCompile Include="src\Bcj2Enc.c" /> <ClCompile Include="src\Bcj2Enc.c" />
<ClCompile Include="src\Blake2s.c" />
<ClCompile Include="src\Bra.c" /> <ClCompile Include="src\Bra.c" />
<ClCompile Include="src\Bra86.c" /> <ClCompile Include="src\Bra86.c" />
<ClCompile Include="src\BraIA64.c" /> <ClCompile Include="src\BraIA64.c" />
<ClCompile Include="src\BwtSort.c" />
<ClCompile Include="src\CpuArch.c" /> <ClCompile Include="src\CpuArch.c" />
<ClCompile Include="src\Delta.c" /> <ClCompile Include="src\Delta.c" />
<ClCompile Include="src\DllSecur.c" /> <ClCompile Include="src\DllSecur.c" />
<ClCompile Include="src\HuffEnc.c" />
<ClCompile Include="src\LzFind.c" /> <ClCompile Include="src\LzFind.c" />
<ClCompile Include="src\LzFindMt.c" /> <ClCompile Include="src\LzFindMt.c" />
<ClCompile Include="src\LzFindOpt.c" />
<ClCompile Include="src\Lzma2Dec.c" /> <ClCompile Include="src\Lzma2Dec.c" />
<ClCompile Include="src\Lzma2DecMt.c" /> <ClCompile Include="src\Lzma2DecMt.c" />
<ClCompile Include="src\Lzma2Enc.c" /> <ClCompile Include="src\Lzma2Enc.c" />
@ -91,10 +102,18 @@
<ClCompile Include="src\MtCoder.c" /> <ClCompile Include="src\MtCoder.c" />
<ClCompile Include="src\MtDec.c" /> <ClCompile Include="src\MtDec.c" />
<ClCompile Include="src\Ppmd7.c" /> <ClCompile Include="src\Ppmd7.c" />
<ClCompile Include="src\Ppmd7aDec.c" />
<ClCompile Include="src\Ppmd7Dec.c" /> <ClCompile Include="src\Ppmd7Dec.c" />
<ClCompile Include="src\Ppmd7Enc.c" /> <ClCompile Include="src\Ppmd7Enc.c" />
<ClCompile Include="src\Ppmd8.c" />
<ClCompile Include="src\Ppmd8Dec.c" />
<ClCompile Include="src\Ppmd8Enc.c" />
<ClCompile Include="src\Sha1.c" />
<ClCompile Include="src\Sha1Opt.c" />
<ClCompile Include="src\Sha256.c" /> <ClCompile Include="src\Sha256.c" />
<ClCompile Include="src\Sha256Opt.c" />
<ClCompile Include="src\Sort.c" /> <ClCompile Include="src\Sort.c" />
<ClCompile Include="src\SwapBytes.c" />
<ClCompile Include="src\Threads.c" /> <ClCompile Include="src\Threads.c" />
<ClCompile Include="src\Xz.c" /> <ClCompile Include="src\Xz.c" />
<ClCompile Include="src\XzCrc64.c" /> <ClCompile Include="src\XzCrc64.c" />

View file

@ -1,264 +1,107 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup> <ItemGroup>
<Filter Include="Source Files"> <ClInclude Include="src\7z.h" />
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> <ClInclude Include="src\7zAlloc.h" />
<Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> <ClInclude Include="src\7zBuf.h" />
</Filter> <ClInclude Include="src\7zCrc.h" />
<Filter Include="Header Files"> <ClInclude Include="src\7zFile.h" />
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> <ClInclude Include="src\7zTypes.h" />
<Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions> <ClInclude Include="src\7zVersion.h" />
</Filter> <ClInclude Include="src\7zWindows.h" />
<Filter Include="Resource Files"> <ClInclude Include="src\Aes.h" />
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> <ClInclude Include="src\Alloc.h" />
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> <ClInclude Include="src\Bcj2.h" />
</Filter> <ClInclude Include="src\Blake2.h" />
<ClInclude Include="src\Bra.h" />
<ClInclude Include="src\BwtSort.h" />
<ClInclude Include="src\Compiler.h" />
<ClInclude Include="src\CpuArch.h" />
<ClInclude Include="src\Delta.h" />
<ClInclude Include="src\DllSecur.h" />
<ClInclude Include="src\HuffEnc.h" />
<ClInclude Include="src\LzFind.h" />
<ClInclude Include="src\LzFindMt.h" />
<ClInclude Include="src\LzHash.h" />
<ClInclude Include="src\Lzma2Dec.h" />
<ClInclude Include="src\Lzma2DecMt.h" />
<ClInclude Include="src\Lzma2Enc.h" />
<ClInclude Include="src\Lzma86.h" />
<ClInclude Include="src\LzmaDec.h" />
<ClInclude Include="src\LzmaEnc.h" />
<ClInclude Include="src\LzmaLib.h" />
<ClInclude Include="src\MtCoder.h" />
<ClInclude Include="src\MtDec.h" />
<ClInclude Include="src\Ppmd.h" />
<ClInclude Include="src\Ppmd7.h" />
<ClInclude Include="src\Ppmd8.h" />
<ClInclude Include="src\Precomp.h" />
<ClInclude Include="src\RotateDefs.h" />
<ClInclude Include="src\Sha1.h" />
<ClInclude Include="src\Sha256.h" />
<ClInclude Include="src\Sort.h" />
<ClInclude Include="src\SwapBytes.h" />
<ClInclude Include="src\Threads.h" />
<ClInclude Include="src\Xz.h" />
<ClInclude Include="src\XzCrc64.h" />
<ClInclude Include="src\XzEnc.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include"src\7z.h"> <ClCompile Include="src\7zAlloc.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\7zArcIn.c" />
</ClInclude> <ClCompile Include="src\7zBuf.c" />
<ClInclude Include"src\7zAlloc.h"> <ClCompile Include="src\7zBuf2.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\7zCrc.c" />
</ClInclude> <ClCompile Include="src\7zCrcOpt.c" />
<ClInclude Include"src\7zBuf.h"> <ClCompile Include="src\7zDec.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\7zFile.c" />
</ClInclude> <ClCompile Include="src\7zStream.c" />
<ClInclude Include"src\7zCrc.h"> <ClCompile Include="src\Aes.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\AesOpt.c" />
</ClInclude> <ClCompile Include="src\Alloc.c" />
<ClInclude Include"src\7zFile.h"> <ClCompile Include="src\Bcj2.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Bcj2Enc.c" />
</ClInclude> <ClCompile Include="src\Blake2s.c" />
<ClInclude Include"src\7zTypes.h"> <ClCompile Include="src\Bra.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Bra86.c" />
</ClInclude> <ClCompile Include="src\BraIA64.c" />
<ClInclude Include"src\7zVersion.h"> <ClCompile Include="src\BwtSort.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\CpuArch.c" />
</ClInclude> <ClCompile Include="src\Delta.c" />
<ClInclude Include"src\Aes.h"> <ClCompile Include="src\DllSecur.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\HuffEnc.c" />
</ClInclude> <ClCompile Include="src\LzFind.c" />
<ClInclude Include"src\Alloc.h"> <ClCompile Include="src\LzFindMt.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\LzFindOpt.c" />
</ClInclude> <ClCompile Include="src\Lzma2Dec.c" />
<ClInclude Include"src\Bcj2.h"> <ClCompile Include="src\Lzma2DecMt.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Lzma2Enc.c" />
</ClInclude> <ClCompile Include="src\Lzma86Dec.c" />
<ClInclude Include"src\Bra.h"> <ClCompile Include="src\Lzma86Enc.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\LzmaDec.c" />
</ClInclude> <ClCompile Include="src\LzmaEnc.c" />
<ClInclude Include"src\Compiler.h"> <ClCompile Include="src\LzmaLib.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\MtCoder.c" />
</ClInclude> <ClCompile Include="src\MtDec.c" />
<ClInclude Include"src\CpuArch.h"> <ClCompile Include="src\Ppmd7.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Ppmd7aDec.c" />
</ClInclude> <ClCompile Include="src\Ppmd7Dec.c" />
<ClInclude Include"src\Delta.h"> <ClCompile Include="src\Ppmd7Enc.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Ppmd8.c" />
</ClInclude> <ClCompile Include="src\Ppmd8Dec.c" />
<ClInclude Include"src\DllSecur.h"> <ClCompile Include="src\Ppmd8Enc.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Sha1.c" />
</ClInclude> <ClCompile Include="src\Sha1Opt.c" />
<ClInclude Include"src\LzFind.h"> <ClCompile Include="src\Sha256.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Sha256Opt.c" />
</ClInclude> <ClCompile Include="src\Sort.c" />
<ClInclude Include"src\LzFindMt.h"> <ClCompile Include="src\SwapBytes.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\Threads.c" />
</ClInclude> <ClCompile Include="src\Xz.c" />
<ClInclude Include"src\LzHash.h"> <ClCompile Include="src\XzCrc64.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\XzCrc64Opt.c" />
</ClInclude> <ClCompile Include="src\XzDec.c" />
<ClInclude Include"src\Lzma2Dec.h"> <ClCompile Include="src\XzEnc.c" />
<Filter>Header Files</Filter> <ClCompile Include="src\XzIn.c" />
</ClInclude>
<ClInclude Include"src\Lzma2DecMt.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Lzma2Enc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Lzma86.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\LzmaDec.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\LzmaEnc.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\LzmaLib.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\MtCoder.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\MtDec.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Ppmd.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Ppmd7.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Precomp.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\RotateDefs.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Sha256.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Sort.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Threads.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\Xz.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\XzCrc64.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include"src\XzEnc.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include"src\7zAlloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zArcIn.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zBuf.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zBuf2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zCrc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zCrcOpt.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zDec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zFile.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\7zStream.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Aes.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\AesOpt.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Alloc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Bcj2.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Bcj2Enc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Bra.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Bra86.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\BraIA64.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\CpuArch.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Delta.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\DllSecur.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\LzFind.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\LzFindMt.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Lzma2Dec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Lzma2DecMt.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Lzma2Enc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Lzma86Dec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Lzma86Enc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\LzmaDec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\LzmaEnc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\LzmaLib.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\MtCoder.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\MtDec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Ppmd7.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Ppmd7Dec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Ppmd7Enc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Sha256.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Sort.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Threads.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\Xz.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\XzCrc64.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\XzCrc64Opt.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\XzDec.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\XzEnc.c">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include"src\XzIn.c">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View file

@ -1,50 +1,62 @@
# 7z sdk # 7z sdk
if(WIN32) if(WIN32)
add_library(3rdparty_7z STATIC EXCLUDE_FROM_ALL add_library(3rdparty_7z STATIC EXCLUDE_FROM_ALL
src/Sha256.c src/7zAlloc.c
src/Ppmd7Dec.c src/7zArcIn.c
src/XzDec.c src/7zBuf.c
src/XzEnc.c src/7zBuf2.c
src/Lzma2Dec.c src/7zCrc.c
src/XzCrc64.c src/7zCrcOpt.c
src/DllSecur.c src/7zDec.c
src/Lzma2DecMt.c
src/BraIA64.c
src/7zFile.c src/7zFile.c
src/7zStream.c src/7zStream.c
src/Lzma86Enc.c
src/Threads.c
src/7zAlloc.c
src/LzmaEnc.c
src/MtCoder.c
src/Lzma86Dec.c
src/Sort.c
src/LzFindMt.c
src/7zDec.c
src/Bcj2.c
src/Ppmd7.c
src/Bra86.c
src/Bcj2Enc.c
src/7zBuf.c
src/Aes.c src/Aes.c
src/AesOpt.c src/AesOpt.c
src/XzCrc64Opt.c src/Alloc.c
src/7zArcIn.c src/Bcj2.c
src/Lzma2Enc.c src/Bcj2Enc.c
src/Blake2s.c
src/Bra.c src/Bra.c
src/7zCrcOpt.c src/Bra86.c
src/7zBuf2.c src/BraIA64.c
src/LzFind.c src/BwtSort.c
src/Ppmd7Enc.c
src/CpuArch.c src/CpuArch.c
src/Delta.c src/Delta.c
src/XzIn.c src/DllSecur.c
src/Alloc.c src/HuffEnc.c
src/Xz.c src/LzFind.c
src/LzFindMt.c
src/LzFindOpt.c
src/Lzma2Dec.c
src/Lzma2DecMt.c
src/Lzma2Enc.c
src/Lzma86Dec.c
src/Lzma86Enc.c
src/LzmaDec.c src/LzmaDec.c
src/LzmaEnc.c
src/LzmaLib.c src/LzmaLib.c
src/7zCrc.c src/MtCoder.c
src/MtDec.c) src/MtDec.c
src/Ppmd7.c
src/Ppmd7aDec.c
src/Ppmd7Dec.c
src/Ppmd7Enc.c
src/Ppmd8.c
src/Ppmd8Dec.c
src/Ppmd8Enc.c
src/Sha1.c
src/Sha1Opt.c
src/Sha256.c
src/Sha256Opt.c
src/Sort.c
src/SwapBytes.c
src/Threads.c
src/Xz.c
src/XzCrc64.c
src/XzCrc64Opt.c
src/XzDec.c
src/XzEnc.c
src/XzIn.c)
target_include_directories(3rdparty_7z INTERFACE target_include_directories(3rdparty_7z INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src> $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<INSTALL_INTERFACE:/src>) $<INSTALL_INTERFACE:/src>)

12
3rdparty/7z/src/7z.h vendored
View file

@ -1,8 +1,8 @@
/* 7z.h -- 7z interface /* 7z.h -- 7z interface
2018-07-02 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H #ifndef ZIP7_INC_7Z_H
#define __7Z_H #define ZIP7_INC_7Z_H
#include "7zTypes.h" #include "7zTypes.h"
@ -98,7 +98,7 @@ typedef struct
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex); UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *stream, UInt64 startPos, ILookInStreamPtr stream, UInt64 startPos,
Byte *outBuffer, size_t outSize, Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain); ISzAllocPtr allocMain);
@ -174,7 +174,7 @@ UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16
SRes SzArEx_Extract( SRes SzArEx_Extract(
const CSzArEx *db, const CSzArEx *db,
ILookInStream *inStream, ILookInStreamPtr inStream,
UInt32 fileIndex, /* index of file */ UInt32 fileIndex, /* index of file */
UInt32 *blockIndex, /* index of solid block */ UInt32 *blockIndex, /* index of solid block */
Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */ Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */
@ -196,7 +196,7 @@ SZ_ERROR_INPUT_EOF
SZ_ERROR_FAIL SZ_ERROR_FAIL
*/ */
SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream, SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream,
ISzAllocPtr allocMain, ISzAllocPtr allocTemp); ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
EXTERN_C_END EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* 7zAlloc.c -- Allocation functions /* 7zAlloc.c -- Allocation functions for 7z processing
2017-04-03 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -7,74 +7,83 @@
#include "7zAlloc.h" #include "7zAlloc.h"
/* #define _SZ_ALLOC_DEBUG */ /* #define SZ_ALLOC_DEBUG */
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ /* use SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
/*
#ifdef _WIN32 #ifdef _WIN32
#include <windows.h> #include "7zWindows.h"
#endif #endif
*/
#include <stdio.h> #include <stdio.h>
int g_allocCount = 0; static int g_allocCount = 0;
int g_allocCountTemp = 0; static int g_allocCountTemp = 0;
/* Debug helper (SZ_ALLOC_DEBUG builds): report one allocation on stderr.
   s       - label placed at the start of the message
   size    - requested byte count; narrowed to unsigned for the %u conversion
   counter - running allocation counter; its current value is printed,
             then it is advanced by one */
static void Print_Alloc(const char *s, size_t size, int *counter)
{
  fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, (unsigned)size);
  *counter += 1;
}
/* Debug helper (SZ_ALLOC_DEBUG builds): report one release on stderr.
   s       - label placed at the start of the message
   counter - running allocation counter; it is decremented first, so the
             printed value is the count remaining after this free */
static void Print_Free(const char *s, int *counter)
{
  const int remaining = --(*counter);
  fprintf(stderr, "\n%s count = %10d", s, remaining);
}
#endif #endif
void *SzAlloc(ISzAllocPtr p, size_t size) void *SzAlloc(ISzAllocPtr p, size_t size)
{ {
UNUSED_VAR(p); UNUSED_VAR(p)
if (size == 0) if (size == 0)
return 0; return 0;
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount); Print_Alloc("Alloc", size, &g_allocCount);
g_allocCount++;
#endif #endif
return malloc(size); return malloc(size);
} }
void SzFree(ISzAllocPtr p, void *address) void SzFree(ISzAllocPtr p, void *address)
{ {
UNUSED_VAR(p); UNUSED_VAR(p)
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
if (address != 0) if (address)
{ Print_Free("Free ", &g_allocCount);
g_allocCount--;
fprintf(stderr, "\nFree; count = %10d", g_allocCount);
}
#endif #endif
free(address); free(address);
} }
void *SzAllocTemp(ISzAllocPtr p, size_t size) void *SzAllocTemp(ISzAllocPtr p, size_t size)
{ {
UNUSED_VAR(p); UNUSED_VAR(p)
if (size == 0) if (size == 0)
return 0; return 0;
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc_temp %10u bytes; count = %10d", (unsigned)size, g_allocCountTemp); Print_Alloc("Alloc_temp", size, &g_allocCountTemp);
g_allocCountTemp++; /*
#ifdef _WIN32 #ifdef _WIN32
return HeapAlloc(GetProcessHeap(), 0, size); return HeapAlloc(GetProcessHeap(), 0, size);
#endif #endif
*/
#endif #endif
return malloc(size); return malloc(size);
} }
void SzFreeTemp(ISzAllocPtr p, void *address) void SzFreeTemp(ISzAllocPtr p, void *address)
{ {
UNUSED_VAR(p); UNUSED_VAR(p)
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
if (address != 0) if (address)
{ Print_Free("Free_temp ", &g_allocCountTemp);
g_allocCountTemp--; /*
fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp);
}
#ifdef _WIN32 #ifdef _WIN32
HeapFree(GetProcessHeap(), 0, address); HeapFree(GetProcessHeap(), 0, address);
return; return;
#endif #endif
*/
#endif #endif
free(address); free(address);
} }

View file

@ -1,8 +1,8 @@
/* 7zAlloc.h -- Allocation functions /* 7zAlloc.h -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#ifndef __7Z_ALLOC_H #ifndef ZIP7_INC_7Z_ALLOC_H
#define __7Z_ALLOC_H #define ZIP7_INC_7Z_ALLOC_H
#include "7zTypes.h" #include "7zTypes.h"

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
/* 7zBuf.h -- Byte Buffer /* 7zBuf.h -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#ifndef __7Z_BUF_H #ifndef ZIP7_INC_7Z_BUF_H
#define __7Z_BUF_H #define ZIP7_INC_7Z_BUF_H
#include "7zTypes.h" #include "7zTypes.h"

View file

@ -1,5 +1,5 @@
/* 7zCrc.c -- CRC32 init /* 7zCrc.c -- CRC32 calculation and init
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -13,22 +13,20 @@
#else #else
#define CRC_NUM_TABLES 9 #define CRC_NUM_TABLES 9
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24)) UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif #endif
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif #endif
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table); /*
extern extern
CRC_FUNC g_CrcUpdateT4; CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT4; CRC_FUNC g_CrcUpdateT4;
*/
extern extern
CRC_FUNC g_CrcUpdateT8; CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8; CRC_FUNC g_CrcUpdateT8;
@ -44,20 +42,22 @@ CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES]; UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size) UInt32 Z7_FASTCALL CrcUpdate(UInt32 v, const void *data, size_t size)
{ {
return g_CrcUpdate(v, data, size, g_CrcTable); return g_CrcUpdate(v, data, size, g_CrcTable);
} }
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size) UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
{ {
return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL; return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
} }
#if CRC_NUM_TABLES < 4 \
|| (CRC_NUM_TABLES == 4 && defined(MY_CPU_BE)) \
|| (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
const Byte *pEnd = p + size; const Byte *pEnd = p + size;
@ -65,7 +65,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
v = CRC_UPDATE_BYTE_2(v, *p); v = CRC_UPDATE_BYTE_2(v, *p);
return v; return v;
} }
#endif
/* ---------- hardware CRC ---------- */ /* ---------- hardware CRC ---------- */
@ -78,16 +78,29 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
#if defined(_MSC_VER) #if defined(_MSC_VER)
#if defined(MY_CPU_ARM64) #if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910) #if (_MSC_VER >= 1910)
#ifndef __clang__
#define USE_ARM64_CRC #define USE_ARM64_CRC
#include <intrin.h>
#endif
#endif #endif
#endif #endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \ #elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4)) || (defined(__GNUC__) && (__GNUC__ > 4))
#if !defined(__ARM_FEATURE_CRC32) #if !defined(__ARM_FEATURE_CRC32)
#define __ARM_FEATURE_CRC32 1 #define __ARM_FEATURE_CRC32 1
#if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers #if defined(__clang__)
#if defined(MY_CPU_ARM64)
#define ATTRIB_CRC __attribute__((__target__("crc")))
#else
#define ATTRIB_CRC __attribute__((__target__("armv8-a,crc")))
#endif
#else
#if defined(MY_CPU_ARM64)
#define ATTRIB_CRC __attribute__((__target__("+crc")))
#else
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc"))) #define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif #endif
#endif
#endif #endif
#if defined(__ARM_FEATURE_CRC32) #if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC #define USE_ARM64_CRC
@ -105,7 +118,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const U
#pragma message("ARM64 CRC emulation") #pragma message("ARM64 CRC emulation")
MY_FORCE_INLINE Z7_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data) UInt32 __crc32b(UInt32 v, UInt32 data)
{ {
const UInt32 *table = g_CrcTable; const UInt32 *table = g_CrcTable;
@ -113,7 +126,7 @@ UInt32 __crc32b(UInt32 v, UInt32 data)
return v; return v;
} }
MY_FORCE_INLINE Z7_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data) UInt32 __crc32w(UInt32 v, UInt32 data)
{ {
const UInt32 *table = g_CrcTable; const UInt32 *table = g_CrcTable;
@ -124,7 +137,7 @@ UInt32 __crc32w(UInt32 v, UInt32 data)
return v; return v;
} }
MY_FORCE_INLINE Z7_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data) UInt32 __crc32d(UInt32 v, UInt64 data)
{ {
const UInt32 *table = g_CrcTable; const UInt32 *table = g_CrcTable;
@ -156,9 +169,9 @@ UInt32 __crc32d(UInt32 v, UInt64 data)
// #pragma message("USE ARM HW CRC") // #pragma message("USE ARM HW CRC")
ATTRIB_CRC ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table) UInt32 Z7_FASTCALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
UNUSED_VAR(table); UNUSED_VAR(table);
@ -188,9 +201,9 @@ UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, cons
} }
ATTRIB_CRC ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table) UInt32 Z7_FASTCALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
UNUSED_VAR(table); UNUSED_VAR(table);
@ -219,6 +232,9 @@ UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, cons
return v; return v;
} }
#undef T0_32_UNROLL_BYTES
#undef T0_64_UNROLL_BYTES
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU) #endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#endif // MY_CPU_LE #endif // MY_CPU_LE
@ -226,7 +242,7 @@ UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, cons
void MY_FAST_CALL CrcGenerateTable() void Z7_FASTCALL CrcGenerateTable(void)
{ {
UInt32 i; UInt32 i;
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
@ -239,64 +255,62 @@ void MY_FAST_CALL CrcGenerateTable()
} }
for (i = 256; i < 256 * CRC_NUM_TABLES; i++) for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
{ {
UInt32 r = g_CrcTable[(size_t)i - 256]; const UInt32 r = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8); g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
} }
#if CRC_NUM_TABLES < 4 #if CRC_NUM_TABLES < 4
g_CrcUpdate = CrcUpdateT1;
g_CrcUpdate = CrcUpdateT1; #elif defined(MY_CPU_LE)
// g_CrcUpdateT4 = CrcUpdateT4;
#else #if CRC_NUM_TABLES < 8
g_CrcUpdate = CrcUpdateT4;
#ifdef MY_CPU_LE #else // CRC_NUM_TABLES >= 8
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8; g_CrcUpdateT8 = CrcUpdateT8;
/*
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
if (!CPU_Is_InOrder()) if (!CPU_Is_InOrder())
#endif #endif
g_CrcUpdate = CrcUpdateT8; */
g_CrcUpdate = CrcUpdateT8;
#endif #endif
#else #else
{ {
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
UInt32 k = 0x01020304; UInt32 k = 0x01020304;
const Byte *p = (const Byte *)&k; const Byte *p = (const Byte *)&k;
if (p[0] == 4 && p[1] == 3) if (p[0] == 4 && p[1] == 3)
{ {
g_CrcUpdateT4 = CrcUpdateT4; #if CRC_NUM_TABLES < 8
g_CrcUpdate = CrcUpdateT4; // g_CrcUpdateT4 = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8 g_CrcUpdate = CrcUpdateT4;
g_CrcUpdateT8 = CrcUpdateT8; #else // CRC_NUM_TABLES >= 8
g_CrcUpdate = CrcUpdateT8; g_CrcUpdateT8 = CrcUpdateT8;
g_CrcUpdate = CrcUpdateT8;
#endif #endif
} }
else if (p[0] != 1 || p[1] != 2) else if (p[0] != 1 || p[1] != 2)
g_CrcUpdate = CrcUpdateT1; g_CrcUpdate = CrcUpdateT1;
else else
#endif #endif // MY_CPU_BE
{ {
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--) for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
{ {
UInt32 x = g_CrcTable[(size_t)i - 256]; const UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = CRC_UINT32_SWAP(x); g_CrcTable[i] = Z7_BSWAP32(x);
} }
g_CrcUpdateT4 = CrcUpdateT1_BeT4; #if CRC_NUM_TABLES <= 4
g_CrcUpdate = CrcUpdateT1_BeT4; g_CrcUpdate = CrcUpdateT1;
#if CRC_NUM_TABLES >= 8 #elif CRC_NUM_TABLES <= 8
g_CrcUpdateT8 = CrcUpdateT1_BeT8; // g_CrcUpdateT4 = CrcUpdateT1_BeT4;
g_CrcUpdate = CrcUpdateT1_BeT8; g_CrcUpdate = CrcUpdateT1_BeT4;
#else // CRC_NUM_TABLES > 8
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
g_CrcUpdate = CrcUpdateT1_BeT8;
#endif #endif
} }
} }
#endif #endif // CRC_NUM_TABLES < 4
#endif
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC #ifdef USE_ARM64_CRC
@ -320,3 +334,7 @@ void MY_FAST_CALL CrcGenerateTable()
#endif #endif
#endif #endif
} }
/* Undefine the helper macros at the end of the translation unit.
   The table-count macro used throughout this file is CRC_NUM_TABLES;
   the former "#undef CRC64_NUM_TABLES" named a macro this file never uses,
   so it was a no-op and CRC_NUM_TABLES was left defined. */
#undef kCrcPoly
#undef CRC_NUM_TABLES
#undef CRC_UPDATE_BYTE_2

View file

@ -1,8 +1,8 @@
/* 7zCrc.h -- CRC32 calculation /* 7zCrc.h -- CRC32 calculation
2013-01-18 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_CRC_H #ifndef ZIP7_INC_7Z_CRC_H
#define __7Z_CRC_H #define ZIP7_INC_7Z_CRC_H
#include "7zTypes.h" #include "7zTypes.h"
@ -11,14 +11,16 @@ EXTERN_C_BEGIN
extern UInt32 g_CrcTable[]; extern UInt32 g_CrcTable[];
/* Call CrcGenerateTable one time before other CRC functions */ /* Call CrcGenerateTable one time before other CRC functions */
void MY_FAST_CALL CrcGenerateTable(void); void Z7_FASTCALL CrcGenerateTable(void);
#define CRC_INIT_VAL 0xFFFFFFFF #define CRC_INIT_VAL 0xFFFFFFFF
#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL) #define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size); UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size); UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size);
/* Pointer type for a CRC update routine using the Z7_FASTCALL convention:
   takes the running value (v), the input pointer (data), its (size),
   and a (table) pointer, and returns the updated UInt32 value. */
typedef UInt32 (Z7_FASTCALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
EXTERN_C_END EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* 7zCrcOpt.c -- CRC32 calculation /* 7zCrcOpt.c -- CRC32 calculation
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -9,8 +9,8 @@
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table) UInt32 Z7_FASTCALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
@ -29,8 +29,8 @@ UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const U
return v; return v;
} }
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table); UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table) UInt32 Z7_FASTCALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++) for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
@ -61,11 +61,11 @@ UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const U
#ifndef MY_CPU_LE #ifndef MY_CPU_LE
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24)) #define CRC_UINT32_SWAP(v) Z7_BSWAP32(v)
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8)) #define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table) UInt32 Z7_FASTCALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
table += 0x100; table += 0x100;
@ -86,7 +86,7 @@ UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, co
return CRC_UINT32_SWAP(v); return CRC_UINT32_SWAP(v);
} }
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table) UInt32 Z7_FASTCALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
table += 0x100; table += 0x100;

View file

@ -1,11 +1,11 @@
/* 7zDec.c -- Decoding from 7z folder /* 7zDec.c -- Decoding from 7z folder
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include <string.h> #include <string.h>
/* #define _7ZIP_PPMD_SUPPPORT */ /* #define Z7_PPMD_SUPPORT */
#include "7z.h" #include "7z.h"
#include "7zCrc.h" #include "7zCrc.h"
@ -16,27 +16,49 @@
#include "Delta.h" #include "Delta.h"
#include "LzmaDec.h" #include "LzmaDec.h"
#include "Lzma2Dec.h" #include "Lzma2Dec.h"
#ifdef _7ZIP_PPMD_SUPPPORT #ifdef Z7_PPMD_SUPPORT
#include "Ppmd7.h" #include "Ppmd7.h"
#endif #endif
#define k_Copy 0 #define k_Copy 0
#ifndef _7Z_NO_METHOD_LZMA2 #ifndef Z7_NO_METHOD_LZMA2
#define k_LZMA2 0x21 #define k_LZMA2 0x21
#endif #endif
#define k_LZMA 0x30101 #define k_LZMA 0x30101
#define k_BCJ2 0x303011B #define k_BCJ2 0x303011B
#ifndef _7Z_NO_METHODS_FILTERS
/* Z7_USE_BRANCH_FILTER is defined when at least one branch-converter filter
   is compiled in: either filters are not disabled at all, or one of the
   natively selected ARM64 / ARMT filters below is enabled. */
#if !defined(Z7_NO_METHODS_FILTERS)
#define Z7_USE_BRANCH_FILTER
#endif
/* ARM64 filter: enabled when filters are not disabled, OR when the native
   branch filter is requested AND the target is ARM64
   (&& binds tighter than ||, so the last two tests group together).
   k_ARM64 is the value compared against the coder's MethodID. */
#if !defined(Z7_NO_METHODS_FILTERS) || \
defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64)
#define Z7_USE_FILTER_ARM64
#ifndef Z7_USE_BRANCH_FILTER
#define Z7_USE_BRANCH_FILTER
#endif
#define k_ARM64 0xa
#endif
/* ARMT filter: same selection rule as ARM64 above, keyed on MY_CPU_ARMT.
   k_ARMT is the value compared against the coder's MethodID. */
#if !defined(Z7_NO_METHODS_FILTERS) || \
defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT)
#define Z7_USE_FILTER_ARMT
#ifndef Z7_USE_BRANCH_FILTER
#define Z7_USE_BRANCH_FILTER
#endif
#define k_ARMT 0x3030701
#endif
#ifndef Z7_NO_METHODS_FILTERS
#define k_Delta 3 #define k_Delta 3
#define k_BCJ 0x3030103 #define k_BCJ 0x3030103
#define k_PPC 0x3030205 #define k_PPC 0x3030205
#define k_IA64 0x3030401 #define k_IA64 0x3030401
#define k_ARM 0x3030501 #define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805 #define k_SPARC 0x3030805
#endif #endif
#ifdef _7ZIP_PPMD_SUPPPORT #ifdef Z7_PPMD_SUPPORT
#define k_PPMD 0x30401 #define k_PPMD 0x30401
@ -49,12 +71,12 @@ typedef struct
UInt64 processed; UInt64 processed;
BoolInt extra; BoolInt extra;
SRes res; SRes res;
const ILookInStream *inStream; ILookInStreamPtr inStream;
} CByteInToLook; } CByteInToLook;
static Byte ReadByte(const IByteIn *pp) static Byte ReadByte(IByteInPtr pp)
{ {
CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook)
if (p->cur != p->end) if (p->cur != p->end)
return *p->cur++; return *p->cur++;
if (p->res == SZ_OK) if (p->res == SZ_OK)
@ -67,13 +89,13 @@ static Byte ReadByte(const IByteIn *pp)
p->cur = p->begin; p->cur = p->begin;
p->end = p->begin + size; p->end = p->begin + size;
if (size != 0) if (size != 0)
return *p->cur++;; return *p->cur++;
} }
p->extra = True; p->extra = True;
return 0; return 0;
} }
static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream, static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{ {
CPpmd7 ppmd; CPpmd7 ppmd;
@ -138,14 +160,14 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
#endif #endif
static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream, static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{ {
CLzmaDec state; CLzmaDec state;
SRes res = SZ_OK; SRes res = SZ_OK;
LzmaDec_Construct(&state); LzmaDec_CONSTRUCT(&state)
RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain)); RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain))
state.dic = outBuffer; state.dic = outBuffer;
state.dicBufSize = outSize; state.dicBufSize = outSize;
LzmaDec_Init(&state); LzmaDec_Init(&state);
@ -196,18 +218,18 @@ static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, I
} }
#ifndef _7Z_NO_METHOD_LZMA2 #ifndef Z7_NO_METHOD_LZMA2
static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream, static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain) Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{ {
CLzma2Dec state; CLzma2Dec state;
SRes res = SZ_OK; SRes res = SZ_OK;
Lzma2Dec_Construct(&state); Lzma2Dec_CONSTRUCT(&state)
if (propsSize != 1) if (propsSize != 1)
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain)); RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain))
state.decoder.dic = outBuffer; state.decoder.dic = outBuffer;
state.decoder.dicBufSize = outSize; state.decoder.dicBufSize = outSize;
Lzma2Dec_Init(&state); Lzma2Dec_Init(&state);
@ -257,7 +279,7 @@ static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize,
#endif #endif
static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer) static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer)
{ {
while (inSize > 0) while (inSize > 0)
{ {
@ -265,13 +287,13 @@ static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer
size_t curSize = (1 << 18); size_t curSize = (1 << 18);
if (curSize > inSize) if (curSize > inSize)
curSize = (size_t)inSize; curSize = (size_t)inSize;
RINOK(ILookInStream_Look(inStream, &inBuf, &curSize)); RINOK(ILookInStream_Look(inStream, &inBuf, &curSize))
if (curSize == 0) if (curSize == 0)
return SZ_ERROR_INPUT_EOF; return SZ_ERROR_INPUT_EOF;
memcpy(outBuffer, inBuf, curSize); memcpy(outBuffer, inBuf, curSize);
outBuffer += curSize; outBuffer += curSize;
inSize -= curSize; inSize -= curSize;
RINOK(ILookInStream_Skip(inStream, curSize)); RINOK(ILookInStream_Skip(inStream, curSize))
} }
return SZ_OK; return SZ_OK;
} }
@ -282,12 +304,12 @@ static BoolInt IS_MAIN_METHOD(UInt32 m)
{ {
case k_Copy: case k_Copy:
case k_LZMA: case k_LZMA:
#ifndef _7Z_NO_METHOD_LZMA2 #ifndef Z7_NO_METHOD_LZMA2
case k_LZMA2: case k_LZMA2:
#endif #endif
#ifdef _7ZIP_PPMD_SUPPPORT #ifdef Z7_PPMD_SUPPORT
case k_PPMD: case k_PPMD:
#endif #endif
return True; return True;
} }
return False; return False;
@ -317,7 +339,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
} }
#ifndef _7Z_NO_METHODS_FILTERS #if defined(Z7_USE_BRANCH_FILTER)
if (f->NumCoders == 2) if (f->NumCoders == 2)
{ {
@ -333,13 +355,20 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
switch ((UInt32)c->MethodID) switch ((UInt32)c->MethodID)
{ {
#if !defined(Z7_NO_METHODS_FILTERS)
case k_Delta: case k_Delta:
case k_BCJ: case k_BCJ:
case k_PPC: case k_PPC:
case k_IA64: case k_IA64:
case k_SPARC: case k_SPARC:
case k_ARM: case k_ARM:
#endif
#ifdef Z7_USE_FILTER_ARM64
case k_ARM64:
#endif
#ifdef Z7_USE_FILTER_ARMT
case k_ARMT: case k_ARMT:
#endif
break; break;
default: default:
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
@ -372,15 +401,16 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
} }
#ifndef _7Z_NO_METHODS_FILTERS
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
#endif
static SRes SzFolder_Decode2(const CSzFolder *folder, static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData, const Byte *propsData,
const UInt64 *unpackSizes, const UInt64 *unpackSizes,
const UInt64 *packPositions, const UInt64 *packPositions,
ILookInStream *inStream, UInt64 startPos, ILookInStreamPtr inStream, UInt64 startPos,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain, Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain,
Byte *tempBuf[]) Byte *tempBuf[])
{ {
@ -389,7 +419,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
SizeT tempSize3 = 0; SizeT tempSize3 = 0;
Byte *tempBuf3 = 0; Byte *tempBuf3 = 0;
RINOK(CheckSupportedFolder(folder)); RINOK(CheckSupportedFolder(folder))
for (ci = 0; ci < folder->NumCoders; ci++) for (ci = 0; ci < folder->NumCoders; ci++)
{ {
@ -404,8 +434,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
SizeT outSizeCur = outSize; SizeT outSizeCur = outSize;
if (folder->NumCoders == 4) if (folder->NumCoders == 4)
{ {
UInt32 indices[] = { 3, 2, 0 }; const UInt32 indices[] = { 3, 2, 0 };
UInt64 unpackSize = unpackSizes[ci]; const UInt64 unpackSize = unpackSizes[ci];
si = indices[ci]; si = indices[ci];
if (ci < 2) if (ci < 2)
{ {
@ -431,37 +461,37 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
} }
offset = packPositions[si]; offset = packPositions[si];
inSize = packPositions[(size_t)si + 1] - offset; inSize = packPositions[(size_t)si + 1] - offset;
RINOK(LookInStream_SeekTo(inStream, startPos + offset)); RINOK(LookInStream_SeekTo(inStream, startPos + offset))
if (coder->MethodID == k_Copy) if (coder->MethodID == k_Copy)
{ {
if (inSize != outSizeCur) /* check it */ if (inSize != outSizeCur) /* check it */
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
RINOK(SzDecodeCopy(inSize, inStream, outBufCur)); RINOK(SzDecodeCopy(inSize, inStream, outBufCur))
} }
else if (coder->MethodID == k_LZMA) else if (coder->MethodID == k_LZMA)
{ {
RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)); RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
} }
#ifndef _7Z_NO_METHOD_LZMA2 #ifndef Z7_NO_METHOD_LZMA2
else if (coder->MethodID == k_LZMA2) else if (coder->MethodID == k_LZMA2)
{ {
RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)); RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
} }
#endif #endif
#ifdef _7ZIP_PPMD_SUPPPORT #ifdef Z7_PPMD_SUPPORT
else if (coder->MethodID == k_PPMD) else if (coder->MethodID == k_PPMD)
{ {
RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain)); RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
} }
#endif #endif
else else
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
} }
else if (coder->MethodID == k_BCJ2) else if (coder->MethodID == k_BCJ2)
{ {
UInt64 offset = packPositions[1]; const UInt64 offset = packPositions[1];
UInt64 s3Size = packPositions[2] - offset; const UInt64 s3Size = packPositions[2] - offset;
if (ci != 3) if (ci != 3)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
@ -473,8 +503,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
if (!tempBuf[2] && tempSizes[2] != 0) if (!tempBuf[2] && tempSizes[2] != 0)
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
RINOK(LookInStream_SeekTo(inStream, startPos + offset)); RINOK(LookInStream_SeekTo(inStream, startPos + offset))
RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2])); RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]))
if ((tempSizes[0] & 3) != 0 || if ((tempSizes[0] & 3) != 0 ||
(tempSizes[1] & 3) != 0 || (tempSizes[1] & 3) != 0 ||
@ -493,26 +523,22 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
p.destLim = outBuffer + outSize; p.destLim = outBuffer + outSize;
Bcj2Dec_Init(&p); Bcj2Dec_Init(&p);
RINOK(Bcj2Dec_Decode(&p)); RINOK(Bcj2Dec_Decode(&p))
{ {
unsigned i; unsigned i;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
if (p.bufs[i] != p.lims[i]) if (p.bufs[i] != p.lims[i])
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p))
if (!Bcj2Dec_IsFinished(&p))
return SZ_ERROR_DATA;
if (p.dest != p.destLim
|| p.state != BCJ2_STREAM_MAIN)
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
} }
} }
} }
#ifndef _7Z_NO_METHODS_FILTERS #if defined(Z7_USE_BRANCH_FILTER)
else if (ci == 1) else if (ci == 1)
{ {
#if !defined(Z7_NO_METHODS_FILTERS)
if (coder->MethodID == k_Delta) if (coder->MethodID == k_Delta)
{ {
if (coder->PropsSize != 1) if (coder->PropsSize != 1)
@ -522,31 +548,53 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
Delta_Init(state); Delta_Init(state);
Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize); Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize);
} }
continue;
} }
else #endif
#ifdef Z7_USE_FILTER_ARM64
if (coder->MethodID == k_ARM64)
{
UInt32 pc = 0;
if (coder->PropsSize == 4)
pc = GetUi32(propsData + coder->PropsOffset);
else if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc);
continue;
}
#endif
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
{ {
if (coder->PropsSize != 0) if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
#define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0;
switch (coder->MethodID) switch (coder->MethodID)
{ {
#if !defined(Z7_NO_METHODS_FILTERS)
case k_BCJ: case k_BCJ:
{ {
UInt32 state; UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
x86_Convert_Init(state); z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0
x86_Convert(outBuffer, outSize, 0, &state, 0);
break; break;
} }
CASE_BRA_CONV(PPC) CASE_BRA_CONV(PPC)
CASE_BRA_CONV(IA64) CASE_BRA_CONV(IA64)
CASE_BRA_CONV(SPARC) CASE_BRA_CONV(SPARC)
CASE_BRA_CONV(ARM) CASE_BRA_CONV(ARM)
#endif
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
CASE_BRA_CONV(ARMT) CASE_BRA_CONV(ARMT)
#endif
default: default:
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
} }
continue;
} }
} #endif
#endif } // (ci == 1)
#endif
else else
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
} }
@ -556,7 +604,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex, SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *inStream, UInt64 startPos, ILookInStreamPtr inStream, UInt64 startPos,
Byte *outBuffer, size_t outSize, Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain) ISzAllocPtr allocMain)
{ {

View file

@ -1,5 +1,5 @@
/* 7zFile.c -- File IO /* 7zFile.c -- File IO
2021-04-29 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -268,7 +268,7 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
return errno; return errno;
if (processed == 0) if (processed == 0)
break; break;
data = (void *)((Byte *)data + (size_t)processed); data = (const void *)((const Byte *)data + (size_t)processed);
originalSize -= (size_t)processed; originalSize -= (size_t)processed;
*size += (size_t)processed; *size += (size_t)processed;
} }
@ -287,7 +287,8 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
DWORD moveMethod; DWORD moveMethod;
UInt32 low = (UInt32)*pos; UInt32 low = (UInt32)*pos;
LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
switch (origin) // (int) to eliminate clang warning
switch ((int)origin)
{ {
case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break; case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break;
@ -308,7 +309,7 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
int moveMethod; // = origin; int moveMethod; // = origin;
switch (origin) switch ((int)origin)
{ {
case SZ_SEEK_SET: moveMethod = SEEK_SET; break; case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
@ -387,10 +388,10 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
/* ---------- FileSeqInStream ---------- */ /* ---------- FileSeqInStream ---------- */
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size) static SRes FileSeqInStream_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
{ {
CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileSeqInStream)
WRes wres = File_Read(&p->file, buf, size); const WRes wres = File_Read(&p->file, buf, size);
p->wres = wres; p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ; return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
} }
@ -403,18 +404,18 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
/* ---------- FileInStream ---------- */ /* ---------- FileInStream ---------- */
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size) static SRes FileInStream_Read(ISeekInStreamPtr pp, void *buf, size_t *size)
{ {
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream)
WRes wres = File_Read(&p->file, buf, size); const WRes wres = File_Read(&p->file, buf, size);
p->wres = wres; p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ; return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
} }
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin) static SRes FileInStream_Seek(ISeekInStreamPtr pp, Int64 *pos, ESzSeek origin)
{ {
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream)
WRes wres = File_Seek(&p->file, pos, origin); const WRes wres = File_Seek(&p->file, pos, origin);
p->wres = wres; p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ; return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
} }
@ -428,10 +429,10 @@ void FileInStream_CreateVTable(CFileInStream *p)
/* ---------- FileOutStream ---------- */ /* ---------- FileOutStream ---------- */
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) static size_t FileOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
{ {
CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileOutStream)
WRes wres = File_Write(&p->file, data, &size); const WRes wres = File_Write(&p->file, data, &size);
p->wres = wres; p->wres = wres;
return size; return size;
} }

View file

@ -1,8 +1,8 @@
/* 7zFile.h -- File IO /* 7zFile.h -- File IO
2021-02-15 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __7Z_FILE_H #ifndef ZIP7_INC_FILE_H
#define __7Z_FILE_H #define ZIP7_INC_FILE_H
#ifdef _WIN32 #ifdef _WIN32
#define USE_WINDOWS_FILE #define USE_WINDOWS_FILE
@ -10,7 +10,8 @@
#endif #endif
#ifdef USE_WINDOWS_FILE #ifdef USE_WINDOWS_FILE
#include <windows.h> #include "7zWindows.h"
#else #else
// note: USE_FOPEN mode is limited to 32-bit file size // note: USE_FOPEN mode is limited to 32-bit file size
// #define USE_FOPEN // #define USE_FOPEN

View file

@ -1,5 +1,5 @@
/* 7zStream.c -- 7z Stream functions /* 7zStream.c -- 7z Stream functions
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -7,12 +7,33 @@
#include "7zTypes.h" #include "7zTypes.h"
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
/*
  Reads from (stream) into (buf) until the requested amount has been
  delivered, a read reports an error, or a read delivers zero bytes.
    in:  *processedSize = number of bytes requested
    out: *processedSize = number of bytes stored in (buf)
  Returns the first non-SZ_OK code from ISeqInStream_Read, otherwise SZ_OK.
  A short count together with SZ_OK means a read delivered zero bytes.
*/
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize)
{
  Byte *dest = (Byte *)buf;
  size_t remaining = *processedSize;
  *processedSize = 0;
  for (; remaining != 0;)
  {
    size_t got = remaining;
    const SRes res = ISeqInStream_Read(stream, dest, &got);
    /* account for the bytes delivered by this call even if it failed */
    *processedSize += got;
    dest += got;
    remaining -= got;
    if (res != SZ_OK)
      return res;
    if (got == 0)
      break;
  }
  return SZ_OK;
}
/*
SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType)
{ {
while (size != 0) while (size != 0)
{ {
size_t processed = size; size_t processed = size;
RINOK(ISeqInStream_Read(stream, buf, &processed)); RINOK(ISeqInStream_Read(stream, buf, &processed))
if (processed == 0) if (processed == 0)
return errorType; return errorType;
buf = (void *)((Byte *)buf + processed); buf = (void *)((Byte *)buf + processed);
@ -21,42 +42,44 @@ SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes
return SZ_OK; return SZ_OK;
} }
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size) SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size)
{ {
return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
} }
*/
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf)
{ {
size_t processed = 1; size_t processed = 1;
RINOK(ISeqInStream_Read(stream, buf, &processed)); RINOK(ISeqInStream_Read(stream, buf, &processed))
return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF; return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
} }
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset) SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset)
{ {
Int64 t = (Int64)offset; Int64 t = (Int64)offset;
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET); return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
} }
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size) SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size)
{ {
const void *lookBuf; const void *lookBuf;
if (*size == 0) if (*size == 0)
return SZ_OK; return SZ_OK;
RINOK(ILookInStream_Look(stream, &lookBuf, size)); RINOK(ILookInStream_Look(stream, &lookBuf, size))
memcpy(buf, lookBuf, *size); memcpy(buf, lookBuf, *size);
return ILookInStream_Skip(stream, *size); return ILookInStream_Skip(stream, *size);
} }
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType) SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType)
{ {
while (size != 0) while (size != 0)
{ {
size_t processed = size; size_t processed = size;
RINOK(ILookInStream_Read(stream, buf, &processed)); RINOK(ILookInStream_Read(stream, buf, &processed))
if (processed == 0) if (processed == 0)
return errorType; return errorType;
buf = (void *)((Byte *)buf + processed); buf = (void *)((Byte *)buf + processed);
@ -65,16 +88,16 @@ SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRe
return SZ_OK; return SZ_OK;
} }
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size) SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size)
{ {
return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
} }
#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt); #define GET_LookToRead2 Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2)
static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size) static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size)
{ {
SRes res = SZ_OK; SRes res = SZ_OK;
GET_LookToRead2 GET_LookToRead2
@ -93,7 +116,7 @@ static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf
return res; return res;
} }
static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size) static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size)
{ {
SRes res = SZ_OK; SRes res = SZ_OK;
GET_LookToRead2 GET_LookToRead2
@ -113,14 +136,14 @@ static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, si
return res; return res;
} }
static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset) static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset)
{ {
GET_LookToRead2 GET_LookToRead2
p->pos += offset; p->pos += offset;
return SZ_OK; return SZ_OK;
} }
static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size) static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size)
{ {
GET_LookToRead2 GET_LookToRead2
size_t rem = p->size - p->pos; size_t rem = p->size - p->pos;
@ -134,7 +157,7 @@ static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
return SZ_OK; return SZ_OK;
} }
static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin) static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin)
{ {
GET_LookToRead2 GET_LookToRead2
p->pos = p->size = 0; p->pos = p->size = 0;
@ -153,9 +176,9 @@ void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size) static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
{ {
CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook)
return LookInStream_LookRead(p->realStream, buf, size); return LookInStream_LookRead(p->realStream, buf, size);
} }
@ -164,9 +187,9 @@ void SecToLook_CreateVTable(CSecToLook *p)
p->vt.Read = SecToLook_Read; p->vt.Read = SecToLook_Read;
} }
static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size) static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
{ {
CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead)
return ILookInStream_Read(p->realStream, buf, size); return ILookInStream_Read(p->realStream, buf, size);
} }

View file

@ -1,8 +1,8 @@
/* 7zTypes.h -- Basic types /* 7zTypes.h -- Basic types
2022-04-01 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H #ifndef ZIP7_7Z_TYPES_H
#define __7Z_TYPES_H #define ZIP7_7Z_TYPES_H
#ifdef _WIN32 #ifdef _WIN32
/* #include <windows.h> */ /* #include <windows.h> */
@ -52,6 +52,11 @@ typedef int SRes;
#define MY_ALIGN(n) #define MY_ALIGN(n)
#endif #endif
#else #else
/*
// C11/C++11:
#include <stdalign.h>
#define MY_ALIGN(n) alignas(n)
*/
#define MY_ALIGN(n) __attribute__ ((aligned(n))) #define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif #endif
@ -62,7 +67,7 @@ typedef int SRes;
typedef unsigned WRes; typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) // #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32 #else // _WIN32
@ -70,13 +75,13 @@ typedef unsigned WRes;
typedef int WRes; typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT // (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800 #define MY_FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7 #define MY_FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_ERRNO #define MY_FACILITY_WRes MY_FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \ #define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \ ( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \ | (MY_FACILITY_WRes << 16) \
| (HRESULT)0x80000000 )) | (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \ #define MY_SRes_HRESULT_FROM_WRes(x) \
@ -120,17 +125,17 @@ typedef int WRes;
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L) #define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L) #define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32), // if (MY_FACILITY_WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors: // we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL) #define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L) #define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L) #define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/* /*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents: // we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM) #define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) #define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL) #define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/ */
#define TEXT(quote) quote #define TEXT(quote) quote
@ -156,18 +161,18 @@ typedef int WRes;
#ifndef RINOK #ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } #define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
#endif #endif
#ifndef RINOK_WRes #ifndef RINOK_WRes
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; } #define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
#endif #endif
typedef unsigned char Byte; typedef unsigned char Byte;
typedef short Int16; typedef short Int16;
typedef unsigned short UInt16; typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG #ifdef Z7_DECL_Int32_AS_long
typedef long Int32; typedef long Int32;
typedef unsigned long UInt32; typedef unsigned long UInt32;
#else #else
@ -206,37 +211,51 @@ typedef size_t SIZE_T;
#endif // _WIN32 #endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) #define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64 #ifdef Z7_DECL_Int64_AS_long
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64; typedef long Int64;
typedef unsigned long UInt64; typedef unsigned long UInt64;
#else #else
#if defined(_MSC_VER) || defined(__BORLANDC__) #if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
typedef __int64 Int64; typedef __int64 Int64;
typedef unsigned __int64 UInt64; typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n #else
#if defined(__clang__) || defined(__GNUC__)
#include <stdint.h>
typedef int64_t Int64;
typedef uint64_t UInt64;
#else #else
typedef long long int Int64; typedef long long int Int64;
typedef unsigned long long int UInt64; typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL // #define UINT64_CONST(n) n ## ULL
#endif
#endif #endif
#endif #endif
#ifdef _LZMA_NO_SYSTEM_SIZE_T #define UINT64_CONST(n) n
typedef UInt32 SizeT;
#ifdef Z7_DECL_SizeT_AS_unsigned_int
typedef unsigned int SizeT;
#else #else
typedef size_t SizeT; typedef size_t SizeT;
#endif #endif
/*
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
typedef size_t MY_uintptr_t;
#else
#include <stdint.h>
typedef uintptr_t MY_uintptr_t;
#endif
*/
typedef int BoolInt; typedef int BoolInt;
/* typedef BoolInt Bool; */ /* typedef BoolInt Bool; */
#define True 1 #define True 1
@ -244,23 +263,23 @@ typedef int BoolInt;
#ifdef _WIN32 #ifdef _WIN32
#define MY_STD_CALL __stdcall #define Z7_STDCALL __stdcall
#else #else
#define MY_STD_CALL #define Z7_STDCALL
#endif #endif
#ifdef _MSC_VER #ifdef _MSC_VER
#if _MSC_VER >= 1300 #if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline) #define Z7_NO_INLINE __declspec(noinline)
#else #else
#define MY_NO_INLINE #define Z7_NO_INLINE
#endif #endif
#define MY_FORCE_INLINE __forceinline #define Z7_FORCE_INLINE __forceinline
#define MY_CDECL __cdecl #define Z7_CDECL __cdecl
#define MY_FAST_CALL __fastcall #define Z7_FASTCALL __fastcall
#else // _MSC_VER #else // _MSC_VER
@ -268,27 +287,25 @@ typedef int BoolInt;
|| (defined(__clang__) && (__clang_major__ >= 4)) \ || (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \ || defined(__INTEL_COMPILER) \
|| defined(__xlC__) || defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline)) #define Z7_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline #define Z7_FORCE_INLINE __attribute__((always_inline)) inline
#else #else
#define MY_NO_INLINE #define Z7_NO_INLINE
#define Z7_FORCE_INLINE
#endif #endif
#define MY_FORCE_INLINE #define Z7_CDECL
#define MY_CDECL
#if defined(_M_IX86) \ #if defined(_M_IX86) \
|| defined(__i386__) || defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall)) // #define Z7_FASTCALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl)) // #define Z7_FASTCALL __attribute__((cdecl))
#define MY_FAST_CALL #define Z7_FASTCALL
#elif defined(MY_CPU_AMD64) #elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi)) // #define Z7_FASTCALL __attribute__((ms_abi))
#define MY_FAST_CALL #define Z7_FASTCALL
#else #else
#define MY_FAST_CALL #define Z7_FASTCALL
#endif #endif
#endif // _MSC_VER #endif // _MSC_VER
@ -296,41 +313,49 @@ typedef int BoolInt;
/* The following interfaces use first parameter as pointer to structure */ /* The following interfaces use first parameter as pointer to structure */
typedef struct IByteIn IByteIn; // #define Z7_C_IFACE_CONST_QUAL
struct IByteIn #define Z7_C_IFACE_CONST_QUAL const
#define Z7_C_IFACE_DECL(a) \
struct a ## _; \
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
typedef struct a ## _ a; \
struct a ## _
Z7_C_IFACE_DECL (IByteIn)
{ {
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */ Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
}; };
#define IByteIn_Read(p) (p)->Read(p) #define IByteIn_Read(p) (p)->Read(p)
typedef struct IByteOut IByteOut; Z7_C_IFACE_DECL (IByteOut)
struct IByteOut
{ {
void (*Write)(const IByteOut *p, Byte b); void (*Write)(IByteOutPtr p, Byte b);
}; };
#define IByteOut_Write(p, b) (p)->Write(p, b) #define IByteOut_Write(p, b) (p)->Write(p, b)
typedef struct ISeqInStream ISeqInStream; Z7_C_IFACE_DECL (ISeqInStream)
struct ISeqInStream
{ {
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size); SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */ (output(*size) < input(*size)) is allowed */
}; };
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* try to read as much as is available in the stream, limited by (*processedSize) */
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
/* it can return SZ_ERROR_INPUT_EOF */ /* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size); // SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType); // SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf); SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
typedef struct ISeqOutStream ISeqOutStream; Z7_C_IFACE_DECL (ISeqOutStream)
struct ISeqOutStream
{ {
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size); size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes. /* Returns: result - the number of actually written bytes.
(result < size) means error */ (result < size) means error */
}; };
@ -344,29 +369,26 @@ typedef enum
} ESzSeek; } ESzSeek;
typedef struct ISeekInStream ISeekInStream; Z7_C_IFACE_DECL (ISeekInStream)
struct ISeekInStream
{ {
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin); SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
}; };
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size) #define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) #define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
typedef struct ILookInStream ILookInStream; Z7_C_IFACE_DECL (ILookInStream)
struct ILookInStream
{ {
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size); SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed (output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */ (output(*size) < input(*size)) is allowed */
SRes (*Skip)(const ILookInStream *p, size_t offset); SRes (*Skip)(ILookInStreamPtr p, size_t offset);
/* offset must be <= output(*size) of Look */ /* offset must be <= output(*size) of Look */
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */ /* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin); SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
}; };
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size) #define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
@ -375,19 +397,18 @@ struct ILookInStream
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin) #define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size); SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset); SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
/* reads via ILookInStream::Read */ /* reads via ILookInStream::Read */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType); SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size); SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
typedef struct typedef struct
{ {
ILookInStream vt; ILookInStream vt;
const ISeekInStream *realStream; ISeekInStreamPtr realStream;
size_t pos; size_t pos;
size_t size; /* it's data size */ size_t size; /* it's data size */
@ -399,13 +420,13 @@ typedef struct
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead); void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; } #define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
const ILookInStream *realStream; ILookInStreamPtr realStream;
} CSecToLook; } CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p); void SecToLook_CreateVTable(CSecToLook *p);
@ -415,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
const ILookInStream *realStream; ILookInStreamPtr realStream;
} CSecToRead; } CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p); void SecToRead_CreateVTable(CSecToRead *p);
typedef struct ICompressProgress ICompressProgress; Z7_C_IFACE_DECL (ICompressProgress)
struct ICompressProgress
{ {
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize); SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break. /* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */ Value (UInt64)(Int64)-1 for size means unknown value. */
}; };
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize) #define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
@ -466,13 +486,13 @@ struct ISzAlloc
#ifndef MY_container_of #ifndef Z7_container_of
/* /*
#define MY_container_of(ptr, type, m) container_of(ptr, type, m) #define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m) #define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m))) #define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m)))) #define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/ */
/* /*
@ -481,24 +501,64 @@ struct ISzAlloc
GCC 4.8.1 : classes with non-public variable members" GCC 4.8.1 : classes with non-public variable members"
*/ */
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m))) #define Z7_container_of(ptr, type, m) \
((type *)(void *)((char *)(void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
#define Z7_container_of_CONST(ptr, type, m) \
((const type *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
/*
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
((type *)(void *)(const void *)((const char *)(const void *) \
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
*/
#endif #endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr)) #define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/* /*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
*/ */
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m) #if defined (__clang__) || defined(__GNUC__)
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL
#define Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#endif
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) #define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
/* Z7_DIAGNOSCTIC_IGNORE_BEGIN_CAST_QUAL \
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m) type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
*/ Z7_DIAGNOSCTIC_IGNORE_END_CAST_QUAL
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a)) // #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifndef Z7_ARRAY_SIZE
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif
#ifdef _WIN32 #ifdef _WIN32
@ -527,3 +587,11 @@ struct ISzAlloc
EXTERN_C_END EXTERN_C_END
#endif #endif
/*
#ifndef Z7_ST
#ifdef _7ZIP_ST
#define Z7_ST
#endif
#endif
*/

View file

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 22 #define MY_VER_MAJOR 23
#define MY_VER_MINOR 01 #define MY_VER_MINOR 01
#define MY_VER_BUILD 0 #define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "22.01" #define MY_VERSION_NUMBERS "23.01"
#define MY_VERSION MY_VERSION_NUMBERS #define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME #ifdef MY_CPU_NAME
@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION #define MY_VERSION_CPU MY_VERSION
#endif #endif
#define MY_DATE "2022-07-15" #define MY_DATE "2023-06-20"
#undef MY_COPYRIGHT #undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE #undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov" #define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2022 Igor Pavlov" #define MY_COPYRIGHT_CR "Copyright (c) 1999-2023 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR #ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR

View file

@ -1,55 +1,55 @@
#define MY_VS_FFI_FILEFLAGSMASK 0x0000003FL #define MY_VS_FFI_FILEFLAGSMASK 0x0000003FL
#define MY_VOS_NT_WINDOWS32 0x00040004L #define MY_VOS_NT_WINDOWS32 0x00040004L
#define MY_VOS_CE_WINDOWS32 0x00050004L #define MY_VOS_CE_WINDOWS32 0x00050004L
#define MY_VFT_APP 0x00000001L #define MY_VFT_APP 0x00000001L
#define MY_VFT_DLL 0x00000002L #define MY_VFT_DLL 0x00000002L
// #include <WinVer.h> // #include <WinVer.h>
#ifndef MY_VERSION #ifndef MY_VERSION
#include "7zVersion.h" #include "7zVersion.h"
#endif #endif
#define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0 #define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0
#ifdef DEBUG #ifdef DEBUG
#define DBG_FL VS_FF_DEBUG #define DBG_FL VS_FF_DEBUG
#else #else
#define DBG_FL 0 #define DBG_FL 0
#endif #endif
#define MY_VERSION_INFO(fileType, descr, intName, origName) \ #define MY_VERSION_INFO(fileType, descr, intName, origName) \
LANGUAGE 9, 1 \ LANGUAGE 9, 1 \
1 VERSIONINFO \ 1 VERSIONINFO \
FILEVERSION MY_VER \ FILEVERSION MY_VER \
PRODUCTVERSION MY_VER \ PRODUCTVERSION MY_VER \
FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \ FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \
FILEFLAGS DBG_FL \ FILEFLAGS DBG_FL \
FILEOS MY_VOS_NT_WINDOWS32 \ FILEOS MY_VOS_NT_WINDOWS32 \
FILETYPE fileType \ FILETYPE fileType \
FILESUBTYPE 0x0L \ FILESUBTYPE 0x0L \
BEGIN \ BEGIN \
BLOCK "StringFileInfo" \ BLOCK "StringFileInfo" \
BEGIN \ BEGIN \
BLOCK "040904b0" \ BLOCK "040904b0" \
BEGIN \ BEGIN \
VALUE "CompanyName", "Igor Pavlov" \ VALUE "CompanyName", "Igor Pavlov" \
VALUE "FileDescription", descr \ VALUE "FileDescription", descr \
VALUE "FileVersion", MY_VERSION \ VALUE "FileVersion", MY_VERSION \
VALUE "InternalName", intName \ VALUE "InternalName", intName \
VALUE "LegalCopyright", MY_COPYRIGHT \ VALUE "LegalCopyright", MY_COPYRIGHT \
VALUE "OriginalFilename", origName \ VALUE "OriginalFilename", origName \
VALUE "ProductName", "7-Zip" \ VALUE "ProductName", "7-Zip" \
VALUE "ProductVersion", MY_VERSION \ VALUE "ProductVersion", MY_VERSION \
END \ END \
END \ END \
BLOCK "VarFileInfo" \ BLOCK "VarFileInfo" \
BEGIN \ BEGIN \
VALUE "Translation", 0x409, 1200 \ VALUE "Translation", 0x409, 1200 \
END \ END \
END END
#define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe") #define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe")
#define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll") #define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll")

101
3rdparty/7z/src/7zWindows.h vendored Normal file
View file

@ -0,0 +1,101 @@
/* 7zWindows.h -- StdAfx
2023-04-02 : Igor Pavlov : Public domain */
/*
  Central include point for the Windows API header.

  When _WIN32 is defined this header:
    - includes <windows.h> / <Windows.h> with MSVC warning 4668 disabled
      around the include (state restored with push/pop);
    - for MSVC <= 1200 on non-Win64 targets where _W64 is not defined,
      declares LONG_PTR / ULONG_PTR / DWORD_PTR and defines Z7_OLD_WIN_SDK;
    - when Z7_OLD_WIN_SDK is defined, supplies fallback definitions for
      macros that are not already defined and redefines VARIANT_TRUE.

  The UNDER_CE and trailing _MSC_VER sections come after the _WIN32 block
  is closed, so they are evaluated regardless of _WIN32.
*/
#ifndef ZIP7_INC_7Z_WINDOWS_H
#define ZIP7_INC_7Z_WINDOWS_H
#ifdef _WIN32
// Save clang diagnostic state; it is restored after the system header include.
// No clang diagnostic is actually changed between this push and the matching pop.
#if defined(__clang__)
# pragma clang diagnostic push
#endif
#if defined(_MSC_VER)
// Save MSVC warning state; restored by the matching pop after the include.
#pragma warning(push)
#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#if _MSC_VER == 1900
// for old kit10 versions
// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
#endif
// win10 Windows Kit:
#endif // _MSC_VER
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
// for msvc6 without sdk2003
#define RPC_NO_WINDOWS_H
#endif
// MinGW gets the lower-case header name; all other toolchains get <Windows.h>.
#if defined(__MINGW32__) || defined(__MINGW64__)
// #if defined(__GNUC__) && !defined(__clang__)
#include <windows.h>
#else
#include <Windows.h>
#endif
// #include <basetsd.h>
// #include <wtypes.h>
// but if precompiled with clang-cl then we need
// #include <windows.h>
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#if defined(__clang__)
# pragma clang diagnostic pop
#endif
// Old MSVC (<= 1200, 32-bit) without _W64: declare the pointer-sized integer
// types here and flag the SDK as old so the fallbacks below are enabled.
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
#ifndef _W64
typedef long LONG_PTR, *PLONG_PTR;
typedef unsigned long ULONG_PTR, *PULONG_PTR;
typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
#define Z7_OLD_WIN_SDK
#endif // _W64
#endif // _MSC_VER <= 1200 && !_WIN64
// Fallback definitions, each provided only if not already defined.
#ifdef Z7_OLD_WIN_SDK
#ifndef INVALID_FILE_ATTRIBUTES
#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
#endif
#ifndef INVALID_SET_FILE_POINTER
#define INVALID_SET_FILE_POINTER ((DWORD)-1)
#endif
#ifndef FILE_SPECIAL_ACCESS
#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
#endif
// ShlObj.h:
// #define BIF_NEWDIALOGSTYLE 0x0040
// This disable is not wrapped in push/pop, so it stays in effect for
// every translation unit that includes this header.
#pragma warning(disable : 4201)
// #pragma warning(disable : 4115)
// Replace any existing VARIANT_TRUE with an explicitly typed value.
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif // Z7_OLD_WIN_SDK
#endif // _WIN32
// Same VARIANT_TRUE override for UNDER_CE builds.
#ifdef UNDER_CE
#undef VARIANT_TRUE
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
#endif
// Placeholder: the only directive in this range is commented out,
// so the block currently has no effect.
#if defined(_MSC_VER)
#if _MSC_VER >= 1400 && _MSC_VER <= 1600
// BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
// string.h
// #pragma warning(disable : 4514)
#endif
#endif
/* #include "7zTypes.h" */
#endif // ZIP7_INC_7Z_WINDOWS_H

108
3rdparty/7z/src/Aes.c vendored
View file

@ -1,5 +1,5 @@
/* Aes.c -- AES encryption / decryption /* Aes.c -- AES encryption / decryption
2021-05-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -7,7 +7,7 @@
#include "Aes.h" #include "Aes.h"
AES_CODE_FUNC g_AesCbc_Decode; AES_CODE_FUNC g_AesCbc_Decode;
#ifndef _SFX #ifndef Z7_SFX
AES_CODE_FUNC g_AesCbc_Encode; AES_CODE_FUNC g_AesCbc_Encode;
AES_CODE_FUNC g_AesCtr_Code; AES_CODE_FUNC g_AesCtr_Code;
UInt32 g_Aes_SupportedFunctions_Flags; UInt32 g_Aes_SupportedFunctions_Flags;
@ -51,7 +51,7 @@ static Byte InvS[256];
#define DD(x) (D + (x << 8)) #define DD(x) (D + (x << 8))
// #define _SHOW_AES_STATUS // #define Z7_SHOW_AES_STATUS
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#define USE_HW_AES #define USE_HW_AES
@ -72,11 +72,11 @@ static Byte InvS[256];
#endif #endif
#ifdef USE_HW_AES #ifdef USE_HW_AES
#ifdef _SHOW_AES_STATUS #ifdef Z7_SHOW_AES_STATUS
#include <stdio.h> #include <stdio.h>
#define _PRF(x) x #define PRF(x) x
#else #else
#define _PRF(x) #define PRF(x)
#endif #endif
#endif #endif
@ -90,23 +90,23 @@ void AesGenTables(void)
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
{ {
{ {
UInt32 a1 = Sbox[i]; const UInt32 a1 = Sbox[i];
UInt32 a2 = xtime(a1); const UInt32 a2 = xtime(a1);
UInt32 a3 = a2 ^ a1; const UInt32 a3 = a2 ^ a1;
TT(0)[i] = Ui32(a2, a1, a1, a3); TT(0)[i] = Ui32(a2, a1, a1, a3);
TT(1)[i] = Ui32(a3, a2, a1, a1); TT(1)[i] = Ui32(a3, a2, a1, a1);
TT(2)[i] = Ui32(a1, a3, a2, a1); TT(2)[i] = Ui32(a1, a3, a2, a1);
TT(3)[i] = Ui32(a1, a1, a3, a2); TT(3)[i] = Ui32(a1, a1, a3, a2);
} }
{ {
UInt32 a1 = InvS[i]; const UInt32 a1 = InvS[i];
UInt32 a2 = xtime(a1); const UInt32 a2 = xtime(a1);
UInt32 a4 = xtime(a2); const UInt32 a4 = xtime(a2);
UInt32 a8 = xtime(a4); const UInt32 a8 = xtime(a4);
UInt32 a9 = a8 ^ a1; const UInt32 a9 = a8 ^ a1;
UInt32 aB = a8 ^ a2 ^ a1; const UInt32 aB = a8 ^ a2 ^ a1;
UInt32 aD = a8 ^ a4 ^ a1; const UInt32 aD = a8 ^ a4 ^ a1;
UInt32 aE = a8 ^ a4 ^ a2; const UInt32 aE = a8 ^ a4 ^ a2;
DD(0)[i] = Ui32(aE, a9, aD, aB); DD(0)[i] = Ui32(aE, a9, aD, aB);
DD(1)[i] = Ui32(aB, aE, a9, aD); DD(1)[i] = Ui32(aB, aE, a9, aD);
DD(2)[i] = Ui32(aD, aB, aE, a9); DD(2)[i] = Ui32(aD, aB, aE, a9);
@ -116,7 +116,7 @@ void AesGenTables(void)
{ {
AES_CODE_FUNC d = AesCbc_Decode; AES_CODE_FUNC d = AesCbc_Decode;
#ifndef _SFX #ifndef Z7_SFX
AES_CODE_FUNC e = AesCbc_Encode; AES_CODE_FUNC e = AesCbc_Encode;
AES_CODE_FUNC c = AesCtr_Code; AES_CODE_FUNC c = AesCtr_Code;
UInt32 flags = 0; UInt32 flags = 0;
@ -126,10 +126,10 @@ void AesGenTables(void)
if (CPU_IsSupported_AES()) if (CPU_IsSupported_AES())
{ {
// #pragma message ("AES HW") // #pragma message ("AES HW")
_PRF(printf("\n===AES HW\n")); PRF(printf("\n===AES HW\n"));
d = AesCbc_Decode_HW; d = AesCbc_Decode_HW;
#ifndef _SFX #ifndef Z7_SFX
e = AesCbc_Encode_HW; e = AesCbc_Encode_HW;
c = AesCtr_Code_HW; c = AesCtr_Code_HW;
flags = k_Aes_SupportedFunctions_HW; flags = k_Aes_SupportedFunctions_HW;
@ -138,9 +138,9 @@ void AesGenTables(void)
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
if (CPU_IsSupported_VAES_AVX2()) if (CPU_IsSupported_VAES_AVX2())
{ {
_PRF(printf("\n===vaes avx2\n")); PRF(printf("\n===vaes avx2\n"));
d = AesCbc_Decode_HW_256; d = AesCbc_Decode_HW_256;
#ifndef _SFX #ifndef Z7_SFX
c = AesCtr_Code_HW_256; c = AesCtr_Code_HW_256;
flags |= k_Aes_SupportedFunctions_HW_256; flags |= k_Aes_SupportedFunctions_HW_256;
#endif #endif
@ -150,7 +150,7 @@ void AesGenTables(void)
#endif #endif
g_AesCbc_Decode = d; g_AesCbc_Decode = d;
#ifndef _SFX #ifndef Z7_SFX
g_AesCbc_Encode = e; g_AesCbc_Encode = e;
g_AesCtr_Code = c; g_AesCtr_Code = c;
g_Aes_SupportedFunctions_Flags = flags; g_Aes_SupportedFunctions_Flags = flags;
@ -194,7 +194,7 @@ void AesGenTables(void)
#define FD(i, x) InvS[gb(x, m[(i - x) & 3])] #define FD(i, x) InvS[gb(x, m[(i - x) & 3])]
#define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i]; #define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i];
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize) void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
{ {
unsigned i, m; unsigned i, m;
const UInt32 *wLim; const UInt32 *wLim;
@ -230,7 +230,7 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
while (++w != wLim); while (++w != wLim);
} }
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize) void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
{ {
unsigned i, num; unsigned i, num;
Aes_SetKey_Enc(w, key, keySize); Aes_SetKey_Enc(w, key, keySize);
@ -251,7 +251,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
src and dest are pointers to 4 UInt32 words. src and dest are pointers to 4 UInt32 words.
src and dest can point to same block */ src and dest can point to same block */
// MY_FORCE_INLINE // Z7_FORCE_INLINE
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src) static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{ {
UInt32 s[4]; UInt32 s[4];
@ -265,17 +265,20 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
w += 4; w += 4;
for (;;) for (;;)
{ {
HT16(m, s, 0); HT16(m, s, 0)
if (--numRounds2 == 0) if (--numRounds2 == 0)
break; break;
HT16(s, m, 4); HT16(s, m, 4)
w += 8; w += 8;
} }
w += 4; w += 4;
FT4(0); FT4(1); FT4(2); FT4(3); FT4(0)
FT4(1)
FT4(2)
FT4(3)
} }
MY_FORCE_INLINE Z7_FORCE_INLINE
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src) static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{ {
UInt32 s[4]; UInt32 s[4];
@ -289,12 +292,15 @@ static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
for (;;) for (;;)
{ {
w -= 8; w -= 8;
HD16(m, s, 4); HD16(m, s, 4)
if (--numRounds2 == 0) if (--numRounds2 == 0)
break; break;
HD16(s, m, 0); HD16(s, m, 0)
} }
FD4(0); FD4(1); FD4(2); FD4(3); FD4(0)
FD4(1)
FD4(2)
FD4(3)
} }
void AesCbc_Init(UInt32 *p, const Byte *iv) void AesCbc_Init(UInt32 *p, const Byte *iv)
@ -304,7 +310,7 @@ void AesCbc_Init(UInt32 *p, const Byte *iv)
p[i] = GetUi32(iv + i * 4); p[i] = GetUi32(iv + i * 4);
} }
void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks) void Z7_FASTCALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
{ {
for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE) for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
{ {
@ -315,14 +321,14 @@ void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
Aes_Encode(p + 4, p, p); Aes_Encode(p + 4, p, p);
SetUi32(data, p[0]); SetUi32(data, p[0])
SetUi32(data + 4, p[1]); SetUi32(data + 4, p[1])
SetUi32(data + 8, p[2]); SetUi32(data + 8, p[2])
SetUi32(data + 12, p[3]); SetUi32(data + 12, p[3])
} }
} }
void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks) void Z7_FASTCALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
{ {
UInt32 in[4], out[4]; UInt32 in[4], out[4];
for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE) for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
@ -334,10 +340,10 @@ void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
Aes_Decode(p + 4, out, in); Aes_Decode(p + 4, out, in);
SetUi32(data, p[0] ^ out[0]); SetUi32(data, p[0] ^ out[0])
SetUi32(data + 4, p[1] ^ out[1]); SetUi32(data + 4, p[1] ^ out[1])
SetUi32(data + 8, p[2] ^ out[2]); SetUi32(data + 8, p[2] ^ out[2])
SetUi32(data + 12, p[3] ^ out[3]); SetUi32(data + 12, p[3] ^ out[3])
p[0] = in[0]; p[0] = in[0];
p[1] = in[1]; p[1] = in[1];
@ -346,7 +352,7 @@ void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
} }
} }
void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) void Z7_FASTCALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
{ {
for (; numBlocks != 0; numBlocks--) for (; numBlocks != 0; numBlocks--)
{ {
@ -360,7 +366,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
for (i = 0; i < 4; i++, data += 4) for (i = 0; i < 4; i++, data += 4)
{ {
UInt32 t = temp[i]; const UInt32 t = temp[i];
#ifdef MY_CPU_LE_UNALIGN #ifdef MY_CPU_LE_UNALIGN
*((UInt32 *)(void *)data) ^= t; *((UInt32 *)(void *)data) ^= t;
@ -373,3 +379,15 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
} }
} }
} }
#undef xtime
#undef Ui32
#undef gb0
#undef gb1
#undef gb2
#undef gb3
#undef gb
#undef TT
#undef DD
#undef USE_HW_AES
#undef PRF

36
3rdparty/7z/src/Aes.h vendored
View file

@ -1,8 +1,8 @@
/* Aes.h -- AES encryption / decryption /* Aes.h -- AES encryption / decryption
2018-04-28 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __AES_H #ifndef ZIP7_INC_AES_H
#define __AES_H #define ZIP7_INC_AES_H
#include "7zTypes.h" #include "7zTypes.h"
@ -20,19 +20,19 @@ void AesGenTables(void);
/* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */ /* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */
/* keySize = 16 or 24 or 32 (bytes) */ /* keySize = 16 or 24 or 32 (bytes) */
typedef void (MY_FAST_CALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize); typedef void (Z7_FASTCALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize);
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize); void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize);
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize); void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize);
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */ /* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */ void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
/* data - 16-byte aligned pointer to data */ /* data - 16-byte aligned pointer to data */
/* numBlocks - the number of 16-byte blocks in data array */ /* numBlocks - the number of 16-byte blocks in data array */
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks); typedef void (Z7_FASTCALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
extern AES_CODE_FUNC g_AesCbc_Decode; extern AES_CODE_FUNC g_AesCbc_Decode;
#ifndef _SFX #ifndef Z7_SFX
extern AES_CODE_FUNC g_AesCbc_Encode; extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCtr_Code; extern AES_CODE_FUNC g_AesCtr_Code;
#define k_Aes_SupportedFunctions_HW (1 << 2) #define k_Aes_SupportedFunctions_HW (1 << 2)
@ -41,19 +41,19 @@ extern UInt32 g_Aes_SupportedFunctions_Flags;
#endif #endif
#define DECLARE__AES_CODE_FUNC(funcName) \ #define Z7_DECLARE_AES_CODE_FUNC(funcName) \
void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks); void Z7_FASTCALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
DECLARE__AES_CODE_FUNC (AesCbc_Encode) Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode)
DECLARE__AES_CODE_FUNC (AesCbc_Decode) Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode)
DECLARE__AES_CODE_FUNC (AesCtr_Code) Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code)
DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW) Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode_HW)
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW) Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW)
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW) Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW)
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256) Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW_256)
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256) Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW_256)
EXTERN_C_END EXTERN_C_END

View file

@ -1,39 +1,33 @@
/* AesOpt.c -- AES optimized code for x86 AES hardware instructions /* AesOpt.c -- AES optimized code for x86 AES hardware instructions
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Aes.h"
#include "CpuArch.h" #include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__) #if defined(__INTEL_COMPILER)
#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
#define USE_INTEL_AES
#define ATTRIB_AES __attribute__((__target__("aes")))
#if (__clang_major__ >= 8)
#define USE_INTEL_VAES
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
#endif
#endif
#elif defined(__GNUC__)
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define USE_INTEL_AES
#ifndef __AES__
#define ATTRIB_AES __attribute__((__target__("aes")))
#endif
#if (__GNUC__ >= 8)
#define USE_INTEL_VAES
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1110) #if (__INTEL_COMPILER >= 1110)
#define USE_INTEL_AES #define USE_INTEL_AES
#if (__INTEL_COMPILER >= 1900) #if (__INTEL_COMPILER >= 1900)
#define USE_INTEL_VAES #define USE_INTEL_VAES
#endif #endif
#endif #endif
#elif defined(__clang__) && (__clang_major__ > 3 || __clang_major__ == 3 && __clang_minor__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define USE_INTEL_AES
#if !defined(__AES__)
#define ATTRIB_AES __attribute__((__target__("aes")))
#endif
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_INTEL_VAES
#if !defined(__AES__) || !defined(__VAES__) || !defined(__AVX__) || !defined(__AVX2__)
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx,avx2")))
#endif
#endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) #if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
#define USE_INTEL_AES #define USE_INTEL_AES
@ -56,12 +50,15 @@
#include <wmmintrin.h> #include <wmmintrin.h>
#ifndef USE_INTEL_VAES #ifndef USE_INTEL_VAES
#define AES_TYPE_keys __m128i #define AES_TYPE_keys UInt32
#define AES_TYPE_data __m128i #define AES_TYPE_data Byte
// #define AES_TYPE_keys __m128i
// #define AES_TYPE_data __m128i
#endif #endif
#define AES_FUNC_START(name) \ #define AES_FUNC_START(name) \
void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks) void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks)
// void Z7_FASTCALL name(__m128i *p, __m128i *data, size_t numBlocks)
#define AES_FUNC_START2(name) \ #define AES_FUNC_START2(name) \
AES_FUNC_START (name); \ AES_FUNC_START (name); \
@ -69,14 +66,16 @@ ATTRIB_AES \
AES_FUNC_START (name) AES_FUNC_START (name)
#define MM_OP(op, dest, src) dest = op(dest, src); #define MM_OP(op, dest, src) dest = op(dest, src);
#define MM_OP_m(op, src) MM_OP(op, m, src); #define MM_OP_m(op, src) MM_OP(op, m, src)
#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src); #define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src)
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src); #define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src)
AES_FUNC_START2 (AesCbc_Encode_HW) AES_FUNC_START2 (AesCbc_Encode_HW)
{ {
__m128i *p = (__m128i *)(void *)ivAes;
__m128i *data = (__m128i *)(void *)data8;
__m128i m = *p; __m128i m = *p;
const __m128i k0 = p[2]; const __m128i k0 = p[2];
const __m128i k1 = p[3]; const __m128i k1 = p[3];
@ -86,17 +85,17 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
UInt32 r = numRounds2; UInt32 r = numRounds2;
const __m128i *w = p + 4; const __m128i *w = p + 4;
__m128i temp = *data; __m128i temp = *data;
MM_XOR (temp, k0); MM_XOR (temp, k0)
MM_XOR (m, temp); MM_XOR (m, temp)
MM_OP_m (_mm_aesenc_si128, k1); MM_OP_m (_mm_aesenc_si128, k1)
do do
{ {
MM_OP_m (_mm_aesenc_si128, w[0]); MM_OP_m (_mm_aesenc_si128, w[0])
MM_OP_m (_mm_aesenc_si128, w[1]); MM_OP_m (_mm_aesenc_si128, w[1])
w += 2; w += 2;
} }
while (--r); while (--r);
MM_OP_m (_mm_aesenclast_si128, w[0]); MM_OP_m (_mm_aesenclast_si128, w[0])
*data = m; *data = m;
} }
*p = m; *p = m;
@ -104,14 +103,14 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define WOP_1(op) #define WOP_1(op)
#define WOP_2(op) WOP_1 (op) op (m1, 1); #define WOP_2(op) WOP_1 (op) op (m1, 1)
#define WOP_3(op) WOP_2 (op) op (m2, 2); #define WOP_3(op) WOP_2 (op) op (m2, 2)
#define WOP_4(op) WOP_3 (op) op (m3, 3); #define WOP_4(op) WOP_3 (op) op (m3, 3)
#ifdef MY_CPU_AMD64 #ifdef MY_CPU_AMD64
#define WOP_5(op) WOP_4 (op) op (m4, 4); #define WOP_5(op) WOP_4 (op) op (m4, 4)
#define WOP_6(op) WOP_5 (op) op (m5, 5); #define WOP_6(op) WOP_5 (op) op (m5, 5)
#define WOP_7(op) WOP_6 (op) op (m6, 6); #define WOP_7(op) WOP_6 (op) op (m6, 6)
#define WOP_8(op) WOP_7 (op) op (m7, 7); #define WOP_8(op) WOP_7 (op) op (m7, 7)
#endif #endif
/* /*
#define WOP_9(op) WOP_8 (op) op (m8, 8); #define WOP_9(op) WOP_8 (op) op (m8, 8);
@ -130,20 +129,20 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define WOP_M1 WOP_4 #define WOP_M1 WOP_4
#endif #endif
#define WOP(op) op (m0, 0); WOP_M1(op) #define WOP(op) op (m0, 0) WOP_M1(op)
#define DECLARE_VAR(reg, ii) __m128i reg #define DECLARE_VAR(reg, ii) __m128i reg;
#define LOAD_data( reg, ii) reg = data[ii]; #define LOAD_data( reg, ii) reg = data[ii];
#define STORE_data( reg, ii) data[ii] = reg; #define STORE_data( reg, ii) data[ii] = reg;
#if (NUM_WAYS > 1) #if (NUM_WAYS > 1)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); #define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1])
#endif #endif
#define AVX__DECLARE_VAR(reg, ii) __m256i reg #define AVX_DECLARE_VAR(reg, ii) __m256i reg;
#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii]; #define AVX_LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg; #define AVX_STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii])); #define AVX_XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]))
#define MM_OP_key(op, reg) MM_OP(op, reg, key); #define MM_OP_key(op, reg) MM_OP(op, reg, key);
@ -154,23 +153,23 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg) #define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg) #define AVX_AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg) #define AVX_AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg) #define AVX_AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg) #define AVX_AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg) #define AVX_AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr; #define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one) reg = ctr;
#define CTR_END( reg, ii) MM_XOR (data[ii], reg); #define CTR_END( reg, ii) MM_XOR (data[ii], reg)
#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key); #define AVX_CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two) reg = _mm256_xor_si256(ctr2, key);
#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg); #define AVX_CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg)
#define WOP_KEY(op, n) { \ #define WOP_KEY(op, n) { \
const __m128i key = w[n]; \ const __m128i key = w[n]; \
WOP(op); } WOP(op); }
#define AVX__WOP_KEY(op, n) { \ #define AVX_WOP_KEY(op, n) { \
const __m256i key = w[n]; \ const __m256i key = w[n]; \
WOP(op); } WOP(op); }
@ -218,6 +217,8 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
AES_FUNC_START2 (AesCbc_Decode_HW) AES_FUNC_START2 (AesCbc_Decode_HW)
{ {
__m128i *p = (__m128i *)(void *)ivAes;
__m128i *data = (__m128i *)(void *)data8;
__m128i iv = *p; __m128i iv = *p;
const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1; const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1;
const __m128i *dataEnd; const __m128i *dataEnd;
@ -228,7 +229,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
const __m128i *w = wStart; const __m128i *w = wStart;
WOP (DECLARE_VAR) WOP (DECLARE_VAR)
WOP (LOAD_data); WOP (LOAD_data)
WOP_KEY (AES_XOR, 1) WOP_KEY (AES_XOR, 1)
do do
@ -239,10 +240,10 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
while (w != p); while (w != p);
WOP_KEY (AES_DEC_LAST, 0) WOP_KEY (AES_DEC_LAST, 0)
MM_XOR (m0, iv); MM_XOR (m0, iv)
WOP_M1 (XOR_data_M1) WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1]; iv = data[NUM_WAYS - 1];
WOP (STORE_data); WOP (STORE_data)
} }
WIDE_LOOP_END WIDE_LOOP_END
@ -252,15 +253,15 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
__m128i m = _mm_xor_si128 (w[2], *data); __m128i m = _mm_xor_si128 (w[2], *data);
do do
{ {
MM_OP_m (_mm_aesdec_si128, w[1]); MM_OP_m (_mm_aesdec_si128, w[1])
MM_OP_m (_mm_aesdec_si128, w[0]); MM_OP_m (_mm_aesdec_si128, w[0])
w -= 2; w -= 2;
} }
while (w != p); while (w != p);
MM_OP_m (_mm_aesdec_si128, w[1]); MM_OP_m (_mm_aesdec_si128, w[1])
MM_OP_m (_mm_aesdeclast_si128, w[0]); MM_OP_m (_mm_aesdeclast_si128, w[0])
MM_XOR (m, iv); MM_XOR (m, iv)
iv = *data; iv = *data;
*data = m; *data = m;
} }
@ -271,6 +272,8 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
AES_FUNC_START2 (AesCtr_Code_HW) AES_FUNC_START2 (AesCtr_Code_HW)
{ {
__m128i *p = (__m128i *)(void *)ivAes;
__m128i *data = (__m128i *)(void *)data8;
__m128i ctr = *p; __m128i ctr = *p;
UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1; UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1;
const __m128i *dataEnd; const __m128i *dataEnd;
@ -283,7 +286,7 @@ AES_FUNC_START2 (AesCtr_Code_HW)
const __m128i *w = p; const __m128i *w = p;
UInt32 r = numRoundsMinus2; UInt32 r = numRoundsMinus2;
WOP (DECLARE_VAR) WOP (DECLARE_VAR)
WOP (CTR_START); WOP (CTR_START)
WOP_KEY (AES_XOR, 0) WOP_KEY (AES_XOR, 0)
w += 1; w += 1;
do do
@ -294,7 +297,7 @@ AES_FUNC_START2 (AesCtr_Code_HW)
while (--r); while (--r);
WOP_KEY (AES_ENC_LAST, 0) WOP_KEY (AES_ENC_LAST, 0)
WOP (CTR_END); WOP (CTR_END)
} }
WIDE_LOOP_END WIDE_LOOP_END
@ -303,19 +306,19 @@ AES_FUNC_START2 (AesCtr_Code_HW)
UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p; const __m128i *w = p;
__m128i m; __m128i m;
MM_OP (_mm_add_epi64, ctr, one); MM_OP (_mm_add_epi64, ctr, one)
m = _mm_xor_si128 (ctr, p[0]); m = _mm_xor_si128 (ctr, p[0]);
w += 1; w += 1;
do do
{ {
MM_OP_m (_mm_aesenc_si128, w[0]); MM_OP_m (_mm_aesenc_si128, w[0])
MM_OP_m (_mm_aesenc_si128, w[1]); MM_OP_m (_mm_aesenc_si128, w[1])
w += 2; w += 2;
} }
while (--numRounds2); while (--numRounds2);
MM_OP_m (_mm_aesenc_si128, w[0]); MM_OP_m (_mm_aesenc_si128, w[0])
MM_OP_m (_mm_aesenclast_si128, w[1]); MM_OP_m (_mm_aesenclast_si128, w[1])
MM_XOR (*data, m); MM_XOR (*data, m)
} }
p[-2] = ctr; p[-2] = ctr;
@ -325,17 +328,58 @@ AES_FUNC_START2 (AesCtr_Code_HW)
#ifdef USE_INTEL_VAES #ifdef USE_INTEL_VAES
/*
GCC before 2013-Jun:
<immintrin.h>:
#ifdef __AVX__
#include <avxintrin.h>
#endif
GCC after 2013-Jun:
<immintrin.h>:
#include <avxintrin.h>
CLANG 3.8+:
{
<immintrin.h>:
#if !defined(_MSC_VER) || defined(__AVX__)
#include <avxintrin.h>
#endif
if (the compiler is clang for Windows and if global arch is not set for __AVX__)
[ if (defined(_MSC_VER) && !defined(__AVX__)) ]
{
<immintrin.h> doesn't include <avxintrin.h>
and we have 2 ways to fix it:
1) we can define required __AVX__ before <immintrin.h>
or
2) we can include <avxintrin.h> after <immintrin.h>
}
}
If we include <avxintrin.h> manually for GCC/CLANG,
<immintrin.h> must be included before <avxintrin.h>.
*/
/*
#if defined(__clang__) && defined(_MSC_VER) #if defined(__clang__) && defined(_MSC_VER)
#define __SSE4_2__
#define __AES__
#define __AVX__ #define __AVX__
#define __AVX2__ #define __AVX2__
#define __VAES__ #define __VAES__
#define __AVX512F__
#define __AVX512VL__
#endif #endif
*/
#include <immintrin.h> #include <immintrin.h>
#if defined(__clang__) && defined(_MSC_VER)
#if !defined(__AVX__)
#include <avxintrin.h>
#endif
#if !defined(__AVX2__)
#include <avx2intrin.h>
#endif
#if !defined(__VAES__)
#include <vaesintrin.h>
#endif
#endif // __clang__ && _MSC_VER
#define VAES_FUNC_START2(name) \ #define VAES_FUNC_START2(name) \
AES_FUNC_START (name); \ AES_FUNC_START (name); \
@ -344,6 +388,8 @@ AES_FUNC_START (name)
VAES_FUNC_START2 (AesCbc_Decode_HW_256) VAES_FUNC_START2 (AesCbc_Decode_HW_256)
{ {
__m128i *p = (__m128i *)(void *)ivAes;
__m128i *data = (__m128i *)(void *)data8;
__m128i iv = *p; __m128i iv = *p;
const __m128i *dataEnd; const __m128i *dataEnd;
UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
@ -353,22 +399,22 @@ VAES_FUNC_START2 (AesCbc_Decode_HW_256)
{ {
const __m256i *w = keys + numRounds - 2; const __m256i *w = keys + numRounds - 2;
WOP (AVX__DECLARE_VAR) WOP (AVX_DECLARE_VAR)
WOP (AVX__LOAD_data); WOP (AVX_LOAD_data)
AVX__WOP_KEY (AVX__AES_XOR, 1) AVX_WOP_KEY (AVX_AES_XOR, 1)
do do
{ {
AVX__WOP_KEY (AVX__AES_DEC, 0) AVX_WOP_KEY (AVX_AES_DEC, 0)
w--; w--;
} }
while (w != keys); while (w != keys);
AVX__WOP_KEY (AVX__AES_DEC_LAST, 0) AVX_WOP_KEY (AVX_AES_DEC_LAST, 0)
AVX_XOR (m0, _mm256_setr_m128i(iv, data[0])); AVX_XOR (m0, _mm256_setr_m128i(iv, data[0]))
WOP_M1 (AVX__XOR_data_M1) WOP_M1 (AVX_XOR_data_M1)
iv = data[NUM_WAYS * 2 - 1]; iv = data[NUM_WAYS * 2 - 1];
WOP (AVX__STORE_data); WOP (AVX_STORE_data)
} }
WIDE_LOOP_END_AVX(;) WIDE_LOOP_END_AVX(;)
@ -378,15 +424,15 @@ VAES_FUNC_START2 (AesCbc_Decode_HW_256)
__m128i m = _mm_xor_si128 (w[2], *data); __m128i m = _mm_xor_si128 (w[2], *data);
do do
{ {
MM_OP_m (_mm_aesdec_si128, w[1]); MM_OP_m (_mm_aesdec_si128, w[1])
MM_OP_m (_mm_aesdec_si128, w[0]); MM_OP_m (_mm_aesdec_si128, w[0])
w -= 2; w -= 2;
} }
while (w != p); while (w != p);
MM_OP_m (_mm_aesdec_si128, w[1]); MM_OP_m (_mm_aesdec_si128, w[1])
MM_OP_m (_mm_aesdeclast_si128, w[0]); MM_OP_m (_mm_aesdeclast_si128, w[0])
MM_XOR (m, iv); MM_XOR (m, iv)
iv = *data; iv = *data;
*data = m; *data = m;
} }
@ -403,18 +449,20 @@ AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm
_mm256_broadcastsi128_si256 : vbroadcasti128 _mm256_broadcastsi128_si256 : vbroadcasti128
*/ */
#define AVX__CTR_LOOP_START \ #define AVX_CTR_LOOP_START \
ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \ ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \
two = _mm256_setr_m128i(one, one); \ two = _mm256_setr_m128i(one, one); \
two = _mm256_add_epi64(two, two); \ two = _mm256_add_epi64(two, two); \
// two = _mm256_setr_epi64x(2, 0, 2, 0); // two = _mm256_setr_epi64x(2, 0, 2, 0);
#define AVX__CTR_LOOP_ENC \ #define AVX_CTR_LOOP_ENC \
ctr = _mm256_extracti128_si256 (ctr2, 1); \ ctr = _mm256_extracti128_si256 (ctr2, 1); \
VAES_FUNC_START2 (AesCtr_Code_HW_256) VAES_FUNC_START2 (AesCtr_Code_HW_256)
{ {
__m128i *p = (__m128i *)(void *)ivAes;
__m128i *data = (__m128i *)(void *)data8;
__m128i ctr = *p; __m128i ctr = *p;
UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1; UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
const __m128i *dataEnd; const __m128i *dataEnd;
@ -422,44 +470,44 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256)
__m256i ctr2, two; __m256i ctr2, two;
p += 2; p += 2;
WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START) WIDE_LOOP_START_AVX (AVX_CTR_LOOP_START)
{ {
const __m256i *w = keys; const __m256i *w = keys;
UInt32 r = numRounds - 2; UInt32 r = numRounds - 2;
WOP (AVX__DECLARE_VAR) WOP (AVX_DECLARE_VAR)
AVX__WOP_KEY (AVX__CTR_START, 0); AVX_WOP_KEY (AVX_CTR_START, 0)
w += 1; w += 1;
do do
{ {
AVX__WOP_KEY (AVX__AES_ENC, 0) AVX_WOP_KEY (AVX_AES_ENC, 0)
w += 1; w += 1;
} }
while (--r); while (--r);
AVX__WOP_KEY (AVX__AES_ENC_LAST, 0) AVX_WOP_KEY (AVX_AES_ENC_LAST, 0)
WOP (AVX__CTR_END); WOP (AVX_CTR_END)
} }
WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC) WIDE_LOOP_END_AVX (AVX_CTR_LOOP_ENC)
SINGLE_LOOP SINGLE_LOOP
{ {
UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1; UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p; const __m128i *w = p;
__m128i m; __m128i m;
MM_OP (_mm_add_epi64, ctr, one); MM_OP (_mm_add_epi64, ctr, one)
m = _mm_xor_si128 (ctr, p[0]); m = _mm_xor_si128 (ctr, p[0]);
w += 1; w += 1;
do do
{ {
MM_OP_m (_mm_aesenc_si128, w[0]); MM_OP_m (_mm_aesenc_si128, w[0])
MM_OP_m (_mm_aesenc_si128, w[1]); MM_OP_m (_mm_aesenc_si128, w[1])
w += 2; w += 2;
} }
while (--numRounds2); while (--numRounds2);
MM_OP_m (_mm_aesenc_si128, w[0]); MM_OP_m (_mm_aesenc_si128, w[0])
MM_OP_m (_mm_aesenclast_si128, w[1]); MM_OP_m (_mm_aesenclast_si128, w[1])
MM_XOR (*data, m); MM_XOR (*data, m)
} }
p[-2] = ctr; p[-2] = ctr;
@ -477,7 +525,7 @@ VAES_FUNC_START2 (AesCtr_Code_HW_256)
#define AES_TYPE_data Byte #define AES_TYPE_data Byte
#define AES_FUNC_START(name) \ #define AES_FUNC_START(name) \
void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \ void Z7_FASTCALL name(UInt32 *p, Byte *data, size_t numBlocks) \
#define AES_COMPAT_STUB(name) \ #define AES_COMPAT_STUB(name) \
AES_FUNC_START(name); \ AES_FUNC_START(name); \
@ -496,8 +544,8 @@ AES_COMPAT_STUB (AesCtr_Code)
#pragma message("VAES HW_SW stub was used") #pragma message("VAES HW_SW stub was used")
#define VAES_COMPAT_STUB(name) \ #define VAES_COMPAT_STUB(name) \
void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \ void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \ void Z7_FASTCALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \
{ name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); } { name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); }
VAES_COMPAT_STUB (AesCbc_Decode_HW) VAES_COMPAT_STUB (AesCbc_Decode_HW)
@ -551,7 +599,8 @@ VAES_COMPAT_STUB (AesCtr_Code_HW)
typedef uint8x16_t v128; typedef uint8x16_t v128;
#define AES_FUNC_START(name) \ #define AES_FUNC_START(name) \
void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks) void Z7_FASTCALL name(UInt32 *ivAes, Byte *data8, size_t numBlocks)
// void Z7_FASTCALL name(v128 *p, v128 *data, size_t numBlocks)
#define AES_FUNC_START2(name) \ #define AES_FUNC_START2(name) \
AES_FUNC_START (name); \ AES_FUNC_START (name); \
@ -559,18 +608,20 @@ ATTRIB_AES \
AES_FUNC_START (name) AES_FUNC_START (name)
#define MM_OP(op, dest, src) dest = op(dest, src); #define MM_OP(op, dest, src) dest = op(dest, src);
#define MM_OP_m(op, src) MM_OP(op, m, src); #define MM_OP_m(op, src) MM_OP(op, m, src)
#define MM_OP1_m(op) m = op(m); #define MM_OP1_m(op) m = op(m);
#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src); #define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src)
#define MM_XOR_m( src) MM_XOR(m, src); #define MM_XOR_m( src) MM_XOR(m, src)
#define AES_E_m(k) MM_OP_m (vaeseq_u8, k); #define AES_E_m(k) MM_OP_m (vaeseq_u8, k)
#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8); #define AES_E_MC_m(k) AES_E_m (k) MM_OP1_m(vaesmcq_u8)
AES_FUNC_START2 (AesCbc_Encode_HW) AES_FUNC_START2 (AesCbc_Encode_HW)
{ {
v128 *p = (v128*)(void*)ivAes;
v128 *data = (v128*)(void*)data8;
v128 m = *p; v128 m = *p;
const v128 k0 = p[2]; const v128 k0 = p[2];
const v128 k1 = p[3]; const v128 k1 = p[3];
@ -608,7 +659,7 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
AES_E_MC_m (p[14]) AES_E_MC_m (p[14])
} }
} }
AES_E_m (k_z1); AES_E_m (k_z1)
MM_XOR_m (k_z0); MM_XOR_m (k_z0);
*data = m; *data = m;
} }
@ -617,44 +668,44 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
#define WOP_1(op) #define WOP_1(op)
#define WOP_2(op) WOP_1 (op) op (m1, 1); #define WOP_2(op) WOP_1 (op) op (m1, 1)
#define WOP_3(op) WOP_2 (op) op (m2, 2); #define WOP_3(op) WOP_2 (op) op (m2, 2)
#define WOP_4(op) WOP_3 (op) op (m3, 3); #define WOP_4(op) WOP_3 (op) op (m3, 3)
#define WOP_5(op) WOP_4 (op) op (m4, 4); #define WOP_5(op) WOP_4 (op) op (m4, 4)
#define WOP_6(op) WOP_5 (op) op (m5, 5); #define WOP_6(op) WOP_5 (op) op (m5, 5)
#define WOP_7(op) WOP_6 (op) op (m6, 6); #define WOP_7(op) WOP_6 (op) op (m6, 6)
#define WOP_8(op) WOP_7 (op) op (m7, 7); #define WOP_8(op) WOP_7 (op) op (m7, 7)
#define NUM_WAYS 8 #define NUM_WAYS 8
#define WOP_M1 WOP_8 #define WOP_M1 WOP_8
#define WOP(op) op (m0, 0); WOP_M1(op) #define WOP(op) op (m0, 0) WOP_M1(op)
#define DECLARE_VAR(reg, ii) v128 reg #define DECLARE_VAR(reg, ii) v128 reg;
#define LOAD_data( reg, ii) reg = data[ii]; #define LOAD_data( reg, ii) reg = data[ii];
#define STORE_data( reg, ii) data[ii] = reg; #define STORE_data( reg, ii) data[ii] = reg;
#if (NUM_WAYS > 1) #if (NUM_WAYS > 1)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]); #define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1])
#endif #endif
#define MM_OP_key(op, reg) MM_OP (op, reg, key); #define MM_OP_key(op, reg) MM_OP (op, reg, key)
#define AES_D_m(k) MM_OP_m (vaesdq_u8, k); #define AES_D_m(k) MM_OP_m (vaesdq_u8, k)
#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8); #define AES_D_IMC_m(k) AES_D_m (k) MM_OP1_m (vaesimcq_u8)
#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg) #define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg)
#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg) #define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg)
#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg) #define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg)
#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg) #define AES_D_IMC( reg, ii) AES_D (reg, ii) reg = vaesimcq_u8(reg);
#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg) #define AES_E_MC( reg, ii) AES_E (reg, ii) reg = vaesmcq_u8(reg);
#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr); #define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one) reg = vreinterpretq_u8_u64(ctr);
#define CTR_END( reg, ii) MM_XOR (data[ii], reg); #define CTR_END( reg, ii) MM_XOR (data[ii], reg)
#define WOP_KEY(op, n) { \ #define WOP_KEY(op, n) { \
const v128 key = w[n]; \ const v128 key = w[n]; \
WOP(op); } WOP(op) }
#define WIDE_LOOP_START \ #define WIDE_LOOP_START \
dataEnd = data + numBlocks; \ dataEnd = data + numBlocks; \
@ -672,6 +723,8 @@ AES_FUNC_START2 (AesCbc_Encode_HW)
AES_FUNC_START2 (AesCbc_Decode_HW) AES_FUNC_START2 (AesCbc_Decode_HW)
{ {
v128 *p = (v128*)(void*)ivAes;
v128 *data = (v128*)(void*)data8;
v128 iv = *p; v128 iv = *p;
const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd; const v128 *dataEnd;
@ -681,7 +734,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
{ {
const v128 *w = wStart; const v128 *w = wStart;
WOP (DECLARE_VAR) WOP (DECLARE_VAR)
WOP (LOAD_data); WOP (LOAD_data)
WOP_KEY (AES_D_IMC, 2) WOP_KEY (AES_D_IMC, 2)
do do
{ {
@ -695,7 +748,7 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
MM_XOR (m0, iv); MM_XOR (m0, iv);
WOP_M1 (XOR_data_M1) WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1]; iv = data[NUM_WAYS - 1];
WOP (STORE_data); WOP (STORE_data)
} }
WIDE_LOOP_END WIDE_LOOP_END
@ -724,6 +777,8 @@ AES_FUNC_START2 (AesCbc_Decode_HW)
AES_FUNC_START2 (AesCtr_Code_HW) AES_FUNC_START2 (AesCtr_Code_HW)
{ {
v128 *p = (v128*)(void*)ivAes;
v128 *data = (v128*)(void*)data8;
uint64x2_t ctr = vreinterpretq_u64_u8(*p); uint64x2_t ctr = vreinterpretq_u64_u8(*p);
const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2; const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd; const v128 *dataEnd;
@ -735,7 +790,7 @@ AES_FUNC_START2 (AesCtr_Code_HW)
{ {
const v128 *w = p; const v128 *w = p;
WOP (DECLARE_VAR) WOP (DECLARE_VAR)
WOP (CTR_START); WOP (CTR_START)
do do
{ {
WOP_KEY (AES_E_MC, 0) WOP_KEY (AES_E_MC, 0)
@ -746,7 +801,7 @@ AES_FUNC_START2 (AesCtr_Code_HW)
WOP_KEY (AES_E_MC, 0) WOP_KEY (AES_E_MC, 0)
WOP_KEY (AES_E, 1) WOP_KEY (AES_E, 1)
WOP_KEY (AES_XOR, 2) WOP_KEY (AES_XOR, 2)
WOP (CTR_END); WOP (CTR_END)
} }
WIDE_LOOP_END WIDE_LOOP_END
@ -762,10 +817,10 @@ AES_FUNC_START2 (AesCtr_Code_HW)
w += 2; w += 2;
} }
while (w != wEnd); while (w != wEnd);
AES_E_MC_m (w[0]); AES_E_MC_m (w[0])
AES_E_m (w[1]); AES_E_m (w[1])
MM_XOR_m (w[2]); MM_XOR_m (w[2])
CTR_END (m, 0); CTR_END (m, 0)
} }
p[-2] = vreinterpretq_u8_u64(ctr); p[-2] = vreinterpretq_u8_u64(ctr);
@ -774,3 +829,12 @@ AES_FUNC_START2 (AesCtr_Code_HW)
#endif // USE_HW_AES #endif // USE_HW_AES
#endif // MY_CPU_ARM_OR_ARM64 #endif // MY_CPU_ARM_OR_ARM64
#undef NUM_WAYS
#undef WOP_M1
#undef WOP
#undef DECLARE_VAR
#undef LOAD_data
#undef STORE_data
#undef USE_INTEL_AES
#undef USE_HW_AES

View file

@ -1,38 +1,54 @@
/* Alloc.c -- Memory allocation functions /* Alloc.c -- Memory allocation functions
2021-07-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
#endif #endif
#include <stdlib.h> #include <stdlib.h>
#include "Alloc.h" #include "Alloc.h"
/* #define _SZ_ALLOC_DEBUG */ #ifdef _WIN32
#ifdef Z7_LARGE_PAGES
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#endif
#endif // Z7_LARGE_PAGES
#endif // _WIN32
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ // #define SZ_ALLOC_DEBUG
#ifdef _SZ_ALLOC_DEBUG /* #define SZ_ALLOC_DEBUG */
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef SZ_ALLOC_DEBUG
#include <string.h>
#include <stdio.h> #include <stdio.h>
int g_allocCount = 0; static int g_allocCount = 0;
int g_allocCountMid = 0; #ifdef _WIN32
int g_allocCountBig = 0; static int g_allocCountMid = 0;
static int g_allocCountBig = 0;
#endif
#define CONVERT_INT_TO_STR(charType, tempSize) \ #define CONVERT_INT_TO_STR(charType, tempSize) \
unsigned char temp[tempSize]; unsigned i = 0; \ char temp[tempSize]; unsigned i = 0; \
while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \ while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
*s++ = (charType)('0' + (unsigned)val); \ *s++ = (charType)('0' + (unsigned)val); \
while (i != 0) { i--; *s++ = temp[i]; } \ while (i != 0) { i--; *s++ = temp[i]; } \
*s = 0; *s = 0;
static void ConvertUInt64ToString(UInt64 val, char *s) static void ConvertUInt64ToString(UInt64 val, char *s)
{ {
CONVERT_INT_TO_STR(char, 24); CONVERT_INT_TO_STR(char, 24)
} }
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10))))) #define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
@ -77,7 +93,7 @@ static void PrintAligned(const char *s, size_t align)
Print(s); Print(s);
} }
static void PrintLn() static void PrintLn(void)
{ {
Print("\n"); Print("\n");
} }
@ -89,10 +105,10 @@ static void PrintHex(UInt64 v, size_t align)
PrintAligned(s, align); PrintAligned(s, align);
} }
static void PrintDec(UInt64 v, size_t align) static void PrintDec(int v, size_t align)
{ {
char s[32]; char s[32];
ConvertUInt64ToString(v, s); ConvertUInt64ToString((unsigned)v, s);
PrintAligned(s, align); PrintAligned(s, align);
} }
@ -102,12 +118,19 @@ static void PrintAddr(void *p)
} }
#define PRINT_ALLOC(name, cnt, size, ptr) \ #define PRINT_REALLOC(name, cnt, size, ptr) { \
Print(name " "); \
if (!ptr) PrintDec(cnt++, 10); \
PrintHex(size, 10); \
PrintAddr(ptr); \
PrintLn(); }
#define PRINT_ALLOC(name, cnt, size, ptr) { \
Print(name " "); \ Print(name " "); \
PrintDec(cnt++, 10); \ PrintDec(cnt++, 10); \
PrintHex(size, 10); \ PrintHex(size, 10); \
PrintAddr(ptr); \ PrintAddr(ptr); \
PrintLn(); PrintLn(); }
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \ #define PRINT_FREE(name, cnt, ptr) if (ptr) { \
Print(name " "); \ Print(name " "); \
@ -117,7 +140,9 @@ static void PrintAddr(void *p)
#else #else
#ifdef _WIN32
#define PRINT_ALLOC(name, cnt, size, ptr) #define PRINT_ALLOC(name, cnt, size, ptr)
#endif
#define PRINT_FREE(name, cnt, ptr) #define PRINT_FREE(name, cnt, ptr)
#define Print(s) #define Print(s)
#define PrintLn() #define PrintLn()
@ -127,16 +152,31 @@ static void PrintAddr(void *p)
#endif #endif
/*
by specification:
malloc(0) : returns NULL or a unique pointer value that can later be successfully passed to free()
realloc(NULL, size) : the call is equivalent to malloc(size)
realloc(non_NULL, 0) : the call is equivalent to free(ptr)
in main compilers:
malloc(0) : returns non_NULL
realloc(NULL, 0) : returns non_NULL
realloc(non_NULL, 0) : returns NULL
*/
void *MyAlloc(size_t size) void *MyAlloc(size_t size)
{ {
if (size == 0) if (size == 0)
return NULL; return NULL;
PRINT_ALLOC("Alloc ", g_allocCount, size, NULL); // PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
#ifdef _SZ_ALLOC_DEBUG #ifdef SZ_ALLOC_DEBUG
{ {
void *p = malloc(size); void *p = malloc(size);
// PRINT_ALLOC("Alloc ", g_allocCount, size, p); if (p)
{
PRINT_ALLOC("Alloc ", g_allocCount, size, p)
}
return p; return p;
} }
#else #else
@ -146,33 +186,64 @@ void *MyAlloc(size_t size)
void MyFree(void *address) void MyFree(void *address)
{ {
PRINT_FREE("Free ", g_allocCount, address); PRINT_FREE("Free ", g_allocCount, address)
free(address); free(address);
} }
/*
  MyRealloc() : realloc() wrapper with a fixed policy for zero size.
    size == 0 : (address) is released via MyFree(), and NULL is returned.
    size != 0 : the result of realloc(address, size) is returned as is
                (NULL, if reallocation failed).
  If SZ_ALLOC_DEBUG is defined, each successful reallocation is traced with
  PRINT_REALLOC, which receives the (address) value passed by the caller.
*/
void *MyRealloc(void *address, size_t size)
{
  void *res;

  if (size != 0)
  {
    res = realloc(address, size);
    #ifdef SZ_ALLOC_DEBUG
    /* trace only the calls that succeeded */
    if (res)
    {
      PRINT_REALLOC("Realloc ", g_allocCount, size, address)
    }
    #endif
    return res;
  }

  /* zero size request: behave as free(), never return a non-NULL pointer */
  MyFree(address);
  return NULL;
}
#ifdef _WIN32 #ifdef _WIN32
void *MidAlloc(size_t size) void *MidAlloc(size_t size)
{ {
if (size == 0) if (size == 0)
return NULL; return NULL;
#ifdef SZ_ALLOC_DEBUG
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL); {
void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
if (p)
{
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
}
return p;
}
#else
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
#endif
} }
void MidFree(void *address) void MidFree(void *address)
{ {
PRINT_FREE("Free-Mid", g_allocCountMid, address); PRINT_FREE("Free-Mid", g_allocCountMid, address)
if (!address) if (!address)
return; return;
VirtualFree(address, 0, MEM_RELEASE); VirtualFree(address, 0, MEM_RELEASE);
} }
#ifdef _7ZIP_LARGE_PAGES #ifdef Z7_LARGE_PAGES
#ifdef MEM_LARGE_PAGES #ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES #define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
@ -183,34 +254,35 @@ void MidFree(void *address)
extern extern
SIZE_T g_LargePageSize; SIZE_T g_LargePageSize;
SIZE_T g_LargePageSize = 0; SIZE_T g_LargePageSize = 0;
typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID); typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
#endif // _7ZIP_LARGE_PAGES void SetLargePageSize(void)
void SetLargePageSize()
{ {
#ifdef _7ZIP_LARGE_PAGES #ifdef Z7_LARGE_PAGES
SIZE_T size; SIZE_T size;
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) const
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); Func_GetLargePageMinimum fn =
if (!largePageMinimum) (Func_GetLargePageMinimum) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetLargePageMinimum");
if (!fn)
return; return;
size = largePageMinimum(); size = fn();
if (size == 0 || (size & (size - 1)) != 0) if (size == 0 || (size & (size - 1)) != 0)
return; return;
g_LargePageSize = size; g_LargePageSize = size;
#endif #endif
} }
#endif // Z7_LARGE_PAGES
void *BigAlloc(size_t size) void *BigAlloc(size_t size)
{ {
if (size == 0) if (size == 0)
return NULL; return NULL;
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL); PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
#ifdef _7ZIP_LARGE_PAGES #ifdef Z7_LARGE_PAGES
{ {
SIZE_T ps = g_LargePageSize; SIZE_T ps = g_LargePageSize;
if (ps != 0 && ps <= (1 << 30) && size > (ps / 2)) if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
@ -220,38 +292,38 @@ void *BigAlloc(size_t size)
size2 = (size + ps) & ~ps; size2 = (size + ps) & ~ps;
if (size2 >= size) if (size2 >= size)
{ {
void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE); void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (res) if (p)
return res; {
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
return p;
}
} }
} }
} }
#endif #endif
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); return MidAlloc(size);
} }
void BigFree(void *address) void BigFree(void *address)
{ {
PRINT_FREE("Free-Big", g_allocCountBig, address); PRINT_FREE("Free-Big", g_allocCountBig, address)
MidFree(address);
if (!address)
return;
VirtualFree(address, 0, MEM_RELEASE);
} }
#endif #endif // _WIN32
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); } static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree }; const ISzAlloc g_Alloc = { SzAlloc, SzFree };
#ifdef _WIN32 #ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); } static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif #endif
@ -334,7 +406,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
void *p; void *p;
void *pAligned; void *pAligned;
size_t newSize; size_t newSize;
UNUSED_VAR(pp); UNUSED_VAR(pp)
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */ block to prevent cache line sharing with another allocated blocks */
@ -362,7 +434,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
#else #else
void *p; void *p;
UNUSED_VAR(pp); UNUSED_VAR(pp)
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size)) if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
return NULL; return NULL;
@ -377,7 +449,7 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
static void SzAlignedFree(ISzAllocPtr pp, void *address) static void SzAlignedFree(ISzAllocPtr pp, void *address)
{ {
UNUSED_VAR(pp); UNUSED_VAR(pp)
#ifndef USE_posix_memalign #ifndef USE_posix_memalign
if (address) if (address)
MyFree(((void **)address)[-1]); MyFree(((void **)address)[-1]);
@ -401,7 +473,7 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size) static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{ {
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
void *adr; void *adr;
void *pAligned; void *pAligned;
size_t newSize; size_t newSize;
@ -447,7 +519,7 @@ static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{ {
if (address) if (address)
{ {
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt); const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
PrintLn(); PrintLn();
Print("- Aligned Free: "); Print("- Aligned Free: ");
PrintLn(); PrintLn();

View file

@ -1,19 +1,32 @@
/* Alloc.h -- Memory allocation functions /* Alloc.h -- Memory allocation functions
2021-07-13 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H #ifndef ZIP7_INC_ALLOC_H
#define __COMMON_ALLOC_H #define ZIP7_INC_ALLOC_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/*
MyFree(NULL) : is allowed, as free(NULL)
MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
MyRealloc() is similar to realloc() for the following cases:
MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
*/
void *MyAlloc(size_t size); void *MyAlloc(size_t size);
void MyFree(void *address); void MyFree(void *address);
void *MyRealloc(void *address, size_t size);
#ifdef _WIN32 #ifdef _WIN32
#ifdef Z7_LARGE_PAGES
void SetLargePageSize(void); void SetLargePageSize(void);
#endif
void *MidAlloc(size_t size); void *MidAlloc(size_t size);
void MidFree(void *address); void MidFree(void *address);

325
3rdparty/7z/src/Bcj2.c vendored
View file

@ -1,29 +1,24 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code) /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2021-02-09 : Igor Pavlov : Public domain */ 2023-03-01 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Bcj2.h" #include "Bcj2.h"
#include "CpuArch.h" #include "CpuArch.h"
#define CProb UInt16
#define kTopValue ((UInt32)1 << 24) #define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumModelBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5 #define kNumMoveBits 5
#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound) // UInt32 bcj2_stats[256 + 2][2];
#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
void Bcj2Dec_Init(CBcj2Dec *p) void Bcj2Dec_Init(CBcj2Dec *p)
{ {
unsigned i; unsigned i;
p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
p->state = BCJ2_DEC_STATE_OK;
p->ip = 0; p->ip = 0;
p->temp[3] = 0; p->temp = 0;
p->range = 0; p->range = 0;
p->code = 0; p->code = 0;
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
@ -32,217 +27,248 @@ void Bcj2Dec_Init(CBcj2Dec *p)
SRes Bcj2Dec_Decode(CBcj2Dec *p) SRes Bcj2Dec_Decode(CBcj2Dec *p)
{ {
UInt32 v = p->temp;
// const Byte *src;
if (p->range <= 5) if (p->range <= 5)
{ {
p->state = BCJ2_DEC_STATE_OK; UInt32 code = p->code;
p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
for (; p->range != 5; p->range++) for (; p->range != 5; p->range++)
{ {
if (p->range == 1 && p->code != 0) if (p->range == 1 && code != 0)
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
{ {
p->state = BCJ2_STREAM_RC; p->state = BCJ2_STREAM_RC;
return SZ_OK; return SZ_OK;
} }
code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; p->code = code;
} }
if (code == 0xffffffff)
if (p->code == 0xFFFFFFFF)
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
p->range = 0xffffffff;
p->range = 0xFFFFFFFF;
} }
else if (p->state >= BCJ2_DEC_STATE_ORIG_0) // else
{ {
while (p->state <= BCJ2_DEC_STATE_ORIG_3) unsigned state = p->state;
// we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
if (BCJ2_IS_32BIT_STREAM(state))
{
const Byte *cur = p->bufs[state];
if (cur == p->lims[state])
return SZ_OK;
p->bufs[state] = cur + 4;
{
const UInt32 ip = p->ip + 4;
v = GetBe32a(cur) - ip;
p->ip = ip;
}
state = BCJ2_DEC_STATE_ORIG_0;
}
if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
{ {
Byte *dest = p->dest; Byte *dest = p->dest;
if (dest == p->destLim) for (;;)
return SZ_OK;
*dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];
p->state++;
p->dest = dest + 1;
}
}
/*
if (BCJ2_IS_32BIT_STREAM(p->state))
{
const Byte *cur = p->bufs[p->state];
if (cur == p->lims[p->state])
return SZ_OK;
p->bufs[p->state] = cur + 4;
{
UInt32 val;
Byte *dest;
SizeT rem;
p->ip += 4;
val = GetBe32(cur) - p->ip;
dest = p->dest;
rem = p->destLim - dest;
if (rem < 4)
{ {
SizeT i; if (dest == p->destLim)
SetUi32(p->temp, val); {
for (i = 0; i < rem; i++) p->state = state;
dest[i] = p->temp[i]; p->temp = v;
p->dest = dest + rem; return SZ_OK;
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; }
return SZ_OK; *dest++ = (Byte)v;
p->dest = dest;
if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
break;
v >>= 8;
} }
SetUi32(dest, val);
p->temp[3] = (Byte)(val >> 24);
p->dest = dest + 4;
p->state = BCJ2_DEC_STATE_OK;
} }
} }
*/
// src = p->bufs[BCJ2_STREAM_MAIN];
for (;;) for (;;)
{ {
/*
if (BCJ2_IS_32BIT_STREAM(p->state)) if (BCJ2_IS_32BIT_STREAM(p->state))
p->state = BCJ2_DEC_STATE_OK; p->state = BCJ2_DEC_STATE_OK;
else else
*/
{ {
if (p->range < kTopValue) if (p->range < kTopValue)
{ {
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC]) if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
{ {
p->state = BCJ2_STREAM_RC; p->state = BCJ2_STREAM_RC;
p->temp = v;
return SZ_OK; return SZ_OK;
} }
p->range <<= 8; p->range <<= 8;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
} }
{ {
const Byte *src = p->bufs[BCJ2_STREAM_MAIN]; const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim; const Byte *srcLim;
Byte *dest; Byte *dest = p->dest;
SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
if (num == 0)
{ {
p->state = BCJ2_STREAM_MAIN; const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
return SZ_OK; SizeT num = (SizeT)(p->destLim - dest);
if (num >= rem)
num = rem;
#define NUM_ITERS 4
#if (NUM_ITERS & (NUM_ITERS - 1)) == 0
num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x))
#else
num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
#endif
srcLim = src + num;
} }
dest = p->dest; #define NUM_SHIFT_BITS 24
if (num > (SizeT)(p->destLim - dest)) #define ONE_ITER(indx) { \
const unsigned b = src[indx]; \
*dest++ = (Byte)b; \
v = (v << NUM_SHIFT_BITS) | b; \
if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
/* ++dest */; /* v = b; */ }
if (src != srcLim)
for (;;)
{ {
num = (SizeT)(p->destLim - dest); /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
if (num == 0) But we can remove dependency chain with v = b in the end of loop. */
ONE_ITER(0)
#if (NUM_ITERS > 1)
ONE_ITER(1)
#if (NUM_ITERS > 2)
ONE_ITER(2)
#if (NUM_ITERS > 3)
ONE_ITER(3)
#if (NUM_ITERS > 4)
ONE_ITER(4)
#if (NUM_ITERS > 5)
ONE_ITER(5)
#if (NUM_ITERS > 6)
ONE_ITER(6)
#if (NUM_ITERS > 7)
ONE_ITER(7)
#endif
#endif
#endif
#endif
#endif
#endif
#endif
src += NUM_ITERS;
if (src == srcLim)
break;
}
if (src == srcLim)
#if (NUM_ITERS > 1)
for (;;)
#endif
{
#if (NUM_ITERS > 1)
if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
#endif
{ {
p->state = BCJ2_DEC_STATE_ORIG; const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
p->bufs[BCJ2_STREAM_MAIN] = src;
p->dest = dest;
p->ip += (UInt32)num;
/* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
p->state =
src == p->lims[BCJ2_STREAM_MAIN] ?
(unsigned)BCJ2_STREAM_MAIN :
(unsigned)BCJ2_DEC_STATE_ORIG;
p->temp = v;
return SZ_OK; return SZ_OK;
} }
#if (NUM_ITERS > 1)
ONE_ITER(0)
src++;
#endif
} }
srcLim = src + num;
if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)
*dest = src[0];
else for (;;)
{ {
Byte b = *src; const SizeT num = (SizeT)(dest - p->dest);
*dest = b; p->dest = dest; // p->dest += num;
if (b != 0x0F) p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
{
if ((b & 0xFE) == 0xE8)
break;
dest++;
if (++src != srcLim)
continue;
break;
}
dest++;
if (++src == srcLim)
break;
if ((*src & 0xF0) != 0x80)
continue;
*dest = *src;
break;
}
num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
if (src == srcLim)
{
p->temp[3] = src[-1];
p->bufs[BCJ2_STREAM_MAIN] = src;
p->ip += (UInt32)num; p->ip += (UInt32)num;
p->dest += num;
p->state =
p->bufs[BCJ2_STREAM_MAIN] ==
p->lims[BCJ2_STREAM_MAIN] ?
(unsigned)BCJ2_STREAM_MAIN :
(unsigned)BCJ2_DEC_STATE_ORIG;
return SZ_OK;
} }
{ {
UInt32 bound, ttt; UInt32 bound, ttt;
CProb *prob; CBcj2Prob *prob; // unsigned index;
Byte b = src[0]; /*
Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]); prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
2 + (Byte)(v >> 8) :
p->temp[3] = b; ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1));
p->bufs[BCJ2_STREAM_MAIN] = src + 1; */
num++;
p->ip += (UInt32)num;
p->dest += num;
prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
_IF_BIT_0
{ {
_UPDATE_0 const unsigned c = ((v + 0x17) >> 6) & 1;
prob = p->probs + (unsigned)
(((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
// (Byte)
// 8x->0 : e9->1 : xxe8->xx+2
// 8x->0x100 : e9->0x101 : xxe8->xx
// (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
// (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
}
ttt = *prob;
bound = (p->range >> kNumBitModelTotalBits) * ttt;
if (p->code < bound)
{
// bcj2_stats[prob - p->probs][0]++;
p->range = bound;
*prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
continue; continue;
} }
_UPDATE_1 {
// bcj2_stats[prob - p->probs][1]++;
p->range -= bound;
p->code -= bound;
*prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
}
} }
} }
} }
{ {
UInt32 val; /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
const Byte *cur = p->bufs[cj]; const Byte *cur = p->bufs[cj];
Byte *dest; Byte *dest;
SizeT rem; SizeT rem;
if (cur == p->lims[cj]) if (cur == p->lims[cj])
{ {
p->state = cj; p->state = cj;
break; break;
} }
v = GetBe32a(cur);
val = GetBe32(cur);
p->bufs[cj] = cur + 4; p->bufs[cj] = cur + 4;
{
p->ip += 4; const UInt32 ip = p->ip + 4;
val -= p->ip; v -= ip;
p->ip = ip;
}
dest = p->dest; dest = p->dest;
rem = (SizeT)(p->destLim - dest); rem = (SizeT)(p->destLim - dest);
if (rem < 4) if (rem < 4)
{ {
p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8; if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8;
p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8; if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8;
p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8; if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}}
p->temp[3] = (Byte)val; p->temp = v;
p->dest = dest + rem; p->dest = dest + rem;
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem; p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
break; break;
} }
SetUi32(dest, v)
SetUi32(dest, val); v >>= 24;
p->temp[3] = (Byte)(val >> 24);
p->dest = dest + 4; p->dest = dest + 4;
} }
} }
@ -252,6 +278,13 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
p->range <<= 8; p->range <<= 8;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++; p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
} }
return SZ_OK; return SZ_OK;
} }
#undef NUM_ITERS
#undef ONE_ITER
#undef NUM_SHIFT_BITS
#undef kTopValue
#undef kNumBitModelTotalBits
#undef kBitModelTotal
#undef kNumMoveBits

270
3rdparty/7z/src/Bcj2.h vendored
View file

@ -1,8 +1,8 @@
/* Bcj2.h -- BCJ2 Converter for x86 code /* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2)
2014-11-10 : Igor Pavlov : Public domain */ 2023-03-02 : Igor Pavlov : Public domain */
#ifndef __BCJ2_H #ifndef ZIP7_INC_BCJ2_H
#define __BCJ2_H #define ZIP7_INC_BCJ2_H
#include "7zTypes.h" #include "7zTypes.h"
@ -26,37 +26,68 @@ enum
BCJ2_DEC_STATE_ORIG_3, BCJ2_DEC_STATE_ORIG_3,
BCJ2_DEC_STATE_ORIG, BCJ2_DEC_STATE_ORIG,
BCJ2_DEC_STATE_OK BCJ2_DEC_STATE_ERROR /* after detected data error */
}; };
enum enum
{ {
BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS, BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
BCJ2_ENC_STATE_OK BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */
}; };
#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) /* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */
#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2)
/* /*
CBcj2Dec / CBcj2Enc CBcj2Dec / CBcj2Enc
bufs sizes: bufs sizes:
BUF_SIZE(n) = lims[n] - bufs[n] BUF_SIZE(n) = lims[n] - bufs[n]
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4: bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be multiply of 4:
(BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0 (BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
(BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0 (BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
*/ */
// typedef UInt32 CBcj2Prob;
typedef UInt16 CBcj2Prob;
/*
BCJ2 encoder / decoder internal requirements:
- If last bytes of stream contain marker (e8/e9/0f8x), then
there is also encoded symbol (0 : no conversion) in RC stream.
- One case of overlapped instructions is supported,
if last byte of converted instruction is (0f) and next byte is (8x):
marker [xx xx xx 0f] 8x
then the pair (0f 8x) is treated as marker.
*/
/* ---------- BCJ2 Decoder ---------- */
/* /*
CBcj2Dec: CBcj2Dec:
dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions: (dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
bufs[BCJ2_STREAM_MAIN] >= dest && bufs[BCJ2_STREAM_MAIN] >= dest &&
bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv + bufs[BCJ2_STREAM_MAIN] - dest >=
BUF_SIZE(BCJ2_STREAM_CALL) + BUF_SIZE(BCJ2_STREAM_CALL) +
BUF_SIZE(BCJ2_STREAM_JUMP) BUF_SIZE(BCJ2_STREAM_JUMP)
tempReserv = 0 : for first call of Bcj2Dec_Decode reserve = bufs[BCJ2_STREAM_MAIN] - dest -
tempReserv = 4 : for any other calls of Bcj2Dec_Decode ( BUF_SIZE(BCJ2_STREAM_CALL) +
overlap with offset = 1 is not allowed BUF_SIZE(BCJ2_STREAM_JUMP) )
and additional conditions:
if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init())
{
(reserve != 1) : if (ver < v23.00)
}
else // if there is more than one call of Bcj2Dec_Decode() after Bcj2Dec_Init()
{
(reserve >= 6) : if (ver < v23.00)
(reserve >= 4) : if (ver >= v23.00)
We need that (reserve) because after first call of Bcj2Dec_Decode(),
CBcj2Dec::temp can contain up to 4 bytes for writing to (dest).
}
(reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode().
(reserve == 0) also is allowed in case of multi-call, if we use fixed buffers,
and (reserve) is calculated from full (final) sizes of all streams before first call.
*/ */
typedef struct typedef struct
@ -68,22 +99,66 @@ typedef struct
unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */ unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
UInt32 ip; UInt32 ip; /* property of starting base for decoding */
Byte temp[4]; UInt32 temp; /* Byte temp[4]; */
UInt32 range; UInt32 range;
UInt32 code; UInt32 code;
UInt16 probs[2 + 256]; CBcj2Prob probs[2 + 256];
} CBcj2Dec; } CBcj2Dec;
/* Note:
Bcj2Dec_Init() sets (CBcj2Dec::ip = 0)
if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init()
*/
void Bcj2Dec_Init(CBcj2Dec *p); void Bcj2Dec_Init(CBcj2Dec *p);
/* Returns: SZ_OK or SZ_ERROR_DATA */
/* Bcj2Dec_Decode():
returns:
SZ_OK
SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct
*/
SRes Bcj2Dec_Decode(CBcj2Dec *p); SRes Bcj2Dec_Decode(CBcj2Dec *p);
#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0) /* To check that decoding was finished you can compare
sizes of processed streams with sizes known from another sources.
You must do at least one mandatory check from the two following options:
- the check for size of processed output (ORIG) stream.
- the check for size of processed input (MAIN) stream.
additional optional checks:
- the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC)
- the checks Bcj2Dec_IsMaybeFinished*()
also before actual decoding you can check that the
following condition is met for stream sizes:
( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) )
*/
/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for
additional input data in BCJ2_STREAM_MAIN stream.
Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding.
*/
#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN)
/* if the stream decoding was finished correctly, then range decoder
part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0).
Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding.
*/
#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0)
/* use Bcj2Dec_IsMaybeFinished() only as additional check
after at least one mandatory check from the two following options:
- the check for size of processed output (ORIG) stream.
- the check for size of processed input (MAIN) stream.
*/
#define Bcj2Dec_IsMaybeFinished(_p_) ( \
Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \
Bcj2Dec_IsMaybeFinished_code(_p_))
/* ---------- BCJ2 Encoder ---------- */
typedef enum typedef enum
{ {
BCJ2_ENC_FINISH_MODE_CONTINUE, BCJ2_ENC_FINISH_MODE_CONTINUE,
@ -91,6 +166,91 @@ typedef enum
BCJ2_ENC_FINISH_MODE_END_STREAM BCJ2_ENC_FINISH_MODE_END_STREAM
} EBcj2Enc_FinishMode; } EBcj2Enc_FinishMode;
/*
BCJ2_ENC_FINISH_MODE_CONTINUE:
process non finished encoding.
It notifies the encoder that additional further calls
can provide more input data (src) than provided by current call.
In that case the CBcj2Enc encoder still can move (src) pointer
up to (srcLim), but CBcj2Enc encoder can store some of the last
processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer.
at return:
(CBcj2Enc::src will point to position that includes
processed data and data copied to (temp[]) buffer)
That data from (temp[]) buffer will be used in further calls.
BCJ2_ENC_FINISH_MODE_END_BLOCK:
finish encoding of current block (ended at srcLim) without RC flushing.
at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_ORIG &&
CBcj2Enc::src == CBcj2Enc::srcLim)
: it shows that block encoding was finished. And the encoder is
ready for new (src) data or for stream finish operation.
finished block means
{
CBcj2Enc has completed block encoding up to (srcLim).
(1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP sequences will
not cross block boundary at (srcLim).
temporary CBcj2Enc buffer for (ORIG) src data is empty.
3 output uncompressed streams (MAIN, CALL, JUMP) were flushed.
RC stream was not flushed. And RC stream will cross block boundary.
}
Note: some possible implementation of BCJ2 encoder could
write branch marker (e8/e9/0f8x) in one call of Bcj2Enc_Encode(),
and it could calculate symbol for RC in another call of Bcj2Enc_Encode().
BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol.
And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls.
So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK
to ensure that RC symbol is calculated and written in proper block.
BCJ2_ENC_FINISH_MODE_END_STREAM
finish encoding of stream (ended at srcLim) fully including RC flushing.
at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED)
: it shows that stream encoding was finished fully,
and all output streams were flushed fully.
also Bcj2Enc_IsFinished() can be called.
*/
/*
32-bit relative offset in JUMP/CALL commands is
- (mod 4 GiB) for 32-bit x86 code
- signed Int32 for 64-bit x86-64 code
BCJ2 encoder also does internal relative to absolute address conversions.
And there are 2 possible ways to do it:
before v23: we used 32-bit variables and (mod 4 GiB) conversion
since v23: we use 64-bit variables and (signed Int32 offset) conversion.
The absolute address condition for conversion in v23:
((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64)
note that if (fileSize64 > 2 GiB). there is difference between
old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23).
And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases.
*/
/*
// for old (v22) way for conversion:
typedef UInt32 CBcj2Enc_ip_unsigned;
typedef Int32 CBcj2Enc_ip_signed;
#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31)
*/
typedef UInt64 CBcj2Enc_ip_unsigned;
typedef Int64 CBcj2Enc_ip_signed;
/* maximum size of file that can be used for conversion condition */
#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2)
/* default value of fileSize64_minus1 variable that means
that absolute address limitation will not be used */
#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1)
/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */
#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \
((CBcj2Enc_ip_unsigned)(fileSize) - 1)
/* set CBcj2Enc::fileSize64_minus1 variable from size of file */
#define Bcj2Enc_SET_FileSize(p, fileSize) \
(p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize);
typedef struct typedef struct
{ {
Byte *bufs[BCJ2_NUM_STREAMS]; Byte *bufs[BCJ2_NUM_STREAMS];
@ -101,45 +261,71 @@ typedef struct
unsigned state; unsigned state;
EBcj2Enc_FinishMode finishMode; EBcj2Enc_FinishMode finishMode;
Byte prevByte; Byte context;
Byte flushRem;
Byte isFlushState;
Byte cache; Byte cache;
UInt32 range; UInt32 range;
UInt64 low; UInt64 low;
UInt64 cacheSize; UInt64 cacheSize;
// UInt32 context; // for marker version, it can include marker flag.
UInt32 ip; /* (ip64) and (fileIp64) correspond to virtual source stream position
that doesn't include data in temp[] */
/* 32-bit ralative offset in JUMP/CALL commands is CBcj2Enc_ip_unsigned ip64; /* current (ip) position */
- (mod 4 GB) in 32-bit mode CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */
- signed Int32 in 64-bit mode CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */
We use (mod 4 GB) check for fileSize. UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */
Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */ // UInt32 relatExcludeBits;
UInt32 fileIp;
UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */
UInt32 tempTarget; UInt32 tempTarget;
unsigned tempPos; unsigned tempPos; /* the number of bytes that were copied to temp[] buffer
Byte temp[4 * 2]; (tempPos <= 4) outside of Bcj2Enc_Encode() */
// Byte temp[4]; // for marker version
unsigned flushPos; Byte temp[8];
CBcj2Prob probs[2 + 256];
UInt16 probs[2 + 256];
} CBcj2Enc; } CBcj2Enc;
void Bcj2Enc_Init(CBcj2Enc *p); void Bcj2Enc_Init(CBcj2Enc *p);
/*
Bcj2Enc_Encode(): at exit:
p->state < BCJ2_NUM_STREAMS : we need more buffer space for output stream
(bufs[p->state] == lims[p->state])
p->state == BCJ2_ENC_STATE_ORIG : we need more data in input src stream
(src == srcLim)
p->state == BCJ2_ENC_STATE_FINISHED : after fully encoded stream
*/
void Bcj2Enc_Encode(CBcj2Enc *p); void Bcj2Enc_Encode(CBcj2Enc *p);
#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos) /* Bcj2Enc encoder can look ahead for up 4 bytes of source stream.
#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5) CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer.
(CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after
fully processed data and after data copied to temp buffer.
So if the caller needs to get real number of fully processed input
bytes (without look ahead data in temp buffer),
the caller must subtract (CBcj2Enc::tempPos) value from processed size
value that is calculated based on current (CBcj2Enc::src):
cur_processed_pos = Calc_Big_Processed_Pos(enc.src) -
Bcj2Enc_Get_AvailInputSize_in_Temp(&enc);
*/
/* get the size of input data that was stored in temp[] buffer: */
#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos)
#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0)
#define BCJ2_RELAT_LIMIT_NUM_BITS 26 /* Note : the decoder supports overlapping of marker (0f 80).
#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS) But we can eliminate such overlapping cases by setting
the limit for relative offset conversion as
/* limit for CBcj2Enc::fileSize variable */ CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB)
#define BCJ2_FileSize_MAX ((UInt32)1 << 31) */
/* default value for CBcj2Enc::relatLimit */
#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24)
#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31)
// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5
EXTERN_C_END EXTERN_C_END

View file

@ -1,60 +1,62 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code) /* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2)
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
/* #define SHOW_STAT */ /* #define SHOW_STAT */
#ifdef SHOW_STAT #ifdef SHOW_STAT
#include <stdio.h> #include <stdio.h>
#define PRF(x) x #define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src));
#else #else
#define PRF(x) #define PRF2(s)
#endif #endif
#include <string.h>
#include "Bcj2.h" #include "Bcj2.h"
#include "CpuArch.h" #include "CpuArch.h"
#define CProb UInt16
#define kTopValue ((UInt32)1 << 24) #define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumModelBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5 #define kNumMoveBits 5
void Bcj2Enc_Init(CBcj2Enc *p) void Bcj2Enc_Init(CBcj2Enc *p)
{ {
unsigned i; unsigned i;
p->state = BCJ2_ENC_STATE_ORIG;
p->state = BCJ2_ENC_STATE_OK;
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE; p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
p->context = 0;
p->prevByte = 0; p->flushRem = 5;
p->isFlushState = 0;
p->cache = 0; p->cache = 0;
p->range = 0xFFFFFFFF; p->range = 0xffffffff;
p->low = 0; p->low = 0;
p->cacheSize = 1; p->cacheSize = 1;
p->ip64 = 0;
p->ip = 0; p->fileIp64 = 0;
p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED;
p->fileIp = 0; p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT;
p->fileSize = 0; // p->relatExcludeBits = 0;
p->relatLimit = BCJ2_RELAT_LIMIT;
p->tempPos = 0; p->tempPos = 0;
p->flushPos = 0;
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++) for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
p->probs[i] = kBitModelTotal >> 1; p->probs[i] = kBitModelTotal >> 1;
} }
static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p) // Z7_NO_INLINE
Z7_FORCE_INLINE
static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p)
{ {
if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0) const UInt32 low = (UInt32)p->low;
const unsigned high = (unsigned)
#if defined(Z7_MSC_VER_ORIGINAL) \
&& defined(MY_CPU_X86) \
&& defined(MY_CPU_LE) \
&& !defined(MY_CPU_64BIT)
// we try to rid of __aullshr() call in MSVS-x86
(((const UInt32 *)&p->low)[1]); // [1] : for little-endian only
#else
(p->low >> 32);
#endif
if (low < (UInt32)0xff000000 || high != 0)
{ {
Byte *buf = p->bufs[BCJ2_STREAM_RC]; Byte *buf = p->bufs[BCJ2_STREAM_RC];
do do
@ -65,247 +67,440 @@ static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
p->bufs[BCJ2_STREAM_RC] = buf; p->bufs[BCJ2_STREAM_RC] = buf;
return True; return True;
} }
*buf++ = (Byte)(p->cache + (Byte)(p->low >> 32)); *buf++ = (Byte)(p->cache + high);
p->cache = 0xFF; p->cache = 0xff;
} }
while (--p->cacheSize); while (--p->cacheSize);
p->bufs[BCJ2_STREAM_RC] = buf; p->bufs[BCJ2_STREAM_RC] = buf;
p->cache = (Byte)((UInt32)p->low >> 24); p->cache = (Byte)(low >> 24);
} }
p->cacheSize++; p->cacheSize++;
p->low = (UInt32)p->low << 8; p->low = low << 8;
return False; return False;
} }
/*
We can use 2 alternative versions of code:
1) non-marker version:
Byte CBcj2Enc::context
Byte temp[8];
Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer.
Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction
with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call.
2) marker version:
UInt32 CBcj2Enc::context
Byte CBcj2Enc::temp[4];
MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains a found marker.
it's allowed that
one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest,
and another call of Bcj2Enc_Encode_2() does offset conversion.
So different values of (fileIp) and (fileSize) are possible
in these different Bcj2Enc_Encode_2() calls.
Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop.
So we use non-marker version.
*/
/*
Corner cases with overlap in multi-block.
before v23: there was one corner case, where converted instruction
could start in one sub-stream and finish in next sub-stream.
If multi-block (solid) encoding is used,
and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream.
and (0f) is last byte of previous sub-stream
and (8x) is first byte of current sub-stream
then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder.
BCJ2 encoder can convert the 32-bit offset for that (0f 8x) pair,
if that offset meets limit requirements.
If encoder allows 32-bit offset conversion for such overlap case,
then the data in 3 uncompressed BCJ2 streams for some sub-stream
can depend on data of the previous sub-stream.
That corner case is not big problem, and it's rare case.
Since v23.00 we do additional check to prevent conversions in such overlap cases.
*/
/*
Bcj2Enc_Encode_2() output variables at exit:
{
if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG))
{
it means that encoder needs more input data.
if (p->srcLim == p->src) at exit, then
{
(p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
all input data were read and processed, and we are ready for
new input data.
}
else
{
(p->srcLim != p->src)
(p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
The encoder has found an e8/e9/0f_8x marker,
and p->src points to last byte of that marker,
Bcj2Enc_Encode_2() needs more input data to get totally
5 bytes (last byte of marker and 32-bit branch offset)
as continuous array starting from p->src.
(p->srcLim - p->src < 5) requirement is met after exit.
So the non-processed residue from p->src to p->srcLim is always less than 5 bytes.
}
}
}
*/
Z7_NO_INLINE
static void Bcj2Enc_Encode_2(CBcj2Enc *p) static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{ {
if (BCJ2_IS_32BIT_STREAM(p->state)) if (!p->isFlushState)
{ {
Byte *cur = p->bufs[p->state]; const Byte *src;
if (cur == p->lims[p->state]) UInt32 v;
return;
SetBe32(cur, p->tempTarget);
p->bufs[p->state] = cur + 4;
}
p->state = BCJ2_ENC_STATE_ORIG;
for (;;)
{
if (p->range < kTopValue)
{
if (RangeEnc_ShiftLow(p))
return;
p->range <<= 8;
}
{ {
const unsigned state = p->state;
if (BCJ2_IS_32BIT_STREAM(state))
{
Byte *cur = p->bufs[state];
if (cur == p->lims[state])
return;
SetBe32a(cur, p->tempTarget)
p->bufs[state] = cur + 4;
}
}
p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit
src = p->src;
v = p->context;
// #define WRITE_CONTEXT p->context = v; // for marker version
#define WRITE_CONTEXT p->context = (Byte)v;
#define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT
for (;;)
{
// const Byte *src;
// UInt32 v;
CBcj2Enc_ip_unsigned ip;
if (p->range < kTopValue)
{
// to reduce register pressure and code size: we save and restore local variables.
WRITE_CONTEXT_AND_SRC
if (Bcj2_RangeEnc_ShiftLow(p))
return;
p->range <<= 8;
src = p->src;
v = p->context;
}
// src = p->src;
// #define MARKER_FLAG ((UInt32)1 << 17)
// if ((v & MARKER_FLAG) == 0) // for marker version
{ {
const Byte *src = p->src;
const Byte *srcLim; const Byte *srcLim;
Byte *dest; Byte *dest = p->bufs[BCJ2_STREAM_MAIN];
SizeT num = (SizeT)(p->srcLim - src);
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{ {
if (num <= 4) const SizeT remSrc = (SizeT)(p->srcLim - src);
return; SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
num -= 4; if (rem >= remSrc)
rem = remSrc;
srcLim = src + rem;
} }
else if (num == 0) /* p->context contains context of previous byte:
break; bits [0 : 7] : src[-1], if (src) was changed in this call
bits [8 : 31] : are undefined for non-marker version
*/
// v = p->context;
#define NUM_SHIFT_BITS 24
#define CONV_FLAG ((UInt32)1 << 16)
#define ONE_ITER { \
b = src[0]; \
*dest++ = (Byte)b; \
v = (v << NUM_SHIFT_BITS) | b; \
if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
src++; if (src == srcLim) { break; } }
dest = p->bufs[BCJ2_STREAM_MAIN]; if (src != srcLim)
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest)) for (;;)
{ {
num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest); /* clang can generate ineffective code with setne instead of two jcc instructions.
if (num == 0) we can use 2 iterations and external (unsigned b) to avoid that ineffective code genaration. */
unsigned b;
ONE_ITER
ONE_ITER
}
ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]);
p->bufs[BCJ2_STREAM_MAIN] = dest;
p->ip64 = ip;
if (src == srcLim)
{
WRITE_CONTEXT_AND_SRC
if (src != p->srcLim)
{ {
p->state = BCJ2_STREAM_MAIN; p->state = BCJ2_STREAM_MAIN;
return; return;
} }
} /* (p->src == p->srcLim)
(p->state == BCJ2_ENC_STATE_ORIG) */
srcLim = src + num; if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
return;
if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80) /* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM */
*dest = src[0]; // (p->flushRem == 5);
else for (;;) p->isFlushState = 1;
{
Byte b = *src;
*dest = b;
if (b != 0x0F)
{
if ((b & 0xFE) == 0xE8)
break;
dest++;
if (++src != srcLim)
continue;
break;
}
dest++;
if (++src == srcLim)
break;
if ((*src & 0xF0) != 0x80)
continue;
*dest = *src;
break; break;
} }
src++;
num = (SizeT)(src - p->src); // p->src = src;
}
if (src == srcLim) // ip = p->ip; // for marker version
/* marker was found */
/* (v) contains marker that was found:
bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7]
: value of src[-2] : xx/xx/0f
bits [0 : 7] : value of src[-1] : e8/e9/8x
*/
{
{ {
p->prevByte = src[-1]; #if NUM_SHIFT_BITS != 24
p->bufs[BCJ2_STREAM_MAIN] = dest; v &= ~(UInt32)CONV_FLAG;
p->src = src; #endif
p->ip += (UInt32)num; // UInt32 relat = 0;
continue;
}
{
Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
BoolInt needConvert;
p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
p->ip += (UInt32)num + 1;
src++;
needConvert = False;
if ((SizeT)(p->srcLim - src) >= 4) if ((SizeT)(p->srcLim - src) >= 4)
{ {
UInt32 relatVal = GetUi32(src); /*
if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize) if (relat != 0 || (Byte)v != 0xe8)
&& ((relatVal + p->relatLimit) >> 1) < p->relatLimit) BoolInt isBigOffset = True;
needConvert = True; */
} const UInt32 relat = GetUi32(src);
/*
{ #define EXCLUDE_FLAG ((UInt32)1 << 4)
UInt32 bound; #define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0)
unsigned ttt; if (p->relatExcludeBits != 0)
Byte b = src[-1];
CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));
ttt = *prob;
bound = (p->range >> kNumModelBits) * ttt;
if (!needConvert)
{ {
const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1);
isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0);
}
// isBigOffset = False; // for debug
*/
ip -= p->fileIp64;
// Use the following if check, if (ip) is 64-bit:
if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate milti-block overlap for (Of 80) and (e8/e9)
if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1)
if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit)
v |= CONV_FLAG;
}
else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{
// (p->srcLim - src < 4)
// /*
// for non-marker version
p->ip64--; // p->ip = ip - 1;
p->bufs[BCJ2_STREAM_MAIN]--;
src--;
v >>= NUM_SHIFT_BITS;
// (0 < p->srcLim - p->src <= 4)
// */
// v |= MARKER_FLAG; // for marker version
/* (p->state == BCJ2_ENC_STATE_ORIG) */
WRITE_CONTEXT_AND_SRC
return;
}
{
const unsigned c = ((v + 0x17) >> 6) & 1;
CBcj2Prob *prob = p->probs + (unsigned)
(((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
/*
((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) :
((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1));
*/
const unsigned ttt = *prob;
const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt;
if ((v & CONV_FLAG) == 0)
{
// static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy);
// v = (Byte)v; // for marker version
p->range = bound; p->range = bound;
*prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
p->src = src; // WRITE_CONTEXT_AND_SRC
p->prevByte = b;
continue; continue;
} }
p->low += bound; p->low += bound;
p->range -= bound; p->range -= bound;
*prob = (CProb)(ttt - (ttt >> kNumMoveBits)); *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
}
// p->context = src[3];
{
// const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP);
const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
ip = p->ip64;
v = GetUi32(src); // relat
ip += 4;
p->ip64 = ip;
src += 4;
// p->src = src;
{ {
UInt32 relatVal = GetUi32(src); const UInt32 absol = (UInt32)ip + v;
UInt32 absVal; Byte *cur = p->bufs[cj];
p->ip += 4; v >>= 24;
absVal = p->ip + relatVal; // WRITE_CONTEXT
p->prevByte = src[3]; if (cur == p->lims[cj])
src += 4;
p->src = src;
{ {
unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP; p->state = cj;
Byte *cur = p->bufs[cj]; p->tempTarget = absol;
if (cur == p->lims[cj]) WRITE_CONTEXT_AND_SRC
{ return;
p->state = cj;
p->tempTarget = absVal;
return;
}
SetBe32(cur, absVal);
p->bufs[cj] = cur + 4;
} }
SetBe32a(cur, absol)
p->bufs[cj] = cur + 4;
} }
} }
} }
} }
} } // end of loop
} }
if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM) for (; p->flushRem != 0; p->flushRem--)
return; if (Bcj2_RangeEnc_ShiftLow(p))
for (; p->flushPos < 5; p->flushPos++)
if (RangeEnc_ShiftLow(p))
return; return;
p->state = BCJ2_ENC_STATE_OK; p->state = BCJ2_ENC_STATE_FINISHED;
} }
/*
BCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer.
So base function Bcj2Enc_Encode_2()
in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with
(p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim)
Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer.
so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(),
then (p->src == p->srcLim).
And the caller's code is simpler with Bcj2Enc_Encode().
*/
Z7_NO_INLINE
void Bcj2Enc_Encode(CBcj2Enc *p) void Bcj2Enc_Encode(CBcj2Enc *p)
{ {
PRF(printf("\n")); PRF2("\n----")
PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
if (p->tempPos != 0) if (p->tempPos != 0)
{ {
/* extra: number of bytes that were copied from (src) to (temp) buffer in this call */
unsigned extra = 0; unsigned extra = 0;
/* We will touch only minimal required number of bytes in input (src) stream.
So we will add input bytes from (src) stream to temp[] with step of 1 byte.
We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call
in first loop iteration because
- previous call of Bcj2Enc_Encode() could use another (finishMode),
- previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG).
the case with full temp[] buffer (p->tempPos == 4) is possible here.
*/
for (;;) for (;;)
{ {
// (0 < p->tempPos <= 5) // in non-marker version
/* p->src : the current src data position including extra bytes
that were copied to temp[] buffer in this call */
const Byte *src = p->src; const Byte *src = p->src;
const Byte *srcLim = p->srcLim; const Byte *srcLim = p->srcLim;
EBcj2Enc_FinishMode finishMode = p->finishMode; const EBcj2Enc_FinishMode finishMode = p->finishMode;
if (src != srcLim)
{
/* if there are some src data after the data copied to temp[],
then we use MODE_CONTINUE for temp data */
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
}
p->src = p->temp; p->src = p->temp;
p->srcLim = p->temp + p->tempPos; p->srcLim = p->temp + p->tempPos;
if (src != srcLim) PRF2(" ")
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
Bcj2Enc_Encode_2(p); Bcj2Enc_Encode_2(p);
{ {
unsigned num = (unsigned)(p->src - p->temp); const unsigned num = (unsigned)(p->src - p->temp);
unsigned tempPos = p->tempPos - num; const unsigned tempPos = p->tempPos - num;
unsigned i; unsigned i;
p->tempPos = tempPos; p->tempPos = tempPos;
for (i = 0; i < tempPos; i++) for (i = 0; i < tempPos; i++)
p->temp[i] = p->temp[(size_t)i + num]; p->temp[i] = p->temp[(SizeT)i + num];
// tempPos : number of bytes in temp buffer
p->src = src; p->src = src;
p->srcLim = srcLim; p->srcLim = srcLim;
p->finishMode = finishMode; p->finishMode = finishMode;
if (p->state != BCJ2_ENC_STATE_ORIG)
if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim) {
// (p->tempPos <= 4) // in non-marker version
/* if (the reason of exit from Bcj2Enc_Encode_2()
is not BCJ2_ENC_STATE_ORIG),
then we exit from Bcj2Enc_Encode() with same reason */
// optional code begin : we rollback (src) and tempPos, if it's possible:
if (extra >= tempPos)
extra = tempPos;
p->src = src - extra;
p->tempPos = tempPos - extra;
// optional code end : rollback of (src) and tempPos
return; return;
}
/* (p->tempPos <= 4)
(p->state == BCJ2_ENC_STATE_ORIG)
so encoder needs more data than in temp[] */
if (src == srcLim)
return; // src buffer has no more input data.
/* (src != srcLim)
so we can provide more input data from src for Bcj2Enc_Encode_2() */
if (extra >= tempPos) if (extra >= tempPos)
{ {
p->src = src - tempPos; /* (extra >= tempPos) means that temp buffer contains
only data from src buffer of this call.
So now we can encode without temp buffer */
p->src = src - tempPos; // rollback (src)
p->tempPos = 0; p->tempPos = 0;
break; break;
} }
// we append one additional extra byte from (src) to temp[] buffer:
p->temp[tempPos] = src[0]; p->temp[tempPos] = *src;
p->tempPos = tempPos + 1; p->tempPos = tempPos + 1;
// (0 < p->tempPos <= 5) // in non-marker version
p->src = src + 1; p->src = src + 1;
extra++; extra++;
} }
} }
} }
PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src)); PRF2("++++")
// (p->tempPos == 0)
Bcj2Enc_Encode_2(p); Bcj2Enc_Encode_2(p);
PRF2("====")
if (p->state == BCJ2_ENC_STATE_ORIG) if (p->state == BCJ2_ENC_STATE_ORIG)
{ {
const Byte *src = p->src; const Byte *src = p->src;
unsigned rem = (unsigned)(p->srcLim - src); const Byte *srcLim = p->srcLim;
unsigned i; const unsigned rem = (unsigned)(srcLim - src);
for (i = 0; i < rem; i++) /* (rem <= 4) here.
p->temp[i] = src[i]; if (p->src != p->srcLim), then
p->tempPos = rem; - we copy non-processed bytes from (p->src) to temp[] buffer,
p->src = src + rem; - we set p->src equal to p->srcLim.
*/
if (rem)
{
unsigned i = 0;
p->src = srcLim;
p->tempPos = rem;
// (0 < p->tempPos <= 4)
do
p->temp[i] = src[i];
while (++i != rem);
}
// (p->tempPos <= 4)
// (p->src == p->srcLim)
} }
} }
#undef PRF2
#undef CONV_FLAG
#undef MARKER_FLAG
#undef WRITE_CONTEXT
#undef WRITE_CONTEXT_AND_SRC
#undef ONE_ITER
#undef NUM_SHIFT_BITS
#undef kTopValue
#undef kNumBitModelTotalBits
#undef kBitModelTotal
#undef kNumMoveBits

48
3rdparty/7z/src/Blake2.h vendored Normal file
View file

@ -0,0 +1,48 @@
/* Blake2.h -- BLAKE2 Hash
2023-03-04 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#ifndef ZIP7_INC_BLAKE2_H
#define ZIP7_INC_BLAKE2_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define BLAKE2S_BLOCK_SIZE 64
#define BLAKE2S_DIGEST_SIZE 32
#define BLAKE2SP_PARALLEL_DEGREE 8
/* State of one BLAKE2s hashing instance (used as a leaf or root node of BLAKE2sp). */
typedef struct
{
/* chaining value: set from the IV by Blake2s_Init0(), updated by every compression */
UInt32 h[8];
/* byte counter split into two 32-bit words: t[0] is the low word, carry goes to t[1] */
UInt32 t[2];
/* finalization flags: on the last block f[0] is set to the final flag
   and f[1] receives the value of lastNode_f1 */
UInt32 f[2];
/* input bytes collected for the next compression */
Byte buf[BLAKE2S_BLOCK_SIZE];
/* number of bytes currently stored in buf[] */
UInt32 bufPos;
/* value copied into f[1] at finalization; non-zero marks the last node of a tree level */
UInt32 lastNode_f1;
UInt32 dummy[2]; /* for sizeof(CBlake2s) alignment */
} CBlake2s;
/* You need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */
/*
void Blake2s_Init0(CBlake2s *p);
void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size);
void Blake2s_Final(CBlake2s *p, Byte *digest);
*/
/* State of a BLAKE2sp hash: BLAKE2SP_PARALLEL_DEGREE leaf states that
   receive the input in turn, one BLAKE2S_BLOCK_SIZE block at a time. */
typedef struct
{
/* leaf states; the leaf for the next input byte is S[bufPos / BLAKE2S_BLOCK_SIZE] */
CBlake2s S[BLAKE2SP_PARALLEL_DEGREE];
/* input position inside the current stripe of
   (BLAKE2S_BLOCK_SIZE * BLAKE2SP_PARALLEL_DEGREE) bytes */
unsigned bufPos;
} CBlake2sp;
void Blake2sp_Init(CBlake2sp *p);
void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size);
void Blake2sp_Final(CBlake2sp *p, Byte *digest);
EXTERN_C_END
#endif

250
3rdparty/7z/src/Blake2s.c vendored Normal file
View file

@ -0,0 +1,250 @@
/* Blake2s.c -- BLAKE2s and BLAKE2sp Hash
2023-03-04 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#include "Precomp.h"
#include <string.h>
#include "Blake2.h"
#include "CpuArch.h"
#include "RotateDefs.h"
#define rotr32 rotrFixed
#define BLAKE2S_NUM_ROUNDS 10
#define BLAKE2S_FINAL_FLAG (~(UInt32)0)
/* BLAKE2s initialization vector.
   Blake2s_Init0() copies it into the chaining value h[], and
   Blake2s_Compress() uses it again to fill the second half of the work vector. */
static const UInt32 k_Blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
/* Message word schedule: row (r) is used by round (r) of Blake2s_Compress().
   Each entry is an index into the 16-word message block m[];
   the mixing step number (i) of a round reads entries [2*i] and [2*i+1]. */
static const Byte k_Blake2s_Sigma[BLAKE2S_NUM_ROUNDS][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
/* Resets (p) to the plain BLAKE2s start state:
   chaining value = IV, counter / final flags / buffer position = 0.
   buf[] and dummy[] are left untouched.
   The caller still has to xor the parameter block into p->h[]. */
static void Blake2s_Init0(CBlake2s *p)
{
  memcpy(p->h, k_Blake2s_IV, sizeof(p->h));

  p->t[0] = p->t[1] = 0;
  p->f[0] = p->f[1] = 0;

  p->lastNode_f1 = 0;
  p->bufPos = 0;
}
/* Blake2s_Compress(): runs the BLAKE2s compression function on the 64-byte
   block stored in p->buf[] and folds the result into the chaining value p->h[].

   The caller must set the counter p->t[] and the finalization flags p->f[]
   before the call: both are mixed into the work vector here.
   p->buf[], p->t[] and p->f[] are only read. */
static void Blake2s_Compress(CBlake2s *p)
{
  UInt32 m[16];   /* message block as 16 little-endian 32-bit words */
  UInt32 v[16];   /* work vector */
  unsigned i;
  unsigned r;

  for (i = 0; i < 16; i++)
    m[i] = GetUi32(p->buf + i * sizeof(m[i]));

  /* first half of the work vector: current chaining value */
  for (i = 0; i < 8; i++)
    v[i] = p->h[i];

  /* second half: IV, with counter and final flags xored into the last 4 words */
  v[ 8] = k_Blake2s_IV[0];
  v[ 9] = k_Blake2s_IV[1];
  v[10] = k_Blake2s_IV[2];
  v[11] = k_Blake2s_IV[3];

  v[12] = p->t[0] ^ k_Blake2s_IV[4];
  v[13] = p->t[1] ^ k_Blake2s_IV[5];
  v[14] = p->f[0] ^ k_Blake2s_IV[6];
  v[15] = p->f[1] ^ k_Blake2s_IV[7];

  /* G: one mixing step on 4 words of v[]; it consumes the 2 message words
     selected by the entries [2*i] and [2*i+1] of the current sigma row.
     The last line of the macro has no line continuation on purpose:
     a trailing backslash would splice the following source line into the macro. */
  #define G(r,i,a,b,c,d) \
    a += b + m[sigma[2*i+0]];  d ^= a; d = rotr32(d, 16);  c += d;  b ^= c; b = rotr32(b, 12); \
    a += b + m[sigma[2*i+1]];  d ^= a; d = rotr32(d,  8);  c += d;  b ^= c; b = rotr32(b,  7);

  /* R: one full round = 4 column steps followed by 4 diagonal steps */
  #define R(r) \
    G(r,0,v[ 0],v[ 4],v[ 8],v[12]) \
    G(r,1,v[ 1],v[ 5],v[ 9],v[13]) \
    G(r,2,v[ 2],v[ 6],v[10],v[14]) \
    G(r,3,v[ 3],v[ 7],v[11],v[15]) \
    G(r,4,v[ 0],v[ 5],v[10],v[15]) \
    G(r,5,v[ 1],v[ 6],v[11],v[12]) \
    G(r,6,v[ 2],v[ 7],v[ 8],v[13]) \
    G(r,7,v[ 3],v[ 4],v[ 9],v[14])

  for (r = 0; r < BLAKE2S_NUM_ROUNDS; r++)
  {
    const Byte *sigma = k_Blake2s_Sigma[r];
    R(r)
  }
  /* R(0); R(1); R(2); R(3); R(4); R(5); R(6); R(7); R(8); R(9); */

  #undef G
  #undef R

  /* feed-forward: xor both halves of the work vector into the chaining value */
  for (i = 0; i < 8; i++)
    p->h[i] ^= v[i] ^ v[i + 8];
}
/* Adds (inc) to the byte counter of state (s):
   s->t[0] is the low 32-bit word, the carry is propagated to s->t[1].
   (inc) is evaluated twice, so it must have no side effects.
   The macro is a complete statement block: use it without trailing semicolon. */
#define Blake2s_Increment_Counter(s, inc) \
  { (s)->t[0] += (inc); (s)->t[1] += ((s)->t[0] < (inc)); }

/* Sets the finalization flags of state (s) for the last block:
   f[0] = final flag, f[1] = the last-node value prepared in s->lastNode_f1. */
#define Blake2s_Set_LastBlock(s) \
  { (s)->f[0] = BLAKE2S_FINAL_FLAG; (s)->f[1] = (s)->lastNode_f1; }


/* Blake2s_Update(): feeds (size) bytes from (data) to the state (p).

   A block is compressed only when there is more input than the buffer can
   still take. So a completely filled buffer stays pending in p->buf[]:
   the last block of the message must be compressed by Blake2s_Final(),
   because only that call sets the finalization flags. */
static void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size)
{
  while (size != 0)
  {
    const unsigned pos = (unsigned)p->bufPos;
    const unsigned rem = BLAKE2S_BLOCK_SIZE - pos;

    if (size <= rem)
    {
      /* all remaining input fits into the buffer: just store it */
      memcpy(p->buf + pos, data, size);
      p->bufPos += (UInt32)size;
      return;
    }

    /* complete the block, then compress it as a non-final block */
    memcpy(p->buf + pos, data, rem);
    Blake2s_Increment_Counter(p, BLAKE2S_BLOCK_SIZE)
    Blake2s_Compress(p);
    p->bufPos = 0;
    data += rem;
    size -= rem;
  }
}


/* Blake2s_Final(): compresses the pending data as the last block and writes
   the chaining value (8 words via SetUi32) to (digest).

   (digest) must have space for 8 * 4 bytes.
   The state is not reset: (p) must be initialized again before new use. */
static void Blake2s_Final(CBlake2s *p, Byte *digest)
{
  unsigned i;

  /* only the real data bytes of the last block are counted */
  Blake2s_Increment_Counter(p, (UInt32)p->bufPos)
  Blake2s_Set_LastBlock(p)
  /* zero padding of the unused tail of the block */
  memset(p->buf + p->bufPos, 0, BLAKE2S_BLOCK_SIZE - p->bufPos);
  Blake2s_Compress(p);

  for (i = 0; i < 8; i++)
  {
    SetUi32(digest + sizeof(p->h[i]) * i, p->h[i])
  }
}
/* ---------- BLAKE2s ---------- */
/* we need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */
/*
typedef struct
{
Byte digest_length;
Byte key_length;
Byte fanout;
Byte depth;
UInt32 leaf_length;
Byte node_offset[6];
Byte node_depth;
Byte inner_length;
Byte salt[BLAKE2S_SALTBYTES];
Byte personal[BLAKE2S_PERSONALBYTES];
} CBlake2sParam;
*/
/* Initializes (p) as one node of the BLAKE2sp tree.
   The state is reset with Blake2s_Init0(), then the non-zero words of the
   parameter block are xored into the chaining value:

     h[0] : digest_length = BLAKE2S_DIGEST_SIZE
            key_length    = 0
            fanout        = BLAKE2SP_PARALLEL_DEGREE  (bits 16..23)
            depth         = 2                         (bits 24..31)
     h[1] : leaf_length   = 0  (nothing to xor)
     h[2] : node_offset
     h[3] : node_depth                                (bits 16..23)
            inner_length  = BLAKE2S_DIGEST_SIZE       (bits 24..31)
*/
static void Blake2sp_Init_Spec(CBlake2s *p, unsigned node_offset, unsigned node_depth)
{
  const UInt32 param0 =
        (UInt32)BLAKE2S_DIGEST_SIZE
      | ((UInt32)BLAKE2SP_PARALLEL_DEGREE << 16)
      | ((UInt32)2 << 24);
  const UInt32 param2 = (UInt32)node_offset;
  const UInt32 param3 =
        ((UInt32)node_depth << 16)
      | ((UInt32)BLAKE2S_DIGEST_SIZE << 24);

  Blake2s_Init0(p);

  p->h[0] ^= param0;
  p->h[2] ^= param2;
  p->h[3] ^= param3;
}
/* Prepares (p) for a new BLAKE2sp hash:
   every leaf gets node_offset = its index and node_depth = 0,
   and the last leaf is marked as the last node of its level. */
void Blake2sp_Init(CBlake2sp *p)
{
  unsigned lane;

  for (lane = 0; lane != BLAKE2SP_PARALLEL_DEGREE; lane++)
  {
    CBlake2s *leaf = p->S + lane;
    Blake2sp_Init_Spec(leaf, lane, 0);
  }

  p->S[BLAKE2SP_PARALLEL_DEGREE - 1].lastNode_f1 = BLAKE2S_FINAL_FLAG;
  p->bufPos = 0;
}
/* Feeds (size) bytes from (data) to the BLAKE2sp state.
   The input is distributed over the leaves in portions that never cross a
   BLAKE2S_BLOCK_SIZE boundary: the leaf is selected by the block index of
   the current position inside the stripe, and the position wraps around
   after (BLAKE2S_BLOCK_SIZE * BLAKE2SP_PARALLEL_DEGREE) bytes. */
void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size)
{
  unsigned stripePos = p->bufPos;

  for (; size != 0;)
  {
    const unsigned lane = stripePos / BLAKE2S_BLOCK_SIZE;
    const unsigned offsetInBlock = stripePos & (BLAKE2S_BLOCK_SIZE - 1);
    unsigned chunk = BLAKE2S_BLOCK_SIZE - offsetInBlock;

    /* do not take more than the caller has provided */
    if (size < chunk)
      chunk = (unsigned)size;

    Blake2s_Update(&p->S[lane], data, chunk);

    data += chunk;
    size -= chunk;
    stripePos = (stripePos + chunk) & (BLAKE2S_BLOCK_SIZE * BLAKE2SP_PARALLEL_DEGREE - 1);
  }

  p->bufPos = stripePos;
}
/* Finishes the BLAKE2sp hash and writes the result to (digest).
   A root node (node_offset = 0, node_depth = 1, marked as last node) hashes
   the digests of all leaves in index order; the root digest is the result.
   All leaf states are finalized by this call. */
void Blake2sp_Final(CBlake2sp *p, Byte *digest)
{
  CBlake2s root;
  unsigned lane;

  Blake2sp_Init_Spec(&root, 0, 1);
  root.lastNode_f1 = BLAKE2S_FINAL_FLAG;

  for (lane = 0; lane != BLAKE2SP_PARALLEL_DEGREE; lane++)
  {
    Byte leafDigest[BLAKE2S_DIGEST_SIZE];

    Blake2s_Final(p->S + lane, leafDigest);
    Blake2s_Update(&root, leafDigest, sizeof(leafDigest));
  }

  Blake2s_Final(&root, digest);
}
#undef rotr32

528
3rdparty/7z/src/Bra.c vendored
View file

@ -1,230 +1,420 @@
/* Bra.c -- Converters for RISC code /* Bra.c -- Branch converters for RISC code
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h" #include "Bra.h"
#include "CpuArch.h"
#include "RotateDefs.h"
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) #if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|| MY_CPU_SIZEOF_POINTER == 8)
#define BR_CONV_USE_OPT_PC_PTR
#endif
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT pc -= (UInt32)(SizeT)p;
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT pc += (UInt32)size;
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
#endif
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
{ return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1)
#endif
#if defined(__clang__)
#define BR_EXTERNAL_FOR
#define BR_NEXT_ITERATION continue;
#else
#define BR_EXTERNAL_FOR for (;;)
#define BR_NEXT_ITERATION break;
#endif
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
// GCC is not good for __builtin_expect() here
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// #define Z7_unlikely [[unlikely]]
// #define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_likely [[likely]]
#else
// #define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
Z7_BRANCH_FUNC_MAIN(ARM64)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)3; const UInt32 flag = (UInt32)1 << (24 - 4);
ip += 4; const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
p = data; size &= ~(SizeT)3;
lim = data + size; // if (size == 0) return p;
lim = p + size;
if (encoding) BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
for (;;)
BR_EXTERNAL_FOR
{ {
// Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (;;) for (;;)
{ {
if (p >= lim) UInt32 v;
return (SizeT)(p - data); if Z7_UNLIKELY(p == lim)
return p;
v = GetUi32a(p);
p += 4; p += 4;
if (p[-1] == 0xEB) if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
break; {
} UInt32 c = BR_PC_GET >> 2;
{ BR_CONVERT_VAL(v, c)
UInt32 v = GetUi32(p - 4); v &= 0x03ffffff;
v <<= 2; v |= 0x94000000;
v += ip + (UInt32)(p - data); SetUi32a(p - 4, v)
v >>= 2; BR_NEXT_ITERATION
v &= 0x00FFFFFF; }
v |= 0xEB000000; // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
SetUi32(p - 4, v); v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
} {
} UInt32 z, c;
// v = rotrFixed(v, 8);
for (;;) v += flag; if Z7_UNLIKELY(v & mask) continue;
{ z = (v & 0xffffffe0) | (v >> 26);
for (;;) c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
{ BR_CONVERT_VAL(z, c)
if (p >= lim) v &= 0x1f;
return (SizeT)(p - data); v |= 0x90000000;
p += 4; v |= z << 26;
if (p[-1] == 0xEB) v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
break; SetUi32a(p - 4, v)
} }
{
UInt32 v = GetUi32(p - 4);
v <<= 2;
v -= ip + (UInt32)(p - data);
v >>= 2;
v &= 0x00FFFFFF;
v |= 0xEB000000;
SetUi32(p - 4, v);
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(ARM64)
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) Z7_BRANCH_FUNC_MAIN(ARM)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)1; size &= ~(SizeT)3;
p = data; lim = p + size;
lim = data + size - 4; BR_PC_INIT
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
if (encoding) (p) will point to next instruction */
pc += 8 - 4;
for (;;) for (;;)
{ {
UInt32 b1;
for (;;) for (;;)
{ {
UInt32 b3; if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
if (p > lim) if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
} }
{ {
UInt32 v = UInt32 v = GetUi32a(p - 4);
((UInt32)b1 << 19) UInt32 c = BR_PC_GET >> 2;
+ (((UInt32)p[1] & 0x7) << 8) BR_CONVERT_VAL(v, c)
+ (((UInt32)p[-2] << 11)) v &= 0x00ffffff;
+ (p[0]); v |= 0xeb000000;
SetUi32a(p - 4, v)
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v += cur;
}
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
}
}
for (;;)
{
UInt32 b1;
for (;;)
{
UInt32 b3;
if (p > lim)
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
}
{
UInt32 v =
((UInt32)b1 << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v -= cur;
}
/*
SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
SetUi16(p - 2, (UInt16)(v | 0xF800));
*/
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(ARM)
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) Z7_BRANCH_FUNC_MAIN(PPC)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)3; size &= ~(SizeT)3;
ip -= 4; lim = p + size;
p = data; BR_PC_INIT
lim = data + size; pc -= 4; // because (p) will point to next instruction
for (;;) for (;;)
{ {
UInt32 v;
for (;;) for (;;)
{ {
if (p >= lim) if Z7_UNLIKELY(p == lim)
return (SizeT)(p - data); return p;
// v = GetBe32a(p);
v = *(UInt32 *)(void *)p;
p += 4; p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */ // if ((v & 0xfc000003) == 0x48000001) break;
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
break; if Z7_UNLIKELY(
((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
& Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
} }
{ {
UInt32 v = GetBe32(p - 4); v = Z7_CONV_NATIVE_TO_BE_32(v);
if (encoding) {
v += ip + (UInt32)(p - data); UInt32 c = BR_PC_GET;
else BR_CONVERT_VAL(v, c)
v -= ip + (UInt32)(p - data); }
v &= 0x03FFFFFF; v &= 0x03ffffff;
v |= 0x48000000; v |= 0x48000000;
SetBe32(p - 4, v); SetBe32a(p - 4, v)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(PPC)
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
#define BR_SPARC_USE_ROTATE
#endif
Z7_BRANCH_FUNC_MAIN(SPARC)
{ {
Byte *p; // Byte *p = data;
const Byte *lim; const Byte *lim;
size &= ~(size_t)3; const UInt32 flag = (UInt32)1 << 22;
ip -= 4; size &= ~(SizeT)3;
p = data; lim = p + size;
lim = data + size; BR_PC_INIT
pc -= 4; // because (p) will point to next instruction
for (;;) for (;;)
{ {
UInt32 v;
for (;;) for (;;)
{ {
if (p >= lim) if Z7_UNLIKELY(p == lim)
return (SizeT)(p - data); return p;
/* /* // the code without GetBe32a():
v = GetBe32(p); { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
p += 4;
m = v + ((UInt32)5 << 29);
m ^= (UInt32)7 << 29;
m += (UInt32)1 << 22;
if ((m & ((UInt32)0x1FF << 23)) == 0)
break;
*/ */
v = GetBe32a(p);
p += 4; p += 4;
if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) || #ifdef BR_SPARC_USE_ROTATE
(p[-4] == 0x7F && (p[-3] >= 0xC0))) v = rotlFixed(v, 2);
v += (flag << 2) - 1;
if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
#else
v += (UInt32)5 << 29;
v ^= (UInt32)7 << 29;
v += flag;
if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
#endif
break; break;
} }
{ {
UInt32 v = GetBe32(p - 4); // UInt32 v = GetBe32a(p - 4);
#ifndef BR_SPARC_USE_ROTATE
v <<= 2; v <<= 2;
if (encoding) #endif
v += ip + (UInt32)(p - data); {
else UInt32 c = BR_PC_GET;
v -= ip + (UInt32)(p - data); BR_CONVERT_VAL(v, c)
}
v &= 0x01FFFFFF; v &= (flag << 3) - 1;
v -= (UInt32)1 << 24; #ifdef BR_SPARC_USE_ROTATE
v ^= 0xFF000000; v -= (flag << 2) - 1;
v = rotrFixed(v, 2);
#else
v -= (flag << 2);
v >>= 2; v >>= 2;
v |= 0x40000000; v |= (UInt32)1 << 30;
SetBe32(p - 4, v); #endif
SetBe32a(p - 4, v)
} }
} }
} }
Z7_BRANCH_FUNCS_IMP(SPARC)
/*
ARMT converter body.
It walks the buffer in 2-byte steps and looks for two consecutive
16-bit little-endian halfwords whose high bytes satisfy
first : (b & 0xf8) == 0xf0
second : (b & 0xf8) == 0xf8
For each such pair it builds (v) from the two halfwords, applies
BR_CONVERT_VAL with (BR_PC_GET >> 1), and stores the pair back with the
0xf000 / 0xf800 marker bits forced on.
The last 2 bytes of the (even-rounded) input are used only as look-ahead.
Returns the pointer after the last processed byte.
NOTE(review): the function signature is produced by Z7_BRANCH_FUNC_MAIN and
the pc helpers by BR_PC_INIT / BR_PC_GET / BR_CONVERT_VAL; those macros are
defined outside this view. The body relies on (p), (size), (pc) and
(encoding) being in scope - confirm against the macro definitions.
*/
Z7_BRANCH_FUNC_MAIN(ARMT)
{
// Byte *p = data;
Byte *lim;
// round size down to a multiple of 2 (halfword granularity)
size &= ~(SizeT)1;
// if (size == 0) return p;
// we need one full halfword plus 2 bytes of look-ahead to do anything
if (size <= 2) return p;
// reserve 2 bytes of look-ahead: the scan reads p[3] while (p < lim)
size -= 2;
lim = p + size;
BR_PC_INIT
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
(p) will point to the +2 instructions from current instruction */
// pc += 4 - 4;
// if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
// #define ARMT_TAIL_PROC { goto armt_tail; }
#define ARMT_TAIL_PROC { return p; }
do
{
/* in MSVC 32-bit x86 compilers:
UInt32 version : it loads value from memory with movzx
Byte version : it loads value to 8-bit register (AL/CL)
movzx version is slightly faster in some cpus
*/
unsigned b1;
// Byte / unsigned
// high byte of the halfword at (p)
b1 = p[1];
// optimized version to reduce one (p >= lim) check:
// unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
for (;;)
{
unsigned b3; // Byte / UInt32
/* (Byte)(b3) normalization can use low byte computations in MSVC.
It gives smaller code, and no loss of speed in some compilers/cpus.
But new MSVC 32-bit x86 compilers use more slow load
from memory to low byte register in that case.
So we try to use full 32-bit computations for faster code.
*/
// if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
/* Scan unrolled by two: each line loads the high byte of the next halfword
and tests it against the previously loaded high byte, so b1 and b3 swap
roles between the two lines.
(x & (y ^ 8)) >= 0xf8 is true only if (y & 0xf8) == 0xf0 and (x & 0xf8) == 0xf8.
On break, (p) points to the second halfword of the matched pair. */
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
}
{
/* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
But gcc/clang for arm64 can use bfi instruction for full code here */
// first halfword goes to bits 11 and up, low 11 bits of second halfword to bits 0..10
UInt32 v =
((UInt32)GetUi16a(p - 2) << 11) |
((UInt32)GetUi16a(p) & 0x7FF);
/*
UInt32 v =
((UInt32)p[1 - 2] << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
*/
// step over the second halfword: (p) is now just after the matched pair
p += 2;
{
// the value is converted in 2-byte units, so the pc value is halved
UInt32 c = BR_PC_GET >> 1;
BR_CONVERT_VAL(v, c)
}
// write back: only 11 bits of (v) go into each halfword, marker bits are restored
SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
SetUi16a(p - 2, (UInt16)(v | 0xf800))
/*
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xf8 | (v >> 8));
*/
}
}
while (p < lim);
return p;
// armt_tail:
// if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
// return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
// return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
// return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(ARMT)
// #define BR_IA64_NO_INLINE
/*
IA64 converter body.
The buffer is processed in 16-byte units (size is rounded down to a
multiple of 16). Bits 1..4 of the first byte of a unit index a table of
2-bit entries packed in the constant 0x334b0000; the entry (m) selects the
first position to examine inside the unit, and (m == 0) means that the unit
is skipped. For each examined position the code tests two bit fields and,
if both match, rewrites a field of the 32-bit word at that position by
adding (encoding) or subtracting (decoding) the current pc value.
Returns the pointer after the last processed byte (always data + rounded size).
NOTE(review): the function signature comes from Z7_BRANCH_FUNC_MAIN, defined
outside this view; the body relies on (p), (size), (pc) and (encoding)
being in scope - confirm against the macro definition.
*/
Z7_BRANCH_FUNC_MAIN(IA64)
{
// Byte *p = data;
const Byte *lim;
// only whole 16-byte units are processed
size &= ~(SizeT)15;
lim = p + size;
// (pc) is biased by one unit because it is advanced before each unit is examined,
// and it is kept shifted right by 3 bits (it grows by 2 per 16-byte unit)
pc -= 1 << 4;
pc >>= 4 - 1;
// pc -= 1 << 1;
for (;;)
{
unsigned m;
for (;;)
{
if Z7_UNLIKELY(p == lim)
return p;
// 2-bit table lookup; the (& 3) is applied in the (m &= 3) test below
m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
p += 16;
pc += 1 << 1;
if (m &= 3)
break;
}
{
// (p) is already past the unit, so this moves it back inside the unit:
// unit_start + m * 5 - 4
p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
do
{
// (t) supplies the bits tested with mask (0x70 << 1) after the shift by (m)
const UInt32 t =
#if defined(MY_CPU_X86_OR_AMD64)
// we use 32-bit load here to reduce code size on x86:
GetUi32(p);
#else
GetUi16(p);
#endif
// (z) is the 32-bit word one byte further, aligned by the same shift
UInt32 z = GetUi32(p + 1) >> m;
p += 5;
if (((t >> m) & (0x70 << 1)) == 0
&& ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
{
// split (z): (v) gets the bits that will be converted, (z) keeps the rest
UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
z ^= v;
#ifdef BR_IA64_NO_INLINE
v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
{
UInt32 c = pc;
BR_CONVERT_VAL(v, c)
}
v &= (0x1fffff << 1) | 1;
#else
{
// (pc) is modified in place here; only bits above the low 22 are forced
// to 0 (encoding) or to 1 (decoding), the low 22 bits are kept
if (encoding)
{
// pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
pc &= (0x1fffff << 1) | 1;
v += pc;
}
else
{
// pc |= 0xc00000 << 1; // we need to set at least 2 bits
pc |= ~(UInt32)((0x1fffff << 1) | 1);
v -= pc;
}
}
v &= ~(UInt32)(0x600000 << 1);
#endif
// re-pack the converted value into the same bit positions and merge it back
v += (0x700000 << 1);
v &= (0x8fffff << 1) | 1;
z |= v;
z <<= m;
// (p) was already advanced by 5, so this stores at the address (z) was loaded from
SetUi32(p + 1 - 5, z)
}
m++;
}
// the loop ends when (m) reaches 4
while (m &= 3); // while (m < 4);
}
}
}
Z7_BRANCH_FUNCS_IMP(IA64)

117
3rdparty/7z/src/Bra.h vendored
View file

@ -1,64 +1,99 @@
/* Bra.h -- Branch converters for executables /* Bra.h -- Branch converters for executables
2013-01-18 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __BRA_H #ifndef ZIP7_INC_BRA_H
#define __BRA_H #define ZIP7_INC_BRA_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/* #define Z7_BRANCH_CONV_DEC(name) z7_BranchConv_ ## name ## _Dec
These functions convert relative addresses to absolute addresses #define Z7_BRANCH_CONV_ENC(name) z7_BranchConv_ ## name ## _Enc
in CALL instructions to increase the compression ratio. #define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
In:
data - data buffer
size - size of data
ip - current virtual Instruction Pinter (IP) value
state - state variable for x86 converter
encoding - 0 (for decoding), 1 (for encoding)
Out:
state - state variable for x86 converter
Returns: #define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc)
The number of processed bytes. If you call these functions with multiple calls, #define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
you must start next call with first byte after block of processed bytes.
typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_DEC(X86));
Z7_BRANCH_CONV_ST_DECL(Z7_BRANCH_CONV_ST_ENC(X86));
#define Z7_BRANCH_FUNCS_DECL(name) \
Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_DEC(name)); \
Z7_BRANCH_CONV_DECL(Z7_BRANCH_CONV_ENC(name));
Z7_BRANCH_FUNCS_DECL(ARM64)
Z7_BRANCH_FUNCS_DECL(ARM)
Z7_BRANCH_FUNCS_DECL(ARMT)
Z7_BRANCH_FUNCS_DECL(PPC)
Z7_BRANCH_FUNCS_DECL(SPARC)
Z7_BRANCH_FUNCS_DECL(IA64)
/*
These functions convert data that contain CPU instructions.
Each such function converts relative addresses to absolute addresses in some
branch instructions: CALL (in all converters) and JUMP (X86 converter only).
Such conversion allows to increase compression ratio, if we compress that data.
There are 2 types of converters:
Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
Each Converter supports 2 versions: one for encoding
and one for decoding (_Enc/_Dec postfixes in function name).
In params:
data : data buffer
size : size of data
pc : current virtual Program Counter (Instruction Pointer) value
In/Out param:
state : pointer to state variable (for X86 converter only)
Return:
The pointer to position in (data) buffer after last byte that was processed.
If the caller calls converter again, it must call it starting with that position.
But the caller is allowed to move data in the buffer, so the pointer to the
current processed position may also change for the next call.
Also the caller must increase internal (pc) value for next call.
Each converter has some characteristics: Endian, Alignment, LookAhead.
Type Endian Alignment LookAhead Type Endian Alignment LookAhead
x86 little 1 4 X86 little 1 4
ARMT little 2 2 ARMT little 2 2
ARM little 4 0 ARM little 4 0
ARM64 little 4 0
PPC big 4 0 PPC big 4 0
SPARC big 4 0 SPARC big 4 0
IA64 little 16 0 IA64 little 16 0
size must be >= Alignment + LookAhead, if it's not last block. (data) must be aligned for (Alignment).
If (size < Alignment + LookAhead), converter returns 0. processed size can be calculated as:
SizeT processed = Conv(data, size, pc) - data;
if (processed == 0)
it means that converter needs more data for processing.
If (size < Alignment + LookAhead)
then (processed == 0) is allowed.
Example: Example code for conversion in loop:
UInt32 pc = 0;
UInt32 ip = 0; size = 0;
for () for (;;)
{ {
; size must be >= Alignment + LookAhead, if it's not last block size += Load_more_input_data(data + size);
SizeT processed = Convert(data, size, ip, 1); SizeT processed = Conv(data, size, pc) - data;
data += processed; if (processed == 0 && no_more_input_data_after_size)
size -= processed; break; // we stop convert loop
ip += processed; data += processed;
} size -= processed;
pc += processed;
}
*/ */
#define x86_Convert_Init(state) { state = 0; }
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
EXTERN_C_END EXTERN_C_END
#endif #endif

View file

@ -1,82 +1,187 @@
/* Bra86.c -- Converter for x86 code (BCJ) /* Bra86.c -- Branch converter for X86 code (BCJ)
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Bra.h" #include "Bra.h"
#include "CpuArch.h"
#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) #if defined(MY_CPU_SIZEOF_POINTER) \
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|| MY_CPU_SIZEOF_POINTER == 8)
#define BR_CONV_USE_OPT_PC_PTR
#endif
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT pc += (UInt32)size;
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
#endif
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
#ifdef MY_CPU_LE_UNALIGN
#define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
#define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
#define BR86_PREPARE_BCJ_SCAN
// bad for MSVC X86 (partial write to byte reg):
#define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8)
// bad for old MSVC (partial write to byte reg):
// #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0)
#endif
static
Z7_FORCE_INLINE
Z7_ATTRIB_NO_VECTOR
Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
{ {
SizeT pos = 0;
UInt32 mask = *state & 7;
if (size < 5) if (size < 5)
return 0; return p;
size -= 4; {
ip += 5; // Byte *p = data;
const Byte *lim = p + size - 4;
unsigned mask = (unsigned)*state; // & 7;
#ifdef BR_CONV_USE_OPT_PC_PTR
/* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
because call/jump offset is relative to the next instruction.
if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
*/
pc += 4;
#endif
BR_PC_INIT
goto start;
for (;;) for (;; mask |= 4)
{ {
Byte *p = data + pos; // cont: mask |= 4;
const Byte *limit = data + size; start:
for (; p < limit; p++) if (p >= lim)
if ((*p & 0xFE) == 0xE8) goto fin;
break;
{ {
SizeT d = (SizeT)(p - data) - pos; BR86_PREPARE_BCJ_SCAN
pos = (SizeT)(p - data); p += 4;
if (p >= limit) if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
{ if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
*state = (d > 2 ? 0 : mask >> (unsigned)d); if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
return pos; if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
}
if (d > 2)
mask = 0;
else
{
mask >>= (unsigned)d;
if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
{
mask = (mask >> 1) | 4;
pos++;
continue;
}
}
} }
goto main_loop;
if (Test86MSByte(p[4])) m0: p--;
m1: p--;
m2: p--;
if (mask == 0)
goto a3;
if (p > lim)
goto fin_p;
// if (((0x17u >> mask) & 1) == 0)
if (mask > 4 || mask == 3)
{ {
UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); mask >>= 1;
UInt32 cur = ip + (UInt32)pos; continue; // goto cont;
pos += 5; }
if (encoding) mask >>= 1;
v += cur; if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
else continue; // goto cont;
v -= cur; // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
if (mask != 0) {
UInt32 v = GetUi32(p);
UInt32 c;
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
{ {
unsigned sh = (mask & 6) << 2; mask <<= 3;
if (Test86MSByte((Byte)(v >> sh))) if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
{ {
v ^= (((UInt32)0x100 << sh) - 1); v ^= (((UInt32)0x100 << mask) - 1);
if (encoding) #ifdef MY_CPU_X86
v += cur; // for X86 : we can recalculate (c) to reduce register pressure
else c = BR_PC_GET;
v -= cur; #endif
BR_CONVERT_VAL(v, c)
} }
mask = 0; mask = 0;
} }
p[1] = (Byte)v; // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
p[2] = (Byte)(v >> 8); v &= (1 << 25) - 1; v -= (1 << 24);
p[3] = (Byte)(v >> 16); SetUi32(p, v)
p[4] = (Byte)(0 - ((v >> 24) & 1)); p += 4;
goto main_loop;
} }
else
main_loop:
if (p >= lim)
goto fin;
for (;;)
{ {
mask = (mask >> 1) | 4; BR86_PREPARE_BCJ_SCAN
pos++; p += 4;
if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
if (p >= lim)
goto fin;
}
a0: p--;
a1: p--;
a2: p--;
a3:
if (p > lim)
goto fin_p;
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
{
UInt32 v = GetUi32(p);
UInt32 c;
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
c = BR_PC_GET;
BR_CONVERT_VAL(v, c)
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
v &= (1 << 25) - 1; v -= (1 << 24);
SetUi32(p, v)
p += 4;
goto main_loop;
} }
} }
fin_p:
p--;
fin:
// the following processing for tail is optional and can be commented
/*
lim += 4;
for (; p < lim; p++, mask >>= 1)
if ((*p & 0xfe) == 0xe8)
break;
*/
*state = (UInt32)mask;
return p;
}
} }
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
{ return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
#ifndef Z7_EXTRACT_ONLY
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
#endif

View file

@ -1,53 +1,14 @@
/* BraIA64.c -- Converter for IA-64 code /* BraIA64.c -- Converter for IA-64 code
2017-01-26 : Igor Pavlov : Public domain */ 2023-02-20 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "CpuArch.h" // the code was moved to Bra.c
#include "Bra.h"
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) #ifdef _MSC_VER
{ #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
SizeT i; #endif
if (size < 16)
return 0; #if defined(__clang__)
size -= 16; #pragma GCC diagnostic ignored "-Wempty-translation-unit"
i = 0; #endif
do
{
unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;
if (m)
{
m++;
do
{
Byte *p = data + (i + (size_t)m * 5 - 8);
if (((p[3] >> m) & 15) == 5
&& (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
{
unsigned raw = GetUi32(p);
unsigned v = raw >> m;
v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);
v <<= 4;
if (encoding)
v += ip + (UInt32)i;
else
v -= ip + (UInt32)i;
v >>= 4;
v &= 0x1FFFFF;
v += 0x700000;
v &= 0x8FFFFF;
raw &= ~((UInt32)0x8FFFFF << m);
raw |= (v << m);
SetUi32(p, raw);
}
}
while (++m <= 4);
}
i += 16;
}
while (i <= size);
return i;
}

516
3rdparty/7z/src/BwtSort.c vendored Normal file
View file

@ -0,0 +1,516 @@
/* BwtSort.c -- BWT block sorting
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "BwtSort.h"
#include "Sort.h"
/* #define BLOCK_SORT_USE_HEAP_SORT */
/* Don't change it !!! */
#define kNumHashBytes 2
#define kNumHashValues (1 << (kNumHashBytes * 8))
/* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
#define kNumRefBitsMax 12
#define BS_TEMP_SIZE kNumHashValues
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
/* 32 Flags in UInt32 word */
#define kNumFlagsBits 5
#define kNumFlagsInWord (1 << kNumFlagsBits)
#define kFlagsMask (kNumFlagsInWord - 1)
#define kAllFlags 0xFFFFFFFF
#else
/* Layout of one 32-bit Indices entry when flags are stored in-place:
bits 0..19 (kNumBitsMax) : the index itself (kIndexMask)
bits 20..29 (kNumExtra0Bits) : low bits of (groupSize - 1)
bit 30 (0x40000000) : the next entry holds the high bits of (groupSize - 1)
bit 31 (0x80000000) : set by SetGroupSize for groups that are not finished yet */
#define kNumBitsMax 20
#define kIndexMask ((1 << kNumBitsMax) - 1)
#define kNumExtraBits (32 - kNumBitsMax)
#define kNumExtra0Bits (kNumExtraBits - 2)
#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
/* SetFinishedGroupSize(p, size):
ORs the encoded (size - 1) into the extra bits of p[0] and, if it does not
fit into kNumExtra0Bits, sets 0x40000000 in p[0] and ORs the remaining high
bits into p[1]. Bit 31 is not touched (the group is marked as finished).
(p) and (size) are evaluated more than once: pass expressions without side effects.
The macro expands to a braced block and callers invoke it without a
trailing semicolon, so it is intentionally not wrapped in do { } while (0).
The last line must NOT end with a line continuation: a trailing backslash
would splice the following source line into the macro body. */
#define SetFinishedGroupSize(p, size) \
{ *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
if ((size) > (1 << kNumExtra0Bits)) { \
*(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } }
/* Marks the group that starts at (p) as not finished and stores its size.
(size - 1) is encoded; a group of size 1 leaves the entries unchanged.
p[0] always receives bit 31 and the low kNumExtra0Bits of (size - 1);
if (size - 1) needs more bits, bit 30 is also set in p[0] and the
remaining high bits are ORed into the extra bits of p[1]. */
static void SetGroupSize(UInt32 *p, UInt32 size)
{
  const UInt32 sizeMinus1 = size - 1;
  UInt32 head;
  if (sizeMinus1 == 0)
    return;
  head = 0x80000000 | ((sizeMinus1 & kNumExtra0Mask) << kNumBitsMax);
  if (sizeMinus1 >= (1 << kNumExtra0Bits))
  {
    /* size does not fit in the first entry: spill the high part into p[1] */
    head |= 0x40000000;
    p[1] |= ((sizeMinus1 >> kNumExtra0Bits) << kNumBitsMax);
  }
  p[0] |= head;
}
#endif
/*
SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
"range" is not real range. It's only for optimization.
returns: 1 - if there are groups, 0 - no more groups

Parameters (as used by the code below):
BlockSize : number of entries in Indices; (index + NumSortedBytes) wraps at this value
NumSortedBytes : offset added to an index to find the Groups[] entry that is used as its sort key
groupOffset : position in Indices of the first entry of the group
groupSize : number of entries in the group
NumRefBits : number of low bits in a temp[] word that hold the position of the entry
inside the group; the key is stored above these bits
Indices : base of the work buffer; temp[] starts at Indices + BlockSize and
Groups[] starts at Indices + BlockSize + BS_TEMP_SIZE
left, range : (only without BLOCK_SORT_USE_HEAP_SORT) the key interval
[left, left + range) that is bisected by the range sort
*/
static
UInt32
Z7_FASTCALL
SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, UInt32 left, UInt32 range
#endif
)
{
UInt32 *ind2 = Indices + groupOffset;
UInt32 *Groups;
/* a group of 0 or 1 entries is already sorted */
if (groupSize <= 1)
{
/*
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetFinishedGroupSize(ind2, 1)
#endif
*/
return 0;
}
Groups = Indices + BlockSize + BS_TEMP_SIZE;
/* ---------- small group: sort (key, position) words in temp[] ---------- */
/* the position (j) must fit into NumRefBits bits of a temp word */
if (groupSize <= ((UInt32)1 << NumRefBits)
#ifndef BLOCK_SORT_USE_HEAP_SORT
&& groupSize <= range
#endif
)
{
UInt32 *temp = Indices + BlockSize;
UInt32 j;
UInt32 mask, thereAreGroups, group, cg;
{
UInt32 gPrev;
UInt32 gRes = 0;
{
UInt32 sp = ind2[0] + NumSortedBytes;
if (sp >= BlockSize) sp -= BlockSize;
gPrev = Groups[sp];
temp[0] = (gPrev << NumRefBits);
}
/* build temp[j] = (key << NumRefBits) | j;
gRes becomes non-zero if any key differs from the key of entry 0 */
for (j = 1; j < groupSize; j++)
{
UInt32 sp = ind2[j] + NumSortedBytes;
UInt32 g;
if (sp >= BlockSize) sp -= BlockSize;
g = Groups[sp];
temp[j] = (g << NumRefBits) | j;
gRes |= (gPrev ^ g);
}
/* all keys are equal: this pass cannot split the group */
if (gRes == 0)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
return 1;
}
}
HeapSort(temp, groupSize);
mask = (((UInt32)1 << NumRefBits) - 1);
thereAreGroups = 0;
group = groupOffset;
cg = (temp[0] >> NumRefBits);
/* replace the (key, position) word with the index it refers to */
temp[0] = ind2[temp[0] & mask];
{
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags = Groups + BlockSize;
#else
UInt32 prevGroupStart = 0;
#endif
/* walk the sorted words: a key change starts a new sub-group,
equal neighbour keys mean that an unsorted sub-group remains */
for (j = 1; j < groupSize; j++)
{
UInt32 val = temp[j];
UInt32 cgCur = (val >> NumRefBits);
if (cgCur != cg)
{
cg = cgCur;
group = groupOffset + j;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
/* clear the flag bit of the last entry of the previous sub-group */
/* NOTE(review): (1 << 31) is evaluated on a signed int when (t & kFlagsMask) == 31 - confirm */
UInt32 t = group - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#else
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
prevGroupStart = j;
#endif
}
else
thereAreGroups = 1;
{
UInt32 ind = ind2[val & mask];
temp[j] = ind;
Groups[ind] = group;
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
#endif
}
/* copy the reordered indices (with their size bits) back into the group */
for (j = 0; j < groupSize; j++)
ind2[j] = temp[j];
return thereAreGroups;
}
/* Check that all strings are in one group (cannot sort) */
{
UInt32 group, j;
UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
group = Groups[sp];
for (j = 1; j < groupSize; j++)
{
sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] != group)
break;
}
if (j == groupSize)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
return 1;
}
}
#ifndef BLOCK_SORT_USE_HEAP_SORT
{
/* ---------- Range Sort ---------- */
UInt32 i;
UInt32 mid;
/* bisect [left, left + range) until the partition around (mid) is not trivial */
for (;;)
{
UInt32 j;
if (range <= 1)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
return 1;
}
mid = left + ((range + 1) >> 1);
j = groupSize;
i = 0;
/* in-place partition: entries with key < mid go to the front;
after the loop (i) is the number of such entries */
do
{
UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] >= mid)
{
for (j--; j > i; j--)
{
sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] < mid)
{
UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
break;
}
}
if (i >= j)
break;
}
}
while (++i < j);
if (i == 0)
{
/* every key is >= mid: continue with the upper half */
range = range - (mid - left);
left = mid;
}
else if (i == groupSize)
/* every key is < mid: continue with the lower half */
range = (mid - left);
else
break;
}
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
/* clear the flag bit of the last entry of the lower part */
UInt32 t = (groupOffset + i - 1);
UInt32 *Flags = Groups + BlockSize;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#endif
{
/* the upper part becomes a new group that starts at (groupOffset + i) */
UInt32 j;
for (j = i; j < groupSize; j++)
Groups[ind2[j]] = groupOffset + i;
}
{
/* sort both parts recursively, each with its own half of the key interval */
UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
}
}
#else
/* ---------- Heap Sort ---------- */
{
UInt32 j;
/* temporarily replace each index with the (wrapped) position of its key */
for (j = 0; j < groupSize; j++)
{
UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
ind2[j] = sp;
}
HeapSortRef(ind2, Groups, groupSize);
/* Write Flags */
{
UInt32 sp = ind2[0];
UInt32 group = Groups[sp];
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags = Groups + BlockSize;
#else
UInt32 prevGroupStart = 0;
#endif
for (j = 1; j < groupSize; j++)
{
sp = ind2[j];
if (Groups[sp] != group)
{
group = Groups[sp];
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = groupOffset + j - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#else
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
prevGroupStart = j;
#endif
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
#endif
}
{
/* Write new Groups values and Check that there are groups */
UInt32 thereAreGroups = 0;
for (j = 0; j < groupSize; j++)
{
UInt32 group = groupOffset + j;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
/* decode the sub-group size that was stored in the extra bits */
UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
if ((ind2[j] & 0x40000000) != 0)
subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
subGroupSize++;
for (;;)
{
/* undo the key offset (sp - NumSortedBytes, wrapped) and keep the extra bits */
UInt32 original = ind2[j];
UInt32 sp = original & kIndexMask;
if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
ind2[j] = sp | (original & ~kIndexMask);
Groups[sp] = group;
if (--subGroupSize == 0)
break;
j++;
thereAreGroups = 1;
}
#else
UInt32 *Flags = Groups + BlockSize;
for (;;)
{
UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
ind2[j] = sp;
Groups[sp] = group;
/* a cleared flag bit marks the last entry of the sub-group */
if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
break;
j++;
thereAreGroups = 1;
}
#endif
}
return thereAreGroups;
}
}
#endif
}
/* conditions: blockSize > 0 */
/*
BlockSort fills Indices[0 .. blockSize) with the positions 0 .. blockSize - 1
of (data), ordered by the cyclic strings that start at those positions.
Work buffer layout (in UInt32 words, starting at Indices):
Indices[blockSize], counters[kNumHashValues], Groups[blockSize]
and, with BLOCK_SORT_EXTERNAL_FLAGS, Flags[] after Groups.
Steps:
1) radix sort by the first 2 bytes (the last position wraps to data[0]);
2) repeated SortGroup passes with NumSortedBytes doubled after each pass,
until a pass reports that no unsorted groups remain.
Returns Groups[0].
NOTE(review): Groups[0] is presumably the position of the string starting at
data[0] in the sorted order - confirm against the caller.
*/
UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
{
UInt32 *counters = Indices + blockSize;
UInt32 i;
UInt32 *Groups;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags;
#endif
/* Radix-Sort for 2 bytes */
for (i = 0; i < kNumHashValues; i++)
counters[i] = 0;
for (i = 0; i < blockSize - 1; i++)
counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
/* here i == blockSize - 1: the last byte is paired with the first one */
counters[((UInt32)data[i] << 8) | data[0]]++;
Groups = counters + BS_TEMP_SIZE;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
Flags = Groups + blockSize;
{
/* one flag bit per entry; all bits start as set */
UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
for (i = 0; i < numWords; i++)
Flags[i] = kAllFlags;
}
#endif
{
/* convert the counts to start offsets (exclusive prefix sums) */
UInt32 sum = 0;
for (i = 0; i < kNumHashValues; i++)
{
UInt32 groupSize = counters[i];
if (groupSize > 0)
{
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
/* clear the flag bit of the last entry of each 2-byte group */
UInt32 t = sum + groupSize - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
#endif
sum += groupSize;
}
counters[i] = sum - groupSize;
}
/* Groups[i] = start offset of the 2-byte group that position i belongs to */
for (i = 0; i < blockSize - 1; i++)
Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
/* distribute the positions; counters[] end up as the end offsets of the groups */
for (i = 0; i < blockSize - 1; i++)
Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
{
/* store the size of every non-empty 2-byte group in the extra bits of Indices */
UInt32 prev = 0;
for (i = 0; i < kNumHashValues; i++)
{
UInt32 prevGroupSize = counters[i] - prev;
if (prevGroupSize == 0)
continue;
SetGroupSize(Indices + prev, prevGroupSize);
prev = counters[i];
}
}
#endif
}
{
int NumRefBits;
UInt32 NumSortedBytes;
/* count the bits needed for (blockSize - 1); the rest of a 32-bit word,
limited to kNumRefBitsMax, is available for the in-group position in SortGroup */
for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
NumRefBits = 32 - NumRefBits;
if (NumRefBits > kNumRefBitsMax)
NumRefBits = kNumRefBitsMax;
for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
/* total size of the run of finished groups that ends just before (i) */
UInt32 finishedGroupSize = 0;
#endif
/* stays 0 if no SortGroup call in this pass reports remaining groups */
UInt32 newLimit = 0;
for (i = 0; i < blockSize;)
{
UInt32 groupSize;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
/* a cleared flag at (i) means a group of one entry: nothing to sort */
if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
{
i++;
continue;
}
/* the group extends up to and including the first entry with a cleared flag */
for (groupSize = 1;
(Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
groupSize++);
groupSize++;
#else
/* decode the group size from the extra bits and strip them from Indices */
groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
{
BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
if ((Indices[i] & 0x40000000) != 0)
{
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
Indices[(size_t)i + 1] &= kIndexMask;
}
Indices[i] &= kIndexMask;
groupSize++;
if (finishedGroup || groupSize == 1)
{
/* merge this finished group with the preceding run of finished groups,
so that later passes step over the whole run at once */
Indices[i - finishedGroupSize] &= kIndexMask;
if (finishedGroupSize > 1)
Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
{
UInt32 newGroupSize = groupSize + finishedGroupSize;
SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
finishedGroupSize = newGroupSize;
}
i += groupSize;
continue;
}
finishedGroupSize = 0;
}
#endif
if (NumSortedBytes >= blockSize)
{
/* the compared length already covers the whole block:
the current order is kept and every entry gets its own group */
UInt32 j;
for (j = 0; j < groupSize; j++)
{
UInt32 t = (i + j);
/* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
Groups[Indices[t]] = t;
}
}
else
if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, 0, blockSize
#endif
) != 0)
newLimit = i + groupSize;
i += groupSize;
}
if (newLimit == 0)
break;
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
/* final cleanup: remove the size / flag bits so that Indices holds plain positions */
for (i = 0; i < blockSize;)
{
UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
if ((Indices[i] & 0x40000000) != 0)
{
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
Indices[(size_t)i + 1] &= kIndexMask;
}
Indices[i] &= kIndexMask;
groupSize++;
i += groupSize;
}
#endif
return Groups[0];
}

26
3rdparty/7z/src/BwtSort.h vendored Normal file
View file

@ -0,0 +1,26 @@
/* BwtSort.h -- BWT block sorting
2023-03-03 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_BWT_SORT_H
#define ZIP7_INC_BWT_SORT_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* use BLOCK_SORT_EXTERNAL_FLAGS if blockSize can be > 1M */
/* #define BLOCK_SORT_EXTERNAL_FLAGS */
/* BLOCK_SORT_EXTERNAL_SIZE(blockSize):
number of extra UInt32 words for the external flags (one bit per item,
32 bits per word), or 0 if the flags are stored inside the indices. */
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) ((((blockSize) + 31) >> 5))
#else
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) 0
#endif
/* BLOCK_SORT_BUF_SIZE(blockSize):
size, in UInt32 items (not bytes), of the (indices) buffer that BlockSort() needs:
blockSize items for indices + (1 << 16) temp items + blockSize items for groups
+ the external flags. */
#define BLOCK_SORT_BUF_SIZE(blockSize) ((blockSize) * 2 + BLOCK_SORT_EXTERNAL_SIZE(blockSize) + (1 << 16))
/* BlockSort():
indices : work buffer of BLOCK_SORT_BUF_SIZE(blockSize) UInt32 items;
the first (blockSize) items receive the sorted positions
data : input block of (blockSize) bytes
blockSize : must be > 0
returns the value of the first item of the internal groups array after sorting. */
UInt32 BlockSort(UInt32 *indices, const Byte *data, UInt32 blockSize);
EXTERN_C_END
#endif

View file

@ -1,12 +1,37 @@
/* Compiler.h /* Compiler.h : Compiler specific defines and pragmas
2021-01-05 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H #ifndef ZIP7_INC_COMPILER_H
#define __7Z_COMPILER_H #define ZIP7_INC_COMPILER_H
// Compiler version numbers are packed as (major * 10000 + minor * 100 + patch).
#if defined(__clang__)
# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
// Exactly one of the following is defined: Apple clang, other clang, or GCC.
// Z7_GCC_VERSION is not defined for clang, even though clang also defines __GNUC__.
#if defined(__clang__) && defined(__apple_build_version__)
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__clang__)
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
#elif defined(__GNUC__)
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
// Z7_MSC_VER_ORIGINAL is defined only when _MSC_VER is defined
// and neither __clang__ nor __GNUC__ is defined.
#ifdef _MSC_VER
#if !defined(__clang__) && !defined(__GNUC__)
#define Z7_MSC_VER_ORIGINAL _MSC_VER
#endif
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
#define Z7_MINGW
#endif
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
#ifdef __clang__
// padding size of '' with 4 bytes to alignment boundary
#pragma GCC diagnostic ignored "-Wpadded"
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#ifdef _MSC_VER #ifdef _MSC_VER
@ -17,24 +42,115 @@
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif #endif
#if _MSC_VER >= 1300 #if defined(_MSC_VER) && _MSC_VER >= 1800
#pragma warning(disable : 4996) // This function or variable may be unsafe #pragma warning(disable : 4464) // relative include path contains '..'
#else #endif
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#ifdef __clang__ // == 1200 : -O1 : for __forceinline
#pragma clang diagnostic ignored "-Wdeprecated-declarations" // >= 1900 : -O1 : for printf
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec" #pragma warning(disable : 4710) // function not inlined
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#if _MSC_VER < 1900
// winnt.h: 'Int64ShllMod32'
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#endif
#if _MSC_VER < 1300
// #pragma warning(disable : 4702) // unreachable code
// Bra.c : -O1:
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#endif
/*
#if _MSC_VER > 1400 && _MSC_VER <= 1900
// strcat: This function or variable may be unsafe
// sysinfoapi.h: kit10: GetVersion was declared deprecated
#pragma warning(disable : 4996)
#endif
*/
#if _MSC_VER > 1200
// -Wall warnings
#pragma warning(disable : 4711) // function selected for automatic inline expansion
#pragma warning(disable : 4820) // '2' bytes padding added after data member
#if _MSC_VER >= 1400 && _MSC_VER < 1920
// 1400: string.h: _DBG_MEMCPY_INLINE_
// 1600 - 191x : smmintrin.h __cplusplus'
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
#pragma warning(disable : 4668)
// 1400 - 1600 : WinDef.h : 'FARPROC' :
// 1900 - 191x : immintrin.h: _readfsbase_u32
// no function prototype given : converting '()' to '(void)'
#pragma warning(disable : 4255)
#endif
#if _MSC_VER >= 1914
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
#pragma warning(disable : 5045)
#endif
#endif // _MSC_VER > 1200
#endif // _MSC_VER
/*
Per-compiler helpers for switching off loop optimizations:
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE : pragma form
clang >= 4 : disables both unrolling and vectorization ("clang loop ...")
MSVC >= 1920 : disables vectorization only ("loop( no_vector )")
Z7_ATTRIB_NO_VECTORIZE : attribute form
GCC >= 5 : optimize("no-tree-vectorize")
Any macro that has no variant for the detected compiler expands to nothing,
so both names are always defined after this block.
*/
#if defined(__clang__) && (__clang_major__ >= 4)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("clang loop unroll(disable)") \
_Pragma("clang loop vectorize(disable)")
#define Z7_ATTRIB_NO_VECTORIZE
#elif defined(__GNUC__) && (__GNUC__ >= 5)
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
// __attribute__((optimize("no-unroll-loops")));
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
_Pragma("loop( no_vector )")
#define Z7_ATTRIB_NO_VECTORIZE
#else
// unknown or older compiler: both macros are empty
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
#define Z7_ATTRIB_NO_VECTORIZE
#endif
/*
Z7_ATTRIB_NO_SSE : attribute __target__("no-sse"), defined only for
x86/x64 builds with clang >= 4 or GCC >= 5; empty otherwise.
*/
#if defined(MY_CPU_X86_OR_AMD64) && ( \
defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5))
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
#else
#define Z7_ATTRIB_NO_SSE
#endif
/* Z7_ATTRIB_NO_VECTOR : combination of the no-vectorize and no-sse attributes
(either part can be empty, depending on the compiler). */
#define Z7_ATTRIB_NO_VECTOR \
Z7_ATTRIB_NO_VECTORIZE \
Z7_ATTRIB_NO_SSE
/*
Z7_LIKELY(x) / Z7_UNLIKELY(x) : branch hints for conditions.
They map to __builtin_expect() for clang >= 8.
The GCC test requires (__GNUC__ >= 1000); presumably that threshold is
chosen to keep the builtin switched off for GCC (see the note below).
The MSVC variant is commented out.
For all other compilers the macros just evaluate (x).
*/
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
// GCC is not good for __builtin_expect()
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
// #define Z7_unlikely [[unlikely]]
// #define Z7_likely [[likely]]
#else
// no hint available: plain pass-through of the condition
#define Z7_LIKELY(x) (x)
#define Z7_UNLIKELY(x) (x)
// #define Z7_likely
#endif
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 36000))
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic push") \
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
_Pragma("GCC diagnostic pop")
#else
#define Z7_DIAGNOSCTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
#define Z7_DIAGNOSCTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
#endif #endif
#define UNUSED_VAR(x) (void)x; #define UNUSED_VAR(x) (void)x;

View file

@ -1,187 +1,318 @@
/* CpuArch.c -- CPU specific code /* CpuArch.c -- CPU specific code
2021-07-13 : Igor Pavlov : Public domain */ 2023-05-18 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
// #include <stdio.h>
#include "CpuArch.h" #include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__) #undef NEED_CHECK_FOR_CPUID
#define USE_ASM #if !defined(MY_CPU_AMD64)
#define NEED_CHECK_FOR_CPUID
#endif #endif
#if !defined(USE_ASM) && _MSC_VER >= 1500 /*
#include <intrin.h> cpuid instruction supports (subFunction) parameter in ECX,
that is used only with some specific (function) parameter values.
But we always use only (subFunction==0).
*/
/*
__cpuid(): MSVC and GCC/CLANG use same function/macro name
but parameters are different.
We use MSVC __cpuid() parameters style for our z7_x86_cpuid() function.
*/
#if defined(__GNUC__) /* && (__GNUC__ >= 10) */ \
|| defined(__clang__) /* && (__clang_major__ >= 10) */
/* there were some CLANG/GCC compilers that had issues with
rbx(ebx) handling in asm blocks in -fPIC mode (when __PIC__ is defined).
compiler's <cpuid.h> contains the macro __cpuid() that is similar to our code.
The history of __cpuid() changes in CLANG/GCC:
GCC:
2007: it preserved ebx for (__PIC__ && __i386__)
2013: it preserved rbx and ebx for __PIC__
2014: it doesn't preserve rbx and ebx anymore
we suppose that (__GNUC__ >= 5) fixed that __PIC__ ebx/rbx problem.
CLANG:
2014+: it preserves rbx, but only for 64-bit code. No __PIC__ check.
Why CLANG cares about 64-bit mode only, and doesn't care about ebx (in 32-bit)?
Do we need __PIC__ test for CLANG or we must care about rbx even if
__PIC__ is not defined?
*/
#define ASM_LN "\n"
#if defined(MY_CPU_AMD64) && defined(__PIC__) \
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "mov %%rbx, %q1" \
ASM_LN "cpuid" \
ASM_LN "xchg %%rbx, %q1" \
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
/* "=&r" selects free register. It can select even rbx, if that register is free.
"=&D" for (RDI) also works, but the code can be larger with "=&D"
"2"(0) means (subFunction = 0),
2 is (zero-based) index in the output constraint list "=c" (ECX). */
#elif defined(MY_CPU_X86) && defined(__PIC__) \
&& ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "mov %%ebx, %k1" \
ASM_LN "cpuid" \
ASM_LN "xchg %%ebx, %k1" \
: "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
#else
#define x86_cpuid_MACRO(p, func) { \
__asm__ __volatile__ ( \
ASM_LN "cpuid" \
: "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
#endif #endif
#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
static UInt32 CheckFlag(UInt32 flag) void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{ {
#ifdef _MSC_VER x86_cpuid_MACRO(p, func)
__asm pushfd;
__asm pop EAX;
__asm mov EDX, EAX;
__asm xor EAX, flag;
__asm push EAX;
__asm popfd;
__asm pushfd;
__asm pop EAX;
__asm xor EAX, EDX;
__asm push EDX;
__asm popfd;
__asm and flag, EAX;
#else
__asm__ __volatile__ (
"pushf\n\t"
"pop %%EAX\n\t"
"movl %%EAX,%%EDX\n\t"
"xorl %0,%%EAX\n\t"
"push %%EAX\n\t"
"popf\n\t"
"pushf\n\t"
"pop %%EAX\n\t"
"xorl %%EDX,%%EAX\n\t"
"push %%EDX\n\t"
"popf\n\t"
"andl %%EAX, %0\n\t":
"=c" (flag) : "c" (flag) :
"%eax", "%edx");
#endif
return flag;
} }
#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
/*
GCC/CLANG version.
Executes cpuid with (func = 0) and returns p[0] (the EAX result), which the
callers in this file compare against the function number they want to use.
If NEED_CHECK_FOR_CPUID is defined, the function first checks that
bit 21 of EFLAGS can be toggled; if the bit can't be changed,
the function returns 0 without executing cpuid.
*/
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
// index of the EFLAGS bit that is toggled to probe for cpuid support
#define EFALGS_CPUID_BIT 21
UInt32 a;
__asm__ __volatile__ (
ASM_LN "pushf" // keep a copy of the original EFLAGS on the stack
ASM_LN "pushf"
ASM_LN "pop %0" // a = EFLAGS
// ASM_LN "movl %0, %1"
// ASM_LN "xorl $0x200000, %0"
ASM_LN "btc %1, %0" // complement the probed bit in a
ASM_LN "push %0"
ASM_LN "popf" // try to load the modified value into EFLAGS
ASM_LN "pushf"
ASM_LN "pop %0" // a = EFLAGS as it is now
ASM_LN "xorl (%%esp), %0" // a = bits that differ from the saved original
ASM_LN "popf" // restore the original EFLAGS
ASM_LN
: "=&r" (a) // "=a"
: "i" (EFALGS_CPUID_BIT)
);
// the probed bit was not changed: report that no cpuid function is available
if ((a & (1 << EFALGS_CPUID_BIT)) == 0)
return 0;
#endif
{
UInt32 p[4];
x86_cpuid_MACRO(p, 0)
return p[0];
}
}
#undef ASM_LN
#elif !defined(_MSC_VER)
/*
// for gcc/clang and other: we can try to use __cpuid macro:
#include <cpuid.h>
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
__cpuid(func, p[0], p[1], p[2], p[3]);
}
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
return (UInt32)__get_cpuid_max(0, NULL);
}
*/
// for unsupported cpuid:
/* Stub for builds without cpuid support: (func) is ignored and
   all four result values p[0..3] are set to 0. */
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
  unsigned i;
  UNUSED_VAR(func)
  for (i = 0; i < 4; i++)
    p[i] = 0;
}
/* Stub for builds without cpuid support: always reports 0,
the same value the other variants return when cpuid is not available. */
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
return 0;
}
#else // _MSC_VER
#if !defined(MY_CPU_AMD64)
/*
MSVC 32-bit x86 version, written as a naked function: the whole body is
inline asm, ends with an explicit (ret), and leaves the result in eax.
If NEED_CHECK_FOR_CPUID is defined, bit 21 of EFLAGS is toggled first;
if the bit can't be changed, eax is 0 at that point and the function
jumps to the end without executing cpuid.
Otherwise cpuid is executed with (eax = 0, ecx = 0) and the eax result is returned.
ebx is saved and restored around cpuid.
*/
UInt32 __declspec(naked) Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
#if defined(NEED_CHECK_FOR_CPUID)
// index of the EFLAGS bit that is toggled to probe for cpuid support
#define EFALGS_CPUID_BIT 21
// first copy: original EFLAGS, stays on the stack until the last popfd
__asm pushfd
// second copy: modified in place below
__asm pushfd
/*
__asm pop eax
// __asm mov edx, eax
__asm btc eax, EFALGS_CPUID_BIT
__asm push eax
*/
__asm btc dword ptr [esp], EFALGS_CPUID_BIT
__asm popfd
__asm pushfd
__asm pop eax
// __asm xor eax, edx
// eax = bits that differ from the original EFLAGS saved on the stack
__asm xor eax, [esp]
// __asm push edx
// restore the original EFLAGS
__asm popfd
__asm and eax, (1 shl EFALGS_CPUID_BIT)
__asm jz end_func
#endif
__asm push ebx
__asm xor eax, eax // func
__asm xor ecx, ecx // subFunction (optional) for (func == 0)
__asm cpuid
__asm pop ebx
#if defined(NEED_CHECK_FOR_CPUID)
end_func:
#endif
__asm ret 0
}
/*
MSVC 32-bit x86 version, written as a naked function.
The asm reads (p) from ecx and (func) from edx, executes cpuid with
(eax = func, ecx = 0) and stores eax, ebx, ecx, edx to p[0], p[1], p[2], p[3].
ebx and edi are saved and restored by the function.
*/
void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
// the parameters are used only via registers in the asm code below
UNUSED_VAR(p)
UNUSED_VAR(func)
__asm push ebx
__asm push edi
__asm mov edi, ecx // p
__asm mov eax, edx // func
__asm xor ecx, ecx // subfunction (optional) for (func == 0)
__asm cpuid
__asm mov [edi ], eax
__asm mov [edi + 4], ebx
__asm mov [edi + 8], ecx
__asm mov [edi + 12], edx
__asm pop edi
__asm pop ebx
__asm ret 0
}
#else // MY_CPU_AMD64
#if _MSC_VER >= 1600
#include <intrin.h>
#define MY_cpuidex __cpuidex
#else
/*
__cpuid (func == (0 or 7)) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) x64 doesn't clear ECX
We still can use __cpuid for low (func) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FASTCALL / NO_INLINE func,
So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove Z7_NO_INLINE and Z7_FASTCALL for MY_cpuidex_HACK(): !!!
*/
/* Replacement for __cpuidex() for old MSVC:
(subFunction) is intentionally the first parameter and is not used in the body;
the function only calls __cpuid(CPUInfo, func). */
static
Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(UInt32 subFunction, UInt32 func, int *CPUInfo)
{
UNUSED_VAR(subFunction)
__cpuid(CPUInfo, func);
}
#define MY_cpuidex(info, func, func2) MY_cpuidex_HACK(func2, func, info)
#pragma message("======== MY_cpuidex_HACK WAS USED ========")
#endif // _MSC_VER >= 1600
/* NOTE(review): this guard is located inside the (MY_CPU_AMD64) branch of the
enclosing #if / #else, so the Z7_NO_INLINE below looks like it is never
selected here - confirm. */
#if !defined(MY_CPU_AMD64)
/* inlining for __cpuid() in MSVC x86 (32-bit) produces big inefficient code,
so we disable inlining here */
Z7_NO_INLINE
#endif
/* MSVC intrinsic version: calls MY_cpuidex() for (func) with (subFunction = 0)
and writes the four result values to p[0..3]. */
void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
{
MY_cpuidex((int *)p, (int)func, 0);
}
/* MSVC intrinsic version: runs MY_cpuidex() for (func = 0, subFunction = 0)
   and returns the first result value (index 0). */
Z7_NO_INLINE
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
{
  int regs[4];
  MY_cpuidex(regs, 0, 0);
  return (UInt32)regs[0];
}
#endif // MY_CPU_AMD64
#endif // _MSC_VER
#if defined(NEED_CHECK_FOR_CPUID)
#define CHECK_CPUID_IS_SUPPORTED { if (z7_x86_cpuid_GetMaxFunc() == 0) return 0; }
#else #else
#define CHECK_CPUID_IS_SUPPORTED #define CHECK_CPUID_IS_SUPPORTED
#endif #endif
#undef NEED_CHECK_FOR_CPUID
#ifndef USE_ASM
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define MY__cpuidex __cpuidex
#else
/*
__cpuid (function == 4) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) doesn't clear ECX
We still can use __cpuid for low (function) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FAST_CALL / NO_INLINE function,
So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/
static static
MY_NO_INLINE BoolInt x86cpuid_Func_1(UInt32 *p)
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
UNUSED_VAR(subFunction);
__cpuid(CPUInfo, function);
}
#define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
#pragma message("======== MY__cpuidex_HACK WAS USED ========")
#endif
#else
#define MY__cpuidex(info, func, func2) __cpuid(info, func)
#pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
#endif
#endif
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
#ifdef _MSC_VER
UInt32 a2, b2, c2, d2;
__asm xor EBX, EBX;
__asm xor ECX, ECX;
__asm xor EDX, EDX;
__asm mov EAX, function;
__asm cpuid;
__asm mov a2, EAX;
__asm mov b2, EBX;
__asm mov c2, ECX;
__asm mov d2, EDX;
*a = a2;
*b = b2;
*c = c2;
*d = d2;
#else
__asm__ __volatile__ (
#if defined(MY_CPU_AMD64) && defined(__PIC__)
"mov %%rbx, %%rdi;"
"cpuid;"
"xchg %%rbx, %%rdi;"
: "=a" (*a) ,
"=D" (*b) ,
#elif defined(MY_CPU_X86) && defined(__PIC__)
"mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*a) ,
"=D" (*b) ,
#else
"cpuid"
: "=a" (*a) ,
"=b" (*b) ,
#endif
"=c" (*c) ,
"=d" (*d)
: "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
MY__cpuidex(CPUInfo, (int)function, 0);
*a = (UInt32)CPUInfo[0];
*b = (UInt32)CPUInfo[1];
*c = (UInt32)CPUInfo[2];
*d = (UInt32)CPUInfo[3];
#endif
}
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
{ {
CHECK_CPUID_IS_SUPPORTED CHECK_CPUID_IS_SUPPORTED
MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]); z7_x86_cpuid(p, 1);
MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
return True; return True;
} }
static const UInt32 kVendors[][3] = /*
static const UInt32 kVendors[][1] =
{ {
{ 0x756E6547, 0x49656E69, 0x6C65746E}, { 0x756E6547 }, // , 0x49656E69, 0x6C65746E },
{ 0x68747541, 0x69746E65, 0x444D4163}, { 0x68747541 }, // , 0x69746E65, 0x444D4163 },
{ 0x746E6543, 0x48727561, 0x736C7561} { 0x746E6543 } // , 0x48727561, 0x736C7561 }
}; };
*/
/*
typedef struct
{
UInt32 maxFunc;
UInt32 vendor[3];
UInt32 ver;
UInt32 b;
UInt32 c;
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_ver_GetFamily(ver) (((ver >> 16) & 0xff0) | ((ver >> 8) & 0xf))
#define x86cpuid_ver_GetModel(ver) (((ver >> 12) & 0xf0) | ((ver >> 4) & 0xf))
#define x86cpuid_ver_GetStepping(ver) (ver & 0xf)
int x86cpuid_GetFirm(const Cx86cpuid *p) int x86cpuid_GetFirm(const Cx86cpuid *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++) for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[0]); i++)
{ {
const UInt32 *v = kVendors[i]; const UInt32 *v = kVendors[i];
if (v[0] == p->vendor[0] && if (v[0] == p->vendor[0]
v[1] == p->vendor[1] && // && v[1] == p->vendor[1]
v[2] == p->vendor[2]) // && v[2] == p->vendor[2]
)
return (int)i; return (int)i;
} }
return -1; return -1;
@ -190,41 +321,55 @@ int x86cpuid_GetFirm(const Cx86cpuid *p)
BoolInt CPU_Is_InOrder() BoolInt CPU_Is_InOrder()
{ {
Cx86cpuid p; Cx86cpuid p;
int firm;
UInt32 family, model; UInt32 family, model;
if (!x86cpuid_CheckAndRead(&p)) if (!x86cpuid_CheckAndRead(&p))
return True; return True;
family = x86cpuid_GetFamily(p.ver); family = x86cpuid_ver_GetFamily(p.ver);
model = x86cpuid_GetModel(p.ver); model = x86cpuid_ver_GetModel(p.ver);
firm = x86cpuid_GetFirm(&p);
switch (firm) switch (x86cpuid_GetFirm(&p))
{ {
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && ( case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
/* In-Order Atom CPU */ // In-Order Atom CPU
model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */ model == 0x1C // 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330
|| model == 0x26 /* 45 nm, Z6xx */ || model == 0x26 // 45 nm, Z6xx
|| model == 0x27 /* 32 nm, Z2460 */ || model == 0x27 // 32 nm, Z2460
|| model == 0x35 /* 32 nm, Z2760 */ || model == 0x35 // 32 nm, Z2760
|| model == 0x36 /* 32 nm, N2xxx, D2xxx */ || model == 0x36 // 32 nm, N2xxx, D2xxx
))); )));
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA))); case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF)); case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
} }
return True; return False; // v23 : unknown processors are not In-Order
} }
*/
#ifdef _WIN32
#include "7zWindows.h"
#endif
#if !defined(MY_CPU_AMD64) && defined(_WIN32) #if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported() /* for legacy SSE ia32: there is no user-space cpu instruction to check
that OS supports SSE register storing/restoring on context switches.
So we need some OS-specific function to check that it's safe to use SSE registers.
*/
Z7_FORCE_INLINE
static BoolInt CPU_Sys_Is_SSE_Supported(void)
{ {
OSVERSIONINFO vi; #ifdef _MSC_VER
vi.dwOSVersionInfoSize = sizeof(vi); #pragma warning(push)
if (!GetVersionEx(&vi)) #pragma warning(disable : 4996) // `GetVersion': was declared deprecated
return False; #endif
return (vi.dwMajorVersion >= 5); /* low byte is major version of Windows
We suppose that any Windows version since
Windows2000 (major == 5) supports SSE registers */
return (Byte)GetVersion() >= 5;
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
} }
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False; #define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
#else #else
@ -232,94 +377,300 @@ static BoolInt CPU_Sys_Is_SSE_Supported()
#endif #endif
static UInt32 X86_CPUID_ECX_Get_Flags() #if !defined(MY_CPU_AMD64)
BoolInt CPU_IsSupported_CMOV(void)
{ {
Cx86cpuid p; UInt32 a[4];
CHECK_SYS_SSE_SUPPORT if (!x86cpuid_Func_1(&a[0]))
if (!x86cpuid_CheckAndRead(&p))
return 0; return 0;
return p.c; return (a[3] >> 15) & 1;
} }
BoolInt CPU_IsSupported_AES() BoolInt CPU_IsSupported_SSE(void)
{ {
return (X86_CPUID_ECX_Get_Flags() >> 25) & 1; UInt32 a[4];
}
BoolInt CPU_IsSupported_SSSE3()
{
return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41()
{
return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p)) if (!x86cpuid_Func_1(&a[0]))
return False; return 0;
return (a[3] >> 25) & 1;
}
if (p.maxFunc < 7) BoolInt CPU_IsSupported_SSE2(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return (a[3] >> 26) & 1;
}
#endif
static UInt32 x86cpuid_Func_1_ECX(void)
{
UInt32 a[4];
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_Func_1(&a[0]))
return 0;
return a[2];
}
BoolInt CPU_IsSupported_AES(void)
{
return (x86cpuid_Func_1_ECX() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3(void)
{
return (x86cpuid_Func_1_ECX() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41(void)
{
return (x86cpuid_Func_1_ECX() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA(void)
{
CHECK_SYS_SSE_SUPPORT
if (z7_x86_cpuid_GetMaxFunc() < 7)
return False; return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 7);
return (d[1] >> 29) & 1; return (d[1] >> 29) & 1;
} }
} }
// #include <stdio.h> /*
MSVC: _xgetbv() intrinsic is available since VS2010SP1.
MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
<immintrin.h> that we can use or check.
For any 32-bit x86 we can use asm code in MSVC,
but MSVC asm code is huge after compilation.
So _xgetbv() is better
#ifdef _WIN32 ICC: _xgetbv() intrinsic is available (in what version of ICC?)
#include <Windows.h> ICC defines (__GNUC___) and it supports gnu assembler
also ICC supports MASM style code with -use-msasm switch.
but ICC doesn't support __attribute__((__target__))
GCC/CLANG 9:
_xgetbv() is macro that works via __builtin_ia32_xgetbv()
and we need __attribute__((__target__("xsave")).
But with __target__("xsave") the function will be not
inlined to function that has no __target__("xsave") attribute.
If we want _xgetbv() call inlining, then we should use asm version
instead of calling _xgetbv().
Note: the intrinsic is broken before GCC 8.2:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85684
*/
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1100) \
|| defined(_MSC_VER) && (_MSC_VER >= 1600) && (_MSC_FULL_VER >= 160040219) \
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|| defined(__clang__) && (__clang_major__ >= 9)
// we define ATTRIB_XGETBV, if we want to use predefined _xgetbv() from compiler
#if defined(__INTEL_COMPILER)
#define ATTRIB_XGETBV
#elif defined(__GNUC__) || defined(__clang__)
// we don't define ATTRIB_XGETBV here, because asm version is better for inlining.
// #define ATTRIB_XGETBV __attribute__((__target__("xsave")))
#else
#define ATTRIB_XGETBV
#endif
#endif #endif
BoolInt CPU_IsSupported_AVX2() #if defined(ATTRIB_XGETBV)
{ #include <immintrin.h>
Cx86cpuid p; #endif
CHECK_SYS_SSE_SUPPORT
// XFEATURE_ENABLED_MASK/XCR0
#define MY_XCR_XFEATURE_ENABLED_MASK 0
/*
x86_xgetbv_0(num) : returns the 64-bit value (EDX:EAX) of the extended
control register selected by (num).
Implementation is selected at compile time:
- ATTRIB_XGETBV is defined : the compiler's _xgetbv() / __builtin_ia32_xgetbv()
- GCC / CLANG / SUNPRO : inline asm
- MSVC 32-bit x86 : inline asm with the instruction emitted as raw bytes
- anything else : no instruction is executed; a constant mask
(SSE | AVX bits) is returned
*/
#if defined(ATTRIB_XGETBV)
ATTRIB_XGETBV
#endif
static UInt64 x86_xgetbv_0(UInt32 num)
{
#if defined(ATTRIB_XGETBV)
{
return
#if (defined(_MSC_VER))
_xgetbv(num);
#else
__builtin_ia32_xgetbv(
#if !defined(__clang__)
(int)
#endif
num);
#endif
}
#elif defined(__GNUC__) || defined(__clang__) || defined(__SUNPRO_CC)
UInt32 a, d;
#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
__asm__
(
"xgetbv"
: "=a"(a), "=d"(d) : "c"(num) : "cc"
);
#else // is old gcc
// same operands as above, but the instruction is written as raw opcode bytes
__asm__
(
".byte 0x0f, 0x01, 0xd0" "\n\t"
: "=a"(a), "=d"(d) : "c"(num) : "cc"
);
#endif
// combine the two 32-bit halves: d is the high part, a is the low part
return ((UInt64)d << 32) | a;
// return a;
#elif defined(_MSC_VER) && !defined(MY_CPU_AMD64)
UInt32 a, d;
__asm {
// eax, edx, ecx are saved here and restored after the results are stored
push eax
push edx
push ecx
mov ecx, num;
// xor ecx, ecx // = MY_XCR_XFEATURE_ENABLED_MASK
// raw opcode bytes (the same bytes as in the old-gcc branch above)
_emit 0x0f
_emit 0x01
_emit 0xd0
mov a, eax
mov d, edx
pop ecx
pop edx
pop eax
}
return ((UInt64)d << 32) | a;
// return a;
#else // it's unknown compiler
// #error "Need xgetbv function"
UNUSED_VAR(num)
// for MSVC-X64 we could call external function from external file.
/* Actually we had checked OSXSAVE/AVX in cpuid before.
So it's expected that OS supports at least AVX and below. */
// if (num != MY_XCR_XFEATURE_ENABLED_MASK) return 0; // if not XCR0
return
// (1 << 0) | // x87
(1 << 1) // SSE
| (1 << 2); // AVX
#endif
}
#ifdef _WIN32
/*
Windows versions do not know about new ISA extensions that
can be introduced later. We still can use such new extensions,
even if Windows doesn't report that it supports them.
The exception is an ISA extension that changes the number or size
of registers (SSE, AVX/XSAVE, AVX512): we can use it only if Windows knows about it.
So it's enough to check
MY_PF_AVX_INSTRUCTIONS_AVAILABLE
instead of
MY_PF_AVX2_INSTRUCTIONS_AVAILABLE
*/
#define MY_PF_XSAVE_ENABLED 17
// #define MY_PF_SSSE3_INSTRUCTIONS_AVAILABLE 36
// #define MY_PF_SSE4_1_INSTRUCTIONS_AVAILABLE 37
// #define MY_PF_SSE4_2_INSTRUCTIONS_AVAILABLE 38
// #define MY_PF_AVX_INSTRUCTIONS_AVAILABLE 39
// #define MY_PF_AVX2_INSTRUCTIONS_AVAILABLE 40
// #define MY_PF_AVX512F_INSTRUCTIONS_AVAILABLE 41
#endif
BoolInt CPU_IsSupported_AVX(void)
{
#ifdef _WIN32 #ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17 if (!IsProcessorFeaturePresent(MY_PF_XSAVE_ENABLED))
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False; return False;
/* PF_AVX_INSTRUCTIONS_AVAILABLE probably is supported starting from
some latest Win10 revisions. But we need AVX in older Windows also.
So we don't use the following check: */
/*
if (!IsProcessorFeaturePresent(MY_PF_AVX_INSTRUCTIONS_AVAILABLE))
return False;
*/
#endif #endif
if (!x86cpuid_CheckAndRead(&p)) /*
OS must use new special XSAVE/XRSTOR instructions to save
AVX registers when it is required for context switching.
At OS startup:
OS sets CR4.OSXSAVE flag to signal the processor that OS supports the XSAVE extensions.
Also OS sets bitmask in XCR0 register that defines what
registers will be processed by XSAVE instruction:
XCR0.X87[bit 0] - x87 registers and state
XCR0.SSE[bit 1] - SSE registers and state
XCR0.AVX[bit 2] - AVX registers and state
CR4.OSXSAVE is reflected to CPUID.1:ECX.OSXSAVE[bit 27].
So we can read that bit in user-space.
XCR0 is available for reading in user-space by new XGETBV instruction.
*/
{
const UInt32 c = x86cpuid_Func_1_ECX();
if (0 == (1
& (c >> 28) // AVX instructions are supported by hardware
& (c >> 27))) // OSXSAVE bit: XSAVE and related instructions are enabled by OS.
return False;
}
/* also we can check
CPUID.1:ECX.XSAVE [bit 26] : that shows that
XSAVE, XRSTOR, XSETBV, XGETBV instructions are supported by hardware.
But that check is redundant, because if OSXSAVE bit is set, then XSAVE is also set */
/* If the OS has enabled XSAVE extension instructions (OSXSAVE == 1),
in most cases we expect that the OS also supports storing/restoring
of at least the SSE and AVX states.
But to be sure of that, we call the user-space instruction
XGETBV(0) to get the XCR0 value, which contains the bitmask that defines
exactly which states (registers) the OS has enabled for storing/restoring.
*/
{
const UInt32 bm = (UInt32)x86_xgetbv_0(MY_XCR_XFEATURE_ENABLED_MASK);
// printf("\n=== XGetBV=%d\n", bm);
return 1
& (bm >> 1) // SSE state is supported (set by OS) for storing/restoring
& (bm >> 2); // AVX state is supported (set by OS) for storing/restoring
}
// since Win7SP1: we can use GetEnabledXStateFeatures();
}
BoolInt CPU_IsSupported_AVX2(void)
{
if (!CPU_IsSupported_AVX())
return False; return False;
if (p.maxFunc < 7) if (z7_x86_cpuid_GetMaxFunc() < 7)
return False; return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1 return 1
& (d[1] >> 5); // avx2 & (d[1] >> 5); // avx2
} }
} }
BoolInt CPU_IsSupported_VAES_AVX2() BoolInt CPU_IsSupported_VAES_AVX2(void)
{ {
Cx86cpuid p; if (!CPU_IsSupported_AVX())
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False; return False;
#endif if (z7_x86_cpuid_GetMaxFunc() < 7)
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False; return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 7);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1 return 1
& (d[1] >> 5) // avx2 & (d[1] >> 5) // avx2
@ -328,20 +679,15 @@ BoolInt CPU_IsSupported_VAES_AVX2()
} }
} }
BoolInt CPU_IsSupported_PageGB() BoolInt CPU_IsSupported_PageGB(void)
{ {
Cx86cpuid cpuid; CHECK_CPUID_IS_SUPPORTED
if (!x86cpuid_CheckAndRead(&cpuid))
return False;
{ {
UInt32 d[4] = { 0 }; UInt32 d[4];
MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]); z7_x86_cpuid(d, 0x80000000);
if (d[0] < 0x80000001) if (d[0] < 0x80000001)
return False; return False;
} z7_x86_cpuid(d, 0x80000001);
{
UInt32 d[4] = { 0 };
MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
return (d[3] >> 26) & 1; return (d[3] >> 26) & 1;
} }
} }
@ -351,11 +697,11 @@ BoolInt CPU_IsSupported_PageGB()
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } BoolInt CPU_IsSupported_CRC32(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } BoolInt CPU_IsSupported_CRYPTO(void) { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } BoolInt CPU_IsSupported_NEON(void) { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else #else
@ -378,28 +724,27 @@ static void Print_sysctlbyname(const char *name)
} }
} }
*/ */
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) static BoolInt z7_sysctlbyname_Get_BoolInt(const char *name)
{ {
UInt32 val = 0; UInt32 val = 0;
if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) if (z7_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
return 1; return 1;
return 0; return 0;
} }
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
BoolInt CPU_IsSupported_CRC32(void) BoolInt CPU_IsSupported_CRC32(void)
{ {
return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
} }
BoolInt CPU_IsSupported_NEON(void) BoolInt CPU_IsSupported_NEON(void)
{ {
return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
} }
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
@ -461,15 +806,15 @@ MY_HWCAP_CHECK_FUNC (AES)
#include <sys/sysctl.h> #include <sys/sysctl.h>
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize) int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{ {
return sysctlbyname(name, buf, bufSize, NULL, 0); return sysctlbyname(name, buf, bufSize, NULL, 0);
} }
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val) int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{ {
size_t bufSize = sizeof(*val); size_t bufSize = sizeof(*val);
int res = My_sysctlbyname_Get(name, val, &bufSize); const int res = z7_sysctlbyname_Get(name, val, &bufSize);
if (res == 0 && bufSize != sizeof(*val)) if (res == 0 && bufSize != sizeof(*val))
return EFAULT; return EFAULT;
return res; return res;

View file

@ -1,8 +1,8 @@
/* CpuArch.h -- CPU specific code /* CpuArch.h -- CPU specific code
2022-07-15 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H #ifndef ZIP7_INC_CPU_ARCH_H
#define __CPU_ARCH_H #define ZIP7_INC_CPU_ARCH_H
#include "7zTypes.h" #include "7zTypes.h"
@ -51,7 +51,13 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(__AARCH64EB__) \ || defined(__AARCH64EB__) \
|| defined(__aarch64__) || defined(__aarch64__)
#define MY_CPU_ARM64 #define MY_CPU_ARM64
#define MY_CPU_NAME "arm64" #ifdef __ILP32__
#define MY_CPU_NAME "arm64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "arm64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT #define MY_CPU_64BIT
#endif #endif
@ -68,8 +74,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define MY_CPU_ARM #define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT) #if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_ARMT
#define MY_CPU_NAME "armt" #define MY_CPU_NAME "armt"
#else #else
#define MY_CPU_ARM32
#define MY_CPU_NAME "arm" #define MY_CPU_NAME "arm"
#endif #endif
/* #define MY_CPU_32BIT */ /* #define MY_CPU_32BIT */
@ -103,6 +111,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|| defined(__PPC__) \ || defined(__PPC__) \
|| defined(_POWER) || defined(_POWER)
#define MY_CPU_PPC_OR_PPC64
#if defined(__ppc64__) \ #if defined(__ppc64__) \
|| defined(__powerpc64__) \ || defined(__powerpc64__) \
|| defined(_LP64) \ || defined(_LP64) \
@ -197,6 +207,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#error Stop_Compiling_Bad_Endian #error Stop_Compiling_Bad_Endian
#endif #endif
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT) #if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT #error Stop_Compiling_Bad_32_64_BIT
@ -253,6 +266,67 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
/* Z7_has_builtin(x) : wrapper for __has_builtin(x) that can be used in #if
expressions; it evaluates to 0 if the compiler doesn't provide __has_builtin. */
#ifdef __has_builtin
#define Z7_has_builtin(x) __has_builtin(x)
#else
#define Z7_has_builtin(x) 0
#endif
/* Z7_BSWAP32_CONST(v) : reverses the order of the 4 bytes of a 32-bit value,
using only casts, shifts and masks (no intrinsics).
(v) is evaluated 4 times, so it must have no side effects. */
#define Z7_BSWAP32_CONST(v) \
( (((UInt32)(v) << 24) ) \
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
| (((UInt32)(v) >> 24) ))
/*
Z7_BSWAP16 / Z7_BSWAP32 / Z7_BSWAP64 : byte order reversal for 16/32/64-bit values.
MSVC (_MSC_VER >= 1300) : _byteswap_*() intrinsics
GCC >= 4.3, or CLANG with __builtin_bswap16 : __builtin_bswap*()
other compilers : shift/mask expressions (these evaluate (v) more than once)
Z7_CPU_FAST_BSWAP_SUPPORTED is defined only for the two intrinsic variants.
*/
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
#include <stdlib.h>
/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
#define Z7_BSWAP16(v) _byteswap_ushort(v)
#define Z7_BSWAP32(v) _byteswap_ulong (v)
#define Z7_BSWAP64(v) _byteswap_uint64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16))
#define Z7_BSWAP16(v) __builtin_bswap16(v)
#define Z7_BSWAP32(v) __builtin_bswap32(v)
#define Z7_BSWAP64(v) __builtin_bswap64(v)
#define Z7_CPU_FAST_BSWAP_SUPPORTED
#else
// portable variants: the result of Z7_BSWAP16 is truncated to UInt16
#define Z7_BSWAP16(v) ((UInt16) \
( ((UInt32)(v) << 8) \
| ((UInt32)(v) >> 8) \
))
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
// each line moves one source byte to its mirrored position in the 64-bit result
#define Z7_BSWAP64(v) \
( ( ( (UInt64)(v) ) << 8 * 7 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
)
#endif
#ifdef MY_CPU_LE #ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \ #if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64) || defined(MY_CPU_ARM64)
@ -272,13 +346,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define GetUi32(p) (*(const UInt32 *)(const void *)(p)) #define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64 #ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p)) #define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif #endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else #else
@ -305,51 +377,26 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#ifndef MY_CPU_LE_UNALIGN_64 #ifndef GetUi64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#endif
#ifndef SetUi64
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \ SetUi32(_ppp2_ , (UInt32)_vvv2_) \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
#endif #endif
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
#ifdef __has_builtin #if defined(MY_CPU_LE_UNALIGN_64)
#define MY__has_builtin(x) __has_builtin(x) #define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
#else
#define MY__has_builtin(x) 0
#endif #endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
#include <stdlib.h>
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
#elif defined(MY_CPU_LE_UNALIGN) && ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
#else #else
#define GetBe32(p) ( \ #define GetBe32(p) ( \
@ -358,8 +405,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
((UInt32)((const Byte *)(p))[2] << 8) | \ ((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] ) ((const Byte *)(p))[3] )
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ #define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \ _ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \ _ppp_[1] = (Byte)(_vvv_ >> 16); \
@ -368,50 +413,83 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#endif #endif
#ifndef GetBe64
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#endif
#ifndef GetBe16 #ifndef GetBe16
#define GetBe16(p) ( (UInt16) ( \ #define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \ ((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] )) ((const Byte *)(p))[1] ))
#endif #endif
#if defined(MY_CPU_BE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
#elif defined(MY_CPU_LE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
#else
#error Stop_Compiling_Unknown_Endian_CONV
#endif
#if defined(MY_CPU_BE)
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetUi32a(p) GetUi32(p)
#define GetUi16a(p) GetUi16(p)
#define SetUi32a(p, v) SetUi32(p, v)
#define SetUi16a(p, v) SetUi16(p, v)
#elif defined(MY_CPU_LE)
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define GetBe32a(p) GetBe32(p)
#define GetBe16a(p) GetBe16(p)
#define SetBe32a(p, v) SetBe32(p, v)
#define SetBe16a(p, v) SetBe16(p, v)
#else
#error Stop_Compiling_Unknown_Endian_CPU_a
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_OR_ARM64) \
|| defined(MY_CPU_PPC_OR_PPC64)
#define Z7_CPU_FAST_ROTATE_SUPPORTED
#endif
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
typedef struct void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
{ UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
UInt32 maxFunc; #if defined(MY_CPU_AMD64)
UInt32 vendor[3]; #define Z7_IF_X86_CPUID_SUPPORTED
UInt32 ver; #else
UInt32 b; #define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
UInt32 c; #endif
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void); BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX(void);
BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_CMOV(void);
BoolInt CPU_IsSupported_SSE(void);
BoolInt CPU_IsSupported_SSE2(void);
BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void); BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void); BoolInt CPU_IsSupported_SHA(void);
@ -436,8 +514,8 @@ BoolInt CPU_IsSupported_AES(void);
#endif #endif
#if defined(__APPLE__) #if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize); int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val); int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif #endif
EXTERN_C_END EXTERN_C_END

View file

@ -1,8 +1,8 @@
/* Delta.h -- Delta converter /* Delta.h -- Delta converter
2013-01-18 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
#ifndef __DELTA_H #ifndef ZIP7_INC_DELTA_H
#define __DELTA_H #define ZIP7_INC_DELTA_H
#include "7zTypes.h" #include "7zTypes.h"

View file

@ -1,18 +1,28 @@
/* DllSecur.c -- DLL loading security /* DllSecur.c -- DLL loading security
2022-07-15 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
#include "DllSecur.h" #include "DllSecur.h"
#ifndef UNDER_CE #ifndef UNDER_CE
#if defined(__GNUC__) && (__GNUC__ >= 8) #if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
#pragma GCC diagnostic ignored "-Wcast-function-type" // #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif
#if defined(__clang__) || defined(__GNUC__)
typedef void (*Z7_voidFunction)(void);
#define MY_CAST_FUNC (Z7_voidFunction)
#elif defined(_MSC_VER) && _MSC_VER > 1920
#define MY_CAST_FUNC (void *)
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
#else
#define MY_CAST_FUNC
#endif #endif
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags); typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
@ -20,95 +30,82 @@ typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400 #define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
#define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800 #define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800
#define DELIM "\0"
static const char * const g_Dlls = static const char * const g_Dlls =
"userenv"
DELIM "setupapi"
DELIM "apphelp"
DELIM "propsys"
DELIM "dwmapi"
DELIM "cryptbase"
DELIM "oleacc"
DELIM "clbcatq"
DELIM "version"
#ifndef _CONSOLE #ifndef _CONSOLE
"UXTHEME\0" DELIM "uxtheme"
#endif #endif
"USERENV\0" DELIM;
"SETUPAPI\0"
"APPHELP\0"
"PROPSYS\0"
"DWMAPI\0"
"CRYPTBASE\0"
"OLEACC\0"
"CLBCATQ\0"
"VERSION\0"
;
#endif #endif
// #define MY_CAST_FUNC (void(*)()) #ifdef __clang__
#define MY_CAST_FUNC #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
#if defined (_MSC_VER) && _MSC_VER >= 1900
// sysinfoapi.h: kit10: GetVersion was declared deprecated
#pragma warning(disable : 4996)
#endif
void My_SetDefaultDllDirectories() #define IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN \
if ((UInt16)GetVersion() != 6) { \
const \
Func_SetDefaultDllDirectories setDllDirs = \
(Func_SetDefaultDllDirectories) MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \
"SetDefaultDllDirectories"); \
if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; }
void My_SetDefaultDllDirectories(void)
{ {
#ifndef UNDER_CE #ifndef UNDER_CE
IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
}
#endif #endif
} }
void LoadSecurityDlls() void LoadSecurityDlls(void)
{ {
#ifndef UNDER_CE #ifndef UNDER_CE
// at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
wchar_t buf[MAX_PATH + 100]; IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN
{
// at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
}
}
{
unsigned len = GetSystemDirectoryW(buf, MAX_PATH + 2);
if (len == 0 || len > MAX_PATH)
return;
}
{ {
wchar_t buf[MAX_PATH + 100];
const char *dll; const char *dll;
unsigned pos = (unsigned)lstrlenW(buf); unsigned pos = GetSystemDirectoryW(buf, MAX_PATH + 2);
if (pos == 0 || pos > MAX_PATH)
return;
if (buf[pos - 1] != '\\') if (buf[pos - 1] != '\\')
buf[pos++] = '\\'; buf[pos++] = '\\';
for (dll = g_Dlls; *dll != 0;)
for (dll = g_Dlls; dll[0] != 0;)
{ {
unsigned k = 0; wchar_t *dest = &buf[pos];
for (;;) for (;;)
{ {
char c = *dll++; const char c = *dll++;
buf[pos + k] = (Byte)c;
k++;
if (c == 0) if (c == 0)
break; break;
*dest++ = (Byte)c;
} }
dest[0] = '.';
lstrcatW(buf, L".dll"); dest[1] = 'd';
dest[2] = 'l';
dest[3] = 'l';
dest[4] = 0;
// lstrcatW(buf, L".dll");
LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH); LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
} }
} }
#endif #endif
} }
#endif #endif // _WIN32

View file

@ -1,8 +1,8 @@
/* DllSecur.h -- DLL loading for security /* DllSecur.h -- DLL loading for security
2018-02-19 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
#ifndef __DLL_SECUR_H #ifndef ZIP7_INC_DLL_SECUR_H
#define __DLL_SECUR_H #define ZIP7_INC_DLL_SECUR_H
#include "7zTypes.h" #include "7zTypes.h"

154
3rdparty/7z/src/HuffEnc.c vendored Normal file
View file

@ -0,0 +1,154 @@
/* HuffEnc.c -- functions for Huffman encoding
2023-03-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "HuffEnc.h"
#include "Sort.h"
#define kMaxLen 16
#define NUM_BITS 10
#define MASK (((unsigned)1 << NUM_BITS) - 1)
#define NUM_COUNTERS 64
#define HUFFMAN_SPEED_OPT
/*
  Huffman_Generate() builds a length-limited prefix code from symbol frequencies.

  freqs      : in  : numSymbols frequencies; symbols with (freq == 0) get (lens[i] = 0).
  p          : out : numSymbols code values, one per symbol. It is also used as
                     work space while the tree is built: each item packs a symbol
                     index in the low NUM_BITS bits and a frequency / parent index /
                     depth in the bits above.
  lens       : out : numSymbols code lengths in bits.
  numSymbols : number of items in freqs / lens.
  maxLen     : maximum allowed code length; it is used as an index limit for
               lenCounters[kMaxLen + 1], so it must not exceed kMaxLen.

  Because of the packing, a symbol index must fit in NUM_BITS bits and the
  frequency sums must fit in the remaining (32 - NUM_BITS) bits.

  If fewer than two symbols have a non-zero frequency, two codes of length 1
  are produced (values 0 and 1).
*/
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen)
{
  UInt32 num = 0; /* number of symbols with non-zero frequency */
  /* if (maxLen > 10) maxLen = 10; */
  {
    UInt32 i;

    #ifdef HUFFMAN_SPEED_OPT

    /* Bucket pass: symbols are distributed into NUM_COUNTERS buckets by
       frequency; all frequencies >= (NUM_COUNTERS - 1) share the last bucket. */
    UInt32 counters[NUM_COUNTERS];
    for (i = 0; i < NUM_COUNTERS; i++)
      counters[i] = 0;
    for (i = 0; i < numSymbols; i++)
    {
      UInt32 freq = freqs[i];
      counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++;
    }

    /* Convert counts to start offsets. Bucket 0 (zero frequency) is skipped,
       so (num) ends up as the number of used symbols. */
    for (i = 1; i < NUM_COUNTERS; i++)
    {
      UInt32 temp = counters[i];
      counters[i] = num;
      num += temp;
    }

    /* Place each used symbol at the next free slot of its bucket.
       After this loop counters[k] is the end offset of bucket k. */
    for (i = 0; i < numSymbols; i++)
    {
      UInt32 freq = freqs[i];
      if (freq == 0)
        lens[i] = 0;
      else
        p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS);
    }
    counters[0] = 0;
    /* Only the last bucket mixes different frequencies, so only that range
       is passed to HeapSort (presumably an ascending sort - see Sort.h). */
    HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]);

    #else

    for (i = 0; i < numSymbols; i++)
    {
      UInt32 freq = freqs[i];
      if (freq == 0)
        lens[i] = 0;
      else
        p[num++] = i | (freq << NUM_BITS);
    }
    HeapSort(p, num);

    #endif
  }

  if (num < 2)
  {
    /* Degenerate case: 0 or 1 used symbols. Two 1-bit codes are emitted:
       one for symbol 0 and one for the used symbol (or symbol 1). */
    unsigned minCode = 0;
    unsigned maxCode = 1;
    if (num == 1)
    {
      maxCode = (unsigned)p[0] & MASK;
      if (maxCode == 0)
        maxCode++;
    }
    p[minCode] = 0;
    p[maxCode] = 1;
    lens[minCode] = lens[maxCode] = 1;
    return;
  }

  {
    UInt32 b, e, i;

    /* Tree construction inside p[]:
         i : next unused leaf,
         b : next unused internal node,
         e : number of internal nodes created so far.
       Each step takes the two items with the smallest frequency (n and m),
       stores the index (e) of their new parent in their upper bits, and
       stores the summed frequency in the upper bits of p[e]. */
    i = b = e = 0;
    do
    {
      UInt32 n, m, freq;
      n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
      freq = (p[n] & ~MASK);
      p[n] = (p[n] & MASK) | (e << NUM_BITS);
      m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
      freq += (p[m] & ~MASK);
      p[m] = (p[m] & MASK) | (e << NUM_BITS);
      p[e] = (p[e] & MASK) | freq;
      e++;
    }
    while (num - e > 1);

    {
      /* lenCounters[len] = number of codes of length (len) */
      UInt32 lenCounters[kMaxLen + 1];
      for (i = 0; i <= kMaxLen; i++)
        lenCounters[i] = 0;

      /* The last created node is the root: its depth is set to 0, and it
         contributes two codes of length 1. */
      p[--e] &= MASK;
      lenCounters[1] = 2;
      /* Walk the remaining internal nodes from the root side down.
         Each node replaces one code of its own depth with two codes that are
         one bit longer. A depth that would reach maxLen is replaced with the
         largest smaller length that still has codes available. */
      while (e != 0)
      {
        UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
        p[e] = (p[e] & MASK) | (len << NUM_BITS);
        if (len >= maxLen)
          for (len = maxLen - 1; lenCounters[len] == 0; len--);
        lenCounters[len]--;
        lenCounters[(size_t)len + 1] += 2;
      }

      {
        /* Hand out lengths: items at the start of p[] (lowest frequency)
           receive the longest codes. */
        UInt32 len;
        i = 0;
        for (len = maxLen; len != 0; len--)
        {
          UInt32 k;
          for (k = lenCounters[len]; k != 0; k--)
            lens[p[i++] & MASK] = (Byte)len;
        }
      }

      {
        UInt32 nextCodes[kMaxLen + 1];
        {
          UInt32 code = 0;
          UInt32 len;
          /* Slot 0 is used by the loop below for every symbol with (lens[k] == 0).
             It is initialized so that loop never reads an indeterminate value. */
          nextCodes[0] = 0;
          /* first code value for each length */
          for (len = 1; len <= kMaxLen; len++)
            nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1;
        }
        /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */

        {
          /* Codes are assigned in symbol order within each length.
             p[k] for a symbol with (lens[k] == 0) is a dummy value. */
          UInt32 k;
          for (k = 0; k < numSymbols; k++)
            p[k] = nextCodes[lens[k]]++;
        }
      }
    }
  }
}
#undef kMaxLen
#undef NUM_BITS
#undef MASK
#undef NUM_COUNTERS
#undef HUFFMAN_SPEED_OPT

23
3rdparty/7z/src/HuffEnc.h vendored Normal file
View file

@ -0,0 +1,23 @@
/* HuffEnc.h -- Huffman encoding
2023-03-05 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_HUFF_ENC_H
#define ZIP7_INC_HUFF_ENC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
Huffman_Generate() computes code lengths and code values from symbol frequencies.
  freqs  : in  : (num) symbol frequencies
  p      : out : (num) code values; the array is also used as temporary storage
  lens   : out : (num) code lengths in bits; 0 for symbols with zero frequency
  maxLen : maximum allowed code length

Conditions:
num <= 1024 = 2 ^ NUM_BITS
Sum(freqs) < 4M = 2 ^ (32 - NUM_BITS)
maxLen <= 16 = kMaxLen
Num_Items(p) >= HUFFMAN_TEMP_SIZE(num)
*/
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen);
EXTERN_C_END
#endif

View file

@ -1,5 +1,5 @@
/* LzFind.c -- Match finder for LZ algorithms /* LzFind.c -- Match finder for LZ algorithms
2021-11-29 : Igor Pavlov : Public domain */ 2023-03-14 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -17,7 +17,7 @@
#define kEmptyHashValue 0 #define kEmptyHashValue 0
#define kMaxValForNormalize ((UInt32)0) #define kMaxValForNormalize ((UInt32)0)
// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug // #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xfff) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses // #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
@ -67,10 +67,10 @@
static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
{ {
if (!p->directInput) // if (!p->directInput)
{ {
ISzAlloc_Free(alloc, p->bufferBase); ISzAlloc_Free(alloc, p->bufBase);
p->bufferBase = NULL; p->bufBase = NULL;
} }
} }
@ -79,7 +79,7 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
{ {
if (blockSize == 0) if (blockSize == 0)
return 0; return 0;
if (!p->bufferBase || p->blockSize != blockSize) if (!p->bufBase || p->blockSize != blockSize)
{ {
// size_t blockSizeT; // size_t blockSizeT;
LzInWindow_Free(p, alloc); LzInWindow_Free(p, alloc);
@ -101,11 +101,11 @@ static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr all
#endif #endif
*/ */
p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize);
// printf("\nbufferBase = %p\n", p->bufferBase); // printf("\nbufferBase = %p\n", p->bufBase);
// return 0; // for debug // return 0; // for debug
} }
return (p->bufferBase != NULL); return (p->bufBase != NULL);
} }
static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
@ -113,7 +113,7 @@ static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return
static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); }
MY_NO_INLINE Z7_NO_INLINE
static void MatchFinder_ReadBlock(CMatchFinder *p) static void MatchFinder_ReadBlock(CMatchFinder *p)
{ {
if (p->streamEndWasReached || p->result != SZ_OK) if (p->streamEndWasReached || p->result != SZ_OK)
@ -127,8 +127,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p);
if (curSize > p->directInputRem) if (curSize > p->directInputRem)
curSize = (UInt32)p->directInputRem; curSize = (UInt32)p->directInputRem;
p->directInputRem -= curSize;
p->streamPos += curSize; p->streamPos += curSize;
p->directInputRem -= curSize;
if (p->directInputRem == 0) if (p->directInputRem == 0)
p->streamEndWasReached = 1; p->streamEndWasReached = 1;
return; return;
@ -136,8 +136,8 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
for (;;) for (;;)
{ {
Byte *dest = p->buffer + GET_AVAIL_BYTES(p); const Byte *dest = p->buffer + GET_AVAIL_BYTES(p);
size_t size = (size_t)(p->bufferBase + p->blockSize - dest); size_t size = (size_t)(p->bufBase + p->blockSize - dest);
if (size == 0) if (size == 0)
{ {
/* we call ReadBlock() after NeedMove() and MoveBlock(). /* we call ReadBlock() after NeedMove() and MoveBlock().
@ -153,7 +153,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
// #define kRead 3 // #define kRead 3
// if (size > kRead) size = kRead; // for debug // if (size > kRead) size = kRead; // for debug
p->result = ISeqInStream_Read(p->stream, dest, &size); /*
// we need cast (Byte *)dest.
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
*/
p->result = ISeqInStream_Read(p->stream,
p->bufBase + (dest - p->bufBase), &size);
if (p->result != SZ_OK) if (p->result != SZ_OK)
return; return;
if (size == 0) if (size == 0)
@ -173,14 +180,14 @@ static void MatchFinder_ReadBlock(CMatchFinder *p)
MY_NO_INLINE Z7_NO_INLINE
void MatchFinder_MoveBlock(CMatchFinder *p) void MatchFinder_MoveBlock(CMatchFinder *p)
{ {
const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; const size_t offset = (size_t)(p->buffer - p->bufBase) - p->keepSizeBefore;
const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore;
p->buffer = p->bufferBase + keepBefore; p->buffer = p->bufBase + keepBefore;
memmove(p->bufferBase, memmove(p->bufBase,
p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), p->bufBase + (offset & ~((size_t)kBlockMoveAlign - 1)),
keepBefore + (size_t)GET_AVAIL_BYTES(p)); keepBefore + (size_t)GET_AVAIL_BYTES(p));
} }
@ -198,7 +205,7 @@ int MatchFinder_NeedMove(CMatchFinder *p)
return 0; return 0;
if (p->streamEndWasReached || p->result != SZ_OK) if (p->streamEndWasReached || p->result != SZ_OK)
return 0; return 0;
return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); return ((size_t)(p->bufBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
} }
void MatchFinder_ReadIfRequired(CMatchFinder *p) void MatchFinder_ReadIfRequired(CMatchFinder *p)
@ -214,6 +221,8 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
p->cutValue = 32; p->cutValue = 32;
p->btMode = 1; p->btMode = 1;
p->numHashBytes = 4; p->numHashBytes = 4;
p->numHashBytes_Min = 2;
p->numHashOutBits = 0;
p->bigHash = 0; p->bigHash = 0;
} }
@ -222,8 +231,10 @@ static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
void MatchFinder_Construct(CMatchFinder *p) void MatchFinder_Construct(CMatchFinder *p)
{ {
unsigned i; unsigned i;
p->bufferBase = NULL; p->buffer = NULL;
p->bufBase = NULL;
p->directInput = 0; p->directInput = 0;
p->stream = NULL;
p->hash = NULL; p->hash = NULL;
p->expectedDataSize = (UInt64)(Int64)-1; p->expectedDataSize = (UInt64)(Int64)-1;
MatchFinder_SetDefaultSettings(p); MatchFinder_SetDefaultSettings(p);
@ -238,6 +249,8 @@ void MatchFinder_Construct(CMatchFinder *p)
} }
} }
#undef kCrcPoly
static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc) static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
{ {
ISzAlloc_Free(alloc, p->hash); ISzAlloc_Free(alloc, p->hash);
@ -252,7 +265,7 @@ void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
{ {
size_t sizeInBytes = (size_t)num * sizeof(CLzRef); const size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
if (sizeInBytes / sizeof(CLzRef) != num) if (sizeInBytes / sizeof(CLzRef) != num)
return NULL; return NULL;
return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
@ -298,6 +311,62 @@ static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize)
} }
// input is historySize
// returns the hash mask derived from (historySize - 1):
//   numHashBytes == 2 : always a 16-bit mask
//   otherwise         : the value with its set bits propagated downward,
//                       and with at least the low 16 bits set
static UInt32 MatchFinder_GetHashMask2(CMatchFinder *p, UInt32 hs)
{
  UInt32 mask;

  if (p->numHashBytes == 2)
    return (1 << 16) - 1;

  mask = (hs != 0) ? hs - 1 : hs;

  // propagate every set bit to the 15 positions below it;
  // the low 16 bits are forced to 1 further down
  mask |= (mask >> 1);
  mask |= (mask >> 2);
  mask |= (mask >> 4);
  mask |= (mask >> 8);

  // for 3-byte hashing the mask is limited to 24 bits
  /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
  if (p->numHashBytes == 3 && mask >= (1 << 24))
    mask = (1 << 24) - 1;

  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  mask |= (1 << 16) - 1; /* don't change it! */

  // bt5: we adjust the size with recommended minimum size
  if (p->numHashBytes >= 5)
    mask |= (256 << kLzHash_CrcShift_2) - 1;

  return mask;
}
// input is historySize
// returns the hash mask derived from (historySize - 1), halved once
// (and halved a second time for large values when numHashBytes != 3):
//   numHashBytes == 2 : always a 16-bit mask
//   otherwise         : at least the low 16 bits are set
static UInt32 MatchFinder_GetHashMask(CMatchFinder *p, UInt32 hs)
{
  UInt32 mask;

  if (p->numHashBytes == 2)
    return (1 << 16) - 1;

  mask = (hs != 0) ? hs - 1 : hs;

  // propagate every set bit to the 15 positions below it;
  // the low 16 bits are forced to 1 further down
  mask |= (mask >> 1);
  mask |= (mask >> 2);
  mask |= (mask >> 4);
  mask |= (mask >> 8);
  mask >>= 1;

  /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
  if (mask >= (1 << 24))
  {
    // 3-byte hashing is limited to a 24-bit mask; other modes halve once more
    mask = (p->numHashBytes == 3) ? (UInt32)((1 << 24) - 1) : (mask >> 1);
  }

  // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
  mask |= (1 << 16) - 1; /* don't change it! */

  // bt5: we adjust the size with recommended minimum size
  if (p->numHashBytes >= 5)
    mask |= (256 << kLzHash_CrcShift_2) - 1;

  return mask;
}
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc) ISzAllocPtr alloc)
@ -318,78 +387,91 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
p->blockSize = 0; p->blockSize = 0;
if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc))
{ {
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it size_t hashSizeSum;
UInt32 hs;
p->matchMaxLen = matchMaxLen;
{ {
// UInt32 hs4; UInt32 hs;
p->fixedHashSize = 0; UInt32 hsCur;
hs = (1 << 16) - 1;
if (p->numHashBytes != 2) if (p->numHashOutBits != 0)
{ {
hs = historySize; unsigned numBits = p->numHashOutBits;
if (hs > p->expectedDataSize) const unsigned nbMax =
hs = (UInt32)p->expectedDataSize; (p->numHashBytes == 2 ? 16 :
if (hs != 0) (p->numHashBytes == 3 ? 24 : 32));
hs--; if (numBits > nbMax)
hs |= (hs >> 1); numBits = nbMax;
hs |= (hs >> 2); if (numBits >= 32)
hs |= (hs >> 4); hs = (UInt32)0 - 1;
hs |= (hs >> 8); else
// we propagated 16 bits in (hs). Low 16 bits must be set later hs = ((UInt32)1 << numBits) - 1;
hs >>= 1;
if (hs >= (1 << 24))
{
if (p->numHashBytes == 3)
hs = (1 << 24) - 1;
else
hs >>= 1;
/* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
}
// hs = ((UInt32)1 << 25) - 1; // for test
// (hash_size >= (1 << 16)) : Required for (numHashBytes > 2) // (hash_size >= (1 << 16)) : Required for (numHashBytes > 2)
hs |= (1 << 16) - 1; /* don't change it! */ hs |= (1 << 16) - 1; /* don't change it! */
// bt5: we adjust the size with recommended minimum size
if (p->numHashBytes >= 5) if (p->numHashBytes >= 5)
hs |= (256 << kLzHash_CrcShift_2) - 1; hs |= (256 << kLzHash_CrcShift_2) - 1;
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
if (hs > hs2)
hs = hs2;
}
hsCur = hs;
if (p->expectedDataSize < historySize)
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
if (hsCur > hs2)
hsCur = hs2;
}
}
else
{
hs = MatchFinder_GetHashMask(p, historySize);
hsCur = hs;
if (p->expectedDataSize < historySize)
{
hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
if (hsCur > hs) // is it possible?
hsCur = hs;
}
} }
p->hashMask = hs;
hs++;
/* p->hashMask = hsCur;
hs4 = (1 << 20);
if (hs4 > hs)
hs4 = hs;
// hs4 = (1 << 16); // for test
p->hash4Mask = hs4 - 1;
*/
if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; hashSizeSum = hs;
if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; hashSizeSum++;
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size; if (hashSizeSum < hs)
hs += p->fixedHashSize; return 0;
{
UInt32 fixedHashSize = 0;
if (p->numHashBytes > 2 && p->numHashBytes_Min <= 2) fixedHashSize += kHash2Size;
if (p->numHashBytes > 3 && p->numHashBytes_Min <= 3) fixedHashSize += kHash3Size;
// if (p->numHashBytes > 4) p->fixedHashSize += hs4; // kHash4Size;
hashSizeSum += fixedHashSize;
p->fixedHashSize = fixedHashSize;
}
} }
p->matchMaxLen = matchMaxLen;
{ {
size_t newSize; size_t newSize;
size_t numSons; size_t numSons;
const UInt32 newCyclicBufferSize = historySize + 1; // do not change it
p->historySize = historySize; p->historySize = historySize;
p->hashSizeSum = hs;
p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1)
numSons = newCyclicBufferSize; numSons = newCyclicBufferSize;
if (p->btMode) if (p->btMode)
numSons <<= 1; numSons <<= 1;
newSize = hs + numSons; newSize = hashSizeSum + numSons;
if (numSons < newCyclicBufferSize || newSize < numSons)
return 0;
// aligned size is not required here, but it can be better for some loops // aligned size is not required here, but it can be better for some loops
#define NUM_REFS_ALIGN_MASK 0xF #define NUM_REFS_ALIGN_MASK 0xF
newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK; newSize = (newSize + NUM_REFS_ALIGN_MASK) & ~(size_t)NUM_REFS_ALIGN_MASK;
if (p->hash && p->numRefs == newSize) // 22.02: we don't reallocate buffer, if old size is enough
if (p->hash && p->numRefs >= newSize)
return 1; return 1;
MatchFinder_FreeThisClassMemory(p, alloc); MatchFinder_FreeThisClassMemory(p, alloc);
@ -398,7 +480,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
if (p->hash) if (p->hash)
{ {
p->son = p->hash + p->hashSizeSum; p->son = p->hash + hashSizeSum;
return 1; return 1;
} }
} }
@ -470,7 +552,8 @@ void MatchFinder_Init_HighHash(CMatchFinder *p)
void MatchFinder_Init_4(CMatchFinder *p) void MatchFinder_Init_4(CMatchFinder *p)
{ {
p->buffer = p->bufferBase; if (!p->directInput)
p->buffer = p->bufBase;
{ {
/* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker.
the code in CMatchFinderMt expects (pos = 1) */ the code in CMatchFinderMt expects (pos = 1) */
@ -507,20 +590,20 @@ void MatchFinder_Init(CMatchFinder *p)
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__) && (__clang_major__ >= 8) \ #if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 8) \ || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
#define USE_SATUR_SUB_128
#define USE_AVX2 #define USE_LZFIND_SATUR_SUB_128
#define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) #define USE_LZFIND_SATUR_SUB_256
#define ATTRIB_AVX2 __attribute__((__target__("avx2"))) #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("sse4.1")))
#define LZFIND_ATTRIB_AVX2 __attribute__((__target__("avx2")))
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if (_MSC_VER >= 1600) #if (_MSC_VER >= 1600)
#define USE_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#if (_MSC_VER >= 1900) #endif
#define USE_AVX2 #if (_MSC_VER >= 1900)
#include <immintrin.h> // avx #define USE_LZFIND_SATUR_SUB_256
#endif
#endif #endif
#endif #endif
@ -529,16 +612,16 @@ void MatchFinder_Init(CMatchFinder *p)
#if defined(__clang__) && (__clang_major__ >= 8) \ #if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8) || defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
// #define ATTRIB_SSE41 __attribute__((__target__(""))) // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("")))
#else #else
// #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) // #define LZFIND_ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif #endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#if (_MSC_VER >= 1910) #if (_MSC_VER >= 1910)
#define USE_SATUR_SUB_128 #define USE_LZFIND_SATUR_SUB_128
#endif #endif
#endif #endif
@ -550,121 +633,130 @@ void MatchFinder_Init(CMatchFinder *p)
#endif #endif
/*
#ifndef ATTRIB_SSE41
#define ATTRIB_SSE41
#endif
#ifndef ATTRIB_AVX2
#define ATTRIB_AVX2
#endif
*/
#ifdef USE_SATUR_SUB_128 #ifdef USE_LZFIND_SATUR_SUB_128
// #define _SHOW_HW_STATUS // #define Z7_SHOW_HW_STATUS
#ifdef _SHOW_HW_STATUS #ifdef Z7_SHOW_HW_STATUS
#include <stdio.h> #include <stdio.h>
#define _PRF(x) x #define PRF(x) x
_PRF(;) PRF(;)
#else #else
#define _PRF(x) #define PRF(x)
#endif #endif
#ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM_OR_ARM64
#ifdef MY_CPU_ARM64 #ifdef MY_CPU_ARM64
// #define FORCE_SATUR_SUB_128 // #define FORCE_LZFIND_SATUR_SUB_128
#endif #endif
typedef uint32x4_t LzFind_v128;
#define SASUB_128_V(v, s) \
vsubq_u32(vmaxq_u32(v, s), s)
typedef uint32x4_t v128; #else // MY_CPU_ARM_OR_ARM64
#define SASUB_128(i) \
*(v128 *)(void *)(items + (i) * 4) = \
vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2);
#else
#include <smmintrin.h> // sse4.1 #include <smmintrin.h> // sse4.1
typedef __m128i v128; typedef __m128i LzFind_v128;
// SSE 4.1
#define SASUB_128_V(v, s) \
_mm_sub_epi32(_mm_max_epu32(v, s), s)
#endif // MY_CPU_ARM_OR_ARM64
#define SASUB_128(i) \ #define SASUB_128(i) \
*(v128 *)(void *)(items + (i) * 4) = \ *( LzFind_v128 *)( void *)(items + (i) * 4) = SASUB_128_V( \
_mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 *(const LzFind_v128 *)(const void *)(items + (i) * 4), sub2);
#endif
Z7_NO_INLINE
MY_NO_INLINE
static static
#ifdef ATTRIB_SSE41 #ifdef LZFIND_ATTRIB_SSE41
ATTRIB_SSE41 LZFIND_ATTRIB_SSE41
#endif #endif
void void
MY_FAST_CALL Z7_FASTCALL
LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{ {
v128 sub2 = const LzFind_v128 sub2 =
#ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM_OR_ARM64
vdupq_n_u32(subValue); vdupq_n_u32(subValue);
#else #else
_mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
#endif #endif
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do do
{ {
SASUB_128(0) SASUB_128(0) SASUB_128(1) items += 2 * 4;
SASUB_128(1) SASUB_128(0) SASUB_128(1) items += 2 * 4;
SASUB_128(2)
SASUB_128(3)
items += 4 * 4;
} }
while (items != lim); while (items != lim);
} }
#ifdef USE_AVX2 #ifdef USE_LZFIND_SATUR_SUB_256
#include <immintrin.h> // avx #include <immintrin.h> // avx
/*
clang :immintrin.h uses
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__AVX2__)
#include <avx2intrin.h>
#endif
so we need <avxintrin.h> for clang-cl */
#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 #if defined(__clang__)
#include <avxintrin.h>
#include <avx2intrin.h>
#endif
MY_NO_INLINE // AVX2:
#define SASUB_256(i) \
*( __m256i *)( void *)(items + (i) * 8) = \
_mm256_sub_epi32(_mm256_max_epu32( \
*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2);
Z7_NO_INLINE
static static
#ifdef ATTRIB_AVX2 #ifdef LZFIND_ATTRIB_AVX2
ATTRIB_AVX2 LZFIND_ATTRIB_AVX2
#endif #endif
void void
MY_FAST_CALL Z7_FASTCALL
LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{ {
__m256i sub2 = _mm256_set_epi32( const __m256i sub2 = _mm256_set_epi32(
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue,
(Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue);
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do do
{ {
SASUB_256(0) SASUB_256(0) SASUB_256(1) items += 2 * 8;
SASUB_256(1) SASUB_256(0) SASUB_256(1) items += 2 * 8;
items += 2 * 8;
} }
while (items != lim); while (items != lim);
} }
#endif // USE_AVX2 #endif // USE_LZFIND_SATUR_SUB_256
#ifndef FORCE_SATUR_SUB_128 #ifndef FORCE_LZFIND_SATUR_SUB_128
typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( typedef void (Z7_FASTCALL *LZFIND_SATUR_SUB_CODE_FUNC)(
UInt32 subValue, CLzRef *items, const CLzRef *lim); UInt32 subValue, CLzRef *items, const CLzRef *lim);
static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
#endif // FORCE_SATUR_SUB_128 #endif // FORCE_LZFIND_SATUR_SUB_128
#endif // USE_SATUR_SUB_128 #endif // USE_LZFIND_SATUR_SUB_128
// kEmptyHashValue must be zero // kEmptyHashValue must be zero
// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; // #define SASUB_32(i) { UInt32 v = items[i]; UInt32 m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; }
#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; #define SASUB_32(i) { UInt32 v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; }
#ifdef FORCE_SATUR_SUB_128 #ifdef FORCE_LZFIND_SATUR_SUB_128
#define DEFAULT_SaturSub LzFind_SaturSub_128 #define DEFAULT_SaturSub LzFind_SaturSub_128
@ -672,24 +764,19 @@ static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub;
#define DEFAULT_SaturSub LzFind_SaturSub_32 #define DEFAULT_SaturSub LzFind_SaturSub_32
MY_NO_INLINE Z7_NO_INLINE
static static
void void
MY_FAST_CALL Z7_FASTCALL
LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
{ {
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do do
{ {
UInt32 v; SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(0) SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(1) SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(2) SASUB_32(0) SASUB_32(1) items += 2;
SASUB_32(3)
SASUB_32(4)
SASUB_32(5)
SASUB_32(6)
SASUB_32(7)
items += 8;
} }
while (items != lim); while (items != lim);
} }
@ -697,27 +784,23 @@ LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim)
#endif #endif
MY_NO_INLINE Z7_NO_INLINE
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
{ {
#define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) #define LZFIND_NORM_ALIGN_BLOCK_SIZE (1 << 7)
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
CLzRef *lim; for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (LZFIND_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--)
{ {
UInt32 v; SASUB_32(0)
SASUB_32(0);
items++; items++;
} }
{ {
#define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) const size_t k_Align_Mask = (LZFIND_NORM_ALIGN_BLOCK_SIZE / 4 - 1);
lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); CLzRef *lim = items + (numItems & ~(size_t)k_Align_Mask);
numItems &= K_NORM_ALIGN_MASK; numItems &= k_Align_Mask;
if (items != lim) if (items != lim)
{ {
#if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) #if defined(USE_LZFIND_SATUR_SUB_128) && !defined(FORCE_LZFIND_SATUR_SUB_128)
if (g_LzFind_SaturSub) if (g_LzFind_SaturSub)
g_LzFind_SaturSub(subValue, items, lim); g_LzFind_SaturSub(subValue, items, lim);
else else
@ -726,12 +809,10 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
} }
items = lim; items = lim;
} }
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0; numItems--) for (; numItems != 0; numItems--)
{ {
UInt32 v; SASUB_32(0)
SASUB_32(0);
items++; items++;
} }
} }
@ -740,7 +821,7 @@ void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
// call MatchFinder_CheckLimits() only after (p->pos++) update // call MatchFinder_CheckLimits() only after (p->pos++) update
MY_NO_INLINE Z7_NO_INLINE
static void MatchFinder_CheckLimits(CMatchFinder *p) static void MatchFinder_CheckLimits(CMatchFinder *p)
{ {
if (// !p->streamEndWasReached && p->result == SZ_OK && if (// !p->streamEndWasReached && p->result == SZ_OK &&
@ -768,11 +849,14 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */;
// const UInt32 subValue = (1 << 15); // for debug // const UInt32 subValue = (1 << 15); // for debug
// printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue);
size_t numSonRefs = p->cyclicBufferSize; MatchFinder_REDUCE_OFFSETS(p, subValue)
if (p->btMode) MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashMask + 1 + p->fixedHashSize);
numSonRefs <<= 1; {
Inline_MatchFinder_ReduceOffsets(p, subValue); size_t numSonRefs = p->cyclicBufferSize;
MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); if (p->btMode)
numSonRefs <<= 1;
MatchFinder_Normalize3(subValue, p->son, numSonRefs);
}
} }
if (p->cyclicBufferPos == p->cyclicBufferSize) if (p->cyclicBufferPos == p->cyclicBufferSize)
@ -785,7 +869,7 @@ static void MatchFinder_CheckLimits(CMatchFinder *p)
/* /*
(lenLimit > maxLen) (lenLimit > maxLen)
*/ */
MY_FORCE_INLINE Z7_FORCE_INLINE
static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *d, unsigned maxLen) UInt32 *d, unsigned maxLen)
@ -867,7 +951,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
} }
MY_FORCE_INLINE Z7_FORCE_INLINE
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
UInt32 *d, UInt32 maxLen) UInt32 *d, UInt32 maxLen)
@ -1004,7 +1088,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
#define MOVE_POS_RET MOVE_POS return distances; #define MOVE_POS_RET MOVE_POS return distances;
MY_NO_INLINE Z7_NO_INLINE
static void MatchFinder_MovePos(CMatchFinder *p) static void MatchFinder_MovePos(CMatchFinder *p)
{ {
/* we go here at the end of stream data, when (avail < num_hash_bytes) /* we go here at the end of stream data, when (avail < num_hash_bytes)
@ -1015,11 +1099,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
if (p->btMode) if (p->btMode)
p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue
*/ */
MOVE_POS; MOVE_POS
} }
#define GET_MATCHES_HEADER2(minLen, ret_op) \ #define GET_MATCHES_HEADER2(minLen, ret_op) \
unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
cur = p->buffer; cur = p->buffer;
@ -1028,11 +1112,11 @@ static void MatchFinder_MovePos(CMatchFinder *p)
#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue #define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); #define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS } while (--num);
#define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \
distances = func(MF_PARAMS(p), \ distances = func(MF_PARAMS(p), \
distances, (UInt32)_maxLen_); MOVE_POS_RET; distances, (UInt32)_maxLen_); MOVE_POS_RET
#define GET_MATCHES_FOOTER_BT(_maxLen_) \ #define GET_MATCHES_FOOTER_BT(_maxLen_) \
GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1)
@ -1052,7 +1136,7 @@ static void MatchFinder_MovePos(CMatchFinder *p)
static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{ {
GET_MATCHES_HEADER(2) GET_MATCHES_HEADER(2)
HASH2_CALC; HASH2_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_BT(1) GET_MATCHES_FOOTER_BT(1)
@ -1061,7 +1145,7 @@ static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{ {
GET_MATCHES_HEADER(3) GET_MATCHES_HEADER(3)
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_BT(2) GET_MATCHES_FOOTER_BT(2)
@ -1082,7 +1166,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(3) GET_MATCHES_HEADER(3)
HASH3_CALC; HASH3_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@ -1107,7 +1191,7 @@ static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
SkipMatchesSpec(MF_PARAMS(p)); SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET; MOVE_POS_RET
} }
} }
@ -1123,7 +1207,7 @@ static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(4) GET_MATCHES_HEADER(4)
HASH4_CALC; HASH4_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@ -1190,7 +1274,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(5) GET_MATCHES_HEADER(5)
HASH5_CALC; HASH5_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@ -1246,7 +1330,7 @@ static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
SkipMatchesSpec(MF_PARAMS(p)); SkipMatchesSpec(MF_PARAMS(p));
MOVE_POS_RET; MOVE_POS_RET
} }
break; break;
} }
@ -1263,7 +1347,7 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(4) GET_MATCHES_HEADER(4)
HASH4_CALC; HASH4_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@ -1314,12 +1398,12 @@ static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
p->son[p->cyclicBufferPos] = curMatch; p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET; MOVE_POS_RET
} }
break; break;
} }
GET_MATCHES_FOOTER_HC(maxLen); GET_MATCHES_FOOTER_HC(maxLen)
} }
@ -1330,7 +1414,7 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
UInt32 *hash; UInt32 *hash;
GET_MATCHES_HEADER(5) GET_MATCHES_HEADER(5)
HASH5_CALC; HASH5_CALC
hash = p->hash; hash = p->hash;
pos = p->pos; pos = p->pos;
@ -1386,19 +1470,19 @@ static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
if (maxLen == lenLimit) if (maxLen == lenLimit)
{ {
p->son[p->cyclicBufferPos] = curMatch; p->son[p->cyclicBufferPos] = curMatch;
MOVE_POS_RET; MOVE_POS_RET
} }
break; break;
} }
GET_MATCHES_FOOTER_HC(maxLen); GET_MATCHES_FOOTER_HC(maxLen)
} }
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
{ {
GET_MATCHES_HEADER(3) GET_MATCHES_HEADER(3)
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
GET_MATCHES_FOOTER_HC(2) GET_MATCHES_FOOTER_HC(2)
@ -1409,7 +1493,7 @@ static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
SKIP_HEADER(2) SKIP_HEADER(2)
{ {
HASH2_CALC; HASH2_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
} }
@ -1420,7 +1504,7 @@ void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
SKIP_HEADER(3) SKIP_HEADER(3)
{ {
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = p->hash[hv]; curMatch = p->hash[hv];
p->hash[hv] = p->pos; p->hash[hv] = p->pos;
} }
@ -1433,7 +1517,7 @@ static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
UInt32 h2; UInt32 h2;
UInt32 *hash; UInt32 *hash;
HASH3_CALC; HASH3_CALC
hash = p->hash; hash = p->hash;
curMatch = (hash + kFix3HashSize)[hv]; curMatch = (hash + kFix3HashSize)[hv];
hash[h2] = hash[h2] =
@ -1448,7 +1532,7 @@ static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
UInt32 h2, h3; UInt32 h2, h3;
UInt32 *hash; UInt32 *hash;
HASH4_CALC; HASH4_CALC
hash = p->hash; hash = p->hash;
curMatch = (hash + kFix4HashSize)[hv]; curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = hash [h2] =
@ -1464,7 +1548,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
UInt32 h2, h3; UInt32 h2, h3;
UInt32 *hash; UInt32 *hash;
HASH5_CALC; HASH5_CALC
hash = p->hash; hash = p->hash;
curMatch = (hash + kFix5HashSize)[hv]; curMatch = (hash + kFix5HashSize)[hv];
hash [h2] = hash [h2] =
@ -1478,7 +1562,7 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
#define HC_SKIP_HEADER(minLen) \ #define HC_SKIP_HEADER(minLen) \
do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \
Byte *cur; \ const Byte *cur; \
UInt32 *hash; \ UInt32 *hash; \
UInt32 *son; \ UInt32 *son; \
UInt32 pos = p->pos; \ UInt32 pos = p->pos; \
@ -1510,7 +1594,7 @@ static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
HC_SKIP_HEADER(4) HC_SKIP_HEADER(4)
UInt32 h2, h3; UInt32 h2, h3;
HASH4_CALC; HASH4_CALC
curMatch = (hash + kFix4HashSize)[hv]; curMatch = (hash + kFix4HashSize)[hv];
hash [h2] = hash [h2] =
(hash + kFix3HashSize)[h3] = (hash + kFix3HashSize)[h3] =
@ -1540,7 +1624,7 @@ void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
{ {
HC_SKIP_HEADER(3) HC_SKIP_HEADER(3)
HASH_ZIP_CALC; HASH_ZIP_CALC
curMatch = hash[hv]; curMatch = hash[hv];
hash[hv] = pos; hash[hv] = pos;
@ -1590,17 +1674,17 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable)
void LzFindPrepare() void LzFindPrepare(void)
{ {
#ifndef FORCE_SATUR_SUB_128 #ifndef FORCE_LZFIND_SATUR_SUB_128
#ifdef USE_SATUR_SUB_128 #ifdef USE_LZFIND_SATUR_SUB_128
LZFIND_SATUR_SUB_CODE_FUNC f = NULL; LZFIND_SATUR_SUB_CODE_FUNC f = NULL;
#ifdef MY_CPU_ARM_OR_ARM64 #ifdef MY_CPU_ARM_OR_ARM64
{ {
if (CPU_IsSupported_NEON()) if (CPU_IsSupported_NEON())
{ {
// #pragma message ("=== LzFind NEON") // #pragma message ("=== LzFind NEON")
_PRF(printf("\n=== LzFind NEON\n")); PRF(printf("\n=== LzFind NEON\n"));
f = LzFind_SaturSub_128; f = LzFind_SaturSub_128;
} }
// f = 0; // for debug // f = 0; // for debug
@ -1609,20 +1693,25 @@ void LzFindPrepare()
if (CPU_IsSupported_SSE41()) if (CPU_IsSupported_SSE41())
{ {
// #pragma message ("=== LzFind SSE41") // #pragma message ("=== LzFind SSE41")
_PRF(printf("\n=== LzFind SSE41\n")); PRF(printf("\n=== LzFind SSE41\n"));
f = LzFind_SaturSub_128; f = LzFind_SaturSub_128;
#ifdef USE_AVX2 #ifdef USE_LZFIND_SATUR_SUB_256
if (CPU_IsSupported_AVX2()) if (CPU_IsSupported_AVX2())
{ {
// #pragma message ("=== LzFind AVX2") // #pragma message ("=== LzFind AVX2")
_PRF(printf("\n=== LzFind AVX2\n")); PRF(printf("\n=== LzFind AVX2\n"));
f = LzFind_SaturSub_256; f = LzFind_SaturSub_256;
} }
#endif #endif
} }
#endif // MY_CPU_ARM_OR_ARM64 #endif // MY_CPU_ARM_OR_ARM64
g_LzFind_SaturSub = f; g_LzFind_SaturSub = f;
#endif // USE_SATUR_SUB_128 #endif // USE_LZFIND_SATUR_SUB_128
#endif // FORCE_SATUR_SUB_128 #endif // FORCE_LZFIND_SATUR_SUB_128
} }
#undef MOVE_POS
#undef MOVE_POS_RET
#undef PRF

View file

@ -1,8 +1,8 @@
/* LzFind.h -- Match finder for LZ algorithms /* LzFind.h -- Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H #ifndef ZIP7_INC_LZ_FIND_H
#define __LZ_FIND_H #define ZIP7_INC_LZ_FIND_H
#include "7zTypes.h" #include "7zTypes.h"
@ -10,9 +10,9 @@ EXTERN_C_BEGIN
typedef UInt32 CLzRef; typedef UInt32 CLzRef;
typedef struct _CMatchFinder typedef struct
{ {
Byte *buffer; const Byte *buffer;
UInt32 pos; UInt32 pos;
UInt32 posLimit; UInt32 posLimit;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */ UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
@ -32,8 +32,8 @@ typedef struct _CMatchFinder
UInt32 hashMask; UInt32 hashMask;
UInt32 cutValue; UInt32 cutValue;
Byte *bufferBase; Byte *bufBase;
ISeqInStream *stream; ISeqInStreamPtr stream;
UInt32 blockSize; UInt32 blockSize;
UInt32 keepSizeBefore; UInt32 keepSizeBefore;
@ -43,7 +43,9 @@ typedef struct _CMatchFinder
size_t directInputRem; size_t directInputRem;
UInt32 historySize; UInt32 historySize;
UInt32 fixedHashSize; UInt32 fixedHashSize;
UInt32 hashSizeSum; Byte numHashBytes_Min;
Byte numHashOutBits;
Byte _pad2_[2];
SRes result; SRes result;
UInt32 crc[256]; UInt32 crc[256];
size_t numRefs; size_t numRefs;
@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p); void MatchFinder_Construct(CMatchFinder *p);
/* Conditions: /* (directInput = 0) is default value.
historySize <= 3 GB It's required to provide correct (directInput) value
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB before calling MatchFinder_Create().
You can set (directInput) by any of the following calls:
- MatchFinder_SET_DIRECT_INPUT_BUF()
- MatchFinder_SET_STREAM()
- MatchFinder_SET_STREAM_MODE()
*/ */
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
(p)->stream = NULL; \
(p)->directInput = 1; \
(p)->buffer = (_src_); \
(p)->directInputRem = (_srcLen_); }
/*
#define MatchFinder_SET_STREAM_MODE(p) { \
(p)->directInput = 0; }
*/
#define MatchFinder_SET_STREAM(p, _stream_) { \
(p)->stream = _stream_; \
(p)->directInput = 0; }
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc); ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/* /*
#define Inline_MatchFinder_InitPos(p, val) \ #define MatchFinder_INIT_POS(p, val) \
(p)->pos = (val); \ (p)->pos = (val); \
(p)->streamPos = (val); (p)->streamPos = (val);
*/ */
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ // void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
(p)->pos -= (subValue); \ (p)->pos -= (subValue); \
(p)->streamPos -= (subValue); (p)->streamPos -= (subValue);
@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder typedef struct
{ {
Mf_Init_Func Init; Mf_Init_Func Init;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;

View file

@ -1,5 +1,5 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms /* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2021-12-21 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -69,7 +69,7 @@ extern UInt64 g_NumIters_Bytes;
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
#define __MT_HASH4_CALC { \ #define MT_HASH4_CALC { \
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
h2 = temp & (kHash2Size - 1); \ h2 = temp & (kHash2Size - 1); \
temp ^= ((UInt32)cur[2] << 8); \ temp ^= ((UInt32)cur[2] << 8); \
@ -79,14 +79,14 @@ extern UInt64 g_NumIters_Bytes;
*/ */
MY_NO_INLINE Z7_NO_INLINE
static void MtSync_Construct(CMtSync *p) static void MtSync_Construct(CMtSync *p)
{ {
p->affinity = 0; p->affinity = 0;
p->wasCreated = False; p->wasCreated = False;
p->csWasInitialized = False; p->csWasInitialized = False;
p->csWasEntered = False; p->csWasEntered = False;
Thread_Construct(&p->thread); Thread_CONSTRUCT(&p->thread)
Event_Construct(&p->canStart); Event_Construct(&p->canStart);
Event_Construct(&p->wasStopped); Event_Construct(&p->wasStopped);
Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->freeSemaphore);
@ -116,7 +116,7 @@ static void MtSync_Construct(CMtSync *p)
(p)->csWasEntered = False; } (p)->csWasEntered = False; }
MY_NO_INLINE Z7_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p) static UInt32 MtSync_GetNextBlock(CMtSync *p)
{ {
UInt32 numBlocks = 0; UInt32 numBlocks = 0;
@ -140,14 +140,14 @@ static UInt32 MtSync_GetNextBlock(CMtSync *p)
// buffer is UNLOCKED here // buffer is UNLOCKED here
Semaphore_Wait(&p->filledSemaphore); Semaphore_Wait(&p->filledSemaphore);
LOCK_BUFFER(p); LOCK_BUFFER(p)
return numBlocks; return numBlocks;
} }
/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ /* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
MY_NO_INLINE Z7_NO_INLINE
static void MtSync_StopWriting(CMtSync *p) static void MtSync_StopWriting(CMtSync *p)
{ {
if (!Thread_WasCreated(&p->thread) || p->needStart) if (!Thread_WasCreated(&p->thread) || p->needStart)
@ -185,7 +185,7 @@ static void MtSync_StopWriting(CMtSync *p)
} }
MY_NO_INLINE Z7_NO_INLINE
static void MtSync_Destruct(CMtSync *p) static void MtSync_Destruct(CMtSync *p)
{ {
PRF(printf("\nMtSync_Destruct %p\n", p)); PRF(printf("\nMtSync_Destruct %p\n", p));
@ -220,11 +220,11 @@ static void MtSync_Destruct(CMtSync *p)
// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } // #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD // we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x) RINOK(x) #define RINOK_THREAD(x) RINOK_WRes(x)
// call it before each new file (when new starting is required): // call it before each new file (when new starting is required):
MY_NO_INLINE Z7_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{ {
WRes wres; WRes wres;
@ -245,12 +245,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
if (p->wasCreated) if (p->wasCreated)
return SZ_OK; return SZ_OK;
RINOK_THREAD(CriticalSection_Init(&p->cs)); RINOK_THREAD(CriticalSection_Init(&p->cs))
p->csWasInitialized = True; p->csWasInitialized = True;
p->csWasEntered = False; p->csWasEntered = False;
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
p->needStart = True; p->needStart = True;
p->exit = True; /* p->exit is unused before (canStart) Event. p->exit = True; /* p->exit is unused before (canStart) Event.
@ -264,13 +264,13 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
else else
wres = Thread_Create(&p->thread, startAddress, obj); wres = Thread_Create(&p->thread, startAddress, obj);
RINOK_THREAD(wres); RINOK_THREAD(wres)
p->wasCreated = True; p->wasCreated = True;
return SZ_OK; return SZ_OK;
} }
MY_NO_INLINE Z7_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{ {
const WRes wres = MtSync_Create_WRes(p, startAddress, obj); const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
@ -519,7 +519,7 @@ static void HashThreadFunc(CMatchFinderMt *mt)
if (mf->pos > (UInt32)kMtMaxValForNormalize - num) if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
{ {
const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
Inline_MatchFinder_ReduceOffsets(mf, subValue); MatchFinder_REDUCE_OFFSETS(mf, subValue)
MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
} }
@ -560,7 +560,7 @@ static void HashThreadFunc(CMatchFinderMt *mt)
*/ */
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes); UInt32 *posRes);
@ -749,7 +749,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
} }
MY_NO_INLINE Z7_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt) static void BtThreadFunc(CMatchFinderMt *mt)
{ {
CMtSync *p = &mt->btSync; CMtSync *p = &mt->btSync;
@ -864,15 +864,15 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
return SZ_OK; return SZ_OK;
} }
SRes MatchFinderMt_InitMt(CMatchFinderMt *p) SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{ {
RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
return MtSync_Init(&p->btSync, kMtBtNumBlocks); return MtSync_Init(&p->btSync, kMtBtNumBlocks);
} }
@ -941,7 +941,7 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
} }
MY_NO_INLINE Z7_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{ {
if (p->failure_LZ_BT) if (p->failure_LZ_BT)
@ -1163,7 +1163,7 @@ UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
*/ */
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{ {
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
UInt32 *hash = p->hash; UInt32 *hash = p->hash;
@ -1179,9 +1179,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
(hash + kFix3HashSize)[h3] = m; (hash + kFix3HashSize)[h3] = m;
// (hash + kFix4HashSize)[h4] = m; // (hash + kFix4HashSize)[h4] = m;
#define _USE_H2 // #define BT5_USE_H2
// #ifdef BT5_USE_H2
#ifdef _USE_H2
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
{ {
d[1] = m - c2 - 1; d[1] = m - c2 - 1;
@ -1197,8 +1196,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
} }
d[0] = 3; d[0] = 3;
d += 2; d += 2;
#ifdef _USE_H4 #ifdef BT5_USE_H4
if (c4 >= matchMinPos) if (c4 >= matchMinPos)
if ( if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
@ -1214,7 +1213,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
d[0] = 2; d[0] = 2;
d += 2; d += 2;
} }
#endif // #endif
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
{ {
@ -1228,7 +1227,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
d += 2; d += 2;
} }
#ifdef _USE_H4 #ifdef BT5_USE_H4
if (c4 >= matchMinPos) if (c4 >= matchMinPos)
if ( if (
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
@ -1244,7 +1243,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
} }
static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{ {
const UInt32 *bt = p->btBufPos; const UInt32 *bt = p->btBufPos;
const UInt32 len = *bt++; const UInt32 len = *bt++;
@ -1268,7 +1267,7 @@ static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{ {
const UInt32 *bt = p->btBufPos; const UInt32 *bt = p->btBufPos;
UInt32 len = *bt++; UInt32 len = *bt++;
@ -1398,3 +1397,10 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
break; break;
} }
} }
#undef RINOK_THREAD
#undef PRF
#undef MF
#undef GetUi24hi_from32
#undef LOCK_BUFFER
#undef UNLOCK_BUFFER

View file

@ -1,15 +1,15 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms /* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2021-07-12 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H #ifndef ZIP7_INC_LZ_FIND_MT_H
#define __LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H
#include "LzFind.h" #include "LzFind.h"
#include "Threads.h" #include "Threads.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
typedef struct _CMtSync typedef struct
{ {
UInt32 numProcessedBlocks; UInt32 numProcessedBlocks;
CThread thread; CThread thread;
@ -39,7 +39,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos, typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc); UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
typedef struct _CMatchFinderMt typedef struct
{ {
/* LZ */ /* LZ */
const Byte *pointerToCurPos; const Byte *pointerToCurPos;

View file

@ -1,5 +1,5 @@
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms /* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -41,8 +41,8 @@ UInt64 g_NumIters_Bytes;
// #define CYC_TO_POS_OFFSET 1 // for debug // #define CYC_TO_POS_OFFSET 1 // for debug
/* /*
MY_NO_INLINE Z7_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
{ {
do do
@ -214,13 +214,13 @@ else
to eliminate "movsx" BUG in old MSVC x64 compiler. to eliminate "movsx" BUG in old MSVC x64 compiler.
*/ */
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes); UInt32 *posRes);
MY_NO_INLINE Z7_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes) UInt32 *posRes)
@ -404,7 +404,7 @@ else
/* /*
typedef UInt32 uint32plus; // size_t typedef UInt32 uint32plus; // size_t
UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes) UInt32 *posRes)

View file

@ -1,8 +1,8 @@
/* LzHash.h -- HASH functions for LZ algorithms /* LzHash.h -- HASH constants for LZ algorithms
2019-10-30 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H #ifndef ZIP7_INC_LZ_HASH_H
#define __LZ_HASH_H #define ZIP7_INC_LZ_HASH_H
/* /*
(kHash2Size >= (1 << 8)) : Required (kHash2Size >= (1 << 8)) : Required

View file

@ -1,5 +1,5 @@
/* Lzma2Dec.c -- LZMA2 Decoder /* Lzma2Dec.c -- LZMA2 Decoder
2021-02-09 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */ /* #define SHOW_DEBUG_INFO */
@ -71,14 +71,14 @@ static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
{ {
Byte props[LZMA_PROPS_SIZE]; Byte props[LZMA_PROPS_SIZE];
RINOK(Lzma2Dec_GetOldProps(prop, props)); RINOK(Lzma2Dec_GetOldProps(prop, props))
return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
} }
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
{ {
Byte props[LZMA_PROPS_SIZE]; Byte props[LZMA_PROPS_SIZE];
RINOK(Lzma2Dec_GetOldProps(prop, props)); RINOK(Lzma2Dec_GetOldProps(prop, props))
return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
} }
@ -474,8 +474,8 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
SizeT outSize = *destLen, inSize = *srcLen; SizeT outSize = *destLen, inSize = *srcLen;
*destLen = *srcLen = 0; *destLen = *srcLen = 0;
*status = LZMA_STATUS_NOT_SPECIFIED; *status = LZMA_STATUS_NOT_SPECIFIED;
Lzma2Dec_Construct(&p); Lzma2Dec_CONSTRUCT(&p)
RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)); RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc))
p.decoder.dic = dest; p.decoder.dic = dest;
p.decoder.dicBufSize = outSize; p.decoder.dicBufSize = outSize;
Lzma2Dec_Init(&p); Lzma2Dec_Init(&p);
@ -487,3 +487,5 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
Lzma2Dec_FreeProbs(&p, alloc); Lzma2Dec_FreeProbs(&p, alloc);
return res; return res;
} }
#undef PRF

View file

@ -1,8 +1,8 @@
/* Lzma2Dec.h -- LZMA2 Decoder /* Lzma2Dec.h -- LZMA2 Decoder
2018-02-19 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_H #ifndef ZIP7_INC_LZMA2_DEC_H
#define __LZMA2_DEC_H #define ZIP7_INC_LZMA2_DEC_H
#include "LzmaDec.h" #include "LzmaDec.h"
@ -22,9 +22,10 @@ typedef struct
CLzmaDec decoder; CLzmaDec decoder;
} CLzma2Dec; } CLzma2Dec;
#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder) #define Lzma2Dec_CONSTRUCT(p) LzmaDec_CONSTRUCT(&(p)->decoder)
#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc) #define Lzma2Dec_Construct(p) Lzma2Dec_CONSTRUCT(p)
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc) #define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc); SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
@ -90,7 +91,7 @@ Lzma2Dec_GetUnpackExtra() returns the value that shows
at current input positon. at current input positon.
*/ */
#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0); #define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0)
/* ---------- One Call Interface ---------- */ /* ---------- One Call Interface ---------- */

View file

@ -1,44 +1,44 @@
/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread /* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
// #define SHOW_DEBUG_INFO // #define SHOW_DEBUG_INFO
// #define Z7_ST
// #define _7ZIP_ST
#ifdef SHOW_DEBUG_INFO #ifdef SHOW_DEBUG_INFO
#include <stdio.h> #include <stdio.h>
#endif #endif
#ifndef _7ZIP_ST
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
#define PRF_STR(s) PRF(printf("\n" s "\n"))
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2))
#endif
#include "Alloc.h" #include "Alloc.h"
#include "Lzma2Dec.h" #include "Lzma2Dec.h"
#include "Lzma2DecMt.h" #include "Lzma2DecMt.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "MtDec.h" #include "MtDec.h"
#define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28) #define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28)
#endif #endif
#ifndef Z7_ST
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
#define PRF_STR(s) PRF(printf("\n" s "\n");)
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2);)
#endif
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p) void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
{ {
p->inBufSize_ST = 1 << 20; p->inBufSize_ST = 1 << 20;
p->outStep_ST = 1 << 20; p->outStep_ST = 1 << 20;
#ifndef _7ZIP_ST #ifndef Z7_ST
p->numThreads = 1; p->numThreads = 1;
p->inBufSize_MT = 1 << 18; p->inBufSize_MT = 1 << 18;
p->outBlockMax = LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT; p->outBlockMax = LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT;
@ -48,7 +48,7 @@ void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
#ifndef _7ZIP_ST #ifndef Z7_ST
/* ---------- CLzma2DecMtThread ---------- */ /* ---------- CLzma2DecMtThread ---------- */
@ -81,7 +81,7 @@ typedef struct
/* ---------- CLzma2DecMt ---------- */ /* ---------- CLzma2DecMt ---------- */
typedef struct struct CLzma2DecMt
{ {
// ISzAllocPtr alloc; // ISzAllocPtr alloc;
ISzAllocPtr allocMid; ISzAllocPtr allocMid;
@ -90,9 +90,9 @@ typedef struct
CLzma2DecMtProps props; CLzma2DecMtProps props;
Byte prop; Byte prop;
ISeqInStream *inStream; ISeqInStreamPtr inStream;
ISeqOutStream *outStream; ISeqOutStreamPtr outStream;
ICompressProgress *progress; ICompressProgressPtr progress;
BoolInt finishMode; BoolInt finishMode;
BoolInt outSize_Defined; BoolInt outSize_Defined;
@ -111,14 +111,13 @@ typedef struct
size_t inPos; size_t inPos;
size_t inLim; size_t inLim;
#ifndef _7ZIP_ST #ifndef Z7_ST
UInt64 outProcessed_Parse; UInt64 outProcessed_Parse;
BoolInt mtc_WasConstructed; BoolInt mtc_WasConstructed;
CMtDec mtc; CMtDec mtc;
CLzma2DecMtThread coders[MTDEC__THREADS_MAX]; CLzma2DecMtThread coders[MTDEC_THREADS_MAX];
#endif #endif
};
} CLzma2DecMt;
@ -142,11 +141,11 @@ CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
// Lzma2DecMtProps_Init(&p->props); // Lzma2DecMtProps_Init(&p->props);
#ifndef _7ZIP_ST #ifndef Z7_ST
p->mtc_WasConstructed = False; p->mtc_WasConstructed = False;
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CLzma2DecMtThread *t = &p->coders[i]; CLzma2DecMtThread *t = &p->coders[i];
t->dec_created = False; t->dec_created = False;
@ -156,16 +155,16 @@ CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
} }
#endif #endif
return p; return (CLzma2DecMtHandle)(void *)p;
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
static void Lzma2DecMt_FreeOutBufs(CLzma2DecMt *p) static void Lzma2DecMt_FreeOutBufs(CLzma2DecMt *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CLzma2DecMtThread *t = &p->coders[i]; CLzma2DecMtThread *t = &p->coders[i];
if (t->outBuf) if (t->outBuf)
@ -196,13 +195,15 @@ static void Lzma2DecMt_FreeSt(CLzma2DecMt *p)
} }
void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp) // #define GET_CLzma2DecMt_p CLzma2DecMt *p = (CLzma2DecMt *)(void *)pp;
void Lzma2DecMt_Destroy(CLzma2DecMtHandle p)
{ {
CLzma2DecMt *p = (CLzma2DecMt *)pp; // GET_CLzma2DecMt_p
Lzma2DecMt_FreeSt(p); Lzma2DecMt_FreeSt(p);
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtc_WasConstructed) if (p->mtc_WasConstructed)
{ {
@ -211,7 +212,7 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp)
} }
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CLzma2DecMtThread *t = &p->coders[i]; CLzma2DecMtThread *t = &p->coders[i];
if (t->dec_created) if (t->dec_created)
@ -226,19 +227,19 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp)
#endif #endif
ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, pp); ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p);
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc) static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc)
{ {
CLzma2DecMt *me = (CLzma2DecMt *)obj; CLzma2DecMt *me = (CLzma2DecMt *)obj;
CLzma2DecMtThread *t = &me->coders[coderIndex]; CLzma2DecMtThread *t = &me->coders[coderIndex];
PRF_STR_INT_2("Parse", coderIndex, cc->srcSize); PRF_STR_INT_2("Parse", coderIndex, cc->srcSize)
cc->state = MTDEC_PARSE_CONTINUE; cc->state = MTDEC_PARSE_CONTINUE;
@ -246,7 +247,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
{ {
if (!t->dec_created) if (!t->dec_created)
{ {
Lzma2Dec_Construct(&t->dec); Lzma2Dec_CONSTRUCT(&t->dec)
t->dec_created = True; t->dec_created = True;
AlignOffsetAlloc_CreateVTable(&t->alloc); AlignOffsetAlloc_CreateVTable(&t->alloc);
{ {
@ -297,7 +298,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
// that must be finished at position <= outBlockMax. // that must be finished at position <= outBlockMax.
{ {
const SizeT srcOrig = cc->srcSize; const size_t srcOrig = cc->srcSize;
SizeT srcSize_Point = 0; SizeT srcSize_Point = 0;
SizeT dicPos_Point = 0; SizeT dicPos_Point = 0;
@ -306,10 +307,10 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
for (;;) for (;;)
{ {
SizeT srcCur = srcOrig - cc->srcSize; SizeT srcCur = (SizeT)(srcOrig - cc->srcSize);
status = Lzma2Dec_Parse(&t->dec, status = Lzma2Dec_Parse(&t->dec,
limit - t->dec.decoder.dicPos, (SizeT)limit - t->dec.decoder.dicPos,
cc->src + cc->srcSize, &srcCur, cc->src + cc->srcSize, &srcCur,
checkFinishBlock); checkFinishBlock);
@ -333,7 +334,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
if (t->dec.decoder.dicPos >= (1 << 14)) if (t->dec.decoder.dicPos >= (1 << 14))
break; break;
dicPos_Point = t->dec.decoder.dicPos; dicPos_Point = t->dec.decoder.dicPos;
srcSize_Point = cc->srcSize; srcSize_Point = (SizeT)cc->srcSize;
continue; continue;
} }
@ -391,7 +392,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
if (unpackRem != 0) if (unpackRem != 0)
{ {
/* we also reserve space for max possible number of output bytes of current LZMA chunk */ /* we also reserve space for max possible number of output bytes of current LZMA chunk */
SizeT rem = limit - dicPos; size_t rem = limit - dicPos;
if (rem > unpackRem) if (rem > unpackRem)
rem = unpackRem; rem = unpackRem;
dicPos += rem; dicPos += rem;
@ -444,7 +445,7 @@ static SRes Lzma2DecMt_MtCallback_PreCode(void *pp, unsigned coderIndex)
} }
t->dec.decoder.dic = dest; t->dec.decoder.dic = dest;
t->dec.decoder.dicBufSize = t->outPreSize; t->dec.decoder.dicBufSize = (SizeT)t->outPreSize;
t->needInit = True; t->needInit = True;
@ -462,7 +463,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
UNUSED_VAR(srcFinished) UNUSED_VAR(srcFinished)
PRF_STR_INT_2("Code", coderIndex, srcSize); PRF_STR_INT_2("Code", coderIndex, srcSize)
*inCodePos = t->inCodeSize; *inCodePos = t->inCodeSize;
*outCodePos = 0; *outCodePos = 0;
@ -476,13 +477,13 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
{ {
ELzmaStatus status; ELzmaStatus status;
size_t srcProcessed = srcSize; SizeT srcProcessed = (SizeT)srcSize;
BoolInt blockWasFinished = BoolInt blockWasFinished =
((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK ((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
|| t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK); || t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK);
SRes res = Lzma2Dec_DecodeToDic(&t->dec, SRes res = Lzma2Dec_DecodeToDic(&t->dec,
t->outPreSize, (SizeT)t->outPreSize,
src, &srcProcessed, src, &srcProcessed,
blockWasFinished ? LZMA_FINISH_END : LZMA_FINISH_ANY, blockWasFinished ? LZMA_FINISH_END : LZMA_FINISH_ANY,
&status); &status);
@ -540,7 +541,7 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
UNUSED_VAR(srcSize) UNUSED_VAR(srcSize)
UNUSED_VAR(isCross) UNUSED_VAR(isCross)
PRF_STR_INT_2("Write", coderIndex, srcSize); PRF_STR_INT_2("Write", coderIndex, srcSize)
*needContinue = False; *needContinue = False;
*canRecode = True; *canRecode = True;
@ -588,7 +589,7 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
*needContinue = needContinue2; *needContinue = needContinue2;
return SZ_OK; return SZ_OK;
} }
RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0)); RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0))
} }
} }
@ -611,11 +612,11 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p)
{ {
if (!p->dec_created) if (!p->dec_created)
{ {
Lzma2Dec_Construct(&p->dec); Lzma2Dec_CONSTRUCT(&p->dec)
p->dec_created = True; p->dec_created = True;
} }
RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt)); RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt))
if (!p->inBuf || p->inBufSize != p->props.inBufSize_ST) if (!p->inBuf || p->inBufSize != p->props.inBufSize_ST)
{ {
@ -634,7 +635,7 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p)
static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
#ifndef _7ZIP_ST #ifndef Z7_ST
, BoolInt tMode , BoolInt tMode
#endif #endif
) )
@ -646,7 +647,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
CLzma2Dec *dec; CLzma2Dec *dec;
#ifndef _7ZIP_ST #ifndef Z7_ST
if (tMode) if (tMode)
{ {
Lzma2DecMt_FreeOutBufs(p); Lzma2DecMt_FreeOutBufs(p);
@ -654,7 +655,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
} }
#endif #endif
RINOK(Lzma2Dec_Prepare_ST(p)); RINOK(Lzma2Dec_Prepare_ST(p))
dec = &p->dec; dec = &p->dec;
@ -681,7 +682,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
if (inPos == inLim) if (inPos == inLim)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
if (tMode) if (tMode)
{ {
inData = MtDec_Read(&p->mtc, &inLim); inData = MtDec_Read(&p->mtc, &inLim);
@ -710,7 +711,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
{ {
SizeT next = dec->decoder.dicBufSize; SizeT next = dec->decoder.dicBufSize;
if (next - wrPos > p->props.outStep_ST) if (next - wrPos > p->props.outStep_ST)
next = wrPos + p->props.outStep_ST; next = wrPos + (SizeT)p->props.outStep_ST;
size = next - dicPos; size = next - dicPos;
} }
@ -726,7 +727,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
} }
} }
inProcessed = inLim - inPos; inProcessed = (SizeT)(inLim - inPos);
res = Lzma2Dec_DecodeToDic(dec, dicPos + size, inData + inPos, &inProcessed, finishMode, &status); res = Lzma2Dec_DecodeToDic(dec, dicPos + size, inData + inPos, &inProcessed, finishMode, &status);
@ -755,7 +756,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
dec->decoder.dicPos = 0; dec->decoder.dicPos = 0;
wrPos = dec->decoder.dicPos; wrPos = dec->decoder.dicPos;
RINOK(res2); RINOK(res2)
if (needStop) if (needStop)
{ {
@ -788,7 +789,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
UInt64 outDelta = p->outProcessed - outPrev; UInt64 outDelta = p->outProcessed - outPrev;
if (inDelta >= (1 << 22) || outDelta >= (1 << 22)) if (inDelta >= (1 << 22) || outDelta >= (1 << 22))
{ {
RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed)); RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed))
inPrev = p->inProcessed; inPrev = p->inProcessed;
outPrev = p->outProcessed; outPrev = p->outProcessed;
} }
@ -798,20 +799,20 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp, SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
Byte prop, Byte prop,
const CLzma2DecMtProps *props, const CLzma2DecMtProps *props,
ISeqOutStream *outStream, const UInt64 *outDataSize, int finishMode, ISeqOutStreamPtr outStream, const UInt64 *outDataSize, int finishMode,
// Byte *outBuf, size_t *outBufSize, // Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
// const Byte *inData, size_t inDataSize, // const Byte *inData, size_t inDataSize,
UInt64 *inProcessed, UInt64 *inProcessed,
// UInt64 *outProcessed, // UInt64 *outProcessed,
int *isMT, int *isMT,
ICompressProgress *progress) ICompressProgressPtr progress)
{ {
CLzma2DecMt *p = (CLzma2DecMt *)pp; // GET_CLzma2DecMt_p
#ifndef _7ZIP_ST #ifndef Z7_ST
BoolInt tMode; BoolInt tMode;
#endif #endif
@ -845,7 +846,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
*isMT = False; *isMT = False;
#ifndef _7ZIP_ST #ifndef Z7_ST
tMode = False; tMode = False;
@ -939,7 +940,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
p->readWasFinished = p->mtc.readWasFinished; p->readWasFinished = p->mtc.readWasFinished;
p->inProcessed = p->mtc.inProcessed; p->inProcessed = p->mtc.inProcessed;
PRF_STR("----- decoding ST -----"); PRF_STR("----- decoding ST -----")
} }
} }
@ -950,7 +951,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
{ {
SRes res = Lzma2Dec_Decode_ST(p SRes res = Lzma2Dec_Decode_ST(p
#ifndef _7ZIP_ST #ifndef Z7_ST
, tMode , tMode
#endif #endif
); );
@ -967,7 +968,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
res = p->readRes; res = p->readRes;
/* /*
#ifndef _7ZIP_ST #ifndef Z7_ST
if (res == SZ_OK && tMode && p->mtc.parseRes != SZ_OK) if (res == SZ_OK && tMode && p->mtc.parseRes != SZ_OK)
res = p->mtc.parseRes; res = p->mtc.parseRes;
#endif #endif
@ -980,13 +981,13 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
/* ---------- Read from CLzma2DecMtHandle Interface ---------- */ /* ---------- Read from CLzma2DecMtHandle Interface ---------- */
SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp, SRes Lzma2DecMt_Init(CLzma2DecMtHandle p,
Byte prop, Byte prop,
const CLzma2DecMtProps *props, const CLzma2DecMtProps *props,
const UInt64 *outDataSize, int finishMode, const UInt64 *outDataSize, int finishMode,
ISeqInStream *inStream) ISeqInStreamPtr inStream)
{ {
CLzma2DecMt *p = (CLzma2DecMt *)pp; // GET_CLzma2DecMt_p
if (prop > 40) if (prop > 40)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
@ -1015,11 +1016,11 @@ SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
} }
SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, SRes Lzma2DecMt_Read(CLzma2DecMtHandle p,
Byte *data, size_t *outSize, Byte *data, size_t *outSize,
UInt64 *inStreamProcessed) UInt64 *inStreamProcessed)
{ {
CLzma2DecMt *p = (CLzma2DecMt *)pp; // GET_CLzma2DecMt_p
ELzmaFinishMode finishMode; ELzmaFinishMode finishMode;
SRes readRes; SRes readRes;
size_t size = *outSize; size_t size = *outSize;
@ -1055,8 +1056,8 @@ SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
readRes = ISeqInStream_Read(p->inStream, p->inBuf, &p->inLim); readRes = ISeqInStream_Read(p->inStream, p->inBuf, &p->inLim);
} }
inCur = p->inLim - p->inPos; inCur = (SizeT)(p->inLim - p->inPos);
outCur = size; outCur = (SizeT)size;
res = Lzma2Dec_DecodeToBuf(&p->dec, data, &outCur, res = Lzma2Dec_DecodeToBuf(&p->dec, data, &outCur,
p->inBuf + p->inPos, &inCur, finishMode, &status); p->inBuf + p->inPos, &inCur, finishMode, &status);
@ -1088,3 +1089,7 @@ SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
return readRes; return readRes;
} }
} }
#undef PRF
#undef PRF_STR
#undef PRF_STR_INT_2

View file

@ -1,8 +1,8 @@
/* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread /* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread
2018-02-17 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_MT_H #ifndef ZIP7_INC_LZMA2_DEC_MT_H
#define __LZMA2_DEC_MT_H #define ZIP7_INC_LZMA2_DEC_MT_H
#include "7zTypes.h" #include "7zTypes.h"
@ -13,7 +13,7 @@ typedef struct
size_t inBufSize_ST; size_t inBufSize_ST;
size_t outStep_ST; size_t outStep_ST;
#ifndef _7ZIP_ST #ifndef Z7_ST
unsigned numThreads; unsigned numThreads;
size_t inBufSize_MT; size_t inBufSize_MT;
size_t outBlockMax; size_t outBlockMax;
@ -38,7 +38,9 @@ SRes:
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/ */
typedef void * CLzma2DecMtHandle; typedef struct CLzma2DecMt CLzma2DecMt;
typedef CLzma2DecMt * CLzma2DecMtHandle;
// Z7_DECLARE_HANDLE(CLzma2DecMtHandle)
CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid); CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);
void Lzma2DecMt_Destroy(CLzma2DecMtHandle p); void Lzma2DecMt_Destroy(CLzma2DecMtHandle p);
@ -46,11 +48,11 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle p);
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p, SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
Byte prop, Byte prop,
const CLzma2DecMtProps *props, const CLzma2DecMtProps *props,
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
const UInt64 *outDataSize, // NULL means undefined const UInt64 *outDataSize, // NULL means undefined
int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished
// Byte *outBuf, size_t *outBufSize, // Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
// const Byte *inData, size_t inDataSize, // const Byte *inData, size_t inDataSize,
// out variables: // out variables:
@ -58,7 +60,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
int *isMT, /* out: (*isMT == 0), if single thread decoding was used */ int *isMT, /* out: (*isMT == 0), if single thread decoding was used */
// UInt64 *outProcessed, // UInt64 *outProcessed,
ICompressProgress *progress); ICompressProgressPtr progress);
/* ---------- Read from CLzma2DecMtHandle Interface ---------- */ /* ---------- Read from CLzma2DecMtHandle Interface ---------- */
@ -67,7 +69,7 @@ SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
Byte prop, Byte prop,
const CLzma2DecMtProps *props, const CLzma2DecMtProps *props,
const UInt64 *outDataSize, int finishMode, const UInt64 *outDataSize, int finishMode,
ISeqInStream *inStream); ISeqInStreamPtr inStream);
SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp, SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
Byte *data, size_t *outSize, Byte *data, size_t *outSize,

View file

@ -1,18 +1,18 @@
/* Lzma2Enc.c -- LZMA2 Encoder /* Lzma2Enc.c -- LZMA2 Encoder
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include <string.h> #include <string.h>
/* #define _7ZIP_ST */ /* #define Z7_ST */
#include "Lzma2Enc.h" #include "Lzma2Enc.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "MtCoder.h" #include "MtCoder.h"
#else #else
#define MTCODER__THREADS_MAX 1 #define MTCODER_THREADS_MAX 1
#endif #endif
#define LZMA2_CONTROL_LZMA (1 << 7) #define LZMA2_CONTROL_LZMA (1 << 7)
@ -40,7 +40,7 @@
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
ISeqInStream *realStream; ISeqInStreamPtr realStream;
UInt64 limit; UInt64 limit;
UInt64 processed; UInt64 processed;
int finished; int finished;
@ -53,15 +53,15 @@ static void LimitedSeqInStream_Init(CLimitedSeqInStream *p)
p->finished = 0; p->finished = 0;
} }
static SRes LimitedSeqInStream_Read(const ISeqInStream *pp, void *data, size_t *size) static SRes LimitedSeqInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size)
{ {
CLimitedSeqInStream *p = CONTAINER_FROM_VTBL(pp, CLimitedSeqInStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLimitedSeqInStream)
size_t size2 = *size; size_t size2 = *size;
SRes res = SZ_OK; SRes res = SZ_OK;
if (p->limit != (UInt64)(Int64)-1) if (p->limit != (UInt64)(Int64)-1)
{ {
UInt64 rem = p->limit - p->processed; const UInt64 rem = p->limit - p->processed;
if (size2 > rem) if (size2 > rem)
size2 = (size_t)rem; size2 = (size_t)rem;
} }
@ -95,8 +95,8 @@ static SRes Lzma2EncInt_InitStream(CLzma2EncInt *p, const CLzma2EncProps *props)
{ {
SizeT propsSize = LZMA_PROPS_SIZE; SizeT propsSize = LZMA_PROPS_SIZE;
Byte propsEncoded[LZMA_PROPS_SIZE]; Byte propsEncoded[LZMA_PROPS_SIZE];
RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps)); RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps))
RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize)); RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize))
p->propsByte = propsEncoded[0]; p->propsByte = propsEncoded[0];
p->propsAreSet = True; p->propsAreSet = True;
} }
@ -111,23 +111,23 @@ static void Lzma2EncInt_InitBlock(CLzma2EncInt *p)
} }
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig); ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
void LzmaEnc_Finish(CLzmaEncHandle pp); void LzmaEnc_Finish(CLzmaEncHandle p);
void LzmaEnc_SaveState(CLzmaEncHandle pp); void LzmaEnc_SaveState(CLzmaEncHandle p);
void LzmaEnc_RestoreState(CLzmaEncHandle pp); void LzmaEnc_RestoreState(CLzmaEncHandle p);
/* /*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp); UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p);
*/ */
static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf, static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
size_t *packSizeRes, ISeqOutStream *outStream) size_t *packSizeRes, ISeqOutStreamPtr outStream)
{ {
size_t packSizeLimit = *packSizeRes; size_t packSizeLimit = *packSizeRes;
size_t packSize = packSizeLimit; size_t packSize = packSizeLimit;
@ -167,7 +167,7 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
while (unpackSize > 0) while (unpackSize > 0)
{ {
UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE; const UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE;
if (packSizeLimit - destPos < u + 3) if (packSizeLimit - destPos < u + 3)
return SZ_ERROR_OUTPUT_EOF; return SZ_ERROR_OUTPUT_EOF;
outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET); outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET);
@ -196,9 +196,9 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
{ {
size_t destPos = 0; size_t destPos = 0;
UInt32 u = unpackSize - 1; const UInt32 u = unpackSize - 1;
UInt32 pm = (UInt32)(packSize - 1); const UInt32 pm = (UInt32)(packSize - 1);
unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0); const unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0);
PRF(printf(" ")); PRF(printf(" "));
@ -231,7 +231,7 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
void Lzma2EncProps_Init(CLzma2EncProps *p) void Lzma2EncProps_Init(CLzma2EncProps *p)
{ {
LzmaEncProps_Init(&p->lzmaProps); LzmaEncProps_Init(&p->lzmaProps);
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO; p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO;
p->numBlockThreads_Reduced = -1; p->numBlockThreads_Reduced = -1;
p->numBlockThreads_Max = -1; p->numBlockThreads_Max = -1;
p->numTotalThreads = -1; p->numTotalThreads = -1;
@ -251,8 +251,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
t2 = p->numBlockThreads_Max; t2 = p->numBlockThreads_Max;
t3 = p->numTotalThreads; t3 = p->numTotalThreads;
if (t2 > MTCODER__THREADS_MAX) if (t2 > MTCODER_THREADS_MAX)
t2 = MTCODER__THREADS_MAX; t2 = MTCODER_THREADS_MAX;
if (t3 <= 0) if (t3 <= 0)
{ {
@ -268,8 +268,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
t1 = 1; t1 = 1;
t2 = t3; t2 = t3;
} }
if (t2 > MTCODER__THREADS_MAX) if (t2 > MTCODER_THREADS_MAX)
t2 = MTCODER__THREADS_MAX; t2 = MTCODER_THREADS_MAX;
} }
else if (t1 <= 0) else if (t1 <= 0)
{ {
@ -286,8 +286,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
fileSize = p->lzmaProps.reduceSize; fileSize = p->lzmaProps.reduceSize;
if ( p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID if ( p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
&& p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO && p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO
&& (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1)) && (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1))
p->lzmaProps.reduceSize = p->blockSize; p->lzmaProps.reduceSize = p->blockSize;
@ -297,19 +297,19 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
t1 = p->lzmaProps.numThreads; t1 = p->lzmaProps.numThreads;
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID)
{ {
t2r = t2 = 1; t2r = t2 = 1;
t3 = t1; t3 = t1;
} }
else if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO && t2 <= 1) else if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO && t2 <= 1)
{ {
/* if there is no block multi-threading, we use SOLID block */ /* if there is no block multi-threading, we use SOLID block */
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID; p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID;
} }
else else
{ {
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO)
{ {
const UInt32 kMinSize = (UInt32)1 << 20; const UInt32 kMinSize = (UInt32)1 << 20;
const UInt32 kMaxSize = (UInt32)1 << 28; const UInt32 kMaxSize = (UInt32)1 << 28;
@ -344,7 +344,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
} }
static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize) static SRes Progress(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize)
{ {
return (p && ICompressProgress_Progress(p, inSize, outSize) != SZ_OK) ? SZ_ERROR_PROGRESS : SZ_OK; return (p && ICompressProgress_Progress(p, inSize, outSize) != SZ_OK) ? SZ_ERROR_PROGRESS : SZ_OK;
} }
@ -352,7 +352,7 @@ static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize)
/* ---------- Lzma2 ---------- */ /* ---------- Lzma2 ---------- */
typedef struct struct CLzma2Enc
{ {
Byte propEncoded; Byte propEncoded;
CLzma2EncProps props; CLzma2EncProps props;
@ -363,23 +363,22 @@ typedef struct
ISzAllocPtr alloc; ISzAllocPtr alloc;
ISzAllocPtr allocBig; ISzAllocPtr allocBig;
CLzma2EncInt coders[MTCODER__THREADS_MAX]; CLzma2EncInt coders[MTCODER_THREADS_MAX];
#ifndef _7ZIP_ST #ifndef Z7_ST
ISeqOutStream *outStream; ISeqOutStreamPtr outStream;
Byte *outBuf; Byte *outBuf;
size_t outBuf_Rem; /* remainder in outBuf */ size_t outBuf_Rem; /* remainder in outBuf */
size_t outBufSize; /* size of allocated outBufs[i] */ size_t outBufSize; /* size of allocated outBufs[i] */
size_t outBufsDataSizes[MTCODER__BLOCKS_MAX]; size_t outBufsDataSizes[MTCODER_BLOCKS_MAX];
BoolInt mtCoder_WasConstructed; BoolInt mtCoder_WasConstructed;
CMtCoder mtCoder; CMtCoder mtCoder;
Byte *outBufs[MTCODER__BLOCKS_MAX]; Byte *outBufs[MTCODER_BLOCKS_MAX];
#endif #endif
};
} CLzma2Enc;
@ -396,30 +395,30 @@ CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig)
p->allocBig = allocBig; p->allocBig = allocBig;
{ {
unsigned i; unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
p->coders[i].enc = NULL; p->coders[i].enc = NULL;
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
p->mtCoder_WasConstructed = False; p->mtCoder_WasConstructed = False;
{ {
unsigned i; unsigned i;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
p->outBufs[i] = NULL; p->outBufs[i] = NULL;
p->outBufSize = 0; p->outBufSize = 0;
} }
#endif #endif
return p; return (CLzma2EncHandle)p;
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p) static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
if (p->outBufs[i]) if (p->outBufs[i])
{ {
ISzAlloc_Free(p->alloc, p->outBufs[i]); ISzAlloc_Free(p->alloc, p->outBufs[i]);
@ -430,12 +429,13 @@ static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p)
#endif #endif
// #define GET_CLzma2Enc_p CLzma2Enc *p = (CLzma2Enc *)(void *)p;
void Lzma2Enc_Destroy(CLzma2EncHandle pp) void Lzma2Enc_Destroy(CLzma2EncHandle p)
{ {
CLzma2Enc *p = (CLzma2Enc *)pp; // GET_CLzma2Enc_p
unsigned i; unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
{ {
CLzma2EncInt *t = &p->coders[i]; CLzma2EncInt *t = &p->coders[i];
if (t->enc) if (t->enc)
@ -446,7 +446,7 @@ void Lzma2Enc_Destroy(CLzma2EncHandle pp)
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtCoder_WasConstructed) if (p->mtCoder_WasConstructed)
{ {
MtCoder_Destruct(&p->mtCoder); MtCoder_Destruct(&p->mtCoder);
@ -458,13 +458,13 @@ void Lzma2Enc_Destroy(CLzma2EncHandle pp)
ISzAlloc_Free(p->alloc, p->tempBufLzma); ISzAlloc_Free(p->alloc, p->tempBufLzma);
p->tempBufLzma = NULL; p->tempBufLzma = NULL;
ISzAlloc_Free(p->alloc, pp); ISzAlloc_Free(p->alloc, p);
} }
SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props) SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props)
{ {
CLzma2Enc *p = (CLzma2Enc *)pp; // GET_CLzma2Enc_p
CLzmaEncProps lzmaProps = props->lzmaProps; CLzmaEncProps lzmaProps = props->lzmaProps;
LzmaEncProps_Normalize(&lzmaProps); LzmaEncProps_Normalize(&lzmaProps);
if (lzmaProps.lc + lzmaProps.lp > LZMA2_LCLP_MAX) if (lzmaProps.lc + lzmaProps.lp > LZMA2_LCLP_MAX)
@ -475,16 +475,16 @@ SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props)
} }
void Lzma2Enc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize)
{ {
CLzma2Enc *p = (CLzma2Enc *)pp; // GET_CLzma2Enc_p
p->expectedDataSize = expectedDataSiize; p->expectedDataSize = expectedDataSiize;
} }
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp) Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p)
{ {
CLzma2Enc *p = (CLzma2Enc *)pp; // GET_CLzma2Enc_p
unsigned i; unsigned i;
UInt32 dicSize = LzmaEncProps_GetDictSize(&p->props.lzmaProps); UInt32 dicSize = LzmaEncProps_GetDictSize(&p->props.lzmaProps);
for (i = 0; i < 40; i++) for (i = 0; i < 40; i++)
@ -497,12 +497,12 @@ Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp)
static SRes Lzma2Enc_EncodeMt1( static SRes Lzma2Enc_EncodeMt1(
CLzma2Enc *me, CLzma2Enc *me,
CLzma2EncInt *p, CLzma2EncInt *p,
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
Byte *outBuf, size_t *outBufSize, Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
const Byte *inData, size_t inDataSize, const Byte *inData, size_t inDataSize,
int finished, int finished,
ICompressProgress *progress) ICompressProgressPtr progress)
{ {
UInt64 unpackTotal = 0; UInt64 unpackTotal = 0;
UInt64 packTotal = 0; UInt64 packTotal = 0;
@ -540,12 +540,12 @@ static SRes Lzma2Enc_EncodeMt1(
} }
} }
RINOK(Lzma2EncInt_InitStream(p, &me->props)); RINOK(Lzma2EncInt_InitStream(p, &me->props))
for (;;) for (;;)
{ {
SRes res = SZ_OK; SRes res = SZ_OK;
size_t inSizeCur = 0; SizeT inSizeCur = 0;
Lzma2EncInt_InitBlock(p); Lzma2EncInt_InitBlock(p);
@ -559,7 +559,7 @@ static SRes Lzma2Enc_EncodeMt1(
if (me->expectedDataSize != (UInt64)(Int64)-1 if (me->expectedDataSize != (UInt64)(Int64)-1
&& me->expectedDataSize >= unpackTotal) && me->expectedDataSize >= unpackTotal)
expected = me->expectedDataSize - unpackTotal; expected = me->expectedDataSize - unpackTotal;
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
&& expected > me->props.blockSize) && expected > me->props.blockSize)
expected = (size_t)me->props.blockSize; expected = (size_t)me->props.blockSize;
@ -569,14 +569,14 @@ static SRes Lzma2Enc_EncodeMt1(
&limitedInStream.vt, &limitedInStream.vt,
LZMA2_KEEP_WINDOW_SIZE, LZMA2_KEEP_WINDOW_SIZE,
me->alloc, me->alloc,
me->allocBig)); me->allocBig))
} }
else else
{ {
inSizeCur = inDataSize - (size_t)unpackTotal; inSizeCur = (SizeT)(inDataSize - (size_t)unpackTotal);
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
&& inSizeCur > me->props.blockSize) && inSizeCur > me->props.blockSize)
inSizeCur = (size_t)me->props.blockSize; inSizeCur = (SizeT)(size_t)me->props.blockSize;
// LzmaEnc_SetDataSize(p->enc, inSizeCur); // LzmaEnc_SetDataSize(p->enc, inSizeCur);
@ -584,7 +584,7 @@ static SRes Lzma2Enc_EncodeMt1(
inData + (size_t)unpackTotal, inSizeCur, inData + (size_t)unpackTotal, inSizeCur,
LZMA2_KEEP_WINDOW_SIZE, LZMA2_KEEP_WINDOW_SIZE,
me->alloc, me->alloc,
me->allocBig)); me->allocBig))
} }
for (;;) for (;;)
@ -621,7 +621,7 @@ static SRes Lzma2Enc_EncodeMt1(
unpackTotal += p->srcPos; unpackTotal += p->srcPos;
RINOK(res); RINOK(res)
if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur)) if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur))
return SZ_ERROR_FAIL; return SZ_ERROR_FAIL;
@ -652,12 +652,12 @@ static SRes Lzma2Enc_EncodeMt1(
#ifndef _7ZIP_ST #ifndef Z7_ST
static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex, static SRes Lzma2Enc_MtCallback_Code(void *p, unsigned coderIndex, unsigned outBufIndex,
const Byte *src, size_t srcSize, int finished) const Byte *src, size_t srcSize, int finished)
{ {
CLzma2Enc *me = (CLzma2Enc *)pp; CLzma2Enc *me = (CLzma2Enc *)p;
size_t destSize = me->outBufSize; size_t destSize = me->outBufSize;
SRes res; SRes res;
CMtProgressThunk progressThunk; CMtProgressThunk progressThunk;
@ -692,9 +692,9 @@ static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned out
} }
static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex) static SRes Lzma2Enc_MtCallback_Write(void *p, unsigned outBufIndex)
{ {
CLzma2Enc *me = (CLzma2Enc *)pp; CLzma2Enc *me = (CLzma2Enc *)p;
size_t size = me->outBufsDataSizes[outBufIndex]; size_t size = me->outBufsDataSizes[outBufIndex];
const Byte *data = me->outBufs[outBufIndex]; const Byte *data = me->outBufs[outBufIndex];
@ -713,14 +713,14 @@ static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex)
SRes Lzma2Enc_Encode2(CLzma2EncHandle pp, SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
Byte *outBuf, size_t *outBufSize, Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
const Byte *inData, size_t inDataSize, const Byte *inData, size_t inDataSize,
ICompressProgress *progress) ICompressProgressPtr progress)
{ {
CLzma2Enc *p = (CLzma2Enc *)pp; // GET_CLzma2Enc_p
if (inStream && inData) if (inStream && inData)
return SZ_ERROR_PARAM; return SZ_ERROR_PARAM;
@ -730,11 +730,11 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
{ {
unsigned i; unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
p->coders[i].propsAreSet = False; p->coders[i].propsAreSet = False;
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->props.numBlockThreads_Reduced > 1) if (p->props.numBlockThreads_Reduced > 1)
{ {
@ -772,7 +772,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */ return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */
{ {
size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16; const size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16;
if (destBlockSize < p->mtCoder.blockSize) if (destBlockSize < p->mtCoder.blockSize)
return SZ_ERROR_PARAM; return SZ_ERROR_PARAM;
if (p->outBufSize != destBlockSize) if (p->outBufSize != destBlockSize)
@ -784,7 +784,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
p->mtCoder.expectedDataSize = p->expectedDataSize; p->mtCoder.expectedDataSize = p->expectedDataSize;
{ {
SRes res = MtCoder_Code(&p->mtCoder); const SRes res = MtCoder_Code(&p->mtCoder);
if (!outStream) if (!outStream)
*outBufSize = (size_t)(p->outBuf - outBuf); *outBufSize = (size_t)(p->outBuf - outBuf);
return res; return res;
@ -801,3 +801,5 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
True, /* finished */ True, /* finished */
progress); progress);
} }
#undef PRF

View file

@ -1,15 +1,15 @@
/* Lzma2Enc.h -- LZMA2 Encoder /* Lzma2Enc.h -- LZMA2 Encoder
2017-07-27 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __LZMA2_ENC_H #ifndef ZIP7_INC_LZMA2_ENC_H
#define __LZMA2_ENC_H #define ZIP7_INC_LZMA2_ENC_H
#include "LzmaEnc.h" #include "LzmaEnc.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
#define LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO 0 #define LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO 0
#define LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID ((UInt64)(Int64)-1) #define LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID ((UInt64)(Int64)-1)
typedef struct typedef struct
{ {
@ -36,7 +36,9 @@ SRes:
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/ */
typedef void * CLzma2EncHandle; typedef struct CLzma2Enc CLzma2Enc;
typedef CLzma2Enc * CLzma2EncHandle;
// Z7_DECLARE_HANDLE(CLzma2EncHandle)
CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig); CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig);
void Lzma2Enc_Destroy(CLzma2EncHandle p); void Lzma2Enc_Destroy(CLzma2EncHandle p);
@ -44,11 +46,11 @@ SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props);
void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize); void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize);
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p); Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p);
SRes Lzma2Enc_Encode2(CLzma2EncHandle p, SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
Byte *outBuf, size_t *outBufSize, Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
const Byte *inData, size_t inDataSize, const Byte *inData, size_t inDataSize,
ICompressProgress *progress); ICompressProgressPtr progress);
EXTERN_C_END EXTERN_C_END

View file

@ -1,8 +1,8 @@
/* Lzma86.h -- LZMA + x86 (BCJ) Filter /* Lzma86.h -- LZMA + x86 (BCJ) Filter
2013-01-18 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
#ifndef __LZMA86_H #ifndef ZIP7_INC_LZMA86_H
#define __LZMA86_H #define ZIP7_INC_LZMA86_H
#include "7zTypes.h" #include "7zTypes.h"

View file

@ -1,5 +1,5 @@
/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder /* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder
2016-05-16 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -46,9 +46,8 @@ SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen)
return res; return res;
if (useFilter == 1) if (useFilter == 1)
{ {
UInt32 x86State; UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
x86_Convert_Init(x86State); z7_BranchConvSt_X86_Dec(dest, *destLen, 0, &x86State);
x86_Convert(dest, *destLen, 0, &x86State, 0);
} }
return SZ_OK; return SZ_OK;
} }

View file

@ -1,5 +1,5 @@
/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder /* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder
2018-07-04 : Igor Pavlov : Public domain */ 2023-03-03 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -46,9 +46,8 @@ int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
memcpy(filteredStream, src, srcLen); memcpy(filteredStream, src, srcLen);
} }
{ {
UInt32 x86State; UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
x86_Convert_Init(x86State); z7_BranchConvSt_X86_Enc(filteredStream, srcLen, 0, &x86State);
x86_Convert(filteredStream, srcLen, 0, &x86State, 1);
} }
} }

View file

@ -1,5 +1,5 @@
/* LzmaDec.c -- LZMA Decoder /* LzmaDec.c -- LZMA Decoder
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-07 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -8,15 +8,15 @@
/* #include "CpuArch.h" */ /* #include "CpuArch.h" */
#include "LzmaDec.h" #include "LzmaDec.h"
#define kNumTopBits 24 // #define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits) #define kTopValue ((UInt32)1 << 24)
#define kNumBitModelTotalBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
#define RC_INIT_SIZE 5 #define RC_INIT_SIZE 5
#ifndef _LZMA_DEC_OPT #ifndef Z7_LZMA_DEC_OPT
#define kNumMoveBits 5 #define kNumMoveBits 5
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } #define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
@ -25,14 +25,14 @@
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ #define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
{ UPDATE_0(p); i = (i + i); A0; } else \ { UPDATE_0(p) i = (i + i); A0; } else \
{ UPDATE_1(p); i = (i + i) + 1; A1; } { UPDATE_1(p) i = (i + i) + 1; A1; }
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); } #define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \ #define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
{ UPDATE_0(p + i); A0; } else \ { UPDATE_0(p + i) A0; } else \
{ UPDATE_1(p + i); A1; } { UPDATE_1(p + i) A1; }
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; ) #define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; ) #define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; ) #define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
@ -40,19 +40,19 @@
#define TREE_DECODE(probs, limit, i) \ #define TREE_DECODE(probs, limit, i) \
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
/* #define _LZMA_SIZE_OPT */ /* #define Z7_LZMA_SIZE_OPT */
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else #else
#define TREE_6_DECODE(probs, i) \ #define TREE_6_DECODE(probs, i) \
{ i = 1; \ { i = 1; \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i) \
i -= 0x40; } i -= 0x40; }
#endif #endif
@ -64,25 +64,25 @@
probLit = prob + (offs + bit + symbol); \ probLit = prob + (offs + bit + symbol); \
GET_BIT2(probLit, symbol, offs ^= bit; , ;) GET_BIT2(probLit, symbol, offs ^= bit; , ;)
#endif // _LZMA_DEC_OPT #endif // Z7_LZMA_DEC_OPT
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); } #define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound) #define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound; #define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound; #define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ #define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
{ UPDATE_0_CHECK; i = (i + i); A0; } else \ { UPDATE_0_CHECK i = (i + i); A0; } else \
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; } { UPDATE_1_CHECK i = (i + i) + 1; A1; }
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
#define TREE_DECODE_CHECK(probs, limit, i) \ #define TREE_DECODE_CHECK(probs, limit, i) \
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \ #define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
{ UPDATE_0_CHECK; i += m; m += m; } else \ { UPDATE_0_CHECK i += m; m += m; } else \
{ UPDATE_1_CHECK; m += m; i += m; } { UPDATE_1_CHECK m += m; i += m; }
#define kNumPosBitsMax 4 #define kNumPosBitsMax 4
@ -224,14 +224,14 @@ Out:
*/ */
#ifdef _LZMA_DEC_OPT #ifdef Z7_LZMA_DEC_OPT
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit); int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
#else #else
static static
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit) int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{ {
CLzmaProb *probs = GET_PROBS; CLzmaProb *probs = GET_PROBS;
unsigned state = (unsigned)p->state; unsigned state = (unsigned)p->state;
@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
unsigned symbol; unsigned symbol;
UPDATE_0(prob); UPDATE_0(prob)
prob = probs + Literal; prob = probs + Literal;
if (processedPos != 0 || checkDicSize != 0) if (processedPos != 0 || checkDicSize != 0)
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
{ {
state -= (state < 4) ? state : 3; state -= (state < 4) ? state : 3;
symbol = 1; symbol = 1;
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
do { NORMAL_LITER_DEC } while (symbol < 0x100); do { NORMAL_LITER_DEC } while (symbol < 0x100);
#else #else
NORMAL_LITER_DEC NORMAL_LITER_DEC
@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
unsigned offs = 0x100; unsigned offs = 0x100;
state -= (state < 10) ? 3 : 6; state -= (state < 10) ? 3 : 6;
symbol = 1; symbol = 1;
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
do do
{ {
unsigned bit; unsigned bit;
@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
} }
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRep + state; prob = probs + IsRep + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
state += kNumStates; state += kNumStates;
prob = probs + LenCoder; prob = probs + LenCoder;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG0 + state; prob = probs + IsRepG0 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
prob = probs + IsRep0Long + COMBINED_PS_STATE; prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
// that case was checked before with kBadRepCode // that case was checked before with kBadRepCode
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; } // if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
state = state < kNumLitStates ? 9 : 11; state = state < kNumLitStates ? 9 : 11;
continue; continue;
} }
UPDATE_1(prob); UPDATE_1(prob)
} }
else else
{ {
UInt32 distance; UInt32 distance;
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG1 + state; prob = probs + IsRepG1 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
distance = rep1; distance = rep1;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
prob = probs + IsRepG2 + state; prob = probs + IsRepG2 + state;
IF_BIT_0(prob) IF_BIT_0(prob)
{ {
UPDATE_0(prob); UPDATE_0(prob)
distance = rep2; distance = rep2;
} }
else else
{ {
UPDATE_1(prob); UPDATE_1(prob)
distance = rep3; distance = rep3;
rep3 = rep2; rep3 = rep2;
} }
@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
prob = probs + RepLenCoder; prob = probs + RepLenCoder;
} }
#ifdef _LZMA_SIZE_OPT #ifdef Z7_LZMA_SIZE_OPT
{ {
unsigned lim, offset; unsigned lim, offset;
CLzmaProb *probLen = prob + LenChoice; CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
offset = 0; offset = 0;
lim = (1 << kLenNumLowBits); lim = (1 << kLenNumLowBits);
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols; offset = kLenNumLowSymbols;
lim = (1 << kLenNumLowBits); lim = (1 << kLenNumLowBits);
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenHigh; probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2; offset = kLenNumLowSymbols * 2;
lim = (1 << kLenNumHighBits); lim = (1 << kLenNumHighBits);
} }
} }
TREE_DECODE(probLen, lim, len); TREE_DECODE(probLen, lim, len)
len += offset; len += offset;
} }
#else #else
@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
CLzmaProb *probLen = prob + LenChoice; CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
len = 1; len = 1;
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
len -= 8; len -= 8;
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0(probLen) IF_BIT_0(probLen)
{ {
UPDATE_0(probLen); UPDATE_0(probLen)
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
len = 1; len = 1;
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
TREE_GET_BIT(probLen, len); TREE_GET_BIT(probLen, len)
} }
else else
{ {
UPDATE_1(probLen); UPDATE_1(probLen)
probLen = prob + LenHigh; probLen = prob + LenHigh;
TREE_DECODE(probLen, (1 << kLenNumHighBits), len); TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
len += kLenNumLowSymbols * 2; len += kLenNumLowSymbols * 2;
} }
} }
@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
UInt32 distance; UInt32 distance;
prob = probs + PosSlot + prob = probs + PosSlot +
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
TREE_6_DECODE(prob, distance); TREE_6_DECODE(prob, distance)
if (distance >= kStartPosModelIndex) if (distance >= kStartPosModelIndex)
{ {
unsigned posSlot = (unsigned)distance; unsigned posSlot = (unsigned)distance;
@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance++; distance++;
do do
{ {
REV_BIT_VAR(prob, distance, m); REV_BIT_VAR(prob, distance, m)
} }
while (--numDirectBits); while (--numDirectBits);
distance -= m; distance -= m;
@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
distance <<= kNumAlignBits; distance <<= kNumAlignBits;
{ {
unsigned i = 1; unsigned i = 1;
REV_BIT_CONST(prob, i, 1); REV_BIT_CONST(prob, i, 1)
REV_BIT_CONST(prob, i, 2); REV_BIT_CONST(prob, i, 2)
REV_BIT_CONST(prob, i, 4); REV_BIT_CONST(prob, i, 4)
REV_BIT_LAST (prob, i, 8); REV_BIT_LAST (prob, i, 8)
distance |= i; distance |= i;
} }
if (distance == (UInt32)0xFFFFFFFF) if (distance == (UInt32)0xFFFFFFFF)
@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
} }
while (dicPos < limit && buf < bufLimit); while (dicPos < limit && buf < bufLimit);
NORMALIZE; NORMALIZE
p->buf = buf; p->buf = buf;
p->range = range; p->range = range;
@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit) static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
{ {
unsigned len = (unsigned)p->remainLen; unsigned len = (unsigned)p->remainLen;
if (len == 0 /* || len >= kMatchSpecLenStart */) if (len == 0 /* || len >= kMatchSpecLenStart */)
@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
(p->checkDicSize == p->prop.dicSize) (p->checkDicSize == p->prop.dicSize)
*/ */
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit) static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
{ {
if (p->checkDicSize == 0) if (p->checkDicSize == 0)
{ {
@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
else else
{ {
unsigned len; unsigned len;
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRep + state; prob = probs + IsRep + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
state = 0; state = 0;
prob = probs + LenCoder; prob = probs + LenCoder;
res = DUMMY_MATCH; res = DUMMY_MATCH;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
res = DUMMY_REP; res = DUMMY_REP;
prob = probs + IsRepG0 + state; prob = probs + IsRepG0 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
prob = probs + IsRep0Long + COMBINED_PS_STATE; prob = probs + IsRep0Long + COMBINED_PS_STATE;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
break; break;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
} }
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRepG1 + state; prob = probs + IsRepG1 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
prob = probs + IsRepG2 + state; prob = probs + IsRepG2 + state;
IF_BIT_0_CHECK(prob) IF_BIT_0_CHECK(prob)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
} }
} }
} }
@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
const CLzmaProb *probLen = prob + LenChoice; const CLzmaProb *probLen = prob + LenChoice;
IF_BIT_0_CHECK(probLen) IF_BIT_0_CHECK(probLen)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE; probLen = prob + LenLow + GET_LEN_STATE;
offset = 0; offset = 0;
limit = 1 << kLenNumLowBits; limit = 1 << kLenNumLowBits;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
probLen = prob + LenChoice2; probLen = prob + LenChoice2;
IF_BIT_0_CHECK(probLen) IF_BIT_0_CHECK(probLen)
{ {
UPDATE_0_CHECK; UPDATE_0_CHECK
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits); probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
offset = kLenNumLowSymbols; offset = kLenNumLowSymbols;
limit = 1 << kLenNumLowBits; limit = 1 << kLenNumLowBits;
} }
else else
{ {
UPDATE_1_CHECK; UPDATE_1_CHECK
probLen = prob + LenHigh; probLen = prob + LenHigh;
offset = kLenNumLowSymbols * 2; offset = kLenNumLowSymbols * 2;
limit = 1 << kLenNumHighBits; limit = 1 << kLenNumHighBits;
} }
} }
TREE_DECODE_CHECK(probLen, limit, len); TREE_DECODE_CHECK(probLen, limit, len)
len += offset; len += offset;
} }
@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
prob = probs + PosSlot + prob = probs + PosSlot +
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) << ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
kNumPosSlotBits); kNumPosSlotBits);
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
if (posSlot >= kStartPosModelIndex) if (posSlot >= kStartPosModelIndex)
{ {
unsigned numDirectBits = ((posSlot >> 1) - 1); unsigned numDirectBits = ((posSlot >> 1) - 1);
@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
unsigned m = 1; unsigned m = 1;
do do
{ {
REV_BIT_CHECK(prob, i, m); REV_BIT_CHECK(prob, i, m)
} }
while (--numDirectBits); while (--numDirectBits);
} }
@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
} }
break; break;
} }
NORMALIZE_CHECK; NORMALIZE_CHECK
*bufOut = buf; *bufOut = buf;
return res; return res;
@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
*/ */
#define RETURN__NOT_FINISHED__FOR_FINISH \ #define RETURN_NOT_FINISHED_FOR_FINISH \
*status = LZMA_STATUS_NOT_FINISHED; \ *status = LZMA_STATUS_NOT_FINISHED; \
return SZ_ERROR_DATA; // for strict mode return SZ_ERROR_DATA; // for strict mode
// return SZ_OK; // for relaxed mode // return SZ_OK; // for relaxed mode
@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
} }
if (p->remainLen != 0) if (p->remainLen != 0)
{ {
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
checkEndMarkNow = 1; checkEndMarkNow = 1;
} }
@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
for (i = 0; i < (unsigned)dummyProcessed; i++) for (i = 0; i < (unsigned)dummyProcessed; i++)
p->tempBuf[i] = src[i]; p->tempBuf[i] = src[i];
// p->remainLen = kMatchSpecLen_Error_Data; // p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
bufLimit = src; bufLimit = src;
@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize; (*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
p->tempBufSize = (unsigned)dummyProcessed; p->tempBufSize = (unsigned)dummyProcessed;
// p->remainLen = kMatchSpecLen_Error_Data; // p->remainLen = kMatchSpecLen_Error_Data;
RETURN__NOT_FINISHED__FOR_FINISH; RETURN_NOT_FINISHED_FOR_FINISH
} }
} }
@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc) SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
{ {
CLzmaProps propNew; CLzmaProps propNew;
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
p->prop = propNew; p->prop = propNew;
return SZ_OK; return SZ_OK;
} }
@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
{ {
CLzmaProps propNew; CLzmaProps propNew;
SizeT dicBufSize; SizeT dicBufSize;
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaProps_Decode(&propNew, props, propsSize))
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
{ {
UInt32 dictSize = propNew.dicSize; UInt32 dictSize = propNew.dicSize;
SizeT mask = ((UInt32)1 << 12) - 1; SizeT mask = ((UInt32)1 << 12) - 1;
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1; if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;; else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
dicBufSize = ((SizeT)dictSize + mask) & ~mask; dicBufSize = ((SizeT)dictSize + mask) & ~mask;
if (dicBufSize < dictSize) if (dicBufSize < dictSize)
dicBufSize = dictSize; dicBufSize = dictSize;
@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
*status = LZMA_STATUS_NOT_SPECIFIED; *status = LZMA_STATUS_NOT_SPECIFIED;
if (inSize < RC_INIT_SIZE) if (inSize < RC_INIT_SIZE)
return SZ_ERROR_INPUT_EOF; return SZ_ERROR_INPUT_EOF;
LzmaDec_Construct(&p); LzmaDec_CONSTRUCT(&p)
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc)); RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
p.dic = dest; p.dic = dest;
p.dicBufSize = outSize; p.dicBufSize = outSize;
LzmaDec_Init(&p); LzmaDec_Init(&p);

View file

@ -1,19 +1,19 @@
/* LzmaDec.h -- LZMA Decoder /* LzmaDec.h -- LZMA Decoder
2020-03-19 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H #ifndef ZIP7_INC_LZMA_DEC_H
#define __LZMA_DEC_H #define ZIP7_INC_LZMA_DEC_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */ /* #define Z7_LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs, /* Z7_LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */ but memory usage for CLzmaDec::probs will be doubled in that case */
typedef typedef
#ifdef _LZMA_PROB32 #ifdef Z7_LZMA_PROB32
UInt32 UInt32
#else #else
UInt16 UInt16
@ -25,7 +25,7 @@ typedef
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
typedef struct _CLzmaProps typedef struct
{ {
Byte lc; Byte lc;
Byte lp; Byte lp;
@ -73,7 +73,8 @@ typedef struct
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX]; Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec; } CLzmaDec;
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; } #define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
void LzmaDec_Init(CLzmaDec *p); void LzmaDec_Init(CLzmaDec *p);

View file

@ -1,5 +1,5 @@
/* LzmaEnc.c -- LZMA Encoder /* LzmaEnc.c -- LZMA Encoder
2022-07-15: Igor Pavlov : Public domain */ 2023-04-13: Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -16,22 +16,22 @@
#include "LzmaEnc.h" #include "LzmaEnc.h"
#include "LzFind.h" #include "LzFind.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "LzFindMt.h" #include "LzFindMt.h"
#endif #endif
/* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */ /* the following LzmaEnc_* declarations is internal LZMA interface for LZMA2 encoder */
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize, SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig); ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig); UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize); Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp); const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
void LzmaEnc_Finish(CLzmaEncHandle pp); void LzmaEnc_Finish(CLzmaEncHandle p);
void LzmaEnc_SaveState(CLzmaEncHandle pp); void LzmaEnc_SaveState(CLzmaEncHandle p);
void LzmaEnc_RestoreState(CLzmaEncHandle pp); void LzmaEnc_RestoreState(CLzmaEncHandle p);
#ifdef SHOW_STAT #ifdef SHOW_STAT
static unsigned g_STAT_OFFSET = 0; static unsigned g_STAT_OFFSET = 0;
@ -40,8 +40,8 @@ static unsigned g_STAT_OFFSET = 0;
/* for good normalization speed we still reserve 256 MB before 4 GB range */ /* for good normalization speed we still reserve 256 MB before 4 GB range */
#define kLzmaMaxHistorySize ((UInt32)15 << 28) #define kLzmaMaxHistorySize ((UInt32)15 << 28)
#define kNumTopBits 24 // #define kNumTopBits 24
#define kTopValue ((UInt32)1 << kNumTopBits) #define kTopValue ((UInt32)1 << 24)
#define kNumBitModelTotalBits 11 #define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits) #define kBitModelTotal (1 << kNumBitModelTotalBits)
@ -60,6 +60,7 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->dictSize = p->mc = 0; p->dictSize = p->mc = 0;
p->reduceSize = (UInt64)(Int64)-1; p->reduceSize = (UInt64)(Int64)-1;
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->numHashOutBits = 0;
p->writeEndMark = 0; p->writeEndMark = 0;
p->affinity = 0; p->affinity = 0;
} }
@ -99,7 +100,7 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
if (p->numThreads < 0) if (p->numThreads < 0)
p->numThreads = p->numThreads =
#ifndef _7ZIP_ST #ifndef Z7_ST
((p->btMode && p->algo) ? 2 : 1); ((p->btMode && p->algo) ? 2 : 1);
#else #else
1; 1;
@ -293,7 +294,7 @@ typedef struct
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
typedef typedef
#ifdef _LZMA_PROB32 #ifdef Z7_LZMA_PROB32
UInt32 UInt32
#else #else
UInt16 UInt16
@ -350,7 +351,7 @@ typedef struct
Byte *buf; Byte *buf;
Byte *bufLim; Byte *bufLim;
Byte *bufBase; Byte *bufBase;
ISeqOutStream *outStream; ISeqOutStreamPtr outStream;
UInt64 processed; UInt64 processed;
SRes res; SRes res;
} CRangeEnc; } CRangeEnc;
@ -383,7 +384,7 @@ typedef struct
typedef UInt32 CProbPrice; typedef UInt32 CProbPrice;
typedef struct struct CLzmaEnc
{ {
void *matchFinderObj; void *matchFinderObj;
IMatchFinder2 matchFinder; IMatchFinder2 matchFinder;
@ -426,7 +427,7 @@ typedef struct
UInt32 dictSize; UInt32 dictSize;
SRes result; SRes result;
#ifndef _7ZIP_ST #ifndef Z7_ST
BoolInt mtMode; BoolInt mtMode;
// begin of CMatchFinderMt is used in LZ thread // begin of CMatchFinderMt is used in LZ thread
CMatchFinderMt matchFinderMt; CMatchFinderMt matchFinderMt;
@ -439,7 +440,7 @@ typedef struct
// we suppose that we have 8-bytes alignment after CMatchFinder // we suppose that we have 8-bytes alignment after CMatchFinder
#ifndef _7ZIP_ST #ifndef Z7_ST
Byte pad[128]; Byte pad[128];
#endif #endif
@ -479,77 +480,59 @@ typedef struct
CSaveState saveState; CSaveState saveState;
// BoolInt mf_Failure; // BoolInt mf_Failure;
#ifndef _7ZIP_ST #ifndef Z7_ST
Byte pad2[128]; Byte pad2[128];
#endif #endif
} CLzmaEnc; };
#define MFB (p->matchFinderBase) #define MFB (p->matchFinderBase)
/* /*
#ifndef _7ZIP_ST #ifndef Z7_ST
#define MFB (p->matchFinderMt.MatchFinder) #define MFB (p->matchFinderMt.MatchFinder)
#endif #endif
*/ */
#define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); // #define GET_CLzmaEnc_p CLzmaEnc *p = (CLzmaEnc*)(void *)p;
// #define GET_const_CLzmaEnc_p const CLzmaEnc *p = (const CLzmaEnc*)(const void *)p;
void LzmaEnc_SaveState(CLzmaEncHandle pp) #define COPY_ARR(dest, src, arr) memcpy((dest)->arr, (src)->arr, sizeof((src)->arr));
#define COPY_LZMA_ENC_STATE(d, s, p) \
(d)->state = (s)->state; \
COPY_ARR(d, s, reps) \
COPY_ARR(d, s, posAlignEncoder) \
COPY_ARR(d, s, isRep) \
COPY_ARR(d, s, isRepG0) \
COPY_ARR(d, s, isRepG1) \
COPY_ARR(d, s, isRepG2) \
COPY_ARR(d, s, isMatch) \
COPY_ARR(d, s, isRep0Long) \
COPY_ARR(d, s, posSlotEncoder) \
COPY_ARR(d, s, posEncoders) \
(d)->lenProbs = (s)->lenProbs; \
(d)->repLenProbs = (s)->repLenProbs; \
memcpy((d)->litProbs, (s)->litProbs, ((UInt32)0x300 << (p)->lclp) * sizeof(CLzmaProb));
void LzmaEnc_SaveState(CLzmaEncHandle p)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
CSaveState *dest = &p->saveState; CSaveState *v = &p->saveState;
COPY_LZMA_ENC_STATE(v, p, p)
dest->state = p->state; }
dest->lenProbs = p->lenProbs;
dest->repLenProbs = p->repLenProbs;
COPY_ARR(dest, p, reps); void LzmaEnc_RestoreState(CLzmaEncHandle p)
{
COPY_ARR(dest, p, posAlignEncoder); // GET_CLzmaEnc_p
COPY_ARR(dest, p, isRep); const CSaveState *v = &p->saveState;
COPY_ARR(dest, p, isRepG0); COPY_LZMA_ENC_STATE(p, v, p)
COPY_ARR(dest, p, isRepG1);
COPY_ARR(dest, p, isRepG2);
COPY_ARR(dest, p, isMatch);
COPY_ARR(dest, p, isRep0Long);
COPY_ARR(dest, p, posSlotEncoder);
COPY_ARR(dest, p, posEncoders);
memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << p->lclp) * sizeof(CLzmaProb));
} }
void LzmaEnc_RestoreState(CLzmaEncHandle pp) Z7_NO_INLINE
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
{ {
CLzmaEnc *dest = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
const CSaveState *p = &dest->saveState;
dest->state = p->state;
dest->lenProbs = p->lenProbs;
dest->repLenProbs = p->repLenProbs;
COPY_ARR(dest, p, reps);
COPY_ARR(dest, p, posAlignEncoder);
COPY_ARR(dest, p, isRep);
COPY_ARR(dest, p, isRepG0);
COPY_ARR(dest, p, isRepG1);
COPY_ARR(dest, p, isRepG2);
COPY_ARR(dest, p, isMatch);
COPY_ARR(dest, p, isRep0Long);
COPY_ARR(dest, p, posSlotEncoder);
COPY_ARR(dest, p, posEncoders);
memcpy(dest->litProbs, p->litProbs, ((UInt32)0x300 << dest->lclp) * sizeof(CLzmaProb));
}
SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
CLzmaEncProps props = *props2; CLzmaEncProps props = *props2;
LzmaEncProps_Normalize(&props); LzmaEncProps_Normalize(&props);
@ -585,6 +568,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
p->fastMode = (props.algo == 0); p->fastMode = (props.algo == 0);
// p->_maxMode = True; // p->_maxMode = True;
MFB.btMode = (Byte)(props.btMode ? 1 : 0); MFB.btMode = (Byte)(props.btMode ? 1 : 0);
// MFB.btMode = (Byte)(props.btMode);
{ {
unsigned numHashBytes = 4; unsigned numHashBytes = 4;
if (props.btMode) if (props.btMode)
@ -595,13 +579,15 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
if (props.numHashBytes >= 5) numHashBytes = 5; if (props.numHashBytes >= 5) numHashBytes = 5;
MFB.numHashBytes = numHashBytes; MFB.numHashBytes = numHashBytes;
// MFB.numHashBytes_Min = 2;
MFB.numHashOutBits = (Byte)props.numHashOutBits;
} }
MFB.cutValue = props.mc; MFB.cutValue = props.mc;
p->writeEndMark = (BoolInt)props.writeEndMark; p->writeEndMark = (BoolInt)props.writeEndMark;
#ifndef _7ZIP_ST #ifndef Z7_ST
/* /*
if (newMultiThread != _multiThread) if (newMultiThread != _multiThread)
{ {
@ -618,9 +604,9 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
} }
void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
MFB.expectedDataSize = expectedDataSiize; MFB.expectedDataSize = expectedDataSiize;
} }
@ -684,7 +670,7 @@ static void RangeEnc_Init(CRangeEnc *p)
p->res = SZ_OK; p->res = SZ_OK;
} }
MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p) Z7_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
{ {
const size_t num = (size_t)(p->buf - p->bufBase); const size_t num = (size_t)(p->buf - p->bufBase);
if (p->res == SZ_OK) if (p->res == SZ_OK)
@ -696,7 +682,7 @@ MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
p->buf = p->bufBase; p->buf = p->bufBase;
} }
MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p) Z7_NO_INLINE static void Z7_FASTCALL RangeEnc_ShiftLow(CRangeEnc *p)
{ {
UInt32 low = (UInt32)p->low; UInt32 low = (UInt32)p->low;
unsigned high = (unsigned)(p->low >> 32); unsigned high = (unsigned)(p->low >> 32);
@ -741,9 +727,9 @@ static void RangeEnc_FlushData(CRangeEnc *p)
ttt = *(prob); \ ttt = *(prob); \
newBound = (range >> kNumBitModelTotalBits) * ttt; newBound = (range >> kNumBitModelTotalBits) * ttt;
// #define _LZMA_ENC_USE_BRANCH // #define Z7_LZMA_ENC_USE_BRANCH
#ifdef _LZMA_ENC_USE_BRANCH #ifdef Z7_LZMA_ENC_USE_BRANCH
#define RC_BIT(p, prob, bit) { \ #define RC_BIT(p, prob, bit) { \
RC_BIT_PRE(p, prob) \ RC_BIT_PRE(p, prob) \
@ -811,7 +797,7 @@ static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
CLzmaProb *prob = probs + (sym >> 8); CLzmaProb *prob = probs + (sym >> 8);
UInt32 bit = (sym >> 7) & 1; UInt32 bit = (sym >> 7) & 1;
sym <<= 1; sym <<= 1;
RC_BIT(p, prob, bit); RC_BIT(p, prob, bit)
} }
while (sym < 0x10000); while (sym < 0x10000);
p->range = range; p->range = range;
@ -833,7 +819,7 @@ static void LitEnc_EncodeMatched(CRangeEnc *p, CLzmaProb *probs, UInt32 sym, UIn
bit = (sym >> 7) & 1; bit = (sym >> 7) & 1;
sym <<= 1; sym <<= 1;
offs &= ~(matchByte ^ sym); offs &= ~(matchByte ^ sym);
RC_BIT(p, prob, bit); RC_BIT(p, prob, bit)
} }
while (sym < 0x10000); while (sym < 0x10000);
p->range = range; p->range = range;
@ -867,10 +853,10 @@ static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
#define GET_PRICE(prob, bit) \ #define GET_PRICE(prob, bit) \
p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
#define GET_PRICEa(prob, bit) \ #define GET_PRICEa(prob, bit) \
ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]
#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
@ -921,7 +907,7 @@ static void RcTree_ReverseEncode(CRangeEnc *rc, CLzmaProb *probs, unsigned numBi
unsigned bit = sym & 1; unsigned bit = sym & 1;
// RangeEnc_EncodeBit(rc, probs + m, bit); // RangeEnc_EncodeBit(rc, probs + m, bit);
sym >>= 1; sym >>= 1;
RC_BIT(rc, probs + m, bit); RC_BIT(rc, probs + m, bit)
m = (m << 1) | bit; m = (m << 1) | bit;
} }
while (--numBits); while (--numBits);
@ -944,15 +930,15 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
UInt32 range, ttt, newBound; UInt32 range, ttt, newBound;
CLzmaProb *probs = p->low; CLzmaProb *probs = p->low;
range = rc->range; range = rc->range;
RC_BIT_PRE(rc, probs); RC_BIT_PRE(rc, probs)
if (sym >= kLenNumLowSymbols) if (sym >= kLenNumLowSymbols)
{ {
RC_BIT_1(rc, probs); RC_BIT_1(rc, probs)
probs += kLenNumLowSymbols; probs += kLenNumLowSymbols;
RC_BIT_PRE(rc, probs); RC_BIT_PRE(rc, probs)
if (sym >= kLenNumLowSymbols * 2) if (sym >= kLenNumLowSymbols * 2)
{ {
RC_BIT_1(rc, probs); RC_BIT_1(rc, probs)
rc->range = range; rc->range = range;
// RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2); // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2); LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
@ -965,11 +951,11 @@ static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posS
{ {
unsigned m; unsigned m;
unsigned bit; unsigned bit;
RC_BIT_0(rc, probs); RC_BIT_0(rc, probs)
probs += (posState << (1 + kLenNumLowBits)); probs += (posState << (1 + kLenNumLowBits));
bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit; bit = (sym >> 2) ; RC_BIT(rc, probs + 1, bit) m = (1 << 1) + bit;
bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit; bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit) m = (m << 1) + bit;
bit = sym & 1; RC_BIT(rc, probs + m, bit); bit = sym & 1; RC_BIT(rc, probs + m, bit)
rc->range = range; rc->range = range;
} }
} }
@ -990,7 +976,7 @@ static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *price
} }
MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables( Z7_NO_INLINE static void Z7_FASTCALL LenPriceEnc_UpdateTables(
CLenPriceEnc *p, CLenPriceEnc *p,
unsigned numPosStates, unsigned numPosStates,
const CLenEnc *enc, const CLenEnc *enc,
@ -1152,7 +1138,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes)
+ GET_PRICE_1(p->isRep[state]) \ + GET_PRICE_1(p->isRep[state]) \
+ GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRepG0[state])
MY_FORCE_INLINE Z7_FORCE_INLINE
static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState) static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
{ {
UInt32 price; UInt32 price;
@ -1331,7 +1317,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
LitEnc_GetPrice(probs, curByte, p->ProbPrices)); LitEnc_GetPrice(probs, curByte, p->ProbPrices));
} }
MakeAs_Lit(&p->opt[1]); MakeAs_Lit(&p->opt[1])
matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]); matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]); repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);
@ -1343,7 +1329,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
if (shortRepPrice < p->opt[1].price) if (shortRepPrice < p->opt[1].price)
{ {
p->opt[1].price = shortRepPrice; p->opt[1].price = shortRepPrice;
MakeAs_ShortRep(&p->opt[1]); MakeAs_ShortRep(&p->opt[1])
} }
if (last < 2) if (last < 2)
{ {
@ -1410,7 +1396,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
else else
{ {
unsigned slot; unsigned slot;
GetPosSlot2(dist, slot); GetPosSlot2(dist, slot)
price += p->alignPrices[dist & kAlignMask]; price += p->alignPrices[dist & kAlignMask];
price += p->posSlotPrices[lenToPosState][slot]; price += p->posSlotPrices[lenToPosState][slot];
} }
@ -1486,7 +1472,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
unsigned delta = best - cur; unsigned delta = best - cur;
if (delta != 0) if (delta != 0)
{ {
MOVE_POS(p, delta); MOVE_POS(p, delta)
} }
} }
cur = best; cur = best;
@ -1633,7 +1619,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
{ {
nextOpt->price = litPrice; nextOpt->price = litPrice;
nextOpt->len = 1; nextOpt->len = 1;
MakeAs_Lit(nextOpt); MakeAs_Lit(nextOpt)
nextIsLit = True; nextIsLit = True;
} }
} }
@ -1667,7 +1653,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
{ {
nextOpt->price = shortRepPrice; nextOpt->price = shortRepPrice;
nextOpt->len = 1; nextOpt->len = 1;
MakeAs_ShortRep(nextOpt); MakeAs_ShortRep(nextOpt)
nextIsLit = False; nextIsLit = False;
} }
} }
@ -1871,7 +1857,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
dist = MATCHES[(size_t)offs + 1]; dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances) // if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot); GetPosSlot2(dist, posSlot)
for (len = /*2*/ startLen; ; len++) for (len = /*2*/ startLen; ; len++)
{ {
@ -1962,7 +1948,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position)
break; break;
dist = MATCHES[(size_t)offs + 1]; dist = MATCHES[(size_t)offs + 1];
// if (dist >= kNumFullDistances) // if (dist >= kNumFullDistances)
GetPosSlot2(dist, posSlot); GetPosSlot2(dist, posSlot)
} }
} }
} }
@ -2138,7 +2124,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
{ {
UInt32 ttt, newBound; UInt32 ttt, newBound;
RC_BIT_PRE(p, probs + m) RC_BIT_PRE(p, probs + m)
RC_BIT_1(&p->rc, probs + m); RC_BIT_1(&p->rc, probs + m)
m = (m << 1) + 1; m = (m << 1) + 1;
} }
while (m < (1 << kNumPosSlotBits)); while (m < (1 << kNumPosSlotBits));
@ -2163,7 +2149,7 @@ static void WriteEndMarker(CLzmaEnc *p, unsigned posState)
{ {
UInt32 ttt, newBound; UInt32 ttt, newBound;
RC_BIT_PRE(p, probs + m) RC_BIT_PRE(p, probs + m)
RC_BIT_1(&p->rc, probs + m); RC_BIT_1(&p->rc, probs + m)
m = (m << 1) + 1; m = (m << 1) + 1;
} }
while (m < kAlignTableSize); while (m < kAlignTableSize);
@ -2179,7 +2165,7 @@ static SRes CheckErrors(CLzmaEnc *p)
if (p->rc.res != SZ_OK) if (p->rc.res != SZ_OK)
p->result = SZ_ERROR_WRITE; p->result = SZ_ERROR_WRITE;
#ifndef _7ZIP_ST #ifndef Z7_ST
if ( if (
// p->mf_Failure || // p->mf_Failure ||
(p->mtMode && (p->mtMode &&
@ -2187,7 +2173,7 @@ static SRes CheckErrors(CLzmaEnc *p)
p->matchFinderMt.failure_LZ_BT)) p->matchFinderMt.failure_LZ_BT))
) )
{ {
p->result = MY_HRES_ERROR__INTERNAL_ERROR; p->result = MY_HRES_ERROR_INTERNAL_ERROR;
// printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); // printf("\nCheckErrors p->matchFinderMt.failureLZ\n");
} }
#endif #endif
@ -2201,7 +2187,7 @@ static SRes CheckErrors(CLzmaEnc *p)
} }
MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos) Z7_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
{ {
/* ReleaseMFStream(); */ /* ReleaseMFStream(); */
p->finished = True; p->finished = True;
@ -2213,7 +2199,7 @@ MY_NO_INLINE static SRes Flush(CLzmaEnc *p, UInt32 nowPos)
} }
MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p) Z7_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
{ {
unsigned i; unsigned i;
const CProbPrice *ProbPrices = p->ProbPrices; const CProbPrice *ProbPrices = p->ProbPrices;
@ -2237,7 +2223,7 @@ MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
} }
MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) Z7_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
{ {
// int y; for (y = 0; y < 100; y++) { // int y; for (y = 0; y < 100; y++) {
@ -2337,7 +2323,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
RangeEnc_Construct(&p->rc); RangeEnc_Construct(&p->rc);
MatchFinder_Construct(&MFB); MatchFinder_Construct(&MFB);
#ifndef _7ZIP_ST #ifndef Z7_ST
p->matchFinderMt.MatchFinder = &MFB; p->matchFinderMt.MatchFinder = &MFB;
MatchFinderMt_Construct(&p->matchFinderMt); MatchFinderMt_Construct(&p->matchFinderMt);
#endif #endif
@ -2345,7 +2331,7 @@ static void LzmaEnc_Construct(CLzmaEnc *p)
{ {
CLzmaEncProps props; CLzmaEncProps props;
LzmaEncProps_Init(&props); LzmaEncProps_Init(&props);
LzmaEnc_SetProps(p, &props); LzmaEnc_SetProps((CLzmaEncHandle)(void *)p, &props);
} }
#ifndef LZMA_LOG_BSR #ifndef LZMA_LOG_BSR
@ -2376,7 +2362,7 @@ static void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig) static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
#endif #endif
@ -2387,21 +2373,22 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig) void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig); // GET_CLzmaEnc_p
LzmaEnc_Destruct(p, alloc, allocBig);
ISzAlloc_Free(alloc, p); ISzAlloc_Free(alloc, p);
} }
MY_NO_INLINE Z7_NO_INLINE
static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize) static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpackSize)
{ {
UInt32 nowPos32, startPos32; UInt32 nowPos32, startPos32;
if (p->needInit) if (p->needInit)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtMode) if (p->mtMode)
{ {
RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); RINOK(MatchFinderMt_InitMt(&p->matchFinderMt))
} }
#endif #endif
p->matchFinder.Init(p->matchFinderObj); p->matchFinder.Init(p->matchFinderObj);
@ -2410,7 +2397,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
if (p->finished) if (p->finished)
return p->result; return p->result;
RINOK(CheckErrors(p)); RINOK(CheckErrors(p))
nowPos32 = (UInt32)p->nowPos64; nowPos32 = (UInt32)p->nowPos64;
startPos32 = nowPos32; startPos32 = nowPos32;
@ -2473,7 +2460,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
const Byte *data; const Byte *data;
unsigned state; unsigned state;
RC_BIT_0(&p->rc, probs); RC_BIT_0(&p->rc, probs)
p->rc.range = range; p->rc.range = range;
data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
probs = LIT_PROBS(nowPos32, *(data - 1)); probs = LIT_PROBS(nowPos32, *(data - 1));
@ -2487,53 +2474,53 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
} }
else else
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRep[p->state]; probs = &p->isRep[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist < LZMA_NUM_REPS) if (dist < LZMA_NUM_REPS)
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRepG0[p->state]; probs = &p->isRepG0[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist == 0) if (dist == 0)
{ {
RC_BIT_0(&p->rc, probs); RC_BIT_0(&p->rc, probs)
probs = &p->isRep0Long[p->state][posState]; probs = &p->isRep0Long[p->state][posState];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (len != 1) if (len != 1)
{ {
RC_BIT_1_BASE(&p->rc, probs); RC_BIT_1_BASE(&p->rc, probs)
} }
else else
{ {
RC_BIT_0_BASE(&p->rc, probs); RC_BIT_0_BASE(&p->rc, probs)
p->state = kShortRepNextStates[p->state]; p->state = kShortRepNextStates[p->state];
} }
} }
else else
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRepG1[p->state]; probs = &p->isRepG1[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist == 1) if (dist == 1)
{ {
RC_BIT_0_BASE(&p->rc, probs); RC_BIT_0_BASE(&p->rc, probs)
dist = p->reps[1]; dist = p->reps[1];
} }
else else
{ {
RC_BIT_1(&p->rc, probs); RC_BIT_1(&p->rc, probs)
probs = &p->isRepG2[p->state]; probs = &p->isRepG2[p->state];
RC_BIT_PRE(&p->rc, probs) RC_BIT_PRE(&p->rc, probs)
if (dist == 2) if (dist == 2)
{ {
RC_BIT_0_BASE(&p->rc, probs); RC_BIT_0_BASE(&p->rc, probs)
dist = p->reps[2]; dist = p->reps[2];
} }
else else
{ {
RC_BIT_1_BASE(&p->rc, probs); RC_BIT_1_BASE(&p->rc, probs)
dist = p->reps[3]; dist = p->reps[3];
p->reps[3] = p->reps[2]; p->reps[3] = p->reps[2];
} }
@ -2557,7 +2544,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
else else
{ {
unsigned posSlot; unsigned posSlot;
RC_BIT_0(&p->rc, probs); RC_BIT_0(&p->rc, probs)
p->rc.range = range; p->rc.range = range;
p->state = kMatchNextStates[p->state]; p->state = kMatchNextStates[p->state];
@ -2571,7 +2558,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
p->reps[0] = dist + 1; p->reps[0] = dist + 1;
p->matchPriceCount++; p->matchPriceCount++;
GetPosSlot(dist, posSlot); GetPosSlot(dist, posSlot)
// RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot); // RcTree_Encode_PosSlot(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], posSlot);
{ {
UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits); UInt32 sym = (UInt32)posSlot + (1 << kNumPosSlotBits);
@ -2582,7 +2569,7 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
CLzmaProb *prob = probs + (sym >> kNumPosSlotBits); CLzmaProb *prob = probs + (sym >> kNumPosSlotBits);
UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1; UInt32 bit = (sym >> (kNumPosSlotBits - 1)) & 1;
sym <<= 1; sym <<= 1;
RC_BIT(&p->rc, prob, bit); RC_BIT(&p->rc, prob, bit)
} }
while (sym < (1 << kNumPosSlotBits * 2)); while (sym < (1 << kNumPosSlotBits * 2));
p->rc.range = range; p->rc.range = range;
@ -2626,10 +2613,10 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa
{ {
unsigned m = 1; unsigned m = 1;
unsigned bit; unsigned bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); m = (m << 1) + bit; bit = dist & 1; dist >>= 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit) m = (m << 1) + bit;
bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit); bit = dist & 1; RC_BIT(&p->rc, p->posAlignEncoder + m, bit)
p->rc.range = range; p->rc.range = range;
// p->alignPriceCount++; // p->alignPriceCount++;
} }
@ -2704,7 +2691,7 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
if (!RangeEnc_Alloc(&p->rc, alloc)) if (!RangeEnc_Alloc(&p->rc, alloc))
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
#ifndef _7ZIP_ST #ifndef Z7_ST
p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0));
#endif #endif
@ -2748,15 +2735,14 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc,
(numFastBytes + LZMA_MATCH_LEN_MAX + 1) (numFastBytes + LZMA_MATCH_LEN_MAX + 1)
*/ */
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtMode) if (p->mtMode)
{ {
RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize,
p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */
, allocBig)); , allocBig))
p->matchFinderObj = &p->matchFinderMt; p->matchFinderObj = &p->matchFinderMt;
MFB.bigHash = (Byte)( MFB.bigHash = (Byte)(MFB.hashMask >= 0xFFFFFF ? 1 : 0);
(p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0);
MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
} }
else else
@ -2872,59 +2858,53 @@ static SRes LzmaEnc_AllocAndInit(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr
p->finished = False; p->finished = False;
p->result = SZ_OK; p->result = SZ_OK;
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); p->nowPos64 = 0;
p->needInit = 1;
RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig))
LzmaEnc_Init(p); LzmaEnc_Init(p);
LzmaEnc_InitPrices(p); LzmaEnc_InitPrices(p);
p->nowPos64 = 0;
return SZ_OK; return SZ_OK;
} }
static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, static SRes LzmaEnc_Prepare(CLzmaEncHandle p,
ISeqOutStreamPtr outStream,
ISeqInStreamPtr inStream,
ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
MFB.stream = inStream; MatchFinder_SET_STREAM(&MFB, inStream)
p->needInit = 1;
p->rc.outStream = outStream; p->rc.outStream = outStream;
return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig);
} }
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p,
ISeqInStream *inStream, UInt32 keepWindowSize, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
MFB.stream = inStream; MatchFinder_SET_STREAM(&MFB, inStream)
p->needInit = 1;
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
} }
static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) SRes LzmaEnc_MemPrepare(CLzmaEncHandle p,
const Byte *src, SizeT srcLen,
UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
MFB.directInput = 1; // GET_CLzmaEnc_p
MFB.bufferBase = (Byte *)src; MatchFinder_SET_DIRECT_INPUT_BUF(&MFB, src, srcLen)
MFB.directInputRem = srcLen; LzmaEnc_SetDataSize(p, srcLen);
}
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzmaEnc *p = (CLzmaEnc *)pp;
LzmaEnc_SetInputBuf(p, src, srcLen);
p->needInit = 1;
LzmaEnc_SetDataSize(pp, srcLen);
return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig);
} }
void LzmaEnc_Finish(CLzmaEncHandle pp) void LzmaEnc_Finish(CLzmaEncHandle p)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
if (p->mtMode) if (p->mtMode)
MatchFinderMt_ReleaseStream(&p->matchFinderMt); MatchFinderMt_ReleaseStream(&p->matchFinderMt);
#else #else
UNUSED_VAR(pp); UNUSED_VAR(p)
#endif #endif
} }
@ -2933,13 +2913,13 @@ typedef struct
{ {
ISeqOutStream vt; ISeqOutStream vt;
Byte *data; Byte *data;
SizeT rem; size_t rem;
BoolInt overflow; BoolInt overflow;
} CLzmaEnc_SeqOutStreamBuf; } CLzmaEnc_SeqOutStreamBuf;
static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size) static size_t SeqOutStreamBuf_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
{ {
CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLzmaEnc_SeqOutStreamBuf)
if (p->rem < size) if (p->rem < size)
{ {
size = p->rem; size = p->rem;
@ -2956,25 +2936,25 @@ static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, s
/* /*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p)
{ {
const CLzmaEnc *p = (CLzmaEnc *)pp; GET_const_CLzmaEnc_p
return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
} }
*/ */
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp) const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p)
{ {
const CLzmaEnc *p = (CLzmaEnc *)pp; // GET_const_CLzmaEnc_p
return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
} }
// (desiredPackSize == 0) is not allowed // (desiredPackSize == 0) is not allowed
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit, SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize) Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize)
{ {
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
UInt64 nowPos64; UInt64 nowPos64;
SRes res; SRes res;
CLzmaEnc_SeqOutStreamBuf outStream; CLzmaEnc_SeqOutStreamBuf outStream;
@ -3006,12 +2986,12 @@ SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
} }
MY_NO_INLINE Z7_NO_INLINE
static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgressPtr progress)
{ {
SRes res = SZ_OK; SRes res = SZ_OK;
#ifndef _7ZIP_ST #ifndef Z7_ST
Byte allocaDummy[0x300]; Byte allocaDummy[0x300];
allocaDummy[0] = 0; allocaDummy[0] = 0;
allocaDummy[1] = allocaDummy[0]; allocaDummy[1] = allocaDummy[0];
@ -3033,7 +3013,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
} }
} }
LzmaEnc_Finish(p); LzmaEnc_Finish((CLzmaEncHandle)(void *)p);
/* /*
if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB))
@ -3045,21 +3025,22 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress)
} }
SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress, SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress,
ISzAllocPtr alloc, ISzAllocPtr allocBig) ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
RINOK(LzmaEnc_Prepare(pp, outStream, inStream, alloc, allocBig)); // GET_CLzmaEnc_p
return LzmaEnc_Encode2((CLzmaEnc *)pp, progress); RINOK(LzmaEnc_Prepare(p, outStream, inStream, alloc, allocBig))
return LzmaEnc_Encode2(p, progress);
} }
SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *props, SizeT *size)
{ {
if (*size < LZMA_PROPS_SIZE) if (*size < LZMA_PROPS_SIZE)
return SZ_ERROR_PARAM; return SZ_ERROR_PARAM;
*size = LZMA_PROPS_SIZE; *size = LZMA_PROPS_SIZE;
{ {
const CLzmaEnc *p = (const CLzmaEnc *)pp; // GET_CLzmaEnc_p
const UInt32 dictSize = p->dictSize; const UInt32 dictSize = p->dictSize;
UInt32 v; UInt32 v;
props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
@ -3083,23 +3064,24 @@ SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
while (v < dictSize); while (v < dictSize);
} }
SetUi32(props + 1, v); SetUi32(props + 1, v)
return SZ_OK; return SZ_OK;
} }
} }
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle pp) unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p)
{ {
return (unsigned)((CLzmaEnc *)pp)->writeEndMark; // GET_CLzmaEnc_p
return (unsigned)p->writeEndMark;
} }
SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
SRes res; SRes res;
CLzmaEnc *p = (CLzmaEnc *)pp; // GET_CLzmaEnc_p
CLzmaEnc_SeqOutStreamBuf outStream; CLzmaEnc_SeqOutStreamBuf outStream;
@ -3111,7 +3093,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
p->writeEndMark = writeEndMark; p->writeEndMark = writeEndMark;
p->rc.outStream = &outStream.vt; p->rc.outStream = &outStream.vt;
res = LzmaEnc_MemPrepare(pp, src, srcLen, 0, alloc, allocBig); res = LzmaEnc_MemPrepare(p, src, srcLen, 0, alloc, allocBig);
if (res == SZ_OK) if (res == SZ_OK)
{ {
@ -3120,7 +3102,7 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
res = SZ_ERROR_FAIL; res = SZ_ERROR_FAIL;
} }
*destLen -= outStream.rem; *destLen -= (SizeT)outStream.rem;
if (outStream.overflow) if (outStream.overflow)
return SZ_ERROR_OUTPUT_EOF; return SZ_ERROR_OUTPUT_EOF;
return res; return res;
@ -3129,9 +3111,9 @@ SRes LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte *dest, SizeT *destLen, const Byte
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig) ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig)
{ {
CLzmaEnc *p = (CLzmaEnc *)LzmaEnc_Create(alloc); CLzmaEncHandle p = LzmaEnc_Create(alloc);
SRes res; SRes res;
if (!p) if (!p)
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
@ -3151,10 +3133,10 @@ SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
/* /*
#ifndef _7ZIP_ST #ifndef Z7_ST
void LzmaEnc_GetLzThreads(CLzmaEncHandle pp, HANDLE lz_threads[2]) void LzmaEnc_GetLzThreads(CLzmaEncHandle p, HANDLE lz_threads[2])
{ {
const CLzmaEnc *p = (CLzmaEnc *)pp; GET_const_CLzmaEnc_p
lz_threads[0] = p->matchFinderMt.hashSync.thread; lz_threads[0] = p->matchFinderMt.hashSync.thread;
lz_threads[1] = p->matchFinderMt.btSync.thread; lz_threads[1] = p->matchFinderMt.btSync.thread;
} }

View file

@ -1,8 +1,8 @@
/* LzmaEnc.h -- LZMA Encoder /* LzmaEnc.h -- LZMA Encoder
2019-10-30 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H #ifndef ZIP7_INC_LZMA_ENC_H
#define __LZMA_ENC_H #define ZIP7_INC_LZMA_ENC_H
#include "7zTypes.h" #include "7zTypes.h"
@ -10,7 +10,7 @@ EXTERN_C_BEGIN
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
typedef struct _CLzmaEncProps typedef struct
{ {
int level; /* 0 <= level <= 9 */ int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
@ -23,10 +23,13 @@ typedef struct _CLzmaEncProps
int fb; /* 5 <= fb <= 273, default = 32 */ int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */ int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */ int numHashBytes; /* 2, 3 or 4, default = 4 */
unsigned numHashOutBits; /* default = ? */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */ int numThreads; /* 1 or 2, default = 2 */
// int _pad;
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */ Encoder uses this value to reduce dictionary size */
@ -51,7 +54,9 @@ SRes:
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version) SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/ */
typedef void * CLzmaEncHandle; typedef struct CLzmaEnc CLzmaEnc;
typedef CLzmaEnc * CLzmaEncHandle;
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc); CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
@ -61,17 +66,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p); unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* ---------- One Call Interface ---------- */ /* ---------- One Call Interface ---------- */
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen, SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig); ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END EXTERN_C_END

View file

@ -1,12 +1,14 @@
/* LzmaLib.c -- LZMA library wrapper /* LzmaLib.c -- LZMA library wrapper
2015-06-13 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Alloc.h" #include "Alloc.h"
#include "LzmaDec.h" #include "LzmaDec.h"
#include "LzmaEnc.h" #include "LzmaEnc.h"
#include "LzmaLib.h" #include "LzmaLib.h"
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, unsigned char *outProps, size_t *outPropsSize,
int level, /* 0 <= level <= 9, default = 5 */ int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
@ -32,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
} }
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen, Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
const unsigned char *props, size_t propsSize) const unsigned char *props, size_t propsSize)
{ {
ELzmaStatus status; ELzmaStatus status;

View file

@ -1,14 +1,14 @@
/* LzmaLib.h -- LZMA library interface /* LzmaLib.h -- LZMA library interface
2021-04-03 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H #ifndef ZIP7_INC_LZMA_LIB_H
#define __LZMA_LIB_H #define ZIP7_INC_LZMA_LIB_H
#include "7zTypes.h" #include "7zTypes.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
#define MY_STDAPI int MY_STD_CALL #define Z7_STDAPI int Z7_STDCALL
#define LZMA_PROPS_SIZE 5 #define LZMA_PROPS_SIZE 5
@ -100,7 +100,7 @@ Returns:
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/ */
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen, Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */ unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
int level, /* 0 <= level <= 9, default = 5 */ int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* default = (1 << 24) */ unsigned dictSize, /* default = (1 << 24) */
@ -130,7 +130,7 @@ Returns:
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
*/ */
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen, Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
const unsigned char *props, size_t propsSize); const unsigned char *props, size_t propsSize);
EXTERN_C_END EXTERN_C_END

View file

@ -1,28 +1,28 @@
/* MtCoder.c -- Multi-thread Coder /* MtCoder.c -- Multi-thread Coder
2021-12-21 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "MtCoder.h" #include "MtCoder.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) static SRes MtProgressThunk_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize)
{ {
CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMtProgressThunk)
UInt64 inSize2 = 0; UInt64 inSize2 = 0;
UInt64 outSize2 = 0; UInt64 outSize2 = 0;
if (inSize != (UInt64)(Int64)-1) if (inSize != (UInt64)(Int64)-1)
{ {
inSize2 = inSize - thunk->inSize; inSize2 = inSize - p->inSize;
thunk->inSize = inSize; p->inSize = inSize;
} }
if (outSize != (UInt64)(Int64)-1) if (outSize != (UInt64)(Int64)-1)
{ {
outSize2 = outSize - thunk->outSize; outSize2 = outSize - p->outSize;
thunk->outSize = outSize; p->outSize = outSize;
} }
return MtProgress_ProgressAdd(thunk->mtProgress, inSize2, outSize2); return MtProgress_ProgressAdd(p->mtProgress, inSize2, outSize2);
} }
@ -36,20 +36,12 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p)
#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
{
if (Event_IsCreated(p))
return Event_Reset(p);
return AutoResetEvent_CreateNotSignaled(p);
}
static THREAD_FUNC_DECL ThreadFunc(void *pp); static THREAD_FUNC_DECL ThreadFunc(void *pp);
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
{ {
WRes wres = ArEvent_OptCreate_And_Reset(&t->startEvent); WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent);
if (wres == 0) if (wres == 0)
{ {
t->stop = False; t->stop = False;
@ -84,24 +76,6 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
{
size_t size = *processedSize;
*processedSize = 0;
while (size != 0)
{
size_t cur = size;
SRes res = ISeqInStream_Read(stream, data, &cur);
*processedSize += cur;
data += cur;
size -= cur;
RINOK(res);
if (cur == 0)
return SZ_OK;
}
return SZ_OK;
}
/* /*
ThreadFunc2() returns: ThreadFunc2() returns:
@ -152,7 +126,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
} }
if (res == SZ_OK) if (res == SZ_OK)
{ {
res = FullRead(mtc->inStream, t->inBuf, &size); res = SeqInStream_ReadMax(mtc->inStream, t->inBuf, &size);
readProcessed = mtc->readProcessed + size; readProcessed = mtc->readProcessed + size;
mtc->readProcessed = readProcessed; mtc->readProcessed = readProcessed;
} }
@ -253,7 +227,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
block->finished = finished; block->finished = finished;
} }
#ifdef MTCODER__USE_WRITE_THREAD #ifdef MTCODER_USE_WRITE_THREAD
RINOK_THREAD(Event_Set(&mtc->writeEvents[bi])) RINOK_THREAD(Event_Set(&mtc->writeEvents[bi]))
#else #else
{ {
@ -352,7 +326,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp)
MtProgress_SetError(&mtc->mtProgress, res); MtProgress_SetError(&mtc->mtProgress, res);
} }
#ifndef MTCODER__USE_WRITE_THREAD #ifndef MTCODER_USE_WRITE_THREAD
{ {
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
if (numFinished == mtc->numStartedThreads) if (numFinished == mtc->numStartedThreads)
@ -389,7 +363,7 @@ void MtCoder_Construct(CMtCoder *p)
Event_Construct(&p->readEvent); Event_Construct(&p->readEvent);
Semaphore_Construct(&p->blocksSemaphore); Semaphore_Construct(&p->blocksSemaphore);
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
{ {
CMtCoderThread *t = &p->threads[i]; CMtCoderThread *t = &p->threads[i];
t->mtCoder = p; t->mtCoder = p;
@ -397,11 +371,11 @@ void MtCoder_Construct(CMtCoder *p)
t->inBuf = NULL; t->inBuf = NULL;
t->stop = False; t->stop = False;
Event_Construct(&t->startEvent); Event_Construct(&t->startEvent);
Thread_Construct(&t->thread); Thread_CONSTRUCT(&t->thread)
} }
#ifdef MTCODER__USE_WRITE_THREAD #ifdef MTCODER_USE_WRITE_THREAD
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
Event_Construct(&p->writeEvents[i]); Event_Construct(&p->writeEvents[i]);
#else #else
Event_Construct(&p->finishedEvent); Event_Construct(&p->finishedEvent);
@ -424,14 +398,14 @@ static void MtCoder_Free(CMtCoder *p)
Event_Set(&p->readEvent); Event_Set(&p->readEvent);
*/ */
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
MtCoderThread_Destruct(&p->threads[i]); MtCoderThread_Destruct(&p->threads[i]);
Event_Close(&p->readEvent); Event_Close(&p->readEvent);
Semaphore_Close(&p->blocksSemaphore); Semaphore_Close(&p->blocksSemaphore);
#ifdef MTCODER__USE_WRITE_THREAD #ifdef MTCODER_USE_WRITE_THREAD
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
Event_Close(&p->writeEvents[i]); Event_Close(&p->writeEvents[i]);
#else #else
Event_Close(&p->finishedEvent); Event_Close(&p->finishedEvent);
@ -455,20 +429,20 @@ SRes MtCoder_Code(CMtCoder *p)
unsigned i; unsigned i;
SRes res = SZ_OK; SRes res = SZ_OK;
if (numThreads > MTCODER__THREADS_MAX) if (numThreads > MTCODER_THREADS_MAX)
numThreads = MTCODER__THREADS_MAX; numThreads = MTCODER_THREADS_MAX;
numBlocksMax = MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads); numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads);
if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++; if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++;
if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++; if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++;
if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++; if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++;
if (numBlocksMax > MTCODER__BLOCKS_MAX) if (numBlocksMax > MTCODER_BLOCKS_MAX)
numBlocksMax = MTCODER__BLOCKS_MAX; numBlocksMax = MTCODER_BLOCKS_MAX;
if (p->blockSize != p->allocatedBufsSize) if (p->blockSize != p->allocatedBufsSize)
{ {
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
{ {
CMtCoderThread *t = &p->threads[i]; CMtCoderThread *t = &p->threads[i];
if (t->inBuf) if (t->inBuf)
@ -484,23 +458,23 @@ SRes MtCoder_Code(CMtCoder *p)
MtProgress_Init(&p->mtProgress, p->progress); MtProgress_Init(&p->mtProgress, p->progress);
#ifdef MTCODER__USE_WRITE_THREAD #ifdef MTCODER_USE_WRITE_THREAD
for (i = 0; i < numBlocksMax; i++) for (i = 0; i < numBlocksMax; i++)
{ {
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->writeEvents[i])); RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->writeEvents[i]))
} }
#else #else
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent)); RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent))
#endif #endif
{ {
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent)); RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent))
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax)); RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax))
} }
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++) for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++)
p->freeBlockList[i] = i + 1; p->freeBlockList[i] = i + 1;
p->freeBlockList[MTCODER__BLOCKS_MAX - 1] = (unsigned)(int)-1; p->freeBlockList[MTCODER_BLOCKS_MAX - 1] = (unsigned)(int)-1;
p->freeBlockHead = 0; p->freeBlockHead = 0;
p->readProcessed = 0; p->readProcessed = 0;
@ -508,10 +482,10 @@ SRes MtCoder_Code(CMtCoder *p)
p->numBlocksMax = numBlocksMax; p->numBlocksMax = numBlocksMax;
p->stopReading = False; p->stopReading = False;
#ifndef MTCODER__USE_WRITE_THREAD #ifndef MTCODER_USE_WRITE_THREAD
p->writeIndex = 0; p->writeIndex = 0;
p->writeRes = SZ_OK; p->writeRes = SZ_OK;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
p->ReadyBlocks[i] = False; p->ReadyBlocks[i] = False;
p->numFinishedThreads = 0; p->numFinishedThreads = 0;
#endif #endif
@ -522,12 +496,12 @@ SRes MtCoder_Code(CMtCoder *p)
// for (i = 0; i < numThreads; i++) // for (i = 0; i < numThreads; i++)
{ {
CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++]; CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];
RINOK(MtCoderThread_CreateAndStart(nextThread)); RINOK(MtCoderThread_CreateAndStart(nextThread))
} }
RINOK_THREAD(Event_Set(&p->readEvent)) RINOK_THREAD(Event_Set(&p->readEvent))
#ifdef MTCODER__USE_WRITE_THREAD #ifdef MTCODER_USE_WRITE_THREAD
{ {
unsigned bi = 0; unsigned bi = 0;
@ -582,7 +556,7 @@ SRes MtCoder_Code(CMtCoder *p)
if (res == SZ_OK) if (res == SZ_OK)
res = p->mtProgress.res; res = p->mtProgress.res;
#ifndef MTCODER__USE_WRITE_THREAD #ifndef MTCODER_USE_WRITE_THREAD
if (res == SZ_OK) if (res == SZ_OK)
res = p->writeRes; res = p->writeRes;
#endif #endif
@ -593,3 +567,5 @@ SRes MtCoder_Code(CMtCoder *p)
} }
#endif #endif
#undef RINOK_THREAD

View file

@ -1,30 +1,30 @@
/* MtCoder.h -- Multi-thread Coder /* MtCoder.h -- Multi-thread Coder
2018-07-04 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __MT_CODER_H #ifndef ZIP7_INC_MT_CODER_H
#define __MT_CODER_H #define ZIP7_INC_MT_CODER_H
#include "MtDec.h" #include "MtDec.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
/* /*
if ( defined MTCODER__USE_WRITE_THREAD) : main thread writes all data blocks to output stream if ( defined MTCODER_USE_WRITE_THREAD) : main thread writes all data blocks to output stream
if (not defined MTCODER__USE_WRITE_THREAD) : any coder thread can write data blocks to output stream if (not defined MTCODER_USE_WRITE_THREAD) : any coder thread can write data blocks to output stream
*/ */
/* #define MTCODER__USE_WRITE_THREAD */ /* #define MTCODER_USE_WRITE_THREAD */
#ifndef _7ZIP_ST #ifndef Z7_ST
#define MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1) #define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
#define MTCODER__THREADS_MAX 64 #define MTCODER_THREADS_MAX 64
#define MTCODER__BLOCKS_MAX (MTCODER__GET_NUM_BLOCKS_FROM_THREADS(MTCODER__THREADS_MAX) + 3) #define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3)
#else #else
#define MTCODER__THREADS_MAX 1 #define MTCODER_THREADS_MAX 1
#define MTCODER__BLOCKS_MAX 1 #define MTCODER_BLOCKS_MAX 1
#endif #endif
#ifndef _7ZIP_ST #ifndef Z7_ST
typedef struct typedef struct
@ -37,15 +37,15 @@ typedef struct
void MtProgressThunk_CreateVTable(CMtProgressThunk *p); void MtProgressThunk_CreateVTable(CMtProgressThunk *p);
#define MtProgressThunk_Init(p) { (p)->inSize = 0; (p)->outSize = 0; } #define MtProgressThunk_INIT(p) { (p)->inSize = 0; (p)->outSize = 0; }
struct _CMtCoder; struct CMtCoder_;
typedef struct typedef struct
{ {
struct _CMtCoder *mtCoder; struct CMtCoder_ *mtCoder;
unsigned index; unsigned index;
int stop; int stop;
Byte *inBuf; Byte *inBuf;
@ -71,7 +71,7 @@ typedef struct
} CMtCoderBlock; } CMtCoderBlock;
typedef struct _CMtCoder typedef struct CMtCoder_
{ {
/* input variables */ /* input variables */
@ -79,11 +79,11 @@ typedef struct _CMtCoder
unsigned numThreadsMax; unsigned numThreadsMax;
UInt64 expectedDataSize; UInt64 expectedDataSize;
ISeqInStream *inStream; ISeqInStreamPtr inStream;
const Byte *inData; const Byte *inData;
size_t inDataSize; size_t inDataSize;
ICompressProgress *progress; ICompressProgressPtr progress;
ISzAllocPtr allocBig; ISzAllocPtr allocBig;
IMtCoderCallback2 *mtCallback; IMtCoderCallback2 *mtCallback;
@ -100,13 +100,13 @@ typedef struct _CMtCoder
BoolInt stopReading; BoolInt stopReading;
SRes readRes; SRes readRes;
#ifdef MTCODER__USE_WRITE_THREAD #ifdef MTCODER_USE_WRITE_THREAD
CAutoResetEvent writeEvents[MTCODER__BLOCKS_MAX]; CAutoResetEvent writeEvents[MTCODER_BLOCKS_MAX];
#else #else
CAutoResetEvent finishedEvent; CAutoResetEvent finishedEvent;
SRes writeRes; SRes writeRes;
unsigned writeIndex; unsigned writeIndex;
Byte ReadyBlocks[MTCODER__BLOCKS_MAX]; Byte ReadyBlocks[MTCODER_BLOCKS_MAX];
LONG numFinishedThreads; LONG numFinishedThreads;
#endif #endif
@ -120,11 +120,11 @@ typedef struct _CMtCoder
CCriticalSection cs; CCriticalSection cs;
unsigned freeBlockHead; unsigned freeBlockHead;
unsigned freeBlockList[MTCODER__BLOCKS_MAX]; unsigned freeBlockList[MTCODER_BLOCKS_MAX];
CMtProgress mtProgress; CMtProgress mtProgress;
CMtCoderBlock blocks[MTCODER__BLOCKS_MAX]; CMtCoderBlock blocks[MTCODER_BLOCKS_MAX];
CMtCoderThread threads[MTCODER__THREADS_MAX]; CMtCoderThread threads[MTCODER_THREADS_MAX];
} CMtCoder; } CMtCoder;

View file

@ -1,5 +1,5 @@
/* MtDec.c -- Multi-thread Decoder /* MtDec.c -- Multi-thread Decoder
2021-12-21 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -14,7 +14,7 @@
#include "MtDec.h" #include "MtDec.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
#ifdef SHOW_DEBUG_INFO #ifdef SHOW_DEBUG_INFO
#define PRF(x) x #define PRF(x) x
@ -24,7 +24,7 @@
#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d)) #define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress) void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress)
{ {
p->progress = progress; p->progress = progress;
p->res = SZ_OK; p->res = SZ_OK;
@ -81,36 +81,28 @@ void MtProgress_SetError(CMtProgress *p, SRes res)
#define RINOK_THREAD(x) RINOK_WRes(x) #define RINOK_THREAD(x) RINOK_WRes(x)
static WRes ArEvent_OptCreate_And_Reset(CEvent *p) struct CMtDecBufLink_
{ {
if (Event_IsCreated(p)) struct CMtDecBufLink_ *next;
return Event_Reset(p);
return AutoResetEvent_CreateNotSignaled(p);
}
struct __CMtDecBufLink
{
struct __CMtDecBufLink *next;
void *pad[3]; void *pad[3];
}; };
typedef struct __CMtDecBufLink CMtDecBufLink; typedef struct CMtDecBufLink_ CMtDecBufLink;
#define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink) #define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink)
#define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET) #define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET)
static THREAD_FUNC_DECL ThreadFunc(void *pp); static THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp);
static WRes MtDecThread_CreateEvents(CMtDecThread *t) static WRes MtDecThread_CreateEvents(CMtDecThread *t)
{ {
WRes wres = ArEvent_OptCreate_And_Reset(&t->canWrite); WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->canWrite);
if (wres == 0) if (wres == 0)
{ {
wres = ArEvent_OptCreate_And_Reset(&t->canRead); wres = AutoResetEvent_OptCreate_And_Reset(&t->canRead);
if (wres == 0) if (wres == 0)
return SZ_OK; return SZ_OK;
} }
@ -126,7 +118,7 @@ static SRes MtDecThread_CreateAndStart(CMtDecThread *t)
{ {
if (Thread_WasCreated(&t->thread)) if (Thread_WasCreated(&t->thread))
return SZ_OK; return SZ_OK;
wres = Thread_Create(&t->thread, ThreadFunc, t); wres = Thread_Create(&t->thread, MtDec_ThreadFunc, t);
if (wres == 0) if (wres == 0)
return SZ_OK; return SZ_OK;
} }
@ -167,7 +159,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t)
static void MtDec_CloseThreads(CMtDec *p) static void MtDec_CloseThreads(CMtDec *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
MtDecThread_CloseThread(&p->threads[i]); MtDecThread_CloseThread(&p->threads[i]);
} }
@ -179,25 +171,6 @@ static void MtDecThread_Destruct(CMtDecThread *t)
static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
{
size_t size = *processedSize;
*processedSize = 0;
while (size != 0)
{
size_t cur = size;
SRes res = ISeqInStream_Read(stream, data, &cur);
*processedSize += cur;
data += cur;
size -= cur;
RINOK(res);
if (cur == 0)
return SZ_OK;
}
return SZ_OK;
}
static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted) static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted)
{ {
SRes res; SRes res;
@ -253,7 +226,7 @@ Byte *MtDec_GetCrossBuff(CMtDec *p)
/* /*
ThreadFunc2() returns: MtDec_ThreadFunc2() returns:
0 - in all normal cases (even for stream error or memory allocation error) 0 - in all normal cases (even for stream error or memory allocation error)
(!= 0) - WRes error return by system threading function (!= 0) - WRes error return by system threading function
*/ */
@ -261,11 +234,11 @@ Byte *MtDec_GetCrossBuff(CMtDec *p)
// #define MTDEC_ProgessStep (1 << 22) // #define MTDEC_ProgessStep (1 << 22)
#define MTDEC_ProgessStep (1 << 0) #define MTDEC_ProgessStep (1 << 0)
static WRes ThreadFunc2(CMtDecThread *t) static WRes MtDec_ThreadFunc2(CMtDecThread *t)
{ {
CMtDec *p = t->mtDec; CMtDec *p = t->mtDec;
PRF_STR_INT("ThreadFunc2", t->index); PRF_STR_INT("MtDec_ThreadFunc2", t->index)
// SetThreadAffinityMask(GetCurrentThread(), 1 << t->index); // SetThreadAffinityMask(GetCurrentThread(), 1 << t->index);
@ -295,13 +268,13 @@ static WRes ThreadFunc2(CMtDecThread *t)
// CMtDecCallbackInfo parse; // CMtDecCallbackInfo parse;
CMtDecThread *nextThread; CMtDecThread *nextThread;
PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index); PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index)
RINOK_THREAD(Event_Wait(&t->canRead)); RINOK_THREAD(Event_Wait(&t->canRead))
if (p->exitThread) if (p->exitThread)
return 0; return 0;
PRF_STR_INT("after Event_Wait(&t->canRead)", t->index); PRF_STR_INT("after Event_Wait(&t->canRead)", t->index)
// if (t->index == 3) return 19; // for test // if (t->index == 3) return 19; // for test
@ -373,7 +346,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
{ {
size = p->inBufSize; size = p->inBufSize;
res = FullRead(p->inStream, data, &size); res = SeqInStream_ReadMax(p->inStream, data, &size);
// size = 10; // test // size = 10; // test
@ -615,7 +588,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
// if ( !finish ) we must call Event_Set(&nextThread->canWrite) in any case // if ( !finish ) we must call Event_Set(&nextThread->canWrite) in any case
// if ( finish ) we switch to single-thread mode and there are 2 ways at the end of current iteration (current block): // if ( finish ) we switch to single-thread mode and there are 2 ways at the end of current iteration (current block):
// - if (needContinue) after Write(&needContinue), we restore decoding with new iteration // - if (needContinue) after Write(&needContinue), we restore decoding with new iteration
// - otherwise we stop decoding and exit from ThreadFunc2() // - otherwise we stop decoding and exit from MtDec_ThreadFunc2()
// Don't change (finish) variable in the further code // Don't change (finish) variable in the further code
@ -688,7 +661,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
// ---------- WRITE ---------- // ---------- WRITE ----------
RINOK_THREAD(Event_Wait(&t->canWrite)); RINOK_THREAD(Event_Wait(&t->canWrite))
{ {
BoolInt isErrorMode = False; BoolInt isErrorMode = False;
@ -801,14 +774,14 @@ static WRes ThreadFunc2(CMtDecThread *t)
if (!finish) if (!finish)
{ {
RINOK_THREAD(Event_Set(&nextThread->canWrite)); RINOK_THREAD(Event_Set(&nextThread->canWrite))
} }
else else
{ {
if (needContinue) if (needContinue)
{ {
// we restore decoding with new iteration // we restore decoding with new iteration
RINOK_THREAD(Event_Set(&p->threads[0].canWrite)); RINOK_THREAD(Event_Set(&p->threads[0].canWrite))
} }
else else
{ {
@ -817,7 +790,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
return SZ_OK; return SZ_OK;
p->exitThread = True; p->exitThread = True;
} }
RINOK_THREAD(Event_Set(&p->threads[0].canRead)); RINOK_THREAD(Event_Set(&p->threads[0].canRead))
} }
} }
} }
@ -836,7 +809,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
#endif #endif
static THREAD_FUNC_DECL ThreadFunc1(void *pp) static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
{ {
WRes res; WRes res;
@ -845,7 +818,7 @@ static THREAD_FUNC_DECL ThreadFunc1(void *pp)
// fprintf(stdout, "\n%d = %p\n", t->index, &t); // fprintf(stdout, "\n%d = %p\n", t->index, &t);
res = ThreadFunc2(t); res = MtDec_ThreadFunc2(t);
p = t->mtDec; p = t->mtDec;
if (res == 0) if (res == 0)
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes; return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
@ -862,14 +835,14 @@ static THREAD_FUNC_DECL ThreadFunc1(void *pp)
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res; return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
} }
static MY_NO_INLINE THREAD_FUNC_DECL ThreadFunc(void *pp) static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp)
{ {
#ifdef USE_ALLOCA #ifdef USE_ALLOCA
CMtDecThread *t = (CMtDecThread *)pp; CMtDecThread *t = (CMtDecThread *)pp;
// fprintf(stderr, "\n%d = %p - before", t->index, &t); // fprintf(stderr, "\n%d = %p - before", t->index, &t);
t->allocaPtr = alloca(t->index * 128); t->allocaPtr = alloca(t->index * 128);
#endif #endif
return ThreadFunc1(pp); return MtDec_ThreadFunc1(pp);
} }
@ -883,7 +856,7 @@ int MtDec_PrepareRead(CMtDec *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
if (i > p->numStartedThreads if (i > p->numStartedThreads
|| p->numFilledThreads <= || p->numFilledThreads <=
(i >= p->filledThreadStart ? (i >= p->filledThreadStart ?
@ -987,7 +960,7 @@ void MtDec_Construct(CMtDec *p)
p->allocatedBufsSize = 0; p->allocatedBufsSize = 0;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CMtDecThread *t = &p->threads[i]; CMtDecThread *t = &p->threads[i];
t->mtDec = p; t->mtDec = p;
@ -995,7 +968,7 @@ void MtDec_Construct(CMtDec *p)
t->inBuf = NULL; t->inBuf = NULL;
Event_Construct(&t->canRead); Event_Construct(&t->canRead);
Event_Construct(&t->canWrite); Event_Construct(&t->canWrite);
Thread_Construct(&t->thread); Thread_CONSTRUCT(&t->thread)
} }
// Event_Construct(&p->finishedEvent); // Event_Construct(&p->finishedEvent);
@ -1010,7 +983,7 @@ static void MtDec_Free(CMtDec *p)
p->exitThread = True; p->exitThread = True;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
MtDecThread_Destruct(&p->threads[i]); MtDecThread_Destruct(&p->threads[i]);
// Event_Close(&p->finishedEvent); // Event_Close(&p->finishedEvent);
@ -1061,15 +1034,15 @@ SRes MtDec_Code(CMtDec *p)
{ {
unsigned numThreads = p->numThreadsMax; unsigned numThreads = p->numThreadsMax;
if (numThreads > MTDEC__THREADS_MAX) if (numThreads > MTDEC_THREADS_MAX)
numThreads = MTDEC__THREADS_MAX; numThreads = MTDEC_THREADS_MAX;
p->numStartedThreads_Limit = numThreads; p->numStartedThreads_Limit = numThreads;
p->numStartedThreads = 0; p->numStartedThreads = 0;
} }
if (p->inBufSize != p->allocatedBufsSize) if (p->inBufSize != p->allocatedBufsSize)
{ {
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CMtDecThread *t = &p->threads[i]; CMtDecThread *t = &p->threads[i];
if (t->inBuf) if (t->inBuf)
@ -1086,7 +1059,7 @@ SRes MtDec_Code(CMtDec *p)
MtProgress_Init(&p->mtProgress, p->progress); MtProgress_Init(&p->mtProgress, p->progress);
// RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent)); // RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent))
p->exitThread = False; p->exitThread = False;
p->exitThreadWRes = 0; p->exitThreadWRes = 0;
@ -1098,7 +1071,7 @@ SRes MtDec_Code(CMtDec *p)
wres = MtDecThread_CreateEvents(nextThread); wres = MtDecThread_CreateEvents(nextThread);
if (wres == 0) { wres = Event_Set(&nextThread->canWrite); if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
if (wres == 0) { wres = Event_Set(&nextThread->canRead); if (wres == 0) { wres = Event_Set(&nextThread->canRead);
if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread); if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread);
wres = (WRes)(UINT_PTR)res; wres = (WRes)(UINT_PTR)res;
if (wres != 0) if (wres != 0)
{ {
@ -1137,3 +1110,5 @@ SRes MtDec_Code(CMtDec *p)
} }
#endif #endif
#undef PRF

View file

@ -1,46 +1,46 @@
/* MtDec.h -- Multi-thread Decoder /* MtDec.h -- Multi-thread Decoder
2020-03-05 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H #ifndef ZIP7_INC_MT_DEC_H
#define __MT_DEC_H #define ZIP7_INC_MT_DEC_H
#include "7zTypes.h" #include "7zTypes.h"
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "Threads.h" #include "Threads.h"
#endif #endif
EXTERN_C_BEGIN EXTERN_C_BEGIN
#ifndef _7ZIP_ST #ifndef Z7_ST
#ifndef _7ZIP_ST #ifndef Z7_ST
#define MTDEC__THREADS_MAX 32 #define MTDEC_THREADS_MAX 32
#else #else
#define MTDEC__THREADS_MAX 1 #define MTDEC_THREADS_MAX 1
#endif #endif
typedef struct typedef struct
{ {
ICompressProgress *progress; ICompressProgressPtr progress;
SRes res; SRes res;
UInt64 totalInSize; UInt64 totalInSize;
UInt64 totalOutSize; UInt64 totalOutSize;
CCriticalSection cs; CCriticalSection cs;
} CMtProgress; } CMtProgress;
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress); void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress);
SRes MtProgress_Progress_ST(CMtProgress *p); SRes MtProgress_Progress_ST(CMtProgress *p);
SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize); SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize);
SRes MtProgress_GetError(CMtProgress *p); SRes MtProgress_GetError(CMtProgress *p);
void MtProgress_SetError(CMtProgress *p, SRes res); void MtProgress_SetError(CMtProgress *p, SRes res);
struct _CMtDec; struct CMtDec;
typedef struct typedef struct
{ {
struct _CMtDec *mtDec; struct CMtDec_ *mtDec;
unsigned index; unsigned index;
void *inBuf; void *inBuf;
@ -117,7 +117,7 @@ typedef struct
typedef struct _CMtDec typedef struct CMtDec_
{ {
/* input variables */ /* input variables */
@ -126,11 +126,11 @@ typedef struct _CMtDec
// size_t inBlockMax; // size_t inBlockMax;
unsigned numThreadsMax_2; unsigned numThreadsMax_2;
ISeqInStream *inStream; ISeqInStreamPtr inStream;
// const Byte *inData; // const Byte *inData;
// size_t inDataSize; // size_t inDataSize;
ICompressProgress *progress; ICompressProgressPtr progress;
ISzAllocPtr alloc; ISzAllocPtr alloc;
IMtDecCallback2 *mtCallback; IMtDecCallback2 *mtCallback;
@ -171,11 +171,11 @@ typedef struct _CMtDec
unsigned filledThreadStart; unsigned filledThreadStart;
unsigned numFilledThreads; unsigned numFilledThreads;
#ifndef _7ZIP_ST #ifndef Z7_ST
BoolInt needInterrupt; BoolInt needInterrupt;
UInt64 interruptIndex; UInt64 interruptIndex;
CMtProgress mtProgress; CMtProgress mtProgress;
CMtDecThread threads[MTDEC__THREADS_MAX]; CMtDecThread threads[MTDEC_THREADS_MAX];
#endif #endif
} CMtDec; } CMtDec;

View file

@ -1,9 +1,9 @@
/* Ppmd.h -- PPMD codec common code /* Ppmd.h -- PPMD codec common code
2021-04-13 : Igor Pavlov : Public domain 2023-03-05 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H #ifndef ZIP7_INC_PPMD_H
#define __PPMD_H #define ZIP7_INC_PPMD_H
#include "CpuArch.h" #include "CpuArch.h"
@ -48,8 +48,10 @@ typedef struct
Byte Count; /* Count to next change of Shift */ Byte Count; /* Count to next change of Shift */
} CPpmd_See; } CPpmd_See;
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \ #define Ppmd_See_UPDATE(p) \
{ (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); } { if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
{ (p)->Summ = (UInt16)((p)->Summ << 1); \
(p)->Count = (Byte)(3 << (p)->Shift++); }}
typedef struct typedef struct

View file

@ -1,5 +1,5 @@
/* Ppmd7.c -- PPMdH codec /* Ppmd7.c -- PPMdH codec
2021-04-13 : Igor Pavlov : Public domain 2023-04-02 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -14,7 +14,7 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
MY_ALIGN(16) MY_ALIGN(16)
static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 }; static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
MY_ALIGN(16) MY_ALIGN(16)
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051}; static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
#define MAX_FREQ 124 #define MAX_FREQ 124
#define UNIT_SIZE 12 #define UNIT_SIZE 12
@ -33,7 +33,7 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx) #define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
#define SUFFIX(ctx) CTX((ctx)->Suffix) #define SUFFIX(ctx) CTX((ctx)->Suffix)
typedef CPpmd7_Context * CTX_PTR; typedef CPpmd7_Context * PPMD7_CTX_PTR;
struct CPpmd7_Node_; struct CPpmd7_Node_;
@ -107,14 +107,14 @@ BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
// ---------- Internal Memory Allocator ---------- // ---------- Internal Memory Allocator ----------
/* We can use CPpmd7_Node in list of free units (as in Ppmd8) /* We can use CPpmd7_Node in list of free units (as in Ppmd8)
But we still need one additional list walk pass in GlueFreeBlocks(). But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks().
So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode() So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode()
*/ */
#define EMPTY_NODE 0 #define EMPTY_NODE 0
static void InsertNode(CPpmd7 *p, void *node, unsigned indx) static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx)
{ {
*((CPpmd_Void_Ref *)node) = p->FreeList[indx]; *((CPpmd_Void_Ref *)node) = p->FreeList[indx];
// ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx]; // ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
@ -124,7 +124,7 @@ static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
} }
static void *RemoveNode(CPpmd7 *p, unsigned indx) static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx)
{ {
CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]); CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
p->FreeList[indx] = *node; p->FreeList[indx] = *node;
@ -134,32 +134,32 @@ static void *RemoveNode(CPpmd7 *p, unsigned indx)
} }
static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx) static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
{ {
unsigned i, nu = I2U(oldIndx) - I2U(newIndx); unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
ptr = (Byte *)ptr + U2B(I2U(newIndx)); ptr = (Byte *)ptr + U2B(I2U(newIndx));
if (I2U(i = U2I(nu)) != nu) if (I2U(i = U2I(nu)) != nu)
{ {
unsigned k = I2U(--i); unsigned k = I2U(--i);
InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1); Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
} }
InsertNode(p, ptr, i); Ppmd7_InsertNode(p, ptr, i);
} }
/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */ /* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
typedef union _CPpmd7_Node_Union typedef union
{ {
CPpmd7_Node Node; CPpmd7_Node Node;
CPpmd7_Node_Ref NextRef; CPpmd7_Node_Ref NextRef;
} CPpmd7_Node_Union; } CPpmd7_Node_Union;
/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks() /* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks()
we use single linked list similar to Ppmd8 code */ we use single linked list similar to Ppmd8 code */
static void GlueFreeBlocks(CPpmd7 *p) static void Ppmd7_GlueFreeBlocks(CPpmd7 *p)
{ {
/* /*
we use first UInt16 field of 12-bytes UNITs as record type stamp we use first UInt16 field of 12-bytes UNITs as record type stamp
@ -239,27 +239,27 @@ static void GlueFreeBlocks(CPpmd7 *p)
if (nu == 0) if (nu == 0)
continue; continue;
for (; nu > 128; nu -= 128, node += 128) for (; nu > 128; nu -= 128, node += 128)
InsertNode(p, node, PPMD_NUM_INDEXES - 1); Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1);
if (I2U(i = U2I(nu)) != nu) if (I2U(i = U2I(nu)) != nu)
{ {
unsigned k = I2U(--i); unsigned k = I2U(--i);
InsertNode(p, node + k, (unsigned)nu - k - 1); Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1);
} }
InsertNode(p, node, i); Ppmd7_InsertNode(p, node, i);
} }
} }
MY_NO_INLINE Z7_NO_INLINE
static void *AllocUnitsRare(CPpmd7 *p, unsigned indx) static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx)
{ {
unsigned i; unsigned i;
if (p->GlueCount == 0) if (p->GlueCount == 0)
{ {
GlueFreeBlocks(p); Ppmd7_GlueFreeBlocks(p);
if (p->FreeList[indx] != 0) if (p->FreeList[indx] != 0)
return RemoveNode(p, indx); return Ppmd7_RemoveNode(p, indx);
} }
i = indx; i = indx;
@ -277,17 +277,17 @@ static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
while (p->FreeList[i] == 0); while (p->FreeList[i] == 0);
{ {
void *block = RemoveNode(p, i); void *block = Ppmd7_RemoveNode(p, i);
SplitBlock(p, block, i, indx); Ppmd7_SplitBlock(p, block, i, indx);
return block; return block;
} }
} }
static void *AllocUnits(CPpmd7 *p, unsigned indx) static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx)
{ {
if (p->FreeList[indx] != 0) if (p->FreeList[indx] != 0)
return RemoveNode(p, indx); return Ppmd7_RemoveNode(p, indx);
{ {
UInt32 numBytes = U2B(I2U(indx)); UInt32 numBytes = U2B(I2U(indx));
Byte *lo = p->LoUnit; Byte *lo = p->LoUnit;
@ -297,11 +297,11 @@ static void *AllocUnits(CPpmd7 *p, unsigned indx)
return lo; return lo;
} }
} }
return AllocUnitsRare(p, indx); return Ppmd7_AllocUnitsRare(p, indx);
} }
#define MyMem12Cpy(dest, src, num) \ #define MEM_12_CPY(dest, src, num) \
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \ { UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); } do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
@ -315,12 +315,12 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
return oldPtr; return oldPtr;
if (p->FreeList[i1] != 0) if (p->FreeList[i1] != 0)
{ {
void *ptr = RemoveNode(p, i1); void *ptr = Ppmd7_RemoveNode(p, i1);
MyMem12Cpy(ptr, oldPtr, newNU); MEM_12_CPY(ptr, oldPtr, newNU)
InsertNode(p, oldPtr, i0); Ppmd7_InsertNode(p, oldPtr, i0);
return ptr; return ptr;
} }
SplitBlock(p, oldPtr, i0, i1); Ppmd7_SplitBlock(p, oldPtr, i0, i1);
return oldPtr; return oldPtr;
} }
*/ */
@ -329,14 +329,14 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v) static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
{ {
Ppmd_SET_SUCCESSOR(p, v); Ppmd_SET_SUCCESSOR(p, v)
} }
MY_NO_INLINE Z7_NO_INLINE
static static
void RestartModel(CPpmd7 *p) void Ppmd7_RestartModel(CPpmd7 *p)
{ {
unsigned i, k; unsigned i, k;
@ -352,8 +352,8 @@ void RestartModel(CPpmd7 *p)
p->PrevSuccess = 0; p->PrevSuccess = 0;
{ {
CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */ CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */ CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */
p->LoUnit += U2B(256 / 2); p->LoUnit += U2B(256 / 2);
p->MaxContext = p->MinContext = mc; p->MaxContext = p->MinContext = mc;
@ -391,7 +391,7 @@ void RestartModel(CPpmd7 *p)
{ {
unsigned m; unsigned m;
UInt16 *dest = p->BinSumm[i] + k; UInt16 *dest = p->BinSumm[i] + k;
UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2)); const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2));
for (m = 0; m < 64; m += 8) for (m = 0; m < 64; m += 8)
dest[m] = val; dest[m] = val;
} }
@ -423,13 +423,13 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
{ {
p->MaxOrder = maxOrder; p->MaxOrder = maxOrder;
RestartModel(p); Ppmd7_RestartModel(p);
} }
/* /*
CreateSuccessors() Ppmd7_CreateSuccessors()
It's called when (FoundState->Successor) is RAW-Successor, It's called when (FoundState->Successor) is RAW-Successor,
that is the link to position in Raw text. that is the link to position in Raw text.
So we create Context records and write the links to So we create Context records and write the links to
@ -445,10 +445,10 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
also it can return pointer to real context of same order, also it can return pointer to real context of same order,
*/ */
MY_NO_INLINE Z7_NO_INLINE
static CTX_PTR CreateSuccessors(CPpmd7 *p) static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p)
{ {
CTX_PTR c = p->MinContext; PPMD7_CTX_PTR c = p->MinContext;
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState); CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
Byte newSym, newFreq; Byte newSym, newFreq;
unsigned numPs = 0; unsigned numPs = 0;
@ -522,15 +522,15 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p)
do do
{ {
CTX_PTR c1; PPMD7_CTX_PTR c1;
/* = AllocContext(p); */ /* = AllocContext(p); */
if (p->HiUnit != p->LoUnit) if (p->HiUnit != p->LoUnit)
c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
else if (p->FreeList[0] != 0) else if (p->FreeList[0] != 0)
c1 = (CTX_PTR)RemoveNode(p, 0); c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0);
else else
{ {
c1 = (CTX_PTR)AllocUnitsRare(p, 0); c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0);
if (!c1) if (!c1)
return NULL; return NULL;
} }
@ -550,16 +550,16 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p)
#define SwapStates(s) \ #define SWAP_STATES(s) \
{ CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; } { CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
void Ppmd7_UpdateModel(CPpmd7 *p); void Ppmd7_UpdateModel(CPpmd7 *p);
MY_NO_INLINE Z7_NO_INLINE
void Ppmd7_UpdateModel(CPpmd7 *p) void Ppmd7_UpdateModel(CPpmd7 *p)
{ {
CPpmd_Void_Ref maxSuccessor, minSuccessor; CPpmd_Void_Ref maxSuccessor, minSuccessor;
CTX_PTR c, mc; PPMD7_CTX_PTR c, mc;
unsigned s0, ns; unsigned s0, ns;
@ -592,7 +592,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
if (s[0].Freq >= s[-1].Freq) if (s[0].Freq >= s[-1].Freq)
{ {
SwapStates(s); SWAP_STATES(s)
s--; s--;
} }
} }
@ -610,10 +610,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
{ {
/* MAX ORDER context */ /* MAX ORDER context */
/* (FoundState->Successor) is RAW-Successor. */ /* (FoundState->Successor) is RAW-Successor. */
p->MaxContext = p->MinContext = CreateSuccessors(p); p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p);
if (!p->MinContext) if (!p->MinContext)
{ {
RestartModel(p); Ppmd7_RestartModel(p);
return; return;
} }
SetSuccessor(p->FoundState, REF(p->MinContext)); SetSuccessor(p->FoundState, REF(p->MinContext));
@ -629,7 +629,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
p->Text = text; p->Text = text;
if (text >= p->UnitsStart) if (text >= p->UnitsStart)
{ {
RestartModel(p); Ppmd7_RestartModel(p);
return; return;
} }
maxSuccessor = REF(text); maxSuccessor = REF(text);
@ -645,10 +645,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
if (minSuccessor <= maxSuccessor) if (minSuccessor <= maxSuccessor)
{ {
// minSuccessor is RAW-Successor. So we will create real contexts records: // minSuccessor is RAW-Successor. So we will create real contexts records:
CTX_PTR cs = CreateSuccessors(p); PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p);
if (!cs) if (!cs)
{ {
RestartModel(p); Ppmd7_RestartModel(p);
return; return;
} }
minSuccessor = REF(cs); minSuccessor = REF(cs);
@ -715,16 +715,16 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
unsigned i = U2I(oldNU); unsigned i = U2I(oldNU);
if (i != U2I((size_t)oldNU + 1)) if (i != U2I((size_t)oldNU + 1))
{ {
void *ptr = AllocUnits(p, i + 1); void *ptr = Ppmd7_AllocUnits(p, i + 1);
void *oldPtr; void *oldPtr;
if (!ptr) if (!ptr)
{ {
RestartModel(p); Ppmd7_RestartModel(p);
return; return;
} }
oldPtr = STATS(c); oldPtr = STATS(c);
MyMem12Cpy(ptr, oldPtr, oldNU); MEM_12_CPY(ptr, oldPtr, oldNU)
InsertNode(p, oldPtr, i); Ppmd7_InsertNode(p, oldPtr, i);
c->Union4.Stats = STATS_REF(ptr); c->Union4.Stats = STATS_REF(ptr);
} }
} }
@ -739,10 +739,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
else else
{ {
// instead of One-symbol context we create 2-symbol context // instead of One-symbol context we create 2-symbol context
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0); CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0);
if (!s) if (!s)
{ {
RestartModel(p); Ppmd7_RestartModel(p);
return; return;
} }
{ {
@ -795,8 +795,8 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
MY_NO_INLINE Z7_NO_INLINE
static void Rescale(CPpmd7 *p) static void Ppmd7_Rescale(CPpmd7 *p)
{ {
unsigned i, adder, sumFreq, escFreq; unsigned i, adder, sumFreq, escFreq;
CPpmd_State *stats = STATS(p->MinContext); CPpmd_State *stats = STATS(p->MinContext);
@ -885,7 +885,7 @@ static void Rescale(CPpmd7 *p)
*s = *stats; *s = *stats;
s->Freq = (Byte)freq; // (freq <= 260 / 4) s->Freq = (Byte)freq; // (freq <= 260 / 4)
p->FoundState = s; p->FoundState = s;
InsertNode(p, stats, U2I(n0)); Ppmd7_InsertNode(p, stats, U2I(n0));
return; return;
} }
@ -899,13 +899,13 @@ static void Rescale(CPpmd7 *p)
{ {
if (p->FreeList[i1] != 0) if (p->FreeList[i1] != 0)
{ {
void *ptr = RemoveNode(p, i1); void *ptr = Ppmd7_RemoveNode(p, i1);
p->MinContext->Union4.Stats = STATS_REF(ptr); p->MinContext->Union4.Stats = STATS_REF(ptr);
MyMem12Cpy(ptr, (const void *)stats, n1); MEM_12_CPY(ptr, (const void *)stats, n1)
InsertNode(p, stats, i0); Ppmd7_InsertNode(p, stats, i0);
} }
else else
SplitBlock(p, stats, i0, i1); Ppmd7_SplitBlock(p, stats, i0, i1);
} }
} }
} }
@ -948,9 +948,9 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
} }
static void NextContext(CPpmd7 *p) static void Ppmd7_NextContext(CPpmd7 *p)
{ {
CTX_PTR c = CTX(SUCCESSOR(p->FoundState)); PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
if (p->OrderFall == 0 && (const Byte *)c > p->Text) if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c; p->MaxContext = p->MinContext = c;
else else
@ -967,12 +967,12 @@ void Ppmd7_Update1(CPpmd7 *p)
s->Freq = (Byte)freq; s->Freq = (Byte)freq;
if (freq > s[-1].Freq) if (freq > s[-1].Freq)
{ {
SwapStates(s); SWAP_STATES(s)
p->FoundState = --s; p->FoundState = --s;
if (freq > MAX_FREQ) if (freq > MAX_FREQ)
Rescale(p); Ppmd7_Rescale(p);
} }
NextContext(p); Ppmd7_NextContext(p);
} }
@ -988,8 +988,8 @@ void Ppmd7_Update1_0(CPpmd7 *p)
freq += 4; freq += 4;
s->Freq = (Byte)freq; s->Freq = (Byte)freq;
if (freq > MAX_FREQ) if (freq > MAX_FREQ)
Rescale(p); Ppmd7_Rescale(p);
NextContext(p); Ppmd7_NextContext(p);
} }
@ -1000,7 +1000,7 @@ void Ppmd7_UpdateBin(CPpmd7 *p)
p->FoundState->Freq = (Byte)(freq + (freq < 128)); p->FoundState->Freq = (Byte)(freq + (freq < 128));
p->PrevSuccess = 1; p->PrevSuccess = 1;
p->RunLength++; p->RunLength++;
NextContext(p); Ppmd7_NextContext(p);
} }
*/ */
@ -1013,7 +1013,7 @@ void Ppmd7_Update2(CPpmd7 *p)
p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4); p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
s->Freq = (Byte)freq; s->Freq = (Byte)freq;
if (freq > MAX_FREQ) if (freq > MAX_FREQ)
Rescale(p); Ppmd7_Rescale(p);
Ppmd7_UpdateModel(p); Ppmd7_UpdateModel(p);
} }
@ -1042,8 +1042,8 @@ Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors. The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
The code doesn't free UNITs allocated for CPpmd7_Context records. The code doesn't free UNITs allocated for CPpmd7_Context records.
The code calls RestartModel(), when there is no free memory for allocation. The code calls Ppmd7_RestartModel(), when there is no free memory for allocation.
And RestartModel() changes the state to original start state, with full free block. And Ppmd7_RestartModel() changes the state to original start state, with full free block.
The code allocates UNITs with the following order: The code allocates UNITs with the following order:
@ -1051,14 +1051,14 @@ The code allocates UNITs with the following order:
Allocation of 1 UNIT for Context record Allocation of 1 UNIT for Context record
- from free space (HiUnit) down to (LoUnit) - from free space (HiUnit) down to (LoUnit)
- from FreeList[0] - from FreeList[0]
- AllocUnitsRare() - Ppmd7_AllocUnitsRare()
AllocUnits() for CPpmd_State vectors: Ppmd7_AllocUnits() for CPpmd_State vectors:
- from FreeList[i] - from FreeList[i]
- from free space (LoUnit) up to (HiUnit) - from free space (LoUnit) up to (HiUnit)
- AllocUnitsRare() - Ppmd7_AllocUnitsRare()
AllocUnitsRare() Ppmd7_AllocUnitsRare()
- if (GlueCount == 0) - if (GlueCount == 0)
{ Glue lists, GlueCount = 255, allocate from FreeList[i]] } { Glue lists, GlueCount = 255, allocate from FreeList[i]] }
- loop for all higher sized FreeList[...] lists - loop for all higher sized FreeList[...] lists
@ -1093,8 +1093,8 @@ The PPMd code tries to fulfill the condition:
We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124) We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol. So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7. If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions. SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions.
Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for
max-order context. max-order context.
When the PPMd code still breaks the (Total <= RC::Range) condition in range coder, When the PPMd code still breaks the (Total <= RC::Range) condition in range coder,
@ -1102,3 +1102,21 @@ we have two ways to resolve that problem:
1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases. 1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value. 2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
*/ */
#undef MAX_FREQ
#undef UNIT_SIZE
#undef U2B
#undef U2I
#undef I2U
#undef I2U_UInt16
#undef REF
#undef STATS_REF
#undef CTX
#undef STATS
#undef ONE_STATE
#undef SUFFIX
#undef NODE
#undef EMPTY_NODE
#undef MEM_12_CPY
#undef SUCCESSOR
#undef SWAP_STATES

View file

@ -1,11 +1,11 @@
/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec /* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
2021-04-13 : Igor Pavlov : Public domain 2023-04-02 : Igor Pavlov : Public domain
This code is based on: This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */ PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD7_H #ifndef ZIP7_INC_PPMD7_H
#define __PPMD7_H #define ZIP7_INC_PPMD7_H
#include "Ppmd.h" #include "Ppmd.h"
@ -55,7 +55,7 @@ typedef struct
UInt32 Range; UInt32 Range;
UInt32 Code; UInt32 Code;
UInt32 Low; UInt32 Low;
IByteIn *Stream; IByteInPtr Stream;
} CPpmd7_RangeDec; } CPpmd7_RangeDec;
@ -66,7 +66,7 @@ typedef struct
// Byte _dummy_[3]; // Byte _dummy_[3];
UInt64 Low; UInt64 Low;
UInt64 CacheSize; UInt64 CacheSize;
IByteOut *Stream; IByteOutPtr Stream;
} CPpmd7z_RangeEnc; } CPpmd7z_RangeEnc;

View file

@ -1,5 +1,5 @@
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder /* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
2021-04-13 : Igor Pavlov : Public domain 2023-04-02 : Igor Pavlov : Public domain
This code is based on: This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */ PPMd var.H (2001): Dmitry Shkarin : Public domain */
@ -8,7 +8,7 @@ This code is based on:
#include "Ppmd7.h" #include "Ppmd7.h"
#define kTopValue (1 << 24) #define kTopValue ((UInt32)1 << 24)
#define READ_BYTE(p) IByteIn_Read((p)->Stream) #define READ_BYTE(p) IByteIn_Read((p)->Stream)
@ -37,9 +37,9 @@ BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
#define R (&p->rc.dec) #define R (&p->rc.dec)
MY_FORCE_INLINE Z7_FORCE_INLINE
// MY_NO_INLINE // Z7_NO_INLINE
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size) static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{ {
@ -48,18 +48,18 @@ static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
RC_NORM_LOCAL(R) RC_NORM_LOCAL(R)
} }
#define RC_Decode(start, size) RangeDec_Decode(p, start, size); #define RC_Decode(start, size) Ppmd7z_RD_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R) #define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total))) #define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) #define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR; // typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p); void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym] #define MASK(sym) ((unsigned char *)charMask)[sym]
// MY_FORCE_INLINE // Z7_FORCE_INLINE
// static // static
int Ppmd7z_DecodeSymbol(CPpmd7 *p) int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{ {
@ -70,7 +70,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext); CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i; unsigned i;
UInt32 count, hiCnt; UInt32 count, hiCnt;
UInt32 summFreq = p->MinContext->Union2.SummFreq; const UInt32 summFreq = p->MinContext->Union2.SummFreq;
@ -81,7 +81,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
if ((Int32)(count -= s->Freq) < 0) if ((Int32)(count -= s->Freq) < 0)
{ {
Byte sym; Byte sym;
RC_DecodeFinal(0, s->Freq); RC_DecodeFinal(0, s->Freq)
p->FoundState = s; p->FoundState = s;
sym = s->Symbol; sym = s->Symbol;
Ppmd7_Update1_0(p); Ppmd7_Update1_0(p);
@ -96,7 +96,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
if ((Int32)(count -= (++s)->Freq) < 0) if ((Int32)(count -= (++s)->Freq) < 0)
{ {
Byte sym; Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
p->FoundState = s; p->FoundState = s;
sym = s->Symbol; sym = s->Symbol;
Ppmd7_Update1(p); Ppmd7_Update1(p);
@ -109,10 +109,10 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
return PPMD7_SYM_ERROR; return PPMD7_SYM_ERROR;
hiCnt -= count; hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt); RC_Decode(hiCnt, summFreq - hiCnt)
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask); PPMD_SetAllBitsIn256Bytes(charMask)
// i = p->MinContext->NumStats - 1; // i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i); // do { MASK((--s)->Symbol) = 0; } while (--i);
{ {
@ -152,7 +152,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
// Ppmd7_UpdateBin(p); // Ppmd7_UpdateBin(p);
{ {
unsigned freq = s->Freq; unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s)); CPpmd7_Context *c = CTX(SUCCESSOR(s));
sym = s->Symbol; sym = s->Symbol;
p->FoundState = s; p->FoundState = s;
p->PrevSuccess = 1; p->PrevSuccess = 1;
@ -176,7 +176,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
R->Range -= size0; R->Range -= size0;
RC_NORM_LOCAL(R) RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask); PPMD_SetAllBitsIn256Bytes(charMask)
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0; MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0; p->PrevSuccess = 0;
} }
@ -245,13 +245,13 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{ {
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break; // count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
}; }
} }
s--; s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq); RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
// new (see->Summ) value can overflow over 16-bits in some rare cases // new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see); Ppmd_See_UPDATE(see)
p->FoundState = s; p->FoundState = s;
sym = s->Symbol; sym = s->Symbol;
Ppmd7_Update2(p); Ppmd7_Update2(p);
@ -261,7 +261,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
if (count >= freqSum) if (count >= freqSum)
return PPMD7_SYM_ERROR; return PPMD7_SYM_ERROR;
RC_Decode(hiCnt, freqSum - hiCnt); RC_Decode(hiCnt, freqSum - hiCnt)
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols. // We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases // new (see->Summ) value can overflow over 16-bits in some rare cases
@ -295,3 +295,18 @@ Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
return buf; return buf;
} }
*/ */
#undef kTopValue
#undef READ_BYTE
#undef RC_NORM_BASE
#undef RC_NORM_1
#undef RC_NORM
#undef RC_NORM_LOCAL
#undef RC_NORM_REMOTE
#undef R
#undef RC_Decode
#undef RC_DecodeFinal
#undef RC_GetThreshold
#undef CTX
#undef SUCCESSOR
#undef MASK

View file

@ -1,5 +1,5 @@
/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder /* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
2021-04-13 : Igor Pavlov : Public domain 2023-04-02 : Igor Pavlov : Public domain
This code is based on: This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */ PPMd var.H (2001): Dmitry Shkarin : Public domain */
@ -8,7 +8,7 @@ This code is based on:
#include "Ppmd7.h" #include "Ppmd7.h"
#define kTopValue (1 << 24) #define kTopValue ((UInt32)1 << 24)
#define R (&p->rc.enc) #define R (&p->rc.enc)
@ -20,8 +20,8 @@ void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
R->CacheSize = 1; R->CacheSize = 1;
} }
MY_NO_INLINE Z7_NO_INLINE
static void RangeEnc_ShiftLow(CPpmd7 *p) static void Ppmd7z_RangeEnc_ShiftLow(CPpmd7 *p)
{ {
if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0) if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
{ {
@ -38,53 +38,53 @@ static void RangeEnc_ShiftLow(CPpmd7 *p)
R->Low = (UInt32)((UInt32)R->Low << 8); R->Low = (UInt32)((UInt32)R->Low << 8);
} }
#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p); #define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; Ppmd7z_RangeEnc_ShiftLow(p);
#define RC_NORM_1(p) RC_NORM_BASE(p) } #define RC_NORM_1(p) RC_NORM_BASE(p) }
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }} #define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
// we must use only one type of Normalization from two: LOCAL or REMOTE // we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p) #define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p) #define RC_NORM_REMOTE(p) RC_NORM(p)
/* /*
#define RangeEnc_Encode(p, start, _size_) \ #define Ppmd7z_RangeEnc_Encode(p, start, _size_) \
{ UInt32 size = _size_; \ { UInt32 size = _size_; \
R->Low += start * R->Range; \ R->Low += start * R->Range; \
R->Range *= size; \ R->Range *= size; \
RC_NORM_LOCAL(p); } RC_NORM_LOCAL(p); }
*/ */
MY_FORCE_INLINE Z7_FORCE_INLINE
// MY_NO_INLINE // Z7_NO_INLINE
static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size) static void Ppmd7z_RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
{ {
R->Low += start * R->Range; R->Low += start * R->Range;
R->Range *= size; R->Range *= size;
RC_NORM_LOCAL(p); RC_NORM_LOCAL(p)
} }
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p) void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
RangeEnc_ShiftLow(p); Ppmd7z_RangeEnc_ShiftLow(p);
} }
#define RC_Encode(start, size) RangeEnc_Encode(p, start, size); #define RC_Encode(start, size) Ppmd7z_RangeEnc_Encode(p, start, size);
#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p); #define RC_EncodeFinal(start, size) RC_Encode(start, size) RC_NORM_REMOTE(p)
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref)) #define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
#define SUFFIX(ctx) CTX((ctx)->Suffix) #define SUFFIX(ctx) CTX((ctx)->Suffix)
typedef CPpmd7_Context * CTX_PTR; // typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p) #define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p); void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym] #define MASK(sym) ((unsigned char *)charMask)[sym]
MY_FORCE_INLINE Z7_FORCE_INLINE
static static
void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol) void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
{ {
@ -104,7 +104,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
if (s->Symbol == symbol) if (s->Symbol == symbol)
{ {
// R->Range /= p->MinContext->Union2.SummFreq; // R->Range /= p->MinContext->Union2.SummFreq;
RC_EncodeFinal(0, s->Freq); RC_EncodeFinal(0, s->Freq)
p->FoundState = s; p->FoundState = s;
Ppmd7_Update1_0(p); Ppmd7_Update1_0(p);
return; return;
@ -117,7 +117,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
if ((++s)->Symbol == symbol) if ((++s)->Symbol == symbol)
{ {
// R->Range /= p->MinContext->Union2.SummFreq; // R->Range /= p->MinContext->Union2.SummFreq;
RC_EncodeFinal(sum, s->Freq); RC_EncodeFinal(sum, s->Freq)
p->FoundState = s; p->FoundState = s;
Ppmd7_Update1(p); Ppmd7_Update1(p);
return; return;
@ -127,10 +127,10 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
while (--i); while (--i);
// R->Range /= p->MinContext->Union2.SummFreq; // R->Range /= p->MinContext->Union2.SummFreq;
RC_Encode(sum, p->MinContext->Union2.SummFreq - sum); RC_Encode(sum, p->MinContext->Union2.SummFreq - sum)
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol); p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask); PPMD_SetAllBitsIn256Bytes(charMask)
// MASK(s->Symbol) = 0; // MASK(s->Symbol) = 0;
// i = p->MinContext->NumStats - 1; // i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i); // do { MASK((--s)->Symbol) = 0; } while (--i);
@ -153,20 +153,20 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
UInt16 *prob = Ppmd7_GetBinSumm(p); UInt16 *prob = Ppmd7_GetBinSumm(p);
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext); CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt32 pr = *prob; UInt32 pr = *prob;
UInt32 bound = (R->Range >> 14) * pr; const UInt32 bound = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr); pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol) if (s->Symbol == symbol)
{ {
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS)); *prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeEnc_EncodeBit_0(p, bound); // RangeEnc_EncodeBit_0(p, bound);
R->Range = bound; R->Range = bound;
RC_NORM_1(p); RC_NORM_1(p)
// p->FoundState = s; // p->FoundState = s;
// Ppmd7_UpdateBin(p); // Ppmd7_UpdateBin(p);
{ {
unsigned freq = s->Freq; const unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s)); CPpmd7_Context *c = CTX(SUCCESSOR(s));
p->FoundState = s; p->FoundState = s;
p->PrevSuccess = 1; p->PrevSuccess = 1;
p->RunLength++; p->RunLength++;
@ -187,7 +187,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
R->Range -= bound; R->Range -= bound;
RC_NORM_LOCAL(p) RC_NORM_LOCAL(p)
PPMD_SetAllBitsIn256Bytes(charMask); PPMD_SetAllBitsIn256Bytes(charMask)
MASK(s->Symbol) = 0; MASK(s->Symbol) = 0;
p->PrevSuccess = 0; p->PrevSuccess = 0;
} }
@ -248,14 +248,14 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
do do
{ {
unsigned cur = s->Symbol; const unsigned cur = s->Symbol;
if ((int)cur == symbol) if ((int)cur == symbol)
{ {
UInt32 low = sum; const UInt32 low = sum;
UInt32 freq = s->Freq; const UInt32 freq = s->Freq;
unsigned num2; unsigned num2;
Ppmd_See_Update(see); Ppmd_See_UPDATE(see)
p->FoundState = s; p->FoundState = s;
sum += escFreq; sum += escFreq;
@ -279,7 +279,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
R->Range /= sum; R->Range /= sum;
RC_EncodeFinal(low, freq); RC_EncodeFinal(low, freq)
Ppmd7_Update2(p); Ppmd7_Update2(p);
return; return;
} }
@ -289,21 +289,21 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
while (--i); while (--i);
{ {
UInt32 total = sum + escFreq; const UInt32 total = sum + escFreq;
see->Summ = (UInt16)(see->Summ + total); see->Summ = (UInt16)(see->Summ + total);
R->Range /= total; R->Range /= total;
RC_Encode(sum, escFreq); RC_Encode(sum, escFreq)
} }
{ {
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext); const CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
s--; s--;
MASK(s->Symbol) = 0; MASK(s->Symbol) = 0;
do do
{ {
unsigned sym0 = s2[0].Symbol; const unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol; const unsigned sym1 = s2[1].Symbol;
s2 += 2; s2 += 2;
MASK(sym0) = 0; MASK(sym0) = 0;
MASK(sym1) = 0; MASK(sym1) = 0;
@ -321,3 +321,18 @@ void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim)
Ppmd7z_EncodeSymbol(p, *buf); Ppmd7z_EncodeSymbol(p, *buf);
} }
} }
#undef kTopValue
#undef WRITE_BYTE
#undef RC_NORM_BASE
#undef RC_NORM_1
#undef RC_NORM
#undef RC_NORM_LOCAL
#undef RC_NORM_REMOTE
#undef R
#undef RC_Encode
#undef RC_EncodeFinal
#undef SUFFIX
#undef CTX
#undef SUCCESSOR
#undef MASK

295
3rdparty/7z/src/Ppmd7aDec.c vendored Normal file
View file

@ -0,0 +1,295 @@
/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder
2023-04-02 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
#define kTop ((UInt32)1 << 24)
#define kBot ((UInt32)1 << 15)
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
/* Prepares the PPMd7a range decoder for a new stream.
   Low is cleared, Range is set to the full 32-bit span, and Code is loaded
   with four bytes taken from p->Stream through READ_BYTE; the byte read
   first ends up in the most significant position.
   Returns nonzero when the loaded Code is below 0xFFFFFFFF, zero otherwise. */
BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p)
{
  unsigned bytesLeft;
  p->Low = 0;
  p->Range = 0xFFFFFFFF;
  p->Code = 0;
  for (bytesLeft = 4; bytesLeft != 0; bytesLeft--)
  {
    p->Code <<= 8;
    p->Code |= READ_BYTE(p);
  }
  return (p->Code < 0xFFFFFFFF);
}
/* Normalization step of the carryless range decoder.
   The loop keeps running while either
     - Low and (Low + Range) agree in their top 8 bits (their XOR is below kTop), or
     - Range has dropped below kBot, in which case Range is first replaced by
       ((0 - Low) & (kBot - 1)).
   Each pass shifts one more input byte into Code and shifts Range and Low
   left by 8 bits.
   The argument is expanded without parentheses, so pass a plain pointer
   name or an already parenthesized expression such as R. */
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
p->Code = (p->Code << 8) | READ_BYTE(p); \
p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
// As defined here, RC_NORM_LOCAL expands to nothing and RC_NORM_REMOTE performs RC_NORM.
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
// R: address of the range-decoder state inside the CPpmd7 object named `p` in the enclosing scope.
#define R (&p->rc.dec)
/* Narrows the decoder interval to the slice that begins (start) units into
   the current range and is (size) units wide, where one unit is the current
   value of R->Range. The callers in this file scale R->Range down with
   RC_GetThreshold() before calling.
   Low advances by (start * Range), Code drops by the same amount, and
   Range is multiplied by (size). RC_NORM_LOCAL is currently an empty macro,
   so normalization is left to the caller. */
Z7_FORCE_INLINE
// Z7_NO_INLINE
static void Ppmd7a_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
  const UInt32 offset = start * R->Range;
  R->Code -= offset;
  R->Low += offset;
  R->Range *= size;
  RC_NORM_LOCAL(R)
}
// RC_Decode: forwards to Ppmd7a_RD_Decode for the `p` in scope; the expansion already ends with ';'.
#define RC_Decode(start, size) Ppmd7a_RD_Decode(p, start, size);
// RC_DecodeFinal: RC_Decode followed by the REMOTE normalization of R.
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
// RC_GetThreshold: divides R->Range by (total) in place, then yields R->Code divided by the new Range.
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
// CTX: resolves a context reference through Ppmd7_GetContext and casts it to CPpmd7_Context *.
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
// Not defined in this file; declared here because Ppmd7a_DecodeSymbol calls it.
void Ppmd7_UpdateModel(CPpmd7 *p);
// MASK: byte-wise access to the local array named `charMask`, indexed by symbol value.
#define MASK(sym) ((unsigned char *)charMask)[sym]
/* Decodes one symbol with the PPMd7a range decoder stored in p->rc.dec.
   Returns:
     >= 0            : the decoded byte
     PPMD7_SYM_END   : an escape was decoded in a context that has no Suffix
     PPMD7_SYM_ERROR : the range-coder state is inconsistent with the model
   Outline:
     1) try the current context (p->MinContext): multi-state path when
        NumStats != 1, otherwise the single-state (binary) path;
     2) on escape, walk the Suffix chain; at each level only symbols whose
        charMask byte is still non-zero are counted, until a symbol is found. */
int Ppmd7a_DecodeSymbol(CPpmd7 *p)
{
// One mask byte per symbol value (256 bytes in total), accessed through MASK().
// A zero byte marks a symbol already rejected in a higher-order context.
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
// ---- multi-state context ----
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
const UInt32 summFreq = p->MinContext->Union2.SummFreq;
// RC_GetThreshold divides Range by summFreq, so a larger summFreq would make Range zero.
if (summFreq > R->Range)
return PPMD7_SYM_ERROR;
count = RC_GetThreshold(summFreq);
hiCnt = count;
// First state: hit if the threshold lies below its Freq.
if ((Int32)(count -= s->Freq) < 0)
{
Byte sym;
RC_DecodeFinal(0, s->Freq)
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1_0(p);
return sym;
}
p->PrevSuccess = 0;
// Remaining (NumStats - 1) states: subtract each Freq until count turns negative.
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte sym;
// (hiCnt - count) - s->Freq is the sum of Freq of all states that precede s.
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1(p);
return sym;
}
}
while (--i);
// No state matched: this is an escape. The threshold must still be below summFreq.
if (hiCnt >= summFreq)
return PPMD7_SYM_ERROR;
// hiCnt becomes the sum of Freq over all states; the escape takes the rest up to summFreq.
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt)
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
// Presumably fills charMask with 0xFF bytes (macro defined outside this file);
// the entries of this context's symbols are then zeroed below.
PPMD_SetAllBitsIn256Bytes(charMask)
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
// s points at the last state here. It is masked explicitly, and the other
// states are masked two per iteration starting from the first one.
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
// ---- single-state (binary) context ----
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
UInt32 pr = *prob;
// size0: width of the "symbol matches" part of the current range.
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
// The only symbol of the context was coded.
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM(R)
// sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
// Freq is incremented only while it is below 128.
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
// Move straight to the successor context when OrderFall is 0 and the successor
// address lies above p->Text; otherwise let Ppmd7_UpdateModel() handle it.
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return sym;
}
// Escape from the binary context.
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(size0);
R->Low += size0;
R->Code -= size0;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask)
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
// ---- escape loop: one iteration per lower-order context tried ----
for (;;)
{
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
CPpmd7_Context *mc;
unsigned numMasked;
// Normalization for the RC_Decode() that produced the escape (LOCAL normalization is disabled).
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
// Follow Suffix links until a context with a different NumStats is reached;
// a context without Suffix ends the stream.
do
{
p->OrderFall++;
if (!mc->Suffix)
return PPMD7_SYM_END;
mc = Ppmd7_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
s = Ppmd7_GetStats(p, mc);
{
// hiCnt = sum of Freq over the states whose mask byte is non-zero.
// The mask byte is ANDed with Freq, so masked (zero) symbols contribute 0.
// If NumStats is odd the first state is handled alone; the rest go in pairs.
unsigned num = mc->NumStats;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
// Ppmd7_MakeEscFreq() stores the escape frequency in freqSum; adding hiCnt gives the total.
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
// Same guard as in the multi-state path: the division in RC_GetThreshold must not zero Range.
if (freqSum > R->Range)
return PPMD7_SYM_ERROR;
count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
// The threshold falls inside the unmasked symbols: locate the state.
Byte sym;
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
}
}
// The loop advanced s one past the matching state.
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_UPDATE(see)
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update2(p);
return sym;
}
// Threshold beyond the total: corrupt data.
if (count >= freqSum)
return PPMD7_SYM_ERROR;
// Escape again: consume the escape part of the range and go one level further down.
RC_Decode(hiCnt, freqSum - hiCnt)
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
// Mask every symbol of this context before the next iteration.
s = Ppmd7_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}
#undef kTop
#undef kBot
#undef READ_BYTE
#undef RC_NORM_BASE
#undef RC_NORM_1
#undef RC_NORM
#undef RC_NORM_LOCAL
#undef RC_NORM_REMOTE
#undef R
#undef RC_Decode
#undef RC_DecodeFinal
#undef RC_GetThreshold
#undef CTX
#undef SUCCESSOR
#undef MASK

1565
3rdparty/7z/src/Ppmd8.c vendored Normal file

File diff suppressed because it is too large Load diff

181
3rdparty/7z/src/Ppmd8.h vendored Normal file
View file

@ -0,0 +1,181 @@
/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec
2023-04-02 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#ifndef ZIP7_INC_PPMD8_H
#define ZIP7_INC_PPMD8_H
#include "Ppmd.h"
EXTERN_C_BEGIN
#define PPMD8_MIN_ORDER 2
#define PPMD8_MAX_ORDER 16
struct CPpmd8_Context_;
// Reference type used to link one context to another (Ppmd_Ref_Type is presumably provided by Ppmd.h).
typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref;
// MY_CPU_pragma_pack_push_1
/* One context record of the PPMd8 model. */
typedef struct CPpmd8_Context_
{
// Ppmd8_DecodeSymbol takes the single-state path when NumStats == 0; otherwise it
// visits the first state plus NumStats further states.
Byte NumStats;
// Added to the BinSumm column index by Ppmd8_GetBinSumm().
Byte Flags;
// SummFreq is read on the multi-state path. For single-state contexts
// Ppmd8Context_OneState() reinterprets the storage starting here as a CPpmd_State.
union
{
UInt16 SummFreq;
CPpmd_State2 State2;
} Union2;
// Stats is the reference resolved by Ppmd8_GetStats() to the CPpmd_State array.
union
{
CPpmd_State_Ref Stats;
CPpmd_State4 State4;
} Union4;
// Reference to the suffix context (read by Ppmd8_GetBinSumm()).
CPpmd8_Context_Ref Suffix;
} CPpmd8_Context;
// MY_CPU_pragma_pop
// Views the storage that begins at Union2 as the context's single CPpmd_State.
#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
/* PPMdI code rev.2 contains the fix over PPMdI code rev.1.
But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed
in FREEZE mode. So we disable FREEZE mode support. */
// #define PPMD8_FREEZE_SUPPORT
/* Restore-method identifiers (presumably the values accepted by the
   restoreMethod argument of Ppmd8_Init() — confirm against Ppmd8.c).
   PPMD8_RESTORE_METHOD_FREEZE exists only when PPMD8_FREEZE_SUPPORT is defined.
   PPMD8_RESTORE_METHOD_UNSUPPPORTED is always the last enumerator, so its value
   is 2 without FREEZE support and 3 with it.
   NOTE(review): "UNSUPPPORTED" is spelled with three P's; it is a public
   identifier, so the spelling is kept. */
enum
{
PPMD8_RESTORE_METHOD_RESTART,
PPMD8_RESTORE_METHOD_CUT_OFF
#ifdef PPMD8_FREEZE_SUPPORT
, PPMD8_RESTORE_METHOD_FREEZE
#endif
, PPMD8_RESTORE_METHOD_UNSUPPPORTED
};
/* Complete state of one PPMd8 codec instance: model pointers, memory arena
   bookkeeping, range-coder registers, the byte stream and the adaptive tables. */
typedef struct
{
// MinContext is the context whose statistics Ppmd8_DecodeSymbol() reads.
CPpmd8_Context *MinContext, *MaxContext;
// Set by the decoder to the state of the symbol just decoded, before Ppmd8_Update*() is called.
CPpmd_State *FoundState;
unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod;
Int32 RunLength, InitRL; /* must be 32-bit at least */
UInt32 Size;
UInt32 GlueCount;
UInt32 AlignOffset;
// Ppmd8_WasAllocated() reports whether Base is non-NULL.
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
// Range-coder registers, shared by the decoder (Ppmd8Dec.c) and Ppmd8_Init_RangeEnc().
UInt32 Range;
UInt32 Code;
UInt32 Low;
// Byte stream: In is read by the decoder; Out is presumably written by the encoder.
// The two members share storage.
union
{
IByteInPtr In;
IByteOutPtr Out;
} Stream;
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
UInt32 Stamps[PPMD_NUM_INDEXES];
// NS2Indx and NS2BSIndx supply the row / column terms used by Ppmd8_GetBinSumm().
Byte NS2BSIndx[256], NS2Indx[260];
// Indexed with (pr >> 10) by the decoder to obtain InitEsc.
Byte ExpEscape[16];
CPpmd_See DummySee, See[24][32];
// Probabilities for single-state contexts, addressed through Ppmd8_GetBinSumm().
UInt16 BinSumm[25][64];
} CPpmd8;
// Life-cycle API. The implementations are not part of this header; see Ppmd8.c for the exact contracts.
void Ppmd8_Construct(CPpmd8 *p);
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc);
void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod);
// Nonzero when p->Base is not NULL.
#define Ppmd8_WasAllocated(p) ((p)->Base != NULL)
/* ---------- Internal Functions ---------- */
// Thin wrappers over the generic Ppmd_GetPtr / Ppmd_GetPtr_Type helpers:
// Ppmd8_GetContext yields a CPpmd8_Context pointer for a context reference,
// Ppmd8_GetStats yields the CPpmd_State pointer for (ctx)->Union4.Stats.
#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context)
#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
// Model updates run after a symbol was coded; p->FoundState must be set first.
// The decoder calls Ppmd8_Update1_0() when the first state of the context matched
// and Ppmd8_Update1() when a later state matched.
void Ppmd8_Update1(CPpmd8 *p);
void Ppmd8_Update1_0(CPpmd8 *p);
void Ppmd8_Update2(CPpmd8 *p);
/* Address of the BinSumm entry (UInt16) for the current single-state context.
   Row    : NS2Indx[Freq - 1], where Freq belongs to the only state of MinContext.
   Column : PrevSuccess
            + ((RunLength >> 26) & 0x20)
            + NS2BSIndx[NumStats of the suffix context]
            + Flags of MinContext.
   `p` is expanded several times and without parentheses: pass a plain
   pointer variable, not an expression with side effects. */
#define Ppmd8_GetBinSumm(p) \
    &p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \
    [ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
    + p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] \
    + p->MinContext->Flags ]
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale);
/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases,
where the original PPMdI code can perform a "Divide by Zero" operation.
We use the following fix to allow the encoder and decoder to work correctly in all cases:
we correct (Escape_Freq) and (_sum_), if (_sum_) is larger than (p->Range). */
/* Clamps the lvalue (_sum_) to p->Range.
   The expansion is a complete `if` statement that already ends with ';':
   call sites write it without a trailing semicolon, and it must not be placed
   directly in front of an `else`. Both arguments are expanded without parentheses. */
#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range;
/* ---------- Decode ---------- */
#define PPMD8_SYM_END (-1)
#define PPMD8_SYM_ERROR (-2)
/*
You must set (CPpmd8::Stream.In) before Ppmd8_Init_RangeDec()
Ppmd8_DecodeSymbol()
out:
>= 0 : decoded byte
-1 : PPMD8_SYM_END : End of payload marker
-2 : PPMD8_SYM_ERROR : Data error
*/
// Resets Low / Range and loads four bytes from Stream.In into Code;
// returns nonzero when the loaded Code is below 0xFFFFFFFF.
BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p);
// Nonzero when the decoder's Code register is exactly 0.
#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd8_DecodeSymbol(CPpmd8 *p);
/* ---------- Encode ---------- */
// Resets the encoder interval: Low = 0, Range = 0xFFFFFFFF. Expands to a braced block.
#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
void Ppmd8_Flush_RangeEnc(CPpmd8 *p);
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol);
EXTERN_C_END
#endif

295
3rdparty/7z/src/Ppmd8Dec.c vendored Normal file
View file

@ -0,0 +1,295 @@
/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder
2023-04-02 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd8.h"
#define kTop ((UInt32)1 << 24)
#define kBot ((UInt32)1 << 15)
#define READ_BYTE(p) IByteIn_Read((p)->Stream.In)
/* Starts range decoding on the stream held in p->Stream.In.
   Low is cleared, Range is set to 0xFFFFFFFF, and Code is assembled from
   four bytes obtained with READ_BYTE; the byte read first becomes the most
   significant one.
   Returns nonzero when the assembled Code is below 0xFFFFFFFF, zero otherwise. */
BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p)
{
  unsigned remaining = 4;
  p->Low = 0;
  p->Range = 0xFFFFFFFF;
  p->Code = 0;
  do
  {
    p->Code <<= 8;
    p->Code |= READ_BYTE(p);
  }
  while (--remaining != 0);
  return (p->Code < 0xFFFFFFFF);
}
/* Normalization step of the carryless range decoder.
   The loop keeps running while either
     - Low and (Low + Range) agree in their top 8 bits (their XOR is below kTop), or
     - Range has dropped below kBot, in which case Range is first replaced by
       ((0 - Low) & (kBot - 1)).
   Each pass shifts one more input byte into Code and shifts Range and Low
   left by 8 bits.
   The argument is expanded without parentheses, so pass a plain pointer name. */
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
p->Code = (p->Code << 8) | READ_BYTE(p); \
p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
// As defined here, RC_NORM_LOCAL expands to nothing and RC_NORM_REMOTE performs RC_NORM.
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
// In Ppmd8 the range-coder registers live directly in CPpmd8, so R is simply the `p` in scope.
#define R p
/* Narrows the decoder interval to the slice that begins (start) units into
   the current range and is (size) units wide, where one unit is the current
   value of R->Range. The callers in this file scale R->Range down with
   RC_GetThreshold() before calling.
   Low advances by (start * Range), Code drops by the same amount, and
   Range is multiplied by (size). RC_NORM_LOCAL is currently an empty macro,
   so normalization is left to the caller. */
Z7_FORCE_INLINE
// Z7_NO_INLINE
static void Ppmd8_RD_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
{
  const UInt32 offset = start * R->Range;
  R->Code -= offset;
  R->Low += offset;
  R->Range *= size;
  RC_NORM_LOCAL(R)
}
// RC_Decode: forwards to Ppmd8_RD_Decode for the `p` in scope; the expansion already ends with ';'.
#define RC_Decode(start, size) Ppmd8_RD_Decode(p, start, size);
// RC_DecodeFinal: RC_Decode followed by the REMOTE normalization of R.
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
// RC_GetThreshold: divides R->Range by (total) in place, then yields R->Code divided by the new Range.
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
// CTX: resolves a context reference through Ppmd8_GetContext and casts it to CPpmd8_Context *.
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
// typedef CPpmd8_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
// Not defined in this file; declared here because Ppmd8_DecodeSymbol calls it.
void Ppmd8_UpdateModel(CPpmd8 *p);
// MASK: byte-wise access to the local array named `charMask`, indexed by symbol value.
#define MASK(sym) ((unsigned char *)charMask)[sym]
/*
Ppmd8_DecodeSymbol()
  Decodes one symbol from the range-coded stream, starting at (p->MinContext)
  and escaping to suffix contexts while the symbol is not found.
return:
  the decoded symbol (a Byte value returned as int),
  PPMD8_SYM_END   - an escape was decoded in a context that has no Suffix,
  PPMD8_SYM_ERROR - the decoded threshold is not less than the total frequency.
*/
int Ppmd8_DecodeSymbol(CPpmd8 *p)
{
// one mask byte per symbol value, accessed via MASK(sym).
// A zero byte marks a symbol excluded by an earlier escape;
// (Freq & MASK(sym)) is then 0 for such symbol.
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 0)
{
// context with more than one state: the loops below visit (NumStats + 1) states
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
PPMD8_CORRECT_SUM_RANGE(p, summFreq)
// RC_GetThreshold() also divides (Range) by the total
count = RC_GetThreshold(summFreq);
// hiCnt keeps the original threshold value
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
// threshold is inside the interval of the first state
Byte sym;
RC_DecodeFinal(0, s->Freq)
p->FoundState = s;
sym = s->Symbol;
Ppmd8_Update1_0(p);
return sym;
}
p->PrevSuccess = 0;
i = p->MinContext->NumStats;
do
{
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte sym;
// (hiCnt - count) is the sum of Freqs up to and including (s),
// so the interval start is that sum minus (s->Freq)
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
p->FoundState = s;
sym = s->Symbol;
Ppmd8_Update1(p);
return sym;
}
}
while (--i);
// no state matched: it is escape, or the data is corrupted
if (hiCnt >= summFreq)
return PPMD8_SYM_ERROR;
// now hiCnt is the sum of Freqs of all states of the context
hiCnt -= count;
// escape interval: [hiCnt, summFreq)
RC_Decode(hiCnt, summFreq - hiCnt)
// NOTE(review): presumably sets every mask byte to 0xFF - confirm in Ppmd.h
PPMD_SetAllBitsIn256Bytes(charMask)
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
// mask all symbols of this context: (s) is the last state,
// the other states are processed in pairs from the first state
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
// binary context: single state, coded with adaptive probability (*prob)
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt16 *prob = Ppmd8_GetBinSumm(p);
UInt32 pr = *prob;
// size of the interval of the "symbol found" branch
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
// symbol of the single state was coded
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM(R)
// sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
// Ppmd8_UpdateBin(p);
{
unsigned freq = s->Freq;
CPpmd8_Context *c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
// Freq is incremented only while it is less than 196
s->Freq = (Byte)(freq + (freq < 196));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
p->MaxContext = p->MinContext = c;
else
Ppmd8_UpdateModel(p);
}
return sym;
}
// escape from binary context
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(rc2, size0);
R->Low += size0;
R->Code -= size0;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
RC_NORM_LOCAL(R)
// only the symbol of the single state is masked
PPMD_SetAllBitsIn256Bytes(charMask)
MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
// escape loop: each iteration processes one suffix context
for (;;)
{
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
UInt32 freqSum2;
CPpmd_See *see;
CPpmd8_Context *mc;
unsigned numMasked;
// normalization that was postponed by RC_Decode() of previous escape
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
// move along the suffix chain while NumStats of the context is equal to numMasked
do
{
p->OrderFall++;
if (!mc->Suffix)
return PPMD8_SYM_END;
mc = Ppmd8_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
s = Ppmd8_GetStats(p, mc);
{
// hiCnt = sum of Freqs of non-masked symbols of (mc).
// States are processed in pairs; if the number of states is odd,
// the first state is processed separately.
unsigned num = (unsigned)mc->NumStats + 1;
unsigned num2 = num / 2;
num &= 1;
// (0 - num) is all-ones mask if the number of states is odd, and 0 otherwise
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
// freqSum gets the escape frequency, then the total (escape + non-masked symbols)
see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
freqSum2 = freqSum;
PPMD8_CORRECT_SUM_RANGE(R, freqSum2)
count = RC_GetThreshold(freqSum2);
if (count < hiCnt)
{
// threshold is inside intervals of non-masked symbols
Byte sym;
// Ppmd_See_UPDATE(see) // new (see->Summ) value can overflow over 16-bits in some rare cases
s = Ppmd8_GetStats(p, p->MinContext);
// hiCnt keeps the original threshold value
hiCnt = count;
{
// subtract Freqs of non-masked symbols until (count) becomes negative
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
}
}
// (s) was incremented after the matching state
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_UPDATE(see)
p->FoundState = s;
sym = s->Symbol;
Ppmd8_Update2(p);
return sym;
}
if (count >= freqSum2)
return PPMD8_SYM_ERROR;
// escape again: interval [hiCnt, freqSum2)
RC_Decode(hiCnt, freqSum2 - hiCnt)
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
// mask all symbols of the current context before the next iteration
s = Ppmd8_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats + 1;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}
#undef kTop
#undef kBot
#undef READ_BYTE
#undef RC_NORM_BASE
#undef RC_NORM_1
#undef RC_NORM
#undef RC_NORM_LOCAL
#undef RC_NORM_REMOTE
#undef R
#undef RC_Decode
#undef RC_DecodeFinal
#undef RC_GetThreshold
#undef CTX
#undef SUCCESSOR
#undef MASK

338
3rdparty/7z/src/Ppmd8Enc.c vendored Normal file
View file

@ -0,0 +1,338 @@
/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder
2023-04-02 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd8.h"
#define kTop ((UInt32)1 << 24)
#define kBot ((UInt32)1 << 15)
#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24))
/*
Ppmd8_Flush_RangeEnc()
  Writes the 4 bytes of (p->Low) to the output stream, most significant
  byte first. (p->Low) is shifted left by 8 bits after each byte.
*/
void Ppmd8_Flush_RangeEnc(CPpmd8 *p)
{
  unsigned numLeft = 4;
  do
  {
    WRITE_BYTE(p);
    p->Low <<= 8;
  }
  while (--numLeft != 0);
}
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) \
{ WRITE_BYTE(p); p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
// #define RC_PRE(total) p->Range /= total;
// #define RC_PRE(total)
#define R p
Z7_FORCE_INLINE
// Z7_NO_INLINE
/*
Ppmd8_RangeEnc_Encode()
  Narrows the encoder interval to the sub-range [start, start + size)
  of (total). Normalization is done here only if RC_NORM_LOCAL is enabled.
*/
static void Ppmd8_RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
{
  p->Range /= total;
  p->Low += start * p->Range;
  p->Range *= size;
  RC_NORM_LOCAL(p)
}
#define RC_Encode(start, size, total) Ppmd8_RangeEnc_Encode(p, start, size, total);
#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total) RC_NORM_REMOTE(p)
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
// typedef CPpmd8_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd8_UpdateModel(CPpmd8 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
// Z7_FORCE_INLINE
// static
/*
Ppmd8_EncodeSymbol()
  Encodes (symbol) with the range coder, starting at (p->MinContext)
  and coding escapes to suffix contexts while the symbol is not found.
  If no state matches and a context without Suffix is reached, the function
  returns after coding the escapes (that is EndMarker case: symbol = -1).
*/
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
// one mask byte per symbol value, accessed via MASK(sym).
// A zero byte marks a symbol excluded by an earlier escape;
// (Freq & MASK(sym)) is then 0 for such symbol.
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 0)
{
// context with more than one state: the loops below visit (NumStats + 1) states
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
PPMD8_CORRECT_SUM_RANGE(p, summFreq)
// RC_PRE(summFreq);
if (s->Symbol == symbol)
{
// symbol is in the first state
RC_EncodeFinal(0, s->Freq, summFreq)
p->FoundState = s;
Ppmd8_Update1_0(p);
return;
}
p->PrevSuccess = 0;
// (sum) is the sum of Freqs of the states before (s)
sum = s->Freq;
i = p->MinContext->NumStats;
do
{
if ((++s)->Symbol == symbol)
{
RC_EncodeFinal(sum, s->Freq, summFreq)
p->FoundState = s;
Ppmd8_Update1(p);
return;
}
sum += s->Freq;
}
while (--i);
// symbol was not found: encode escape interval [sum, summFreq)
RC_Encode(sum, summFreq - sum, summFreq)
// NOTE(review): presumably sets every mask byte to 0xFF - confirm in Ppmd.h
PPMD_SetAllBitsIn256Bytes(charMask)
// MASK(s->Symbol) = 0;
// i = p->MinContext->NumStats;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
// mask all symbols of this context: (s) is the last state,
// the other states are processed in pairs from the first state
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
// binary context: single state, coded with adaptive probability (*prob)
UInt16 *prob = Ppmd8_GetBinSumm(p);
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt32 pr = *prob;
// size of the interval of the "symbol found" branch
const UInt32 bound = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeEnc_EncodeBit_0(p, bound);
R->Range = bound;
RC_NORM(R)
// p->FoundState = s;
// Ppmd8_UpdateBin(p);
{
const unsigned freq = s->Freq;
CPpmd8_Context *c = CTX(SUCCESSOR(s));
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
// Freq is incremented only while it is less than 196
s->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
p->MaxContext = p->MinContext = c;
else
Ppmd8_UpdateModel(p);
}
return;
}
// escape from binary context
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeEnc_EncodeBit_1(p, bound);
R->Low += bound;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound;
RC_NORM_LOCAL(R)
// only the symbol of the single state is masked
PPMD_SetAllBitsIn256Bytes(charMask)
MASK(s->Symbol) = 0;
p->PrevSuccess = 0;
}
// escape loop: each iteration processes one suffix context
for (;;)
{
CPpmd_See *see;
CPpmd_State *s;
UInt32 sum, escFreq;
CPpmd8_Context *mc;
unsigned i, numMasked;
// normalization that was postponed by RC_Encode() of previous escape
RC_NORM_REMOTE(p)
mc = p->MinContext;
numMasked = mc->NumStats;
// move along the suffix chain while NumStats of the context is equal to numMasked
do
{
p->OrderFall++;
if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
mc = Ppmd8_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
p->MinContext = mc;
see = Ppmd8_MakeEscFreq(p, numMasked, &escFreq);
s = Ppmd8_GetStats(p, p->MinContext);
// (sum) is the sum of Freqs of non-masked symbols before (s)
sum = 0;
// (i) is the number of states that are not processed yet (including current)
i = (unsigned)p->MinContext->NumStats + 1;
do
{
const unsigned cur = s->Symbol;
if ((int)cur == symbol)
{
// symbol was found: interval start is (low), interval size is (freq).
// Then (sum) is extended to the total: escFreq and Freqs of the
// current and all following non-masked states are added.
const UInt32 low = sum;
const UInt32 freq = s->Freq;
unsigned num2;
Ppmd_See_UPDATE(see)
p->FoundState = s;
sum += escFreq;
// remaining states are processed in pairs; if the remaining number
// is odd, the current state is added separately and skipped
num2 = i / 2;
i &= 1;
sum += freq & (0 - (UInt32)i);
if (num2 != 0)
{
s += i;
for (;;)
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
}
}
PPMD8_CORRECT_SUM_RANGE(p, sum)
RC_EncodeFinal(low, freq, sum)
Ppmd8_Update2(p);
return;
}
sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
{
// symbol was not found: encode escape interval [sum, total), where (total - sum) is escFreq
UInt32 total = sum + escFreq;
see->Summ = (UInt16)(see->Summ + total);
PPMD8_CORRECT_SUM_RANGE(p, total)
RC_Encode(sum, total - sum, total)
}
{
// mask all symbols of the current context before the next iteration:
// (s) is moved back to the last state, the other states are processed in pairs
const CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
s--;
MASK(s->Symbol) = 0;
do
{
const unsigned sym0 = s2[0].Symbol;
const unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
}
#undef kTop
#undef kBot
#undef WRITE_BYTE
#undef RC_NORM_BASE
#undef RC_NORM_1
#undef RC_NORM
#undef RC_NORM_LOCAL
#undef RC_NORM_REMOTE
#undef R
#undef RC_Encode
#undef RC_EncodeFinal
#undef CTX
#undef SUCCESSOR
#undef MASK

View file

@ -1,8 +1,8 @@
/* Precomp.h -- StdAfx /* Precomp.h -- StdAfx
2013-11-12 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H #ifndef ZIP7_INC_PRECOMP_H
#define __7Z_PRECOMP_H #define ZIP7_INC_PRECOMP_H
#include "Compiler.h" #include "Compiler.h"
/* #include "7zTypes.h" */ /* #include "7zTypes.h" */

View file

@ -1,14 +1,14 @@
/* RotateDefs.h -- Rotate functions /* RotateDefs.h -- Rotate functions
2015-03-25 : Igor Pavlov : Public domain */ 2023-06-18 : Igor Pavlov : Public domain */
#ifndef __ROTATE_DEFS_H #ifndef ZIP7_INC_ROTATE_DEFS_H
#define __ROTATE_DEFS_H #define ZIP7_INC_ROTATE_DEFS_H
#ifdef _MSC_VER #ifdef _MSC_VER
#include <stdlib.h> #include <stdlib.h>
/* don't use _rotl with MINGW. It can insert slow call to function. */ /* don't use _rotl with old MINGW. It can insert slow call to function. */
/* #if (_MSC_VER >= 1200) */ /* #if (_MSC_VER >= 1200) */
#pragma intrinsic(_rotl) #pragma intrinsic(_rotl)
@ -18,12 +18,32 @@
#define rotlFixed(x, n) _rotl((x), (n)) #define rotlFixed(x, n) _rotl((x), (n))
#define rotrFixed(x, n) _rotr((x), (n)) #define rotrFixed(x, n) _rotr((x), (n))
#if (_MSC_VER >= 1300)
#define Z7_ROTL64(x, n) _rotl64((x), (n))
#define Z7_ROTR64(x, n) _rotr64((x), (n))
#else
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#endif
#else #else
/* new compilers can translate these macros to fast commands. */ /* new compilers can translate these macros to fast commands. */
#if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5)
/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
#else
/* for old GCC / clang: */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) #define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) #define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#endif
#endif #endif

498
3rdparty/7z/src/Sha1.c vendored Normal file
View file

@ -0,0 +1,498 @@
/* Sha1.c -- SHA-1 Hash
2023-04-02 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
#include <string.h>
#include "CpuArch.h"
#include "RotateDefs.h"
#include "Sha1.h"
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif
#ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER
#if _MSC_VER >= 1200
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define Z7_COMPILER_SHA1_SUPPORTED
#endif
#endif
#endif
void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef Z7_COMPILER_SHA1_SUPPORTED
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
static SHA1_FUNC_UPDATE_BLOCKS g_SHA1_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
static SHA1_FUNC_UPDATE_BLOCKS g_SHA1_FUNC_UPDATE_BLOCKS_HW;
#define SHA1_UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
#define SHA1_UPDATE_BLOCKS(p) Sha1_UpdateBlocks
#endif
/*
Sha1_SetFunction()
  Selects the block-update implementation that (p) will use.
  (algo) is one of SHA1_ALGO_DEFAULT / SHA1_ALGO_SW / SHA1_ALGO_HW.
return:
  False - (algo) is not supported; (p->func_UpdateBlocks) is not changed
  True  - (p->func_UpdateBlocks) was set according to (algo)
*/
BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
{
  SHA1_FUNC_UPDATE_BLOCKS func = Sha1_UpdateBlocks;

  #ifdef Z7_COMPILER_SHA1_SUPPORTED
  switch (algo)
  {
    case SHA1_ALGO_SW:
      /* software code is already selected */
      break;
    case SHA1_ALGO_DEFAULT:
      func = g_SHA1_FUNC_UPDATE_BLOCKS;
      break;
    case SHA1_ALGO_HW:
      /* the pointer is NULL, if hardware code was not enabled by Sha1Prepare() */
      func = g_SHA1_FUNC_UPDATE_BLOCKS_HW;
      if (!func)
        return False;
      break;
    default:
      return False;
  }
  #else
  /* only DEFAULT (0) and SW (1) values are accepted without hardware code */
  if (algo > 1)
    return False;
  #endif

  p->func_UpdateBlocks = func;
  return True;
}
/* define it for speed optimization */
// #define Z7_SHA1_UNROLL
// allowed unroll steps: (1, 2, 4, 5, 20)
#undef Z7_SHA1_BIG_W
#ifdef Z7_SHA1_UNROLL
#define STEP_PRE 20
#define STEP_MAIN 20
#else
#define Z7_SHA1_BIG_W
#define STEP_PRE 5
#define STEP_MAIN 5
#endif
#ifdef Z7_SHA1_BIG_W
#define kNumW 80
#define w(i) W[i]
#else
#define kNumW 16
#define w(i) W[(i)&15]
#endif
#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4))
#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1))
#define f0(x,y,z) ( 0x5a827999 + (z^(x&(y^z))) )
#define f1(x,y,z) ( 0x6ed9eba1 + (x^y^z) )
#define f2(x,y,z) ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) )
#define f3(x,y,z) ( 0xca62c1d6 + (x^y^z) )
/*
#define T1(fx, ww) \
tmp = e + fx(b,c,d) + ww + rotlFixed(a, 5); \
e = d; \
d = c; \
c = rotlFixed(b, 30); \
b = a; \
a = tmp; \
*/
#define T5(a,b,c,d,e, fx, ww) \
e += fx(b,c,d) + ww + rotlFixed(a, 5); \
b = rotlFixed(b, 30); \
/*
#define R1(i, fx, wx) \
T1 ( fx, wx(i)); \
#define R2(i, fx, wx) \
R1 ( (i) , fx, wx); \
R1 ( (i) + 1, fx, wx); \
#define R4(i, fx, wx) \
R2 ( (i) , fx, wx); \
R2 ( (i) + 2, fx, wx); \
*/
#define M5(i, fx, wx0, wx1) \
T5 ( a,b,c,d,e, fx, wx0((i) ) ) \
T5 ( e,a,b,c,d, fx, wx1((i)+1) ) \
T5 ( d,e,a,b,c, fx, wx1((i)+2) ) \
T5 ( c,d,e,a,b, fx, wx1((i)+3) ) \
T5 ( b,c,d,e,a, fx, wx1((i)+4) ) \
#define R5(i, fx, wx) \
M5 ( i, fx, wx, wx) \
#if STEP_PRE > 5
#define R20_START \
R5 ( 0, f0, w0) \
R5 ( 5, f0, w0) \
R5 ( 10, f0, w0) \
M5 ( 15, f0, w0, w1) \
#elif STEP_PRE == 5
#define R20_START \
{ size_t i; for (i = 0; i < 15; i += STEP_PRE) \
{ R5(i, f0, w0) } } \
M5 ( 15, f0, w0, w1) \
#else
#if STEP_PRE == 1
#define R_PRE R1
#elif STEP_PRE == 2
#define R_PRE R2
#elif STEP_PRE == 4
#define R_PRE R4
#endif
#define R20_START \
{ size_t i; for (i = 0; i < 16; i += STEP_PRE) \
{ R_PRE(i, f0, w0) } } \
R4 ( 16, f0, w1) \
#endif
#if STEP_MAIN > 5
#define R20(ii, fx) \
R5 ( (ii) , fx, w1) \
R5 ( (ii) + 5 , fx, w1) \
R5 ( (ii) + 10, fx, w1) \
R5 ( (ii) + 15, fx, w1) \
#else
#if STEP_MAIN == 1
#define R_MAIN R1
#elif STEP_MAIN == 2
#define R_MAIN R2
#elif STEP_MAIN == 4
#define R_MAIN R4
#elif STEP_MAIN == 5
#define R_MAIN R5
#endif
#define R20(ii, fx) \
{ size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
{ R_MAIN(i, fx, w1) } } \
#endif
/*
Sha1_InitState()
  Sets the five state words to the SHA-1 initial values and
  resets the counter of processed bytes.
  (p->func_UpdateBlocks) is not changed.
*/
void Sha1_InitState(CSha1 *p)
{
  p->state[4] = 0xC3D2E1F0;
  p->state[3] = 0x10325476;
  p->state[2] = 0x98BADCFE;
  p->state[1] = 0xEFCDAB89;
  p->state[0] = 0x67452301;
  p->count = 0;
}
/*
Sha1_Init()
  Selects the default block-update function and resets the hash state.
  If there is no hardware code support in the compiler, the pointer is set
  to NULL: SHA1_UPDATE_BLOCKS() doesn't use the pointer in that case.
*/
void Sha1_Init(CSha1 *p)
{
  #ifdef Z7_COMPILER_SHA1_SUPPORTED
  p->func_UpdateBlocks = g_SHA1_FUNC_UPDATE_BLOCKS;
  #else
  p->func_UpdateBlocks = NULL;
  #endif
  Sha1_InitState(p);
}
/*
Sha1_UpdateBlocks()
  Software SHA-1 compression function.
  It processes (numBlocks) consecutive 64-byte blocks from (data)
  and updates the five words of (state).
  The function does nothing, if (numBlocks == 0).
*/
Z7_NO_INLINE
void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
{
// the names (a, b, c, d, e, W, data, tmp) are used by the round macros (T5, M5, R5, R20...)
UInt32 a, b, c, d, e;
// message schedule: kNumW is 80 words, if Z7_SHA1_BIG_W is defined, or 16 words otherwise
UInt32 W[kNumW];
// if (numBlocks != 0x1264378347) return;
if (numBlocks == 0)
return;
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
do
{
// (tmp) is required only for T1-based rounds (unroll steps smaller than 5)
#if STEP_PRE < 5 || STEP_MAIN < 5
UInt32 tmp;
#endif
// rounds 0-19: f0; message words are loaded as big-endian from (data) via w0()
R20_START
// rounds 20-39, 40-59, 60-79: f1, f2, f3
R20(20, f1)
R20(40, f2)
R20(60, f3)
// add the previous state to the working variables and store the new state
a += state[0];
b += state[1];
c += state[2];
d += state[3];
e += state[4];
state[0] = a;
state[1] = b;
state[2] = c;
state[3] = d;
state[4] = e;
data += 64;
}
while (--numBlocks);
}
#define Sha1_UpdateBlock(p) SHA1_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
/*
Sha1_Update()
  Adds (size) bytes from (data) to the hash.
  Full 64-byte blocks are processed immediately.
  The remaining bytes (less than 64) are stored in (p->buffer).
*/
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
{
  unsigned pos;

  if (size == 0)
    return;

  /* number of bytes that are already stored in (p->buffer) */
  pos = (unsigned)p->count & 0x3F;
  p->count += size;

  {
    /* free space in (p->buffer) */
    const unsigned room = 64 - pos;
    if (room > size)
    {
      /* new data doesn't complete the block */
      memcpy(p->buffer + pos, data, size);
      return;
    }
    if (pos != 0)
    {
      /* complete the buffered block and process it */
      memcpy(p->buffer + pos, data, room);
      data += room;
      size -= room;
      Sha1_UpdateBlock(p);
    }
  }

  {
    /* process full blocks directly from (data), and store the tail */
    const size_t numBlocks = size >> 6;
    const size_t tail = size & 0x3F;
    SHA1_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
    if (tail != 0)
      memcpy(p->buffer, data + (numBlocks << 6), tail);
  }
}
/*
Sha1_Final()
  Finishes the hash: appends the padding (0x80 byte, zeros, and 64-bit
  big-endian size of message in bits), processes the last block(s),
  writes 20 bytes of big-endian digest to (digest), and then
  resets the state with Sha1_InitState().
*/
void Sha1_Final(CSha1 *p, Byte *digest)
{
  unsigned i;
  unsigned pos = (unsigned)p->count & 0x3F;

  p->buffer[pos++] = 0x80;

  if (pos > (64 - 8))
  {
    /* there is no space for the 8-byte size field:
       fill the block with zeros and process it */
    memset(&p->buffer[pos], 0, 64 - pos);
    Sha1_UpdateBlock(p);
    pos = 0;
  }

  /* zeros up to the size field */
  memset(&p->buffer[pos], 0, (64 - 8) - pos);

  {
    const UInt64 numBits = (p->count << 3);
    SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32))
    SetBe32(p->buffer + 64 - 4, (UInt32)(numBits))
  }

  Sha1_UpdateBlock(p);

  for (i = 0; i < 5; i++)
  {
    SetBe32(digest + i * 4, p->state[i])
  }

  Sha1_InitState(p);
}
/*
Sha1_PrepareBlock()
  Writes SHA-1 padding to (block), that already contains (size) bytes of data:
    - byte 0x80 and three zero bytes at offset (size),
    - zero 32-bit words up to the size field,
    - 64-bit big-endian value ((p->count + size) * 8) in the last two words.
  The code uses 32-bit stores. The zero-fill loop finishes only when the
  offset becomes exactly ((SHA1_NUM_BLOCK_WORDS - 2) * 4), so (size) must be
  a multiple of 4 that is smaller than that value.
  NOTE(review): (block) is presumably expected to be aligned for 32-bit access - confirm with callers.
*/
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
{
  const UInt64 numBits = (p->count + size) << 3;
  UInt32 *words = (UInt32 *)(void *)block;

  SetBe32(&words[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32))
  SetBe32(&words[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits))
  // SetBe32((UInt32 *)(block + size), 0x80000000);
  SetUi32((UInt32 *)(void *)(block + size), 0x80)

  for (size += 4; size != (SHA1_NUM_BLOCK_WORDS - 2) * 4; size += 4)
    *((UInt32 *)(void *)(block + size)) = 0;
}
/*
Sha1_GetBlockDigest()
  Processes one 64-byte block (data) with a copy of the state of (p),
  and writes 20 bytes of big-endian result to (destDigest).
  The object (p) is not changed.
*/
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
{
  MY_ALIGN (16)
  UInt32 st[SHA1_NUM_DIGEST_WORDS];
  unsigned i;

  for (i = 0; i < 5; i++)
    st[i] = p->state[i];

  SHA1_UPDATE_BLOCKS(p)(st, data, 1);

  for (i = 0; i < 5; i++)
  {
    SetBe32(destDigest + i * 4, st[i])
  }
}
/*
Sha1Prepare()
  Sets the global function pointers:
    g_SHA1_FUNC_UPDATE_BLOCKS    - default code (hardware code, if it is enabled, or software code),
    g_SHA1_FUNC_UPDATE_BLOCKS_HW - hardware code, or NULL if hardware code is not enabled.
  The function does nothing, if Z7_COMPILER_SHA1_SUPPORTED is not defined.
*/
void Sha1Prepare(void)
{
#ifdef Z7_COMPILER_SHA1_SUPPORTED
SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
// software code is used, if the checks below don't enable hardware code
f = Sha1_UpdateBlocks;
f_hw = NULL;
// NOTE: the (if) statement selected by preprocessor controls the braces block that follows (#endif).
// If USE_MY_MM is defined for x86, there is no (if), and the block is executed unconditionally.
#ifdef MY_CPU_X86_OR_AMD64
#ifndef USE_MY_MM
if (CPU_IsSupported_SHA()
&& CPU_IsSupported_SSSE3()
// && CPU_IsSupported_SSE41()
)
#endif
#else
if (CPU_IsSupported_SHA1())
#endif
{
// printf("\n========== HW SHA1 ======== \n");
#if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
/* there was a bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
It generated incorrect SHA-1 code.
21.03 : we test sha1-hardware code at runtime initialization */
#pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
// self-test: one block of fixed data is processed with fixed initial state,
// and the result is compared with the expected state
UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
Byte data[64];
unsigned i;
for (i = 0; i < sizeof(data); i += 2)
{
data[i ] = (Byte)(i);
data[i + 1] = (Byte)(i + 1);
}
Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
if ( state[0] != 0x9acd7297
|| state[1] != 0x4624d898
|| state[2] != 0x0bf079f0
|| state[3] != 0x031e61b3
|| state[4] != 0x8323fe20)
{
// self-test failure: software code remains selected
// printf("\n========== SHA-1 hardware version failure ======== \n");
}
else
#endif
{
f = f_hw = Sha1_UpdateBlocks_HW;
}
}
g_SHA1_FUNC_UPDATE_BLOCKS = f;
g_SHA1_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif
}
#undef kNumW
#undef w
#undef w0
#undef w1
#undef f0
#undef f1
#undef f2
#undef f3
#undef T1
#undef T5
#undef M5
#undef R1
#undef R2
#undef R4
#undef R5
#undef R20_START
#undef R_PRE
#undef R_MAIN
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA1_BIG_W
#undef Z7_SHA1_UNROLL
#undef Z7_COMPILER_SHA1_SUPPORTED

76
3rdparty/7z/src/Sha1.h vendored Normal file
View file

@ -0,0 +1,76 @@
/* Sha1.h -- SHA-1 Hash
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_SHA1_H
#define ZIP7_INC_SHA1_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define SHA1_NUM_BLOCK_WORDS 16
#define SHA1_NUM_DIGEST_WORDS 5
#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4)
#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4)
typedef void (Z7_FASTCALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
if (the system supports different SHA1 code implementations)
{
(CSha1::func_UpdateBlocks) will be used
(CSha1::func_UpdateBlocks) can be set by
Sha1_Init() - to default (fastest)
Sha1_SetFunction() - to any algo
}
else
{
(CSha1::func_UpdateBlocks) is ignored.
}
*/
/* SHA-1 hashing context */
typedef struct
{
/* block-update function; it is used only if different SHA1 code implementations are supported */
SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
/* total number of bytes that were added with Sha1_Update() */
UInt64 count;
/* NOTE(review): presumably padding to align (state) and (buffer) - confirm */
UInt64 _pad_2[2];
/* current hash state: 5 words */
UInt32 state[SHA1_NUM_DIGEST_WORDS];
/* NOTE(review): presumably padding to align (buffer) - confirm */
UInt32 _pad_3[3];
/* data of the last block, that is not full yet: ((count & 0x3F) bytes) */
Byte buffer[SHA1_BLOCK_SIZE];
} CSha1;
#define SHA1_ALGO_DEFAULT 0
#define SHA1_ALGO_SW 1
#define SHA1_ALGO_HW 2
/*
Sha1_SetFunction()
return:
0 - (algo) value is not supported, and func_UpdateBlocks was not changed
1 - func_UpdateBlocks was set according to the (algo) value.
*/
BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo);
void Sha1_InitState(CSha1 *p);
void Sha1_Init(CSha1 *p);
void Sha1_Update(CSha1 *p, const Byte *data, size_t size);
void Sha1_Final(CSha1 *p, Byte *digest);
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
// void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
call Sha1Prepare() once at program start.
It prepares all supported implementations, and detects the fastest implementation.
*/
void Sha1Prepare(void);
EXTERN_C_END
#endif

386
3rdparty/7z/src/Sha1Opt.c vendored Normal file
View file

@ -0,0 +1,386 @@
/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Compiler.h"
#include "CpuArch.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
// #define USE_MY_MM
#endif
#endif
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
#define USE_HW_SHA
#elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
#define USE_HW_SHA
#if !defined(_INTEL_COMPILER)
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
#if !defined(__SHA__) || !defined(__SSSE3__)
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#endif
#endif
#elif defined(_MSC_VER)
#ifdef USE_MY_MM
#define USE_VER_MIN 1300
#else
#define USE_VER_MIN 1900
#endif
#if (_MSC_VER >= USE_VER_MIN)
#define USE_HW_SHA
#endif
#endif
// #endif // MY_CPU_X86_OR_AMD64
#ifdef USE_HW_SHA
// #pragma message("Sha1 HW")
// sse/sse2/ssse3:
#include <tmmintrin.h>
// sha*:
#include <immintrin.h>
#if defined (__clang__) && defined(_MSC_VER)
// #if !defined(__SSSE3__)
// #endif
#if !defined(__SHA__)
#include <shaintrin.h>
#endif
#else
#ifdef USE_MY_MM
#include "My_mm.h"
#endif
#endif
/*
SHA1 uses:
SSE2:
_mm_loadu_si128
_mm_storeu_si128
_mm_set_epi32
_mm_add_epi32
_mm_shuffle_epi32 / pshufd
_mm_xor_si128
_mm_cvtsi128_si32
_mm_cvtsi32_si128
SSSE3:
_mm_shuffle_epi8 / pshufb
SHA:
_mm_sha1*
*/
#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
#ifdef __clang__
#define SHA1_RNDS4_RET_TYPE_CAST (__m128i)
#else
#define SHA1_RNDS4_RET_TYPE_CAST
#endif
#define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f);
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
#define LOAD_SHUFFLE(m, k) \
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
SHUFFLE_EPI8(m, mask) \
#define SM1(m0, m1, m2, m3) \
SHA1_MSG1(m0, m1) \
#define SM2(m0, m1, m2, m3) \
XOR_SI128(m3, m1) \
SHA1_MSG2(m3, m2) \
#define SM3(m0, m1, m2, m3) \
XOR_SI128(m3, m1) \
SM1(m0, m1, m2, m3) \
SHA1_MSG2(m3, m2) \
#define NNN(m0, m1, m2, m3)
#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
e1 = abcd; \
SHA1_RND4(abcd, e0, (k) / 5) \
SHA1_NEXTE(e1, m1) \
OP(m0, m1, m2, m3) \
#define R16(k, mx, OP0, OP1, OP2, OP3) \
R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \
R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \
R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
#define PREPARE_STATE \
SHUFFLE_EPI32 (abcd, 0x1B) \
SHUFFLE_EPI32 (e0, 0x1B) \
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
Sha1_UpdateBlocks_HW() : x86 / x64 version that uses SHA instructions.
  It processes (numBlocks) consecutive 64-byte blocks from (data)
  and updates the five words of (state).
  The function does nothing, if (numBlocks == 0).
  (data) and (state) are accessed with unaligned load / store operations.
*/
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
// shuffle mask for LOAD_SHUFFLE: it reverses the order of 16 bytes of the vector
const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
__m128i abcd, e0;
if (numBlocks == 0)
return;
abcd = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // dbca
e0 = _mm_cvtsi32_si128((int)state[4]); // 000e
// PREPARE_STATE reverses the order of 32-bit lanes in (abcd) and (e0)
PREPARE_STATE
do
{
// the names (abcd, e0, e1, m0, m1, m2, m3, mask, data) are used by macros R16 / R4 / LOAD_SHUFFLE
__m128i abcd_save, e2;
__m128i m0, m1, m2, m3;
__m128i e1;
// save the state for the final addition
abcd_save = abcd;
e2 = e0;
// load 64 bytes of the block as four vectors
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
ADD_EPI32(e0, m0)
// each R16 line is 16 rounds (4 x R4); the last argument of each R4 is message schedule operation
R16 ( 0, m0, SM1, SM3, SM3, SM3 )
R16 ( 1, m0, SM3, SM3, SM3, SM3 )
R16 ( 2, m0, SM3, SM3, SM3, SM3 )
R16 ( 3, m0, SM3, SM3, SM3, SM3 )
// last 16 rounds: saved (e2) is passed as (mx) for the last R4
R16 ( 4, e2, SM2, NNN, NNN, NNN )
ADD_EPI32(abcd, abcd_save)
data += 64;
}
while (--numBlocks);
// restore the order of 32-bit lanes before storing
PREPARE_STATE
_mm_storeu_si128((__m128i *) (void *) state, abcd);
*(state+4) = (UInt32)_mm_cvtsi128_si32(e0);
}
#endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
// #pragma message("=== Sha1 HW === ")
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC
#ifdef MY_CPU_BE
#define MY_rev32_for_LE(x)
#else
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
#endif
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
#define LOAD_SHUFFLE(m, k) \
m = LOAD_128((data + (k) * 16)); \
MY_rev32_for_LE(m); \
#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3);
#define SU1(dest, src) dest = vsha1su1q_u32(dest, src);
#define C(e) abcd = vsha1cq_u32(abcd, e, t);
#define P(e) abcd = vsha1pq_u32(abcd, e, t);
#define M(e) abcd = vsha1mq_u32(abcd, e, t);
#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
#define T(m, c) t = vaddq_u32(m, c)
/*
Sha1_UpdateBlocks_HW() : ARM / ARM64 version that uses SHA-1 crypto instructions.
  It processes (numBlocks) consecutive 64-byte blocks from (data)
  and updates the five words of (state).
  The function does nothing, if (numBlocks == 0).
  SHA-1 state is 5 words: the declared size of (state) is [5], as in
  SHA1_FUNC_UPDATE_BLOCKS and in other implementations of this function.
*/
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
  // the names (abcd, t) are used by macros C / P / M / H / T
  v128 abcd;
  v128 c0, c1, c2, c3;
  uint32_t e0;

  if (numBlocks == 0)
    return;

  // round constants: each constant is used for 20 rounds
  c0 = vdupq_n_u32(0x5a827999);
  c1 = vdupq_n_u32(0x6ed9eba1);
  c2 = vdupq_n_u32(0x8f1bbcdc);
  c3 = vdupq_n_u32(0xca62c1d6);

  abcd = LOAD_128(&state[0]);
  e0 = state[4];

  do
  {
    v128 abcd_save;
    v128 m0, m1, m2, m3;
    v128 t;
    uint32_t e0_save, e1;

    // save the state for the final addition
    abcd_save = abcd;
    e0_save = e0;

    // load 64 bytes of the block as four vectors of big-endian words
    LOAD_SHUFFLE (m0, 0)
    LOAD_SHUFFLE (m1, 1)
    LOAD_SHUFFLE (m2, 2)
    LOAD_SHUFFLE (m3, 3)

    // each line is 4 rounds:
    //   T   : t = message words + round constant
    //   SU0 / SU1 : message schedule update
    //   H   : new (e) value from current (abcd)
    //   C / P / M : 4 rounds with choose / parity / majority function
    T(m0, c0);                                  H(e1); C(e0);
    T(m1, c0);  SU0(m0, m1, m2);                H(e0); C(e1);
    T(m2, c0);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); C(e0);
    T(m3, c0);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); C(e1);
    T(m0, c0);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); C(e0);
    T(m1, c1);  SU0(m0, m1, m2);  SU1(m3, m2);  H(e0); P(e1);
    T(m2, c1);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); P(e0);
    T(m3, c1);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); P(e1);
    T(m0, c1);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); P(e0);
    T(m1, c1);  SU0(m0, m1, m2);  SU1(m3, m2);  H(e0); P(e1);
    T(m2, c2);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); M(e0);
    T(m3, c2);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); M(e1);
    T(m0, c2);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); M(e0);
    T(m1, c2);  SU0(m0, m1, m2);  SU1(m3, m2);  H(e0); M(e1);
    T(m2, c2);  SU0(m1, m2, m3);  SU1(m0, m3);  H(e1); M(e0);
    T(m3, c3);  SU0(m2, m3, m0);  SU1(m1, m0);  H(e0); P(e1);
    T(m0, c3);  SU0(m3, m0, m1);  SU1(m2, m1);  H(e1); P(e0);
    T(m1, c3);                    SU1(m3, m2);  H(e0); P(e1);
    T(m2, c3);                                  H(e1); P(e0);
    T(m3, c3);                                  H(e0); P(e1);

    abcd = vaddq_u32(abcd, abcd_save);
    e0 += e0_save;

    data += 64;
  }
  while (--numBlocks);

  STORE_128(&state[0], abcd);
  state[4] = e0;
}
#endif // USE_HW_SHA
#endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// #include "Sha1.h"
void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#pragma message("Sha1 HW-SW stub was used")
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
Sha1_UpdateBlocks_HW() : stub version.
  It is compiled, if hardware SHA-1 code is not available (USE_HW_SHA is not defined).
  It calls the software code Sha1_UpdateBlocks().
*/
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
  Sha1_UpdateBlocks(state, data, numBlocks);
}
#endif
#undef SU0
#undef SU1
#undef C
#undef P
#undef M
#undef H
#undef T
#undef MY_rev32_for_LE
#undef NNN
#undef LOAD_128
#undef STORE_128
#undef LOAD_SHUFFLE
#undef SM1
#undef SM2
#undef SM3
#undef NNN
#undef R4
#undef R16
#undef PREPARE_STATE
#undef USE_HW_SHA
#undef ATTRIB_SHA
#undef USE_VER_MIN

View file

@ -1,5 +1,5 @@
/* Sha256.c -- SHA-256 Hash /* Sha256.c -- SHA-256 Hash
2021-04-01 : Igor Pavlov : Public domain 2023-04-02 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */ This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h" #include "Precomp.h"
@ -17,48 +17,48 @@ This code is based on public domain code from Wei Dai's Crypto++ library. */
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER #ifdef _MSC_VER
#if _MSC_VER >= 1200 #if _MSC_VER >= 1200
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#elif defined(__clang__) #elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check #if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#elif defined(__GNUC__) #elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check #if (__GNUC__ >= 8) // fix that check
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#elif defined(__INTEL_COMPILER) #elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check #if (__INTEL_COMPILER >= 1800) // fix that check
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#endif #endif
#elif defined(MY_CPU_ARM_OR_ARM64) #elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER #ifdef _MSC_VER
#if _MSC_VER >= 1910 #if _MSC_VER >= 1910
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#elif defined(__clang__) #elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check #if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#elif defined(__GNUC__) #elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check #if (__GNUC__ >= 6) // fix that check
#define _SHA_SUPPORTED #define Z7_COMPILER_SHA256_SUPPORTED
#endif #endif
#endif #endif
#endif #endif
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef _SHA_SUPPORTED #ifdef Z7_COMPILER_SHA256_SUPPORTED
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks; static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW; static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks #define SHA256_UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else #else
#define UPDATE_BLOCKS(p) Sha256_UpdateBlocks #define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif #endif
@ -66,16 +66,16 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
{ {
SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks; SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
#ifdef _SHA_SUPPORTED #ifdef Z7_COMPILER_SHA256_SUPPORTED
if (algo != SHA256_ALGO_SW) if (algo != SHA256_ALGO_SW)
{ {
if (algo == SHA256_ALGO_DEFAULT) if (algo == SHA256_ALGO_DEFAULT)
func = g_FUNC_UPDATE_BLOCKS; func = g_SHA256_FUNC_UPDATE_BLOCKS;
else else
{ {
if (algo != SHA256_ALGO_HW) if (algo != SHA256_ALGO_HW)
return False; return False;
func = g_FUNC_UPDATE_BLOCKS_HW; func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
if (!func) if (!func)
return False; return False;
} }
@ -92,17 +92,18 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
/* define it for speed optimization */ /* define it for speed optimization */
#ifdef _SFX #ifdef Z7_SFX
#define STEP_PRE 1 #define STEP_PRE 1
#define STEP_MAIN 1 #define STEP_MAIN 1
#else #else
#define STEP_PRE 2 #define STEP_PRE 2
#define STEP_MAIN 4 #define STEP_MAIN 4
// #define _SHA256_UNROLL // #define Z7_SHA256_UNROLL
#endif #endif
#undef Z7_SHA256_BIG_W
#if STEP_MAIN != 16 #if STEP_MAIN != 16
#define _SHA256_BIG_W #define Z7_SHA256_BIG_W
#endif #endif
@ -124,8 +125,8 @@ void Sha256_InitState(CSha256 *p)
void Sha256_Init(CSha256 *p) void Sha256_Init(CSha256 *p)
{ {
p->func_UpdateBlocks = p->func_UpdateBlocks =
#ifdef _SHA_SUPPORTED #ifdef Z7_COMPILER_SHA256_SUPPORTED
g_FUNC_UPDATE_BLOCKS; g_SHA256_FUNC_UPDATE_BLOCKS;
#else #else
NULL; NULL;
#endif #endif
@ -145,7 +146,7 @@ void Sha256_Init(CSha256 *p)
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15)) #define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
#ifdef _SHA256_BIG_W #ifdef Z7_SHA256_BIG_W
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned. // we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
#define w(j, i) W[(size_t)(j) + i] #define w(j, i) W[(size_t)(j) + i]
#define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i)) #define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
@ -176,7 +177,7 @@ void Sha256_Init(CSha256 *p)
#define R1_PRE(i) T1( W_PRE, i) #define R1_PRE(i) T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i) #define R1_MAIN(i) T1( W_MAIN, i)
#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4) #if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \ #define R2_MAIN(i) \
R1_MAIN(i) \ R1_MAIN(i) \
R1_MAIN(i + 1) \ R1_MAIN(i + 1) \
@ -185,7 +186,7 @@ void Sha256_Init(CSha256 *p)
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 #if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8
#define T4( a,b,c,d,e,f,g,h, wx, i) \ #define T4( a,b,c,d,e,f,g,h, wx, i) \
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \ h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
@ -223,7 +224,7 @@ void Sha256_Init(CSha256 *p)
#endif #endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
// static // static
extern MY_ALIGN(64) extern MY_ALIGN(64)
@ -252,11 +253,11 @@ const UInt32 SHA256_K_ARRAY[64] = {
#define K SHA256_K_ARRAY #define K SHA256_K_ARRAY
MY_NO_INLINE Z7_NO_INLINE
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks) void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{ {
UInt32 W UInt32 W
#ifdef _SHA256_BIG_W #ifdef Z7_SHA256_BIG_W
[64]; [64];
#else #else
[16]; [16];
@ -266,7 +267,7 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
UInt32 a,b,c,d,e,f,g,h; UInt32 a,b,c,d,e,f,g,h;
#if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4) #if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
UInt32 tmp; UInt32 tmp;
#endif #endif
@ -297,12 +298,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
#else #else
R1_PRE(0); R1_PRE(0)
#if STEP_PRE >= 2 #if STEP_PRE >= 2
R1_PRE(1); R1_PRE(1)
#if STEP_PRE >= 4 #if STEP_PRE >= 4
R1_PRE(2); R1_PRE(2)
R1_PRE(3); R1_PRE(3)
#endif #endif
#endif #endif
@ -311,32 +312,32 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
for (j = 16; j < 64; j += STEP_MAIN) for (j = 16; j < 64; j += STEP_MAIN)
{ {
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8 #if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8
#if STEP_MAIN < 8 #if STEP_MAIN < 8
R4_MAIN(0); R4_MAIN(0)
#else #else
R8_MAIN(0); R8_MAIN(0)
#if STEP_MAIN == 16 #if STEP_MAIN == 16
R8_MAIN(8); R8_MAIN(8)
#endif #endif
#endif #endif
#else #else
R1_MAIN(0); R1_MAIN(0)
#if STEP_MAIN >= 2 #if STEP_MAIN >= 2
R1_MAIN(1); R1_MAIN(1)
#if STEP_MAIN >= 4 #if STEP_MAIN >= 4
R2_MAIN(2); R2_MAIN(2)
#if STEP_MAIN >= 8 #if STEP_MAIN >= 8
R2_MAIN(4); R2_MAIN(4)
R2_MAIN(6); R2_MAIN(6)
#if STEP_MAIN >= 16 #if STEP_MAIN >= 16
R2_MAIN(8); R2_MAIN(8)
R2_MAIN(10); R2_MAIN(10)
R2_MAIN(12); R2_MAIN(12)
R2_MAIN(14); R2_MAIN(14)
#endif #endif
#endif #endif
#endif #endif
@ -367,7 +368,7 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
#undef s1 #undef s1
#undef K #undef K
#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1) #define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha256_Update(CSha256 *p, const Byte *data, size_t size) void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{ {
@ -397,7 +398,7 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
} }
{ {
size_t numBlocks = size >> 6; size_t numBlocks = size >> 6;
UPDATE_BLOCKS(p)(p->state, data, numBlocks); SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
size &= 0x3F; size &= 0x3F;
if (size == 0) if (size == 0)
return; return;
@ -441,8 +442,8 @@ void Sha256_Final(CSha256 *p, Byte *digest)
{ {
UInt64 numBits = (p->count << 3); UInt64 numBits = (p->count << 3);
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32)); SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32))
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits)); SetBe32(p->buffer + 64 - 4, (UInt32)(numBits))
} }
Sha256_UpdateBlock(p); Sha256_UpdateBlock(p);
@ -451,8 +452,8 @@ void Sha256_Final(CSha256 *p, Byte *digest)
{ {
UInt32 v0 = p->state[i]; UInt32 v0 = p->state[i];
UInt32 v1 = p->state[(size_t)i + 1]; UInt32 v1 = p->state[(size_t)i + 1];
SetBe32(digest , v0); SetBe32(digest , v0)
SetBe32(digest + 4, v1); SetBe32(digest + 4, v1)
digest += 8; digest += 8;
} }
@ -460,9 +461,9 @@ void Sha256_Final(CSha256 *p, Byte *digest)
} }
void Sha256Prepare() void Sha256Prepare(void)
{ {
#ifdef _SHA_SUPPORTED #ifdef Z7_COMPILER_SHA256_SUPPORTED
SHA256_FUNC_UPDATE_BLOCKS f, f_hw; SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
f = Sha256_UpdateBlocks; f = Sha256_UpdateBlocks;
f_hw = NULL; f_hw = NULL;
@ -480,7 +481,36 @@ void Sha256Prepare()
// printf("\n========== HW SHA256 ======== \n"); // printf("\n========== HW SHA256 ======== \n");
f = f_hw = Sha256_UpdateBlocks_HW; f = f_hw = Sha256_UpdateBlocks_HW;
} }
g_FUNC_UPDATE_BLOCKS = f; g_SHA256_FUNC_UPDATE_BLOCKS = f;
g_FUNC_UPDATE_BLOCKS_HW = f_hw; g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif #endif
} }
#undef S0
#undef S1
#undef s0
#undef s1
#undef Ch
#undef Maj
#undef W_MAIN
#undef W_PRE
#undef w
#undef blk2_main
#undef blk2
#undef T1
#undef T4
#undef T8
#undef R1_PRE
#undef R1_MAIN
#undef R2_MAIN
#undef R4
#undef R4_PRE
#undef R4_MAIN
#undef R8
#undef R8_PRE
#undef R8_MAIN
#undef STEP_PRE
#undef STEP_MAIN
#undef Z7_SHA256_BIG_W
#undef Z7_SHA256_UNROLL
#undef Z7_COMPILER_SHA256_SUPPORTED

View file

@ -1,8 +1,8 @@
/* Sha256.h -- SHA-256 Hash /* Sha256.h -- SHA-256 Hash
2021-01-01 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_SHA256_H #ifndef ZIP7_INC_SHA256_H
#define __7Z_SHA256_H #define ZIP7_INC_SHA256_H
#include "7zTypes.h" #include "7zTypes.h"
@ -14,7 +14,7 @@ EXTERN_C_BEGIN
#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4) #define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)
#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4) #define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)
typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks); typedef void (Z7_FASTCALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
/* /*
if (the system supports different SHA256 code implementations) if (the system supports different SHA256 code implementations)
@ -34,7 +34,7 @@ typedef struct
{ {
SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks; SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count; UInt64 count;
UInt64 __pad_2[2]; UInt64 _pad_2[2];
UInt32 state[SHA256_NUM_DIGEST_WORDS]; UInt32 state[SHA256_NUM_DIGEST_WORDS];
Byte buffer[SHA256_BLOCK_SIZE]; Byte buffer[SHA256_BLOCK_SIZE];
@ -62,7 +62,7 @@ void Sha256_Final(CSha256 *p, Byte *digest);
// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); // void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
/* /*
call Sha256Prepare() once at program start. call Sha256Prepare() once at program start.

View file

@ -1,7 +1,9 @@
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions /* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
#include "Compiler.h"
#include "CpuArch.h"
#if defined(_MSC_VER) #if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200) #if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
@ -9,41 +11,26 @@
#endif #endif
#endif #endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64 #ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__) #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA #define USE_HW_SHA
#ifndef __SHA__ #elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
#define USE_HW_SHA
#if !defined(_INTEL_COMPILER)
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
#if !defined(__SHA__) || !defined(__SSSE3__)
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3"))) #define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#if defined(_MSC_VER)
// SSSE3: for clang-cl:
#include <tmmintrin.h>
#define __SHA__
#endif
#endif #endif
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
// #pragma GCC target("sha,ssse3")
#endif #endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER) #elif defined(_MSC_VER)
#ifdef USE_MY_MM #ifdef USE_MY_MM
#define USE_VER_MIN 1300 #define USE_VER_MIN 1300
#else #else
#define USE_VER_MIN 1910 #define USE_VER_MIN 1900
#endif #endif
#if _MSC_VER >= USE_VER_MIN #if (_MSC_VER >= USE_VER_MIN)
#define USE_HW_SHA #define USE_HW_SHA
#endif #endif
#endif #endif
@ -52,16 +39,19 @@
#ifdef USE_HW_SHA #ifdef USE_HW_SHA
// #pragma message("Sha256 HW") // #pragma message("Sha256 HW")
// #include <wmmintrin.h>
#if !defined(_MSC_VER) || (_MSC_VER >= 1900) // sse/sse2/ssse3:
#include <tmmintrin.h>
// sha*:
#include <immintrin.h> #include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600) #if defined (__clang__) && defined(_MSC_VER)
// #include <intrin.h> // #if !defined(__SSSE3__)
#endif // #endif
#if !defined(__SHA__)
#include <shaintrin.h>
#endif
#else
#ifdef USE_MY_MM #ifdef USE_MY_MM
#include "My_mm.h" #include "My_mm.h"
@ -98,9 +88,9 @@ const UInt32 SHA256_K_ARRAY[64];
#define K SHA256_K_ARRAY #define K SHA256_K_ARRAY
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src); #define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src); #define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src); #define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
#define LOAD_SHUFFLE(m, k) \ #define LOAD_SHUFFLE(m, k) \
@ -112,7 +102,7 @@ const UInt32 SHA256_K_ARRAY[64];
#define SM2(g0, g1, g2, g3) \ #define SM2(g0, g1, g2, g3) \
tmp = _mm_alignr_epi8(g1, g0, 4); \ tmp = _mm_alignr_epi8(g1, g0, 4); \
ADD_EPI32(g2, tmp); \ ADD_EPI32(g2, tmp) \
SHA25G_MSG2(g2, g1); \ SHA25G_MSG2(g2, g1); \
// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k) // #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
@ -138,16 +128,16 @@ const UInt32 SHA256_K_ARRAY[64];
// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2 // We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
#define R4(k, g0, g1, g2, g3, OP0, OP1) \ #define R4(k, g0, g1, g2, g3, OP0, OP1) \
RND2_0(g0, k); \ RND2_0(g0, k) \
OP0(g0, g1, g2, g3); \ OP0(g0, g1, g2, g3) \
RND2_1; \ RND2_1 \
OP1(g0, g1, g2, g3); \ OP1(g0, g1, g2, g3) \
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \ #define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \ R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \ R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \ R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
#define PREPARE_STATE \ #define PREPARE_STATE \
tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \ tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
@ -157,11 +147,11 @@ const UInt32 SHA256_K_ARRAY[64];
state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \ state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA #ifdef ATTRIB_SHA
ATTRIB_SHA ATTRIB_SHA
#endif #endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{ {
const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
__m128i tmp; __m128i tmp;
@ -192,13 +182,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 ); R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 ); R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN ); R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
ADD_EPI32(state0, state0_save); ADD_EPI32(state0, state0_save)
ADD_EPI32(state1, state1_save); ADD_EPI32(state1, state1_save)
data += 64; data += 64;
} }
@ -298,11 +288,11 @@ const UInt32 SHA256_K_ARRAY[64];
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \ R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA #ifdef ATTRIB_SHA
ATTRIB_SHA ATTRIB_SHA
#endif #endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{ {
v128 state0, state1; v128 state0, state1;
@ -353,12 +343,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
// #include <stdlib.h> // #include <stdlib.h>
// #include "Sha256.h" // #include "Sha256.h"
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
#pragma message("Sha256 HW-SW stub was used") #pragma message("Sha256 HW-SW stub was used")
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks); void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks) void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{ {
Sha256_UpdateBlocks(state, data, numBlocks); Sha256_UpdateBlocks(state, data, numBlocks);
/* /*
@ -371,3 +361,26 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
} }
#endif #endif
#undef K
#undef RND2
#undef RND2_0
#undef RND2_1
#undef MY_rev32_for_LE
#undef NNN
#undef LOAD_128
#undef STORE_128
#undef LOAD_SHUFFLE
#undef SM1
#undef SM2
#undef NNN
#undef R4
#undef R16
#undef PREPARE_STATE
#undef USE_HW_SHA
#undef ATTRIB_SHA
#undef USE_VER_MIN

View file

@ -1,8 +1,8 @@
/* Sort.h -- Sort functions /* Sort.h -- Sort functions
2014-04-05 : Igor Pavlov : Public domain */ 2023-03-05 : Igor Pavlov : Public domain */
#ifndef __7Z_SORT_H #ifndef ZIP7_INC_SORT_H
#define __7Z_SORT_H #define ZIP7_INC_SORT_H
#include "7zTypes.h" #include "7zTypes.h"

800
3rdparty/7z/src/SwapBytes.c vendored Normal file
View file

@ -0,0 +1,800 @@
/* SwapBytes.c -- Byte Swap conversion filter
2023-04-07 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Compiler.h"
#include "CpuArch.h"
#include "RotateDefs.h"
#include "SwapBytes.h"
typedef UInt16 CSwapUInt16;
typedef UInt32 CSwapUInt32;
// #define k_SwapBytes_Mode_BASE 0
#ifdef MY_CPU_X86_OR_AMD64
#define k_SwapBytes_Mode_SSE2 1
#define k_SwapBytes_Mode_SSSE3 2
#define k_SwapBytes_Mode_AVX2 3
// #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900)
#if defined(__clang__) && (__clang_major__ >= 4) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701)
#define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_AVX2
#define SWAP_ATTRIB_SSE2 __attribute__((__target__("sse2")))
#define SWAP_ATTRIB_SSSE3 __attribute__((__target__("ssse3")))
#define SWAP_ATTRIB_AVX2 __attribute__((__target__("avx2")))
#elif defined(_MSC_VER)
#if (_MSC_VER == 1900)
#pragma warning(disable : 4752) // found Intel(R) Advanced Vector Extensions; consider using /arch:AVX
#endif
#if (_MSC_VER >= 1900)
#define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_AVX2
#elif (_MSC_VER >= 1500) // (VS2008)
#define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_SSSE3
#elif (_MSC_VER >= 1310) // (VS2003)
#define k_SwapBytes_Mode_MAX k_SwapBytes_Mode_SSE2
#endif
#endif // _MSC_VER
/*
// for debug
#ifdef k_SwapBytes_Mode_MAX
#undef k_SwapBytes_Mode_MAX
#endif
*/
#ifndef k_SwapBytes_Mode_MAX
#define k_SwapBytes_Mode_MAX 0
#endif
#if (k_SwapBytes_Mode_MAX != 0) && defined(MY_CPU_AMD64)
#define k_SwapBytes_Mode_MIN k_SwapBytes_Mode_SSE2
#else
#define k_SwapBytes_Mode_MIN 0
#endif
#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_AVX2)
#define USE_SWAP_AVX2
#endif
#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSSE3)
#define USE_SWAP_SSSE3
#endif
#if (k_SwapBytes_Mode_MAX >= k_SwapBytes_Mode_SSE2)
#define USE_SWAP_128
#endif
#if k_SwapBytes_Mode_MAX <= k_SwapBytes_Mode_MIN || !defined(USE_SWAP_128)
#define FORCE_SWAP_MODE
#endif
#ifdef USE_SWAP_128
/*
<mmintrin.h> MMX
<xmmintrin.h> SSE
<emmintrin.h> SSE2
<pmmintrin.h> SSE3
<tmmintrin.h> SSSE3
<smmintrin.h> SSE4.1
<nmmintrin.h> SSE4.2
<ammintrin.h> SSE4A
<wmmintrin.h> AES
<immintrin.h> AVX, AVX2, FMA
*/
#include <emmintrin.h> // sse2
// typedef __m128i v128;
#define SWAP2_128(i) { \
const __m128i v = *(const __m128i *)(const void *)(items + (i) * 8); \
*( __m128i *)( void *)(items + (i) * 8) = \
_mm_or_si128( \
_mm_slli_epi16(v, 8), \
_mm_srli_epi16(v, 8)); }
// _mm_or_si128() has more ports to execute than _mm_add_epi16().
/*
  SwapBytes2_128 (x86, SSE2): swaps the two bytes of every 16-bit item
  in the range [items, lim).
  SWAP2_128(i) rewrites in place the 16-byte vector at (items + (i) * 8).
  Each pass of the loop processes 4 vectors (32 items) and the loop ends
  only when items == lim, so the range must be a non-empty multiple of
  32 items.
  NOTE(review): the data is accessed through __m128i pointers, so items is
  presumably expected to be 16-byte aligned - confirm at the call site.
*/
static
#ifdef SWAP_ATTRIB_SSE2
SWAP_ATTRIB_SSE2
#endif
void
Z7_FASTCALL
SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim)
{
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    // 4 consecutive vectors, all addressed from the same base pointer
    SWAP2_128(0)
    SWAP2_128(1)
    SWAP2_128(2)
    SWAP2_128(3)
    items += 4 * 8;
  }
  while (items != lim);
}
/*
// sse2
#define SWAP4_128_pack(i) { \
__m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \
__m128i v0 = _mm_unpacklo_epi8(v, mask); \
__m128i v1 = _mm_unpackhi_epi8(v, mask); \
v0 = _mm_shufflelo_epi16(v0, 0x1b); \
v1 = _mm_shufflelo_epi16(v1, 0x1b); \
v0 = _mm_shufflehi_epi16(v0, 0x1b); \
v1 = _mm_shufflehi_epi16(v1, 0x1b); \
*(__m128i *)(void *)(items + (i) * 4) = _mm_packus_epi16(v0, v1); }
static
#ifdef SWAP_ATTRIB_SSE2
SWAP_ATTRIB_SSE2
#endif
void
Z7_FASTCALL
SwapBytes4_128_pack(CSwapUInt32 *items, const CSwapUInt32 *lim)
{
const __m128i mask = _mm_setzero_si128();
// const __m128i mask = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, 0);
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SWAP4_128_pack(0); items += 1 * 4;
// SWAP4_128_pack(0); SWAP4_128_pack(1); items += 2 * 4;
}
while (items != lim);
}
// sse2
#define SWAP4_128_shift(i) { \
__m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \
__m128i v2; \
v2 = _mm_or_si128( \
_mm_slli_si128(_mm_and_si128(v, mask), 1), \
_mm_and_si128(_mm_srli_si128(v, 1), mask)); \
v = _mm_or_si128( \
_mm_slli_epi32(v, 24), \
_mm_srli_epi32(v, 24)); \
*(__m128i *)(void *)(items + (i) * 4) = _mm_or_si128(v2, v); }
static
#ifdef SWAP_ATTRIB_SSE2
SWAP_ATTRIB_SSE2
#endif
void
Z7_FASTCALL
SwapBytes4_128_shift(CSwapUInt32 *items, const CSwapUInt32 *lim)
{
#define M1 0xff00
const __m128i mask = _mm_set_epi32(M1, M1, M1, M1);
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
// SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4;
// SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4;
SWAP4_128_shift(0); items += 1 * 4;
}
while (items != lim);
}
*/
#if defined(USE_SWAP_SSSE3) || defined(USE_SWAP_AVX2)
#define SWAP_SHUF_REV_SEQ_2_VALS(v) (v)+1, (v)
#define SWAP_SHUF_REV_SEQ_4_VALS(v) (v)+3, (v)+2, (v)+1, (v)
#define SWAP2_SHUF_MASK_16_BYTES \
SWAP_SHUF_REV_SEQ_2_VALS (0 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (1 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (2 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (3 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (4 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (5 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (6 * 2), \
SWAP_SHUF_REV_SEQ_2_VALS (7 * 2)
#define SWAP4_SHUF_MASK_16_BYTES \
SWAP_SHUF_REV_SEQ_4_VALS (0 * 4), \
SWAP_SHUF_REV_SEQ_4_VALS (1 * 4), \
SWAP_SHUF_REV_SEQ_4_VALS (2 * 4), \
SWAP_SHUF_REV_SEQ_4_VALS (3 * 4)
#if defined(USE_SWAP_AVX2)
/* if we use SWAP_USE_256_BIT_INIT_MASK, each static mask array is 16 bytes larger */
// #define SWAP_USE_256_BIT_INIT_MASK
#endif
#if defined(SWAP_USE_256_BIT_INIT_MASK) && defined(USE_SWAP_AVX2)
#define SWAP_MASK_INIT_SIZE 32
#else
#define SWAP_MASK_INIT_SIZE 16
#endif
MY_ALIGN(SWAP_MASK_INIT_SIZE)
static const Byte k_ShufMask_Swap2[] =
{
SWAP2_SHUF_MASK_16_BYTES
#if SWAP_MASK_INIT_SIZE > 16
, SWAP2_SHUF_MASK_16_BYTES
#endif
};
MY_ALIGN(SWAP_MASK_INIT_SIZE)
static const Byte k_ShufMask_Swap4[] =
{
SWAP4_SHUF_MASK_16_BYTES
#if SWAP_MASK_INIT_SIZE > 16
, SWAP4_SHUF_MASK_16_BYTES
#endif
};
#ifdef USE_SWAP_SSSE3
#include <tmmintrin.h> // ssse3
#define SHUF_128(i) *(items + (i)) = \
_mm_shuffle_epi8(*(items + (i)), mask); // SSSE3
// Z7_NO_INLINE
/*
  ShufBytes_128 (x86, SSSE3): applies _mm_shuffle_epi8() with one fixed
  16-byte mask to every 16-byte vector in the range [items8, lim8).
    items8      : start of the data, rewritten in place
    lim8        : end of the data (exclusive)
    mask128_ptr : 16 bytes of shuffle control, read once through a
                  __m128i pointer (the mask tables of this file are
                  declared with MY_ALIGN)
  Each pass of the loop processes 4 vectors (64 bytes) and the loop ends
  only when the data pointer is equal to lim8, so the range must be a
  non-empty multiple of 64 bytes.
  NOTE(review): the data is accessed through __m128i pointers, so items8 is
  presumably expected to be 16-byte aligned - confirm at the call site.
*/
static
#ifdef SWAP_ATTRIB_SSSE3
SWAP_ATTRIB_SSSE3
#endif
Z7_ATTRIB_NO_VECTORIZE
void
Z7_FASTCALL
ShufBytes_128(void *items8, const void *lim8, const void *mask128_ptr)
{
  // the shuffle control bytes are loaded once, before the loop
  const __m128i mask = *(const __m128i *)mask128_ptr;
  const __m128i * const end = (const __m128i *)lim8;
  // SHUF_128() refers to the names (items) and (mask)
  __m128i *items = (__m128i *)items8;

  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    SHUF_128(0)
    SHUF_128(1)
    SHUF_128(2)
    SHUF_128(3)
    items += 4;
  }
  while (items != end);
}
#endif // USE_SWAP_SSSE3
#ifdef USE_SWAP_AVX2
#include <immintrin.h> // avx, avx2
#if defined(__clang__)
#include <avxintrin.h>
#include <avx2intrin.h>
#endif
#define SHUF_256(i) *(items + (i)) = \
_mm256_shuffle_epi8(*(items + (i)), mask); // AVX2
// Z7_NO_INLINE
/*
  ShufBytes_256 (x86, AVX2): applies _mm256_shuffle_epi8() with one fixed
  mask to every 32-byte vector in the range [items8, lim8).
    items8      : start of the data, rewritten in place
    lim8        : end of the data (exclusive)
    mask128_ptr : shuffle control bytes; 32 bytes are read from it if
                  (SWAP_MASK_INIT_SIZE > 16), otherwise 16 bytes are read
                  and used for both 128-bit halves of the 256-bit mask
  Each pass of the loop processes 4 vectors (128 bytes) and the loop ends
  only when (items == lim), so the range must be a non-empty multiple of
  128 bytes.
  NOTE(review): the data is accessed through __m256i pointers, so items8 is
  presumably expected to be 32-byte aligned - confirm at the call site.
*/
static
#ifdef SWAP_ATTRIB_AVX2
SWAP_ATTRIB_AVX2
#endif
Z7_ATTRIB_NO_VECTORIZE
void
Z7_FASTCALL
ShufBytes_256(void *items8, const void *lim8, const void *mask128_ptr)
{
// SHUF_256() refers to the names (items) and (mask)
__m256i *items = (__m256i *)items8;
const __m256i *lim = (const __m256i *)lim8;
/*
UNUSED_VAR(mask128_ptr)
__m256i mask =
for Swap4: _mm256_setr_epi8(SWAP4_SHUF_MASK_16_BYTES, SWAP4_SHUF_MASK_16_BYTES);
for Swap2: _mm256_setr_epi8(SWAP2_SHUF_MASK_16_BYTES, SWAP2_SHUF_MASK_16_BYTES);
*/
// the mask is built once, before the loop
const __m256i mask =
#if SWAP_MASK_INIT_SIZE > 16
// the mask table already contains the 16-byte pattern twice: load all 32 bytes
*(const __m256i *)(const void *)mask128_ptr;
#else
/* msvc: broadcastsi128() version reserves the stack for no reason
msvc 19.29-: _mm256_insertf128_si256() / _mm256_set_m128i)) versions use non-avx movdqu xmm0,XMMWORD PTR [r8]
msvc 19.30+ (VS2022): replaces _mm256_set_m128i(m,m) to vbroadcastf128(m) as we want
*/
// _mm256_broadcastsi128_si256(*mask128_ptr);
/*
#define MY_mm256_set_m128i(hi, lo) _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1)
MY_mm256_set_m128i
*/
// the same 16 bytes are used for the high and the low 128-bit halves
_mm256_set_m128i(
*(const __m128i *)mask128_ptr,
*(const __m128i *)mask128_ptr);
#endif
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
// 2 + 2 vectors per pass
SHUF_256(0) SHUF_256(1) items += 2;
SHUF_256(0) SHUF_256(1) items += 2;
}
while (items != lim);
}
#endif // USE_SWAP_AVX2
#endif // USE_SWAP_SSSE3 || USE_SWAP_AVX2
#endif // USE_SWAP_128
// compile message "NEON intrinsics not available with the soft-float ABI"
#elif defined(MY_CPU_ARM_OR_ARM64) || \
(defined(__ARM_ARCH) && (__ARM_ARCH >= 7))
// #elif defined(MY_CPU_ARM64)
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) \
|| defined(MY_CPU_ARM64)
#define USE_SWAP_128
#endif
#ifdef MY_CPU_ARM64
// #define SWAP_ATTRIB_NEON __attribute__((__target__("")))
#else
// #define SWAP_ATTRIB_NEON __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#elif defined(_MSC_VER)
#if (_MSC_VER >= 1910)
#define USE_SWAP_128
#endif
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#ifndef USE_SWAP_128
#define FORCE_SWAP_MODE
#else
#ifdef MY_CPU_ARM64
// for debug : comment it
#define FORCE_SWAP_MODE
#else
#define k_SwapBytes_Mode_NEON 1
#endif
// typedef uint8x16_t v128;
#define SWAP2_128(i) *(uint8x16_t *) (void *)(items + (i) * 8) = \
vrev16q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 8));
#define SWAP4_128(i) *(uint8x16_t *) (void *)(items + (i) * 4) = \
vrev32q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 4));
// Z7_NO_INLINE
/*
  SwapBytes2_128 (ARM, NEON): swaps the two bytes of every 16-bit item
  in the range [items, lim).
  SWAP2_128(i) applies vrev16q_u8() in place to the 16-byte vector
  at (items + (i) * 8).
  Each pass of the loop processes 4 vectors (32 items) and the loop ends
  only when items == lim, so the range must be a non-empty multiple of
  32 items.
*/
static
#ifdef SWAP_ATTRIB_NEON
SWAP_ATTRIB_NEON
#endif
Z7_ATTRIB_NO_VECTORIZE
void
Z7_FASTCALL
SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim)
{
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    // 4 consecutive vectors, all addressed from the same base pointer
    SWAP2_128(0)
    SWAP2_128(1)
    SWAP2_128(2)
    SWAP2_128(3)
    items += 4 * 8;
  }
  while (items != lim);
}
// Z7_NO_INLINE
/*
  SwapBytes4_128 (ARM, NEON): reverses the byte order of every 32-bit item
  in the range [items, lim).
  SWAP4_128(i) applies vrev32q_u8() in place to the 16-byte vector
  at (items + (i) * 4).
  Each pass of the loop processes 4 vectors (16 items) and the loop ends
  only when items == lim, so the range must be a non-empty multiple of
  16 items.
*/
static
#ifdef SWAP_ATTRIB_NEON
SWAP_ATTRIB_NEON
#endif
Z7_ATTRIB_NO_VECTORIZE
void
Z7_FASTCALL
SwapBytes4_128(CSwapUInt32 *items, const CSwapUInt32 *lim)
{
  Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
  do
  {
    // 4 consecutive vectors, all addressed from the same base pointer
    SWAP4_128(0)
    SWAP4_128(1)
    SWAP4_128(2)
    SWAP4_128(3)
    items += 4 * 4;
  }
  while (items != lim);
}
#endif // USE_SWAP_128
#else // MY_CPU_ARM_OR_ARM64
#define FORCE_SWAP_MODE
#endif // MY_CPU_ARM_OR_ARM64
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_X86)
/* _byteswap_ushort() in MSVC x86 32-bit works via the slow sequence { mov dh, al; mov dl, ah },
so we use our own versions of the byteswap function */
#if (_MSC_VER < 1400 ) // old MSVC-X86 without _rotr16() support
#define SWAP2_16(i) { UInt32 v = items[i]; v += (v << 16); v >>= 8; items[i] = (CSwapUInt16)v; }
#else // is new MSVC-X86 with fast _rotr16()
#include <intrin.h>
#define SWAP2_16(i) { items[i] = _rotr16(items[i], 8); }
#endif
#else // is not MSVC-X86
#define SWAP2_16(i) { CSwapUInt16 v = items[i]; items[i] = Z7_BSWAP16(v); }
#endif // MSVC-X86
#if defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
#define SWAP4_32(i) { CSwapUInt32 v = items[i]; items[i] = Z7_BSWAP32(v); }
#else
#define SWAP4_32(i) \
{ UInt32 v = items[i]; \
v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff); \
v = rotlFixed(v, 16); \
items[i] = v; }
#endif
#if defined(FORCE_SWAP_MODE) && defined(USE_SWAP_128)
#define DEFAULT_Swap2 SwapBytes2_128
#if !defined(MY_CPU_X86_OR_AMD64)
#define DEFAULT_Swap4 SwapBytes4_128
#endif
#endif
#if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4)
#define SWAP_BASE_FUNCS_PREFIXES \
Z7_FORCE_INLINE \
static \
Z7_ATTRIB_NO_VECTOR \
void Z7_FASTCALL
#ifdef MY_CPU_64BIT
/* SWAP2_64_VAR(v) / SWAP4_64_VAR(v): update the 64-bit variable (v) in place so that
   the bytes of every 16-bit (SWAP2) or 32-bit (SWAP4) lane inside it are reversed.
   Each expansion already ends with ';'. */
#if defined(MY_CPU_ARM64) \
&& defined(__ARM_ARCH) && (__ARM_ARCH >= 8) \
&& ( (defined(__GNUC__) && (__GNUC__ >= 4)) \
|| (defined(__clang__) && (__clang_major__ >= 4)))
// ARM64 with GNU-style inline asm: one REV16 / REV32 instruction on the 64-bit register.
#define SWAP2_64_VAR(v) asm ("rev16 %x0,%x0" : "+r" (v));
#define SWAP4_64_VAR(v) asm ("rev32 %x0,%x0" : "+r" (v));
#else // is not ARM64-GNU
// SWAP2_64_VAR is intentionally left undefined for x86/AMD64 when USE_SWAP_128 is defined
// and k_SwapBytes_Mode_MIN != 0; the code below then selects SwapBytes2_128 as DEFAULT_Swap2.
#if !defined(MY_CPU_X86_OR_AMD64) || (k_SwapBytes_Mode_MIN == 0) || !defined(USE_SWAP_128)
// exchange the odd and even bytes of (v) with two masks and two shifts
#define SWAP2_64_VAR(v) \
v = ( 0x00ff00ff00ff00ff & (v >> 8)) \
+ ((0x00ff00ff00ff00ff & v) << 8);
/* plus gives faster code in MSVC */
#endif
#ifdef Z7_CPU_FAST_BSWAP_SUPPORTED
// Z7_BSWAP64 followed by Z7_ROTL64(v, 32): presumably a full 8-byte reversal and then
// a 32-bit rotate that puts the two (already reversed) 32-bit halves back in place.
#define SWAP4_64_VAR(v) \
v = Z7_BSWAP64(v); \
v = Z7_ROTL64(v, 32);
#else
// mask/shift fallback: inside each 32-bit lane, bytes 0 and 3 are exchanged
// via the 24-bit shifts and bytes 1 and 2 via the 8-bit shifts.
#define SWAP4_64_VAR(v) \
v = ( 0x000000ff000000ff & (v >> 24)) \
+ ((0x000000ff000000ff & v) << 24 ) \
+ ( 0x0000ff000000ff00 & (v >> 8)) \
+ ((0x0000ff000000ff00 & v) << 8 ) \
;
#endif
#endif // ARM64-GNU
#ifdef SWAP2_64_VAR
/* SWAP2_64(i): loads the i-th 8-byte group (four 16-bit items) starting at (items)
   as one UInt64, applies SWAP2_64_VAR to it and stores it back.
   Expects a CSwapUInt16 pointer named (items) in scope. */
#define SWAP2_64(i) { \
UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 4); \
SWAP2_64_VAR(v) \
*(UInt64 *)(void *)(items + (i) * 4) = v; }
/* Scalar byte swap of 16-bit items in the range [items, lim), done on 64-bit words.
   Each loop pass handles four 64-bit words = 16 items (32 bytes).
   The loop is do/while with an (items != lim) test, so the range must be non-empty
   and its length must be a multiple of 16 items; z7_SwapBytes2() only calls it
   with a non-zero multiple of SWAP_ITERATION_BLOCK_SIZE_MAX bytes. */
SWAP_BASE_FUNCS_PREFIXES
SwapBytes2_64(CSwapUInt16 *items, const CSwapUInt16 *lim)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SWAP2_64(0) SWAP2_64(1) items += 2 * 4;
SWAP2_64(0) SWAP2_64(1) items += 2 * 4;
}
while (items != lim);
}
#define DEFAULT_Swap2 SwapBytes2_64
// SWAP2_DEFAULT_MODE == 0 : DEFAULT_Swap2 is the scalar routine, so z7_SwapBytes2()
// additionally compiles a run-time check that can select SwapBytes2_128.
#if !defined(FORCE_SWAP_MODE)
#define SWAP2_DEFAULT_MODE 0
#endif
#else // !defined(SWAP2_64_VAR)
// no scalar 64-bit variant was defined: the 128-bit routine is the default itself
#define DEFAULT_Swap2 SwapBytes2_128
#if !defined(FORCE_SWAP_MODE)
#define SWAP2_DEFAULT_MODE 1
#endif
#endif // SWAP2_64_VAR
/* SWAP4_64(i): loads the i-th 8-byte group (two 32-bit items) starting at (items)
   as one UInt64, applies SWAP4_64_VAR to it and stores it back.
   Expects a CSwapUInt32 pointer named (items) in scope. */
#define SWAP4_64(i) { \
UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 2); \
SWAP4_64_VAR(v) \
*(UInt64 *)(void *)(items + (i) * 2) = v; }
/* Scalar byte swap of 32-bit items in the range [items, lim), done on 64-bit words.
   Each loop pass handles four 64-bit words = 8 items (32 bytes).
   The loop is do/while with an (items != lim) test, so the range must be non-empty
   and its length must be a multiple of 8 items. */
SWAP_BASE_FUNCS_PREFIXES
SwapBytes4_64(CSwapUInt32 *items, const CSwapUInt32 *lim)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SWAP4_64(0) SWAP4_64(1) items += 2 * 2;
SWAP4_64(0) SWAP4_64(1) items += 2 * 2;
}
while (items != lim);
}
#define DEFAULT_Swap4 SwapBytes4_64
#else // is not 64BIT
/* SWAP2_32_VAR(v): updates the 32-bit variable (v) in place so that the two bytes
   of each 16-bit half are exchanged. Each expansion already ends with ';'. */
#if defined(MY_CPU_ARM_OR_ARM64) \
&& defined(__ARM_ARCH) && (__ARM_ARCH >= 6) \
&& ( (defined(__GNUC__) && (__GNUC__ >= 4)) \
|| (defined(__clang__) && (__clang_major__ >= 4)))
// NOTE(review): this section is under the #else branch of the outer
// "#ifdef MY_CPU_64BIT", so the 64-bit variant below looks unreachable - confirm.
#ifdef MY_CPU_64BIT
#define SWAP2_32_VAR(v) asm ("rev16 %w0,%w0" : "+r" (v));
#else
#define SWAP2_32_VAR(v) asm ("rev16 %0,%0" : "+r" (v)); // for clang/gcc
// asm ("rev16 %r0,%r0" : "+r" (a)); // for gcc
#endif
#elif defined(_MSC_VER) && (_MSC_VER < 1300) && defined(MY_CPU_X86) \
|| !defined(Z7_CPU_FAST_BSWAP_SUPPORTED) \
|| !defined(Z7_CPU_FAST_ROTATE_SUPPORTED)
// old msvc doesn't support _byteswap_ulong()
// mask/shift form: exchanges the odd and even bytes of (v)
#define SWAP2_32_VAR(v) \
v = ((v & 0xff00ff) << 8) + ((v >> 8) & 0xff00ff);
#else // is not ARM and is not old-MSVC-X86 and fast BSWAP/ROTATE are supported
// Z7_BSWAP32 followed by rotlFixed(v, 16): presumably a full 4-byte reversal and then
// a 16-bit rotate that puts the two (already swapped) halves back in place.
#define SWAP2_32_VAR(v) \
v = Z7_BSWAP32(v); \
v = rotlFixed(v, 16);
#endif // GNU-ARM*
/* SWAP2_32(i): loads the i-th 4-byte group (two 16-bit items) starting at (items)
   as one UInt32, applies SWAP2_32_VAR to it and stores it back.
   Expects a CSwapUInt16 pointer named (items) in scope. */
#define SWAP2_32(i) { \
UInt32 v = *(const UInt32 *)(const void *)(items + (i) * 2); \
SWAP2_32_VAR(v); \
*(UInt32 *)(void *)(items + (i) * 2) = v; }
/* Scalar byte swap of 16-bit items in the range [items, lim), done on 32-bit words.
   Each loop pass handles four 32-bit words = 8 items (16 bytes).
   The loop is do/while with an (items != lim) test, so the range must be non-empty
   and its length must be a multiple of 8 items. */
SWAP_BASE_FUNCS_PREFIXES
SwapBytes2_32(CSwapUInt16 *items, const CSwapUInt16 *lim)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SWAP2_32(0) SWAP2_32(1) items += 2 * 2;
SWAP2_32(0) SWAP2_32(1) items += 2 * 2;
}
while (items != lim);
}
/* Scalar byte swap of 32-bit items in the range [items, lim), one item per SWAP4_32.
   Each loop pass handles 4 items (16 bytes).
   The loop is do/while with an (items != lim) test, so the range must be non-empty
   and its length must be a multiple of 4 items. */
SWAP_BASE_FUNCS_PREFIXES
SwapBytes4_32(CSwapUInt32 *items, const CSwapUInt32 *lim)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
do
{
SWAP4_32(0) SWAP4_32(1) items += 2;
SWAP4_32(0) SWAP4_32(1) items += 2;
}
while (items != lim);
}
// in the non-64-bit build the 32-bit scalar loops are the default bulk routines
#define DEFAULT_Swap2 SwapBytes2_32
#define DEFAULT_Swap4 SwapBytes4_32
// SWAP2_DEFAULT_MODE == 0 : z7_SwapBytes2() also compiles the run-time check
// that can select SwapBytes2_128 instead of DEFAULT_Swap2.
#if !defined(FORCE_SWAP_MODE)
#define SWAP2_DEFAULT_MODE 0
#endif
#endif // MY_CPU_64BIT
#endif // if !defined(DEFAULT_Swap2) || !defined(DEFAULT_Swap4)
#if !defined(FORCE_SWAP_MODE)
/* Run-time selected implementation mode: written by z7_SwapBytesPrepare(),
   read by z7_SwapBytes2() / z7_SwapBytes4(). Being a static object it is 0
   until z7_SwapBytesPrepare() stores a value. */
static unsigned g_SwapBytes_Mode;
#endif
/* size of largest unrolled loop iteration: 128 bytes = 4 * 32 bytes (AVX). */
#define SWAP_ITERATION_BLOCK_SIZE_MAX (1 << 7)
// 32 bytes for (AVX) or 2 * 16-bytes for NEON.
#define SWAP_VECTOR_ALIGN_SIZE (1 << 5)
/* Byte-swaps (numItems) 16-bit items at (items), in place.
   The work is split into three phases:
     1) head: single items (SWAP2_16) until (items) is aligned to
        SWAP_VECTOR_ALIGN_SIZE bytes or no items are left;
     2) bulk: a whole number of SWAP_ITERATION_BLOCK_SIZE_MAX-byte blocks,
        handled by one range routine chosen below;
     3) tail: the remaining items, again one at a time. */
Z7_NO_INLINE
void z7_SwapBytes2(CSwapUInt16 *items, size_t numItems)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--)
{
SWAP2_16(0)
items++;
}
{
// (items per block) - 1; used to split the count into bulk part and tail part
const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt16) - 1;
size_t numItems2 = numItems;
CSwapUInt16 *lim;
// numItems  : tail count (less than one block)
// numItems2 : bulk count (multiple of the block size)
numItems &= k_Align_Mask;
numItems2 &= ~(size_t)k_Align_Mask;
lim = items + numItems2;
// the range routines are only called for a non-empty range
// (the scalar base loops are do/while and would run at least one pass)
if (numItems2 != 0)
{
// The statements below form one if / else-if chain whose links are compiled
// conditionally; whatever links remain, the chain ends in DEFAULT_Swap2().
#if !defined(FORCE_SWAP_MODE)
#ifdef MY_CPU_X86_OR_AMD64
#ifdef USE_SWAP_AVX2
// mode above SSSE3 : 256-bit shuffle routine
if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3)
ShufBytes_256((__m256i *)(void *)items,
(const __m256i *)(const void *)lim,
(const __m128i *)(const void *)&(k_ShufMask_Swap2[0]));
else
#endif
#ifdef USE_SWAP_SSSE3
// mode SSSE3 (or higher when the AVX2 link is not compiled) : 128-bit shuffle routine
if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3)
ShufBytes_128((__m128i *)(void *)items,
(const __m128i *)(const void *)lim,
(const __m128i *)(const void *)&(k_ShufMask_Swap2[0]));
else
#endif
#endif // MY_CPU_X86_OR_AMD64
// only needed when DEFAULT_Swap2 is not already SwapBytes2_128
#if SWAP2_DEFAULT_MODE == 0
if (g_SwapBytes_Mode != 0)
SwapBytes2_128(items, lim);
else
#endif
#endif // FORCE_SWAP_MODE
DEFAULT_Swap2(items, lim);
}
items = lim;
}
// tail: fewer than one block of items left
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0; numItems--)
{
SWAP2_16(0)
items++;
}
}
/* Byte-swaps (numItems) 32-bit items at (items), in place.
   The work is split into three phases:
     1) head: single items (SWAP4_32) until (items) is aligned to
        SWAP_VECTOR_ALIGN_SIZE bytes or no items are left;
     2) bulk: a whole number of SWAP_ITERATION_BLOCK_SIZE_MAX-byte blocks,
        handled by one range routine chosen below;
     3) tail: the remaining items, again one at a time. */
Z7_NO_INLINE
void z7_SwapBytes4(CSwapUInt32 *items, size_t numItems)
{
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--)
{
SWAP4_32(0)
items++;
}
{
// (items per block) - 1; used to split the count into bulk part and tail part
const size_t k_Align_Mask = SWAP_ITERATION_BLOCK_SIZE_MAX / sizeof(CSwapUInt32) - 1;
size_t numItems2 = numItems;
CSwapUInt32 *lim;
// numItems  : tail count (less than one block)
// numItems2 : bulk count (multiple of the block size)
numItems &= k_Align_Mask;
numItems2 &= ~(size_t)k_Align_Mask;
lim = items + numItems2;
// the range routines are only called for a non-empty range
// (the scalar base loops are do/while and would run at least one pass)
if (numItems2 != 0)
{
// The statements below form one if / else-if chain whose links are compiled
// conditionally; whatever links remain, the chain ends in DEFAULT_Swap4().
#if !defined(FORCE_SWAP_MODE)
#ifdef MY_CPU_X86_OR_AMD64
#ifdef USE_SWAP_AVX2
// mode above SSSE3 : 256-bit shuffle routine
if (g_SwapBytes_Mode > k_SwapBytes_Mode_SSSE3)
ShufBytes_256((__m256i *)(void *)items,
(const __m256i *)(const void *)lim,
(const __m128i *)(const void *)&(k_ShufMask_Swap4[0]));
else
#endif
#ifdef USE_SWAP_SSSE3
// mode SSSE3 (or higher when the AVX2 link is not compiled) : 128-bit shuffle routine
if (g_SwapBytes_Mode >= k_SwapBytes_Mode_SSSE3)
ShufBytes_128((__m128i *)(void *)items,
(const __m128i *)(const void *)lim,
(const __m128i *)(const void *)&(k_ShufMask_Swap4[0]));
else
#endif
#else // MY_CPU_X86_OR_AMD64
// not x86/AMD64: any non-zero mode selects the 128-bit routine
if (g_SwapBytes_Mode != 0)
SwapBytes4_128(items, lim);
else
#endif // MY_CPU_X86_OR_AMD64
#endif // FORCE_SWAP_MODE
DEFAULT_Swap4(items, lim);
}
items = lim;
}
// tail: fewer than one block of items left
Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
for (; numItems != 0; numItems--)
{
SWAP4_32(0)
items++;
}
}
// #define SHOW_HW_STATUS
#ifdef SHOW_HW_STATUS
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif
/* Chooses the implementation mode for z7_SwapBytes2() / z7_SwapBytes4() from CPU
   feature checks and stores it in g_SwapBytes_Mode.
   If FORCE_SWAP_MODE is defined, nothing is detected or stored
   (only the optional PRF debug print remains).
   Selection when not forced:
     ARM / ARM64 : k_SwapBytes_Mode_NEON if CPU_IsSupported_NEON(), else 0;
     otherwise   : the first available of AVX2, SSSE3 (each only if compiled in),
                   then SSE2; on AMD64 the SSE2 check is compiled out and SSE2 is
                   taken unconditionally; 0 if nothing matched. */
void z7_SwapBytesPrepare(void)
{
#ifndef FORCE_SWAP_MODE
unsigned mode = 0; // k_SwapBytes_Mode_BASE;
#ifdef MY_CPU_ARM_OR_ARM64
{
if (CPU_IsSupported_NEON())
{
// #pragma message ("=== SwapBytes NEON")
PRF(printf("\n=== SwapBytes NEON\n");)
mode = k_SwapBytes_Mode_NEON;
}
}
#else // MY_CPU_ARM_OR_ARM64
{
// one if / else-if chain; links are compiled conditionally, best mode first
#ifdef USE_SWAP_AVX2
if (CPU_IsSupported_AVX2())
{
// #pragma message ("=== SwapBytes AVX2")
PRF(printf("\n=== SwapBytes AVX2\n");)
mode = k_SwapBytes_Mode_AVX2;
}
else
#endif
#ifdef USE_SWAP_SSSE3
if (CPU_IsSupported_SSSE3())
{
// #pragma message ("=== SwapBytes SSSE3")
PRF(printf("\n=== SwapBytes SSSE3\n");)
mode = k_SwapBytes_Mode_SSSE3;
}
else
#endif
// on AMD64 the following block runs without a feature check
#if !defined(MY_CPU_AMD64)
if (CPU_IsSupported_SSE2())
#endif
{
// #pragma message ("=== SwapBytes SSE2")
PRF(printf("\n=== SwapBytes SSE2\n");)
mode = k_SwapBytes_Mode_SSE2;
}
}
#endif // MY_CPU_ARM_OR_ARM64
g_SwapBytes_Mode = mode;
// g_SwapBytes_Mode = 0; // for debug
#endif // FORCE_SWAP_MODE
PRF(printf("\n=== SwapBytesPrepare\n");)
}
#undef PRF

17
3rdparty/7z/src/SwapBytes.h vendored Normal file
View file

@ -0,0 +1,17 @@
/* SwapBytes.h -- Byte Swap conversion filter
2023-04-02 : Igor Pavlov : Public domain */
#ifndef ZIP7_INC_SWAP_BYTES_H
#define ZIP7_INC_SWAP_BYTES_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* Byte-swaps each of the (numItems) 16-bit items at (data), in place. */
void z7_SwapBytes2(UInt16 *data, size_t numItems);
/* Byte-swaps each of the (numItems) 32-bit items at (data), in place. */
void z7_SwapBytes4(UInt32 *data, size_t numItems);
/* Detects CPU features and records which implementation z7_SwapBytes2() and
   z7_SwapBytes4() will dispatch to. Unless the mode is forced at compile time,
   the recorded mode is a static variable that stays 0 until this function is called. */
void z7_SwapBytesPrepare(void);
EXTERN_C_END
#endif

View file

@ -1,5 +1,5 @@
/* Threads.c -- multithreading library /* Threads.c -- multithreading library
2021-12-21 : Igor Pavlov : Public domain */ 2023-03-04 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -11,9 +11,9 @@
#include "Threads.h" #include "Threads.h"
static WRes GetError() static WRes GetError(void)
{ {
DWORD res = GetLastError(); const DWORD res = GetLastError();
return res ? (WRes)res : 1; return res ? (WRes)res : 1;
} }
@ -173,6 +173,9 @@ WRes CriticalSection_Init(CCriticalSection *p)
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
Windows Vista+ : no exceptions */ Windows Vista+ : no exceptions */
#ifdef _MSC_VER #ifdef _MSC_VER
#ifdef __clang__
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
__try __try
#endif #endif
{ {
@ -193,18 +196,26 @@ WRes CriticalSection_Init(CCriticalSection *p)
// ---------- POSIX ---------- // ---------- POSIX ----------
#ifndef __APPLE__ #ifndef __APPLE__
#ifndef _7ZIP_AFFINITY_DISABLE #ifndef Z7_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET // _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
// clang < 3.6 : unknown warning group '-Wreserved-id-macro'
// clang 3.6 - 12.01 : gives warning "macro name is a reserved identifier"
// clang >= 13 : do not give warning
#if !defined(_GNU_SOURCE)
#if defined(__clang__) && (__clang_major__ >= 4) && (__clang_major__ <= 12)
#pragma GCC diagnostic ignored "-Wreserved-id-macro"
#endif
#define _GNU_SOURCE #define _GNU_SOURCE
#endif #endif // !defined(_GNU_SOURCE)
#endif #endif // Z7_AFFINITY_DISABLE
#endif // __APPLE__
#include "Threads.h" #include "Threads.h"
#include <errno.h> #include <errno.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#ifdef _7ZIP_AFFINITY_SUPPORTED #ifdef Z7_AFFINITY_SUPPORTED
// #include <sched.h> // #include <sched.h>
#endif #endif
@ -212,15 +223,12 @@ WRes CriticalSection_Init(CCriticalSection *p)
// #include <stdio.h> // #include <stdio.h>
// #define PRF(p) p // #define PRF(p) p
#define PRF(p) #define PRF(p)
#define Print(s) PRF(printf("\n%s\n", s);)
#define Print(s) PRF(printf("\n%s\n", s))
// #include <stdio.h>
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet) WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
{ {
// new thread in Posix probably inherits affinity from parent thread // new thread in Posix probably inherits affinity from parent thread
Print("Thread_Create_With_CpuSet"); Print("Thread_Create_With_CpuSet")
pthread_attr_t attr; pthread_attr_t attr;
int ret; int ret;
@ -228,7 +236,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
p->_created = 0; p->_created = 0;
RINOK(pthread_attr_init(&attr)); RINOK(pthread_attr_init(&attr))
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
@ -236,7 +244,7 @@ WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param,
{ {
if (cpuSet) if (cpuSet)
{ {
#ifdef _7ZIP_AFFINITY_SUPPORTED #ifdef Z7_AFFINITY_SUPPORTED
/* /*
printf("\n affinity :"); printf("\n affinity :");
@ -292,7 +300,7 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{ {
Print("Thread_Create_WithAffinity"); Print("Thread_Create_WithAffinity")
CCpuSet cs; CCpuSet cs;
unsigned i; unsigned i;
CpuSet_Zero(&cs); CpuSet_Zero(&cs);
@ -312,7 +320,7 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param
WRes Thread_Close(CThread *p) WRes Thread_Close(CThread *p)
{ {
// Print("Thread_Close"); // Print("Thread_Close")
int ret; int ret;
if (!p->_created) if (!p->_created)
return 0; return 0;
@ -326,7 +334,7 @@ WRes Thread_Close(CThread *p)
WRes Thread_Wait_Close(CThread *p) WRes Thread_Wait_Close(CThread *p)
{ {
// Print("Thread_Wait_Close"); // Print("Thread_Wait_Close")
void *thread_return; void *thread_return;
int ret; int ret;
if (!p->_created) if (!p->_created)
@ -343,8 +351,8 @@ WRes Thread_Wait_Close(CThread *p)
static WRes Event_Create(CEvent *p, int manualReset, int signaled) static WRes Event_Create(CEvent *p, int manualReset, int signaled)
{ {
RINOK(pthread_mutex_init(&p->_mutex, NULL)); RINOK(pthread_mutex_init(&p->_mutex, NULL))
RINOK(pthread_cond_init(&p->_cond, NULL)); RINOK(pthread_cond_init(&p->_cond, NULL))
p->_manual_reset = manualReset; p->_manual_reset = manualReset;
p->_state = (signaled ? True : False); p->_state = (signaled ? True : False);
p->_created = 1; p->_created = 1;
@ -363,7 +371,7 @@ WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
WRes Event_Set(CEvent *p) WRes Event_Set(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = True; p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond); int res1 = pthread_cond_broadcast(&p->_cond);
int res2 = pthread_mutex_unlock(&p->_mutex); int res2 = pthread_mutex_unlock(&p->_mutex);
@ -372,14 +380,14 @@ WRes Event_Set(CEvent *p)
WRes Event_Reset(CEvent *p) WRes Event_Reset(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
p->_state = False; p->_state = False;
return pthread_mutex_unlock(&p->_mutex); return pthread_mutex_unlock(&p->_mutex);
} }
WRes Event_Wait(CEvent *p) WRes Event_Wait(CEvent *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
while (p->_state == False) while (p->_state == False)
{ {
// ETIMEDOUT // ETIMEDOUT
@ -411,8 +419,8 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{ {
if (initCount > maxCount || maxCount < 1) if (initCount > maxCount || maxCount < 1)
return EINVAL; return EINVAL;
RINOK(pthread_mutex_init(&p->_mutex, NULL)); RINOK(pthread_mutex_init(&p->_mutex, NULL))
RINOK(pthread_cond_init(&p->_cond, NULL)); RINOK(pthread_cond_init(&p->_cond, NULL))
p->_count = initCount; p->_count = initCount;
p->_maxCount = maxCount; p->_maxCount = maxCount;
p->_created = 1; p->_created = 1;
@ -448,7 +456,7 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
if (releaseCount < 1) if (releaseCount < 1)
return EINVAL; return EINVAL;
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
newCount = p->_count + releaseCount; newCount = p->_count + releaseCount;
if (newCount > p->_maxCount) if (newCount > p->_maxCount)
@ -458,13 +466,13 @@ WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
p->_count = newCount; p->_count = newCount;
ret = pthread_cond_broadcast(&p->_cond); ret = pthread_cond_broadcast(&p->_cond);
} }
RINOK(pthread_mutex_unlock(&p->_mutex)); RINOK(pthread_mutex_unlock(&p->_mutex))
return ret; return ret;
} }
WRes Semaphore_Wait(CSemaphore *p) WRes Semaphore_Wait(CSemaphore *p)
{ {
RINOK(pthread_mutex_lock(&p->_mutex)); RINOK(pthread_mutex_lock(&p->_mutex))
while (p->_count < 1) while (p->_count < 1)
{ {
pthread_cond_wait(&p->_cond, &p->_mutex); pthread_cond_wait(&p->_cond, &p->_mutex);
@ -489,7 +497,7 @@ WRes Semaphore_Close(CSemaphore *p)
WRes CriticalSection_Init(CCriticalSection *p) WRes CriticalSection_Init(CCriticalSection *p)
{ {
// Print("CriticalSection_Init"); // Print("CriticalSection_Init")
if (!p) if (!p)
return EINTR; return EINTR;
return pthread_mutex_init(&p->_mutex, NULL); return pthread_mutex_init(&p->_mutex, NULL);
@ -497,7 +505,7 @@ WRes CriticalSection_Init(CCriticalSection *p)
void CriticalSection_Enter(CCriticalSection *p) void CriticalSection_Enter(CCriticalSection *p)
{ {
// Print("CriticalSection_Enter"); // Print("CriticalSection_Enter")
if (p) if (p)
{ {
// int ret = // int ret =
@ -507,7 +515,7 @@ void CriticalSection_Enter(CCriticalSection *p)
void CriticalSection_Leave(CCriticalSection *p) void CriticalSection_Leave(CCriticalSection *p)
{ {
// Print("CriticalSection_Leave"); // Print("CriticalSection_Leave")
if (p) if (p)
{ {
// int ret = // int ret =
@ -517,7 +525,7 @@ void CriticalSection_Leave(CCriticalSection *p)
void CriticalSection_Delete(CCriticalSection *p) void CriticalSection_Delete(CCriticalSection *p)
{ {
// Print("CriticalSection_Delete"); // Print("CriticalSection_Delete")
if (p) if (p)
{ {
// int ret = // int ret =
@ -527,14 +535,28 @@ void CriticalSection_Delete(CCriticalSection *p)
LONG InterlockedIncrement(LONG volatile *addend) LONG InterlockedIncrement(LONG volatile *addend)
{ {
// Print("InterlockedIncrement"); // Print("InterlockedIncrement")
#ifdef USE_HACK_UNSAFE_ATOMIC #ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend + 1; LONG val = *addend + 1;
*addend = val; *addend = val;
return val; return val;
#else #else
#if defined(__clang__) && (__clang_major__ >= 8)
#pragma GCC diagnostic ignored "-Watomic-implicit-seq-cst"
#endif
return __sync_add_and_fetch(addend, 1); return __sync_add_and_fetch(addend, 1);
#endif #endif
} }
#endif // _WIN32 #endif // _WIN32
/* Makes (p) usable as a fresh auto-reset event:
   an event that is already created is reset with Event_Reset(),
   otherwise it is created with AutoResetEvent_CreateNotSignaled().
   Returns the result code of the call that was made. */
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
{
  return Event_IsCreated(p)
      ? Event_Reset(p)
      : AutoResetEvent_CreateNotSignaled(p);
}
#undef PRF
#undef Print

View file

@ -1,18 +1,19 @@
/* Threads.h -- multithreading library /* Threads.h -- multithreading library
2021-12-21 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __7Z_THREADS_H #ifndef ZIP7_INC_THREADS_H
#define __7Z_THREADS_H #define ZIP7_INC_THREADS_H
#ifdef _WIN32 #ifdef _WIN32
#include <Windows.h> #include "7zWindows.h"
#else #else
#if defined(__linux__) #if defined(__linux__)
#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__)
#ifndef _7ZIP_AFFINITY_DISABLE #ifndef Z7_AFFINITY_DISABLE
#define _7ZIP_AFFINITY_SUPPORTED #define Z7_AFFINITY_SUPPORTED
// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED") // #pragma message(" ==== Z7_AFFINITY_SUPPORTED")
// #define _GNU_SOURCE // #define _GNU_SOURCE
#endif #endif
#endif #endif
@ -33,7 +34,7 @@ WRes Handle_WaitObject(HANDLE h);
typedef HANDLE CThread; typedef HANDLE CThread;
#define Thread_Construct(p) { *(p) = NULL; } #define Thread_CONSTRUCT(p) { *(p) = NULL; }
#define Thread_WasCreated(p) (*(p) != NULL) #define Thread_WasCreated(p) (*(p) != NULL)
#define Thread_Close(p) HandlePtr_Close(p) #define Thread_Close(p) HandlePtr_Close(p)
// #define Thread_Wait(p) Handle_WaitObject(*(p)) // #define Thread_Wait(p) Handle_WaitObject(*(p))
@ -52,42 +53,46 @@ typedef
#endif #endif
THREAD_FUNC_RET_TYPE; THREAD_FUNC_RET_TYPE;
#define THREAD_FUNC_RET_ZERO 0
typedef DWORD_PTR CAffinityMask; typedef DWORD_PTR CAffinityMask;
typedef DWORD_PTR CCpuSet; typedef DWORD_PTR CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; } #define CpuSet_Zero(p) *(p) = (0)
#define CpuSet_Set(p, cpu) { *(p) |= ((DWORD_PTR)1 << (cpu)); } #define CpuSet_Set(p, cpu) *(p) |= ((DWORD_PTR)1 << (cpu))
#else // _WIN32 #else // _WIN32
typedef struct _CThread typedef struct
{ {
pthread_t _tid; pthread_t _tid;
int _created; int _created;
} CThread; } CThread;
#define Thread_Construct(p) { (p)->_tid = 0; (p)->_created = 0; } #define Thread_CONSTRUCT(p) { (p)->_tid = 0; (p)->_created = 0; }
#define Thread_WasCreated(p) ((p)->_created != 0) #define Thread_WasCreated(p) ((p)->_created != 0)
WRes Thread_Close(CThread *p); WRes Thread_Close(CThread *p);
// #define Thread_Wait Thread_Wait_Close // #define Thread_Wait Thread_Wait_Close
typedef void * THREAD_FUNC_RET_TYPE; typedef void * THREAD_FUNC_RET_TYPE;
#define THREAD_FUNC_RET_ZERO NULL
typedef UInt64 CAffinityMask; typedef UInt64 CAffinityMask;
#ifdef _7ZIP_AFFINITY_SUPPORTED #ifdef Z7_AFFINITY_SUPPORTED
typedef cpu_set_t CCpuSet; typedef cpu_set_t CCpuSet;
#define CpuSet_Zero(p) CPU_ZERO(p) #define CpuSet_Zero(p) CPU_ZERO(p)
#define CpuSet_Set(p, cpu) CPU_SET(cpu, p) #define CpuSet_Set(p, cpu) CPU_SET(cpu, p)
#define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p) #define CpuSet_IsSet(p, cpu) CPU_ISSET(cpu, p)
#else #else
typedef UInt64 CCpuSet; typedef UInt64 CCpuSet;
#define CpuSet_Zero(p) { *(p) = 0; } #define CpuSet_Zero(p) *(p) = (0)
#define CpuSet_Set(p, cpu) { *(p) |= ((UInt64)1 << (cpu)); } #define CpuSet_Set(p, cpu) *(p) |= ((UInt64)1 << (cpu))
#define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0) #define CpuSet_IsSet(p, cpu) ((*(p) & ((UInt64)1 << (cpu))) != 0)
#endif #endif
@ -95,7 +100,7 @@ typedef UInt64 CCpuSet;
#endif // _WIN32 #endif // _WIN32
#define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_CALL_TYPE Z7_STDCALL
#if defined(_WIN32) && defined(__GNUC__) #if defined(_WIN32) && defined(__GNUC__)
/* GCC compiler for x86 32-bit uses the rule: /* GCC compiler for x86 32-bit uses the rule:
@ -187,6 +192,7 @@ WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled);
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p); WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p);
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled); WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled);
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p);
WRes Event_Set(CEvent *p); WRes Event_Set(CEvent *p);
WRes Event_Reset(CEvent *p); WRes Event_Reset(CEvent *p);
WRes Event_Wait(CEvent *p); WRes Event_Wait(CEvent *p);
@ -227,6 +233,8 @@ LONG InterlockedIncrement(LONG volatile *addend);
#endif // _WIN32 #endif // _WIN32
WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p);
EXTERN_C_END EXTERN_C_END
#endif #endif

View file

@ -1,5 +1,5 @@
/* Xz.c - Xz /* Xz.c - Xz
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -70,7 +70,7 @@ int XzCheck_Final(CXzCheck *p, Byte *digest)
switch (p->mode) switch (p->mode)
{ {
case XZ_CHECK_CRC32: case XZ_CHECK_CRC32:
SetUi32(digest, CRC_GET_DIGEST(p->crc)); SetUi32(digest, CRC_GET_DIGEST(p->crc))
break; break;
case XZ_CHECK_CRC64: case XZ_CHECK_CRC64:
{ {

56
3rdparty/7z/src/Xz.h vendored
View file

@ -1,21 +1,23 @@
/* Xz.h - Xz interface /* Xz.h - Xz interface
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __XZ_H #ifndef ZIP7_INC_XZ_H
#define __XZ_H #define ZIP7_INC_XZ_H
#include "Sha256.h" #include "Sha256.h"
#include "Delta.h"
EXTERN_C_BEGIN EXTERN_C_BEGIN
#define XZ_ID_Subblock 1 #define XZ_ID_Subblock 1
#define XZ_ID_Delta 3 #define XZ_ID_Delta 3
#define XZ_ID_X86 4 #define XZ_ID_X86 4
#define XZ_ID_PPC 5 #define XZ_ID_PPC 5
#define XZ_ID_IA64 6 #define XZ_ID_IA64 6
#define XZ_ID_ARM 7 #define XZ_ID_ARM 7
#define XZ_ID_ARMT 8 #define XZ_ID_ARMT 8
#define XZ_ID_SPARC 9 #define XZ_ID_SPARC 9
#define XZ_ID_ARM64 0xa
#define XZ_ID_LZMA2 0x21 #define XZ_ID_LZMA2 0x21
unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value); unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value);
@ -53,7 +55,7 @@ typedef struct
#define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0) #define XzBlock_HasUnsupportedFlags(p) (((p)->flags & ~(XZ_BF_NUM_FILTERS_MASK | XZ_BF_PACK_SIZE | XZ_BF_UNPACK_SIZE)) != 0)
SRes XzBlock_Parse(CXzBlock *p, const Byte *header); SRes XzBlock_Parse(CXzBlock *p, const Byte *header);
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes); SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes);
/* ---------- xz stream ---------- */ /* ---------- xz stream ---------- */
@ -101,7 +103,7 @@ typedef UInt16 CXzStreamFlags;
unsigned XzFlags_GetCheckSize(CXzStreamFlags f); unsigned XzFlags_GetCheckSize(CXzStreamFlags f);
SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf); SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf);
SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream); SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream);
typedef struct typedef struct
{ {
@ -112,6 +114,7 @@ typedef struct
typedef struct typedef struct
{ {
CXzStreamFlags flags; CXzStreamFlags flags;
// Byte _pad[6];
size_t numBlocks; size_t numBlocks;
CXzBlockSizes *blocks; CXzBlockSizes *blocks;
UInt64 startOffset; UInt64 startOffset;
@ -134,7 +137,7 @@ typedef struct
void Xzs_Construct(CXzs *p); void Xzs_Construct(CXzs *p);
void Xzs_Free(CXzs *p, ISzAllocPtr alloc); void Xzs_Free(CXzs *p, ISzAllocPtr alloc);
SRes Xzs_ReadBackward(CXzs *p, ILookInStream *inStream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc); SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc);
UInt64 Xzs_GetNumBlocks(const CXzs *p); UInt64 Xzs_GetNumBlocks(const CXzs *p);
UInt64 Xzs_GetUnpackSize(const CXzs *p); UInt64 Xzs_GetUnpackSize(const CXzs *p);
@ -160,9 +163,9 @@ typedef enum
} ECoderFinishMode; } ECoderFinishMode;
typedef struct _IStateCoder typedef struct
{ {
void *p; void *p; // state object;
void (*Free)(void *p, ISzAllocPtr alloc); void (*Free)(void *p, ISzAllocPtr alloc);
SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc); SRes (*SetProps)(void *p, const Byte *props, size_t propSize, ISzAllocPtr alloc);
void (*Init)(void *p); void (*Init)(void *p);
@ -174,6 +177,20 @@ typedef struct _IStateCoder
} IStateCoder; } IStateCoder;
/* State record passed by pointer to Xz_Func_BcFilterStateBase_Filter callbacks.
   NOTE(review): the per-field notes are inferred from the field names only -
   confirm against the filter implementation. */
typedef struct
{
UInt32 methodId; // presumably one of the XZ_ID_* filter ids
UInt32 delta; // presumably the distance parameter of the Delta filter
UInt32 ip; // presumably the current position passed to the branch converters
UInt32 X86_State; // presumably the carried state of the x86 converter
Byte delta_State[DELTA_STATE_SIZE]; // buffer of DELTA_STATE_SIZE bytes
} CXzBcFilterStateBase;
typedef SizeT (*Xz_Func_BcFilterStateBase_Filter)(CXzBcFilterStateBase *p, Byte *data, SizeT size);
SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc);
#define MIXCODER_NUM_FILTERS_MAX 4 #define MIXCODER_NUM_FILTERS_MAX 4
@ -422,7 +439,7 @@ typedef struct
size_t outStep_ST; // size of output buffer for Single-Thread decoding size_t outStep_ST; // size of output buffer for Single-Thread decoding
BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data. BoolInt ignoreErrors; // if set to 1, the decoder can ignore some errors and it skips broken parts of data.
#ifndef _7ZIP_ST #ifndef Z7_ST
unsigned numThreads; // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding unsigned numThreads; // the number of threads for Multi-Thread decoding. if (numThreads == 1) it will use Single-thread decoding
size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created size_t inBufSize_MT; // size of small input data buffers for Multi-Thread decoding. Big number of such small buffers can be created
size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding. size_t memUseMax; // the limit of total memory usage for Multi-Thread decoding.
@ -432,8 +449,9 @@ typedef struct
void XzDecMtProps_Init(CXzDecMtProps *p); void XzDecMtProps_Init(CXzDecMtProps *p);
typedef struct CXzDecMt CXzDecMt;
typedef void * CXzDecMtHandle; typedef CXzDecMt * CXzDecMtHandle;
// Z7_DECLARE_HANDLE(CXzDecMtHandle)
/* /*
alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc). alloc : XzDecMt uses CAlignOffsetAlloc internally for addresses allocated by (alloc).
@ -503,14 +521,14 @@ SRes XzDecMt_Decode(CXzDecMtHandle p,
const CXzDecMtProps *props, const CXzDecMtProps *props,
const UInt64 *outDataSize, // NULL means undefined const UInt64 *outDataSize, // NULL means undefined
int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished int finishMode, // 0 - partial unpacking is allowed, 1 - xz stream(s) must be finished
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
// Byte *outBuf, size_t *outBufSize, // Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
// const Byte *inData, size_t inDataSize, // const Byte *inData, size_t inDataSize,
CXzStatInfo *stat, // out: decoding results and statistics CXzStatInfo *stat, // out: decoding results and statistics
int *isMT, // out: 0 means that ST (Single-Thread) version was used int *isMT, // out: 0 means that ST (Single-Thread) version was used
// 1 means that MT (Multi-Thread) version was used // 1 means that MT (Multi-Thread) version was used
ICompressProgress *progress); ICompressProgressPtr progress);
EXTERN_C_END EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* XzCrc64.c -- CRC64 calculation /* XzCrc64.c -- CRC64 calculation
2017-05-23 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -12,39 +12,30 @@
#define CRC64_NUM_TABLES 4 #define CRC64_NUM_TABLES 4
#else #else
#define CRC64_NUM_TABLES 5 #define CRC64_NUM_TABLES 5
#define CRC_UINT64_SWAP(v) \
((v >> 56) \
| ((v >> 40) & ((UInt64)0xFF << 8)) \
| ((v >> 24) & ((UInt64)0xFF << 16)) \
| ((v >> 8) & ((UInt64)0xFF << 24)) \
| ((v << 8) & ((UInt64)0xFF << 32)) \
| ((v << 24) & ((UInt64)0xFF << 40)) \
| ((v << 40) & ((UInt64)0xFF << 48)) \
| ((v << 56)))
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif #endif
#ifndef MY_CPU_BE #ifndef MY_CPU_BE
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
#endif #endif
typedef UInt64 (MY_FAST_CALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table); typedef UInt64 (Z7_FASTCALL *CRC64_FUNC)(UInt64 v, const void *data, size_t size, const UInt64 *table);
static CRC64_FUNC g_Crc64Update; static CRC64_FUNC g_Crc64Update;
UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES]; UInt64 g_Crc64Table[256 * CRC64_NUM_TABLES];
UInt64 MY_FAST_CALL Crc64Update(UInt64 v, const void *data, size_t size) UInt64 Z7_FASTCALL Crc64Update(UInt64 v, const void *data, size_t size)
{ {
return g_Crc64Update(v, data, size, g_Crc64Table); return g_Crc64Update(v, data, size, g_Crc64Table);
} }
UInt64 MY_FAST_CALL Crc64Calc(const void *data, size_t size) UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size)
{ {
return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL; return g_Crc64Update(CRC64_INIT_VAL, data, size, g_Crc64Table) ^ CRC64_INIT_VAL;
} }
void MY_FAST_CALL Crc64GenerateTable() void Z7_FASTCALL Crc64GenerateTable(void)
{ {
UInt32 i; UInt32 i;
for (i = 0; i < 256; i++) for (i = 0; i < 256; i++)
@ -57,7 +48,7 @@ void MY_FAST_CALL Crc64GenerateTable()
} }
for (i = 256; i < 256 * CRC64_NUM_TABLES; i++) for (i = 256; i < 256 * CRC64_NUM_TABLES; i++)
{ {
UInt64 r = g_Crc64Table[(size_t)i - 256]; const UInt64 r = g_Crc64Table[(size_t)i - 256];
g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8); g_Crc64Table[i] = g_Crc64Table[r & 0xFF] ^ (r >> 8);
} }
@ -76,11 +67,14 @@ void MY_FAST_CALL Crc64GenerateTable()
{ {
for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--) for (i = 256 * CRC64_NUM_TABLES - 1; i >= 256; i--)
{ {
UInt64 x = g_Crc64Table[(size_t)i - 256]; const UInt64 x = g_Crc64Table[(size_t)i - 256];
g_Crc64Table[i] = CRC_UINT64_SWAP(x); g_Crc64Table[i] = Z7_BSWAP64(x);
} }
g_Crc64Update = XzCrc64UpdateT1_BeT4; g_Crc64Update = XzCrc64UpdateT1_BeT4;
} }
} }
#endif #endif
} }
#undef kCrc64Poly
#undef CRC64_NUM_TABLES

View file

@ -1,8 +1,8 @@
/* XzCrc64.h -- CRC64 calculation /* XzCrc64.h -- CRC64 calculation
2013-01-18 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#ifndef __XZ_CRC64_H #ifndef ZIP7_INC_XZ_CRC64_H
#define __XZ_CRC64_H #define ZIP7_INC_XZ_CRC64_H
#include <stddef.h> #include <stddef.h>
@ -12,14 +12,14 @@ EXTERN_C_BEGIN
extern UInt64 g_Crc64Table[]; extern UInt64 g_Crc64Table[];
void MY_FAST_CALL Crc64GenerateTable(void); void Z7_FASTCALL Crc64GenerateTable(void);
#define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF) #define CRC64_INIT_VAL UINT64_CONST(0xFFFFFFFFFFFFFFFF)
#define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL) #define CRC64_GET_DIGEST(crc) ((crc) ^ CRC64_INIT_VAL)
#define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC64_UPDATE_BYTE(crc, b) (g_Crc64Table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 MY_FAST_CALL Crc64Update(UInt64 crc, const void *data, size_t size); UInt64 Z7_FASTCALL Crc64Update(UInt64 crc, const void *data, size_t size);
UInt64 MY_FAST_CALL Crc64Calc(const void *data, size_t size); UInt64 Z7_FASTCALL Crc64Calc(const void *data, size_t size);
EXTERN_C_END EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* XzCrc64Opt.c -- CRC64 calculation /* XzCrc64Opt.c -- CRC64 calculation
2021-02-09 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -9,15 +9,15 @@
#define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) #define CRC64_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table) UInt64 Z7_FASTCALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2(v, *p); v = CRC64_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4) for (; size >= 4; size -= 4, p += 4)
{ {
UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p; const UInt32 d = (UInt32)v ^ *(const UInt32 *)(const void *)p;
v = (v >> 32) v = (v >> 32)
^ (table + 0x300)[((d ) & 0xFF)] ^ (table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)] ^ (table + 0x200)[((d >> 8) & 0xFF)]
@ -34,29 +34,19 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT4(UInt64 v, const void *data, size_t size, con
#ifndef MY_CPU_LE #ifndef MY_CPU_LE
#define CRC_UINT64_SWAP(v) \
((v >> 56) \
| ((v >> 40) & ((UInt64)0xFF << 8)) \
| ((v >> 24) & ((UInt64)0xFF << 16)) \
| ((v >> 8) & ((UInt64)0xFF << 24)) \
| ((v << 8) & ((UInt64)0xFF << 32)) \
| ((v << 24) & ((UInt64)0xFF << 40)) \
| ((v << 40) & ((UInt64)0xFF << 48)) \
| ((v << 56)))
#define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8)) #define CRC64_UPDATE_BYTE_2_BE(crc, b) (table[(Byte)((crc) >> 56) ^ (b)] ^ ((crc) << 8))
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table); UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table);
UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table) UInt64 Z7_FASTCALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size, const UInt64 *table)
{ {
const Byte *p = (const Byte *)data; const Byte *p = (const Byte *)data;
table += 0x100; table += 0x100;
v = CRC_UINT64_SWAP(v); v = Z7_BSWAP64(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++) for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p); v = CRC64_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4) for (; size >= 4; size -= 4, p += 4)
{ {
UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p; const UInt32 d = (UInt32)(v >> 32) ^ *(const UInt32 *)(const void *)p;
v = (v << 32) v = (v << 32)
^ (table + 0x000)[((d ) & 0xFF)] ^ (table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)] ^ (table + 0x100)[((d >> 8) & 0xFF)]
@ -65,7 +55,7 @@ UInt64 MY_FAST_CALL XzCrc64UpdateT1_BeT4(UInt64 v, const void *data, size_t size
} }
for (; size > 0; size--, p++) for (; size > 0; size--, p++)
v = CRC64_UPDATE_BYTE_2_BE(v, *p); v = CRC64_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT64_SWAP(v); return Z7_BSWAP64(v);
} }
#endif #endif

View file

@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode /* XzDec.c -- Xz Decode
2021-09-04 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -67,7 +67,8 @@ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value)
return 0; return 0;
} }
/* ---------- BraState ---------- */
/* ---------- XzBcFilterState ---------- */
#define BRA_BUF_SIZE (1 << 14) #define BRA_BUF_SIZE (1 << 14)
@ -76,27 +77,29 @@ typedef struct
size_t bufPos; size_t bufPos;
size_t bufConv; size_t bufConv;
size_t bufTotal; size_t bufTotal;
Byte *buf; // must be aligned for 4 bytes
Xz_Func_BcFilterStateBase_Filter filter_func;
// int encodeMode;
CXzBcFilterStateBase base;
// Byte buf[BRA_BUF_SIZE];
} CXzBcFilterState;
int encodeMode;
UInt32 methodId; static void XzBcFilterState_Free(void *pp, ISzAllocPtr alloc)
UInt32 delta;
UInt32 ip;
UInt32 x86State;
Byte deltaState[DELTA_STATE_SIZE];
Byte buf[BRA_BUF_SIZE];
} CBraState;
static void BraState_Free(void *pp, ISzAllocPtr alloc)
{ {
ISzAlloc_Free(alloc, pp); if (pp)
{
CXzBcFilterState *p = ((CXzBcFilterState *)pp);
ISzAlloc_Free(alloc, p->buf);
ISzAlloc_Free(alloc, pp);
}
} }
static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc)
static SRes XzBcFilterState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc)
{ {
CBraState *p = ((CBraState *)pp); CXzBcFilterStateBase *p = &((CXzBcFilterState *)pp)->base;
UNUSED_VAR(alloc); UNUSED_VAR(alloc)
p->ip = 0; p->ip = 0;
if (p->methodId == XZ_ID_Delta) if (p->methodId == XZ_ID_Delta)
{ {
@ -114,6 +117,7 @@ static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzA
case XZ_ID_PPC: case XZ_ID_PPC:
case XZ_ID_ARM: case XZ_ID_ARM:
case XZ_ID_SPARC: case XZ_ID_SPARC:
case XZ_ID_ARM64:
if ((v & 3) != 0) if ((v & 3) != 0)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
break; break;
@ -134,73 +138,90 @@ static SRes BraState_SetProps(void *pp, const Byte *props, size_t propSize, ISzA
return SZ_OK; return SZ_OK;
} }
static void BraState_Init(void *pp)
static void XzBcFilterState_Init(void *pp)
{ {
CBraState *p = ((CBraState *)pp); CXzBcFilterState *p = ((CXzBcFilterState *)pp);
p->bufPos = p->bufConv = p->bufTotal = 0; p->bufPos = p->bufConv = p->bufTotal = 0;
x86_Convert_Init(p->x86State); p->base.X86_State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
if (p->methodId == XZ_ID_Delta) if (p->base.methodId == XZ_ID_Delta)
Delta_Init(p->deltaState); Delta_Init(p->base.delta_State);
} }
#define CASE_BRA_CONV(isa) case XZ_ID_ ## isa: size = isa ## _Convert(data, size, p->ip, p->encodeMode); break; static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Dec[] =
{
static SizeT BraState_Filter(void *pp, Byte *data, SizeT size) Z7_BRANCH_CONV_DEC(PPC),
Z7_BRANCH_CONV_DEC(IA64),
Z7_BRANCH_CONV_DEC(ARM),
Z7_BRANCH_CONV_DEC(ARMT),
Z7_BRANCH_CONV_DEC(SPARC),
Z7_BRANCH_CONV_DEC(ARM64)
};
static SizeT XzBcFilterStateBase_Filter_Dec(CXzBcFilterStateBase *p, Byte *data, SizeT size)
{ {
CBraState *p = ((CBraState *)pp);
switch (p->methodId) switch (p->methodId)
{ {
case XZ_ID_Delta: case XZ_ID_Delta:
if (p->encodeMode) Delta_Decode(p->delta_State, p->delta, data, size);
Delta_Encode(p->deltaState, p->delta, data, size);
else
Delta_Decode(p->deltaState, p->delta, data, size);
break; break;
case XZ_ID_X86: case XZ_ID_X86:
size = x86_Convert(data, size, p->ip, &p->x86State, p->encodeMode); size = (SizeT)(z7_BranchConvSt_X86_Dec(data, size, p->ip, &p->X86_State) - data);
break;
default:
if (p->methodId >= XZ_ID_PPC)
{
const UInt32 i = p->methodId - XZ_ID_PPC;
if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Dec))
size = (SizeT)(g_Funcs_BranchConv_RISC_Dec[i](data, size, p->ip) - data);
}
break; break;
CASE_BRA_CONV(PPC)
CASE_BRA_CONV(IA64)
CASE_BRA_CONV(ARM)
CASE_BRA_CONV(ARMT)
CASE_BRA_CONV(SPARC)
} }
p->ip += (UInt32)size; p->ip += (UInt32)size;
return size; return size;
} }
static SRes BraState_Code2(void *pp, static SizeT XzBcFilterState_Filter(void *pp, Byte *data, SizeT size)
{
CXzBcFilterState *p = ((CXzBcFilterState *)pp);
return p->filter_func(&p->base, data, size);
}
static SRes XzBcFilterState_Code2(void *pp,
Byte *dest, SizeT *destLen, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, int srcWasFinished, const Byte *src, SizeT *srcLen, int srcWasFinished,
ECoderFinishMode finishMode, ECoderFinishMode finishMode,
// int *wasFinished // int *wasFinished
ECoderStatus *status) ECoderStatus *status)
{ {
CBraState *p = ((CBraState *)pp); CXzBcFilterState *p = ((CXzBcFilterState *)pp);
SizeT destRem = *destLen; SizeT destRem = *destLen;
SizeT srcRem = *srcLen; SizeT srcRem = *srcLen;
UNUSED_VAR(finishMode); UNUSED_VAR(finishMode)
*destLen = 0; *destLen = 0;
*srcLen = 0; *srcLen = 0;
// *wasFinished = False; // *wasFinished = False;
*status = CODER_STATUS_NOT_FINISHED; *status = CODER_STATUS_NOT_FINISHED;
while (destRem > 0) while (destRem != 0)
{ {
if (p->bufPos != p->bufConv)
{ {
size_t size = p->bufConv - p->bufPos; size_t size = p->bufConv - p->bufPos;
if (size > destRem) if (size)
size = destRem; {
memcpy(dest, p->buf + p->bufPos, size); if (size > destRem)
p->bufPos += size; size = destRem;
*destLen += size; memcpy(dest, p->buf + p->bufPos, size);
dest += size; p->bufPos += size;
destRem -= size; *destLen += size;
continue; dest += size;
destRem -= size;
continue;
}
} }
p->bufTotal -= p->bufPos; p->bufTotal -= p->bufPos;
@ -220,7 +241,7 @@ static SRes BraState_Code2(void *pp,
if (p->bufTotal == 0) if (p->bufTotal == 0)
break; break;
p->bufConv = BraState_Filter(pp, p->buf, p->bufTotal); p->bufConv = p->filter_func(&p->base, p->buf, p->bufTotal);
if (p->bufConv == 0) if (p->bufConv == 0)
{ {
@ -240,27 +261,37 @@ static SRes BraState_Code2(void *pp,
} }
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc); #define XZ_IS_SUPPORTED_FILTER_ID(id) \
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc) ((id) >= XZ_ID_Delta && (id) <= XZ_ID_ARM64)
SRes Xz_StateCoder_Bc_SetFromMethod_Func(IStateCoder *p, UInt64 id,
Xz_Func_BcFilterStateBase_Filter func, ISzAllocPtr alloc)
{ {
CBraState *decoder; CXzBcFilterState *decoder;
if (id < XZ_ID_Delta || id > XZ_ID_SPARC) if (!XZ_IS_SUPPORTED_FILTER_ID(id))
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
decoder = (CBraState *)p->p; decoder = (CXzBcFilterState *)p->p;
if (!decoder) if (!decoder)
{ {
decoder = (CBraState *)ISzAlloc_Alloc(alloc, sizeof(CBraState)); decoder = (CXzBcFilterState *)ISzAlloc_Alloc(alloc, sizeof(CXzBcFilterState));
if (!decoder) if (!decoder)
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
decoder->buf = ISzAlloc_Alloc(alloc, BRA_BUF_SIZE);
if (!decoder->buf)
{
ISzAlloc_Free(alloc, decoder);
return SZ_ERROR_MEM;
}
p->p = decoder; p->p = decoder;
p->Free = BraState_Free; p->Free = XzBcFilterState_Free;
p->SetProps = BraState_SetProps; p->SetProps = XzBcFilterState_SetProps;
p->Init = BraState_Init; p->Init = XzBcFilterState_Init;
p->Code2 = BraState_Code2; p->Code2 = XzBcFilterState_Code2;
p->Filter = BraState_Filter; p->Filter = XzBcFilterState_Filter;
decoder->filter_func = func;
} }
decoder->methodId = (UInt32)id; decoder->base.methodId = (UInt32)id;
decoder->encodeMode = encodeMode; // decoder->encodeMode = encodeMode;
return SZ_OK; return SZ_OK;
} }
@ -279,9 +310,9 @@ static void SbState_Free(void *pp, ISzAllocPtr alloc)
static SRes SbState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc) static SRes SbState_SetProps(void *pp, const Byte *props, size_t propSize, ISzAllocPtr alloc)
{ {
UNUSED_VAR(pp); UNUSED_VAR(pp)
UNUSED_VAR(props); UNUSED_VAR(props)
UNUSED_VAR(alloc); UNUSED_VAR(alloc)
return (propSize == 0) ? SZ_OK : SZ_ERROR_UNSUPPORTED; return (propSize == 0) ? SZ_OK : SZ_ERROR_UNSUPPORTED;
} }
@ -297,7 +328,7 @@ static SRes SbState_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *src,
{ {
CSbDec *p = (CSbDec *)pp; CSbDec *p = (CSbDec *)pp;
SRes res; SRes res;
UNUSED_VAR(srcWasFinished); UNUSED_VAR(srcWasFinished)
p->dest = dest; p->dest = dest;
p->destLen = *destLen; p->destLen = *destLen;
p->src = src; p->src = src;
@ -389,7 +420,7 @@ static SRes Lzma2State_Code2(void *pp, Byte *dest, SizeT *destLen, const Byte *s
ELzmaStatus status2; ELzmaStatus status2;
/* ELzmaFinishMode fm = (finishMode == LZMA_FINISH_ANY) ? LZMA_FINISH_ANY : LZMA_FINISH_END; */ /* ELzmaFinishMode fm = (finishMode == LZMA_FINISH_ANY) ? LZMA_FINISH_ANY : LZMA_FINISH_END; */
SRes res; SRes res;
UNUSED_VAR(srcWasFinished); UNUSED_VAR(srcWasFinished)
if (spec->outBufMode) if (spec->outBufMode)
{ {
SizeT dicPos = spec->decoder.decoder.dicPos; SizeT dicPos = spec->decoder.decoder.dicPos;
@ -420,7 +451,7 @@ static SRes Lzma2State_SetFromMethod(IStateCoder *p, Byte *outBuf, size_t outBuf
p->Init = Lzma2State_Init; p->Init = Lzma2State_Init;
p->Code2 = Lzma2State_Code2; p->Code2 = Lzma2State_Code2;
p->Filter = NULL; p->Filter = NULL;
Lzma2Dec_Construct(&spec->decoder); Lzma2Dec_CONSTRUCT(&spec->decoder)
} }
spec->outBufMode = False; spec->outBufMode = False;
if (outBuf) if (outBuf)
@ -519,7 +550,8 @@ static SRes MixCoder_SetFromMethod(CMixCoder *p, unsigned coderIndex, UInt64 met
} }
if (coderIndex == 0) if (coderIndex == 0)
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
return BraState_SetFromMethod(sc, methodId, 0, p->alloc); return Xz_StateCoder_Bc_SetFromMethod_Func(sc, methodId,
XzBcFilterStateBase_Filter_Dec, p->alloc);
} }
@ -568,7 +600,7 @@ static SRes MixCoder_Code(CMixCoder *p,
SizeT destLen2, srcLen2; SizeT destLen2, srcLen2;
int wasFinished; int wasFinished;
PRF_STR("------- MixCoder Single ----------"); PRF_STR("------- MixCoder Single ----------")
srcLen2 = srcLenOrig; srcLen2 = srcLenOrig;
destLen2 = destLenOrig; destLen2 = destLenOrig;
@ -615,14 +647,14 @@ static SRes MixCoder_Code(CMixCoder *p,
processed = coder->Filter(coder->p, p->outBuf, processed); processed = coder->Filter(coder->p, p->outBuf, processed);
if (wasFinished || (destFinish && p->outWritten == destLenOrig)) if (wasFinished || (destFinish && p->outWritten == destLenOrig))
processed = p->outWritten; processed = p->outWritten;
PRF_STR_INT("filter", i); PRF_STR_INT("filter", i)
} }
*destLen = processed; *destLen = processed;
} }
return res; return res;
} }
PRF_STR("standard mix"); PRF_STR("standard mix")
if (p->numCoders != 1) if (p->numCoders != 1)
{ {
@ -779,7 +811,7 @@ static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte
static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p) static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
{ {
unsigned numFilters = XzBlock_GetNumFilters(p) - 1; const unsigned numFilters = XzBlock_GetNumFilters(p) - 1;
unsigned i; unsigned i;
{ {
const CXzFilter *f = &p->filters[numFilters]; const CXzFilter *f = &p->filters[numFilters];
@ -795,8 +827,7 @@ static BoolInt XzBlock_AreSupportedFilters(const CXzBlock *p)
if (f->propsSize != 1) if (f->propsSize != 1)
return False; return False;
} }
else if (f->id < XZ_ID_Delta else if (!XZ_IS_SUPPORTED_FILTER_ID(f->id)
|| f->id > XZ_ID_SPARC
|| (f->propsSize != 0 && f->propsSize != 4)) || (f->propsSize != 0 && f->propsSize != 4))
return False; return False;
} }
@ -821,22 +852,24 @@ SRes XzBlock_Parse(CXzBlock *p, const Byte *header)
p->packSize = (UInt64)(Int64)-1; p->packSize = (UInt64)(Int64)-1;
if (XzBlock_HasPackSize(p)) if (XzBlock_HasPackSize(p))
{ {
READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize); READ_VARINT_AND_CHECK(header, pos, headerSize, &p->packSize)
if (p->packSize == 0 || p->packSize + headerSize >= (UInt64)1 << 63) if (p->packSize == 0 || p->packSize + headerSize >= (UInt64)1 << 63)
return SZ_ERROR_ARCHIVE; return SZ_ERROR_ARCHIVE;
} }
p->unpackSize = (UInt64)(Int64)-1; p->unpackSize = (UInt64)(Int64)-1;
if (XzBlock_HasUnpackSize(p)) if (XzBlock_HasUnpackSize(p))
READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize); {
READ_VARINT_AND_CHECK(header, pos, headerSize, &p->unpackSize)
}
numFilters = XzBlock_GetNumFilters(p); numFilters = XzBlock_GetNumFilters(p);
for (i = 0; i < numFilters; i++) for (i = 0; i < numFilters; i++)
{ {
CXzFilter *filter = p->filters + i; CXzFilter *filter = p->filters + i;
UInt64 size; UInt64 size;
READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id); READ_VARINT_AND_CHECK(header, pos, headerSize, &filter->id)
READ_VARINT_AND_CHECK(header, pos, headerSize, &size); READ_VARINT_AND_CHECK(header, pos, headerSize, &size)
if (size > headerSize - pos || size > XZ_FILTER_PROPS_SIZE_MAX) if (size > headerSize - pos || size > XZ_FILTER_PROPS_SIZE_MAX)
return SZ_ERROR_ARCHIVE; return SZ_ERROR_ARCHIVE;
filter->propsSize = (UInt32)size; filter->propsSize = (UInt32)size;
@ -894,20 +927,20 @@ static SRes XzDecMix_Init(CMixCoder *p, const CXzBlock *block, Byte *outBuf, siz
MixCoder_Free(p); MixCoder_Free(p);
for (i = 0; i < numFilters; i++) for (i = 0; i < numFilters; i++)
{ {
RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize)); RINOK(MixCoder_SetFromMethod(p, i, block->filters[numFilters - 1 - i].id, outBuf, outBufSize))
} }
p->numCoders = numFilters; p->numCoders = numFilters;
} }
else else
{ {
RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize)); RINOK(MixCoder_ResetFromMethod(p, 0, block->filters[numFilters - 1].id, outBuf, outBufSize))
} }
for (i = 0; i < numFilters; i++) for (i = 0; i < numFilters; i++)
{ {
const CXzFilter *f = &block->filters[numFilters - 1 - i]; const CXzFilter *f = &block->filters[numFilters - 1 - i];
IStateCoder *sc = &p->coders[i]; IStateCoder *sc = &p->coders[i];
RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc)); RINOK(sc->SetProps(sc->p, f->props, f->propsSize, p->alloc))
} }
MixCoder_Init(p); MixCoder_Init(p);
@ -1054,14 +1087,14 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
(*destLen) += destLen2; (*destLen) += destLen2;
p->unpackSize += destLen2; p->unpackSize += destLen2;
RINOK(res); RINOK(res)
if (*status != CODER_STATUS_FINISHED_WITH_MARK) if (*status != CODER_STATUS_FINISHED_WITH_MARK)
{ {
if (p->block.packSize == p->packSize if (p->block.packSize == p->packSize
&& *status == CODER_STATUS_NEEDS_MORE_INPUT) && *status == CODER_STATUS_NEEDS_MORE_INPUT)
{ {
PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT"); PRF_STR("CODER_STATUS_NEEDS_MORE_INPUT")
*status = CODER_STATUS_NOT_SPECIFIED; *status = CODER_STATUS_NOT_SPECIFIED;
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
} }
@ -1078,7 +1111,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
if ((p->block.packSize != (UInt64)(Int64)-1 && p->block.packSize != p->packSize) if ((p->block.packSize != (UInt64)(Int64)-1 && p->block.packSize != p->packSize)
|| (p->block.unpackSize != (UInt64)(Int64)-1 && p->block.unpackSize != p->unpackSize)) || (p->block.unpackSize != (UInt64)(Int64)-1 && p->block.unpackSize != p->unpackSize))
{ {
PRF_STR("ERROR: block.size mismatch"); PRF_STR("ERROR: block.size mismatch")
return SZ_ERROR_DATA; return SZ_ERROR_DATA;
} }
} }
@ -1109,7 +1142,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
} }
else else
{ {
RINOK(Xz_ParseHeader(&p->streamFlags, p->buf)); RINOK(Xz_ParseHeader(&p->streamFlags, p->buf))
p->numStartedStreams++; p->numStartedStreams++;
p->indexSize = 0; p->indexSize = 0;
p->numBlocks = 0; p->numBlocks = 0;
@ -1155,7 +1188,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
} }
else else
{ {
RINOK(XzBlock_Parse(&p->block, p->buf)); RINOK(XzBlock_Parse(&p->block, p->buf))
if (!XzBlock_AreSupportedFilters(&p->block)) if (!XzBlock_AreSupportedFilters(&p->block))
return SZ_ERROR_UNSUPPORTED; return SZ_ERROR_UNSUPPORTED;
p->numTotalBlocks++; p->numTotalBlocks++;
@ -1168,7 +1201,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
p->headerParsedOk = True; p->headerParsedOk = True;
return SZ_OK; return SZ_OK;
} }
RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize)); RINOK(XzDecMix_Init(&p->decoder, &p->block, p->outBuf, p->outBufSize))
} }
break; break;
} }
@ -1389,7 +1422,7 @@ UInt64 XzUnpacker_GetExtraSize(const CXzUnpacker *p)
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "MtDec.h" #include "MtDec.h"
#endif #endif
@ -1400,7 +1433,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p)
p->outStep_ST = 1 << 20; p->outStep_ST = 1 << 20;
p->ignoreErrors = False; p->ignoreErrors = False;
#ifndef _7ZIP_ST #ifndef Z7_ST
p->numThreads = 1; p->numThreads = 1;
p->inBufSize_MT = 1 << 18; p->inBufSize_MT = 1 << 18;
p->memUseMax = sizeof(size_t) << 28; p->memUseMax = sizeof(size_t) << 28;
@ -1409,7 +1442,7 @@ void XzDecMtProps_Init(CXzDecMtProps *p)
#ifndef _7ZIP_ST #ifndef Z7_ST
/* ---------- CXzDecMtThread ---------- */ /* ---------- CXzDecMtThread ---------- */
@ -1448,7 +1481,7 @@ typedef struct
/* ---------- CXzDecMt ---------- */ /* ---------- CXzDecMt ---------- */
typedef struct struct CXzDecMt
{ {
CAlignOffsetAlloc alignOffsetAlloc; CAlignOffsetAlloc alignOffsetAlloc;
ISzAllocPtr allocMid; ISzAllocPtr allocMid;
@ -1456,9 +1489,9 @@ typedef struct
CXzDecMtProps props; CXzDecMtProps props;
size_t unpackBlockMaxSize; size_t unpackBlockMaxSize;
ISeqInStream *inStream; ISeqInStreamPtr inStream;
ISeqOutStream *outStream; ISeqOutStreamPtr outStream;
ICompressProgress *progress; ICompressProgressPtr progress;
BoolInt finishMode; BoolInt finishMode;
BoolInt outSize_Defined; BoolInt outSize_Defined;
@ -1481,7 +1514,7 @@ typedef struct
ECoderStatus status; ECoderStatus status;
SRes codeRes; SRes codeRes;
#ifndef _7ZIP_ST #ifndef Z7_ST
BoolInt mainDecoderWasCalled; BoolInt mainDecoderWasCalled;
// int statErrorDefined; // int statErrorDefined;
int finishedDecoderIndex; int finishedDecoderIndex;
@ -1504,10 +1537,9 @@ typedef struct
BoolInt mtc_WasConstructed; BoolInt mtc_WasConstructed;
CMtDec mtc; CMtDec mtc;
CXzDecMtThread coders[MTDEC__THREADS_MAX]; CXzDecMtThread coders[MTDEC_THREADS_MAX];
#endif #endif
};
} CXzDecMt;
@ -1535,11 +1567,11 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
XzDecMtProps_Init(&p->props); XzDecMtProps_Init(&p->props);
#ifndef _7ZIP_ST #ifndef Z7_ST
p->mtc_WasConstructed = False; p->mtc_WasConstructed = False;
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CXzDecMtThread *coder = &p->coders[i]; CXzDecMtThread *coder = &p->coders[i];
coder->dec_created = False; coder->dec_created = False;
@ -1549,16 +1581,16 @@ CXzDecMtHandle XzDecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
} }
#endif #endif
return p; return (CXzDecMtHandle)p;
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
static void XzDecMt_FreeOutBufs(CXzDecMt *p) static void XzDecMt_FreeOutBufs(CXzDecMt *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CXzDecMtThread *coder = &p->coders[i]; CXzDecMtThread *coder = &p->coders[i];
if (coder->outBuf) if (coder->outBuf)
@ -1595,13 +1627,15 @@ static void XzDecMt_FreeSt(CXzDecMt *p)
} }
void XzDecMt_Destroy(CXzDecMtHandle pp) // #define GET_CXzDecMt_p CXzDecMt *p = pp;
void XzDecMt_Destroy(CXzDecMtHandle p)
{ {
CXzDecMt *p = (CXzDecMt *)pp; // GET_CXzDecMt_p
XzDecMt_FreeSt(p); XzDecMt_FreeSt(p);
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtc_WasConstructed) if (p->mtc_WasConstructed)
{ {
@ -1610,7 +1644,7 @@ void XzDecMt_Destroy(CXzDecMtHandle pp)
} }
{ {
unsigned i; unsigned i;
for (i = 0; i < MTDEC__THREADS_MAX; i++) for (i = 0; i < MTDEC_THREADS_MAX; i++)
{ {
CXzDecMtThread *t = &p->coders[i]; CXzDecMtThread *t = &p->coders[i];
if (t->dec_created) if (t->dec_created)
@ -1625,12 +1659,12 @@ void XzDecMt_Destroy(CXzDecMtHandle pp)
#endif #endif
ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, pp); ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p);
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc) static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc)
{ {
@ -1696,7 +1730,7 @@ static void XzDecMt_Callback_Parse(void *obj, unsigned coderIndex, CMtDecCallbac
coder->dec.parseMode = True; coder->dec.parseMode = True;
coder->dec.headerParsedOk = False; coder->dec.headerParsedOk = False;
PRF_STR_INT("Parse", srcSize2); PRF_STR_INT("Parse", srcSize2)
res = XzUnpacker_Code(&coder->dec, res = XzUnpacker_Code(&coder->dec,
NULL, &destSize, NULL, &destSize,
@ -2071,7 +2105,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
} }
data += cur; data += cur;
size -= cur; size -= cur;
// PRF_STR_INT("Written size =", size); // PRF_STR_INT("Written size =", size)
if (size == 0) if (size == 0)
break; break;
res = MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0); res = MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0);
@ -2087,7 +2121,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
return res; return res;
} }
RINOK(res); RINOK(res)
if (coder->inPreSize != coder->inCodeSize if (coder->inPreSize != coder->inCodeSize
|| coder->blockPackTotal != coder->inCodeSize) || coder->blockPackTotal != coder->inCodeSize)
@ -2106,13 +2140,13 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
// (coder->state == MTDEC_PARSE_END) means that there are no other working threads // (coder->state == MTDEC_PARSE_END) means that there are no other working threads
// so we can use mtc variables without lock // so we can use mtc variables without lock
PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed); PRF_STR_INT("Write MTDEC_PARSE_END", me->mtc.inProcessed)
me->mtc.mtProgress.totalInSize = me->mtc.inProcessed; me->mtc.mtProgress.totalInSize = me->mtc.inProcessed;
{ {
CXzUnpacker *dec = &me->dec; CXzUnpacker *dec = &me->dec;
PRF_STR_INT("PostSingle", srcSize); PRF_STR_INT("PostSingle", srcSize)
{ {
size_t srcProcessed = srcSize; size_t srcProcessed = srcSize;
@ -2186,7 +2220,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
me->mtc.crossEnd = srcSize; me->mtc.crossEnd = srcSize;
} }
PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd); PRF_STR_INT("XZ_STATE_STREAM_HEADER crossEnd = ", (unsigned)me->mtc.crossEnd)
return SZ_OK; return SZ_OK;
} }
@ -2277,7 +2311,7 @@ static SRes XzDecMt_Callback_Write(void *pp, unsigned coderIndex,
UInt64 inDelta = me->mtc.inProcessed - inProgressPrev; UInt64 inDelta = me->mtc.inProcessed - inProgressPrev;
if (inDelta >= (1 << 22)) if (inDelta >= (1 << 22))
{ {
RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress)); RINOK(MtProgress_Progress_ST(&me->mtc.mtProgress))
inProgressPrev = me->mtc.inProcessed; inProgressPrev = me->mtc.inProcessed;
} }
} }
@ -2331,7 +2365,7 @@ void XzStatInfo_Clear(CXzStatInfo *p)
*/ */
static SRes XzDecMt_Decode_ST(CXzDecMt *p static SRes XzDecMt_Decode_ST(CXzDecMt *p
#ifndef _7ZIP_ST #ifndef Z7_ST
, BoolInt tMode , BoolInt tMode
#endif #endif
, CXzStatInfo *stat) , CXzStatInfo *stat)
@ -2343,7 +2377,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
CXzUnpacker *dec; CXzUnpacker *dec;
#ifndef _7ZIP_ST #ifndef Z7_ST
if (tMode) if (tMode)
{ {
XzDecMt_FreeOutBufs(p); XzDecMt_FreeOutBufs(p);
@ -2400,7 +2434,7 @@ static SRes XzDecMt_Decode_ST(CXzDecMt *p
if (inPos == inLim) if (inPos == inLim)
{ {
#ifndef _7ZIP_ST #ifndef Z7_ST
if (tMode) if (tMode)
{ {
inData = MtDec_Read(&p->mtc, &inLim); inData = MtDec_Read(&p->mtc, &inLim);
@ -2577,19 +2611,19 @@ static void XzStatInfo_SetStat(const CXzUnpacker *dec,
SRes XzDecMt_Decode(CXzDecMtHandle pp, SRes XzDecMt_Decode(CXzDecMtHandle p,
const CXzDecMtProps *props, const CXzDecMtProps *props,
const UInt64 *outDataSize, int finishMode, const UInt64 *outDataSize, int finishMode,
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
// Byte *outBuf, size_t *outBufSize, // Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
// const Byte *inData, size_t inDataSize, // const Byte *inData, size_t inDataSize,
CXzStatInfo *stat, CXzStatInfo *stat,
int *isMT, int *isMT,
ICompressProgress *progress) ICompressProgressPtr progress)
{ {
CXzDecMt *p = (CXzDecMt *)pp; // GET_CXzDecMt_p
#ifndef _7ZIP_ST #ifndef Z7_ST
BoolInt tMode; BoolInt tMode;
#endif #endif
@ -2640,7 +2674,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
*/ */
#ifndef _7ZIP_ST #ifndef Z7_ST
p->isBlockHeaderState_Parse = False; p->isBlockHeaderState_Parse = False;
p->isBlockHeaderState_Write = False; p->isBlockHeaderState_Write = False;
@ -2782,7 +2816,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
return res; return res;
} }
PRF_STR("----- decoding ST -----"); PRF_STR("----- decoding ST -----")
} }
#endif #endif
@ -2792,13 +2826,13 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
{ {
SRes res = XzDecMt_Decode_ST(p SRes res = XzDecMt_Decode_ST(p
#ifndef _7ZIP_ST #ifndef Z7_ST
, tMode , tMode
#endif #endif
, stat , stat
); );
#ifndef _7ZIP_ST #ifndef Z7_ST
// we must set error code from MT decoding at first // we must set error code from MT decoding at first
if (p->mainErrorCode != SZ_OK) if (p->mainErrorCode != SZ_OK)
stat->DecodeRes = p->mainErrorCode; stat->DecodeRes = p->mainErrorCode;
@ -2835,3 +2869,7 @@ SRes XzDecMt_Decode(CXzDecMtHandle pp,
return res; return res;
} }
} }
#undef PRF
#undef PRF_STR
#undef PRF_STR_INT_2

View file

@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode /* XzEnc.c -- Xz Encode
2021-04-01 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -18,13 +18,13 @@
#include "XzEnc.h" #include "XzEnc.h"
// #define _7ZIP_ST // #define Z7_ST
#ifndef _7ZIP_ST #ifndef Z7_ST
#include "MtCoder.h" #include "MtCoder.h"
#else #else
#define MTCODER__THREADS_MAX 1 #define MTCODER_THREADS_MAX 1
#define MTCODER__BLOCKS_MAX 1 #define MTCODER_BLOCKS_MAX 1
#endif #endif
#define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3) #define XZ_GET_PAD_SIZE(dataSize) ((4 - ((unsigned)(dataSize) & 3)) & 3)
@ -35,25 +35,25 @@
#define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize)) #define XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(unpackSize) (XZ_BLOCK_HEADER_SIZE_MAX + XZ_GET_MAX_BLOCK_PACK_SIZE(unpackSize))
#define XzBlock_ClearFlags(p) (p)->flags = 0; // #define XzBlock_ClearFlags(p) (p)->flags = 0;
#define XzBlock_SetNumFilters(p, n) (p)->flags = (Byte)((p)->flags | ((n) - 1)); #define XzBlock_ClearFlags_SetNumFilters(p, n) (p)->flags = (Byte)((n) - 1);
#define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE; #define XzBlock_SetHasPackSize(p) (p)->flags |= XZ_BF_PACK_SIZE;
#define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE; #define XzBlock_SetHasUnpackSize(p) (p)->flags |= XZ_BF_UNPACK_SIZE;
static SRes WriteBytes(ISeqOutStream *s, const void *buf, size_t size) static SRes WriteBytes(ISeqOutStreamPtr s, const void *buf, size_t size)
{ {
return (ISeqOutStream_Write(s, buf, size) == size) ? SZ_OK : SZ_ERROR_WRITE; return (ISeqOutStream_Write(s, buf, size) == size) ? SZ_OK : SZ_ERROR_WRITE;
} }
static SRes WriteBytesUpdateCrc(ISeqOutStream *s, const void *buf, size_t size, UInt32 *crc) static SRes WriteBytes_UpdateCrc(ISeqOutStreamPtr s, const void *buf, size_t size, UInt32 *crc)
{ {
*crc = CrcUpdate(*crc, buf, size); *crc = CrcUpdate(*crc, buf, size);
return WriteBytes(s, buf, size); return WriteBytes(s, buf, size);
} }
static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStream *s) static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStreamPtr s)
{ {
UInt32 crc; UInt32 crc;
Byte header[XZ_STREAM_HEADER_SIZE]; Byte header[XZ_STREAM_HEADER_SIZE];
@ -61,12 +61,12 @@ static SRes Xz_WriteHeader(CXzStreamFlags f, ISeqOutStream *s)
header[XZ_SIG_SIZE] = (Byte)(f >> 8); header[XZ_SIG_SIZE] = (Byte)(f >> 8);
header[XZ_SIG_SIZE + 1] = (Byte)(f & 0xFF); header[XZ_SIG_SIZE + 1] = (Byte)(f & 0xFF);
crc = CrcCalc(header + XZ_SIG_SIZE, XZ_STREAM_FLAGS_SIZE); crc = CrcCalc(header + XZ_SIG_SIZE, XZ_STREAM_FLAGS_SIZE);
SetUi32(header + XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE, crc); SetUi32(header + XZ_SIG_SIZE + XZ_STREAM_FLAGS_SIZE, crc)
return WriteBytes(s, header, XZ_STREAM_HEADER_SIZE); return WriteBytes(s, header, XZ_STREAM_HEADER_SIZE);
} }
static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStream *s) static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStreamPtr s)
{ {
Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; Byte header[XZ_BLOCK_HEADER_SIZE_MAX];
@ -91,7 +91,7 @@ static SRes XzBlock_WriteHeader(const CXzBlock *p, ISeqOutStream *s)
header[pos++] = 0; header[pos++] = 0;
header[0] = (Byte)(pos >> 2); header[0] = (Byte)(pos >> 2);
SetUi32(header + pos, CrcCalc(header, pos)); SetUi32(header + pos, CrcCalc(header, pos))
return WriteBytes(s, header, pos + 4); return WriteBytes(s, header, pos + 4);
} }
@ -182,7 +182,7 @@ static SRes XzEncIndex_AddIndexRecord(CXzEncIndex *p, UInt64 unpackSize, UInt64
size_t newSize = p->allocated * 2 + 16 * 2; size_t newSize = p->allocated * 2 + 16 * 2;
if (newSize < p->size + pos) if (newSize < p->size + pos)
return SZ_ERROR_MEM; return SZ_ERROR_MEM;
RINOK(XzEncIndex_ReAlloc(p, newSize, alloc)); RINOK(XzEncIndex_ReAlloc(p, newSize, alloc))
} }
memcpy(p->blocks + p->size, buf, pos); memcpy(p->blocks + p->size, buf, pos);
p->size += pos; p->size += pos;
@ -191,7 +191,7 @@ static SRes XzEncIndex_AddIndexRecord(CXzEncIndex *p, UInt64 unpackSize, UInt64
} }
static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, ISeqOutStream *s) static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, ISeqOutStreamPtr s)
{ {
Byte buf[32]; Byte buf[32];
UInt64 globalPos; UInt64 globalPos;
@ -200,8 +200,8 @@ static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, I
globalPos = pos; globalPos = pos;
buf[0] = 0; buf[0] = 0;
RINOK(WriteBytesUpdateCrc(s, buf, pos, &crc)); RINOK(WriteBytes_UpdateCrc(s, buf, pos, &crc))
RINOK(WriteBytesUpdateCrc(s, p->blocks, p->size, &crc)); RINOK(WriteBytes_UpdateCrc(s, p->blocks, p->size, &crc))
globalPos += p->size; globalPos += p->size;
pos = XZ_GET_PAD_SIZE(globalPos); pos = XZ_GET_PAD_SIZE(globalPos);
@ -211,12 +211,12 @@ static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, I
globalPos += pos; globalPos += pos;
crc = CrcUpdate(crc, buf + 4 - pos, pos); crc = CrcUpdate(crc, buf + 4 - pos, pos);
SetUi32(buf + 4, CRC_GET_DIGEST(crc)); SetUi32(buf + 4, CRC_GET_DIGEST(crc))
SetUi32(buf + 8 + 4, (UInt32)(globalPos >> 2)); SetUi32(buf + 8 + 4, (UInt32)(globalPos >> 2))
buf[8 + 8] = (Byte)(flags >> 8); buf[8 + 8] = (Byte)(flags >> 8);
buf[8 + 9] = (Byte)(flags & 0xFF); buf[8 + 9] = (Byte)(flags & 0xFF);
SetUi32(buf + 8, CrcCalc(buf + 8 + 4, 6)); SetUi32(buf + 8, CrcCalc(buf + 8 + 4, 6))
buf[8 + 10] = XZ_FOOTER_SIG_0; buf[8 + 10] = XZ_FOOTER_SIG_0;
buf[8 + 11] = XZ_FOOTER_SIG_1; buf[8 + 11] = XZ_FOOTER_SIG_1;
@ -230,7 +230,7 @@ static SRes XzEncIndex_WriteFooter(const CXzEncIndex *p, CXzStreamFlags flags, I
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
ISeqInStream *realStream; ISeqInStreamPtr realStream;
const Byte *data; const Byte *data;
UInt64 limit; UInt64 limit;
UInt64 processed; UInt64 processed;
@ -251,9 +251,9 @@ static void SeqCheckInStream_GetDigest(CSeqCheckInStream *p, Byte *digest)
XzCheck_Final(&p->check, digest); XzCheck_Final(&p->check, digest);
} }
static SRes SeqCheckInStream_Read(const ISeqInStream *pp, void *data, size_t *size) static SRes SeqCheckInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size)
{ {
CSeqCheckInStream *p = CONTAINER_FROM_VTBL(pp, CSeqCheckInStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqCheckInStream)
size_t size2 = *size; size_t size2 = *size;
SRes res = SZ_OK; SRes res = SZ_OK;
@ -285,15 +285,15 @@ static SRes SeqCheckInStream_Read(const ISeqInStream *pp, void *data, size_t *si
typedef struct typedef struct
{ {
ISeqOutStream vt; ISeqOutStream vt;
ISeqOutStream *realStream; ISeqOutStreamPtr realStream;
Byte *outBuf; Byte *outBuf;
size_t outBufLimit; size_t outBufLimit;
UInt64 processed; UInt64 processed;
} CSeqSizeOutStream; } CSeqSizeOutStream;
static size_t SeqSizeOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size) static size_t SeqSizeOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
{ {
CSeqSizeOutStream *p = CONTAINER_FROM_VTBL(pp, CSeqSizeOutStream, vt); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqSizeOutStream)
if (p->realStream) if (p->realStream)
size = ISeqOutStream_Write(p->realStream, data, size); size = ISeqOutStream_Write(p->realStream, data, size);
else else
@ -313,8 +313,8 @@ static size_t SeqSizeOutStream_Write(const ISeqOutStream *pp, const void *data,
typedef struct typedef struct
{ {
ISeqInStream p; ISeqInStream vt;
ISeqInStream *realStream; ISeqInStreamPtr realStream;
IStateCoder StateCoder; IStateCoder StateCoder;
Byte *buf; Byte *buf;
size_t curPos; size_t curPos;
@ -323,7 +323,39 @@ typedef struct
} CSeqInFilter; } CSeqInFilter;
SRes BraState_SetFromMethod(IStateCoder *p, UInt64 id, int encodeMode, ISzAllocPtr alloc); static const z7_Func_BranchConv g_Funcs_BranchConv_RISC_Enc[] =
{
Z7_BRANCH_CONV_ENC(PPC),
Z7_BRANCH_CONV_ENC(IA64),
Z7_BRANCH_CONV_ENC(ARM),
Z7_BRANCH_CONV_ENC(ARMT),
Z7_BRANCH_CONV_ENC(SPARC),
Z7_BRANCH_CONV_ENC(ARM64)
};
static SizeT XzBcFilterStateBase_Filter_Enc(CXzBcFilterStateBase *p, Byte *data, SizeT size)
{
switch (p->methodId)
{
case XZ_ID_Delta:
Delta_Encode(p->delta_State, p->delta, data, size);
break;
case XZ_ID_X86:
size = (SizeT)(z7_BranchConvSt_X86_Enc(data, size, p->ip, &p->X86_State) - data);
break;
default:
if (p->methodId >= XZ_ID_PPC)
{
const UInt32 i = p->methodId - XZ_ID_PPC;
if (i < Z7_ARRAY_SIZE(g_Funcs_BranchConv_RISC_Enc))
size = (SizeT)(g_Funcs_BranchConv_RISC_Enc[i](data, size, p->ip) - data);
}
break;
}
p->ip += (UInt32)size;
return size;
}
static SRes SeqInFilter_Init(CSeqInFilter *p, const CXzFilter *props, ISzAllocPtr alloc) static SRes SeqInFilter_Init(CSeqInFilter *p, const CXzFilter *props, ISzAllocPtr alloc)
{ {
@ -335,17 +367,17 @@ static SRes SeqInFilter_Init(CSeqInFilter *p, const CXzFilter *props, ISzAllocPt
} }
p->curPos = p->endPos = 0; p->curPos = p->endPos = 0;
p->srcWasFinished = 0; p->srcWasFinished = 0;
RINOK(BraState_SetFromMethod(&p->StateCoder, props->id, 1, alloc)); RINOK(Xz_StateCoder_Bc_SetFromMethod_Func(&p->StateCoder, props->id, XzBcFilterStateBase_Filter_Enc, alloc))
RINOK(p->StateCoder.SetProps(p->StateCoder.p, props->props, props->propsSize, alloc)); RINOK(p->StateCoder.SetProps(p->StateCoder.p, props->props, props->propsSize, alloc))
p->StateCoder.Init(p->StateCoder.p); p->StateCoder.Init(p->StateCoder.p);
return SZ_OK; return SZ_OK;
} }
static SRes SeqInFilter_Read(const ISeqInStream *pp, void *data, size_t *size) static SRes SeqInFilter_Read(ISeqInStreamPtr pp, void *data, size_t *size)
{ {
CSeqInFilter *p = CONTAINER_FROM_VTBL(pp, CSeqInFilter, p); Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSeqInFilter)
size_t sizeOriginal = *size; const size_t sizeOriginal = *size;
if (sizeOriginal == 0) if (sizeOriginal == 0)
return SZ_OK; return SZ_OK;
*size = 0; *size = 0;
@ -356,7 +388,7 @@ static SRes SeqInFilter_Read(const ISeqInStream *pp, void *data, size_t *size)
{ {
p->curPos = 0; p->curPos = 0;
p->endPos = FILTER_BUF_SIZE; p->endPos = FILTER_BUF_SIZE;
RINOK(ISeqInStream_Read(p->realStream, p->buf, &p->endPos)); RINOK(ISeqInStream_Read(p->realStream, p->buf, &p->endPos))
if (p->endPos == 0) if (p->endPos == 0)
p->srcWasFinished = 1; p->srcWasFinished = 1;
} }
@ -381,7 +413,7 @@ static void SeqInFilter_Construct(CSeqInFilter *p)
{ {
p->buf = NULL; p->buf = NULL;
p->StateCoder.p = NULL; p->StateCoder.p = NULL;
p->p.Read = SeqInFilter_Read; p->vt.Read = SeqInFilter_Read;
} }
static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc) static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc)
@ -406,13 +438,13 @@ static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc)
typedef struct typedef struct
{ {
ISeqInStream vt; ISeqInStream vt;
ISeqInStream *inStream; ISeqInStreamPtr inStream;
CSbEnc enc; CSbEnc enc;
} CSbEncInStream; } CSbEncInStream;
static SRes SbEncInStream_Read(const ISeqInStream *pp, void *data, size_t *size) static SRes SbEncInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size)
{ {
CSbEncInStream *p = CONTAINER_FROM_VTBL(pp, CSbEncInStream, vt); CSbEncInStream *p = Z7_CONTAINER_FROM_VTBL(pp, CSbEncInStream, vt);
size_t sizeOriginal = *size; size_t sizeOriginal = *size;
if (sizeOriginal == 0) if (sizeOriginal == 0)
return SZ_OK; return SZ_OK;
@ -422,7 +454,7 @@ static SRes SbEncInStream_Read(const ISeqInStream *pp, void *data, size_t *size)
if (p->enc.needRead && !p->enc.readWasFinished) if (p->enc.needRead && !p->enc.readWasFinished)
{ {
size_t processed = p->enc.needReadSizeMax; size_t processed = p->enc.needReadSizeMax;
RINOK(p->inStream->Read(p->inStream, p->enc.buf + p->enc.readPos, &processed)); RINOK(p->inStream->Read(p->inStream, p->enc.buf + p->enc.readPos, &processed))
p->enc.readPos += processed; p->enc.readPos += processed;
if (processed == 0) if (processed == 0)
{ {
@ -433,7 +465,7 @@ static SRes SbEncInStream_Read(const ISeqInStream *pp, void *data, size_t *size)
} }
*size = sizeOriginal; *size = sizeOriginal;
RINOK(SbEnc_Read(&p->enc, data, size)); RINOK(SbEnc_Read(&p->enc, data, size))
if (*size != 0 || !p->enc.needRead) if (*size != 0 || !p->enc.needRead)
return SZ_OK; return SZ_OK;
} }
@ -473,7 +505,7 @@ void XzFilterProps_Init(CXzFilterProps *p)
void XzProps_Init(CXzProps *p) void XzProps_Init(CXzProps *p)
{ {
p->checkId = XZ_CHECK_CRC32; p->checkId = XZ_CHECK_CRC32;
p->blockSize = XZ_PROPS__BLOCK_SIZE__AUTO; p->blockSize = XZ_PROPS_BLOCK_SIZE_AUTO;
p->numBlockThreads_Reduced = -1; p->numBlockThreads_Reduced = -1;
p->numBlockThreads_Max = -1; p->numBlockThreads_Max = -1;
p->numTotalThreads = -1; p->numTotalThreads = -1;
@ -502,8 +534,8 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p)
t2 = p->numBlockThreads_Max; t2 = p->numBlockThreads_Max;
t3 = p->numTotalThreads; t3 = p->numTotalThreads;
if (t2 > MTCODER__THREADS_MAX) if (t2 > MTCODER_THREADS_MAX)
t2 = MTCODER__THREADS_MAX; t2 = MTCODER_THREADS_MAX;
if (t3 <= 0) if (t3 <= 0)
{ {
@ -519,8 +551,8 @@ static void XzEncProps_Normalize_Fixed(CXzProps *p)
t1 = 1; t1 = 1;
t2 = t3; t2 = t3;
} }
if (t2 > MTCODER__THREADS_MAX) if (t2 > MTCODER_THREADS_MAX)
t2 = MTCODER__THREADS_MAX; t2 = MTCODER_THREADS_MAX;
} }
else if (t1 <= 0) else if (t1 <= 0)
{ {
@ -571,7 +603,7 @@ static void XzProps_Normalize(CXzProps *p)
/* we normalize xzProps properties, but we normalize only some of CXzProps::lzma2Props properties. /* we normalize xzProps properties, but we normalize only some of CXzProps::lzma2Props properties.
Lzma2Enc_SetProps() will normalize lzma2Props later. */ Lzma2Enc_SetProps() will normalize lzma2Props later. */
if (p->blockSize == XZ_PROPS__BLOCK_SIZE__SOLID) if (p->blockSize == XZ_PROPS_BLOCK_SIZE_SOLID)
{ {
p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; p->lzma2Props.lzmaProps.reduceSize = p->reduceSize;
p->numBlockThreads_Reduced = 1; p->numBlockThreads_Reduced = 1;
@ -583,15 +615,15 @@ static void XzProps_Normalize(CXzProps *p)
else else
{ {
CLzma2EncProps *lzma2 = &p->lzma2Props; CLzma2EncProps *lzma2 = &p->lzma2Props;
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO)
{ {
// xz-auto // xz-auto
p->lzma2Props.lzmaProps.reduceSize = p->reduceSize; p->lzma2Props.lzmaProps.reduceSize = p->reduceSize;
if (lzma2->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID)
{ {
// if (xz-auto && lzma2-solid) - we use solid for both // if (xz-auto && lzma2-solid) - we use solid for both
p->blockSize = XZ_PROPS__BLOCK_SIZE__SOLID; p->blockSize = XZ_PROPS_BLOCK_SIZE_SOLID;
p->numBlockThreads_Reduced = 1; p->numBlockThreads_Reduced = 1;
p->numBlockThreads_Max = 1; p->numBlockThreads_Max = 1;
if (p->lzma2Props.numTotalThreads <= 0) if (p->lzma2Props.numTotalThreads <= 0)
@ -610,9 +642,9 @@ static void XzProps_Normalize(CXzProps *p)
p->blockSize = tp.blockSize; // fixed or solid p->blockSize = tp.blockSize; // fixed or solid
p->numBlockThreads_Reduced = tp.numBlockThreads_Reduced; p->numBlockThreads_Reduced = tp.numBlockThreads_Reduced;
p->numBlockThreads_Max = tp.numBlockThreads_Max; p->numBlockThreads_Max = tp.numBlockThreads_Max;
if (lzma2->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO)
lzma2->blockSize = tp.blockSize; // fixed or solid, LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID lzma2->blockSize = tp.blockSize; // fixed or solid, LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
if (lzma2->lzmaProps.reduceSize > tp.blockSize && tp.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) if (lzma2->lzmaProps.reduceSize > tp.blockSize && tp.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID)
lzma2->lzmaProps.reduceSize = tp.blockSize; lzma2->lzmaProps.reduceSize = tp.blockSize;
lzma2->numBlockThreads_Reduced = 1; lzma2->numBlockThreads_Reduced = 1;
lzma2->numBlockThreads_Max = 1; lzma2->numBlockThreads_Max = 1;
@ -631,9 +663,9 @@ static void XzProps_Normalize(CXzProps *p)
r = p->blockSize; r = p->blockSize;
lzma2->lzmaProps.reduceSize = r; lzma2->lzmaProps.reduceSize = r;
} }
if (lzma2->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO) if (lzma2->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO)
lzma2->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID; lzma2->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID;
else if (lzma2->blockSize > p->blockSize && lzma2->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID) else if (lzma2->blockSize > p->blockSize && lzma2->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID)
lzma2->blockSize = p->blockSize; lzma2->blockSize = p->blockSize;
XzEncProps_Normalize_Fixed(p); XzEncProps_Normalize_Fixed(p);
@ -704,17 +736,17 @@ typedef struct
static SRes Xz_CompressBlock( static SRes Xz_CompressBlock(
CLzma2WithFilters *lzmaf, CLzma2WithFilters *lzmaf,
ISeqOutStream *outStream, ISeqOutStreamPtr outStream,
Byte *outBufHeader, Byte *outBufHeader,
Byte *outBufData, size_t outBufDataLimit, Byte *outBufData, size_t outBufDataLimit,
ISeqInStream *inStream, ISeqInStreamPtr inStream,
// UInt64 expectedSize, // UInt64 expectedSize,
const Byte *inBuf, // used if (!inStream) const Byte *inBuf, // used if (!inStream)
size_t inBufSize, // used if (!inStream), it's block size, props->blockSize is ignored size_t inBufSize, // used if (!inStream), it's block size, props->blockSize is ignored
const CXzProps *props, const CXzProps *props,
ICompressProgress *progress, ICompressProgressPtr progress,
int *inStreamFinished, /* only for inStream version */ int *inStreamFinished, /* only for inStream version */
CXzEncBlockInfo *blockSizes, CXzEncBlockInfo *blockSizes,
ISzAllocPtr alloc, ISzAllocPtr alloc,
@ -731,12 +763,12 @@ static SRes Xz_CompressBlock(
*inStreamFinished = False; *inStreamFinished = False;
RINOK(Lzma2WithFilters_Create(lzmaf, alloc, allocBig)); RINOK(Lzma2WithFilters_Create(lzmaf, alloc, allocBig))
RINOK(Lzma2Enc_SetProps(lzmaf->lzma2, &props->lzma2Props)); RINOK(Lzma2Enc_SetProps(lzmaf->lzma2, &props->lzma2Props))
XzBlock_ClearFlags(&block); // XzBlock_ClearFlags(&block)
XzBlock_SetNumFilters(&block, 1 + (fp ? 1 : 0)); XzBlock_ClearFlags_SetNumFilters(&block, 1 + (fp ? 1 : 0))
if (fp) if (fp)
{ {
@ -752,7 +784,7 @@ static SRes Xz_CompressBlock(
else if (fp->ipDefined) else if (fp->ipDefined)
{ {
Byte *ptr = filter->props; Byte *ptr = filter->props;
SetUi32(ptr, fp->ip); SetUi32(ptr, fp->ip)
filter->propsSize = 4; filter->propsSize = 4;
} }
} }
@ -777,13 +809,13 @@ static SRes Xz_CompressBlock(
if (props->blockSize != (UInt64)(Int64)-1) if (props->blockSize != (UInt64)(Int64)-1)
if (expectedSize > props->blockSize) if (expectedSize > props->blockSize)
block.unpackSize = props->blockSize; block.unpackSize = props->blockSize;
XzBlock_SetHasUnpackSize(&block); XzBlock_SetHasUnpackSize(&block)
} }
*/ */
if (outStream) if (outStream)
{ {
RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)); RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt))
} }
checkInStream.vt.Read = SeqCheckInStream_Read; checkInStream.vt.Read = SeqCheckInStream_Read;
@ -801,13 +833,13 @@ static SRes Xz_CompressBlock(
if (fp->id == XZ_ID_Subblock) if (fp->id == XZ_ID_Subblock)
{ {
lzmaf->sb.inStream = &checkInStream.vt; lzmaf->sb.inStream = &checkInStream.vt;
RINOK(SbEncInStream_Init(&lzmaf->sb)); RINOK(SbEncInStream_Init(&lzmaf->sb))
} }
else else
#endif #endif
{ {
lzmaf->filter.realStream = &checkInStream.vt; lzmaf->filter.realStream = &checkInStream.vt;
RINOK(SeqInFilter_Init(&lzmaf->filter, filter, alloc)); RINOK(SeqInFilter_Init(&lzmaf->filter, filter, alloc))
} }
} }
@ -841,7 +873,7 @@ static SRes Xz_CompressBlock(
#ifdef USE_SUBBLOCK #ifdef USE_SUBBLOCK
(fp->id == XZ_ID_Subblock) ? &lzmaf->sb.vt: (fp->id == XZ_ID_Subblock) ? &lzmaf->sb.vt:
#endif #endif
&lzmaf->filter.p) : &lzmaf->filter.vt) :
&checkInStream.vt) : NULL, &checkInStream.vt) : NULL,
useStream ? NULL : inBuf, useStream ? NULL : inBuf,
@ -852,7 +884,7 @@ static SRes Xz_CompressBlock(
if (outBuf) if (outBuf)
seqSizeOutStream.processed += outSize; seqSizeOutStream.processed += outSize;
RINOK(res); RINOK(res)
blockSizes->unpackSize = checkInStream.processed; blockSizes->unpackSize = checkInStream.processed;
} }
{ {
@ -866,7 +898,7 @@ static SRes Xz_CompressBlock(
buf[3] = 0; buf[3] = 0;
SeqCheckInStream_GetDigest(&checkInStream, buf + 4); SeqCheckInStream_GetDigest(&checkInStream, buf + 4);
RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId))); RINOK(WriteBytes(&seqSizeOutStream.vt, buf + (4 - padSize), padSize + XzFlags_GetCheckSize((CXzStreamFlags)props->checkId)))
blockSizes->totalSize = seqSizeOutStream.processed - padSize; blockSizes->totalSize = seqSizeOutStream.processed - padSize;
@ -877,12 +909,12 @@ static SRes Xz_CompressBlock(
seqSizeOutStream.processed = 0; seqSizeOutStream.processed = 0;
block.unpackSize = blockSizes->unpackSize; block.unpackSize = blockSizes->unpackSize;
XzBlock_SetHasUnpackSize(&block); XzBlock_SetHasUnpackSize(&block)
block.packSize = packSize; block.packSize = packSize;
XzBlock_SetHasPackSize(&block); XzBlock_SetHasPackSize(&block)
RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt)); RINOK(XzBlock_WriteHeader(&block, &seqSizeOutStream.vt))
blockSizes->headerSize = (size_t)seqSizeOutStream.processed; blockSizes->headerSize = (size_t)seqSizeOutStream.processed;
blockSizes->totalSize += seqSizeOutStream.processed; blockSizes->totalSize += seqSizeOutStream.processed;
@ -906,15 +938,15 @@ static SRes Xz_CompressBlock(
typedef struct typedef struct
{ {
ICompressProgress vt; ICompressProgress vt;
ICompressProgress *progress; ICompressProgressPtr progress;
UInt64 inOffset; UInt64 inOffset;
UInt64 outOffset; UInt64 outOffset;
} CCompressProgress_XzEncOffset; } CCompressProgress_XzEncOffset;
static SRes CompressProgress_XzEncOffset_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize) static SRes CompressProgress_XzEncOffset_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize)
{ {
const CCompressProgress_XzEncOffset *p = CONTAINER_FROM_VTBL(pp, CCompressProgress_XzEncOffset, vt); const CCompressProgress_XzEncOffset *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CCompressProgress_XzEncOffset, vt);
inSize += p->inOffset; inSize += p->inOffset;
outSize += p->outOffset; outSize += p->outOffset;
return ICompressProgress_Progress(p->progress, inSize, outSize); return ICompressProgress_Progress(p->progress, inSize, outSize);
@ -923,7 +955,7 @@ static SRes CompressProgress_XzEncOffset_Progress(const ICompressProgress *pp, U
typedef struct struct CXzEnc
{ {
ISzAllocPtr alloc; ISzAllocPtr alloc;
ISzAllocPtr allocBig; ISzAllocPtr allocBig;
@ -933,20 +965,19 @@ typedef struct
CXzEncIndex xzIndex; CXzEncIndex xzIndex;
CLzma2WithFilters lzmaf_Items[MTCODER__THREADS_MAX]; CLzma2WithFilters lzmaf_Items[MTCODER_THREADS_MAX];
size_t outBufSize; /* size of allocated outBufs[i] */ size_t outBufSize; /* size of allocated outBufs[i] */
Byte *outBufs[MTCODER__BLOCKS_MAX]; Byte *outBufs[MTCODER_BLOCKS_MAX];
#ifndef _7ZIP_ST #ifndef Z7_ST
unsigned checkType; unsigned checkType;
ISeqOutStream *outStream; ISeqOutStreamPtr outStream;
BoolInt mtCoder_WasConstructed; BoolInt mtCoder_WasConstructed;
CMtCoder mtCoder; CMtCoder mtCoder;
CXzEncBlockInfo EncBlocks[MTCODER__BLOCKS_MAX]; CXzEncBlockInfo EncBlocks[MTCODER_BLOCKS_MAX];
#endif #endif
};
} CXzEnc;
static void XzEnc_Construct(CXzEnc *p) static void XzEnc_Construct(CXzEnc *p)
@ -955,13 +986,13 @@ static void XzEnc_Construct(CXzEnc *p)
XzEncIndex_Construct(&p->xzIndex); XzEncIndex_Construct(&p->xzIndex);
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
Lzma2WithFilters_Construct(&p->lzmaf_Items[i]); Lzma2WithFilters_Construct(&p->lzmaf_Items[i]);
#ifndef _7ZIP_ST #ifndef Z7_ST
p->mtCoder_WasConstructed = False; p->mtCoder_WasConstructed = False;
{ {
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
p->outBufs[i] = NULL; p->outBufs[i] = NULL;
p->outBufSize = 0; p->outBufSize = 0;
} }
@ -972,7 +1003,7 @@ static void XzEnc_Construct(CXzEnc *p)
static void XzEnc_FreeOutBufs(CXzEnc *p) static void XzEnc_FreeOutBufs(CXzEnc *p)
{ {
unsigned i; unsigned i;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++) for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
if (p->outBufs[i]) if (p->outBufs[i])
{ {
ISzAlloc_Free(p->alloc, p->outBufs[i]); ISzAlloc_Free(p->alloc, p->outBufs[i]);
@ -988,10 +1019,10 @@ static void XzEnc_Free(CXzEnc *p, ISzAllocPtr alloc)
XzEncIndex_Free(&p->xzIndex, alloc); XzEncIndex_Free(&p->xzIndex, alloc);
for (i = 0; i < MTCODER__THREADS_MAX; i++) for (i = 0; i < MTCODER_THREADS_MAX; i++)
Lzma2WithFilters_Free(&p->lzmaf_Items[i], alloc); Lzma2WithFilters_Free(&p->lzmaf_Items[i], alloc);
#ifndef _7ZIP_ST #ifndef Z7_ST
if (p->mtCoder_WasConstructed) if (p->mtCoder_WasConstructed)
{ {
MtCoder_Destruct(&p->mtCoder); MtCoder_Destruct(&p->mtCoder);
@ -1013,37 +1044,38 @@ CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig)
p->expectedDataSize = (UInt64)(Int64)-1; p->expectedDataSize = (UInt64)(Int64)-1;
p->alloc = alloc; p->alloc = alloc;
p->allocBig = allocBig; p->allocBig = allocBig;
return p; return (CXzEncHandle)p;
} }
// #define GET_CXzEnc_p CXzEnc *p = (CXzEnc *)(void *)pp;
void XzEnc_Destroy(CXzEncHandle pp) void XzEnc_Destroy(CXzEncHandle p)
{ {
CXzEnc *p = (CXzEnc *)pp; // GET_CXzEnc_p
XzEnc_Free(p, p->alloc); XzEnc_Free(p, p->alloc);
ISzAlloc_Free(p->alloc, p); ISzAlloc_Free(p->alloc, p);
} }
SRes XzEnc_SetProps(CXzEncHandle pp, const CXzProps *props) SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props)
{ {
CXzEnc *p = (CXzEnc *)pp; // GET_CXzEnc_p
p->xzProps = *props; p->xzProps = *props;
XzProps_Normalize(&p->xzProps); XzProps_Normalize(&p->xzProps);
return SZ_OK; return SZ_OK;
} }
void XzEnc_SetDataSize(CXzEncHandle pp, UInt64 expectedDataSiize) void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize)
{ {
CXzEnc *p = (CXzEnc *)pp; // GET_CXzEnc_p
p->expectedDataSize = expectedDataSiize; p->expectedDataSize = expectedDataSiize;
} }
#ifndef _7ZIP_ST #ifndef Z7_ST
static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex, static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex,
const Byte *src, size_t srcSize, int finished) const Byte *src, size_t srcSize, int finished)
@ -1073,7 +1105,7 @@ static SRes XzEnc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBuf
MtProgressThunk_CreateVTable(&progressThunk); MtProgressThunk_CreateVTable(&progressThunk);
progressThunk.mtProgress = &me->mtCoder.mtProgress; progressThunk.mtProgress = &me->mtCoder.mtProgress;
MtProgressThunk_Init(&progressThunk); MtProgressThunk_INIT(&progressThunk)
{ {
CXzEncBlockInfo blockSizes; CXzEncBlockInfo blockSizes;
@ -1112,11 +1144,11 @@ static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex)
const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex]; const CXzEncBlockInfo *bInfo = &me->EncBlocks[outBufIndex];
const Byte *data = me->outBufs[outBufIndex]; const Byte *data = me->outBufs[outBufIndex];
RINOK(WriteBytes(me->outStream, data, bInfo->headerSize)); RINOK(WriteBytes(me->outStream, data, bInfo->headerSize))
{ {
UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize); UInt64 totalPackFull = bInfo->totalSize + XZ_GET_PAD_SIZE(bInfo->totalSize);
RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize)); RINOK(WriteBytes(me->outStream, data + XZ_BLOCK_HEADER_SIZE_MAX, (size_t)totalPackFull - bInfo->headerSize))
} }
return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc); return XzEncIndex_AddIndexRecord(&me->xzIndex, bInfo->unpackSize, bInfo->totalSize, me->alloc);
@ -1126,9 +1158,9 @@ static SRes XzEnc_MtCallback_Write(void *pp, unsigned outBufIndex)
SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress) SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress)
{ {
CXzEnc *p = (CXzEnc *)pp; // GET_CXzEnc_p
const CXzProps *props = &p->xzProps; const CXzProps *props = &p->xzProps;
@ -1137,7 +1169,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
UInt64 numBlocks = 1; UInt64 numBlocks = 1;
UInt64 blockSize = props->blockSize; UInt64 blockSize = props->blockSize;
if (blockSize != XZ_PROPS__BLOCK_SIZE__SOLID if (blockSize != XZ_PROPS_BLOCK_SIZE_SOLID
&& props->reduceSize != (UInt64)(Int64)-1) && props->reduceSize != (UInt64)(Int64)-1)
{ {
numBlocks = props->reduceSize / blockSize; numBlocks = props->reduceSize / blockSize;
@ -1147,13 +1179,13 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
else else
blockSize = (UInt64)1 << 62; blockSize = (UInt64)1 << 62;
RINOK(XzEncIndex_PreAlloc(&p->xzIndex, numBlocks, blockSize, XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(blockSize), p->alloc)); RINOK(XzEncIndex_PreAlloc(&p->xzIndex, numBlocks, blockSize, XZ_GET_ESTIMATED_BLOCK_TOTAL_PACK_SIZE(blockSize), p->alloc))
} }
RINOK(Xz_WriteHeader((CXzStreamFlags)props->checkId, outStream)); RINOK(Xz_WriteHeader((CXzStreamFlags)props->checkId, outStream))
#ifndef _7ZIP_ST #ifndef Z7_ST
if (props->numBlockThreads_Reduced > 1) if (props->numBlockThreads_Reduced > 1)
{ {
IMtCoderCallback2 vt; IMtCoderCallback2 vt;
@ -1180,8 +1212,8 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
p->mtCoder.mtCallback = &vt; p->mtCoder.mtCallback = &vt;
p->mtCoder.mtCallbackObject = p; p->mtCoder.mtCallbackObject = p;
if ( props->blockSize == XZ_PROPS__BLOCK_SIZE__SOLID if ( props->blockSize == XZ_PROPS_BLOCK_SIZE_SOLID
|| props->blockSize == XZ_PROPS__BLOCK_SIZE__AUTO) || props->blockSize == XZ_PROPS_BLOCK_SIZE_AUTO)
return SZ_ERROR_FAIL; return SZ_ERROR_FAIL;
p->mtCoder.blockSize = (size_t)props->blockSize; p->mtCoder.blockSize = (size_t)props->blockSize;
@ -1200,7 +1232,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize; p->mtCoder.expectedDataSize = p->expectedDataSize;
RINOK(MtCoder_Code(&p->mtCoder)); RINOK(MtCoder_Code(&p->mtCoder))
} }
else else
#endif #endif
@ -1217,7 +1249,7 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
writeStartSizes = 0; writeStartSizes = 0;
if (props->blockSize != XZ_PROPS__BLOCK_SIZE__SOLID) if (props->blockSize != XZ_PROPS_BLOCK_SIZE_SOLID)
{ {
writeStartSizes = (props->forceWriteSizesInHeader > 0); writeStartSizes = (props->forceWriteSizesInHeader > 0);
@ -1274,18 +1306,18 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
&inStreamFinished, &inStreamFinished,
&blockSizes, &blockSizes,
p->alloc, p->alloc,
p->allocBig)); p->allocBig))
{ {
UInt64 totalPackFull = blockSizes.totalSize + XZ_GET_PAD_SIZE(blockSizes.totalSize); UInt64 totalPackFull = blockSizes.totalSize + XZ_GET_PAD_SIZE(blockSizes.totalSize);
if (writeStartSizes) if (writeStartSizes)
{ {
RINOK(WriteBytes(outStream, p->outBufs[0], blockSizes.headerSize)); RINOK(WriteBytes(outStream, p->outBufs[0], blockSizes.headerSize))
RINOK(WriteBytes(outStream, bufData, (size_t)totalPackFull - blockSizes.headerSize)); RINOK(WriteBytes(outStream, bufData, (size_t)totalPackFull - blockSizes.headerSize))
} }
RINOK(XzEncIndex_AddIndexRecord(&p->xzIndex, blockSizes.unpackSize, blockSizes.totalSize, p->alloc)); RINOK(XzEncIndex_AddIndexRecord(&p->xzIndex, blockSizes.unpackSize, blockSizes.totalSize, p->alloc))
progress2.inOffset += blockSizes.unpackSize; progress2.inOffset += blockSizes.unpackSize;
progress2.outOffset += totalPackFull; progress2.outOffset += totalPackFull;
@ -1302,8 +1334,8 @@ SRes XzEnc_Encode(CXzEncHandle pp, ISeqOutStream *outStream, ISeqInStream *inStr
#include "Alloc.h" #include "Alloc.h"
SRes Xz_Encode(ISeqOutStream *outStream, ISeqInStream *inStream, SRes Xz_Encode(ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
const CXzProps *props, ICompressProgress *progress) const CXzProps *props, ICompressProgressPtr progress)
{ {
SRes res; SRes res;
CXzEncHandle xz = XzEnc_Create(&g_Alloc, &g_BigAlloc); CXzEncHandle xz = XzEnc_Create(&g_Alloc, &g_BigAlloc);
@ -1317,7 +1349,7 @@ SRes Xz_Encode(ISeqOutStream *outStream, ISeqInStream *inStream,
} }
SRes Xz_EncodeEmpty(ISeqOutStream *outStream) SRes Xz_EncodeEmpty(ISeqOutStreamPtr outStream)
{ {
SRes res; SRes res;
CXzEncIndex xzIndex; CXzEncIndex xzIndex;

View file

@ -1,8 +1,8 @@
/* XzEnc.h -- Xz Encode /* XzEnc.h -- Xz Encode
2017-06-27 : Igor Pavlov : Public domain */ 2023-04-13 : Igor Pavlov : Public domain */
#ifndef __XZ_ENC_H #ifndef ZIP7_INC_XZ_ENC_H
#define __XZ_ENC_H #define ZIP7_INC_XZ_ENC_H
#include "Lzma2Enc.h" #include "Lzma2Enc.h"
@ -11,8 +11,8 @@
EXTERN_C_BEGIN EXTERN_C_BEGIN
#define XZ_PROPS__BLOCK_SIZE__AUTO LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO #define XZ_PROPS_BLOCK_SIZE_AUTO LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO
#define XZ_PROPS__BLOCK_SIZE__SOLID LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID #define XZ_PROPS_BLOCK_SIZE_SOLID LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
typedef struct typedef struct
@ -41,19 +41,20 @@ typedef struct
void XzProps_Init(CXzProps *p); void XzProps_Init(CXzProps *p);
typedef struct CXzEnc CXzEnc;
typedef void * CXzEncHandle; typedef CXzEnc * CXzEncHandle;
// Z7_DECLARE_HANDLE(CXzEncHandle)
CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig); CXzEncHandle XzEnc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig);
void XzEnc_Destroy(CXzEncHandle p); void XzEnc_Destroy(CXzEncHandle p);
SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props); SRes XzEnc_SetProps(CXzEncHandle p, const CXzProps *props);
void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize); void XzEnc_SetDataSize(CXzEncHandle p, UInt64 expectedDataSiize);
SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream, ICompressProgress *progress); SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream, ICompressProgressPtr progress);
SRes Xz_Encode(ISeqOutStream *outStream, ISeqInStream *inStream, SRes Xz_Encode(ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
const CXzProps *props, ICompressProgress *progress); const CXzProps *props, ICompressProgressPtr progress);
SRes Xz_EncodeEmpty(ISeqOutStream *outStream); SRes Xz_EncodeEmpty(ISeqOutStreamPtr outStream);
EXTERN_C_END EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* XzIn.c - Xz input /* XzIn.c - Xz input
2021-09-04 : Igor Pavlov : Public domain */ 2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h" #include "Precomp.h"
@ -15,11 +15,13 @@
#define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1) #define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1)
SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream) SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream)
{ {
Byte sig[XZ_STREAM_HEADER_SIZE]; Byte sig[XZ_STREAM_HEADER_SIZE];
RINOK(SeqInStream_Read2(inStream, sig, XZ_STREAM_HEADER_SIZE, SZ_ERROR_NO_ARCHIVE)); size_t processedSize = XZ_STREAM_HEADER_SIZE;
if (memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0) RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize))
if (processedSize != XZ_STREAM_HEADER_SIZE
|| memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0)
return SZ_ERROR_NO_ARCHIVE; return SZ_ERROR_NO_ARCHIVE;
return Xz_ParseHeader(p, sig); return Xz_ParseHeader(p, sig);
} }
@ -29,12 +31,12 @@ SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStream *inStream)
if (s == 0) return SZ_ERROR_ARCHIVE; \ if (s == 0) return SZ_ERROR_ARCHIVE; \
pos += s; } pos += s; }
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, UInt32 *headerSizeRes) SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
{ {
Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; Byte header[XZ_BLOCK_HEADER_SIZE_MAX];
unsigned headerSize; unsigned headerSize;
*headerSizeRes = 0; *headerSizeRes = 0;
RINOK(SeqInStream_ReadByte(inStream, &header[0])); RINOK(SeqInStream_ReadByte(inStream, &header[0]))
headerSize = (unsigned)header[0]; headerSize = (unsigned)header[0];
if (headerSize == 0) if (headerSize == 0)
{ {
@ -46,7 +48,12 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStream *inStream, BoolInt *isIndex, U
*isIndex = False; *isIndex = False;
headerSize = (headerSize << 2) + 4; headerSize = (headerSize << 2) + 4;
*headerSizeRes = headerSize; *headerSizeRes = headerSize;
RINOK(SeqInStream_Read(inStream, header + 1, headerSize - 1)); {
size_t processedSize = headerSize - 1;
RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize))
if (processedSize != headerSize - 1)
return SZ_ERROR_INPUT_EOF;
}
return XzBlock_Parse(p, header); return XzBlock_Parse(p, header);
} }
@ -58,7 +65,9 @@ UInt64 Xz_GetUnpackSize(const CXzStream *p)
UInt64 size = 0; UInt64 size = 0;
size_t i; size_t i;
for (i = 0; i < p->numBlocks; i++) for (i = 0; i < p->numBlocks; i++)
ADD_SIZE_CHECK(size, p->blocks[i].unpackSize); {
ADD_SIZE_CHECK(size, p->blocks[i].unpackSize)
}
return size; return size;
} }
@ -67,12 +76,14 @@ UInt64 Xz_GetPackSize(const CXzStream *p)
UInt64 size = 0; UInt64 size = 0;
size_t i; size_t i;
for (i = 0; i < p->numBlocks; i++) for (i = 0; i < p->numBlocks; i++)
ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3); {
ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3)
}
return size; return size;
} }
/* /*
SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStream *inStream) SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream)
{ {
return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f)); return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f));
} }
@ -93,7 +104,7 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt
{ {
UInt64 numBlocks64; UInt64 numBlocks64;
READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64); READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64)
numBlocks = (size_t)numBlocks64; numBlocks = (size_t)numBlocks64;
if (numBlocks != numBlocks64 || numBlocks * 2 > size) if (numBlocks != numBlocks64 || numBlocks * 2 > size)
return SZ_ERROR_ARCHIVE; return SZ_ERROR_ARCHIVE;
@ -110,8 +121,8 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt
for (i = 0; i < numBlocks; i++) for (i = 0; i < numBlocks; i++)
{ {
CXzBlockSizes *block = &p->blocks[i]; CXzBlockSizes *block = &p->blocks[i];
READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize); READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize)
READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize); READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize)
if (block->totalSize == 0) if (block->totalSize == 0)
return SZ_ERROR_ARCHIVE; return SZ_ERROR_ARCHIVE;
} }
@ -122,7 +133,7 @@ static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPt
return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE;
} }
static SRes Xz_ReadIndex(CXzStream *p, ILookInStream *stream, UInt64 indexSize, ISzAllocPtr alloc) static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc)
{ {
SRes res; SRes res;
size_t size; size_t size;
@ -142,14 +153,14 @@ static SRes Xz_ReadIndex(CXzStream *p, ILookInStream *stream, UInt64 indexSize,
return res; return res;
} }
static SRes LookInStream_SeekRead_ForArc(ILookInStream *stream, UInt64 offset, void *buf, size_t size) static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size)
{ {
RINOK(LookInStream_SeekTo(stream, offset)); RINOK(LookInStream_SeekTo(stream, offset))
return LookInStream_Read(stream, buf, size); return LookInStream_Read(stream, buf, size);
/* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */ /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */
} }
static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOffset, ISzAllocPtr alloc) static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc)
{ {
UInt64 indexSize; UInt64 indexSize;
Byte buf[XZ_STREAM_FOOTER_SIZE]; Byte buf[XZ_STREAM_FOOTER_SIZE];
@ -159,7 +170,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
return SZ_ERROR_NO_ARCHIVE; return SZ_ERROR_NO_ARCHIVE;
pos -= XZ_STREAM_FOOTER_SIZE; pos -= XZ_STREAM_FOOTER_SIZE;
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)); RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE))
if (!XZ_FOOTER_SIG_CHECK(buf + 10)) if (!XZ_FOOTER_SIG_CHECK(buf + 10))
{ {
@ -174,7 +185,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos; i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos;
pos -= i; pos -= i;
RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i)); RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i))
total += (UInt32)i; total += (UInt32)i;
for (; i != 0; i--) for (; i != 0; i--)
if (temp[i - 1] != 0) if (temp[i - 1] != 0)
@ -193,7 +204,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
if (pos < XZ_STREAM_FOOTER_SIZE) if (pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE; return SZ_ERROR_NO_ARCHIVE;
pos -= XZ_STREAM_FOOTER_SIZE; pos -= XZ_STREAM_FOOTER_SIZE;
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)); RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE))
if (!XZ_FOOTER_SIG_CHECK(buf + 10)) if (!XZ_FOOTER_SIG_CHECK(buf + 10))
return SZ_ERROR_NO_ARCHIVE; return SZ_ERROR_NO_ARCHIVE;
} }
@ -217,8 +228,8 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
return SZ_ERROR_ARCHIVE; return SZ_ERROR_ARCHIVE;
pos -= indexSize; pos -= indexSize;
RINOK(LookInStream_SeekTo(stream, pos)); RINOK(LookInStream_SeekTo(stream, pos))
RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)); RINOK(Xz_ReadIndex(p, stream, indexSize, alloc))
{ {
UInt64 totalSize = Xz_GetPackSize(p); UInt64 totalSize = Xz_GetPackSize(p);
@ -227,7 +238,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
|| pos < totalSize + XZ_STREAM_HEADER_SIZE) || pos < totalSize + XZ_STREAM_HEADER_SIZE)
return SZ_ERROR_ARCHIVE; return SZ_ERROR_ARCHIVE;
pos -= (totalSize + XZ_STREAM_HEADER_SIZE); pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
RINOK(LookInStream_SeekTo(stream, pos)); RINOK(LookInStream_SeekTo(stream, pos))
*startOffset = (Int64)pos; *startOffset = (Int64)pos;
} }
{ {
@ -236,7 +247,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStream *stream, Int64 *startOff
SecToRead_CreateVTable(&secToRead); SecToRead_CreateVTable(&secToRead);
secToRead.realStream = stream; secToRead.realStream = stream;
RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt)); RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt))
return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE; return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE;
} }
} }
@ -274,7 +285,9 @@ UInt64 Xzs_GetUnpackSize(const CXzs *p)
UInt64 size = 0; UInt64 size = 0;
size_t i; size_t i;
for (i = 0; i < p->num; i++) for (i = 0; i < p->num; i++)
ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i])); {
ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i]))
}
return size; return size;
} }
@ -284,15 +297,17 @@ UInt64 Xzs_GetPackSize(const CXzs *p)
UInt64 size = 0; UInt64 size = 0;
size_t i; size_t i;
for (i = 0; i < p->num; i++) for (i = 0; i < p->num; i++)
ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i])); {
ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i]))
}
return size; return size;
} }
*/ */
SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompressProgress *progress, ISzAllocPtr alloc) SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc)
{ {
Int64 endOffset = 0; Int64 endOffset = 0;
RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END)); RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END))
*startOffset = endOffset; *startOffset = endOffset;
for (;;) for (;;)
{ {
@ -301,7 +316,7 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
Xz_Construct(&st); Xz_Construct(&st);
res = Xz_ReadBackward(&st, stream, startOffset, alloc); res = Xz_ReadBackward(&st, stream, startOffset, alloc);
st.startOffset = (UInt64)*startOffset; st.startOffset = (UInt64)*startOffset;
RINOK(res); RINOK(res)
if (p->num == p->numAllocated) if (p->num == p->numAllocated)
{ {
const size_t newNum = p->num + p->num / 4 + 1; const size_t newNum = p->num + p->num / 4 + 1;
@ -317,7 +332,7 @@ SRes Xzs_ReadBackward(CXzs *p, ILookInStream *stream, Int64 *startOffset, ICompr
p->streams[p->num++] = st; p->streams[p->num++] = st;
if (*startOffset == 0) if (*startOffset == 0)
break; break;
RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)); RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset))
if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
return SZ_ERROR_PROGRESS; return SZ_ERROR_PROGRESS;
} }