From df06f31a42c4eb3d4428c1367cbcab03aaf61318 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Wed, 25 Aug 2021 22:33:02 +0200 Subject: [PATCH] Update to 7-Zip Version 21.03 --- Asm/x86/7zAsm.asm | 27 +- Asm/x86/LzFindOpt.asm | 513 ++++++ C/7zTypes.h | 7 +- C/7zVersion.h | 6 +- C/Aes.c | 10 +- C/Alloc.c | 8 +- C/Alloc.h | 9 +- C/CpuArch.c | 62 +- C/CpuArch.h | 29 +- C/LzFind.c | 994 +++++++--- C/LzFind.h | 41 +- C/LzFindMt.c | 860 +++++---- C/LzFindMt.h | 36 +- C/LzFindOpt.c | 578 ++++++ C/LzmaEnc.c | 318 +++- C/MtCoder.c | 9 +- C/Sha1.c | 36 +- C/Threads.c | 40 +- C/Threads.h | 10 +- C/Util/LzmaLib/LzmaLib.dsp | 4 + C/Util/LzmaLib/makefile | 1 + C/var_clang_x64.mak | 1 - C/var_clang_x86.mak | 1 - C/var_gcc_x86.mak | 1 - C/warn_gcc.mak | 2 - CPP/7zip/7zip_gcc.mak | 14 +- CPP/7zip/Archive/7z/7zHandlerOut.cpp | 2 +- CPP/7zip/Archive/Zip/ZipHeader.h | 1 + CPP/7zip/Archive/Zip/ZipItem.cpp | 1 + CPP/7zip/Bundles/Alone/Alone.dsp | 118 +- CPP/7zip/Bundles/Alone/makefile | 3 + CPP/7zip/Bundles/Alone/makefile.gcc | 2 + CPP/7zip/Bundles/Alone7z/Alone.dsp | 30 +- CPP/7zip/Bundles/Alone7z/makefile | 2 + CPP/7zip/Bundles/Alone7z/makefile.gcc | 2 + CPP/7zip/Bundles/Codec_flzma2/makefile | 3 + CPP/7zip/Bundles/Fm/FM.dsp | 9 + CPP/7zip/Bundles/Format7z/makefile | 2 + CPP/7zip/Bundles/Format7zF/Arc.mak | 2 + CPP/7zip/Bundles/Format7zF/Arc_gcc.mak | 2 + CPP/7zip/Bundles/Format7zF/Format7z.dsp | 20 + CPP/7zip/Bundles/Format7zR/makefile | 2 + CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp | 17 + CPP/7zip/Bundles/LzmaCon/makefile | 3 + CPP/7zip/Bundles/LzmaCon/makefile.gcc | 3 + CPP/7zip/Common/MemBlocks.cpp | 4 +- CPP/7zip/Common/MethodProps.cpp | 66 +- CPP/7zip/Common/MethodProps.h | 85 +- CPP/7zip/Common/OffsetStream.cpp | 2 +- CPP/7zip/Common/StreamBinder.cpp | 4 +- CPP/7zip/Compress/DeflateDecoder.cpp | 15 +- CPP/7zip/Compress/DeflateEncoder.cpp | 24 +- CPP/7zip/Compress/LzmaEncoder.cpp | 24 +- CPP/7zip/Compress/PpmdEncoder.cpp | 46 +- CPP/7zip/LzFindOpt.mak | 7 + 
CPP/7zip/UI/Common/ArchiveCommandLine.cpp | 2 + CPP/7zip/UI/Common/ArchiveCommandLine.h | 1 + CPP/7zip/UI/Common/Bench.cpp | 614 +++++-- CPP/7zip/UI/Common/Bench.h | 67 +- CPP/7zip/UI/Common/CompressCall2.cpp | 6 +- CPP/7zip/UI/Console/Main.cpp | 68 +- CPP/7zip/UI/FileManager/AboutDialog.cpp | 4 +- CPP/7zip/UI/FileManager/ProgressDialog.cpp | 7 +- CPP/7zip/UI/FileManager/ProgressDialog2.cpp | 34 +- CPP/7zip/UI/FileManager/ProgressDialog2.h | 2 + CPP/7zip/UI/FileManager/ProgressDialog2Res.h | 3 +- CPP/7zip/UI/FileManager/ProgressDialog2a.rc | 9 +- CPP/7zip/UI/GUI/BenchmarkDialog.cpp | 1711 ++++++++++++++---- CPP/7zip/UI/GUI/BenchmarkDialog.h | 185 +- CPP/7zip/UI/GUI/BenchmarkDialog.rc | 121 +- CPP/7zip/UI/GUI/BenchmarkDialogRes.h | 10 +- CPP/7zip/UI/GUI/CompressDialog.cpp | 370 ++-- CPP/7zip/UI/GUI/CompressDialog.h | 25 +- CPP/7zip/UI/GUI/GUI.cpp | 7 +- CPP/7zip/UI/GUI/GUI.dsp | 4 + CPP/7zip/UI/GUI/UpdateGUI.cpp | 8 +- CPP/7zip/warn_clang_mac.mak | 2 + CPP/7zip/warn_gcc.mak | 4 + CPP/Common/LzFindPrepare.cpp | 7 + CPP/Common/MyBuffer2.h | 30 + CPP/Windows/Control/Dialog.cpp | 9 + CPP/Windows/Control/Dialog.h | 20 + CPP/Windows/ErrorMsg.cpp | 8 + CPP/Windows/Registry.cpp | 36 +- CPP/Windows/Synchronization.h | 4 + CPP/Windows/SystemInfo.cpp | 263 ++- CPP/Windows/SystemInfo.h | 6 + DOC/7zip.wxs | 2 +- DOC/readme.txt | 88 +- README.md | 6 +- 90 files changed, 5991 insertions(+), 1870 deletions(-) create mode 100644 Asm/x86/LzFindOpt.asm create mode 100644 C/LzFindOpt.c create mode 100644 CPP/7zip/LzFindOpt.mak create mode 100644 CPP/Common/LzFindPrepare.cpp diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm index dde40da6..34c278cb 100644 --- a/Asm/x86/7zAsm.asm +++ b/Asm/x86/7zAsm.asm @@ -1,5 +1,5 @@ ; 7zAsm.asm -- ASM macros -; 2021-02-07 : Igor Pavlov : Public domain +; 2021-07-13 : Igor Pavlov : Public domain ifdef RAX x64 equ 1 @@ -171,6 +171,7 @@ endm ; for fastcall and for WIN-x64 REG_PARAM_0_x equ x1 REG_PARAM_0 equ r1 +REG_PARAM_1_x equ x2 REG_PARAM_1 equ r2 
ifndef x64 @@ -178,6 +179,7 @@ ifndef x64 REG_ABI_PARAM_0_x equ REG_PARAM_0_x REG_ABI_PARAM_0 equ REG_PARAM_0 +REG_ABI_PARAM_1_x equ REG_PARAM_1_x REG_ABI_PARAM_1 equ REG_PARAM_1 else @@ -186,28 +188,39 @@ else if (IS_LINUX eq 0) ; for WIN-x64: -REG_PARAM_2 equ r8 -REG_PARAM_3 equ r9 +REG_PARAM_2_x equ x8 +REG_PARAM_2 equ r8 +REG_PARAM_3 equ r9 REG_ABI_PARAM_0_x equ REG_PARAM_0_x REG_ABI_PARAM_0 equ REG_PARAM_0 +REG_ABI_PARAM_1_x equ REG_PARAM_1_x REG_ABI_PARAM_1 equ REG_PARAM_1 +REG_ABI_PARAM_2_x equ REG_PARAM_2_x REG_ABI_PARAM_2 equ REG_PARAM_2 REG_ABI_PARAM_3 equ REG_PARAM_3 else ; for LINUX-x64: REG_LINUX_PARAM_0_x equ x7 -REG_LINUX_PARAM_0 equ r7 -REG_LINUX_PARAM_1 equ r6 -REG_LINUX_PARAM_2 equ r2 -REG_LINUX_PARAM_3 equ r1 +REG_LINUX_PARAM_0 equ r7 +REG_LINUX_PARAM_1_x equ x6 +REG_LINUX_PARAM_1 equ r6 +REG_LINUX_PARAM_2 equ r2 +REG_LINUX_PARAM_3 equ r1 +REG_LINUX_PARAM_4_x equ x8 +REG_LINUX_PARAM_4 equ r8 +REG_LINUX_PARAM_5 equ r9 REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0 +REG_ABI_PARAM_1_x equ REG_LINUX_PARAM_1_x REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1 REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2 REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3 +REG_ABI_PARAM_4_x equ REG_LINUX_PARAM_4_x +REG_ABI_PARAM_4 equ REG_LINUX_PARAM_4 +REG_ABI_PARAM_5 equ REG_LINUX_PARAM_5 MY_ABI_LINUX_TO_WIN_2 macro mov r2, r6 diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm new file mode 100644 index 00000000..bc3a6fe7 --- /dev/null +++ b/Asm/x86/LzFindOpt.asm @@ -0,0 +1,513 @@ +; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function +; 2021-07-13: Igor Pavlov : Public domain +; + +ifndef x64 +; x64=1 +; .err +endif + +include 7zAsm.asm + +MY_ASM_START + +_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE' + +MY_ALIGN macro num:req + align num +endm + +MY_ALIGN_32 macro + MY_ALIGN 32 +endm + +MY_ALIGN_64 macro + MY_ALIGN 64 +endm + + +t0_L equ x0_L +t0_x equ x0 +t0 equ r0 +t1_x equ x3 +t1 equ r3 + +cp_x equ t1_x +cp_r equ t1 +m equ x5 +m_r equ r5 +len_x equ x6 
+len equ r6 +diff_x equ x7 +diff equ r7 +len0 equ r10 +len1_x equ x11 +len1 equ r11 +maxLen_x equ x12 +maxLen equ r12 +d equ r13 +ptr0 equ r14 +ptr1 equ r15 + +d_lim equ m_r +cycSize equ len_x +hash_lim equ len0 +delta1_x equ len1_x +delta1_r equ len1 +delta_x equ maxLen_x +delta_r equ maxLen +hash equ ptr0 +src equ ptr1 + + + +if (IS_LINUX gt 0) + +; r1 r2 r8 r9 : win32 +; r7 r6 r2 r1 r8 r9 : linux + +lenLimit equ r8 +lenLimit_x equ x8 +; pos_r equ r2 +pos equ x2 +cur equ r1 +son equ r9 + +else + +lenLimit equ REG_ABI_PARAM_2 +lenLimit_x equ REG_ABI_PARAM_2_x +pos equ REG_ABI_PARAM_1_x +cur equ REG_ABI_PARAM_0 +son equ REG_ABI_PARAM_3 + +endif + + +if (IS_LINUX gt 0) + maxLen_OFFS equ (REG_SIZE * (6 + 1)) +else + cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4)) + d_OFFS equ (REG_SIZE + cutValue_OFFS) + maxLen_OFFS equ (REG_SIZE + d_OFFS) +endif + hash_OFFS equ (REG_SIZE + maxLen_OFFS) + limit_OFFS equ (REG_SIZE + hash_OFFS) + size_OFFS equ (REG_SIZE + limit_OFFS) + cycPos_OFFS equ (REG_SIZE + size_OFFS) + cycSize_OFFS equ (REG_SIZE + cycPos_OFFS) + posRes_OFFS equ (REG_SIZE + cycSize_OFFS) + +if (IS_LINUX gt 0) +else + cutValue_PAR equ [r0 + cutValue_OFFS] + d_PAR equ [r0 + d_OFFS] +endif + maxLen_PAR equ [r0 + maxLen_OFFS] + hash_PAR equ [r0 + hash_OFFS] + limit_PAR equ [r0 + limit_OFFS] + size_PAR equ [r0 + size_OFFS] + cycPos_PAR equ [r0 + cycPos_OFFS] + cycSize_PAR equ [r0 + cycSize_OFFS] + posRes_PAR equ [r0 + posRes_OFFS] + + + cutValue_VAR equ DWORD PTR [r4 + 8 * 0] + cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4] + cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0] + cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4] + hash_VAR equ QWORD PTR [r4 + 8 * 2] + limit_VAR equ QWORD PTR [r4 + 8 * 3] + size_VAR equ QWORD PTR [r4 + 8 * 4] + distances equ QWORD PTR [r4 + 8 * 5] + maxLen_VAR equ QWORD PTR [r4 + 8 * 6] + + Old_RSP equ QWORD PTR [r4 + 8 * 7] + LOCAL_SIZE equ 8 * 8 + +COPY_VAR_32 macro dest_var, src_var + mov x3, src_var + mov dest_var, x3 +endm + +COPY_VAR_64 macro dest_var, 
src_var + mov r3, src_var + mov dest_var, r3 +endm + + +; MY_ALIGN_64 +MY_PROC GetMatchesSpecN_2, 13 +MY_PUSH_PRESERVED_ABI_REGS + mov r0, RSP + lea r3, [r0 - LOCAL_SIZE] + and r3, -64 + mov RSP, r3 + mov Old_RSP, r0 + +if (IS_LINUX gt 0) + mov d, REG_ABI_PARAM_5 ; r13 = r9 + mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8 + mov son, REG_ABI_PARAM_3 ; r9 = r1 + mov r8, REG_ABI_PARAM_2 ; r8 = r2 + mov pos, REG_ABI_PARAM_1_x ; r2 = x6 + mov r1, REG_ABI_PARAM_0 ; r1 = r7 +else + COPY_VAR_32 cutValue_VAR, cutValue_PAR + mov d, d_PAR +endif + + COPY_VAR_64 limit_VAR, limit_PAR + + mov hash_lim, size_PAR + mov size_VAR, hash_lim + + mov cp_x, cycPos_PAR + mov hash, hash_PAR + + mov cycSize, cycSize_PAR + mov cycSize_VAR, cycSize + + ; we want cur in (rcx). So we change the cur and lenLimit variables + sub lenLimit, cur + neg lenLimit_x + inc lenLimit_x + + mov t0_x, maxLen_PAR + sub t0, lenLimit + mov maxLen_VAR, t0 + + jmp main_loop + +MY_ALIGN_64 +fill_empty: + ; ptr0 = *ptr1 = kEmptyHashValue; + mov QWORD PTR [ptr1], 0 + inc pos + inc cp_x + mov DWORD PTR [d - 4], 0 + cmp d, limit_VAR + jae fin + cmp hash, hash_lim + je fin + +; MY_ALIGN_64 +main_loop: + ; UInt32 delta = *hash++; + mov diff_x, [hash] ; delta + add hash, 4 + ; mov cycPos_VAR, cp_x + + inc cur + add d, 4 + mov m, pos + sub m, diff_x; ; matchPos + + ; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + lea ptr1, [son + 8 * cp_r] + ; mov cycSize, cycSize_VAR + cmp pos, cycSize + jb directMode ; if (pos < cycSize_VAR) + + ; CYC MODE + + cmp diff_x, cycSize + jae fill_empty ; if (delta >= cycSize_VAR) + + xor t0_x, t0_x + mov cycPos_VAR, cp_x + sub cp_x, diff_x + ; jae prepare_for_tree_loop + ; add cp_x, cycSize + cmovb t0_x, cycSize + add cp_x, t0_x ; cp_x += (cycPos < delta ? 
cycSize : 0) + jmp prepare_for_tree_loop + + +directMode: + cmp diff_x, pos + je fill_empty ; if (delta == pos) + jae fin_error ; if (delta >= pos) + + mov cycPos_VAR, cp_x + mov cp_x, m + +prepare_for_tree_loop: + mov len0, lenLimit + mov hash_VAR, hash + ; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1; + lea ptr0, [ptr1 + 4] + ; UInt32 *_distances = ++d; + mov distances, d + + neg len0 + mov len1, len0 + + mov t0_x, cutValue_VAR + mov maxLen, maxLen_VAR + mov cutValueCur_VAR, t0_x + +MY_ALIGN_32 +tree_loop: + neg diff + mov len, len0 + cmp len1, len0 + cmovb len, len1 ; len = (len1 < len0 ? len1 : len0); + add diff, cur + + mov t0_x, [son + cp_r * 8] ; prefetch + movzx t0_x, BYTE PTR [diff + 1 * len] + lea cp_r, [son + cp_r * 8] + cmp [cur + 1 * len], t0_L + je matched_1 + + jb left_0 + + mov [ptr1], m + mov m, [cp_r + 4] + lea ptr1, [cp_r + 4] + sub diff, cur ; FIX32 + jmp next_node + +MY_ALIGN_32 +left_0: + mov [ptr0], m + mov m, [cp_r] + mov ptr0, cp_r + sub diff, cur ; FIX32 + ; jmp next_node + +; ------------ NEXT NODE ------------ +; MY_ALIGN_32 +next_node: + mov cycSize, cycSize_VAR + dec cutValueCur_VAR + je finish_tree + + add diff_x, pos ; prev_match = pos + diff + cmp m, diff_x + jae fin_error ; if (new_match >= prev_match) + + mov diff_x, pos + sub diff_x, m ; delta = pos - new_match + cmp pos, cycSize + jae cyc_mode_2 ; if (pos >= cycSize) + + mov cp_x, m + test m, m + jne tree_loop ; if (m != 0) + +finish_tree: + ; ptr0 = *ptr1 = kEmptyHashValue; + mov DWORD PTR [ptr0], 0 + mov DWORD PTR [ptr1], 0 + + inc pos + + ; _distances[-1] = (UInt32)(d - _distances); + mov t0, distances + mov t1, d + sub t1, t0 + shr t1_x, 2 + mov [t0 - 4], t1_x + + cmp d, limit_VAR + jae fin ; if (d >= limit) + + mov cp_x, cycPos_VAR + mov hash, hash_VAR + mov hash_lim, size_VAR + inc cp_x + cmp hash, hash_lim + jne main_loop ; if (hash != size) + jmp fin + + +MY_ALIGN_32 +cyc_mode_2: + cmp diff_x, cycSize + jae finish_tree ; if (delta >= cycSize) + + 
mov cp_x, cycPos_VAR + xor t0_x, t0_x + sub cp_x, diff_x ; cp_x = cycPos - delta + cmovb t0_x, cycSize + add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0) + jmp tree_loop + + +MY_ALIGN_32 +matched_1: + + inc len + ; cmp len_x, lenLimit_x + je short lenLimit_reach + movzx t0_x, BYTE PTR [diff + 1 * len] + cmp [cur + 1 * len], t0_L + jne mismatch + + +MY_ALIGN_32 +match_loop: + ; while (++len != lenLimit) (len[diff] != len[0]) ; + + inc len + ; cmp len_x, lenLimit_x + je short lenLimit_reach + movzx t0_x, BYTE PTR [diff + 1 * len] + cmp BYTE PTR [cur + 1 * len], t0_L + je match_loop + +mismatch: + jb left_2 + + mov [ptr1], m + mov m, [cp_r + 4] + lea ptr1, [cp_r + 4] + mov len1, len + + jmp max_update + +MY_ALIGN_32 +left_2: + mov [ptr0], m + mov m, [cp_r] + mov ptr0, cp_r + mov len0, len + +max_update: + sub diff, cur ; restore diff + + cmp maxLen, len + jae next_node + + mov maxLen, len + add len, lenLimit + mov [d], len_x + mov t0_x, diff_x + not t0_x + mov [d + 4], t0_x + add d, 8 + + jmp next_node + + + +MY_ALIGN_32 +lenLimit_reach: + + mov delta_r, cur + sub delta_r, diff + lea delta1_r, [delta_r - 1] + + mov t0_x, [cp_r] + mov [ptr1], t0_x + mov t0_x, [cp_r + 4] + mov [ptr0], t0_x + + mov [d], lenLimit_x + mov [d + 4], delta1_x + add d, 8 + + ; _distances[-1] = (UInt32)(d - _distances); + mov t0, distances + mov t1, d + sub t1, t0 + shr t1_x, 2 + mov [t0 - 4], t1_x + + mov hash, hash_VAR + mov hash_lim, size_VAR + + inc pos + mov cp_x, cycPos_VAR + inc cp_x + + mov d_lim, limit_VAR + mov cycSize, cycSize_VAR + ; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + ; break; + cmp hash, hash_lim + je fin + cmp d, d_lim + jae fin + cmp delta_x, [hash] + jne main_loop + movzx t0_x, BYTE PTR [diff] + cmp [cur], t0_L + jne main_loop + + ; jmp main_loop ; bypass for debug + + mov cycPos_VAR, cp_x + shl len, 3 ; cycSize * 8 + sub diff, cur ; restore diff + xor t0_x, t0_x + cmp cp_x, delta_x ; cmp (cycPos_VAR, delta) + lea cp_r, 
[son + 8 * cp_r] ; dest + lea src, [cp_r + 8 * diff] + cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0) + add src, t0 + add len, son ; len = son + cycSize * 8 + + +MY_ALIGN_32 +long_loop: + add hash, 4 + + ; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; + + mov t0, [src] + add src, 8 + mov [cp_r], t0 + add cp_r, 8 + cmp src, len + cmove src, son ; if end of (son) buffer is reached, we wrap to begin + + mov DWORD PTR [d], 2 + mov [d + 4], lenLimit_x + mov [d + 8], delta1_x + add d, 12 + + inc cur + + cmp hash, hash_lim + je long_footer + cmp delta_x, [hash] + jne long_footer + movzx t0_x, BYTE PTR [diff + cur] + cmp [cur], t0_L + jne long_footer + cmp d, d_lim + jb long_loop + +long_footer: + sub cp_r, son + shr cp_r, 3 + add pos, cp_x + sub pos, cycPos_VAR + mov cycSize, cycSize_VAR + + cmp d, d_lim + jae fin + cmp hash, hash_lim + jne main_loop + jmp fin + + + +fin_error: + xor d, d + +fin: + mov RSP, Old_RSP + mov t0, [r4 + posRes_OFFS] + mov [t0], pos + mov r0, d + +MY_POP_PRESERVED_ABI_REGS +MY_ENDP + +_TEXT$LZFINDOPT ENDS + +end diff --git a/C/7zTypes.h b/C/7zTypes.h index f817b7f5..3f66a7b5 100644 --- a/C/7zTypes.h +++ b/C/7zTypes.h @@ -1,5 +1,5 @@ /* 7zTypes.h -- Basic types -2021-04-25 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H @@ -62,6 +62,8 @@ typedef int SRes; typedef unsigned WRes; #define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x) +// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR) + #else // _WIN32 // #define ENV_HAVE_LSTAT @@ -95,6 +97,7 @@ typedef int WRes; #define ERROR_DIRECTORY 267L #define ERROR_TOO_MANY_POSTS 298L +#define ERROR_INTERNAL_ERROR 1359L #define ERROR_INVALID_REPARSE_DATA 4392L #define ERROR_REPARSE_TAG_INVALID 4393L #define ERROR_REPARSE_TAG_MISMATCH 4394L @@ -206,6 +209,8 @@ typedef size_t SIZE_T; #endif // _WIN32 +#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL) + 
#ifdef _SZ_NO_INT_64 diff --git a/C/7zVersion.h b/C/7zVersion.h index 734c514d..69f5a00f 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h @@ -1,7 +1,7 @@ #define MY_VER_MAJOR 21 -#define MY_VER_MINOR 02 +#define MY_VER_MINOR 03 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "21.02 ZS v1.5.0 R1" +#define MY_VERSION_NUMBERS "21.03 ZS v1.5.0 R1" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,7 +10,7 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2021-05-16" +#define MY_DATE "2021-08-26" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov, Tino Reichardt" diff --git a/C/Aes.c b/C/Aes.c index 4436a3c6..27e32e62 100644 --- a/C/Aes.c +++ b/C/Aes.c @@ -1,5 +1,5 @@ /* Aes.c -- AES encryption / decryption -2021-04-01 : Igor Pavlov : Public domain */ +2021-05-13 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -365,10 +365,10 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks) #ifdef MY_CPU_LE_UNALIGN *((UInt32 *)(void *)data) ^= t; #else - data[0] ^= (t & 0xFF); - data[1] ^= ((t >> 8) & 0xFF); - data[2] ^= ((t >> 16) & 0xFF); - data[3] ^= ((t >> 24)); + data[0] = (Byte)(data[0] ^ (t & 0xFF)); + data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF)); + data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF)); + data[3] = (Byte)(data[3] ^ ((t >> 24))); #endif } } diff --git a/C/Alloc.c b/C/Alloc.c index b350deb7..d1af76c5 100644 --- a/C/Alloc.c +++ b/C/Alloc.c @@ -1,5 +1,5 @@ /* Alloc.c -- Memory allocation functions -2020-10-29 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -247,14 +247,14 @@ static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); } const ISzAlloc g_Alloc = { SzAlloc, SzFree }; +#ifdef _WIN32 static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); } static void SzMidFree(ISzAllocPtr 
p, void *address) { UNUSED_VAR(p); MidFree(address); } -const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; - static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); } static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); } +const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree }; const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree }; - +#endif /* uintptr_t : C99 (optional) diff --git a/C/Alloc.h b/C/Alloc.h index b32b6767..3be2041e 100644 --- a/C/Alloc.h +++ b/C/Alloc.h @@ -1,5 +1,5 @@ /* Alloc.h -- Memory allocation functions -2021-02-08 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __COMMON_ALLOC_H #define __COMMON_ALLOC_H @@ -30,8 +30,15 @@ void BigFree(void *address); #endif extern const ISzAlloc g_Alloc; + +#ifdef _WIN32 extern const ISzAlloc g_BigAlloc; extern const ISzAlloc g_MidAlloc; +#else +#define g_BigAlloc g_AlignedAlloc +#define g_MidAlloc g_AlignedAlloc +#endif + extern const ISzAlloc g_AlignedAlloc; diff --git a/C/CpuArch.c b/C/CpuArch.c index 1d761416..fa9afe39 100644 --- a/C/CpuArch.c +++ b/C/CpuArch.c @@ -1,5 +1,5 @@ /* CpuArch.c -- CPU specific code -2021-04-28 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -278,6 +278,30 @@ BoolInt CPU_IsSupported_SHA() #include #endif +BoolInt CPU_IsSupported_AVX2() +{ + Cx86cpuid p; + CHECK_SYS_SSE_SUPPORT + + #ifdef _WIN32 + #define MY__PF_XSAVE_ENABLED 17 + if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED)) + return False; + #endif + + if (!x86cpuid_CheckAndRead(&p)) + return False; + if (p.maxFunc < 7) + return False; + { + UInt32 d[4] = { 0 }; + MyCPUID(7, &d[0], &d[1], &d[2], &d[3]); + // printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]); + return 1 + & (d[1] >> 5); // avx2 + } +} + BoolInt CPU_IsSupported_VAES_AVX2() { Cx86cpuid p; @@ -329,10 +353,9 @@ BoolInt CPU_IsSupported_PageGB() #include -BoolInt CPU_IsSupported_CRC32() - { 
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } -BoolInt CPU_IsSupported_CRYPTO() - { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } +BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; } #else @@ -356,17 +379,27 @@ static void Print_sysctlbyname(const char *name) } */ -BoolInt CPU_IsSupported_CRC32(void) +static BoolInt My_sysctlbyname_Get_BoolInt(const char *name) { + UInt32 val = 0; + if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1) + return 1; + return 0; +} + /* Print_sysctlbyname("hw.pagesize"); Print_sysctlbyname("machdep.cpu.brand_string"); */ - UInt32 val = 0; - if (My_sysctlbyname_Get_UInt32("hw.optional.armv8_crc32", &val) == 0 && val == 1) - return 1; - return 0; +BoolInt CPU_IsSupported_CRC32(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32"); +} + +BoolInt CPU_IsSupported_NEON(void) +{ + return My_sysctlbyname_Get_BoolInt("hw.optional.neon"); } #ifdef MY_CPU_ARM64 @@ -390,18 +423,25 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #include + #define MY_HWCAP_CHECK_FUNC_2(name1, name2) \ + BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; } + #ifdef MY_CPU_ARM64 #define MY_HWCAP_CHECK_FUNC(name) \ - BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name)) ? 1 : 0; } + MY_HWCAP_CHECK_FUNC_2(name, name) + MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD) +// MY_HWCAP_CHECK_FUNC (ASIMD) #elif defined(MY_CPU_ARM) #define MY_HWCAP_CHECK_FUNC(name) \ BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 
1 : 0; } + MY_HWCAP_CHECK_FUNC_2(NEON, NEON) #endif #else // USE_HWCAP #define MY_HWCAP_CHECK_FUNC(name) \ BoolInt CPU_IsSupported_ ## name() { return 0; } + MY_HWCAP_CHECK_FUNC(NEON) #endif // USE_HWCAP diff --git a/C/CpuArch.h b/C/CpuArch.h index 6c4ab404..529d3a50 100644 --- a/C/CpuArch.h +++ b/C/CpuArch.h @@ -1,5 +1,5 @@ /* CpuArch.h -- CPU specific code -2021-04-25 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __CPU_ARCH_H #define __CPU_ARCH_H @@ -225,7 +225,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #endif #else #ifdef __xlC__ - // for XLC compiler: #define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)") #define MY_CPU_pragma_pop _Pragma("pack()") #else @@ -253,8 +252,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #ifdef MY_CPU_LE #if defined(MY_CPU_X86_OR_AMD64) \ - || defined(MY_CPU_ARM64) \ - || defined(__ARM_FEATURE_UNALIGNED) + || defined(MY_CPU_ARM64) + #define MY_CPU_LE_UNALIGN + #define MY_CPU_LE_UNALIGN_64 + #elif defined(__ARM_FEATURE_UNALIGNED) + /* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment. + So we can't use unaligned 64-bit operations. */ #define MY_CPU_LE_UNALIGN #endif #endif @@ -264,11 +267,15 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define GetUi16(p) (*(const UInt16 *)(const void *)(p)) #define GetUi32(p) (*(const UInt32 *)(const void *)(p)) +#ifdef MY_CPU_LE_UNALIGN_64 #define GetUi64(p) (*(const UInt64 *)(const void *)(p)) +#endif #define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); } #define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); } +#ifdef MY_CPU_LE_UNALIGN_64 #define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); } +#endif #else @@ -282,8 +289,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. 
 ((UInt32)((const Byte *)(p))[2] << 16) | \ ((UInt32)((const Byte *)(p))[3] << 24)) -#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) - #define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \ _ppp_[0] = (Byte)_vvv_; \ _ppp_[1] = (Byte)(_vvv_ >> 8); } @@ -294,12 +299,22 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. _ppp_[2] = (Byte)(_vvv_ >> 16); \ _ppp_[3] = (Byte)(_vvv_ >> 24); } +#endif + + +#ifndef MY_CPU_LE_UNALIGN_64 + +#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32)) + #define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \ SetUi32(_ppp2_ , (UInt32)_vvv2_); \ SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); } #endif + + + #ifdef __has_builtin #define MY__has_builtin(x) __has_builtin(x) #else @@ -392,6 +407,7 @@ int x86cpuid_GetFirm(const Cx86cpuid *p); BoolInt CPU_Is_InOrder(void); BoolInt CPU_IsSupported_AES(void); +BoolInt CPU_IsSupported_AVX2(void); BoolInt CPU_IsSupported_VAES_AVX2(void); BoolInt CPU_IsSupported_SSSE3(void); BoolInt CPU_IsSupported_SSE41(void); @@ -401,6 +417,7 @@ BoolInt CPU_IsSupported_PageGB(void); #elif defined(MY_CPU_ARM_OR_ARM64) BoolInt CPU_IsSupported_CRC32(void); +BoolInt CPU_IsSupported_NEON(void); #if defined(_WIN32) BoolInt CPU_IsSupported_CRYPTO(void); diff --git a/C/LzFind.c b/C/LzFind.c index 95966f06..3b32eae8 100644 --- a/C/LzFind.c +++ b/C/LzFind.c @@ -1,19 +1,29 @@ /* LzFind.c -- Match finder for LZ algorithms -2021-04-01 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #include "Precomp.h" #include +// #include #include "CpuArch.h" #include "LzFind.h" #include "LzHash.h" +#define kBlockMoveAlign (1 << 7) // alignment for memmove() +#define kBlockSizeAlign (1 << 16) // alignment for block allocation +#define kBlockSizeReserveMin (1 << 24) // it's 1/256 from 4 GB dictionary + #define kEmptyHashValue 0 -#define kMaxValForNormalize ((UInt32)0xFFFFFFFF) -#define 
kNormalizeStepMin (1 << 10) /* it must be power of 2 */ -#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1)) -#define kMaxHistorySize ((UInt32)7 << 29) + +#define kMaxValForNormalize ((UInt32)0) +// #define kMaxValForNormalize ((UInt32)(1 << 20) + 0xFFF) // for debug + +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses + +#define GET_AVAIL_BYTES(p) \ + Inline_MatchFinder_GetNumAvailableBytes(p) + // #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) #define kFix5HashSize kFix4HashSize @@ -64,46 +74,57 @@ static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc) } } -/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ -static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc) +static int LzInWindow_Create2(CMatchFinder *p, UInt32 blockSize, ISzAllocPtr alloc) { - UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; - if (p->directInput) - { - p->blockSize = blockSize; - return 1; - } + if (blockSize == 0) + return 0; if (!p->bufferBase || p->blockSize != blockSize) { + // size_t blockSizeT; LzInWindow_Free(p, alloc); p->blockSize = blockSize; - p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize); + // blockSizeT = blockSize; + + // printf("\nblockSize = 0x%x\n", blockSize); + /* + #if defined _WIN64 + // we can allocate 4GiB, but still use UInt32 for (p->blockSize) + // we use UInt32 type for (p->blockSize), because + // we don't want to wrap over 4 GiB, + // when we use (p->streamPos - p->pos) that is UInt32. 
 + if (blockSize >= (UInt32)0 - (UInt32)kBlockSizeAlign) + { + blockSizeT = ((size_t)1 << 32); + printf("\nchanged to blockSizeT = 4GiB\n"); + } + #endif + */ + + p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, blockSize); + // printf("\nbufferBase = %p\n", p->bufferBase); + // return 0; // for debug } return (p->bufferBase != NULL); } -static Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } +static const Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; } -static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; } +static UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return GET_AVAIL_BYTES(p); } -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue) -{ - p->posLimit -= subValue; - p->pos -= subValue; - p->streamPos -= subValue; -} +MY_NO_INLINE static void MatchFinder_ReadBlock(CMatchFinder *p) { if (p->streamEndWasReached || p->result != SZ_OK) return; - /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */ + /* We use (p->streamPos - p->pos) value. + (p->streamPos < p->pos) is allowed. */ if (p->directInput) { - UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos); + UInt32 curSize = 0xFFFFFFFF - GET_AVAIL_BYTES(p); if (curSize > p->directInputRem) curSize = (UInt32)p->directInputRem; p->directInputRem -= curSize; @@ -115,10 +136,22 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) for (;;) { - Byte *dest = p->buffer + (p->streamPos - p->pos); + Byte *dest = p->buffer + GET_AVAIL_BYTES(p); size_t size = (size_t)(p->bufferBase + p->blockSize - dest); if (size == 0) + { + /* we call ReadBlock() after NeedMove() and MoveBlock(). + NeedMove() and MoveBlock() provide more than (keepSizeAfter) + to the end of (blockSize). + So we don't execute this branch in normal code flow. + We can go here, if we will call ReadBlock() before NeedMove(), MoveBlock(). 
 + */ + // p->result = SZ_ERROR_FAIL; // we can show error here return; + } + + // #define kRead 3 + // if (size > kRead) size = kRead; // for debug p->result = ISeqInStream_Read(p->stream, dest, &size); if (p->result != SZ_OK) @@ -129,41 +162,52 @@ static void MatchFinder_ReadBlock(CMatchFinder *p) return; } p->streamPos += (UInt32)size; - if (p->streamPos - p->pos > p->keepSizeAfter) + if (GET_AVAIL_BYTES(p) > p->keepSizeAfter) return; + /* here and in another (p->keepSizeAfter) checks we keep on 1 byte more than was requested by Create() function + (GET_AVAIL_BYTES(p) >= p->keepSizeAfter) - minimal required size */ } + + // on exit: (p->result != SZ_OK || p->streamEndWasReached || GET_AVAIL_BYTES(p) > p->keepSizeAfter) } + + +MY_NO_INLINE void MatchFinder_MoveBlock(CMatchFinder *p) { + const size_t offset = (size_t)(p->buffer - p->bufferBase) - p->keepSizeBefore; + const size_t keepBefore = (offset & (kBlockMoveAlign - 1)) + p->keepSizeBefore; + p->buffer = p->bufferBase + keepBefore; memmove(p->bufferBase, - p->buffer - p->keepSizeBefore, - (size_t)(p->streamPos - p->pos) + p->keepSizeBefore); - p->buffer = p->bufferBase + p->keepSizeBefore; + p->bufferBase + (offset & ~((size_t)kBlockMoveAlign - 1)), + keepBefore + (size_t)GET_AVAIL_BYTES(p)); } +/* We call MoveBlock() before ReadBlock(). + So MoveBlock() can be wasteful operation, if the whole input data + can fit in current block even without calling MoveBlock(). + in important case where (dataSize <= historySize) + condition (p->blockSize > dataSize + p->keepSizeAfter) is met + So there is no MoveBlock() in that case. 
+*/ + int MatchFinder_NeedMove(CMatchFinder *p) { if (p->directInput) return 0; - /* if (p->streamEndWasReached) return 0; */ + if (p->streamEndWasReached || p->result != SZ_OK) + return 0; return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder *p) { - if (p->streamEndWasReached) - return; - if (p->keepSizeAfter >= p->streamPos - p->pos) + if (p->keepSizeAfter >= GET_AVAIL_BYTES(p)) MatchFinder_ReadBlock(p); } -static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p) -{ - if (MatchFinder_NeedMove(p)) - MatchFinder_MoveBlock(p); - MatchFinder_ReadBlock(p); -} + static void MatchFinder_SetDefaultSettings(CMatchFinder *p) { @@ -214,32 +258,67 @@ static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc) return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes); } -int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, - UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, - ISzAllocPtr alloc) +#if (kBlockSizeReserveMin < kBlockSizeAlign * 2) + #error Stop_Compiling_Bad_Reserve +#endif + + + +static UInt32 GetBlockSize(CMatchFinder *p, UInt32 historySize) { - UInt32 sizeReserv; - + UInt32 blockSize = (p->keepSizeBefore + p->keepSizeAfter); + /* if (historySize > kMaxHistorySize) - { - MatchFinder_Free(p, alloc); return 0; - } + */ + // printf("\nhistorySize == 0x%x\n", historySize); - sizeReserv = historySize >> 1; - if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3; - else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2; + if (p->keepSizeBefore < historySize || blockSize < p->keepSizeBefore) // if 32-bit overflow + return 0; - sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); + { + const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)kBlockSizeAlign; + const UInt32 rem = kBlockSizeMax - blockSize; + const UInt32 reserve = (blockSize >> (blockSize < ((UInt32)1 << 30) ? 
1 : 2)) + + (1 << 12) + kBlockMoveAlign + kBlockSizeAlign; // do not overflow 32-bit here + if (blockSize >= kBlockSizeMax + || rem < kBlockSizeReserveMin) // we reject settings that will be slow + return 0; + if (reserve >= rem) + blockSize = kBlockSizeMax; + else + { + blockSize += reserve; + blockSize &= ~(UInt32)(kBlockSizeAlign - 1); + } + } + // printf("\n LzFind_blockSize = %x\n", blockSize); + // printf("\n LzFind_blockSize = %d\n", blockSize >> 20); + return blockSize; +} + +int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, + UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, + ISzAllocPtr alloc) +{ + /* we need one additional byte in (p->keepSizeBefore), + since we use MoveBlock() after (p->pos++) and before dictionary using */ + // keepAddBufferBefore = (UInt32)0xFFFFFFFF - (1 << 22); // for debug p->keepSizeBefore = historySize + keepAddBufferBefore + 1; - p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; - - /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */ - - if (LzInWindow_Create(p, sizeReserv, alloc)) + + keepAddBufferAfter += matchMaxLen; + /* we need (p->keepSizeAfter >= p->numHashBytes) */ + if (keepAddBufferAfter < p->numHashBytes) + keepAddBufferAfter = p->numHashBytes; + // keepAddBufferAfter -= 2; // for debug + p->keepSizeAfter = keepAddBufferAfter; + + if (p->directInput) + p->blockSize = 0; + if (p->directInput || LzInWindow_Create2(p, GetBlockSize(p, historySize), alloc)) { - UInt32 newCyclicBufferSize = historySize + 1; + const UInt32 newCyclicBufferSize = historySize + 1; // do not change it UInt32 hs; p->matchMaxLen = matchMaxLen; { @@ -299,7 +378,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, size_t numSons; p->historySize = historySize; p->hashSizeSum = hs; - p->cyclicBufferSize = newCyclicBufferSize; + p->cyclicBufferSize = newCyclicBufferSize; // it must be = (historySize + 1) numSons = newCyclicBufferSize; if (p->btMode) @@ 
-329,33 +408,43 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, return 0; } + static void MatchFinder_SetLimits(CMatchFinder *p) { - UInt32 limit = kMaxValForNormalize - p->pos; - UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; - - if (limit2 < limit) - limit = limit2; - limit2 = p->streamPos - p->pos; + UInt32 k; + UInt32 n = kMaxValForNormalize - p->pos; + if (n == 0) + n = (UInt32)(Int32)-1; // we allow (pos == 0) at start even with (kMaxValForNormalize == 0) - if (limit2 <= p->keepSizeAfter) + k = p->cyclicBufferSize - p->cyclicBufferPos; + if (k < n) + n = k; + + k = GET_AVAIL_BYTES(p); { - if (limit2 > 0) - limit2 = 1; + const UInt32 ksa = p->keepSizeAfter; + UInt32 mm = p->matchMaxLen; + if (k > ksa) + k -= ksa; // we must limit exactly to keepSizeAfter for ReadBlock + else if (k >= mm) + { + // the limitation for (p->lenLimit) update + k -= mm; // optimization : to reduce the number of checks + k++; + // k = 1; // non-optimized version : for debug + } + else + { + mm = k; + if (k != 0) + k = 1; + } + p->lenLimit = mm; } - else - limit2 -= p->keepSizeAfter; - - if (limit2 < limit) - limit = limit2; + if (k < n) + n = k; - { - UInt32 lenLimit = p->streamPos - p->pos; - if (lenLimit > p->matchMaxLen) - lenLimit = p->matchMaxLen; - p->lenLimit = lenLimit; - } - p->posLimit = p->pos + limit; + p->posLimit = p->pos + n; } @@ -363,7 +452,7 @@ void MatchFinder_Init_LowHash(CMatchFinder *p) { size_t i; CLzRef *items = p->hash; - size_t numItems = p->fixedHashSize; + const size_t numItems = p->fixedHashSize; for (i = 0; i < numItems; i++) items[i] = kEmptyHashValue; } @@ -373,80 +462,315 @@ void MatchFinder_Init_HighHash(CMatchFinder *p) { size_t i; CLzRef *items = p->hash + p->fixedHashSize; - size_t numItems = (size_t)p->hashMask + 1; + const size_t numItems = (size_t)p->hashMask + 1; for (i = 0; i < numItems; i++) items[i] = kEmptyHashValue; } -void MatchFinder_Init_3(CMatchFinder *p, int readData) +void MatchFinder_Init_4(CMatchFinder *p) 
{ - p->cyclicBufferPos = 0; p->buffer = p->bufferBase; - p->pos = - p->streamPos = p->cyclicBufferSize; + { + /* kEmptyHashValue = 0 (Zero) is used in hash tables as NO-VALUE marker. + the code in CMatchFinderMt expects (pos = 1) */ + p->pos = + p->streamPos = + 1; // it's smallest optimal value. do not change it + // 0; // for debug + } p->result = SZ_OK; p->streamEndWasReached = 0; - - if (readData) - MatchFinder_ReadBlock(p); - - MatchFinder_SetLimits(p); } +// (CYC_TO_POS_OFFSET == 0) is expected by some optimized code +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug + void MatchFinder_Init(CMatchFinder *p) { MatchFinder_Init_HighHash(p); MatchFinder_Init_LowHash(p); - MatchFinder_Init_3(p, True); + MatchFinder_Init_4(p); + // if (readData) + MatchFinder_ReadBlock(p); + + /* if we init (cyclicBufferPos = pos), then we can use one variable + instead of both (cyclicBufferPos) and (pos) : only before (cyclicBufferPos) wrapping */ + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); // init with relation to (pos) + // p->cyclicBufferPos = 0; // smallest value + // p->son[0] = p->son[1] = 0; // unused: we can init skipped record for speculated accesses. 
+ MatchFinder_SetLimits(p); } - -static UInt32 MatchFinder_GetSubValue(CMatchFinder *p) + + +#ifdef MY_CPU_X86_OR_AMD64 + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) \ + || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) + #define USE_SATUR_SUB_128 + #define USE_AVX2 + #define ATTRIB_SSE41 __attribute__((__target__("sse4.1"))) + #define ATTRIB_AVX2 __attribute__((__target__("avx2"))) + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1600) + #define USE_SATUR_SUB_128 + #if (_MSC_VER >= 1900) + #define USE_AVX2 + #include // avx + #endif + #endif + #endif + +// #elif defined(MY_CPU_ARM_OR_ARM64) +#elif defined(MY_CPU_ARM64) + + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_SATUR_SUB_128 + #ifdef MY_CPU_ARM64 + // #define ATTRIB_SSE41 __attribute__((__target__(""))) + #else + // #define ATTRIB_SSE41 __attribute__((__target__("fpu=crypto-neon-fp-armv8"))) + #endif + + #elif defined(_MSC_VER) + #if (_MSC_VER >= 1910) + #define USE_SATUR_SUB_128 + #endif + #endif + + #if defined(_MSC_VER) && defined(MY_CPU_ARM64) + #include + #else + #include + #endif + +#endif + +/* +#ifndef ATTRIB_SSE41 + #define ATTRIB_SSE41 +#endif +#ifndef ATTRIB_AVX2 + #define ATTRIB_AVX2 +#endif +*/ + +#ifdef USE_SATUR_SUB_128 + +// #define _SHOW_HW_STATUS + +#ifdef _SHOW_HW_STATUS +#include +#define _PRF(x) x +_PRF(;) +#else +#define _PRF(x) +#endif + +#ifdef MY_CPU_ARM_OR_ARM64 + +#ifdef MY_CPU_ARM64 +// #define FORCE_SATUR_SUB_128 +#endif + +typedef uint32x4_t v128; +#define SASUB_128(i) \ + *(v128 *)(void *)(items + (i) * 4) = \ + vsubq_u32(vmaxq_u32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); + +#else + +#include // sse4.1 + +typedef __m128i v128; +#define SASUB_128(i) \ + *(v128 *)(void *)(items + (i) * 4) = \ + _mm_sub_epi32(_mm_max_epu32(*(const v128 *)(const void *)(items + (i) * 4), sub2), sub2); // SSE 4.1 + +#endif + + + +MY_NO_INLINE +static +#ifdef 
ATTRIB_SSE41 +ATTRIB_SSE41 +#endif +void LzFind_SaturSub_128(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - return (p->pos - p->historySize - 1) & kNormalizeMask; + v128 sub2 = + #ifdef MY_CPU_ARM_OR_ARM64 + vdupq_n_u32(subValue); + #else + _mm_set_epi32((Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + #endif + do + { + SASUB_128(0) + SASUB_128(1) + SASUB_128(2) + SASUB_128(3) + items += 4 * 4; + } + while (items != lim); } -void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) + + +#ifdef USE_AVX2 + +#include // avx + +#define SASUB_256(i) *(__m256i *)(void *)(items + (i) * 8) = _mm256_sub_epi32(_mm256_max_epu32(*(const __m256i *)(const void *)(items + (i) * 8), sub2), sub2); // AVX2 + +MY_NO_INLINE +static +#ifdef ATTRIB_AVX2 +ATTRIB_AVX2 +#endif +void LzFind_SaturSub_256(UInt32 subValue, CLzRef *items, const CLzRef *lim) { - if (numItems == 0) - return; - { - const CLzRef *lim = items + numItems - 1; - for (; items < lim; items += 2) + __m256i sub2 = _mm256_set_epi32( + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue, + (Int32)subValue, (Int32)subValue, (Int32)subValue, (Int32)subValue); + do { - UInt32 v, m; - v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; - v = items[1]; items[0] = m; m = v - subValue; if (v < subValue) m = kEmptyHashValue; - items[1] = m; + SASUB_256(0) + SASUB_256(1) + items += 2 * 8; } - if (items == lim) + while (items != lim); +} +#endif // USE_AVX2 + +#ifndef FORCE_SATUR_SUB_128 +typedef void (MY_FAST_CALL *LZFIND_SATUR_SUB_CODE_FUNC)( + UInt32 subValue, CLzRef *items, const CLzRef *lim); +static LZFIND_SATUR_SUB_CODE_FUNC g_LzFind_SaturSub; +#endif // FORCE_SATUR_SUB_128 + +#endif // USE_SATUR_SUB_128 + + +// kEmptyHashValue must be zero +// #define SASUB_32(i) v = items[i]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; items[i] = m; +#define SASUB_32(i) v = items[i]; if (v < subValue) v = subValue; items[i] = v - subValue; + 
+#ifdef FORCE_SATUR_SUB_128 + +#define DEFAULT_SaturSub LzFind_SaturSub_128 + +#else + +#define DEFAULT_SaturSub LzFind_SaturSub_32 + +MY_NO_INLINE +static void LzFind_SaturSub_32(UInt32 subValue, CLzRef *items, const CLzRef *lim) +{ + do { - UInt32 v, m; - v = items[0]; m = v - subValue; if (v < subValue) m = kEmptyHashValue; - items[0] = m; - } + UInt32 v; + SASUB_32(0) + SASUB_32(1) + SASUB_32(2) + SASUB_32(3) + SASUB_32(4) + SASUB_32(5) + SASUB_32(6) + SASUB_32(7) + items += 8; } + while (items != lim); } -static void MatchFinder_Normalize(CMatchFinder *p) +#endif + + +MY_NO_INLINE +void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems) { - UInt32 subValue = MatchFinder_GetSubValue(p); - MatchFinder_Normalize3(subValue, p->hash, p->numRefs); - MatchFinder_ReduceOffsets(p, subValue); + #define K_NORM_ALIGN_BLOCK_SIZE (1 << 6) + + CLzRef *lim; + + for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (K_NORM_ALIGN_BLOCK_SIZE - 1)) != 0; numItems--) + { + UInt32 v; + SASUB_32(0); + items++; + } + + { + #define K_NORM_ALIGN_MASK (K_NORM_ALIGN_BLOCK_SIZE / 4 - 1) + lim = items + (numItems & ~(size_t)K_NORM_ALIGN_MASK); + numItems &= K_NORM_ALIGN_MASK; + if (items != lim) + { + #if defined(USE_SATUR_SUB_128) && !defined(FORCE_SATUR_SUB_128) + if (g_LzFind_SaturSub) + g_LzFind_SaturSub(subValue, items, lim); + else + #endif + DEFAULT_SaturSub(subValue, items, lim); + } + items = lim; + } + + + for (; numItems != 0; numItems--) + { + UInt32 v; + SASUB_32(0); + items++; + } } + +// call MatchFinder_CheckLimits() only after (p->pos++) update + MY_NO_INLINE static void MatchFinder_CheckLimits(CMatchFinder *p) { + if (// !p->streamEndWasReached && p->result == SZ_OK && + p->keepSizeAfter == GET_AVAIL_BYTES(p)) + { + // we try to read only in exact state (p->keepSizeAfter == GET_AVAIL_BYTES(p)) + if (MatchFinder_NeedMove(p)) + MatchFinder_MoveBlock(p); + MatchFinder_ReadBlock(p); + } + if (p->pos == kMaxValForNormalize) - MatchFinder_Normalize(p); - if 
(!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) - MatchFinder_CheckAndMoveAndRead(p); + if (GET_AVAIL_BYTES(p) >= p->numHashBytes) // optional optimization for last bytes of data. + /* + if we disable normalization for last bytes of data, and + if (data_size == 4 GiB), we don't call wasteful normalization, + but (pos) will be wrapped over Zero (0) in that case. + And we cannot resume later to normal operation + */ + { + // MatchFinder_Normalize(p); + /* after normalization we need (p->pos >= p->historySize + 1); */ + /* we can reduce subValue to aligned value, if want to keep alignment + of (p->pos) and (p->buffer) for speculated accesses. */ + const UInt32 subValue = (p->pos - p->historySize - 1) /* & ~(UInt32)(kNormalizeAlign - 1) */; + // const UInt32 subValue = (1 << 15); // for debug + // printf("\nMatchFinder_Normalize() subValue == 0x%x\n", subValue); + size_t numSonRefs = p->cyclicBufferSize; + if (p->btMode) + numSonRefs <<= 1; + Inline_MatchFinder_ReduceOffsets(p, subValue); + MatchFinder_Normalize3(subValue, p->hash, (size_t)p->hashSizeSum + numSonRefs); + } + if (p->cyclicBufferPos == p->cyclicBufferSize) p->cyclicBufferPos = 0; + MatchFinder_SetLimits(p); } @@ -455,9 +779,9 @@ static void MatchFinder_CheckLimits(CMatchFinder *p) (lenLimit > maxLen) */ MY_FORCE_INLINE -static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, unsigned maxLen) +static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, unsigned maxLen) { /* son[_cyclicBufferPos] = curMatch; @@ -465,7 +789,7 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) - return distances;
+ return d; { const Byte *pb = cur - delta; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; @@ -478,10 +802,10 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos if (maxLen < len) { maxLen = len; - *distances++ = len; - *distances++ = delta - 1; + *d++ = len; + *d++ = delta - 1; if (len == lenLimit) - return distances; + return d; } } } @@ -490,9 +814,15 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos const Byte *lim = cur + lenLimit; son[_cyclicBufferPos] = curMatch; + do { - UInt32 delta = pos - curMatch; + UInt32 delta; + + if (curMatch == 0) + break; + // if (curMatch2 >= curMatch) return NULL; + delta = pos - curMatch; if (delta >= _cyclicBufferSize) break; { @@ -506,19 +836,19 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos { if (++c == lim) { - distances[0] = (UInt32)(lim - cur); - distances[1] = delta - 1; - return distances + 2; + d[0] = (UInt32)(lim - cur); + d[1] = delta - 1; + return d + 2; } } { - unsigned len = (unsigned)(c - cur); + const unsigned len = (unsigned)(c - cur); if (maxLen < len) { maxLen = len; - distances[0] = (UInt32)len; - distances[1] = delta - 1; - distances += 2; + d[0] = (UInt32)len; + d[1] = delta - 1; + d += 2; } } } @@ -526,31 +856,36 @@ static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos } while (--cutValue); - return distances; + return d; } MY_FORCE_INLINE UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, - UInt32 *distances, UInt32 maxLen) + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, + UInt32 *d, UInt32 maxLen) { CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); unsigned len0 = 0, len1 = 0; - for (;;) + + UInt32 cmCheck; + + // if (curMatch 
>= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= _cyclicBufferSize) + cmCheck = 0; + + if (cmCheck < curMatch) + do { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return distances; - } + const UInt32 delta = pos - curMatch; { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); - UInt32 pair0 = pair[0]; + const UInt32 pair0 = pair[0]; if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) @@ -560,48 +895,60 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt if (maxLen < len) { maxLen = (UInt32)len; - *distances++ = (UInt32)len; - *distances++ = delta - 1; + *d++ = (UInt32)len; + *d++ = delta - 1; if (len == lenLimit) { *ptr1 = pair0; *ptr0 = pair[1]; - return distances; + return d; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; + // const UInt32 curMatch2 = pair[1]; + // if (curMatch2 >= curMatch) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } + // curMatch = curMatch2; + curMatch = pair[1]; ptr1 = pair + 1; - curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; + curMatch = pair[0]; ptr0 = pair; - curMatch = *ptr0; len0 = len; } } } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return d; } + static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); unsigned len0 = 0, len1 = 0; - for (;;) + + UInt32 cmCheck; + + cmCheck = (UInt32)(pos - _cyclicBufferSize); + if ((UInt32)pos <= 
_cyclicBufferSize) + cmCheck = 0; + + if (// curMatch >= pos || // failure + cmCheck < curMatch) + do { - UInt32 delta = pos - curMatch; - if (cutValue-- == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - return; - } + const UInt32 delta = pos - curMatch; { CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; @@ -623,43 +970,62 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const if (pb[len] < cur[len]) { *ptr1 = curMatch; + curMatch = pair[1]; ptr1 = pair + 1; - curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; + curMatch = pair[0]; ptr0 = pair; - curMatch = *ptr0; len0 = len; } } } + while(--cutValue && cmCheck < curMatch); + + *ptr0 = *ptr1 = kEmptyHashValue; + return; } + #define MOVE_POS \ ++p->cyclicBufferPos; \ p->buffer++; \ - if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p); + { const UInt32 pos1 = p->pos + 1; p->pos = pos1; if (pos1 == p->posLimit) MatchFinder_CheckLimits(p); } -#define MOVE_POS_RET MOVE_POS return (UInt32)offset; +#define MOVE_POS_RET MOVE_POS return distances; -static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } +MY_NO_INLINE +static void MatchFinder_MovePos(CMatchFinder *p) +{ + /* we go here at the end of stream data, when (avail < num_hash_bytes) + We don't update sons[cyclicBufferPos << btMode]. + So (sons) record will contain junk. And we cannot resume match searching + to normal operation, even if we will provide more input data in buffer. 
+ p->sons[p->cyclicBufferPos << p->btMode] = 0; // kEmptyHashValue + if (p->btMode) + p->sons[(p->cyclicBufferPos << p->btMode) + 1] = 0; // kEmptyHashValue + */ + MOVE_POS; +} #define GET_MATCHES_HEADER2(minLen, ret_op) \ - unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \ + unsigned lenLimit; UInt32 hv; Byte *cur; UInt32 curMatch; \ lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \ cur = p->buffer; -#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) -#define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) +#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return distances) +#define SKIP_HEADER(minLen) do { GET_MATCHES_HEADER2(minLen, continue) + +#define MF_PARAMS(p) lenLimit, curMatch, p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue -#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue +#define SKIP_FOOTER SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS; } while (--num); #define GET_MATCHES_FOOTER_BASE(_maxLen_, func) \ - offset = (unsigned)(func((UInt32)lenLimit, curMatch, MF_PARAMS(p), \ - distances + offset, (UInt32)(_maxLen_)) - distances); MOVE_POS_RET; + distances = func(MF_PARAMS(p), \ + distances, (UInt32)_maxLen_); MOVE_POS_RET; #define GET_MATCHES_FOOTER_BT(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, GetMatchesSpec1) @@ -667,42 +1033,45 @@ static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; } #define GET_MATCHES_FOOTER_HC(_maxLen_) \ GET_MATCHES_FOOTER_BASE(_maxLen_, Hc_GetMatchesSpec) -#define SKIP_FOOTER \ - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS; + #define UPDATE_maxLen { \ - ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)d2; \ const Byte *c = cur + maxLen; \ const Byte *lim = cur + lenLimit; \ for (; c != lim; c++) if (*(c + diff) != *c) break; \ maxLen = (unsigned)(c - cur); } 
-static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - unsigned offset; GET_MATCHES_HEADER(2) HASH2_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = 0; GET_MATCHES_FOOTER_BT(1) } -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - unsigned offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = 0; GET_MATCHES_FOOTER_BT(2) } -static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) + +#define SET_mmm \ + mmm = p->cyclicBufferSize; \ + if (pos < mmm) \ + mmm = pos; + + +static UInt32* Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { + UInt32 mmm; UInt32 h2, d2, pos; - unsigned maxLen, offset; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(3) @@ -718,18 +1087,19 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) hash[h2] = pos; (hash + kFix3HashSize)[hv] = pos; + SET_mmm + maxLen = 2; - offset = 0; - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { UPDATE_maxLen distances[0] = (UInt32)maxLen; distances[1] = d2 - 1; - offset = 2; + distances += 2; if (maxLen == lenLimit) { - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET; } } @@ -738,10 +1108,11 @@ static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { + UInt32 mmm; UInt32 h2, h3, d2, d3, pos; - unsigned maxLen, offset; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) @@ -758,43 +1129,44 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) (hash + kFix3HashSize)[h3] = pos; 
(hash + kFix4HashSize)[hv] = pos; + SET_mmm + maxLen = 3; - offset = 0; for (;;) { - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { distances[0] = 2; distances[1] = d2 - 1; - offset = 2; + distances += 2; if (*(cur - d2 + 2) == cur[2]) { - // distances[0] = 3; + // distances[-2] = 3; } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { d2 = d3; - distances[2 + 1] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + distances += 2; } else break; } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { d2 = d3; distances[1] = d3 - 1; - offset = 2; + distances += 2; } else break; UPDATE_maxLen - distances[(size_t)offset - 2] = (UInt32)maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { - SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET } break; @@ -804,9 +1176,10 @@ static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, d2, d3, maxLen, offset, pos; + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -826,45 +1199,46 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; + SET_mmm + maxLen = 4; - offset = 0; for (;;) { - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { distances[0] = 2; distances[1] = d2 - 1; - offset = 2; + distances += 2; if (*(cur - d2 + 2) == cur[2]) { } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { - distances[3] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + distances += 2; d2 = d3; } else 
break; } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { distances[1] = d3 - 1; - offset = 2; + distances += 2; d2 = d3; } else break; - distances[(size_t)offset - 2] = 3; + distances[-2] = 3; if (*(cur - d2 + 3) != cur[3]) break; UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { - SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); + SkipMatchesSpec(MF_PARAMS(p)); MOVE_POS_RET; } break; @@ -874,10 +1248,11 @@ static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32* Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { + UInt32 mmm; UInt32 h2, h3, d2, d3, pos; - unsigned maxLen, offset; + unsigned maxLen; UInt32 *hash; GET_MATCHES_HEADER(4) @@ -894,40 +1269,41 @@ static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) (hash + kFix3HashSize)[h3] = pos; (hash + kFix4HashSize)[hv] = pos; + SET_mmm + maxLen = 3; - offset = 0; for (;;) { - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { distances[0] = 2; distances[1] = d2 - 1; - offset = 2; + distances += 2; if (*(cur - d2 + 2) == cur[2]) { - // distances[0] = 3; + // distances[-2] = 3; } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { d2 = d3; - distances[2 + 1] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + distances += 2; } else break; } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { d2 = d3; distances[1] = d3 - 1; - offset = 2; + distances += 2; } else break; UPDATE_maxLen - distances[(size_t)offset - 2] = (UInt32)maxLen; + distances[-2] = (UInt32)maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; @@ -940,9 +1316,10 @@ static UInt32 
Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +static UInt32 * Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - UInt32 h2, h3, d2, d3, maxLen, offset, pos; + UInt32 mmm; + UInt32 h2, h3, d2, d3, maxLen, pos; UInt32 *hash; GET_MATCHES_HEADER(5) @@ -962,42 +1339,43 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) // (hash + kFix4HashSize)[h4] = pos; (hash + kFix5HashSize)[hv] = pos; + SET_mmm + maxLen = 4; - offset = 0; for (;;) { - if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur) + if (d2 < mmm && *(cur - d2) == *cur) { distances[0] = 2; distances[1] = d2 - 1; - offset = 2; + distances += 2; if (*(cur - d2 + 2) == cur[2]) { } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { - distances[3] = d3 - 1; - offset = 4; + distances[1] = d3 - 1; + distances += 2; d2 = d3; } else break; } - else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur) + else if (d3 < mmm && *(cur - d3) == *cur) { distances[1] = d3 - 1; - offset = 2; + distances += 2; d2 = d3; } else break; - distances[(size_t)offset - 2] = 3; + distances[-2] = 3; if (*(cur - d2 + 3) != cur[3]) break; UPDATE_maxLen - distances[(size_t)offset - 2] = maxLen; + distances[-2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; @@ -1010,86 +1388,75 @@ static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) } -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances) { - unsigned offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - offset = 0; GET_MATCHES_FOOTER_HC(2) } static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(2) { - SKIP_HEADER(2) HASH2_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - SKIP_FOOTER } 
- while (--num != 0); + SKIP_FOOTER } void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(3) { - SKIP_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hv]; p->hash[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(3) { UInt32 h2; UInt32 *hash; - SKIP_HEADER(3) HASH3_CALC; hash = p->hash; curMatch = (hash + kFix3HashSize)[hv]; hash[h2] = (hash + kFix3HashSize)[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(4) { UInt32 h2, h3; UInt32 *hash; - SKIP_HEADER(4) HASH4_CALC; hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = (hash + kFix4HashSize)[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do + SKIP_HEADER(5) { UInt32 h2, h3; UInt32 *hash; - SKIP_HEADER(5) HASH5_CALC; hash = p->hash; curMatch = (hash + kFix5HashSize)[hv]; @@ -1097,66 +1464,84 @@ static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) (hash + kFix3HashSize)[h3] = // (hash + kFix4HashSize)[h4] = (hash + kFix5HashSize)[hv] = p->pos; - SKIP_FOOTER } - while (--num != 0); + SKIP_FOOTER } + +#define HC_SKIP_HEADER(minLen) \ + do { if (p->lenLimit < minLen) { MatchFinder_MovePos(p); num--; continue; } { \ + Byte *cur; \ + UInt32 *hash; \ + UInt32 *son; \ + UInt32 pos = p->pos; \ + UInt32 num2 = num; \ + /* (p->pos == p->posLimit) is not allowed here !!! 
*/ \ + { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + num -= num2; \ + { const UInt32 cycPos = p->cyclicBufferPos; \ + son = p->son + cycPos; \ + p->cyclicBufferPos = cycPos + num2; } \ + cur = p->buffer; \ + hash = p->hash; \ + do { \ + UInt32 curMatch; \ + UInt32 hv; + + +#define HC_SKIP_FOOTER \ + cur++; pos++; *son++ = curMatch; \ + } while (--num2); \ + p->buffer = cur; \ + p->pos = pos; \ + if (pos == p->posLimit) MatchFinder_CheckLimits(p); \ + }} while(num); \ + + static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { + HC_SKIP_HEADER(4) + UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(4) HASH4_CALC; - hash = p->hash; curMatch = (hash + kFix4HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = - (hash + kFix4HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + (hash + kFix4HashSize)[hv] = pos; + + HC_SKIP_FOOTER } + static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { + HC_SKIP_HEADER(5) + UInt32 h2, h3; - UInt32 *hash; - SKIP_HEADER(5) - HASH5_CALC; - hash = p->hash; + HASH5_CALC curMatch = (hash + kFix5HashSize)[hv]; hash [h2] = (hash + kFix3HashSize)[h3] = // (hash + kFix4HashSize)[h4] = - (hash + kFix5HashSize)[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + (hash + kFix5HashSize)[hv] = pos; + + HC_SKIP_FOOTER } void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num) { - do - { - SKIP_HEADER(3) + HC_SKIP_HEADER(3) + HASH_ZIP_CALC; - curMatch = p->hash[hv]; - p->hash[hv] = p->pos; - p->son[p->cyclicBufferPos] = curMatch; - MOVE_POS - } - while (--num != 0); + curMatch = hash[hv]; + hash[hv] = pos; + + HC_SKIP_FOOTER } -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) + +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable) { vTable->Init = (Mf_Init_Func)MatchFinder_Init; vTable->GetNumAvailableBytes = 
(Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; @@ -1195,3 +1580,42 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable) vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip; } } + + + +void LzFindPrepare() +{ + #ifndef FORCE_SATUR_SUB_128 + #ifdef USE_SATUR_SUB_128 + LZFIND_SATUR_SUB_CODE_FUNC f = NULL; + #ifdef MY_CPU_ARM_OR_ARM64 + { + if (CPU_IsSupported_NEON()) + { + #pragma message ("=== LzFind NEON") + _PRF(printf("\n=== LzFind NEON\n")); + f = LzFind_SaturSub_128; + } + // f = 0; // for debug + } + #else // MY_CPU_ARM_OR_ARM64 + if (CPU_IsSupported_SSE41()) + { + #pragma message ("=== LzFind SSE41") + _PRF(printf("\n=== LzFind SSE41\n")); + f = LzFind_SaturSub_128; + + #ifdef USE_AVX2 + if (CPU_IsSupported_AVX2()) + { + #pragma message ("=== LzFind AVX2") + _PRF(printf("\n=== LzFind AVX2\n")); + f = LzFind_SaturSub_256; + } + #endif + } + #endif // MY_CPU_ARM_OR_ARM64 + g_LzFind_SaturSub = f; + #endif // USE_SATUR_SUB_128 + #endif // FORCE_SATUR_SUB_128 +} diff --git a/C/LzFind.h b/C/LzFind.h index c613c739..eea873ff 100644 --- a/C/LzFind.h +++ b/C/LzFind.h @@ -1,5 +1,5 @@ /* LzFind.h -- Match finder for LZ algorithms -2021-02-09 : Igor Pavlov : Public domain */ +2021-07-13 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_H #define __LZ_FIND_H @@ -15,7 +15,7 @@ typedef struct _CMatchFinder Byte *buffer; UInt32 pos; UInt32 posLimit; - UInt32 streamPos; + UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). 
Use (UInt32)(streamPos - pos) */ UInt32 lenLimit; UInt32 cyclicBufferPos; @@ -51,17 +51,19 @@ typedef struct _CMatchFinder UInt64 expectedDataSize; } CMatchFinder; -#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) +#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer) -#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) +#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos)) +/* #define Inline_MatchFinder_IsFinishedOK(p) \ ((p)->streamEndWasReached \ && (p)->streamPos == (p)->pos \ && (!(p)->directInput || (p)->directInputRem == 0)) +*/ int MatchFinder_NeedMove(CMatchFinder *p); -// Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); +/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */ void MatchFinder_MoveBlock(CMatchFinder *p); void MatchFinder_ReadIfRequired(CMatchFinder *p); @@ -76,10 +78,21 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, ISzAllocPtr alloc); void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems); -void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); +// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue); + +/* +#define Inline_MatchFinder_InitPos(p, val) \ + (p)->pos = (val); \ + (p)->streamPos = (val); +*/ + +#define Inline_MatchFinder_ReduceOffsets(p, subValue) \ + (p)->pos -= (subValue); \ + (p)->streamPos -= (subValue); + UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son, - UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, UInt32 *distances, UInt32 maxLen); /* @@ -91,7 +104,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt typedef void (*Mf_Init_Func)(void *object); typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void 
*object); typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object); -typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances); +typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances); typedef void (*Mf_Skip_Func)(void *object, UInt32); typedef struct _IMatchFinder @@ -101,21 +114,23 @@ typedef struct _IMatchFinder Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; Mf_GetMatches_Func GetMatches; Mf_Skip_Func Skip; -} IMatchFinder; +} IMatchFinder2; -void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable); +void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable); void MatchFinder_Init_LowHash(CMatchFinder *p); void MatchFinder_Init_HighHash(CMatchFinder *p); -void MatchFinder_Init_3(CMatchFinder *p, int readData); +void MatchFinder_Init_4(CMatchFinder *p); void MatchFinder_Init(CMatchFinder *p); -UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); -UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); +UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances); void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num); +void LzFindPrepare(void); + EXTERN_C_END #endif diff --git a/C/LzFindMt.c b/C/LzFindMt.c index 49369ec0..da339ebf 100644 --- a/C/LzFindMt.c +++ b/C/LzFindMt.c @@ -1,8 +1,10 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2021-04-01 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #include "Precomp.h" +// #include + #include "CpuArch.h" #include "LzHash.h" @@ -10,22 +12,34 @@ // #define LOG_ITERS +// #define LOG_THREAD + +#ifdef LOG_THREAD +#include +#define PRF(x) x +#else +#define PRF(x) +#endif + #ifdef LOG_ITERS #include -static UInt64 g_NumIters_Tree; -static UInt64 g_NumIters_Loop; +extern UInt64 g_NumIters_Tree; +extern UInt64 
g_NumIters_Loop; +extern UInt64 g_NumIters_Bytes; #define LOG_ITER(x) x #else #define LOG_ITER(x) #endif -#define kMtHashBlockSize (1 << 17) +#define kMtHashBlockSize ((UInt32)1 << 17) #define kMtHashNumBlocks (1 << 1) -#define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) -#define kMtBtBlockSize (1 << 16) +#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize) + +#define kMtBtBlockSize ((UInt32)1 << 16) #define kMtBtNumBlocks (1 << 4) -#define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) + +#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize) /* HASH functions: @@ -36,11 +50,17 @@ static UInt64 g_NumIters_Loop; (crc[0...0xFF] & 0xFF) <-> [0...0xFF] */ +#define MF(mt) ((mt)->MatchFinder) +#define MF_CRC (p->crc) + +// #define MF(mt) (&(mt)->MatchFinder) +// #define MF_CRC (p->MatchFinder.crc) + #define MT_HASH2_CALC \ - h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); + h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1); #define MT_HASH3_CALC { \ - UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ + UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \ h2 = temp & (kHash2Size - 1); \ h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } @@ -59,93 +79,137 @@ static UInt64 g_NumIters_Loop; */ +MY_NO_INLINE static void MtSync_Construct(CMtSync *p) { + p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; p->csWasEntered = False; Thread_Construct(&p->thread); Event_Construct(&p->canStart); - Event_Construct(&p->wasStarted); Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); - p->affinity = 0; } +#define DEBUG_BUFFER_LOCK // define it to debug lock state + +#ifdef DEBUG_BUFFER_LOCK +#include +#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1); +#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1); +#else +#define BUFFER_MUST_BE_LOCKED(p) +#define BUFFER_MUST_BE_UNLOCKED(p) +#endif + +#define LOCK_BUFFER(p) { \ + 
BUFFER_MUST_BE_UNLOCKED(p); \ + CriticalSection_Enter(&(p)->cs); \ + (p)->csWasEntered = True; } + +#define UNLOCK_BUFFER(p) { \ + BUFFER_MUST_BE_LOCKED(p); \ + CriticalSection_Leave(&(p)->cs); \ + (p)->csWasEntered = False; } + + MY_NO_INLINE -static void MtSync_GetNextBlock(CMtSync *p) +static UInt32 MtSync_GetNextBlock(CMtSync *p) { + UInt32 numBlocks = 0; if (p->needStart) { + BUFFER_MUST_BE_UNLOCKED(p) p->numProcessedBlocks = 1; p->needStart = False; p->stopWriting = False; p->exit = False; - Event_Reset(&p->wasStarted); Event_Reset(&p->wasStopped); - Event_Set(&p->canStart); - Event_Wait(&p->wasStarted); - - // if (mt) MatchFinder_Init_LowHash(mt->MatchFinder); } else { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; - p->numProcessedBlocks++; + UNLOCK_BUFFER(p) + // we free current block + numBlocks = p->numProcessedBlocks++; Semaphore_Release1(&p->freeSemaphore); } + + // buffer is UNLOCKED here Semaphore_Wait(&p->filledSemaphore); - CriticalSection_Enter(&p->cs); - p->csWasEntered = True; + LOCK_BUFFER(p); + return numBlocks; } -/* MtSync_StopWriting must be called if Writing was started */ +/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ + +MY_NO_INLINE static void MtSync_StopWriting(CMtSync *p) { - UInt32 myNumBlocks = p->numProcessedBlocks; if (!Thread_WasCreated(&p->thread) || p->needStart) return; - p->stopWriting = True; + + PRF(printf("\nMtSync_StopWriting %p\n", p)); + if (p->csWasEntered) { - CriticalSection_Leave(&p->cs); - p->csWasEntered = False; + /* we don't use buffer in this thread after StopWriting(). + So we UNLOCK buffer. + And we restore default UNLOCKED state for stopped thread */ + UNLOCK_BUFFER(p) } - Semaphore_Release1(&p->freeSemaphore); - + + /* We send (p->stopWriting) message and release freeSemaphore + to free current block. + So the thread will see (p->stopWriting) at some + iteration after Wait(freeSemaphore). 
+ The thread doesn't need to fill all avail free blocks, + so we can get fast thread stop. + */ + + p->stopWriting = True; + Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!! + + PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p)); Event_Wait(&p->wasStopped); + PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p)); + + /* 21.03 : we don't restore samaphore counters here. + We will recreate and reinit samaphores in next start */ - while (myNumBlocks++ != p->numProcessedBlocks) - { - Semaphore_Wait(&p->filledSemaphore); - Semaphore_Release1(&p->freeSemaphore); - } p->needStart = True; } + +MY_NO_INLINE static void MtSync_Destruct(CMtSync *p) { + PRF(printf("\nMtSync_Destruct %p\n", p)); + if (Thread_WasCreated(&p->thread)) { + /* we want thread to be in Stopped state before sending EXIT command. + note: stop(btSync) will stop (htSync) also */ MtSync_StopWriting(p); + /* thread in Stopped state here : (p->needStart == true) */ p->exit = True; - if (p->needStart) - Event_Set(&p->canStart); - Thread_Wait_Close(&p->thread); + // if (p->needStart) // it's (true) + Event_Set(&p->canStart); // we send EXIT command to thread + Thread_Wait_Close(&p->thread); // we wait thread finishing } + if (p->csWasInitialized) { CriticalSection_Delete(&p->cs); p->csWasInitialized = False; } + p->csWasEntered = False; Event_Close(&p->canStart); - Event_Close(&p->wasStarted); Event_Close(&p->wasStopped); Semaphore_Close(&p->freeSemaphore); Semaphore_Close(&p->filledSemaphore); @@ -153,48 +217,75 @@ static void MtSync_Destruct(CMtSync *p) p->wasCreated = False; } -#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } -static SRes MtSync_Create2(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) +// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } +// we want to get real system error codes here instead of SZ_ERROR_THREAD +#define RINOK_THREAD(x) RINOK(x) + + +// call it before each new file 
(when new starting is required): +MY_NO_INLINE +static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) +{ + WRes wres; + // BUFFER_MUST_BE_UNLOCKED(p) + if (!p->needStart || p->csWasEntered) + return SZ_ERROR_FAIL; + wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks); + if (wres == 0) + wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks); + return MY_SRes_HRESULT_FROM_WRes(wres); +} + + +static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) { WRes wres; + if (p->wasCreated) return SZ_OK; RINOK_THREAD(CriticalSection_Init(&p->cs)); p->csWasInitialized = True; + p->csWasEntered = False; RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); - RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); - - RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); - RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; + p->exit = True; /* p->exit is unused before (canStart) Event. 
+ But in case of some unexpected code failure we will get fast exit from thread */ + + // return ERROR_TOO_MANY_POSTS; // for debug + // return EINVAL; // for debug if (p->affinity != 0) wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); else wres = Thread_Create(&p->thread, startAddress, obj); + RINOK_THREAD(wres); p->wasCreated = True; return SZ_OK; } -static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj, UInt32 numBlocks) + +MY_NO_INLINE +static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) { - SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); - if (res != SZ_OK) - MtSync_Destruct(p); - return res; + const WRes wres = MtSync_Create_WRes(p, startAddress, obj); + if (wres == 0) + return 0; + MtSync_Destruct(p); + return MY_SRes_HRESULT_FROM_WRes(wres); } -// static void MtSync_Init(CMtSync *p) { p->needStart = True; } -#define kMtMaxValForNormalize 0xFFFFFFFF -// #define kMtMaxValForNormalize ((1 << 25) + (1 << 20)) +// ---------- HASH THREAD ---------- +#define kMtMaxValForNormalize 0xFFFFFFFF +// #define kMtMaxValForNormalize ((1 << 21)) // for debug +// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses #ifdef MY_CPU_LE_UNALIGN #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8) @@ -349,27 +440,28 @@ DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from static void HashThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->hashSync; + PRF(printf("\nHashThreadFunc\n")); + for (;;) { - UInt32 numProcessedBlocks = 0; + UInt32 blockIndex = 0; + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n")); Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); + PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n")); + if (p->exit) + { + PRF(printf("\nHashThreadFunc : exit \n")); + return; + } - MatchFinder_Init_HighHash(mt->MatchFinder); + MatchFinder_Init_HighHash(MF(mt)); for (;;) { - if (p->exit) - 
return; - if (p->stopWriting) - { - p->numProcessedBlocks = numProcessedBlocks; - Event_Set(&p->wasStopped); - break; - } + PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos)); { - CMatchFinder *mf = mt->MatchFinder; + CMatchFinder *mf = MF(mt); if (MatchFinder_NeedMove(mf)) { CriticalSection_Enter(&mt->btSync.cs); @@ -382,196 +474,178 @@ static void HashThreadFunc(CMatchFinderMt *mt) mt->pointerToCurPos -= offset; mt->buffer -= offset; } - CriticalSection_Leave(&mt->btSync.cs); CriticalSection_Leave(&mt->hashSync.cs); + CriticalSection_Leave(&mt->btSync.cs); continue; } Semaphore_Wait(&p->freeSemaphore); + if (p->exit) // exit is unexpected here. But we check it here for some failure case + return; + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) + if (p->stopWriting) + break; + MatchFinder_ReadIfRequired(mf); - if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) { - UInt32 subValue = (mf->pos - mf->historySize - 1); - MatchFinder_ReduceOffsets(mf, subValue); - MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); - } - { - UInt32 *heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; - UInt32 num = mf->streamPos - mf->pos; + UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++); + UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf); heads[0] = 2; heads[1] = num; + + /* heads[1] contains the number of avail bytes: + if (avail < mf->numHashBytes) : + { + it means that stream was finished + HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes. + HASH_THREAD doesn't stop, + HASH_THREAD fills only the header (2 numbers) for all next blocks: + {2, NumHashBytes - 1}, {2,0}, {2,0}, ... 
, {2,0} + } + else + { + HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes; + } + */ + if (num >= mf->numHashBytes) { num = num - mf->numHashBytes + 1; if (num > kMtHashBlockSize - 2) num = kMtHashBlockSize - 2; - mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); + + if (mf->pos > (UInt32)kMtMaxValForNormalize - num) + { + const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + Inline_MatchFinder_ReduceOffsets(mf, subValue); + MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); + } + heads[0] = 2 + num; + mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); } - mf->pos += num; + + mf->pos += num; // wrap over zero is allowed at the end of stream mf->buffer += num; } } Semaphore_Release1(&p->filledSemaphore); - } - } -} + } // for() processing end -static void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt *p) -{ - MtSync_GetNextBlock(&p->hashSync); - p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; - p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; - p->hashNumAvail = p->hashBuf[p->hashBufPos++]; + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); + } // for() thread end } -#define kEmptyHashValue 0 + + + +// ---------- BT THREAD ---------- + +/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap. + here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */ +#define CYC_TO_POS_OFFSET 0 +// #define CYC_TO_POS_OFFSET 1 // for debug #define MFMT_GM_INLINE #ifdef MFMT_GM_INLINE /* - we use size_t for _cyclicBufferPos instead of UInt32 + we use size_t for (pos) instead of UInt32 to eliminate "movsx" BUG in old MSVC x64 compiler. 
*/ -MY_NO_INLINE -static UInt32 *GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte *cur, CLzRef *son, - size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, - UInt32 *d, UInt32 _maxLen, const UInt32 *hash, const UInt32 *limit, UInt32 size, UInt32 *posRes) -{ - do - { - UInt32 *_distances = ++d; - UInt32 delta = *hash++; - CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; - CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); - unsigned len0 = 0, len1 = 0; - UInt32 cutValue = _cutValue; - unsigned maxLen = (unsigned)_maxLen; - - /* - #define PREF_STEP 1 - if (size > PREF_STEP) - { - UInt32 delta = hash[PREF_STEP - 1]; - if (delta < _cyclicBufferSize) - { - size_t cyc1 = _cyclicBufferPos + PREF_STEP; - CLzRef *pair = son + ((size_t)(cyc1 - delta + ((delta > cyc1) ? _cyclicBufferSize : 0)) << 1); - Byte b = *(cur + PREF_STEP - delta); - _distances[0] = pair[0]; - _distances[1] = b; - } - } - */ - if (cutValue == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - } - else - for (LOG_ITER(g_NumIters_Tree++);;) - { - LOG_ITER(g_NumIters_Loop++); - { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((_cyclicBufferPos < delta) ? _cyclicBufferSize : 0)) << 1); - const Byte *pb = cur - delta; - unsigned len = (len0 < len1 ? 
len0 : len1); - UInt32 pair0 = *pair; - if (pb[len] == cur[len]) - { - if (++len != lenLimit && pb[len] == cur[len]) - while (++len != lenLimit) - if (pb[len] != cur[len]) - break; - if (maxLen < len) - { - maxLen = len; - *d++ = (UInt32)len; - *d++ = delta - 1; - if (len == lenLimit) - { - UInt32 pair1 = pair[1]; - *ptr1 = pair0; - *ptr0 = pair1; - break; - } - } - } - { - UInt32 curMatch = pos - delta; - // delta = pos - *pair; - // delta = pos - pair[((UInt32)pb[len] - (UInt32)cur[len]) >> 31]; - if (pb[len] < cur[len]) - { - delta = pos - pair[1]; - *ptr1 = curMatch; - ptr1 = pair + 1; - len1 = len; - } - else - { - delta = pos - *pair; - *ptr0 = curMatch; - ptr0 = pair; - len0 = len; - } - } - } - if (--cutValue == 0 || delta >= _cyclicBufferSize) - { - *ptr0 = *ptr1 = kEmptyHashValue; - break; - } - } - pos++; - _cyclicBufferPos++; - cur++; - { - UInt32 num = (UInt32)(d - _distances); - _distances[-1] = num; - } - } - while (d < limit && --size != 0); - *posRes = pos; - return d; -} +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); #endif - static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) { UInt32 numProcessed = 0; UInt32 curPos = 2; - UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); // * 2 + /* GetMatchesSpec() functions don't create (len = 1) + in [len, dist] match pairs, if (p->numHashBytes >= 2) + Also we suppose here that (matchMaxLen >= 2). 
+ So the following code for (reserve) is not required + UInt32 reserve = (p->matchMaxLen * 2); + const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX + if (reserve < kNumHashBytes_Max - 1) + reserve = kNumHashBytes_Max - 1; + const UInt32 limit = kMtBtBlockSize - (reserve); + */ + + const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); + d[1] = p->hashNumAvail; + + if (p->failure_BT) + { + // printf("\n == 1 BtGetMatches() p->failure_BT\n"); + d[0] = 0; + // d[1] = 0; + return; + } while (curPos < limit) { if (p->hashBufPos == p->hashBufPosLimit) { - MatchFinderMt_GetNextBlock_Hash(p); - d[1] = numProcessed + p->hashNumAvail; - if (p->hashNumAvail >= p->numHashBytes) + // MatchFinderMt_GetNextBlock_Hash(p); + UInt32 avail; + { + const UInt32 bi = MtSync_GetNextBlock(&p->hashSync); + const UInt32 k = GET_HASH_BLOCK_OFFSET(bi); + const UInt32 *h = p->hashBuf + k; + avail = h[1]; + p->hashBufPosLimit = k + h[0]; + p->hashNumAvail = avail; + p->hashBufPos = k + 2; + } + + { + /* we must prevent UInt32 overflow for avail total value, + if avail was increased with new hash block */ + UInt32 availSum = numProcessed + avail; + if (availSum < numProcessed) + availSum = (UInt32)(Int32)-1; + d[1] = availSum; + } + + if (avail >= p->numHashBytes) continue; - d[0] = curPos + p->hashNumAvail; + + // if (p->hashBufPos != p->hashBufPosLimit) exit(1); + + /* (avail < p->numHashBytes) + It means that stream was finished. + And (avail) - is a number of remaining bytes, + we fill (d) for (avail) bytes for LZ_THREAD (receiver). 
+ but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */ + + /* here we suppose that we have space enough: + (kMtBtBlockSize - curPos >= p->hashNumAvail) */ + p->hashNumAvail = 0; + d[0] = curPos + avail; d += curPos; - for (; p->hashNumAvail != 0; p->hashNumAvail--) + for (; avail != 0; avail--) *d++ = 0; return; } { UInt32 size = p->hashBufPosLimit - p->hashBufPos; - UInt32 lenLimit = p->matchMaxLen; UInt32 pos = p->pos; UInt32 cyclicBufferPos = p->cyclicBufferPos; + UInt32 lenLimit = p->matchMaxLen; if (lenLimit >= p->hashNumAvail) lenLimit = p->hashNumAvail; { @@ -583,6 +657,14 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) size = size2; } + if (pos > (UInt32)kMtMaxValForNormalize - size) + { + const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1); + pos -= subValue; + p->pos = pos; + MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); + } + #ifndef MFMT_GM_INLINE while (curPos < limit && size-- != 0) { @@ -598,21 +680,45 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) } #else { - UInt32 posRes; - curPos = (UInt32)(GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, - d + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, - d + limit, - size, &posRes) - d); - p->hashBufPos += posRes - pos; - cyclicBufferPos += posRes - pos; - p->buffer += posRes - pos; - pos = posRes; + UInt32 posRes = pos; + const UInt32 *d_end; + { + d_end = GetMatchesSpecN_2( + p->buffer + lenLimit - 1, + pos, p->buffer, p->son, p->cutValue, d + curPos, + p->numHashBytes - 1, p->hashBuf + p->hashBufPos, + d + limit, p->hashBuf + p->hashBufPos + size, + cyclicBufferPos, p->cyclicBufferSize, + &posRes); + } + { + if (!d_end) + { + // printf("\n == 2 BtGetMatches() p->failure_BT\n"); + // internal data failure + p->failure_BT = True; + d[0] = 0; + // d[1] = 0; + return; + } + } + curPos = (UInt32)(d_end - d); + { + const UInt32 processed = 
posRes - pos; + pos = posRes; + p->hashBufPos += processed; + cyclicBufferPos += processed; + p->buffer += processed; + } } #endif - numProcessed += pos - p->pos; - p->hashNumAvail -= pos - p->pos; - p->pos = pos; + { + const UInt32 processed = pos - p->pos; + numProcessed += processed; + p->hashNumAvail -= processed; + p->pos = pos; + } if (cyclicBufferPos == p->cyclicBufferSize) cyclicBufferPos = 0; p->cyclicBufferPos = cyclicBufferPos; @@ -622,31 +728,28 @@ static void BtGetMatches(CMatchFinderMt *p, UInt32 *d) d[0] = curPos; } + static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) { CMtSync *sync = &p->hashSync; + + BUFFER_MUST_BE_UNLOCKED(sync) + if (!sync->needStart) { - CriticalSection_Enter(&sync->cs); - sync->csWasEntered = True; + LOCK_BUFFER(sync) } - BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); - - if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) - { - UInt32 subValue = p->pos - p->cyclicBufferSize; - MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); - p->pos -= subValue; - } + BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex)); + + /* We suppose that we have called GetNextBlock() from start. 
+ So buffer is LOCKED */ - if (!sync->needStart) - { - CriticalSection_Leave(&sync->cs); - sync->csWasEntered = False; - } + UNLOCK_BUFFER(sync) } + +MY_NO_INLINE static void BtThreadFunc(CMatchFinderMt *mt) { CMtSync *p = &mt->btSync; @@ -654,25 +757,35 @@ static void BtThreadFunc(CMatchFinderMt *mt) { UInt32 blockIndex = 0; Event_Wait(&p->canStart); - Event_Set(&p->wasStarted); + for (;;) { + PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos)); + /* (p->exit == true) is possible after (p->canStart) at first loop iteration + and is unexpected after more Wait(freeSemaphore) iterations */ if (p->exit) return; + + Semaphore_Wait(&p->freeSemaphore); + + // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) if (p->stopWriting) - { - p->numProcessedBlocks = blockIndex; - MtSync_StopWriting(&mt->hashSync); - Event_Set(&p->wasStopped); break; - } - Semaphore_Wait(&p->freeSemaphore); + BtFillBlock(mt, blockIndex++); + Semaphore_Release1(&p->filledSemaphore); } + + // we stop HASH_THREAD here + MtSync_StopWriting(&mt->hashSync); + + // p->numBlocks_Sent = blockIndex; + Event_Set(&p->wasStopped); } } + void MatchFinderMt_Construct(CMatchFinderMt *p) { p->hashBuf = NULL; @@ -688,22 +801,37 @@ static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) { - MtSync_Destruct(&p->hashSync); + /* + HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs. + So we must be sure that HASH_THREAD will not use CriticalSection(s) + after deleting CriticalSection here. + + we call ReleaseStream(p) + that calls StopWriting(btSync) + that calls StopWriting(hashSync), if it's required to stop HASH_THREAD. 
+ after StopWriting() it's safe to destruct MtSync(s) in any order */ + + MatchFinderMt_ReleaseStream(p); + MtSync_Destruct(&p->btSync); + MtSync_Destruct(&p->hashSync); LOG_ITER( - printf("\nTree %9d * %7d iter = %9d sum \n", + printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n", (UInt32)(g_NumIters_Tree / 1000), (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), - (UInt32)(g_NumIters_Loop / 1000) + (UInt32)(g_NumIters_Loop / 1000), + (UInt32)(g_NumIters_Bytes / 1000) )); MatchFinderMt_FreeMem(p, alloc); } + #define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) #define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) + static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) { @@ -716,16 +844,17 @@ static THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE BtThreadFunc2(void *p) return 0; } + SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) { - CMatchFinder *mf = p->MatchFinder; + CMatchFinder *mf = MF(p); p->historySize = historySize; if (kMtBtBlockSize <= matchMaxLen * 4) return SZ_ERROR_PARAM; if (!p->hashBuf) { - p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); + p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32)); if (!p->hashBuf) return SZ_ERROR_MEM; p->btBuf = p->hashBuf + kHashBufferSize; @@ -735,101 +864,163 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) return SZ_ERROR_MEM; - RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); - RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); + 
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)); + RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)); return SZ_OK; } -/* Call it after ReleaseStream / SetStream */ + +SRes MatchFinderMt_InitMt(CMatchFinderMt *p) +{ + RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)); + return MtSync_Init(&p->btSync, kMtBtNumBlocks); +} + + static void MatchFinderMt_Init(CMatchFinderMt *p) { - CMatchFinder *mf = p->MatchFinder; + CMatchFinder *mf = MF(p); p->btBufPos = - p->btBufPosLimit = 0; + p->btBufPosLimit = NULL; p->hashBufPos = p->hashBufPosLimit = 0; + p->hashNumAvail = 0; // 21.03 + + p->failure_BT = False; /* Init without data reading. We don't want to read data in this thread */ - MatchFinder_Init_3(mf, False); + MatchFinder_Init_4(mf); + MatchFinder_Init_LowHash(mf); p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); p->btNumAvailBytes = 0; - p->lzPos = p->historySize + 1; + p->failure_LZ_BT = False; + // p->failure_LZ_LZ = False; + + p->lzPos = + 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug p->hash = mf->hash; p->fixedHashSize = mf->fixedHashSize; // p->hash4Mask = mf->hash4Mask; p->crc = mf->crc; + // memcpy(p->crc, mf->crc, sizeof(mf->crc)); p->son = mf->son; p->matchMaxLen = mf->matchMaxLen; p->numHashBytes = mf->numHashBytes; - p->pos = mf->pos; - p->buffer = mf->buffer; - p->cyclicBufferPos = mf->cyclicBufferPos; + + /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */ + // mf->streamPos = mf->pos = 1; // optimal smallest value + // 0; // for debug: ignores match to start + // kNormalizeAlign; // for debug + + /* we must init (p->pos = mf->pos) for BT, because + BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */ + p->pos = mf->pos; // do not change it + + p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); p->cyclicBufferSize = mf->cyclicBufferSize; + p->buffer = mf->buffer; p->cutValue = mf->cutValue; + // p->son[0] 
= p->son[1] = 0; // unused: to init skipped record for speculated accesses. } + /* ReleaseStream is required to finish multithreading */ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p) { + // Sleep(1); // for debug MtSync_StopWriting(&p->btSync); + // Sleep(200); // for debug /* p->MatchFinder->ReleaseStream(); */ } MY_NO_INLINE -static void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) +static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) { - UInt32 blockIndex, k; - - MtSync_GetNextBlock(&p->btSync); - - blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); - k = blockIndex * kMtBtBlockSize; - p->btBufPosLimit = k + p->btBuf[k]; - p->btNumAvailBytes = p->btBuf[k + 1]; - p->btBufPos = k + 2; - if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) + if (p->failure_LZ_BT) + p->btBufPos = p->failureBuf; + else { - MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); - p->lzPos = p->historySize + 1; + const UInt32 bi = MtSync_GetNextBlock(&p->btSync); + const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi); + { + const UInt32 numItems = bt[0]; + p->btBufPosLimit = bt + numItems; + p->btNumAvailBytes = bt[1]; + p->btBufPos = bt + 2; + if (numItems < 2 || numItems > kMtBtBlockSize) + { + p->failureBuf[0] = 0; + p->btBufPos = p->failureBuf; + p->btBufPosLimit = p->failureBuf + 1; + p->failure_LZ_BT = True; + // p->btNumAvailBytes = 0; + /* we don't want to decrease AvailBytes, that was load before. + that can be unxepected for the code that have loaded anopther value before */ + } + } + + if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize) + { + /* we don't check (lzPos) over exact avail bytes in (btBuf). 
+ (fixedHashSize) is small, so normalization is fast */ + const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); + p->lzPos -= subValue; + MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize); + } } + return p->btNumAvailBytes; } + + static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) { return p->pointerToCurPos; } + #define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); + static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) { - GET_NEXT_BLOCK_IF_REQUIRED; - return p->btNumAvailBytes; + if (p->btBufPos != p->btBufPosLimit) + return p->btNumAvailBytes; + return MatchFinderMt_GetNextBlock_Bt(p); } + +// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; } +#define CHECK_FAILURE_LZ(_match_, _pos_) + static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) { UInt32 h2, c2; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 m = p->lzPos; + const UInt32 m = p->lzPos; MT_HASH2_CALC c2 = hash[h2]; hash[h2] = m; if (c2 >= matchMinPos) + { + CHECK_FAILURE_LZ(c2, m) if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { *d++ = 2; *d++ = m - c2 - 1; } + } return d; } @@ -839,7 +1030,7 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) UInt32 h2, h3, c2, c3; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; - UInt32 m = p->lzPos; + const UInt32 m = p->lzPos; MT_HASH3_CALC c2 = hash[h2]; @@ -848,22 +1039,30 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) hash[h2] = m; (hash + kFix3HashSize)[h3] = m; - if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) + if (c2 >= matchMinPos) { - d[1] = m - c2 - 1; - if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + CHECK_FAILURE_LZ(c2, m) + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) { - d[0] = 3; - return d + 2; + d[1] = m 
- c2 - 1; + if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) + { + d[0] = 3; + return d + 2; + } + d[0] = 2; + d += 2; } - d[0] = 2; - d += 2; } - if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + if (c3 >= matchMinPos) { - *d++ = 3; - *d++ = m - c3 - 1; + CHECK_FAILURE_LZ(c3, m) + if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) + { + *d++ = 3; + *d++ = m - c3 - 1; + } } return d; @@ -874,30 +1073,37 @@ static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) /* static -UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) +UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) { - UInt32 pos = p->btBufPos; - const UInt32 *bt = p->btBuf + pos; - UInt32 len = *bt++; + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; UInt32 matchMinPos; - const UInt32 *d_base = d; UInt32 avail = p->btNumAvailBytes - 1; - p->btBufPos = pos + 1 + len; + p->btBufPos = btLim; { - UInt32 temp1 = p->historySize; p->btNumAvailBytes = avail; #define BT_HASH_BYTES_MAX 5 + + matchMinPos = p->lzPos; if (len != 0) - temp1 = bt[1]; - else if (avail < (BT_HASH_BYTES_MAX - 2)) + matchMinPos -= bt[1]; + else if (avail < (BT_HASH_BYTES_MAX - 1) - 1) { INCREASE_LZ_POS - return 0; + return d; + } + else + { + const UInt32 hs = p->historySize; + if (matchMinPos > hs) + matchMinPos -= hs; + else + matchMinPos = 1; } - matchMinPos = p->lzPos - temp1; } for (;;) @@ -942,17 +1148,17 @@ UInt32 MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) { do { - UInt32 v0 = bt[0]; - UInt32 v1 = bt[1]; + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; bt += 2; d[0] = v0; d[1] = v1; d += 2; } - while ((len -= 2) != 0); + while (bt != btLim); } INCREASE_LZ_POS - return (UInt32)(d - d_base); + return d; } */ @@ -962,7 +1168,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; UInt32 *hash = p->hash; const Byte *cur = 
p->pointerToCurPos; - UInt32 m = p->lzPos; + const UInt32 m = p->lzPos; MT_HASH3_CALC // MT_HASH4_CALC c2 = hash[h2]; @@ -1038,43 +1244,49 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) } -static UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) { - const UInt32 *bt = p->btBuf + p->btBufPos; - UInt32 len = *bt++; - p->btBufPos += 1 + len; + const UInt32 *bt = p->btBufPos; + const UInt32 len = *bt++; + const UInt32 *btLim = bt + len; + p->btBufPos = btLim; p->btNumAvailBytes--; + INCREASE_LZ_POS { - UInt32 i; - for (i = 0; i < len; i += 2) + while (bt != btLim) { - UInt32 v0 = bt[0]; - UInt32 v1 = bt[1]; + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; bt += 2; d[0] = v0; d[1] = v1; d += 2; } } - INCREASE_LZ_POS - return len; + return d; } -static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) +static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) { - UInt32 pos = p->btBufPos; - const UInt32 *bt = p->btBuf + pos; + const UInt32 *bt = p->btBufPos; UInt32 len = *bt++; - UInt32 avail = p->btNumAvailBytes - 1; + const UInt32 avail = p->btNumAvailBytes - 1; p->btNumAvailBytes = avail; - p->btBufPos = pos + 1 + len; + p->btBufPos = bt + len; if (len == 0) { #define BT_HASH_BYTES_MAX 5 if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) - len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, d) - d); + { + UInt32 m = p->lzPos; + if (m > p->historySize) + m -= p->historySize; + else + m = 1; + d = p->MixMatchesFunc(p, m, d); + } } else { @@ -1083,27 +1295,26 @@ static UInt32 MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) (match_len >= numHashBytes). 
MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) */ - UInt32 *d2; - d2 = p->MixMatchesFunc(p, p->lzPos - bt[1], d); + d = p->MixMatchesFunc(p, p->lzPos - bt[1], d); + // if (d) // check for failure do { - UInt32 v0 = bt[0]; - UInt32 v1 = bt[1]; + const UInt32 v0 = bt[0]; + const UInt32 v1 = bt[1]; bt += 2; - d2[0] = v0; - d2[1] = v1; - d2 += 2; + d[0] = v0; + d[1] = v1; + d += 2; } - while ((len -= 2) != 0); - len = (UInt32)(d2 - d); + while (len -= 2); } INCREASE_LZ_POS - return len; + return d; } #define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED #define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; -#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; } while (--num != 0); +#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) { @@ -1131,10 +1342,14 @@ static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) SKIP_FOOTER_MT } +/* +// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip(). +// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream. 
+ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER_MT(4) - UInt32 h2, h3 /*, h4 */; + UInt32 h2, h3; // h4 MT_HASH3_CALC // MT_HASH4_CALC // (hash + kFix4HashSize)[h4] = @@ -1143,15 +1358,16 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) p->lzPos; SKIP_FOOTER_MT } +*/ -void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) { vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; - switch (p->MatchFinder->numHashBytes) + switch (MF(p)->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; @@ -1160,12 +1376,12 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; case 3: - p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads3b : GetHeads3; + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; break; case 4: - p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads4b : GetHeads4; + p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; // it's fast inline version of GetMatches() // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; @@ -1174,9 +1390,11 @@ void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable) vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; break; default: - p->GetHeadsFunc = p->MatchFinder->bigHash ? GetHeads5b : GetHeads5; + p->GetHeadsFunc = MF(p)->bigHash ? 
GetHeads5b : GetHeads5; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; - vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; + vTable->Skip = + (Mf_Skip_Func)MatchFinderMt3_Skip; + // (Mf_Skip_Func)MatchFinderMt4_Skip; break; } } diff --git a/C/LzFindMt.h b/C/LzFindMt.h index 05297ca3..660b7244 100644 --- a/C/LzFindMt.h +++ b/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2019-11-05 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #ifndef __LZ_FIND_MT_H #define __LZ_FIND_MT_H @@ -11,22 +11,24 @@ EXTERN_C_BEGIN typedef struct _CMtSync { + UInt32 numProcessedBlocks; + CThread thread; + UInt64 affinity; + BoolInt wasCreated; BoolInt needStart; + BoolInt csWasInitialized; + BoolInt csWasEntered; + BoolInt exit; BoolInt stopWriting; - CThread thread; CAutoResetEvent canStart; - CAutoResetEvent wasStarted; CAutoResetEvent wasStopped; CSemaphore freeSemaphore; CSemaphore filledSemaphore; - BoolInt csWasInitialized; - BoolInt csWasEntered; CCriticalSection cs; - UInt32 numProcessedBlocks; - UInt64 affinity; + // UInt32 numBlocks_Sent; } CMtSync; typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances); @@ -42,8 +44,8 @@ typedef struct _CMatchFinderMt /* LZ */ const Byte *pointerToCurPos; UInt32 *btBuf; - UInt32 btBufPos; - UInt32 btBufPosLimit; + const UInt32 *btBufPos; + const UInt32 *btBufPosLimit; UInt32 lzPos; UInt32 btNumAvailBytes; @@ -54,6 +56,10 @@ typedef struct _CMatchFinderMt const UInt32 *crc; Mf_Mix_Matches MixMatchesFunc; + UInt32 failure_LZ_BT; // failure in BT transferred to LZ + // UInt32 failure_LZ_LZ; // failure in LZ tables + UInt32 failureBuf[1]; + // UInt32 crc[256]; /* LZ + BT */ CMtSync btSync; @@ -64,6 +70,8 @@ typedef struct _CMatchFinderMt UInt32 hashBufPos; UInt32 hashBufPosLimit; UInt32 hashNumAvail; + UInt32 failure_BT; + CLzRef *son; UInt32 matchMaxLen; @@ -71,7 +79,7 @@ typedef struct _CMatchFinderMt UInt32 pos; const Byte *buffer; UInt32
cyclicBufferPos; - UInt32 cyclicBufferSize; /* it must be historySize + 1 */ + UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ UInt32 cutValue; /* BT + Hash */ @@ -81,13 +89,19 @@ typedef struct _CMatchFinderMt /* Hash */ Mf_GetHeads GetHeadsFunc; CMatchFinder *MatchFinder; + // CMatchFinder MatchFinder; } CMatchFinderMt; +// only for Mt part void MatchFinderMt_Construct(CMatchFinderMt *p); void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc); + SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc); -void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder *vTable); +void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable); + +/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */ +SRes MatchFinderMt_InitMt(CMatchFinderMt *p); void MatchFinderMt_ReleaseStream(CMatchFinderMt *p); EXTERN_C_END diff --git a/C/LzFindOpt.c b/C/LzFindOpt.c new file mode 100644 index 00000000..8ff006e0 --- /dev/null +++ b/C/LzFindOpt.c @@ -0,0 +1,578 @@ +/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms +2021-07-13 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" +#include "LzFind.h" + +// #include "LzFindMt.h" + +// #define LOG_ITERS + +// #define LOG_THREAD + +#ifdef LOG_THREAD +#include <stdio.h> +#define PRF(x) x +#else +// #define PRF(x) +#endif + +#ifdef LOG_ITERS +#include <stdio.h> +UInt64 g_NumIters_Tree; +UInt64 g_NumIters_Loop; +UInt64 g_NumIters_Bytes; +#define LOG_ITER(x) x +#else +#define LOG_ITER(x) +#endif + +// ---------- BT THREAD ---------- + +#define USE_SON_PREFETCH +#define USE_LONG_MATCH_OPT + +#define kEmptyHashValue 0 + +// #define CYC_TO_POS_OFFSET 0 + +// #define CYC_TO_POS_OFFSET 1 // for debug + +/* +MY_NO_INLINE +UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32
*hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes) +{ + do + { + UInt32 delta; + if (hash == size) + break; + delta = *hash++; + + if (delta == 0 || delta > (UInt32)pos) + return NULL; + + lenLimit++; + + if (delta == (UInt32)pos) + { + CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2; + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1; + CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + + const Byte *len0 = cur, *len1 = cur; + UInt32 cutValue = _cutValue; + const Byte *maxLen = cur + _maxLen; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1); + const Byte *len = (len0 < len1 ? len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + hash++; + pos++; + cur++; + lenLimit++; + { + CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff]; + #else + const UInt32 p0 = ptr[0 + (diff * 2)]; + const UInt32 p1 = ptr[1 + (diff * 2)]; + ptr[0] = p0; + ptr[1] = p1; + // ptr[0] = ptr[0 + (diff * 2)]; + // ptr[1] = 
ptr[1 + (diff * 2)]; + #endif + } + // PrintSon(son + 2, pos - 1); + // printf("\npos = %x delta = %x\n", pos, delta); + len++; + *d++ = 2; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + if (delta >= curMatch) + return NULL; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + if (delta >= curMatch) + return NULL; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= pos) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // for (tree iterations) +} + pos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ + +/* define cbs if you use 2 functions. + GetMatchesSpecN_1() : (pos < _cyclicBufferSize) + GetMatchesSpecN_2() : (pos >= _cyclicBufferSize) + + do not define cbs if you use 1 function: + GetMatchesSpecN_2() +*/ + +// #define cbs _cyclicBufferSize + +/* + we use size_t for (pos) and (_cyclicBufferPos) instead of UInt32 + to eliminate "movsx" BUG in old MSVC x64 compiler.
+*/ + +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes); + +MY_NO_INLINE +UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + lenLimit++; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + UInt32 *_distances = ++d; + + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + + UInt32 cutValue = _cutValue; + const Byte *len0 = cur, *len1 = cur; + const Byte *maxLen = cur + _maxLen; + + // if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // SPEC code + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + const Byte *len = (len0 < len1 ? 
len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (len[diff] == len[0]) + { + if (++len != lenLimit && len[diff] == len[0]) + while (++len != lenLimit) + { + LOG_ITER(g_NumIters_Bytes++); + if (len[diff] != len[0]) + break; + } + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)(len - cur); + *d++ = delta - 1; + + if (len == lenLimit) + { + const UInt32 pair1 = pair[1]; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + *ptr0 = pair1; + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + + { + for (;;) + { + *d++ = 2; + *d++ = (UInt32)(lenLimit - cur); + *d++ = delta - 1; + cur++; + lenLimit++; + // SPEC + _cyclicBufferPos++; + { + // SPEC code + CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1); + // CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2; + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + pos++; + hash++; + if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit) + break; + } // for() end for long matches + } + #endif + + break; // break from TREE iterations + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff); + if (len[diff] < len[0]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + if (delta >= curMatch) + return NULL; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + if (delta >= curMatch) + return NULL; + } + delta = (UInt32)pos - delta; + + if (--cutValue == 0 || delta >= cbs) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } // 
for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} + + + +/* +typedef UInt32 uint32plus; // size_t + +UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son, + UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, + size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, + UInt32 *posRes) +{ + do // while (hash != size) + { + UInt32 delta; + + #ifndef cbs + UInt32 cbs; + #endif + + if (hash == size) + break; + + delta = *hash++; + + if (delta == 0) + return NULL; + + #ifndef cbs + cbs = _cyclicBufferSize; + if ((UInt32)pos < cbs) + { + if (delta > (UInt32)pos) + return NULL; + cbs = (UInt32)pos; + } + #endif + + if (delta >= cbs) + { + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + *d++ = 0; + ptr1[0] = kEmptyHashValue; + ptr1[1] = kEmptyHashValue; + } +else +{ + CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1; + CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1); + UInt32 *_distances = ++d; + uint32plus len0 = 0, len1 = 0; + UInt32 cutValue = _cutValue; + uint32plus maxLen = _maxLen; + // lenLimit++; // const Byte *lenLimit = cur + _lenLimit; + + for (LOG_ITER(g_NumIters_Tree++);;) + { + LOG_ITER(g_NumIters_Loop++); + { + // const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0) + ) << 1); + const Byte *pb = cur - delta; + uint32plus len = (len0 < len1 ? 
len0 : len1); + + #ifdef USE_SON_PREFETCH + const UInt32 pair0 = *pair; + #endif + + if (pb[len] == cur[len]) + { + if (++len != lenLimit && pb[len] == cur[len]) + while (++len != lenLimit) + if (pb[len] != cur[len]) + break; + if (maxLen < len) + { + maxLen = len; + *d++ = (UInt32)len; + *d++ = delta - 1; + if (len == lenLimit) + { + { + const UInt32 pair1 = pair[1]; + *ptr0 = pair1; + *ptr1 = + #ifdef USE_SON_PREFETCH + pair0; + #else + pair[0]; + #endif + } + + _distances[-1] = (UInt32)(d - _distances); + + #ifdef USE_LONG_MATCH_OPT + + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + + { + const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta; + for (;;) + { + *d++ = 2; + *d++ = (UInt32)lenLimit; + *d++ = delta - 1; + _cyclicBufferPos++; + { + CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1); + const CLzRef *src = dest + ((diff + + (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1); + #if 0 + *(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src); + #else + const UInt32 p0 = src[0]; + const UInt32 p1 = src[1]; + dest[0] = p0; + dest[1] = p1; + #endif + } + hash++; + pos++; + cur++; + pb++; + if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit) + break; + } + } + #endif + + break; + } + } + } + { + const UInt32 curMatch = (UInt32)pos - delta; + if (pb[len] < cur[len]) + { + delta = pair[1]; + *ptr1 = curMatch; + ptr1 = pair + 1; + len1 = len; + } + else + { + delta = *pair; + *ptr0 = curMatch; + ptr0 = pair; + len0 = len; + } + + { + if (delta >= curMatch) + return NULL; + delta = (UInt32)pos - delta; + if (delta >= cbs + // delta >= _cyclicBufferSize || delta >= pos + || --cutValue == 0) + { + *ptr0 = *ptr1 = kEmptyHashValue; + _distances[-1] = (UInt32)(d - _distances); + break; + } + } + } + } + } // for (tree iterations) +} + pos++; + _cyclicBufferPos++; + cur++; + } + while (d < limit); + *posRes = (UInt32)pos; + return d; +} +*/ diff --git a/C/LzmaEnc.c 
b/C/LzmaEnc.c index 377e20c3..832e4149 100644 --- a/C/LzmaEnc.c +++ b/C/LzmaEnc.c @@ -1,5 +1,5 @@ /* LzmaEnc.c -- LZMA Encoder -2021-04-01: Igor Pavlov : Public domain */ +2021-07-10: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -12,6 +12,7 @@ #include #endif +#include "CpuArch.h" #include "LzmaEnc.h" #include "LzFind.h" @@ -36,8 +37,8 @@ void LzmaEnc_RestoreState(CLzmaEncHandle pp); static unsigned g_STAT_OFFSET = 0; #endif -#define kLzmaMaxHistorySize ((UInt32)3 << 29) -/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */ +/* for good normalization speed we still reserve 256 MB before 4 GB range */ +#define kLzmaMaxHistorySize ((UInt32)15 << 28) #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) @@ -78,13 +79,12 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p) if (p->dictSize > p->reduceSize) { - unsigned i; - UInt32 reduceSize = (UInt32)p->reduceSize; - for (i = 11; i <= 30; i++) - { - if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; } - if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; } - } + UInt32 v = (UInt32)p->reduceSize; + const UInt32 kReduceMin = ((UInt32)1 << 12); + if (v < kReduceMin) + v = kReduceMin; + if (p->dictSize > v) + p->dictSize = v; } if (p->lc < 0) p->lc = 3; @@ -113,18 +113,85 @@ UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2) return props.dictSize; } -#if defined(_MSC_VER) && (_MSC_VER >= 1400) -/* BSR code is fast for some new CPUs */ -/* #define LZMA_LOG_BSR */ + +/* +x86/x64: + +BSR: + IF (SRC == 0) ZF = 1, DEST is undefined; + AMD : DEST is unchanged; + IF (SRC != 0) ZF = 0; DEST is index of top non-zero bit + BSR is slow in some processors + +LZCNT: + IF (SRC == 0) CF = 1, DEST is size_in_bits_of_register(src) (32 or 64) + IF (SRC != 0) CF = 0, DEST = num_lead_zero_bits + IF (DEST == 0) ZF = 1; + +LZCNT works only in new processors starting from Haswell. +if LZCNT is not supported by processor, then it's executed as BSR. 
+LZCNT can be faster than BSR, if supported. +*/ + +// #define LZMA_LOG_BSR + +#if defined(MY_CPU_ARM_OR_ARM64) /* || defined(MY_CPU_X86_OR_AMD64) */ + + #if (defined(__clang__) && (__clang_major__ >= 6)) \ + || (defined(__GNUC__) && (__GNUC__ >= 6)) + #define LZMA_LOG_BSR + #elif defined(_MSC_VER) && (_MSC_VER >= 1300) + // #if defined(MY_CPU_ARM_OR_ARM64) + #define LZMA_LOG_BSR + // #endif + #endif #endif +// #include + #ifdef LZMA_LOG_BSR -#define kDicLogSizeMaxCompress 32 +#if defined(__clang__) \ + || defined(__GNUC__) + +/* + C code: : (30 - __builtin_clz(x)) + gcc9/gcc10 for x64 /x86 : 30 - (bsr(x) xor 31) + clang10 for x64 : 31 + (bsr(x) xor -32) +*/ + + #define MY_clz(x) ((unsigned)__builtin_clz(x)) + // __lzcnt32 + // __builtin_ia32_lzcnt_u32 + +#else // #if defined(_MSC_VER) + + #ifdef MY_CPU_ARM_OR_ARM64 + + #define MY_clz _CountLeadingZeros + + #else // if defined(MY_CPU_X86_OR_AMD64) + + // #define MY_clz __lzcnt // we can use lzcnt (unsupported by old CPU) + // _BitScanReverse code is not optimal for some MSVC compilers + #define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); zz--; \ + res = (zz + zz) + (pos >> zz); } + + #endif // MY_CPU_X86_OR_AMD64 -#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); } +#endif // _MSC_VER -static unsigned GetPosSlot1(UInt32 pos) + +#ifndef BSR2_RET + + #define BSR2_RET(pos, res) { unsigned zz = 30 - MY_clz(pos); \ + res = (zz + zz) + (pos >> zz); } + +#endif + + +unsigned GetPosSlot1(UInt32 pos); +unsigned GetPosSlot1(UInt32 pos) { unsigned res; BSR2_RET(pos, res); @@ -133,10 +200,10 @@ static unsigned GetPosSlot1(UInt32 pos) #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); } -#else -#define kNumLogBits (9 + sizeof(size_t) / 2) -/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */ +#else // ! 
LZMA_LOG_BSR + +#define kNumLogBits (11 + sizeof(size_t) / 8 * 3) #define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7) @@ -183,7 +250,7 @@ static void LzmaEnc_FastPosInit(Byte *g_FastPos) #define GetPosSlot2(pos, res) { BSR2_RET(pos, res); } #define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); } -#endif +#endif // LZMA_LOG_BSR #define LZMA_NUM_REPS 4 @@ -319,7 +386,7 @@ typedef UInt32 CProbPrice; typedef struct { void *matchFinderObj; - IMatchFinder matchFinder; + IMatchFinder2 matchFinder; unsigned optCur; unsigned optEnd; @@ -364,10 +431,14 @@ typedef struct // begin of CMatchFinderMt is used in LZ thread CMatchFinderMt matchFinderMt; // end of CMatchFinderMt is used in BT and HASH threads + // #else + // CMatchFinder matchFinderBase; #endif - CMatchFinder matchFinderBase; + + // we suppose that we have 8-bytes alignment after CMatchFinder + #ifndef _7ZIP_ST Byte pad[128]; #endif @@ -375,8 +446,10 @@ typedef struct // LZ thread CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits]; - UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1]; + // we want {len , dist} pairs to be 8-bytes aligned in matches array + UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2]; + // we want 8-bytes alignment here UInt32 alignPrices[kAlignTableSize]; UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax]; UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances]; @@ -405,12 +478,19 @@ typedef struct CSaveState saveState; + // BoolInt mf_Failure; #ifndef _7ZIP_ST Byte pad2[128]; #endif } CLzmaEnc; +#define MFB (p->matchFinderBase) +/* +#ifndef _7ZIP_ST +#define MFB (p->matchFinderMt.MatchFinder) +#endif +*/ #define COPY_ARR(dest, src, arr) memcpy(dest->arr, src->arr, sizeof(src->arr)); @@ -475,11 +555,21 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX - || props.pb > LZMA_PB_MAX - || props.dictSize > ((UInt64)1 << 
kDicLogSizeMaxCompress) - || props.dictSize > kLzmaMaxHistorySize) + || props.pb > LZMA_PB_MAX) return SZ_ERROR_PARAM; + + if (props.dictSize > kLzmaMaxHistorySize) + props.dictSize = kLzmaMaxHistorySize; + + #ifndef LZMA_LOG_BSR + { + const UInt64 dict64 = props.dictSize; + if (dict64 > ((UInt64)1 << kDicLogSizeMaxCompress)) + return SZ_ERROR_PARAM; + } + #endif + p->dictSize = props.dictSize; { unsigned fb = (unsigned)props.fb; @@ -494,7 +584,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) p->pb = (unsigned)props.pb; p->fastMode = (props.algo == 0); // p->_maxMode = True; - p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0); + MFB.btMode = (Byte)(props.btMode ? 1 : 0); { unsigned numHashBytes = 4; if (props.btMode) @@ -504,10 +594,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) } if (props.numHashBytes >= 5) numHashBytes = 5; - p->matchFinderBase.numHashBytes = numHashBytes; + MFB.numHashBytes = numHashBytes; } - p->matchFinderBase.cutValue = props.mc; + MFB.cutValue = props.mc; p->writeEndMark = (BoolInt)props.writeEndMark; @@ -531,7 +621,7 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2) void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.expectedDataSize = expectedDataSiize; + MFB.expectedDataSize = expectedDataSiize; } @@ -1007,7 +1097,11 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) p->additionalOffset++; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); - numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + { + const UInt32 *d = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); + // if (!d) { p->mf_Failure = True; *numPairsRes = 0; return 0; } + numPairs = (unsigned)(d - p->matches); + } *numPairsRes = numPairs; #ifdef SHOW_STAT @@ -1023,7 +1117,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) if 
(numPairs == 0) return 0; { - unsigned len = p->matches[(size_t)numPairs - 2]; + const unsigned len = p->matches[(size_t)numPairs - 2]; if (len != p->numFastBytes) return len; { @@ -1033,7 +1127,7 @@ static unsigned ReadMatchDistances(CLzmaEnc *p, unsigned *numPairsRes) { const Byte *p1 = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; const Byte *p2 = p1 + len; - ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; + const ptrdiff_t dif = (ptrdiff_t)-1 - (ptrdiff_t)p->matches[(size_t)numPairs - 1]; const Byte *lim = p1 + numAvail; for (; p2 != lim && *p2 == p2[dif]; p2++) {} @@ -1189,6 +1283,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) repLens[i] = len; if (len > repLens[repMaxIndex]) repMaxIndex = i; + if (len == LZMA_MATCH_LEN_MAX) // 21.03 : optimization + break; } if (repLens[repMaxIndex] >= p->numFastBytes) @@ -1201,10 +1297,12 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } matches = p->matches; + #define MATCHES matches + // #define MATCHES p->matches if (mainLen >= p->numFastBytes) { - p->backRes = matches[(size_t)numPairs - 1] + LZMA_NUM_REPS; + p->backRes = MATCHES[(size_t)numPairs - 1] + LZMA_NUM_REPS; MOVE_POS(p, mainLen - 1) return mainLen; } @@ -1298,13 +1396,13 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (len < 2) len = 2; else - while (len > matches[offs]) + while (len > MATCHES[offs]) offs += 2; for (; ; len++) { COptimal *opt; - UInt32 dist = matches[(size_t)offs + 1]; + UInt32 dist = MATCHES[(size_t)offs + 1]; UInt32 price = normalMatchPrice + GET_PRICE_LEN(&p->lenEnc, posState, len); unsigned lenToPosState = GetLenToPosState(len); @@ -1328,7 +1426,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) opt->extra = 0; } - if (len == matches[offs]) + if (len == MATCHES[offs]) { offs += 2; if (offs == numPairs) @@ -1749,8 +1847,8 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) if (newLen > numAvail) { newLen = numAvail; - for (numPairs = 0; 
newLen > matches[numPairs]; numPairs += 2); - matches[numPairs] = (UInt32)newLen; + for (numPairs = 0; newLen > MATCHES[numPairs]; numPairs += 2); + MATCHES[numPairs] = (UInt32)newLen; numPairs += 2; } @@ -1769,9 +1867,9 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } offs = 0; - while (startLen > matches[offs]) + while (startLen > MATCHES[offs]) offs += 2; - dist = matches[(size_t)offs + 1]; + dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) GetPosSlot2(dist, posSlot); @@ -1798,7 +1896,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) } } - if (len == matches[offs]) + if (len == MATCHES[offs]) { // if (p->_maxMode) { // MATCH : LIT : REP_0 @@ -1863,7 +1961,7 @@ static unsigned GetOptimum(CLzmaEnc *p, UInt32 position) offs += 2; if (offs == numPairs) break; - dist = matches[(size_t)offs + 1]; + dist = MATCHES[(size_t)offs + 1]; // if (dist >= kNumFullDistances) GetPosSlot2(dist, posSlot); } @@ -2081,8 +2179,23 @@ static SRes CheckErrors(CLzmaEnc *p) return p->result; if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; - if (p->matchFinderBase.result != SZ_OK) + + #ifndef _7ZIP_ST + if ( + // p->mf_Failure || + (p->mtMode && + ( // p->matchFinderMt.failure_LZ_LZ || + p->matchFinderMt.failure_LZ_BT)) + ) + { + p->result = MY_HRES_ERROR__INTERNAL_ERROR; + // printf("\nCheckErrors p->matchFinderMt.failureLZ\n"); + } + #endif + + if (MFB.result != SZ_OK) p->result = SZ_ERROR_READ; + if (p->result != SZ_OK) p->finished = True; return p->result; @@ -2223,11 +2336,11 @@ MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p) static void LzmaEnc_Construct(CLzmaEnc *p) { RangeEnc_Construct(&p->rc); - MatchFinder_Construct(&p->matchFinderBase); + MatchFinder_Construct(&MFB); #ifndef _7ZIP_ST + p->matchFinderMt.MatchFinder = &MFB; MatchFinderMt_Construct(&p->matchFinderMt); - p->matchFinderMt.MatchFinder = &p->matchFinderBase; #endif { @@ -2243,7 +2356,6 @@ static void LzmaEnc_Construct(CLzmaEnc *p) 
LzmaEnc_InitPriceTables(p->ProbPrices); p->litProbs = NULL; p->saveState.litProbs = NULL; - } CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc) @@ -2269,7 +2381,7 @@ static void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBi MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); #endif - MatchFinder_Free(&p->matchFinderBase, allocBig); + MatchFinder_Free(&MFB, allocBig); LzmaEnc_FreeLits(p, alloc); RangeEnc_Free(&p->rc, alloc); } @@ -2287,6 +2399,12 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa UInt32 nowPos32, startPos32; if (p->needInit) { + #ifndef _7ZIP_ST + if (p->mtMode) + { + RINOK(MatchFinderMt_InitMt(&p->matchFinderMt)); + } + #endif p->matchFinder.Init(p->matchFinderObj); p->needInit = 0; } @@ -2582,11 +2700,13 @@ static SRes LzmaEnc_CodeOneBlock(CLzmaEnc *p, UInt32 maxPackSize, UInt32 maxUnpa static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig) { UInt32 beforeSize = kNumOpts; + UInt32 dictSize; + if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; #ifndef _7ZIP_ST - p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0)); + p->mtMode = (p->multiThread && !p->fastMode && (MFB.btMode != 0)); #endif { @@ -2605,30 +2725,50 @@ static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, } } - p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + MFB.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0); + + + dictSize = p->dictSize; + if (dictSize == ((UInt32)2 << 30) || + dictSize == ((UInt32)3 << 30)) + { + /* 21.03 : here we reduce the dictionary for 2 reasons: + 1) we don't want 32-bit back_distance matches in decoder for 2 GB dictionary. + 2) we want to elimate useless last MatchFinder_Normalize3() for corner cases, + where data size is aligned for 1 GB: 5/6/8 GB. + That reducing must be >= 1 for such corner cases. 
*/ + dictSize -= 1; + } + + if (beforeSize + dictSize < keepWindowSize) + beforeSize = keepWindowSize - dictSize; - if (beforeSize + p->dictSize < keepWindowSize) - beforeSize = keepWindowSize - p->dictSize; + /* in worst case we can look ahead for + max(LZMA_MATCH_LEN_MAX, numFastBytes + 1 + numFastBytes) bytes. + we send larger value for (keepAfter) to MantchFinder_Create(): + (numFastBytes + LZMA_MATCH_LEN_MAX + 1) + */ #ifndef _7ZIP_ST if (p->mtMode) { - RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, - LZMA_MATCH_LEN_MAX - + 1 /* 18.04 */ + RINOK(MatchFinderMt_Create(&p->matchFinderMt, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 18.04 */ , allocBig)); p->matchFinderObj = &p->matchFinderMt; - p->matchFinderBase.bigHash = (Byte)( - (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0); + MFB.bigHash = (Byte)( + (p->dictSize > kBigHashDicLimit && MFB.hashMask >= 0xFFFFFF) ? 1 : 0); MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else #endif { - if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) + if (!MatchFinder_Create(&MFB, dictSize, beforeSize, + p->numFastBytes, LZMA_MATCH_LEN_MAX + 1 /* 21.03 */ + , allocBig)) return SZ_ERROR_MEM; - p->matchFinderObj = &p->matchFinderBase; - MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); + p->matchFinderObj = &MFB; + MatchFinder_CreateVTable(&MFB, &p->matchFinder); } return SZ_OK; @@ -2700,6 +2840,8 @@ static void LzmaEnc_Init(CLzmaEnc *p) p->pbMask = ((unsigned)1 << p->pb) - 1; p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc); + + // p->mf_Failure = False; } @@ -2742,7 +2884,7 @@ static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInS ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; + MFB.stream = inStream; p->needInit = 1; 
p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); @@ -2753,16 +2895,16 @@ SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISzAllocPtr alloc, ISzAllocPtr allocBig) { CLzmaEnc *p = (CLzmaEnc *)pp; - p->matchFinderBase.stream = inStream; + MFB.stream = inStream; p->needInit = 1; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } static void LzmaEnc_SetInputBuf(CLzmaEnc *p, const Byte *src, SizeT srcLen) { - p->matchFinderBase.directInput = 1; - p->matchFinderBase.bufferBase = (Byte *)src; - p->matchFinderBase.directInputRem = srcLen; + MFB.directInput = 1; + MFB.bufferBase = (Byte *)src; + MFB.directInputRem = srcLen; } SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen, @@ -2895,7 +3037,7 @@ static SRes LzmaEnc_Encode2(CLzmaEnc *p, ICompressProgress *progress) LzmaEnc_Finish(p); /* - if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&p->matchFinderBase)) + if (res == SZ_OK && !Inline_MatchFinder_IsFinishedOK(&MFB)) res = SZ_ERROR_FAIL; } */ @@ -2914,29 +3056,37 @@ SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream *outStream, ISeqInStream *i SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size) { - CLzmaEnc *p = (CLzmaEnc *)pp; - unsigned i; - UInt32 dictSize = p->dictSize; if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; - props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); - - if (dictSize >= ((UInt32)1 << 22)) - { - const UInt32 kDictMask = ((UInt32)1 << 20) - 1; - if (dictSize < (UInt32)0xFFFFFFFF - kDictMask) - dictSize = (dictSize + kDictMask) & ~kDictMask; - } - else for (i = 11; i <= 30; i++) { - if (dictSize <= ((UInt32)2 << i)) { dictSize = ((UInt32)2 << i); break; } - if (dictSize <= ((UInt32)3 << i)) { dictSize = ((UInt32)3 << i); break; } - } + const CLzmaEnc *p = (const CLzmaEnc *)pp; + const UInt32 dictSize = p->dictSize; + UInt32 v; + props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); + + // we write aligned dictionary value to 
properties for lzma decoder + if (dictSize >= ((UInt32)1 << 21)) + { + const UInt32 kDictMask = ((UInt32)1 << 20) - 1; + v = (dictSize + kDictMask) & ~kDictMask; + if (v < dictSize) + v = dictSize; + } + else + { + unsigned i = 11 * 2; + do + { + v = (UInt32)(2 + (i & 1)) << (i >> 1); + i++; + } + while (v < dictSize); + } - for (i = 0; i < 4; i++) - props[1 + i] = (Byte)(dictSize >> (8 * i)); - return SZ_OK; + SetUi32(props + 1, v); + return SZ_OK; + } } diff --git a/C/MtCoder.c b/C/MtCoder.c index 17e33182..7936c415 100644 --- a/C/MtCoder.c +++ b/C/MtCoder.c @@ -1,5 +1,5 @@ /* MtCoder.c -- Multi-thread Coder -2021-02-09 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -495,12 +495,7 @@ SRes MtCoder_Code(CMtCoder *p) { RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent)); - - if (Semaphore_IsCreated(&p->blocksSemaphore)) - { - RINOK_THREAD(Semaphore_Close(&p->blocksSemaphore)); - } - RINOK_THREAD(Semaphore_Create(&p->blocksSemaphore, numBlocksMax, numBlocksMax)); + RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax)); } for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++) diff --git a/C/Sha1.c b/C/Sha1.c index a34c13ed..9665b5b5 100644 --- a/C/Sha1.c +++ b/C/Sha1.c @@ -1,5 +1,5 @@ /* Sha1.c -- SHA-1 Hash -2021-04-01 : Igor Pavlov : Public domain +2021-07-13 : Igor Pavlov : Public domain This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. 
*/ #include "Precomp.h" @@ -34,7 +34,7 @@ This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ l #endif #elif defined(MY_CPU_ARM_OR_ARM64) #ifdef _MSC_VER - #if _MSC_VER >= 1910 + #if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037 #define _SHA_SUPPORTED #endif #elif defined(__clang__) @@ -435,7 +435,37 @@ void Sha1Prepare() #endif { // printf("\n========== HW SHA1 ======== \n"); - f = f_hw = Sha1_UpdateBlocks_HW; + #if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER) + /* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037). + It generated incorrect SHA-1 code. + 21.03 : we test sha1-hardware code at runtime initialization */ + + #pragma message("== SHA1 code: MSC compiler : failure-check code was inserted") + + UInt32 state[5] = { 0, 1, 2, 3, 4 } ; + Byte data[64]; + unsigned i; + for (i = 0; i < sizeof(data); i += 2) + { + data[i ] = (Byte)(i); + data[i + 1] = (Byte)(i + 1); + } + + Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64); + + if ( state[0] != 0x9acd7297 + || state[1] != 0x4624d898 + || state[2] != 0x0bf079f0 + || state[3] != 0x031e61b3 + || state[4] != 0x8323fe20) + { + // printf("\n========== SHA-1 hardware version failure ======== \n"); + } + else + #endif + { + f = f_hw = Sha1_UpdateBlocks_HW; + } } g_FUNC_UPDATE_BLOCKS = f; g_FUNC_UPDATE_BLOCKS_HW = f_hw; diff --git a/C/Threads.c b/C/Threads.c index bd9553dc..7b4f5b5d 100644 --- a/C/Threads.c +++ b/C/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2021-04-25 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #include "Precomp.h" @@ -150,6 +150,17 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) return HandleToWRes(*p); } +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + // if (Semaphore_IsCreated(p)) + { + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + } + return Semaphore_Create(p, initCount, 
maxCount); +} + static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount) { return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); } WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num) @@ -158,7 +169,9 @@ WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); } WRes CriticalSection_Init(CCriticalSection *p) { - /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ + /* InitializeCriticalSection() can raise exception: + Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception + Windows Vista+ : no exceptions */ #ifdef _MSC_VER __try #endif @@ -167,7 +180,7 @@ WRes CriticalSection_Init(CCriticalSection *p) /* InitializeCriticalSectionAndSpinCount(p, 0); */ } #ifdef _MSC_VER - __except (EXCEPTION_EXECUTE_HANDLER) { return 1; } + __except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; } #endif return 0; } @@ -406,6 +419,27 @@ WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount) return 0; } + +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount) +{ + if (Semaphore_IsCreated(p)) + { + /* + WRes wres = Semaphore_Close(p); + if (wres != 0) + return wres; + */ + if (initCount > maxCount || maxCount < 1) + return EINVAL; + // return EINVAL; // for debug + p->_count = initCount; + p->_maxCount = maxCount; + return 0; + } + return Semaphore_Create(p, initCount, maxCount); +} + + WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount) { UInt32 newCount; diff --git a/C/Threads.h b/C/Threads.h index 6cb4aa4b..9e70ecab 100644 --- a/C/Threads.h +++ b/C/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2021-04-25 : Igor Pavlov : Public domain */ +2021-07-12 : Igor Pavlov : Public domain */ #ifndef __7Z_THREADS_H #define __7Z_THREADS_H @@ -8,14 +8,18 @@ #include #else -#if !defined(__APPLE__) && !defined(_AIX) +#if defined(__linux__) +#if !defined(__APPLE__) && !defined(_AIX) && !defined(__ANDROID__) #ifndef 
_7ZIP_AFFINITY_DISABLE #define _7ZIP_AFFINITY_SUPPORTED +// #pragma message(" ==== _7ZIP_AFFINITY_SUPPORTED") // #define _GNU_SOURCE #endif #endif +#endif #include + #endif #include "7zTypes.h" @@ -122,6 +126,7 @@ typedef HANDLE CSemaphore; #define Semaphore_Close(p) HandlePtr_Close(p) #define Semaphore_Wait(p) Handle_WaitObject(*(p)) WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); WRes Semaphore_Release1(CSemaphore *p); @@ -172,6 +177,7 @@ typedef struct _CSemaphore #define Semaphore_IsCreated(p) ((p)->_created) WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount); +WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount); WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num); #define Semaphore_Release1(p) Semaphore_ReleaseN(p, 1) WRes Semaphore_Wait(CSemaphore *p); diff --git a/C/Util/LzmaLib/LzmaLib.dsp b/C/Util/LzmaLib/LzmaLib.dsp index 3421de83..6ce91dcd 100644 --- a/C/Util/LzmaLib/LzmaLib.dsp +++ b/C/Util/LzmaLib/LzmaLib.dsp @@ -136,6 +136,10 @@ SOURCE=..\..\LzFindMt.h # End Source File # Begin Source File +SOURCE=..\..\LzFindOpt.c +# End Source File +# Begin Source File + SOURCE=..\..\LzHash.h # End Source File # Begin Source File diff --git a/C/Util/LzmaLib/makefile b/C/Util/LzmaLib/makefile index 74103bb0..eb6ade33 100644 --- a/C/Util/LzmaLib/makefile +++ b/C/Util/LzmaLib/makefile @@ -13,6 +13,7 @@ C_OBJS = \ $O\Alloc.obj \ $O\LzFind.obj \ $O\LzFindMt.obj \ + $O\LzFindOpt.obj \ $O\LzmaDec.obj \ $O\LzmaEnc.obj \ $O\LzmaLib.obj \ diff --git a/C/var_clang_x64.mak b/C/var_clang_x64.mak index fefed51c..34e1b49c 100644 --- a/C/var_clang_x64.mak +++ b/C/var_clang_x64.mak @@ -9,4 +9,3 @@ USE_ASM=1 CC=$(CROSS_COMPILE)clang CXX=$(CROSS_COMPILE)clang++ USE_CLANG=1 - diff --git a/C/var_clang_x86.mak b/C/var_clang_x86.mak index 5f3c2d9c..bd2317c2 100644 --- 
a/C/var_clang_x86.mak +++ b/C/var_clang_x86.mak @@ -9,4 +9,3 @@ USE_ASM=1 CC=$(CROSS_COMPILE)clang CXX=$(CROSS_COMPILE)clang++ USE_CLANG=1 - diff --git a/C/var_gcc_x86.mak b/C/var_gcc_x86.mak index 288bf94b..f0718ec7 100644 --- a/C/var_gcc_x86.mak +++ b/C/var_gcc_x86.mak @@ -8,4 +8,3 @@ MY_ARCH=-m32 USE_ASM=1 CC=$(CROSS_COMPILE)gcc CXX=$(CROSS_COMPILE)g++ - diff --git a/C/warn_gcc.mak b/C/warn_gcc.mak index 5fb747dc..7aab7a44 100644 --- a/C/warn_gcc.mak +++ b/C/warn_gcc.mak @@ -49,5 +49,3 @@ CFLAGS_WARN_GCC_PPMD_UNALIGNED = \ CFLAGS_WARN = $(CFLAGS_WARN_GCC_9) \ # $(CFLAGS_WARN_GCC_PPMD_UNALIGNED) - - \ No newline at end of file diff --git a/CPP/7zip/7zip_gcc.mak b/CPP/7zip/7zip_gcc.mak index 122686ff..59074214 100644 --- a/CPP/7zip/7zip_gcc.mak +++ b/CPP/7zip/7zip_gcc.mak @@ -2,7 +2,7 @@ # USE_ASM = 1 # IS_X64 = 1 # MY_ARCH = - +# USE_ASM= MY_ARCH_2 = $(MY_ARCH) @@ -23,6 +23,8 @@ CFLAGS_BASE = -O2 $(CFLAGS_BASE_LIST) $(CFLAGS_WARN_WALL) $(CFLAGS_WARN) \ -DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE \ -fPIC +# -D_7ZIP_AFFINITY_DISABLE + ifdef SystemDrive IS_MINGW = 1 @@ -186,6 +188,8 @@ $O/Lang.o: ../../../Common/Lang.cpp $(CXX) $(CXXFLAGS) $< $O/ListFileUtils.o: ../../../Common/ListFileUtils.cpp $(CXX) $(CXXFLAGS) $< +$O/LzFindPrepare.o: ../../../Common/LzFindPrepare.cpp + $(CXX) $(CXXFLAGS) $< $O/MyMap.o: ../../../Common/MyMap.cpp $(CXX) $(CXXFLAGS) $< $O/MyString.o: ../../../Common/MyString.cpp @@ -1095,6 +1099,7 @@ $O/XzCrc64.o: ../../../../C/XzCrc64.c ifdef USE_ASM ifdef IS_X64 USE_X86_ASM=1 +USE_X64_ASM=1 else ifdef IS_X86 USE_X86_ASM=1 @@ -1126,6 +1131,13 @@ $O/AesOpt.o: ../../../../C/AesOpt.c $(CC) $(CFLAGS) $< endif +ifdef USE_X64_ASM +$O/LzFindOpt.o: ../../../../Asm/x86/LzFindOpt.asm + $(MY_ASM) $(AFLAGS) $< +else +$O/LzFindOpt.o: ../../../../C/LzFindOpt.c + $(CC) $(CFLAGS) $< +endif ifdef USE_LZMA_DEC_ASM diff --git a/CPP/7zip/Archive/7z/7zHandlerOut.cpp b/CPP/7zip/Archive/7z/7zHandlerOut.cpp index 54d94689..61f641fb 100644 --- 
a/CPP/7zip/Archive/7z/7zHandlerOut.cpp +++ b/CPP/7zip/Archive/7z/7zHandlerOut.cpp @@ -133,7 +133,7 @@ HRESULT CHandler::SetMainMethod( if (_numSolidBytesDefined) continue; - UInt32 dicSize; + UInt64 dicSize; switch (methodFull.Id) { case k_LZMA: diff --git a/CPP/7zip/Archive/Zip/ZipHeader.h b/CPP/7zip/Archive/Zip/ZipHeader.h index c1150b6e..332f32d6 100644 --- a/CPP/7zip/Archive/Zip/ZipHeader.h +++ b/CPP/7zip/Archive/Zip/ZipHeader.h @@ -89,6 +89,7 @@ namespace NFileHeader kZip64 = 0x01, kNTFS = 0x0A, kStrongEncrypt = 0x17, + kIzNtSecurityDescriptor = 0x4453, kUnixTime = 0x5455, kUnixExtra = 0x5855, kIzUnicodeComment = 0x6375, diff --git a/CPP/7zip/Archive/Zip/ZipItem.cpp b/CPP/7zip/Archive/Zip/ZipItem.cpp index 38921dce..be336485 100644 --- a/CPP/7zip/Archive/Zip/ZipItem.cpp +++ b/CPP/7zip/Archive/Zip/ZipItem.cpp @@ -37,6 +37,7 @@ static const CUInt32PCharPair g_ExtraTypes[] = { NExtraID::kUnix3Extra, "ux" }, { NExtraID::kIzUnicodeComment, "uc" }, { NExtraID::kIzUnicodeName, "up" }, + { NExtraID::kIzNtSecurityDescriptor, "SD" }, { NExtraID::kWzAES, "WzAES" }, { NExtraID::kApkAlign, "ApkAlign" } }; diff --git a/CPP/7zip/Bundles/Alone/Alone.dsp b/CPP/7zip/Bundles/Alone/Alone.dsp index d50b95a4..28273ade 100644 --- a/CPP/7zip/Bundles/Alone/Alone.dsp +++ b/CPP/7zip/Bundles/Alone/Alone.dsp @@ -44,7 +44,7 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /Gz /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "WIN_LONG_PATH" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /Yu"StdAfx.h" /FD /c +# ADD CPP /nologo /Gr /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "WIN_LONG_PATH" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /FAcs /Yu"StdAfx.h" /FD /c # ADD BASE RSC /l 0x419 /d "NDEBUG" # ADD RSC /l 0x419 /d "NDEBUG" BSC32=bscmake.exe @@ -270,6 +270,10 @@ 
SOURCE=..\..\..\Common\CommandLineParser.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\Common.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\ComTry.h # End Source File # Begin Source File @@ -306,6 +310,18 @@ SOURCE=..\..\..\Common\ListFileUtils.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\LzFindPrepare.cpp +# End Source File +# Begin Source File + +SOURCE=..\..\..\Common\MyBuffer.h +# End Source File +# Begin Source File + +SOURCE=..\..\..\Common\MyBuffer2.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyCom.h # End Source File # Begin Source File @@ -322,6 +338,10 @@ SOURCE=..\..\..\Common\MyInitGuid.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\MyLinux.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyString.cpp # End Source File # Begin Source File @@ -330,6 +350,10 @@ SOURCE=..\..\..\Common\MyString.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\MyTypes.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyUnknown.h # End Source File # Begin Source File @@ -342,6 +366,10 @@ SOURCE=..\..\..\Common\MyVector.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\MyWindows.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\NewHandler.cpp # End Source File # Begin Source File @@ -478,6 +506,10 @@ SOURCE=..\..\..\Windows\FileLink.cpp # End Source File # Begin Source File +SOURCE=..\..\..\Windows\FileMapping.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Windows\FileName.cpp # End Source File # Begin Source File @@ -506,6 +538,10 @@ SOURCE=..\..\..\Windows\MemoryLock.h # End Source File # Begin Source File +SOURCE=..\..\..\Windows\NtCheck.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Windows\PropVariant.cpp # End Source File # Begin Source File @@ -538,6 +574,10 @@ SOURCE=..\..\..\Windows\Registry.h # End Source File # Begin Source File 
+SOURCE=..\..\..\Windows\SecurityUtils.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Windows\Synchronization.cpp # End Source File # Begin Source File @@ -654,6 +694,10 @@ SOURCE=..\..\Common\MemBlocks.cpp # End Source File # Begin Source File +SOURCE=..\..\Common\MemBlocks.h +# End Source File +# Begin Source File + SOURCE=..\..\Common\MethodId.cpp # End Source File # Begin Source File @@ -769,6 +813,10 @@ SOURCE=..\..\Common\VirtThread.h # PROP Default_Filter "" # Begin Source File +SOURCE=..\..\Compress\BZip2Const.h +# End Source File +# Begin Source File + SOURCE=..\..\Compress\BZip2Crc.cpp # End Source File # Begin Source File @@ -909,10 +957,6 @@ SOURCE=..\..\Compress\DeflateEncoder.h # End Source File # Begin Source File -SOURCE=..\..\Compress\DeflateExtConst.h -# End Source File -# Begin Source File - SOURCE=..\..\Compress\DeflateRegister.cpp # End Source File # End Group @@ -1682,6 +1726,10 @@ SOURCE=..\..\UI\Common\DefaultName.h # End Source File # Begin Source File +SOURCE=..\..\UI\Common\DirItem.h +# End Source File +# Begin Source File + SOURCE=..\..\UI\Common\EnumDirItems.cpp # End Source File # Begin Source File @@ -1690,6 +1738,10 @@ SOURCE=..\..\UI\Common\EnumDirItems.h # End Source File # Begin Source File +SOURCE=..\..\UI\Common\ExitCode.h +# End Source File +# Begin Source File + SOURCE=..\..\UI\Common\Extract.cpp # End Source File # Begin Source File @@ -1706,6 +1758,10 @@ SOURCE=..\..\UI\Common\ExtractingFilePath.h # End Source File # Begin Source File +SOURCE=..\..\UI\Common\ExtractMode.h +# End Source File +# Begin Source File + SOURCE=..\..\UI\Common\HashCalc.cpp # End Source File # Begin Source File @@ -1714,6 +1770,10 @@ SOURCE=..\..\UI\Common\HashCalc.h # End Source File # Begin Source File +SOURCE=..\..\UI\Common\IFileExtractCallback.h +# End Source File +# Begin Source File + SOURCE=..\..\UI\Common\LoadCodecs.cpp # End Source File # Begin Source File @@ -1730,6 +1790,10 @@ SOURCE=..\..\UI\Common\OpenArchive.h # End Source 
File # Begin Source File +SOURCE=..\..\UI\Common\Property.h +# End Source File +# Begin Source File + SOURCE=..\..\UI\Common\PropIDUtils.cpp # End Source File # Begin Source File @@ -1916,6 +1980,10 @@ SOURCE=..\..\Crypto\RandGen.h # End Source File # Begin Source File +SOURCE=..\..\Crypto\Sha1Cls.h +# End Source File +# Begin Source File + SOURCE=..\..\Crypto\WzAes.cpp # End Source File # Begin Source File @@ -1963,6 +2031,10 @@ SOURCE=..\..\ICoder.h # End Source File # Begin Source File +SOURCE=..\..\IDecl.h +# End Source File +# Begin Source File + SOURCE=..\..\IMyUnknown.h # End Source File # Begin Source File @@ -1979,6 +2051,10 @@ SOURCE=..\..\IStream.h # End Source File # Begin Source File +SOURCE=..\..\MyVersion.h +# End Source File +# Begin Source File + SOURCE=..\..\PropID.h # End Source File # End Group @@ -2227,6 +2303,10 @@ SOURCE=..\..\..\..\C\7zTypes.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\7zVersion.h +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\Aes.c !IF "$(CFG)" == "Alone - Win32 Release" @@ -2444,6 +2524,10 @@ SOURCE=..\..\..\..\C\BwtSort.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\Compiler.h +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\CpuArch.c !IF "$(CFG)" == "Alone - Win32 Release" @@ -2597,6 +2681,30 @@ SOURCE=..\..\..\..\C\LzFindMt.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\LzFindOpt.c + +!IF "$(CFG)" == "Alone - Win32 Release" + +# ADD CPP /O2 +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "Alone - Win32 Debug" + +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "Alone - Win32 ReleaseU" + +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "Alone - Win32 DebugU" + +# SUBTRACT CPP /YX /Yc /Yu + +!ENDIF + +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\LzHash.h # End Source File # Begin Source File diff --git a/CPP/7zip/Bundles/Alone/makefile b/CPP/7zip/Bundles/Alone/makefile index 95b950e7..616a57ac 100644 --- 
a/CPP/7zip/Bundles/Alone/makefile +++ b/CPP/7zip/Bundles/Alone/makefile @@ -1,6 +1,7 @@ PROG = 7za.exe # USE_C_AES = 1 # USE_C_SHA = 1 +# USE_C_LZFINDOPT = 1 MY_CONSOLE = 1 CFLAGS = $(CFLAGS) -DZSTD_MULTITHREAD @@ -15,6 +16,7 @@ COMMON_OBJS = \ $O\CrcReg.obj \ $O\IntToString.obj \ $O\ListFileUtils.obj \ + $O\LzFindPrepare.obj \ $O\NewHandler.obj \ $O\StdInStream.obj \ $O\StdOutStream.obj \ @@ -352,6 +354,7 @@ FASTLZMA2_OBJS = \ !include "../../Aes.mak" !include "../../Crc.mak" !include "../../Crc64.mak" +!include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha1.mak" !include "../../Sha256.mak" diff --git a/CPP/7zip/Bundles/Alone/makefile.gcc b/CPP/7zip/Bundles/Alone/makefile.gcc index e63fc332..182e9a7c 100644 --- a/CPP/7zip/Bundles/Alone/makefile.gcc +++ b/CPP/7zip/Bundles/Alone/makefile.gcc @@ -110,6 +110,7 @@ COMMON_OBJS = \ $O/CrcReg.o \ $O/IntToString.o \ $O/ListFileUtils.o \ + $O/LzFindPrepare.o \ $O/MyString.o \ $O/NewHandler.o \ $O/StdInStream.o \ @@ -283,6 +284,7 @@ C_OBJS = \ $O/Delta.o \ $O/HuffEnc.o \ $O/LzFind.o \ + $O/LzFindOpt.o \ $O/Lzma2Dec.o \ $O/Lzma2DecMt.o \ $O/Lzma2Enc.o \ diff --git a/CPP/7zip/Bundles/Alone7z/Alone.dsp b/CPP/7zip/Bundles/Alone7z/Alone.dsp index a46526bf..a15a5bfa 100644 --- a/CPP/7zip/Bundles/Alone7z/Alone.dsp +++ b/CPP/7zip/Bundles/Alone7z/Alone.dsp @@ -44,7 +44,7 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c -# ADD CPP /nologo /Gr /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /FAc /Yu"StdAfx.h" /FD /c +# ADD CPP /nologo /Gr /MT /W4 /WX /GX /O1 /I "..\..\..\\" /D "NDEBUG" /D "_MBCS" /D "WIN32" /D "_CONSOLE" /D "_7ZIP_LARGE_PAGES" /D "SUPPORT_DEVICE_FILE" /FAcs /Yu"StdAfx.h" /FD /c # ADD BASE RSC /l 0x419 /d "NDEBUG" # ADD RSC /l 0x419 /d "NDEBUG" BSC32=bscmake.exe @@ -306,6 +306,10 @@ 
SOURCE=..\..\..\Common\ListFileUtils.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\LzFindPrepare.cpp +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyCom.h # End Source File # Begin Source File @@ -1681,6 +1685,30 @@ SOURCE=..\..\..\..\C\LzFindMt.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\LzFindOpt.c + +!IF "$(CFG)" == "Alone - Win32 Release" + +# ADD CPP /O2 +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "Alone - Win32 Debug" + +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "Alone - Win32 ReleaseU" + +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "Alone - Win32 DebugU" + +# SUBTRACT CPP /YX /Yc /Yu + +!ENDIF + +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\Compress\Lz\LzHash.h # End Source File # Begin Source File diff --git a/CPP/7zip/Bundles/Alone7z/makefile b/CPP/7zip/Bundles/Alone7z/makefile index 803277ab..0a68e141 100644 --- a/CPP/7zip/Bundles/Alone7z/makefile +++ b/CPP/7zip/Bundles/Alone7z/makefile @@ -10,6 +10,7 @@ COMMON_OBJS = \ $O\CrcReg.obj \ $O\IntToString.obj \ $O\ListFileUtils.obj \ + $O\LzFindPrepare.obj \ $O\NewHandler.obj \ $O\StdInStream.obj \ $O\StdOutStream.obj \ @@ -152,6 +153,7 @@ C_OBJS = \ !include "../../Aes.mak" !include "../../Crc.mak" !include "../../Crc64.mak" +!include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha256.mak" diff --git a/CPP/7zip/Bundles/Alone7z/makefile.gcc b/CPP/7zip/Bundles/Alone7z/makefile.gcc index b1f69734..c1d6ac50 100644 --- a/CPP/7zip/Bundles/Alone7z/makefile.gcc +++ b/CPP/7zip/Bundles/Alone7z/makefile.gcc @@ -27,6 +27,7 @@ else MT_OBJS = \ $O/LzFindMt.o \ + $O/LzFindOpt.o \ $O/StreamBinder.o \ $O/Synchronization.o \ $O/VirtThread.o \ @@ -110,6 +111,7 @@ COMMON_OBJS = \ $O/CrcReg.o \ $O/IntToString.o \ $O/ListFileUtils.o \ + $O/LzFindPrepare.o \ $O/MyString.o \ $O/MyVector.o \ $O/NewHandler.o \ diff --git a/CPP/7zip/Bundles/Codec_flzma2/makefile b/CPP/7zip/Bundles/Codec_flzma2/makefile index 
3a544e6c..891003c6 100644 --- a/CPP/7zip/Bundles/Codec_flzma2/makefile +++ b/CPP/7zip/Bundles/Codec_flzma2/makefile @@ -15,6 +15,7 @@ COMPRESS_OBJS = \ C_OBJS = \ $O\Alloc.obj \ + $O\CpuArch.obj \ $O\Threads.obj \ $O\LzFind.obj \ $O\LzFindMt.obj \ @@ -48,4 +49,6 @@ FASTLZMA2_OBJS = \ $O\range_enc.obj \ $O\util.obj \ +!include "../../Crc.mak" +!include "../../LzFindOpt.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Fm/FM.dsp b/CPP/7zip/Bundles/Fm/FM.dsp index cccf1be5..86d788f0 100644 --- a/CPP/7zip/Bundles/Fm/FM.dsp +++ b/CPP/7zip/Bundles/Fm/FM.dsp @@ -1016,6 +1016,11 @@ SOURCE=..\..\..\..\C\LzFindMt.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\LzFindOpt.c +# SUBTRACT CPP /YX /Yc /Yu +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\Lzma2Dec.c # SUBTRACT CPP /YX /Yc /Yu # End Source File @@ -1514,6 +1519,10 @@ SOURCE=..\..\..\Common\ListFileUtils.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\LzFindPrepare.cpp +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyBuffer.h # End Source File # Begin Source File diff --git a/CPP/7zip/Bundles/Format7z/makefile b/CPP/7zip/Bundles/Format7z/makefile index 0d98eb08..566cf971 100644 --- a/CPP/7zip/Bundles/Format7z/makefile +++ b/CPP/7zip/Bundles/Format7z/makefile @@ -6,6 +6,7 @@ COMMON_OBJS = \ $O\CRC.obj \ $O\CrcReg.obj \ $O\IntToString.obj \ + $O\LzFindPrepare.obj \ $O\NewHandler.obj \ $O\MyString.obj \ $O\Sha256Reg.obj \ @@ -137,6 +138,7 @@ C_OBJS = \ !include "../../Aes.mak" !include "../../Crc.mak" +!include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha256.mak" diff --git a/CPP/7zip/Bundles/Format7zF/Arc.mak b/CPP/7zip/Bundles/Format7zF/Arc.mak index 35149374..67ccc866 100644 --- a/CPP/7zip/Bundles/Format7zF/Arc.mak +++ b/CPP/7zip/Bundles/Format7zF/Arc.mak @@ -4,6 +4,7 @@ COMMON_OBJS = \ $O\CrcReg.obj \ $O\DynLimBuf.obj \ $O\IntToString.obj \ + $O\LzFindPrepare.obj \ $O\Md2Reg.obj \ $O\Md4Reg.obj \ $O\Md5Reg.obj \ @@ 
-305,6 +306,7 @@ C_OBJS = \ !include "../../Aes.mak" !include "../../Crc.mak" !include "../../Crc64.mak" +!include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha1.mak" !include "../../Sha256.mak" diff --git a/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak b/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak index c3dbf349..73711d3a 100644 --- a/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak +++ b/CPP/7zip/Bundles/Format7zF/Arc_gcc.mak @@ -18,6 +18,7 @@ else MT_OBJS = \ $O/LzFindMt.o \ + $O/LzFindOpt.o \ $O/StreamBinder.o \ $O/Synchronization.o \ $O/VirtThread.o \ @@ -35,6 +36,7 @@ COMMON_OBJS = \ $O/CrcReg.o \ $O/DynLimBuf.o \ $O/IntToString.o \ + $O/LzFindPrepare.o \ $O/MyMap.o \ $O/MyString.o \ $O/MyVector.o \ diff --git a/CPP/7zip/Bundles/Format7zF/Format7z.dsp b/CPP/7zip/Bundles/Format7zF/Format7z.dsp index 67883ebc..36ac6042 100644 --- a/CPP/7zip/Bundles/Format7zF/Format7z.dsp +++ b/CPP/7zip/Bundles/Format7zF/Format7z.dsp @@ -267,6 +267,10 @@ SOURCE=..\..\..\Common\IntToString.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\LzFindPrepare.cpp +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyBuffer.h # End Source File # Begin Source File @@ -1869,6 +1873,22 @@ SOURCE=..\..\..\..\C\LzFindMt.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\LzFindOpt.c + +!IF "$(CFG)" == "7z - Win32 Release" + +# ADD CPP /O2 +# SUBTRACT CPP /YX /Yc /Yu + +!ELSEIF "$(CFG)" == "7z - Win32 Debug" + +# SUBTRACT CPP /YX /Yc /Yu + +!ENDIF + +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\LzHash.h # End Source File # Begin Source File diff --git a/CPP/7zip/Bundles/Format7zR/makefile b/CPP/7zip/Bundles/Format7zR/makefile index 5c05abd1..c2237152 100644 --- a/CPP/7zip/Bundles/Format7zR/makefile +++ b/CPP/7zip/Bundles/Format7zR/makefile @@ -7,6 +7,7 @@ COMMON_OBJS = \ $O\CRC.obj \ $O\CrcReg.obj \ $O\IntToString.obj \ + $O\LzFindPrepare.obj \ $O\NewHandler.obj \ $O\MyString.obj \ $O\StringConvert.obj \ @@ -111,6 +112,7 @@ C_OBJS = \ 
$O\Threads.obj \ !include "../../Crc.mak" +!include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp b/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp index 907b23e0..d7326efc 100644 --- a/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp +++ b/CPP/7zip/Bundles/LzmaCon/LzmaCon.dsp @@ -212,6 +212,10 @@ SOURCE=..\..\..\Common\IntToString.h # End Source File # Begin Source File +SOURCE=..\..\..\Common\LzFindPrepare.cpp +# End Source File +# Begin Source File + SOURCE=..\..\..\Common\MyCom.h # End Source File # Begin Source File @@ -316,6 +320,14 @@ SOURCE=..\..\Common\MethodProps.h # End Source File # Begin Source File +SOURCE=..\..\Common\StreamObjects.cpp +# End Source File +# Begin Source File + +SOURCE=..\..\Common\StreamObjects.h +# End Source File +# Begin Source File + SOURCE=..\..\Common\StreamUtils.cpp # End Source File # Begin Source File @@ -441,6 +453,11 @@ SOURCE=..\..\..\..\C\LzFindMt.h # End Source File # Begin Source File +SOURCE=..\..\..\..\C\LzFindOpt.c +# SUBTRACT CPP /YX /Yc /Yu +# End Source File +# Begin Source File + SOURCE=..\..\..\..\C\LzHash.h # End Source File # Begin Source File diff --git a/CPP/7zip/Bundles/LzmaCon/makefile b/CPP/7zip/Bundles/LzmaCon/makefile index e87becc4..5e53327a 100644 --- a/CPP/7zip/Bundles/LzmaCon/makefile +++ b/CPP/7zip/Bundles/LzmaCon/makefile @@ -14,6 +14,7 @@ COMMON_OBJS = \ $O\CRC.obj \ $O\CrcReg.obj \ $O\IntToString.obj \ + $O\LzFindPrepare.obj \ $O\MyString.obj \ $O\MyVector.obj \ $O\NewHandler.obj \ @@ -33,6 +34,7 @@ WIN_OBJS = \ $O\FileStreams.obj \ $O\FilterCoder.obj \ $O\MethodProps.obj \ + $O\StreamObjects.obj \ $O\StreamUtils.obj \ UI_COMMON_OBJS = \ @@ -55,6 +57,7 @@ C_OBJS = \ $O\Threads.obj \ !include "../../Crc.mak" +!include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/LzmaCon/makefile.gcc b/CPP/7zip/Bundles/LzmaCon/makefile.gcc index f9ccfce4..58c204af 100644 --- 
a/CPP/7zip/Bundles/LzmaCon/makefile.gcc +++ b/CPP/7zip/Bundles/LzmaCon/makefile.gcc @@ -18,6 +18,7 @@ else MT_OBJS = \ $O/LzFindMt.o \ + $O/LzFindOpt.o \ $O/Synchronization.o \ $O/Threads.o \ @@ -55,6 +56,7 @@ COMMON_OBJS = \ $O/CRC.o \ $O/CrcReg.o \ $O/IntToString.o \ + $O/LzFindPrepare.o \ $O/MyString.o \ $O/MyVector.o \ $O/NewHandler.o \ @@ -83,6 +85,7 @@ CONSOLE_OBJS = \ $O/FileStreams.o \ $O/FilterCoder.o \ $O/MethodProps.o \ + $O/StreamObjects.o \ $O/StreamUtils.o \ C_OBJS = \ diff --git a/CPP/7zip/Common/MemBlocks.cpp b/CPP/7zip/Common/MemBlocks.cpp index f351abbd..9b0652c6 100644 --- a/CPP/7zip/Common/MemBlocks.cpp +++ b/CPP/7zip/Common/MemBlocks.cpp @@ -67,7 +67,6 @@ HRes CMemBlockManagerMt::AllocateSpace(size_t numBlocks, size_t numNoLockBlocks) return E_OUTOFMEMORY; if (!CMemBlockManager::AllocateSpace_bool(numBlocks)) return E_OUTOFMEMORY; - Semaphore.Close(); // we need (maxCount = 1), if we want to create non-use empty Semaphore if (maxCount == 0) maxCount = 1; @@ -75,12 +74,13 @@ HRes CMemBlockManagerMt::AllocateSpace(size_t numBlocks, size_t numNoLockBlocks) // printf("\n Synchro.Create() \n"); WRes wres; #ifndef _WIN32 + Semaphore.Close(); wres = Synchro.Create(); if (wres != 0) return HRESULT_FROM_WIN32(wres); wres = Semaphore.Create(&Synchro, (UInt32)numLockBlocks, maxCount); #else - wres = Semaphore.Create((UInt32)numLockBlocks, maxCount); + wres = Semaphore.OptCreateInit((UInt32)numLockBlocks, maxCount); #endif return HRESULT_FROM_WIN32(wres); diff --git a/CPP/7zip/Common/MethodProps.cpp b/CPP/7zip/Common/MethodProps.cpp index 48ba06b7..21b7c322 100644 --- a/CPP/7zip/Common/MethodProps.cpp +++ b/CPP/7zip/Common/MethodProps.cpp @@ -99,41 +99,65 @@ HRESULT ParseMtProp(const UString &name, const PROPVARIANT &prop, UInt32 default } +static HRESULT SetLogSizeProp(UInt64 number, NCOM::CPropVariant &destProp) +{ + if (number >= 64) + return E_INVALIDARG; + UInt32 val32; + if (number < 32) + val32 = (UInt32)1 << (unsigned)number; + /* + else if (number 
== 32 && reduce_4GB_to_32bits) + val32 = (UInt32)(Int32)-1; + */ + else + { + destProp = (UInt64)((UInt64)1 << (unsigned)number); + return S_OK; + } + destProp = (UInt32)val32; + return S_OK; +} + + static HRESULT StringToDictSize(const UString &s, NCOM::CPropVariant &destProp) { + /* if (reduce_4GB_to_32bits) we can reduce (4 GiB) property to (4 GiB - 1). + to fit the value to UInt32 for clients that do not support 64-bit values */ + const wchar_t *end; - UInt32 number = ConvertStringToUInt32(s, &end); - unsigned numDigits = (unsigned)(end - s.Ptr()); + const UInt64 number = ConvertStringToUInt64(s, &end); + const unsigned numDigits = (unsigned)(end - s.Ptr()); if (numDigits == 0 || s.Len() > numDigits + 1) return E_INVALIDARG; if (s.Len() == numDigits) - { - if (number >= 64) - return E_INVALIDARG; - if (number < 32) - destProp = (UInt32)((UInt32)1 << (unsigned)number); - else - destProp = (UInt64)((UInt64)1 << (unsigned)number); - return S_OK; - } + return SetLogSizeProp(number, destProp); unsigned numBits; switch (MyCharLower_Ascii(s[numDigits])) { - case 'b': destProp = number; return S_OK; + case 'b': numBits = 0; break; case 'k': numBits = 10; break; case 'm': numBits = 20; break; case 'g': numBits = 30; break; default: return E_INVALIDARG; } - if (number < ((UInt32)1 << (32 - numBits))) - destProp = (UInt32)(number << numBits); + const UInt64 range4g = ((UInt64)1 << (32 - numBits)); + if (number < range4g) + destProp = (UInt32)((UInt32)number << numBits); + /* + else if (number == range4g && reduce_4GB_to_32bits) + destProp = (UInt32)(Int32)-1; + */ + else if (numBits == 0) + destProp = (UInt64)number; + else if (number >= ((UInt64)1 << (64 - numBits))) + return E_INVALIDARG; else destProp = (UInt64)((UInt64)number << numBits); - return S_OK; } @@ -141,16 +165,8 @@ static HRESULT StringToDictSize(const UString &s, NCOM::CPropVariant &destProp) static HRESULT PROPVARIANT_to_DictSize(const PROPVARIANT &prop, NCOM::CPropVariant &destProp) { if (prop.vt == 
VT_UI4) - { - UInt32 v = prop.ulVal; - if (v >= 64) - return E_INVALIDARG; - if (v < 32) - destProp = (UInt32)((UInt32)1 << (unsigned)v); - else - destProp = (UInt64)((UInt64)1 << (unsigned)v); - return S_OK; - } + return SetLogSizeProp(prop.ulVal, destProp); + if (prop.vt == VT_BSTR) { UString s; diff --git a/CPP/7zip/Common/MethodProps.h b/CPP/7zip/Common/MethodProps.h index e0519b16..bd9283f0 100644 --- a/CPP/7zip/Common/MethodProps.h +++ b/CPP/7zip/Common/MethodProps.h @@ -64,23 +64,34 @@ class CMethodProps: public CProps unsigned GetLevel() const; int Get_NumThreads() const { - int i = FindProp(NCoderPropID::kNumThreads); + const int i = FindProp(NCoderPropID::kNumThreads); if (i >= 0) - if (Props[(unsigned)i].Value.vt == VT_UI4) - return (int)Props[(unsigned)i].Value.ulVal; + { + const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value; + if (val.vt == VT_UI4) + return (int)val.ulVal; + } return -1; } - bool Get_DicSize(UInt32 &res) const + bool Get_DicSize(UInt64 &res) const { res = 0; - int i = FindProp(NCoderPropID::kDictionarySize); + const int i = FindProp(NCoderPropID::kDictionarySize); if (i >= 0) - if (Props[(unsigned)i].Value.vt == VT_UI4) + { + const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value; + if (val.vt == VT_UI4) { - res = Props[(unsigned)i].Value.ulVal; + res = val.ulVal; return true; } + if (val.vt == VT_UI8) + { + res = val.uhVal.QuadPart; + return true; + } + } return false; } @@ -90,23 +101,26 @@ class CMethodProps: public CProps { int i = FindProp(NCoderPropID::kAlgorithm); if (i >= 0) - if (Props[(unsigned)i].Value.vt == VT_UI4) - return Props[(unsigned)i].Value.ulVal; + { + const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value; + if (val.vt == VT_UI4) + return val.ulVal; + } return GetLevel() >= 5 ? 
1 : 0; } - UInt32 Get_Lzma_DicSize() const + UInt64 Get_Lzma_DicSize() const { - int i = FindProp(NCoderPropID::kDictionarySize); - if (i >= 0) - if (Props[(unsigned)i].Value.vt == VT_UI4) - return Props[(unsigned)i].Value.ulVal; - unsigned level = GetLevel(); - return - ( level <= 3 ? (1 << (level * 2 + 16)) : - ( level <= 6 ? (1 << (level + 19)) : - ( level <= 7 ? (1 << 25) : (1 << 26) + UInt64 v; + if (Get_DicSize(v)) + return v; + const unsigned level = GetLevel(); + const UInt32 dictSize = + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) ))); + return dictSize; } bool Get_Lzma_Eos() const @@ -152,7 +166,7 @@ class CMethodProps: public CProps UInt64 GetProp_BlockSize(PROPID id) const { - int i = FindProp(id); + const int i = FindProp(id); if (i >= 0) { const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value; @@ -176,7 +190,7 @@ class CMethodProps: public CProps } const UInt32 kMinSize = (UInt32)1 << 20; const UInt32 kMaxSize = (UInt32)1 << 28; - UInt32 dictSize = Get_Lzma_DicSize(); + const UInt64 dictSize = Get_Lzma_DicSize(); UInt64 blockSize = (UInt64)dictSize << 2; if (blockSize < kMinSize) blockSize = kMinSize; if (blockSize > kMaxSize) blockSize = kMaxSize; @@ -204,29 +218,38 @@ class CMethodProps: public CProps UInt32 Get_BZip2_BlockSize() const { - int i = FindProp(NCoderPropID::kDictionarySize); + const int i = FindProp(NCoderPropID::kDictionarySize); if (i >= 0) - if (Props[(unsigned)i].Value.vt == VT_UI4) + { + const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value; + if (val.vt == VT_UI4) { - UInt32 blockSize = Props[(unsigned)i].Value.ulVal; + UInt32 blockSize = val.ulVal; const UInt32 kDicSizeMin = 100000; const UInt32 kDicSizeMax = 900000; if (blockSize < kDicSizeMin) blockSize = kDicSizeMin; if (blockSize > kDicSizeMax) blockSize = kDicSizeMax; return blockSize; } - unsigned level = GetLevel(); + } + const unsigned level 
= GetLevel(); return 100000 * (level >= 5 ? 9 : (level >= 1 ? level * 2 - 1: 1)); } - UInt32 Get_Ppmd_MemSize() const + UInt64 Get_Ppmd_MemSize() const { - int i = FindProp(NCoderPropID::kUsedMemorySize); + const int i = FindProp(NCoderPropID::kUsedMemorySize); if (i >= 0) - if (Props[(unsigned)i].Value.vt == VT_UI4) - return Props[(unsigned)i].Value.ulVal; - unsigned level = GetLevel(); - return ((UInt32)1 << (level + 19)); + { + const NWindows::NCOM::CPropVariant &val = Props[(unsigned)i].Value; + if (val.vt == VT_UI4) + return val.ulVal; + if (val.vt == VT_UI8) + return val.uhVal.QuadPart; + } + const unsigned level = GetLevel(); + const UInt32 mem = (UInt32)1 << (level + 19); + return mem; } void AddProp_Level(UInt32 level) diff --git a/CPP/7zip/Common/OffsetStream.cpp b/CPP/7zip/Common/OffsetStream.cpp index b3e710f9..b16124c2 100644 --- a/CPP/7zip/Common/OffsetStream.cpp +++ b/CPP/7zip/Common/OffsetStream.cpp @@ -20,13 +20,13 @@ STDMETHODIMP COffsetOutStream::Write(const void *data, UInt32 size, UInt32 *proc STDMETHODIMP COffsetOutStream::Seek(Int64 offset, UInt32 seekOrigin, UInt64 *newPosition) { - UInt64 absoluteNewPosition; if (seekOrigin == STREAM_SEEK_SET) { if (offset < 0) return HRESULT_WIN32_ERROR_NEGATIVE_SEEK; offset += _offset; } + UInt64 absoluteNewPosition = 0; // =0 for gcc-10 HRESULT result = _stream->Seek(offset, seekOrigin, &absoluteNewPosition); if (newPosition) *newPosition = absoluteNewPosition - _offset; diff --git a/CPP/7zip/Common/StreamBinder.cpp b/CPP/7zip/Common/StreamBinder.cpp index fbf2a0de..6b6e0e58 100644 --- a/CPP/7zip/Common/StreamBinder.cpp +++ b/CPP/7zip/Common/StreamBinder.cpp @@ -52,9 +52,9 @@ HRESULT CStreamBinder::Create_ReInit() RINOK(Event__Create_or_Reset(_canRead_Event)); // RINOK(Event__Create_or_Reset(_canWrite_Event)); - _canWrite_Semaphore.Close(); + // _canWrite_Semaphore.Close(); // we need at least 3 items of maxCount: 1 for normal unlock in Read(), 2 items for unlock in CloseRead_CallOnce() - 
_canWrite_Semaphore.Create(0, 3); + _canWrite_Semaphore.OptCreateInit(0, 3); // _readingWasClosed = false; _readingWasClosed2 = false; diff --git a/CPP/7zip/Compress/DeflateDecoder.cpp b/CPP/7zip/Compress/DeflateDecoder.cpp index e34c2c0c..0206ce8d 100644 --- a/CPP/7zip/Compress/DeflateDecoder.cpp +++ b/CPP/7zip/Compress/DeflateDecoder.cpp @@ -274,15 +274,24 @@ HRESULT CCoder::CodeSpec(UInt32 curSize, bool finishInputStream, UInt32 inputPro sym = m_DistDecoder.Decode(&m_InBitStream); if (sym >= _numDistLevels) return S_FALSE; - UInt32 distance = kDistStart[sym] + m_InBitStream.ReadBits(kDistDirectBits[sym]); - if (!m_OutWindowStream.CopyBlock(distance, locLen)) + sym = kDistStart[sym] + m_InBitStream.ReadBits(kDistDirectBits[sym]); + /* + if (sym >= 4) + { + // sym &= 31; + const unsigned numDirectBits = (unsigned)(((sym >> 1) - 1)); + sym = (2 | (sym & 1)) << numDirectBits; + sym += m_InBitStream.ReadBits(numDirectBits); + } + */ + if (!m_OutWindowStream.CopyBlock(sym, locLen)) return S_FALSE; curSize -= locLen; len -= locLen; if (len != 0) { _remainLen = (Int32)len; - _rep0 = distance; + _rep0 = sym; break; } } diff --git a/CPP/7zip/Compress/DeflateEncoder.cpp b/CPP/7zip/Compress/DeflateEncoder.cpp index fb24c6b0..8168ec78 100644 --- a/CPP/7zip/Compress/DeflateEncoder.cpp +++ b/CPP/7zip/Compress/DeflateEncoder.cpp @@ -44,7 +44,9 @@ static const Byte kNoLenStatPrice = 11; static const Byte kNoPosStatPrice = 6; static Byte g_LenSlots[kNumLenSymbolsMax]; -static Byte g_FastPos[1 << 9]; + +#define kNumLogBits 9 // do not change it +static Byte g_FastPos[1 << kNumLogBits]; class CFastPosInit { @@ -60,7 +62,7 @@ class CFastPosInit g_LenSlots[c] = (Byte)i; } - const unsigned kFastSlots = 18; + const unsigned kFastSlots = kNumLogBits * 2; unsigned c = 0; for (Byte slotFast = 0; slotFast < kFastSlots; slotFast++) { @@ -73,14 +75,24 @@ class CFastPosInit static CFastPosInit g_FastPosInit; - inline UInt32 GetPosSlot(UInt32 pos) { + /* if (pos < 0x200) return g_FastPos[pos]; 
return g_FastPos[pos >> 8] + 16; + */ + // const unsigned zz = (pos < ((UInt32)1 << (kNumLogBits))) ? 0 : 8; + /* + const unsigned zz = (kNumLogBits - 1) & + ((UInt32)0 - (((((UInt32)1 << kNumLogBits) - 1) - pos) >> 31)); + */ + const unsigned zz = (kNumLogBits - 1) & + (((((UInt32)1 << kNumLogBits) - 1) - pos) >> (31 - 3)); + return g_FastPos[pos >> zz] + (zz * 2); } + void CEncProps::Normalize() { int level = Level; @@ -253,13 +265,13 @@ NO_INLINE void CCoder::GetMatches() UInt32 distanceTmp[kMatchMaxLen * 2 + 3]; - UInt32 numPairs = (_btMode) ? + const UInt32 numPairs = (UInt32)((_btMode ? Bt3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp): - Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp); + Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp)) - distanceTmp); *m_MatchDistances = (UInt16)numPairs; - if (numPairs > 0) + if (numPairs != 0) { UInt32 i; for (i = 0; i < numPairs; i += 2) diff --git a/CPP/7zip/Compress/LzmaEncoder.cpp b/CPP/7zip/Compress/LzmaEncoder.cpp index 5a6c6831..4b3acd30 100644 --- a/CPP/7zip/Compress/LzmaEncoder.cpp +++ b/CPP/7zip/Compress/LzmaEncoder.cpp @@ -112,12 +112,34 @@ HRESULT SetLzmaProp(PROPID propID, const PROPVARIANT &prop, CLzmaEncProps &ep) return S_OK; } + if (propID == NCoderPropID::kDictionarySize) + { + if (prop.vt == VT_UI8) + { + // 21.03 : we support 64-bit VT_UI8 for dictionary and (dict == 4 GiB) + const UInt64 v = prop.uhVal.QuadPart; + if (v > ((UInt64)1 << 32)) + return E_INVALIDARG; + UInt32 dict; + if (v == ((UInt64)1 << 32)) + dict = (UInt32)(Int32)-1; + else + dict = (UInt32)v; + ep.dictSize = dict; + return S_OK; + } + } + if (prop.vt != VT_UI4) return E_INVALIDARG; UInt32 v = prop.ulVal; switch (propID) { - case NCoderPropID::kDefaultProp: if (v > 31) return E_INVALIDARG; ep.dictSize = (UInt32)1 << (unsigned)v; break; + case NCoderPropID::kDefaultProp: + if (v > 32) + return E_INVALIDARG; + ep.dictSize = (v == 32) ? 
(UInt32)(Int32)-1 : (UInt32)1 << (unsigned)v; + break; SET_PROP_32(kLevel, level) SET_PROP_32(kNumFastBytes, fb) SET_PROP_32U(kMatchFinderCycles, mc) diff --git a/CPP/7zip/Compress/PpmdEncoder.cpp b/CPP/7zip/Compress/PpmdEncoder.cpp index e2754772..d41d2aca 100644 --- a/CPP/7zip/Compress/PpmdEncoder.cpp +++ b/CPP/7zip/Compress/PpmdEncoder.cpp @@ -59,7 +59,7 @@ STDMETHODIMP CEncoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIA for (UInt32 i = 0; i < numProps; i++) { const PROPVARIANT &prop = coderProps[i]; - PROPID propID = propIDs[i]; + const PROPID propID = propIDs[i]; if (propID > NCoderPropID::kReduceSize) continue; if (propID == NCoderPropID::kReduceSize) @@ -68,16 +68,50 @@ STDMETHODIMP CEncoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIA props.ReduceSize = (UInt32)prop.uhVal.QuadPart; continue; } + + if (propID == NCoderPropID::kUsedMemorySize) + { + // here we have selected (4 GiB - 1 KiB) as replacement for (4 GiB) MEM_SIZE. + const UInt32 kPpmd_Default_4g = (UInt32)0 - ((UInt32)1 << 10); + UInt32 v; + if (prop.vt == VT_UI8) + { + // 21.03 : we support 64-bit values (for 4 GiB value) + const UInt64 v64 = prop.uhVal.QuadPart; + if (v64 > ((UInt64)1 << 32)) + return E_INVALIDARG; + if (v64 == ((UInt64)1 << 32)) + v = kPpmd_Default_4g; + else + v = (UInt32)v64; + } + else if (prop.vt == VT_UI4) + v = (UInt32)prop.ulVal; + else + return E_INVALIDARG; + if (v > PPMD7_MAX_MEM_SIZE) + v = kPpmd_Default_4g; + + /* here we restrict MEM_SIZE for Encoder. + It's for better performance of encoding and decoding. + The Decoder still supports more MEM_SIZE values. 
*/ + if (v < ((UInt32)1 << 16) || (v & 3) != 0) + return E_INVALIDARG; + // if (v < PPMD7_MIN_MEM_SIZE) return E_INVALIDARG; // (1 << 11) + /* + Supported MEM_SIZE range : + [ (1 << 11) , 0xFFFFFFFF - 12 * 3 ] - current 7-Zip's Ppmd7 constants + [ 1824 , 0xFFFFFFFF ] - real limits of Ppmd7 code + */ + props.MemSize = v; + continue; + } + if (prop.vt != VT_UI4) return E_INVALIDARG; UInt32 v = (UInt32)prop.ulVal; switch (propID) { - case NCoderPropID::kUsedMemorySize: - if (v < (1 << 16) || v > PPMD7_MAX_MEM_SIZE || (v & 3) != 0) - return E_INVALIDARG; - props.MemSize = v; - break; case NCoderPropID::kOrder: if (v < 2 || v > 32) return E_INVALIDARG; diff --git a/CPP/7zip/LzFindOpt.mak b/CPP/7zip/LzFindOpt.mak new file mode 100644 index 00000000..169e10f0 --- /dev/null +++ b/CPP/7zip/LzFindOpt.mak @@ -0,0 +1,7 @@ +!IF defined(USE_C_LZFINDOPT) || "$(PLATFORM)" != "x64" +C_OBJS = $(C_OBJS) \ + $O\LzFindOpt.obj +!ELSE +ASM_OBJS = $(ASM_OBJS) \ + $O\LzFindOpt.obj +!ENDIF diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp index bd1655a1..ca5778c6 100644 --- a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp +++ b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp @@ -1405,11 +1405,13 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) else if (options.Command.CommandType == NCommandType::kBenchmark) { options.NumIterations = 1; + options.NumIterations_Defined = false; if (curCommandIndex < numNonSwitchStrings) { if (!StringToUInt32(nonSwitchStrings[curCommandIndex], options.NumIterations)) throw CArcCmdLineException("Incorrect number of benchmark iterations", nonSwitchStrings[curCommandIndex]); curCommandIndex++; + options.NumIterations_Defined = true; } } else if (options.Command.CommandType == NCommandType::kHash) diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.h b/CPP/7zip/UI/Common/ArchiveCommandLine.h index 150541e6..1e488d8c 100644 --- a/CPP/7zip/UI/Common/ArchiveCommandLine.h +++ b/CPP/7zip/UI/Common/ArchiveCommandLine.h 
@@ -109,6 +109,7 @@ struct CArcCmdLineOptions // Benchmark UInt32 NumIterations; + bool NumIterations_Defined; CArcCmdLineOptions(): HelpMode(false), diff --git a/CPP/7zip/UI/Common/Bench.cpp b/CPP/7zip/UI/Common/Bench.cpp index 4e957331..fb4c1726 100644 --- a/CPP/7zip/UI/Common/Bench.cpp +++ b/CPP/7zip/UI/Common/Bench.cpp @@ -51,6 +51,7 @@ #include "../../../Common/StringToInt.h" #include "../../Common/MethodProps.h" +#include "../../Common/StreamObjects.h" #include "../../Common/StreamUtils.h" #include "Bench.h" @@ -87,20 +88,30 @@ static void SetComplexCommandsMs(UInt32 complexInMs, } } +// const UInt64 kBenchmarkUsageMult = 1000000; // for debug +static const unsigned kBenchmarkUsageMultBits = 16; +static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits; + +UInt64 Benchmark_GetUsage_Percents(UInt64 usage) +{ + return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult; +} + static const unsigned kNumHashDictBits = 17; static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test -static const unsigned kOldLzmaDictBits = 30; +static const unsigned kOldLzmaDictBits = 32; -static const UInt32 kAdditionalSize = (1 << 16); +// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug +static const size_t kAdditionalSize = (size_t)1 << 16; static const UInt32 kCompressedAdditionalSize = (1 << 10); -static const UInt32 kMaxLzmaPropSize = 5; +static const UInt32 kMaxMethodPropSize = (1 << 6); #define ALLOC_WITH_HRESULT(_buffer_, _size_) \ { (_buffer_)->Alloc(_size_); \ - if (!(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; } + if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; } class CBaseRandomGenerator @@ -143,7 +154,7 @@ static void RandGen(Byte *buf, size_t size) } -class CBenchRandomGenerator: public CAlignedBuffer +class CBenchRandomGenerator: public CMidAlignedBuffer { static UInt32 GetVal(UInt32 &res, unsigned numBits) { @@ -172,14 +183,21 @@ class CBenchRandomGenerator: public CAlignedBuffer void 
GenerateLz(unsigned dictBits, UInt32 salt) { CBaseRandomGenerator rg(salt); - UInt32 pos = 0; - UInt32 rep0 = 1; + size_t pos = 0; + size_t rep0 = 1; const size_t bufSize = Size(); Byte *buf = (Byte *)*this; unsigned posBits = 1; + + // printf("\n dictBits = %d\n", (UInt32)dictBits); + // printf("\n bufSize = 0x%p\n", (const void *)bufSize); while (pos < bufSize) { + /* + if (pos >= ((UInt32)1 << 31)) + printf(" %x\n", pos); + */ UInt32 r = rg.GetRnd(); if (GetVal(r, 1) == 0 || pos < 1024) buf[pos++] = (Byte)(r & 0xFF); @@ -192,7 +210,7 @@ class CBenchRandomGenerator: public CAlignedBuffer { len += GetLen(r); - while (((UInt32)1 << posBits) < pos) + while (((size_t)1 << posBits) < pos) posBits++; unsigned numBitsMax = dictBits; @@ -206,11 +224,12 @@ class CBenchRandomGenerator: public CAlignedBuffer for (;;) { - UInt32 ppp = GetVal(r, numLogBits) + kAddBits; + const UInt32 ppp = GetVal(r, numLogBits) + kAddBits; r = rg.GetRnd(); if (ppp > numBitsMax) continue; - rep0 = GetVal(r, ppp); + // rep0 = GetVal(r, ppp); + rep0 = r & (((size_t)1 << ppp) - 1); if (rep0 < pos) break; r = rg.GetRnd(); @@ -218,10 +237,11 @@ class CBenchRandomGenerator: public CAlignedBuffer rep0++; } + // len *= 300; // for debug { - UInt32 rem = (UInt32)bufSize - pos; + const size_t rem = bufSize - pos; if (len > rem) - len = rem; + len = (UInt32)rem; } Byte *dest = buf + pos; const Byte *src = dest - rep0; @@ -230,6 +250,7 @@ class CBenchRandomGenerator: public CAlignedBuffer *dest++ = *src++; } } + // printf("\n CRC = %x\n", CrcCalc(buf, bufSize)); } }; @@ -274,7 +295,7 @@ STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processed class CBenchmarkOutStream: public ISequentialOutStream, - public CAlignedBuffer, + public CMidAlignedBuffer, public CMyUnknownImp { // bool _overflow; @@ -304,6 +325,8 @@ class CBenchmarkOutStream: Crc = CrcUpdate(Crc, data, size); } + size_t GetPos() const { return Pos; } + // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, 
(unsigned)Pos); } MY_UNKNOWN_IMP @@ -571,6 +594,7 @@ class CBenchProgressInfo: STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize); }; + STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize) { HRESULT res = Status->GetResult(); @@ -578,6 +602,22 @@ STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 return res; if (!Callback) return res; + + /* + static UInt64 inSizePrev = 0; + static UInt64 outSizePrev = 0; + UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0; + if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; } + if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; } + UInt64 percents = delta2 * 1000; + if (delta1 != 0) + percents /= delta1; + printf("=== %7d %7d %7d %7d ratio = %4d\n", + (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10), + (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10), + (unsigned)percents); + */ + CBenchInfo info; SetFinishTime(info); if (Status->EncodeMode) @@ -599,18 +639,26 @@ STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 static const unsigned kSubBits = 8; -static UInt32 GetLogSize(UInt32 size) +static UInt32 GetLogSize(UInt64 size) { - for (unsigned i = kSubBits; i < 32; i++) - for (UInt32 j = 0; j < (1 << kSubBits); j++) - if (size <= (((UInt32)1) << i) + (j << (i - kSubBits))) - return (i << kSubBits) + j; - return (32 << kSubBits); + if (size <= 1) + return 0; + unsigned i; + for (i = 2; i < 64; i++) + if (size < ((UInt64)1 << i)) + break; + i--; + UInt32 v; + if (i <= kSubBits) + v = (UInt32)(size) << (kSubBits - i); + else + v = (UInt32)(size >> (i - kSubBits)); + return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1)); } static void NormalizeVals(UInt64 &v1, UInt64 &v2) { - while (v1 > 1000000) + while (v1 >= ((UInt32)1 << ((64 - kBenchmarkUsageMultBits) / 2))) { v1 >>= 1; v2 >>= 1; @@ -629,7 +677,7 @@ UInt64 CBenchInfo::GetUsage() const userFreq = 
1; if (globalTime == 0) globalTime = 1; - return userTime * globalFreq * 1000000 / userFreq / globalTime; + return userTime * globalFreq * kBenchmarkUsageMult / userFreq / globalTime; } UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const @@ -659,9 +707,9 @@ static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq) return value * freq / elTime; } -UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const +UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const { - return MyMultDiv64(numCommands, GlobalTime, GlobalFreq); + return MyMultDiv64(numUnits, GlobalTime, GlobalFreq); } struct CBenchProps @@ -694,7 +742,7 @@ struct CBenchProps return (packSize * DecComplexCompr + unpackSize * DecComplexUnc); } - UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size); + UInt64 GetCompressRating(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size); UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations); }; @@ -706,38 +754,50 @@ void CBenchProps::SetLzmaCompexity() LzmaRatingMode = true; } -UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) +UInt64 CBenchProps::GetCompressRating(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) { if (dictSize < (1 << kBenchMinDicLogSize)) dictSize = (1 << kBenchMinDicLogSize); UInt64 encComplex = EncComplex; if (LzmaRatingMode) { - UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits); + /* + for (UInt64 uu = 0; uu < (UInt64)0xf << 60;) + { + unsigned rr = GetLogSize(uu); + printf("\n%16I64x , log = %4x", uu, rr); + uu += 1; + uu += uu / 50; + } + */ + // throw 1; + const UInt32 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits); encComplex = 870 + ((t * t * 5) >> (2 * kSubBits)); } - UInt64 numCommands = (UInt64)size * encComplex; + const UInt64 numCommands = (UInt64)size * encComplex; return MyMultDiv64(numCommands, elapsedTime, freq); } UInt64 
CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) { - UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations; + const UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations; return MyMultDiv64(numCommands, elapsedTime, freq); } -UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) + + +UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const { CBenchProps props; props.SetLzmaCompexity(); - return props.GetCompressRating(dictSize, elapsedTime, freq, size); + return props.GetCompressRating(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations); } -UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) +UInt64 CBenchInfo::GetRating_LzmaDec() const { CBenchProps props; props.SetLzmaCompexity(); - return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations); + return props.GetDecompressRating(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations); } @@ -880,10 +940,14 @@ struct CBenchSyncCommon -struct CEncoderInfo; +class CEncoderInfo; -struct CEncoderInfo +class CEncoderInfo { + CLASS_NO_COPY(CEncoderInfo) + +public: + #ifndef _7ZIP_ST NWindows::CThread thread[2]; NSynchronization::CManualResetEvent ReadyEvent; @@ -949,8 +1013,11 @@ struct CEncoderInfo const Byte *fileData; CBenchRandomGenerator rg; - CAlignedBuffer rgCopy; // it must be 16-byte aligned !!! - CBenchmarkOutStream *propStreamSpec; + CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!! 
+ + // CBenchmarkOutStream *propStreamSpec; + Byte propsData[kMaxMethodPropSize]; + CBufPtrSeqOutStream *propStreamSpec; CMyComPtr propStream; unsigned generateDictBits; @@ -1055,15 +1122,21 @@ struct CEncoderInfo }; + + +static size_t GetBenchCompressedSize(size_t bufferSize) +{ + return kCompressedAdditionalSize + bufferSize + bufferSize / 16; + // kBufferSize / 2; +} + + HRESULT CEncoderInfo::Generate() { const COneMethodInfo &method = _method; // we need extra space, if input data is already compressed - const size_t kCompressedBufferSize = - kCompressedAdditionalSize + - kBufferSize + kBufferSize / 16; - // kBufferSize / 2; + const size_t kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); if (kCompressedBufferSize < kBufferSize) return E_FAIL; @@ -1078,7 +1151,13 @@ HRESULT CEncoderInfo::Generate() if (generateDictBits == 0) rg.GenerateSimpleRandom(Salt); else + { + if (generateDictBits >= sizeof(size_t) * 8 + && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1))) + return E_INVALIDARG; rg.GenerateLz(generateDictBits, Salt); + // return E_ABORT; // for debug + } // printf("\n%d\n ", GetTickCount() - ttt); crc = CrcCalc((const Byte *)rg, rg.Size()); @@ -1101,11 +1180,12 @@ HRESULT CEncoderInfo::Generate() if (!propStream) { - propStreamSpec = new CBenchmarkOutStream; + propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream; propStream = propStreamSpec; } - ALLOC_WITH_HRESULT(propStreamSpec, kMaxLzmaPropSize); - propStreamSpec->Init(true, false); + // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize); + // propStreamSpec->Init(true, false); + propStreamSpec->Init(propsData, sizeof(propsData)); CMyComPtr coder; @@ -1206,8 +1286,12 @@ static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size) HRESULT CEncoderInfo::Encode() { + // printf("\nCEncoderInfo::Generate\n"); + RINOK(Generate()); + // printf("\n2222\n"); + #ifndef _7ZIP_ST if (Common) { @@ -1359,7 +1443,7 @@ HRESULT CEncoderInfo::Decode(UInt32 decoderIndex) 
CMyComPtr setDecProps; coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps); - if (!setDecProps && propStreamSpec->Pos != 0) + if (!setDecProps && propStreamSpec->GetPos() != 0) return E_FAIL; CCrcOutStream *crcOutStreamSpec = new CCrcOutStream; @@ -1393,7 +1477,10 @@ HRESULT CEncoderInfo::Decode(UInt32 decoderIndex) if (setDecProps) { - RINOK(setDecProps->SetDecoderProperties2((const Byte *)*propStreamSpec, (UInt32)propStreamSpec->Pos)); + RINOK(setDecProps->SetDecoderProperties2( + /* (const Byte *)*propStreamSpec, */ + propsData, + (UInt32)propStreamSpec->GetPos())); } { @@ -1918,16 +2005,36 @@ static HRESULT MethodBench( info.PackSize += encoder.compressedSize; } - RINOK(callback->SetDecodeResult(info, false)); + // RINOK(callback->SetDecodeResult(info, false)); // why we called before 21.03 ?? RINOK(callback->SetDecodeResult(info, true)); return S_OK; } -static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary) + +static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog) { - UInt32 hs = dictionary - 1; + /* + if (dictSizeLog < 32) + return (UInt32)1 << dictSizeLog; + else + return (UInt32)(Int32)-1; + */ + return (UInt64)1 << dictSizeLog; +} + + +// it's limit of current LZMA implementation that can be changed later +#define kLzmaMaxDictSize ((UInt32)15 << 28) + +static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict) +{ + if (dict == 0) + dict = 1; + if (dict > kLzmaMaxDictSize) + dict = kLzmaMaxDictSize; + UInt32 hs = (UInt32)dict - 1; hs |= (hs >> 1); hs |= (hs >> 2); hs |= (hs >> 4); @@ -1937,29 +2044,59 @@ static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary) if (hs > (1 << 24)) hs >>= 1; hs++; - return ((hs + (1 << 16)) + (UInt64)dictionary * 2) * 4 + (UInt64)dictionary * 3 / 2 + + hs += (1 << 16); + + const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16); + UInt64 blockSize = (UInt64)dict + (1 << 16) + + (multiThread ? 
(1 << 20) : 0); + blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)); + if (blockSize >= kBlockSizeMax) + blockSize = kBlockSizeMax; + + UInt64 son = (UInt64)dict; + if (btMode) + son *= 2; + const UInt64 v = (hs + son) * 4 + blockSize + (1 << 20) + (multiThread ? (6 << 20) : 0); + + // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20)); + // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20)); + return v; } -UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench) + +UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench) { - const UInt32 kBufferSize = dictionary; - const UInt32 kCompressedBufferSize = kBufferSize; // / 2; - bool lzmaMt = (totalBench || numThreads > 1); + const size_t kBufferSize = (size_t)dictionary + kAdditionalSize; + const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2; + if (level < 0) + level = 5; + const int algo = (level < 5 ? 0 : 1); + const int btMode = (algo == 0 ? 
0 : 1); + UInt32 numBigThreads = numThreads; - if (!totalBench && lzmaMt) - numBigThreads /= 2; + bool lzmaMt = (totalBench || (numThreads > 1 && btMode)); + if (btMode) + { + if (!totalBench && lzmaMt) + numBigThreads /= 2; + } return ((UInt64)kBufferSize + kCompressedBufferSize + - GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads; + GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads; } +static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary) +{ + // dictionary += (dictionary >> 9); // for page tables (virtual memory) + return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20); +} // ---------- CRC and HASH ---------- struct CCrcInfo_Base { - CAlignedBuffer Buffer; + CMidAlignedBuffer Buffer; const Byte *Data; size_t Size; bool CreateLocalBuf; @@ -2111,9 +2248,12 @@ static THREAD_FUNC_DECL FreqThreadFunction(void *param) UInt32 sum = g_BenchCpuFreqTemp; for (UInt64 k = p->NumIterations; k > 0; k--) { - p->CallbackRes = p->Callback->CheckBreak(); - if (p->CallbackRes != S_OK) - return 0; + if (p->Callback) + { + p->CallbackRes = p->Callback->CheckBreak(); + if (p->CallbackRes != S_OK) + return 0; + } sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp); } p->ValRes = sum; @@ -2437,27 +2577,6 @@ static const CBenchHash g_Hash[] = { 2, 5500, 0x85189d02, "BLAKE2sp" } }; -struct CTotalBenchRes -{ - // UInt64 NumIterations1; // for Usage - UInt64 NumIterations2; // for Rating / RPU - - UInt64 Rating; - UInt64 Usage; - UInt64 RPU; - - void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; } - - void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2) - { - Rating = (r1.Rating + r2.Rating); - Usage = (r1.Usage + r2.Usage); - RPU = (r1.RPU + r2.RPU); - // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1); - NumIterations2 = (r1.NumIterations2 + r2.NumIterations2); - } -}; - static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size) { char s[128]; 
@@ -2523,7 +2642,7 @@ static void PrintSpaces(IBenchPrintCallback &f, unsigned size) static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size) { - PrintNumber(f, (usage + 5000) / 10000, size); + PrintNumber(f, Benchmark_GetUsage_Percents(usage), size); } static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq) @@ -2537,17 +2656,39 @@ static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt6 PrintSpaces(f, kFieldSize_EUAndEffec); else { - UInt64 ddd = cpuFreq * usage / 100; - /* - if (ddd == 0) - ddd = 1; - */ - PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU); + PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU); PrintPercents(f, rating, cpuFreq, kFieldSize_Effec); } } } + +void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info) +{ + Speed = info.GetUnpackSizeSpeed(); + Usage = info.GetUsage(); + RPU = info.GetRatingPerUsage(Rating); +} + +void CTotalBenchRes::Mult_For_Weight(unsigned weight) +{ + NumIterations2 *= weight; + RPU *= weight; + Rating *= weight; + Usage *= weight; // scaled by weight like RPU and Rating: PrintTotals divides every field by NumIterations2 + Speed *= weight; +} + +void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r) +{ + Rating += r.Rating; + Usage += r.Usage; + RPU += r.RPU; + Speed += r.Speed; + // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1); + NumIterations2 += r.NumIterations2; +} + static void PrintResults(IBenchPrintCallback *f, const CBenchInfo &info, unsigned weight, @@ -2555,36 +2696,43 @@ static void PrintResults(IBenchPrintCallback *f, bool showFreq, UInt64 cpuFreq, CTotalBenchRes *res) { - UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations); + CTotalBenchRes t; + t.Rating = rating; + t.NumIterations2 = 1; + t.Generate_From_BenchInfo(info); + if (f) { - if (speed != 0) - PrintNumber(*f, speed / 1024, kFieldSize_Speed); + if (t.Speed != 0) + PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed); else PrintSpaces(*f, 1 + 
kFieldSize_Speed); } - UInt64 usage = info.GetUsage(); - UInt64 rpu = info.GetRatingPerUsage(rating); if (f) { - PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq); + PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq); } if (res) { // res->NumIterations1++; - res->NumIterations2 += weight; - res->RPU += (rpu * weight); - res->Rating += (rating * weight); - res->Usage += (usage * weight); + t.Mult_For_Weight(weight); + res->Update_With_Res(t); } } -static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res) +static void PrintTotals(IBenchPrintCallback &f, + bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res) { - PrintSpaces(f, 1 + kFieldSize_Speed); + const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1; + const UInt64 speed = res.Speed / numIterations2; + if (showSpeed && speed != 0) + PrintNumber(f, speed / 1024, kFieldSize_Speed); + else + PrintSpaces(f, 1 + kFieldSize_Speed); + + // PrintSpaces(f, 1 + kFieldSize_Speed); // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1; - UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1; PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq); } @@ -2649,7 +2797,7 @@ void Add_LargePages_String(AString &s) #ifdef _WIN32 if (g_LargePagesMode || g_LargePageSize != 0) { - s += " (LP-"; + s.Add_OptSpaced("(LP-"); PrintSize_KMGT_Or_Hex(s, g_LargePageSize); #ifdef MY_CPU_X86_OR_AMD64 if (CPU_IsSupported_PageGB()) @@ -2700,7 +2848,7 @@ struct CBenchCallbackToPrint: public IBenchCallback CTotalBenchRes EncodeRes; CTotalBenchRes DecodeRes; IBenchPrintCallback *_file; - UInt32 DictSize; + UInt64 DictSize; bool Use2Columns; unsigned NameFieldSize; @@ -2876,7 +3024,8 @@ struct CFreqBench UInt64 specifiedFreq; // out: - UInt64 cpuFreq; + UInt64 CpuFreqRes; + UInt64 UsageRes; UInt32 res; CFreqBench() @@ 
-2897,7 +3046,8 @@ HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file ) { res = 0; - cpuFreq = 0; + CpuFreqRes = 0; + UsageRes = 0; if (numThreads == 0) numThreads = 1; @@ -2984,17 +3134,17 @@ HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file info.PackSize = 0; info.NumIterations = 1; + const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity; + const UInt64 rating = info.GetSpeed(numCommands); + CpuFreqRes = rating / numThreads; + UsageRes = info.GetUsage(); + if (_file) { - { - UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity; - UInt64 rating = info.GetSpeed(numCommands); - cpuFreq = rating / numThreads; - PrintResults(_file, info, + PrintResults(_file, info, 0, // weight rating, - showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL); - } + showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL); RINOK(_file->CheckBreak()); } @@ -3041,7 +3191,7 @@ static HRESULT CrcBench( /* // if will generate random data in each thread, instead of global data - CAlignedBuffer buffer; + CMidAlignedBuffer buffer; if (!fileData) { ALLOC_WITH_HRESULT(&buffer, bufferSize) @@ -3288,10 +3438,10 @@ HRESULT Bench( DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback *printCallback, IBenchCallback *benchCallback, - // IBenchFreqCallback *freqCallback, const CObjectVector &props, UInt32 numIterations, - bool multiDict) + bool multiDict, + IBenchFreqCallback *freqCallback) { if (!CrcInternalTest()) return E_FAIL; @@ -3342,7 +3492,9 @@ HRESULT Bench( COneMethodInfo method; - CAlignedBuffer fileDataBuffer; + CMidAlignedBuffer fileDataBuffer; + bool use_fileData = false; + bool isFixedDict = false; { unsigned i; @@ -3395,7 +3547,10 @@ HRESULT Bench( return E_INVALIDARG; } + // (len == 0) is allowed. 
Also it's allowed if Alloc(0) returns NULL here + ALLOC_WITH_HRESULT(&fileDataBuffer, len); + use_fileData = true; { size_t processed; @@ -3436,9 +3591,14 @@ HRESULT Bench( continue; } - if (name.IsEqualTo("ds")) + const bool isCurrent_fixedDict = name.IsEqualTo("df"); + if (isCurrent_fixedDict) + isFixedDict = true; + if (isCurrent_fixedDict || name.IsEqualTo("ds")) { RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog)); + if (startDicLog > 32) + return E_INVALIDARG; startDicLog_Defined = true; continue; } @@ -3505,6 +3665,15 @@ HRESULT Bench( if (printCallback) { AString s; + + #ifndef _WIN32 + s += "Compiler: "; + GetCompiler(s); + printCallback->Print(s); + printCallback->NewLine(); + s.Empty(); + #endif + GetSystemInfoText(s); printCallback->Print(s); printCallback->NewLine(); @@ -3512,10 +3681,10 @@ HRESULT Bench( if (printCallback) { - printCallback->Print("CPU Freq:"); + printCallback->Print("1T CPU Freq (MHz):"); } - if (printCallback /* || freqCallback */) + if (printCallback || freqCallback) { UInt64 numMilCommands = 1 << 6; if (specifiedFreq != 0) @@ -3543,6 +3712,7 @@ HRESULT Bench( start = 1; const UInt64 freq = GetFreq(); // mips is constant in some compilers + const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, start, freq); const UInt64 mipsVal = numMilCommands * freq / start; if (printCallback) { @@ -3556,10 +3726,10 @@ HRESULT Bench( PrintNumber(*printCallback, mipsVal, 5); } } - /* if (freqCallback) - freqCallback->AddCpuFreq(mipsVal); - */ + { + RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult)); + } if (jj >= 1) { @@ -3573,7 +3743,10 @@ HRESULT Bench( if (start >= freq * 16) { printCallback->Print(" (Cmplx)"); - needSetComplexity = true; + if (!freqCallback) // we don't want complexity change for old gui lzma benchmark + { + needSetComplexity = true; + } needStop = true; } if (needSetComplexity) @@ -3583,8 +3756,110 @@ HRESULT Bench( numMilCommands <<= 1; } } + if (freqCallback) + { + 
RINOK(freqCallback->FreqsFinished(1)); + } + } + + if (numThreadsSpecified >= 2) + if (printCallback || freqCallback) + { + if (printCallback) + printCallback->NewLine(); + + /* it can show incorrect frequency for HT threads. + so we reduce freq test to (numCPUs / 2) */ + + UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified; + if (numThreads < 1) + numThreads = 1; + + if (printCallback) + { + char s[128]; + ConvertUInt64ToString(numThreads, s); + printCallback->Print(s); + printCallback->Print("T CPU Freq (MHz):"); + } + UInt64 numMilCommands = 1 << 10; + if (specifiedFreq != 0) + { + while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000)) + numMilCommands >>= 1; + } + + for (int jj = 0;; jj++) + { + if (printCallback) + RINOK(printCallback->CheckBreak()); + + { + // PrintLeft(f, "CPU", kFieldSize_Name); + + // UInt32 resVal; + + CFreqBench fb; + fb.complexInCommands = numMilCommands * 1000000; + fb.numThreads = numThreads; + // showFreq; + // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); + fb.showFreq = true; + fb.specifiedFreq = 1; + + HRESULT res = fb.FreqBench(NULL /* printCallback */ + #ifndef _7ZIP_ST + , &affinityMode + #endif + ); + RINOK(res); + + if (freqCallback) + { + RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes)); + } + + if (printCallback) + { + /* + if (realDelta == 0) + { + printCallback->Print(" -"); + } + else + */ + { + // PrintNumber(*printCallback, start, 0); + PrintUsage(*printCallback, fb.UsageRes, 3); + printCallback->Print("%"); + PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0); + printCallback->Print(" "); + + // PrintNumber(*printCallback, fb.UsageRes, 5); + } + } + } + // if (jj >= 1) + { + bool needStop = (numMilCommands >= (1 << + #ifdef _DEBUG + 7 + #else + 11 + #endif + )); + if (needStop) + break; + numMilCommands <<= 1; + } + } + if (freqCallback) + { + RINOK(freqCallback->FreqsFinished(numThreads)); + } } + if 
(printCallback) { printCallback->NewLine(); @@ -3597,8 +3872,10 @@ HRESULT Bench( if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax) return E_INVALIDARG; - UInt32 dict; - bool dictIsDefined = method.Get_DicSize(dict); + UInt64 dict = (UInt64)1 << startDicLog; + const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict)); + + const int level = method.GetLevel(); if (method.MethodName.IsEmpty()) method.MethodName = "LZMA"; @@ -3607,8 +3884,20 @@ HRESULT Bench( { CBenchProps benchProps; benchProps.SetLzmaCompexity(); - UInt32 dictSize = method.Get_Lzma_DicSize(); - UInt32 uncompressedDataSize = kAdditionalSize + dictSize; + const UInt64 dictSize = method.Get_Lzma_DicSize(); + + size_t uncompressedDataSize; + if (use_fileData) + { + uncompressedDataSize = fileDataBuffer.Size(); + } + else + { + uncompressedDataSize = kAdditionalSize + (size_t)dictSize; + if (uncompressedDataSize < dictSize) + return E_INVALIDARG; + } + return MethodBench( EXTERNAL_CODECS_LOC_VARS complexInCommands, @@ -3636,7 +3925,7 @@ HRESULT Bench( UInt64 dict64 = dict; if (!dictIsDefined) dict64 = (1 << 27); - if (fileDataBuffer.IsAllocated()) + if (use_fileData) { if (!dictIsDefined) dict64 = fileDataBuffer.Size(); @@ -3684,13 +3973,15 @@ HRESULT Bench( { UInt64 usage = 1 << 20; UInt64 bufSize = dict64; - if (fileDataBuffer.IsAllocated()) + if (use_fileData) { usage += fileDataBuffer.Size(); if (bufSize > fileDataBuffer.Size()) bufSize = fileDataBuffer.Size(); + #ifndef _7ZIP_ST if (numThreadsSpecified != 1) usage += bufSize * numThreadsSpecified * (k_Crc_CreateLocalBuf_For_File ? 
1 : 0); + #endif } else usage += numThreadsSpecified * bufSize; @@ -3776,7 +4067,7 @@ HRESULT Bench( PrintRight(f, s, 4); size_t dataSize = fileDataBuffer.Size(); - if (dataSize > bufSize || !fileDataBuffer.IsAllocated()) + if (dataSize > bufSize || !use_fileData) dataSize = (size_t)bufSize; FOR_VECTOR (ti, numThreadsVector) @@ -3792,7 +4083,7 @@ HRESULT Bench( speed, usage, complexity, 1, // benchWeight, - (pow == kNumHashDictBits && !fileDataBuffer.IsAllocated()) ? checkSum : NULL, + (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL, method, &f, #ifndef _7ZIP_ST @@ -3880,7 +4171,7 @@ HRESULT Bench( f.NewLine(); } - if (!dictIsDefined) + if (!dictIsDefined && !onlyHashBench) { const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25); unsigned dicSizeLog = dicSizeLog_Main; @@ -3891,10 +4182,10 @@ HRESULT Bench( if (ramSize_Defined) for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--) - if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize) + if (GetBenchMemoryUsage(numThreads, level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize) break; - dict = (UInt32)1 << dicSizeLog; + dict = (UInt64)1 << dicSizeLog; if (totalBenchMode && dicSizeLog != dicSizeLog_Main) { @@ -3904,7 +4195,12 @@ HRESULT Bench( } } - Print_Usage_and_Threads(f, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), numThreads); + Print_Usage_and_Threads(f, + onlyHashBench ? 
+ GetBenchMemoryUsage_Hash(numThreads, dict) : + GetBenchMemoryUsage(numThreads, level, dict, totalBenchMode), + numThreads); + f.NewLine(); f.NewLine(); @@ -3988,6 +4284,7 @@ HRESULT Bench( if (specifiedFreq != 0) cpuFreq = specifiedFreq; + // bool showTotalSpeed = false; if (totalBenchMode) { @@ -4017,7 +4314,7 @@ HRESULT Bench( ); RINOK(res); - cpuFreq = fb.cpuFreq; + cpuFreq = fb.CpuFreqRes; callback.NewLine(); if (specifiedFreq != 0) @@ -4037,12 +4334,12 @@ HRESULT Bench( if (!onlyHashBench) { - size_t dataSize = dict; - if (fileDataBuffer.IsAllocated()) + size_t dataSize = (size_t)dict; + if (use_fileData) { dataSize = fileDataBuffer.Size(); if (dictIsDefined && dataSize > dict) - dataSize = dict; + dataSize = (size_t)dict; } HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS @@ -4051,7 +4348,7 @@ HRESULT Bench( numThreads, &affinityMode, #endif - dictIsDefined || fileDataBuffer.IsAllocated(), // forceUnpackSize + dictIsDefined || use_fileData, // forceUnpackSize dataSize, (const Byte *)fileDataBuffer, printCallback, &callback); @@ -4061,12 +4358,16 @@ HRESULT Bench( { size_t dataSize = (size_t)1 << kNumHashDictBits; if (dictIsDefined) - dataSize = dict; - if (fileDataBuffer.IsAllocated()) + { + dataSize = (size_t)dict; + if (dataSize != dict) + return E_OUTOFMEMORY; + } + if (use_fileData) { dataSize = fileDataBuffer.Size(); if (dictIsDefined && dataSize > dict) - dataSize = dict; + dataSize = (size_t)dict; } HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads, @@ -4143,12 +4444,12 @@ HRESULT Bench( for (unsigned i = 0; i < numIterations; i++) { - unsigned pow = (dict < ((UInt32)1 << startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog; + unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? 
kBenchMinDicLogSize : (unsigned)startDicLog; if (!multiDict) - pow = 31; - while (((UInt32)1 << pow) > dict && pow > 0) + pow = 32; + while (GetDictSizeFromLog(pow) > dict && pow > 0) pow--; - for (; ((UInt32)1 << pow) <= dict; pow++) + for (; GetDictSizeFromLog(pow) <= dict; pow++) { char s[16]; ConvertUInt32ToString(pow, s); @@ -4156,7 +4457,7 @@ HRESULT Bench( s[pos++] = ':'; s[pos] = 0; PrintLeft(f, s, kFieldSize_SmallName); - callback.DictSize = (UInt32)1 << pow; + callback.DictSize = (UInt64)1 << pow; COneMethodInfo method2 = method; @@ -4170,13 +4471,15 @@ HRESULT Bench( } size_t uncompressedDataSize; - if (fileDataBuffer.IsAllocated()) + if (use_fileData) { uncompressedDataSize = fileDataBuffer.Size(); } else { - uncompressedDataSize = callback.DictSize; + uncompressedDataSize = (size_t)callback.DictSize; + if (uncompressedDataSize != callback.DictSize) + return E_OUTOFMEMORY; if (uncompressedDataSize >= (1 << 18)) uncompressedDataSize += kAdditionalSize; } @@ -4212,16 +4515,19 @@ HRESULT Bench( if (use2Columns) { PrintLeft(f, "Avr:", callback.NameFieldSize); - PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes); + PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes); f.Print(kSep); - PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes); + PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes); f.NewLine(); } PrintLeft(f, "Tot:", callback.NameFieldSize); CTotalBenchRes midRes; - midRes.SetSum(callback.EncodeRes, callback.DecodeRes); - PrintTotals(f, showFreq, cpuFreq, midRes); + midRes = callback.EncodeRes; + midRes.Update_With_Res(callback.DecodeRes); + + // midRes.SetSum(callback.EncodeRes, callback.DecodeRes); + PrintTotals(f, showFreq, cpuFreq, false, midRes); f.NewLine(); } diff --git a/CPP/7zip/UI/Common/Bench.h b/CPP/7zip/UI/Common/Bench.h index 02f443e3..ab0c3048 100644 --- a/CPP/7zip/UI/Common/Bench.h +++ b/CPP/7zip/UI/Common/Bench.h @@ -8,6 +8,8 @@ #include "../../Common/CreateCoder.h" #include 
"../../UI/Common/Property.h" +UInt64 Benchmark_GetUsage_Percents(UInt64 usage); + struct CBenchInfo { UInt64 GlobalTime; @@ -17,26 +19,71 @@ struct CBenchInfo UInt64 UnpackSize; UInt64 PackSize; UInt64 NumIterations; + + /* + during Code(): we track benchInfo only from one thread (thread with index[0]) + NumIterations means number of threads + UnpackSize and PackSize are total sizes of all iterations of current thread + after Code(): + NumIterations means the number of Iterations + UnpackSize and PackSize are total sizes of all threads + */ CBenchInfo(): NumIterations(0) {} + UInt64 GetUsage() const; UInt64 GetRatingPerUsage(UInt64 rating) const; - UInt64 GetSpeed(UInt64 numCommands) const; + UInt64 GetSpeed(UInt64 numUnits) const; + UInt64 GetUnpackSizeSpeed() const { return GetSpeed(UnpackSize * NumIterations); } + + UInt64 Get_UnpackSize_Full() const { return UnpackSize * NumIterations; } + + UInt64 GetRating_LzmaEnc(UInt64 dictSize) const; + UInt64 GetRating_LzmaDec() const; }; + +struct CTotalBenchRes +{ + // UInt64 NumIterations1; // for Usage + UInt64 NumIterations2; // for Rating / RPU + + UInt64 Rating; + UInt64 Usage; + UInt64 RPU; + UInt64 Speed; + + void Init() { /* NumIterations1 = 0; */ NumIterations2 = 0; Rating = 0; Usage = 0; RPU = 0; Speed = 0; } + + void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2) + { + Rating = (r1.Rating + r2.Rating); + Usage = (r1.Usage + r2.Usage); + RPU = (r1.RPU + r2.RPU); + Speed = (r1.Speed + r2.Speed); + // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1); + NumIterations2 = (r1.NumIterations2 + r2.NumIterations2); + } + + void Generate_From_BenchInfo(const CBenchInfo &info); + void Mult_For_Weight(unsigned weight); + void Update_With_Res(const CTotalBenchRes &r); +}; + + + struct IBenchCallback { - virtual HRESULT SetFreq(bool showFreq, UInt64 cpuFreq) = 0; + // virtual HRESULT SetFreq(bool showFreq, UInt64 cpuFreq) = 0; virtual HRESULT SetEncodeResult(const CBenchInfo &info, bool final) = 0; 
virtual HRESULT SetDecodeResult(const CBenchInfo &info, bool final) = 0; }; -UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size); -UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations); + const unsigned kBenchMinDicLogSize = 18; -UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench = false); +UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench); struct IBenchPrintCallback { @@ -45,22 +92,20 @@ struct IBenchPrintCallback virtual HRESULT CheckBreak() = 0; }; -/* struct IBenchFreqCallback { - virtual void AddCpuFreq(UInt64 freq) = 0; + virtual HRESULT AddCpuFreq(unsigned numThreads, UInt64 freq, UInt64 usage) = 0; + virtual HRESULT FreqsFinished(unsigned numThreads) = 0; }; -*/ HRESULT Bench( DECL_EXTERNAL_CODECS_LOC_VARS IBenchPrintCallback *printCallback, IBenchCallback *benchCallback, - // IBenchFreqCallback *freqCallback, const CObjectVector &props, UInt32 numIterations, - bool multiDict - ); + bool multiDict, + IBenchFreqCallback *freqCallback = NULL); AString GetProcessThreadsInfo(const NWindows::NSystem::CProcessAffinity &ti); diff --git a/CPP/7zip/UI/Common/CompressCall2.cpp b/CPP/7zip/UI/Common/CompressCall2.cpp index 5f8a775f..ca65e1af 100644 --- a/CPP/7zip/UI/Common/CompressCall2.cpp +++ b/CPP/7zip/UI/Common/CompressCall2.cpp @@ -272,7 +272,11 @@ void Benchmark(bool totalMode) prop.Value = "*"; props.Add(prop); } - result = Benchmark(EXTERNAL_CODECS_VARS_L props, g_HWND); + result = Benchmark( + EXTERNAL_CODECS_VARS_L + props, + k_NumBenchIterations_Default, + g_HWND); MY_TRY_FINISH } diff --git a/CPP/7zip/UI/Console/Main.cpp b/CPP/7zip/UI/Console/Main.cpp index 8c24aaff..e7d9fd1b 100644 --- a/CPP/7zip/UI/Console/Main.cpp +++ b/CPP/7zip/UI/Console/Main.cpp @@ -128,7 +128,7 @@ static const char * const kHelpString = #ifndef _NO_CRYPTO " -p{Password} : set Password\n" #endif - " -r[-|0] : Recurse 
subdirectories\n" + " -r[-|0] : Recurse subdirectories for name search\n" " -sa{a|e|s} : set Archive name mode\n" " -scc{UTF-8|WIN|DOS} : set charset for for console input/output\n" " -scs{UTF-8|UTF-16LE|UTF-16BE|WIN|DOS|{id}} : set charset for list files\n" @@ -200,63 +200,55 @@ static void ShowProgInfo(CStdOutStream *so) #endif */ - #ifdef __VERSION__ - << " compiler: " << __VERSION__ - #endif - - #ifdef __GNUC__ - << " GCC " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__ - #endif - - #ifdef __clang__ - << " CLANG " << __clang_major__ << "." << __clang_minor__ - #endif - - #ifdef __xlC__ - << " XLC " << (__xlC__ >> 8) << "." << (__xlC__ & 0xFF) - #ifdef __xlC_ver__ - << "." << (__xlC_ver__ >> 8) << "." << (__xlC_ver__ & 0xFF) - #endif - #endif - - #ifdef _MSC_VER - << " MSC " << _MSC_VER - #endif - - #ifdef __ARM_FEATURE_CRC32 - << " CRC32" - #endif - << " " << (unsigned)(sizeof(void *)) * 8 << "-bit" #ifdef __ILP32__ << " ILP32" #endif - + #ifdef __ARM_ARCH << " arm_v:" << __ARM_ARCH #ifdef __ARM_ARCH_ISA_THUMB << " thumb:" << __ARM_ARCH_ISA_THUMB #endif #endif + ; + #ifdef ENV_HAVE_LOCALE - << " locale=" << GetLocale() + *so << " locale=" << GetLocale(); #endif #ifndef _WIN32 - << " UTF8=" << (IsNativeUTF8() ? "+" : "-") - << " use-UTF8=" << (g_ForceToUTF8 ? "+" : "-") - << " wchar_t=" << (unsigned)(sizeof(wchar_t)) * 8 << "-bit" - << " Files=" << (unsigned)(sizeof(off_t)) * 8 << "-bit" + { + const bool is_IsNativeUTF8 = IsNativeUTF8(); + if (!is_IsNativeUTF8) + *so << " UTF8=" << (is_IsNativeUTF8 ? "+" : "-"); + } + if (!g_ForceToUTF8) + *so << " use-UTF8=" << (g_ForceToUTF8 ? 
"+" : "-"); + { + const unsigned wchar_t_size = (unsigned)sizeof(wchar_t); + if (wchar_t_size != 4) + *so << " wchar_t=" << wchar_t_size * 8 << "-bit"; + } + { + const unsigned off_t_size = (unsigned)sizeof(off_t); + if (off_t_size != 8) + *so << " Files=" << off_t_size * 8 << "-bit"; + } #endif - ; { const UInt32 numCpus = NWindows::NSystem::GetNumberOfProcessors(); *so << " Threads:" << numCpus; } + #ifdef _7ZIP_ASM + *so << ", ASM"; + #endif + + /* { AString s; GetCpuName(s); @@ -264,9 +256,10 @@ static void ShowProgInfo(CStdOutStream *so) *so << ", " << s; } - #ifdef _7ZIP_ASM - *so << ",ASM"; + #ifdef __ARM_FEATURE_CRC32 + << " CRC32" #endif + #if (defined MY_CPU_X86_OR_AMD64 || defined(MY_CPU_ARM_OR_ARM64)) if (CPU_IsSupported_AES()) *so << ",AES"; @@ -281,6 +274,7 @@ static void ShowProgInfo(CStdOutStream *so) if (CPU_IsSupported_SHA2()) *so << ",SHA2"; #endif #endif + */ *so << endl; } diff --git a/CPP/7zip/UI/FileManager/AboutDialog.cpp b/CPP/7zip/UI/FileManager/AboutDialog.cpp index b92e6290..fe0a4997 100644 --- a/CPP/7zip/UI/FileManager/AboutDialog.cpp +++ b/CPP/7zip/UI/FileManager/AboutDialog.cpp @@ -19,8 +19,8 @@ static const UInt32 kLangIDs[] = IDT_ABOUT_INFO }; -#define kHomePageURL TEXT("http://www.7-zip.org/") -#define kHomePageURL2 TEXT("http://github.com/mcmilk/7-Zip-zstd/") +#define kHomePageURL TEXT("https://www.7-zip.org/") +#define kHomePageURL2 TEXT("https://github.com/mcmilk/7-Zip-zstd/") #define kHelpTopic "start.htm" #define LLL_(quote) L##quote diff --git a/CPP/7zip/UI/FileManager/ProgressDialog.cpp b/CPP/7zip/UI/FileManager/ProgressDialog.cpp index 1bf115ad..b688a901 100644 --- a/CPP/7zip/UI/FileManager/ProgressDialog.cpp +++ b/CPP/7zip/UI/FileManager/ProgressDialog.cpp @@ -136,8 +136,11 @@ bool CProgressDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { case kCloseMessage: { - KillTimer(_timer); - _timer = 0; + if (_timer) + { + KillTimer(kTimerID); + _timer = 0; + } if (_inCancelMessageBox) { 
_externalCloseMessageWasReceived = true; diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2.cpp b/CPP/7zip/UI/FileManager/ProgressDialog2.cpp index 8e2d7c75..7b132468 100644 --- a/CPP/7zip/UI/FileManager/ProgressDialog2.cpp +++ b/CPP/7zip/UI/FileManager/ProgressDialog2.cpp @@ -348,7 +348,9 @@ bool CProgressDialog::OnInit() INIT_AS_UNDEFINED(_processed_Prev); INIT_AS_UNDEFINED(_packed_Prev); INIT_AS_UNDEFINED(_ratio_Prev); + _filesStr_Prev.Empty(); + _filesTotStr_Prev.Empty(); _foreground = true; @@ -423,13 +425,14 @@ static const UINT kIDs[] = IDT_PROGRESS_ELAPSED, IDT_PROGRESS_ELAPSED_VAL, IDT_PROGRESS_REMAINING, IDT_PROGRESS_REMAINING_VAL, IDT_PROGRESS_FILES, IDT_PROGRESS_FILES_VAL, - IDT_PROGRESS_RATIO, IDT_PROGRESS_RATIO_VAL, + 0, IDT_PROGRESS_FILES_TOTAL, IDT_PROGRESS_ERRORS, IDT_PROGRESS_ERRORS_VAL, IDT_PROGRESS_TOTAL, IDT_PROGRESS_TOTAL_VAL, IDT_PROGRESS_SPEED, IDT_PROGRESS_SPEED_VAL, IDT_PROGRESS_PROCESSED, IDT_PROGRESS_PROCESSED_VAL, - IDT_PROGRESS_PACKED, IDT_PROGRESS_PACKED_VAL + IDT_PROGRESS_PACKED, IDT_PROGRESS_PACKED_VAL, + IDT_PROGRESS_RATIO, IDT_PROGRESS_RATIO_VAL }; bool CProgressDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize) @@ -546,6 +549,7 @@ bool CProgressDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize) yPos = my; x = mx + gSize + padSize; } + if (kIDs[i] != 0) MoveItem(kIDs[i], x, yPos, labelSize, sY); MoveItem(kIDs[i + 1], x + labelSize, yPos, valueSize, sY); yPos += sStep; @@ -617,6 +621,7 @@ static void ConvertSizeToString(UInt64 v, wchar_t *s) s += MyStringLen(s); *s++ = ' '; *s++ = c; + *s++ = 'B'; *s++ = 0; } } @@ -829,16 +834,24 @@ void CProgressDialog::UpdateStatInfo(bool showAll) { wchar_t s[64]; + ConvertUInt64ToString(completedFiles, s); + if (_filesStr_Prev != s) + { + _filesStr_Prev = s; + SetItemText(IDT_PROGRESS_FILES_VAL, s); + } + + s[0] = 0; if (IS_DEFINED_VAL(totalFiles)) { - MyStringCat(s, L" / "); + MyStringCopy(s, L" / "); ConvertUInt64ToString(totalFiles, s + MyStringLen(s)); } - if (_filesStr_Prev 
!= s) + if (_filesTotStr_Prev != s) { - _filesStr_Prev = s; - SetItemText(IDT_PROGRESS_FILES_VAL, s); + _filesTotStr_Prev = s; + SetItemText(IDT_PROGRESS_FILES_TOTAL, s); } } @@ -1024,8 +1037,13 @@ bool CProgressDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { case kCloseMessage: { - KillTimer(_timer); - _timer = 0; + if (_timer) + { + /* 21.03 : KillTimer(kTimerID) instead of KillTimer(_timer). + But (_timer == kTimerID) in Win10. So it worked too */ + KillTimer(kTimerID); + _timer = 0; + } if (_inCancelMessageBox) { _externalCloseMessageWasReceived = true; diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2.h b/CPP/7zip/UI/FileManager/ProgressDialog2.h index fc032cd9..c17dd395 100644 --- a/CPP/7zip/UI/FileManager/ProgressDialog2.h +++ b/CPP/7zip/UI/FileManager/ProgressDialog2.h @@ -169,7 +169,9 @@ class CProgressDialog: public NWindows::NControl::CModalDialog UInt64 _processed_Prev; UInt64 _packed_Prev; UInt64 _ratio_Prev; + UString _filesStr_Prev; + UString _filesTotStr_Prev; unsigned _prevSpeed_MoveBits; UInt64 _prevSpeed; diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2Res.h b/CPP/7zip/UI/FileManager/ProgressDialog2Res.h index b45d7b49..736c7179 100644 --- a/CPP/7zip/UI/FileManager/ProgressDialog2Res.h +++ b/CPP/7zip/UI/FileManager/ProgressDialog2Res.h @@ -28,6 +28,7 @@ #define IDT_PROGRESS_PACKED_VAL 110 #define IDT_PROGRESS_FILES_VAL 111 +#define IDT_PROGRESS_FILES_TOTAL 112 #define IDT_PROGRESS_ELAPSED_VAL 120 #define IDT_PROGRESS_REMAINING_VAL 121 @@ -41,7 +42,7 @@ #ifdef UNDER_CE #define MY_PROGRESS_VAL_UNITS 44 #else -#define MY_PROGRESS_VAL_UNITS 76 +#define MY_PROGRESS_VAL_UNITS 72 #endif #define MY_PROGRESS_LABEL_UNITS_MIN 60 #define MY_PROGRESS_LABEL_UNITS_START 90 diff --git a/CPP/7zip/UI/FileManager/ProgressDialog2a.rc b/CPP/7zip/UI/FileManager/ProgressDialog2a.rc index c183af82..d2fee8cf 100644 --- a/CPP/7zip/UI/FileManager/ProgressDialog2a.rc +++ b/CPP/7zip/UI/FileManager/ProgressDialog2a.rc @@ -47,27 +47,32 @@ CAPTION 
"Progress" PUSHBUTTON "&Pause", IDB_PAUSE, bx2, by, bxs, bys PUSHBUTTON "Cancel", IDCANCEL, bx1, by, bxs, bys + LTEXT "Elapsed time:", IDT_PROGRESS_ELAPSED, m, y0, x0s, 8 LTEXT "Remaining time:", IDT_PROGRESS_REMAINING, m, y1, x0s, 8 LTEXT "Files:", IDT_PROGRESS_FILES, m, y2, x0s, 8 - LTEXT "Compression ratio:", IDT_PROGRESS_RATIO, m, y3, x0s, 8 + LTEXT "Errors:", IDT_PROGRESS_ERRORS, m, y4, x0s, 8 + LTEXT "Total size:", IDT_PROGRESS_TOTAL, x2, y0, x2s, 8 LTEXT "Speed:", IDT_PROGRESS_SPEED, x2, y1, x2s, 8 LTEXT "Processed:", IDT_PROGRESS_PROCESSED,x2, y2, x2s, 8 LTEXT "Compressed size:" , IDT_PROGRESS_PACKED, x2, y3, x2s, 8 + LTEXT "Compression ratio:", IDT_PROGRESS_RATIO, x2, y4, x2s, 8 + RTEXT "", IDT_PROGRESS_ELAPSED_VAL, x1, y0, x1s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_REMAINING_VAL, x1, y1, x1s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_FILES_VAL, x1, y2, x1s, MY_TEXT_NOPREFIX - RTEXT "", IDT_PROGRESS_RATIO_VAL, x1, y3, x1s, MY_TEXT_NOPREFIX + RTEXT "", IDT_PROGRESS_FILES_TOTAL x1, y3, x1s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_ERRORS_VAL, x1, y4, x1s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_TOTAL_VAL, x3, y0, x3s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_SPEED_VAL, x3, y1, x3s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_PROCESSED_VAL, x3, y2, x3s, MY_TEXT_NOPREFIX RTEXT "", IDT_PROGRESS_PACKED_VAL, x3, y3, x3s, MY_TEXT_NOPREFIX + RTEXT "", IDT_PROGRESS_RATIO_VAL, x3, y4, x3s, MY_TEXT_NOPREFIX LTEXT "", IDT_PROGRESS_STATUS, m, z3, xc, MY_TEXT_NOPREFIX CONTROL "", IDT_PROGRESS_FILE_NAME, "Static", SS_NOPREFIX | SS_LEFTNOWORDWRAP, m, z2, xc, z2s diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp index 94dfab4c..41e0927d 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp +++ b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp @@ -10,15 +10,27 @@ #include "../../../Common/StringConvert.h" #include "../../../Common/StringToInt.h" +#include "../../../Windows/Synchronization.h" #include "../../../Windows/System.h" #include 
"../../../Windows/Thread.h" +#include "../../../Windows/SystemInfo.h" + +#include "../../../Windows/Control/ComboBox.h" +#include "../../../Windows/Control/Edit.h" #include "../../Common/MethodProps.h" +#include "../FileManager/DialogSize.h" #include "../FileManager/HelpUtils.h" +#ifdef LANG +#include "../FileManager/LangUtils.h" +#endif #include "../../MyVersion.h" +#include "../Common/Bench.h" + +#include "BenchmarkDialogRes.h" #include "BenchmarkDialog.h" using namespace NWindows; @@ -26,13 +38,350 @@ using namespace NWindows; #define kHelpTopic "fm/benchmark.htm" static const UINT_PTR kTimerID = 4; -static const UINT kTimerElapse = 1000; +static const UINT kTimerElapse = 1000; // 1000 + +// use PRINT_ITER_TIME to show time of each iteration in log box +// #define PRINT_ITER_TIME + +static const unsigned kRatingVector_NumBundlesMax = 20; + +enum MyBenchMessages +{ + k_Message_Finished = WM_APP + 1 +}; + +enum My_Message_WPARAM +{ + k_Msg_WPARM_Thread_Finished = 0, + k_Msg_WPARM_Iter_Finished, + k_Msg_WPARM_Enc1_Finished +}; + + +struct CBenchPassResult +{ + CTotalBenchRes Enc; + CTotalBenchRes Dec; + #ifdef PRINT_ITER_TIME + DWORD Ticks; + #endif + // CBenchInfo EncInfo; // for debug + // CBenchPassResult() {}; +}; + + +struct CTotalBenchRes2: public CTotalBenchRes +{ + UInt64 UnpackSize; + + void Init() + { + CTotalBenchRes::Init(); + UnpackSize = 0; + } + + void SetFrom_BenchInfo(const CBenchInfo &info) + { + NumIterations2 = 1; + Generate_From_BenchInfo(info); + UnpackSize = info.Get_UnpackSize_Full(); + } + + void Update_With_Res2(const CTotalBenchRes2 &r) + { + Update_With_Res(r); + UnpackSize += r.UnpackSize; + } +}; + + +struct CSyncData +{ + UInt32 NumPasses_Finished; + + // UInt64 NumEncProgress; // for debug + // UInt64 NumDecProgress; // for debug + // CBenchInfo EncInfo; // for debug + + CTotalBenchRes2 Enc_BenchRes_1; + CTotalBenchRes2 Enc_BenchRes; + + CTotalBenchRes2 Dec_BenchRes_1; + CTotalBenchRes2 Dec_BenchRes; + + #ifdef PRINT_ITER_TIME + 
DWORD TotalTicks; + #endif + + int RatingVector_DeletedIndex; + // UInt64 RatingVector_NumDeleted; + + bool BenchWasFinished; // all passes were finished + bool NeedPrint_Freq; + bool NeedPrint_RatingVector; + bool NeedPrint_Enc_1; + bool NeedPrint_Enc; + bool NeedPrint_Dec_1; + bool NeedPrint_Dec; + bool NeedPrint_Tot; // intermediate Total was updated after current pass + + void Init(); +}; + + +void CSyncData::Init() +{ + NumPasses_Finished = 0; + + // NumEncProgress = 0; + // NumDecProgress = 0; + + Enc_BenchRes.Init(); + Enc_BenchRes_1.Init(); + Dec_BenchRes.Init(); + Dec_BenchRes_1.Init(); + + #ifdef PRINT_ITER_TIME + TotalTicks = 0; + #endif + + RatingVector_DeletedIndex = -1; + // RatingVector_NumDeleted = 0; + + BenchWasFinished = + NeedPrint_Freq = + NeedPrint_RatingVector = + NeedPrint_Enc_1 = + NeedPrint_Enc = + NeedPrint_Dec_1 = + NeedPrint_Dec = + NeedPrint_Tot = false; +}; + + +struct CBenchProgressSync +{ + bool Exit; // GUI asks BenchThread to Exit, and BenchThread reads that variable + UInt32 NumThreads; + UInt64 DictSize; + UInt32 NumPasses_Limit; + int Level; + + // must be written by benchmark thread, read by GUI thread + CSyncData sd; + CRecordVector RatingVector; + + NWindows::NSynchronization::CCriticalSection CS; + + AString Text; + bool TextWasChanged; + + /* BenchFinish_Task_HRESULT - for result from benchmark code + BenchFinish_Thread_HRESULT - for Exceptions and service errors + these errors must be shown even if user escapes benchmark */ + + HRESULT BenchFinish_Task_HRESULT; + HRESULT BenchFinish_Thread_HRESULT; + + UInt32 NumFreqThreadsPrev; + UString FreqString_Sync; + UString FreqString_GUI; + + CBenchProgressSync() + { + NumPasses_Limit = 1; + } + + void Init(); + + void SendExit() + { + NWindows::NSynchronization::CCriticalSectionLock lock(CS); + Exit = true; + } +}; + + +void CBenchProgressSync::Init() +{ + Exit = false; + + BenchFinish_Task_HRESULT = S_OK; + BenchFinish_Thread_HRESULT = S_OK; + + sd.Init(); +
RatingVector.Clear(); + + NumFreqThreadsPrev = 0; + FreqString_Sync.Empty(); + FreqString_GUI.Empty(); + + Text.Empty(); + TextWasChanged = true; +} + + + +struct CMyFont +{ + HFONT _font; + CMyFont(): _font(NULL) {} + ~CMyFont() + { + if (_font) + DeleteObject(_font); + } + void Create(const LOGFONT *lplf) + { + _font = CreateFontIndirect(lplf); + } +}; + + +class CBenchmarkDialog; + +struct CThreadBenchmark +{ + CBenchmarkDialog *BenchmarkDialog; + DECL_EXTERNAL_CODECS_LOC_VARS2; + // HRESULT Result; + + HRESULT Process(); + static THREAD_FUNC_DECL MyThreadFunction(void *param) + { + /* ((CThreadBenchmark *)param)->Result = */ + ((CThreadBenchmark *)param)->Process(); + return 0; + } +}; + + +class CBenchmarkDialog: + public NWindows::NControl::CModalDialog +{ + NWindows::NControl::CComboBox m_Dictionary; + NWindows::NControl::CComboBox m_NumThreads; + NWindows::NControl::CComboBox m_NumPasses; + NWindows::NControl::CEdit _consoleEdit; + UINT_PTR _timer; + + UInt32 _startTime; + UInt32 _finishTime; + bool _finishTime_WasSet; + + bool WasStopped_in_GUI; + bool ExitWasAsked_in_GUI; + bool NeedRestart; + + CMyFont _font; + + UInt64 RamSize; + UInt64 RamSize_Limit; + bool RamSize_Defined; + + UInt32 NumPasses_Finished_Prev; + + UString ElapsedSec_Prev; + + void InitSyncNew() + { + NumPasses_Finished_Prev = (UInt32)(Int32)-1; + ElapsedSec_Prev.Empty(); + Sync.Init(); + } + + virtual bool OnInit(); + virtual bool OnDestroy(); + virtual bool OnSize(WPARAM /* wParam */, int xSize, int ySize); + virtual bool OnMessage(UINT message, WPARAM wParam, LPARAM lParam); + virtual bool OnCommand(int code, int itemID, LPARAM lParam); + virtual void OnHelp(); + virtual void OnCancel(); + virtual bool OnTimer(WPARAM timerID, LPARAM callback); + virtual bool OnButtonClicked(int buttonID, HWND buttonHWND); + + void Disable_Stop_Button(); + void OnStopButton(); + void RestartBenchmark(); + void StartBenchmark(); + + void UpdateGui(); + + void PrintTime(); + void PrintRating(UInt64 
rating, UINT controlID); + void PrintUsage(UInt64 usage, UINT controlID); + void PrintBenchRes(const CTotalBenchRes2 &info, const UINT ids[]); + + UInt32 GetNumberOfThreads(); + size_t OnChangeDictionary(); + + void SetItemText_Number(int itemID, UInt64 val, LPCTSTR post = NULL); + void Print_MemUsage(UString &s, UInt64 memUsage) const; + bool IsMemoryUsageOK(UInt64 memUsage) const + { return memUsage + (1 << 20) <= RamSize_Limit; } + + void MyKillTimer(); + + void SendExit_Status(const wchar_t *message) + { + SetItemText(IDT_BENCH_ERROR_MESSAGE, message); + Sync.SendExit(); + } + +public: + CBenchProgressSync Sync; + + bool TotalMode; + CObjectVector Props; + + CSysString Bench2Text; + + NWindows::CThread _thread; + CThreadBenchmark _threadBenchmark; + + CBenchmarkDialog(): + _timer(0), + TotalMode(false), + WasStopped_in_GUI(false), + ExitWasAsked_in_GUI(false), + NeedRestart(false) + {} + + ~CBenchmarkDialog(); + + bool PostMsg_Finish(LPARAM param) + { + if ((HWND)*this) + return PostMsg(k_Message_Finished, param); + // the (HWND)*this is NULL only for some internal code failure + return true; + } + + INT_PTR Create(HWND wndParent = 0) + { + BIG_DIALOG_SIZE(332, 228); + return CModalDialog::Create(TotalMode ? 
IDD_BENCH_TOTAL : SIZED_DIALOG(IDD_BENCH), wndParent); + } + void MessageBoxError(LPCWSTR message) + { + MessageBoxW(*this, message, L"7-Zip", MB_ICONERROR); + } + void MessageBoxError_Status(LPCWSTR message) + { + UString s ("ERROR: "); + s += message; + MessageBoxError(s); + SetItemText(IDT_BENCH_ERROR_MESSAGE, s); + } +}; + + + + + + -#ifdef LANG -#include "../FileManager/LangUtils.h" -#endif -using namespace NWindows; UString HResultToMessage(HRESULT errorCode); @@ -65,36 +414,48 @@ static const UInt32 kLangIDs_Colon[] = #endif static LPCTSTR const kProcessingString = TEXT("..."); +static LPCTSTR const kGB = TEXT(" GB"); static LPCTSTR const kMB = TEXT(" MB"); -static LPCTSTR const kMIPS = TEXT(" MIPS"); +static LPCTSTR const kKB = TEXT(" KB"); +// static LPCTSTR const kMIPS = TEXT(" MIPS"); static LPCTSTR const kKBs = TEXT(" KB/s"); -static const unsigned kMinDicLogSize = - #ifdef UNDER_CE - 20; - #else - 21; - #endif +static const unsigned kMinDicLogSize = 18; -static const UInt32 kMinDicSize = (1 << kMinDicLogSize); -static const UInt32 kMaxDicSize = +static const UInt32 kMinDicSize = (UInt32)1 << kMinDicLogSize; +static const size_t kMaxDicSize = (size_t)1 << (22 + sizeof(size_t) / 4 * 5); +// static const size_t kMaxDicSize = (size_t)1 << 16; + /* #ifdef MY_CPU_64BIT - (1 << 30); + (UInt32)(Int32)-1; // we can use it, if we want 4 GB buffer + // (UInt32)15 << 28; #else - (1 << 27); + (UInt32)1 << 27; #endif + */ + + +static int ComboBox_Add_UInt32(NWindows::NControl::CComboBox &cb, UInt32 v) +{ + TCHAR s[16]; + ConvertUInt32ToString(v, s); + int index = (int)cb.AddString(s); + cb.SetItemData(index, v); + return index; +} + bool CBenchmarkDialog::OnInit() { #ifdef LANG LangSetWindowText(*this, IDD_BENCH); LangSetDlgItems(*this, kLangIDs, ARRAY_SIZE(kLangIDs)); - LangSetDlgItems_Colon(*this, kLangIDs_Colon, ARRAY_SIZE(kLangIDs_Colon)); + // LangSetDlgItems_Colon(*this, kLangIDs_Colon, ARRAY_SIZE(kLangIDs_Colon)); LangSetDlgItemText(*this, IDT_BENCH_CURRENT2, 
IDT_BENCH_CURRENT); LangSetDlgItemText(*this, IDT_BENCH_RESULTING2, IDT_BENCH_RESULTING); #endif - Sync.Init(); + InitSyncNew(); if (TotalMode) { @@ -120,154 +481,194 @@ bool CBenchmarkDialog::OnInit() UInt32 numCPUs = 1; { - UString s ("/ "); + AString s ("/ "); NSystem::CProcessAffinity threadsInfo; threadsInfo.InitST(); #ifndef _7ZIP_ST - if (threadsInfo.Get() && threadsInfo.processAffinityMask != 0) numCPUs = threadsInfo.GetNumProcessThreads(); else numCPUs = NSystem::GetNumberOfProcessors(); - #endif s.Add_UInt32(numCPUs); s += GetProcessThreadsInfo(threadsInfo); - SetItemText(IDT_BENCH_HARDWARE_THREADS, s); - } - - { - UString s; - { - AString s1, s2; - GetSysInfo(s1, s2); - s = s1; - SetItemText(IDT_BENCH_SYS1, s); - if (s1 != s2 && !s2.IsEmpty()) - { - s = s2; - SetItemText(IDT_BENCH_SYS2, s); - } - } - /* + SetItemTextA(IDT_BENCH_HARDWARE_THREADS, s); + { - GetVersionString(s); - SetItemText(IDT_BENCH_SYSTEM, s); + AString s2; + GetSysInfo(s, s2); + SetItemTextA(IDT_BENCH_SYS1, s); + if (s != s2 && !s2.IsEmpty()) + SetItemTextA(IDT_BENCH_SYS2, s2); } - */ { - AString s2; - GetCpuName(s2); - s = s2; - SetItemText(IDT_BENCH_CPU, s); + GetCpuName_MultiLine(s); + SetItemTextA(IDT_BENCH_CPU, s); } { - AString s2; - AddCpuFeatures(s2); - s = s2; - SetItemText(IDT_BENCH_CPU_FEATURE, s); + GetOsInfoText(s); + s += " : "; + AddCpuFeatures(s); + SetItemTextA(IDT_BENCH_CPU_FEATURE, s); } s = "7-Zip " MY_VERSION_CPU; - SetItemText(IDT_BENCH_VER, s); + SetItemTextA(IDT_BENCH_VER, s); } + // ----- Num Threads ---------- + if (numCPUs < 1) numCPUs = 1; - numCPUs = MyMin(numCPUs, (UInt32)(1 << 8)); + numCPUs = MyMin(numCPUs, (UInt32)(1 << 6)); // it's WIN32 limit + + UInt32 numThreads = Sync.NumThreads; + + if (numThreads == (UInt32)(Int32)-1) + numThreads = numCPUs; + if (numThreads > 1) + numThreads &= ~1; + const UInt32 kNumThreadsMax = (1 << 12); + if (numThreads > kNumThreadsMax) + numThreads = kNumThreadsMax; - if (Sync.NumThreads == (UInt32)(Int32)-1) - { - 
Sync.NumThreads = numCPUs; - if (Sync.NumThreads > 1) - Sync.NumThreads &= ~1; - } m_NumThreads.Attach(GetItem(IDC_BENCH_NUM_THREADS)); + const UInt32 numTheads_Combo = numCPUs * 2; + UInt32 v = 1; int cur = 0; - for (UInt32 num = 1; num <= numCPUs * 2;) + for (; v <= numTheads_Combo;) { - TCHAR s[16]; - ConvertUInt32ToString(num, s); - int index = (int)m_NumThreads.AddString(s); - m_NumThreads.SetItemData(index, num); - if (num <= Sync.NumThreads) + int index = ComboBox_Add_UInt32(m_NumThreads, v); + const UInt32 vNext = v + (v < 2 ? 1 : 2); + if (v <= numThreads) + if (numThreads < vNext || vNext > numTheads_Combo) + { + if (v != numThreads) + index = ComboBox_Add_UInt32(m_NumThreads, numThreads); cur = index; - if (num > 1) - num++; - num++; + } + v = vNext; } m_NumThreads.SetCurSel(cur); Sync.NumThreads = GetNumberOfThreads(); + + // ----- Dictionary ---------- + m_Dictionary.Attach(GetItem(IDC_BENCH_DICTIONARY)); - cur = 0; - ramSize = (UInt64)(sizeof(size_t)) << 29; - ramSize_Defined = NSystem::GetRamSize(ramSize); + RamSize = (UInt64)(sizeof(size_t)) << 29; + RamSize_Defined = NSystem::GetRamSize(RamSize); + #ifdef UNDER_CE const UInt32 kNormalizedCeSize = (16 << 20); - if (ramSize > kNormalizedCeSize && ramSize < (33 << 20)) - ramSize = kNormalizedCeSize; + if (RamSize > kNormalizedCeSize && RamSize < (33 << 20)) + RamSize = kNormalizedCeSize; #endif + RamSize_Limit = RamSize / 16 * 15; - if (Sync.DictionarySize == (UInt32)(Int32)-1) + if (Sync.DictSize == (UInt64)(Int64)-1) { unsigned dicSizeLog = 25; - #ifdef UNDER_CE dicSizeLog = 20; #endif - - if (ramSize_Defined) + if (RamSize_Defined) for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--) - if (GetBenchMemoryUsage(Sync.NumThreads, ((UInt32)1 << dicSizeLog)) + (8 << 20) <= ramSize) + if (IsMemoryUsageOK(GetBenchMemoryUsage( + Sync.NumThreads, Sync.Level, (UInt64)1 << dicSizeLog, TotalMode))) break; - Sync.DictionarySize = (1 << dicSizeLog); + Sync.DictSize = (UInt64)1 << dicSizeLog; } - if 
(Sync.DictionarySize < kMinDicSize) Sync.DictionarySize = kMinDicSize; - if (Sync.DictionarySize > kMaxDicSize) Sync.DictionarySize = kMaxDicSize; + if (Sync.DictSize < kMinDicSize) Sync.DictSize = kMinDicSize; + if (Sync.DictSize > kMaxDicSize) Sync.DictSize = kMaxDicSize; - for (unsigned i = kMinDicLogSize; i <= 30; i++) - for (unsigned j = 0; j < 2; j++) - { - UInt32 dict = ((UInt32)1 << i) + ((UInt32)j << (i - 1)); - if (dict > kMaxDicSize) - continue; + cur = 0; + for (unsigned i = (kMinDicLogSize - 1) * 2; i <= (32 - 1) * 2; i++) + { + const size_t dict = (size_t)(2 + (i & 1)) << (i / 2); + // if (i == (32 - 1) * 2) dict = kMaxDicSize; TCHAR s[32]; - ConvertUInt32ToString((dict >> 20), s); - lstrcat(s, kMB); - int index = (int)m_Dictionary.AddString(s); + const TCHAR *post; + UInt32 d; + if (dict >= ((UInt32)1 << 31)) { d = (UInt32)(dict >> 30); post = kGB; } + else if (dict >= ((UInt32)1 << 21)) { d = (UInt32)(dict >> 20); post = kMB; } + else { d = (UInt32)(dict >> 10); post = kKB; } + ConvertUInt32ToString(d, s); + lstrcat(s, post); + const int index = (int)m_Dictionary.AddString(s); m_Dictionary.SetItemData(index, dict); - if (dict <= Sync.DictionarySize) + if (dict <= Sync.DictSize) cur = index; + if (dict >= kMaxDicSize) + break; } m_Dictionary.SetCurSel(cur); - OnChangeSettings(); - Sync._startEvent.Set(); - _timer = SetTimer(kTimerID, kTimerElapse); + // ----- Num Passes ---------- + + m_NumPasses.Attach(GetItem(IDC_BENCH_NUM_PASSES)); + cur = 0; + v = 1; + for (;;) + { + int index = ComboBox_Add_UInt32(m_NumPasses, v); + const bool isLast = (v >= 10000000); + UInt32 vNext = v * 10; + if (v < 2) vNext = 2; + else if (v < 5) vNext = 5; + else if (v < 10) vNext = 10; + + if (v <= Sync.NumPasses_Limit) + if (isLast || Sync.NumPasses_Limit < vNext) + { + if (v != Sync.NumPasses_Limit) + index = ComboBox_Add_UInt32(m_NumPasses, Sync.NumPasses_Limit); + cur = index; + } + v = vNext; + if (isLast) + break; + } + m_NumPasses.SetCurSel(cur); if (TotalMode) 
NormalizeSize(true); else NormalizePosition(); + + RestartBenchmark(); + return CModalDialog::OnInit(); } + bool CBenchmarkDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize) { - if (!TotalMode) - return false; int mx, my; GetMargins(8, mx, my); + + if (!TotalMode) + { + RECT rect; + GetClientRectOfItem(IDT_BENCH_LOG, rect); + int x = xSize - rect.left - mx; + int y = ySize - rect.top - my; + if (x < 0) x = 0; + if (y < 0) y = 0; + MoveItem(IDT_BENCH_LOG, rect.left, rect.top, x, y, true); + return false; + } + int bx1, bx2, by; + GetItemSizes(IDCANCEL, bx1, by); GetItemSizes(IDHELP, bx2, by); @@ -299,12 +700,28 @@ bool CBenchmarkDialog::OnSize(WPARAM /* wParam */, int xSize, int ySize) return false; } + UInt32 CBenchmarkDialog::GetNumberOfThreads() { return (UInt32)m_NumThreads.GetItemData_of_CurSel(); } +#define UINT_TO_STR_3(s, val) { \ + s[0] = (wchar_t)('0' + (val) / 100); \ + s[1] = (wchar_t)('0' + (val) % 100 / 10); \ + s[2] = (wchar_t)('0' + (val) % 10); \ + s[3] = 0; } + +static void NumberToDot3(UInt64 val, WCHAR *s) +{ + ConvertUInt64ToString(val / 1000, s); + const UInt32 rem = (UInt32)(val % 1000); + s += MyStringLen(s); + *s++ = '.'; + UINT_TO_STR_3(s, rem); +} + void CBenchmarkDialog::SetItemText_Number(int itemID, UInt64 val, LPCTSTR post) { TCHAR s[64]; @@ -314,7 +731,7 @@ void CBenchmarkDialog::SetItemText_Number(int itemID, UInt64 val, LPCTSTR post) SetItemText(itemID, s); } -static void PrintSize_MB(UString &s, UInt64 size) +static void AddSize_MB(UString &s, UInt64 size) { char temp[32]; ConvertUInt64ToString((size + (1 << 20) - 1) >> 20, temp); @@ -322,25 +739,36 @@ static void PrintSize_MB(UString &s, UInt64 size) s += kMB; } - -UInt32 CBenchmarkDialog::OnChangeDictionary() +void CBenchmarkDialog::Print_MemUsage(UString &s, UInt64 memUsage) const { - const UInt32 dict = (UInt32)m_Dictionary.GetItemData_of_CurSel(); - const UInt64 memUsage = GetBenchMemoryUsage(GetNumberOfThreads(), dict); - - UString s; - PrintSize_MB(s, memUsage); - if 
(ramSize_Defined) + AddSize_MB(s, memUsage); + if (RamSize_Defined) { s += " / "; - PrintSize_MB(s, ramSize); + AddSize_MB(s, RamSize); } +} + +size_t CBenchmarkDialog::OnChangeDictionary() +{ + const size_t dict = (size_t)m_Dictionary.GetItemData_of_CurSel(); + const UInt64 memUsage = GetBenchMemoryUsage(GetNumberOfThreads(), + Sync.Level, + dict, + false); // totalBench mode + + UString s; + Print_MemUsage(s, memUsage); #ifdef _7ZIP_LARGE_PAGES { AString s2; Add_LargePages_String(s2); - s += s2; + if (!s2.IsEmpty()) + { + s.Add_Space(); + s += s2; + } } #endif @@ -349,8 +777,11 @@ UInt32 CBenchmarkDialog::OnChangeDictionary() return dict; } + static const UInt32 g_IDs[] = { + IDT_BENCH_COMPRESS_SIZE1, + IDT_BENCH_COMPRESS_SIZE2, IDT_BENCH_COMPRESS_USAGE1, IDT_BENCH_COMPRESS_USAGE2, IDT_BENCH_COMPRESS_SPEED1, @@ -360,6 +791,8 @@ static const UInt32 g_IDs[] = IDT_BENCH_COMPRESS_RPU1, IDT_BENCH_COMPRESS_RPU2, + IDT_BENCH_DECOMPR_SIZE1, + IDT_BENCH_DECOMPR_SIZE2, IDT_BENCH_DECOMPR_SPEED1, IDT_BENCH_DECOMPR_SPEED2, IDT_BENCH_DECOMPR_RATING1, @@ -372,108 +805,456 @@ static const UInt32 g_IDs[] = IDT_BENCH_TOTAL_USAGE_VAL, IDT_BENCH_TOTAL_RATING_VAL, IDT_BENCH_TOTAL_RPU_VAL +}; + + +static const unsigned k_Ids_Enc_1[] = { + IDT_BENCH_COMPRESS_USAGE1, + IDT_BENCH_COMPRESS_SPEED1, + IDT_BENCH_COMPRESS_RPU1, + IDT_BENCH_COMPRESS_RATING1, + IDT_BENCH_COMPRESS_SIZE1 }; + +static const unsigned k_Ids_Enc[] = { + IDT_BENCH_COMPRESS_USAGE2, + IDT_BENCH_COMPRESS_SPEED2, + IDT_BENCH_COMPRESS_RPU2, + IDT_BENCH_COMPRESS_RATING2, + IDT_BENCH_COMPRESS_SIZE2 }; + +static const unsigned k_Ids_Dec_1[] = { + IDT_BENCH_DECOMPR_USAGE1, + IDT_BENCH_DECOMPR_SPEED1, + IDT_BENCH_DECOMPR_RPU1, + IDT_BENCH_DECOMPR_RATING1, + IDT_BENCH_DECOMPR_SIZE1 }; + +static const unsigned k_Ids_Dec[] = { + IDT_BENCH_DECOMPR_USAGE2, + IDT_BENCH_DECOMPR_SPEED2, + IDT_BENCH_DECOMPR_RPU2, + IDT_BENCH_DECOMPR_RATING2, + IDT_BENCH_DECOMPR_SIZE2 }; + +static const unsigned k_Ids_Tot[] = { + 
IDT_BENCH_TOTAL_USAGE_VAL, + 0, + IDT_BENCH_TOTAL_RPU_VAL, + IDT_BENCH_TOTAL_RATING_VAL, + 0 }; + + +void CBenchmarkDialog::MyKillTimer() +{ + if (_timer != 0) + { + KillTimer(kTimerID); + _timer = 0; + } +} + + +bool CBenchmarkDialog::OnDestroy() +{ + /* actually timer was removed before. + also the timer must be removed by Windows, when window will be removed. */ + MyKillTimer(); // it's optional code + return false; // we return (false) to perform default dialog operation +} + +void SetErrorMessage_MemUsage(UString &s, UInt64 reqSize, UInt64 ramSize, UInt64 ramLimit, const UString &usageString); + +void CBenchmarkDialog::StartBenchmark() +{ + NeedRestart = false; + WasStopped_in_GUI = false; + + SetItemText_Empty(IDT_BENCH_ERROR_MESSAGE); + + MyKillTimer(); // optional code. timer was killed before + + const size_t dict = OnChangeDictionary(); + const UInt32 numThreads = GetNumberOfThreads(); + const UInt32 numPasses = (UInt32)m_NumPasses.GetItemData_of_CurSel(); + + for (unsigned i = 0; i < ARRAY_SIZE(g_IDs); i++) + SetItemText(g_IDs[i], kProcessingString); + + SetItemText_Empty(IDT_BENCH_LOG); + SetItemText_Empty(IDT_BENCH_ELAPSED_VAL); + SetItemText_Empty(IDT_BENCH_ERROR_MESSAGE); + + const UInt64 memUsage = GetBenchMemoryUsage(numThreads, Sync.Level, dict, + false); // totalBench + if (!IsMemoryUsageOK(memUsage)) + { + UString s2 = LangString(IDT_BENCH_MEMORY); + if (s2.IsEmpty()) + GetItemText(IDT_BENCH_MEMORY, s2); + UString s; + SetErrorMessage_MemUsage(s, memUsage, RamSize, RamSize_Limit, s2); + MessageBoxError_Status(s); + return; + } + + EnableItem(IDB_STOP, true); + + _startTime = GetTickCount(); + _finishTime = _startTime; + _finishTime_WasSet = false; + + { + NWindows::NSynchronization::CCriticalSectionLock lock(Sync.CS); + InitSyncNew(); + Sync.DictSize = dict; + Sync.NumThreads = numThreads; + Sync.NumPasses_Limit = numPasses; + } + + PrintTime(); + + _timer = SetTimer(kTimerID, kTimerElapse); + if 
(_thread.Create(CThreadBenchmark::MyThreadFunction, &_threadBenchmark) != 0) + { + MyKillTimer(); + MessageBoxError_Status(L"Can't create thread"); + }; + return; +} + + +void CBenchmarkDialog::RestartBenchmark() +{ + if (ExitWasAsked_in_GUI) + return; + + if (_thread.IsCreated()) + { + NeedRestart = true; + SendExit_Status(L"Stop for restart ..."); + } + else + StartBenchmark(); +} + + +void CBenchmarkDialog::Disable_Stop_Button() +{ + // if we disable focused button, then focus will be lost + if (GetFocus() == GetItem(IDB_STOP)) + { + // SendMsg_NextDlgCtl_Prev(); + SendMsg_NextDlgCtl_CtlId(IDB_RESTART); + } + EnableItem(IDB_STOP, false); +} + + +void CBenchmarkDialog::OnStopButton() +{ + if (ExitWasAsked_in_GUI) + return; + + Disable_Stop_Button(); + + WasStopped_in_GUI = true; + if (_thread.IsCreated()) + { + SendExit_Status(L"Stop ..."); + } +} + + + +void CBenchmarkDialog::OnCancel() +{ + ExitWasAsked_in_GUI = true; + + /* + SendMsg_NextDlgCtl_Prev(); + EnableItem(IDCANCEL, false); + */ + + if (_thread.IsCreated()) + SendExit_Status(L"Cancel ..."); + else + CModalDialog::OnCancel(); +} + + +void CBenchmarkDialog::OnHelp() +{ + ShowHelpWindow(kHelpTopic); +} + + + +// void GetTimeString(UInt64 timeValue, wchar_t *s); + +void CBenchmarkDialog::PrintTime() +{ + const UInt32 curTime = + _finishTime_WasSet ? 
+ _finishTime : + ::GetTickCount(); + + const UInt32 elapsedTime = (curTime - _startTime); + + WCHAR s[64]; + + // GetTimeString(elapsedTime / 1000, s); + ConvertUInt32ToString(elapsedTime / 1000, s); + + if (_finishTime_WasSet) + { + WCHAR *p = s + MyStringLen(s); + *p++ = '.'; + UINT_TO_STR_3(p, elapsedTime % 1000); + } + + // NumberToDot3((UInt64)elapsedTime, s); + + wcscat(s, L" s"); + + // if (WasStopped_in_GUI) wcscat(s, L" X"); // for debug + + if (s == ElapsedSec_Prev) + return; + + ElapsedSec_Prev = s; + + // static cnt = 0; cnt++; wcscat(s, L" "); + // UString s2; s2.Add_UInt32(cnt); wcscat(s, s2.Ptr()); + + SetItemText(IDT_BENCH_ELAPSED_VAL, s); +} - // IDT_BENCH_FREQ_CUR, - // IDT_BENCH_FREQ_RES -}; - -void CBenchmarkDialog::OnChangeSettings() + +static UInt64 GetMips(UInt64 ips) { - EnableItem(IDB_STOP, true); - UInt32 dict = OnChangeDictionary(); - - for (unsigned i = 0; i < ARRAY_SIZE(g_IDs); i++) - SetItemText(g_IDs[i], kProcessingString); - _startTime = GetTickCount(); - PrintTime(); - NWindows::NSynchronization::CCriticalSectionLock lock(Sync.CS); - Sync.Init(); - Sync.DictionarySize = dict; - Sync.Changed = true; - Sync.NumThreads = GetNumberOfThreads(); + return (ips + 500000) / 1000000; } -void CBenchmarkDialog::OnRestartButton() + +static UInt64 GetUsagePercents(UInt64 usage) { - OnChangeSettings(); + return Benchmark_GetUsage_Percents(usage); } -void CBenchmarkDialog::OnStopButton() + +static UInt32 GetRating(const CTotalBenchRes &info) { - EnableItem(IDB_STOP, false); - Sync.Pause(); -} + UInt64 numIter = info.NumIterations2; + if (numIter == 0) + numIter = 1000000; + const UInt64 rating64 = GetMips(info.Rating / numIter); + // return rating64; + UInt32 rating32 = (UInt32)rating64; + if (rating32 != rating64) + rating32 = (UInt32)(Int32)-1; + return rating32; +}; -void CBenchmarkDialog::OnHelp() + +static void AddUsageString(UString &s, const CTotalBenchRes &info) { - ShowHelpWindow(kHelpTopic); + UInt64 numIter = info.NumIterations2; + if 
(numIter == 0) + numIter = 1000000; + UInt64 usage = GetUsagePercents(info.Usage / numIter); + + wchar_t w[64]; + ConvertUInt64ToString(usage, w); + unsigned len = MyStringLen(w); + while (len < 5) + { + s.Add_Space(); + len++; + } + s += w; + s += "%"; } -void CBenchmarkDialog::OnCancel() + +static void Add_Dot3String(UString &s, UInt64 val) { - Sync.Stop(); - KillTimer(_timer); - CModalDialog::OnCancel(); + WCHAR temp[32]; + NumberToDot3(val, temp); + s += temp; } -void GetTimeString(UInt64 timeValue, wchar_t *s); -void CBenchmarkDialog::PrintTime() +static void AddRatingString(UString &s, const CTotalBenchRes &info) { - UInt32 curTime = ::GetTickCount(); - UInt32 elapsedTime = (curTime - _startTime); - UInt32 elapsedSec = elapsedTime / 1000; - if (elapsedSec != 0 && Sync.WasPaused()) - return; - WCHAR s[40]; - GetTimeString(elapsedSec, s); - SetItemText(IDT_BENCH_ELAPSED_VAL, s); + // AddUsageString(s, info); + // s += " "; + // s.Add_UInt32(GetRating(info)); + Add_Dot3String(s, GetRating(info)); +}; + + +static void AddRatingsLine(UString &s, const CTotalBenchRes &enc, const CTotalBenchRes &dec + #ifdef PRINT_ITER_TIME + , DWORD ticks + #endif + ) +{ + // AddUsageString(s, enc); s += " "; + + AddRatingString(s, enc); + s += " "; + AddRatingString(s, dec); + + CTotalBenchRes tot_BenchRes; + tot_BenchRes.SetSum(enc, dec); + + s += " "; + AddRatingString(s, tot_BenchRes); + + s += " "; AddUsageString(s, tot_BenchRes); + + + #ifdef PRINT_ITER_TIME + s += " "; + { + Add_Dot3String(s, ticks); + s += " s"; + // s.Add_UInt32(ticks); s += " ms"; + } + #endif } + void CBenchmarkDialog::PrintRating(UInt64 rating, UINT controlID) { - SetItemText_Number(controlID, rating / 1000000, kMIPS); + // SetItemText_Number(controlID, GetMips(rating), kMIPS); + WCHAR s[64]; + NumberToDot3(GetMips(rating), s); + MyStringCat(s, L" GIPS"); + SetItemText(controlID, s); } void CBenchmarkDialog::PrintUsage(UInt64 usage, UINT controlID) { - SetItemText_Number(controlID, (usage + 5000) /
10000, TEXT("%")); + SetItemText_Number(controlID, GetUsagePercents(usage), TEXT("%")); } -void CBenchmarkDialog::PrintResults( - UInt32 dictionarySize, - const CBenchInfo2 &info, - UINT usageID, UINT speedID, UINT rpuID, UINT ratingID, - bool decompressMode) + +// void SetItemText_Number + +void CBenchmarkDialog::PrintBenchRes( + const CTotalBenchRes2 &info, + const UINT ids[]) { - if (info.GlobalTime == 0) + if (info.NumIterations2 == 0) return; - + if (ids[1] != 0) + SetItemText_Number(ids[1], (info.Speed >> 10) / info.NumIterations2, kKBs); + PrintRating(info.Rating / info.NumIterations2, ids[3]); + PrintRating(info.RPU / info.NumIterations2, ids[2]); + PrintUsage(info.Usage / info.NumIterations2, ids[0]); + if (ids[4] != 0) { - const UInt64 speed = info.UnpackSize * info.NumIterations * info.GlobalFreq / info.GlobalTime; - SetItemText_Number(speedID, speed >> 10, kKBs); + UInt64 val = info.UnpackSize; + LPCTSTR kPostfix; + if (val >= ((UInt64)1 << 40)) + { + kPostfix = kGB; + val >>= 30; + } + else + { + kPostfix = kMB; + val >>= 20; + } + SetItemText_Number(ids[4], val, kPostfix); } - UInt64 rating; - if (decompressMode) - rating = info.GetDecompressRating(); - else - rating = info.GetCompressRating(dictionarySize); - - PrintRating(rating, ratingID); - PrintRating(info.GetRatingPerUsage(rating), rpuID); - PrintUsage(info.GetUsage(), usageID); } -bool CBenchmarkDialog::OnTimer(WPARAM /* timerID */, LPARAM /* callback */) + +// static UInt32 k_Message_Finished_cnt = 0; +// static UInt32 k_OnTimer_cnt = 0; + +bool CBenchmarkDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) { - bool printTime = true; - if (TotalMode) + if (message != k_Message_Finished) + return CModalDialog::OnMessage(message, wParam, lParam); + { - if (Sync.WasStopped()) - printTime = false; + if (wParam == k_Msg_WPARM_Thread_Finished) + { + _finishTime = GetTickCount(); + _finishTime_WasSet = true; + MyKillTimer(); + + if (_thread.Wait_Close() != 0) + { + 
MessageBoxError_Status(L"Thread Wait Error"); + } + + if (!WasStopped_in_GUI) + { + WasStopped_in_GUI = true; + Disable_Stop_Button(); + } + + HRESULT res = Sync.BenchFinish_Thread_HRESULT; + if (res != S_OK) + // if (!ExitWasAsked_in_GUI || res != E_ABORT) + MessageBoxError_Status(HResultToMessage(res)); + + if (ExitWasAsked_in_GUI) + { + // SetItemText(IDT_BENCH_ERROR_MESSAGE, "before CModalDialog::OnCancel()"); + // Sleep (2000); + // MessageBoxError(L"test"); + CModalDialog::OnCancel(); + return true; + } + + SetItemText_Empty(IDT_BENCH_ERROR_MESSAGE); + + res = Sync.BenchFinish_Task_HRESULT; + if (res != S_OK) + { + if (!WasStopped_in_GUI || res != E_ABORT) + { + UString m; + if (res == S_FALSE) + m = "Decoding error"; + else if (res == CLASS_E_CLASSNOTAVAILABLE) + m = "Can't find 7z.dll"; + else + m = HResultToMessage(res); + MessageBoxError_Status(m); + } + } + + if (NeedRestart) + { + StartBenchmark(); + return true; + } + } + // k_Message_Finished_cnt++; + UpdateGui(); + return true; } - if (printTime) - PrintTime(); +} + + +bool CBenchmarkDialog::OnTimer(WPARAM timerID, LPARAM /* callback */) +{ + // k_OnTimer_cnt++; + if (timerID == kTimerID) + UpdateGui(); + return true; +} + + +void CBenchmarkDialog::UpdateGui() +{ + PrintTime(); if (TotalMode) { @@ -491,103 +1272,147 @@ bool CBenchmarkDialog::OnTimer(WPARAM /* timerID */, LPARAM /* callback */) } if (wasChanged) _consoleEdit.SetText(Bench2Text); - return true; + return; } - SetItemText_Number(IDT_BENCH_SIZE_VAL, (Sync.ProcessedSize >> 20), kMB); + CSyncData sd; + CRecordVector RatingVector; - SetItemText_Number(IDT_BENCH_PASSES_VAL, Sync.NumPasses); + { + NWindows::NSynchronization::CCriticalSectionLock lock(Sync.CS); + sd = Sync.sd; - /* - if (Sync.FirstPath) - SetItemText_Number(IDT_BENCH_FREQ_CUR, Sync.Freq, TEXT(" MHz")); - else - SetItemText_Number(IDT_BENCH_FREQ_RES, Sync.Freq, TEXT(" MHz")); - */ + if (sd.NeedPrint_RatingVector) + RatingVector = Sync.RatingVector; + + if (sd.NeedPrint_Freq) + { 
+ Sync.FreqString_GUI = Sync.FreqString_Sync; + sd.NeedPrint_RatingVector = true; + } - /* - if (Sync.FreqWasChanged) - { - SetItemText(IDT_BENCH_FREQ, Sync.Freq); - Sync.FreqWasChanged = false; + Sync.sd.NeedPrint_RatingVector = false; + Sync.sd.NeedPrint_Enc_1 = false; + Sync.sd.NeedPrint_Enc = false; + Sync.sd.NeedPrint_Dec_1 = false; + Sync.sd.NeedPrint_Dec = false; + Sync.sd.NeedPrint_Tot = false; + Sync.sd.NeedPrint_Freq = false; } - */ + if (sd.NumPasses_Finished != NumPasses_Finished_Prev) { - UInt32 dicSizeTemp = (UInt32)MyMax(Sync.ProcessedSize, UInt64(1) << 20); - dicSizeTemp = MyMin(dicSizeTemp, Sync.DictionarySize); - PrintResults(dicSizeTemp, - Sync.CompressingInfoTemp, - IDT_BENCH_COMPRESS_USAGE1, - IDT_BENCH_COMPRESS_SPEED1, - IDT_BENCH_COMPRESS_RPU1, - IDT_BENCH_COMPRESS_RATING1); + SetItemText_Number(IDT_BENCH_PASSES_VAL, sd.NumPasses_Finished, TEXT(" /")); + NumPasses_Finished_Prev = sd.NumPasses_Finished; } + if (sd.NeedPrint_Enc_1) PrintBenchRes(sd.Enc_BenchRes_1, k_Ids_Enc_1); + if (sd.NeedPrint_Enc) PrintBenchRes(sd.Enc_BenchRes, k_Ids_Enc); + if (sd.NeedPrint_Dec_1) PrintBenchRes(sd.Dec_BenchRes_1, k_Ids_Dec_1); + if (sd.NeedPrint_Dec) PrintBenchRes(sd.Dec_BenchRes, k_Ids_Dec); + + if (sd.BenchWasFinished && sd.NeedPrint_Tot) { - PrintResults( - Sync.DictionarySize, - Sync.CompressingInfo, - IDT_BENCH_COMPRESS_USAGE2, - IDT_BENCH_COMPRESS_SPEED2, - IDT_BENCH_COMPRESS_RPU2, - IDT_BENCH_COMPRESS_RATING2); + CTotalBenchRes2 tot_BenchRes = sd.Enc_BenchRes; + tot_BenchRes.Update_With_Res2(sd.Dec_BenchRes); + PrintBenchRes(tot_BenchRes, k_Ids_Tot); } + + if (sd.NeedPrint_RatingVector) + // for (unsigned k = 0; k < 1; k++) { - PrintResults( - Sync.DictionarySize, - Sync.DecompressingInfoTemp, - IDT_BENCH_DECOMPR_USAGE1, - IDT_BENCH_DECOMPR_SPEED1, - IDT_BENCH_DECOMPR_RPU1, - IDT_BENCH_DECOMPR_RATING1, - true); - } - { - PrintResults( - Sync.DictionarySize, - Sync.DecompressingInfo, - IDT_BENCH_DECOMPR_USAGE2, - IDT_BENCH_DECOMPR_SPEED2, - 
IDT_BENCH_DECOMPR_RPU2, - IDT_BENCH_DECOMPR_RATING2, - true); - if (Sync.DecompressingInfo.GlobalTime > 0 && - Sync.CompressingInfo.GlobalTime > 0) + UString s; + s += Sync.FreqString_GUI; + if (!RatingVector.IsEmpty()) + { + if (!s.IsEmpty()) + s.Add_LF(); + s += "Compr Decompr Total CPU" + #ifdef PRINT_ITER_TIME + " Time" + #endif + ; + s.Add_LF(); + } + // s += "GIPS GIPS GIPS % s"; s.Add_LF(); + for (unsigned i = 0; i < RatingVector.Size(); i++) + { + if (i != 0) + s.Add_LF(); + if ((int)i == sd.RatingVector_DeletedIndex) + { + s += "..."; + s.Add_LF(); + } + const CBenchPassResult &pair = RatingVector[i]; + /* + s += "g:"; s.Add_UInt32((UInt32)pair.EncInfo.GlobalTime); + s += " u:"; s.Add_UInt32((UInt32)pair.EncInfo.UserTime); + s += " "; + */ + AddRatingsLine(s, pair.Enc, pair.Dec + #ifdef PRINT_ITER_TIME + , pair.Ticks + #endif + ); + /* + { + UInt64 v = i + 1; + if (sd.RatingVector_DeletedIndex >= 0 && i >= (unsigned)sd.RatingVector_DeletedIndex) + v += sd.RatingVector_NumDeleted; + char temp[64]; + ConvertUInt64ToString(v, temp); + s += " : "; + s += temp; + } + */ + } + + if (sd.BenchWasFinished) { - UInt64 comprRating = Sync.CompressingInfo.GetCompressRating(Sync.DictionarySize); - UInt64 decomprRating = Sync.DecompressingInfo.GetDecompressRating(); - PrintRating((comprRating + decomprRating) / 2, IDT_BENCH_TOTAL_RATING_VAL); - PrintRating(( - Sync.CompressingInfo.GetRatingPerUsage(comprRating) + - Sync.DecompressingInfo.GetRatingPerUsage(decomprRating)) / 2, IDT_BENCH_TOTAL_RPU_VAL); - PrintUsage( - (Sync.CompressingInfo.GetUsage() + - Sync.DecompressingInfo.GetUsage()) / 2, IDT_BENCH_TOTAL_USAGE_VAL); + s.Add_LF(); + s += "-------------"; + s.Add_LF(); + { + // average time is not correct because of freq detection in first iteration + AddRatingsLine(s, sd.Enc_BenchRes, sd.Dec_BenchRes + #ifdef PRINT_ITER_TIME + , (DWORD)(sd.TotalTicks / (sd.NumPasses_Finished ? 
sd.NumPasses_Finished : 1)) + #endif + ); + } } + // s.Add_LF(); s += "OnTimer: "; s.Add_UInt32(k_OnTimer_cnt); + // s.Add_LF(); s += "finished Message: "; s.Add_UInt32(k_Message_Finished_cnt); + // static cnt = 0; cnt++; s.Add_LF(); s += "Print: "; s.Add_UInt32(cnt); + // s.Add_LF(); s += "NumEncProgress: "; s.Add_UInt32((UInt32)sd.NumEncProgress); + // s.Add_LF(); s += "NumDecProgress: "; s.Add_UInt32((UInt32)sd.NumDecProgress); + SetItemText(IDT_BENCH_LOG, s); } - return true; } + bool CBenchmarkDialog::OnCommand(int code, int itemID, LPARAM lParam) { if (code == CBN_SELCHANGE && (itemID == IDC_BENCH_DICTIONARY || + itemID == IDC_BENCH_NUM_PASSES || itemID == IDC_BENCH_NUM_THREADS)) { - OnChangeSettings(); + RestartBenchmark(); return true; } return CModalDialog::OnCommand(code, itemID, lParam); } + bool CBenchmarkDialog::OnButtonClicked(int buttonID, HWND buttonHWND) { switch (buttonID) { case IDB_RESTART: - OnRestartButton(); + RestartBenchmark(); return true; case IDB_STOP: OnStopButton(); @@ -596,87 +1421,81 @@ bool CBenchmarkDialog::OnButtonClicked(int buttonID, HWND buttonHWND) return CModalDialog::OnButtonClicked(buttonID, buttonHWND); } -struct CThreadBenchmark -{ - CBenchmarkDialog *BenchmarkDialog; - DECL_EXTERNAL_CODECS_LOC_VARS2; - // UInt32 dictionarySize; - // UInt32 numThreads; - HRESULT Process(); - HRESULT Result; - static THREAD_FUNC_DECL MyThreadFunction(void *param) - { - ((CThreadBenchmark *)param)->Result = ((CThreadBenchmark *)param)->Process(); - return 0; - } -}; + + + +// ---------- Benchmark Thread ---------- struct CBenchCallback: public IBenchCallback { - UInt32 dictionarySize; + UInt64 dictionarySize; CBenchProgressSync *Sync; + CBenchmarkDialog *BenchmarkDialog; - // void AddCpuFreq(UInt64 cpuFreq); - HRESULT SetFreq(bool showFreq, UInt64 cpuFreq); HRESULT SetEncodeResult(const CBenchInfo &info, bool final); HRESULT SetDecodeResult(const CBenchInfo &info, bool final); }; -/* -void CBenchCallback::AddCpuFreq(UInt64 cpuFreq) -{ - 
NSynchronization::CCriticalSectionLock lock(Sync->CS); - { - wchar_t s[32]; - ConvertUInt64ToString(cpuFreq, s); - Sync->Freq.Add_Space_if_NotEmpty(); - Sync->Freq += s; - Sync->FreqWasChanged = true; - } -} -*/ - -HRESULT CBenchCallback::SetFreq(bool /* showFreq */, UInt64 /* cpuFreq */) -{ - return S_OK; -} - HRESULT CBenchCallback::SetEncodeResult(const CBenchInfo &info, bool final) { - NSynchronization::CCriticalSectionLock lock(Sync->CS); - if (Sync->Changed || Sync->Paused || Sync->Stopped) - return E_ABORT; - Sync->ProcessedSize = info.UnpackSize * info.NumIterations; - if (final && Sync->CompressingInfo.GlobalTime == 0) + bool needPost = false; { - (CBenchInfo&)Sync->CompressingInfo = info; - if (Sync->CompressingInfo.GlobalTime == 0) - Sync->CompressingInfo.GlobalTime = 1; + NSynchronization::CCriticalSectionLock lock(Sync->CS); + if (Sync->Exit) + return E_ABORT; + CSyncData &sd = Sync->sd; + // sd.NumEncProgress++; + CTotalBenchRes2 &br = sd.Enc_BenchRes_1; + { + UInt64 dictSize = Sync->DictSize; + if (final) + { + // sd.EncInfo = info; + } + else + { + /* if (!final), then CBenchInfo::NumIterations means totalNumber of threads. 
+ so we can reduce the dictionary */ + if (dictSize > info.UnpackSize) + dictSize = info.UnpackSize; + } + br.Rating = info.GetRating_LzmaEnc(dictSize); + } + br.SetFrom_BenchInfo(info); + sd.NeedPrint_Enc_1 = true; + if (final) + { + sd.Enc_BenchRes.Update_With_Res2(br); + sd.NeedPrint_Enc = true; + needPost = true; + } } - else - (CBenchInfo&)Sync->CompressingInfoTemp = info; + + if (needPost) + BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished); return S_OK; } + HRESULT CBenchCallback::SetDecodeResult(const CBenchInfo &info, bool final) { NSynchronization::CCriticalSectionLock lock(Sync->CS); - if (Sync->Changed || Sync->Paused || Sync->Stopped) + if (Sync->Exit) return E_ABORT; - CBenchInfo info2 = info; - if (final && Sync->DecompressingInfo.GlobalTime == 0) - { - (CBenchInfo&)Sync->DecompressingInfo = info2; - if (Sync->DecompressingInfo.GlobalTime == 0) - Sync->DecompressingInfo.GlobalTime = 1; - } - else - (CBenchInfo&)Sync->DecompressingInfoTemp = info2; + CSyncData &sd = Sync->sd; + // sd.NumDecProgress++; + CTotalBenchRes2 &br = sd.Dec_BenchRes_1; + br.Rating = info.GetRating_LzmaDec(); + br.SetFrom_BenchInfo(info); + sd.NeedPrint_Dec_1 = true; + if (final) + sd.Dec_BenchRes.Update_With_Res2(br); return S_OK; } + struct CBenchCallback2: public IBenchPrintCallback { CBenchProgressSync *Sync; @@ -704,74 +1523,145 @@ void CBenchCallback2::NewLine() HRESULT CBenchCallback2::CheckBreak() { - if (Sync->Changed || Sync->Paused || Sync->Stopped) + if (Sync->Exit) return E_ABORT; return S_OK; } -/* struct CFreqCallback: public IBenchFreqCallback { - CBenchProgressSync *Sync; + CBenchmarkDialog *BenchmarkDialog; - virtual void AddCpuFreq(UInt64 freq); + virtual HRESULT AddCpuFreq(unsigned numThreads, UInt64 freq, UInt64 usage); + virtual HRESULT FreqsFinished(unsigned numThreads); }; -void CFreqCallback::AddCpuFreq(UInt64 freq) +HRESULT CFreqCallback::AddCpuFreq(unsigned numThreads, UInt64 freq, UInt64 usage) { - 
NSynchronization::CCriticalSectionLock lock(Sync->CS); - Sync->Freq = freq; + HRESULT res; + { + CBenchProgressSync &sync = BenchmarkDialog->Sync; + NSynchronization::CCriticalSectionLock lock(sync.CS); + UString &s = sync.FreqString_Sync; + if (sync.NumFreqThreadsPrev != numThreads) + { + sync.NumFreqThreadsPrev = numThreads; + if (!s.IsEmpty()) + s.Add_LF(); + s.Add_UInt32(numThreads); + s += "T Frequency (MHz):"; + s.Add_LF(); + } + s += " "; + char temp[64]; + if (numThreads != 1) + { + ConvertUInt64ToString(GetUsagePercents(usage), temp); + s += temp; + s += '%'; + s.Add_Space(); + } + ConvertUInt64ToString(GetMips(freq), temp); + s += temp; + // BenchmarkDialog->Sync.sd.NeedPrint_Freq = true; + res = sync.Exit ? E_ABORT : S_OK; + } + // BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished); + return res; +} + +HRESULT CFreqCallback::FreqsFinished(unsigned /* numThreads */) +{ + HRESULT res; + { + CBenchProgressSync &sync = BenchmarkDialog->Sync; + NSynchronization::CCriticalSectionLock lock(sync.CS); + sync.sd.NeedPrint_Freq = true; + BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished); + res = sync.Exit ? 
E_ABORT : S_OK; + } + BenchmarkDialog->PostMsg(k_Message_Finished, k_Msg_WPARM_Enc1_Finished); + return res; } -*/ +// define USE_DUMMY only for debug +// #define USE_DUMMY +#ifdef USE_DUMMY +static unsigned dummy = 1; +static unsigned Dummy(unsigned limit) +{ + unsigned sum = 0; + for (unsigned k = 0; k < limit; k++) + { + sum += dummy; + if (sum == 0) + break; + } + return sum; +} +#endif + + HRESULT CThreadBenchmark::Process() { + /* the first benchmark pass can be slow, + if we run benchmark while the window is being created, + and (no freq detecion loop) && (dictionary is small) (-mtic is small) */ + + // Sleep(300); // for debug + #ifdef USE_DUMMY + Dummy(1000 * 1000 * 1000); // for debug + #endif + CBenchProgressSync &sync = BenchmarkDialog->Sync; - sync.WaitCreating(); + HRESULT finishHRESULT = S_OK; + try { - for (;;) + for (UInt32 passIndex = 0;; passIndex++) { - if (sync.WasStopped()) - return 0; - if (sync.WasPaused()) - { - Sleep(200); - continue; - } - UInt32 dictionarySize; + // throw 1; // to debug + // throw CSystemException(E_INVALIDARG); // to debug + + UInt64 dictionarySize; UInt32 numThreads; { NSynchronization::CCriticalSectionLock lock(sync.CS); - if (sync.Stopped || sync.Paused) - continue; - if (sync.Changed) - sync.Init(); - dictionarySize = sync.DictionarySize; + if (sync.Exit) + break; + dictionarySize = sync.DictSize; numThreads = sync.NumThreads; - /* - if (sync.CompressingInfo.GlobalTime != 0) - sync.FirstPath = false; - */ } + + #ifdef PRINT_ITER_TIME + const DWORD startTick = GetTickCount(); + #endif CBenchCallback callback; + callback.dictionarySize = dictionarySize; callback.Sync = &sync; + callback.BenchmarkDialog = BenchmarkDialog; + CBenchCallback2 callback2; callback2.TotalMode = BenchmarkDialog->TotalMode; callback2.Sync = &sync; - // CFreqCallback freqCallback; - // freqCallback.Sync = &sync; + + CFreqCallback freqCallback; + freqCallback.BenchmarkDialog = BenchmarkDialog; + HRESULT result; try { CObjectVector props; + + 
props = BenchmarkDialog->Props; + if (BenchmarkDialog->TotalMode) { props = BenchmarkDialog->Props; @@ -787,8 +1677,8 @@ HRESULT CThreadBenchmark::Process() { CProperty prop; prop.Name = 'd'; - prop.Name.Add_UInt32(dictionarySize); - prop.Name += 'b'; + prop.Name.Add_UInt32((UInt32)(dictionarySize >> 10)); + prop.Name += 'k'; props.Add(prop); } } @@ -796,57 +1686,133 @@ HRESULT CThreadBenchmark::Process() result = Bench(EXTERNAL_CODECS_LOC_VARS BenchmarkDialog->TotalMode ? &callback2 : NULL, BenchmarkDialog->TotalMode ? NULL : &callback, - // &freqCallback, - props, 1, false); + props, 1, false, + (!BenchmarkDialog->TotalMode) && passIndex == 0 ? &freqCallback: NULL); - if (BenchmarkDialog->TotalMode) - { - sync.Stop(); - } + // result = S_FALSE; // for debug; + // throw 1; } catch(...) { result = E_FAIL; } + #ifdef PRINT_ITER_TIME + const DWORD numTicks = GetTickCount() - startTick; + #endif + + bool finished = true; + + NSynchronization::CCriticalSectionLock lock(sync.CS); + if (result != S_OK) { - if (result != E_ABORT) + sync.BenchFinish_Task_HRESULT = result; + break; + } + + { + CSyncData &sd = sync.sd; + + sd.NumPasses_Finished++; + #ifdef PRINT_ITER_TIME + sd.TotalTicks += numTicks; + #endif + + if (BenchmarkDialog->TotalMode) + break; + { + CTotalBenchRes tot_BenchRes = sd.Enc_BenchRes_1; + tot_BenchRes.Update_With_Res(sd.Dec_BenchRes_1); + + sd.NeedPrint_RatingVector = true; { - NSynchronization::CCriticalSectionLock lock(sync.CS); - sync.Pause(); + CBenchPassResult pair; + // pair.EncInfo = sd.EncInfo; // for debug + pair.Enc = sd.Enc_BenchRes_1; + pair.Dec = sd.Dec_BenchRes_1; + #ifdef PRINT_ITER_TIME + pair.Ticks = numTicks; + #endif + sync.RatingVector.Add(pair); + // pair.Dec_Defined = true; } - UString message; - if (result == S_FALSE) - message = "Decoding error"; - else if (result == CLASS_E_CLASSNOTAVAILABLE) - message = "Can't find 7z.dll"; - else - message = HResultToMessage(result); - BenchmarkDialog->MessageBoxError(message); + } + + 
sd.NeedPrint_Dec = true; + sd.NeedPrint_Tot = true; + + if (sync.RatingVector.Size() > kRatingVector_NumBundlesMax) + { + // sd.RatingVector_NumDeleted++; + sd.RatingVector_DeletedIndex = (int)(kRatingVector_NumBundlesMax / 4); + sync.RatingVector.Delete((unsigned)(sd.RatingVector_DeletedIndex)); + } + + if (sync.sd.NumPasses_Finished < sync.NumPasses_Limit) + finished = false; + else + { + sync.sd.BenchWasFinished = true; + // BenchmarkDialog->_finishTime = GetTickCount(); + // return 0; } } - else + + if (BenchmarkDialog->TotalMode) + break; + + /* + if (newTick - prevTick < 1000) + numSameTick++; + if (numSameTick > 5 || finished) { - NSynchronization::CCriticalSectionLock lock(sync.CS); - sync.NumPasses++; + prevTick = newTick; + numSameTick = 0; + */ + // for (unsigned i = 0; i < 1; i++) + { + // we suppose that PostMsg messages will be processed in order. + if (!BenchmarkDialog->PostMsg_Finish(k_Msg_WPARM_Iter_Finished)) + { + finished = true; + finishHRESULT = E_FAIL; + // throw 1234567; + } } + if (finished) + break; } // return S_OK; } catch(CSystemException &e) { - BenchmarkDialog->MessageBoxError(HResultToMessage(e.ErrorCode)); - return E_FAIL; + finishHRESULT = e.ErrorCode; + // BenchmarkDialog->MessageBoxError(HResultToMessage(e.ErrorCode)); + // return E_FAIL; } catch(...) 
{ - BenchmarkDialog->MessageBoxError(HResultToMessage(E_FAIL)); - return E_FAIL; + finishHRESULT = E_FAIL; + // BenchmarkDialog->MessageBoxError(HResultToMessage(E_FAIL)); + // return E_FAIL; + } + + if (finishHRESULT != S_OK) + { + NSynchronization::CCriticalSectionLock lock(sync.CS); + sync.BenchFinish_Thread_HRESULT = finishHRESULT; + } + if (!BenchmarkDialog->PostMsg_Finish(k_Msg_WPARM_Thread_Finished)) + { + // sync.BenchFinish_Thread_HRESULT = E_FAIL; } + return 0; } + + static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop) { const wchar_t *end; @@ -859,20 +1825,21 @@ static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop) prop = result; } + HRESULT Benchmark( DECL_EXTERNAL_CODECS_LOC_VARS - const CObjectVector &props, HWND hwndParent) + const CObjectVector &props, UInt32 numIterations, HWND hwndParent) { - CThreadBenchmark benchmarker; - #ifdef EXTERNAL_CODECS - benchmarker.__externalCodecs = __externalCodecs; - #endif - CBenchmarkDialog bd; - bd.Props = props; + bd.TotalMode = false; - bd.Sync.DictionarySize = (UInt32)(Int32)-1; + bd.Props = props; + if (numIterations == 0) + numIterations = 1; + bd.Sync.NumPasses_Limit = numIterations; + bd.Sync.DictSize = (UInt64)(Int64)-1; bd.Sync.NumThreads = (UInt32)(Int32)-1; + bd.Sync.Level = -1; COneMethodInfo method; @@ -905,13 +1872,17 @@ HRESULT Benchmark( #endif continue; } - if (name.IsEqualTo("testtime")) + /* + if (name.IsEqualTo("time")) { // UInt32 testTime = 4; // RINOK(ParsePropToUInt32(L"", propVariant, testTime)); continue; } RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant)); + */ + // here we need to parse DictSize property, and ignore unknown properties + method.ParseMethodFromPROPVARIANT(name, propVariant); } if (bd.TotalMode) @@ -923,17 +1894,37 @@ HRESULT Benchmark( } { - UInt32 dict; + UInt64 dict; if (method.Get_DicSize(dict)) - bd.Sync.DictionarySize = dict; + bd.Sync.DictSize = dict; + } + bd.Sync.Level = method.GetLevel(); + + // Dummy(1000 * 
1000 * 1); + + { + CThreadBenchmark &benchmarker = bd._threadBenchmark; + #ifdef EXTERNAL_CODECS + benchmarker.__externalCodecs = __externalCodecs; + #endif + benchmarker.BenchmarkDialog = &bd; } - benchmarker.BenchmarkDialog = &bd; + bd.Create(hwndParent); + + return S_OK; +} + +CBenchmarkDialog::~CBenchmarkDialog() +{ + if (_thread.IsCreated()) { - NWindows::CThread thread; - RINOK(thread.Create(CThreadBenchmark::MyThreadFunction, &benchmarker)); - bd.Create(hwndParent); - return thread.Wait_Close(); + /* the following code will be not executed in normal code flow. + it can be called, if there is some internal failure in dialog code. */ + Attach(NULL); + MessageBoxError(L"The flaw in benchmark thread code"); + Sync.SendExit(); + _thread.Wait_Close(); } } diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.h b/CPP/7zip/UI/GUI/BenchmarkDialog.h index 7312a1da..a280592e 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialog.h +++ b/CPP/7zip/UI/GUI/BenchmarkDialog.h @@ -3,190 +3,13 @@ #ifndef __BENCHMARK_DIALOG_H #define __BENCHMARK_DIALOG_H -#include "../../../Windows/Synchronization.h" +#include "../../Common/CreateCoder.h" +#include "../../UI/Common/Property.h" -#include "../../../Windows/Control/ComboBox.h" -#include "../../../Windows/Control/Edit.h" - -#include "../Common/Bench.h" - -#include "../FileManager/DialogSize.h" - -#include "BenchmarkDialogRes.h" - -struct CBenchInfo2 : public CBenchInfo -{ - void Init() { GlobalTime = UserTime = 0; } - - UInt64 GetCompressRating(UInt32 dictSize) const - { - return ::GetCompressRating(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations); - } - - UInt64 GetDecompressRating() const - { - return ::GetDecompressRating(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations); - } -}; - -class CBenchProgressSync -{ -public: - bool Stopped; - bool Paused; - bool Changed; - UInt32 DictionarySize; - UInt32 NumThreads; - UInt64 NumPasses; - NWindows::NSynchronization::CManualResetEvent _startEvent; - 
NWindows::NSynchronization::CCriticalSection CS; - - CBenchInfo2 CompressingInfoTemp; - CBenchInfo2 CompressingInfo; - UInt64 ProcessedSize; - - CBenchInfo2 DecompressingInfoTemp; - CBenchInfo2 DecompressingInfo; - - AString Text; - bool TextWasChanged; - - // bool FirstPath; - // UInt64 Freq; - // UString Freq; - // bool FreqWasChanged; - - CBenchProgressSync() - { - if (_startEvent.Create() != S_OK) - throw 3986437; - } - - void Init() - { - Changed = false; - Stopped = false; - Paused = false; - CompressingInfoTemp.Init(); - CompressingInfo.Init(); - ProcessedSize = 0; - - DecompressingInfoTemp.Init(); - DecompressingInfo.Init(); - - NumPasses = 0; - - // FirstPath = true; - // Freq = 0; - // Freq.SetFromAscii("MHz: "); - // FreqWasChanged = true; - - Text.Empty(); - TextWasChanged = true; - } - - void Stop() - { - NWindows::NSynchronization::CCriticalSectionLock lock(CS); - Stopped = true; - } - bool WasStopped() - { - NWindows::NSynchronization::CCriticalSectionLock lock(CS); - return Stopped; - } - void Pause() - { - NWindows::NSynchronization::CCriticalSectionLock lock(CS); - Paused = true; - } - void Start() - { - NWindows::NSynchronization::CCriticalSectionLock lock(CS); - Paused = false; - } - bool WasPaused() - { - NWindows::NSynchronization::CCriticalSectionLock lock(CS); - return Paused; - } - void WaitCreating() { _startEvent.Lock(); } -}; - -struct CMyFont -{ - HFONT _font; - CMyFont(): _font(NULL) {} - ~CMyFont() - { - if (_font) - DeleteObject(_font); - } - void Create(const LOGFONT *lplf) - { - _font = CreateFontIndirect(lplf); - } -}; - - -class CBenchmarkDialog: - public NWindows::NControl::CModalDialog -{ - NWindows::NControl::CComboBox m_Dictionary; - NWindows::NControl::CComboBox m_NumThreads; - NWindows::NControl::CEdit _consoleEdit; - UINT_PTR _timer; - UInt32 _startTime; - CMyFont _font; - - UInt64 ramSize; - bool ramSize_Defined; - - bool OnSize(WPARAM /* wParam */, int xSize, int ySize); - bool OnTimer(WPARAM timerID, LPARAM callback); - 
virtual bool OnInit(); - void OnRestartButton(); - void OnStopButton(); - void OnHelp(); - virtual void OnCancel(); - bool OnButtonClicked(int buttonID, HWND buttonHWND); - bool OnCommand(int code, int itemID, LPARAM lParam); - - void PrintTime(); - void PrintRating(UInt64 rating, UINT controlID); - void PrintUsage(UInt64 usage, UINT controlID); - void PrintResults( - UInt32 dictionarySize, - const CBenchInfo2 &info, UINT usageID, UINT speedID, UINT rpuID, UINT ratingID, - bool decompressMode = false); - - UInt32 GetNumberOfThreads(); - UInt32 OnChangeDictionary(); - void OnChangeSettings(); - - void SetItemText_Number(int itemID, UInt64 val, LPCTSTR post = NULL); - -public: - CBenchProgressSync Sync; - bool TotalMode; - CObjectVector Props; - - CSysString Bench2Text; - - CBenchmarkDialog(): _timer(0), TotalMode(false) {} - INT_PTR Create(HWND wndParent = 0) - { - BIG_DIALOG_SIZE(332, 228); - return CModalDialog::Create(TotalMode ? IDD_BENCH_TOTAL : SIZED_DIALOG(IDD_BENCH), wndParent); - } - void MessageBoxError(LPCWSTR message) - { - MessageBoxW(*this, message, L"7-Zip ZS", MB_ICONERROR); - } -}; +const UInt32 k_NumBenchIterations_Default = 10; HRESULT Benchmark( DECL_EXTERNAL_CODECS_LOC_VARS - const CObjectVector &props, HWND hwndParent = NULL); + const CObjectVector &props, UInt32 numIterations, HWND hwndParent = NULL); #endif diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.rc b/CPP/7zip/UI/GUI/BenchmarkDialog.rc index a8455a0f..3e73e46d 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialog.rc +++ b/CPP/7zip/UI/GUI/BenchmarkDialog.rc @@ -23,25 +23,29 @@ #define g4x (m + m) -#define sRating 60 -#define sSpeed 60 -#define sUsage 60 -#define sRpu 60 -#define sFreq 34 +#define sRating 58 +#define sSpeed 60 +#define sUsage 46 +#define sRpu 58 +#define sSize 52 +// #define sFreq 34 #define xRating (xs - m - m - sRating) #define xRpu (xRating - sRpu) #define xUsage (xRpu - sUsage) #define xSpeed (xUsage - sSpeed) +#define xSize (xSpeed - sSize) -#define xFreq (xUsage - sFreq) +// 
#define xFreq (xUsage - sFreq) -#define sLabel (xUsage - g4x) +#define sLabel (xSize - g4x) #define sTotalRating (sUsage + sRpu + sRating + m + m) #define xTotalRating (xs - m - sTotalRating) -#define g2xs 58 -#define g3xs 36 +#define sPasses 60 + +#define g2xs 60 +#define g3xs 64 #define g3x (m + g2xs) #undef GROUP_Y_SIZE @@ -56,7 +60,10 @@ #define g7xs bx1 - m - g0xs - g1xs - m -IDD_BENCH DIALOG 0, 0, xs, ys MY_MODAL_DIALOG_STYLE | WS_MINIMIZEBOX +#define sLog 140 + 0 + +// MY_MODAL_DIALOG_STYLE +IDD_BENCH DIALOG 0, 0, xs + sLog, ys MY_MODAL_RESIZE_DIALOG_STYLE | WS_MINIMIZEBOX CAPTION "Benchmark" MY_FONT BEGIN @@ -70,71 +77,79 @@ BEGIN COMBOBOX IDC_BENCH_DICTIONARY, g1x, m, g1xs, 140, MY_COMBO LTEXT "Memory usage:", IDT_BENCH_MEMORY, gc2x, m - 2, g7xs, 8 - LTEXT "", IDT_BENCH_MEMORY_VAL, gc2x, m + 8, g7xs, 8 + LTEXT "", IDT_BENCH_MEMORY_VAL, gc2x, m + 8, g7xs, MY_TEXT_NOPREFIX LTEXT "&Number of CPU threads:", IDT_BENCH_NUM_THREADS, m, 30, g0xs, 8 COMBOBOX IDC_BENCH_NUM_THREADS, g1x, 29, g1xs, 140, MY_COMBO - LTEXT "", IDT_BENCH_HARDWARE_THREADS, gc2x, 32, g7xs, 8 + LTEXT "", IDT_BENCH_HARDWARE_THREADS, gc2x, 30, g7xs, MY_TEXT_NOPREFIX - RTEXT "CPU Usage", IDT_BENCH_USAGE_LABEL, xUsage, 54, sUsage, 8 - RTEXT "Speed", IDT_BENCH_SPEED, xSpeed, 54, sSpeed, 8 - RTEXT "Rating / Usage", IDT_BENCH_RPU_LABEL, xRpu, 54, sRpu, 8 - RTEXT "Rating", IDT_BENCH_RATING_LABEL, xRating, 54, sRating, 8 + RTEXT "Size", IDT_BENCH_SIZE, xSize, 54, sSize, MY_TEXT_NOPREFIX + RTEXT "CPU Usage", IDT_BENCH_USAGE_LABEL, xUsage, 54, sUsage, MY_TEXT_NOPREFIX + RTEXT "Speed", IDT_BENCH_SPEED, xSpeed, 54, sSpeed, MY_TEXT_NOPREFIX + RTEXT "Rating / Usage", IDT_BENCH_RPU_LABEL, xRpu, 54, sRpu, MY_TEXT_NOPREFIX + RTEXT "Rating", IDT_BENCH_RATING_LABEL, xRating, 54, sRating, MY_TEXT_NOPREFIX GROUPBOX "Compressing", IDG_BENCH_COMPRESSING, m, 64, xc, GROUP_Y_SIZE - LTEXT "Current", IDT_BENCH_CURRENT, g4x, 76, sLabel, 8 - RTEXT "", IDT_BENCH_COMPRESS_USAGE1, xUsage, 76, sUsage, 8 - RTEXT "", 
IDT_BENCH_COMPRESS_SPEED1, xSpeed, 76, sSpeed, 8 - RTEXT "", IDT_BENCH_COMPRESS_RPU1, xRpu, 76, sRpu, 8 - RTEXT "", IDT_BENCH_COMPRESS_RATING1, xRating, 76, sRating, 8 + LTEXT "Current", IDT_BENCH_CURRENT, g4x, 76, sLabel, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_SIZE1, xSize, 76, sSize, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_USAGE1, xUsage, 76, sUsage, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_SPEED1, xSpeed, 76, sSpeed, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_RPU1, xRpu, 76, sRpu, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_RATING1, xRating, 76, sRating, MY_TEXT_NOPREFIX - LTEXT "Resulting", IDT_BENCH_RESULTING, g4x, 89, sLabel, 8 - RTEXT "", IDT_BENCH_COMPRESS_USAGE2, xUsage, 89, sUsage, 8 - RTEXT "", IDT_BENCH_COMPRESS_SPEED2, xSpeed, 89, sSpeed, 8 - RTEXT "", IDT_BENCH_COMPRESS_RPU2, xRpu, 89, sRpu, 8 - RTEXT "", IDT_BENCH_COMPRESS_RATING2, xRating, 89, sRating, 8 + LTEXT "Resulting", IDT_BENCH_RESULTING, g4x, 89, sLabel, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_SIZE2, xSize, 89, sSize, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_USAGE2, xUsage, 89, sUsage, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_SPEED2, xSpeed, 89, sSpeed, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_RPU2, xRpu, 89, sRpu, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_COMPRESS_RATING2, xRating, 89, sRating, MY_TEXT_NOPREFIX GROUPBOX "Decompressing", IDG_BENCH_DECOMPRESSING, m, 111, xc, GROUP_Y_SIZE - LTEXT "Current", IDT_BENCH_CURRENT2, g4x, 123, sLabel, 8 - RTEXT "", IDT_BENCH_DECOMPR_USAGE1, xUsage, 123, sUsage, 8 - RTEXT "", IDT_BENCH_DECOMPR_SPEED1, xSpeed, 123, sSpeed, 8 - RTEXT "", IDT_BENCH_DECOMPR_RPU1, xRpu, 123, sRpu, 8 - RTEXT "", IDT_BENCH_DECOMPR_RATING1, xRating, 123, sRating, 8 + LTEXT "Current", IDT_BENCH_CURRENT2, g4x, 123, sLabel, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_SIZE1, xSize, 123, sSize, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_USAGE1, xUsage, 123, sUsage, MY_TEXT_NOPREFIX + RTEXT "", 
IDT_BENCH_DECOMPR_SPEED1, xSpeed, 123, sSpeed, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_RPU1, xRpu, 123, sRpu, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_RATING1, xRating, 123, sRating, MY_TEXT_NOPREFIX - LTEXT "Resulting", IDT_BENCH_RESULTING2, g4x, 136, sLabel, 8 - RTEXT "", IDT_BENCH_DECOMPR_USAGE2, xUsage, 136, sUsage, 8 - RTEXT "", IDT_BENCH_DECOMPR_SPEED2, xSpeed, 136, sSpeed, 8 - RTEXT "", IDT_BENCH_DECOMPR_RPU2, xRpu, 136, sRpu, 8 - RTEXT "", IDT_BENCH_DECOMPR_RATING2, xRating, 136, sRating, 8 + LTEXT "Resulting", IDT_BENCH_RESULTING2, g4x, 136, sLabel, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_SIZE2, xSize, 136, sSize, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_USAGE2, xUsage, 136, sUsage, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_SPEED2, xSpeed, 136, sSpeed, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_RPU2, xRpu, 136, sRpu, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_DECOMPR_RATING2, xRating, 136, sRating, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_ERROR_MESSAGE, m, 155, xc, MY_TEXT_NOPREFIX + GROUPBOX "Total Rating", IDG_BENCH_TOTAL_RATING, xTotalRating, 163, sTotalRating, GROUP_Y2_SIZE - RTEXT "", IDT_BENCH_TOTAL_USAGE_VAL, xUsage, 176, sUsage, 8 - RTEXT "", IDT_BENCH_TOTAL_RPU_VAL, xRpu, 176, sRpu, 8 - RTEXT "", IDT_BENCH_TOTAL_RATING_VAL, xRating, 176, sRating, 8 + RTEXT "", IDT_BENCH_TOTAL_USAGE_VAL, xUsage, 176, sUsage, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_TOTAL_RPU_VAL, xRpu, 176, sRpu, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_TOTAL_RATING_VAL, xRating, 176, sRating, MY_TEXT_NOPREFIX - RTEXT "", IDT_BENCH_CPU, m, 202, xc, 8 - RTEXT "", IDT_BENCH_VER, m + xc - 100, 216, 100, 8 + // RTEXT "", IDT_BENCH_CPU, m + sPasses, 202, xc - sPasses, 16, SS_NOPREFIX + RTEXT "", IDT_BENCH_CPU, m + 0, 202, xc - 0, 16, SS_NOPREFIX + RTEXT "", IDT_BENCH_VER, m + xc - 100, 222, 100, MY_TEXT_NOPREFIX - LTEXT "", IDT_BENCH_CPU_FEATURE, m, 228, xc - 100, 8 - LTEXT "", IDT_BENCH_SYS1, m, 238, xc - 140, 8 - LTEXT "", IDT_BENCH_SYS2, m, 248, xc - 140, 8 - 
- // LTEXT "", IDT_BENCH_SYSTEM, m, 232, xc - 80, 8 - // LTEXT "", IDT_BENCH_FREQ_RES, m, 242, 80, 8 - + LTEXT "", IDT_BENCH_CPU_FEATURE, m, 222, xc - 100, 16, SS_NOPREFIX // - 100 + LTEXT "", IDT_BENCH_SYS1, m, 238, xc - 140, MY_TEXT_NOPREFIX + LTEXT "", IDT_BENCH_SYS2, m, 248, xc - 140, MY_TEXT_NOPREFIX + + LTEXT "", IDT_BENCH_LOG, m + xc + m, m, sLog - m, yc, SS_LEFTNOWORDWRAP | SS_NOPREFIX LTEXT "Elapsed time:", IDT_BENCH_ELAPSED, m, 163, g2xs, 8 - LTEXT "Size:", IDT_BENCH_SIZE, m, 176, g2xs, 8 - LTEXT "Passes:", IDT_BENCH_PASSES, m, 189, g2xs, 8 +// LTEXT "Size:", IDT_BENCH_SIZE, m, 176, g2xs, 8 + LTEXT "Passes:", IDT_BENCH_PASSES, m, 176, g2xs, 8 + COMBOBOX IDC_BENCH_NUM_PASSES, m, 187, sPasses, 140, MY_COMBO + + RTEXT "", IDT_BENCH_ELAPSED_VAL, g3x, 163, g3xs, MY_TEXT_NOPREFIX + // RTEXT "", IDT_BENCH_SIZE_VAL, g3x, 176, g3xs, MY_TEXT_NOPREFIX + RTEXT "", IDT_BENCH_PASSES_VAL, g3x, 176, g3xs, MY_TEXT_NOPREFIX - RTEXT "", IDT_BENCH_ELAPSED_VAL, g3x, 163, g3xs, 8 - RTEXT "", IDT_BENCH_SIZE_VAL, g3x, 176, g3xs, 8 - RTEXT "", IDT_BENCH_PASSES_VAL, g3x, 189, g3xs, 8 END #ifdef UNDER_CE diff --git a/CPP/7zip/UI/GUI/BenchmarkDialogRes.h b/CPP/7zip/UI/GUI/BenchmarkDialogRes.h index 8ee4f681..b7d54b77 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialogRes.h +++ b/CPP/7zip/UI/GUI/BenchmarkDialogRes.h @@ -38,9 +38,17 @@ #define IDT_BENCH_TOTAL_USAGE_VAL 133 #define IDT_BENCH_ELAPSED_VAL 140 -#define IDT_BENCH_SIZE_VAL 141 +// #define IDT_BENCH_SIZE_VAL 141 #define IDT_BENCH_PASSES_VAL 142 +#define IDC_BENCH_NUM_PASSES 143 +#define IDT_BENCH_LOG 160 +#define IDT_BENCH_ERROR_MESSAGE 161 + +#define IDT_BENCH_COMPRESS_SIZE1 170 +#define IDT_BENCH_COMPRESS_SIZE2 171 +#define IDT_BENCH_DECOMPR_SIZE1 172 +#define IDT_BENCH_DECOMPR_SIZE2 173 // #define IDT_BENCH_FREQ_CUR 150 // #define IDT_BENCH_FREQ_RES 151 diff --git a/CPP/7zip/UI/GUI/CompressDialog.cpp b/CPP/7zip/UI/GUI/CompressDialog.cpp index 6072640b..876d80c9 100644 --- a/CPP/7zip/UI/GUI/CompressDialog.cpp +++ 
b/CPP/7zip/UI/GUI/CompressDialog.cpp @@ -83,6 +83,8 @@ static const unsigned kHistorySize = 20; static const UInt32 kNoSolidBlockSize = 0; static const UInt32 kSolidBlockSize = 64; +static const UInt32 kLzmaMaxDictSize = (UInt32)15 << 28; + static LPCSTR const kExeExt = ".exe"; #define k7zFormat "7z" @@ -375,18 +377,20 @@ static bool IsMethodSupportedBySfx(int methodID) return false; } -static bool GetMaxRamSizeForProgram(UInt64 &physSize) +static bool GetMaxRamSizeForProgram(UInt64 &ramSize, UInt64 &size) { - physSize = (UInt64)(sizeof(size_t)) << 29; - bool ramSize_Defined = NSystem::GetRamSize(physSize); + size = (UInt64)(sizeof(size_t)) << 29; + bool ramSize_Defined = NSystem::GetRamSize(size); + ramSize = size; + size = size / 16 * 15; const UInt64 kMinSysSize = (1 << 24); - if (physSize <= kMinSysSize) - physSize = 0; + if (size <= kMinSysSize) + size = 0; else - physSize -= kMinSysSize; + size -= kMinSysSize; const UInt64 kMinUseSize = (1 << 24); - if (physSize < kMinUseSize) - physSize = kMinUseSize; + if (size < kMinUseSize) + size = kMinUseSize; return ramSize_Defined; } @@ -535,7 +539,7 @@ bool CCompressDialog::OnInit() SetSolidBlockSize(); SetNumThreads(); - TCHAR s[40] = { TEXT('/'), TEXT(' '), 0 }; + TCHAR s[32] = { TEXT('/'), TEXT(' '), 0 }; ConvertUInt32ToString(NSystem::GetNumberOfProcessors(), s + 2); SetItemText(IDT_COMPRESS_HARDWARE_THREADS, s); @@ -780,6 +784,48 @@ static bool IsAsciiString(const UString &s) return true; } + +static void AddSize_MB(UString &s, UInt64 size) +{ + char temp[32]; + ConvertUInt64ToString((size + (1 << 20) - 1) >> 20, temp); + s += temp; + s += " MB"; +} + + +void SetErrorMessage_MemUsage(UString &s, UInt64 reqSize, UInt64 ramSize, UInt64 ramLimit, const UString &usageString) +{ + s += "The operation was blocked by 7-Zip"; + s.Add_LF(); + s += "The operation can require big amount of RAM (memory):"; + s.Add_LF(); + s.Add_LF(); + AddSize_MB(s, reqSize); + + if (!usageString.IsEmpty()) + { + s += " : "; + s += 
usageString; + } + + s.Add_LF(); + AddSize_MB(s, ramSize); + s += " : RAM"; + + if (ramLimit != 0) + { + s.Add_LF(); + AddSize_MB(s, ramLimit); + s += " : 7-Zip limit"; + } + + s.Add_LF(); + s.Add_LF(); + s += LangString(IDS_MEM_ERROR); +} + + void CCompressDialog::OnOK() { _password1Control.GetText(Info.Password); @@ -811,6 +857,24 @@ void CCompressDialog::OnOK() } } + { + UInt64 ramSize; + UInt64 maxRamSize; + const bool maxRamSize_Defined = GetMaxRamSizeForProgram(ramSize, maxRamSize); + UInt64 decompressMem; + const UInt64 memUsage = GetMemoryUsage_DecompMem(decompressMem); + if (maxRamSize_Defined && memUsage > maxRamSize) + { + UString s; + UString s2 = LangString(IDT_COMPRESS_MEMORY); + if (s2.IsEmpty()) + GetItemText(IDT_COMPRESS_MEMORY, s2); + SetErrorMessage_MemUsage(s, memUsage, ramSize, maxRamSize, s2); + MessageBoxError(s); + return; + } + } + SaveOptionsInMem(); { UString s; @@ -829,7 +893,7 @@ void CCompressDialog::OnOK() Info.PathMode = (NWildcard::ECensorPathMode)k_PathMode_Vals[m_PathMode.GetCurSel()]; Info.Level = GetLevelSpec(); - Info.Dictionary = GetDictionarySpec(); + Info.Dict64 = GetDictSpec(); Info.Order = GetOrderSpec(); Info.OrderMode = GetOrderMode(); Info.NumThreads = GetNumThreadsSpec(); @@ -1372,22 +1436,28 @@ UString CCompressDialog::GetEncryptionMethodSpec() return s; } -void CCompressDialog::AddDictionarySize(UInt32 size) +void CCompressDialog::AddDict2(size_t sizeReal, size_t sizeShow) { Byte c = 0; unsigned moveBits = 0; - if ((size & 0xFFFFF) == 0) { moveBits = 20; c = 'M'; } - else if ((size & 0x3FF) == 0) { moveBits = 10; c = 'K'; } - TCHAR s[40]; - ConvertUInt32ToString(size >> moveBits, s); + if ((sizeShow & 0xFFFFF) == 0) { moveBits = 20; c = 'M'; } + else if ((sizeShow & 0x3FF) == 0) { moveBits = 10; c = 'K'; } + TCHAR s[32]; + ConvertUInt64ToString(sizeShow >> moveBits, s); unsigned pos = MyStringLen(s); s[pos++] = ' '; if (moveBits != 0) s[pos++] = c; s[pos++] = 'B'; s[pos++] = 0; - int index = 
(int)m_Dictionary.AddString(s); - m_Dictionary.SetItemData(index, size); + const int index = (int)m_Dictionary.AddString(s); + m_Dictionary.SetItemData(index, sizeReal); +} + + +void CCompressDialog::AddDict(size_t size) +{ + AddDict2(size, size); } typedef enum { @@ -1435,7 +1505,7 @@ void CCompressDialog::SetDictionary() { m_Dictionary.ResetContent(); const CArcInfoEx &ai = (*ArcFormats)[GetFormatIndex()]; - int index = FindRegistryFormat(ai.Name); + const int index = FindRegistryFormat(ai.Name); UInt32 defaultDict = (UInt32)(Int32)-1; if (index >= 0) @@ -1445,12 +1515,13 @@ void CCompressDialog::SetDictionary() defaultDict = fo.Dictionary; } - int methodID = GetMethodID(); + const int methodID = GetMethodID(); UInt32 level = GetLevel2(); if (methodID < 0) return; + UInt64 ramSize; UInt64 maxRamSize; - bool maxRamSize_Defined = GetMaxRamSizeForProgram(maxRamSize); + const bool maxRamSize_Defined = GetMaxRamSizeForProgram(ramSize, maxRamSize); switch (methodID) { @@ -1460,38 +1531,44 @@ void CCompressDialog::SetDictionary() if (defaultDict == (UInt32)(Int32)-1) { defaultDict = - ( level <= 3 ? (1 << (level * 2 + 16)) : - ( level <= 6 ? (1 << (level + 19)) : - ( level <= 7 ? (1 << 25) : (1 << 26) - ))); + ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) : + ( level <= 6 ? ((UInt32)1 << (level + 19)) : + ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26) + ))); } - AddDictionarySize(1 << 16); - AddDictionarySize(1 << 18); - m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); + + // we use threshold 3.75 GiB to switch to kLzmaMaxDictSize. 
+ if (defaultDict >= ((UInt32)15 << 28)) + defaultDict = kLzmaMaxDictSize; - for (unsigned i = 20; i <= 31; i++) - for (unsigned j = 0; j < 2; j++) - { - if (i == 20 && j > 0) - continue; - UInt32 dict = ((UInt32)(2 + j) << (i - 1)); - - if (dict > - #ifdef MY_CPU_64BIT - (3 << 29) - #else - (1 << 26) - #endif - ) - continue; - - AddDictionarySize(dict); - UInt64 decomprSize; - UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize); - if (dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize)) - m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); - } + const size_t kLzmaMaxDictSize_Up = (size_t)1 << (20 + sizeof(size_t) / 4 * 6); + + int curSel = 0; + for (unsigned i = (16 - 1) * 2; i <= (32 - 1) * 2; i++) + { + if (i < (20 - 1) * 2 + && i != (16 - 1) * 2 + && i != (18 - 1) * 2) + continue; + if (i == (20 - 1) * 2 + 1) + continue; + const size_t dict_up = (size_t)(2 + (i & 1)) << (i / 2); + size_t dict = dict_up; + if (dict_up >= kLzmaMaxDictSize) + dict = kLzmaMaxDictSize; // we reduce dictionary + + AddDict(dict); + // AddDict2(dict, dict_up); // for debug : we show 4 GB + + const UInt64 memUsage = GetMemoryUsageComp_Dict(dict); + if (dict <= defaultDict && (!maxRamSize_Defined || memUsage <= maxRamSize)) + curSel = m_Dictionary.GetCount() - 1; + if (dict_up >= kLzmaMaxDictSize_Up) + break; + } + + m_Dictionary.SetCurSel(curSel); // SetNearestSelectComboBox(m_Dictionary, defaultDict); break; } @@ -1519,10 +1596,9 @@ void CCompressDialog::SetDictionary() ) continue; - AddDictionarySize(dict); - UInt64 decomprSize; - UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize); - if (dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize)) + AddDict(dict); + const UInt64 memUsage = GetMemoryUsageComp_Dict(dict); + if (dict <= defaultDict && (!maxRamSize_Defined || memUsage <= maxRamSize)) m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); } @@ -1532,46 +1608,63 @@ void CCompressDialog::SetDictionary() case 
kPPMd: { if (defaultDict == (UInt32)(Int32)-1) - { defaultDict = (UInt32)1 << (level + 19); - } - for (unsigned i = 20; i < 31; i++) - for (unsigned j = 0; j < 2; j++) - { - if (i == 20 && j > 0) - continue; - UInt32 dict = ((UInt32)(2 + j) << (i - 1)); - if (dict > - #ifdef MY_CPU_64BIT - (1 << 30) - #else - (1 << 29) - #endif - ) - continue; - AddDictionarySize(dict); - UInt64 decomprSize; - UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize); - if ((dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize)) - || m_Dictionary.GetCount() == 1) - m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); - } - + const UInt32 kPpmd_Default_4g = (UInt32)0 - ((UInt32)1 << 10); + const size_t kPpmd_MaxDictSize_Up = (size_t)1 << (29 + sizeof(size_t) / 8); + + if (defaultDict >= ((UInt32)15 << 28)) // threshold + defaultDict = kPpmd_Default_4g; + + int curSel = 0; + for (unsigned i = (20 - 1) * 2; i <= (32 - 1) * 2; i++) + { + if (i == (20 - 1) * 2 + 1) + continue; + + const size_t dict_up = (size_t)(2 + (i & 1)) << (i / 2); + size_t dict = dict_up; + if (dict_up >= kPpmd_Default_4g) + dict = kPpmd_Default_4g; + + AddDict2(dict, dict_up); + // AddDict2((UInt32)((UInt32)0 - 2), dict_up); // for debug + // AddDict(dict_up); // for debug + const UInt64 memUsage = GetMemoryUsageComp_Dict(dict); + if (dict <= defaultDict && (!maxRamSize_Defined || memUsage <= maxRamSize)) + curSel = m_Dictionary.GetCount() - 1; + if (dict_up >= kPpmd_MaxDictSize_Up) + break; + } + m_Dictionary.SetCurSel(curSel); // SetNearestSelectComboBox(m_Dictionary, defaultDict); break; } - case kDeflate: + case kPPMdZip: { - AddDictionarySize(32 << 10); - m_Dictionary.SetCurSel(0); + if (defaultDict == (UInt32)(Int32)-1) + defaultDict = (UInt32)1 << (level + 19); + + int curSel = 0; + for (unsigned i = 20; i <= 28; i++) + { + const UInt32 dict = (UInt32)1 << i; + AddDict(dict); + const UInt64 memUsage = GetMemoryUsageComp_Dict(dict); + if ((dict <= defaultDict && 
(!maxRamSize_Defined || memUsage <= maxRamSize))) + curSel = m_Dictionary.GetCount() - 1; + } + m_Dictionary.SetCurSel(curSel); + // SetNearestSelectComboBox(m_Dictionary, defaultDict); break; } - + + case kDeflate: case kDeflate64: { - AddDictionarySize(64 << 10); + const UInt32 dict = (methodID == kDeflate ? (UInt32)(1 << 15) : (UInt32)(1 << 16)); + AddDict(dict); m_Dictionary.SetCurSel(0); break; } @@ -1585,39 +1678,22 @@ void CCompressDialog::SetDictionary() else defaultDict = (100 << 10); } + int curSel = 0; for (unsigned i = 1; i <= 9; i++) { - UInt32 dict = ((UInt32)i * 100) << 10; - AddDictionarySize(dict); - if (dict <= defaultDict || m_Dictionary.GetCount() == 0) - m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); - } - - break; - } - - case kPPMdZip: - { - if (defaultDict == (UInt32)(Int32)-1) - defaultDict = (UInt32)1 << (level + 19); - - for (unsigned i = 20; i <= 28; i++) - { - UInt32 dict = (1 << i); - AddDictionarySize(dict); - UInt64 decomprSize; - UInt64 requiredComprSize = GetMemoryUsage(dict, decomprSize); - if ((dict <= defaultDict && (!maxRamSize_Defined || requiredComprSize <= maxRamSize)) - || m_Dictionary.GetCount() == 1) - m_Dictionary.SetCurSel(m_Dictionary.GetCount() - 1); + const UInt32 dict = ((UInt32)i * 100) << 10; + AddDict(dict); + // AddDict2(i * 100000, dict); + if (i <= defaultDict / 100000) + curSel = m_Dictionary.GetCount() - 1; } - - // SetNearestSelectComboBox(m_Dictionary, defaultDict); + m_Dictionary.SetCurSel(curSel); break; } } } + UInt32 CCompressDialog::GetComboValue(NWindows::NControl::CComboBox &c, int defMax) { if (c.GetCount() <= defMax) @@ -1625,6 +1701,15 @@ UInt32 CCompressDialog::GetComboValue(NWindows::NControl::CComboBox &c, int defM return (UInt32)c.GetItemData_of_CurSel(); } + +UInt64 CCompressDialog::GetComboValue_64(NWindows::NControl::CComboBox &c, int defMax) +{ + if (c.GetCount() <= defMax) + return (UInt64)(Int64)-1; + // LRESULT is signed. 
so we cast it to unsigned size_t at first: + return (UInt64)(size_t)c.GetItemData_of_CurSel(); +} + UInt32 CCompressDialog::GetLevel2() { UInt32 level = GetLevel(); @@ -1635,7 +1720,7 @@ UInt32 CCompressDialog::GetLevel2() int CCompressDialog::AddOrder(UInt32 size) { - TCHAR s[40]; + TCHAR s[32]; ConvertUInt32ToString(size, s); int index = (int)m_Order.AddString(s); m_Order.SetItemData(index, size); @@ -1761,7 +1846,7 @@ bool CCompressDialog::GetOrderMode() } -static UInt64 Get_Lzma2_ChunkSize(UInt32 dict) +static UInt64 Get_Lzma2_ChunkSize(UInt64 dict) { // we use same default chunk sizes as defined in 7z encoder and lzma2 encoder UInt64 cs = (UInt64)dict << 2; @@ -1787,8 +1872,8 @@ void CCompressDialog::SetSolidBlockSize(bool useDictionary) if (level == 0) return; - UInt32 dict = GetDictionarySpec(); - if (dict == (UInt32)(Int32)-1) + UInt64 dict = GetDictSpec(); + if (dict == (UInt64)(Int64)-1) dict = 1; UInt32 defaultBlockSize = (UInt32)(Int32)-1; @@ -1847,7 +1932,7 @@ void CCompressDialog::SetSolidBlockSize(bool useDictionary) if (defaultBlockSize == (UInt32)(Int32)-1 && ((UInt64)1 << i) >= blockSize) defaultBlockSize = i; - TCHAR s[40]; + TCHAR s[32]; char post; ConvertUInt32ToString(1 << (i % 10), s); if (i < 20) post = 'K'; @@ -1918,7 +2003,7 @@ void CCompressDialog::SetNumThreads() numAlgoThreadsMax = 128; for (UInt32 i = 1; i <= numHardwareThreads * 2 && i <= numAlgoThreadsMax; i++) { - TCHAR s[40]; + TCHAR s[32]; ConvertUInt32ToString(i, s); int index = (int)m_NumThreads.AddString(s); m_NumThreads.SetItemData(index, (UInt32)i); @@ -1926,7 +2011,8 @@ void CCompressDialog::SetNumThreads() SetNearestSelectComboBox(m_NumThreads, defaultValue); } -UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) + +UInt64 CCompressDialog::GetMemoryUsage_Dict_DecompMem(UInt64 dict64, UInt64 &decompressMemory) { decompressMemory = UInt64(Int64(-1)); UInt32 level = GetLevel2(); @@ -1959,6 +2045,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, 
UInt64 &decompressMemory) case kLZMA: case kLZMA2: { + const UInt32 dict = (dict64 >= kLzmaMaxDictSize ? kLzmaMaxDictSize : (UInt32)dict64); UInt32 hs = dict - 1; hs |= (hs >> 1); hs |= (hs >> 2); @@ -2010,7 +2097,15 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) } if (chunkSize == 0) - size += numBlockThreads * (size1 + (UInt64)dict * 3 / 2); + { + const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16); + UInt64 blockSize = (UInt64)dict + (1 << 16) + + (numThreads1 > 1 ? (1 << 20) : 0); + blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2)); + if (blockSize >= kBlockSizeMax) + blockSize = kBlockSizeMax; + size += numBlockThreads * (size1 + blockSize); + } else { size += numBlockThreads * (size1 + chunkSize); @@ -2024,6 +2119,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) case kFLZMA2: { + const UInt32 dict = (dict64 >= kLzmaMaxDictSize ? kLzmaMaxDictSize : (UInt32)dict64); if (level > FL2_MAX_7Z_CLEVEL) level = FL2_MAX_7Z_CLEVEL; /* dual buffer is enabled in Lzma2Encoder.cpp so size is dict * 6 */ @@ -2049,7 +2145,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) case kPPMd: { - decompressMemory = dict + (2 << 20); + decompressMemory = dict64 + (2 << 20); return size + decompressMemory; } @@ -2077,7 +2173,7 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) case kPPMdZip: { - decompressMemory = dict + (2 << 20); + decompressMemory = dict64 + (2 << 20); return size + (UInt64)decompressMemory * numThreads; } } @@ -2085,9 +2181,15 @@ UInt64 CCompressDialog::GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory) return (UInt64)(Int64)-1; } -UInt64 CCompressDialog::GetMemoryUsage(UInt64 &decompressMemory) +UInt64 CCompressDialog::GetMemoryUsage_DecompMem(UInt64 &decompressMemory) { - return GetMemoryUsage(GetDictionary(), decompressMemory); + return GetMemoryUsage_Dict_DecompMem(GetDict(), decompressMemory); +} + +UInt64 
CCompressDialog::GetMemoryUsageComp_Dict(UInt64 dict64) +{ + UInt64 decompressMemory; + return GetMemoryUsage_Dict_DecompMem(dict64, decompressMemory); } void CCompressDialog::PrintMemUsage(UINT res, UInt64 value) @@ -2097,7 +2199,7 @@ void CCompressDialog::PrintMemUsage(UINT res, UInt64 value) SetItemText(res, TEXT("?")); return; } - TCHAR s[40]; + TCHAR s[32]; if (value <= ((UInt64)16 << 30)) { value = (value + (1 << 20) - 1) >> 20; @@ -2116,7 +2218,7 @@ void CCompressDialog::PrintMemUsage(UINT res, UInt64 value) void CCompressDialog::SetMemoryUsage() { UInt64 decompressMem; - UInt64 memUsage = GetMemoryUsage(decompressMem); + const UInt64 memUsage = GetMemoryUsage_DecompMem(decompressMem); PrintMemUsage(IDT_COMPRESS_MEMORY_VALUE, memUsage); PrintMemUsage(IDT_COMPRESS_MEMORY_DE_VALUE, decompressMem); } @@ -2125,7 +2227,7 @@ void CCompressDialog::SetParams() { const CArcInfoEx &ai = (*ArcFormats)[GetFormatIndex()]; m_Params.SetText(TEXT("")); - int index = FindRegistryFormat(ai.Name); + const int index = FindRegistryFormat(ai.Name); if (index >= 0) { const NCompression::CFormatOptions &fo = m_RegistryInfo.Formats[index]; @@ -2137,7 +2239,7 @@ void CCompressDialog::SetParams() void CCompressDialog::SaveOptionsInMem() { const CArcInfoEx &ai = (*ArcFormats)[Info.FormatIndex]; - int index = FindRegistryFormatAlways(ai.Name); + const int index = FindRegistryFormatAlways(ai.Name); m_Params.GetText(Info.Options); m_Volume.GetText(Info.SplitVolume); Info.Options.Trim(); @@ -2146,12 +2248,32 @@ void CCompressDialog::SaveOptionsInMem() fo.Options = Info.Options; fo.SplitVolume = Info.SplitVolume; fo.Level = GetLevelSpec(); - fo.Dictionary = GetDictionarySpec(); fo.Order = GetOrderSpec(); fo.Method = GetMethodSpec(); fo.EncryptionMethod = GetEncryptionMethodSpec(); fo.NumThreads = GetNumThreadsSpec(); fo.BlockLogSize = GetBlockSizeSpec(); + { + const UInt64 dict64 = GetDictSpec(); + UInt32 dict32; + if (dict64 == (UInt64)(Int64)-1) + dict32 = (UInt32)(Int32)-1; + else + { + 
dict32 = (UInt32)dict64; + if (dict64 != dict32) + { + /* here we must write 32-bit value for registry that indicates big_value + (UInt32)(Int32)-1 : is used as marker for default size + (UInt32)(Int32)-2 : it can be used to indicate big value (4 GiB) + the value must be larger than threshold + */ + dict32 = (UInt32)(Int32)-2; + // dict32 = kLzmaMaxDictSize; // it must be larger than threshold + } + } + fo.Dictionary = dict32; + } } unsigned CCompressDialog::GetFormatIndex() diff --git a/CPP/7zip/UI/GUI/CompressDialog.h b/CPP/7zip/UI/GUI/CompressDialog.h index f55f0daf..fbf3ec12 100644 --- a/CPP/7zip/UI/GUI/CompressDialog.h +++ b/CPP/7zip/UI/GUI/CompressDialog.h @@ -42,7 +42,7 @@ namespace NCompressDialog UInt32 Level; UString Method; - UInt32 Dictionary; + UInt64 Dict64; bool OrderMode; UInt32 Order; UString Options; @@ -80,7 +80,8 @@ namespace NCompressDialog DeleteAfterCompressing(false), FormatIndex(-1) { - Level = Dictionary = Order = UInt32(-1); + Level = Order = (UInt32)(Int32)-1; + Dict64 = (UInt64)(Int64)(-1); OrderMode = false; Method.Empty(); Options.Empty(); @@ -90,6 +91,7 @@ namespace NCompressDialog }; } + class CCompressDialog: public NWindows::NControl::CModalDialog { NWindows::NControl::CComboBox m_ArchivePath; @@ -144,17 +146,19 @@ class CCompressDialog: public NWindows::NControl::CModalDialog void SetEncryptionMethod(); - void AddDictionarySize(UInt32 size); + void AddDict2(size_t sizeReal, size_t sizeShow); + void AddDict(size_t size); void SetDictionary(); UInt32 GetComboValue(NWindows::NControl::CComboBox &c, int defMax = 0); + UInt64 GetComboValue_64(NWindows::NControl::CComboBox &c, int defMax = 0); UInt32 GetLevel() { return GetComboValue(m_Level); } UInt32 GetLevelSpec() { return GetComboValue(m_Level, 1); } UInt32 GetLevel2(); - UInt32 GetDictionary() { return GetComboValue(m_Dictionary); } - UInt32 GetDictionarySpec() { return GetComboValue(m_Dictionary, 1); } + UInt64 GetDict() { return GetComboValue_64(m_Dictionary); } + UInt64 
GetDictSpec() { return GetComboValue_64(m_Dictionary, 1); } UInt32 GetOrder() { return GetComboValue(m_Order); } UInt32 GetOrderSpec() { return GetComboValue(m_Order, 1); } UInt32 GetNumThreadsSpec() { return GetComboValue(m_NumThreads, 1); } @@ -168,8 +172,10 @@ class CCompressDialog: public NWindows::NControl::CModalDialog void SetSolidBlockSize(bool useDictionary = false); void SetNumThreads(); - UInt64 GetMemoryUsage(UInt32 dict, UInt64 &decompressMemory); - UInt64 GetMemoryUsage(UInt64 &decompressMemory); + UInt64 GetMemoryUsage_Dict_DecompMem(UInt64 dict, UInt64 &decompressMemory); + UInt64 GetMemoryUsage_DecompMem(UInt64 &decompressMemory); + UInt64 GetMemoryUsageComp_Dict(UInt64 dict64); + void PrintMemUsage(UINT res, UInt64 value); void SetMemoryUsage(); void SetParams(); @@ -198,6 +204,11 @@ class CCompressDialog: public NWindows::NControl::CModalDialog CCompressDialog(): CurrentDirWasChanged(false) {}; + void MessageBoxError(LPCWSTR message) + { + MessageBoxW(*this, message, L"7-Zip", MB_ICONERROR); + } + protected: void CheckSFXControlsEnable(); diff --git a/CPP/7zip/UI/GUI/GUI.cpp b/CPP/7zip/UI/GUI/GUI.cpp index aef9cdd0..2d9ef0f8 100644 --- a/CPP/7zip/UI/GUI/GUI.cpp +++ b/CPP/7zip/UI/GUI/GUI.cpp @@ -193,7 +193,12 @@ static int Main2() if (options.Command.CommandType == NCommandType::kBenchmark) { - HRESULT res = Benchmark(EXTERNAL_CODECS_VARS_L options.Properties); + HRESULT res = Benchmark( + EXTERNAL_CODECS_VARS_L + options.Properties, + options.NumIterations_Defined ? 
+ options.NumIterations : + k_NumBenchIterations_Default); /* if (res == S_FALSE) { diff --git a/CPP/7zip/UI/GUI/GUI.dsp b/CPP/7zip/UI/GUI/GUI.dsp index 41c1e04d..7e65f481 100644 --- a/CPP/7zip/UI/GUI/GUI.dsp +++ b/CPP/7zip/UI/GUI/GUI.dsp @@ -1164,6 +1164,10 @@ SOURCE=..\..\..\Windows\SystemInfo.h # End Source File # Begin Source File +SOURCE=..\..\..\Windows\Thread.h +# End Source File +# Begin Source File + SOURCE=..\..\..\Windows\TimeUtils.cpp # End Source File # Begin Source File diff --git a/CPP/7zip/UI/GUI/UpdateGUI.cpp b/CPP/7zip/UI/GUI/UpdateGUI.cpp index af07e550..28f19d25 100644 --- a/CPP/7zip/UI/GUI/UpdateGUI.cpp +++ b/CPP/7zip/UI/GUI/UpdateGUI.cpp @@ -142,7 +142,7 @@ static void SetOutProperties( UInt32 level, bool setMethod, const UString &method, - UInt32 dictionary, + UInt64 dict64, bool orderMode, UInt32 order, bool solidIsSpecified, UInt64 solidBlockSize, @@ -157,13 +157,13 @@ static void SetOutProperties( { if (!method.IsEmpty()) AddProp(properties, is7z ? "0": "m", method); - if (dictionary != (UInt32)(Int32)-1) + if (dict64 != (UInt64)(Int64)-1) { AString name; if (is7z) name = "0"; name += (orderMode ? 
"mem" : "d"); - AddProp(properties, name, GetNumInBytesString(dictionary)); + AddProp(properties, name, GetNumInBytesString(dict64)); } if (order != (UInt32)(Int32)-1) { @@ -389,7 +389,7 @@ static HRESULT ShowDialog( di.Level, !methodOverride, di.Method, - di.Dictionary, + di.Dict64, di.OrderMode, di.Order, di.SolidIsSpecified, di.SolidBlockSize, di.MultiThreadIsAllowed, di.NumThreads, diff --git a/CPP/7zip/warn_clang_mac.mak b/CPP/7zip/warn_clang_mac.mak index 41044a2c..aadf14f7 100644 --- a/CPP/7zip/warn_clang_mac.mak +++ b/CPP/7zip/warn_clang_mac.mak @@ -27,6 +27,8 @@ CFLAGS_WARN_CLANG_12= $(CFLAGS_WARN_CLANG_3_8) \ -Wno-atomic-implicit-seq-cst \ -Wconversion \ -Wno-sign-conversion \ + -Wno-suggest-override \ + -Wno-suggest-destructor-override \ CFLAGS_WARN_MAC = \ -Wno-poison-system-directories \ diff --git a/CPP/7zip/warn_gcc.mak b/CPP/7zip/warn_gcc.mak index 5fb747dc..3185326a 100644 --- a/CPP/7zip/warn_gcc.mak +++ b/CPP/7zip/warn_gcc.mak @@ -42,6 +42,10 @@ CFLAGS_WARN_GCC_9 = \ # -Wno-sign-conversion \ +CFLAGS_WARN_GCC_10 = $(CFLAGS_WARN_GCC_9) \ + -Wmaybe-uninitialized \ + -Wmisleading-indentation \ + CFLAGS_WARN_GCC_PPMD_UNALIGNED = \ -Wno-strict-aliasing \ diff --git a/CPP/Common/LzFindPrepare.cpp b/CPP/Common/LzFindPrepare.cpp new file mode 100644 index 00000000..8845e4a5 --- /dev/null +++ b/CPP/Common/LzFindPrepare.cpp @@ -0,0 +1,7 @@ +// Sha256Prepare.cpp + +#include "StdAfx.h" + +#include "../../C/LzFind.h" + +static struct CLzFindPrepare { CLzFindPrepare() { LzFindPrepare(); } } g_CLzFindPrepare; diff --git a/CPP/Common/MyBuffer2.h b/CPP/Common/MyBuffer2.h index de5ebbdd..372d478c 100644 --- a/CPP/Common/MyBuffer2.h +++ b/CPP/Common/MyBuffer2.h @@ -25,6 +25,19 @@ class CMidBuffer operator const Byte *() const { return _data; } size_t Size() const { return _size; } + void Alloc(size_t size) + { + if (!_data || size != _size) + { + ::MidFree(_data); + _size = 0; + _data = NULL; + _data = (Byte *)::MidAlloc(size); + if (_data) + _size = size; + } + } + 
void AllocAtLeast(size_t size) { if (!_data || size > _size) @@ -105,5 +118,22 @@ class CAlignedBuffer } }; +/* + CMidAlignedBuffer must return aligned pointer. + - in Windows it uses CMidBuffer(): MidAlloc() : VirtualAlloc() + VirtualAlloc(): Memory allocated is automatically initialized to zero. + MidAlloc(0) returns NULL + - in non-Windows systems it uses g_AlignedAlloc. + g_AlignedAlloc::Alloc(size = 0) can return non NULL. +*/ + +typedef +#ifdef _WIN32 + CMidBuffer +#else + CAlignedBuffer +#endif + CMidAlignedBuffer; + #endif diff --git a/CPP/Windows/Control/Dialog.cpp b/CPP/Windows/Control/Dialog.cpp index 020694a6..6a9d7d9b 100644 --- a/CPP/Windows/Control/Dialog.cpp +++ b/CPP/Windows/Control/Dialog.cpp @@ -26,6 +26,14 @@ static INT_PTR APIENTRY DialogProcedure(HWND dialogHWND, UINT message, WPARAM wP return FALSE; if (message == WM_INITDIALOG) dialog->Attach(dialogHWND); + + /* MSDN: The dialog box procedure should return + TRUE - if it processed the message + FALSE - if it did not process the message + If the dialog box procedure returns FALSE, + the dialog manager performs the default dialog operation in response to the message. + */ + try { return BoolToBOOL(dialog->OnMessage(message, wParam, lParam)); } catch(...) 
{ return TRUE; } } @@ -39,6 +47,7 @@ bool CDialog::OnMessage(UINT message, WPARAM wParam, LPARAM lParam) case WM_NOTIFY: return OnNotify((UINT)wParam, (LPNMHDR) lParam); case WM_TIMER: return OnTimer(wParam, lParam); case WM_SIZE: return OnSize(wParam, LOWORD(lParam), HIWORD(lParam)); + case WM_DESTROY: return OnDestroy(); case WM_HELP: OnHelp(); return true; /* OnHelp( diff --git a/CPP/Windows/Control/Dialog.h b/CPP/Windows/Control/Dialog.h index 33caa5b2..f804a9e7 100644 --- a/CPP/Windows/Control/Dialog.h +++ b/CPP/Windows/Control/Dialog.h @@ -31,6 +31,12 @@ class CDialog: public CWindow bool SetItemText(int itemID, LPCTSTR s) { return BOOLToBool(SetDlgItemText(_window, itemID, s)); } + bool SetItemTextA(int itemID, LPCSTR s) + { return BOOLToBool(SetDlgItemTextA(_window, itemID, s)); } + + bool SetItemText_Empty(int itemID) + { return SetItemText(itemID, TEXT("")); } + #ifndef _UNICODE bool SetItemText(int itemID, LPCWSTR s) { @@ -51,6 +57,12 @@ class CDialog: public CWindow */ #endif + bool GetItemText(int itemID, UString &s) + { + CWindow window(GetItem(itemID)); + return window.GetText(s); + } + bool SetItemInt(int itemID, UINT value, bool isSigned) { return BOOLToBool(SetDlgItemInt(_window, itemID, value, BoolToBOOL(isSigned))); } bool GetItemInt(int itemID, bool isSigned, UINT &value) @@ -65,6 +77,13 @@ class CDialog: public CWindow HWND GetNextTabItem(HWND control, bool previous) { return GetNextDlgTabItem(_window, control, BoolToBOOL(previous)); } + LRESULT SendMsg_NextDlgCtl(WPARAM wParam, LPARAM lParam) + { return SendMsg(WM_NEXTDLGCTL, wParam, lParam); } + LRESULT SendMsg_NextDlgCtl_HWND(HWND hwnd) { return SendMsg_NextDlgCtl((WPARAM)hwnd, TRUE); } + LRESULT SendMsg_NextDlgCtl_CtlId(int id) { return SendMsg_NextDlgCtl_HWND(GetItem(id)); } + LRESULT SendMsg_NextDlgCtl_Next() { return SendMsg_NextDlgCtl(0, FALSE); } + LRESULT SendMsg_NextDlgCtl_Prev() { return SendMsg_NextDlgCtl(1, FALSE); } + bool MapRect(LPRECT rect) { return 
BOOLToBool(MapDialogRect(_window, rect)); } @@ -92,6 +111,7 @@ class CDialog: public CWindow virtual bool OnCommand(WPARAM wParam, LPARAM lParam); virtual bool OnCommand(int code, int itemID, LPARAM lParam); virtual bool OnSize(WPARAM /* wParam */, int /* xSize */, int /* ySize */) { return false; } + virtual bool OnDestroy() { return false; } /* #ifdef UNDER_CE diff --git a/CPP/Windows/ErrorMsg.cpp b/CPP/Windows/ErrorMsg.cpp index bfa21e50..63fd7922 100644 --- a/CPP/Windows/ErrorMsg.cpp +++ b/CPP/Windows/ErrorMsg.cpp @@ -19,6 +19,14 @@ namespace NError { static bool MyFormatMessage(DWORD errorCode, UString &message) { + #ifndef _SFX + if ((HRESULT)errorCode == MY_HRES_ERROR__INTERNAL_ERROR) + { + message = "Internal Error: The failure in hardware (RAM or CPU), OS or program"; + return true; + } + #endif + #ifdef _WIN32 LPVOID msgBuf; diff --git a/CPP/Windows/Registry.cpp b/CPP/Windows/Registry.cpp index 2c4643bc..b20157d5 100644 --- a/CPP/Windows/Registry.cpp +++ b/CPP/Windows/Registry.cpp @@ -3,6 +3,7 @@ #include "StdAfx.h" #include +// #include #ifndef _UNICODE #include "../Common/StringConvert.h" @@ -17,12 +18,27 @@ namespace NWindows { namespace NRegistry { #define MYASSERT(expr) // _ASSERTE(expr) +#define MY_ASSUME(expr) + +/* +static void Error() +{ + #ifdef _CONSOLE + printf("\nregistry error\n"); + #else + MessageBoxW(0, L"registry error", L"", 0); + // exit(1); + #endif +} + +#define MY_ASSUME(expr) { if (!(expr)) Error(); } +*/ LONG CKey::Create(HKEY parentKey, LPCTSTR keyName, LPTSTR keyClass, DWORD options, REGSAM accessMask, LPSECURITY_ATTRIBUTES securityAttributes, LPDWORD disposition) throw() { - MYASSERT(parentKey != NULL); + MY_ASSUME(parentKey != NULL); DWORD dispositionReal; HKEY key = NULL; LONG res = RegCreateKeyEx(parentKey, keyName, 0, keyClass, @@ -39,7 +55,7 @@ LONG CKey::Create(HKEY parentKey, LPCTSTR keyName, LONG CKey::Open(HKEY parentKey, LPCTSTR keyName, REGSAM accessMask) throw() { - MYASSERT(parentKey != NULL); + MY_ASSUME(parentKey 
!= NULL); HKEY key = NULL; LONG res = RegOpenKeyEx(parentKey, keyName, 0, accessMask, &key); if (res == ERROR_SUCCESS) @@ -66,7 +82,7 @@ LONG CKey::Close() throw() // winNT to be deleted must not have subkeys LONG CKey::DeleteSubKey(LPCTSTR subKeyName) throw() { - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); return RegDeleteKey(_object, subKeyName); } @@ -101,14 +117,14 @@ static inline bool UINT32ToBool(UInt32 value) { return (value != 0); } LONG CKey::DeleteValue(LPCTSTR name) throw() { - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); return ::RegDeleteValue(_object, name); } #ifndef _UNICODE LONG CKey::DeleteValue(LPCWSTR name) { - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); if (g_IsNT) return ::RegDeleteValueW(_object, name); return DeleteValue(name == 0 ? 0 : (LPCSTR)GetSystemString(name)); @@ -117,7 +133,7 @@ LONG CKey::DeleteValue(LPCWSTR name) LONG CKey::SetValue(LPCTSTR name, UInt32 value) throw() { - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); return RegSetValueEx(_object, name, 0, REG_DWORD, (const BYTE *)&value, sizeof(UInt32)); } @@ -130,7 +146,7 @@ LONG CKey::SetValue(LPCTSTR name, bool value) throw() LONG CKey::SetValue(LPCTSTR name, LPCTSTR value) throw() { MYASSERT(value != NULL); - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); return RegSetValueEx(_object, name, 0, REG_SZ, (const BYTE *)value, ((DWORD)lstrlen(value) + 1) * sizeof(TCHAR)); } @@ -139,7 +155,7 @@ LONG CKey::SetValue(LPCTSTR name, LPCTSTR value) throw() LONG CKey::SetValue(LPCTSTR name, const CSysString &value) { MYASSERT(value != NULL); - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); return RegSetValueEx(_object, name, NULL, REG_SZ, (const BYTE *)(const TCHAR *)value, (value.Len() + 1) * sizeof(TCHAR)); } @@ -150,7 +166,7 @@ LONG CKey::SetValue(LPCTSTR name, const CSysString &value) LONG CKey::SetValue(LPCWSTR name, LPCWSTR value) { MYASSERT(value != NULL); - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); 
if (g_IsNT) return RegSetValueExW(_object, name, 0, REG_SZ, (const BYTE * )value, (DWORD)((wcslen(value) + 1) * sizeof(wchar_t))); @@ -164,7 +180,7 @@ LONG CKey::SetValue(LPCWSTR name, LPCWSTR value) LONG CKey::SetValue(LPCTSTR name, const void *value, UInt32 size) throw() { MYASSERT(value != NULL); - MYASSERT(_object != NULL); + MY_ASSUME(_object != NULL); return RegSetValueEx(_object, name, 0, REG_BINARY, (const BYTE *)value, size); } diff --git a/CPP/Windows/Synchronization.h b/CPP/Windows/Synchronization.h index 98ea0b69..7d2e8d2a 100644 --- a/CPP/Windows/Synchronization.h +++ b/CPP/Windows/Synchronization.h @@ -153,6 +153,10 @@ class CSemaphore MY_UNCOPYABLE { return Semaphore_Create(&_object, initCount, maxCount); } + WRes OptCreateInit(UInt32 initCount, UInt32 maxCount) + { + return Semaphore_OptCreateInit(&_object, initCount, maxCount); + } WRes Release() { return Semaphore_Release1(&_object); } WRes Release(UInt32 releaseCount) { return Semaphore_ReleaseN(&_object, releaseCount); } WRes Lock() { return Semaphore_Wait(&_object); } diff --git a/CPP/Windows/SystemInfo.cpp b/CPP/Windows/SystemInfo.cpp index 55403efc..9346afd5 100644 --- a/CPP/Windows/SystemInfo.cpp +++ b/CPP/Windows/SystemInfo.cpp @@ -12,12 +12,12 @@ #else +#include #include #ifdef __APPLE__ #include #elif !defined(_AIX) - #include #ifdef MY_CPU_ARM_OR_ARM64 @@ -25,13 +25,56 @@ #endif #endif +#ifdef __linux__ +#include "../Windows/FileIO.h" #endif +#endif // WIN32 + #include "SystemInfo.h" #include "System.h" using namespace NWindows; +#ifdef __linux__ + +static bool ReadFile_to_Buffer(CFSTR fileName, CByteBuffer &buf) +{ + NWindows::NFile::NIO::CInFile file; + if (!file.Open(fileName)) + return false; + /* + UInt64 size; + if (!file.GetLength(size)) + { + // GetLength() doesn't work "/proc/cpuinfo" + return false; + } + if (size >= ((UInt32)1 << 29)) + return false; + */ + size_t size = 0; + size_t addSize = ((size_t)1 << 12); + for (;;) + { + // printf("\nsize = %d\n", (unsigned)size); + 
buf.ChangeSize_KeepData(size + addSize, size); + size_t processed; + if (!file.ReadFull(buf + size, addSize, processed)) + return false; + if (processed == 0) + { + buf.ChangeSize_KeepData(size, size); + return true; + } + size += processed; + addSize *= 2; + } +} + +#endif + + #ifndef __APPLE__ static void PrintHex(AString &s, UInt64 v) { @@ -56,7 +99,7 @@ static void PrintCpuChars(AString &s, UInt32 v) } -static void x86cpuid_to_String(const Cx86cpuid &c, AString &s) +static void x86cpuid_to_String(const Cx86cpuid &c, AString &s, AString &ver) { s.Empty(); @@ -87,13 +130,10 @@ static void x86cpuid_to_String(const Cx86cpuid &c, AString &s) s.Trim(); } - s.Add_Space_if_NotEmpty(); { char temp[32]; ConvertUInt32ToHex(c.ver, temp); - s += '('; - s += temp; - s += ')'; + ver += temp; } } @@ -402,18 +442,52 @@ void GetSysInfo(AString &s1, AString &s2) void GetCpuName(AString &s); -void GetCpuName(AString &s) + +static void AddBracedString(AString &dest, AString &src) { - s.Empty(); + if (!src.IsEmpty()) + { + AString s; + s += '('; + s += src; + s += ')'; + dest.Add_OptSpaced(s); + } +} + +struct CCpuName +{ + AString CpuName; + AString Revision; + AString Microcode; + AString LargePages; + + void Fill(); + + void Get_Revision_Microcode_LargePages(AString &s) + { + s.Empty(); + AddBracedString(s, Revision); + AddBracedString(s, Microcode); + s.Add_OptSpaced(LargePages); + } +}; + +void CCpuName::Fill() +{ + CpuName.Empty(); + Revision.Empty(); + Microcode.Empty(); + LargePages.Empty(); + + AString &s = CpuName; #ifdef MY_CPU_X86_OR_AMD64 { Cx86cpuid cpuid; if (x86cpuid_CheckAndRead(&cpuid)) { - AString s2; - x86cpuid_to_String(cpuid, s2); - s += s2; + x86cpuid_to_String(cpuid, s, Revision); } else { @@ -484,11 +558,10 @@ void GetCpuName(AString &s) } if (res[0] == ERROR_SUCCESS || res[1] == ERROR_SUCCESS) { - s.Add_OptSpaced("("); for (int i = 0; i < 2; i++) { if (i == 1) - s += "->"; + Microcode += "->"; if (res[i] != ERROR_SUCCESS) continue; const CByteBuffer &buf = 
bufs[i]; @@ -497,13 +570,12 @@ void GetCpuName(AString &s) UInt32 high = GetUi32(buf); if (high != 0) { - PrintHex(s, high); - s += "."; + PrintHex(Microcode, high); + Microcode += "."; } - PrintHex(s, GetUi32(buf + 4)); + PrintHex(Microcode, GetUi32(buf + 4)); } } - s += ")"; } } } @@ -511,7 +583,7 @@ void GetCpuName(AString &s) #ifdef _7ZIP_LARGE_PAGES - Add_LargePages_String(s); + Add_LargePages_String(LargePages); #endif } @@ -532,13 +604,10 @@ void AddCpuFeatures(AString &s) // s += TypeToString2(k_PF, ARRAY_SIZE(k_PF), i); } } - s.Add_Space_if_NotEmpty(); - s += "f:"; + s.Add_OptSpaced("f:"); PrintHex(s, flags); - #else // _WIN32 - - #ifdef __APPLE__ + #elif defined(__APPLE__) { UInt32 v = 0; if (My_sysctlbyname_Get_UInt32("hw.pagesize", &v) == 0) @@ -549,10 +618,46 @@ void AddCpuFeatures(AString &s) } } - #elif !defined(_AIX) + #else + + const long v = sysconf(_SC_PAGESIZE); + if (v != -1) + { + s.Add_Space_if_NotEmpty(); + s += "PageSize:"; + s.Add_UInt32((UInt32)(v >> 10)); + s += "KB"; + } + + #if !defined(_AIX) + + #ifdef __linux__ + + CByteBuffer buf; + if (ReadFile_to_Buffer("/sys/kernel/mm/transparent_hugepage/enabled", buf)) + // if (ReadFile_to_Buffer("/proc/cpuinfo", buf)) + { + s.Add_OptSpaced("THP:"); + AString s2; + s2.SetFrom_CalcLen((const char *)(const void *)(const Byte *)buf, (unsigned)buf.Size()); + const int pos = s2.Find('['); + if (pos >= 0) + { + const int pos2 = s2.Find(']', pos + 1); + if (pos2 >= 0) + { + s2.DeleteFrom(pos2); + s2.DeleteFrontal(pos + 1); + } + } + s += s2; + } + // else throw CSystemException(MY_SRes_HRESULT_FROM_WRes(errno)); + + #endif + - s.Add_Space_if_NotEmpty(); - s += "hwcap:"; + s.Add_OptSpaced("hwcap:"); { unsigned long h = getauxval(AT_HWCAP); PrintHex(s, h); @@ -561,6 +666,9 @@ void AddCpuFeatures(AString &s) if (h & HWCAP_SHA1) s += ":SHA1"; if (h & HWCAP_SHA2) s += ":SHA2"; if (h & HWCAP_AES) s += ":AES"; + if (h & HWCAP_ASIMD) s += ":ASIMD"; + #elif defined(MY_CPU_ARM) + if (h & HWCAP_NEON) s += 
":NEON"; #endif } @@ -580,9 +688,8 @@ void AddCpuFeatures(AString &s) #endif } } - - #endif - #endif // _WIN32 + #endif // _AIX + #endif // _WIN32 } @@ -609,11 +716,11 @@ static BOOL My_RtlGetVersion(OSVERSIONINFOEXW *vi) #endif -void GetSystemInfoText(AString &sRes) +void GetOsInfoText(AString &sRes) { - { - { - AString s; + sRes.Empty(); + AString s; + #ifdef _WIN32 #ifndef UNDER_CE // OSVERSIONINFO vi; @@ -634,16 +741,16 @@ void GetSystemInfoText(AString &sRes) s += " SP:"; s.Add_UInt32(vi.wServicePackMajor); s += "."; s.Add_UInt32(vi.wServicePackMinor); } - s += " Suite:"; PrintHex(s, vi.wSuiteMask); - s += " Type:"; s.Add_UInt32(vi.wProductType); + // s += " Suite:"; PrintHex(s, vi.wSuiteMask); + // s += " Type:"; s.Add_UInt32(vi.wProductType); // s += " "; s += GetOemString(vi.szCSDVersion); } + /* { - s += " OEMCP:"; - s.Add_UInt32(GetOEMCP()); - s += " ACP:"; - s.Add_UInt32(GetACP()); + s += " OEMCP:"; s.Add_UInt32(GetOEMCP()); + s += " ACP:"; s.Add_UInt32(GetACP()); } + */ #endif #else // _WIN32 @@ -666,8 +773,14 @@ void GetSystemInfoText(AString &sRes) #endif // _WIN32 sRes += s; - sRes.Add_LF(); - } +} + + + +void GetSystemInfoText(AString &sRes) +{ + GetOsInfoText(sRes); + sRes.Add_LF(); { AString s, s1, s2; @@ -712,5 +825,73 @@ void GetSystemInfoText(AString &sRes) } #endif */ +} + + +void GetCpuName(AString &s); +void GetCpuName(AString &s) +{ + CCpuName cpuName; + cpuName.Fill(); + s = cpuName.CpuName; + AString s2; + cpuName.Get_Revision_Microcode_LargePages(s2); + s.Add_OptSpaced(s2); +} + + +void GetCpuName_MultiLine(AString &s); +void GetCpuName_MultiLine(AString &s) +{ + CCpuName cpuName; + cpuName.Fill(); + s = cpuName.CpuName; + AString s2; + cpuName.Get_Revision_Microcode_LargePages(s2); + if (!s2.IsEmpty()) + { + s.Add_LF(); + s += s2; } } + +void GetCompiler(AString &s) +{ + #ifdef __VERSION__ + s += __VERSION__; + #endif + + #ifdef __GNUC__ + s += " GCC "; + s.Add_UInt32(__GNUC__); + s += '.'; + s.Add_UInt32(__GNUC_MINOR__); + s += '.'; + 
s.Add_UInt32(__GNUC_PATCHLEVEL__); + #endif + + #ifdef __clang__ + s += " CLANG "; + s.Add_UInt32(__clang_major__); + s += '.'; + s.Add_UInt32(__clang_minor__); + #endif + + #ifdef __xlC__ + s += " XLC "; + s.Add_UInt32(__xlC__ >> 8); + s += '.'; + s.Add_UInt32(__xlC__ & 0xFF); + #ifdef __xlC_ver__ + s += '.'; + s.Add_UInt32(__xlC_ver__ >> 8); + s += '.'; + s.Add_UInt32(__xlC_ver__ & 0xFF); + #endif + #endif + + #ifdef _MSC_VER + s += " MSC "; + s.Add_UInt32(_MSC_VER); + #endif +} diff --git a/CPP/Windows/SystemInfo.h b/CPP/Windows/SystemInfo.h index 856bb2ba..e941d0aa 100644 --- a/CPP/Windows/SystemInfo.h +++ b/CPP/Windows/SystemInfo.h @@ -5,8 +5,14 @@ #include "../Common/MyString.h" + +void GetCpuName_MultiLine(AString &s); + +void GetOsInfoText(AString &sRes); void GetSystemInfoText(AString &s); void PrintSize_KMGT_Or_Hex(AString &s, UInt64 v); void Add_LargePages_String(AString &s); +void GetCompiler(AString &s); + #endif diff --git a/DOC/7zip.wxs b/DOC/7zip.wxs index 14793a67..c18e7068 100644 --- a/DOC/7zip.wxs +++ b/DOC/7zip.wxs @@ -1,7 +1,7 @@ - + diff --git a/DOC/readme.txt b/DOC/readme.txt index 18e5e387..d5e16452 100644 --- a/DOC/readme.txt +++ b/DOC/readme.txt @@ -1,4 +1,4 @@ -7-Zip 21.02 Sources +7-Zip 21.03 Sources ------------------- 7-Zip is a file archiver for Windows. @@ -42,11 +42,11 @@ You can download LZMA SDK from: LZMA SDK is written and placed in the public domain by Igor Pavlov. -How to compile --------------- +How to compile in Windows +------------------------- To compile the sources to Windows binaries you need Visual Studio compiler and/or Windows SDK. -You can use latest Windows Studio 2017 to compile binaries for x86, x64 and arm64 platforms. +You can use latest Windows Studio 2017/2019 to compile binaries for x86, x64 and arm64 platforms. 
Also you can use old compilers for some platforms: x86 : Visual C++ 6.0 with Platform SDK x64 : Windows Server 2003 R2 Platform SDK @@ -73,6 +73,7 @@ The dsp file compiling can be used for development and debug purposes. The final 7-Zip binaries are compiled via makefiles, that provide best optimization options. + How to compile with makefile ---------------------------- @@ -90,10 +91,78 @@ MY_DYNAMIC_LINK -Compiling under Unix/Linux --------------------------- -Check this site for Posix/Linux version: -http://sourceforge.net/projects/p7zip/ +Compiling 7-Zip for Unix/Linux +------------------------------ + +There are several options to compile 7-Zip with different compilers: gcc and clang. +Also 7-Zip code contains two versions for some critical parts of code: in C and in Assembler. +So if you compile the version with Assembler code, you will get a faster 7-Zip binary. + +7-Zip's assembler code uses the following syntax for different platforms: + +1) x86 and x86-64 (AMD64): MASM syntax. + There are 2 programs that support MASM syntax in Linux: + Asmc Macro Assembler and JWasm. But JWasm now doesn't support some + cpu instructions used in 7-Zip. + So you must install Asmc Macro Assembler in Linux, if you want to compile fastest version + of 7-Zip x86 and x86-64: + https://github.com/nidud/asmc + +2) arm64: GNU assembler for ARM64 with preprocessor. + The syntax of that arm64 assembler code in 7-Zip is supported by GCC and CLANG for ARM64. + +There are different binaries that can be compiled from 7-Zip source. 
+There are 2 main files in the folder for compiling: + makefile - that can be used for compiling Windows version of 7-Zip with nmake command + makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip with make command + +At first you must change the current folder to the folder that contains `makefile.gcc`: + + cd CPP/7zip/Bundles/Alone2 + +Then you can compile `makefile.gcc` with the command: + + make -j -f makefile.gcc + +Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile +7-Zip binaries with optimized code and optimizing options. + +To compile with GCC without assembler: + cd CPP/7zip/Bundles/Alone2 + make -j -f ../../cmpl_gcc.mak + +To compile with CLANG without assembler: + make -j -f ../../cmpl_clang.mak + +To compile 7-Zip for x86-64 with asmc assembler: + make -j -f ../../cmpl_gcc_x64.mak + +To compile 7-Zip for arm64 with assembler: + make -j -f ../../cmpl_gcc_arm64.mak + +To compile 7-Zip for arm64 for macOS: + make -j -f ../../cmpl_mac_arm64.mak + +Also you can change some compiler options in the mak files: + cmpl_gcc.mak + var_gcc.mak + warn_gcc.mak + + +7-Zip and p7zip +=============== +Now there are two different ports of 7-Zip for Linux/macOS: + +1) p7zip - another port of 7-Zip for Linux, made by an independent developer. + The latest version of p7zip now is 16.02, and that p7zip 16.02 is outdated now. + http://sourceforge.net/projects/p7zip/ + +2) 7-Zip for Linux/macOS - this package - it's new code with all changes from latest 7-Zip for Windows. + +These two ports are not identical. +Note also that some Linux specific things can be implemented better in p7zip than in new 7-Zip for Linux. + + Notes: @@ -127,7 +196,7 @@ DOC Documentation 7zip.wix - installer script for WIX -Asm - Source code in Assembler (optimized code for CRC calculation and Intel-AES encryption) +Asm - Source code in Assembler : optimized code for CRC, SHA, AES, LZMA decoding. 
C - Source code in C @@ -146,6 +215,7 @@ Windows common files for Windows related code Bundle Modules that are bundles of other modules (files) Alone 7za.exe: Standalone version of 7-Zip console that supports only 7z/xz/cab/zip/gzip/bzip2/tar. + Alone2 7zz.exe: Standalone version of 7-Zip console that supports all formats. Alone7z 7zr.exe: Standalone version of 7-Zip console that supports only 7z (reduced version) Fm Standalone version of 7-Zip File Manager Format7z 7za.dll: .7z support diff --git a/README.md b/README.md index 5f442247..0efce94c 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,7 @@ Hashers: The output should look like this: ``` -7-Zip 21.02 (x64) : Copyright (c) 1999-2021 Igor Pavlov : 2021-05-06 +7-Zip 21.03 (x64) : Copyright (c) 1999-2021 Igor Pavlov : 2021-05-06 Libs: 0 C:\Program Files\7-Zip\7z.dll @@ -288,7 +288,7 @@ You find this project useful, maybe you consider a donation ;-) ## Version Information -- 7-Zip ZS Version 21.02 +- 7-Zip ZS Version 21.03 - [Brotli] Version 1.0.9 - [Fast LZMA2] Version 1.0.1 - [Lizard] Version 1.0 @@ -297,7 +297,7 @@ You find this project useful, maybe you consider a donation ;-) - [Zstandard] Version 1.5.0 - [BLAKE3] Version 0.3.7 -/TR 2021-05-16 +/TR 2021-08-26 ## Notes