From 5697b3dece084e4098337a22fa4ef763295c5117 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 14 Sep 2023 16:48:14 +0200 Subject: [PATCH 1/3] zstd compression - set source size as hint if it is known, e.g. by file compression (slightly better performance and/or compression ratio); although the feature is still labeled "experimental", zstd has used it in its own client since v1.4 (IIRC), and the only known drawback would be a significant regression of the compression ratio if the guess considerably underestimates the size, but that does not matter when the file size is known. --- CPP/7zip/Archive/ZstdHandler.cpp | 1 + CPP/7zip/Compress/ZstdEncoder.cpp | 8 +++++++- CPP/7zip/Compress/ZstdEncoder.h | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CPP/7zip/Archive/ZstdHandler.cpp b/CPP/7zip/Archive/ZstdHandler.cpp index e5b0148f..b93e3f3a 100644 --- a/CPP/7zip/Archive/ZstdHandler.cpp +++ b/CPP/7zip/Archive/ZstdHandler.cpp @@ -286,6 +286,7 @@ static HRESULT UpdateArchive( CMyComPtr localProgress = localProgressSpec; localProgressSpec->Init(updateCallback, true); NCompress::NZSTD::CEncoder *encoderSpec = new NCompress::NZSTD::CEncoder; + encoderSpec->unpackSize = unpackSize; CMyComPtr encoder = encoderSpec; RINOK(props.SetCoderProps(encoderSpec, NULL)); RINOK(encoder->Code(fileInStream, outStream, NULL, NULL, localProgress)); diff --git a/CPP/7zip/Compress/ZstdEncoder.cpp b/CPP/7zip/Compress/ZstdEncoder.cpp index bf5c8fa4..b7c6543e 100644 --- a/CPP/7zip/Compress/ZstdEncoder.cpp +++ b/CPP/7zip/Compress/ZstdEncoder.cpp @@ -30,7 +30,8 @@ CEncoder::CEncoder(): _LdmHashLog(-1), _LdmMinMatch(-1), _LdmBucketSizeLog(-1), - _LdmHashRateLog(-1) + _LdmHashRateLog(-1), + unpackSize(0) { _props.clear(); } @@ -251,6 +252,11 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_contentSizeFlag, 1); if (ZSTD_isError(err)) return E_INVALIDARG; + if (unpackSize) { + err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_srcSizeHint, (int)(unpackSize <= 
INT_MAX ? unpackSize : INT_MAX)); + if (ZSTD_isError(err)) return E_INVALIDARG; + } + /* enable ldm for large windowlog values */ if (_WindowLog > 27 && _Long == 0) _Long = 1; diff --git a/CPP/7zip/Compress/ZstdEncoder.h b/CPP/7zip/Compress/ZstdEncoder.h index 56fe0671..814bdad0 100644 --- a/CPP/7zip/Compress/ZstdEncoder.h +++ b/CPP/7zip/Compress/ZstdEncoder.h @@ -67,6 +67,9 @@ class CEncoder: Int32 _LdmHashRateLog; public: + + UInt64 unpackSize; + MY_QUERYINTERFACE_BEGIN2(ICompressCoder) MY_QUERYINTERFACE_ENTRY(ICompressSetCoderMt) MY_QUERYINTERFACE_ENTRY(ICompressSetCoderProperties) From 8a5e1c82dbf9bd089d252528da1384d43b7a0920 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 14 Sep 2023 18:18:09 +0200 Subject: [PATCH 2/3] make compression of zstd archive type more similar to Zstandard CLI (store dictID and checksum by default in zstd type, no effect for 7z type) --- CPP/7zip/Archive/ZstdHandler.cpp | 3 +++ CPP/7zip/Compress/ZstdEncoder.cpp | 17 +++++++++++++++++ CPP/7zip/Compress/ZstdEncoder.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/CPP/7zip/Archive/ZstdHandler.cpp b/CPP/7zip/Archive/ZstdHandler.cpp index b93e3f3a..dcdef408 100644 --- a/CPP/7zip/Archive/ZstdHandler.cpp +++ b/CPP/7zip/Archive/ZstdHandler.cpp @@ -286,6 +286,9 @@ static HRESULT UpdateArchive( CMyComPtr localProgress = localProgressSpec; localProgressSpec->Init(updateCallback, true); NCompress::NZSTD::CEncoder *encoderSpec = new NCompress::NZSTD::CEncoder; + // by zstd archive type store dictID and checksum (similar to zstd client) + encoderSpec->dictIDFlag = 1; + encoderSpec->checksumFlag = 1; encoderSpec->unpackSize = unpackSize; CMyComPtr encoder = encoderSpec; RINOK(props.SetCoderProps(encoderSpec, NULL)); diff --git a/CPP/7zip/Compress/ZstdEncoder.cpp b/CPP/7zip/Compress/ZstdEncoder.cpp index b7c6543e..fcfaaf77 100644 --- a/CPP/7zip/Compress/ZstdEncoder.cpp +++ b/CPP/7zip/Compress/ZstdEncoder.cpp @@ -31,6 +31,8 @@ CEncoder::CEncoder(): _LdmMinMatch(-1), _LdmBucketSizeLog(-1), 
_LdmHashRateLog(-1), + dictIDFlag(-1), + checksumFlag(-1), unpackSize(0) { _props.clear(); @@ -252,6 +254,15 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_contentSizeFlag, 1); if (ZSTD_isError(err)) return E_INVALIDARG; + if (dictIDFlag != -1) { + err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_dictIDFlag, dictIDFlag); + if (ZSTD_isError(err)) return E_INVALIDARG; + } + if (checksumFlag != -1) { + err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_checksumFlag, checksumFlag); + if (ZSTD_isError(err)) return E_INVALIDARG; + } + if (unpackSize) { err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_srcSizeHint, (int)(unpackSize <= INT_MAX ? unpackSize : INT_MAX)); if (ZSTD_isError(err)) return E_INVALIDARG; @@ -326,6 +337,12 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_ldmHashRateLog, _LdmHashRateLog); if (ZSTD_isError(err)) return E_INVALIDARG; } + + //err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_literalCompressionMode, (int)ZSTD_ps_auto); + //if (ZSTD_isError(err)) return E_INVALIDARG; + + //err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_enableDedicatedDictSearch, 1); + //if (ZSTD_isError(err)) return E_INVALIDARG; } for (;;) { diff --git a/CPP/7zip/Compress/ZstdEncoder.h b/CPP/7zip/Compress/ZstdEncoder.h index 814bdad0..1b3a61a0 100644 --- a/CPP/7zip/Compress/ZstdEncoder.h +++ b/CPP/7zip/Compress/ZstdEncoder.h @@ -68,6 +68,8 @@ class CEncoder: public: + int dictIDFlag; + int checksumFlag; UInt64 unpackSize; MY_QUERYINTERFACE_BEGIN2(ICompressCoder) From 1eca571ea29a3cf8ec5caf41892d85d6efdb3c16 Mon Sep 17 00:00:00 2001 From: sebres Date: Thu, 14 Sep 2023 19:51:36 +0200 Subject: [PATCH 3/3] .zst, .br: avoid setting of size hint for unknown size by `-si`, since unpackSize will be supplied as -1 (`UINT64_MAX`) --- C/zstdmt/brotli-mt_compress.c | 4 ++-- CPP/7zip/Compress/BrotliEncoder.cpp | 3 ++- CPP/7zip/Compress/ZstdEncoder.cpp | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) 
diff --git a/C/zstdmt/brotli-mt_compress.c b/C/zstdmt/brotli-mt_compress.c index 736821ae..d8dad2b4 100644 --- a/C/zstdmt/brotli-mt_compress.c +++ b/C/zstdmt/brotli-mt_compress.c @@ -383,7 +383,7 @@ static size_t st_compress(void *arg) /* 0, or not specified by user; could be chosen by compressor. */ uint32_t lgwin = 24 /* DEFAULT_LGWIN */; /* Use file size to limit lgwin. */ - if (ctx->unpackSize >= 0) { + if (ctx->unpackSize >= 0 && ctx->unpackSize != (uint64_t)(int64_t)-1) { lgwin = BROTLI_MIN_WINDOW_BITS; while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) < (uint64_t)ctx->unpackSize) { @@ -393,7 +393,7 @@ static size_t st_compress(void *arg) } BrotliEncoderSetParameter(state, BROTLI_PARAM_LGWIN, lgwin); } - if (ctx->unpackSize > 0) { + if (ctx->unpackSize > 0 && ctx->unpackSize != (uint64_t)(int64_t)-1) { uint32_t size_hint = ctx->unpackSize < (1 << 30) ? (uint32_t)ctx->unpackSize : (1u << 30); BrotliEncoderSetParameter(state, BROTLI_PARAM_SIZE_HINT, size_hint); diff --git a/CPP/7zip/Compress/BrotliEncoder.cpp b/CPP/7zip/Compress/BrotliEncoder.cpp index b44fc582..4343a848 100644 --- a/CPP/7zip/Compress/BrotliEncoder.cpp +++ b/CPP/7zip/Compress/BrotliEncoder.cpp @@ -15,7 +15,8 @@ CEncoder::CEncoder(): _numThreads(NWindows::NSystem::GetNumberOfProcessors()), _Long(-1), _WindowLog(-1), - _ctx(NULL) + _ctx(NULL), + unpackSize(0) { _props.clear(); } diff --git a/CPP/7zip/Compress/ZstdEncoder.cpp b/CPP/7zip/Compress/ZstdEncoder.cpp index fcfaaf77..dda80153 100644 --- a/CPP/7zip/Compress/ZstdEncoder.cpp +++ b/CPP/7zip/Compress/ZstdEncoder.cpp @@ -263,7 +263,7 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream, if (ZSTD_isError(err)) return E_INVALIDARG; } - if (unpackSize) { + if (unpackSize && unpackSize != (UInt64)(Int64)-1) { // size is known err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_srcSizeHint, (int)(unpackSize <= INT_MAX ? unpackSize : INT_MAX)); if (ZSTD_isError(err)) return E_INVALIDARG; }