Skip to content

Commit

Permalink
Prefetch the backreference hashtable bucket.
Browse files Browse the repository at this point in the history
Place the prefetch before the last distance checks, to give the prefetch enough time to work.

PiperOrigin-RevId: 626228820
  • Loading branch information
Brotli authored and Copybara-Service committed Apr 19, 2024
1 parent 443af10 commit 1b3a5cc
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 7 deletions.
15 changes: 15 additions & 0 deletions c/common/platform.h
Expand Up @@ -519,6 +519,21 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
#if BROTLI_ENABLE_DUMP
BROTLI_UNUSED(&BrotliDump);
#endif

/*
 * PREFETCH_L1(ptr) / PREFETCH_L2(ptr): read-prefetch hints for the memory
 * that `ptr` points to. The implementation is selected at preprocessing time;
 * the first matching variant wins:
 *   1. MSVC with _M_X64 or _M_I86 defined and _M_ARM64EC not defined:
 *      _mm_prefetch(), with _MM_HINT_T0 for PREFETCH_L1 and _MM_HINT_T1 for
 *      PREFETCH_L2.
 *   2. Compilers for which BROTLI_GNUC_HAS_BUILTIN reports __builtin_prefetch:
 *      __builtin_prefetch() with rw = 0 (read) and locality 3 for PREFETCH_L1,
 *      locality 2 for PREFETCH_L2.
 *   3. Remaining compilers that define __aarch64__: inline assembly issuing
 *      `prfm pldl1keep` (PREFETCH_L1) or `prfm pldl2keep` (PREFETCH_L2) on the
 *      location `*(ptr)`.
 *   4. Everything else: the macro only evaluates `ptr` and discards the
 *      result, i.e. prefetching is disabled.
 * Variants 1 and 2 expand to a call expression, while variants 3 and 4 expand
 * to a `do { ... } while (0)` statement, so portable callers should use these
 * macros only as complete statements terminated by `;`.
 *
 * NOTE(review): Microsoft documents _M_IX86 as the predefined macro for 32-bit
 * x86 targets; _M_I86 looks like a typo for it. If so, 32-bit MSVC builds
 * never take variant 1 and end up in the disabled variant -- confirm.
 * NOTE(review): _mm_prefetch() and the _MM_HINT_T* constants are documented as
 * being declared in <xmmintrin.h>; <mmintrin.h> may only work when another
 * header has already pulled them in -- confirm.
 */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#elif BROTLI_GNUC_HAS_BUILTIN(__builtin_prefetch, 3, 1, 0)
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#elif defined(__aarch64__)
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
#else
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#endif
}

#endif /* BROTLI_COMMON_PLATFORM_H_ */
7 changes: 5 additions & 2 deletions c/enc/hash_longest_match64_inc.h
Expand Up @@ -170,6 +170,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
/* Precalculate the hash key and prefetch the bucket. */
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;

Expand Down Expand Up @@ -220,8 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ?
(num[key] - self->block_size_) : 0u;
Expand Down
13 changes: 8 additions & 5 deletions c/enc/hash_longest_match_inc.h
Expand Up @@ -169,11 +169,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
/* Precalculate the hash key and prefetch the bucket. */
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;

BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);

out->len = 0;
out->len_code_delta = 0;
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
const size_t backward = (size_t)distance_cache[i];
Expand Down Expand Up @@ -219,9 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) {
Expand Down

0 comments on commit 1b3a5cc

Please sign in to comment.