Commit 0110fa8
Merge branch 'rc-4.3.1'
jasone committed Nov 8, 2016
2 parents 9bef119 + b0f5658 commit 0110fa8
Showing 10 changed files with 390 additions and 46 deletions.
8 changes: 8 additions & 0 deletions ChangeLog
@@ -4,6 +4,14 @@ brevity. Much more detail can be found in the git revision history:

https://github.com/jemalloc/jemalloc

+* 4.3.1 (November 7, 2016)
+
+  Bug fixes:
+  - Fix a severe virtual memory leak.  This regression was first released in
+    4.3.0.  (@interwq, @jasone)
+  - Refactor atomic and prng APIs to restore support for 32-bit platforms that
+    use pre-C11 toolchains, e.g. FreeBSD's mips.  (@jasone)
+
* 4.3.0 (November 4, 2016)

This is the first release that passes the test suite for multiple Windows
2 changes: 1 addition & 1 deletion include/jemalloc/internal/arena.h
@@ -370,7 +370,7 @@ struct arena_s {
* PRNG state for cache index randomization of large allocation base
* pointers.
*/
-	uint64_t		offset_state;
+	size_t			offset_state;

dss_prec_t dss_prec;

18 changes: 10 additions & 8 deletions include/jemalloc/internal/atomic.h
@@ -66,7 +66,8 @@ void atomic_write_u(unsigned *p, unsigned x);
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
/******************************************************************************/
/* 64-bit operations. */
-#if (defined(__amd64__) || defined(__x86_64__))
+#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+# if (defined(__amd64__) || defined(__x86_64__))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -124,7 +125,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
: "memory" /* Clobbers. */
);
}
-#elif (defined(JEMALLOC_C11ATOMICS))
+# elif (defined(JEMALLOC_C11ATOMICS))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -152,7 +153,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
atomic_store(a, x);
}
-#elif (defined(JEMALLOC_ATOMIC9))
+# elif (defined(JEMALLOC_ATOMIC9))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -192,7 +193,7 @@

atomic_store_rel_long(p, x);
}
-#elif (defined(JEMALLOC_OSATOMIC))
+# elif (defined(JEMALLOC_OSATOMIC))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -224,7 +225,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)
o = atomic_read_uint64(p);
} while (atomic_cas_uint64(p, o, x));
}
-#elif (defined(_MSC_VER))
+# elif (defined(_MSC_VER))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
{
@@ -254,7 +255,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x)

InterlockedExchange64(p, x);
}
-#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
-    defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
+# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
+    defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
JEMALLOC_INLINE uint64_t
atomic_add_uint64(uint64_t *p, uint64_t x)
@@ -283,8 +284,9 @@ atomic_write_uint64(uint64_t *p, uint64_t x)

__sync_lock_test_and_set(p, x);
}
-#else
-#  error "Missing implementation for 64-bit atomic operations"
+# else
+#  error "Missing implementation for 64-bit atomic operations"
+# endif
#endif

/******************************************************************************/
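For illustration (not part of this commit): the OSAtomic branch above already builds atomic_write_uint64() from a read/CAS retry loop, and the same pattern yields a 64-bit fetch-add from a lone compare-and-swap primitive. The helper name here is hypothetical; the __sync builtin is the one the final #elif branch tests for.

	#include <stdint.h>

	/* Sketch: 64-bit atomic add via a CAS retry loop.  Assumes a
	 * toolchain with __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8. */
	static inline uint64_t
	cas_add_uint64(uint64_t *p, uint64_t x)
	{
		uint64_t o, n;

		do {
			o = *p;		/* Snapshot the current value. */
			n = o + x;	/* Propose the incremented value. */
			/* Retry if *p changed since the snapshot. */
		} while (__sync_val_compare_and_swap(p, o, n) != o);

		return (n);	/* New value, as atomic_add_uint64() returns. */
	}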
11 changes: 9 additions & 2 deletions include/jemalloc/internal/private_symbols.txt
@@ -405,8 +405,15 @@ pind2sz_tab
pow2_ceil_u32
pow2_ceil_u64
pow2_ceil_zu
-prng_lg_range
-prng_range
+prng_lg_range_u32
+prng_lg_range_u64
+prng_lg_range_zu
+prng_range_u32
+prng_range_u64
+prng_range_zu
+prng_state_next_u32
+prng_state_next_u64
+prng_state_next_zu
prof_active
prof_active_get
prof_active_get_unlocked
150 changes: 139 additions & 11 deletions include/jemalloc/internal/prng.h
@@ -19,8 +19,12 @@
* the next has a cycle of 4, etc. For this reason, we prefer to use the upper
* bits.
*/
-#define	PRNG_A		UINT64_C(6364136223846793005)
-#define	PRNG_C		UINT64_C(1442695040888963407)
+
+#define	PRNG_A_32	UINT32_C(1103515241)
+#define	PRNG_C_32	UINT32_C(12347)
+
+#define	PRNG_A_64	UINT64_C(6364136223846793005)
+#define	PRNG_C_64	UINT64_C(1442695040888963407)

#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
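The cycle-length caveat in the comment above is easy to verify empirically. As a standalone demonstration (not part of the diff): with an odd multiplier and odd increment, the low bit of the state flips on every step (a cycle of 2), while the upper bits behave far better, which is why the generators below always take output from the top.

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint32_t state = 42;
		unsigned i;

		for (i = 0; i < 8; i++) {
			/* The same recurrence as prng_state_next_u32(). */
			state = (state * 1103515241U) + 12347U;
			/* Bit 0 prints 1,0,1,0,...; the top byte does not. */
			printf("bit0=%u top8=0x%02x\n", state & 1U, state >> 24);
		}
		return (0);
	}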
@@ -35,28 +39,133 @@
#ifdef JEMALLOC_H_INLINES

#ifndef JEMALLOC_ENABLE_INLINE
-uint64_t	prng_lg_range(uint64_t *state, unsigned lg_range);
-uint64_t	prng_range(uint64_t *state, uint64_t range);
+uint32_t	prng_state_next_u32(uint32_t state);
+uint64_t	prng_state_next_u64(uint64_t state);
+size_t	prng_state_next_zu(size_t state);
+
+uint32_t	prng_lg_range_u32(uint32_t *state, unsigned lg_range,
+    bool atomic);
+uint64_t	prng_lg_range_u64(uint64_t *state, unsigned lg_range);
+size_t	prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic);
+
+uint32_t	prng_range_u32(uint32_t *state, uint32_t range, bool atomic);
+uint64_t	prng_range_u64(uint64_t *state, uint64_t range);
+size_t	prng_range_zu(size_t *state, size_t range, bool atomic);
#endif

#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_))
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_state_next_u32(uint32_t state)
+{
+
+	return ((state * PRNG_A_32) + PRNG_C_32);
+}
+
JEMALLOC_ALWAYS_INLINE uint64_t
-prng_lg_range(uint64_t *state, unsigned lg_range)
+prng_state_next_u64(uint64_t state)
{
-	uint64_t ret;

+	return ((state * PRNG_A_64) + PRNG_C_64);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prng_state_next_zu(size_t state)
+{
+
+#if LG_SIZEOF_PTR == 2
+	return ((state * PRNG_A_32) + PRNG_C_32);
+#elif LG_SIZEOF_PTR == 3
+	return ((state * PRNG_A_64) + PRNG_C_64);
+#else
+#error Unsupported pointer size
+#endif
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic)
+{
+	uint32_t ret, state1;
+
+	assert(lg_range > 0);
+	assert(lg_range <= 32);
+
+	if (atomic) {
+		uint32_t state0;
+
+		do {
+			state0 = atomic_read_uint32(state);
+			state1 = prng_state_next_u32(state0);
+		} while (atomic_cas_uint32(state, state0, state1));
+	} else {
+		state1 = prng_state_next_u32(*state);
+		*state = state1;
+	}
+	ret = state1 >> (32 - lg_range);
+
+	return (ret);
+}
+
+/* 64-bit atomic operations cannot be supported on all relevant platforms. */
+JEMALLOC_ALWAYS_INLINE uint64_t
+prng_lg_range_u64(uint64_t *state, unsigned lg_range)
+{
+	uint64_t ret, state1;

assert(lg_range > 0);
assert(lg_range <= 64);

-	ret = (*state * PRNG_A) + PRNG_C;
-	*state = ret;
-	ret >>= (64 - lg_range);
+	state1 = prng_state_next_u64(*state);
+	*state = state1;
+	ret = state1 >> (64 - lg_range);

return (ret);
}

+JEMALLOC_ALWAYS_INLINE size_t
+prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic)
+{
+	size_t ret, state1;
+
+	assert(lg_range > 0);
+	assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
+
+	if (atomic) {
+		size_t state0;
+
+		do {
+			state0 = atomic_read_z(state);
+			state1 = prng_state_next_zu(state0);
+		} while (atomic_cas_z(state, state0, state1));
+	} else {
+		state1 = prng_state_next_zu(*state);
+		*state = state1;
+	}
+	ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+
+	return (ret);
+}
+
+JEMALLOC_ALWAYS_INLINE uint32_t
+prng_range_u32(uint32_t *state, uint32_t range, bool atomic)
+{
+	uint32_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range_u32(state, lg_range, atomic);
+	} while (ret >= range);
+
+	return (ret);
+}
+
JEMALLOC_ALWAYS_INLINE uint64_t
-prng_range(uint64_t *state, uint64_t range)
+prng_range_u64(uint64_t *state, uint64_t range)
{
uint64_t ret;
unsigned lg_range;
@@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range)

/* Generate a result in [0..range) via repeated trial. */
do {
-		ret = prng_lg_range(state, lg_range);
+		ret = prng_lg_range_u64(state, lg_range);
} while (ret >= range);

return (ret);
}

+JEMALLOC_ALWAYS_INLINE size_t
+prng_range_zu(size_t *state, size_t range, bool atomic)
+{
+	size_t ret;
+	unsigned lg_range;
+
+	assert(range > 1);
+
+	/* Compute the ceiling of lg(range). */
+	lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+
+	/* Generate a result in [0..range) via repeated trial. */
+	do {
+		ret = prng_lg_range_zu(state, lg_range, atomic);
+	} while (ret >= range);
+
+	return (ret);
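All three prng_range_*() variants share the "repeated trial" structure: draw the smallest number of top bits that can cover [0..range), then reject and redraw anything landing in [range..2^lg_range). Rejection keeps the result exactly uniform, whereas reducing modulo range would bias toward small values. A standalone sketch of the idea (lcg64() and range64() are hypothetical stand-ins for prng_state_next_u64() and prng_range_u64()):

	#include <assert.h>
	#include <stdint.h>

	static uint64_t
	lcg64(uint64_t *state)
	{

		*state = (*state * 6364136223846793005ULL) +
		    1442695040888963407ULL;
		return (*state);
	}

	static uint64_t
	range64(uint64_t *state, uint64_t range)
	{
		unsigned lg_range;
		uint64_t ret;

		assert(range > 1);
		/* Ceiling of lg(range), via count-leading-zeros. */
		lg_range = 64 - (unsigned)__builtin_clzll(range - 1);

		do {
			/* Top bits only, per the comment in prng.h. */
			ret = lcg64(state) >> (64 - lg_range);
		} while (ret >= range);

		return (ret);
	}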
19 changes: 11 additions & 8 deletions src/arena.c
@@ -150,6 +150,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
arena_miscelm_get_const(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
+	assert((npages << LG_PAGE) < chunksize);
+	assert(pind2sz(pind) <= chunksize);
arena_run_heap_insert(&arena->runs_avail[pind],
arena_miscelm_get_mutable(chunk, pageind));
}
@@ -162,6 +164,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind,
arena_miscelm_get_const(chunk, pageind))));
assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >>
LG_PAGE));
+	assert((npages << LG_PAGE) < chunksize);
+	assert(pind2sz(pind) <= chunksize);
arena_run_heap_remove(&arena->runs_avail[pind],
arena_miscelm_get_mutable(chunk, pageind));
}
@@ -1046,7 +1050,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size)

pind = psz2ind(run_quantize_ceil(size));

-	for (i = pind; pind2sz(i) <= large_maxclass; i++) {
+	for (i = pind; pind2sz(i) <= chunksize; i++) {
arena_chunk_map_misc_t *miscelm = arena_run_heap_first(
&arena->runs_avail[i]);
if (miscelm != NULL)
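For context, a sketch of the search this loop performs (stand-in types, not jemalloc's actual structures): runs_avail is an array of heaps keyed by page size class, and first-best-fit returns the head of the first non-empty heap at or above the requested class, so the loop's upper bound decides how many heaps may be probed.

	#include <stddef.h>

	#define	NCLASSES 8	/* Stand-in for the psz class count. */

	typedef struct { void *min; } heap_t;	/* min == NULL when empty. */

	static void *
	first_best_fit(heap_t heaps[NCLASSES], size_t pind)
	{
		size_t i;

		for (i = pind; i < NCLASSES; i++) {
			if (heaps[i].min != NULL)
				return (heaps[i].min);	/* Smallest fit wins. */
		}
		return (NULL);
	}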
@@ -1195,7 +1199,7 @@ arena_decay_deadline_init(arena_t *arena)
if (arena->decay.time > 0) {
nstime_t jitter;

-		nstime_init(&jitter, prng_range(&arena->decay.jitter_state,
+		nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state,
nstime_ns(&arena->decay.interval)));
nstime_add(&arena->decay.deadline, &jitter);
}
@@ -1922,8 +1926,7 @@ arena_reset(tsd_t *tsd, arena_t *arena)
assert(!arena->purging);
arena->nactive = 0;

-	for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
-	    i++)
+	for (i = 0; i < NPSIZES; i++)
arena_run_heap_new(&arena->runs_avail[i]);

malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
@@ -2562,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero)
* that is a multiple of the cacheline size, e.g. [0 .. 63) * 64
* for 4 KiB pages and 64-byte cachelines.
*/
-		r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE);
+		r = prng_lg_range_zu(&arena->offset_state, LG_PAGE -
+		    LG_CACHELINE, false);
random_offset = ((uintptr_t)r) << LG_CACHELINE;
} else
random_offset = 0;
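To make the comment's arithmetic concrete (an illustration, not part of the commit): with 4 KiB pages and 64-byte cache lines, lg_range is 12 - 6 = 6, so r is uniform in [0..64) and random_offset takes one of the 64 cacheline-aligned values 0, 64, 128, ..., 4032.

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		const unsigned lg_page = 12, lg_cacheline = 6;
		uint32_t state = 0x12345;	/* Stand-in offset_state. */
		unsigned i;

		for (i = 0; i < 4; i++) {
			/* Stand-in for prng_lg_range_zu(&state, 6, false). */
			state = (state * 1103515241U) + 12347U;
			uint32_t r = state >> (32 - (lg_page - lg_cacheline));
			printf("r=%u offset=%u\n", r, r << lg_cacheline);
		}
		return (0);
	}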
@@ -3500,7 +3504,7 @@ arena_new(tsdn_t *tsdn, unsigned ind)
* deterministic seed.
*/
arena->offset_state = config_debug ? ind :
-		    (uint64_t)(uintptr_t)arena;
+		    (size_t)(uintptr_t)arena;
}

arena->dss_prec = chunk_dss_prec_get();
Expand All @@ -3514,8 +3518,7 @@ arena_new(tsdn_t *tsdn, unsigned ind)
arena->nactive = 0;
arena->ndirty = 0;

-	for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t);
-	    i++)
+	for (i = 0; i < NPSIZES; i++)
arena_run_heap_new(&arena->runs_avail[i]);

qr_new(&arena->runs_dirty, rd_link);
5 changes: 3 additions & 2 deletions src/ckh.c
@@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
* Cycle through the cells in the bucket, starting at a random position.
* The randomness avoids worst-case search overhead as buckets fill up.
*/
-	offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS);
+	offset = (unsigned)prng_lg_range_u64(&ckh->prng_state,
+	    LG_CKH_BUCKET_CELLS);
for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
* were an item for which both hashes indicated the same
* bucket.
*/
-			i = (unsigned)prng_lg_range(&ckh->prng_state,
+			i = (unsigned)prng_lg_range_u64(&ckh->prng_state,
LG_CKH_BUCKET_CELLS);
cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
assert(cell->key != NULL);
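The masked index arithmetic above visits every cell in the bucket exactly once no matter where the random offset starts. A small standalone demonstration (constants chosen for illustration; LG_CKH_BUCKET_CELLS is a build-time constant in jemalloc):

	#include <stdio.h>

	int
	main(void)
	{
		const unsigned lg_cells = 2;
		const unsigned ncells = 1U << lg_cells;	/* 4 cells per bucket. */
		const unsigned offset = 3;		/* Pretend PRNG draw. */
		unsigned i;

		for (i = 0; i < ncells; i++) {
			/* Probes 3, 0, 1, 2: a full permutation of the bucket. */
			printf("probe cell %u\n", (i + offset) & (ncells - 1));
		}
		return (0);
	}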
2 changes: 1 addition & 1 deletion src/prof.c
@@ -874,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata)
* pp 500
* (http://luc.devroye.org/rnbookindex.html)
*/
-	r = prng_lg_range(&tdata->prng_state, 53);
+	r = prng_lg_range_u64(&tdata->prng_state, 53);
u = (double)r * (1.0/9007199254740992.0L);
tdata->bytes_until_sample = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
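The updated call feeds 53 random bits into the inverse-CDF construction the surrounding comment cites (Devroye): u = r * 2^-53 is uniform in [0,1), and log(u) / log(1 - 1/mean) converts it into a geometric sampling interval with the configured mean. A standalone sketch with lg_prof_sample = 19, i.e. a 512 KiB average interval (the LCG stands in for prng_lg_range_u64()):

	#include <math.h>
	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		const unsigned lg_prof_sample = 19;
		uint64_t state = 12345, r, bytes_until_sample;
		double u;

		/* Stand-in for r = prng_lg_range_u64(&state, 53). */
		state = (state * 6364136223846793005ULL) +
		    1442695040888963407ULL;
		r = state >> (64 - 53);

		u = (double)r * (1.0 / 9007199254740992.0);	/* r * 2^-53 */
		bytes_until_sample = (uint64_t)(log(u) /
		    log(1.0 - (1.0 / (double)((uint64_t)1U <<
		    lg_prof_sample)))) + 1;
		printf("bytes_until_sample = %llu\n",
		    (unsigned long long)bytes_until_sample);
		return (0);	/* Link with -lm. */
	}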