[CRASH] Redis 7.2.3 crashed in slotToKeyReplaceEntry #13205

SarthakSahu · 2024-04-12T09:41:10Z

=== REDIS BUG REPORT START: Cut & paste starting from here ===
9:M 12 Apr 2024 03:02:55.999 # Redis 7.2.3 crashed by signal: 11, si_code: 1
9:M 12 Apr 2024 03:02:55.999 # Accessing address: 0x50
9:M 12 Apr 2024 03:02:55.999 # Crashed running the instruction at: 0x4d0aa5

------ STACK TRACE ------

EIP:
/usr/bin/redis-server *:6379 [cluster](slotToKeyReplaceEntry+0xa5)[0x4d0aa5]

Backtrace:
/lib64/libpthread.so.0(+0x12cf0)[0x7f147eec8cf0]
/usr/bin/redis-server *:6379 [cluster](slotToKeyReplaceEntry+0xa5)[0x4d0aa5]
/usr/bin/redis-server *:6379 [cluster][0x563ed8]
/usr/bin/redis-server *:6379 [cluster][0x5643bf]
/usr/bin/redis-server *:6379 [cluster](activeDefragCycle+0x357)[0x48b0f7]
/usr/bin/redis-server *:6379 [cluster](databasesCron+0x6c)[0x5665ac]
/usr/bin/redis-server *:6379 [cluster](serverCron+0x64a)[0x568e9a]
/usr/bin/redis-server *:6379 [cluster][0x56214d]
/usr/bin/redis-server *:6379 [cluster](aeMain+0x1d8)[0x563918]
/usr/bin/redis-server *:6379 [cluster](main+0x39a)[0x450d4a]
/lib64/libc.so.6(__libc_start_main+0xe5)[0x7f147eb2bd85]
/usr/bin/redis-server *:6379 [cluster](_start+0x2e)[0x45147e]


------ REGISTERS ------
9:M 12 Apr 2024 03:02:56.000 # 
RAX:0000000000011020 RBX:00007f1471d1a4b8
RCX:0000000000000046 RDX:0000000000000000
RDI:00007f1442882912 RSI:000000000000003f
RBP:00007f147e45be40 RSP:00007ffcbf966940
R8 :000000000000003f R9 :00000003a040b000
R10:0000000000000028 R11:0000000000000000
R12:0000000000000000 R13:00007f147e45be40
R14:0000000000000000 R15:00007f12c2288000
RIP:00000000004d0aa5 EFL:0000000000010202
CSGSFS:002b000000000033
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf96694f) -> 00007ffcbf966a30
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf96694e) -> 0000000000000014
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf96694d) -> 00000000005643bf
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf96694c) -> 000000996a97b751
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf96694b) -> 0000000003888000
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf96694a) -> 00007f147e45be40
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966949) -> 00007f147e42f300
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966948) -> 000000000048a4a0
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966947) -> fffffffffff11000
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966946) -> 00000000002054e9
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966945) -> 0000000000000000
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966944) -> 0000000000480930
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966943) -> 0000000000563ed8
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966942) -> 0000000000000000
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966941) -> 00007f1471d1a4b8
9:M 12 Apr 2024 03:02:56.000 # (00007ffcbf966940) -> 00007f13df2560c8

------ INFO OUTPUT ------
# Server
redis_version:7.2.3
redis_git_sha1:7d22beed
redis_git_dirty:0
redis_build_id:7a29a0d060f6d902
redis_mode:cluster
os:Linux 4.18.0-372.75.1.el8_6.x86_64 x86_64
arch_bits:64
monotonic_clock:POSIX clock_gettime
multiplexing_api:epoll
atomicvar_api:c11-builtin
gcc_version:8.5.0
process_id:9
process_supervised:no
run_id:a704b9d1dd51118a824d041f84637ef782f22600
tcp_port:6379
server_time_usec:1712890975998334
uptime_in_seconds:87634
uptime_in_days:1
hz:10
configured_hz:10
lru_clock:1614943
executable:/usr/bin/redis-server
config_file:/redisdb/conf/server.conf
io_threads_active:0
listener0:name=tcp

# Clients
connected_clients:42
cluster_connections:18
maxclients:10000
client_recent_max_input_buffer:24576
client_recent_max_output_buffer:20504
blocked_clients:0
tracking_clients:0
clients_in_timeout_table:0
total_blocking_keys:0
total_blocking_keys_on_nokey:0

# Memory
used_memory:4741536552
used_memory_human:4.42G
used_memory_rss:5345316864
used_memory_rss_human:4.98G
used_memory_peak:5270248928
used_memory_peak_human:4.91G
used_memory_peak_perc:89.97%
used_memory_overhead:771394852
used_memory_startup:1631608
used_memory_dataset:3970141700
used_memory_dataset_perc:83.76%
allocator_allocated:4741865984
allocator_active:5218967552
allocator_resident:5318950912
total_system_memory:540415094784
total_system_memory_human:503.30G
used_memory_lua:50176
used_memory_vm_eval:50176
used_memory_lua_human:49.00K
used_memory_scripts_eval:440
number_of_cached_scripts:1
number_of_functions:0
number_of_libraries:0
used_memory_vm_functions:32768
used_memory_vm_total:82944
used_memory_vm_total_human:81.00K
used_memory_functions:184
used_memory_scripts:624
used_memory_scripts_human:624B
maxmemory:6442450944
maxmemory_human:6.00G
maxmemory_policy:volatile-lru
allocator_frag_ratio:1.10
allocator_frag_bytes:477101568
allocator_rss_ratio:1.02
allocator_rss_bytes:99983360
rss_overhead_ratio:1.00
rss_overhead_bytes:26365952
mem_fragmentation_ratio:1.13
mem_fragmentation_bytes:603782216
mem_not_counted_for_evict:0
mem_replication_backlog:268438812
mem_total_replication_buffers:269299536
mem_clients_slaves:864080
mem_clients_normal:942856
mem_cluster_links:19296
mem_aof_buffer:0
mem_allocator:jemalloc-5.3.0
active_defrag_running:1
lazyfree_pending_objects:0
lazyfreed_objects:71

# Persistence
loading:0
async_loading:0
current_cow_peak:0
current_cow_size:0
current_cow_size_age:0
current_fork_perc:0.00
current_save_keys_processed:0
current_save_keys_total:0
rdb_changes_since_last_save:536665877
rdb_bgsave_in_progress:0
rdb_last_save_time:1712803341
rdb_last_bgsave_status:ok
rdb_last_bgsave_time_sec:0
rdb_current_bgsave_time_sec:-1
rdb_saves:0
rdb_last_cow_size:1810432
rdb_last_load_keys_expired:0
rdb_last_load_keys_loaded:0
aof_enabled:0
aof_rewrite_in_progress:0
aof_rewrite_scheduled:0
aof_last_rewrite_time_sec:-1
aof_current_rewrite_time_sec:-1
aof_last_bgrewrite_status:ok
aof_rewrites:0
aof_rewrites_consecutive_failures:0
aof_last_write_status:ok
aof_last_cow_size:0
module_fork_in_progress:0
module_fork_last_cow_size:0

# Stats
total_connections_received:8434
total_commands_processed:896198239
instantaneous_ops_per_sec:9639
total_net_input_bytes:658918324049
total_net_output_bytes:632580610226
total_net_repl_input_bytes:0
total_net_repl_output_bytes:626872252017
instantaneous_input_kbps:1535.98
instantaneous_output_kbps:998.97
instantaneous_input_repl_kbps:0.00
instantaneous_output_repl_kbps:948.41
rejected_connections:0
sync_full:1
sync_partial_ok:0
sync_partial_err:1
expired_keys:0
expired_stale_perc:0.00
expired_time_cap_reached_count:0
expire_cycle_cpu_milliseconds:6183
evicted_keys:0
evicted_clients:0
total_eviction_exceeded_time:0
current_eviction_exceeded_time:0
keyspace_hits:22665571
keyspace_misses:78077474
pubsub_channels:0
pubsub_patterns:0
pubsubshard_channels:0
latest_fork_usec:801
total_forks:1
migrate_cached_sockets:0
slave_expires_tracked_keys:0
active_defrag_hits:2118892
active_defrag_misses:4787736
active_defrag_key_hits:1019014
active_defrag_key_misses:731397
total_active_defrag_time:3369118
current_active_defrag_time:203
tracking_total_keys:0
tracking_total_items:0
tracking_total_prefixes:0
unexpected_error_replies:0
total_error_replies:62942
dump_payload_sanitizations:0
total_reads_processed:429043632
total_writes_processed:770916759
io_threaded_reads_processed:0
io_threaded_writes_processed:0
reply_buffer_shrinks:16892
reply_buffer_expands:22661
eventloop_cycles:404285111
eventloop_duration_sum:16676420980
eventloop_duration_cmd_sum:2892413108
instantaneous_eventloop_cycles_per_sec:2959
instantaneous_eventloop_duration_usec:34
acl_access_denied_auth:0
acl_access_denied_cmd:0
acl_access_denied_key:0
acl_access_denied_channel:0

# Replication
role:master
connected_slaves:1
slave0:ip=10.254.119.215
master_failover_state:no-failover
master_replid:b71b3b31f53260fa7be2cf17bc05837e5f5b47eb
master_replid2:0000000000000000000000000000000000000000
master_repl_offset:626876775351
second_repl_offset:-1
repl_backlog_active:1
repl_backlog_size:268435456
repl_backlog_first_byte_offset:626608333401
repl_backlog_histlen:268441951

# CPU
used_cpu_sys:8391.303958
used_cpu_user:5192.758853
used_cpu_sys_children:0.200895
used_cpu_user_children:0.519965
used_cpu_sys_main_thread:8385.406512
used_cpu_user_main_thread:5190.564016

# Modules
module:name=auditlog

# Commandstats
cmdstat_cluster|nodes:calls=10
cmdstat_cluster|myid:calls=8390
cmdstat_cluster|info:calls=87485
cmdstat_flushall:calls=1
cmdstat_replconf:calls=87270
cmdstat_acl|log:calls=873172
cmdstat_psync:calls=1
cmdstat_auth:calls=8434
cmdstat_get:calls=100743045
cmdstat_del:calls=345446900
cmdstat_info:calls=288366
cmdstat_eval:calls=49576278
cmdstat_ping:calls=17520
cmdstat_client|setname:calls=1
cmdstat_client|setinfo:calls=16784
cmdstat_set:calls=399044582

# Errorstats
errorstat_CLUSTERDOWN:count=62941
errorstat_NOAUTH:count=1

# Latencystats
latency_percentiles_usec_cluster|nodes:p50=85.503
latency_percentiles_usec_cluster|myid:p50=1.003
latency_percentiles_usec_cluster|info:p50=44.031
latency_percentiles_usec_flushall:p50=129.023
latency_percentiles_usec_replconf:p50=1.003
latency_percentiles_usec_acl|log:p50=2.007
latency_percentiles_usec_psync:p50=154.623
latency_percentiles_usec_auth:p50=5.023
latency_percentiles_usec_get:p50=1.003
latency_percentiles_usec_del:p50=2.007
latency_percentiles_usec_info:p50=66.047
latency_percentiles_usec_eval:p50=17.023
latency_percentiles_usec_ping:p50=1.003
latency_percentiles_usec_client|setname:p50=1.003
latency_percentiles_usec_client|setinfo:p50=0.001
latency_percentiles_usec_set:p50=3.007

# Cluster
cluster_enabled:1

# Keyspace
db0:keys=4565998

# Cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:10
cluster_size:5
cluster_current_epoch:15
cluster_my_epoch:12
cluster_stats_messages_ping_sent:260374
cluster_stats_messages_pong_sent:262119
cluster_stats_messages_sent:522493
cluster_stats_messages_ping_received:262119
cluster_stats_messages_pong_received:260371
cluster_stats_messages_received:522490
total_cluster_links_buffer_limit_exceeded:0

------ CLUSTER NODES OUTPUT ------
924b5315fffce5fef2eb142aaee9bf2a37247a49 10.254.81.202:6379@16379
bf5aed5fda50a30adcfa28af0a62d9324373d910 10.254.129.26:6379@16379
79154a744f909715b53750f85393316ef95600e6 10.254.62.160:6379@16379
8c562cb3b58590f80f7fc815ca641f01efc95343 10.254.234.190:6379@16379
82ec0f12c13dff2acd7a06f58a33e821a2f21058 10.254.236.131:6379@16379
46b08557bf546459dc6af6142b1e93053f6b18b8 10.254.73.152:6379@16379
83e52f45a4c028d79113d99a41ee7c4f3fa93193 10.254.93.94:6379@16379
f14048d196680240a95a60bff432f7fd768372c5 10.254.94.191:6379@16379
aba3ba132dbce6664b5ac441fb84a17d96ab2e0a 10.254.167.66:6379@16379
6dede0e20ef9888646b061dc8152a7a514b26695 10.254.119.215:6379@16379

------ CLIENT LIST OUTPUT ------
id=22 addr=172.16.18.230:35828 laddr=172.16.58.190:6379 fd=42 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=24467 argv-mem=0 multi-mem=0 rbs=1024 rbp=24 obl=0 oll=0 omem=0 tot-mem=26496 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=34 addr=172.16.45.145:60170 laddr=172.16.58.190:6379 fd=58 name= age=87631 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=20 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=42 addr=172.16.40.181:51794 laddr=172.16.58.190:6379 fd=66 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=28 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=35 addr=172.16.30.159:58422 laddr=172.16.58.190:6379 fd=59 name= age=87631 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=24 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=9 addr=172.16.217.211:54430 laddr=172.16.58.190:6379 fd=22 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=10 addr=172.16.225.199:38944 laddr=172.16.58.190:6379 fd=23 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=35 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=29 addr=172.16.127.143:48266 laddr=172.16.58.190:6379 fd=51 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=11 addr=172.16.21.180:35294 laddr=172.16.58.190:6379 fd=24 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=35 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=43 addr=172.16.30.230:39066 laddr=172.16.58.190:6379 fd=67 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=33 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=37 addr=172.16.96.22:57554 laddr=172.16.58.190:6379 fd=61 name= age=87630 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=17 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=12 addr=172.16.41.143:56576 laddr=172.16.58.190:6379 fd=25 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=28 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=13 addr=172.16.144.7:35578 laddr=172.16.58.190:6379 fd=26 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=32 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=38 addr=172.16.127.55:52790 laddr=172.16.58.190:6379 fd=62 name= age=87630 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=16 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=23 addr=[::1]:47436 laddr=[::1]:6379 fd=43 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=8192 rbp=5890 obl=0 oll=0 omem=0 tot-mem=29568 events=r cmd=info user=probe-user redir=-1 resp=2 lib-name=redis-py lib-ver=5.0.1
id=44 addr=172.16.220.31:41144 laddr=172.16.58.190:6379 fd=68 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=14 addr=172.16.31.11:38200 laddr=172.16.58.190:6379 fd=27 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=28 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=30 addr=172.16.58.159:33026 laddr=172.16.58.190:6379 fd=52 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=20 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=31 addr=172.16.208.202:56292 laddr=172.16.58.190:6379 fd=53 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=30 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=32 addr=172.16.58.18:52802 laddr=172.16.58.190:6379 fd=54 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=31 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=39 addr=172.16.96.107:57360 laddr=172.16.58.190:6379 fd=63 name= age=87630 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=20 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=45 addr=172.16.146.218:35422 laddr=172.16.58.190:6379 fd=69 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=17896 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=25 addr=172.16.190.148:33102 laddr=172.16.58.190:6379 fd=46 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=27 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=26 addr=172.16.224.43:41718 laddr=172.16.58.190:6379 fd=48 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=20 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=27 addr=172.16.223.206:58224 laddr=172.16.58.190:6379 fd=49 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=21 addr=172.16.233.10:47040 laddr=172.16.58.190:6379 fd=41 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=36 addr=172.16.119.211:57302 laddr=172.16.58.190:6379 fd=60 name= age=87631 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=20 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=15 addr=172.16.109.32:54390 laddr=172.16.58.190:6379 fd=28 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=30 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=16 addr=172.16.110.35:57838 laddr=172.16.58.190:6379 fd=29 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=28 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=17 addr=172.16.46.62:49770 laddr=172.16.58.190:6379 fd=31 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=27 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=33 addr=172.16.182.227:40814 laddr=172.16.58.190:6379 fd=57 name= age=87631 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=33 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=18 addr=172.16.168.239:50376 laddr=172.16.58.190:6379 fd=32 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=get user=default redir=-1 resp=2 lib-name= lib-ver=
id=46 addr=[::1]:47452 laddr=[::1]:6379 fd=70 name=local_monitor age=87629 idle=3 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=0 argv-mem=0 multi-mem=0 rbs=8192 rbp=5890 obl=0 oll=0 omem=0 tot-mem=9096 events=r cmd=info user=probe-user redir=-1 resp=2 lib-name=redis-py lib-ver=5.0.1
id=47 addr=172.16.25.251:47928 laddr=172.16.58.190:6379 fd=71 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=27 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=40 addr=172.16.114.106:51674 laddr=172.16.58.190:6379 fd=64 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=0 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=28 addr=172.16.126.169:53956 laddr=172.16.58.190:6379 fd=50 name= age=87632 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=30 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=19 addr=172.16.77.53:48352 laddr=172.16.58.190:6379 fd=33 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=21653 argv-mem=0 multi-mem=0 rbs=1024 rbp=30 obl=0 oll=0 omem=0 tot-mem=26496 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=20 addr=172.16.208.154:60348 laddr=172.16.58.190:6379 fd=34 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=32 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=5 addr=172.16.137.6:33944 laddr=172.16.58.190:6379 fd=12 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=6 addr=172.16.241.138:58536 laddr=172.16.58.190:6379 fd=19 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=25 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=41 addr=172.16.95.192:51902 laddr=172.16.58.190:6379 fd=65 name= age=87629 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=0 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=set user=default redir=-1 resp=2 lib-name= lib-ver=
id=24 addr=172.16.109.5:52888 laddr=172.16.58.190:6379 fd=45 name= age=87633 idle=0 flags=S db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=0 obl=0 oll=1 omem=20504 tot-mem=42904 events=r cmd=replconf user=repl-user redir=-1 resp=2 lib-name= lib-ver=
id=7 addr=172.16.142.207:33080 laddr=172.16.58.190:6379 fd=20 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=30 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=
id=8 addr=172.16.198.104:45568 laddr=172.16.58.190:6379 fd=21 name= age=87633 idle=0 flags=N db=0 sub=0 psub=0 ssub=0 multi=-1 qbuf=0 qbuf-free=20474 argv-mem=0 multi-mem=0 rbs=1024 rbp=26 obl=0 oll=0 omem=0 tot-mem=22400 events=r cmd=del user=default redir=-1 resp=2 lib-name= lib-ver=

------ MODULES INFO OUTPUT ------

------ CONFIG DEBUG OUTPUT ------
activedefrag yes
lazyfree-lazy-user-flush no
repl-diskless-sync yes
sanitize-dump-payload no
lazyfree-lazy-server-del no
proto-max-bulk-len 512mb
slave-read-only yes
lazyfree-lazy-eviction no
client-query-buffer-limit 1gb
replica-read-only yes
lazyfree-lazy-user-del no
io-threads-do-reads no
lazyfree-lazy-expire no
list-compress-depth 0
repl-diskless-load disabled
io-threads 1

------ FAST MEMORY TEST ------
9:M 12 Apr 2024 03:02:56.003 # Bio worker thread #0 terminated
9:M 12 Apr 2024 03:02:56.003 # Bio worker thread #1 terminated
9:M 12 Apr 2024 03:02:56.004 # Bio worker thread #2 terminated
*** Preparing to test memory region 8eb000 (2273280 bytes)
*** Preparing to test memory region 184c000 (2740224 bytes)
*** Preparing to test memory region 7f12bea00000 (6733955072 bytes)
*** Preparing to test memory region 7f14500dd000 (604504064 bytes)
*** Preparing to test memory region 7f147415e000 (8388608 bytes)
*** Preparing to test memory region 7f147495f000 (8388608 bytes)
*** Preparing to test memory region 7f1475160000 (8388608 bytes)
*** Preparing to test memory region 7f1475961000 (8388608 bytes)
*** Preparing to test memory region 7f147694c000 (4096 bytes)
*** Preparing to test memory region 7f14789b8000 (4096 bytes)
*** Preparing to test memory region 7f1478e80000 (4096 bytes)
*** Preparing to test memory region 7f147986c000 (4096 bytes)
*** Preparing to test memory region 7f1479f86000 (8192 bytes)
*** Preparing to test memory region 7f147a532000 (12288 bytes)
*** Preparing to test memory region 7f147b2a0000 (16384 bytes)
*** Preparing to test memory region 7f147b8d2000 (1048576 bytes)
*** Preparing to test memory region 7f147c41f000 (1048576 bytes)
*** Preparing to test memory region 7f147c93e000 (1048576 bytes)
*** Preparing to test memory region 7f147d084000 (1048576 bytes)
*** Preparing to test memory region 7f147db2d000 (270336 bytes)
*** Preparing to test memory region 7f147dd80000 (11010048 bytes)
*** Preparing to test memory region 7f147eaf0000 (4096 bytes)
*** Preparing to test memory region 7f147eeb2000 (16384 bytes)
*** Preparing to test memory region 7f147f0d2000 (16384 bytes)
*** Preparing to test memory region 7f147f5bc000 (20480 bytes)
*** Preparing to test memory region 7f147f854000 (4096 bytes)
*** Preparing to test memory region 7f148001b000 (1675264 bytes)
*** Preparing to test memory region 7f1480207000 (24576 bytes)
*** Preparing to test memory region 7f148020f000 (8192 bytes)
{
{
.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O
Fast memory test PASSED

------ DUMPING CODE AROUND EIP ------
Symbol: slotToKeyReplaceEntry (base: 0x4d0a00)
Module: /usr/bin/redis-server *:6379 [cluster] (base 0x400000)
$ xxd -r -p /tmp/dump.hex /tmp/dump.bin
$ objdump --adjust-vma=0x4d0a00 -D -b binary -m i386:x86-64 /tmp/dump.bin
------
9:M 12 Apr 2024 03:03:19.535 # dump of function (hexdump of 293 bytes):
55534883ec0840f6c6077564488b56204889fd4889f3488b46184885d27409f6c207754c488972184885c07413a807753f488958204883c4085b5dc30f1f40004889dfe8380009000fb670ff89f283e20780fa040f879b0000000fb6d2ff24d56807630066662e0f1f84000000000090ba19030000be37b76400bf10df6000e85c27010066662e0f1f840000000000908b70f74889c7e835e30000488b553889c048c1e00448034250488958084883c4085b5dc366662e0f1f840000000000900fb770fbebcd662e0f1f8400000000000fb670fdebbd662e0f1f84000000000040c0ee03400fb6f6eba9660f1f4400008b70efeb9e31f6eb9a0f1f8000000000554889f5534889fb4883ec08e86fff08000fb670ff4889c789f283e20780fa040f87e20000
Function at 0x560a80 is dictGetKey
Function at 0x4e31e0 is _serverAssert
Function at 0x4dedd0 is keyHashSlot

=== REDIS BUG REPORT END. Make sure to include from START to END. ===

The text was updated successfully, but these errors were encountered:

sundb · 2024-04-15T01:58:49Z

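The crash is in `&(*db->slots_to_keys).by_slot`: `db` (loaded into RDX, which is 0 in the register dump) is NULL, so reading `db->slots_to_keys` at offset 0x50 faults on address 0x50.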

Disassembly:

   0x0000000000144b13 <+147>:   call   0x134c60 <keyHashSlot>
   0x0000000000144b18 <+152>:   mov    0x38(%rbp),%rdx
   0x0000000000144b1c <+156>:   mov    %eax,%eax
   0x0000000000144b1e <+158>:   shl    $0x4,%rax
   0x0000000000144b22 <+162>:   add    0x50(%rdx),%rax          <- crash here

        unsigned int hashslot = keyHashSlot(key, sdslen(key));
        clusterDictMetadata *dictmeta = dictMetadata(d);
        redisDb *db = dictmeta->db;
        slotToKeys *slot_to_keys = &(*db->slots_to_keys).by_slot[hashslot];

SarthakSahu · 2024-04-15T02:23:31Z

Thank you @sundb.
But why did it crash? Is this a bug?

sundb · 2024-04-15T02:25:53Z

@SarthakSahu yes, but I don't know why yet; it still needs some time.

SarthakSahu · 2024-04-16T05:57:20Z

Hi @sundb

Any breakthrough here? Our cluster has crashed again with the same stack trace.

sundb · 2024-04-16T06:18:30Z

@SarthakSahu sorry, I haven't had enough time to dig into it yet. Could you provide more information so I can pinpoint it quickly? Or can you reproduce it quickly?

stevelipinski · 2024-04-17T13:13:33Z

Same as #12677?

sundb · 2024-04-17T13:21:36Z

@stevelipinski Yes, they are the same.
Did you enable activedefrag?

stevelipinski · 2024-04-17T13:40:52Z

Yes:

------ CONFIG DEBUG OUTPUT ------
activedefrag yes

I believe one minor difference is that in this case, activedefrag is set from the beginning, whereas in #12677, they mention setting it on a replica.

sundb · 2024-04-17T13:44:42Z

@stevelipinski thanks a lot, this is useful, and I will take the time to figure out why as soon as possible.

SarthakSahu · 2024-04-23T06:10:29Z

Hi @sundb, do we have any intermediate update to share?

sundb · 2024-04-23T06:24:03Z

@SarthakSahu sorry, I've tried and failed to reproduce it. Can you give me more clues about your system, special configurations, etc.?

kkharbas · 2024-04-24T22:12:34Z

@SarthakSahu disable activedefrag until bug is fixed

SarthakSahu · 2024-04-25T11:11:06Z

> @SarthakSahu sorry, I've tried and failed to reproduce it, can you give me more clues about your system, special configurations, etc.

AFAIK, short-lived data is injected frequently. This means data is continually being inserted and deleted.

sundb · 2024-04-25T11:15:07Z

@SarthakSahu did you perform any special operations on the cluster, like slot migration?

stevelipinski · 2024-05-28T20:08:24Z

I've been trying to reproduce as well. The closest I can achieve is with a script that creates a bunch of strings, some of them having a ttl. Then by disabling and then re-enabling activedefrag, it crashes. But, it is crashing at a different point in the code:

11360:M 28 May 2024 16:01:13.400 # Redis 7.2.3 crashed by signal: 11, si_code: 1
11360:M 28 May 2024 16:01:13.400 # Accessing address: 0x48
11360:M 28 May 2024 16:01:13.400 # Crashed running the instruction at: 0x48aa1c

------ STACK TRACE ------
EIP:
redis-server *:63791 [cluster](defragLaterStep+0x4c)[0x48aa1c]

Backtrace:
/lib64/libpthread.so.0(+0x12cf0)[0x7fe1d1570cf0]
redis-server *:63791 [cluster](defragLaterStep+0x4c)[0x48aa1c]
redis-server *:63791 [cluster](activeDefragCycle+0x3b4)[0x48b154]
redis-server *:63791 [cluster](databasesCron+0x6c)[0x5668fc]
redis-server *:63791 [cluster](serverCron+0x64a)[0x5691ea]
redis-server *:63791 [cluster][0x56249d]
redis-server *:63791 [cluster](aeMain+0x1d8)[0x563c68]
redis-server *:63791 [cluster](main+0x39a)[0x450d4a]
/lib64/libc.so.6(__libc_start_main+0xe5)[0x7fe1d11d3d85]
redis-server *:63791 [cluster](_start+0x2e)[0x45147e]

------ REGISTERS ------
11360:M 28 May 2024 16:01:13.401 #
RAX:0000000000000000 RBX:0000000000000000
RCX:0000000000000000 RDX:0000000000000000
RDI:0000000000000000 RSI:0000000000000000
RBP:0000000000000000 RSP:00007ffc11d3acc0
R8 :0000000000481278 R9 :00007ffc11d78080
R10:00007ffc11d3ad40 R11:0000000000000003
R12:0000000000000119 R13:00061989188d756f
R14:0000000000000000 R15:0000000000000000
RIP:000000000048aa1c EFL:0000000000010246
CSGSFS:002b000000000033
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3accf) -> 0000000000000000
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acce) -> 00061989188d756f
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3accd) -> 0000000000000000
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3accc) -> 0000000000000014
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3accb) -> 0000000000061cf3
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acca) -> 0000000066563809
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc9) -> 0000000000481e0f
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc8) -> 0000000000000119
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc7) -> 0000000000000000
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc6) -> 00000005d9513e3c
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc5) -> 0000000000051650
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc4) -> 00000005d9513e3c
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc3) -> 0000000000480a20
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc2) -> 0000000000480930
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc1) -> 0000000000000001
11360:M 28 May 2024 16:01:13.401 # (00007ffc11d3acc0) -> 0000000000000000
....


------ DUMPING CODE AROUND EIP ------
Symbol: defragLaterStep (base: 0x48a9d0)
Module: redis-server *:63791 [cluster] (base 0x400000)
$ xxd -r -p /tmp/dump.hex /tmp/dump.bin
$ objdump --adjust-vma=0x48a9d0 -D -b binary -m i386:x86-64 /tmp/dump.bin
------
11360:M 28 May 2024 16:01:13.534 # dump of function (hexdump of 204 bytes):
41574531ff41564989fe41554989f5415455534883ec68488b2d32734600f30f7e0dd27b19004c8b2543734600488b15244c46000f160d257b19000f294c2410488b35194c46004885d2755a498b7e48488b074885f6742f483970100f85510300004889c6e886870d00498b464848c705df4b46000000000048c705dc4b460000000000488b004885c00f841c03000048c705bd4b460000000000488b7010488935ba4b4600498b3ee802d00d004889c3488b05987246004889442420e9b500000066662e0f1f8400000000
Function at 0x5631c0 is listDelNode
Function at 0x567a80 is dictFind

stevelipinski · 2024-05-28T20:10:21Z

Common thread I see here in these crashes, between 13205 and 12677 is the backtrace:
activeDefragCycle, ..., libpthread.so.0

Makes me wonder if there is some race condition occurring between threads when active defrag is running...

sundb · 2024-05-28T23:14:02Z

@stevelipinski thanks, it will be fixed in next release.

stevelipinski · 2024-05-29T01:45:40Z

@sundb - Did you find the root cause? We are investigating, and if you already know what needs to be fixed, we will not spend more time on it. Thanks!

sundb · 2024-05-29T03:59:25Z

@stevelipinski sorry for the late reply. The cause of the crash is that we forgot to call slotToKeyInit() when emptying the database asynchronously.
Active defragmentation then triggers the bug.
This is the patch: sundb@eb4a927

stevelipinski · 2024-05-29T14:42:49Z

@sundb - thanks for sharing the patch. I also think that my above-mentioned crash was because of disabling activedefrag while it was actively running, which caused the old cursor to allow access to an out-of-range db: stevelipinski/redis@ecb7cd8

sundb · 2024-05-29T15:03:58Z

@stevelipinski defragmentation is just one way to trigger the crash; any code touching dictMetadata(db->dict)->db may trigger it.
By the way, did you use flushdb async or enable the lazyfree-lazy-user-flush config?

stevelipinski · 2024-05-29T15:46:40Z

No - I did not use flushdb nor lazyfree-lazy-user-flush. I did set activedefrag=yes with some low thresholds.
All I did was run a script that set a bunch of keys (like 100,000), some of which have a short TTL (1-10s).
I then did CONFIG SET activedefrag no; CONFIG SET activedefrag yes - and it would crash.

It looked like a different crash/backtrace than was being discussed in this orig issue. See above.
Reproduced with gdb and checked that in activeDefragCycle(), line 1014, if (defragLaterStep(db, endtime)) was being called with db=NULL.
That could occur if the block that handles defrag being disabled mid-run (lines 934-948) was hit, which could leave expires_cursor set to a non-zero value.
Then, when defrag was re-enabled, expires_cursor > 0 caused the first do{}while to be skipped and execution to go straight into the second one, with db=NULL.

sundb · 2024-05-29T15:56:39Z

@stevelipinski I saw cmdstat_flushall:calls=1 in your crash log.
Maybe you can apply my patch first to see if it still crashes?

> Makes me wonder if there is some race condition occurring between threads when active defrag is running...

Active defrag runs in the main thread.

stevelipinski · 2024-05-29T18:43:28Z

Nope - even with your fix:

=== REDIS BUG REPORT START: Cut & paste starting from here ===
19766:M 29 May 2024 14:29:44.134 # Redis 7.2.3 crashed by signal: 11, si_code: 1
19766:M 29 May 2024 14:29:44.134 # Accessing address: 0x48
19766:M 29 May 2024 14:29:44.134 # Crashed running the instruction at: 0x48a9bc

------ STACK TRACE ------
EIP:
redis-server *:63791 [cluster](defragLaterStep+0x4c)[0x48a9bc]

Backtrace:
/lib64/libpthread.so.0(+0x12cf0)[0x7f11c991fcf0]
redis-server *:63791 [cluster](defragLaterStep+0x4c)[0x48a9bc]
redis-server *:63791 [cluster](activeDefragCycle+0x3b4)[0x48b0f4]
redis-server *:63791 [cluster](databasesCron+0x6c)[0x5668ec]
redis-server *:63791 [cluster](serverCron+0x64a)[0x5691da]
redis-server *:63791 [cluster][0x56248d]
redis-server *:63791 [cluster](aeMain+0x1d8)[0x563c58]
redis-server *:63791 [cluster](main+0x39a)[0x450d4a]
/lib64/libc.so.6(__libc_start_main+0xe5)[0x7f11c9582d85]
redis-server *:63791 [cluster](_start+0x2e)[0x45147e]

------ REGISTERS ------
19766:M 29 May 2024 14:29:44.135 #
RAX:0000000000000000 RBX:0000000000000000
RCX:0000000000000000 RDX:0000000000000000
RDI:0000000000000000 RSI:0000000000000000
RBP:0000000000000000 RSP:00007ffe5c2adab0
R8 :000000000036c9c2 R9 :00007ffe5c34a080
R10:00007ffe5c2adb30 R11:0000000000000002
R12:00000000000002c0 R13:0006199bef354748
R14:0000000000000000 R15:0000000000000000
RIP:000000000048a9bc EFL:0000000000010246
CSGSFS:002b000000000033
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adabf) -> 0000000000000000
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adabe) -> 0006199bef354748
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adabd) -> 0000000000000000
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adabc) -> 0000000000000014
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adabb) -> 0000000000020d20
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adaba) -> 0000000066577418
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab9) -> 0000000000481daf
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab8) -> 00000000000002c0
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab7) -> 0000000000000000
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab6) -> 000000050a98c3d4
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab5) -> 0000000000054e20
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab4) -> 000000050a98c3d4
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab3) -> 00000000004809c0
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab2) -> 00000000004808d0
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab1) -> 0000000000000001
19766:M 29 May 2024 14:29:44.135 # (00007ffe5c2adab0) -> 0000000000000000
...

------ DUMPING CODE AROUND EIP ------
Symbol: defragLaterStep (base: 0x48a970)
Module: redis-server *:63791 [cluster] (base 0x400000)
$ xxd -r -p /tmp/dump.hex /tmp/dump.bin
$ objdump --adjust-vma=0x48a970 -D -b binary -m i386:x86-64 /tmp/dump.bin
------
19766:M 29 May 2024 14:29:44.272 # dump of function (hexdump of 204 bytes):
41574531ff41564989fe41554989f5415455534883ec68488b2d92734600f30f7e0d127c19004c8b25a3734600488b15844c46000f160d657b19000f294c2410488b35794c46004885d2755a498b7e48488b074885f6742f483970100f85510300004889c6e8d6870d00498b464848c7053f4c46000000000048c7053c4c460000000000488b004885c00f841c03000048c7051d4c460000000000488b70104889351a4c4600498b3ee852d00d004889c3488b05f87246004889442420e9b500000066662e0f1f8400000000
Function at 0x5631b0 is listDelNode
Function at 0x567a70 is dictFind

Needs my change to reset expires_cursor in defrag.c to avoid this crash
Should I open a new issue for this?

stevelipinski · 2024-05-29T18:44:47Z

@sundb - Given your change to add slotToKeyInit(), how can I reproduce this issue? I stumbled on the other crash in attempt to reproduce this one, but I am still unable. It would be nice to be able to reproduce this issue and verify the fix resolves it.

sundb · 2024-05-30T01:32:09Z

@stevelipinski thanks, these do seem to be two separate issues; you are welcome to create a PR to fix it.

> @sundb - Given your change to add slotToKeyInit(), how can I reproduce this issue? I stumbled on the other crash while attempting to reproduce this one, but I am still unable to. It would be nice to be able to reproduce this issue and verify the fix resolves it.

You can reproduce it by using the flushdb async command; there is a high probability that the crash will be triggered when you run it in cluster mode.

sundb · 2024-05-30T06:57:06Z

@stevelipinski I've reproduced it locally and manually, and your solution is right.
The root cause is that disabling the activedefrag config in the middle of a defragmentation cycle and then re-enabling it can trigger this crash.
Do you want to make a PR to fix it?

sundb linked a pull request Jun 3, 2024 that will close this issue

Fixed crashes due to missed slotToKeyInit() and missed expires_cursor reset #13315

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[CRASH] Redis 7.2.3 crashed in slotToKeyReplaceEntry #13205

[CRASH] Redis 7.2.3 crashed in slotToKeyReplaceEntry #13205

SarthakSahu commented Apr 12, 2024 •

edited

sundb commented Apr 15, 2024

SarthakSahu commented Apr 15, 2024

sundb commented Apr 15, 2024

SarthakSahu commented Apr 16, 2024

sundb commented Apr 16, 2024

stevelipinski commented Apr 17, 2024

sundb commented Apr 17, 2024

stevelipinski commented Apr 17, 2024

sundb commented Apr 17, 2024

SarthakSahu commented Apr 23, 2024

sundb commented Apr 23, 2024

kkharbas commented Apr 24, 2024

SarthakSahu commented Apr 25, 2024

sundb commented Apr 25, 2024

stevelipinski commented May 28, 2024

stevelipinski commented May 28, 2024

sundb commented May 28, 2024

stevelipinski commented May 29, 2024

sundb commented May 29, 2024 •

edited

stevelipinski commented May 29, 2024

sundb commented May 29, 2024 •

edited

stevelipinski commented May 29, 2024 •

edited

sundb commented May 29, 2024

stevelipinski commented May 29, 2024

stevelipinski commented May 29, 2024

sundb commented May 30, 2024 •

edited

sundb commented May 30, 2024

[CRASH] Redis 7.2.3 crashed in slotToKeyReplaceEntry #13205

[CRASH] Redis 7.2.3 crashed in slotToKeyReplaceEntry #13205

Comments

SarthakSahu commented Apr 12, 2024 • edited

sundb commented Apr 15, 2024

SarthakSahu commented Apr 15, 2024

sundb commented Apr 15, 2024

SarthakSahu commented Apr 16, 2024

sundb commented Apr 16, 2024

stevelipinski commented Apr 17, 2024

sundb commented Apr 17, 2024

stevelipinski commented Apr 17, 2024

sundb commented Apr 17, 2024

SarthakSahu commented Apr 23, 2024

sundb commented Apr 23, 2024

kkharbas commented Apr 24, 2024

SarthakSahu commented Apr 25, 2024

sundb commented Apr 25, 2024

stevelipinski commented May 28, 2024

stevelipinski commented May 28, 2024

sundb commented May 28, 2024

stevelipinski commented May 29, 2024

sundb commented May 29, 2024 • edited

stevelipinski commented May 29, 2024

sundb commented May 29, 2024 • edited

stevelipinski commented May 29, 2024 • edited

sundb commented May 29, 2024

stevelipinski commented May 29, 2024

stevelipinski commented May 29, 2024

sundb commented May 30, 2024 • edited

sundb commented May 30, 2024

SarthakSahu commented Apr 12, 2024 •

edited

sundb commented May 29, 2024 •

edited

sundb commented May 29, 2024 •

edited

stevelipinski commented May 29, 2024 •

edited

sundb commented May 30, 2024 •

edited