Skip to content

Latest commit

 

History

History
426 lines (404 loc) · 18.6 KB

File metadata and controls

426 lines (404 loc) · 18.6 KB

第 10 周

观看技术培训会

7 月 31 日下午,我们观看了《2022 全国大学生系统能力大赛操作系统设计赛第九场技术培训会》。会上,许多高水平的同学就他们的成果作了展示。和我们相同选题的北京航空航天大学 高世伟同学也在其中展示。据他介绍,他们组计划转储到 SQLite 数据库,同时加入前端数据展示界面。我想起自己曾经和马玉昆老师询问过关于如何展示的内容,但那时由于对于持有锁分析等更重要的内容的进度缓慢,前端展示这一问题便被搁置。就这一问题我咨询了队友叶景熙同学,他表示可以调研一下。三天后,他计划使用 Qt 读取文件等的方式来实现这样的功能,我们在 8 月 4 日的会议上与马玉昆导师就此事进行了交流,马老师表示对此方面可能不是很了解。

此次技术培训会也我认识到自己的不足,自己费了很大劲还不完全弄明白的内容,别人轻轻松松就能搞懂。自己应该放弃不切实际的幻想,更加脚踏实地地做事。

内核模块开发

基数树

基数树(Radix tree)是内核中的一种数据结构,可以将值映射为整型关键字,使用 RADIX_TREE 宏进行定义及初始化,调用 radix_tree_insertradix_tree_delete 可以插入或删除某个结点,而 radix_tree_lookup 可以查找基数树;对于重复元素,会返回 –EEXIST

这些都比较好理解,不过删除整棵基数树就有些没那么直白了。由于基数树在插入元素的过程中也在增长,其内部也动态分配了内存,我找到的方案是遍历并删除元素。

gdb 调试内核模块

原来自己并没有想过用 gdb 调试,但频频出现的内核挂起或死锁等问题却让我感到举步维艰,于是我阅读了这篇文章

(gdb) hbreak do_init_module 
Hardware assisted breakpoint 1 at 0xffffffff8125d46d: file kernel/module.c, line 3485.
(gdb) target remote 127.0.0.1:1234
Remote debugging using 127.0.0.1:1234
0xffffffff8206870a in native_safe_halt ()
    at ./arch/x86/include/asm/irqflags.h:57
57		asm volatile("sti; hlt": : :"memory");
(gdb) c
Continuing.
[Switching to Thread 1.3]

Thread 3 hit Breakpoint 1, do_init_module (mod=mod@entry=0xffffffffa0a2ab80)
    at kernel/module.c:3485
3485	{
(gdb) print mod->sect_attrs->attrs[1]->name
$1 = 0xffff888107ea5e88 ".text"
......
(gdb) print mod->sect_attrs->attrs[12]->name 
$15 = 0xffff888107ea5d70 ".data"
......
(gdb) print mod->sect_attrs->attrs[16]->name 
$19 = 0xffff888107ea5848 ".bss"
(gdb) print /x mod->sect_attrs->attrs[1]->address
$20 = 0xffffffffa0a28000
(gdb) print /x mod->sect_attrs->attrs[12]->address
$21 = 0xffffffffa0a2a000
(gdb) print /x mod->sect_attrs->attrs[16]->address
$22 = 0xffffffffa0a2af80
(gdb) add-symbol-file /home/qcwl/LinuxLearn/LockTest/lock_test.ko 0xffffffffa0a28000 -s .data 0xffffffffa0a2a000 -s .bss 0xffffffffa0a2af80
add symbol table from file "/home/qcwl/LinuxLearn/LockTest/lock_test.ko" at
	.text_addr = 0xffffffffa0a28000
	.data_addr = 0xffffffffa0a2a000
	.bss_addr = 0xffffffffa0a2af80
(y or n) y
Reading symbols from /home/qcwl/LinuxLearn/LockTest/lock_test.ko...done.

此时我们已经获得了内核模块被加载的地址,因此可以让 GDB 加载其符号。如下所示,不加载调试信息的情况下是看不到具体的代码行数的。

.......
(gdb) c
Continuing.
[Switching to Thread 1.4]

Thread 4 hit Breakpoint 2, add_into_hashtable (lock_addr=18446744071615679776, 
    lock_type=lock_type@entry=0)
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:40
40	static void add_into_hashtable(unsigned long lock_addr, int lock_type) {
(gdb) n
44	    pr_info("0\n");
(gdb) n
45	    hash_for_each_possible(lock_table, current_lock_info, node, lock_addr) {
(gdb) n
50	        if (current_lock_info->lock_address == lock_addr) {
(gdb) n
53	            pr_info("1\n");
(gdb) n
56	            current_lock_info->called_num++;
(gdb) n
59	            list_for_each_entry(pos, &current_lock_info->process_list_head, process_list_node) {
(gdb) n
61	                if (pos == NULL) {
(gdb) n
64	                if (pos->pid == get_current()->pid) {
(gdb) n
15		return this_cpu_read_stable(current_task);
(gdb) n
59	            list_for_each_entry(pos, &current_lock_info->process_list_head, process_list_node) {
(gdb) n
73	                pr_info("2\n");
(gdb) n
74	                new_node = kmalloc(sizeof(struct process_stack), GFP_ATOMIC);
(gdb) n
75	                if (new_node == NULL) {
(gdb) n
78	                INIT_LIST_HEAD(&new_node->process_list_node);
(gdb) n
80	                new_node->pid = get_current()->pid;
(gdb) n
15		return this_cpu_read_stable(current_task);
(gdb) n
375		return __builtin_memcpy(p, q, size);
(gdb) n
83	                lock_trace.nr_entries = 0;
(gdb) n
84	                lock_trace.max_entries = MAX_STACK_TRACE_DEPTH;
(gdb) n
85	                lock_trace.entries = new_node->entries;
(gdb) n
86	                lock_trace.skip = 0;// 复制过来时是 2
(gdb) n
15		return this_cpu_read_stable(current_task);
(gdb) n
88	                new_node->nr_entries = lock_trace.nr_entries;
(gdb) n
90	                list_add(&new_node->process_list_node, &current_lock_info->process_list_head);
(gdb) n
129	    pr_info(".\n");
(gdb) n
pre_handler_spin_lock_irqsave (p=<optimized out>, regs=0xffff88810d189b98)
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:146
146	    smp_mb();
(gdb) n
147	    atomic_set(&table_lock, 0);
(gdb) n
149	    atomic_set(this_cpu_ptr(&in_prober[0]), false);
(gdb) n
165		return !(flags & X86_EFLAGS_IF);
(gdb) n
150	    local_irq_restore(irq_flags);
(gdb) n
151	    return 0;
(gdb) n
138	        return 0;
(gdb) n
kprobe_ftrace_handler (ip=18446744071596054648, parent_ip=<optimized out>, 
    ops=<optimized out>, regs=0xffff88810d189b98)
    at arch/x86/kernel/kprobes/ftrace.c:83
83				__skip_singlestep(p, regs, kcb, orig_ip);
(gdb) n
84				preempt_enable_no_resched();
(gdb) n
__ftrace_ops_list_func (regs=0xffff88810d189b98, 
    ignored=0x0 <fixed_percpu_data>, parent_ip=18446744071580636019, 
    ip=18446744071596054648) at kernel/trace/ftrace.c:6866
6866		} while_for_each_ftrace_op(op);
(gdb) n
6868		preempt_enable_notrace();
(gdb) n
6869		trace_clear_recursion(bit);
(gdb) n
ftrace_regs_caller () at arch/x86/kernel/ftrace_64.S:220
220		movq EFLAGS(%rsp), %rax
(gdb) n
221		movq %rax, MCOUNT_REG_SIZE(%rsp)
(gdb) n
224		movq RIP(%rsp), %rax
(gdb) n
225		movq %rax, MCOUNT_REG_SIZE+8(%rsp)
(gdb) n
228		movq R15(%rsp), %r15
(gdb) n
229		movq R14(%rsp), %r14
(gdb) n
230		movq R13(%rsp), %r13
(gdb) n
231		movq R12(%rsp), %r12
(gdb) n
232		movq R10(%rsp), %r10
(gdb) n
233		movq RBX(%rsp), %rbx
(gdb) n
235		movq ORIG_RAX(%rsp), %rax
(gdb) n
236		movq %rax, MCOUNT_REG_SIZE-8(%rsp)
(gdb) n
239		movq ORIG_RAX(%rsp), %rax
(gdb) n
240		cmpq	$0, %rax
(gdb) n
241		je	1f
(gdb) n
252	1:	restore_mcount_regs
(gdb) n
ftrace_regs_caller () at arch/x86/kernel/ftrace_64.S:264
264		popfq
(gdb) n
ftrace_regs_caller () at arch/x86/kernel/ftrace_64.S:274
274		jmp ftrace_epilogue
(gdb) n
ftrace_caller () at arch/x86/kernel/ftrace_64.S:169
169		jmp ftrace_stub
(gdb) n
ftrace_caller () at arch/x86/kernel/ftrace_64.S:174
174		retq
(gdb) n
_raw_spin_lock_irqsave (lock=lock@entry=0xffffffff83320d20 <log_wait>)
    at kernel/locking/spinlock.c:159
159		return __raw_spin_lock_irqsave(lock);
(gdb) n
^C
Thread 4 received signal SIGINT, Interrupt.
0xffffffff82068718 in native_halt () at ./arch/x86/include/asm/irqflags.h:63
63		asm volatile("hlt": : :"memory");

此处似乎在等待锁,这时我们可以用 bt 查看其栈回溯。

(gdb) bt
#0  0xffffffff82068718 in native_halt ()
    at ./arch/x86/include/asm/irqflags.h:63
#1  0xffffffff81099231 in halt () at ./arch/x86/include/asm/paravirt.h:99
#2  kvm_wait (ptr=0xffffffff83320d20 <log_wait> "\003", val=<optimized out>)
    at arch/x86/kernel/kvm.c:896
#3  0xffffffff811dec2c in pv_wait (val=3 '\003', 
    ptr=0xffffffff83320d20 <log_wait> "\003")
    at ./arch/x86/include/asm/paravirt.h:694
#4  pv_wait_head_or_lock (lock=lock@entry=0xffffffff83320d20 <log_wait>, 
    node=node@entry=0xffff88810d1b37c0)
    at kernel/locking/qspinlock_paravirt.h:470
#5  0xffffffff811defa5 in __pv_queued_spin_lock_slowpath (
    lock=0xffffffff83320d20 <log_wait>, val=<optimized out>)
    at kernel/locking/qspinlock.c:517
#6  0xffffffff811df542 in pv_queued_spin_lock_slowpath (val=3, 
    lock=0xffffffff83320d20 <log_wait>)
    at ./arch/x86/include/asm/paravirt.h:684
#7  queued_spin_lock_slowpath (val=3, lock=0xffffffff83320d20 <log_wait>)
    at ./arch/x86/include/asm/qspinlock.h:51
#8  queued_spin_lock (lock=0xffffffff83320d20 <log_wait>)
    at ./include/asm-generic/qspinlock.h:89
#9  do_raw_spin_lock (lock=lock@entry=0xffffffff83320d20 <log_wait>)
    at kernel/locking/spinlock_debug.c:113
--Type <RET> for more, q to quit, c to continue without paging--
#10 0xffffffff820698cf in __raw_spin_lock_irqsave (
    lock=0xffffffff83320d20 <log_wait>)
    at ./include/linux/spinlock_api_smp.h:117
#11 _raw_spin_lock_irqsave (lock=lock@entry=0xffffffff83320d20 <log_wait>)
    at kernel/locking/spinlock.c:159
#12 0xffffffff811b5373 in __wake_up_common_lock (
    wq_head=wq_head@entry=0xffffffff83320d20 <log_wait>, mode=mode@entry=1, 
    nr_exclusive=nr_exclusive@entry=1, wake_flags=wake_flags@entry=0, 
    key=key@entry=0x0 <fixed_percpu_data>) at ./include/linux/spinlock.h:329
#13 0xffffffff811b5435 in __wake_up (
    wq_head=wq_head@entry=0xffffffff83320d20 <log_wait>, mode=mode@entry=1, 
    nr_exclusive=nr_exclusive@entry=1, key=key@entry=0x0 <fixed_percpu_data>)
    at kernel/sched/wait.c:158
#14 0xffffffff811f02a7 in wake_up_klogd_work_func (irq_work=<optimized out>)
    at kernel/printk/printk.c:2984
#15 0xffffffff8133593e in irq_work_run_list (list=<optimized out>)
    at kernel/irq_work.c:156
#16 0xffffffff81335bec in irq_work_tick () at kernel/irq_work.c:184
#17 0xffffffff8122e0e8 in update_process_times (user_tick=0)
    at kernel/time/timer.c:1706
#18 0xffffffff81248f8a in tick_sched_handle (ts=ts@entry=0xffff88810d1a5180, 
    regs=regs@entry=0xffff888100d2fcc8) at ./arch/x86/include/asm/ptrace.h:131
#19 0xffffffff81249011 in tick_sched_timer (
--Type <RET> for more, q to quit, c to continue without paging--
    timer=timer@entry=0xffff88810d1a5180) at kernel/time/tick-sched.c:1433
#20 0xffffffff8122faf4 in __run_hrtimer (
    cpu_base=cpu_base@entry=0xffff88810d1a4740, 
    base=base@entry=0xffff88810d1a47c0, timer=timer@entry=0xffff88810d1a5180, 
    now=now@entry=0xffff88810d189ea8, flags=flags@entry=70)
    at kernel/time/hrtimer.c:1530
#21 0xffffffff8122fc1d in __hrtimer_run_queues (
    cpu_base=cpu_base@entry=0xffff88810d1a4740, now=now@entry=220769561299, 
    flags=flags@entry=70, active_mask=active_mask@entry=15)
    at kernel/time/hrtimer.c:1594
#22 0xffffffff812305a2 in hrtimer_interrupt (dev=<optimized out>)
    at kernel/time/hrtimer.c:1656
#23 0xffffffff81080b73 in local_apic_timer_interrupt ()
    at arch/x86/kernel/apic/apic.c:1073
#24 0xffffffff822030bf in smp_apic_timer_interrupt (regs=<optimized out>)
    at arch/x86/kernel/apic/apic.c:1098
#25 0xffffffff82201cdf in apic_timer_interrupt ()
    at arch/x86/entry/entry_64.S:869
#26 0xffff888100d2fcc8 in ?? ()
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:359
#27 0x000ad20000000000 in ?? ()
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:359
#28 0x00d30000fffffe00 in ?? ()
--Type <RET> for more, q to quit, c to continue without paging--
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:359
#29 0x021160e8ffff8881 in ?? ()
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:359
Backtrace stopped: Cannot access memory at address 0x2000

我把希望寄托在内核调试选项上。开启 Detect stack corruption on calls to schedule()、Debug IRQ flag manipulation 之后,重新编译内核,得到:

[   29.959798] =====================================
[   29.961699] WARNING: bad unlock balance detected!
[   29.963940] 4.18.0-348.7.1.el8.qunchuwolao.x86_64 #1 Tainted: G           OE    ---------r-  -
[   29.966321] -------------------------------------
[   29.967332] insmod/1550 is trying to release lock (&table_lock) at:
[   29.968860] [<ffffffffa066c4e0>] pre_handler_spin_lock_irqsave+0x82/0x114 [lock_test]
[   29.970549] but there are no more locks to release!
[   29.972290] 
[   29.972290] other info that might help us debug this:
[   29.975269] 3 locks held by insmod/1550:
[   29.976603]  #0: ffffffff82b21528 (kprobe_mutex){....}-{3:3}, at: register_kprobe+0xba/0x1ee
[   29.980170]  #1: ffffffff82b36648 (ftrace_lock){....}-{3:3}, at: register_ftrace_function+0x1b/0x63
[   29.984280]  #2: ffffffff82a71dc8 (text_mutex){....}-{3:3}, at: ftrace_arch_code_modify_prepare+0x11/0x21
[   29.988334] 
[   29.988334] stack backtrace:
[   29.990202] CPU: 3 PID: 1550 Comm: insmod Kdump: loaded Tainted: G           OE    ---------r-  - 4.18.0-348.7.1.el8.qunchuwolao.x86_64 #1
[   29.994743] Hardware name: Red Hat KVM, BIOS 1.13.0-2.module_el8.5.0+746+bbd5d70c 04/01/2014
[   29.996984] Call Trace:
[   29.997556]  <IRQ>
[   29.998049]  ? __dump_stack+0x1b/0x1c
[   29.998834]  ? dump_stack+0x7c/0xba
[   29.999637]  ? pre_handler_spin_lock_irqsave+0x82/0x114 [lock_test]
[   30.001513]  ? print_unlock_imbalance_bug+0xc9/0xce
[   30.003520]  ? __lock_release+0x15f/0x189
[   30.005345]  ? pre_handler_spin_lock_irqsave+0x82/0x114 [lock_test]
[   30.008230]  ? _raw_spin_lock_irqsave+0x5/0x61
[   30.009647]  ? lock_release+0xb1/0xf4
[   30.010813]  ? _raw_spin_unlock+0x17/0x21
[   30.011974]  ? pre_handler_spin_lock_irqsave+0x82/0x114 [lock_test]
[   30.015187]  ? _raw_spin_lock_irqsave+0x1/0x61
[   30.017549]  ? kprobe_ftrace_handler+0x89/0xdb
[   30.019861]  ? _raw_write_lock_irqsave+0x61/0x61
[   30.022233]  ? hrtimer_interrupt+0x4d/0x210
[   30.023745]  ? ftrace_ops_list_func+0xc7/0x120
[   30.025312]  ? ftrace_regs_call+0x5/0xd3
[   30.026831]  ? _raw_spin_lock_irqsave+0x1/0x61
[   30.028296]  ? _raw_spin_lock_irqsave+0x5/0x61
[   30.030858]  ? hrtimer_interrupt+0x4d/0x210
[   30.033226]  ? kvm_clock_read+0xd/0xe
[   30.035286]  ? local_apic_timer_interrupt+0x2d/0x4c
[   30.037996]  ? smp_apic_timer_interrupt+0x9f/0x10c
[   30.040476]  ? apic_timer_interrupt+0xf/0x20
[   30.043085]  </IRQ>
[   30.044699]  ? ftrace_get_addr_curr+0x8/0x77
[   30.047132]  ? add_brk_on_nop+0x14/0x16
[   30.049277]  ? add_breakpoints+0xe/0x4a
[   30.051592]  ? ftrace_replace_code+0x33/0x156
[   30.054080]  ? ftrace_modify_all_code+0x84/0xc8
[   30.056424]  ? arch_ftrace_update_code+0xc/0x14
[   30.059302]  ? ftrace_run_update_code+0x13/0x30
[   30.061940]  ? ftrace_startup_enable+0x28/0x2f
[   30.064623]  ? ftrace_startup+0x7b/0x9e
[   30.067079]  ? register_ftrace_function+0x28/0x63
[   30.068962]  ? __arm_kprobe_ftrace+0x71/0xaf
[   30.070454]  ? arm_kprobe_ftrace+0x33/0x34
[   30.072570]  ? arm_kprobe+0x4b/0x4f
[   30.074872]  ? register_kprobe+0x19d/0x1ee
[   30.077050]  ? register_kprobes+0x2b/0x5c
[   30.079337]  ? start_module+0x92/0x1000 [lock_test]
[   30.082357]  ? 0xffffffffa0673000
[   30.084342]  ? do_one_initcall+0x9f/0x1ec
[   30.086744]  ? do_init_module+0x22/0x23f
[   30.088519]  ? kmem_cache_alloc_trace+0x11a/0x1eb
[   30.090986]  ? do_init_module+0x5a/0x23f
[   30.093058]  ? load_module+0x438/0x43d
[   30.094283]  ? __do_sys_finit_module+0xd3/0xeb
[   30.096660]  ? __se_sys_finit_module+0xa/0xb
[   30.099286]  ? __x64_sys_finit_module+0x16/0x17
[   30.102024]  ? do_syscall_64+0x85/0x1b7
[   30.104141]  ? entry_SYSCALL_64_after_hwframe+0x6a/0xdf
[   30.151812] Registered probe_spin_lock_irqsave.

使用 info threads 可以查看所有进程的信息,使用 thread 线程号 可以切换,我认为这是十分有用的。

(gdb) info threads 
  Id   Target Id                    Frame 
* 1    Thread 1.1 (CPU#0 [running]) pv_hybrid_queued_unfair_trylock (
    lock=0xffffffffa057d640 <table_lock>)
    at kernel/locking/qspinlock_paravirt.h:90
  2    Thread 1.2 (CPU#1 [running]) __read_once_size (size=4, 
    res=<synthetic pointer>, p=0xffffffffa057d640 <table_lock>)
    at ./include/linux/compiler.h:276
  3    Thread 1.3 (CPU#2 [halted ]) 0xffffffff81a0af8d in native_halt ()
    at ./arch/x86/include/asm/irqflags.h:63
  4    Thread 1.4 (CPU#3 [running]) add_into_hashtable (
    lock_addr=<optimized out>, lock_type=lock_type@entry=0)
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:60
(gdb) thread 1.4
[Switching to thread 4 (Thread 1.4)]
#0  add_into_hashtable (lock_addr=<optimized out>, lock_type=lock_type@entry=0)
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:60
60	            list_for_each_entry(pos, &current_lock_info->process_list_head, process_list_node) {
(gdb) bt
#0  add_into_hashtable (lock_addr=<optimized out>, lock_type=lock_type@entry=0)
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:60
#1  0xffffffffa057b5e8 in pre_handler_spin_lock (p=<optimized out>, 
    regs=0xffffc9000093b960) at /home/qcwl/LinuxLearn/LockTest/lock_test.c:173
#2  0xffffffff8105f537 in kprobe_ftrace_handler (ip=18446744071589377096, 
    parent_ip=<optimized out>, ops=<optimized out>, regs=0xffffc9000093b960)
    at arch/x86/kernel/kprobes/ftrace.c:82
#3  0xffffffff811dcbf0 in ftrace_ops_assist_func (ip=18446744071589377096, 
    parent_ip=18446744071582007391, op=0xffffffff82cfae40 <kprobe_ftrace_ops>, 
    regs=0xffffc9000093b960) at kernel/trace/ftrace.c:6919
#4  0xffffffffa05850c8 in ?? ()
#5  0xffff8881000058c0 in ?? ()
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:382
#6  0xffff88813bdaf160 in ?? ()
    at /home/qcwl/LinuxLearn/LockTest/lock_test.c:382
#7  0x0000000000000000 in ?? ()
(gdb) n
65	                if (pos->pid == get_current()->pid) {

用 l* 查看,可以查看指定的地址对应的代码。

[qcwl@localhost LockTest]$ gdb ./lock_test.ko --silent
Reading symbols from ./lock_test.ko...done.
(gdb) l* end_module+0x2e8
0x104e is in end_module (/home/qcwl/LinuxLearn/LockTest/lock_test.c:450).
445             // /* 原版 */list_for_each_entry_safe(pos, n, &current_lock_info->process_list_head, process_list_node) {
446             pos = current_lock_info->begin; // ADD
447             while (pos != NULL) { // ADD
448                 int i, j;
449                 struct pid_array *same_pid;
450                 n = pos->next; // ADD
451                 pr_info("PID %d, comm %s, Backtrace: \n", pos->pid, pos->comm);
452                 same_pid = radix_tree_lookup(&pid_tree, pos->pid);
453                 if (same_pid) {
454                     pr_info("Pid holds %d lock.\n", same_pid->ring_index);