介绍 arm 体系下如何实现 stack backtrace
APCS
APCS (ARM Procedure Call Standard),ARM 过程调用标准规范了 arm 寄存器的使用、过程调用时出栈和入栈的约定。如下图示意:
函数的栈帧由 fp 和 sp 标记边界。如果编译器遵循 APCS,形成结构化的函数调用栈,就可以解析当前栈帧(callee)的结构,从中得到调用者栈帧(caller)的结构,如此逐级回溯,就能输出整个调用栈。
使用编译器选项 -g 生成栈帧信息段 .debug_frame:
$ readelf -S output/out.elf
There are 18 section headers, starting at offset 0x1bb0fd0:
节头:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .text PROGBITS 90000000 001000 6c132c 04 AX 0 0 1024
[ 2] .devtab PROGBITS 906c132c 6c232c 000c8c 00 WA 0 0 8
[ 3] .bss NOBITS 906c1fb8 6c2fb8 16a568 00 WA 0 0 8
[ 4] .debug_abbrev PROGBITS 00000000 6c2fb8 0b82a7 00 0 0 1
[ 5] .debug_info PROGBITS 00000000 77b25f a37035 00 0 0 1
[ 6] .debug_line PROGBITS 00000000 11b2294 3bb5fc 00 0 0 1
[ 7] .debug_pubnames PROGBITS 00000000 156d890 04911e 00 0 0 1
[ 8] .debug_pubtypes PROGBITS 00000000 15b69ae 166807 00 0 0 1
[ 9] .debug_aranges PROGBITS 00000000 171d1b5 018900 00 0 0 1
[10] .debug_str PROGBITS 00000000 1735ab5 0c4732 01 MS 0 0 1
[11] .comment PROGBITS 00000000 17fa1e7 0000cf 01 MS 0 0 1
[12] .debug_frame PROGBITS 00000000 17fa2b8 08c8b0 00 0 0 4
[13] .debug_ranges PROGBITS 00000000 1886b68 056380 00 0 0 1
[14] .debug_loc PROGBITS 00000000 18dcee8 2d402b 00 0 0 1
[15] .shstrtab STRTAB 00000000 1bb0f13 0000bd 00 0 0 1
[16] .symtab SYMTAB 00000000 1bb12a0 071130 10 17 14593 4
[17] .strtab STRTAB 00000000 1c223d0 086411 00 0 0 1
内核编译选项
# When CONFIG_FRAME_POINTER is set to y, append the frame-pointer/APCS
# related compiler flags to KBUILD_CFLAGS.
ifeq ($(CONFIG_FRAME_POINTER),y)
KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog
endif
unwind
APCS 的缺陷是,维护栈帧的指令过多,栈消耗大,占用的寄存器也过多,比如每次调用都必须将 r11,r12,lr,pc 入栈。使用 unwind 就能避免这些问题,生成的指令效率要高得多。它的原理是把每个函数的入栈指令(一般比 APCS 的入栈要少得多)记录到特殊的段 .ARM.unwind_idx 和 .ARM.unwind_tab 中。
$ readelf -S vmlinux
There are 33 section headers, starting at offset 0xa33ba8:
节头:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .head.text PROGBITS c0008000 008000 00026c 00 AX 0 0 4
[ 2] .text PROGBITS c0100000 010000 55e3dc 00 AX 0 0 64
[ 3] .fixup PROGBITS c065e3dc 56e3dc 00001c 00 AX 0 0 4
[ 4] .rodata PROGBITS c0700000 570000 105fc8 00 WA 0 0 4096
[ 5] __bug_table PROGBITS c0805fc8 675fc8 0057a8 00 A 0 0 4
[ 6] __ksymtab PROGBITS c080b770 67b770 006e28 00 A 0 0 4
[ 7] __ksymtab_gpl PROGBITS c0812598 682598 005d10 00 A 0 0 4
[ 8] __ksymtab_strings PROGBITS c08182a8 6882a8 01e235 00 A 0 0 1
[ 9] __param PROGBITS c08364e0 6a64e0 000d34 00 A 0 0 4
[10] __modver PROGBITS c0837214 6a7214 000dec 00 A 0 0 4
[11] __ex_table PROGBITS c0838000 6a8000 001038 00 A 0 0 8
[12] .ARM.unwind_idx ARM_EXIDX c0839038 6a9038 0273d8 00 AL 17 0 4
[13] .ARM.unwind_tab PROGBITS c0860410 6d0410 003630 00 A 0 0 4
[14] .notes NOTE c0863a40 6d3a40 000024 00 AX 0 0 4
[15] .vectors PROGBITS ffff0000 6e0000 000020 00 AX 0 0 4
[16] .stubs PROGBITS ffff1000 6e1000 0002ac 00 AX 0 0 32
[17] .init.text PROGBITS c09002e0 6f02e0 0440d0 00 AX 0 0 32
[18] .exit.text PROGBITS c09443b0 7343b0 000ee4 00 AX 0 0 4
[19] .init.arch.info PROGBITS c0945294 735294 0000d0 00 A 0 0 4
[20] .init.tagtable PROGBITS c0945364 735364 000048 00 A 0 0 4
[21] .init.smpalt PROGBITS c09453ac 7353ac 00bd10 00 A 0 0 4
[22] .init.pv_table PROGBITS c09510bc 7410bc 00064c 00 A 0 0 1
[23] .init.data PROGBITS c0952000 742000 00a194 00 WA 0 0 4096
[24] .data..percpu PROGBITS c095d000 74d000 0067cc 00 WA 0 0 64
[25] .data PROGBITS c0a00000 760000 0609ac 00 WA 0 0 64
[26] .data..page_align PROGBITS c0a61000 7c1000 001000 00 WA 0 0 4096
[27] .bss NOBITS c0a62000 7c2000 03260c 00 WA 0 0 64
[28] .comment PROGBITS 00000000 7c2000 00004e 01 MS 0 0 1
[29] .ARM.attributes ARM_ATTRIBUTES 00000000 7c204e 00002f 00 0 0 1
[30] .symtab SYMTAB 00000000 7c2080 167560 10 31 77680 4
[31] .strtab STRTAB 00000000 9295e0 10a46c 00 0 0 1
[32] .shstrtab STRTAB 00000000 a33a4c 00015a 00 0 0 1
$ objdump -D vmlinux
c04ee72c <__skb_recv_datagram>:
c04ee72c: e3110040 tst r1, #64 ; 0x40
c04ee730: e92d43f0 push {r4, r5, r6, r7, r8, r9, lr}
c04ee734: e1a09003 mov r9, r3
c04ee738: 05903144 ldreq r3, [r0, #324] ; 0x144
c04ee73c: e24dd014 sub sp, sp, #20
$ readelf -u vmlinux
0xc04ee72c <__skb_recv_datagram>: 0x8004adb0
Compact model index: 0
0x04 vsp = vsp + 20
0xad pop {r4, r5, r6, r7, r8, r9, r14}
0xb0 finish
首先输出的是函数的地址和对应的编码。接下来输出的是编码对应的出栈伪指令,这些伪指令正好是函数栈操作的逆过程,用于回溯。
内核编译选项
# When CONFIG_ARM_UNWIND is set to y, append -funwind-tables to CFLAGS_ABI.
ifeq ($(CONFIG_ARM_UNWIND),y)
CFLAGS_ABI +=-funwind-tables
endif
内核实现
APCS会产生更多的代码指令,对性能有影响,使用unwind的方式会生成额外的段,但不影响性能。
文件 arch/arm/kernel/traps.c
#ifdef CONFIG_ARM_UNWIND
/*
 * dump_backtrace() - CONFIG_ARM_UNWIND variant.
 * @regs: register set forwarded unchanged to the unwinder
 * @tsk:  task forwarded unchanged to the unwinder
 *
 * Thin wrapper: all of the work is delegated to unwind_backtrace().
 */
static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
unwind_backtrace(regs, tsk);
}
#else
/*
 * dump_backtrace() - frame-pointer variant, built when CONFIG_ARM_UNWIND
 * is not defined.
 * @regs: register set to start from, or NULL
 * @tsk:  task to trace; NULL selects current
 *
 * Chooses a starting frame pointer, prints a one-line diagnostic if that
 * frame pointer is missing, rejected by verify_stack(), or below
 * end_of_stack(tsk), and then hands over to c_backtrace().  The walk is
 * skipped only for a missing or rejected frame pointer; an underflow is
 * reported but does not stop the walk.
 */
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
	unsigned int fp;
	/* 0x10 is the mode used whenever no register set is supplied. */
	unsigned int mode = 0x10;
	int walkable = 0;

	printk("Backtrace: ");

	if (!tsk)
		tsk = current;

	/* Select the frame pointer to start from. */
	if (regs) {
		fp = frame_pointer(regs);
		mode = processor_mode(regs);
	} else if (tsk == current) {
		/* Tracing ourselves: read the live fp register. */
		asm("mov %0, fp" : "=r" (fp) : : "cc");
	} else {
		fp = thread_saved_fp(tsk);
	}

	/* Sanity-check it; the first failing check decides the message. */
	if (!fp) {
		pr_cont("no frame pointer");
	} else if (verify_stack(fp)) {
		pr_cont("invalid frame pointer 0x%08x", fp);
	} else {
		walkable = 1;
		/* Underflow is only reported; the walk still goes ahead. */
		if (fp < (unsigned long)end_of_stack(tsk))
			pr_cont("frame pointer underflow");
	}
	pr_cont("\n");

	if (walkable)
		c_backtrace(fp, mode);
}
#endif