0%

ARM stack backtrace的实现

介绍 arm 体系下如何实现 stack backtrace

APCS

APCS (ARM Procedure Call Standard),ARM 过程调用标准规范了 arm 寄存器的使用、过程调用时出栈和入栈的约定。如下图示意:

函数的栈帧由 fp 和 sp 标记边界。如果编译器遵循APCS,形成结构化的函数调用栈,就可以解析当前栈帧(callee)的结构,从而得到调用者栈帧(caller)的结构,这样逐级向上就能输出整个回溯栈。

编译器选项 -g 生成栈帧信息 .debug_frame

$ readelf -S output/out.elf
There are 18 section headers, starting at offset 0x1bb0fd0:

节头:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .text             PROGBITS        90000000 001000 6c132c 04  AX  0   0 1024
  [ 2] .devtab           PROGBITS        906c132c 6c232c 000c8c 00  WA  0   0  8
  [ 3] .bss              NOBITS          906c1fb8 6c2fb8 16a568 00  WA  0   0  8
  [ 4] .debug_abbrev     PROGBITS        00000000 6c2fb8 0b82a7 00      0   0  1
  [ 5] .debug_info       PROGBITS        00000000 77b25f a37035 00      0   0  1
  [ 6] .debug_line       PROGBITS        00000000 11b2294 3bb5fc 00      0   0  1
  [ 7] .debug_pubnames   PROGBITS        00000000 156d890 04911e 00      0   0  1
  [ 8] .debug_pubtypes   PROGBITS        00000000 15b69ae 166807 00      0   0  1
  [ 9] .debug_aranges    PROGBITS        00000000 171d1b5 018900 00      0   0  1
  [10] .debug_str        PROGBITS        00000000 1735ab5 0c4732 01  MS  0   0  1
  [11] .comment          PROGBITS        00000000 17fa1e7 0000cf 01  MS  0   0  1
  [12] .debug_frame      PROGBITS        00000000 17fa2b8 08c8b0 00      0   0  4
  [13] .debug_ranges     PROGBITS        00000000 1886b68 056380 00      0   0  1
  [14] .debug_loc        PROGBITS        00000000 18dcee8 2d402b 00      0   0  1
  [15] .shstrtab         STRTAB          00000000 1bb0f13 0000bd 00      0   0  1
  [16] .symtab           SYMTAB          00000000 1bb12a0 071130 10     17 14593  4
  [17] .strtab           STRTAB          00000000 1c223d0 086411 00      0   0  1

内核编译选项

ifeq ($(CONFIG_FRAME_POINTER),y)
KBUILD_CFLAGS   +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog
endif

unwind

APCS的缺陷是,维护栈帧的指令过多,栈消耗大,占用的寄存器也过多,比如每次调用都必须将 r11,r12,lr,pc 入栈。使用unwind就能避免这些问题,生成的代码效率要高得多。它的原理是记录每个函数的入栈指令(一般比APCS的入栈要少得多)到特殊的段 .ARM.unwind_idx 和 .ARM.unwind_tab 中。

$ readelf -S vmlinux
There are 33 section headers, starting at offset 0xa33ba8:

节头:
  [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
  [ 1] .head.text        PROGBITS        c0008000 008000 00026c 00  AX  0   0  4
  [ 2] .text             PROGBITS        c0100000 010000 55e3dc 00  AX  0   0 64
  [ 3] .fixup            PROGBITS        c065e3dc 56e3dc 00001c 00  AX  0   0  4
  [ 4] .rodata           PROGBITS        c0700000 570000 105fc8 00  WA  0   0 4096
  [ 5] __bug_table       PROGBITS        c0805fc8 675fc8 0057a8 00   A  0   0  4
  [ 6] __ksymtab         PROGBITS        c080b770 67b770 006e28 00   A  0   0  4
  [ 7] __ksymtab_gpl     PROGBITS        c0812598 682598 005d10 00   A  0   0  4
  [ 8] __ksymtab_strings PROGBITS        c08182a8 6882a8 01e235 00   A  0   0  1
  [ 9] __param           PROGBITS        c08364e0 6a64e0 000d34 00   A  0   0  4
  [10] __modver          PROGBITS        c0837214 6a7214 000dec 00   A  0   0  4
  [11] __ex_table        PROGBITS        c0838000 6a8000 001038 00   A  0   0  8
  [12] .ARM.unwind_idx   ARM_EXIDX       c0839038 6a9038 0273d8 00  AL 17   0  4
  [13] .ARM.unwind_tab   PROGBITS        c0860410 6d0410 003630 00   A  0   0  4
  [14] .notes            NOTE            c0863a40 6d3a40 000024 00  AX  0   0  4
  [15] .vectors          PROGBITS        ffff0000 6e0000 000020 00  AX  0   0  4
  [16] .stubs            PROGBITS        ffff1000 6e1000 0002ac 00  AX  0   0 32
  [17] .init.text        PROGBITS        c09002e0 6f02e0 0440d0 00  AX  0   0 32
  [18] .exit.text        PROGBITS        c09443b0 7343b0 000ee4 00  AX  0   0  4
  [19] .init.arch.info   PROGBITS        c0945294 735294 0000d0 00   A  0   0  4
  [20] .init.tagtable    PROGBITS        c0945364 735364 000048 00   A  0   0  4
  [21] .init.smpalt      PROGBITS        c09453ac 7353ac 00bd10 00   A  0   0  4
  [22] .init.pv_table    PROGBITS        c09510bc 7410bc 00064c 00   A  0   0  1
  [23] .init.data        PROGBITS        c0952000 742000 00a194 00  WA  0   0 4096
  [24] .data..percpu     PROGBITS        c095d000 74d000 0067cc 00  WA  0   0 64
  [25] .data             PROGBITS        c0a00000 760000 0609ac 00  WA  0   0 64
  [26] .data..page_align PROGBITS        c0a61000 7c1000 001000 00  WA  0   0 4096
  [27] .bss              NOBITS          c0a62000 7c2000 03260c 00  WA  0   0 64
  [28] .comment          PROGBITS        00000000 7c2000 00004e 01  MS  0   0  1
  [29] .ARM.attributes   ARM_ATTRIBUTES  00000000 7c204e 00002f 00      0   0  1
  [30] .symtab           SYMTAB          00000000 7c2080 167560 10     31 77680  4
  [31] .strtab           STRTAB          00000000 9295e0 10a46c 00      0   0  1
  [32] .shstrtab         STRTAB          00000000 a33a4c 00015a 00      0   0  1

$ objdump -D vmlinux
c04ee72c <__skb_recv_datagram>:
c04ee72c:       e3110040        tst     r1, #64 ; 0x40
c04ee730:       e92d43f0        push    {r4, r5, r6, r7, r8, r9, lr}
c04ee734:       e1a09003        mov     r9, r3
c04ee738:       05903144        ldreq   r3, [r0, #324]  ; 0x144
c04ee73c:       e24dd014        sub     sp, sp, #20

$ readelf -u vmlinux
0xc04ee72c <__skb_recv_datagram>: 0x8004adb0
  Compact model index: 0
  0x04      vsp = vsp + 20
  0xad      pop {r4, r5, r6, r7, r8, r9, r14}
  0xb0      finish

输出了函数的地址和对应的编码。接下来输出的是编码对应的出栈伪指令,这些伪指令正好是函数栈操作的逆过程,用于回溯。

内核编译选项

ifeq ($(CONFIG_ARM_UNWIND),y)
CFLAGS_ABI  +=-funwind-tables
endif

内核实现

APCS会产生更多的代码指令,对性能有影响,使用unwind的方式会生成额外的段,但不影响性能。

文件 arch/arm/kernel/traps.c

#ifdef CONFIG_ARM_UNWIND
/*
 * Backtrace entry point used when CONFIG_ARM_UNWIND is defined: it simply
 * forwards regs and tsk, unchanged, to unwind_backtrace().
 */
static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
    unwind_backtrace(regs, tsk);
}
#else
/*
 * Frame-pointer based backtrace, built when CONFIG_ARM_UNWIND is not defined.
 *
 * Prints a "Backtrace: " header, picks a starting frame pointer and processor
 * mode, sanity-checks the frame pointer, and passes both to c_backtrace()
 * unless one of the checks cleared 'ok'.
 *
 * regs: when non-NULL, the frame pointer and mode are taken from it.
 * tsk:  task to trace; NULL is replaced by 'current'.
 */
static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
    unsigned int fp, mode;
    int ok = 1; /* cleared when the frame pointer is missing or rejected */

    printk("Backtrace: ");

    if (!tsk)
        tsk = current;

    if (regs) {
        /* A register snapshot was supplied: start from its fp and mode. */
        fp = frame_pointer(regs);
        mode = processor_mode(regs);
    } else if (tsk != current) {
        /* Another task: start from the value thread_saved_fp() returns. */
        fp = thread_saved_fp(tsk);
        /* NOTE(review): 0x10 is presumably the ARM user-mode value - confirm */
        mode = 0x10;
    } else {
        /* Tracing the running task without regs: read the live fp register. */
        asm("mov %0, fp" : "=r" (fp) : : "cc");
        mode = 0x10;
    }

    /*
     * Validate fp and append the reason to the header line on failure.
     * A zero fp or a non-zero verify_stack() result clears 'ok'.
     * NOTE(review): the "underflow" branch only reports; it leaves 'ok' set,
     * so c_backtrace() is still called in that case - confirm this is intended.
     */
    if (!fp) {
        pr_cont("no frame pointer");
        ok = 0;
    } else if (verify_stack(fp)) {
        pr_cont("invalid frame pointer 0x%08x", fp);
        ok = 0;
    } else if (fp < (unsigned long)end_of_stack(tsk))
        pr_cont("frame pointer underflow");
    /* Always terminate the "Backtrace: " line, whatever was printed on it. */
    pr_cont("\n");

    if (ok)
        c_backtrace(fp, mode);
}
#endif

Ref

  1. APCS,ARM 过程调用标准(ARM Procedure Call Standard)
  2. ARM FP寄存器及frame pointer介绍
  3. Stack backtrace 的实现