0%

打印用户态段错误信息

使用 user_debug 打印由用户态引起的 oops 信息

Linux应用调试-修改内核来打印用户态的oops

  • Linux 4.9
  • arm arch

Linux 内核错误处理

文件 arch/arm/mm/fsr-2level.c

{ do_bad,       SIGSEGV, 0,     "vector exception"         },
{ do_bad,       SIGBUS,  BUS_ADRALN,    "alignment exception"          },
{ do_bad,       SIGKILL, 0,     "terminal exception"           },
{ do_bad,       SIGBUS,  BUS_ADRALN,    "alignment exception"          },
{ do_bad,       SIGBUS,  0,     "external abort on linefetch"      },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR,   "section translation fault"    },
{ do_bad,       SIGBUS,  0,     "external abort on linefetch"      },
{ do_page_fault,    SIGSEGV, SEGV_MAPERR,   "page translation fault"       },
{ do_bad,       SIGBUS,  0,     "external abort on non-linefetch"  },
{ do_bad,       SIGSEGV, SEGV_ACCERR,   "section domain fault"         },
{ do_bad,       SIGBUS,  0,     "external abort on non-linefetch"  },
{ do_bad,       SIGSEGV, SEGV_ACCERR,   "page domain fault"        },
{ do_bad,       SIGBUS,  0,     "external abort on translation"    },
{ do_sect_fault,    SIGSEGV, SEGV_ACCERR,   "section permission fault"     },
{ do_bad,       SIGBUS,  0,     "external abort on translation"    },
{ do_page_fault,    SIGSEGV, SEGV_ACCERR,   "page permission fault"        },
  • do_translation_fault 调用 do_bad_area
  • do_page_fault 调用 __do_user_fault 或 __do_kernel_fault
  • do_sect_fault 调用 do_bad_area
/*
 * Kernel-mode access to an address that could not be resolved.
 *
 * If fixup_exception() reports the fault as handled, simply return.
 * Otherwise log the faulting address, dump the page-table entry via
 * show_pte(), raise an "Oops" through die() and exit with SIGKILL.
 */
static void
__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
          struct pt_regs *regs)
{
    const char *what;

    /* A fixup took care of this fault: nothing left to report. */
    if (fixup_exception(regs))
        return;

    /* Addresses below PAGE_SIZE are reported as NULL dereferences. */
    if (addr < PAGE_SIZE)
        what = "NULL pointer dereference";
    else
        what = "paging request";

    /* No fixup available: report the fault and terminate. */
    bust_spinlocks(1);
    pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
         what, addr);

    show_pte(mm, addr);
    die("Oops", regs, fsr);
    bust_spinlocks(0);
    do_exit(SIGKILL);
}

/*
 * Something tried to access memory that isn't in our memory map..
 * User mode accesses just cause a SIGSEGV
 */
static void
__do_user_fault(struct task_struct *tsk, unsigned long addr,
        unsigned int fsr, unsigned int sig, int code,
        struct pt_regs *regs)
{
    struct siginfo si;

#ifdef CONFIG_DEBUG_USER
    if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
        ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
        printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
               tsk->comm, sig, addr, fsr);
        show_pte(tsk->mm, addr);
        show_regs(regs);
    }
#endif

    tsk->thread.address = addr;
    tsk->thread.error_code = fsr;
    tsk->thread.trap_no = 14;
    si.si_signo = sig;
    si.si_errno = 0;
    si.si_code = code;
    si.si_addr = (void __user *)addr;
    force_sig_info(sig, &si, tsk);
}

/*
 * Dispatch a fault on an unmapped address according to the mode it was
 * taken in: kernel-mode faults go to __do_kernel_fault(), user-mode faults
 * are turned into SIGSEGV / SEGV_MAPERR by __do_user_fault().
 */
void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
    struct task_struct *task = current;

    /*
     * A fault taken in kernel mode has no user context to
     * deliver a signal to, so take the kernel fault path.
     */
    if (!user_mode(regs)) {
        __do_kernel_fault(task->active_mm, addr, fsr, regs);
        return;
    }

    __do_user_fault(task, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
}

可以看出,__do_kernel_fault 负责输出内核态 oops,__do_user_fault 负责输出用户态的错误信息;但后者要真正打印出来,还需要同时满足以下两个条件:

  1. 定义 CONFIG_DEBUG_USER
  2. 设置 user_debug

配置内核

  • Kernel hacking -> Verbose user fault messages
  • user_debug 见文档 kernel-parameters.txt,在 cmdline 中增加 user_debug=31
      user_debug= [KNL,ARM]
              Format: <int>
              See arch/arm/Kconfig.debug help text.
                  1 - undefined instruction events
                  2 - system calls
                  4 - invalid data aborts
                  8 - SIGSEGV faults
                  16 - SIGBUS faults
              Example: user_debug=31
    /*
     * Mask of user-mode fault classes to report; tested against UDBG_* bits
     * (e.g. UDBG_SEGV, UDBG_BUS) before a fault is logged.
     */
    unsigned int user_debug;
    /*
     * Parse the value given to "user_debug=" into user_debug via
     * get_option().  Always returns 1.
     */
    static int __init user_debug_setup(char *str)
    {
      get_option(&str, &user_debug);
      return 1;
    }
    /* Register user_debug_setup() as the handler for the "user_debug=" parameter. */
    __setup("user_debug=", user_debug_setup);

修改之后测试会输出调试信息,但不包括栈信息

打印应用栈数据

参考内核态 oops 路径(__do_kernel_fault 调用 die(),最终进入 arch/arm/kernel/traps.c 的 __die())中打印栈信息的代码

/*
 * Taken only when the fault did not come from user mode, or when running in
 * interrupt context: dump memory labelled "Stack: " from the saved SP up to
 * task_stack_page(tsk) + THREAD_SIZE, then call dump_backtrace() and
 * dump_instr().  User-mode faults in process context skip all of this.
 */
if (!user_mode(regs) || in_interrupt()) {
    dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp,
         THREAD_SIZE + (unsigned long)task_stack_page(tsk));
    dump_backtrace(regs, tsk);
    dump_instr(KERN_EMERG, regs);
}

通过sp寄存器里存的栈地址,每打印一个栈地址里的32位数据, 栈地址便加4

__do_user_fault 中修改如下

unsigned long ret;
unsigned long val;
int i = 0;
printk("Stack: \n");
while(i < 1024){
    if(copy_from_user(&val, (const void __user *)(regs->ARM_sp + i*4), 4)){
        break;
    }
    i++;
    printk("%08x ", val);
    if(i%8 == 0)
        printk("\n");
}
printk("\n end of stack \n");