记录内核模块调试的相关内容
Linux Kernel Config
Linux内核调试需要打开如下内核配置
- Kernel hacking
- Kernel debugging
- Detect Soft Lockups
- Collect scheduler debugging info
- Compile the kernel with debug info
- Self test for the backtrace code
Linux module prepare
编译 Linux 模块时需要打开调试信息:-g -O0 -ggdb
另外,下面两个编译选项用于支持 gdb 的 watch(观察点):
- -fno-exceptions
- -fstrict-volatile-bitfields
GDB
(cskygdb) help add-symbol-file
Load symbols from FILE, assuming FILE has been dynamically loaded.
Usage: add-symbol-file FILE ADDR [-s <SECT> <SECT_ADDR> -s <SECT> <SECT_ADDR> ...]
ADDR is the starting address of the file's text.
The optional arguments are section-name section-address pairs and
should be specified if the data and bss segments are not contiguous
with the text. SECT is a section name to be loaded at SECT_ADDR.
加载地址是 ko 的 .text 段所在位置,而不是 ko 本身的加载位置,即"模块基址 + .text 段在文件中的偏移"(本例为 0xc016d000 + 0x34 = 0xc016d034),计算过程如下:
$ lsmod | grep xx
xx 1495536 0 - Live 0xc016d000
$ objdump --section-headers xx.o | grep text
0 .text 000003d0 00000000 00000000 00000034 2**2
因此,在 gdb 中加载符号如下:
(gdb) add-symbol-file /path/to/xx.ko 0xc016d034
add symbol table from file "/path/to/xx.ko" at
.text_addr = 0xc016d034
(y or n) y
Reading symbols from /path/to/xx.ko...done.
如果需要调试静态符号,还需要通过 -s 选项指定 .bss 段的加载地址:
(gdb) add-symbol-file /path/to/xx.ko 0xc016d034 -s .bss 0xc016e034
add symbol table from file "/path/to/xx.ko" at
.text_addr = 0xc016d034
.bss_addr = 0xc016e034
(y or n) y
Reading symbols from /path/to/xx.ko...done.
Linux modules
ko 加载时触发 module_init(xx_module_init),卸载时触发 module_exit(xx_module_exit)
/*
 * Module entry point (invoked at load time via module_init()).
 *
 * Steps, in order:
 *   1. xx_init() runs the sub-module init; its result is stored in
 *      device_count and a negative value is returned to the caller as-is.
 *   2. A char-device number region starting at MKDEV(XX_MAJOR, 0) is
 *      reserved for device_count minors.
 *   3. The cdev is initialised with xx_fops and added.
 *   4. The device class is created and one class device is created per
 *      minor.
 *   5. The proc entries are created.
 *
 * Returns 0 on success, or the error code of the step that failed. On
 * failure only the steps that actually completed are unwound, in reverse
 * order of acquisition.
 */
static int __init xx_module_init(void)
{
	dev_t dev_id;
	int ret;
	int i;

	/* Run the sub-module init. */
	device_count = xx_init();
	if (device_count < 0)
		return device_count;

	dev_id = MKDEV(XX_MAJOR, 0);
	ret = register_chrdev_region(dev_id, device_count, XX_DEVICE_NAME);
	if (ret != 0) {
		printk(KERN_ERR "xx-core: unable to get major %d\n", XX_MAJOR);
		goto err_devices;
	}

	cdev_init(&cdev, &xx_fops);
	ret = cdev_add(&cdev, dev_id, device_count);
	if (ret != 0) {
		printk(KERN_ERR "xx-core: unable register character device\n");
		/* The cdev was never added, so it must not be deleted. */
		goto err_region;
	}

	xx_class = class_create(THIS_MODULE, XX_CLASS_NAME);
	if (IS_ERR(xx_class)) {
		ret = PTR_ERR(xx_class);
		goto err_cdev;
	}

	/*
	 * Braces are kept on purpose: CLASS_DEV_CREATE is a macro whose
	 * expansion is not visible here.
	 * NOTE(review): its result is not checked — confirm whether it can fail.
	 */
	for (i = 0; i < device_count; i++) {
		CLASS_DEV_CREATE(xx_class, NULL, MKDEV(XX_MAJOR, i), i);
	}

	xx_create_proc_entries();
	printk(KERN_EMERG "%s: ok! \n", __FUNCTION__);
	return 0;

err_cdev:
	cdev_del(&cdev);
err_region:
	unregister_chrdev_region(dev_id, device_count);
err_devices:
	xx_devices_cleanup();
	return ret;
}
/*
 * Module exit point (invoked at unload time via module_exit()).
 *
 * Teardown order: destroy the class device of every minor, run
 * xx_devices_cleanup(), destroy the class, delete the cdev and finally
 * release the char-device number region starting at MKDEV(XX_MAJOR, 0).
 * A message is logged on entry and on completion.
 */
static void __exit xx_module_exit(void)
{
	int minor = 0;

	printk(KERN_EMERG "%s: av_devices.count = %d\n", __func__, device_count);

	/* One class device exists per minor in [0, device_count). */
	while (minor < device_count) {
		CLASS_DEV_DESTROY(xx_class, MKDEV(XX_MAJOR, minor));
		minor++;
	}

	xx_devices_cleanup();

	class_destroy(xx_class);
	cdev_del(&cdev);
	unregister_chrdev_region(MKDEV(XX_MAJOR, 0), device_count);

	printk(KERN_EMERG "%s: ok! \n", __func__);
}
/*
 * Tear down every device that is still on device_list.
 *
 * Each iteration takes the current head of device_list, runs
 * device_uninit() on it and then device_unregister(), and repeats until
 * device_list is NULL.
 *
 * An empty list is still reported through XX_ASSERT, but if the assertion
 * does not stop execution the function now returns without passing a NULL
 * device to device_uninit()/device_unregister().
 */
void xx_devices_cleanup(void)
{
	struct xx_device *pos = device_list;

	XX_ASSERT(pos != NULL);

	while (pos != NULL) {
		/* uninit mainly runs the sub-module close and cleanup. */
		device_uninit(pos);
		/*
		 * NOTE(review): termination relies on device_unregister()
		 * unlinking pos from device_list — confirm against its
		 * implementation.
		 */
		device_unregister(pos);
		pos = device_list;
	}
}
problem
出问题代码
/*
 * Problematic open path, kept unchanged to illustrate the bug.
 * Setup of xx happens here, and only while xx.thread is NULL: xx.ops is
 * pointed at xx_ops and __xx_init(a, b) is called.
 */
xx_open()
{
//something
/* Setup is keyed on xx.thread being NULL. */
if(xx.thread == NULL) {
xx.ops = &xx_ops;
__xx_init(a, b);
}
}
/*
 * Problematic close path, kept unchanged to illustrate the bug.
 * When xx.thread is non-NULL it calls __xx_stop(a, b) followed by
 * __xx_exit(xx). Per the accompanying notes, this can be triggered by one
 * user (e.g. demux1) while another (demux0) is still open, leaving xx
 * cleared for the remaining user and leading to a crash.
 */
xx_close()
{
//something
/* Teardown is keyed on xx.thread being non-NULL. */
if(xx.thread != NULL) {
__xx_stop(a, b);
__xx_exit(xx);
}
}
在 xx_open / xx_close 的上层封装中,并不是每次调用 open / close 都会触发底层驱动,而是由打开次数来决定:只有在 open 时当前模块尚未打开,才会调用 xx_open。
因此在上述代码中,在某些情况下,其他模块(例如 demux1)触发了 xx_close,从而将 xx 结构体清零,而模块 demux0 并没有关闭;
之后再操作 demux0 时,调用 open 不会起作用(不会再次进入 xx_open),此时 xx 为空,导致死机。
修改方法:将 xx 相关信息放到 xx_init 和 xx_cleanup 中处理,这样 xx_open 和 xx_close 对 xx 无影响。