variable-argument-lists in AARCH64

Table of Contents

Variadic functions

定义: https://en.cppreference.com/w/c/variadic
这类函数的特点在于,参数的个数是不固定的,具体有多少个参数,实际上是由调用者决定的,被调用的函数本身并不知道具体会传递多少个参数.

资料

AArch64的过程调用规范AAPCS64(Procedure Call Standard for the Arm 64-bit Architecture)中已经对可变参数函数做了详细的说明,下面就以一个实际的例子来看一下具体的过程

通过资料,我们大概可以了解到,可变参数函数在使用时,需要使用va_list这样一个结构体,函数在被调用时,通过一系列的宏来准备和获取参数.
下边是va_arg的伪代码

type va_arg (va_list ap, type)
{
    int nreg, offs;
    if (type passed in general registers) {
        offs = ap.__gr_offs;
        if (offs >= 0)
            goto on_stack;              // reg save area empty
        if (alignof(type) > 8)
            offs = (offs + 15) & -16;   // round up
        nreg = (sizeof(type) + 7) / 8;
        ap.__gr_offs = offs + (nreg * 8);
        if (ap.__gr_offs > 0)
            goto on_stack;              // overflowed reg save area
#ifdef BIG_ENDIAN
        if (classof(type) != "aggregate" && sizeof(type) < 8)
            offs += 8 - sizeof(type);
#endif
        return *(type *)(ap.__gr_top + offs);
    } else if (type is an HFA or an HVA) {
        type ha;       // treat as "struct {ftype field[n];}"
        offs = ap.__vr_offs;
        if (offs >= 0)
            goto on_stack;              // reg save area empty
        nreg = sizeof(type) / sizeof(ftype);
        ap.__vr_offs = offs + (nreg * 16);
        if (ap.__vr_offs > 0)
            goto on_stack;              // overflowed reg save area
#ifdef BIG_ENDIAN
        if (sizeof(ftype) < 16)
            offs += 16 - sizeof(ftype);
#endif
        for (i = 0; i < nreg; i++, offs += 16)
            ha.field[i] = *((ftype *)(ap.__vr_top + offs));
        return ha;
    } else if (type passed in fp/simd registers) {
        offs = ap.__vr_offs;
        if (offs >= 0)
            goto on_stack;              // reg save area empty
        nreg = (sizeof(type) + 15) / 16;
        ap.__vr_offs = offs + (nreg * 16);
        if (ap.__vr_offs > 0)
            goto on_stack;              // overflowed reg save area
#ifdef BIG_ENDIAN
        if (classof(type) != "aggregate" && sizeof(type) < 16)
            offs += 16 - sizeof(type);
#endif
        return *(type *)(ap.__vr_top + offs);
    }
on_stack:
    intptr_t arg = ap.__stack;
    if (alignof(type) > 8)
        arg = (arg + 15) & -16;
    ap.__stack = (void *)((arg + sizeof(type) + 7) & -8);
#ifdef BIG_ENDIAN
    if (classof(type) != "aggregate" && sizeof(type) < 8)
        arg += 8 - sizeof(type);
#endif
    return *(type *)arg;
}

例子

本文以Linux Kernel中的printk为研究对象

可变参数不超过7个(全部通过x1 ~ x7传递), 以两个参数(fmt加上一个可变参数)为例

Caller代码

__mdiobus_register
     bus->state = MDIOBUS_REGISTERED;
     pr_info("%s: probed\n", bus->name);

Caller disassemble

下面就是调用的地方,从汇编代码上来看,这里并没有什么特别之处:在调用printk之前,x0为格式化输出字符串的地址,x1为参数bus->name,和普通的参数传递方法完全一致.

     419
     420             bus->state = MDIOBUS_REGISTERED;
     0xffff0000086bb8d0 <+432>:   ldr     x1, [x20, #8]
         0xffff0000086bb8d4 <+436>:   mov     w0, #0x2                        // #2
         0xffff0000086bb8d8 <+440>:   str     w0, [x20, #152]

         421             pr_info("%s: probed\n", bus->name);
     0xffff0000086bb8dc <+444>:   adrp    x0, 0xffff000008e97000
         0xffff0000086bb8e0 <+448>:   add     x0, x0, #0xaf0
         0xffff0000086bb8e4 <+452>:   bl      0xffff000008127454 <printk>
    (gdb) x/s $x0
0xffff000008e97af0:     "\001\066libphy: %s: probed\n"
(gdb) x/s $x1
0xffff000008e98080:     "Fixed MDIO Bus"
(gdb)

上面例子是内核中__mdiobus_register函数的片段

Callee disassemble

(gdb) disassemble /s
Dump of assembler code for function printk:
kernel/printk/printk.c:
1990    {
   => 0xffff000008127454 <+0>:     stp     x29, x30, [sp, #-176]!                     sp = 0xffff00000805bcf0
   0xffff000008127458 <+4>:     mov     w8, #0xffffffc8                 // #-56       sp = 0xFFFF00000805bc40
   0xffff00000812745c <+8>:     mov     x29, sp                                       x29 = sp, fp = 0xFFFF00000805bc40
   0xffff000008127460 <+12>:    add     x9, sp, #0x70                                 x9= 0xFFFF00000805bcb0
   0xffff000008127464 <+16>:    add     x10, sp, #0xb0                                x10 = 0xffff00000805bcf0  i.e. stack_top
   0xffff000008127468 <+20>:    str     x19, [sp, #16]                                backup x19 to stack Local Variables
   0xffff00000812746c <+24>:    adrp    x19, 0xffff0000092b9000 <page_wait_table+5376>
   0xffff000008127470 <+28>:    add     x19, x19, #0x6c8                              x19 = address of __stack_chk_guard
   0xffff000008127474 <+32>:    stp     x10, x10, [sp, #72]                           prepare va_list args
   0xffff000008127478 <+36>:    str     x9, [sp, #88]                                 save x9 to 0xFFFF00000805BC98
   0xffff00000812747c <+40>:    ldr     x9, [x19]                                     load stack guard magic value to x9.
   0xffff000008127480 <+44>:    str     x9, [sp, #104]                                store stack guard magic value to stack on 0xFFFF00000805BCA8.
   0xffff000008127484 <+48>:    mov     x9, #0x0                        // #0
   0xffff000008127488 <+52>:    stp     w8, wzr, [sp, #96]                           设置初始的__gr_offs(w8 = -56)和__vr_offs(wzr = 0)
   0xffff00000812748c <+56>:    ldp     x8, x9, [sp, #72]
   0xffff000008127490 <+60>:    stp     x8, x9, [sp, #32]
   0xffff000008127494 <+64>:    ldp     x8, x9, [sp, #88]
   0xffff000008127498 <+68>:    stp     x1, x2, [sp, #120]                           将x1 ~ x7放入GP Arg Save Area的区域
   0xffff00000812749c <+72>:    add     x1, sp, #0x20
   0xffff0000081274a0 <+76>:    stp     x8, x9, [sp, #48]
   0xffff0000081274a4 <+80>:    stp     x3, x4, [sp, #136]
   0xffff0000081274a8 <+84>:    stp     x5, x6, [sp, #152]
   0xffff0000081274ac <+88>:    str     x7, [sp, #168]                               已经将GP Arg Save Area设置好,va_list args参数已经处理好了。
(gdb) i r pc
pc             0xffff0000081274b0  0xffff0000081274b0 <printk+92>
(gdb) p args
$6 = {
  __stack = 0xffff00000805bcf0,
  __gr_top = 0xffff00000805bcf0,
  __vr_top = 0xffff00000805bcb0,
  __gr_offs = -56,
  __vr_offs = 0
}
   0xffff0000081274b0 <+92>:    bl      0xffff000008127e28 <vprintk_func>

1996            va_end(args);
1997
1998            return r;
   0xffff0000081274b4 <+96>:    ldr     x2, [sp, #104]
   0xffff0000081274b8 <+100>:   ldr     x1, [x19]
   0xffff0000081274bc <+104>:   eor     x1, x2, x1
   0xffff0000081274c0 <+108>:   cbz     x1, 0xffff0000081274c8 <printk+116>
   0xffff0000081274c4 <+112>:   bl      0xffff0000080d3d48 <__stack_chk_fail>
   0xffff0000081274c8 <+116>:   ldr     x19, [sp, #16]
   0xffff0000081274cc <+120>:   ldp     x29, x30, [sp], #176
   0xffff0000081274d0 <+124>:   ret
End of assembler dump.

从上面的例子中,可以看出来,在可变参数不超过7个时,参数全部通过x1 ~ x7传递,编译器会将x1 ~ x7全部存入GP Arg Save Area的区域,并把准备好的va_list传递给vprintk_func来处理.
下边的函数通过va_list args这个参数就可以遍历所有x1 ~ x7的参数了。

参数个数大于7

同样的,取内核中的例子:

Caller 代码

void __init mem_init_print_info(const char *str)
{
    pr_info("Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved"
#ifdef  CONFIG_HIGHMEM
        ", %luK highmem"
#endif
        "%s%s)\n",
        nr_free_pages() << (PAGE_SHIFT - 10),
        physpages << (PAGE_SHIFT - 10),
        codesize >> 10, datasize >> 10, rosize >> 10,
        (init_data_size + init_code_size) >> 10, bss_size >> 10,
        (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT - 10),
        totalcma_pages << (PAGE_SHIFT - 10),
#ifdef  CONFIG_HIGHMEM
        totalhigh_pages << (PAGE_SHIFT - 10),
#endif
        str ? ", " : "", str ? str : "");
}

这段内存初始化时的打印,传递了非常多的参数,就以这个作为研究对象吧

Callee va_list

上面的例子中已经简单地分析过printk的汇编代码,这里不再赘述,直接去看va_list的值

#0  0xffff0000081274b0 in printk (fmt=0xffff000008e04b68 "\001\066Memory: %luK/%luK available (%luK kernel code, %luK rwdata, %luK rodata, %luK init, %luK bss, %luK reserved, %luK cma-reserved%s%s)\n") at kernel/printk/printk.c:1995
        args = {
          __stack = 0xffff0000092b3f00,
          __gr_top = 0xffff0000092b3f00,
          __vr_top = 0xffff0000092b3ec0,
          __gr_offs = -56,
          __vr_offs = 0
        }
        r = <optimized out>
#1  0xffff000009196994 in mem_init_print_info (str=0xffff000008e0e090 "") at ./include/linux/vmstat.h:184
        physpages = <optimized out>
        codesize = <optimized out>
        datasize = <optimized out>
        rosize = <optimized out>
        bss_size = <optimized out>
        init_code_size = <optimized out>
        init_data_size = <optimized out>
#2  0xffff000009185e9c in mem_init () at arch/arm64/mm/init.c:608
No locals.
#3  0xffff000009180b90 in mm_init () at init/main.c:520
No locals.
#4  start_kernel () at init/main.c:590
        command_line = 0xffff000009226028 <boot_command_line> "root=/dev/ram0 console=ttyAMA0"
        after_dashes = <optimized out>
#5  0x0000000000000000 in ?? ()
(gdb) i r pc
pc             0xffff0000081274b0  0xffff0000081274b0 <printk+92>
(gdb) x/16gx args.__stack
0xffff0000092b3f00:     0x000000000000c848      0x0000000000008000 分别是第8和第9个参数
0xffff0000092b3f10:     0xffff000008e0e090      0xffff000008e0e090 第10和第11个参数
0xffff0000092b3f20:     0xffff0000092b3f70      0xffff000009185e9c
0xffff0000092b3f30:     0xffff000009407000      0xffff0000092d55b8
0xffff0000092b3f40:     0xffff0000092b96c8      0xffff0000092b96c0
0xffff0000092b3f50:     0xffff800005fffe00      0xffff000009226028
0xffff0000092b3f60:     0x0000000000000000      0x07bd33f332224500
0xffff0000092b3f70:     0xffff0000092b3fa0      0xffff000009180b90
(gdb) x/s 0xffff000008e0e090
0xffff000008e0e090:     ""

从上边准备的va_list可以看到,在传递多于7个可变参数时,超出部分的参数存放的位置还是和正常的函数调用一样,存放在Caller的Stack Arg Area中,va_list的__stack正好指向这块区域
打印出来的log

[    0.000000] Kernel command line: root=/dev/ram0 console=ttyAMA0
[    0.000000] Memory: 47032K/131072K available (10236K kernel code, 1352K rwdata, 7112K rodata, 1216K init, 379K bss, 51272K reserved, 32768K cma-reserved)
[    0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=2, Nodes=1
[    0.000000] Preemptible hierarchical RCU implementation.
[    0.000000]  RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=2.
[    0.000000]  Tasks RCU enabled.

从log可以看出,栈上的最后4个参数(0xc848 = 51272, 0x8000 = 32768, 以及两个空字符串)与打印结果中的51272K reserved, 32768K cma-reserved完全一致.

refs

Contact me via :)
虚怀乃若谷,水深则流缓。