	/*
	 * Was the fault on kernel-controlled part of the address space?
	 * Check the faulting address to decide whether it belongs to
	 * kernel space or user space.
	 */
	if (unlikely(fault_in_kernel_space(address)))
		/* Handle a page fault on a kernel address */
		do_kern_addr_fault(regs, hw_error_code, address);
	else
		/* Handle a page fault on a user address */
		do_user_addr_fault(regs, hw_error_code, address);
}
NOKPROBE_SYMBOL(__do_page_fault);
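The split itself hinges on a single comparison: anything at or above the user/kernel boundary is kernel-controlled. A minimal sketch of the check, modeled on arch/x86/mm/fault.c of this era (later kernels additionally special-case the vsyscall page):

static int fault_in_kernel_space(unsigned long address)
{
	/* Addresses at or above TASK_SIZE_MAX belong to the kernel */
	return address >= TASK_SIZE_MAX;
}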
/* Handle faults in the user portion of the address space */
static inline void
do_user_addr_fault(struct pt_regs *regs,
		   unsigned long hw_error_code,
		   unsigned long address)
{
	tsk = current;
	mm = tsk->mm;	/* Grab this task's memory descriptor, mm */
	/* ... */
	/*
	 * hw_error_code is literally the "page fault error code" passed to
	 * the kernel directly from the hardware. But, we will shortly be
	 * modifying it in software, so give it a new name.
	 */
	sw_error_code = hw_error_code;
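For reference, the error code is a small bitmask pushed by the CPU on every page fault; the handler tests individual bits of it (X86_PF_USER appears below). The bits, paraphrased from the kernel's X86_PF_* definitions:

#define X86_PF_PROT	(1 << 0)	/* 0: page not present, 1: protection fault */
#define X86_PF_WRITE	(1 << 1)	/* 0: read access, 1: write access */
#define X86_PF_USER	(1 << 2)	/* 0: kernel-mode access, 1: user-mode access */
#define X86_PF_RSVD	(1 << 3)	/* reserved bit set in a paging entry */
#define X86_PF_INSTR	(1 << 4)	/* fault was an instruction fetch */
#define X86_PF_PK	(1 << 5)	/* protection-keys violation */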
/* ... */
	vma = find_vma(mm, address);	/* Look up the vma covering address in mm */
	if (unlikely(!vma)) {
		/* No vma covers this address: it is invalid */
		bad_area(regs, sw_error_code, address);
		return;
	}
	if (likely(vma->vm_start <= address))
		/* The address lies inside the vma: jump to good_area */
		goto good_area;
	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
		bad_area(regs, sw_error_code, address);
		return;
	}
	if (sw_error_code & X86_PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535, $31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		/* The access is too far below the stack pointer: out of bounds */
		if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
			bad_area(regs, sw_error_code, address);
			return;
		}
	}
	if (unlikely(expand_stack(vma, address))) {
		bad_area(regs, sw_error_code, address);
		return;
	}
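From user space, every bad_area() path above surfaces as a SIGSEGV. A hypothetical demo program (assuming the low page is unmapped, which mmap_min_addr normally guarantees): dereferencing an address that no vma covers makes find_vma() come up empty, and the process is killed:

#include <stdio.h>

int main(void)
{
	/*
	 * No vma maps this address, so in the kernel find_vma() fails,
	 * bad_area() runs, and this process receives SIGSEGV.
	 */
	volatile int *unmapped = (int *)0x10;

	printf("about to fault...\n");
	*unmapped = 42;		/* never returns normally */
	return 0;
}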
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault. Since we never set FAULT_FLAG_RETRY_NOWAIT, if
	 * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
	 *
	 * Note that handle_userfault() may also release and reacquire mmap_sem
	 * (and not return with VM_FAULT_RETRY), when returning to userland to
	 * repeat the page fault later with a VM_FAULT_NOPAGE retval
	 * (potentially after handling any pending signal during the return to
	 * userland). The return to userland is identified whenever
	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
	 */
	fault = handle_mm_fault(vma, address, flags);	/* The actual fault handling */
	major |= fault & VM_FAULT_MAJOR;
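The VM_FAULT_MAJOR bit collected here feeds the per-task fault counters that getrusage(2) exposes as ru_majflt and ru_minflt (a fault is "major" when it had to do I/O, e.g. read from swap or a file). A small user-space probe, a sketch only, showing first-touch minor faults on freshly allocated memory:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>

int main(void)
{
	struct rusage before, after;
	size_t len = 8 << 20;		/* 8 MiB */
	char *buf = malloc(len);

	getrusage(RUSAGE_SELF, &before);
	memset(buf, 1, len);		/* first touch: one minor fault per page */
	getrusage(RUSAGE_SELF, &after);

	printf("minor faults: %ld, major faults: %ld\n",
	       after.ru_minflt - before.ru_minflt,
	       after.ru_majflt - before.ru_majflt);
	free(buf);
	return 0;
}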
	if (unlikely(pmd_none(*vmf->pmd))) {
		/* No page middle directory entry, so no page table either */
		vmf->pte = NULL;
	} else {
		/* ... */
		/* The pmd entry exists; locate the page-table entry for address */
		vmf->pte = pte_offset_map(vmf->pmd, vmf->address);
		vmf->orig_pte = *vmf->pte;

		/*
		 * some architectures can have larger ptes than wordsize,
		 * e.g. ppc44x-defconfig has CONFIG_PTE_64BIT=y and
		 * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic
		 * accesses. The code below just needs a consistent view
		 * for the ifs and we later double check anyway with the
		 * ptl lock held. So here a barrier will do.
		 */
		barrier();
		if (pte_none(vmf->orig_pte)) {
			/* The pmd exists but the pte is empty: set vmf->pte to NULL */
			pte_unmap(vmf->pte);
			vmf->pte = NULL;
		}
	}
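With vmf->pte settled, handle_pte_fault() can dispatch on what it found. Roughly, paraphrasing the mainline logic with huge-page and error handling omitted: a missing pte on an anonymous vma is demand-allocated, a missing pte on a file-backed vma is filled from the file, and a non-empty but non-present pte is brought back from swap:

	if (!vmf->pte) {
		if (vma_is_anonymous(vmf->vma))
			return do_anonymous_page(vmf);	/* anonymous memory */
		else
			return do_fault(vmf);		/* file-backed mapping */
	}

	if (!pte_present(vmf->orig_pte))
		return do_swap_page(vmf);		/* page was swapped out */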