/* * This struct defines a memory VMM memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory * space that has a special rule for the page-fault handlers (ie a shared * library, the executable area etc). */ struct vm_area_struct { /* The first cache line has the info for VMA tree walking. */
/* linked list of VM areas per task, sorted by address */ struct vm_area_struct *vm_next, *vm_prev;/* 虚拟内存区域链表中的前继,后继指针 */ struct rb_node vm_rb;
/* * Largest free memory gap in bytes to the left of this VMA. * Either between this VMA and vma->vm_prev, or between one of the * VMAs below us in the VMA rbtree and its ->vm_prev. This helps * get_unmapped_area find a free area of the right size. */ unsigned long rb_subtree_gap;
/* Second cache line starts here. */
/* Function pointers to deal with this struct. */ const struct vm_operations_struct *vm_ops;/* 虚拟内存操作集合 */ struct mm_struct *vm_mm;/* vma所属的虚拟地址空间 */ pgprot_t vm_page_prot; /* Access permissions of this VMA. */ unsigned long vm_flags; /* Flags, see mm.h. */ unsigned long vm_pgoff; /* 以Page为单位的偏移. */ struct file * vm_file;/* 映射的文件,匿名映射即为 NULL */
/* * Does the application expect PROT_READ to imply PROT_EXEC? * * (the exception is when the underlying filesystem is noexec * mounted, in which case we dont add PROT_EXEC.) */ if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) if (!(file && path_noexec(&file->f_path))) prot |= PROT_EXEC; /* force arch specific MAP_FIXED handling in get_unmapped_area */ if (flags & MAP_FIXED_NOREPLACE) flags |= MAP_FIXED; /* 假如没有设置MAP_FIXED标志,且addr小于mmap_min_addr, 因为可以修改addr, 所以就需要将addr设为mmap_min_addr的页对齐后的地址 */ if (!(flags & MAP_FIXED)) addr = round_hint_to_min(addr);
/* Careful about overflows.. */ /* 进行Page大小的对齐 */ len = PAGE_ALIGN(len); if (!len) return -ENOMEM;
/* Too many mappings? */ /* 判断该进程的地址空间的虚拟区间数量是否超过了限制 */ if (mm->map_count > sysctl_max_map_count) return -ENOMEM;
/* Obtain the address to map to. we verify (or select) it and ensure * that it represents a valid section of the address space. */ /* get_unmapped_area从当前进程的用户空间获取一个未被映射区间的起始地址 */ addr = get_unmapped_area(file, addr, len, pgoff, flags); /* 检查addr是否有效 */ if (offset_in_page(addr)) return addr;
if (prot == PROT_EXEC) { pkey = execute_only_pkey(mm); if (pkey < 0) pkey = 0; }
/* Do simple checking here so the lower-level routines won't have * to. we assume access permissions have been handled by the open * of the memory object, so we don't do any here. */ vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; /* 假如flags设置MAP_LOCKED,即类似于mlock()将申请的地址空间锁定在内存中, 检查是否可以进行lock*/ if (flags & MAP_LOCKED) if (!can_do_mlock()) return -EPERM;
/* ... 根据标志指定的map种类,把为文件设置的访问权考虑进去。 如果所请求的内存映射是共享可写的,就要检查要映射的文件是为写入而打开的,而不 是以追加模式打开的,还要检查文件上没有上强制锁。 对于任何种类的内存映射,都要检查文件是否为读操作而打开的。 ... */ } else { switch (flags & MAP_TYPE) { case MAP_SHARED: if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) return -EINVAL; /* * Ignore pgoff. */ pgoff = 0; vm_flags |= VM_SHARED | VM_MAYSHARE; break; case MAP_PRIVATE: /* * Set pgoff according to addr for anon_vma. */ pgoff = addr >> PAGE_SHIFT; break; default: return -EINVAL; } } /* * Set 'VM_NORESERVE' if we should not account for the * memory use of this mapping. */ if (flags & MAP_NORESERVE) { /* We honor MAP_NORESERVE if allowed to overcommit */ if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) vm_flags |= VM_NORESERVE;
/* Check against address space limit. */ /* 检查申请的虚拟内存空间是否超过了限制. */ if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) { unsigned long nr_pages;
/* * MAP_FIXED may remove pages of mappings that intersects with * requested mapping. Account for the pages it would unmap. */ nr_pages = count_vma_pages_range(mm, addr, addr + len);
/* * Determine the object being mapped and call the appropriate * specific mapper. the address has already been validated, but * not unmapped, but the maps are removed from the list. */ /* 如果不能和已有的虚拟内存区域合并,通过 Memory Descriptor 来申请一个 vma */ vma = vm_area_alloc(mm); if (!vma) { error = -ENOMEM; goto unacct_error; } /* 初始化 vma */ vma->vm_start = addr; vma->vm_end = addr + len; vma->vm_flags = vm_flags; vma->vm_page_prot = vm_get_page_prot(vm_flags); vma->vm_pgoff = pgoff;
if (file) { /* 假如指定了文件映射 */ if (vm_flags & VM_DENYWRITE) { /* 映射的文件不允许写入,调用 deny_write_accsess(file) 排斥常规的文件操作 */ error = deny_write_access(file); if (error) goto free_vma; } if (vm_flags & VM_SHARED) { /* 映射的文件允许其他进程可见, 标记文件为可写 */ error = mapping_map_writable(file->f_mapping); if (error) goto allow_write_and_free_vma; }
/* ->mmap() can change vma->vm_file, but must guarantee that * vma_link() below can deny write-access if VM_DENYWRITE is set * and map writably if VM_SHARED is set. This usually means the * new file must not have been exposed to user-space, yet. */ vma->vm_file = get_file(file); /* 递增 File 的引用次数,返回 File 赋给 vma */ error = call_mmap(file, vma); /* 调用文件系统指定的 mmap 函数,后面会介绍 */ if (error) goto unmap_and_free_vma;
/* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their * f_op->mmap method. -DaveM * Bug: If addr is changed, prev, rb_link, rb_parent should * be updated for vma_link() */ WARN_ON_ONCE(addr != vma->vm_start);
/* * New (or expanded) vma always get soft dirty status. * Otherwise user-space soft-dirty page tracker won't * be able to distinguish situation when vma area unmapped, * then new mapped in-place (which must be aimed as * a completely new data area). */ vma->vm_flags |= VM_SOFTDIRTY;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO;
/* * We don't support synchronous mappings for non-DAX files. At least * until someone comes with a sensible use case. */ if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC)) return -EOPNOTSUPP;