1
0
Files
kernel-49/mm/pagewalk.c
Greg Kroah-Hartman 33b88b6e30 Merge 4.9.238 into android-4.9-q
Changes in 4.9.238
	af_key: pfkey_dump needs parameter validation
	KVM: fix memory leak in kvm_io_bus_unregister_dev()
	kprobes: fix kill kprobe which has been marked as gone
	RDMA/ucma: ucma_context reference leak in error path
	mtd: Fix comparison in map_word_andequal()
	hdlc_ppp: add range checks in ppp_cp_parse_cr()
	ip: fix tos reflection in ack and reset packets
	tipc: use skb_unshare() instead in tipc_buf_append()
	bnxt_en: Protect bnxt_set_eee() and bnxt_set_pauseparam() with mutex.
	net: phy: Avoid NPD upon phy_detach() when driver is unbound
	net/hsr: Check skb_put_padto() return value
	net: add __must_check to skb_put_padto()
	serial: 8250: Avoid error message on reprobe
	scsi: aacraid: fix illegal IO beyond last LBA
	m68k: q40: Fix info-leak in rtc_ioctl
	gma/gma500: fix a memory disclosure bug due to uninitialized bytes
	ASoC: kirkwood: fix IRQ error handling
	ALSA: usb-audio: Add delay quirk for H570e USB headsets
	PM / devfreq: tegra30: Fix integer overflow on CPU's freq max out
	clk/ti/adpll: allocate room for terminating null
	mtd: cfi_cmdset_0002: don't free cfi->cfiq in error path of cfi_amdstd_setup()
	mfd: mfd-core: Protect against NULL call-back function pointer
	tracing: Adding NULL checks for trace_array descriptor pointer
	bcache: fix a lost wake-up problem caused by mca_cannibalize_lock
	RDMA/i40iw: Fix potential use after free
	xfs: fix attr leaf header freemap.size underflow
	RDMA/iw_cgxb4: Fix an error handling path in 'c4iw_connect()'
	debugfs: Fix !DEBUG_FS debugfs_create_automount
	CIFS: Properly process SMB3 lease breaks
	kernel/sys.c: avoid copying possible padding bytes in copy_to_user
	neigh_stat_seq_next() should increase position index
	rt_cpu_seq_next should increase position index
	seqlock: Require WRITE_ONCE surrounding raw_seqcount_barrier
	media: ti-vpe: cal: Restrict DMA to avoid memory corruption
	ACPI: EC: Reference count query handlers under lock
	dmaengine: zynqmp_dma: fix burst length configuration
	tracing: Set kernel_stack's caller size properly
	ar5523: Add USB ID of SMCWUSBT-G2 wireless adapter
	Bluetooth: Fix refcount use-after-free issue
	mm: pagewalk: fix termination condition in walk_pte_range()
	Bluetooth: prefetch channel before killing sock
	KVM: fix overflow of zero page refcount with ksm running
	ALSA: hda: Clear RIRB status before reading WP
	skbuff: fix a data race in skb_queue_len()
	audit: CONFIG_CHANGE don't log internal bookkeeping as an event
	selinux: sel_avc_get_stat_idx should increase position index
	scsi: lpfc: Fix RQ buffer leakage when no IOCBs available
	scsi: lpfc: Fix coverity errors in fmdi attribute handling
	drm/omap: fix possible object reference leak
	RDMA/rxe: Fix configuration of atomic queue pair attributes
	KVM: x86: fix incorrect comparison in trace event
	x86/pkeys: Add check for pkey "overflow"
	bpf: Remove recursion prevention from rcu free callback
	dmaengine: tegra-apb: Prevent race conditions on channel's freeing
	media: go7007: Fix URB type for interrupt handling
	Bluetooth: guard against controllers sending zero'd events
	timekeeping: Prevent 32bit truncation in scale64_check_overflow()
	drm/amdgpu: increase atombios cmd timeout
	Bluetooth: L2CAP: handle l2cap config request during open state
	media: tda10071: fix unsigned sign extension overflow
	xfs: don't ever return a stale pointer from __xfs_dir3_free_read
	tpm: ibmvtpm: Wait for buffer to be set before proceeding
	tracing: Use address-of operator on section symbols
	serial: 8250_port: Don't service RX FIFO if throttled
	serial: 8250_omap: Fix sleeping function called from invalid context during probe
	serial: 8250: 8250_omap: Terminate DMA before pushing data on RX timeout
	cpufreq: powernv: Fix frame-size-overflow in powernv_cpufreq_work_fn
	tools: gpio-hammer: Avoid potential overflow in main
	SUNRPC: Fix a potential buffer overflow in 'svc_print_xprts()'
	svcrdma: Fix leak of transport addresses
	ubifs: Fix out-of-bounds memory access caused by abnormal value of node_len
	ALSA: usb-audio: Fix case when USB MIDI interface has more than one extra endpoint descriptor
	mm/filemap.c: clear page error before actual read
	mm/mmap.c: initialize align_offset explicitly for vm_unmapped_area
	KVM: Remove CREATE_IRQCHIP/SET_PIT2 race
	bdev: Reduce time holding bd_mutex in sync in blkdev_close()
	drivers: char: tlclk.c: Avoid data race between init and interrupt handler
	dt-bindings: sound: wm8994: Correct required supplies based on actual implementaion
	atm: fix a memory leak of vcc->user_back
	phy: samsung: s5pv210-usb2: Add delay after reset
	Bluetooth: Handle Inquiry Cancel error after Inquiry Complete
	USB: EHCI: ehci-mv: fix error handling in mv_ehci_probe()
	tty: serial: samsung: Correct clock selection logic
	ALSA: hda: Fix potential race in unsol event handler
	fuse: don't check refcount after stealing page
	USB: EHCI: ehci-mv: fix less than zero comparison of an unsigned int
	e1000: Do not perform reset in reset_task if we are already down
	printk: handle blank console arguments passed in.
	btrfs: don't force read-only after error in drop snapshot
	vfio/pci: fix memory leaks of eventfd ctx
	perf util: Fix memory leak of prefix_if_not_in
	perf kcore_copy: Fix module map when there are no modules loaded
	mtd: rawnand: omap_elm: Fix runtime PM imbalance on error
	ceph: fix potential race in ceph_check_caps
	mtd: parser: cmdline: Support MTD names containing one or more colons
	x86/speculation/mds: Mark mds_user_clear_cpu_buffers() __always_inline
	vfio/pci: Clear error and request eventfd ctx after releasing
	cifs: Fix double add page to memcg when cifs_readpages
	selftests/x86/syscall_nt: Clear weird flags after each test
	vfio/pci: fix racy on error and request eventfd ctx
	s390/init: add missing __init annotations
	i2c: core: Call i2c_acpi_install_space_handler() before i2c_acpi_register_devices()
	objtool: Fix noreturn detection for ignored functions
	ieee802154/adf7242: check status of adf7242_read_reg
	clocksource/drivers/h8300_timer8: Fix wrong return value in h8300_8timer_init()
	mwifiex: Increase AES key storage size to 256 bits
	batman-adv: bla: fix type misuse for backbone_gw hash indexing
	atm: eni: fix the missed pci_disable_device() for eni_init_one()
	batman-adv: mcast/TT: fix wrongly dropped or rerouted packets
	mac802154: tx: fix use-after-free
	batman-adv: Add missing include for in_interrupt()
	batman-adv: mcast: fix duplicate mcast packets in BLA backbone from mesh
	ALSA: asihpi: fix iounmap in error handler
	MIPS: Add the missing 'CPU_1074K' into __get_cpu_type()
	kprobes: Fix to check probe enabled before disarm_kprobe_ftrace()
	lib/string.c: implement stpcpy
	ata: define AC_ERR_OK
	ata: make qc_prep return ata_completion_errors
	ata: sata_mv, avoid trigerrable BUG_ON
	Linux 4.9.238

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I799877db3bc49e473bbc023ab948cd241755beff
2020-11-10 11:34:37 +03:00

309 lines
7.6 KiB
C

#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/hugetlb.h>
/*
 * Call walk->pte_entry() on every pte below @pmd that covers [addr, end),
 * one PAGE_SIZE step at a time.
 *
 * The walk stops at the first non-zero callback result, which is returned
 * to the caller; otherwise 0 is returned once the last page was handled.
 */
static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pte_t *ptep = pte_offset_map(pmd, addr);
	int ret;

	while (1) {
		ret = walk->pte_entry(ptep, addr, addr + PAGE_SIZE, walk);
		/*
		 * The end-of-range test comes before the increment, so
		 * neither addr nor ptep is moved beyond the last page that
		 * was visited; ptep is therefore still the last visited
		 * entry when it is unmapped below.
		 */
		if (ret || addr >= end - PAGE_SIZE)
			break;
		addr += PAGE_SIZE;
		ptep++;
	}

	pte_unmap(ptep);
	return ret;
}
/*
 * Walk the pmd entries below @pud that cover [addr, end).
 *
 * For each entry the sub-range [addr, next) is computed with pmd_addr_end()
 * and then handled as follows:
 *  - empty pmd, or no vma for this range: report it through ->pte_hole()
 *    when that callback is set;
 *  - otherwise call ->pmd_entry() when set, and, only if ->pte_entry() is
 *    set as well, split a huge pmd and descend into walk_pte_range().
 *
 * Returns 0 when the whole range was walked, or the first non-zero value
 * returned by a callback or by walk_pte_range().
 */
static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
pmd_t *pmd;
unsigned long next;
int err = 0;
pmd = pmd_offset(pud, addr);
do {
/* Retry target: re-examines the same pmd without advancing pmd/addr. */
again:
next = pmd_addr_end(addr, end);
if (pmd_none(*pmd) || !walk->vma) {
if (walk->pte_hole)
err = walk->pte_hole(addr, next, walk);
if (err)
break;
/*
 * 'continue' in a do/while jumps to the loop condition, which is
 * where pmd and addr are advanced to the next entry.
 */
continue;
}
/*
* This implies that each ->pmd_entry() handler
* needs to know about pmd_trans_huge() pmds
*/
if (walk->pmd_entry)
err = walk->pmd_entry(pmd, addr, next, walk);
if (err)
break;
/*
* Check this here so we only break down trans_huge
* pages when we _need_ to
*/
if (!walk->pte_entry)
continue;
split_huge_pmd(walk->vma, pmd, addr);
/*
 * NOTE(review): presumably pmd_trans_unstable() reports that the pmd
 * cannot safely be treated as a page table right now (e.g. it changed
 * concurrently) -- confirm against its definition. In that case the
 * same entry is processed again from the top.
 */
if (pmd_trans_unstable(pmd))
goto again;
err = walk_pte_range(pmd, addr, next, walk);
if (err)
break;
} while (pmd++, addr = next, addr != end);
return err;
}
/*
 * Walk the pud entries below @pgd that cover [addr, end).
 *
 * An entry rejected by pud_none_or_clear_bad() is reported through
 * ->pte_hole() when that callback is set. Any other entry is descended into
 * with walk_pmd_range(), provided the caller registered ->pmd_entry() or
 * ->pte_entry().
 *
 * Returns 0 when the whole range was walked, or the first non-zero value
 * produced by a callback or by the lower level.
 */
static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pud_t *pudp = pud_offset(pgd, addr);
	unsigned long next;
	int ret = 0;

	do {
		next = pud_addr_end(addr, end);

		if (pud_none_or_clear_bad(pudp)) {
			if (walk->pte_hole)
				ret = walk->pte_hole(addr, next, walk);
		} else if (walk->pmd_entry || walk->pte_entry) {
			ret = walk_pmd_range(pudp, addr, next, walk);
		}

		if (ret)
			break;
	} while (pudp++, addr = next, addr != end);

	return ret;
}
/*
 * Walk the pgd entries of walk->mm that cover [addr, end).
 *
 * An entry rejected by pgd_none_or_clear_bad() is reported through
 * ->pte_hole() when that callback is set. Any other entry is descended into
 * with walk_pud_range(), provided the caller registered ->pmd_entry() or
 * ->pte_entry().
 *
 * Returns 0 when the whole range was walked, or the first non-zero value
 * produced by a callback or by the lower level.
 */
static int walk_pgd_range(unsigned long addr, unsigned long end,
			  struct mm_walk *walk)
{
	pgd_t *pgdp = pgd_offset(walk->mm, addr);
	unsigned long next;
	int ret = 0;

	do {
		next = pgd_addr_end(addr, end);

		if (pgd_none_or_clear_bad(pgdp)) {
			if (walk->pte_hole)
				ret = walk->pte_hole(addr, next, walk);
		} else if (walk->pmd_entry || walk->pte_entry) {
			ret = walk_pud_range(pgdp, addr, next, walk);
		}

		if (ret)
			break;
	} while (pgdp++, addr = next, addr != end);

	return ret;
}
#ifdef CONFIG_HUGETLB_PAGE
/*
 * Return where the huge page entry containing @addr stops being relevant
 * for the walk: the start of the next huge page of hstate @h, or @end if
 * that comes first.
 */
static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
				       unsigned long end)
{
	unsigned long limit = (addr & huge_page_mask(h)) + huge_page_size(h);

	if (limit < end)
		return limit;
	return end;
}
/*
 * Walk [addr, end) of the hugetlb vma in walk->vma, one huge page entry at
 * a time.
 *
 * For each step the huge pte is looked up at the huge-page-aligned address.
 * If one exists it is handed to ->hugetlb_entry(); otherwise the sub-range
 * is reported through ->pte_hole() when that callback is set.
 *
 * Returns 0 when the whole range was walked, or the first non-zero value
 * returned by a callback.
 */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	struct hstate *h = hstate_vma(walk->vma);
	unsigned long hmask = huge_page_mask(h);
	int ret = 0;

	for (;;) {
		unsigned long next = hugetlb_entry_end(h, addr, end);
		pte_t *ptep = huge_pte_offset(walk->mm, addr & hmask);

		if (ptep)
			ret = walk->hugetlb_entry(ptep, hmask, addr, next, walk);
		else if (walk->pte_hole)
			ret = walk->pte_hole(addr, next, walk);

		if (ret)
			return ret;

		addr = next;
		if (addr == end)
			return 0;
	}
}
#else /* CONFIG_HUGETLB_PAGE */
/*
 * Stub used when CONFIG_HUGETLB_PAGE is not set: nothing is walked and
 * success (0) is reported unconditionally.
 */
static int walk_hugetlb_range(unsigned long addr, unsigned long end,
			      struct mm_walk *walk)
{
	return 0;
}
#endif /* CONFIG_HUGETLB_PAGE */
/*
 * Check whether the current vma (walk->vma) should be walked over
 * [@start, @end).
 *
 * Returns 0 to walk the vma, a positive value to skip it, and a negative
 * value to report an error that aborts the current walk.
 *
 * A caller-supplied ->test_walk() takes precedence; its return value is
 * passed through unchanged.
 */
static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	int ret;

	if (walk->test_walk)
		return walk->test_walk(start, end, walk);

	/* Without a VM_PFNMAP mapping there is no reason to skip the vma. */
	if (!(walk->vma->vm_flags & VM_PFNMAP))
		return 0;

	/*
	 * A VM_PFNMAP vma has no valid struct pages behind its range, so it
	 * is not walked like a normal vma. Some callers nevertheless want
	 * to see every address range instead of having one silently
	 * ignored; those callers define ->pte_hole(), so the range is
	 * delegated to that callback.
	 */
	if (!walk->pte_hole)
		return 1;

	ret = walk->pte_hole(start, end, walk);

	/* A successful hole callback (0) still means "skip this vma". */
	return ret ? ret : 1;
}
/*
 * Walk [start, end) with the walker that matches walk->vma.
 *
 * A hugetlb vma is walked with walk_hugetlb_range(), but only when the
 * caller registered ->hugetlb_entry(); without that callback the range is
 * left alone and 0 is returned. Everything else, including the case of no
 * vma at all, goes through the regular page table walk starting at the pgd
 * level.
 */
static int __walk_page_range(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;

	if (!vma || !is_vm_hugetlb_page(vma))
		return walk_pgd_range(start, end, walk);

	if (!walk->hugetlb_entry)
		return 0;

	return walk_hugetlb_range(start, end, walk);
}
/**
 * walk_page_range - walk page table with caller specific callbacks
 *
 * Recursively walk the page table tree of the process represented by
 * @walk->mm within the virtual address range [@start, @end). While walking,
 * caller-specific work can be done for each entry by setting up pmd_entry(),
 * pte_entry(), and/or hugetlb_entry(). Entries/pages whose callback is not
 * set up are simply ignored.
 *
 * The return values of these callbacks are commonly defined as follows:
 *  - 0  : the current entry was handled successfully; keep walking if the
 *         end address has not been reached yet.
 *  - >0 : the current entry was handled successfully; return to the caller
 *         with this caller specific value.
 *  - <0 : handling the current entry failed; return to the caller with
 *         this error code.
 *
 * Before walking a vma, some callers want to decide whether it should be
 * walked at all, typically by looking at its vm_flags. walk_page_test() and
 * @walk->test_walk() serve that purpose.
 *
 * struct mm_walk keeps the current values of some common data such as vma
 * and pmd, which callbacks can access. Caller-specific data can be passed
 * to the callbacks through @walk->private.
 *
 * Returns -EINVAL when @start >= @end or when @walk->mm is not set.
 *
 * Locking:
 *   Callers of walk_page_range() and walk_page_vma() should hold
 *   @walk->mm->mmap_sem, because these functions traverse the vma list
 *   and/or access the vma's data.
 */
int walk_page_range(unsigned long start, unsigned long end,
		    struct mm_walk *walk)
{
	struct vm_area_struct *vma;
	unsigned long next;
	int ret = 0;

	if (start >= end)
		return -EINVAL;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);

	vma = find_vma(walk->mm, start);

	/* start < end holds on entry, so the body runs at least once. */
	for (; start < end; start = next) {
		if (vma && start >= vma->vm_start) {
			/* inside vma */
			walk->vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;

			ret = walk_page_test(start, next, walk);
			if (ret < 0)
				break;
			if (ret > 0) {
				/*
				 * positive return values are purely for
				 * controlling the pagewalk, so should never
				 * be passed to the callers.
				 */
				ret = 0;
				continue;
			}
		} else {
			/*
			 * Either after the last vma (walk up to end) or in
			 * the gap in front of the next vma (walk up to its
			 * start, at most).
			 */
			walk->vma = NULL;
			next = vma ? min(end, vma->vm_start) : end;
		}

		/* Ranges without a vma only matter to ->pte_hole() users. */
		if (walk->vma || walk->pte_hole)
			ret = __walk_page_range(start, next, walk);
		if (ret)
			break;
	}

	return ret;
}
/*
 * Walk the page tables covering the whole of @vma, i.e.
 * [vma->vm_start, vma->vm_end), with the callbacks in @walk.
 *
 * Returns -EINVAL when walk->mm is not set, 0 when walk_page_test() asks
 * for the vma to be skipped, a negative error from walk_page_test(), or
 * otherwise whatever the page table walk returns.
 */
int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
{
	int ret;

	if (!walk->mm)
		return -EINVAL;

	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
	VM_BUG_ON(!vma);

	walk->vma = vma;

	ret = walk_page_test(vma->vm_start, vma->vm_end, walk);
	if (ret) {
		/* Positive means "skip this vma", which is not an error. */
		return ret > 0 ? 0 : ret;
	}

	return __walk_page_range(vma->vm_start, vma->vm_end, walk);
}