1
0
Files
kernel-49/mm/mremap.c
Greg Kroah-Hartman f95d85c34b Merge 4.9.311 into android-4.9-q
Changes in 4.9.311
	USB: serial: pl2303: add IBM device IDs
	USB: serial: simple: add Nokia phone driver
	netdevice: add the case if dev is NULL
	virtio_console: break out of buf poll on remove
	ethernet: sun: Free the coherent when failing in probing
	af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register
	block: Add a helper to validate the block size
	virtio-blk: Use blk_validate_block_size() to validate block size
	USB: usb-storage: Fix use of bitfields for hardware data in ene_ub6250.c
	coresight: Fix TRCCONFIGR.QE sysfs interface
	iio: inkern: apply consumer scale on IIO_VAL_INT cases
	iio: inkern: make a best effort on offset calculation
	clk: uniphier: Fix fixed-rate initialization
	ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE
	SUNRPC: avoid race between mod_timer() and del_timer_sync()
	NFSD: prevent underflow in nfssvc_decode_writeargs()
	can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path
	jffs2: fix use-after-free in jffs2_clear_xattr_subsystem
	jffs2: fix memory leak in jffs2_do_mount_fs
	jffs2: fix memory leak in jffs2_scan_medium
	mm/pages_alloc.c: don't create ZONE_MOVABLE beyond the end of a node
	mempolicy: mbind_range() set_policy() after vma_merge()
	scsi: libsas: Fix sas_ata_qc_issue() handling of NCQ NON DATA commands
	Revert "Input: clear BTN_RIGHT/MIDDLE on buttonpads"
	ALSA: cs4236: fix an incorrect NULL check on list iterator
	drivers: hamradio: 6pack: fix UAF bug caused by mod_timer()
	video: fbdev: sm712fb: Fix crash in smtcfb_read()
	video: fbdev: atari: Atari 2 bpp (STe) palette bugfix
	ARM: dts: exynos: fix UART3 pins configuration in Exynos5250
	ARM: dts: exynos: add missing HDMI supplies on SMDK5250
	ARM: dts: exynos: add missing HDMI supplies on SMDK5420
	carl9170: fix missing bit-wise or operator for tx_params
	thermal: int340x: Increase bitmap size
	lib/raid6/test: fix multiple definition linking error
	DEC: Limit PMAX memory probing to R3k systems
	media: davinci: vpif: fix unbalanced runtime PM get
	brcmfmac: firmware: Allocate space for default boardrev in nvram
	brcmfmac: pcie: Replace brcmf_pcie_copy_mem_todev with memcpy_toio
	PCI: pciehp: Clear cmd_busy bit in polling mode
	crypto: authenc - Fix sleep in atomic context in decrypt_tail
	crypto: mxs-dcp - Fix scatterlist processing
	spi: tegra114: Add missing IRQ check in tegra_spi_probe
	selftests/x86: Add validity check and allow field splitting
	hwmon: (pmbus) Add mutex to regulator ops
	hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING
	PM: hibernate: fix __setup handler error handling
	PM: suspend: fix return value of __setup handler
	crypto: vmx - add missing dependencies
	crypto: ccp - ccp_dmaengine_unregister release dma channels
	hwmon: (pmbus) Add Vin unit off handling
	clocksource: acpi_pm: fix return value of __setup handler
	sched/debug: Remove mpol_get/put and task_lock/unlock from sched_show_numa
	perf/core: Fix address filter parser for multiple filters
	perf/x86/intel/pt: Fix address filter config for 32-bit kernel
	video: fbdev: smscufx: Fix null-ptr-deref in ufx_usb_probe()
	video: fbdev: fbcvt.c: fix printing in fb_cvt_print_name()
	ARM: dts: qcom: ipq4019: fix sleep clock
	soc: ti: wkup_m3_ipc: Fix IRQ check in wkup_m3_ipc_probe
	media: usb: go7007: s2250-board: fix leak in probe()
	ASoC: ti: davinci-i2s: Add check for clk_enable()
	ALSA: spi: Add check for clk_enable()
	arm64: dts: ns2: Fix spi-cpol and spi-cpha property
	arm64: dts: broadcom: Fix sata nodename
	printk: fix return value of printk.devkmsg __setup handler
	ASoC: mxs-saif: Handle errors for clk_enable
	ASoC: atmel_ssc_dai: Handle errors for clk_enable
	memory: emif: Add check for setup_interrupts
	memory: emif: check the pointer temp in get_device_details()
	ALSA: firewire-lib: fix uninitialized flag for AV/C deferred transaction
	ASoC: atmel: Add missing of_node_put() in at91sam9g20ek_audio_probe
	ASoC: wm8350: Handle error for wm8350_register_irq
	ASoC: fsi: Add check for clk_enable
	video: fbdev: omapfb: Add missing of_node_put() in dvic_probe_of
	ASoC: dmaengine: do not use a NULL prepare_slave_config() callback
	ASoC: mxs: Fix error handling in mxs_sgtl5000_probe
	ASoC: imx-es8328: Fix error return code in imx_es8328_probe()
	mtd: onenand: Check for error irq
	drm/edid: Don't clear formats if using deep color
	ath9k_htc: fix uninit value bugs
	ray_cs: Check ioremap return value
	power: supply: ab8500: Fix memory leak in ab8500_fg_sysfs_init
	HID: i2c-hid: fix GET/SET_REPORT for unnumbered reports
	iwlwifi: Fix -EIO error code that is never returned
	scsi: pm8001: Fix command initialization in pm80XX_send_read_log()
	scsi: pm8001: Fix command initialization in pm8001_chip_ssp_tm_req()
	scsi: pm8001: Fix payload initialization in pm80xx_set_thermal_config()
	scsi: pm8001: Fix abort all task initialization
	TOMOYO: fix __setup handlers return values
	ext2: correct max file size computing
	drm/tegra: Fix reference leak in tegra_dsi_ganged_probe
	KVM: x86: Fix emulation in writing cr8
	KVM: x86/emulator: Defer not-present segment check in __load_segment_descriptor()
	i2c: xiic: Make bus names unique
	power: supply: wm8350-power: Handle error for wm8350_register_irq
	power: supply: wm8350-power: Add missing free in free_charger_irq
	powerpc/sysdev: fix incorrect use to determine if list is empty
	mfd: mc13xxx: Add check for mc13xxx_irq_request
	MIPS: RB532: fix return value of __setup handler
	USB: storage: ums-realtek: fix error code in rts51x_read_mem()
	af_netlink: Fix shift out of bounds in group mask calculation
	i2c: mux: demux-pinctrl: do not deactivate a master that is not active
	mfd: asic3: Add missing iounmap() on error asic3_mfd_probe
	mxser: fix xmit_buf leak in activate when LSR == 0xff
	pwm: lpc18xx-sct: Initialize driver data and hardware before pwmchip_add()
	iio: adc: Add check for devm_request_threaded_irq
	clk: qcom: clk-rcg2: Update the frac table for pixel clock
	remoteproc: qcom_wcnss: Add missing of_node_put() in wcnss_alloc_memory_region
	clk: loongson1: Terminate clk_div_table with sentinel element
	clk: clps711x: Terminate clk_div_table with sentinel element
	clk: tegra: tegra124-emc: Fix missing put_device() call in emc_ensure_emc_driver
	NFS: remove unneeded check in decode_devicenotify_args()
	pinctrl: mediatek: Fix missing of_node_put() in mtk_pctrl_init
	pinctrl: nomadik: Add missing of_node_put() in nmk_pinctrl_probe
	pinctrl/rockchip: Add missing of_node_put() in rockchip_pinctrl_probe
	tty: hvc: fix return value of __setup handler
	kgdboc: fix return value of __setup handler
	kgdbts: fix return value of __setup handler
	jfs: fix divide error in dbNextAG
	netfilter: nf_conntrack_tcp: preserve liberal flag in tcp options
	net: phy: broadcom: Fix brcm_fet_config_init()
	qlcnic: dcb: default to returning -EOPNOTSUPP
	net/x25: Fix null-ptr-deref caused by x25_disconnect
	selinux: use correct type for context length
	loop: use sysfs_emit() in the sysfs xxx show()
	Fix incorrect type in assignment of ipv6 port for audit
	irqchip/nvic: Release nvic_base upon failure
	ACPICA: Avoid walking the ACPI Namespace if it is not there
	ACPI/APEI: Limit printable size of BERT table data
	PM: core: keep irq flags in device_pm_check_callbacks()
	spi: tegra20: Use of_device_get_match_data()
	ext4: don't BUG if someone dirty pages without asking ext4 first
	ntfs: add sanity check on allocation size
	video: fbdev: nvidiafb: Use strscpy() to prevent buffer overflow
	video: fbdev: w100fb: Reset global state
	video: fbdev: cirrusfb: check pixclock to avoid divide by zero
	video: fbdev: omapfb: acx565akm: replace snprintf with sysfs_emit
	ARM: dts: qcom: fix gic_irq_domain_translate warnings for msm8960
	ARM: dts: bcm2837: Add the missing L1/L2 cache information
	video: fbdev: omapfb: panel-dsi-cm: Use sysfs_emit() instead of snprintf()
	video: fbdev: omapfb: panel-tpo-td043mtea1: Use sysfs_emit() instead of snprintf()
	ASoC: soc-core: skip zero num_dai component in searching dai name
	media: cx88-mpeg: clear interrupt status register before streaming video
	ARM: tegra: tamonten: Fix I2C3 pad setting
	ARM: mmp: Fix failure to remove sram device
	video: fbdev: sm712fb: Fix crash in smtcfb_write()
	media: hdpvr: initialize dev->worker at hdpvr_register_videodev
	mmc: host: Return an error when ->enable_sdio_irq() ops is missing
	scsi: qla2xxx: Fix incorrect reporting of task management failure
	KVM: Prevent module exit until all VMs are freed
	ubifs: Add missing iput if do_tmpfile() failed in rename whiteout
	ubifs: setflags: Make dirtied_ino_d 8 bytes aligned
	gfs2: Make sure FITRIM minlen is rounded up to fs block size
	pinctrl: pinconf-generic: Print arguments for bias-pull-*
	ACPI: CPPC: Avoid out of bounds access when parsing _CPC data
	mm/mmap: return 1 from stack_guard_gap __setup() handler
	mm/memcontrol: return 1 from cgroup.memory __setup() handler
	ubi: fastmap: Return error code if memory allocation fails in add_aeb()
	ASoC: topology: Allow TLV control to be either read or write
	ARM: dts: spear1340: Update serial node properties
	ARM: dts: spear13xx: Update SPI dma properties
	openvswitch: Fixed nd target mask field in the flow dump.
	KVM: x86: Forbid VMM to set SYNIC/STIMER MSRs when SynIC wasn't activated
	rtc: wm8350: Handle error for wm8350_register_irq
	ARM: 9187/1: JIVE: fix return value of __setup handler
	KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs
	ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111
	ptp: replace snprintf with sysfs_emit
	powerpc: dts: t104xrdb: fix phy type for FMAN 4/5
	scsi: mvsas: Replace snprintf() with sysfs_emit()
	scsi: bfa: Replace snprintf() with sysfs_emit()
	iommu/arm-smmu-v3: fix event handling soft lockup
	dm ioctl: prevent potential spectre v1 gadget
	scsi: pm8001: Fix pm8001_mpi_task_abort_resp()
	scsi: aha152x: Fix aha152x_setup() __setup handler return value
	bnxt_en: Eliminate unintended link toggle during FW reset
	MIPS: fix fortify panic when copying asm exception handlers
	scsi: libfc: Fix use after free in fc_exch_abts_resp()
	usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm
	xtensa: fix DTC warning unit_address_format
	Bluetooth: Fix use after free in hci_send_acl
	init/main.c: return 1 from handled __setup() functions
	w1: w1_therm: fixes w1_seq for ds28ea00 sensors
	SUNRPC/call_alloc: async tasks mustn't block waiting for memory
	serial: samsung_tty: do not unlock port->lock for uart_write_wakeup()
	virtio_console: eliminate anonymous module_init & module_exit
	jfs: prevent NULL deref in diFree
	mm: fix race between MADV_FREE reclaim and blkdev direct IO read
	scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one()
	net: stmmac: Fix unset max_speed difference between DT and non-DT platforms
	drm/imx: Fix memory leak in imx_pd_connector_get_modes
	drbd: Fix five use after free bugs in get_initial_state
	mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0)
	mm/mempolicy: fix mpol_new leak in shared_policy_replace
	x86/pm: Save the MSR validity status at context setup
	x86/speculation: Restore speculation related MSRs during S3 resume
	arm64: patch_text: Fixup last cpu should be master
	tools build: Use $(shell ) instead of `` to get embedded libperl's ccopts
	dmaengine: Revert "dmaengine: shdma: Fix runtime PM imbalance on error"
	mm: don't skip swap entry even if zap_details specified
	arm64: module: remove (NOLOAD) from linker script
	xfrm: policy: match with both mark and mask on user interfaces
	veth: Ensure eth header is in skb's linear part
	net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link
	nfc: nci: add flush_workqueue to prevent uaf
	cifs: potential buffer overflow in handling symlinks
	drm/amdkfd: Check for potential null return of kmalloc_array()
	scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024
	net: micrel: fix KS8851_MLL Kconfig
	gpu: ipu-v3: Fix dev_dbg frequency output
	scsi: mvsas: Add PCI ID of RocketRaid 2640
	drivers: net: slip: fix NPD bug in sl_tx_timeout()
	mm, page_alloc: fix build_zonerefs_node()
	mm: kmemleak: take a full lowmem check in kmemleak_*_phys()
	ALSA: pcm: Test for "silence" field in struct "pcm_format_data"
	ARM: davinci: da850-evm: Avoid NULL pointer dereference
	smp: Fix offline cpu check in flush_smp_call_function_queue()
	i2c: pasemi: Wait for write xfers to finish
	gcc-plugins: latent_entropy: use /dev/urandom
	Linux 4.9.311

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ia8f55c5ae2f0eb71b0893d8271a10dfd3c78b3b8
2022-04-25 16:25:23 +03:00

607 lines
16 KiB
C

/*
* mm/mremap.c
*
* (C) Copyright 1996 Linus Torvalds
*
* Address space accounting code <alan@lxorguk.ukuu.org.uk>
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
*/
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include "internal.h"
static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
if (pgd_none_or_clear_bad(pgd))
return NULL;
pud = pud_offset(pgd, addr);
if (pud_none_or_clear_bad(pud))
return NULL;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return NULL;
return pmd;
}
static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pgd = pgd_offset(mm, addr);
pud = pud_alloc(mm, pgd, addr);
if (!pud)
return NULL;
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return NULL;
VM_BUG_ON(pmd_trans_huge(*pmd));
return pmd;
}
static void take_rmap_locks(struct vm_area_struct *vma)
{
if (vma->vm_file)
i_mmap_lock_write(vma->vm_file->f_mapping);
if (vma->anon_vma)
anon_vma_lock_write(vma->anon_vma);
}
static void drop_rmap_locks(struct vm_area_struct *vma)
{
if (vma->anon_vma)
anon_vma_unlock_write(vma->anon_vma);
if (vma->vm_file)
i_mmap_unlock_write(vma->vm_file->f_mapping);
}
static pte_t move_soft_dirty_pte(pte_t pte)
{
/*
* Set soft dirty bit so we can notice
* in userspace the ptes were moved.
*/
#ifdef CONFIG_MEM_SOFT_DIRTY
if (pte_present(pte))
pte = pte_mksoft_dirty(pte);
else if (is_swap_pte(pte))
pte = pte_swp_mksoft_dirty(pte);
#endif
return pte;
}
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
unsigned long old_addr, unsigned long old_end,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
unsigned long new_addr, bool need_rmap_locks)
{
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
bool force_flush = false;
unsigned long len = old_end - old_addr;
/*
* When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
* locks to ensure that rmap will always observe either the old or the
* new ptes. This is the easiest way to avoid races with
* truncate_pagecache(), page migration, etc...
*
* When need_rmap_locks is false, we use other ways to avoid
* such races:
*
* - During exec() shift_arg_pages(), we use a specially tagged vma
* which rmap call sites look for using is_vma_temporary_stack().
*
* - During mremap(), new_vma is often known to be placed after vma
* in rmap traversal order. This ensures rmap will always observe
* either the old pte, or the new pte, or both (the page table locks
* serialize access to individual ptes, but only rmap traversal
* order guarantees that we won't miss both the old and new ptes).
*/
if (need_rmap_locks)
take_rmap_locks(vma);
/*
* We don't have to worry about the ordering of src and dst
* pte locks because exclusive mmap_sem prevents deadlock.
*/
old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
new_pte = pte_offset_map(new_pmd, new_addr);
new_ptl = pte_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
flush_tlb_batched_pending(vma->vm_mm);
arch_enter_lazy_mmu_mode();
for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
new_pte++, new_addr += PAGE_SIZE) {
if (pte_none(*old_pte))
continue;
pte = ptep_get_and_clear(mm, old_addr, old_pte);
/*
* If we are remapping a valid PTE, make sure
* to flush TLB before we drop the PTL for the
* PTE.
*
* NOTE! Both old and new PTL matter: the old one
* for racing with page_mkclean(), the new one to
* make sure the physical page stays valid until
* the TLB entry for the old mapping has been
* flushed.
*/
if (pte_present(pte))
force_flush = true;
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
set_pte_at(mm, new_addr, new_pte, pte);
}
arch_leave_lazy_mmu_mode();
if (force_flush)
flush_tlb_range(vma, old_end - len, old_end);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
pte_unmap_unlock(old_pte - 1, old_ptl);
if (need_rmap_locks)
drop_rmap_locks(vma);
}
#define LATENCY_LIMIT (64 * PAGE_SIZE)
unsigned long move_page_tables(struct vm_area_struct *vma,
unsigned long old_addr, struct vm_area_struct *new_vma,
unsigned long new_addr, unsigned long len,
bool need_rmap_locks)
{
unsigned long extent, next, old_end;
pmd_t *old_pmd, *new_pmd;
unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */
if (!len)
return 0;
old_end = old_addr + len;
flush_cache_range(vma, old_addr, old_end);
mmun_start = old_addr;
mmun_end = old_end;
mmu_notifier_invalidate_range_start(vma->vm_mm, mmun_start, mmun_end);
for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
cond_resched();
next = (old_addr + PMD_SIZE) & PMD_MASK;
/* even if next overflowed, extent below will be ok */
extent = next - old_addr;
if (extent > old_end - old_addr)
extent = old_end - old_addr;
old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd)
continue;
new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
if (!new_pmd)
break;
if (pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) {
if (extent == HPAGE_PMD_SIZE) {
bool moved;
/* See comment in move_ptes() */
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_huge_pmd(vma, old_addr, new_addr,
old_end, old_pmd, new_pmd);
if (need_rmap_locks)
drop_rmap_locks(vma);
if (moved)
continue;
}
split_huge_pmd(vma, old_pmd, old_addr);
if (pmd_trans_unstable(old_pmd))
continue;
}
if (pte_alloc(new_vma->vm_mm, new_pmd, new_addr))
break;
next = (new_addr + PMD_SIZE) & PMD_MASK;
if (extent > next - new_addr)
extent = next - new_addr;
if (extent > LATENCY_LIMIT)
extent = LATENCY_LIMIT;
move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
new_pmd, new_addr, need_rmap_locks);
}
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
return len + old_addr - old_end; /* how much done */
}
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
unsigned long new_len, unsigned long new_addr, bool *locked)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
unsigned long vm_flags = vma->vm_flags;
unsigned long new_pgoff;
unsigned long moved_len;
unsigned long excess = 0;
unsigned long hiwater_vm;
int split = 0;
int err;
bool need_rmap_locks;
/*
* We'd prefer to avoid failure later on in do_munmap:
* which may split one vma into three before unmapping.
*/
if (mm->map_count >= sysctl_max_map_count - 3)
return -ENOMEM;
/*
* Advise KSM to break any KSM pages in the area to be moved:
* it would be confusing if they were to turn up at the new
* location, where they happen to coincide with different KSM
* pages recently unmapped. But leave vma->vm_flags as it was,
* so KSM can come around to merge on vma and new_vma afterwards.
*/
err = ksm_madvise(vma, old_addr, old_addr + old_len,
MADV_UNMERGEABLE, &vm_flags);
if (err)
return err;
new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
&need_rmap_locks);
if (!new_vma)
return -ENOMEM;
moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
need_rmap_locks);
if (moved_len < old_len) {
err = -ENOMEM;
} else if (vma->vm_ops && vma->vm_ops->mremap) {
err = vma->vm_ops->mremap(new_vma);
}
if (unlikely(err)) {
/*
* On error, move entries back from new area to old,
* which will succeed since page tables still there,
* and then proceed to unmap new area instead of old.
*/
move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
true);
vma = new_vma;
old_len = new_len;
old_addr = new_addr;
new_addr = err;
} else {
arch_remap(mm, old_addr, old_addr + old_len,
new_addr, new_addr + new_len);
}
/* Conceal VM_ACCOUNT so old reservation is not undone */
if (vm_flags & VM_ACCOUNT) {
vma->vm_flags &= ~VM_ACCOUNT;
excess = vma->vm_end - vma->vm_start - old_len;
if (old_addr > vma->vm_start &&
old_addr + old_len < vma->vm_end)
split = 1;
}
/*
* If we failed to move page tables we still do total_vm increment
* since do_munmap() will decrement it by old_len == new_len.
*
* Since total_vm is about to be raised artificially high for a
* moment, we need to restore high watermark afterwards: if stats
* are taken meanwhile, total_vm and hiwater_vm appear too high.
* If this were a serious issue, we'd add a flag to do_munmap().
*/
hiwater_vm = mm->hiwater_vm;
vm_stat_account(mm, vma->vm_flags, new_len >> PAGE_SHIFT);
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
if (do_munmap(mm, old_addr, old_len) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
excess = 0;
}
mm->hiwater_vm = hiwater_vm;
/* Restore VM_ACCOUNT if one or two pieces of vma left */
if (excess) {
vma->vm_flags |= VM_ACCOUNT;
if (split)
vma->vm_next->vm_flags |= VM_ACCOUNT;
}
if (vm_flags & VM_LOCKED) {
mm->locked_vm += new_len >> PAGE_SHIFT;
*locked = true;
}
return new_addr;
}
static struct vm_area_struct *vma_to_resize(unsigned long addr,
unsigned long old_len, unsigned long new_len, unsigned long *p)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = find_vma(mm, addr);
unsigned long pgoff;
if (!vma || vma->vm_start > addr)
return ERR_PTR(-EFAULT);
if (is_vm_hugetlb_page(vma))
return ERR_PTR(-EINVAL);
/* We can't remap across vm area boundaries */
if (old_len > vma->vm_end - addr)
return ERR_PTR(-EFAULT);
if (new_len == old_len)
return vma;
/* Need to be careful about a growing mapping */
pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
return ERR_PTR(-EINVAL);
if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
return ERR_PTR(-EFAULT);
if (vma->vm_flags & VM_LOCKED) {
unsigned long locked, lock_limit;
locked = mm->locked_vm << PAGE_SHIFT;
lock_limit = rlimit(RLIMIT_MEMLOCK);
locked += new_len - old_len;
if (locked > lock_limit && !capable(CAP_IPC_LOCK))
return ERR_PTR(-EAGAIN);
}
if (!may_expand_vm(mm, vma->vm_flags,
(new_len - old_len) >> PAGE_SHIFT))
return ERR_PTR(-ENOMEM);
if (vma->vm_flags & VM_ACCOUNT) {
unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
if (security_vm_enough_memory_mm(mm, charged))
return ERR_PTR(-ENOMEM);
*p = charged;
}
return vma;
}
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
unsigned long new_addr, unsigned long new_len, bool *locked)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
unsigned long map_flags;
if (offset_in_page(new_addr))
goto out;
if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
goto out;
/* Ensure the old/new locations do not overlap */
if (addr + old_len > new_addr && new_addr + new_len > addr)
goto out;
ret = do_munmap(mm, new_addr, new_len);
if (ret)
goto out;
if (old_len >= new_len) {
ret = do_munmap(mm, addr+new_len, old_len - new_len);
if (ret && old_len != new_len)
goto out;
old_len = new_len;
}
vma = vma_to_resize(addr, old_len, new_len, &charged);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
}
map_flags = MAP_FIXED;
if (vma->vm_flags & VM_MAYSHARE)
map_flags |= MAP_SHARED;
ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
((addr - vma->vm_start) >> PAGE_SHIFT),
map_flags);
if (offset_in_page(ret))
goto out1;
ret = move_vma(vma, addr, old_len, new_len, new_addr, locked);
if (!(offset_in_page(ret)))
goto out;
out1:
vm_unacct_memory(charged);
out:
return ret;
}
static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
{
unsigned long end = vma->vm_end + delta;
if (end < vma->vm_end) /* overflow */
return 0;
if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
return 0;
if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
0, MAP_FIXED) & ~PAGE_MASK)
return 0;
return 1;
}
/*
* Expand (or shrink) an existing mapping, potentially moving it at the
* same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
*
* MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise
* This option implies MREMAP_MAYMOVE.
*/
SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
unsigned long, new_len, unsigned long, flags,
unsigned long, new_addr)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long ret = -EINVAL;
unsigned long charged = 0;
bool locked = false;
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
return ret;
if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
return ret;
if (offset_in_page(addr))
return ret;
old_len = PAGE_ALIGN(old_len);
new_len = PAGE_ALIGN(new_len);
/*
* We allow a zero old-len as a special case
* for DOS-emu "duplicate shm area" thing. But
* a zero new-len is nonsensical.
*/
if (!new_len)
return ret;
if (down_write_killable(&current->mm->mmap_sem))
return -EINTR;
if (flags & MREMAP_FIXED) {
ret = mremap_to(addr, old_len, new_addr, new_len,
&locked);
goto out;
}
/*
* Always allow a shrinking remap: that just unmaps
* the unnecessary pages..
* do_munmap does all the needed commit accounting
*/
if (old_len >= new_len) {
ret = do_munmap(mm, addr+new_len, old_len - new_len);
if (ret && old_len != new_len)
goto out;
ret = addr;
goto out;
}
/*
* Ok, we need to grow..
*/
vma = vma_to_resize(addr, old_len, new_len, &charged);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out;
}
/* old_len exactly to the end of the area..
*/
if (old_len == vma->vm_end - addr) {
/* can we just expand the current mapping? */
if (vma_expandable(vma, new_len - old_len)) {
int pages = (new_len - old_len) >> PAGE_SHIFT;
if (vma_adjust(vma, vma->vm_start, addr + new_len,
vma->vm_pgoff, NULL)) {
ret = -ENOMEM;
goto out;
}
vm_stat_account(mm, vma->vm_flags, pages);
if (vma->vm_flags & VM_LOCKED) {
mm->locked_vm += pages;
locked = true;
new_addr = addr;
}
ret = addr;
goto out;
}
}
/*
* We weren't able to just expand or shrink the area,
* we need to create a new one and move it..
*/
ret = -ENOMEM;
if (flags & MREMAP_MAYMOVE) {
unsigned long map_flags = 0;
if (vma->vm_flags & VM_MAYSHARE)
map_flags |= MAP_SHARED;
new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
vma->vm_pgoff +
((addr - vma->vm_start) >> PAGE_SHIFT),
map_flags);
if (offset_in_page(new_addr)) {
ret = new_addr;
goto out;
}
ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked);
}
out:
if (offset_in_page(ret)) {
vm_unacct_memory(charged);
locked = 0;
}
up_write(&current->mm->mmap_sem);
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
return ret;
}