1
0
Files

3373 lines
87 KiB
C
Raw Permalink Normal View History

/*
* hosting zSeries kernel virtual machines
*
* Copyright IBM Corp. 2008, 2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
* Heiko Carstens <heiko.carstens@de.ibm.com>
* Christian Ehrhardt <ehrhardt@de.ibm.com>
* Jason J. Herne <jjherne@us.ibm.com>
*/
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
#include <asm/cpacf.h>
#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
#define KMSG_COMPONENT "kvm-s390"
#undef pr_fmt
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"
/*
 * Size limits used by this file.
 *
 * MEM_OP_MAX_SIZE is also the value kvm_vm_ioctl_check_extension() reports
 * for KVM_CAP_S390_MEM_OP.
 */
#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
/* NOTE(review): presumably the number of VCPU-local interrupt slots - confirm */
#define LOCAL_IRQS 32
/* Bytes needed for (KVM_MAX_VCPUS + LOCAL_IRQS) struct kvm_s390_irq entries */
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
(KVM_MAX_VCPUS + LOCAL_IRQS))
/*
 * VCPU_STAT(x) expands to the two trailing initializers of a table entry:
 * the byte offset of counter stat.x inside struct kvm_vcpu, followed by
 * the KVM_STAT_VCPU kind marker.
 */
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
/*
 * Table of per-VCPU statistics counters. Each entry pairs a name with the
 * location of the matching counter (see VCPU_STAT above); the list is
 * terminated by a { NULL } entry. Judging by the element type, the table is
 * presumably consumed by the generic KVM debugfs code - confirm against the
 * common KVM sources.
 *
 * Note that a few names differ from their counter field (for example
 * "userspace_handled" maps to exit_userspace).
 */
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "userspace_handled", VCPU_STAT(exit_userspace) },
{ "exit_null", VCPU_STAT(exit_null) },
{ "exit_validity", VCPU_STAT(exit_validity) },
{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
{ "exit_external_request", VCPU_STAT(exit_external_request) },
{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_pei", VCPU_STAT(exit_pei) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
{ "instruction_spx", VCPU_STAT(instruction_spx) },
{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
{ "instruction_stap", VCPU_STAT(instruction_stap) },
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
{ "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sie", VCPU_STAT(instruction_sie) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
{ "diagnose_10", VCPU_STAT(diagnose_10) },
{ "diagnose_44", VCPU_STAT(diagnose_44) },
{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
{ "diagnose_258", VCPU_STAT(diagnose_258) },
{ "diagnose_308", VCPU_STAT(diagnose_308) },
{ "diagnose_500", VCPU_STAT(diagnose_500) },
{ NULL }
};
/*
 * allow nested virtualization in KVM (if enabled by user space)
 *
 * Exposed as a read-only (S_IRUGO) module parameter. While it is zero,
 * kvm_s390_cpu_feat_init() returns before enabling SIEF2 and the other
 * SCLP-derived CPU features.
 */
static int nested;
module_param(nested, int, S_IRUGO);
MODULE_PARM_DESC(nested, "Nested virtualization support");
/* Upper limit of the facility bits KVM is willing to offer. */
unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };

/*
 * Report how many unsigned long elements kvm_s390_fac_list_mask holds.
 *
 * The build fails if the array ever grows beyond
 * S390_ARCH_FAC_MASK_SIZE_U64 elements.
 */
unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(S390_ARCH_FAC_MASK_SIZE_U64 <
		     ARRAY_SIZE(kvm_s390_fac_list_mask));

	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
/*
 * available cpu features supported by kvm
 * (bits are set through allow_cpu_feat() from kvm_s390_cpu_feat_init())
 */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/*
 * available subfunctions indicated via query / "test bit"
 * (filled in by kvm_s390_cpu_feat_init())
 */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
/* pte notifiers; callbacks are assigned and registered in kvm_arch_hardware_setup() */
static struct gmap_notifier gmap_notifier;
static struct gmap_notifier vsie_gmap_notifier;
/* debug feature handle: registered in kvm_arch_init(), released in kvm_arch_exit() */
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
/*
 * Arch hook for enabling virtualization support in hardware.
 * Nothing has to be done here; always returns 0.
 */
int kvm_arch_hardware_enable(void)
{
/* every s390 is virtualization enabled ;-) */
return 0;
}
/*
 * Forward declaration: kvm_arch_hardware_setup() installs this function as
 * the callback of gmap_notifier. The definition is not part of this
 * section of the file.
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
/*
 * Notifier callback that propagates an epoch delta to every VM and VCPU.
 *
 * It runs during stop_machine(), i.e. while all CPUs are temporarily
 * stopped. To keep guest behavior unchanged, any code touching the epoch of
 * a kvm or of its VCPUs must disable preemption, so that a CPU is never
 * stopped in the middle of a calculation involving the epoch.
 *
 * @notifier: notifier block this callback was registered with (unused)
 * @val:      notifier action value (unused)
 * @v:        points to the unsigned long long delta to apply
 *
 * For each VM the delta is subtracted from the VM epoch and from the epoch
 * in every VCPU's sie_block (and vsie_block, if one is set). It is added to
 * cputm_start of VCPUs that have cputm_enabled set.
 *
 * Always returns NOTIFY_OK.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	const unsigned long long shift = *(unsigned long long *)v;
	struct kvm_vcpu *cpu;
	struct kvm *vm;
	int idx;

	list_for_each_entry(vm, &vm_list, vm_list) {
		vm->arch.epoch -= shift;

		kvm_for_each_vcpu(idx, cpu, vm) {
			cpu->arch.sie_block->epoch -= shift;
			if (cpu->arch.cputm_enabled)
				cpu->arch.cputm_start += shift;
			if (cpu->arch.vsie_block)
				cpu->arch.vsie_block->epoch -= shift;
		}
	}

	return NOTIFY_OK;
}
/*
 * Notifier block wrapping kvm_clock_sync(); added to and removed from
 * s390_epoch_delta_notifier by kvm_arch_hardware_setup() and
 * kvm_arch_hardware_unsetup().
 */
static struct notifier_block kvm_clock_notifier = {
.notifier_call = kvm_clock_sync,
};
/*
 * One-time arch setup: hook KVM into the gmap pte notifier list and into
 * the epoch delta notifier chain.
 *
 * Both gmap notifier callbacks are filled in before anything is registered;
 * the registrations themselves happen in the order gmap_notifier,
 * vsie_gmap_notifier, kvm_clock_notifier.
 *
 * Always returns 0.
 */
int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;

	gmap_register_pte_notifier(&gmap_notifier);
	gmap_register_pte_notifier(&vsie_gmap_notifier);

	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);

	return 0;
}
/*
 * Counterpart of kvm_arch_hardware_setup(): drops the two gmap pte
 * notifiers and removes kvm_clock_notifier from the epoch delta notifier
 * chain again.
 */
void kvm_arch_hardware_unsetup(void)
{
gmap_unregister_pte_notifier(&gmap_notifier);
gmap_unregister_pte_notifier(&vsie_gmap_notifier);
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
&kvm_clock_notifier);
}
/*
 * Mark CPU feature @nr (a KVM_S390_VM_CPU_FEAT_* number) as available by
 * setting its bit in kvm_s390_available_cpu_feat.
 *
 * NOTE(review): set_bit_inv() presumably uses the inverted (MSB first) bit
 * numbering - confirm against the s390 bitops header.
 */
static void allow_cpu_feat(unsigned long nr)
{
set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
/*
 * Ask the machine whether PLO function code @nr is available, using the
 * "test bit" form of the plo instruction.
 *
 * Returns 1 if the resulting condition code is 0, otherwise 0.
 */
static inline int plo_test_bit(unsigned char nr)
{
/*
 * The request is passed in general register 0 (hence the explicit register
 * binding): the function code with 0x100 OR'ed in for the "test bit" query.
 */
register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
/*
 * cc is a write-only ("=d") output of the asm below, so this initial value
 * is replaced by the condition code extracted there.
 */
int cc = 3; /* subfunction not available */
asm volatile(
/* Parameter registers are ignored for "test bit" */
" plo 0,0,0,0(0)\n"
/* copy the condition code into %0 and shift it down into the low bits */
" ipm %0\n"
" srl %0,28\n"
: "=d" (cc)
: "d" (r0)
: "cc");
return cc == 0;
}
static void kvm_s390_cpu_feat_init(void)
{
int i;
for (i = 0; i < 256; ++i) {
if (plo_test_bit(i))
kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
}
if (test_facility(28)) /* TOD-clock steering */
ptff(kvm_s390_available_subfunc.ptff,
sizeof(kvm_s390_available_subfunc.ptff),
PTFF_QAF);
if (test_facility(17)) { /* MSA */
__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
kvm_s390_available_subfunc.kmac);
__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
kvm_s390_available_subfunc.kmc);
__cpacf_query(CPACF_KM, (cpacf_mask_t *)
kvm_s390_available_subfunc.km);
__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
kvm_s390_available_subfunc.kimd);
__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
kvm_s390_available_subfunc.klmd);
}
if (test_facility(76)) /* MSA3 */
__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
kvm_s390_available_subfunc.pckmo);
if (test_facility(77)) { /* MSA4 */
__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
kvm_s390_available_subfunc.kmctr);
__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
kvm_s390_available_subfunc.kmf);
__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
kvm_s390_available_subfunc.kmo);
__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
kvm_s390_available_subfunc.pcc);
}
if (test_facility(57)) /* MSA5 */
__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
kvm_s390_available_subfunc.ppno);
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
* We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
* 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
*/
if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
!test_facility(3) || !nested)
return;
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
if (sclp.has_64bscao)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
if (sclp.has_siif)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
if (sclp.has_gpere)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
if (sclp.has_gsls)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
if (sclp.has_ib)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
if (sclp.has_cei)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
if (sclp.has_ibs)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
/*
* KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
* all skey handling functions read/set the skey from the PGSTE
* instead of the real storage key.
*
* KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
* pages being detected as preserved although they are resident.
*
* KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
* have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
*
* For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
* KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
* correctly shadowed. We can do that for the PGSTE but not for PTE.I.
*
* KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
* cannot easily shadow the SCA because of the ipte lock.
*/
}
int kvm_arch_init(void *opaque)
{
Merge 4.9.206 into android-4.9-q Changes in 4.9.206 ASoC: compress: fix unsigned integer overflow check ASoC: kirkwood: fix external clock probe defer clk: samsung: exynos5420: Preserve PLL configuration during suspend/resume reset: fix reset_control_ops kerneldoc comment clk: at91: avoid sleeping early net: fec: add missed clk_disable_unprepare in remove can: peak_usb: report bus recovery as well can: c_can: D_CAN: c_can_chip_config(): perform a sofware reset on open watchdog: meson: Fix the wrong value of left time scripts/gdb: fix debugging modules compiled with hot/cold partitioning mac80211: fix station inactive_time shortly after boot block: drbd: remove a stray unlock in __drbd_send_protocol() pwm: bcm-iproc: Prevent unloading the driver module while in use scsi: lpfc: Fix dif and first burst use in write commands ARM: debug-imx: only define DEBUG_IMX_UART_PORT if needed ARM: dts: imx53-voipac-dmm-668: Fix memory node duplication parisc: Fix serio address output parisc: Fix HP SDC hpa address output arm64: mm: Prevent mismatched 52-bit VA support arm64: smp: Handle errors reported by the firmware PM / AVS: SmartReflex: NULL check before some freeing functions is not needed ARM: ks8695: fix section mismatch warning ACPI / LPSS: Ignore acpi_device_fix_up_power() return value crypto: user - support incremental algorithm dumps mwifiex: fix potential NULL dereference and use after free mwifiex: debugfs: correct histogram spacing, formatting rtl818x: fix potential use after free xfs: require both realtime inodes to mount ubi: Put MTD device after it is not used ubi: Do not drop UBI device reference before using microblaze: adjust the help to the real behavior microblaze: move "... 
is ready" messages to arch/microblaze/Makefile gpiolib: Fix return value of gpio_to_desc() stub if !GPIOLIB VSOCK: bind to random port for VMADDR_PORT_ANY mtd: rawnand: sunxi: Write pageprog related opcodes to WCMD_SET btrfs: only track ref_heads in delayed_ref_updates HID: intel-ish-hid: fixes incorrect error handling xen/pciback: Check dev_data before using it pinctrl: xway: fix gpio-hog related boot issues net/mlx5: Continue driver initialization despite debugfs failure KVM: s390: unregister debug feature on failing arch init pinctrl: sh-pfc: sh7264: Fix PFCR3 and PFCR0 register configuration pinctrl: sh-pfc: sh7734: Fix shifted values in IPSR10 HID: doc: fix wrong data structure reference for UHID_OUTPUT dm flakey: Properly corrupt multi-page bios. gfs2: take jdata unstuff into account in do_grow xfs: Align compat attrlist_by_handle with native implementation. xfs: Fix bulkstat compat ioctls on x32 userspace. IB/qib: Fix an error code in qib_sdma_verbs_send() powerpc/book3s/32: fix number of bats in p/v_block_mapped() powerpc/xmon: fix dump_segments() drivers/regulator: fix a missing check of return value serial: max310x: Fix tx_empty() callback openrisc: Fix broken paths to arch/or32 RDMA/srp: Propagate ib_post_send() failures to the SCSI mid-layer scsi: qla2xxx: deadlock by configfs_depend_item scsi: csiostor: fix incorrect dma device in case of vport ath6kl: Only use match sets when firmware supports it ath6kl: Fix off by one error in scan completion powerpc/prom: fix early DEBUG messages powerpc/mm: Make NULL pointer deferences explicit on bad page faults. 
powerpc/44x/bamboo: Fix PCI range vfio/spapr_tce: Get rid of possible infinite loop powerpc/powernv/eeh/npu: Fix uninitialized variables in opal_pci_eeh_freeze_status drbd: ignore "all zero" peer volume sizes in handshake drbd: reject attach of unsuitable uuids even if connected drbd: do not block when adjusting "disk-options" while IO is frozen drbd: fix print_st_err()'s prototype to match the definition regulator: tps65910: fix a missing check of return value powerpc/83xx: handle machine check caused by watchdog timer powerpc/pseries: Fix node leak in update_lmb_associativity_index() crypto: mxc-scc - fix build warnings on ARM64 pwm: clps711x: Fix period calculation net/net_namespace: Check the return value of register_pernet_subsys() um: Make GCOV depend on !KCOV net: stmicro: fix a missing check of clk_prepare net: dsa: bcm_sf2: Propagate error value from mdio_write atl1e: checking the status of atl1e_write_phy_reg tipc: fix a missing check of genlmsg_put net/wan/fsl_ucc_hdlc: Avoid double free in ucc_hdlc_probe() ocfs2: clear journal dirty flag after shutdown journal vmscan: return NODE_RECLAIM_NOSCAN in node_reclaim() when CONFIG_NUMA is n lib/genalloc.c: fix allocation of aligned buffer from non-aligned chunk lib/genalloc.c: use vzalloc_node() to allocate the bitmap drivers/base/platform.c: kmemleak ignore a known leak lib/genalloc.c: include vmalloc.h mtd: Check add_mtd_device() ret code tipc: fix memory leak in tipc_nl_compat_publ_dump net/core/neighbour: tell kmemleak about hash tables net/core/neighbour: fix kmemleak minimal reference count for hash tables sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe ip_tunnel: Make none-tunnel-dst tunnel port work with lwtunnel decnet: fix DN_IFREQ_SIZE tipc: fix skb may be leaky in tipc_link_input sfc: initialise found bitmap in efx_ef10_mtd_probe net: fix possible overflow in __sk_mem_raise_allocated() sctp: don't compare hb_timer expire date before starting it net: dev: Use unsigned integer as 
an argument to left-shift iommu/amd: Fix NULL dereference bug in match_hid_uid scsi: libsas: Support SATA PHY connection rate unmatch fixing during discovery ACPI / APEI: Switch estatus pool to use vmalloc memory scsi: libsas: Check SMP PHY control function result powerpc/pseries/dlpar: Fix a missing check in dlpar_parse_cc_property() mtd: Remove a debug trace in mtdpart.c mm, gup: add missing refcount overflow checks on x86 and s390 clk: at91: fix update bit maps on CFG_MOR write staging: rtl8192e: fix potential use after free USB: serial: ftdi_sio: add device IDs for U-Blox C099-F9P mei: bus: prefix device names on bus with the bus name media: v4l2-ctrl: fix flags for DO_WHITE_BALANCE net: macb: fix error format in dev_err() pwm: Clear chip_data in pwm_put() media: atmel: atmel-isc: fix asd memory allocation macvlan: schedule bc_work even if error openvswitch: fix flow command message size slip: Fix use-after-free Read in slip_open openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() openvswitch: remove another BUG_ON() tipc: fix link name length check sctp: cache netns in sctp_ep_common net: sched: fix `tc -s class show` no bstats on class with nolock subqueues HID: core: check whether Usage Page item is after Usage ID items hwrng: stm32 - fix unbalanced pm_runtime_enable platform/x86: hp-wmi: Fix ACPI errors caused by too small buffer net: fec: fix clock count mis-match Linux 4.9.206 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-12-05 15:46:10 +01:00
int rc;
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
if (!kvm_s390_dbf)
return -ENOMEM;
if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
Merge 4.9.206 into android-4.9-q Changes in 4.9.206 ASoC: compress: fix unsigned integer overflow check ASoC: kirkwood: fix external clock probe defer clk: samsung: exynos5420: Preserve PLL configuration during suspend/resume reset: fix reset_control_ops kerneldoc comment clk: at91: avoid sleeping early net: fec: add missed clk_disable_unprepare in remove can: peak_usb: report bus recovery as well can: c_can: D_CAN: c_can_chip_config(): perform a sofware reset on open watchdog: meson: Fix the wrong value of left time scripts/gdb: fix debugging modules compiled with hot/cold partitioning mac80211: fix station inactive_time shortly after boot block: drbd: remove a stray unlock in __drbd_send_protocol() pwm: bcm-iproc: Prevent unloading the driver module while in use scsi: lpfc: Fix dif and first burst use in write commands ARM: debug-imx: only define DEBUG_IMX_UART_PORT if needed ARM: dts: imx53-voipac-dmm-668: Fix memory node duplication parisc: Fix serio address output parisc: Fix HP SDC hpa address output arm64: mm: Prevent mismatched 52-bit VA support arm64: smp: Handle errors reported by the firmware PM / AVS: SmartReflex: NULL check before some freeing functions is not needed ARM: ks8695: fix section mismatch warning ACPI / LPSS: Ignore acpi_device_fix_up_power() return value crypto: user - support incremental algorithm dumps mwifiex: fix potential NULL dereference and use after free mwifiex: debugfs: correct histogram spacing, formatting rtl818x: fix potential use after free xfs: require both realtime inodes to mount ubi: Put MTD device after it is not used ubi: Do not drop UBI device reference before using microblaze: adjust the help to the real behavior microblaze: move "... 
is ready" messages to arch/microblaze/Makefile gpiolib: Fix return value of gpio_to_desc() stub if !GPIOLIB VSOCK: bind to random port for VMADDR_PORT_ANY mtd: rawnand: sunxi: Write pageprog related opcodes to WCMD_SET btrfs: only track ref_heads in delayed_ref_updates HID: intel-ish-hid: fixes incorrect error handling xen/pciback: Check dev_data before using it pinctrl: xway: fix gpio-hog related boot issues net/mlx5: Continue driver initialization despite debugfs failure KVM: s390: unregister debug feature on failing arch init pinctrl: sh-pfc: sh7264: Fix PFCR3 and PFCR0 register configuration pinctrl: sh-pfc: sh7734: Fix shifted values in IPSR10 HID: doc: fix wrong data structure reference for UHID_OUTPUT dm flakey: Properly corrupt multi-page bios. gfs2: take jdata unstuff into account in do_grow xfs: Align compat attrlist_by_handle with native implementation. xfs: Fix bulkstat compat ioctls on x32 userspace. IB/qib: Fix an error code in qib_sdma_verbs_send() powerpc/book3s/32: fix number of bats in p/v_block_mapped() powerpc/xmon: fix dump_segments() drivers/regulator: fix a missing check of return value serial: max310x: Fix tx_empty() callback openrisc: Fix broken paths to arch/or32 RDMA/srp: Propagate ib_post_send() failures to the SCSI mid-layer scsi: qla2xxx: deadlock by configfs_depend_item scsi: csiostor: fix incorrect dma device in case of vport ath6kl: Only use match sets when firmware supports it ath6kl: Fix off by one error in scan completion powerpc/prom: fix early DEBUG messages powerpc/mm: Make NULL pointer deferences explicit on bad page faults. 
powerpc/44x/bamboo: Fix PCI range vfio/spapr_tce: Get rid of possible infinite loop powerpc/powernv/eeh/npu: Fix uninitialized variables in opal_pci_eeh_freeze_status drbd: ignore "all zero" peer volume sizes in handshake drbd: reject attach of unsuitable uuids even if connected drbd: do not block when adjusting "disk-options" while IO is frozen drbd: fix print_st_err()'s prototype to match the definition regulator: tps65910: fix a missing check of return value powerpc/83xx: handle machine check caused by watchdog timer powerpc/pseries: Fix node leak in update_lmb_associativity_index() crypto: mxc-scc - fix build warnings on ARM64 pwm: clps711x: Fix period calculation net/net_namespace: Check the return value of register_pernet_subsys() um: Make GCOV depend on !KCOV net: stmicro: fix a missing check of clk_prepare net: dsa: bcm_sf2: Propagate error value from mdio_write atl1e: checking the status of atl1e_write_phy_reg tipc: fix a missing check of genlmsg_put net/wan/fsl_ucc_hdlc: Avoid double free in ucc_hdlc_probe() ocfs2: clear journal dirty flag after shutdown journal vmscan: return NODE_RECLAIM_NOSCAN in node_reclaim() when CONFIG_NUMA is n lib/genalloc.c: fix allocation of aligned buffer from non-aligned chunk lib/genalloc.c: use vzalloc_node() to allocate the bitmap drivers/base/platform.c: kmemleak ignore a known leak lib/genalloc.c: include vmalloc.h mtd: Check add_mtd_device() ret code tipc: fix memory leak in tipc_nl_compat_publ_dump net/core/neighbour: tell kmemleak about hash tables net/core/neighbour: fix kmemleak minimal reference count for hash tables sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe ip_tunnel: Make none-tunnel-dst tunnel port work with lwtunnel decnet: fix DN_IFREQ_SIZE tipc: fix skb may be leaky in tipc_link_input sfc: initialise found bitmap in efx_ef10_mtd_probe net: fix possible overflow in __sk_mem_raise_allocated() sctp: don't compare hb_timer expire date before starting it net: dev: Use unsigned integer as 
an argument to left-shift iommu/amd: Fix NULL dereference bug in match_hid_uid scsi: libsas: Support SATA PHY connection rate unmatch fixing during discovery ACPI / APEI: Switch estatus pool to use vmalloc memory scsi: libsas: Check SMP PHY control function result powerpc/pseries/dlpar: Fix a missing check in dlpar_parse_cc_property() mtd: Remove a debug trace in mtdpart.c mm, gup: add missing refcount overflow checks on x86 and s390 clk: at91: fix update bit maps on CFG_MOR write staging: rtl8192e: fix potential use after free USB: serial: ftdi_sio: add device IDs for U-Blox C099-F9P mei: bus: prefix device names on bus with the bus name media: v4l2-ctrl: fix flags for DO_WHITE_BALANCE net: macb: fix error format in dev_err() pwm: Clear chip_data in pwm_put() media: atmel: atmel-isc: fix asd memory allocation macvlan: schedule bc_work even if error openvswitch: fix flow command message size slip: Fix use-after-free Read in slip_open openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() openvswitch: remove another BUG_ON() tipc: fix link name length check sctp: cache netns in sctp_ep_common net: sched: fix `tc -s class show` no bstats on class with nolock subqueues HID: core: check whether Usage Page item is after Usage ID items hwrng: stm32 - fix unbalanced pm_runtime_enable platform/x86: hp-wmi: Fix ACPI errors caused by too small buffer net: fec: fix clock count mis-match Linux 4.9.206 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-12-05 15:46:10 +01:00
rc = -ENOMEM;
goto out_debug_unreg;
}
kvm_s390_cpu_feat_init();
/* Register floating interrupt controller interface. */
Merge 4.9.206 into android-4.9-q Changes in 4.9.206 ASoC: compress: fix unsigned integer overflow check ASoC: kirkwood: fix external clock probe defer clk: samsung: exynos5420: Preserve PLL configuration during suspend/resume reset: fix reset_control_ops kerneldoc comment clk: at91: avoid sleeping early net: fec: add missed clk_disable_unprepare in remove can: peak_usb: report bus recovery as well can: c_can: D_CAN: c_can_chip_config(): perform a sofware reset on open watchdog: meson: Fix the wrong value of left time scripts/gdb: fix debugging modules compiled with hot/cold partitioning mac80211: fix station inactive_time shortly after boot block: drbd: remove a stray unlock in __drbd_send_protocol() pwm: bcm-iproc: Prevent unloading the driver module while in use scsi: lpfc: Fix dif and first burst use in write commands ARM: debug-imx: only define DEBUG_IMX_UART_PORT if needed ARM: dts: imx53-voipac-dmm-668: Fix memory node duplication parisc: Fix serio address output parisc: Fix HP SDC hpa address output arm64: mm: Prevent mismatched 52-bit VA support arm64: smp: Handle errors reported by the firmware PM / AVS: SmartReflex: NULL check before some freeing functions is not needed ARM: ks8695: fix section mismatch warning ACPI / LPSS: Ignore acpi_device_fix_up_power() return value crypto: user - support incremental algorithm dumps mwifiex: fix potential NULL dereference and use after free mwifiex: debugfs: correct histogram spacing, formatting rtl818x: fix potential use after free xfs: require both realtime inodes to mount ubi: Put MTD device after it is not used ubi: Do not drop UBI device reference before using microblaze: adjust the help to the real behavior microblaze: move "... 
is ready" messages to arch/microblaze/Makefile gpiolib: Fix return value of gpio_to_desc() stub if !GPIOLIB VSOCK: bind to random port for VMADDR_PORT_ANY mtd: rawnand: sunxi: Write pageprog related opcodes to WCMD_SET btrfs: only track ref_heads in delayed_ref_updates HID: intel-ish-hid: fixes incorrect error handling xen/pciback: Check dev_data before using it pinctrl: xway: fix gpio-hog related boot issues net/mlx5: Continue driver initialization despite debugfs failure KVM: s390: unregister debug feature on failing arch init pinctrl: sh-pfc: sh7264: Fix PFCR3 and PFCR0 register configuration pinctrl: sh-pfc: sh7734: Fix shifted values in IPSR10 HID: doc: fix wrong data structure reference for UHID_OUTPUT dm flakey: Properly corrupt multi-page bios. gfs2: take jdata unstuff into account in do_grow xfs: Align compat attrlist_by_handle with native implementation. xfs: Fix bulkstat compat ioctls on x32 userspace. IB/qib: Fix an error code in qib_sdma_verbs_send() powerpc/book3s/32: fix number of bats in p/v_block_mapped() powerpc/xmon: fix dump_segments() drivers/regulator: fix a missing check of return value serial: max310x: Fix tx_empty() callback openrisc: Fix broken paths to arch/or32 RDMA/srp: Propagate ib_post_send() failures to the SCSI mid-layer scsi: qla2xxx: deadlock by configfs_depend_item scsi: csiostor: fix incorrect dma device in case of vport ath6kl: Only use match sets when firmware supports it ath6kl: Fix off by one error in scan completion powerpc/prom: fix early DEBUG messages powerpc/mm: Make NULL pointer deferences explicit on bad page faults. 
powerpc/44x/bamboo: Fix PCI range vfio/spapr_tce: Get rid of possible infinite loop powerpc/powernv/eeh/npu: Fix uninitialized variables in opal_pci_eeh_freeze_status drbd: ignore "all zero" peer volume sizes in handshake drbd: reject attach of unsuitable uuids even if connected drbd: do not block when adjusting "disk-options" while IO is frozen drbd: fix print_st_err()'s prototype to match the definition regulator: tps65910: fix a missing check of return value powerpc/83xx: handle machine check caused by watchdog timer powerpc/pseries: Fix node leak in update_lmb_associativity_index() crypto: mxc-scc - fix build warnings on ARM64 pwm: clps711x: Fix period calculation net/net_namespace: Check the return value of register_pernet_subsys() um: Make GCOV depend on !KCOV net: stmicro: fix a missing check of clk_prepare net: dsa: bcm_sf2: Propagate error value from mdio_write atl1e: checking the status of atl1e_write_phy_reg tipc: fix a missing check of genlmsg_put net/wan/fsl_ucc_hdlc: Avoid double free in ucc_hdlc_probe() ocfs2: clear journal dirty flag after shutdown journal vmscan: return NODE_RECLAIM_NOSCAN in node_reclaim() when CONFIG_NUMA is n lib/genalloc.c: fix allocation of aligned buffer from non-aligned chunk lib/genalloc.c: use vzalloc_node() to allocate the bitmap drivers/base/platform.c: kmemleak ignore a known leak lib/genalloc.c: include vmalloc.h mtd: Check add_mtd_device() ret code tipc: fix memory leak in tipc_nl_compat_publ_dump net/core/neighbour: tell kmemleak about hash tables net/core/neighbour: fix kmemleak minimal reference count for hash tables sfc: suppress duplicate nvmem partition types in efx_ef10_mtd_probe ip_tunnel: Make none-tunnel-dst tunnel port work with lwtunnel decnet: fix DN_IFREQ_SIZE tipc: fix skb may be leaky in tipc_link_input sfc: initialise found bitmap in efx_ef10_mtd_probe net: fix possible overflow in __sk_mem_raise_allocated() sctp: don't compare hb_timer expire date before starting it net: dev: Use unsigned integer as 
an argument to left-shift iommu/amd: Fix NULL dereference bug in match_hid_uid scsi: libsas: Support SATA PHY connection rate unmatch fixing during discovery ACPI / APEI: Switch estatus pool to use vmalloc memory scsi: libsas: Check SMP PHY control function result powerpc/pseries/dlpar: Fix a missing check in dlpar_parse_cc_property() mtd: Remove a debug trace in mtdpart.c mm, gup: add missing refcount overflow checks on x86 and s390 clk: at91: fix update bit maps on CFG_MOR write staging: rtl8192e: fix potential use after free USB: serial: ftdi_sio: add device IDs for U-Blox C099-F9P mei: bus: prefix device names on bus with the bus name media: v4l2-ctrl: fix flags for DO_WHITE_BALANCE net: macb: fix error format in dev_err() pwm: Clear chip_data in pwm_put() media: atmel: atmel-isc: fix asd memory allocation macvlan: schedule bc_work even if error openvswitch: fix flow command message size slip: Fix use-after-free Read in slip_open openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() openvswitch: remove another BUG_ON() tipc: fix link name length check sctp: cache netns in sctp_ep_common net: sched: fix `tc -s class show` no bstats on class with nolock subqueues HID: core: check whether Usage Page item is after Usage ID items hwrng: stm32 - fix unbalanced pm_runtime_enable platform/x86: hp-wmi: Fix ACPI errors caused by too small buffer net: fec: fix clock count mis-match Linux 4.9.206 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-12-05 15:46:10 +01:00
rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
if (rc) {
pr_err("Failed to register FLIC rc=%d\n", rc);
goto out_debug_unreg;
}
return 0;
out_debug_unreg:
debug_unregister(kvm_s390_dbf);
return rc;
}
/* Arch exit hook: release the kvm-s390 debug feature registration. */
void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}
/* Section: device related */
/*
 * Architecture-specific device ioctl handler.
 *
 * KVM_S390_ENABLE_SIE is the only request handled here; its result is the
 * return value of s390_enable_sie(). Every other request yields -EINVAL.
 */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	switch (ioctl) {
	case KVM_S390_ENABLE_SIE:
		return s390_enable_sie();
	default:
		return -EINVAL;
	}
}
/*
 * Report whether (or to what extent) capability @ext is supported.
 *
 * Returns 0 for unknown/unsupported capabilities, 1 for plain boolean
 * capabilities, and a capability-specific value otherwise (maximum transfer
 * size, number of vcpus, number of memory slots, facility test result).
 */
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
	case KVM_CAP_S390_USER_INSTR0:
		return 1;
	case KVM_CAP_S390_MEM_OP:
		return MEM_OP_MAX_SIZE;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		/* Without SCA entries the generic vcpu limit applies. */
		if (!kvm_s390_use_sca_entries())
			return KVM_MAX_VCPUS;
		/* The extended SCA is only usable with both SCLP features. */
		if (sclp.has_esca && sclp.has_64bscao)
			return KVM_S390_ESCA_CPU_SLOTS;
		return KVM_S390_BSCA_CPU_SLOTS;
	case KVM_CAP_NR_MEMSLOTS:
		return KVM_USER_MEM_SLOTS;
	case KVM_CAP_S390_COW:
		return MACHINE_HAS_ESOP;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		return MACHINE_HAS_VX;
	case KVM_CAP_S390_RI:
		return test_facility(64);
	case KVM_CAP_S390_BPB:
		return test_facility(82);
	default:
		return 0;
	}
}
/*
 * Transfer the guest dirty state of every page in @memslot into the KVM
 * dirty log.
 *
 * For each guest frame of the slot the dirty state is tested and cleared via
 * test_and_clear_guest_dirty(); pages found dirty are reported with
 * mark_page_dirty(). The walk is abandoned early when a fatal signal is
 * pending for the current task, and it reschedules between pages.
 */
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, end_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/*
	 * Loop over all guest pages. end_gfn is the first frame *behind* the
	 * slot, so the bound is exclusive; an inclusive bound would touch one
	 * page that does not belong to this memslot.
	 */
	end_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn < end_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);
/*
 * Get (and clear) the dirty memory log for a memory slot.
 *
 * Returns -EINVAL for ucontrol VMs and for slot ids outside the user slot
 * range, -ENOENT when the slot has no dirty bitmap, the error reported by
 * kvm_get_dirty_log() if that fails, and 0 on success.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	struct kvm_memory_slot *memslot;
	struct kvm_memslots *slots;
	unsigned long bitmap_bytes;
	int is_dirty = 0;
	int r;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	if (log->slot >= KVM_USER_MEM_SLOTS) {
		r = -EINVAL;
		goto out;
	}

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	if (!memslot->dirty_bitmap) {
		r = -ENOENT;
		goto out;
	}

	/* Pull the current guest dirty state into the bitmap first. */
	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		bitmap_bytes = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, bitmap_bytes);
	}
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
}
}
/*
 * Enable a VM-wide capability.
 *
 * Returns 0 on success, -EINVAL for non-zero flags, unknown capabilities or
 * capabilities the machine does not provide, and -EBUSY when a capability
 * that changes the facility lists is requested after vcpus were created.
 */
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	/* Default result; only the successful paths and -EBUSY override it. */
	int r = -EINVAL;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		mutex_lock(&kvm->lock);
		if (kvm->created_vcpus) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_INSTR0:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
		kvm->arch.user_instr0 = 1;
		icpt_operexc_on_all_vcpus(kvm);
		r = 0;
		break;
	default:
		break;
	}

	return r;
}
/*
 * Read a memory-control attribute.
 *
 * Only KVM_S390_VM_MEM_LIMIT_SIZE is readable: the VM's memory limit is
 * stored as a u64 at the user address in @attr->addr. Returns 0 on success,
 * -EFAULT if the store fails and -ENXIO for any other attribute.
 */
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (attr->attr != KVM_S390_VM_MEM_LIMIT_SIZE)
		return -ENXIO;

	VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
		 kvm->arch.mem_limit);
	if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
		return -EFAULT;

	return 0;
}
/*
 * Write a memory-control attribute of the VM.
 *
 * KVM_S390_VM_MEM_ENABLE_CMMA: sets kvm->arch.use_cmma; -ENXIO without
 *   sclp.has_cmma, -EBUSY once vcpus have been created.
 * KVM_S390_VM_MEM_CLR_CMMA: calls s390_reset_cmma() on the gmap's mm;
 *   -ENXIO without sclp.has_cmma, -EINVAL if CMMA was never enabled.
 * KVM_S390_VM_MEM_LIMIT_SIZE: reads a new limit from user space and replaces
 *   the VM's gmap with one created for that limit; -EINVAL for ucontrol VMs
 *   or a zero limit, -EFAULT on a failed user read, -E2BIG if the limit
 *   exceeds kvm->arch.mem_limit, -EBUSY once vcpus exist, -ENOMEM if the new
 *   gmap cannot be created.
 * Any other attribute yields -ENXIO.
 */
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret;
unsigned int idx;
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
ret = -ENXIO;
if (!sclp.has_cmma)
break;
/* stays -EBUSY unless no vcpu has been created yet */
ret = -EBUSY;
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
kvm->arch.use_cmma = 1;
ret = 0;
}
mutex_unlock(&kvm->lock);
break;
case KVM_S390_VM_MEM_CLR_CMMA:
ret = -ENXIO;
if (!sclp.has_cmma)
break;
ret = -EINVAL;
if (!kvm->arch.use_cmma)
break;
VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
/* the reset runs under kvm->lock and inside an SRCU read section */
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
s390_reset_cmma(kvm->arch.gmap->mm);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
ret = 0;
break;
case KVM_S390_VM_MEM_LIMIT_SIZE: {
unsigned long new_limit;
if (kvm_is_ucontrol(kvm))
return -EINVAL;
if (get_user(new_limit, (u64 __user *)attr->addr))
return -EFAULT;
/* a limited VM may not be given a limit above its current one */
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
new_limit > kvm->arch.mem_limit)
return -E2BIG;
if (!new_limit)
return -EINVAL;
/* gmap_create takes last usable address */
if (new_limit != KVM_S390_NO_MEM_LIMIT)
new_limit -= 1;
ret = -EBUSY;
mutex_lock(&kvm->lock);
if (!kvm->created_vcpus) {
/* gmap_create will round the limit up */
struct gmap *new = gmap_create(current->mm, new_limit);
if (!new) {
ret = -ENOMEM;
} else {
/* the old gmap is only dropped once the new one exists */
gmap_remove(kvm->arch.gmap);
new->private = kvm;
kvm->arch.gmap = new;
ret = 0;
}
}
mutex_unlock(&kvm->lock);
/* NOTE(review): both events are emitted on the failure paths as well */
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
(void *) kvm->arch.gmap->asce);
break;
}
default:
ret = -ENXIO;
break;
}
return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
/*
 * Enable or disable AES/DEA key wrapping for the VM.
 *
 * Enabling fills the matching wrapping key mask in the crycb with random
 * bytes; disabling zeroes it. Afterwards the crypto setup of every vcpu is
 * redone and the vcpu is kicked via exit_sie(). Everything runs under
 * kvm->lock.
 *
 * Returns 0 on success, -EINVAL if the VM lacks facility 76 and -ENXIO for
 * an unknown attribute.
 */
static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int idx;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
				 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
				 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
		       sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	/* Propagate the new settings to all vcpus and force them out of SIE. */
	kvm_for_each_vcpu(idx, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);

	return 0;
}
/*
 * Set the TOD extension. Only the value 0 is accepted.
 *
 * Returns 0 on success, -EFAULT if the value cannot be read from user space
 * and -EINVAL for any non-zero value.
 */
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high)
		return -EINVAL;

	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
	return 0;
}
/*
 * Set the guest TOD base from the 64-bit value at @attr->addr.
 *
 * Returns 0 on success or -EFAULT if the value cannot be read.
 */
static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 tod_base;

	if (copy_from_user(&tod_base, (void __user *)attr->addr,
			   sizeof(tod_base)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, tod_base);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", tod_base);
	return 0;
}
/*
 * Dispatch a TOD "set" request to the high/low handler.
 *
 * Returns -EINVAL if any flag is set, -ENXIO for an unknown attribute and
 * the handler's result otherwise.
 */
static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		return kvm_s390_set_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		return kvm_s390_set_tod_low(kvm, attr);
	default:
		return -ENXIO;
	}
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
u8 gtod_high = 0;
if (copy_to_user((void __user *)attr->addr, &gtod_high,
sizeof(gtod_high)))
return -EFAULT;
VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
return 0;
}
/*
 * Report the guest TOD base as obtained from kvm_s390_get_tod_clock_fast().
 *
 * Returns 0 on success or -EFAULT if the value cannot be written to user
 * space.
 */
static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 tod_base = kvm_s390_get_tod_clock_fast(kvm);

	if (copy_to_user((void __user *)attr->addr, &tod_base,
			 sizeof(tod_base)))
		return -EFAULT;

	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", tod_base);
	return 0;
}
/*
 * Dispatch a TOD "get" request to the high/low handler.
 *
 * Returns -EINVAL if any flag is set, -ENXIO for an unknown attribute and
 * the handler's result otherwise.
 */
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		return kvm_s390_get_tod_high(kvm, attr);
	case KVM_S390_VM_TOD_LOW:
		return kvm_s390_get_tod_low(kvm, attr);
	default:
		return -ENXIO;
	}
}
/*
 * Configure the guest CPU model (cpuid, IBC, facility list) from the
 * kvm_s390_vm_cpu_processor structure at @attr->addr.
 *
 * A requested IBC is clamped to the [lowest, unblocked] range derived from
 * sclp.ibc; it is left untouched when either the machine's lowest IBC or the
 * requested IBC is 0.
 *
 * Returns 0 on success, -EBUSY once vcpus have been created, -ENOMEM if the
 * bounce buffer cannot be allocated and -EFAULT on a failed user copy.
 */
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (kvm->created_vcpus) {
		ret = -EBUSY;
		goto out;
	}

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}

	if (copy_from_user(proc, (void __user *)attr->addr, sizeof(*proc))) {
		ret = -EFAULT;
		goto out_free;
	}

	kvm->arch.model.cpuid = proc->cpuid;
	lowest_ibc = (sclp.ibc >> 16) & 0xfff;
	unblocked_ibc = sclp.ibc & 0xfff;
	if (lowest_ibc && proc->ibc) {
		if (proc->ibc > unblocked_ibc)
			kvm->arch.model.ibc = unblocked_ibc;
		else if (proc->ibc < lowest_ibc)
			kvm->arch.model.ibc = lowest_ibc;
		else
			kvm->arch.model.ibc = proc->ibc;
	}
	memcpy(kvm->arch.model.fac_list, proc->fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

out_free:
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
/*
 * Configure the guest CPU feature bitmap from the kvm_s390_vm_cpu_feat
 * structure at @attr->addr.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, -EINVAL if a feature
 * outside kvm_s390_available_cpu_feat is requested and -EBUSY once vcpus
 * have been created.
 */
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	/*
	 * Check created_vcpus (as the other configuration paths in this file
	 * do under kvm->lock) rather than online_vcpus, so that a vcpu that is
	 * still being created is taken into account as well.
	 */
	if (!kvm->created_vcpus) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);

	return ret;
}
/*
 * Configuring subfunctions is not supported; always returns -ENXIO.
 *
 * Once supported by kernel + hw, the subfunctions have to be stored in
 * kvm->arch together with the fact that user space configured them.
 */
static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
/*
 * Dispatch a CPU-model "set" request by attribute; unknown attributes
 * yield -ENXIO.
 */
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		return kvm_s390_set_processor(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		return kvm_s390_set_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		return kvm_s390_set_processor_subfunc(kvm, attr);
	default:
		return -ENXIO;
	}
}
/*
 * Copy the configured guest CPU model (cpuid, IBC, facility list) to the
 * kvm_s390_vm_cpu_processor structure at @attr->addr.
 *
 * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated
 * and -EFAULT on a failed user copy.
 */
static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	int ret = 0;

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc)
		return -ENOMEM;

	proc->cpuid = kvm->arch.model.cpuid;
	proc->ibc = kvm->arch.model.ibc;
	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
		ret = -EFAULT;

	kfree(proc);
	return ret;
}
/*
 * Copy the machine's CPU model (host cpuid, sclp.ibc, the VM's facility
 * mask and the host facility list from the lowcore) to the
 * kvm_s390_vm_cpu_machine structure at @attr->addr.
 *
 * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated
 * and -EFAULT on a failed user copy.
 */
static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_machine *mach;
	int ret = 0;

	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
	if (!mach)
		return -ENOMEM;

	get_cpu_id((struct cpuid *) &mach->cpuid);
	mach->ibc = sclp.ibc;
	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
	       sizeof(S390_lowcore.stfle_fac_list));

	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
		ret = -EFAULT;

	kfree(mach);
	return ret;
}
static int kvm_s390_get_processor_feat(struct kvm *kvm,
struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_feat data;
bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
return 0;
}
static int kvm_s390_get_machine_feat(struct kvm *kvm,
struct kvm_device_attr *attr)
{
struct kvm_s390_vm_cpu_feat data;
bitmap_copy((unsigned long *) data.feat,
kvm_s390_available_cpu_feat,
KVM_S390_VM_CPU_FEAT_NR_BITS);
if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
return -EFAULT;
return 0;
}
/*
 * Querying configured subfunctions is not supported; always returns -ENXIO.
 *
 * Once subfunctions can actually be configured (kernel + hw support), this
 * has to check whether user space already set them and, if so, copy them
 * from kvm->arch.
 */
static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	return -ENXIO;
}
static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
struct kvm_device_attr *attr)
{
if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
sizeof(struct kvm_s390_vm_cpu_subfunc)))
return -EFAULT;
return 0;
}
/*
 * Dispatch a CPU-model "get" request by attribute; unknown attributes
 * yield -ENXIO.
 */
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		return kvm_s390_get_processor(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE:
		return kvm_s390_get_machine(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		return kvm_s390_get_processor_feat(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		return kvm_s390_get_machine_feat(kvm, attr);
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		return kvm_s390_get_processor_subfunc(kvm, attr);
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		return kvm_s390_get_machine_subfunc(kvm, attr);
	default:
		return -ENXIO;
	}
}
/*
 * Dispatch a VM attribute "set" request by attribute group; unknown groups
 * yield -ENXIO.
 */
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		return kvm_s390_set_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		return kvm_s390_set_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		return kvm_s390_set_cpu_model(kvm, attr);
	case KVM_S390_VM_CRYPTO:
		return kvm_s390_vm_set_crypto(kvm, attr);
	default:
		return -ENXIO;
	}
}
/*
 * Dispatch a VM attribute "get" request by attribute group; unknown groups
 * yield -ENXIO.
 */
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		return kvm_s390_get_mem_control(kvm, attr);
	case KVM_S390_VM_TOD:
		return kvm_s390_get_tod(kvm, attr);
	case KVM_S390_VM_CPU_MODEL:
		return kvm_s390_get_cpu_model(kvm, attr);
	default:
		return -ENXIO;
	}
}
/*
 * Report whether a VM attribute (group + attr) is supported.
 *
 * Returns 0 if it is, -ENXIO otherwise. The CMMA attributes additionally
 * depend on sclp.has_cmma.
 */
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	/* Anything not explicitly recognised below is unsupported. */
	int ret = -ENXIO;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
			if (sclp.has_cmma)
				ret = 0;
			break;
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
int i, r = 0;
if (args->flags != 0)
return -EINVAL;
/* Is this guest using storage keys? */
if (!mm_use_skey(current->mm))
return KVM_S390_GET_SKEYS_NONE;
/* Enforce sane limit on memory allocation */
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
return -EINVAL;
keys = kmalloc_array(args->count, sizeof(uint8_t),
GFP_KERNEL | __GFP_NOWARN);
if (!keys)
keys = vmalloc(sizeof(uint8_t) * args->count);
if (!keys)
return -ENOMEM;
down_read(&current->mm->mmap_sem);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
break;
}
r = get_guest_storage_key(current->mm, hva, &keys[i]);
if (r)
break;
}
up_read(&current->mm->mmap_sem);
if (!r) {
r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
sizeof(uint8_t) * args->count);
if (r)
r = -EFAULT;
}
kvfree(keys);
return r;
}
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
int i, r = 0;
if (args->flags != 0)
return -EINVAL;
/* Enforce sane limit on memory allocation */
if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
return -EINVAL;
keys = kmalloc_array(args->count, sizeof(uint8_t),
GFP_KERNEL | __GFP_NOWARN);
if (!keys)
keys = vmalloc(sizeof(uint8_t) * args->count);
if (!keys)
return -ENOMEM;
r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
sizeof(uint8_t) * args->count);
if (r) {
r = -EFAULT;
goto out;
}
/* Enable storage key handling for the guest */
r = s390_enable_skey();
if (r)
goto out;
down_read(&current->mm->mmap_sem);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
break;
}
/* Lowest order bit is reserved */
if (keys[i] & 0x01) {
r = -EINVAL;
break;
}
r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r)
break;
}
up_read(&current->mm->mmap_sem);
out:
kvfree(keys);
return r;
}
/*
 * Architecture-specific VM ioctl handler.
 *
 * Each request copies its argument structure from user space (-EFAULT on
 * failure) and forwards it to the matching helper. Unknown requests yield
 * -ENOTTY.
 */
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		if (copy_from_user(&s390int, argp, sizeof(s390int))) {
			r = -EFAULT;
			break;
		}
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		if (copy_from_user(&cap, argp, sizeof(cap))) {
			r = -EFAULT;
			break;
		}
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		/* Only valid after the irqchip capability was enabled. */
		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR:
		if (copy_from_user(&attr, argp, sizeof(attr))) {
			r = -EFAULT;
			break;
		}
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	case KVM_GET_DEVICE_ATTR:
		if (copy_from_user(&attr, argp, sizeof(attr))) {
			r = -EFAULT;
			break;
		}
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	case KVM_HAS_DEVICE_ATTR:
		if (copy_from_user(&attr, argp, sizeof(attr))) {
			r = -EFAULT;
			break;
		}
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys))) {
			r = -EFAULT;
			break;
		}
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys))) {
			r = -EFAULT;
			break;
		}
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
/*
 * Issue PQAP(QCI) and store the result in @config.
 *
 * @config must point to a buffer of at least 128 bytes; it is zeroed before
 * the instruction is issued. The function code is passed in register 0 and
 * the buffer address in register 2.
 *
 * Returns the condition code extracted via ipm/srl after the instruction.
 * NOTE(review): the EX_TABLE(0b, 1b) entry presumably lets a program check
 * on the PQAP resume at label 1 with cc still 0 - confirm.
 */
static int kvm_s390_query_ap_config(u8 *config)
{
u32 fcn_code = 0x04000000UL;
u32 cc = 0;
memset(config, 0, 128);
asm volatile(
"lgr 0,%1\n"
"lgr 2,%2\n"
".long 0xb2af0000\n" /* PQAP(QCI) */
"0: ipm %0\n"
"srl %0,28\n"
"1:\n"
EX_TABLE(0b, 1b)
: "+r" (cc)
: "r" (fcn_code), "r" (config)
: "cc", "0", "2", "memory"
);
return cc;
}
/*
 * Query the AP configuration and report bit 0x40 of its first byte.
 *
 * Returns 0 when facility 12 is not available or the query fails (a
 * non-zero condition code is logged); otherwise returns config[0] & 0x40.
 */
static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (!test_facility(12))
		return 0;

	cc = kvm_s390_query_ap_config(config);
	if (cc) {
		pr_err("PQAP(QCI) failed with cc=%d", cc);
		return 0;
	}

	return config[0] & 0x40;
}
/*
 * Build the crycb designation: the (32-bit truncated) address of the crycb
 * combined with the format bits, CRYCB_FORMAT2 if kvm_s390_apxa_installed()
 * reports non-zero and CRYCB_FORMAT1 otherwise.
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed()) {
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
		return;
	}
	kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}
static u64 kvm_s390_get_initial_cpuid(void)
{
struct cpuid cpuid;
get_cpu_id(&cpuid);
cpuid.version = 0xff;
return *((u64 *) &cpuid);
}
/*
 * Initialise the VM's crypto control block.
 *
 * Does nothing unless the VM has facility 76. Otherwise the crycb inside
 * sie_page2 is used, its designation is set up, and AES/DEA key wrapping is
 * enabled with freshly generated random wrapping key masks.
 */
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}
/*
 * Free the VM's SCA with the release function matching how it was
 * allocated (extended vs. basic) and clear the pointer.
 */
static void sca_dispose(struct kvm *kvm)
{
	if (!kvm->arch.use_esca)
		free_page((unsigned long)(kvm->arch.sca));
	else
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));

	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
gfp_t alloc_flags = GFP_KERNEL;
int i, rc;
char debug_name[16];
static unsigned long sca_offset;
rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
if (type & ~KVM_VM_S390_UCONTROL)
goto out_err;
if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
goto out_err;
#else
if (type)
goto out_err;
#endif
rc = s390_enable_sie();
if (rc)
goto out_err;
rc = -ENOMEM;
ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
kvm->arch.use_esca = 0; /* start with basic SCA */
if (!sclp.has_64bscao)
alloc_flags |= GFP_DMA;
rwlock_init(&kvm->arch.sca_lock);
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
Merge 4.9.202 into android-4.9-q Changes in 4.9.202 kvm: mmu: Don't read PDPTEs when paging is not enabled Bluetooth: hci_ldisc: Postpone HCI_UART_PROTO_READY bit set in hci_uart_set_proto() MIPS: BCM63XX: fix switch core reset on BCM6368 usb: gadget: core: unmap request from DMA only if previously mapped KVM: x86: use Intel speculation bugs and features as derived in generic x86 code x86/msr: Add the IA32_TSX_CTRL MSR x86/cpu: Add a helper function x86_read_arch_cap_msr() x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default x86/speculation/taa: Add mitigation for TSX Async Abort x86/speculation/taa: Add sysfs reporting for TSX Async Abort kvm/x86: Export MDS_NO=0 to guests when TSX is enabled x86/tsx: Add "auto" option to the tsx= cmdline parameter x86/speculation/taa: Add documentation for TSX Async Abort x86/tsx: Add config options to set tsx=on|off|auto x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs KVM: x86: simplify ept_misconfig KVM: x86: extend usage of RET_MMIO_PF_* constants kvm: Convert kvm_lock to a mutex kvm: x86: Do not release the page inside mmu_set_spte() KVM: x86: make FNAME(fetch) and __direct_map more similar KVM: x86: remove now unneeded hugepage gfn adjustment KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON KVM: x86: Add is_executable_pte() KVM: x86: add tracepoints around __direct_map and FNAME(fetch) KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active x86/bugs: Add ITLB_MULTIHIT bug infrastructure cpu/speculation: Uninline and export CPU mitigations helpers kvm: mmu: ITLB_MULTIHIT mitigation kvm: Add helper function for creating VM worker threads kvm: x86: mmu: Recovery of shattered NX large pages Documentation: Add ITLB_MULTIHIT documentation Linux 4.9.202 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-11-16 11:05:12 +01:00
mutex_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
Merge 4.9.202 into android-4.9-q Changes in 4.9.202 kvm: mmu: Don't read PDPTEs when paging is not enabled Bluetooth: hci_ldisc: Postpone HCI_UART_PROTO_READY bit set in hci_uart_set_proto() MIPS: BCM63XX: fix switch core reset on BCM6368 usb: gadget: core: unmap request from DMA only if previously mapped KVM: x86: use Intel speculation bugs and features as derived in generic x86 code x86/msr: Add the IA32_TSX_CTRL MSR x86/cpu: Add a helper function x86_read_arch_cap_msr() x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default x86/speculation/taa: Add mitigation for TSX Async Abort x86/speculation/taa: Add sysfs reporting for TSX Async Abort kvm/x86: Export MDS_NO=0 to guests when TSX is enabled x86/tsx: Add "auto" option to the tsx= cmdline parameter x86/speculation/taa: Add documentation for TSX Async Abort x86/tsx: Add config options to set tsx=on|off|auto x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs KVM: x86: simplify ept_misconfig KVM: x86: extend usage of RET_MMIO_PF_* constants kvm: Convert kvm_lock to a mutex kvm: x86: Do not release the page inside mmu_set_spte() KVM: x86: make FNAME(fetch) and __direct_map more similar KVM: x86: remove now unneeded hugepage gfn adjustment KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON KVM: x86: Add is_executable_pte() KVM: x86: add tracepoints around __direct_map and FNAME(fetch) KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active x86/bugs: Add ITLB_MULTIHIT bug infrastructure cpu/speculation: Uninline and export CPU mitigations helpers kvm: mmu: ITLB_MULTIHIT mitigation kvm: Add helper function for creating VM worker threads kvm: x86: mmu: Recovery of shattered NX large pages Documentation: Add ITLB_MULTIHIT documentation Linux 4.9.202 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-11-16 11:05:12 +01:00
mutex_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
if (!kvm->arch.dbf)
goto out_err;
kvm->arch.sie_page2 =
(struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.sie_page2)
goto out_err;
/* Populate the facility mask initially. */
memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
sizeof(S390_lowcore.stfle_fac_list));
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
else
kvm->arch.model.fac_mask[i] = 0UL;
}
/* Populate the facility list initially. */
kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
set_kvm_facility(kvm->arch.model.fac_mask, 74);
set_kvm_facility(kvm->arch.model.fac_list, 74);
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
kvm_s390_crypto_init(kvm);
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
init_waitqueue_head(&kvm->arch.ipte_wq);
mutex_init(&kvm->arch.ipte_mutex);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
} else {
if (sclp.hamax == U64_MAX)
kvm->arch.mem_limit = TASK_MAX_SIZE;
else
kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
sclp.hamax + 1);
kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
if (!kvm->arch.gmap)
goto out_err;
kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;
}
kvm->arch.css_support = 0;
kvm->arch.use_irqchip = 0;
kvm->arch.epoch = 0;
spin_lock_init(&kvm->arch.start_stop_lock);
kvm_s390_vsie_init(kvm);
KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
return 0;
out_err:
free_page((unsigned long)kvm->arch.sie_page2);
debug_unregister(kvm->arch.dbf);
sca_dispose(kvm);
KVM_EVENT(3, "creation of vm failed: %d", rc);
return rc;
}
/* This implementation reports no per-vcpu debugfs support. */
bool kvm_arch_has_vcpu_debugfs(void)
{
	return false;
}
/* Nothing to create for @vcpu; always succeeds with 0. */
int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
{
	return 0;
}
/*
 * Tear down a vcpu: clear its local interrupts and async-pf completions,
 * detach it from the SCA (non-ucontrol VMs) or remove its private gmap
 * (ucontrol VMs), undo the CMMA setup if the VM uses CMMA, and free the SIE
 * block and the vcpu structure itself.
 */
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);

	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);

	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_remove(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);

	free_page((unsigned long)(vcpu->arch.sie_block));
	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
/*
 * Destroy every vcpu of @kvm, then, under kvm->lock, clear the vcpu
 * pointers and reset the online vcpu count to 0.
 */
static void kvm_free_vcpus(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	unsigned int i;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);

	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;
	atomic_set(&kvm->online_vcpus, 0);

	mutex_unlock(&kvm->lock);
}
/*
 * Architecture-specific part of VM destruction: frees the vcpus, the SCA,
 * the debug feature and sie_page2, removes the VM-wide gmap (non-ucontrol
 * VMs only) and cleans up adapters, floating interrupts and vsie state.
 */
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);

	if (!kvm_is_ucontrol(kvm))
		gmap_remove(kvm->arch.gmap);

	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	kvm_s390_vsie_destroy(kvm);

	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
vcpu->arch.gmap = gmap_create(current->mm, -1UL);
if (!vcpu->arch.gmap)
return -ENOMEM;
vcpu->arch.gmap->private = vcpu->kvm;
return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
if (!kvm_s390_use_sca_entries())
return;
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
sca->cpu[vcpu->vcpu_id].sda = 0;
}
read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
if (!kvm_s390_use_sca_entries()) {
struct bsca_block *sca = vcpu->kvm->arch.sca;
/* we still need the basic sca for the ipte control */
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
return;
}
read_lock(&vcpu->kvm->arch.sca_lock);
if (vcpu->kvm->arch.use_esca) {
struct esca_block *sca = vcpu->kvm->arch.sca;
sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
vcpu->arch.sie_block->ecb2 |= 0x04U;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
/*
 * Copy one basic SCA entry @s into the extended SCA entry @d: the SIE
 * block address and the c/scn fields of the SIGP control.
 */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;

	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}
/*
 * Populate an extended SCA (@d) from a basic SCA (@s): the ipte control
 * word, the mcn mask (into the first mcn word of the extended block) and
 * every cpu slot the basic SCA provides.
 */
static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int slot;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;

	for (slot = 0; slot < KVM_S390_BSCA_CPU_SLOTS; slot++) {
		sca_copy_entry(&d->cpu[slot], &s->cpu[slot]);
	}
}
/*
 * Replace the VM's basic SCA by a freshly allocated, zeroed extended SCA.
 *
 * All vcpus are blocked and the sca_lock is taken for writing while the
 * old contents are copied, every existing vcpu's SIE block is re-pointed
 * at the new SCA (scaoh/scaol, ecb2 bit 0x04) and kvm->arch.sca/use_esca
 * are updated. The old basic SCA page is freed afterwards.
 *
 * The caller visible in this file (sca_can_add_vcpu()) holds kvm->lock.
 *
 * Returns 0 on success, -ENOMEM if the extended SCA cannot be allocated.
 */
static int sca_switch_to_extended(struct kvm *kvm)
{
struct bsca_block *old_sca = kvm->arch.sca;
struct esca_block *new_sca;
struct kvm_vcpu *vcpu;
unsigned int vcpu_idx;
u32 scaol, scaoh;
new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
if (!new_sca)
return -ENOMEM;
/* split the new origin into the two 32-bit halves kept in the SIE block */
scaoh = (u32)((u64)(new_sca) >> 32);
scaol = (u32)(u64)(new_sca) & ~0x3fU;
/* keep all vcpus out of SIE while the SCA is being exchanged */
kvm_s390_vcpu_block_all(kvm);
write_lock(&kvm->arch.sca_lock);
sca_copy_b_to_e(new_sca, old_sca);
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
vcpu->arch.sie_block->scaoh = scaoh;
vcpu->arch.sie_block->scaol = scaol;
vcpu->arch.sie_block->ecb2 |= 0x04U;
}
kvm->arch.sca = new_sca;
kvm->arch.use_esca = 1;
write_unlock(&kvm->arch.sca_lock);
kvm_s390_vcpu_unblock_all(kvm);
/* nothing references the basic SCA anymore */
free_page((unsigned long)old_sca);
VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
old_sca, kvm->arch.sca);
return 0;
}
/*
 * Check whether a vcpu with the given id can be added to this VM.
 *
 * Without SCA entries only KVM_MAX_VCPUS limits the id. Otherwise ids
 * that fit into the basic SCA are always fine; larger ids require the
 * machine to offer the extended SCA (sclp.has_esca and sclp.has_64bscao),
 * in which case the VM is switched to the extended SCA on demand.
 *
 * Returns non-zero if the vcpu can be added, zero otherwise.
 */
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc = 0;

	if (!kvm_s390_use_sca_entries())
		return id < KVM_MAX_VCPUS;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;

	if (!(sclp.has_esca && sclp.has_64bscao))
		return false;

	mutex_lock(&kvm->lock);
	if (!kvm->arch.use_esca)
		rc = sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return !rc && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
KVM_SYNC_GPRS |
KVM_SYNC_ACRS |
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
if (test_kvm_facility(vcpu->kvm, 82))
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/
if (MACHINE_HAS_VX)
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
else
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
return 0;
}
/*
 * Open a CPU timer accounting interval: store the current TOD clock value
 * in cputm_start. The store is wrapped in a cputm_seqcount write section so
 * that kvm_s390_get_cpu_timer() can detect a concurrent update and retry.
 * Warns once if an interval is already open (cputm_start != 0).
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
vcpu->arch.cputm_start = get_tod_clock_fast();
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/*
 * Close the current CPU timer accounting interval: subtract the TOD time
 * elapsed since cputm_start from the SIE block's cputm and reset cputm_start
 * to 0, all inside a cputm_seqcount write section.
 * Warns once if no interval is open (cputm_start == 0).
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
vcpu->arch.cputm_start = 0;
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/*
 * Mark CPU timer accounting as enabled for this vcpu and open the first
 * accounting interval. Warns once if accounting is already enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_enabled);
vcpu->arch.cputm_enabled = true;
__start_cpu_timer_accounting(vcpu);
}
/*
 * Close the current accounting interval and mark CPU timer accounting as
 * disabled for this vcpu. Warns once if accounting is not enabled.
 *
 * needs disabled preemption to protect from TOD sync and vcpu_load/put
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
__stop_cpu_timer_accounting(vcpu);
vcpu->arch.cputm_enabled = false;
}
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting() for
 * callers that run with preemption enabled.
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
__enable_cpu_timer_accounting(vcpu);
preempt_enable();
}
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting() for
 * callers that run with preemption enabled.
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
__disable_cpu_timer_accounting(vcpu);
preempt_enable();
}
/*
 * set the cpu timer - may only be called from the VCPU thread itself
 *
 * Stores @cputm into the SIE block. If accounting is enabled, the
 * accounting interval is restarted at the current TOD clock so that time
 * elapsed before the update is not subtracted from the new value. Both
 * stores happen inside one cputm_seqcount write section.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start = get_tod_clock_fast();
vcpu->arch.sie_block->cputm = cputm;
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
preempt_enable();
}
/*
 * update and get the cpu timer - can also be called from other VCPU threads
 *
 * With accounting disabled the value stored in the SIE block is returned
 * as is. Otherwise the stored value minus the TOD time elapsed since
 * cputm_start is returned; the computation is repeated until it was not
 * disturbed by a writer of cputm_seqcount.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
unsigned int seq;
__u64 value;
if (unlikely(!vcpu->arch.cputm_enabled))
return vcpu->arch.sie_block->cputm;
preempt_disable(); /* protect from TOD sync and vcpu_load/put */
do {
seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
/*
 * If the writer would ever execute a read in the critical
 * section, e.g. in irq context, we have a deadlock.
 */
WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
value = vcpu->arch.sie_block->cputm;
/* if cputm_start is 0, accounting is being started/stopped */
if (likely(vcpu->arch.cputm_start))
value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
/*
 * Comparing against the even value (seq & ~1) forces a retry when the
 * sequence was sampled while a writer was active (odd count).
 */
} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
preempt_enable();
return value;
}
/*
 * Switch the current thread over to guest register context when the vcpu
 * is loaded on @cpu.
 *
 * The host FPU control/register pointer and the host access registers are
 * saved in vcpu->arch; the thread's FPU state is then pointed at the guest
 * register area in kvm_run and the guest access registers are loaded. The
 * vcpu's gmap is enabled, CPUSTAT_RUNNING is set and, if CPU timer
 * accounting is enabled and the vcpu is not idle, a new accounting interval
 * is opened.
 */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Save host register state */
save_fpu_regs();
vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
/* from now on the thread's FPU state lives in the kvm_run area */
if (MACHINE_HAS_VX)
current->thread.fpu.regs = vcpu->run->s.regs.vrs;
else
current->thread.fpu.regs = vcpu->run->s.regs.fprs;
current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.enabled_gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__start_cpu_timer_accounting(vcpu);
vcpu->cpu = cpu;
}
/*
 * Counterpart of kvm_arch_vcpu_load(): switch the current thread back to
 * host register context.
 *
 * The open CPU timer accounting interval (if any) is closed,
 * CPUSTAT_RUNNING is cleared, the currently enabled gmap is remembered and
 * disabled, the guest FPU control and access registers are saved to kvm_run
 * and the host values saved at load time are restored.
 */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu->cpu = -1;
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
__stop_cpu_timer_accounting(vcpu);
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
/* remember which gmap was active so that the next load re-enables it */
vcpu->arch.enabled_gmap = gmap_get_enabled();
gmap_disable(vcpu->arch.enabled_gmap);
/* Save guest register state */
save_fpu_regs();
vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
/* Restore host register state */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
/* this equals initial cpu reset in pop, but we don't switch to ESA */
vcpu->arch.sie_block->gpsw.mask = 0UL;
vcpu->arch.sie_block->gpsw.addr = 0UL;
kvm_s390_set_prefix(vcpu, 0);
kvm_s390_set_cpu_timer(vcpu, 0);
vcpu->arch.sie_block->ckc = 0UL;
vcpu->arch.sie_block->todpr = 0;
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
/* make sure the new fpc will be lazily loaded */
save_fpu_regs();
current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
mutex_lock(&vcpu->kvm->lock);
preempt_disable();
vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
preempt_enable();
mutex_unlock(&vcpu->kvm->lock);
if (!kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = vcpu->kvm->arch.gmap;
sca_add_vcpu(vcpu);
}
if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
/* make vcpu_load load the right gmap on the first trigger */
vcpu->arch.enabled_gmap = vcpu->arch.gmap;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
if (!test_kvm_facility(vcpu->kvm, 76))
return;
vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
if (vcpu->kvm->arch.crypto.aes_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_AES;
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
free_page(vcpu->arch.sie_block->cbrlo);
vcpu->arch.sie_block->cbrlo = 0;
}
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
vcpu->arch.sie_block->ecb2 |= 0x80;
vcpu->arch.sie_block->ecb2 &= ~0x08;
return 0;
}
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
vcpu->arch.sie_block->ibc = model->ibc;
if (test_kvm_facility(vcpu->kvm, 7))
vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
CPUSTAT_STOPPED);
if (test_kvm_facility(vcpu->kvm, 78))
atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
else if (test_kvm_facility(vcpu->kvm, 8))
atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
kvm_s390_vcpu_setup_model(vcpu);
/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
if (MACHINE_HAS_ESOP)
vcpu->arch.sie_block->ecb |= 0x02;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= 0x04;
if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;
if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
vcpu->arch.sie_block->ecb2 |= 0x08;
vcpu->arch.sie_block->eca = 0x1002000U;
if (sclp.has_cei)
vcpu->arch.sie_block->eca |= 0x80000000U;
if (sclp.has_ib)
vcpu->arch.sie_block->eca |= 0x40000000U;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= 0x10000000U;
if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= 0x00020000;
vcpu->arch.sie_block->ecd |= 0x20000000;
}
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
if (rc)
return rc;
}
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
kvm_s390_vcpu_crypto_setup(vcpu);
return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id)
{
struct kvm_vcpu *vcpu;
struct sie_page *sie_page;
int rc = -EINVAL;
if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
goto out;
rc = -ENOMEM;
vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
if (!vcpu)
goto out;
sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
if (!sie_page)
goto out_free_cpu;
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
/* the real guest size will always be smaller than msl */
vcpu->arch.sie_block->mso = 0;
vcpu->arch.sie_block->msl = sclp.hamax;
vcpu->arch.sie_block->icpua = id;
spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
seqcount_init(&vcpu->arch.cputm_seqcount);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
return vcpu;
out_free_sie_block:
free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
return ERR_PTR(rc);
}
/*
 * Tell common code whether the vcpu is runnable; the answer is whatever
 * kvm_s390_vcpu_has_irq() reports for this vcpu (second argument 0).
 */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
return kvm_s390_vcpu_has_irq(vcpu, 0);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
exit_sie(vcpu);
}
void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}
static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
exit_sie(vcpu);
}
static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * The kick is done by setting CPUSTAT_STOP_INT in the cpu flags; the wait
 * is a busy loop polling PROG_IN_SIE in prog0c.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
cpu_relax();
}
/*
 * Kick a guest cpu out of SIE to process a request synchronously
 *
 * The request bit is set first, then the vcpu is forced out of SIE via
 * kvm_s390_vcpu_request(), which only returns once SIE has been left.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
kvm_make_request(req, vcpu);
kvm_s390_vcpu_request(vcpu);
}
/*
 * gmap invalidation callback for the range [@start, @end].
 *
 * Shadow gmaps and ranges starting at or above 2 GB are ignored. For every
 * vcpu whose two prefix pages overlap the range, a synchronous
 * KVM_REQ_MMU_RELOAD request is issued.
 */
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
			      unsigned long end)
{
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;
	unsigned long prefix;
	int idx;

	if (gmap_is_shadow(gmap))
		return;

	/* We are only interested in prefix pages */
	if (start >= 1UL << 31)
		return;

	kvm_for_each_vcpu(idx, vcpu, kvm) {
		/* match against both prefix pages */
		prefix = kvm_s390_get_prefix(vcpu);
		if (prefix > end || start > prefix + 2*PAGE_SIZE - 1)
			continue;

		VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
			   start, end);
		kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
	}
}
/*
 * Required by common code only to satisfy the linker; reaching this
 * function is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
/* kvm common code refers to this, but never calls it */
BUG();
return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
{
int r = -EINVAL;
switch (reg->id) {
case KVM_REG_S390_TODPR:
r = put_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
break;
case KVM_REG_S390_EPOCHDIFF:
r = put_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
r = put_user(kvm_s390_get_cpu_timer(vcpu),
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CLOCK_COMP:
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFTOKEN:
r = put_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFCOMPARE:
r = put_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFSELECT:
r = put_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PP:
r = put_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_GBEA:
r = put_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
break;
default:
break;
}
return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
struct kvm_one_reg *reg)
{
int r = -EINVAL;
__u64 val;
switch (reg->id) {
case KVM_REG_S390_TODPR:
r = get_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr);
break;
case KVM_REG_S390_EPOCHDIFF:
r = get_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_CPU_TIMER:
r = get_user(val, (u64 __user *)reg->addr);
if (!r)
kvm_s390_set_cpu_timer(vcpu, val);
break;
case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr);
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
break;
case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_PP:
r = get_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr);
break;
case KVM_REG_S390_GBEA:
r = get_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr);
break;
default:
break;
}
return r;
}
/*
 * ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0.
 */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
kvm_s390_vcpu_initial_reset(vcpu);
return 0;
}
int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
return 0;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
restore_access_regs(vcpu->run->s.regs.acrs);
return 0;
}
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
return 0;
}
/*
 * Set the guest floating point control and floating point registers.
 *
 * On machines with the vector facility the fprs are converted into the
 * vector register area of kvm_run, otherwise they are copied as is.
 *
 * Returns 0 on success, -EINVAL if the supplied fpc is rejected by
 * test_fp_ctl().
 */
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();

	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;

	if (!MACHINE_HAS_VX) {
		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
		return 0;
	}

	convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
			 (freg_t *) fpu->fprs);
	return 0;
}
/*
 * Read the guest floating point registers and floating point control.
 *
 * On machines with the vector facility the fprs are extracted from the
 * vector register area of kvm_run, otherwise they are copied as is.
 * Always returns 0.
 */
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();

	if (MACHINE_HAS_VX) {
		convert_vx_to_fp((freg_t *) fpu->fprs,
				 (__vector128 *) vcpu->run->s.regs.vrs);
	} else {
		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
	}

	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
/*
 * Set the initial PSW of a vcpu by storing it into kvm_run.
 *
 * Returns 0 on success, -EBUSY if the vcpu is not in stopped state.
 */
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	if (!is_vcpu_stopped(vcpu))
		return -EBUSY;

	vcpu->run->psw_mask = psw.mask;
	vcpu->run->psw_addr = psw.addr;
	return 0;
}
/*
 * Address translation ioctl: not implemented here, always fails with
 * -EINVAL.
 */
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
KVM_GUESTDBG_USE_HW_BP | \
KVM_GUESTDBG_ENABLE)
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
int rc = 0;
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
if (dbg->control & ~VALID_GUESTDBG_FLAGS)
return -EINVAL;
if (!sclp.has_gpere)
return -EINVAL;
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
/* enforce guest PER */
atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
rc = kvm_s390_import_bp_data(vcpu, dbg);
} else {
atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
vcpu->arch.guestdbg.last_bp = 0;
}
if (rc) {
vcpu->guest_debug = 0;
kvm_s390_clear_bp_data(vcpu);
atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
}
return rc;
}
/*
 * Report the multiprocessing state of a vcpu as the return value:
 * KVM_MP_STATE_STOPPED if the vcpu is stopped, KVM_MP_STATE_OPERATING
 * otherwise. @mp_state is not written.
 */
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	if (is_vcpu_stopped(vcpu))
		return KVM_MP_STATE_STOPPED;

	return KVM_MP_STATE_OPERATING;
}
/*
 * Set the multiprocessing state of a vcpu.
 *
 * Calling this ioctl at all switches the VM to user space controlled cpu
 * states. STOPPED and OPERATING stop respectively start the vcpu.
 *
 * Returns 0 on success, -ENXIO for any other (unsupported) state.
 */
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	if (mp_state->mp_state == KVM_MP_STATE_STOPPED) {
		kvm_s390_vcpu_stop(vcpu);
		return 0;
	}

	if (mp_state->mp_state == KVM_MP_STATE_OPERATING) {
		kvm_s390_vcpu_start(vcpu);
		return 0;
	}

	/* CHECK_STOP and LOAD are not supported yet, like any unknown state */
	return -ENXIO;
}
/* Return true if CPUSTAT_IBS is currently set in the vcpu's cpu flags. */
static bool ibs_enabled(struct kvm_vcpu *vcpu)
{
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
}
/*
 * Process all pending vcpu requests before (re-)entering SIE.
 *
 * After every handled request the loop starts over, which also clears
 * PROG_REQUEST again, so that requests raised in the meantime are seen.
 *
 * Returns 0 when no request is left, or the error of
 * gmap_mprotect_notify() (the MMU_RELOAD request is re-queued in that case).
 */
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
kvm_s390_vcpu_request_handled(vcpu);
if (!vcpu->requests)
return 0;
/*
 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
 * This ensures that the ipte instruction for this request has
 * already finished. We might race against a second unmapper that
 * wants to set the blocking bit. Lets just retry the request loop.
 */
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
int rc;
rc = gmap_mprotect_notify(vcpu->arch.gmap,
kvm_s390_get_prefix(vcpu),
PAGE_SIZE * 2, PROT_WRITE);
if (rc) {
/* keep the request pending so that it is retried later */
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
return rc;
}
goto retry;
}
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
vcpu->arch.sie_block->ihcpu = 0xffff;
goto retry;
}
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
atomic_or(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);
}
goto retry;
}
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
atomic_andnot(CPUSTAT_IBS,
&vcpu->arch.sie_block->cpuflags);
}
goto retry;
}
if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
goto retry;
}
/* nothing to do, just clear the request */
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
return 0;
}
/*
 * Set the guest TOD clock of a VM to @tod.
 *
 * The VM's epoch is computed as the difference between @tod and the host
 * TOD clock and then copied into the SIE block of every vcpu while all
 * vcpus are blocked. Runs under kvm->lock with preemption disabled.
 */
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
struct kvm_vcpu *vcpu;
int i;
mutex_lock(&kvm->lock);
preempt_disable();
kvm->arch.epoch = tod - get_tod_clock();
/* no vcpu may run in SIE while its epoch is being changed */
kvm_s390_vcpu_block_all(kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.sie_block->epoch = kvm->arch.epoch;
kvm_s390_vcpu_unblock_all(kvm);
preempt_enable();
mutex_unlock(&kvm->lock);
}
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host. The work is
 * delegated to gmap_fault() on the vcpu's gmap; FAULT_FLAG_WRITE is passed
 * if and only if @writable is non-zero.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	unsigned int fault_flags = 0;

	if (writable)
		fault_flags = FAULT_FLAG_WRITE;

	return gmap_fault(vcpu->arch.gmap, gpa, fault_flags);
}
/*
 * Inject a pfault notification carrying @token.
 *
 * @start_token selects the kind of notification: true injects a
 * KVM_S390_INT_PFAULT_INIT interrupt into the vcpu, false injects a
 * KVM_S390_INT_PFAULT_DONE interrupt into the VM. An injection failure
 * triggers a one-time warning.
 *
 * The interrupt descriptors are built with designated initializers so
 * that every field that is not explicitly set is zero; no uninitialized
 * stack content is handed to the injection code.
 */
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	if (start_token) {
		struct kvm_s390_irq irq = {
			.type = KVM_S390_INT_PFAULT_INIT,
			.u.ext.ext_params2 = token,
		};

		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		struct kvm_s390_interrupt inti = {
			.type = KVM_S390_INT_PFAULT_DONE,
			.parm64 = token,
		};

		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}
/*
 * Async pf hook for a page that is not present: trace the event and inject
 * the pfault "init" notification with the token stored in @work.
 */
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}
/*
 * Async pf hook for a page that has become present: trace the event and
 * inject the pfault "done" notification with the token stored in @work.
 */
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}
/* Async pf hook that is intentionally empty on s390. */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
/* s390 will always inject the page directly */
}
/* Async pf hook: injection of "page present" is always reported as possible. */
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
/*
 * s390 will always inject the page directly,
 * but we still want check_async_completion to cleanup
 */
return true;
}
/*
 * Try to handle the current guest fault (current->thread.gmap_addr)
 * asynchronously.
 *
 * This is only attempted if the guest has set up a valid pfault token, the
 * guest PSW matches the pfault select/compare masks, external interrupts
 * are enabled in the PSW, no interrupt is pending for the vcpu, bit 0x200
 * is set in guest cr0 and pfault is enabled for the gmap. The token value
 * is then read from guest real storage and the async pf is queued.
 *
 * Returns 0 if the fault has to be handled synchronously, otherwise the
 * result of kvm_setup_async_pf().
 */
static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	struct kvm_arch_async_pf arch;
	hva_t hva;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;

	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;

	if (psw_extint_disabled(vcpu) || kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;

	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul) ||
	    !vcpu->arch.gmap->pfault_enabled)
		return 0;

	/* host virtual address of the faulting guest address */
	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr)) +
	      (current->thread.gmap_addr & ~PAGE_MASK);

	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	return kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
}
/*
 * Prepare a vcpu for entering SIE.
 *
 * Completed async page faults are cleaned up, guest gprs 14 and 15 are
 * copied into the SIE block, a pending reschedule or machine check is
 * handled, pending interrupts are delivered (non-ucontrol VMs only),
 * pending requests are processed and, with guest debugging enabled, the
 * guest PER registers are backed up and patched.
 *
 * Returns 0 if SIE can be entered, otherwise the error of interrupt
 * delivery or request handling.
 */
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
int rc, cpuflags;
/*
 * On s390 notifications for arriving pages will be delivered directly
 * to the guest but the house keeping for completed pfaults is
 * handled outside the worker.
 */
kvm_check_async_pf_completion(vcpu);
vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
if (need_resched())
schedule();
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
if (rc)
return rc;
}
rc = kvm_s390_handle_requests(vcpu);
if (rc)
return rc;
if (guestdbg_enabled(vcpu)) {
kvm_s390_backup_guest_per_regs(vcpu);
kvm_s390_patch_guest_per_regs(vcpu);
}
/* start with a clean intercept code; vcpu_post_run() checks it for > 0 */
vcpu->arch.sie_block->icptcode = 0;
cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
trace_kvm_s390_sie_enter(vcpu, cpuflags);
return 0;
}
/*
 * Handle a fault that hit the SIE instruction itself by injecting a
 * program interrupt into the guest.
 *
 * By default an addressing exception is injected and the PSW is forwarded
 * by the length of the faulting instruction. If the instruction cannot be
 * fetched because of a guest exception, that exception (vcpu->arch.pgm) is
 * injected instead with an arbitrary instruction length of 4.
 *
 * The instruction length is derived from the opcode only after the fetch
 * has succeeded, so the opcode byte is never evaluated uninitialized.
 *
 * Returns a negative error code if the fetch fails for host reasons,
 * otherwise the result of kvm_s390_inject_prog_irq().
 */
static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	if (rc < 0)
		return rc;

	if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	} else {
		ilen = insn_length(opcode);
	}

	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);
vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
if (vcpu->arch.sie_block->icptcode > 0) {
int rc = kvm_handle_sie_intercept(vcpu);
if (rc != -EOPNOTSUPP)
return rc;
vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
return -EREMOTE;
} else if (exit_reason != -EFAULT) {
vcpu->stat.exit_null++;
return 0;
} else if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code =
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
return -EREMOTE;
} else if (current->thread.gmap_pfault) {
trace_kvm_s390_major_guest_pfault(vcpu);
current->thread.gmap_pfault = 0;
if (kvm_arch_setup_async_pf(vcpu))
return 0;
return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
}
return vcpu_post_run_fault_in_sie(vcpu);
}
/*
 * The vcpu run loop: prepare, enter SIE, evaluate the exit, and repeat
 * until a signal is pending, a guest debug exit is pending or one of the
 * steps returns non-zero.
 *
 * Returns the last result of vcpu_pre_run()/vcpu_post_run().
 */
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
/*
 * We try to hold kvm->srcu during most of vcpu_run (except when run-
 * ning the guest), so that memslots (and other stuff) are protected
 */
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
do {
rc = vcpu_pre_run(vcpu);
if (rc)
break;
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
/*
 * As PF_VCPU will be used in fault handler, between
 * guest_enter and guest_exit should be no uaccess.
 */
local_irq_disable();
guest_enter_irqoff();
/* CPU timer accounting is switched off for the time spent in SIE */
__disable_cpu_timer_accounting(vcpu);
local_irq_enable();
exit_reason = sie64a(vcpu->arch.sie_block,
vcpu->run->s.regs.gprs);
local_irq_disable();
__enable_cpu_timer_accounting(vcpu);
guest_exit_irqoff();
local_irq_enable();
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
rc = vcpu_post_run(vcpu, exit_reason);
} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
return rc;
}
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
/* some control register changes require a tlb flush */
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
}
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
vcpu->arch.pfault_token = kvm_run->s.regs.pft;
vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu);
}
/*
* If userspace sets the riccb (e.g. after migration) to a valid state,
* we should enable RI here instead of doing the lazy enablement.
*/
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
test_kvm_facility(vcpu->kvm, 64)) {
struct runtime_instr_cb *riccb =
(struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
if (riccb->valid)
vcpu->arch.sie_block->ecb3 |= 0x01;
}
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
test_kvm_facility(vcpu->kvm, 82)) {
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
}
kvm_run->kvm_dirty_regs = 0;
}
static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
}
/*
 * Run a vcpu until it has to return to user space.
 *
 * A pending guest debug exit is reported right away. Otherwise the vcpu's
 * signal mask is installed (if one was set), the vcpu is started (or
 * rejected if user space controls the cpu state and the vcpu is stopped),
 * registers are synchronized from kvm_run, the run loop is executed and
 * registers are stored back to kvm_run.
 *
 * The original signal mask is restored on every path that installed the
 * vcpu's mask, including the early "stopped vcpu" error return.
 *
 * Returns 0 if kvm_run describes the exit for user space, -EINTR if a
 * signal is pending, -EINVAL when trying to run a stopped vcpu, or another
 * negative error code from the run loop.
 */
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		/* do not leak the vcpu's signal mask into the caller */
		if (vcpu->sigset_active)
			sigprocmask(SIG_SETMASK, &sigsaved, NULL);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}
/*
* store status at address
* we use have two special cases:
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
*/
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
freg_t fprs[NUM_FPRS];
unsigned int px;
u64 clkcomp, cputm;
int rc;
px = kvm_s390_get_prefix(vcpu);
if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
if (write_guest_abs(vcpu, 163, &archmode, 1))
return -EFAULT;
gpa = 0;
} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
if (write_guest_real(vcpu, 163, &archmode, 1))
return -EFAULT;
gpa = px;
} else
gpa -= __LC_FPREGS_SAVE_AREA;
/* manually convert vector registers if necessary */
if (MACHINE_HAS_VX) {
convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
fprs, 128);
} else {
rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
vcpu->run->s.regs.fprs, 128);
}
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
&vcpu->arch.sie_block->gpsw, 16);
rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
&px, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
&vcpu->run->s.regs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
cputm = kvm_s390_get_cpu_timer(vcpu);
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
&cputm, 8);
clkcomp = vcpu->arch.sie_block->ckc >> 8;
rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
&clkcomp, 8);
rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
&vcpu->run->s.regs.acrs, 64);
rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
&vcpu->arch.sie_block->gcr, 128);
return rc ? -EFAULT : 0;
}
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
/*
* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
* copying in vcpu load/put. Lets update our copies before we save
* it into the save area
*/
save_fpu_regs();
vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
}
/*
 * Ask @vcpu to switch the IBS facility off.
 *
 * kvm_check_request() is called only for its effect on a possibly pending
 * KVM_REQ_ENABLE_IBS (its return value is ignored) - presumably it consumes
 * that request so a stale ENABLE cannot take effect after the DISABLE;
 * confirm against the generic KVM request helpers.
 */
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}
static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
unsigned int i;
struct kvm_vcpu *vcpu;
kvm_for_each_vcpu(i, vcpu, kvm) {
__disable_ibs_on_vcpu(vcpu);
}
}
/*
 * Ask @vcpu to switch the IBS facility on. Does nothing when sclp reports
 * that IBS is not available.
 */
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	if (sclp.has_ibs) {
		/* look at a possibly pending DISABLE first; result is unused */
		kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
		kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
	}
}
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
int i, online_vcpus, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu))
return;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
started_vcpus++;
}
if (started_vcpus == 0) {
/* we're the only active VCPU -> speed it up */
__enable_ibs_on_vcpu(vcpu);
} else if (started_vcpus == 1) {
/*
* As we are starting a second VCPU, we have to disable
* the IBS facility on all VCPUs to remove potentially
* oustanding ENABLE requests.
*/
__disable_ibs_on_all_vcpus(vcpu->kvm);
}
atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
/*
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
int i, online_vcpus, started_vcpus = 0;
struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu))
return;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
kvm_s390_clear_stop_irq(vcpu);
atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
started_vcpus++;
started_vcpu = vcpu->kvm->vcpus[i];
}
}
if (started_vcpus == 1) {
/*
* As we only have one VCPU left, we want to enable the
* IBS facility for that VCPU to speed it up.
*/
__enable_ibs_on_vcpu(started_vcpu);
}
spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
int r;
if (cap->flags)
return -EINVAL;
switch (cap->cap) {
case KVM_CAP_S390_CSS_SUPPORT:
if (!vcpu->kvm->arch.css_support) {
vcpu->kvm->arch.css_support = 1;
VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
trace_kvm_s390_enable_css(vcpu->kvm);
}
r = 0;
break;
default:
r = -EINVAL;
break;
}
return r;
}
static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
void *tmpbuf = NULL;
int r, srcu_idx;
const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
| KVM_S390_MEMOP_F_CHECK_ONLY;
Merge 4.9.197 into android-4.9-q Changes in 4.9.197 KVM: s390: Test for bad access register and size at the start of S390_MEM_OP s390/topology: avoid firing events before kobjs are created s390/cio: avoid calling strlen on null pointer s390/cio: exclude subchannels with no parent from pseudo check KVM: nVMX: handle page fault in vmread fix ASoC: Define a set of DAPM pre/post-up events powerpc/powernv: Restrict OPAL symbol map to only be readable by root can: mcp251x: mcp251x_hw_reset(): allow more time after a reset crypto: qat - Silence smp_processor_id() warning usercopy: Avoid HIGHMEM pfn warning timer: Read jiffies once when forwarding base clk watchdog: imx2_wdt: fix min() calculation in imx2_wdt_set_timeout ieee802154: atusb: fix use-after-free at disconnect cfg80211: initialize on-stack chandefs ima: always return negative code for error fs: nfs: Fix possible null-pointer dereferences in encode_attrs() 9p: avoid attaching writeback_fid on mmap with type PRIVATE xen/pci: reserve MCFG areas earlier ceph: fix directories inode i_blkbits initialization ceph: reconnect connection if session hang in opening state drm/amdgpu: Check for valid number of registers to read thermal: Fix use-after-free when unregistering thermal zone device fuse: fix memleak in cuse_channel_open sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr() kernel/elfcore.c: include proper prototypes tools lib traceevent: Do not free tep->cmdlines in add_new_comm() on failure perf tools: Fix segfault in cpu_cache_level__read() perf stat: Fix a segmentation fault when using repeat forever perf stat: Reset previous counts on repeat with interval crypto: caam - fix concurrency issue in givencrypt descriptor coresight: etm4x: Use explicit barriers on enable/disable cfg80211: add and use strongly typed element iteration macros cfg80211: Use const more consistently in for_each_element macros nl80211: validate beacon head ASoC: sgtl5000: Improve VAG power and mute control panic: ensure 
preemption is disabled during panic() USB: rio500: Remove Rio 500 kernel driver USB: yurex: Don't retry on unexpected errors USB: yurex: fix NULL-derefs on disconnect USB: usb-skeleton: fix runtime PM after driver unbind USB: usb-skeleton: fix NULL-deref on disconnect xhci: Fix false warning message about wrong bounce buffer write length xhci: Prevent device initiated U1/U2 link pm if exit latency is too long xhci: Check all endpoints for LPM timeout usb: xhci: wait for CNR controller not ready bit in xhci resume xhci: Increase STS_SAVE timeout in xhci_suspend() USB: adutux: remove redundant variable minor USB: adutux: fix use-after-free on disconnect USB: adutux: fix NULL-derefs on disconnect USB: adutux: fix use-after-free on release USB: iowarrior: fix use-after-free on disconnect USB: iowarrior: fix use-after-free on release USB: iowarrior: fix use-after-free after driver unbind USB: usblp: fix runtime PM after driver unbind USB: chaoskey: fix use-after-free on release USB: ldusb: fix NULL-derefs on driver unbind serial: uartlite: fix exit path null pointer USB: serial: keyspan: fix NULL-derefs on open() and write() USB: serial: ftdi_sio: add device IDs for Sienna and Echelon PL-20 USB: serial: option: add Telit FN980 compositions USB: serial: option: add support for Cinterion CLS8 devices USB: serial: fix runtime PM after driver unbind USB: usblcd: fix I/O after disconnect USB: microtek: fix info-leak at probe USB: dummy-hcd: fix power budget for SuperSpeed mode usb: renesas_usbhs: gadget: Do not discard queues in usb_ep_set_{halt,wedge}() usb: renesas_usbhs: gadget: Fix usb_ep_set_{halt,wedge}() behavior USB: legousbtower: fix slab info leak at probe USB: legousbtower: fix deadlock on disconnect USB: legousbtower: fix potential NULL-deref on disconnect USB: legousbtower: fix open after failed reset request USB: legousbtower: fix use-after-free on release staging: vt6655: Fix memory leak in vt6655_probe iio: adc: ad799x: fix probe error handling iio: light: 
opt3001: fix mutex unlock race efivar/ssdt: Don't iterate over EFI vars if no SSDT override was specified perf llvm: Don't access out-of-scope array perf inject jit: Fix JIT_CODE_MOVE filename CIFS: Gracefully handle QueryInfo errors during open CIFS: Force revalidate inode when dentry is stale CIFS: Force reval dentry if LOOKUP_REVAL flag is set kernel/sysctl.c: do not override max_threads provided by userspace staging: fbtft: Stop using BL_CORE_DRIVER1 Staging: fbtft: fix memory leak in fbtft_framebuffer_alloc MIPS: Disable Loongson MMI instructions for kernel build Fix the locking in dcache_readdir() and friends media: stkwebcam: fix runtime PM after driver unbind tracing/hwlat: Report total time spent in all NMIs during the sample tracing/hwlat: Don't ignore outer-loop duration when calculating max_latency tracing: Get trace_array reference for available_tracers files x86/asm: Fix MWAITX C-state hint value xfs: clear sb->s_fs_info on mount failure Linux 4.9.197 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-10-17 13:54:42 -07:00
if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
return -EINVAL;
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
switch (mop->op) {
case KVM_S390_MEMOP_LOGICAL_READ:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
r = check_gva_range(vcpu, mop->gaddr, mop->ar,
mop->size, GACC_FETCH);
break;
}
r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
if (r == 0) {
if (copy_to_user(uaddr, tmpbuf, mop->size))
r = -EFAULT;
}
break;
case KVM_S390_MEMOP_LOGICAL_WRITE:
if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
r = check_gva_range(vcpu, mop->gaddr, mop->ar,
mop->size, GACC_STORE);
break;
}
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
r = -EFAULT;
break;
}
r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
break;
default:
r = -EINVAL;
}
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
vfree(tmpbuf);
return r;
}
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
int idx;
long r;
switch (ioctl) {
case KVM_S390_IRQ: {
struct kvm_s390_irq s390irq;
r = -EFAULT;
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
break;
r = kvm_s390_inject_vcpu(vcpu, &s390irq);
break;
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
Merge 4.9.194 into android-4.9 Changes in 4.9.194 bridge/mdb: remove wrong use of NLM_F_MULTI cdc_ether: fix rndis support for Mediatek based smartphones ipv6: Fix the link time qualifier of 'ping_v6_proc_exit_net()' isdn/capi: check message length in capi_write() net: Fix null de-reference of device refcount net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list sch_hhf: ensure quantum and hhf_non_hh_weight are non-zero sctp: Fix the link time qualifier of 'sctp_ctrlsock_exit()' sctp: use transport pf_retrans in sctp_do_8_2_transport_strike tcp: fix tcp_ecn_withdraw_cwr() to clear TCP_ECN_QUEUE_CWR tipc: add NULL pointer check before calling kfree_rcu tun: fix use-after-free when register netdev failed Revert "MIPS: SiByte: Enable swiotlb for SWARM, LittleSur and BigSur" gpio: fix line flag validation in linehandle_create gpio: fix line flag validation in lineevent_create Btrfs: fix assertion failure during fsync and use of stale transaction genirq: Prevent NULL pointer dereference in resend_irqs() KVM: s390: Do not leak kernel stack data in the KVM_S390_INTERRUPT ioctl KVM: x86: work around leak of uninitialized stack contents KVM: nVMX: handle page fault in vmread MIPS: VDSO: Prevent use of smp_processor_id() MIPS: VDSO: Use same -m%-float cflag as the kernel proper clk: rockchip: Don't yell about bad mmc phases when getting mtd: rawnand: mtk: Fix wrongly assigned OOB buffer pointer issue driver core: Fix use-after-free and double free on glue directory crypto: talitos - check AES key size crypto: talitos - fix CTR alg blocksize crypto: talitos - check data blocksize in ablkcipher. crypto: talitos - fix ECB algs ivsize crypto: talitos - Do not modify req->cryptlen on decryption. crypto: talitos - HMAC SNOOP NO AFEU mode requires SW icv checking. 
drm/mediatek: mtk_drm_drv.c: Add of_node_put() before goto nvmem: Use the same permissions for eeprom as for nvmem x86/build: Add -Wnoaddress-of-packed-member to REALMODE_CFLAGS, to silence GCC9 build warning USB: usbcore: Fix slab-out-of-bounds bug during device reset media: tm6000: double free if usb disconnect while streaming powerpc/mm/radix: Use the right page size for vmemmap mapping x86/boot: Add missing bootparam that breaks boot on some platforms xen-netfront: do not assume sk_buff_head list is empty in error handling KVM: coalesced_mmio: add bounds checking serial: sprd: correct the wrong sequence of arguments tty/serial: atmel: reschedule TX after RX was started mwifiex: Fix three heap overflow at parsing element in cfg80211_ap_settings ARM: OMAP2+: Fix missing SYSC_HAS_RESET_STATUS for dra7 epwmss s390/bpf: fix lcgr instruction encoding ARM: OMAP2+: Fix omap4 errata warning on other SoCs s390/bpf: use 32-bit index for tail calls NFSv4: Fix return values for nfs4_file_open() NFS: Fix initialisation of I/O result struct in nfs_pgio_rpcsetup Kconfig: Fix the reference to the IDT77105 Phy driver in the description of ATM_NICSTAR_USE_IDT77105 qed: Add cleanup in qed_slowpath_start() ARM: 8874/1: mm: only adjust sections of valid mm structures batman-adv: Only read OGM2 tvlv_len after buffer len check r8152: Set memory to all 0xFFs on failed reg reads x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines netfilter: nf_conntrack_ftp: Fix debug output NFSv2: Fix eof handling NFSv2: Fix write regression cifs: set domainName when a domain-key is used in multiuser cifs: Use kzfree() to zero out the password ARM: 8901/1: add a criteria for pfn_valid of arm sky2: Disable MSI on yet another ASUS boards (P6Xxxx) perf/x86/intel: Restrict period on Nehalem perf/x86/amd/ibs: Fix sample bias for dispatched micro-ops tools/power turbostat: fix buffer overrun net: seeq: Fix the function used to release some memory in an error handling path dmaengine: ti: 
dma-crossbar: Fix a memory leak bug dmaengine: ti: omap-dma: Add cleanup in omap_dma_probe() x86/uaccess: Don't leak the AC flags into __get_user() argument evaluation keys: Fix missing null pointer check in request_key_auth_describe() iommu/amd: Fix race in increase_address_space() floppy: fix usercopy direction media: technisat-usb2: break out of loop at end of buffer ARC: export "abort" for modules net_sched: let qdisc_put() accept NULL pointer Linux 4.9.194 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-09-21 08:01:31 +02:00
struct kvm_s390_irq s390irq = {};
r = -EFAULT;
if (copy_from_user(&s390int, argp, sizeof(s390int)))
break;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
r = kvm_s390_inject_vcpu(vcpu, &s390irq);
break;
}
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_vcpu_store_status(vcpu, arg);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case KVM_S390_SET_INITIAL_PSW: {
psw_t psw;
r = -EFAULT;
if (copy_from_user(&psw, argp, sizeof(psw)))
break;
r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
break;
}
case KVM_S390_INITIAL_RESET:
r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
break;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
break;
if (ioctl == KVM_SET_ONE_REG)
r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
else
r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
break;
}
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_S390_UCAS_MAP: {
struct kvm_s390_ucas_mapping ucasmap;
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
r = -EFAULT;
break;
}
if (!kvm_is_ucontrol(vcpu->kvm)) {
r = -EINVAL;
break;
}
r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
ucasmap.vcpu_addr, ucasmap.length);
break;
}
case KVM_S390_UCAS_UNMAP: {
struct kvm_s390_ucas_mapping ucasmap;
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
r = -EFAULT;
break;
}
if (!kvm_is_ucontrol(vcpu->kvm)) {
r = -EINVAL;
break;
}
r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
ucasmap.length);
break;
}
#endif
case KVM_S390_VCPU_FAULT: {
r = gmap_fault(vcpu->arch.gmap, arg, 0);
break;
}
case KVM_ENABLE_CAP:
{
struct kvm_enable_cap cap;
r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
break;
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
case KVM_S390_MEM_OP: {
struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
r = kvm_s390_guest_mem_op(vcpu, &mem_op);
else
r = -EFAULT;
break;
}
case KVM_S390_SET_IRQ_STATE: {
struct kvm_s390_irq_state irq_state;
r = -EFAULT;
if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
break;
if (irq_state.len > VCPU_IRQS_MAX_BUF ||
irq_state.len == 0 ||
irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
r = -EINVAL;
break;
}
r = kvm_s390_set_irq_state(vcpu,
(void __user *) irq_state.buf,
irq_state.len);
break;
}
case KVM_S390_GET_IRQ_STATE: {
struct kvm_s390_irq_state irq_state;
r = -EFAULT;
if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
break;
if (irq_state.len == 0) {
r = -EINVAL;
break;
}
r = kvm_s390_get_irq_state(vcpu,
(__u8 __user *) irq_state.buf,
irq_state.len);
break;
}
default:
r = -ENOTTY;
}
return r;
}
/*
 * Page fault handler for mappings of the vcpu file descriptor.
 *
 * With CONFIG_KVM_S390_UCONTROL, a fault at KVM_S390_SIE_PAGE_OFFSET on a
 * user controlled VM is resolved to the page holding the vcpu's SIE block
 * (with a reference taken) and 0 is returned. Every other fault yields
 * VM_FAULT_SIGBUS.
 */
int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	const bool sie_page_fault = vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET;

	if (sie_page_fault && kvm_is_ucontrol(vcpu->kvm)) {
		struct page *sie_page = virt_to_page(vcpu->arch.sie_block);

		get_page(sie_page);
		vmf->page = sie_page;
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}
/*
 * Architecture hook for memslot creation. Nothing is done here; the
 * function ignores all of its arguments and always returns 0.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}
/* Section: memory related */
/*
 * Sanity-check a memory region before it is installed.
 *
 * Returns 0 if the region is acceptable and -EINVAL if the userspace
 * address or the size is not segment (1MB) aligned, or if the region would
 * end above kvm->arch.mem_limit. @memslot and @change are not used.
 */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* low 20 bits: offset within a 1MB segment */
	const unsigned long segment_mask = 0xffffful;

	/*
	 * Memory slots have to start and end at a segment boundary (1MB).
	 * The memory in userland is ok to be fragmented into various
	 * different vmas. It is okay to mmap() and munmap() stuff in this
	 * slot after doing this call at any time.
	 */
	if ((mem->userspace_addr | mem->memory_size) & segment_mask)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
Merge 4.9.183 into android-4.9 Changes in 4.9.183 rapidio: fix a NULL pointer dereference when create_workqueue() fails fs/fat/file.c: issue flush after the writeback of FAT sysctl: return -EINVAL if val violates minmax ipc: prevent lockup on alloc_msg and free_msg ARM: prevent tracing IPI_CPU_BACKTRACE hugetlbfs: on restore reserve error path retain subpool reservation mem-hotplug: fix node spanned pages when we have a node with only ZONE_MOVABLE mm/cma.c: fix crash on CMA allocation if bitmap allocation fails mm/cma_debug.c: fix the break condition in cma_maxchunk_get() mm/slab.c: fix an infinite loop in leaks_show() kernel/sys.c: prctl: fix false positive in validate_prctl_map() drivers: thermal: tsens: Don't print error message on -EPROBE_DEFER mfd: tps65912-spi: Add missing of table registration mfd: intel-lpss: Set the device in reset state when init mfd: twl6040: Fix device init errors for ACCCTL register perf/x86/intel: Allow PEBS multi-entry in watermark mode drm/bridge: adv7511: Fix low refresh rate selection objtool: Don't use ignore flag for fake jumps pwm: meson: Use the spin-lock only to protect register modifications ntp: Allow TAI-UTC offset to be set to zero f2fs: fix to avoid panic in do_recover_data() f2fs: fix to clear dirty inode in error path of f2fs_iget() f2fs: fix to do sanity check on valid block count of segment configfs: fix possible use-after-free in configfs_register_group uml: fix a boot splat wrt use of cpu_all_mask watchdog: imx2_wdt: Fix set_timeout for big timeout values watchdog: fix compile time error of pretimeout governors iommu/vt-d: Set intel_iommu_gfx_mapped correctly ALSA: hda - Register irq handler after the chip initialization nvmem: core: fix read buffer in place fuse: retrieve: cap requested size to negotiated max_write nfsd: allow fh_want_write to be called twice x86/PCI: Fix PCI IRQ routing table memory leak platform/chrome: cros_ec_proto: check for NULL transfer function soc: mediatek: pwrap: Zero initialize rdata 
in pwrap_init_cipher clk: rockchip: Turn on "aclk_dmac1" for suspend on rk3288 ARM: dts: imx6sx: Specify IMX6SX_CLK_IPG as "ahb" clock to SDMA ARM: dts: imx7d: Specify IMX7D_CLK_IPG as "ipg" clock to SDMA ARM: dts: imx6ul: Specify IMX6UL_CLK_IPG as "ipg" clock to SDMA ARM: dts: imx6sx: Specify IMX6SX_CLK_IPG as "ipg" clock to SDMA ARM: dts: imx6qdl: Specify IMX6QDL_CLK_IPG as "ipg" clock to SDMA PCI: rpadlpar: Fix leaked device_node references in add/remove paths platform/x86: intel_pmc_ipc: adding error handling PCI: rcar: Fix a potential NULL pointer dereference PCI: rcar: Fix 64bit MSI message address handling video: hgafb: fix potential NULL pointer dereference video: imsttfb: fix potential NULL pointer dereferences PCI: xilinx: Check for __get_free_pages() failure gpio: gpio-omap: add check for off wake capable gpios dmaengine: idma64: Use actual device for DMA transfers pwm: tiehrpwm: Update shadow register for disabling PWMs ARM: dts: exynos: Always enable necessary APIO_1V8 and ABB_1V8 regulators on Arndale Octa pwm: Fix deadlock warning when removing PWM device ARM: exynos: Fix undefined instruction during Exynos5422 resume Revert "Bluetooth: Align minimum encryption key size for LE and BR/EDR connections" ALSA: seq: Cover unsubscribe_port() in list_mutex ALSA: oxfw: allow PCM capture for Stanton SCS.1m libata: Extend quirks for the ST1000LM024 drives with NOLPM quirk mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node fs/ocfs2: fix race in ocfs2_dentry_attach_lock() signal/ptrace: Don't leak unitialized kernel memory with PTRACE_PEEK_SIGINFO ptrace: restore smp_rmb() in __ptrace_may_access() media: v4l2-ioctl: clear fields in s_parm i2c: acorn: fix i2c warning bcache: fix stack corruption by PRECEDING_KEY() cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css() ASoC: cs42xx8: Add regcache mask dirty ASoC: fsl_asrc: Fix the issue about unsupported rate x86/uaccess, kcov: Disable stack protector ALSA: seq: Protect in-kernel ioctl 
calls with mutex ALSA: seq: Fix race of get-subscription call vs port-delete ioctls Revert "ALSA: seq: Protect in-kernel ioctl calls with mutex" Drivers: misc: fix out-of-bounds access in function param_set_kgdbts_var scsi: lpfc: add check for loss of ndlp when sending RRQ arm64/mm: Inhibit huge-vmap with ptdump scsi: bnx2fc: fix incorrect cast to u64 on shift operation selftests/timers: Add missing fflush(stdout) calls usbnet: ipheth: fix racing condition KVM: x86/pmu: do not mask the value that is written to fixed PMUs KVM: s390: fix memory slot handling for KVM_SET_USER_MEMORY_REGION drm/vmwgfx: integer underflow in vmw_cmd_dx_set_shader() leading to an invalid read drm/vmwgfx: NULL pointer dereference from vmw_cmd_dx_view_define() usb: dwc2: Fix DMA cache alignment issues USB: Fix chipmunk-like voice when using Logitech C270 for recording audio. USB: usb-storage: Add new ID to ums-realtek USB: serial: pl2303: add Allied Telesis VT-Kit3 USB: serial: option: add support for Simcom SIM7500/SIM7600 RNDIS mode USB: serial: option: add Telit 0x1260 and 0x1261 compositions rtc: pcf8523: don't return invalid date when battery is low ax25: fix inconsistent lock state in ax25_destroy_timer be2net: Fix number of Rx queues used for flow hashing ipv6: flowlabel: fl6_sock_lookup() must use atomic_inc_not_zero lapb: fixed leak of control-blocks. neigh: fix use-after-free read in pneigh_get_next sunhv: Fix device naming inconsistency between sunhv_console and sunhv_reg Revert "staging: vc04_services: prevent integer overflow in create_pagelist()" perf/x86/intel/ds: Fix EVENT vs. 
UEVENT PEBS constraints selftests: netfilter: missing error check when setting up veth interface mISDN: make sure device name is NUL terminated x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor perf/ring_buffer: Fix exposing a temporarily decreased data_head perf/ring_buffer: Add ordering to rb->nest increment gpio: fix gpio-adp5588 build errors net: tulip: de4x5: Drop redundant MODULE_DEVICE_TABLE() i2c: dev: fix potential memory leak in i2cdev_ioctl_rdwr configfs: Fix use-after-free when accessing sd->s_dentry perf data: Fix 'strncat may truncate' build failure with recent gcc perf record: Fix s390 missing module symbol and warning for non-root users ia64: fix build errors by exporting paddr_to_nid() KVM: PPC: Book3S: Use new mutex to synchronize access to rtas token list KVM: PPC: Book3S HV: Don't take kvm->lock around kvm_for_each_vcpu net: sh_eth: fix mdio access in sh_eth_close() for R-Car Gen2 and RZ/A1 SoCs scsi: libcxgbi: add a check for NULL pointer in cxgbi_check_route() scsi: smartpqi: properly set both the DMA mask and the coherent DMA mask scsi: libsas: delete sas port if expander discover failed mlxsw: spectrum: Prevent force of 56G Abort file_remove_privs() for non-reg. files Linux 4.9.183 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-06-22 08:54:42 +02:00
int rc = 0;
Merge 4.9.183 into android-4.9 Changes in 4.9.183 rapidio: fix a NULL pointer dereference when create_workqueue() fails fs/fat/file.c: issue flush after the writeback of FAT sysctl: return -EINVAL if val violates minmax ipc: prevent lockup on alloc_msg and free_msg ARM: prevent tracing IPI_CPU_BACKTRACE hugetlbfs: on restore reserve error path retain subpool reservation mem-hotplug: fix node spanned pages when we have a node with only ZONE_MOVABLE mm/cma.c: fix crash on CMA allocation if bitmap allocation fails mm/cma_debug.c: fix the break condition in cma_maxchunk_get() mm/slab.c: fix an infinite loop in leaks_show() kernel/sys.c: prctl: fix false positive in validate_prctl_map() drivers: thermal: tsens: Don't print error message on -EPROBE_DEFER mfd: tps65912-spi: Add missing of table registration mfd: intel-lpss: Set the device in reset state when init mfd: twl6040: Fix device init errors for ACCCTL register perf/x86/intel: Allow PEBS multi-entry in watermark mode drm/bridge: adv7511: Fix low refresh rate selection objtool: Don't use ignore flag for fake jumps pwm: meson: Use the spin-lock only to protect register modifications ntp: Allow TAI-UTC offset to be set to zero f2fs: fix to avoid panic in do_recover_data() f2fs: fix to clear dirty inode in error path of f2fs_iget() f2fs: fix to do sanity check on valid block count of segment configfs: fix possible use-after-free in configfs_register_group uml: fix a boot splat wrt use of cpu_all_mask watchdog: imx2_wdt: Fix set_timeout for big timeout values watchdog: fix compile time error of pretimeout governors iommu/vt-d: Set intel_iommu_gfx_mapped correctly ALSA: hda - Register irq handler after the chip initialization nvmem: core: fix read buffer in place fuse: retrieve: cap requested size to negotiated max_write nfsd: allow fh_want_write to be called twice x86/PCI: Fix PCI IRQ routing table memory leak platform/chrome: cros_ec_proto: check for NULL transfer function soc: mediatek: pwrap: Zero initialize rdata 
in pwrap_init_cipher clk: rockchip: Turn on "aclk_dmac1" for suspend on rk3288 ARM: dts: imx6sx: Specify IMX6SX_CLK_IPG as "ahb" clock to SDMA ARM: dts: imx7d: Specify IMX7D_CLK_IPG as "ipg" clock to SDMA ARM: dts: imx6ul: Specify IMX6UL_CLK_IPG as "ipg" clock to SDMA ARM: dts: imx6sx: Specify IMX6SX_CLK_IPG as "ipg" clock to SDMA ARM: dts: imx6qdl: Specify IMX6QDL_CLK_IPG as "ipg" clock to SDMA PCI: rpadlpar: Fix leaked device_node references in add/remove paths platform/x86: intel_pmc_ipc: adding error handling PCI: rcar: Fix a potential NULL pointer dereference PCI: rcar: Fix 64bit MSI message address handling video: hgafb: fix potential NULL pointer dereference video: imsttfb: fix potential NULL pointer dereferences PCI: xilinx: Check for __get_free_pages() failure gpio: gpio-omap: add check for off wake capable gpios dmaengine: idma64: Use actual device for DMA transfers pwm: tiehrpwm: Update shadow register for disabling PWMs ARM: dts: exynos: Always enable necessary APIO_1V8 and ABB_1V8 regulators on Arndale Octa pwm: Fix deadlock warning when removing PWM device ARM: exynos: Fix undefined instruction during Exynos5422 resume Revert "Bluetooth: Align minimum encryption key size for LE and BR/EDR connections" ALSA: seq: Cover unsubscribe_port() in list_mutex ALSA: oxfw: allow PCM capture for Stanton SCS.1m libata: Extend quirks for the ST1000LM024 drives with NOLPM quirk mm/list_lru.c: fix memory leak in __memcg_init_list_lru_node fs/ocfs2: fix race in ocfs2_dentry_attach_lock() signal/ptrace: Don't leak unitialized kernel memory with PTRACE_PEEK_SIGINFO ptrace: restore smp_rmb() in __ptrace_may_access() media: v4l2-ioctl: clear fields in s_parm i2c: acorn: fix i2c warning bcache: fix stack corruption by PRECEDING_KEY() cgroup: Use css_tryget() instead of css_tryget_online() in task_get_css() ASoC: cs42xx8: Add regcache mask dirty ASoC: fsl_asrc: Fix the issue about unsupported rate x86/uaccess, kcov: Disable stack protector ALSA: seq: Protect in-kernel ioctl 
calls with mutex ALSA: seq: Fix race of get-subscription call vs port-delete ioctls Revert "ALSA: seq: Protect in-kernel ioctl calls with mutex" Drivers: misc: fix out-of-bounds access in function param_set_kgdbts_var scsi: lpfc: add check for loss of ndlp when sending RRQ arm64/mm: Inhibit huge-vmap with ptdump scsi: bnx2fc: fix incorrect cast to u64 on shift operation selftests/timers: Add missing fflush(stdout) calls usbnet: ipheth: fix racing condition KVM: x86/pmu: do not mask the value that is written to fixed PMUs KVM: s390: fix memory slot handling for KVM_SET_USER_MEMORY_REGION drm/vmwgfx: integer underflow in vmw_cmd_dx_set_shader() leading to an invalid read drm/vmwgfx: NULL pointer dereference from vmw_cmd_dx_view_define() usb: dwc2: Fix DMA cache alignment issues USB: Fix chipmunk-like voice when using Logitech C270 for recording audio. USB: usb-storage: Add new ID to ums-realtek USB: serial: pl2303: add Allied Telesis VT-Kit3 USB: serial: option: add support for Simcom SIM7500/SIM7600 RNDIS mode USB: serial: option: add Telit 0x1260 and 0x1261 compositions rtc: pcf8523: don't return invalid date when battery is low ax25: fix inconsistent lock state in ax25_destroy_timer be2net: Fix number of Rx queues used for flow hashing ipv6: flowlabel: fl6_sock_lookup() must use atomic_inc_not_zero lapb: fixed leak of control-blocks. neigh: fix use-after-free read in pneigh_get_next sunhv: Fix device naming inconsistency between sunhv_console and sunhv_reg Revert "staging: vc04_services: prevent integer overflow in create_pagelist()" perf/x86/intel/ds: Fix EVENT vs. 
UEVENT PEBS constraints selftests: netfilter: missing error check when setting up veth interface mISDN: make sure device name is NUL terminated x86/CPU/AMD: Don't force the CPB cap when running under a hypervisor perf/ring_buffer: Fix exposing a temporarily decreased data_head perf/ring_buffer: Add ordering to rb->nest increment gpio: fix gpio-adp5588 build errors net: tulip: de4x5: Drop redundant MODULE_DEVICE_TABLE() i2c: dev: fix potential memory leak in i2cdev_ioctl_rdwr configfs: Fix use-after-free when accessing sd->s_dentry perf data: Fix 'strncat may truncate' build failure with recent gcc perf record: Fix s390 missing module symbol and warning for non-root users ia64: fix build errors by exporting paddr_to_nid() KVM: PPC: Book3S: Use new mutex to synchronize access to rtas token list KVM: PPC: Book3S HV: Don't take kvm->lock around kvm_for_each_vcpu net: sh_eth: fix mdio access in sh_eth_close() for R-Car Gen2 and RZ/A1 SoCs scsi: libcxgbi: add a check for NULL pointer in cxgbi_check_route() scsi: smartpqi: properly set both the DMA mask and the coherent DMA mask scsi: libsas: delete sas port if expander discover failed mlxsw: spectrum: Prevent force of 56G Abort file_remove_privs() for non-reg. files Linux 4.9.183 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-06-22 08:54:42 +02:00
switch (change) {
case KVM_MR_DELETE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
old->npages * PAGE_SIZE);
break;
case KVM_MR_MOVE:
rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE,
old->npages * PAGE_SIZE);
if (rc)
break;
/* FALLTHROUGH */
case KVM_MR_CREATE:
rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
mem->guest_phys_addr, mem->memory_size);
break;
case KVM_MR_FLAGS_ONLY:
break;
default:
WARN(1, "Unknown KVM MR CHANGE: %d\n", change);
}
if (rc)
pr_warn("failed to commit memory region\n");
return;
}
/*
 * Compute the mask that kvm_s390_init() applies to host facility word @i.
 *
 * sclp.hmfai is shifted left by 2 * @i and the result is shifted right by
 * 30, which leaves the two uppermost remaining bits (assuming sclp.hmfai is
 * a 32-bit quantity - TODO confirm against its declaration).  That value is
 * multiplied by 16 and used as a right-shift count on 0x0000ffffffffffff,
 * so each increment removes another 16 bits from the returned mask.
 */
static inline unsigned long nonhyp_mask(int i)
{
	unsigned int fai = (sclp.hmfai << (i * 2)) >> 30;
	unsigned int drop_bits = fai << 4;

	return 0x0000ffffffffffffUL >> drop_bits;
}
/*
 * Reset the valid_wakeup flag of @vcpu to false.
 *
 * NOTE(review): judging by the name this is presumably invoked by the
 * generic KVM code once a vcpu has finished blocking - confirm against the
 * caller.
 */
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}
/*
 * Module initialisation.
 *
 * Bails out with -ENODEV when sclp.has_sief2 is not set.  Otherwise the
 * first 16 entries of the host facility list (S390_lowcore.stfle_fac_list),
 * each filtered through nonhyp_mask(), are OR-ed into
 * kvm_s390_fac_list_mask before control is handed to kvm_init().
 *
 * Returns -ENODEV as described above, or whatever kvm_init() returns.
 */
static int __init kvm_s390_init(void)
{
	int word = 0;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	/* Add the host facility bits that pass nonhyp_mask() to the KVM mask. */
	while (word < 16) {
		kvm_s390_fac_list_mask[word] |=
			S390_lowcore.stfle_fac_list[word] & nonhyp_mask(word);
		word++;
	}

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}
/*
 * Module teardown: delegates entirely to kvm_exit(); nothing s390-specific
 * is done here.
 */
static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}
/* Register kvm_s390_init()/kvm_s390_exit() as the module's entry and exit points. */
module_init(kvm_s390_init);
module_exit(kvm_s390_exit);
/*
* Enable autoloading of the kvm module.
* Note that we add the module alias here instead of virt/kvm/kvm_main.c
* since x86 takes a different approach.
*/
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");