Changes in 4.9.311 USB: serial: pl2303: add IBM device IDs USB: serial: simple: add Nokia phone driver netdevice: add the case if dev is NULL virtio_console: break out of buf poll on remove ethernet: sun: Free the coherent when failing in probing af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register block: Add a helper to validate the block size virtio-blk: Use blk_validate_block_size() to validate block size USB: usb-storage: Fix use of bitfields for hardware data in ene_ub6250.c coresight: Fix TRCCONFIGR.QE sysfs interface iio: inkern: apply consumer scale on IIO_VAL_INT cases iio: inkern: make a best effort on offset calculation clk: uniphier: Fix fixed-rate initialization ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE SUNRPC: avoid race between mod_timer() and del_timer_sync() NFSD: prevent underflow in nfssvc_decode_writeargs() can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path jffs2: fix use-after-free in jffs2_clear_xattr_subsystem jffs2: fix memory leak in jffs2_do_mount_fs jffs2: fix memory leak in jffs2_scan_medium mm/pages_alloc.c: don't create ZONE_MOVABLE beyond the end of a node mempolicy: mbind_range() set_policy() after vma_merge() scsi: libsas: Fix sas_ata_qc_issue() handling of NCQ NON DATA commands Revert "Input: clear BTN_RIGHT/MIDDLE on buttonpads" ALSA: cs4236: fix an incorrect NULL check on list iterator drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() video: fbdev: sm712fb: Fix crash in smtcfb_read() video: fbdev: atari: Atari 2 bpp (STe) palette bugfix ARM: dts: exynos: fix UART3 pins configuration in Exynos5250 ARM: dts: exynos: add missing HDMI supplies on SMDK5250 ARM: dts: exynos: add missing HDMI supplies on SMDK5420 carl9170: fix missing bit-wise or operator for tx_params thermal: int340x: Increase bitmap size lib/raid6/test: fix multiple definition linking error DEC: Limit PMAX memory probing to R3k systems media: davinci: vpif: fix unbalanced runtime 
PM get brcmfmac: firmware: Allocate space for default boardrev in nvram brcmfmac: pcie: Replace brcmf_pcie_copy_mem_todev with memcpy_toio PCI: pciehp: Clear cmd_busy bit in polling mode crypto: authenc - Fix sleep in atomic context in decrypt_tail crypto: mxs-dcp - Fix scatterlist processing spi: tegra114: Add missing IRQ check in tegra_spi_probe selftests/x86: Add validity check and allow field splitting hwmon: (pmbus) Add mutex to regulator ops hwmon: (sch56xx-common) Replace WDOG_ACTIVE with WDOG_HW_RUNNING PM: hibernate: fix __setup handler error handling PM: suspend: fix return value of __setup handler crypto: vmx - add missing dependencies crypto: ccp - ccp_dmaengine_unregister release dma channels hwmon: (pmbus) Add Vin unit off handling clocksource: acpi_pm: fix return value of __setup handler sched/debug: Remove mpol_get/put and task_lock/unlock from sched_show_numa perf/core: Fix address filter parser for multiple filters perf/x86/intel/pt: Fix address filter config for 32-bit kernel video: fbdev: smscufx: Fix null-ptr-deref in ufx_usb_probe() video: fbdev: fbcvt.c: fix printing in fb_cvt_print_name() ARM: dts: qcom: ipq4019: fix sleep clock soc: ti: wkup_m3_ipc: Fix IRQ check in wkup_m3_ipc_probe media: usb: go7007: s2250-board: fix leak in probe() ASoC: ti: davinci-i2s: Add check for clk_enable() ALSA: spi: Add check for clk_enable() arm64: dts: ns2: Fix spi-cpol and spi-cpha property arm64: dts: broadcom: Fix sata nodename printk: fix return value of printk.devkmsg __setup handler ASoC: mxs-saif: Handle errors for clk_enable ASoC: atmel_ssc_dai: Handle errors for clk_enable memory: emif: Add check for setup_interrupts memory: emif: check the pointer temp in get_device_details() ALSA: firewire-lib: fix uninitialized flag for AV/C deferred transaction ASoC: atmel: Add missing of_node_put() in at91sam9g20ek_audio_probe ASoC: wm8350: Handle error for wm8350_register_irq ASoC: fsi: Add check for clk_enable video: fbdev: omapfb: Add missing of_node_put() in 
dvic_probe_of ASoC: dmaengine: do not use a NULL prepare_slave_config() callback ASoC: mxs: Fix error handling in mxs_sgtl5000_probe ASoC: imx-es8328: Fix error return code in imx_es8328_probe() mtd: onenand: Check for error irq drm/edid: Don't clear formats if using deep color ath9k_htc: fix uninit value bugs ray_cs: Check ioremap return value power: supply: ab8500: Fix memory leak in ab8500_fg_sysfs_init HID: i2c-hid: fix GET/SET_REPORT for unnumbered reports iwlwifi: Fix -EIO error code that is never returned scsi: pm8001: Fix command initialization in pm80XX_send_read_log() scsi: pm8001: Fix command initialization in pm8001_chip_ssp_tm_req() scsi: pm8001: Fix payload initialization in pm80xx_set_thermal_config() scsi: pm8001: Fix abort all task initialization TOMOYO: fix __setup handlers return values ext2: correct max file size computing drm/tegra: Fix reference leak in tegra_dsi_ganged_probe KVM: x86: Fix emulation in writing cr8 KVM: x86/emulator: Defer not-present segment check in __load_segment_descriptor() i2c: xiic: Make bus names unique power: supply: wm8350-power: Handle error for wm8350_register_irq power: supply: wm8350-power: Add missing free in free_charger_irq powerpc/sysdev: fix incorrect use to determine if list is empty mfd: mc13xxx: Add check for mc13xxx_irq_request MIPS: RB532: fix return value of __setup handler USB: storage: ums-realtek: fix error code in rts51x_read_mem() af_netlink: Fix shift out of bounds in group mask calculation i2c: mux: demux-pinctrl: do not deactivate a master that is not active mfd: asic3: Add missing iounmap() on error asic3_mfd_probe mxser: fix xmit_buf leak in activate when LSR == 0xff pwm: lpc18xx-sct: Initialize driver data and hardware before pwmchip_add() iio: adc: Add check for devm_request_threaded_irq clk: qcom: clk-rcg2: Update the frac table for pixel clock remoteproc: qcom_wcnss: Add missing of_node_put() in wcnss_alloc_memory_region clk: loongson1: Terminate clk_div_table with sentinel element clk: 
clps711x: Terminate clk_div_table with sentinel element clk: tegra: tegra124-emc: Fix missing put_device() call in emc_ensure_emc_driver NFS: remove unneeded check in decode_devicenotify_args() pinctrl: mediatek: Fix missing of_node_put() in mtk_pctrl_init pinctrl: nomadik: Add missing of_node_put() in nmk_pinctrl_probe pinctrl/rockchip: Add missing of_node_put() in rockchip_pinctrl_probe tty: hvc: fix return value of __setup handler kgdboc: fix return value of __setup handler kgdbts: fix return value of __setup handler jfs: fix divide error in dbNextAG netfilter: nf_conntrack_tcp: preserve liberal flag in tcp options net: phy: broadcom: Fix brcm_fet_config_init() qlcnic: dcb: default to returning -EOPNOTSUPP net/x25: Fix null-ptr-deref caused by x25_disconnect selinux: use correct type for context length loop: use sysfs_emit() in the sysfs xxx show() Fix incorrect type in assignment of ipv6 port for audit irqchip/nvic: Release nvic_base upon failure ACPICA: Avoid walking the ACPI Namespace if it is not there ACPI/APEI: Limit printable size of BERT table data PM: core: keep irq flags in device_pm_check_callbacks() spi: tegra20: Use of_device_get_match_data() ext4: don't BUG if someone dirty pages without asking ext4 first ntfs: add sanity check on allocation size video: fbdev: nvidiafb: Use strscpy() to prevent buffer overflow video: fbdev: w100fb: Reset global state video: fbdev: cirrusfb: check pixclock to avoid divide by zero video: fbdev: omapfb: acx565akm: replace snprintf with sysfs_emit ARM: dts: qcom: fix gic_irq_domain_translate warnings for msm8960 ARM: dts: bcm2837: Add the missing L1/L2 cache information video: fbdev: omapfb: panel-dsi-cm: Use sysfs_emit() instead of snprintf() video: fbdev: omapfb: panel-tpo-td043mtea1: Use sysfs_emit() instead of snprintf() ASoC: soc-core: skip zero num_dai component in searching dai name media: cx88-mpeg: clear interrupt status register before streaming video ARM: tegra: tamonten: Fix I2C3 pad setting ARM: mmp: Fix 
failure to remove sram device video: fbdev: sm712fb: Fix crash in smtcfb_write() media: hdpvr: initialize dev->worker at hdpvr_register_videodev mmc: host: Return an error when ->enable_sdio_irq() ops is missing scsi: qla2xxx: Fix incorrect reporting of task management failure KVM: Prevent module exit until all VMs are freed ubifs: Add missing iput if do_tmpfile() failed in rename whiteout ubifs: setflags: Make dirtied_ino_d 8 bytes aligned gfs2: Make sure FITRIM minlen is rounded up to fs block size pinctrl: pinconf-generic: Print arguments for bias-pull-* ACPI: CPPC: Avoid out of bounds access when parsing _CPC data mm/mmap: return 1 from stack_guard_gap __setup() handler mm/memcontrol: return 1 from cgroup.memory __setup() handler ubi: fastmap: Return error code if memory allocation fails in add_aeb() ASoC: topology: Allow TLV control to be either read or write ARM: dts: spear1340: Update serial node properties ARM: dts: spear13xx: Update SPI dma properties openvswitch: Fixed nd target mask field in the flow dump. 
KVM: x86: Forbid VMM to set SYNIC/STIMER MSRs when SynIC wasn't activated rtc: wm8350: Handle error for wm8350_register_irq ARM: 9187/1: JIVE: fix return value of __setup handler KVM: x86/svm: Clear reserved bits written to PerfEvtSeln MSRs ath5k: fix OOB in ath5k_eeprom_read_pcal_info_5111 ptp: replace snprintf with sysfs_emit powerpc: dts: t104xrdb: fix phy type for FMAN 4/5 scsi: mvsas: Replace snprintf() with sysfs_emit() scsi: bfa: Replace snprintf() with sysfs_emit() iommu/arm-smmu-v3: fix event handling soft lockup dm ioctl: prevent potential spectre v1 gadget scsi: pm8001: Fix pm8001_mpi_task_abort_resp() scsi: aha152x: Fix aha152x_setup() __setup handler return value bnxt_en: Eliminate unintended link toggle during FW reset MIPS: fix fortify panic when copying asm exception handlers scsi: libfc: Fix use after free in fc_exch_abts_resp() usb: dwc3: omap: fix "unbalanced disables for smps10_out1" on omap5evm xtensa: fix DTC warning unit_address_format Bluetooth: Fix use after free in hci_send_acl init/main.c: return 1 from handled __setup() functions w1: w1_therm: fixes w1_seq for ds28ea00 sensors SUNRPC/call_alloc: async tasks mustn't block waiting for memory serial: samsung_tty: do not unlock port->lock for uart_write_wakeup() virtio_console: eliminate anonymous module_init & module_exit jfs: prevent NULL deref in diFree mm: fix race between MADV_FREE reclaim and blkdev direct IO read scsi: zorro7xx: Fix a resource leak in zorro7xx_remove_one() net: stmmac: Fix unset max_speed difference between DT and non-DT platforms drm/imx: Fix memory leak in imx_pd_connector_get_modes drbd: Fix five use after free bugs in get_initial_state mmmremap.c: avoid pointless invalidate_range_start/end on mremap(old_size=0) mm/mempolicy: fix mpol_new leak in shared_policy_replace x86/pm: Save the MSR validity status at context setup x86/speculation: Restore speculation related MSRs during S3 resume arm64: patch_text: Fixup last cpu should be master tools build: Use $(shell ) 
instead of `` to get embedded libperl's ccopts dmaengine: Revert "dmaengine: shdma: Fix runtime PM imbalance on error" mm: don't skip swap entry even if zap_details specified arm64: module: remove (NOLOAD) from linker script xfrm: policy: match with both mark and mask on user interfaces veth: Ensure eth header is in skb's linear part net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link nfc: nci: add flush_workqueue to prevent uaf cifs: potential buffer overflow in handling symlinks drm/amdkfd: Check for potential null return of kmalloc_array() scsi: ibmvscsis: Increase INITIAL_SRP_LIMIT to 1024 net: micrel: fix KS8851_MLL Kconfig gpu: ipu-v3: Fix dev_dbg frequency output scsi: mvsas: Add PCI ID of RocketRaid 2640 drivers: net: slip: fix NPD bug in sl_tx_timeout() mm, page_alloc: fix build_zonerefs_node() mm: kmemleak: take a full lowmem check in kmemleak_*_phys() ALSA: pcm: Test for "silence" field in struct "pcm_format_data" ARM: davinci: da850-evm: Avoid NULL pointer dereference smp: Fix offline cpu check in flush_smp_call_function_queue() i2c: pasemi: Wait for write xfers to finish gcc-plugins: latent_entropy: use /dev/urandom Linux 4.9.311 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Ia8f55c5ae2f0eb71b0893d8271a10dfd3c78b3b8
1285 lines
33 KiB
C
1285 lines
33 KiB
C
/*
|
|
* KVM Microsoft Hyper-V emulation
|
|
*
|
|
* derived from arch/x86/kvm/x86.c
|
|
*
|
|
* Copyright (C) 2006 Qumranet, Inc.
|
|
* Copyright (C) 2008 Qumranet, Inc.
|
|
* Copyright IBM Corporation, 2008
|
|
* Copyright 2010 Red Hat, Inc. and/or its affiliates.
|
|
* Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
|
|
*
|
|
* Authors:
|
|
* Avi Kivity <avi@qumranet.com>
|
|
* Yaniv Kamay <yaniv@qumranet.com>
|
|
* Amit Shah <amit.shah@qumranet.com>
|
|
* Ben-Ami Yassour <benami@il.ibm.com>
|
|
* Andrey Smetanin <asmetanin@virtuozzo.com>
|
|
*
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
* the COPYING file in the top-level directory.
|
|
*
|
|
*/
|
|
|
|
#include "x86.h"
|
|
#include "lapic.h"
|
|
#include "ioapic.h"
|
|
#include "hyperv.h"
|
|
|
|
#include <linux/kvm_host.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/nospec.h>
|
|
#include <asm/apicdef.h>
|
|
#include <trace/events/kvm.h>
|
|
|
|
#include "trace.h"
|
|
|
|
static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
|
|
{
|
|
return atomic64_read(&synic->sint[sint]);
|
|
}
|
|
|
|
/*
 * Extract the interrupt vector from a raw SINT register value.
 *
 * Returns -1 when the HV_SYNIC_SINT_MASKED bit is set, otherwise the bits
 * selected by HV_SYNIC_SINT_VECTOR_MASK.
 */
static inline int synic_get_sint_vector(u64 sint_value)
{
	return (sint_value & HV_SYNIC_SINT_MASKED) ?
		-1 : (int)(sint_value & HV_SYNIC_SINT_VECTOR_MASK);
}
|
|
|
|
static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
|
|
int vector)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
|
|
if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
|
|
int vector)
|
|
{
|
|
int i;
|
|
u64 sint_value;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
|
|
sint_value = synic_read_sint(synic, i);
|
|
if (synic_get_sint_vector(sint_value) == vector &&
|
|
sint_value & HV_SYNIC_SINT_AUTO_EOI)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
|
|
u64 data, bool host)
|
|
{
|
|
int vector;
|
|
|
|
vector = data & HV_SYNIC_SINT_VECTOR_MASK;
|
|
if (vector < 16 && !host)
|
|
return 1;
|
|
/*
|
|
* Guest may configure multiple SINTs to use the same vector, so
|
|
* we maintain a bitmap of vectors handled by synic, and a
|
|
* bitmap of vectors with auto-eoi behavior. The bitmaps are
|
|
* updated here, and atomically queried on fast paths.
|
|
*/
|
|
|
|
atomic64_set(&synic->sint[sint], data);
|
|
|
|
if (synic_has_vector_connected(synic, vector))
|
|
__set_bit(vector, synic->vec_bitmap);
|
|
else
|
|
__clear_bit(vector, synic->vec_bitmap);
|
|
|
|
if (synic_has_vector_auto_eoi(synic, vector))
|
|
__set_bit(vector, synic->auto_eoi_bitmap);
|
|
else
|
|
__clear_bit(vector, synic->auto_eoi_bitmap);
|
|
|
|
/* Load SynIC vectors into EOI exit bitmap */
|
|
kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic));
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Look up the SynIC of the vCPU identified by @vcpu_id.
 *
 * Returns NULL when @vcpu_id is not below the number of online vCPUs, when
 * no vCPU is found, or when that vCPU's SynIC is not active.
 */
static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
{
	struct kvm_vcpu *vcpu = NULL;
	struct kvm_vcpu_hv_synic *synic;

	if (vcpu_id < atomic_read(&kvm->online_vcpus))
		vcpu = kvm_get_vcpu(kvm, vcpu_id);
	if (!vcpu)
		return NULL;

	synic = vcpu_to_synic(vcpu);
	if (!synic->active)
		return NULL;

	return synic;
}
|
|
|
|
static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic,
|
|
u32 sint)
|
|
{
|
|
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
|
struct page *page;
|
|
gpa_t gpa;
|
|
struct hv_message *msg;
|
|
struct hv_message_page *msg_page;
|
|
|
|
gpa = synic->msg_page & PAGE_MASK;
|
|
page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
|
|
if (is_error_page(page)) {
|
|
vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n",
|
|
gpa);
|
|
return;
|
|
}
|
|
msg_page = kmap_atomic(page);
|
|
|
|
msg = &msg_page->sint_message[sint];
|
|
msg->header.message_flags.msg_pending = 0;
|
|
|
|
kunmap_atomic(msg_page);
|
|
kvm_release_page_dirty(page);
|
|
kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);
|
|
}
|
|
|
|
/*
 * Handle acknowledgement of @sint on @vcpu:
 *  - clear the slot's msg_pending flag when the message page is enabled,
 *  - re-queue enabled synthetic timers that target @sint and still have a
 *    message pending,
 *  - notify the GSI mapped to @sint, if any.
 */
static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_stimer *stimer;
	int n, gsi, srcu_idx;
	int requeued = 0;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	if (synic->msg_page & HV_SYNIC_SIMP_ENABLE)
		synic_clear_sint_msg_pending(synic, sint);

	/* Try to deliver pending Hyper-V SynIC timers messages */
	for (n = 0; n < ARRAY_SIZE(hv_vcpu->stimer); n++) {
		stimer = &hv_vcpu->stimer[n];

		if (!stimer->msg_pending)
			continue;
		if (!(stimer->config & HV_STIMER_ENABLE))
			continue;
		if (HV_STIMER_SINT(stimer->config) != sint)
			continue;

		set_bit(stimer->index, hv_vcpu->stimer_pending_bitmap);
		requeued = 1;
	}
	if (requeued)
		kvm_make_request(KVM_REQ_HV_STIMER, vcpu);

	/* sint_to_gsi is read inside the irq_srcu read-side section. */
	srcu_idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, srcu_idx);
}
|
|
|
|
/*
 * Fill the vCPU's Hyper-V exit record with the SynIC state (the MSR that
 * was written plus control, event page and message page values) and raise
 * KVM_REQ_HV_EXIT on the vCPU.
 */
static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);

	vcpu->arch.hyperv.exit.type = KVM_EXIT_HYPERV_SYNIC;
	vcpu->arch.hyperv.exit.u.synic.msr = msr;
	vcpu->arch.hyperv.exit.u.synic.control = synic->control;
	vcpu->arch.hyperv.exit.u.synic.evt_page = synic->evt_page;
	vcpu->arch.hyperv.exit.u.synic.msg_page = synic->msg_page;

	kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
}
|
|
|
|
static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
|
|
u32 msr, u64 data, bool host)
|
|
{
|
|
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
|
|
int ret;
|
|
|
|
if (!synic->active)
|
|
return 1;
|
|
|
|
trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);
|
|
|
|
ret = 0;
|
|
switch (msr) {
|
|
case HV_X64_MSR_SCONTROL:
|
|
synic->control = data;
|
|
if (!host)
|
|
synic_exit(synic, msr);
|
|
break;
|
|
case HV_X64_MSR_SVERSION:
|
|
if (!host) {
|
|
ret = 1;
|
|
break;
|
|
}
|
|
synic->version = data;
|
|
break;
|
|
case HV_X64_MSR_SIEFP:
|
|
if (data & HV_SYNIC_SIEFP_ENABLE)
|
|
if (kvm_clear_guest(vcpu->kvm,
|
|
data & PAGE_MASK, PAGE_SIZE)) {
|
|
ret = 1;
|
|
break;
|
|
}
|
|
synic->evt_page = data;
|
|
if (!host)
|
|
synic_exit(synic, msr);
|
|
break;
|
|
case HV_X64_MSR_SIMP:
|
|
if (data & HV_SYNIC_SIMP_ENABLE)
|
|
if (kvm_clear_guest(vcpu->kvm,
|
|
data & PAGE_MASK, PAGE_SIZE)) {
|
|
ret = 1;
|
|
break;
|
|
}
|
|
synic->msg_page = data;
|
|
if (!host)
|
|
synic_exit(synic, msr);
|
|
break;
|
|
case HV_X64_MSR_EOM: {
|
|
int i;
|
|
|
|
if (!synic->active)
|
|
break;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
|
|
kvm_hv_notify_acked_sint(vcpu, i);
|
|
break;
|
|
}
|
|
case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
|
|
ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
|
|
break;
|
|
default:
|
|
ret = 1;
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Read SynIC MSR @msr into *@pdata.
 *
 * Returns 0 on success; returns 1 (leaving *@pdata untouched) when the
 * SynIC is not active or @msr is not a SynIC MSR handled here.
 */
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
{
	if (!synic->active)
		return 1;

	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		return 0;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		return 0;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		return 0;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		return 0;
	case HV_X64_MSR_EOM:
		/* EOM always reads back as zero. */
		*pdata = 0;
		return 0;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		return 0;
	default:
		return 1;
	}
}
|
|
|
|
/*
 * Inject the interrupt configured for @sint into the vCPU owning @synic.
 *
 * Returns -EINVAL if @sint is out of range, -ENOENT if the SINT is masked,
 * otherwise the result of kvm_irq_delivery_to_apic().
 */
int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int vector;
	int delivered;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	/* Fixed-mode interrupt, physically addressed to this vCPU's APIC. */
	memset(&irq, 0, sizeof(irq));
	irq.vector = vector;
	irq.level = 1;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.dest_id = kvm_apic_id(vcpu->arch.apic);

	delivered = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, delivered);

	return delivered;
}
|
|
|
|
/*
 * Raise @sint on the SynIC of the vCPU identified by @vcpu_id.
 *
 * Returns -EINVAL if synic_get() finds no usable SynIC, otherwise the
 * result of synic_set_irq().
 */
int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
{
	struct kvm_vcpu_hv_synic *synic = synic_get(kvm, vcpu_id);

	return synic ? synic_set_irq(synic, sint) : -EINVAL;
}
|
|
|
|
void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
|
|
{
|
|
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
|
int i;
|
|
|
|
trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
|
|
|
|
for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
|
|
if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
|
|
kvm_hv_notify_acked_sint(vcpu, i);
|
|
}
|
|
|
|
/*
 * Record @gsi as the GSI associated with @sint on the SynIC of the vCPU
 * identified by @vcpu_id.
 *
 * Returns 0 on success, -EINVAL if no usable SynIC is found or @sint is
 * out of range.
 */
static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
{
	struct kvm_vcpu_hv_synic *synic = synic_get(kvm, vcpu_id);

	if (!synic || sint >= ARRAY_SIZE(synic->sint_to_gsi))
		return -EINVAL;

	atomic_set(&synic->sint_to_gsi[sint], gsi);
	return 0;
}
|
|
|
|
void kvm_hv_irq_routing_update(struct kvm *kvm)
|
|
{
|
|
struct kvm_irq_routing_table *irq_rt;
|
|
struct kvm_kernel_irq_routing_entry *e;
|
|
u32 gsi;
|
|
|
|
irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
|
|
lockdep_is_held(&kvm->irq_lock));
|
|
|
|
for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
|
|
hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
|
|
if (e->type == KVM_IRQ_ROUTING_HV_SINT)
|
|
kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
|
|
e->hv_sint.sint, gsi);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void synic_init(struct kvm_vcpu_hv_synic *synic)
|
|
{
|
|
int i;
|
|
|
|
memset(synic, 0, sizeof(*synic));
|
|
synic->version = HV_SYNIC_VERSION_1;
|
|
for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
|
|
atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
|
|
atomic_set(&synic->sint_to_gsi[i], -1);
|
|
}
|
|
}
|
|
|
|
static u64 get_time_ref_counter(struct kvm *kvm)
|
|
{
|
|
struct kvm_hv *hv = &kvm->arch.hyperv;
|
|
struct kvm_vcpu *vcpu;
|
|
u64 tsc;
|
|
|
|
/*
|
|
* The guest has not set up the TSC page or the clock isn't
|
|
* stable, fall back to get_kvmclock_ns.
|
|
*/
|
|
if (!hv->tsc_ref.tsc_sequence)
|
|
return div_u64(get_kvmclock_ns(kvm), 100);
|
|
|
|
vcpu = kvm_get_vcpu(kvm, 0);
|
|
tsc = kvm_read_l1_tsc(vcpu, rdtsc());
|
|
return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
|
|
+ hv->tsc_ref.tsc_offset;
|
|
}
|
|
|
|
static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
|
|
bool vcpu_kick)
|
|
{
|
|
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
|
|
|
set_bit(stimer->index,
|
|
vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
|
|
kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
|
|
if (vcpu_kick)
|
|
kvm_vcpu_kick(vcpu);
|
|
}
|
|
|
|
static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
|
|
{
|
|
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
|
|
|
trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index);
|
|
|
|
hrtimer_cancel(&stimer->timer);
|
|
clear_bit(stimer->index,
|
|
vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap);
|
|
stimer->msg_pending = false;
|
|
stimer->exp_time = 0;
|
|
}
|
|
|
|
static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
|
|
{
|
|
struct kvm_vcpu_hv_stimer *stimer;
|
|
|
|
stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
|
|
trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index);
|
|
stimer_mark_pending(stimer, true);
|
|
|
|
return HRTIMER_NORESTART;
|
|
}
|
|
|
|
/*
|
|
* stimer_start() assumptions:
|
|
* a) stimer->count is not equal to 0
|
|
* b) stimer->config has HV_STIMER_ENABLE flag
|
|
*/
|
|
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
|
|
{
|
|
u64 time_now;
|
|
ktime_t ktime_now;
|
|
|
|
time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm);
|
|
ktime_now = ktime_get();
|
|
|
|
if (stimer->config & HV_STIMER_PERIODIC) {
|
|
if (stimer->exp_time) {
|
|
if (time_now >= stimer->exp_time) {
|
|
u64 remainder;
|
|
|
|
div64_u64_rem(time_now - stimer->exp_time,
|
|
stimer->count, &remainder);
|
|
stimer->exp_time =
|
|
time_now + (stimer->count - remainder);
|
|
}
|
|
} else
|
|
stimer->exp_time = time_now + stimer->count;
|
|
|
|
trace_kvm_hv_stimer_start_periodic(
|
|
stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index,
|
|
time_now, stimer->exp_time);
|
|
|
|
hrtimer_start(&stimer->timer,
|
|
ktime_add_ns(ktime_now,
|
|
100 * (stimer->exp_time - time_now)),
|
|
HRTIMER_MODE_ABS);
|
|
return 0;
|
|
}
|
|
stimer->exp_time = stimer->count;
|
|
if (time_now >= stimer->count) {
|
|
/*
|
|
* Expire timer according to Hypervisor Top-Level Functional
|
|
* specification v4(15.3.1):
|
|
* "If a one shot is enabled and the specified count is in
|
|
* the past, it will expire immediately."
|
|
*/
|
|
stimer_mark_pending(stimer, false);
|
|
return 0;
|
|
}
|
|
|
|
trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index,
|
|
time_now, stimer->count);
|
|
|
|
hrtimer_start(&stimer->timer,
|
|
ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
|
|
HRTIMER_MODE_ABS);
|
|
return 0;
|
|
}
|
|
|
|
static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
|
|
bool host)
|
|
{
|
|
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
|
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
|
|
|
if (!synic->active && (!host || config))
|
|
return 1;
|
|
|
|
trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index, config, host);
|
|
|
|
stimer_cleanup(stimer);
|
|
if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0)
|
|
config &= ~HV_STIMER_ENABLE;
|
|
stimer->config = config;
|
|
stimer_mark_pending(stimer, false);
|
|
return 0;
|
|
}
|
|
|
|
static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
|
|
bool host)
|
|
{
|
|
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
|
struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
|
|
|
|
if (!synic->active && (!host || count))
|
|
return 1;
|
|
|
|
trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index, count, host);
|
|
|
|
stimer_cleanup(stimer);
|
|
stimer->count = count;
|
|
if (stimer->count == 0)
|
|
stimer->config &= ~HV_STIMER_ENABLE;
|
|
else if (stimer->config & HV_STIMER_AUTOENABLE)
|
|
stimer->config |= HV_STIMER_ENABLE;
|
|
stimer_mark_pending(stimer, false);
|
|
return 0;
|
|
}
|
|
|
|
/* Store the configuration register of @stimer in *@pconfig; returns 0. */
static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	u64 value = stimer->config;

	*pconfig = value;
	return 0;
}
|
|
|
|
/* Store the count register of @stimer in *@pcount; returns 0. */
static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	u64 value = stimer->count;

	*pcount = value;
	return 0;
}
|
|
|
|
/*
 * Copy @src_msg into the message slot of @sint in the guest message page
 * and raise the SINT interrupt.
 *
 * Returns:
 *   0        the message was stored and synic_set_irq() returned >= 1
 *   -ENOENT  the message page is not enabled
 *   -EFAULT  the page could not be obtained, or synic_set_irq() returned 0
 *   -EAGAIN  the slot is occupied; its msg_pending flag has been set
 *   other negative values returned by synic_set_irq() are passed through
 */
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg)
{
	struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
	struct hv_message_page *msg_page;
	struct hv_message *dst_msg;
	struct page *page;
	gpa_t gpa;
	int r;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	gpa = synic->msg_page & PAGE_MASK;
	page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT);
	if (is_error_page(page))
		return -EFAULT;

	msg_page = kmap_atomic(page);
	dst_msg = &msg_page->sint_message[sint];

	/* Claim the slot only if it currently holds HVMSG_NONE. */
	if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE,
			 src_msg->header.message_type) == HVMSG_NONE) {
		memcpy(&dst_msg->u.payload, &src_msg->u.payload,
		       src_msg->header.payload_size);
		dst_msg->header.message_type = src_msg->header.message_type;
		dst_msg->header.payload_size = src_msg->header.payload_size;

		r = synic_set_irq(synic, sint);
		if (r == 0)
			r = -EFAULT;
		else if (r > 0)
			r = 0;
	} else {
		/* Slot busy: set msg_pending and report -EAGAIN. */
		dst_msg->header.message_flags.msg_pending = 1;
		r = -EAGAIN;
	}

	kunmap_atomic(msg_page);
	kvm_release_page_dirty(page);
	kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT);

	return r;
}
|
|
|
|
static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
|
|
{
|
|
struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer);
|
|
struct hv_message *msg = &stimer->msg;
|
|
struct hv_timer_message_payload *payload =
|
|
(struct hv_timer_message_payload *)&msg->u.payload;
|
|
|
|
payload->expiration_time = stimer->exp_time;
|
|
payload->delivery_time = get_time_ref_counter(vcpu->kvm);
|
|
return synic_deliver_msg(vcpu_to_synic(vcpu),
|
|
HV_STIMER_SINT(stimer->config), msg);
|
|
}
|
|
|
|
static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
|
|
{
|
|
int r;
|
|
|
|
stimer->msg_pending = true;
|
|
r = stimer_send_msg(stimer);
|
|
trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id,
|
|
stimer->index, r);
|
|
if (!r) {
|
|
stimer->msg_pending = false;
|
|
if (!(stimer->config & HV_STIMER_PERIODIC))
|
|
stimer->config &= ~HV_STIMER_ENABLE;
|
|
}
|
|
}
|
|
|
|
void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
|
struct kvm_vcpu_hv_stimer *stimer;
|
|
u64 time_now, exp_time;
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
|
if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
|
|
stimer = &hv_vcpu->stimer[i];
|
|
if (stimer->config & HV_STIMER_ENABLE) {
|
|
exp_time = stimer->exp_time;
|
|
|
|
if (exp_time) {
|
|
time_now =
|
|
get_time_ref_counter(vcpu->kvm);
|
|
if (time_now >= exp_time)
|
|
stimer_expiration(stimer);
|
|
}
|
|
|
|
if ((stimer->config & HV_STIMER_ENABLE) &&
|
|
stimer->count)
|
|
stimer_start(stimer);
|
|
else
|
|
stimer_cleanup(stimer);
|
|
}
|
|
}
|
|
}
|
|
|
|
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
|
stimer_cleanup(&hv_vcpu->stimer[i]);
|
|
}
|
|
|
|
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
|
|
{
|
|
struct hv_message *msg = &stimer->msg;
|
|
struct hv_timer_message_payload *payload =
|
|
(struct hv_timer_message_payload *)&msg->u.payload;
|
|
|
|
memset(&msg->header, 0, sizeof(msg->header));
|
|
msg->header.message_type = HVMSG_TIMER_EXPIRED;
|
|
msg->header.payload_size = sizeof(*payload);
|
|
|
|
payload->timer_index = stimer->index;
|
|
payload->expiration_time = 0;
|
|
payload->delivery_time = 0;
|
|
}
|
|
|
|
static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
|
|
{
|
|
memset(stimer, 0, sizeof(*stimer));
|
|
stimer->index = timer_index;
|
|
hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
|
stimer->timer.function = stimer_timer_callback;
|
|
stimer_prepare_msg(stimer);
|
|
}
|
|
|
|
void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
|
|
int i;
|
|
|
|
synic_init(&hv_vcpu->synic);
|
|
|
|
bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
|
|
for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
|
|
stimer_init(&hv_vcpu->stimer[i], i);
|
|
}
|
|
|
|
int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
|
|
{
|
|
/*
|
|
* Hyper-V SynIC auto EOI SINT's are
|
|
* not compatible with APICV, so deactivate APICV
|
|
*/
|
|
kvm_vcpu_deactivate_apicv(vcpu);
|
|
vcpu_to_synic(vcpu)->active = true;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Tell whether @msr is one of the Hyper-V MSRs that this file treats as
 * partition-wide (handled by the *_pw accessors) rather than per-vCPU.
 *
 * Returns true for the MSRs listed below, false for everything else.
 */
static bool kvm_hv_msr_partition_wide(u32 msr)
{
	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
	case HV_X64_MSR_REFERENCE_TSC:
	case HV_X64_MSR_TIME_REF_COUNT:
	case HV_X64_MSR_CRASH_CTL:
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_RESET:
		return true;
	default:
		return false;
	}
}
|
|
|
|
static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu,
|
|
u32 index, u64 *pdata)
|
|
{
|
|
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
|
size_t size = ARRAY_SIZE(hv->hv_crash_param);
|
|
|
|
if (WARN_ON_ONCE(index >= size))
|
|
return -EINVAL;
|
|
|
|
*pdata = hv->hv_crash_param[array_index_nospec(index, size)];
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Copy the stored crash control value of the VM owning @vcpu into *@pdata.
 *
 * Always returns 0.
 */
static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata)
{
	*pdata = vcpu->kvm->arch.hyperv.hv_crash_ctl;

	return 0;
}
|
|
|
|
/*
 * Handle a write of @data to the crash control MSR.
 *
 * A host-initiated write (@host true) only records the NOTIFY bit of @data
 * in the VM state.  A guest write never changes the stored value; if it has
 * the NOTIFY bit set, the crash parameters are logged and KVM_REQ_HV_CRASH
 * is raised on @vcpu.
 *
 * Always returns 0.
 */
static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host)
{
	struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;

	if (host) {
		hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY;
		return 0;
	}

	/* Guest write without the notify bit: nothing to do. */
	if (!(data & HV_X64_MSR_CRASH_CTL_NOTIFY))
		return 0;

	vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
		   hv->hv_crash_param[0],
		   hv->hv_crash_param[1],
		   hv->hv_crash_param[2],
		   hv->hv_crash_param[3],
		   hv->hv_crash_param[4]);

	/* Send notification about crash to user space */
	kvm_make_request(KVM_REQ_HV_CRASH, vcpu);

	return 0;
}
|
|
|
|
static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
|
|
u32 index, u64 data)
|
|
{
|
|
struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
|
|
size_t size = ARRAY_SIZE(hv->hv_crash_param);
|
|
|
|
if (WARN_ON_ONCE(index >= size))
|
|
return -EINVAL;
|
|
|
|
hv->hv_crash_param[array_index_nospec(index, size)] = data;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* The kvmclock and Hyper-V TSC page use similar formulas, and converting
|
|
* between them is possible:
|
|
*
|
|
* kvmclock formula:
|
|
* nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
|
|
* + system_time
|
|
*
|
|
* Hyper-V formula:
|
|
* nsec/100 = ticks * scale / 2^64 + offset
|
|
*
|
|
* When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
|
|
* By dividing the kvmclock formula by 100 and equating what's left we get:
|
|
* ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
|
|
* scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
|
|
* scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
|
|
*
|
|
* Now expand the kvmclock formula and divide by 100:
|
|
* nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
|
|
* - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
|
|
* + system_time
|
|
* nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
|
|
* - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
|
|
* + system_time / 100
|
|
*
|
|
* Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
|
|
* nsec/100 = ticks * scale / 2^64
|
|
* - tsc_timestamp * scale / 2^64
|
|
* + system_time / 100
|
|
*
|
|
* Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
|
|
* offset = system_time / 100 - tsc_timestamp * scale / 2^64
|
|
*
|
|
* These two equivalencies are implemented in this function.
|
|
*/
|
|
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
|
|
HV_REFERENCE_TSC_PAGE *tsc_ref)
|
|
{
|
|
u64 max_mul;
|
|
|
|
if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
|
|
return false;
|
|
|
|
/*
|
|
* check if scale would overflow, if so we use the time ref counter
|
|
* tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
|
|
* tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
|
|
* tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
|
|
*/
|
|
max_mul = 100ull << (32 - hv_clock->tsc_shift);
|
|
if (hv_clock->tsc_to_system_mul >= max_mul)
|
|
return false;
|
|
|
|
/*
|
|
* Otherwise compute the scale and offset according to the formulas
|
|
* derived above.
|
|
*/
|
|
tsc_ref->tsc_scale =
|
|
mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
|
|
hv_clock->tsc_to_system_mul,
|
|
100);
|
|
|
|
tsc_ref->tsc_offset = hv_clock->system_time;
|
|
do_div(tsc_ref->tsc_offset, 100);
|
|
tsc_ref->tsc_offset -=
|
|
mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
|
|
return true;
|
|
}
|
|
|
|
void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
|
struct pvclock_vcpu_time_info *hv_clock)
|
|
{
|
|
struct kvm_hv *hv = &kvm->arch.hyperv;
|
|
u32 tsc_seq;
|
|
u64 gfn;
|
|
|
|
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
|
|
BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
|
|
|
|
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
|
return;
|
|
|
|
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
|
|
/*
|
|
* Because the TSC parameters only vary when there is a
|
|
* change in the master clock, do not bother with caching.
|
|
*/
|
|
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
|
|
&tsc_seq, sizeof(tsc_seq))))
|
|
return;
|
|
|
|
/*
|
|
* While we're computing and writing the parameters, force the
|
|
* guest to use the time reference count MSR.
|
|
*/
|
|
hv->tsc_ref.tsc_sequence = 0;
|
|
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
|
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
|
|
return;
|
|
|
|
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
|
|
return;
|
|
|
|
/* Ensure sequence is zero before writing the rest of the struct. */
|
|
smp_wmb();
|
|
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
|
|
return;
|
|
|
|
/*
|
|
* Now switch to the TSC page mechanism by writing the sequence.
|
|
*/
|
|
tsc_seq++;
|
|
if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
|
|
tsc_seq = 1;
|
|
|
|
/* Write the struct entirely before the non-zero sequence. */
|
|
smp_wmb();
|
|
|
|
hv->tsc_ref.tsc_sequence = tsc_seq;
|
|
kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
|
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
|
|
}
|
|
|
|
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
|
bool host)
|
|
{
|
|
struct kvm *kvm = vcpu->kvm;
|
|
struct kvm_hv *hv = &kvm->arch.hyperv;
|
|
|
|
switch (msr) {
|
|
case HV_X64_MSR_GUEST_OS_ID:
|
|
hv->hv_guest_os_id = data;
|
|
/* setting guest os id to zero disables hypercall page */
|
|
if (!hv->hv_guest_os_id)
|
|
hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
|
|
break;
|
|
case HV_X64_MSR_HYPERCALL: {
|
|
u64 gfn;
|
|
unsigned long addr;
|
|
u8 instructions[4];
|
|
|
|
/* if guest os id is not set hypercall should remain disabled */
|
|
if (!hv->hv_guest_os_id)
|
|
break;
|
|
if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
|
|
hv->hv_hypercall = data;
|
|
break;
|
|
}
|
|
gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT;
|
|
addr = gfn_to_hva(kvm, gfn);
|
|
if (kvm_is_error_hva(addr))
|
|
return 1;
|
|
kvm_x86_ops->patch_hypercall(vcpu, instructions);
|
|
((unsigned char *)instructions)[3] = 0xc3; /* ret */
|
|
if (__copy_to_user((void __user *)addr, instructions, 4))
|
|
return 1;
|
|
hv->hv_hypercall = data;
|
|
mark_page_dirty(kvm, gfn);
|
|
break;
|
|
}
|
|
case HV_X64_MSR_REFERENCE_TSC:
|
|
hv->hv_tsc_page = data;
|
|
if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
|
|
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
|
|
break;
|
|
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
|
|
return kvm_hv_msr_set_crash_data(vcpu,
|
|
msr - HV_X64_MSR_CRASH_P0,
|
|
data);
|
|
case HV_X64_MSR_CRASH_CTL:
|
|
return kvm_hv_msr_set_crash_ctl(vcpu, data, host);
|
|
case HV_X64_MSR_RESET:
|
|
if (data == 1) {
|
|
vcpu_debug(vcpu, "hyper-v reset requested\n");
|
|
kvm_make_request(KVM_REQ_HV_RESET, vcpu);
|
|
}
|
|
break;
|
|
default:
|
|
vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n",
|
|
msr, data);
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Calculate cpu time spent by current task in 100ns units */
|
|
static u64 current_task_runtime_100ns(void)
|
|
{
|
|
cputime_t utime, stime;
|
|
|
|
task_cputime_adjusted(current, &utime, &stime);
|
|
return div_u64(cputime_to_nsecs(utime + stime), 100);
|
|
}
|
|
|
|
/*
 * Handle a write to one of the per-vCPU Hyper-V MSRs.
 *
 * @vcpu: vCPU whose MSR is written
 * @msr:  MSR index
 * @data: value being written
 * @host: true for a host-initiated write; required for
 *        HV_X64_MSR_VP_RUNTIME and forwarded to the SynIC and synthetic
 *        timer helpers
 *
 * Returns 0 on success and 1 when the MSR is not handled here or the write
 * is rejected.  Cases delegated to a helper return that helper's result.
 */
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_APIC_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		/* Disable request: record the value and turn PV EOI off. */
		if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) {
			hv->hv_vapic = data;
			if (kvm_lapic_enable_pv_eoi(vcpu, 0))
				return 1;
			break;
		}
		gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;
		/* The guest page is zeroed before the new value is committed. */
		if (__clear_user((void __user *)addr, PAGE_SIZE))
			return 1;
		hv->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_enable_pv_eoi(vcpu,
					    gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
			return 1;
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		/* Guest writes are rejected. */
		if (!host)
			return 1;
		/*
		 * Keep only the difference to the task's current runtime;
		 * the read side adds the current runtime back to it.
		 */
		hv->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		/*
		 * NOTE(review): the /2 assumes consecutive timers' CONFIG
		 * MSR numbers are two apart - confirm against the MSR
		 * definitions.
		 */
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(vcpu_to_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		/* Same index mapping as for the CONFIG MSRs above. */
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(vcpu_to_stimer(vcpu, timer_index),
					data, host);
	}
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n",
			    msr, data);
		return 1;
	}

	return 0;
}
|
|
|
|
/*
 * Read one of the partition-wide Hyper-V MSRs.  kvm_hv_get_msr_common()
 * calls this with kvm->lock held.
 *
 * On success the value is stored in *@pdata and 0 is returned; an MSR that
 * is not handled here is logged and yields 1 with *@pdata left untouched.
 * The crash MSR cases return whatever their helper returns.
 */
static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = &kvm->arch.hyperv;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		*pdata = hv->hv_guest_os_id;
		return 0;
	case HV_X64_MSR_HYPERCALL:
		*pdata = hv->hv_hypercall;
		return 0;
	case HV_X64_MSR_TIME_REF_COUNT:
		*pdata = get_time_ref_counter(kvm);
		return 0;
	case HV_X64_MSR_REFERENCE_TSC:
		*pdata = hv->hv_tsc_page;
		return 0;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(vcpu,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(vcpu, pdata);
	case HV_X64_MSR_RESET:
		/* The reset MSR always reads back as zero. */
		*pdata = 0;
		return 0;
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
}
|
|
|
|
/*
 * Read one of the per-vCPU Hyper-V MSRs.
 *
 * Values computed here are stored in *@pdata and 0 is returned.  Cases
 * delegated to a helper return that helper's result.  An MSR that is not
 * handled is logged and yields 1 with *@pdata left untouched.
 */
static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
	u64 value = 0;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		struct kvm_vcpu *cur;
		int pos;

		/*
		 * The VP index is this vCPU's position in the VM's vCPU
		 * iteration; it stays 0 if the vCPU is not found.
		 */
		kvm_for_each_vcpu(pos, cur, vcpu->kvm) {
			if (cur != vcpu)
				continue;
			value = pos;
			break;
		}
		break;
	}
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_APIC_ASSIST_PAGE:
		value = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		value = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		int nr = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(vcpu_to_stimer(vcpu, nr), pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int nr = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(vcpu_to_stimer(vcpu, nr), pdata);
	}
	default:
		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}

	*pdata = value;
	return 0;
}
|
|
|
|
/*
 * Entry point for writes to Hyper-V MSRs.
 *
 * Per-vCPU MSRs go straight to kvm_hv_set_msr().  Partition-wide MSRs are
 * handled by kvm_hv_set_msr_pw() with kvm->lock held around the call.
 * The handler's return value is passed through unchanged.
 */
int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	int ret;

	if (!kvm_hv_msr_partition_wide(msr))
		return kvm_hv_set_msr(vcpu, msr, data, host);

	mutex_lock(&vcpu->kvm->lock);
	ret = kvm_hv_set_msr_pw(vcpu, msr, data, host);
	mutex_unlock(&vcpu->kvm->lock);

	return ret;
}
|
|
|
|
/*
 * Entry point for reads of Hyper-V MSRs.
 *
 * Per-vCPU MSRs go straight to kvm_hv_get_msr().  Partition-wide MSRs are
 * handled by kvm_hv_get_msr_pw() with kvm->lock held around the call.
 * The handler's return value is passed through unchanged.
 */
int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	int ret;

	if (!kvm_hv_msr_partition_wide(msr))
		return kvm_hv_get_msr(vcpu, msr, pdata);

	mutex_lock(&vcpu->kvm->lock);
	ret = kvm_hv_get_msr_pw(vcpu, msr, pdata);
	mutex_unlock(&vcpu->kvm->lock);

	return ret;
}
|
|
|
|
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
|
|
{
|
|
return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
|
|
}
|
|
|
|
/*
 * Hand the hypercall @result back to the guest through its registers.
 *
 * In 64-bit mode the whole value goes into RAX.  Otherwise it is split:
 * the upper 32 bits into RDX and the lower 32 bits into RAX.
 */
static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	if (is_64_bit_mode(vcpu)) {
		kvm_register_write(vcpu, VCPU_REGS_RAX, result);
		return;
	}

	kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
	kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
}
|
|
|
|
static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct kvm_run *run = vcpu->run;
|
|
|
|
kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result);
|
|
return 1;
|
|
}
|
|
|
|
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
|
|
{
|
|
u64 param, ingpa, outgpa, ret;
|
|
uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
|
|
bool fast, longmode;
|
|
|
|
/*
|
|
* hypercall generates UD from non zero cpl and real mode
|
|
* per HYPER-V spec
|
|
*/
|
|
if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
longmode = is_64_bit_mode(vcpu);
|
|
|
|
if (!longmode) {
|
|
param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
|
|
(kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
|
|
ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
|
|
(kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
|
|
outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
|
|
(kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
|
|
}
|
|
#ifdef CONFIG_X86_64
|
|
else {
|
|
param = kvm_register_read(vcpu, VCPU_REGS_RCX);
|
|
ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
|
|
outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
|
|
}
|
|
#endif
|
|
|
|
code = param & 0xffff;
|
|
fast = (param >> 16) & 0x1;
|
|
rep_cnt = (param >> 32) & 0xfff;
|
|
rep_idx = (param >> 48) & 0xfff;
|
|
|
|
trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
|
|
|
|
/* Hypercall continuation is not supported yet */
|
|
if (rep_cnt || rep_idx) {
|
|
res = HV_STATUS_INVALID_HYPERCALL_CODE;
|
|
goto set_result;
|
|
}
|
|
|
|
switch (code) {
|
|
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
|
|
kvm_vcpu_on_spin(vcpu);
|
|
break;
|
|
case HVCALL_POST_MESSAGE:
|
|
case HVCALL_SIGNAL_EVENT:
|
|
/* don't bother userspace if it has no way to handle it */
|
|
if (!vcpu_to_synic(vcpu)->active) {
|
|
res = HV_STATUS_INVALID_HYPERCALL_CODE;
|
|
break;
|
|
}
|
|
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
|
|
vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
|
|
vcpu->run->hyperv.u.hcall.input = param;
|
|
vcpu->run->hyperv.u.hcall.params[0] = ingpa;
|
|
vcpu->run->hyperv.u.hcall.params[1] = outgpa;
|
|
vcpu->arch.complete_userspace_io =
|
|
kvm_hv_hypercall_complete_userspace;
|
|
return 0;
|
|
default:
|
|
res = HV_STATUS_INVALID_HYPERCALL_CODE;
|
|
break;
|
|
}
|
|
|
|
set_result:
|
|
ret = res | (((u64)rep_done & 0xfff) << 32);
|
|
kvm_hv_hypercall_set_result(vcpu, ret);
|
|
return 1;
|
|
}
|