Changes in 4.9.209 PM / devfreq: Don't fail devfreq_dev_release if not in list RDMA/cma: add missed unregister_pernet_subsys in init failure scsi: lpfc: Fix memory leak on lpfc_bsg_write_ebuf_set func scsi: qla2xxx: Don't call qlt_async_event twice scsi: iscsi: qla4xxx: fix double free in probe scsi: libsas: stop discovering if oob mode is disconnected usb: gadget: fix wrong endpoint desc md: raid1: check rdev before reference in raid1_sync_request func s390/cpum_sf: Adjust sampling interval to avoid hitting sample limits s390/cpum_sf: Avoid SBD overflow condition in irq handler IB/mlx4: Follow mirror sequence of device add during device removal xen-blkback: prevent premature module unload xen/balloon: fix ballooned page accounting without hotplug enabled PM / hibernate: memory_bm_find_bit(): Tighten node optimisation xfs: fix mount failure crash on invalid iclog memory access taskstats: fix data-race drm: limit to INT_MAX in create_blob ioctl Revert "perf report: Add warning when libunwind not compiled in" ALSA: ice1724: Fix sleep-in-atomic in Infrasonic Quartet support code MIPS: Avoid VDSO ABI breakage due to global register variable mm/zsmalloc.c: fix the migrated zspage statistics. 
memcg: account security cred as well to kmemcg locks: print unsigned ino in /proc/locks dmaengine: Fix access to uninitialized dma_slave_caps compat_ioctl: block: handle Persistent Reservations ata: libahci_platform: Export again ahci_platform_<en/dis>able_phys() ata: ahci_brcm: Allow optional reset controller to be used ata: ahci_brcm: Fix AHCI resources management gpiolib: fix up emulated open drain outputs tracing: Have the histogram compare functions convert to u64 first ALSA: cs4236: fix error return comparison of an unsigned integer ftrace: Avoid potential division by zero in function profiler arm64: Revert support for execute-only user mappings PM / devfreq: Check NULL governor in available_governors_show nfsd4: fix up replay_matches_cache() xfs: don't check for AG deadlock for realtime files in bunmapi Bluetooth: btusb: fix PM leak in error case of setup Bluetooth: delete a stray unlock Bluetooth: Fix memory leak in hci_connect_le_scan media: flexcop-usb: ensure -EIO is returned on error condition regulator: ab8500: Remove AB8505 USB regulator media: usb: fix memory leak in af9005_identify_state tty: serial: msm_serial: Fix lockup for sysrq and oops fix compat handling of FICLONERANGE, FIDEDUPERANGE and FS_IOC_FIEMAP drm/mst: Fix MST sideband up-reply failure handling powerpc/pseries/hvconsole: Fix stack overread via udbg rxrpc: Fix possible NULL pointer access in ICMP handling ath9k_htc: Modify byte order for an error message ath9k_htc: Discard undersized packets net: add annotations on hh->hh_len lockless accesses s390/smp: fix physical to logical CPU map for SMT xen/blkback: Avoid unmapping unmapped grant pages locking/x86: Remove the unused atomic_inc_short() methd pstore/ram: Write new dumps to start of recycled zones locking/spinlock/debug: Fix various data races netfilter: ctnetlink: netns exit must wait for callbacks efi/gop: Return EFI_NOT_FOUND if there are no usable GOPs efi/gop: Return EFI_SUCCESS if a usable GOP was found efi/gop: Fix memory 
leak in __gop_query32/64() ARM: vexpress: Set-up shared OPP table instead of individual for each CPU netfilter: uapi: Avoid undefined left-shift in xt_sctp.h spi: spi-cavium-thunderx: Add missing pci_release_regions() ARM: dts: am437x-gp/epos-evm: fix panel compatible samples: bpf: Replace symbol compare of trace_event powerpc: Ensure that swiotlb buffer is allocated from low memory bnx2x: Do not handle requests from VFs after parity bnx2x: Fix logic to get total no. of PFs per engine net: usb: lan78xx: Fix error message format specifier rfkill: Fix incorrect check to avoid NULL pointer dereference ASoC: wm8962: fix lambda value regulator: rn5t618: fix module aliases kconfig: don't crash on NULL expressions in expr_eq() perf/x86/intel: Fix PT PMI handling net: stmmac: RX buffer size must be 16 byte aligned block: fix memleak when __blk_rq_map_user_iov() is failed parisc: Fix compiler warnings in debug_core.c llc2: Fix return statement of llc_stat_ev_rx_null_dsap_xid_c (and _test_c) macvlan: do not assume mac_header is set in macvlan_broadcast() net: stmmac: dwmac-sunxi: Allow all RGMII modes net: usb: lan78xx: fix possible skb leak pkt_sched: fq: do not accept silly TCA_FQ_QUANTUM sctp: free cmd->obj.chunk for the unprocessed SCTP_CMD_REPLY tcp: fix "old stuff" D-SACK causing SACK to be treated as D-SACK vxlan: fix tos value before xmit vlan: vlan_changelink() should propagate errors net: sch_prio: When ungrafting, replace with FIFO vlan: fix memory leak in vlan_dev_set_egress_priority USB: core: fix check for duplicate endpoints USB: serial: option: add Telit ME910G1 0x110a composition Linux 4.9.209 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I37562d713bddce2ead7934800c3f9e231884c43f
206 lines
6.9 KiB
ArmAsm
206 lines
6.9 KiB
ArmAsm
/*
|
|
* Copyright 2010 Tilera Corporation. All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation, version 2.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
|
* NON INFRINGEMENT. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* Support routines for atomic operations. Each function takes:
|
|
*
|
|
* r0: address to manipulate
|
|
* r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
|
|
* r2: new value to write, or for cmpxchg/add_unless, value to compare against
|
|
* r3: (cmpxchg/xchg_add_unless) new value to write or add;
|
|
* (atomic64 ops) high word of value to write
|
|
* r4/r5: (cmpxchg64/add_unless64) new value to write or add
|
|
*
|
|
* The 32-bit routines return a "struct __get_user" so that the futex code
|
|
* has an opportunity to return -EFAULT to the user if needed.
|
|
* The 64-bit routines just return a "long long" with the value,
|
|
* since they are only used from kernel space and don't expect to fault.
|
|
* Support for 16-bit ops is included in the framework but we don't provide any.
|
|
*
|
|
* Note that the caller is advised to issue a suitable L1 or L2
|
|
* prefetch on the address being manipulated to avoid extra stalls.
|
|
* In addition, the hot path is on two icache lines, and we start with
|
|
* a jump to the second line to make sure they are both in cache so
|
|
* that we never stall waiting on icache fill while holding the lock.
|
|
* (This doesn't work out with most 64-bit ops, since they consume
|
|
* too many bundles, so may take an extra i-cache stall.)
|
|
*
|
|
* These routines set the INTERRUPT_CRITICAL_SECTION bit, just
|
|
* like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
|
|
* the code, just page faults.
|
|
*
|
|
* If the load or store faults in a way that can be directly fixed in
|
|
* the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
|
|
* directly, return to the instruction that faulted, and retry it.
|
|
*
|
|
* If the load or store faults in a way that potentially requires us
|
|
* to release the atomic lock, then retry (e.g. a migrating PTE), we
|
|
* reset the PC in do_page_fault_ics() to the "tns" instruction so
|
|
* that on return we will reacquire the lock and restart the op. We
|
|
* are somewhat overloading the exception_table_entry notion by doing
|
|
* this, since those entries are not normally used for migrating PTEs.
|
|
*
|
|
* If the main page fault handler discovers a bad address, it will see
|
|
* the PC pointing to the "tns" instruction (due to the earlier
|
|
* exception_table_entry processing in do_page_fault_ics), and
|
|
* re-reset the PC to the fault handler, atomic_bad_address(), which
|
|
* effectively takes over from the atomic op and can either return a
|
|
* bad "struct __get_user" (for user addresses) or can just panic (for
|
|
* bad kernel addresses).
|
|
*
|
|
* Note that if the value we would store is the same as what we
|
|
* loaded, we bypass the store. Other platforms with true atomics can
|
|
* make the guarantee that a non-atomic __clear_bit(), for example,
|
|
* can safely race with an atomic test_and_set_bit(); this example is
|
|
* from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
|
|
* that on Tile since the "atomic" op is really just a
|
|
* read/modify/write, and can race with the non-atomic
|
|
* read/modify/write. However, if we can short-circuit the write when
|
|
* it is not needed, in the atomic case, we avoid the race.
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/atomic_32.h>
|
|
#include <asm/page.h>
|
|
#include <asm/processor.h>
|
|
|
|
/*
 * Everything from here on is emitted into the dedicated .text.atomic
 * section (allocatable, executable).  __start_atomic_asm_code labels the
 * first byte of that code.
 * NOTE(review): presumably the fault-handling code compares a faulting PC
 * against this symbol and __end_atomic_asm_code to recognize these
 * routines -- confirm against the users of these symbols.
 */
.section .text.atomic,"ax"
|
|
ENTRY(__start_atomic_asm_code)
|
|
|
|
/*
 * atomic_op name, bitwidth, body
 *
 * Emits one lock-protected read/modify/write routine named __atomic<name>.
 *
 *   name      suffix appended to "__atomic" to form the routine symbol
 *   bitwidth  16, 32 or 64; selects the load/store width, whether the
 *             high word (r23/r25) is handled, and (32 only) whether
 *             exception-table entries are emitted
 *   body      instruction string that computes the new value into r24
 *             (and r25 for 64-bit) from the old value in r22 (and r23);
 *             it may branch to local label 3 to skip the write-back
 *
 * Register use inside the routine, as visible below:
 *   r0         address of the value (in); old low word (out)
 *   r1         lock pointer (in); old high word for 64-bit, else zero (out)
 *   r21        result of tns on the lock
 *   r22/r23    old value low/high; r22 and r23 are also scratch during backoff
 *   r24/r25    new value low/high; r24 is 1 until the body runs, r25 holds
 *              the current backoff while spinning
 *   r26/r27    comparison scratch
 *   r28        address of the high word (r0 + 4)
 *
 * Local labels: 1 = load (fault restart point listed in __ex_table),
 * 2 = store (also listed in __ex_table), 3 = unlock and return,
 * 4 = lock acquisition, 5/6 = backoff loop.
 */
.macro atomic_op, name, bitwidth, body
|
|
.align 64
|
|
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
|
|
/* Entry: r24 = 1 is the value later written to INTERRUPT_CRITICAL_SECTION. */
{
|
|
movei r24, 1
|
|
j 4f /* branch to second cache line */
|
|
}
|
|
/* Label 1: lock is held (or not needed); load the old value into r22. */
1: {
|
|
.ifc \bitwidth,16
|
|
lh r22, r0
|
|
.else
|
|
lw r22, r0
|
|
/* r28 = address of the second word; only dereferenced for 64-bit ops. */
addi r28, r0, 4
|
|
.endif
|
|
}
|
|
.ifc \bitwidth,64
|
|
/* 64-bit only: load the old high word into r23. */
lw r23, r28
|
|
.endif
|
|
\body /* set r24, and r25 if 64-bit */
|
|
/* r26 = (old low == new low), r27 = (old high == new high). */
{
|
|
seq r26, r22, r24
|
|
seq r27, r23, r25
|
|
}
|
|
.ifc \bitwidth,64
|
|
/* 64-bit only: if the high words differ, the store is always needed. */
bbnst r27, 2f
|
|
.endif
|
|
bbs r26, 3f /* skip write-back if it's the same value */
|
|
/* Label 2: write the new value back. */
2: {
|
|
.ifc \bitwidth,16
|
|
sh r0, r24
|
|
.else
|
|
sw r0, r24
|
|
.endif
|
|
}
|
|
.ifc \bitwidth,64
|
|
sw r28, r25
|
|
.endif
|
|
/* Memory fence after the store(s) and before the lock word is cleared. */
mf
|
|
/*
 * Label 3: common exit.  Return the old value in r0 (and r1), and store
 * zero to the lock word to release it.
 * NOTE(review): for the non-64-bit case r1 = zero is presumably the
 * "no error" field of the struct __get_user result -- confirm.
 */
3: {
|
|
move r0, r22
|
|
.ifc \bitwidth,64
|
|
move r1, r23
|
|
.else
|
|
move r1, zero
|
|
.endif
|
|
sw ATOMIC_LOCK_REG_NAME, zero
|
|
}
|
|
/* Leave the interrupt critical section, then return to the caller. */
mtspr INTERRUPT_CRITICAL_SECTION, zero
|
|
jrp lr
|
|
/*
 * Label 4: second cache line.  Copy the lock pointer from r1 into the
 * lock register and enter the interrupt critical section (r24 == 1).
 */
4: {
|
|
move ATOMIC_LOCK_REG_NAME, r1
|
|
mtspr INTERRUPT_CRITICAL_SECTION, r24
|
|
}
|
|
#ifndef CONFIG_SMP
|
|
j 1b /* no atomic locks */
|
|
#else
|
|
/* First attempt to take the lock; r21 receives the tns result. */
{
|
|
tns r21, ATOMIC_LOCK_REG_NAME
|
|
moveli r23, 2048 /* maximum backoff time in cycles */
|
|
}
|
|
{
|
|
bzt r21, 1b /* branch if lock acquired */
|
|
moveli r25, 32 /* starting backoff time in cycles */
|
|
}
|
|
/*
 * Label 5: lock was busy.  Drop the interrupt critical section while
 * spinning, and wait until r25 cycles have elapsed on CYCLE_LOW.
 */
5: mtspr INTERRUPT_CRITICAL_SECTION, zero
|
|
mfspr r26, CYCLE_LOW /* get start point for this backoff */
|
|
6: mfspr r22, CYCLE_LOW /* test to see if we've backed off enough */
|
|
sub r22, r22, r26
|
|
slt r22, r22, r25
|
|
bbst r22, 6b
|
|
/* Re-enter the critical section and grow the backoff for next time. */
{
|
|
mtspr INTERRUPT_CRITICAL_SECTION, r24
|
|
shli r25, r25, 1 /* double the backoff; retry the tns */
|
|
}
|
|
{
|
|
tns r21, ATOMIC_LOCK_REG_NAME
|
|
slt r26, r23, r25 /* is the proposed backoff too big? */
|
|
}
|
|
{
|
|
bzt r21, 1b /* branch if lock acquired */
|
|
/* If the doubled backoff exceeded the maximum, clamp it to r23. */
mvnz r25, r26, r23
|
|
}
|
|
j 5b
|
|
#endif
|
|
STD_ENDPROC(__atomic\name)
|
|
/*
 * 32-bit routines only: exception-table entries.  A fault at the load
 * (label 1) or the store (label 2) is redirected to the routine entry,
 * and a fault attributed to the routine entry is redirected to
 * __atomic_bad_address.
 */
.ifc \bitwidth,32
|
|
.pushsection __ex_table,"a"
|
|
.align 4
|
|
.word 1b, __atomic\name
|
|
.word 2b, __atomic\name
|
|
.word __atomic\name, __atomic_bad_address
|
|
.popsection
|
|
.endif
|
|
.endm
|
|
|
|
|
|
/*
|
|
* Use __atomic32 prefix to avoid collisions with GCC builtin __atomic functions.
|
|
*/
|
|
|
|
/*
 * 32-bit routines.  Each body receives the old value in r22 and must leave
 * the value to store in r24; branching to label 3 skips the store and
 * returns the old value unchanged.
 *
 *   __atomic32_cmpxchg          r2 = expected value, r3 = replacement
 *   __atomic32_xchg             r2 = replacement
 *   __atomic32_xchg_add         r2 = addend
 *   __atomic32_xchg_add_unless  r2 = value that inhibits the add, r3 = addend
 *   __atomic32_fetch_or/and/xor r2 = mask
 *   __atomic32_fetch_andn       r2 = mask whose complement is ANDed in
 *
 * Bodies must not modify their input registers (r0-r3): the exception
 * table restarts a 32-bit routine from its entry point when the load or
 * the store faults, so the body can execute more than once with the same
 * inputs.  For that reason fetch_andn builds the complemented mask in the
 * scratch register r24 rather than overwriting r2; complementing r2 in
 * place would undo itself on a restart and AND with the wrong mask.
 */
atomic_op 32_cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
atomic_op 32_xchg, 32, "move r24, r2"
atomic_op 32_xchg_add, 32, "add r24, r22, r2"
atomic_op 32_xchg_add_unless, 32, \
	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
atomic_op 32_fetch_or, 32, "or r24, r22, r2"
atomic_op 32_fetch_and, 32, "and r24, r22, r2"
atomic_op 32_fetch_andn, 32, "nor r24, r2, zero; and r24, r22, r24"
atomic_op 32_fetch_xor, 32, "xor r24, r22, r2"
|
|
|
|
/*
 * 64-bit routines.  Each body receives the old value in r22 (low word) and
 * r23 (high word) and must leave the value to store in r24/r25; branching
 * to label 3 skips the store and returns the old value unchanged.
 *
 *   __atomic64_cmpxchg          r2/r3 = expected value, r4/r5 = replacement
 *   __atomic64_xchg             r2/r3 = replacement
 *   __atomic64_xchg_add         r2/r3 = addend
 *   __atomic64_xchg_add_unless  r2/r3 = value that inhibits the add,
 *                               r4/r5 = addend
 *   __atomic64_fetch_or/and/xor r2/r3 = mask
 *
 * The additions propagate the carry out of the low word by comparing the
 * low-word sum with the old low word (slt_u) and adding the result to the
 * high word.
 *
 * xchg_add_unless must skip the add only when the full 64-bit old value
 * equals r2/r3, i.e. when BOTH words match.  Testing each word separately
 * and skipping on either match would wrongly suppress the add for values
 * that merely share one word with the "unless" value, so the two equality
 * results are ANDed together and tested once.
 */
atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
	slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_xchg_add_unless, 64, \
	"{ seq r26, r22, r2; seq r27, r23, r3 }; \
	{ and r26, r26, r27; add r24, r22, r4 }; \
	{ bbs r26, 3f; add r25, r23, r5 }; \
	slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_fetch_or, 64, "{ or r24, r22, r2; or r25, r23, r3 }"
atomic_op 64_fetch_and, 64, "{ and r24, r22, r2; and r25, r23, r3 }"
atomic_op 64_fetch_xor, 64, "{ xor r24, r22, r2; xor r25, r23, r3 }"
|
|
|
|
/*
 * A lone return instruction placed after the last generated routine, so
 * that the code region does not end directly on the final routine's last
 * instruction (see the original "happy backtracer" remark).
 * __end_atomic_asm_code then labels the end of the atomic code.
 * NOTE(review): presumably paired with __start_atomic_asm_code for PC
 * range checks -- confirm against the users of these symbols.
 */
jrp lr /* happy backtracer */
|
|
|
|
ENTRY(__end_atomic_asm_code)
|