1
0
Files
kernel-49/arch/arc/lib/memcpy-archs.S
Greg Kroah-Hartman 55492e04c6 Merge 4.9.165 into android-4.9
Changes in 4.9.165
	media: videobuf2-v4l2: drop WARN_ON in vb2_warn_zero_bytesused()
	9p: use inode->i_lock to protect i_size_write() under 32-bit
	9p/net: fix memory leak in p9_client_create
	ASoC: fsl_esai: fix register setting issue in RIGHT_J mode
	iio: adc: exynos-adc: Fix NULL pointer exception on unbind
	stm class: Fix an endless loop in channel allocation
	crypto: caam - fixed handling of sg list
	crypto: ahash - fix another early termination in hash walk
	gpu: ipu-v3: Fix i.MX51 CSI control registers offset
	gpu: ipu-v3: Fix CSI offsets for imx53
	s390/dasd: fix using offset into zero size array error
	ARM: OMAP2+: Variable "reg" in function omap4_dsi_mux_pads() could be uninitialized
	Input: cap11xx - switch to using set_brightness_blocking()
	Input: matrix_keypad - use flush_delayed_work()
	floppy: check_events callback should not return a negative number
	mm/gup: fix gup_pmd_range() for dax
	mm: page_alloc: fix ref bias in page_frag_alloc() for 1-byte allocs
	net: hns: Fix object reference leaks in hns_dsaf_roce_reset()
	i2c: cadence: Fix the hold bit setting
	Input: st-keyscan - fix potential zalloc NULL dereference
	clk: sunxi: A31: Fix wrong AHB gate number
	ARM: 8824/1: fix a migrating irq bug when hotplug cpu
	assoc_array: Fix shortcut creation
	scsi: libiscsi: Fix race between iscsi_xmit_task and iscsi_complete_task
	net: systemport: Fix reception of BPDUs
	pinctrl: meson: meson8b: fix the sdxc_a data 1..3 pins
	qmi_wwan: apply SET_DTR quirk to Sierra WP7607
	net: mv643xx_eth: disable clk on error path in mv643xx_eth_shared_probe()
	ASoC: topology: free created components in tplg load error
	arm64: Relax GIC version check during early boot
	net: marvell: mvneta: fix DMA debug warning
	tmpfs: fix link accounting when a tmpfile is linked in
	ARCv2: lib: memcpy: fix doing prefetchw outside of buffer
	ARC: uacces: remove lp_start, lp_end from clobber list
	phonet: fix building with clang
	mac80211_hwsim: propagate genlmsg_reply return code
	net: thunderx: make CFG_DONE message to run through generic send-ack sequence
	nfp: bpf: fix code-gen bug on BPF_ALU | BPF_XOR | BPF_K
	nfp: bpf: fix ALU32 high bits clearance bug
	net: set static variable an initial value in atl2_probe()
	tmpfs: fix uninitialized return value in shmem_link
	stm class: Prevent division by zero
	libnvdimm/label: Clear 'updating' flag after label-set update
	libnvdimm/pmem: Honor force_raw for legacy pmem regions
	libnvdimm: Fix altmap reservation size calculation
	crypto: hash - set CRYPTO_TFM_NEED_KEY if ->setkey() fails
	crypto: arm64/aes-ccm - fix logical bug in AAD MAC handling
	CIFS: Do not reset lease state to NONE on lease break
	CIFS: Fix read after write for files with read caching
	tracing: Use strncpy instead of memcpy for string keys in hist triggers
	tracing: Do not free iter->trace in fail path of tracing_open_pipe()
	ACPI / device_sysfs: Avoid OF modalias creation for removed device
	spi: ti-qspi: Fix mmap read when more than one CS in use
	spi: pxa2xx: Setup maximum supported DMA transfer length
	regulator: s2mps11: Fix steps for buck7, buck8 and LDO35
	regulator: s2mpa01: Fix step values for some LDOs
	clocksource/drivers/exynos_mct: Move one-shot check from tick clear to ISR
	clocksource/drivers/exynos_mct: Clear timer interrupt when shutdown
	s390/virtio: handle find on invalid queue gracefully
	scsi: virtio_scsi: don't send sc payload with tmfs
	scsi: sd: Optimal I/O size should be a multiple of physical block size
	scsi: target/iscsi: Avoid iscsit_release_commands_from_conn() deadlock
	fs/devpts: always delete dcache dentry-s in dput()
	splice: don't merge into linked buffers
	m68k: Add -ffreestanding to CFLAGS
	btrfs: ensure that a DUP or RAID1 block group has exactly two stripes
	Btrfs: fix corruption reading shared and compressed extents after hole punching
	crypto: pcbc - remove bogus memcpy()s with src == dest
	libertas_tf: don't set URB_ZERO_PACKET on IN USB transfer
	cpufreq: tegra124: add missing of_node_put()
	cpufreq: pxa2xx: remove incorrect __init annotation
	ext4: fix crash during online resizing
	ext2: Fix underflow in ext2_max_size()
	clk: clk-twl6040: Fix imprecise external abort for pdmclk
	clk: ingenic: Fix round_rate misbehaving with non-integer dividers
	clk: ingenic: Fix doc of ingenic_cgu_div_info
	nfit: acpi_nfit_ctl(): Check out_obj->type in the right place
	mm: hwpoison: fix thp split handing in soft_offline_in_use_page()
	mm/vmalloc: fix size check for remap_vmalloc_range_partial()
	kernel/sysctl.c: add missing range check in do_proc_dointvec_minmax_conv
	device property: Fix the length used in PROPERTY_ENTRY_STRING()
	intel_th: Don't reference unassigned outputs
	parport_pc: fix find_superio io compare code, should use equal test.
	i2c: tegra: fix maximum transfer size
	drm/i915: Relax mmap VMA check
	serial: uartps: Fix stuck ISR if RX disabled with non-empty FIFO
	serial: 8250_of: assume reg-shift of 2 for mrvl,mmp-uart
	8250: FIX Fourth port offset of Pericom PI7C9X7954 boards
	serial: 8250_pci: Fix number of ports for ACCES serial cards
	serial: 8250_pci: Have ACCES cards that use the four port Pericom PI7C9X7954 chip use the pci_pericom_setup()
	jbd2: clear dirty flag when revoking a buffer from an older transaction
	jbd2: fix compile warning when using JBUFFER_TRACE
	powerpc/32: Clear on-stack exception marker upon exception return
	powerpc/wii: properly disable use of BATs when requested.
	powerpc/powernv: Make opal log only readable by root
	powerpc/83xx: Also save/restore SPRG4-7 during suspend
	powerpc: Fix 32-bit KVM-PR lockup and host crash with MacOS guest
	powerpc/ptrace: Simplify vr_get/set() to avoid GCC warning
	ARM: s3c24xx: Fix boolean expressions in osiris_dvs_notify
	dm: fix to_sector() for 32bit
	NFS: Fix I/O request leakages
	NFS: Fix an I/O request leakage in nfs_do_recoalesce
	NFS: Don't recoalesce on error in nfs_pageio_complete_mirror()
	nfsd: fix memory corruption caused by readdir
	nfsd: fix wrong check in write_v4_end_grace()
	PM / wakeup: Rework wakeup source timer cancellation
	bcache: never writeback a discard operation
	perf intel-pt: Fix CYC timestamp calculation after OVF
	perf auxtrace: Define auxtrace record alignment
	perf intel-pt: Fix overlap calculation for padding
	perf intel-pt: Fix divide by zero when TSC is not available
	md: Fix failed allocation of md_register_thread
	rcu: Do RCU GP kthread self-wakeup from softirq and interrupt
	media: uvcvideo: Avoid NULL pointer dereference at the end of streaming
	drm/radeon/evergreen_cs: fix missing break in switch statement
	KVM: nVMX: Sign extend displacements of VMX instr's mem operands
	KVM: nVMX: Ignore limit checks on VMX instructions using flat segments
	KVM: X86: Fix residual mmio emulation request to userspace
	Linux 4.9.165

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2019-03-25 11:12:42 +03:00

223 lines
4.4 KiB
ArmAsm

/*
* Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM) asl RX, RY, IMM ; <<
# define SHIFT_2(RX,RY,IMM) lsr RX, RY, IMM ; >>
# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM) and RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM) lsr RX, RY, IMM ; >>
# define SHIFT_2(RX,RY,IMM) asl RX, RY, IMM ; <<
# define MERGE_1(RX,RY,IMM) asl RX, RY, IMM ; <<
# define MERGE_2(RX,RY,IMM) asl RX, RY, IMM ; <<
# define EXTRACT_1(RX,RY,IMM) lsr RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM) lsr RX, RY, 0x08
#endif
#ifdef CONFIG_ARC_HAS_LL64
# define LOADX(DST,RX) ldd.ab DST, [RX, 8]
# define STOREX(SRC,RX) std.ab SRC, [RX, 8]
# define ZOLSHFT 5
# define ZOLAND 0x1F
#else
# define LOADX(DST,RX) ld.ab DST, [RX, 4]
# define STOREX(SRC,RX) st.ab SRC, [RX, 4]
# define ZOLSHFT 4
# define ZOLAND 0xF
#endif
ENTRY_CFI(memcpy)
mov.f 0, r2
;;; if size is zero
jz.d [blink]
mov r3, r0 ; don;t clobber ret val
;;; if size <= 8
cmp r2, 8
bls.d @.Lsmallchunk
mov.f lp_count, r2
and.f r4, r0, 0x03
rsub lp_count, r4, 4
lpnz @.Laligndestination
;; LOOP BEGIN
ldb.ab r5, [r1,1]
sub r2, r2, 1
stb.ab r5, [r3,1]
.Laligndestination:
;;; Check the alignment of the source
and.f r4, r1, 0x03
bnz.d @.Lsourceunaligned
;;; CASE 0: Both source and destination are 32bit aligned
;;; Convert len to Dwords, unfold x4
lsr.f lp_count, r2, ZOLSHFT
lpnz @.Lcopy32_64bytes
;; LOOP START
LOADX (r6, r1)
LOADX (r8, r1)
LOADX (r10, r1)
LOADX (r4, r1)
STOREX (r6, r3)
STOREX (r8, r3)
STOREX (r10, r3)
STOREX (r4, r3)
.Lcopy32_64bytes:
and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes
.Lsmallchunk:
lpnz @.Lcopyremainingbytes
;; LOOP START
ldb.ab r5, [r1,1]
stb.ab r5, [r3,1]
.Lcopyremainingbytes:
j [blink]
;;; END CASE 0
.Lsourceunaligned:
cmp r4, 2
beq.d @.LunalignedOffby2
sub r2, r2, 1
bhi.d @.LunalignedOffby3
ldb.ab r5, [r1, 1]
;;; CASE 1: The source is unaligned, off by 1
;; Hence I need to read 1 byte for a 16bit alignment
;; and 2bytes to reach 32bit alignment
ldh.ab r6, [r1, 2]
sub r2, r2, 2
;; Convert to words, unfold x2
lsr.f lp_count, r2, 3
MERGE_1 (r6, r6, 8)
MERGE_2 (r5, r5, 24)
or r5, r5, r6
;; Both src and dst are aligned
lpnz @.Lcopy8bytes_1
;; LOOP START
ld.ab r6, [r1, 4]
ld.ab r8, [r1,4]
SHIFT_1 (r7, r6, 24)
or r7, r7, r5
SHIFT_2 (r5, r6, 8)
SHIFT_1 (r9, r8, 24)
or r9, r9, r5
SHIFT_2 (r5, r8, 8)
st.ab r7, [r3, 4]
st.ab r9, [r3, 4]
.Lcopy8bytes_1:
;; Write back the remaining 16bits
EXTRACT_1 (r6, r5, 16)
sth.ab r6, [r3, 2]
;; Write back the remaining 8bits
EXTRACT_2 (r5, r5, 16)
stb.ab r5, [r3, 1]
and.f lp_count, r2, 0x07 ;Last 8bytes
lpnz @.Lcopybytewise_1
;; LOOP START
ldb.ab r6, [r1,1]
stb.ab r6, [r3,1]
.Lcopybytewise_1:
j [blink]
.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
ldh.ab r5, [r1, 2]
sub r2, r2, 1
;; Both src and dst are aligned
;; Convert to words, unfold x2
lsr.f lp_count, r2, 3
#ifdef __BIG_ENDIAN__
asl.nz r5, r5, 16
#endif
lpnz @.Lcopy8bytes_2
;; LOOP START
ld.ab r6, [r1, 4]
ld.ab r8, [r1,4]
SHIFT_1 (r7, r6, 16)
or r7, r7, r5
SHIFT_2 (r5, r6, 16)
SHIFT_1 (r9, r8, 16)
or r9, r9, r5
SHIFT_2 (r5, r8, 16)
st.ab r7, [r3, 4]
st.ab r9, [r3, 4]
.Lcopy8bytes_2:
#ifdef __BIG_ENDIAN__
lsr.nz r5, r5, 16
#endif
sth.ab r5, [r3, 2]
and.f lp_count, r2, 0x07 ;Last 8bytes
lpnz @.Lcopybytewise_2
;; LOOP START
ldb.ab r6, [r1,1]
stb.ab r6, [r3,1]
.Lcopybytewise_2:
j [blink]
.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; Hence, I need to read 1byte for achieve the 32bit alignment
;; Both src and dst are aligned
;; Convert to words, unfold x2
lsr.f lp_count, r2, 3
#ifdef __BIG_ENDIAN__
asl.ne r5, r5, 24
#endif
lpnz @.Lcopy8bytes_3
;; LOOP START
ld.ab r6, [r1, 4]
ld.ab r8, [r1,4]
SHIFT_1 (r7, r6, 8)
or r7, r7, r5
SHIFT_2 (r5, r6, 24)
SHIFT_1 (r9, r8, 8)
or r9, r9, r5
SHIFT_2 (r5, r8, 24)
st.ab r7, [r3, 4]
st.ab r9, [r3, 4]
.Lcopy8bytes_3:
#ifdef __BIG_ENDIAN__
lsr.nz r5, r5, 24
#endif
stb.ab r5, [r3, 1]
and.f lp_count, r2, 0x07 ;Last 8bytes
lpnz @.Lcopybytewise_3
;; LOOP START
ldb.ab r6, [r1,1]
stb.ab r6, [r3,1]
.Lcopybytewise_3:
j [blink]
END_CFI(memcpy)