Changes in 4.9.232 pinctrl: amd: fix npins for uart0 in kerncz_groups mac80211: allow rx of mesh eapol frames with default rx key scsi: scsi_transport_spi: Fix function pointer check xtensa: fix __sync_fetch_and_{and,or}_4 declarations xtensa: update *pos in cpuinfo_op.next drivers/net/wan/lapbether: Fixed the value of hard_header_len net: sky2: initialize return of gm_phy_read drm/nouveau/i2c/g94-: increase NV_PMGR_DP_AUXCTL_TRANSACTREQ timeout SUNRPC reverting d03727b248d0 ("NFSv4 fix CLOSE not waiting for direct IO compeletion") uprobes: Change handle_swbp() to send SIGTRAP with si_code=SI_KERNEL, to fix GDB regression ALSA: info: Drop WARN_ON() from buffer NULL sanity check ASoC: rt5670: Correct RT5670_LDO_SEL_MASK btrfs: fix double free on ulist after backref resolution failure btrfs: fix mount failure caused by race with umount bnxt_en: Fix race when modifying pause settings. hippi: Fix a size used in a 'pci_free_consistent()' in an error handling path ax88172a: fix ax88172a_unbind() failures net: dp83640: fix SIOCSHWTSTAMP to update the struct with actual configuration net: smc91x: Fix possible memory leak in smc_drv_probe() scripts/decode_stacktrace: strip basepath from all paths HID: i2c-hid: add Mediacom FlexBook edge13 to descriptor override HID: apple: Disable Fn-key key-re-mapping on clone keyboards dmaengine: tegra210-adma: Fix runtime PM imbalance on error regmap: dev_get_regmap_match(): fix string comparison dmaengine: ioat setting ioat timeout as module parameter usb: gadget: udc: gr_udc: fix memleak on error handling path in gr_ep_init() arm64: Use test_tsk_thread_flag() for checking TIF_SINGLESTEP x86: math-emu: Fix up 'cmp' insn for clang ias usb: xhci-mtk: fix the failure of bandwidth allocation usb: xhci: Fix ASM2142/ASM3142 DMA addressing Revert "cifs: Fix the target file was deleted when rename failed." 
staging: wlan-ng: properly check endpoint types staging: comedi: addi_apci_1032: check INSN_CONFIG_DIGITAL_TRIG shift staging: comedi: ni_6527: fix INSN_CONFIG_DIGITAL_TRIG support staging: comedi: addi_apci_1500: check INSN_CONFIG_DIGITAL_TRIG shift staging: comedi: addi_apci_1564: check INSN_CONFIG_DIGITAL_TRIG shift serial: 8250: fix null-ptr-deref in serial8250_start_tx() serial: 8250_mtk: Fix high-speed baud rates clamping vt: Reject zero-sized screen buffer size. Makefile: Fix GCC_TOOLCHAIN_DIR prefix for Clang cross compilation mm/memcg: fix refcount error while moving and swapping io-mapping: indicate mapping failure parisc: Add atomic64_set_release() define to avoid CPU soft lockups ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb ath9k: Fix regression with Atheros 9271 AX.25: Fix out-of-bounds read in ax25_connect() AX.25: Prevent out-of-bounds read in ax25_sendmsg() dev: Defer free of skbs in flush_backlog net-sysfs: add a newline when printing 'tx_timeout' by sysfs net: udp: Fix wrong clean up for IS_UDPLITE macro rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA AX.25: Prevent integer overflows in connect and sendmsg tcp: allow at most one TLP probe per flight ip6_gre: fix null-ptr-deref in ip6gre_init_net() drivers/net/wan/x25_asy: Fix to make it work regmap: debugfs: check count when read regmap file xfs: set format back to extents if xfs_bmap_extents_to_btree perf probe: Fix to check blacklist address correctly perf annotate: Use asprintf when formatting objdump command line perf tools: Fix snprint warnings for gcc 8 perf: Make perf able to build with latest libbfd Linux 4.9.232 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: Iae57e72dbaebe67399c6756146f30762e5f25b2e
471 lines
11 KiB
ArmAsm
471 lines
11 KiB
ArmAsm
.file "wm_sqrt.S"
|
|
/*---------------------------------------------------------------------------+
|
|
| wm_sqrt.S |
|
|
| |
|
|
| Fixed point arithmetic square root evaluation. |
|
|
| |
|
|
| Copyright (C) 1992,1993,1995,1997 |
|
|
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
|
|
| Australia. E-mail billm@suburbia.net |
|
|
| |
|
|
| Call from C as: |
|
|
| int wm_sqrt(FPU_REG *n, unsigned int control_word) |
|
|
| |
|
|
+---------------------------------------------------------------------------*/
|
|
|
|
/*---------------------------------------------------------------------------+
|
|
| wm_sqrt(FPU_REG *n, unsigned int control_word) |
|
|
| returns the square root of n in n. |
|
|
| |
|
|
| Use Newton's method to compute the square root of a number, which must |
|
|
| be in the range [1.0 .. 4.0), to 64 bits accuracy. |
|
|
| Does not check the sign or tag of the argument. |
|
|
| Sets the exponent, but not the sign or tag of the result. |
|
|
| |
|
|
| The guess is kept in %esi:%edi |
|
|
+---------------------------------------------------------------------------*/
|
|
|
|
#include "exception.h"
|
|
#include "fpu_emu.h"
|
|
|
|
|
|
#ifndef NON_REENTRANT_FPU

/*
 * Re-entrant build: the seven 32-bit scratch words are stack slots
 * addressed at negative offsets from %ebp.
 */

/* Four-word accumulator, most significant word first. */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * Working copy of the de-normalised argument, three words:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word; no bit other than its ms bit is set */

#else /* NON_REENTRANT_FPU */

/*
 * Non-re-entrant build: the same names are labels on zero-initialised
 * words in the data section.
 */
.data
	.align 4,0

/* Four-word accumulator, most significant word first. */
FPU_accum_3:	.long	0		/* ms word */
FPU_accum_2:	.long	0
FPU_accum_1:	.long	0
FPU_accum_0:	.long	0

/*
 * Working copy of the de-normalised argument, three words:
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
FPU_fsqrt_arg_2:	.long	0	/* ms word */
FPU_fsqrt_arg_1:	.long	0
FPU_fsqrt_arg_0:	.long	0	/* ls word; no bit other than its ms bit is set */

#endif /* NON_REENTRANT_FPU */
|
|
|
|
|
|
.text
|
|
/*
 * wm_sqrt - main path: first guess, three refinement stages, and the
 * hand-off to the rounding code.
 *
 * Entry:  PARAM1 = n. The second C argument is not read in this part.
 * Work:   %esi:%edi hold the current 64-bit estimate; from stage 3 on
 *         %eax holds the 32 bits below it (the rounding information).
 *         The argument copy and the accumulator live in the
 *         FPU_fsqrt_arg_* / FPU_accum_* scratch words.
 * Exit:   sqrt_round_result stores EXP_BIAS in n's exponent and jumps to
 *         fpu_reg_round with %eax:%ebx = estimate, %edx = rounding bits
 *         and %edi = n.
 *
 * %ebp, %esi, %edi and %ebx are pushed here and not popped anywhere in
 * this routine; presumably fpu_reg_round unwinds the frame - confirm
 * against its epilogue.
 */
ENTRY(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	subl	$28,%esp		/* seven scratch words below %ebp */
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi		/* %esi = n */

	/* %eax:%ecx:%edx = significand of n followed by a zero word */
	movl	SIGH(%esi),%eax
	movl	SIGL(%esi),%ecx
	xorl	%edx,%edx

	/*
	 * A rough linear estimate is used for the first guess.
	 * If the exponent equals EXP_BIAS the argument lies in
	 * [1.0 .. 2.0) and the three words are shifted right one bit;
	 * otherwise they are used as they are.
	 */
	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	shrl	$1,%eax
	rcrl	$1,%ecx
	rcrl	$1,%edx

sqrt_arg_ge_2:
	/*
	 * Save the working copy. n itself is not touched again until
	 * the answer replaces it.
	 */
	movl	%eax,FPU_fsqrt_arg_2	/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

	/*
	 * Linear first estimate, approximately (2 + x) / 3:
	 * halve the ms word, add 0x40000000, multiply by 0xaaaaaaaa and
	 * double the high word of the product.
	 */
	shrl	$1,%eax
	addl	$0x40000000,%eax
	movl	$0xaaaaaaaa,%ecx
	mull	%ecx
	shll	$1,%edx			/* high word was at most 7fff... */
	testl	$0x80000000,%edx	/* ...but could be as low as 3fff... */
	jnz	sqrt_prelim_no_adjust

	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi		/* our first guess */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of the argument */

	/*
	 * Stage 1: three Newton iterations, g := (g + n/g) / 2, on the
	 * ms word only. That is enough for 30 bits or so and leaves two
	 * higher-precision stages to finish the job.
	 */
	shrl	$1,%ecx			/* halving the argument first prevents */
					/* a divide overflow later */

	/* iteration 1 */
	movl	%ecx,%edx		/* ms word of the argument / 2 */
	divl	%esi			/* divide by the current estimate */
	shrl	$1,%esi			/* g / 2 */
	addl	%eax,%esi		/* the new estimate */

	/* iteration 2 */
	movl	%ecx,%edx
	divl	%esi
	shrl	$1,%esi
	addl	%eax,%esi

	/* iteration 3 */
	movl	%ecx,%edx
	divl	%esi
	shrl	$1,%esi
	addl	%eax,%esi

	/*
	 * Stage 2: take the ~30-bit estimate in %esi to 60 bits or so.
	 * From here on each new estimate is
	 *      guess := guess + (n - guess^2) / (2 * guess)
	 */

	/* guess^2 -> %edx:%eax */
	movl	%esi,%eax
	mull	%esi

	/* %edx:%eax = guess^2 - (upper two words of n) */
	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive

	/* The difference is negative: negate it before dividing. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	divl	%esi
	movl	%eax,%ecx		/* upper word of the quotient */

	movl	%edx,%eax		/* carry on with the remainder */
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	divl	%esi
	movl	%eax,%ecx		/* upper word of the quotient */

	movl	%edx,%eax		/* carry on with the remainder */
	divl	%esi

	/* guess^2 was not below n, so the correction %ecx:%eax is negated */
	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	sarl	$1,%ecx			/* divide the correction by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi

	jnz	sqrt_stage_2_done	/* result should be [1..2) */

#ifdef PARANOID
	/* Getting here should only be possible if the arg is ffff....ffff */
	cmpl	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

	/* The best rounded result: all ones, with rounding bits 7fffffff. */
	xorl	%eax,%eax
	decl	%eax
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	/*
	 * NOTE(review): the word pushed for EXCEPTION is not removed
	 * after the call, and execution falls through into stage 3.
	 */
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif /* PARANOID */

sqrt_stage_2_done:

	/*
	 * Stage 3: the root is now known to better than 60 bits.
	 * Square the 64-bit guess into FPU_accum_3:FPU_accum_2:FPU_accum_1.
	 */

	/* ls * ls: only the high word is kept */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1

	/* ms * ms */
	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3
	movl	%eax,FPU_accum_2

	/* ls * ms, added in once... */
	movl	%edi,%eax
	mull	%esi
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

	/* ...and a second time; %edx:%eax still hold the cross product */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

	/* accumulator := guess^2 - n */
	movl	FPU_fsqrt_arg_0,%eax	/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax	/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive

	/* The difference is negative: negate it before dividing. */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

#ifdef PARANOID
	adcl	$0,FPU_accum_3		/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	/*
	 * NOTE(review): the word pushed for EXCEPTION is not removed
	 * after the call, and execution falls through into the divide.
	 */
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx		/* upper word of the quotient */

	movl	%edx,%eax		/* carry on with the remainder */
	divl	%esi

	sarl	$1,%ecx			/* divide the correction by 2 */
	rcrl	$1,%eax

	/* Apply the correction; %eax is left holding the rounding bits. */
	addl	%ecx,%edi
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx		/* upper word of the quotient */

	movl	%edx,%eax		/* carry on with the remainder */
	divl	%esi

	sarl	$1,%ecx			/* divide the correction by 2 */
	rcrl	$1,%eax

	/* Apply the negated correction; %eax is left holding the rounding bits. */
	notl	%eax			/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx			/* carry here ==> correction == 0 */
	adcl	$0xffffffff,%esi

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

	/*
	 * The result in %esi:%edi:%eax should be good to about 90 bits
	 * here, and in a few rare cases the rounding information in %eax
	 * is not accurate enough: values within 0x20 of 0 (either side)
	 * or of 0x80000000 are sent off for a closer look.
	 */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
	/* Set up for the rounding operations and hand over. */
	movl	%eax,%edx		/* rounding bits */
	movl	%esi,%eax		/* ms word of the estimate */
	movl	%edi,%ebx		/* ls word of the estimate */
	movl	PARAM1,%edi		/* n */
	movw	EXP_BIAS,EXP(%edi)	/* Result is in [1.0 .. 2.0) */
	jmp	fpu_reg_round
|
|
|
|
|
|
/*
 * sqrt_near_exact_x / sqrt_near_exact
 *
 * Reached from the rounding dispatch when the rounding bits in %eax are
 * within 0x20 of zero: just below it (sqrt_near_exact_x, which first
 * rounds the estimate up) or just above it (sqrt_near_exact).
 *
 * In:   %esi:%edi = 64-bit estimate.
 * Out:  jumps to sqrt_round_result with %esi:%edi = final estimate and
 *       %eax = replacement rounding bits:
 *           0x00000000  estimate is exact
 *           0x000000ff  estimate is slightly too small
 *           0xffffff00  estimate was too large and has been decremented
 * Uses: %eax, %ebx, %ecx, %edx, flags.
 */
sqrt_near_exact_x:
	/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
	/*
	 * This is an easy case because x^1/2 is monotonic.
	 * We need just find the square of our estimate, compare it
	 * with the argument, and deduce whether our estimate is
	 * above, below, or exact. We use the fact that the estimate
	 * is known to be accurate to about 90 bits, so only the low
	 * 64 bits of the square (%ebx:%ecx) are examined.
	 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	/* the cross product ls*ms contributes twice to the 2nd ls word */
	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

#ifdef PARANOID
	/*
	 * NOTE(review): every %ebx below 0xffffffb0 takes the first branch
	 * and every other value is above 0x50 and takes the second, so the
	 * EXCEPTION call looks unreachable as written - confirm the
	 * intended range test. The pushed word is also never removed.
	 */
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_near_exact_small	/* sign bit set: square is below the argument */

	jnz	sqrt_near_exact_large	/* positive, non-zero: square is above it */

	/*
	 * %ebx is zero, so the verdict rests on the ls word of the square,
	 * which was saved in %ecx. (%edx must not be used here: after the
	 * second mull it holds the high half of ls*ms, which is not part of
	 * the low 64 bits. With the estimate 0x80000000:00000001 it is zero
	 * while the ls word of the square is 1.)
	 */
	or	%ebx,%ecx
	jnz	sqrt_near_exact_large

	/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
	/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result

sqrt_near_exact_large:
	/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result
|
|
|
|
|
|
/*
 * sqrt_get_more_precision
 *
 * Reached from the rounding dispatch when the rounding bits in %eax are
 * within 0x20 of 0x80000000, i.e. too close to the half-way point to
 * round safely.
 *
 * In:   %esi:%edi = 64-bit estimate.
 * Out:  jumps to sqrt_round_result with %esi:%edi restored and
 *       %eax = replacement rounding bits:
 *           0x80000000  estimate plus the extra bit is exact
 *           0x800000ff  estimate plus the extra bit is too small
 *           0x7fffff00  estimate plus the extra bit is too large
 * Uses: %eax, %ebx, %ecx, %edx, flags.
 */
sqrt_get_more_precision:
	/*
	 * Almost the same as the near-exact case, except that the
	 * estimate is first given one extra bit of precision: shift it
	 * left one place and bring a 1 in at the bottom.
	 */
	stc				/* the extra bit */
	rcll	$1,%edi
	rcll	$1,%esi

	/* low 64 bits of the square of the widened estimate -> %ebx:%ecx */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	/* the cross product ls*ms contributes twice to the 2nd ls word */
	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx
	addl	%eax,%ebx

	/*
	 * Put the estimate back to its original value: shift it right one
	 * place, bringing a 1 back in as the ms bit (the bit shifted out
	 * above is assumed to have been set).
	 */
	stc				/* the ms bit */
	rcrl	$1,%esi
	rcrl	$1,%edi

#ifdef PARANOID
	/*
	 * NOTE(review): every %ebx below 0xffffff60 takes the first branch
	 * and every other value is above 0xa0 and takes the second, so the
	 * EXCEPTION call looks unreachable as written - confirm the
	 * intended range test. The pushed word is also never removed.
	 */
	cmpl	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmpl	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

	testl	%ebx,%ebx
	js	sqrt_more_prec_small	/* sign bit set: square is below the argument */
	jnz	sqrt_more_prec_large	/* positive, non-zero: square is above it */

	/* %ebx is zero here, so only the ls word is left to check */
	testl	%ecx,%ecx
	jnz	sqrt_more_prec_large

	/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
	/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result

sqrt_more_prec_large:
	/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
|