mirror of
https://github.com/golang/go
synced 2025-07-19 22:21:52 +00:00
cmd/compile: combine multiply/add into maddld on ppc64le/power9
Add a new lowering rule to match and replace such instances with the MADDLD instruction available on power9 where possible. Likewise, this plumbs in a new ppc64 ssa opcode to house the newly generated MADDLD instructions. When testing ed25519, this reduced binary size by 936B. Similarly, MADDLD combination occurs in a few other less obvious cases such as division by constant. Testing of golang.org/x/crypto/ed25519 shows non-trivial speedup during key generation: name old time/op new time/op delta KeyGeneration 65.2µs ± 0% 63.1µs ± 0% -3.19% Signing 64.3µs ± 0% 64.4µs ± 0% +0.16% Verification 147µs ± 0% 147µs ± 0% +0.11% Similarly, this test binary has shrunk by 66488B. Change-Id: I077aeda7943119b41f07e4e62e44a648f16e4ad0 Reviewed-on: https://go-review.googlesource.com/c/go/+/248723 Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
committed by
Lynn Boger
parent
216714e44f
commit
e7c7ce646f
@@ -601,6 +601,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
|
||||
case ssa.OpPPC64MADDLD:
|
||||
r := v.Reg()
|
||||
r1 := v.Args[0].Reg()
|
||||
r2 := v.Args[1].Reg()
|
||||
r3 := v.Args[2].Reg()
|
||||
// r = r1*r2 + r3
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = r1
|
||||
p.Reg = r2
|
||||
p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3})
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
|
||||
case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
|
||||
r := v.Reg()
|
||||
r1 := v.Args[0].Reg()
|
||||
|
@@ -11,6 +11,9 @@
|
||||
(Sub32F ...) => (FSUBS ...)
|
||||
(Sub64F ...) => (FSUB ...)
|
||||
|
||||
// Combine 64 bit integer multiply and adds
|
||||
(ADD l:(MULLD x y) z) && objabi.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)
|
||||
|
||||
(Mod16 x y) => (Mod32 (SignExt16to32 x) (SignExt16to32 y))
|
||||
(Mod16u x y) => (Mod32u (ZeroExt16to32 x) (ZeroExt16to32 y))
|
||||
(Mod8 x y) => (Mod32 (SignExt8to32 x) (SignExt8to32 y))
|
||||
|
@@ -137,6 +137,7 @@ func init() {
|
||||
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
|
||||
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
|
||||
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
|
||||
gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
|
||||
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
|
||||
gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
|
||||
gp1cr = regInfo{inputs: []regMask{gp | sp | sb}}
|
||||
@@ -179,6 +180,7 @@ func init() {
|
||||
|
||||
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
|
||||
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
|
||||
{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
|
||||
|
||||
{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
|
||||
{name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed
|
||||
|
@@ -1832,6 +1832,7 @@ const (
|
||||
OpPPC64FSUBS
|
||||
OpPPC64MULLD
|
||||
OpPPC64MULLW
|
||||
OpPPC64MADDLD
|
||||
OpPPC64MULHD
|
||||
OpPPC64MULHW
|
||||
OpPPC64MULHDU
|
||||
@@ -24374,6 +24375,21 @@ var opcodeTable = [...]opInfo{
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MADDLD",
|
||||
argLen: 3,
|
||||
asm: ppc64.AMADDLD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
{2, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "MULHD",
|
||||
argLen: 2,
|
||||
|
@@ -3852,6 +3852,27 @@ func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
b := v.Block
|
||||
typ := &b.Func.Config.Types
|
||||
// match: (ADD l:(MULLD x y) z)
|
||||
// cond: objabi.GOPPC64 >= 9 && l.Uses == 1 && clobber(l)
|
||||
// result: (MADDLD x y z)
|
||||
for {
|
||||
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
|
||||
l := v_0
|
||||
if l.Op != OpPPC64MULLD {
|
||||
continue
|
||||
}
|
||||
y := l.Args[1]
|
||||
x := l.Args[0]
|
||||
z := v_1
|
||||
if !(objabi.GOPPC64 >= 9 && l.Uses == 1 && clobber(l)) {
|
||||
continue
|
||||
}
|
||||
v.reset(OpPPC64MADDLD)
|
||||
v.AddArg3(x, y, z)
|
||||
return true
|
||||
}
|
||||
break
|
||||
}
|
||||
// match: (ADD (SLDconst x [c]) (SRDconst x [d]))
|
||||
// cond: d == 64-c
|
||||
// result: (ROTLconst [c] x)
|
||||
|
@@ -253,16 +253,20 @@ func Divisible(n1 uint, n2 int) (bool, bool, bool, bool) {
|
||||
// 386:"IMUL3L\t[$]-1431655765","ADDL\t[$]715827882","ROLL\t[$]31",-"DIVQ"
|
||||
// arm64:"MUL","ADD\t[$]3074457345618258602","ROR",-"DIV"
|
||||
// arm:"MUL","ADD\t[$]715827882",-".*udiv"
|
||||
// ppc64:"MULLD","ADD","ROTL\t[$]63"
|
||||
// ppc64le:"MULLD","ADD","ROTL\t[$]63"
|
||||
// ppc64/power8:"MULLD","ADD","ROTL\t[$]63"
|
||||
// ppc64le/power8:"MULLD","ADD","ROTL\t[$]63"
|
||||
// ppc64/power9:"MADDLD","ROTL\t[$]63"
|
||||
// ppc64le/power9:"MADDLD","ROTL\t[$]63"
|
||||
evenS := n2%6 == 0
|
||||
|
||||
// amd64:"IMULQ","ADD",-"ROLQ",-"DIVQ"
|
||||
// 386:"IMUL3L\t[$]678152731","ADDL\t[$]113025455",-"ROLL",-"DIVQ"
|
||||
// arm64:"MUL","ADD\t[$]485440633518672410",-"ROR",-"DIV"
|
||||
// arm:"MUL","ADD\t[$]113025455",-".*udiv"
|
||||
// ppc64:"MULLD","ADD",-"ROTL"
|
||||
// ppc64le:"MULLD","ADD",-"ROTL"
|
||||
// ppc64/power8:"MULLD","ADD",-"ROTL"
|
||||
// ppc64/power9:"MADDLD",-"ROTL"
|
||||
// ppc64le/power8:"MULLD","ADD",-"ROTL"
|
||||
// ppc64le/power9:"MADDLD",-"ROTL"
|
||||
oddS := n2%19 == 0
|
||||
|
||||
return evenU, oddU, evenS, oddS
|
||||
|
Reference in New Issue
Block a user