Skip to content

Commit

Permalink
Merge pull request #17 from rakitzis/arm-asm-fixes
Browse files Browse the repository at this point in the history
fixes for issue #15 -- ARM assembly alignment bug
  • Loading branch information
dchest authored Aug 18, 2018
2 parents ca249f4 + 4f4eba0 commit 34f2012
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 16 deletions.
2 changes: 1 addition & 1 deletion blocks.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build !amd64 appengine gccgo
// +build !arm,!amd64 appengine gccgo

package siphash

Expand Down
144 changes: 144 additions & 0 deletions blocks_arm.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#include "textflag.h"

// ROUND applies two SipRounds (the "2" in SipHash-2-4) to the state held in
// R0-R7. The state is four 64-bit values, each split into 32-bit halves:
//
//	v0 = R1:R0, v1 = R3:R2, v2 = R5:R4, v3 = R7:R6   (high:low)
//
// How to read the body:
//   - Each ADD.S/ADC pair is one 64-bit addition: ADD.S adds the low words and
//     sets the carry flag, ADC adds the high words plus that carry.
//   - Each group of four EORs computes rotl64(x, n) ^ y one half at a time by
//     XOR-ing the two shifted halves of x into the matching half of y
//     (n = 13, 16, 21, 17, in that order, once per round).
//   - The rotations by 32 are never executed. Instead, the instructions that
//     follow treat the halves of that value as swapped (for example
//     "ADD.S R2,R1,R1; ADC R3,R0,R0" uses R1 as the low word of v0). After
//     the second round every value is back in the layout shown above.
//
// Clobbers R8 and R11 (temporaries) and the condition flags.
// The macro body contains no comments because every line ends in a
// continuation backslash.
#define ROUND()\
ADD.S R2,R0,R0;\
ADC R3,R1,R1;\
EOR R2<<13,R0,R8;\
EOR R3>>19,R8,R8;\
EOR R2>>19,R1,R11;\
EOR R3<<13,R11,R11;\
ADD.S R6,R4,R4;\
ADC R7,R5,R5;\
EOR R6<<16,R4,R2;\
EOR R7>>16,R2,R2;\
EOR R6>>16,R5,R3;\
EOR R7<<16,R3,R3;\
ADD.S R2,R1,R1;\
ADC R3,R0,R0;\
EOR R2<<21,R1,R6;\
EOR R3>>11,R6,R6;\
EOR R2>>11,R0,R7;\
EOR R3<<21,R7,R7;\
ADD.S R8,R4,R4;\
ADC R11,R5,R5;\
EOR R8<<17,R4,R2;\
EOR R11>>15,R2,R2;\
EOR R8>>15,R5,R3;\
EOR R11<<17,R3,R3;\
ADD.S R2,R1,R1;\
ADC R3,R0,R0;\
EOR R2<<13,R1,R8;\
EOR R3>>19,R8,R8;\
EOR R2>>19,R0,R11;\
EOR R3<<13,R11,R11;\
ADD.S R6,R5,R5;\
ADC R7,R4,R4;\
EOR R6<<16,R5,R2;\
EOR R7>>16,R2,R2;\
EOR R6>>16,R4,R3;\
EOR R7<<16,R3,R3;\
ADD.S R2,R0,R0;\
ADC R3,R1,R1;\
EOR R2<<21,R0,R6;\
EOR R3>>11,R6,R6;\
EOR R2>>11,R1,R7;\
EOR R3<<21,R7,R7;\
ADD.S R8,R5,R5;\
ADC R11,R4,R4;\
EOR R8<<17,R5,R2;\
EOR R11>>15,R2,R2;\
EOR R8>>15,R4,R3;\
EOR R11<<17,R3,R3;

// once(d *digest)
//
// Mixes the 8-byte word stored at byte offsets 48..55 of *d into the state
// kept in the first eight 32-bit words of *d, then writes the state back.
// NOTE(review): offsets 0..31 are presumably v0..v3 and offset 48 the block
// buffer of digest — confirm against the struct definition.
// NOTE(review): R14 is the ARM link register and is used as scratch here;
// presumably the non-zero frame size ($4) is what makes the assembler
// save and restore it — confirm.
TEXT ·once(SB),NOSPLIT,$4-4
MOVW d+0(FP),R8
// R0-R7 = state words (v0 = R1:R0, v1 = R3:R2, v2 = R5:R4, v3 = R7:R6).
MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
// R12 = low word, R14 = high word of the 8-byte message word.
MOVW 48(R8),R12
MOVW 52(R8),R14
// v3 ^= m
EOR R12,R6,R6
EOR R14,R7,R7
ROUND()
// v0 ^= m
EOR R12,R0,R0
EOR R14,R1,R1
// Reload the digest pointer: ROUND clobbers R8.
MOVW d+0(FP),R8
MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
RET

// finalize(d *digest) uint64
//
// Mixes the 8-byte word at byte offsets 48..55 of *d into the state (same
// sequence as once), XORs 0xff into v2, runs ROUND twice (four SipRounds)
// and returns v0 ^ v1 ^ v2 ^ v3. The state is only loaded, never stored
// back, so *d is not modified by this function.
// NOTE(review): offset 48 is presumably the final padded block prepared by
// the caller — confirm against the digest struct and its Sum64 method.
// NOTE(review): R14 (ARM link register) is used as scratch; presumably the
// non-zero frame size ($4) makes the assembler save/restore it — confirm.
TEXT ·finalize(SB),NOSPLIT,$4-12
MOVW d+0(FP),R8
// R0-R7 = state words (v0 = R1:R0, v1 = R3:R2, v2 = R5:R4, v3 = R7:R6).
MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
// R12 = low word, R14 = high word of the last message word.
MOVW 48(R8),R12
MOVW 52(R8),R14
// v3 ^= m
EOR R12,R6,R6
EOR R14,R7,R7
ROUND()
// v0 ^= m
EOR R12,R0,R0
EOR R14,R1,R1
// v2 ^= 0xff (only the low word is affected).
EOR $255,R4
ROUND()
ROUND()
// R1:R0 = v0 ^ v1
EOR R2,R0,R0
EOR R3,R1,R1
// R5:R4 = v2 ^ v3
EOR R6,R4,R4
EOR R7,R5,R5
// R1:R0 = v0 ^ v1 ^ v2 ^ v3
EOR R4,R0,R0
EOR R5,R1,R1
// Return the 64-bit result as low word followed by high word.
MOVW R0,ret_lo+4(FP)
MOVW R1,ret_hi+8(FP)
RET

// blocks(d *digest, p []uint8)
//
// Mixes p into the state kept in the first eight 32-bit words of *d, eight
// bytes per iteration, and writes the state back. There are two loops with
// identical mixing logic: blocksloop reads each 8-byte word with one
// two-register load and is used when the data pointer is 4-byte aligned;
// blocksunaligned assembles the same little-endian word from single-byte
// loads and is used otherwise.
//
// Frame layout: sav-8(SP) holds the caller's R9, endp-4(SP) holds p+len(p).
// NOTE(review): both loops are bottom-tested, so the first 8 bytes are read
// before any bound check and the loop only stops once R9 >= endp; callers
// presumably guarantee len(p) is a non-zero multiple of 8 — confirm.
// NOTE(review): R14 (ARM link register) is used as scratch; presumably the
// non-zero frame size makes the assembler save/restore it — confirm.
TEXT ·blocks(SB),NOSPLIT,$8-16
// Preserve R9 in the frame; it is restored before each RET.
MOVW R9,sav-8(SP)
MOVW d+0(FP),R8
// R0-R7 = state words (v0 = R1:R0, v1 = R3:R2, v2 = R5:R4, v3 = R7:R6).
MOVM.IA (R8),[R0,R1,R2,R3,R4,R5,R6,R7]
// R9 = read cursor, R11 = len(p).
MOVW p+4(FP),R9
MOVW p_len+8(FP),R11
// endp = p + len(p); kept on the stack because ROUND clobbers R11.
ADD R9,R11,R11
MOVW R11,endp-4(SP)
// Take the byte-wise path unless the data pointer is 4-byte aligned.
AND.S $3,R9,R8
BNE blocksunaligned
blocksloop:
// Load R12 = low word, R14 = high word and advance R9 by 8 (.W writeback).
MOVM.IA.W (R9),[R12,R14]
// v3 ^= m
EOR R12,R6,R6
EOR R14,R7,R7
ROUND()
// v0 ^= m
EOR R12,R0,R0
EOR R14,R1,R1
// Continue while the cursor is below endp (unsigned compare).
MOVW endp-4(SP),R11
CMP R11,R9
BLO blocksloop
// Store the updated state and restore R9.
MOVW d+0(FP),R8
MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
MOVW sav-8(SP),R9
RET
blocksunaligned:
// R12 = bytes 0..3 combined little-endian (byte 0 in the low 8 bits).
MOVBU (R9),R12
MOVBU 1(R9),R11
ORR R11<<8,R12,R12
MOVBU 2(R9),R11
ORR R11<<16,R12,R12
MOVBU 3(R9),R11
ORR R11<<24,R12,R12
// R14 = bytes 4..7 combined the same way.
MOVBU 4(R9),R14
MOVBU 5(R9),R11
ORR R11<<8,R14,R14
MOVBU 6(R9),R11
ORR R11<<16,R14,R14
MOVBU 7(R9),R11
ORR R11<<24,R14,R14
// Advance the cursor past the 8 bytes just consumed.
ADD $8,R9,R9
// v3 ^= m
EOR R12,R6,R6
EOR R14,R7,R7
ROUND()
// v0 ^= m
EOR R12,R0,R0
EOR R14,R1,R1
// Continue while the cursor is below endp (unsigned compare).
MOVW endp-4(SP),R11
CMP R11,R9
BLO blocksunaligned
// Store the updated state and restore R9.
MOVW d+0(FP),R8
MOVM.IA [R0,R1,R2,R3,R4,R5,R6,R7],(R8)
MOVW sav-8(SP),R9
RET
21 changes: 21 additions & 0 deletions blocks_asm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// +build arm amd64,!appengine,!gccgo

// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// http://creativecommons.org/publicdomain/zero/1.0/

// This file contains a function definition for use with assembly implementations of Hash()

package siphash

// blocks has no Go body; it is implemented in assembly for the
// architectures selected by this file's build constraint. It mixes p into
// the state of d.
// NOTE(review): the ARM implementation reads 8 bytes before checking any
// bound, so len(p) is presumably required to be a non-zero multiple of 8 —
// confirm against callers.
//
//go:noescape
func blocks(d *digest, p []uint8)

// finalize has no Go body; it is implemented in assembly. It returns the
// 64-bit hash value computed from the state of d.
//
//go:noescape
func finalize(d *digest) uint64

// once has no Go body; it is implemented in assembly. It mixes a single
// 8-byte word taken from d itself into the state of d.
//
//go:noescape
func once(d *digest)
2 changes: 1 addition & 1 deletion hash.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build !amd64 appengine gccgo
// +build !arm,!amd64 appengine gccgo

// Written in 2012 by Dmitry Chestnykh.
//
Expand Down
2 changes: 1 addition & 1 deletion hash128.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +build !amd64 appengine gccgo
// +build !arm,!amd64 appengine gccgo
// Written in 2012 by Dmitry Chestnykh.
// Modifications 2014 for 128-bit hash function by Damian Gryski.
//
Expand Down
27 changes: 27 additions & 0 deletions hash_arm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// +build arm

package siphash

// NB: the ARM implementation forgoes extra speed for Hash()
// and Hash128() by simply reusing the same blocks() implementation
// in assembly used by the streaming hash.

// Hash returns the 64-bit SipHash-2-4 of the given byte slice with two
// 64-bit parts of 128-bit key: k0 and k1.
//
// On ARM it is a thin wrapper: it sets up a one-shot streaming digest on the
// stack, feeds it p and returns the 64-bit sum.
func Hash(k0, k1 uint64, p []byte) uint64 {
	state := digest{size: Size, k0: k0, k1: k1}
	state.Reset()
	state.Write(p)
	return state.Sum64()
}

// Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two
// 64-bit parts of 128-bit key: k0 and k1.
//
// On ARM it is a thin wrapper: it sets up a one-shot streaming digest of
// size Size128 on the stack, feeds it p and returns both halves of the sum.
func Hash128(k0, k1 uint64, p []byte) (uint64, uint64) {
	state := digest{size: Size128, k0: k0, k1: k1}
	state.Reset()
	state.Write(p)
	return state.sum128()
}
9 changes: 0 additions & 9 deletions hash_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,3 @@ func Hash(k0, k1 uint64, b []byte) uint64
// Hash128 returns the 128-bit SipHash-2-4 of the given byte slice with two
// 64-bit parts of 128-bit key: k0 and k1.
func Hash128(k0, k1 uint64, b []byte) (uint64, uint64)

//go:noescape
func blocks(d *digest, p []uint8)

//go:noescape
func finalize(d *digest) uint64

//go:noescape
func once(d *digest)
52 changes: 48 additions & 4 deletions siphash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,16 +347,60 @@ func TestAlign(t *testing.T) {
data := "0076a9143219adce9b6f0a21fd53cb17e2fd9b2b4fac40b388ac"
k0 := uint64(316665572293978160)
k1 := uint64(8573005253291875333)
want := uint64(16770526497674945769)

want := []uint64{
16380727507974277821,
16770526497674945769,
11373998677292870540,
10374222295991299613,
}
want128 := []uint64{
14802151199638645495,
13251497035884452880,
7034723853391616289,
16742813562040528752,
10468120447644272532,
10941274532208162335,
11293904790559355408,
15432350433573653068,
}

d, err := hex.DecodeString(data)
if err != nil {
t.Fatal(err)
}

res := Hash(k0, k1, d[1:])
if res != want {
t.Fatalf("Expected %v got %v", want, res)
var k [16]byte
binary.LittleEndian.PutUint64(k[0:], k0)
binary.LittleEndian.PutUint64(k[8:], k1)

for i := range want {
res := Hash(k0, k1, d[i:])
if res != want[i] {
t.Fatalf("Expected %v got %v", want[i], res)
}
reslo, reshi := Hash128(k0, k1, d[i:])
if reslo != want128[i*2] {
t.Fatalf("Expected %v got %v", want128[i*2], reslo)
}
if reshi != want128[i*2+1] {
t.Fatalf("Expected %v got %v", want128[i*2+1], reshi)
}
dig := newDigest(Size, k[:])
dig.Write(d[i:])
res = dig.Sum64()
if res != want[i] {
t.Fatalf("Expected %v got %v", want[i], res)
}
dig128 := newDigest(Size128, k[:])
dig128.Write(d[i:])
reslo, reshi = dig128.sum128()
if reslo != want128[i*2] {
t.Fatalf("Expected %v got %v", want128[i*2], reslo)
}
if reshi != want128[i*2+1] {
t.Fatalf("Expected %v got %v", want128[i*2+1], reshi)
}
}
}

Expand Down

0 comments on commit 34f2012

Please sign in to comment.