Source release 17.1.0
This commit is contained in:
@@ -33,6 +33,8 @@
|
||||
.align 5
|
||||
aes_hw_set_encrypt_key:
|
||||
.Lenc_key:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
mov x3,#-1
|
||||
@@ -201,6 +203,7 @@ aes_hw_set_encrypt_key:
|
||||
.type aes_hw_set_decrypt_key,%function
|
||||
.align 5
|
||||
aes_hw_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
bl .Lenc_key
|
||||
@@ -234,6 +237,7 @@ aes_hw_set_decrypt_key:
|
||||
eor x0,x0,x0 // return value
|
||||
.Ldec_key_abort:
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
|
||||
.globl aes_hw_encrypt
|
||||
@@ -241,6 +245,7 @@ aes_hw_set_decrypt_key:
|
||||
.type aes_hw_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_encrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@@ -271,6 +276,7 @@ aes_hw_encrypt:
|
||||
.type aes_hw_decrypt,%function
|
||||
.align 5
|
||||
aes_hw_decrypt:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ldr w3,[x2,#240]
|
||||
ld1 {v0.4s},[x2],#16
|
||||
ld1 {v2.16b},[x0]
|
||||
@@ -301,6 +307,8 @@ aes_hw_decrypt:
|
||||
.type aes_hw_cbc_encrypt,%function
|
||||
.align 5
|
||||
aes_hw_cbc_encrypt:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
subs x2,x2,#16
|
||||
@@ -592,6 +600,8 @@ aes_hw_cbc_encrypt:
|
||||
.type aes_hw_ctr32_encrypt_blocks,%function
|
||||
.align 5
|
||||
aes_hw_ctr32_encrypt_blocks:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
ldr w5,[x3,#240]
|
||||
@@ -611,20 +621,34 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
add x7,x3,#32
|
||||
mov w6,w5
|
||||
csel x12,xzr,x12,lo
|
||||
|
||||
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
|
||||
// affected by silicon errata #1742098 [0] and #1655431 [1],
|
||||
// respectively, where the second instruction of an aese/aesmc
|
||||
// instruction pair may execute twice if an interrupt is taken right
|
||||
// after the first instruction consumes an input register of which a
|
||||
// single 32-bit lane has been updated the last time it was modified.
|
||||
//
|
||||
// This function uses a counter in one 32-bit lane. The vmov lines
|
||||
// could write to v1.16b and v18.16b directly, but that trips these bugs.
|
||||
// We write to v6.16b and copy to the final register as a workaround.
|
||||
//
|
||||
// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
|
||||
// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
|
||||
#ifndef __ARMEB__
|
||||
rev w8, w8
|
||||
#endif
|
||||
orr v1.16b,v0.16b,v0.16b
|
||||
add w10, w8, #1
|
||||
orr v18.16b,v0.16b,v0.16b
|
||||
add w8, w8, #2
|
||||
orr v6.16b,v0.16b,v0.16b
|
||||
rev w10, w10
|
||||
mov v1.s[3],w10
|
||||
mov v6.s[3],w10
|
||||
add w8, w8, #2
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
b.ls .Lctr32_tail
|
||||
rev w12, w8
|
||||
mov v6.s[3],w12
|
||||
sub x2,x2,#3 // bias
|
||||
mov v18.s[3],w12
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
b .Loop3x_ctr32
|
||||
|
||||
.align 4
|
||||
@@ -651,11 +675,11 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
aese v1.16b,v16.16b
|
||||
aesmc v5.16b,v1.16b
|
||||
ld1 {v2.16b},[x0],#16
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v18.16b,v16.16b
|
||||
aesmc v18.16b,v18.16b
|
||||
ld1 {v3.16b},[x0],#16
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
rev w9,w9
|
||||
aese v4.16b,v17.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v17.16b
|
||||
@@ -664,8 +688,6 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
mov x7,x3
|
||||
aese v18.16b,v17.16b
|
||||
aesmc v17.16b,v18.16b
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
add w9,w8,#1
|
||||
aese v4.16b,v20.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v20.16b
|
||||
@@ -680,21 +702,26 @@ aes_hw_ctr32_encrypt_blocks:
|
||||
aesmc v4.16b,v4.16b
|
||||
aese v5.16b,v21.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
// Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
|
||||
// around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
|
||||
// 32-bit mode. See the comment above.
|
||||
eor v19.16b,v19.16b,v7.16b
|
||||
rev w9,w9
|
||||
mov v6.s[3], w9
|
||||
aese v17.16b,v21.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v0.s[3], w9
|
||||
orr v0.16b,v6.16b,v6.16b
|
||||
rev w10,w10
|
||||
aese v4.16b,v22.16b
|
||||
aesmc v4.16b,v4.16b
|
||||
mov v6.s[3], w10
|
||||
rev w12,w8
|
||||
aese v5.16b,v22.16b
|
||||
aesmc v5.16b,v5.16b
|
||||
mov v1.s[3], w10
|
||||
rev w12,w8
|
||||
orr v1.16b,v6.16b,v6.16b
|
||||
mov v6.s[3], w12
|
||||
aese v17.16b,v22.16b
|
||||
aesmc v17.16b,v17.16b
|
||||
mov v18.s[3], w12
|
||||
orr v18.16b,v6.16b,v6.16b
|
||||
subs x2,x2,#3
|
||||
aese v4.16b,v23.16b
|
||||
aese v5.16b,v23.16b
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl bn_mul_mont
|
||||
@@ -20,6 +22,7 @@
|
||||
.type bn_mul_mont,%function
|
||||
.align 5
|
||||
bn_mul_mont:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
tst x5,#7
|
||||
b.eq __bn_sqr8x_mont
|
||||
tst x5,#3
|
||||
@@ -217,11 +220,14 @@ bn_mul_mont:
|
||||
mov x0,#1
|
||||
ldp x23,x24,[x29,#48]
|
||||
ldr x29,[sp],#64
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size bn_mul_mont,.-bn_mul_mont
|
||||
.type __bn_sqr8x_mont,%function
|
||||
.align 5
|
||||
__bn_sqr8x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
|
||||
// only from bn_mul_mont which has already signed the return address.
|
||||
cmp x1,x2
|
||||
b.ne __bn_mul4x_mont
|
||||
.Lsqr8x_mont:
|
||||
@@ -975,11 +981,16 @@ __bn_sqr8x_mont:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
|
||||
.type __bn_mul4x_mont,%function
|
||||
.align 5
|
||||
__bn_mul4x_mont:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
|
||||
// only from bn_mul_mont or __bn_sqr8x_mont which have already signed the
|
||||
// return address.
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
stp x19,x20,[sp,#16]
|
||||
@@ -1413,6 +1424,8 @@ __bn_mul4x_mont:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldr x29,[sp],#128
|
||||
// x30 is popped earlier
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size __bn_mul4x_mont,.-__bn_mul4x_mont
|
||||
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.text
|
||||
|
||||
.globl gcm_init_neon
|
||||
@@ -20,6 +22,7 @@
|
||||
.type gcm_init_neon,%function
|
||||
.align 4
|
||||
gcm_init_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
// This function is adapted from gcm_init_v8. xC2 is t3.
|
||||
ld1 {v17.2d}, [x1] // load H
|
||||
movi v19.16b, #0xe1
|
||||
@@ -45,6 +48,7 @@ gcm_init_neon:
|
||||
.type gcm_gmult_neon,%function
|
||||
.align 4
|
||||
gcm_gmult_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v3.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
@@ -64,6 +68,7 @@ gcm_gmult_neon:
|
||||
.type gcm_ghash_neon,%function
|
||||
.align 4
|
||||
gcm_ghash_neon:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v0.16b}, [x0] // load Xi
|
||||
ld1 {v5.1d}, [x1], #8 // load twisted H
|
||||
ld1 {v6.1d}, [x1]
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
#if __ARM_MAX_ARCH__>=7
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.globl gcm_init_v8
|
||||
@@ -22,6 +23,7 @@
|
||||
.type gcm_init_v8,%function
|
||||
.align 4
|
||||
gcm_init_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x1] //load input H
|
||||
movi v19.16b,#0xe1
|
||||
shl v19.2d,v19.2d,#57 //0xc2.0
|
||||
@@ -64,8 +66,48 @@ gcm_init_v8:
|
||||
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
|
||||
st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2]
|
||||
//calculate H^3 and H^4
|
||||
pmull v0.1q,v20.1d, v22.1d
|
||||
pmull v5.1q,v22.1d,v22.1d
|
||||
pmull2 v2.1q,v20.2d, v22.2d
|
||||
pmull2 v7.1q,v22.2d,v22.2d
|
||||
pmull v1.1q,v16.1d,v17.1d
|
||||
pmull v6.1q,v17.1d,v17.1d
|
||||
|
||||
ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
ext v17.16b,v5.16b,v7.16b,#8
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v16.16b
|
||||
eor v4.16b,v5.16b,v7.16b
|
||||
eor v6.16b,v6.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase
|
||||
eor v6.16b,v6.16b,v4.16b
|
||||
pmull v4.1q,v5.1d,v19.1d
|
||||
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v7.d[0],v6.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ins v6.d[1],v5.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
eor v5.16b,v6.16b,v4.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
|
||||
ext v4.16b,v5.16b,v5.16b,#8
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v5.1q,v5.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v4.16b,v4.16b,v7.16b
|
||||
eor v20.16b, v0.16b,v18.16b //H^3
|
||||
eor v22.16b,v5.16b,v4.16b //H^4
|
||||
|
||||
ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing
|
||||
ext v17.16b,v22.16b,v22.16b,#8
|
||||
eor v16.16b,v16.16b,v20.16b
|
||||
eor v17.16b,v17.16b,v22.16b
|
||||
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
|
||||
st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5]
|
||||
ret
|
||||
.size gcm_init_v8,.-gcm_init_v8
|
||||
.globl gcm_gmult_v8
|
||||
@@ -73,6 +115,7 @@ gcm_init_v8:
|
||||
.type gcm_gmult_v8,%function
|
||||
.align 4
|
||||
gcm_gmult_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
ld1 {v17.2d},[x0] //load Xi
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
|
||||
@@ -115,6 +158,9 @@ gcm_gmult_v8:
|
||||
.type gcm_ghash_v8,%function
|
||||
.align 4
|
||||
gcm_ghash_v8:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
cmp x3,#64
|
||||
b.hs .Lgcm_ghash_v8_4x
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
//"[rotated]" means that
|
||||
//loaded value would have
|
||||
@@ -241,9 +287,290 @@ gcm_ghash_v8:
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8,.-gcm_ghash_v8
|
||||
// gcm_ghash_v8_4x: GHASH path that consumes the input four blocks at a time.
// In the code visible here it is reached through the
// "b.hs .Lgcm_ghash_v8_4x" branch in gcm_ghash_v8, taken when x3 >= 64.
//
// Register use, as read from the loads and stores below:
//   x0  - address of Xi; loaded at entry, written back at .Ldone4x
//   x1  - address of the twisted-H table; the first 48 bytes go to v20-v22
//         (twisted H, ..., H^2), the next 48 bytes to v26-v28 (H^3, ..., H^4)
//   x2  - input pointer, post-incremented by 64 on every four-block load
//   x3  - remaining length (presumably in bytes, 64 per four 16-byte
//         blocks - TODO confirm against the C caller)
//   v19 - the 0xc2.0 reduction constant composed with movi/shl
//   v29/v30/v31 - running sums of the per-block partial products
//
// NOTE(review): there is no AARCH64_VALID_CALL_TARGET at this label. That
// looks intentional because the visible entry is a direct branch, but
// confirm that nothing calls this symbol indirectly.
.type gcm_ghash_v8_4x,%function
|
||||
.align 4
|
||||
gcm_ghash_v8_4x:
|
||||
.Lgcm_ghash_v8_4x:
|
||||
ld1 {v0.2d},[x0] //load [rotated] Xi
|
||||
ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2
|
||||
movi v19.16b,#0xe1
|
||||
ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4
|
||||
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
|
||||
|
||||
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v7.16b,v7.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
ext v25.16b,v7.16b,v7.16b,#8
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
|
||||
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
|
||||
eor v7.16b,v7.16b,v25.16b
|
||||
pmull2 v31.1q,v20.2d,v25.2d
|
||||
pmull v30.1q,v21.1d,v7.1d
|
||||
|
||||
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
pmull2 v24.1q,v22.2d,v24.2d
|
||||
pmull2 v6.1q,v21.2d,v6.2d
|
||||
|
||||
eor v29.16b,v29.16b,v16.16b
|
||||
eor v31.16b,v31.16b,v24.16b
|
||||
eor v30.16b,v30.16b,v6.16b
|
||||
|
||||
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
pmull2 v23.1q,v26.2d,v23.2d
|
||||
pmull v5.1q,v27.1d,v5.1d
|
||||
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
// x3 -= 128: 64 for the group just loaded plus a bias of 64 that the loop
// condition relies on. A borrow means no further full group of four blocks
// can be loaded, so go straight to the tail.
subs x3,x3,#128
|
||||
b.lo .Ltail4x
|
||||
|
||||
b .Loop4x
|
||||
|
||||
.align 4
|
||||
// Main loop: fold Xi into the first block of the previous group, load the
// next four blocks, multiply and reduce the previous group, and start the
// partial products (v29-v31) for the newly loaded blocks.
.Loop4x:
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
#ifndef __ARMEB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v7.16b,v7.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v28.2d,v3.2d
|
||||
ext v25.16b,v7.16b,v7.16b,#8
|
||||
pmull2 v1.1q,v27.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
pmull v29.1q,v20.1d,v25.1d //H·Ii+3
|
||||
eor v7.16b,v7.16b,v25.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
pmull2 v31.1q,v20.2d,v25.2d
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
pmull v30.1q,v21.1d,v7.1d
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
pmull2 v24.1q,v22.2d,v24.2d
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
pmull2 v6.1q,v21.2d,v6.2d
|
||||
|
||||
eor v29.16b,v29.16b,v16.16b
|
||||
eor v31.16b,v31.16b,v24.16b
|
||||
eor v30.16b,v30.16b,v6.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
pmull2 v23.1q,v26.2d,v23.2d
|
||||
pmull v5.1q,v27.1d,v5.1d
|
||||
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
subs x3,x3,#64
|
||||
b.hs .Loop4x
|
||||
|
||||
// Tail: finish the last full group of four (H^4 multiply plus the v29-v31
// sums, left unreduced in v0/v1/v2), then add 64 back to x3 to undo the bias.
// x3 == 0 goes straight to the final reduction; otherwise the compare with
// 32 dispatches to .Lone (below), .Ltwo (equal) or .Lthree (above).
.Ltail4x:
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v28.2d,v3.2d
|
||||
pmull2 v1.1q,v27.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
|
||||
adds x3,x3,#64
|
||||
b.eq .Ldone4x
|
||||
|
||||
cmp x3,#32
|
||||
b.lo .Lone
|
||||
|
||||
b.eq .Ltwo
|
||||
// Three blocks left: reduce the pending product, load three blocks and
// multiply them by H^3, H^2 and H; the result stays unreduced in v0/v1/v2.
.Lthree:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d,v5.2d,v6.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __ARMEB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v6.16b,v6.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v24.16b,v6.16b,v6.16b,#8
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
pmull v29.1q,v20.1d,v24.1d //H·Ii+2
|
||||
eor v6.16b,v6.16b,v24.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
pmull2 v31.1q,v20.2d,v24.2d
|
||||
pmull v30.1q,v21.1d,v6.1d
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
pmull2 v23.1q,v22.2d,v23.2d
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
pmull2 v5.1q,v21.2d,v5.2d
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
eor v29.16b,v29.16b,v7.16b
|
||||
eor v31.16b,v31.16b,v23.16b
|
||||
eor v30.16b,v30.16b,v5.16b
|
||||
|
||||
pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v26.2d,v3.2d
|
||||
pmull v1.1q,v27.1d,v16.1d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
b .Ldone4x
|
||||
|
||||
.align 4
|
||||
// Two blocks left: reduce the pending product, load two blocks and multiply
// them by H^2 and H; the result stays unreduced in v0/v1/v2.
.Ltwo:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d,v5.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __ARMEB__
|
||||
rev64 v5.16b,v5.16b
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
ext v23.16b,v5.16b,v5.16b,#8
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
pmull v29.1q,v20.1d,v23.1d //H·Ii+1
|
||||
eor v5.16b,v5.16b,v23.16b
|
||||
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull2 v31.1q,v20.2d,v23.2d
|
||||
pmull v30.1q,v21.1d,v5.1d
|
||||
|
||||
pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii)
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v22.2d,v3.2d
|
||||
pmull2 v1.1q,v21.2d,v16.2d
|
||||
|
||||
eor v0.16b,v0.16b,v29.16b
|
||||
eor v2.16b,v2.16b,v31.16b
|
||||
eor v1.16b,v1.16b,v30.16b
|
||||
b .Ldone4x
|
||||
|
||||
.align 4
|
||||
// One block left: reduce the pending product, load one block, fold Xi into
// it and multiply by H; falls through into .Ldone4x with v0/v1/v2 unreduced.
.Lone:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
ld1 {v4.2d},[x2]
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
#ifndef __ARMEB__
|
||||
rev64 v4.16b,v4.16b
|
||||
#endif
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
eor v16.16b,v4.16b,v0.16b
|
||||
ext v3.16b,v16.16b,v16.16b,#8
|
||||
|
||||
pmull v0.1q,v20.1d,v3.1d
|
||||
eor v16.16b,v16.16b,v3.16b
|
||||
pmull2 v2.1q,v20.2d,v3.2d
|
||||
pmull v1.1q,v21.1d,v16.1d
|
||||
|
||||
// Common exit: final Karatsuba post-processing and two-phase reduction of
// v0/v1/v2, byte reversal unless __ARMEB__ is defined, then store Xi to [x0]
// and return.
.Ldone4x:
|
||||
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
|
||||
eor v18.16b,v0.16b,v2.16b
|
||||
eor v1.16b,v1.16b,v17.16b
|
||||
eor v1.16b,v1.16b,v18.16b
|
||||
|
||||
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
|
||||
ins v2.d[0],v1.d[1]
|
||||
ins v1.d[1],v0.d[0]
|
||||
eor v0.16b,v1.16b,v18.16b
|
||||
|
||||
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
|
||||
pmull v0.1q,v0.1d,v19.1d
|
||||
eor v18.16b,v18.16b,v2.16b
|
||||
eor v0.16b,v0.16b,v18.16b
|
||||
ext v0.16b,v0.16b,v0.16b,#8
|
||||
|
||||
#ifndef __ARMEB__
|
||||
rev64 v0.16b,v0.16b
|
||||
|
||||
#endif
|
||||
st1 {v0.2d},[x0] //write out Xi
|
||||
|
||||
ret
|
||||
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
|
||||
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
@@ -18,11 +18,14 @@
|
||||
.text
|
||||
|
||||
|
||||
.hidden OPENSSL_armcap_P
|
||||
.globl sha1_block_data_order
|
||||
.hidden sha1_block_data_order
|
||||
.type sha1_block_data_order,%function
|
||||
.align 6
|
||||
sha1_block_data_order:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
#else
|
||||
@@ -1090,6 +1093,8 @@ sha1_block_data_order:
|
||||
.type sha1_block_armv8,%function
|
||||
.align 6
|
||||
sha1_block_armv8:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
.Lv8_entry:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
@@ -1228,8 +1233,6 @@ sha1_block_armv8:
|
||||
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
@@ -59,11 +59,13 @@
|
||||
.text
|
||||
|
||||
|
||||
.hidden OPENSSL_armcap_P
|
||||
.globl sha256_block_data_order
|
||||
.hidden sha256_block_data_order
|
||||
.type sha256_block_data_order,%function
|
||||
.align 6
|
||||
sha256_block_data_order:
|
||||
AARCH64_VALID_CALL_TARGET
|
||||
#ifndef __KERNEL__
|
||||
#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
|
||||
adrp x16,:pg_hi21_nc:OPENSSL_armcap_P
|
||||
@@ -74,6 +76,7 @@ sha256_block_data_order:
|
||||
tst w16,#ARMV8_SHA256
|
||||
b.ne .Lv8_entry
|
||||
#endif
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@@ -1034,6 +1037,7 @@ sha256_block_data_order:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size sha256_block_data_order,.-sha256_block_data_order
|
||||
|
||||
@@ -1068,6 +1072,7 @@ sha256_block_data_order:
|
||||
.align 6
|
||||
sha256_block_armv8:
|
||||
.Lv8_entry:
|
||||
// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@@ -1204,10 +1209,6 @@ sha256_block_armv8:
|
||||
ret
|
||||
.size sha256_block_armv8,.-sha256_block_armv8
|
||||
#endif
|
||||
#ifndef __KERNEL__
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
@@ -59,11 +59,13 @@
|
||||
.text
|
||||
|
||||
|
||||
.hidden OPENSSL_armcap_P
|
||||
.globl sha512_block_data_order
|
||||
.hidden sha512_block_data_order
|
||||
.type sha512_block_data_order,%function
|
||||
.align 6
|
||||
sha512_block_data_order:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-128]!
|
||||
add x29,sp,#0
|
||||
|
||||
@@ -1024,6 +1026,7 @@ sha512_block_data_order:
|
||||
ldp x25,x26,[x29,#64]
|
||||
ldp x27,x28,[x29,#80]
|
||||
ldp x29,x30,[sp],#128
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size sha512_block_data_order,.-sha512_block_data_order
|
||||
|
||||
@@ -1076,10 +1079,6 @@ sha512_block_data_order:
|
||||
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
|
||||
.align 2
|
||||
.align 2
|
||||
#ifndef __KERNEL__
|
||||
.comm OPENSSL_armcap_P,4,4
|
||||
.hidden OPENSSL_armcap_P
|
||||
#endif
|
||||
#endif
|
||||
#endif // !OPENSSL_NO_ASM
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
#if defined(BORINGSSL_PREFIX)
|
||||
#include <boringssl_prefix_symbols_asm.h>
|
||||
#endif
|
||||
#include <openssl/arm_arch.h>
|
||||
|
||||
.section .rodata
|
||||
|
||||
.type _vpaes_consts,%object
|
||||
@@ -215,6 +217,7 @@ _vpaes_encrypt_core:
|
||||
.type vpaes_encrypt,%function
|
||||
.align 4
|
||||
vpaes_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@@ -224,6 +227,7 @@ vpaes_encrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_encrypt,.-vpaes_encrypt
|
||||
|
||||
@@ -452,6 +456,7 @@ _vpaes_decrypt_core:
|
||||
.type vpaes_decrypt,%function
|
||||
.align 4
|
||||
vpaes_decrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@@ -461,6 +466,7 @@ vpaes_decrypt:
|
||||
st1 {v0.16b}, [x1]
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_decrypt,.-vpaes_decrypt
|
||||
|
||||
@@ -630,6 +636,7 @@ _vpaes_key_preheat:
|
||||
.type _vpaes_schedule_core,%function
|
||||
.align 4
|
||||
_vpaes_schedule_core:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29, x30, [sp,#-16]!
|
||||
add x29,sp,#0
|
||||
|
||||
@@ -799,6 +806,7 @@ _vpaes_schedule_core:
|
||||
eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6
|
||||
eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7
|
||||
ldp x29, x30, [sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size _vpaes_schedule_core,.-_vpaes_schedule_core
|
||||
|
||||
@@ -1001,7 +1009,7 @@ _vpaes_schedule_mangle:
|
||||
|
||||
.Lschedule_mangle_both:
|
||||
tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3
|
||||
add x8, x8, #64-16 // add $-16, %r8
|
||||
add x8, x8, #48 // add $-16, %r8
|
||||
and x8, x8, #~(1<<6) // and $0x30, %r8
|
||||
st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx)
|
||||
ret
|
||||
@@ -1012,6 +1020,7 @@ _vpaes_schedule_mangle:
|
||||
.type vpaes_set_encrypt_key,%function
|
||||
.align 4
|
||||
vpaes_set_encrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@@ -1027,6 +1036,7 @@ vpaes_set_encrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
|
||||
|
||||
@@ -1035,6 +1045,7 @@ vpaes_set_encrypt_key:
|
||||
.type vpaes_set_decrypt_key,%function
|
||||
.align 4
|
||||
vpaes_set_decrypt_key:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@@ -1054,6 +1065,7 @@ vpaes_set_decrypt_key:
|
||||
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
|
||||
.globl vpaes_cbc_encrypt
|
||||
@@ -1061,6 +1073,7 @@ vpaes_set_decrypt_key:
|
||||
.type vpaes_cbc_encrypt,%function
|
||||
.align 4
|
||||
vpaes_cbc_encrypt:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
cbz x2, .Lcbc_abort
|
||||
cmp w5, #0 // check direction
|
||||
b.eq vpaes_cbc_decrypt
|
||||
@@ -1088,12 +1101,15 @@ vpaes_cbc_encrypt:
|
||||
|
||||
ldp x29,x30,[sp],#16
|
||||
.Lcbc_abort:
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
|
||||
|
||||
.type vpaes_cbc_decrypt,%function
|
||||
.align 4
|
||||
vpaes_cbc_decrypt:
|
||||
// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
|
||||
// only from vpaes_cbc_encrypt which has already signed the return address.
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@@ -1135,6 +1151,7 @@ vpaes_cbc_decrypt:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
|
||||
.globl vpaes_ctr32_encrypt_blocks
|
||||
@@ -1142,6 +1159,7 @@ vpaes_cbc_decrypt:
|
||||
.type vpaes_ctr32_encrypt_blocks,%function
|
||||
.align 4
|
||||
vpaes_ctr32_encrypt_blocks:
|
||||
AARCH64_SIGN_LINK_REGISTER
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
@@ -1209,6 +1227,7 @@ vpaes_ctr32_encrypt_blocks:
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
AARCH64_VALIDATE_LINK_REGISTER
|
||||
ret
|
||||
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user