Merge BoringSSL 55db667: Enable vpaes for aarch64, with CTR optimizations.

This doesn't enable VPAES for AArch64 in *ring* though.
This commit is contained in:
Brian Smith 2019-07-01 14:35:01 -10:00
commit a64a9f7377

View File

@ -42,7 +42,7 @@ while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
# Locate the arm-xlate.pl translator relative to this script's own directory,
# trying each known layout in turn, and abort if none of them exists.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
# Pipe the generated code through the translator, run with the same perl
# binary ($^X).  A piped open returns false when the child cannot be
# started, so fail loudly here instead of writing into a dead handle.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
@ -1171,7 +1171,8 @@ vpaes_cbc_decrypt:
ret
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
___
if (1) {
# We omit vpaes_ecb_* in BoringSSL. They are unused.
if (0) {
$code.=<<___;
.globl vpaes_ecb_encrypt
.type vpaes_ecb_encrypt,%function
@ -1253,7 +1254,89 @@ vpaes_ecb_decrypt:
ret
.size vpaes_ecb_decrypt,.-vpaes_ecb_decrypt
___
} }
}
# Registers private to the CTR routine below: $ctr (w6) holds the 32-bit
# counter after it has been byte-reversed out of the IV, and $ctr_tmp (w7) is
# scratch for the value reversed back before it is inserted into lane 3 of a
# counter block.  Because both are 32-bit "w" registers, the increments below
# wrap modulo 2^32.
# NOTE(review): $inp, $out, $len, $key and $ivec are interpolated into the
# heredoc but declared outside this view; presumably they name the argument
# registers in the order of the prototype below -- confirm against the
# enclosing block.
my ($ctr, $ctr_tmp) = ("w6", "w7");
# void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
# const AES_KEY *key, const uint8_t ivec[16]);
#
# Emits the assembly for CTR mode with a 32-bit big-endian counter stored in
# the last four bytes of ivec.  len counts blocks, not bytes, and a zero len
# branches straight to the epilogue.  The generated code:
#   - saves x29/x30 and d8-d15, and restores them at .Lctr32_done;
#   - copies len to x17 and the key pointer to x2 before calling
#     _vpaes_encrypt_preheat;
#   - when the block count is odd, processes one block with
#     _vpaes_encrypt_core (counter block in v7) so the remainder is even;
#   - processes the rest two blocks per iteration with _vpaes_encrypt_2x,
#     keeping two consecutive counter blocks in v14 and v15;
#   - XORs each result with the input loaded ahead of the call and stores it
#     to out.
$code.=<<___;
.globl vpaes_ctr32_encrypt_blocks
.type vpaes_ctr32_encrypt_blocks,%function
.align 4
vpaes_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
stp d8,d9,[sp,#-16]! // ABI spec says so
stp d10,d11,[sp,#-16]!
stp d12,d13,[sp,#-16]!
stp d14,d15,[sp,#-16]!
cbz $len, .Lctr32_done
// Note, unlike the other functions, $len here is measured in blocks,
// not bytes.
mov x17, $len
mov x2, $key
// Load the IV and counter portion.
ldr $ctr, [$ivec, #12]
ld1 {v7.16b}, [$ivec]
bl _vpaes_encrypt_preheat
tst x17, #1
rev $ctr, $ctr // The counter is big-endian.
b.eq .Lctr32_prep_loop
// Handle one block so the remaining block count is even for
// _vpaes_encrypt_2x.
ld1 {v6.16b}, [$inp], #16 // Load input ahead of time
bl _vpaes_encrypt_core
eor v0.16b, v0.16b, v6.16b // XOR input and result
st1 {v0.16b}, [$out], #16
subs x17, x17, #1
// Update the counter.
add $ctr, $ctr, #1
rev $ctr_tmp, $ctr
mov v7.s[3], $ctr_tmp
b.ls .Lctr32_done
.Lctr32_prep_loop:
// _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x
// uses v14 and v15.
mov v15.16b, v7.16b
mov v14.16b, v7.16b
add $ctr, $ctr, #1
rev $ctr_tmp, $ctr
mov v15.s[3], $ctr_tmp
.Lctr32_loop:
ld1 {v6.16b,v7.16b}, [$inp], #32 // Load input ahead of time
bl _vpaes_encrypt_2x
eor v0.16b, v0.16b, v6.16b // XOR input and result
eor v1.16b, v1.16b, v7.16b // XOR input and result (#2)
st1 {v0.16b,v1.16b}, [$out], #32
subs x17, x17, #2
// Update the counter.
add $ctr_tmp, $ctr, #1
add $ctr, $ctr, #2
rev $ctr_tmp, $ctr_tmp
mov v14.s[3], $ctr_tmp
rev $ctr_tmp, $ctr
mov v15.s[3], $ctr_tmp
b.hi .Lctr32_loop
.Lctr32_done:
ldp d14,d15,[sp],#16
ldp d12,d13,[sp],#16
ldp d10,d11,[sp],#16
ldp d8,d9,[sp],#16
ldp x29,x30,[sp],#16
ret
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
___
}
# Emit the accumulated assembly text.
print $code;
# Buffered write errors only surface when the handle is closed, and closing a
# piped handle also reports a non-zero exit from the child.  Check the result
# so a failed write or failed translation aborts the build instead of
# silently leaving truncated output behind.
# NOTE(review): presumably STDOUT was aliased to the translator pipe earlier
# in the file; that line is outside this view.
close STDOUT or die "error closing STDOUT: $!";