Merge BoringSSL 55db667: Enable vpaes for aarch64, with CTR optimizations.
This doesn't enable VPAES for AArch64 in *ring* though.
This commit is contained in:
commit
a64a9f7377
@ -42,7 +42,7 @@ while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
|
||||
# Locate the arm-xlate.pl translator: first next to this script, then in the
# perlasm directories two and three levels up. Abort if none of them exists.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
# Open a pipe (OUT) to the translator, run with the current perl ($^X).
# The translator path is quoted so a directory name containing spaces does
# not split the command, and the open is checked so a failure to start the
# pipe is reported instead of being silently ignored.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
|
||||
@ -1171,7 +1171,8 @@ vpaes_cbc_decrypt:
|
||||
ret
|
||||
.size vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
|
||||
___
|
||||
if (1) {
|
||||
# We omit vpaes_ecb_* in BoringSSL. They are unused.
|
||||
if (0) {
|
||||
$code.=<<___;
|
||||
.globl vpaes_ecb_encrypt
|
||||
.type vpaes_ecb_encrypt,%function
|
||||
@ -1253,7 +1254,89 @@ vpaes_ecb_decrypt:
|
||||
ret
|
||||
.size vpaes_ecb_decrypt,.-vpaes_ecb_decrypt
|
||||
___
|
||||
} }
|
||||
}
|
||||
|
||||
# Names of the 32-bit scratch registers used by vpaes_ctr32_encrypt_blocks.
# $ctr receives the word loaded from offset 12 of the IV and is byte-swapped
# with `rev` (the counter is big-endian); $ctr_tmp holds the re-swapped value
# that is written into lane 3 of the counter vector(s).
my ($ctr, $ctr_tmp) = ("w6", "w7");
|
||||
|
||||
# void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
|
||||
# const AES_KEY *key, const uint8_t ivec[16]);
|
||||
#
# Outline of the routine appended to $code below:
#   * after the prologue, branch straight to the epilogue when $len is zero;
#   * $len is a block count (not a byte count) and is copied to x17, and $key
#     is copied to x2;
#   * if the block count is odd, one block is processed with
#     _vpaes_encrypt_core so that the remaining count is even;
#   * the remaining blocks are processed two at a time with
#     _vpaes_encrypt_2x, taking the counter blocks from v14 and v15;
#   * each result is XORed with the input that was loaded ahead of time and
#     stored through $out, and the counter is incremented after each step.
# NOTE(review): $inp, $out, $len, $key and $ivec are not declared in the
# lines visible here; presumably they are register names set up earlier in
# the file -- confirm.
$code.=<<___;
|
||||
.globl vpaes_ctr32_encrypt_blocks
|
||||
.type vpaes_ctr32_encrypt_blocks,%function
|
||||
.align 4
|
||||
vpaes_ctr32_encrypt_blocks:
|
||||
stp x29,x30,[sp,#-16]!
|
||||
add x29,sp,#0
|
||||
stp d8,d9,[sp,#-16]! // ABI spec says so
|
||||
stp d10,d11,[sp,#-16]!
|
||||
stp d12,d13,[sp,#-16]!
|
||||
stp d14,d15,[sp,#-16]!
|
||||
|
||||
cbz $len, .Lctr32_done
|
||||
|
||||
// Note, unlike the other functions, $len here is measured in blocks,
|
||||
// not bytes.
|
||||
mov x17, $len
|
||||
mov x2, $key
|
||||
|
||||
// Load the IV and counter portion.
|
||||
ldr $ctr, [$ivec, #12]
|
||||
ld1 {v7.16b}, [$ivec]
|
||||
|
||||
bl _vpaes_encrypt_preheat
|
||||
tst x17, #1
|
||||
rev $ctr, $ctr // The counter is big-endian.
|
||||
b.eq .Lctr32_prep_loop
|
||||
|
||||
// Handle one block so the remaining block count is even for
|
||||
// _vpaes_encrypt_2x.
|
||||
ld1 {v6.16b}, [$inp], #16 // Load input ahead of time
|
||||
bl _vpaes_encrypt_core
|
||||
eor v0.16b, v0.16b, v6.16b // XOR input and result
|
||||
st1 {v0.16b}, [$out], #16
|
||||
subs x17, x17, #1
|
||||
// Update the counter.
|
||||
add $ctr, $ctr, #1
|
||||
rev $ctr_tmp, $ctr
|
||||
mov v7.s[3], $ctr_tmp
|
||||
b.ls .Lctr32_done
|
||||
|
||||
.Lctr32_prep_loop:
|
||||
// _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x
|
||||
// uses v14 and v15.
|
||||
mov v15.16b, v7.16b
|
||||
mov v14.16b, v7.16b
|
||||
add $ctr, $ctr, #1
|
||||
rev $ctr_tmp, $ctr
|
||||
mov v15.s[3], $ctr_tmp
|
||||
|
||||
.Lctr32_loop:
|
||||
ld1 {v6.16b,v7.16b}, [$inp], #32 // Load input ahead of time
|
||||
bl _vpaes_encrypt_2x
|
||||
eor v0.16b, v0.16b, v6.16b // XOR input and result
|
||||
eor v1.16b, v1.16b, v7.16b // XOR input and result (#2)
|
||||
st1 {v0.16b,v1.16b}, [$out], #32
|
||||
subs x17, x17, #2
|
||||
// Update the counter.
|
||||
add $ctr_tmp, $ctr, #1
|
||||
add $ctr, $ctr, #2
|
||||
rev $ctr_tmp, $ctr_tmp
|
||||
mov v14.s[3], $ctr_tmp
|
||||
rev $ctr_tmp, $ctr
|
||||
mov v15.s[3], $ctr_tmp
|
||||
b.hi .Lctr32_loop
|
||||
|
||||
.Lctr32_done:
|
||||
ldp d14,d15,[sp],#16
|
||||
ldp d12,d13,[sp],#16
|
||||
ldp d10,d11,[sp],#16
|
||||
ldp d8,d9,[sp],#16
|
||||
ldp x29,x30,[sp],#16
|
||||
ret
|
||||
.size vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
|
||||
___
|
||||
}
|
||||
|
||||
# Emit the accumulated assembly text.
print $code;
|
||||
|
||||
# Buffered output errors only surface when the handle is closed, so check
# the result of close instead of discarding it; otherwise a truncated or
# failed write would leave a bad output file without any diagnostic.
close STDOUT or die "error closing STDOUT: $!";
|
||||
|
Loading…
x
Reference in New Issue
Block a user