Add optimised Aarch64 GCM.

Cost: 6.3KiB, based on the size of the .o file. (The bssl tool size
doesn't really change, probably due to padding somewhere.)

This code originally came from ARM but David has merged the AES-128 and
AES-256 specific code into a function that works across AES sizes.

Speeds from an M1 Pro (first group: before this change; second group: after):

Did 16546000 AES-128-GCM (16 bytes) seal operations in 1000018us (16545702.2 ops/sec): 264.7 MB/s
Did 10450500 AES-128-GCM (256 bytes) seal operations in 1000011us (10450385.0 ops/sec): 2675.3 MB/s
Did 2822500 AES-128-GCM (1350 bytes) seal operations in 1000042us (2822381.5 ops/sec): 3810.2 MB/s
Did 547000 AES-128-GCM (8192 bytes) seal operations in 1000826us (546548.6 ops/sec): 4477.3 MB/s
Did 279000 AES-128-GCM (16384 bytes) seal operations in 1000411us (278885.4 ops/sec): 4569.3 MB/s
Did 16991250 AES-256-GCM (16 bytes) seal operations in 1000001us (16991233.0 ops/sec): 271.9 MB/s
Did 9257000 AES-256-GCM (256 bytes) seal operations in 1000072us (9256333.5 ops/sec): 2369.6 MB/s
Did 2398000 AES-256-GCM (1350 bytes) seal operations in 1000002us (2397995.2 ops/sec): 3237.3 MB/s
Did 465000 AES-256-GCM (8192 bytes) seal operations in 1001108us (464485.4 ops/sec): 3805.1 MB/s
Did 240000 AES-256-GCM (16384 bytes) seal operations in 1002704us (239352.8 ops/sec): 3921.6 MB/s

Did 16670000 AES-128-GCM (16 bytes) seal operations in 1000054us (16669099.9 ops/sec): 266.7 MB/s
Did 11450750 AES-128-GCM (256 bytes) seal operations in 1000014us (11450589.7 ops/sec): 2931.4 MB/s
Did 3830000 AES-128-GCM (1350 bytes) seal operations in 1000097us (3829628.5 ops/sec): 5170.0 MB/s
Did 790000 AES-128-GCM (8192 bytes) seal operations in 1000379us (789700.7 ops/sec): 6469.2 MB/s
Did 400000 AES-128-GCM (16384 bytes) seal operations in 1000980us (399608.4 ops/sec): 6547.2 MB/s
Did 16877000 AES-256-GCM (16 bytes) seal operations in 1000052us (16876122.4 ops/sec): 270.0 MB/s
Did 10438000 AES-256-GCM (256 bytes) seal operations in 1000067us (10437300.7 ops/sec): 2671.9 MB/s
Did 3419000 AES-256-GCM (1350 bytes) seal operations in 1000158us (3418459.9 ops/sec): 4614.9 MB/s
Did 698000 AES-256-GCM (8192 bytes) seal operations in 1000557us (697611.4 ops/sec): 5714.8 MB/s
Did 355000 AES-256-GCM (16384 bytes) seal operations in 1001900us (354326.8 ops/sec): 5805.3 MB/s

Change-Id: Id88f6e14482f09591fe95145bf4089de1ab68380
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/55926
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: David Benjamin <davidben@google.com>
This commit is contained in:
Adam Langley 2022-12-28 13:02:12 -08:00 committed by Boringssl LUCI CQ
parent 1f2529d99d
commit c6e3780763
6 changed files with 1599 additions and 1 deletions

View File

@ -21,6 +21,7 @@ record keeping.)
27287199
27287880
27287883
263291445
OpenSSL License
---------------

View File

@ -61,6 +61,7 @@ if(ARCH STREQUAL "aarch64")
BCM_ASM_SOURCES
aesv8-armx.${ASM_EXT}
aesv8-gcm-armv8.${ASM_EXT}
armv8-mont.${ASM_EXT}
ghash-neon-armv8.${ASM_EXT}
ghashv8-armx.${ASM_EXT}
@ -87,6 +88,7 @@ perlasm(aesni-x86_64.${ASM_EXT} aes/asm/aesni-x86_64.pl)
perlasm(aesni-x86.${ASM_EXT} aes/asm/aesni-x86.pl)
perlasm(aesp8-ppc.${ASM_EXT} aes/asm/aesp8-ppc.pl)
perlasm(aesv8-armx.${ASM_EXT} aes/asm/aesv8-armx.pl)
perlasm(aesv8-gcm-armv8.${ASM_EXT} modes/asm/aesv8-gcm-armv8.pl)
perlasm(armv4-mont.${ASM_EXT} bn/asm/armv4-mont.pl)
perlasm(armv8-mont.${ASM_EXT} bn/asm/armv8-mont.pl)
perlasm(bn-586.${ASM_EXT} bn/asm/bn-586.pl)

File diff suppressed because it is too large Load Diff

View File

@ -146,6 +146,32 @@ static size_t hw_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
}
#endif // HW_GCM && X86_64
#if defined(HW_GCM) && defined(OPENSSL_AARCH64)
// hw_gcm_encrypt passes the leading portion of |in| selected by
// |kSizeTWithoutLower4Bits| (by its name, |len| with the low four bits
// cleared, i.e. a whole number of 16-byte blocks) to |aes_gcm_enc_kernel|,
// along with |out|, |Xi|, |ivec| and |key|. It returns the number of bytes
// handed to the kernel, which is zero when |len| contains no such portion; the
// caller is left to handle the remaining |len| minus that many bytes.
static size_t hw_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                             const AES_KEY *key, uint8_t ivec[16],
                             uint64_t *Xi) {
  const size_t bulk_bytes = len & kSizeTWithoutLower4Bits;
  if (bulk_bytes != 0) {
    // The kernel takes its input length in bits rather than bytes.
    aes_gcm_enc_kernel(in, bulk_bytes * 8, out, Xi, ivec, key);
  }
  return bulk_bytes;
}
// hw_gcm_decrypt passes the leading portion of |in| selected by
// |kSizeTWithoutLower4Bits| (by its name, |len| with the low four bits
// cleared, i.e. a whole number of 16-byte blocks) to |aes_gcm_dec_kernel|,
// along with |out|, |Xi|, |ivec| and |key|. It returns the number of bytes
// handed to the kernel, which is zero when |len| contains no such portion; the
// caller is left to handle the remaining |len| minus that many bytes.
static size_t hw_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
                             const AES_KEY *key, uint8_t ivec[16],
                             uint64_t *Xi) {
  const size_t bulk_bytes = len & kSizeTWithoutLower4Bits;
  if (bulk_bytes != 0) {
    // The kernel takes its input length in bits rather than bytes.
    aes_gcm_dec_kernel(in, bulk_bytes * 8, out, Xi, ivec, key);
  }
  return bulk_bytes;
}
#endif // HW_GCM && AARCH64
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
u128 *out_key, u128 out_table[16], int *out_is_avx,
const uint8_t gcm_key[16]) {
@ -231,7 +257,12 @@ void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key,
CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
gcm_key->Htable, &is_avx, ghash_key);
gcm_key->use_hw_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
#if defined(OPENSSL_AARCH64)
gcm_key->use_hw_gcm_crypt = (gcm_pmull_capable() && block_is_hwaes) ? 1 :
0;
#else
gcm_key->use_hw_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
#endif
}
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,

View File

@ -209,6 +209,20 @@ TEST(GCMTest, ABI) {
}
#endif // GHASH_ASM_ARM
#if defined(OPENSSL_AARCH64) && defined(HW_GCM)
if (hwaes_capable() && gcm_pmull_capable()) {
static const uint8_t kKey[16] = {0};
uint8_t iv[16] = {0};
for (size_t key_bits = 128; key_bits <= 256; key_bits += 64) {
AES_KEY aes_key;
aes_hw_set_encrypt_key(kKey, key_bits, &aes_key);
CHECK_ABI(aes_gcm_enc_kernel, buf, sizeof(buf) * 8, buf, X, iv, &aes_key);
CHECK_ABI(aes_gcm_dec_kernel, buf, sizeof(buf) * 8, buf, X, iv, &aes_key);
}
}
#endif
#if defined(GHASH_ASM_PPC64LE)
if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
CHECK_ABI(gcm_init_p8, Htable, kH);

View File

@ -279,6 +279,7 @@ size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
#endif // OPENSSL_X86
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define GHASH_ASM_ARM
#define GCM_FUNCREF
@ -298,6 +299,15 @@ void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#if defined(OPENSSL_AARCH64)
// HW_GCM is defined here for AArch64 builds; code elsewhere tests it before
// using the kernels declared below.
#define HW_GCM
// These functions are defined in aesv8-gcm-armv8.pl.
//
// Note that |in_bits| is the length of |in| measured in bits, not bytes, so
// callers working with byte counts must multiply by eight.
// NOTE(review): |Xi| and |ivec| are non-const and are presumably the GHASH
// state and counter block updated in place by the kernel; confirm against the
// assembly before relying on that.
void aes_gcm_enc_kernel(const uint8_t *in, uint64_t in_bits, void *out,
void *Xi, uint8_t *ivec, const AES_KEY *key);
// NOTE(review): same argument layout as |aes_gcm_enc_kernel|; presumably the
// decrypting counterpart. Confirm against the assembly.
void aes_gcm_dec_kernel(const uint8_t *in, uint64_t in_bits, void *out,
void *Xi, uint8_t *ivec, const AES_KEY *key);
#endif
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF