From 46d39428cb174985ef3b7b1af044d4faaac829f8 Mon Sep 17 00:00:00 2001 From: Brian Smith Date: Mon, 16 Nov 2020 22:34:29 -0800 Subject: [PATCH] Clean up memcpy/memset patterns. Define `GFp_memcpy` and `GFp_memset` with fallback implementations. Sync up some code that diverged from BoringSSL due to the lack of these functions. --- crypto/curve25519/curve25519.c | 14 ++++----- crypto/curve25519/internal.h | 6 ---- crypto/fipsmodule/aes/aes_nohw.c | 49 ++++++++++++++------------------ crypto/internal.h | 45 +++++++++++++++++++++++++++-- 4 files changed, 70 insertions(+), 44 deletions(-) diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index b4198996e..30afff0ed 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -159,7 +159,7 @@ static void fe_frombytes_strict(fe *h, const uint8_t s[32]) { static void fe_frombytes(fe *h, const uint8_t s[32]) { uint8_t s_copy[32]; - bytes_copy(s_copy, s, 32); + GFp_memcpy(s_copy, s, 32); s_copy[31] &= 0x7f; fe_frombytes_strict(h, s_copy); } @@ -171,21 +171,21 @@ static void fe_tobytes(uint8_t s[32], const fe *f) { // h = 0 static void fe_0(fe *h) { - fe_limbs_zero(h->v); + GFp_memset(h, 0, sizeof(fe)); } static void fe_loose_0(fe_loose *h) { - fe_limbs_zero(h->v); + GFp_memset(h, 0, sizeof(fe_loose)); } // h = 1 static void fe_1(fe *h) { - fe_0(h); + GFp_memset(h, 0, sizeof(fe)); h->v[0] = 1; } static void fe_loose_1(fe_loose *h) { - fe_loose_0(h); + GFp_memset(h, 0, sizeof(fe_loose)); h->v[0] = 1; } @@ -1782,7 +1782,7 @@ void GFp_x25519_scalar_mult_generic_masked(uint8_t out[32], fe_loose x2l, z2l, x3l, tmp0l, tmp1l; uint8_t e[32]; - bytes_copy(e, scalar_masked, 32); + GFp_memcpy(e, scalar_masked, 32); // The following implementation was transcribed to Coq and proven to // correspond to unary scalar multiplication in affine coordinates given that // x1 != 0 is the x coordinate of some point on the curve. 
It was also checked @@ -1856,7 +1856,7 @@ void GFp_x25519_scalar_mult_generic_masked(uint8_t out[32], void GFp_x25519_public_from_private_generic_masked(uint8_t out_public_value[32], const uint8_t private_key_masked[32]) { uint8_t e[32]; - bytes_copy(e, private_key_masked, 32); + GFp_memcpy(e, private_key_masked, 32); ge_p3 A; GFp_x25519_ge_scalarmult_base(&A, e); diff --git a/crypto/curve25519/internal.h b/crypto/curve25519/internal.h index 5f87f9200..60f2f615b 100644 --- a/crypto/curve25519/internal.h +++ b/crypto/curve25519/internal.h @@ -65,12 +65,6 @@ static inline void fe_limbs_copy(fe_limb_t r[], const fe_limb_t a[]) { } } -static inline void fe_limbs_zero(fe_limb_t r[]) { - for (size_t i = 0; i < FE_NUM_LIMBS; ++i) { - r[i] = 0; - } -} - // ge means group element. // // Here the group is the set of pairs (x,y) of field elements (see fe.h) diff --git a/crypto/fipsmodule/aes/aes_nohw.c b/crypto/fipsmodule/aes/aes_nohw.c index 656f051a6..19b019e73 100644 --- a/crypto/fipsmodule/aes/aes_nohw.c +++ b/crypto/fipsmodule/aes/aes_nohw.c @@ -14,13 +14,6 @@ #include -#if !defined(__wasm__) -#include -#else -void *memcpy(void *, const void*, size_t); -void *memset(void *, int, size_t); -#endif - #include "../../internal.h" #if defined(OPENSSL_SSE2) @@ -353,7 +346,7 @@ static inline uint8_t lo(uint32_t a) { static inline void aes_nohw_compact_block(aes_word_t out[AES_NOHW_BLOCK_WORDS], const uint8_t in[16]) { - memcpy(out, in, 16); + GFp_memcpy(out, in, 16); #if defined(OPENSSL_SSE2) // No conversions needed. #elif defined(OPENSSL_64_BIT) @@ -381,7 +374,7 @@ static inline void aes_nohw_compact_block(aes_word_t out[AES_NOHW_BLOCK_WORDS], static inline void aes_nohw_uncompact_block( uint8_t out[16], const aes_word_t in[AES_NOHW_BLOCK_WORDS]) { #if defined(OPENSSL_SSE2) - memcpy(out, in, 16); // No conversions needed. + GFp_memcpy(out, in, 16); // No conversions needed. 
#elif defined(OPENSSL_64_BIT) uint64_t a0 = in[0]; uint64_t a1 = in[1]; @@ -389,8 +382,8 @@ static inline void aes_nohw_uncompact_block( aes_nohw_uncompact_word((a0 & UINT64_C(0x00000000ffffffff)) | (a1 << 32)); uint64_t b1 = aes_nohw_uncompact_word((a1 & UINT64_C(0xffffffff00000000)) | (a0 >> 32)); - memcpy(out, &b0, 8); - memcpy(out + 8, &b1, 8); + GFp_memcpy(out, &b0, 8); + GFp_memcpy(out + 8, &b1, 8); #else uint32_t a0 = in[0]; uint32_t a1 = in[1]; @@ -411,10 +404,10 @@ static inline void aes_nohw_uncompact_block( b1 = aes_nohw_uncompact_word(b1); b2 = aes_nohw_uncompact_word(b2); b3 = aes_nohw_uncompact_word(b3); - memcpy(out, &b0, 4); - memcpy(out + 4, &b1, 4); - memcpy(out + 8, &b2, 4); - memcpy(out + 12, &b3, 4); + GFp_memcpy(out, &b0, 4); + GFp_memcpy(out + 4, &b1, 4); + GFp_memcpy(out + 8, &b2, 4); + GFp_memcpy(out + 12, &b3, 4); #endif } @@ -482,7 +475,7 @@ static void aes_nohw_transpose(AES_NOHW_BATCH *batch) { static void aes_nohw_to_batch(AES_NOHW_BATCH *out, const uint8_t *in, size_t num_blocks) { // Don't leave unused blocks uninitialized. - memset(out, 0, sizeof(AES_NOHW_BATCH)); + GFp_memset(out, 0, sizeof(AES_NOHW_BATCH)); debug_assert_nonsecret(num_blocks <= AES_NOHW_BATCH_SIZE); for (size_t i = 0; i < num_blocks; i++) { aes_word_t block[AES_NOHW_BLOCK_WORDS]; @@ -777,7 +770,7 @@ static void aes_nohw_expand_round_keys(AES_NOHW_SCHEDULE *out, // Copy the round key into each block in the batch. 
for (size_t j = 0; j < AES_NOHW_BATCH_SIZE; j++) { aes_word_t tmp[AES_NOHW_BLOCK_WORDS]; - memcpy(tmp, key->rd_key + 4 * i, 16); + GFp_memcpy(tmp, key->rd_key + 4 * i, 16); aes_nohw_batch_set(&out->keys[i], tmp, j); } aes_nohw_transpose(&out->keys[i]); @@ -801,7 +794,7 @@ static inline aes_word_t aes_nohw_rcon_slice(uint8_t rcon, size_t i) { static void aes_nohw_sub_block(aes_word_t out[AES_NOHW_BLOCK_WORDS], const aes_word_t in[AES_NOHW_BLOCK_WORDS]) { AES_NOHW_BATCH batch; - memset(&batch, 0, sizeof(batch)); + GFp_memset(&batch, 0, sizeof(batch)); aes_nohw_batch_set(&batch, in, 0); aes_nohw_transpose(&batch); aes_nohw_sub_bytes(&batch); @@ -814,7 +807,7 @@ static void aes_nohw_setup_key_128(AES_KEY *key, const uint8_t in[16]) { aes_word_t block[AES_NOHW_BLOCK_WORDS]; aes_nohw_compact_block(block, in); - memcpy(key->rd_key, block, 16); + GFp_memcpy(key->rd_key, block, 16); for (size_t i = 1; i <= 10; i++) { aes_word_t sub[AES_NOHW_BLOCK_WORDS]; @@ -833,7 +826,7 @@ static void aes_nohw_setup_key_128(AES_KEY *key, const uint8_t in[16]) { block[j] = aes_nohw_xor(block[j], aes_nohw_shift_left(v, 8)); block[j] = aes_nohw_xor(block[j], aes_nohw_shift_left(v, 12)); } - memcpy(key->rd_key + 4 * i, block, 16); + GFp_memcpy(key->rd_key + 4 * i, block, 16); } } @@ -843,10 +836,10 @@ static void aes_nohw_setup_key_256(AES_KEY *key, const uint8_t in[32]) { // Each key schedule iteration produces two round keys. 
aes_word_t block1[AES_NOHW_BLOCK_WORDS], block2[AES_NOHW_BLOCK_WORDS]; aes_nohw_compact_block(block1, in); - memcpy(key->rd_key, block1, 16); + GFp_memcpy(key->rd_key, block1, 16); aes_nohw_compact_block(block2, in + 16); - memcpy(key->rd_key + 4, block2, 16); + GFp_memcpy(key->rd_key + 4, block2, 16); for (size_t i = 2; i <= 14; i += 2) { aes_word_t sub[AES_NOHW_BLOCK_WORDS]; @@ -864,7 +857,7 @@ static void aes_nohw_setup_key_256(AES_KEY *key, const uint8_t in[32]) { block1[j] = aes_nohw_xor(block1[j], aes_nohw_shift_left(v, 8)); block1[j] = aes_nohw_xor(block1[j], aes_nohw_shift_left(v, 12)); } - memcpy(key->rd_key + 4 * i, block1, 16); + GFp_memcpy(key->rd_key + 4 * i, block1, 16); if (i == 14) { break; @@ -880,7 +873,7 @@ static void aes_nohw_setup_key_256(AES_KEY *key, const uint8_t in[32]) { block2[j] = aes_nohw_xor(block2[j], aes_nohw_shift_left(v, 8)); block2[j] = aes_nohw_xor(block2[j], aes_nohw_shift_left(v, 12)); } - memcpy(key->rd_key + 4 * (i + 1), block2, 16); + GFp_memcpy(key->rd_key + 4 * (i + 1), block2, 16); } } @@ -913,10 +906,10 @@ static inline void aes_nohw_xor_block(uint8_t out[16], const uint8_t a[16], const uint8_t b[16]) { for (size_t i = 0; i < 16; i += sizeof(aes_word_t)) { aes_word_t x, y; - memcpy(&x, a + i, sizeof(aes_word_t)); - memcpy(&y, b + i, sizeof(aes_word_t)); + GFp_memcpy(&x, a + i, sizeof(aes_word_t)); + GFp_memcpy(&y, b + i, sizeof(aes_word_t)); x = aes_nohw_xor(x, y); - memcpy(out + i, &x, sizeof(aes_word_t)); + GFp_memcpy(out + i, &x, sizeof(aes_word_t)); } } @@ -936,7 +929,7 @@ void GFp_aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, uint8_t u8[AES_NOHW_BATCH_SIZE * 16]; } ivs, enc_ivs; for (size_t i = 0; i < AES_NOHW_BATCH_SIZE; i++) { - memcpy(ivs.u8 + 16 * i, ivec, 16); + GFp_memcpy(ivs.u8 + 16 * i, ivec, 16); } uint32_t ctr = CRYPTO_bswap4(ivs.u32[3]); diff --git a/crypto/internal.h b/crypto/internal.h index 57607bfc3..8f3ea0973 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -259,10 +259,49 
@@ static inline uint32_t CRYPTO_bswap4(uint32_t x) { } #endif -static inline void bytes_copy(uint8_t out[], const uint8_t in[], size_t len) { - for (size_t i = 0; i < len; ++i) { - out[i] = in[i]; +// Assume we have <string.h> unless we can detect otherwise. The +// targets that don't have string.h do have `__has_include`. +#define GFp_HAS_STRING_H + +#if defined(__has_include) +# if !__has_include(<string.h>) +# undef GFp_HAS_STRING_H +# endif +#endif + +#if defined(GFp_HAS_STRING_H) +#include <string.h> +#endif + +static inline void *GFp_memcpy(void *dst, const void *src, size_t n) { +#if defined(GFp_HAS_STRING_H) + if (n == 0) { + return dst; } + return memcpy(dst, src, n); +#else + unsigned char *d = dst; + const unsigned char *s = src; + for (size_t i = 0; i < n; ++i) { + d[i] = s[i]; + } + return dst; +#endif +} + +static inline void *GFp_memset(void *dst, int c, size_t n) { +#if defined(GFp_HAS_STRING_H) + if (n == 0) { + return dst; + } + return memset(dst, c, n); +#else + unsigned char *d = dst; + for (size_t i = 0; i < n; ++i) { + d[i] = (unsigned char)c; + } + return dst; +#endif } #endif // OPENSSL_HEADER_CRYPTO_INTERNAL_H