Merge BoringSSL 808f832: Run the comment converter on libcrypto.
This commit is contained in:
commit
4c01b3a7f1
@ -29,20 +29,20 @@
|
||||
|
||||
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
|
||||
|
||||
/* Optimised AES-GCM-SIV */
|
||||
// Optimised AES-GCM-SIV
|
||||
|
||||
struct aead_aes_gcm_siv_asm_ctx {
|
||||
alignas(16) uint8_t key[16*15];
|
||||
int is_128_bit;
|
||||
};
|
||||
|
||||
/* aes128gcmsiv_aes_ks writes an AES-128 key schedule for |key| to
|
||||
* |out_expanded_key|. */
|
||||
// aes128gcmsiv_aes_ks writes an AES-128 key schedule for |key| to
|
||||
// |out_expanded_key|.
|
||||
extern void aes128gcmsiv_aes_ks(
|
||||
const uint8_t key[16], uint8_t out_expanded_key[16*15]);
|
||||
|
||||
/* aes128gcmsiv_aes_ks writes an AES-128 key schedule for |key| to
|
||||
* |out_expanded_key|. */
|
||||
// aes128gcmsiv_aes_ks writes an AES-128 key schedule for |key| to
|
||||
// |out_expanded_key|.
|
||||
extern void aes256gcmsiv_aes_ks(
|
||||
const uint8_t key[16], uint8_t out_expanded_key[16*15]);
|
||||
|
||||
@ -52,7 +52,7 @@ static int aead_aes_gcm_siv_asm_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
|
||||
|
||||
if (key_bits != 128 && key_bits != 256) {
|
||||
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_KEY_LENGTH);
|
||||
return 0; /* EVP_AEAD_CTX_init should catch this. */
|
||||
return 0; // EVP_AEAD_CTX_init should catch this.
|
||||
}
|
||||
|
||||
if (tag_len == EVP_AEAD_DEFAULT_TAG_LENGTH) {
|
||||
@ -70,7 +70,7 @@ static int aead_aes_gcm_siv_asm_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* malloc should return a 16-byte-aligned address. */
|
||||
// malloc should return a 16-byte-aligned address.
|
||||
assert((((uintptr_t)gcm_siv_ctx) & 15) == 0);
|
||||
|
||||
if (key_bits == 128) {
|
||||
@ -92,123 +92,123 @@ static void aead_aes_gcm_siv_asm_cleanup(EVP_AEAD_CTX *ctx) {
|
||||
OPENSSL_free(gcm_siv_asm_ctx);
|
||||
}
|
||||
|
||||
/* aesgcmsiv_polyval_horner updates the POLYVAL value in |in_out_poly| to
|
||||
* include a number (|in_blocks|) of 16-byte blocks of data from |in|, given
|
||||
* the POLYVAL key in |key|. */
|
||||
// aesgcmsiv_polyval_horner updates the POLYVAL value in |in_out_poly| to
|
||||
// include a number (|in_blocks|) of 16-byte blocks of data from |in|, given
|
||||
// the POLYVAL key in |key|.
|
||||
extern void aesgcmsiv_polyval_horner(const uint8_t in_out_poly[16],
|
||||
const uint8_t key[16], const uint8_t *in,
|
||||
size_t in_blocks);
|
||||
|
||||
/* aesgcmsiv_htable_init writes powers 1..8 of |auth_key| to |out_htable|. */
|
||||
// aesgcmsiv_htable_init writes powers 1..8 of |auth_key| to |out_htable|.
|
||||
extern void aesgcmsiv_htable_init(uint8_t out_htable[16 * 8],
|
||||
const uint8_t auth_key[16]);
|
||||
|
||||
/* aesgcmsiv_htable6_init writes powers 1..6 of |auth_key| to |out_htable|. */
|
||||
// aesgcmsiv_htable6_init writes powers 1..6 of |auth_key| to |out_htable|.
|
||||
extern void aesgcmsiv_htable6_init(uint8_t out_htable[16 * 6],
|
||||
const uint8_t auth_key[16]);
|
||||
|
||||
/* aesgcmsiv_htable_polyval updates the POLYVAL value in |in_out_poly| to
|
||||
* include |in_len| bytes of data from |in|. (Where |in_len| must be a multiple
|
||||
* of 16.) It uses the precomputed powers of the key given in |htable|. */
|
||||
// aesgcmsiv_htable_polyval updates the POLYVAL value in |in_out_poly| to
|
||||
// include |in_len| bytes of data from |in|. (Where |in_len| must be a multiple
|
||||
// of 16.) It uses the precomputed powers of the key given in |htable|.
|
||||
extern void aesgcmsiv_htable_polyval(const uint8_t htable[16 * 8],
|
||||
const uint8_t *in, size_t in_len,
|
||||
uint8_t in_out_poly[16]);
|
||||
|
||||
/* aes128gcmsiv_dec decrypts |in_len| & ~15 bytes from |out| and writes them to
|
||||
* |in|. (The full value of |in_len| is still used to find the authentication
|
||||
* tag appended to the ciphertext, however, so must not be pre-masked.)
|
||||
*
|
||||
* |in| and |out| may be equal, but must not otherwise overlap.
|
||||
*
|
||||
* While decrypting, it updates the POLYVAL value found at the beginning of
|
||||
* |in_out_calculated_tag_and_scratch| and writes the updated value back before
|
||||
* return. During executation, it may use the whole of this space for other
|
||||
* purposes. In order to decrypt and update the POLYVAL value, it uses the
|
||||
* expanded key from |key| and the table of powers in |htable|. */
|
||||
// aes128gcmsiv_dec decrypts |in_len| & ~15 bytes from |out| and writes them to
|
||||
// |in|. (The full value of |in_len| is still used to find the authentication
|
||||
// tag appended to the ciphertext, however, so must not be pre-masked.)
|
||||
//
|
||||
// |in| and |out| may be equal, but must not otherwise overlap.
|
||||
//
|
||||
// While decrypting, it updates the POLYVAL value found at the beginning of
|
||||
// |in_out_calculated_tag_and_scratch| and writes the updated value back before
|
||||
// return. During executation, it may use the whole of this space for other
|
||||
// purposes. In order to decrypt and update the POLYVAL value, it uses the
|
||||
// expanded key from |key| and the table of powers in |htable|.
|
||||
extern void aes128gcmsiv_dec(const uint8_t *in, uint8_t *out,
|
||||
uint8_t in_out_calculated_tag_and_scratch[16 * 8],
|
||||
const uint8_t htable[16 * 6],
|
||||
const struct aead_aes_gcm_siv_asm_ctx *key,
|
||||
size_t in_len);
|
||||
|
||||
/* aes256gcmsiv_dec acts like |aes128gcmsiv_dec|, but for AES-256. */
|
||||
// aes256gcmsiv_dec acts like |aes128gcmsiv_dec|, but for AES-256.
|
||||
extern void aes256gcmsiv_dec(const uint8_t *in, uint8_t *out,
|
||||
uint8_t in_out_calculated_tag_and_scratch[16 * 8],
|
||||
const uint8_t htable[16 * 6],
|
||||
const struct aead_aes_gcm_siv_asm_ctx *key,
|
||||
size_t in_len);
|
||||
|
||||
/* aes128gcmsiv_kdf performs the AES-GCM-SIV KDF given the expanded key from
|
||||
* |key_schedule| and the nonce in |nonce|. Note that, while only 12 bytes of
|
||||
* the nonce are used, 16 bytes are read and so the value must be
|
||||
* right-padded. */
|
||||
// aes128gcmsiv_kdf performs the AES-GCM-SIV KDF given the expanded key from
|
||||
// |key_schedule| and the nonce in |nonce|. Note that, while only 12 bytes of
|
||||
// the nonce are used, 16 bytes are read and so the value must be
|
||||
// right-padded.
|
||||
extern void aes128gcmsiv_kdf(const uint8_t nonce[16],
|
||||
uint64_t out_key_material[8],
|
||||
const uint8_t *key_schedule);
|
||||
|
||||
/* aes256gcmsiv_kdf acts like |aes128gcmsiv_kdf|, but for AES-256. */
|
||||
// aes256gcmsiv_kdf acts like |aes128gcmsiv_kdf|, but for AES-256.
|
||||
extern void aes256gcmsiv_kdf(const uint8_t nonce[16],
|
||||
uint64_t out_key_material[12],
|
||||
const uint8_t *key_schedule);
|
||||
|
||||
/* aes128gcmsiv_aes_ks_enc_x1 performs a key expansion of the AES-128 key in
|
||||
* |key|, writes the expanded key to |out_expanded_key| and encrypts a single
|
||||
* block from |in| to |out|. */
|
||||
// aes128gcmsiv_aes_ks_enc_x1 performs a key expansion of the AES-128 key in
|
||||
// |key|, writes the expanded key to |out_expanded_key| and encrypts a single
|
||||
// block from |in| to |out|.
|
||||
extern void aes128gcmsiv_aes_ks_enc_x1(const uint8_t in[16], uint8_t out[16],
|
||||
uint8_t out_expanded_key[16 * 15],
|
||||
const uint64_t key[2]);
|
||||
|
||||
/* aes256gcmsiv_aes_ks_enc_x1 acts like |aes128gcmsiv_aes_ks_enc_x1|, but for
|
||||
* AES-256. */
|
||||
// aes256gcmsiv_aes_ks_enc_x1 acts like |aes128gcmsiv_aes_ks_enc_x1|, but for
|
||||
// AES-256.
|
||||
extern void aes256gcmsiv_aes_ks_enc_x1(const uint8_t in[16], uint8_t out[16],
|
||||
uint8_t out_expanded_key[16 * 15],
|
||||
const uint64_t key[4]);
|
||||
|
||||
/* aes128gcmsiv_ecb_enc_block encrypts a single block from |in| to |out| using
|
||||
* the expanded key in |expanded_key|. */
|
||||
// aes128gcmsiv_ecb_enc_block encrypts a single block from |in| to |out| using
|
||||
// the expanded key in |expanded_key|.
|
||||
extern void aes128gcmsiv_ecb_enc_block(
|
||||
const uint8_t in[16], uint8_t out[16],
|
||||
const struct aead_aes_gcm_siv_asm_ctx *expanded_key);
|
||||
|
||||
/* aes256gcmsiv_ecb_enc_block acts like |aes128gcmsiv_ecb_enc_block|, but for
|
||||
* AES-256. */
|
||||
// aes256gcmsiv_ecb_enc_block acts like |aes128gcmsiv_ecb_enc_block|, but for
|
||||
// AES-256.
|
||||
extern void aes256gcmsiv_ecb_enc_block(
|
||||
const uint8_t in[16], uint8_t out[16],
|
||||
const struct aead_aes_gcm_siv_asm_ctx *expanded_key);
|
||||
|
||||
/* aes128gcmsiv_enc_msg_x4 encrypts |in_len| bytes from |in| to |out| using the
|
||||
* expanded key from |key|. (The value of |in_len| must be a multiple of 16.)
|
||||
* The |in| and |out| buffers may be equal but must not otherwise overlap. The
|
||||
* initial counter is constructed from the given |tag| as required by
|
||||
* AES-GCM-SIV. */
|
||||
// aes128gcmsiv_enc_msg_x4 encrypts |in_len| bytes from |in| to |out| using the
|
||||
// expanded key from |key|. (The value of |in_len| must be a multiple of 16.)
|
||||
// The |in| and |out| buffers may be equal but must not otherwise overlap. The
|
||||
// initial counter is constructed from the given |tag| as required by
|
||||
// AES-GCM-SIV.
|
||||
extern void aes128gcmsiv_enc_msg_x4(const uint8_t *in, uint8_t *out,
|
||||
const uint8_t *tag,
|
||||
const struct aead_aes_gcm_siv_asm_ctx *key,
|
||||
size_t in_len);
|
||||
|
||||
/* aes256gcmsiv_enc_msg_x4 acts like |aes128gcmsiv_enc_msg_x4|, but for
|
||||
* AES-256. */
|
||||
// aes256gcmsiv_enc_msg_x4 acts like |aes128gcmsiv_enc_msg_x4|, but for
|
||||
// AES-256.
|
||||
extern void aes256gcmsiv_enc_msg_x4(const uint8_t *in, uint8_t *out,
|
||||
const uint8_t *tag,
|
||||
const struct aead_aes_gcm_siv_asm_ctx *key,
|
||||
size_t in_len);
|
||||
|
||||
/* aes128gcmsiv_enc_msg_x8 acts like |aes128gcmsiv_enc_msg_x4|, but is
|
||||
* optimised for longer messages. */
|
||||
// aes128gcmsiv_enc_msg_x8 acts like |aes128gcmsiv_enc_msg_x4|, but is
|
||||
// optimised for longer messages.
|
||||
extern void aes128gcmsiv_enc_msg_x8(const uint8_t *in, uint8_t *out,
|
||||
const uint8_t *tag,
|
||||
const struct aead_aes_gcm_siv_asm_ctx *key,
|
||||
size_t in_len);
|
||||
|
||||
/* aes256gcmsiv_enc_msg_x8 acts like |aes256gcmsiv_enc_msg_x4|, but is
|
||||
* optimised for longer messages. */
|
||||
// aes256gcmsiv_enc_msg_x8 acts like |aes256gcmsiv_enc_msg_x4|, but is
|
||||
// optimised for longer messages.
|
||||
extern void aes256gcmsiv_enc_msg_x8(const uint8_t *in, uint8_t *out,
|
||||
const uint8_t *tag,
|
||||
const struct aead_aes_gcm_siv_asm_ctx *key,
|
||||
size_t in_len);
|
||||
|
||||
/* gcm_siv_asm_polyval evaluates POLYVAL at |auth_key| on the given plaintext
|
||||
* and AD. The result is written to |out_tag|. */
|
||||
// gcm_siv_asm_polyval evaluates POLYVAL at |auth_key| on the given plaintext
|
||||
// and AD. The result is written to |out_tag|.
|
||||
static void gcm_siv_asm_polyval(uint8_t out_tag[16], const uint8_t *in,
|
||||
size_t in_len, const uint8_t *ad, size_t ad_len,
|
||||
const uint8_t auth_key[16],
|
||||
@ -268,10 +268,10 @@ static void gcm_siv_asm_polyval(uint8_t out_tag[16], const uint8_t *in,
|
||||
out_tag[15] &= 0x7f;
|
||||
}
|
||||
|
||||
/* aead_aes_gcm_siv_asm_crypt_last_block handles the encryption/decryption
|
||||
* (same thing in CTR mode) of the final block of a plaintext/ciphertext. It
|
||||
* writes |in_len| & 15 bytes to |out| + |in_len|, based on an initial counter
|
||||
* derived from |tag|. */
|
||||
// aead_aes_gcm_siv_asm_crypt_last_block handles the encryption/decryption
|
||||
// (same thing in CTR mode) of the final block of a plaintext/ciphertext. It
|
||||
// writes |in_len| & 15 bytes to |out| + |in_len|, based on an initial counter
|
||||
// derived from |tag|.
|
||||
static void aead_aes_gcm_siv_asm_crypt_last_block(
|
||||
int is_128_bit, uint8_t *out, const uint8_t *in, size_t in_len,
|
||||
const uint8_t tag[16],
|
||||
@ -299,8 +299,8 @@ static void aead_aes_gcm_siv_asm_crypt_last_block(
|
||||
}
|
||||
}
|
||||
|
||||
/* aead_aes_gcm_siv_kdf calculates the record encryption and authentication
|
||||
* keys given the |nonce|. */
|
||||
// aead_aes_gcm_siv_kdf calculates the record encryption and authentication
|
||||
// keys given the |nonce|.
|
||||
static void aead_aes_gcm_siv_kdf(
|
||||
int is_128_bit, const struct aead_aes_gcm_siv_asm_ctx *gcm_siv_ctx,
|
||||
uint64_t out_record_auth_key[2], uint64_t out_record_enc_key[4],
|
||||
@ -433,8 +433,8 @@ static int aead_aes_gcm_siv_asm_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
|
||||
} else {
|
||||
aes256gcmsiv_aes_ks((const uint8_t *) record_enc_key, &expanded_key.key[0]);
|
||||
}
|
||||
/* calculated_tag is 16*8 bytes, rather than 16 bytes, because
|
||||
* aes[128|256]gcmsiv_dec uses the extra as scratch space. */
|
||||
// calculated_tag is 16*8 bytes, rather than 16 bytes, because
|
||||
// aes[128|256]gcmsiv_dec uses the extra as scratch space.
|
||||
alignas(16) uint8_t calculated_tag[16 * 8] = {0};
|
||||
|
||||
OPENSSL_memset(calculated_tag, 0, EVP_AEAD_AES_GCM_SIV_TAG_LEN);
|
||||
@ -507,11 +507,11 @@ static int aead_aes_gcm_siv_asm_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
|
||||
}
|
||||
|
||||
static const EVP_AEAD aead_aes_128_gcm_siv_asm = {
|
||||
16, /* key length */
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, /* nonce length */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* overhead */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* max tag length */
|
||||
0, /* seal_scatter_supports_extra_in */
|
||||
16, // key length
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, // nonce length
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // overhead
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // max tag length
|
||||
0, // seal_scatter_supports_extra_in
|
||||
|
||||
aead_aes_gcm_siv_asm_init,
|
||||
NULL /* init_with_direction */,
|
||||
@ -523,11 +523,11 @@ static const EVP_AEAD aead_aes_128_gcm_siv_asm = {
|
||||
};
|
||||
|
||||
static const EVP_AEAD aead_aes_256_gcm_siv_asm = {
|
||||
32, /* key length */
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, /* nonce length */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* overhead */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* max tag length */
|
||||
0, /* seal_scatter_supports_extra_in */
|
||||
32, // key length
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, // nonce length
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // overhead
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // max tag length
|
||||
0, // seal_scatter_supports_extra_in
|
||||
|
||||
aead_aes_gcm_siv_asm_init,
|
||||
NULL /* init_with_direction */,
|
||||
@ -538,7 +538,7 @@ static const EVP_AEAD aead_aes_256_gcm_siv_asm = {
|
||||
NULL /* get_iv */,
|
||||
};
|
||||
|
||||
#endif /* X86_64 && !NO_ASM */
|
||||
#endif // X86_64 && !NO_ASM
|
||||
|
||||
struct aead_aes_gcm_siv_ctx {
|
||||
union {
|
||||
@ -555,7 +555,7 @@ static int aead_aes_gcm_siv_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
|
||||
|
||||
if (key_bits != 128 && key_bits != 256) {
|
||||
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_KEY_LENGTH);
|
||||
return 0; /* EVP_AEAD_CTX_init should catch this. */
|
||||
return 0; // EVP_AEAD_CTX_init should catch this.
|
||||
}
|
||||
|
||||
if (tag_len == EVP_AEAD_DEFAULT_TAG_LENGTH) {
|
||||
@ -588,13 +588,13 @@ static void aead_aes_gcm_siv_cleanup(EVP_AEAD_CTX *ctx) {
|
||||
OPENSSL_free(gcm_siv_ctx);
|
||||
}
|
||||
|
||||
/* gcm_siv_crypt encrypts (or decrypts—it's the same thing) |in_len| bytes from
|
||||
* |in| to |out|, using the block function |enc_block| with |key| in counter
|
||||
* mode, starting at |initial_counter|. This differs from the traditional
|
||||
* counter mode code in that the counter is handled little-endian, only the
|
||||
* first four bytes are used and the GCM-SIV tweak to the final byte is
|
||||
* applied. The |in| and |out| pointers may be equal but otherwise must not
|
||||
* alias. */
|
||||
// gcm_siv_crypt encrypts (or decrypts—it's the same thing) |in_len| bytes from
|
||||
// |in| to |out|, using the block function |enc_block| with |key| in counter
|
||||
// mode, starting at |initial_counter|. This differs from the traditional
|
||||
// counter mode code in that the counter is handled little-endian, only the
|
||||
// first four bytes are used and the GCM-SIV tweak to the final byte is
|
||||
// applied. The |in| and |out| pointers may be equal but otherwise must not
|
||||
// alias.
|
||||
static void gcm_siv_crypt(uint8_t *out, const uint8_t *in, size_t in_len,
|
||||
const uint8_t initial_counter[AES_BLOCK_SIZE],
|
||||
block128_f enc_block, const AES_KEY *key) {
|
||||
@ -624,8 +624,8 @@ static void gcm_siv_crypt(uint8_t *out, const uint8_t *in, size_t in_len,
|
||||
}
|
||||
}
|
||||
|
||||
/* gcm_siv_polyval evaluates POLYVAL at |auth_key| on the given plaintext and
|
||||
* AD. The result is written to |out_tag|. */
|
||||
// gcm_siv_polyval evaluates POLYVAL at |auth_key| on the given plaintext and
|
||||
// AD. The result is written to |out_tag|.
|
||||
static void gcm_siv_polyval(
|
||||
uint8_t out_tag[16], const uint8_t *in, size_t in_len, const uint8_t *ad,
|
||||
size_t ad_len, const uint8_t auth_key[16],
|
||||
@ -669,7 +669,7 @@ static void gcm_siv_polyval(
|
||||
out_tag[15] &= 0x7f;
|
||||
}
|
||||
|
||||
/* gcm_siv_record_keys contains the keys used for a specific GCM-SIV record. */
|
||||
// gcm_siv_record_keys contains the keys used for a specific GCM-SIV record.
|
||||
struct gcm_siv_record_keys {
|
||||
uint8_t auth_key[16];
|
||||
union {
|
||||
@ -679,8 +679,8 @@ struct gcm_siv_record_keys {
|
||||
block128_f enc_block;
|
||||
};
|
||||
|
||||
/* gcm_siv_keys calculates the keys for a specific GCM-SIV record with the
|
||||
* given nonce and writes them to |*out_keys|. */
|
||||
// gcm_siv_keys calculates the keys for a specific GCM-SIV record with the
|
||||
// given nonce and writes them to |*out_keys|.
|
||||
static void gcm_siv_keys(
|
||||
const struct aead_aes_gcm_siv_ctx *gcm_siv_ctx,
|
||||
struct gcm_siv_record_keys *out_keys,
|
||||
@ -791,11 +791,11 @@ static int aead_aes_gcm_siv_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
|
||||
}
|
||||
|
||||
static const EVP_AEAD aead_aes_128_gcm_siv = {
|
||||
16, /* key length */
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, /* nonce length */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* overhead */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* max tag length */
|
||||
0, /* seal_scatter_supports_extra_in */
|
||||
16, // key length
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, // nonce length
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // overhead
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // max tag length
|
||||
0, // seal_scatter_supports_extra_in
|
||||
|
||||
aead_aes_gcm_siv_init,
|
||||
NULL /* init_with_direction */,
|
||||
@ -807,11 +807,11 @@ static const EVP_AEAD aead_aes_128_gcm_siv = {
|
||||
};
|
||||
|
||||
static const EVP_AEAD aead_aes_256_gcm_siv = {
|
||||
32, /* key length */
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, /* nonce length */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* overhead */
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, /* max tag length */
|
||||
0, /* seal_scatter_supports_extra_in */
|
||||
32, // key length
|
||||
EVP_AEAD_AES_GCM_SIV_NONCE_LEN, // nonce length
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // overhead
|
||||
EVP_AEAD_AES_GCM_SIV_TAG_LEN, // max tag length
|
||||
0, // seal_scatter_supports_extra_in
|
||||
|
||||
aead_aes_gcm_siv_init,
|
||||
NULL /* init_with_direction */,
|
||||
@ -855,4 +855,4 @@ const EVP_AEAD *EVP_aead_aes_256_gcm_siv(void) {
|
||||
return &aead_aes_256_gcm_siv;
|
||||
}
|
||||
|
||||
#endif /* X86_64 && !NO_ASM */
|
||||
#endif // X86_64 && !NO_ASM
|
||||
|
@ -28,8 +28,8 @@ extern uint32_t GFp_armcap_P;
|
||||
void GFp_cpuid_setup(void) {
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
|
||||
/* See /usr/include/asm/hwcap.h on an aarch64 installation for the source of
|
||||
* these values. */
|
||||
// See /usr/include/asm/hwcap.h on an aarch64 installation for the source of
|
||||
// these values.
|
||||
static const unsigned long kNEON = 1 << 1;
|
||||
static const unsigned long kAES = 1 << 3;
|
||||
static const unsigned long kPMULL = 1 << 4;
|
||||
@ -37,8 +37,8 @@ void GFp_cpuid_setup(void) {
|
||||
static const unsigned long kSHA256 = 1 << 6;
|
||||
|
||||
if ((hwcap & kNEON) == 0) {
|
||||
/* Matching OpenSSL, if NEON is missing, don't report other features
|
||||
* either. */
|
||||
// Matching OpenSSL, if NEON is missing, don't report other features
|
||||
// either.
|
||||
return;
|
||||
}
|
||||
|
||||
@ -58,4 +58,4 @@ void GFp_cpuid_setup(void) {
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP */
|
||||
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP
|
||||
|
@ -33,15 +33,15 @@
|
||||
|
||||
#define HWCAP_NEON (1 << 12)
|
||||
|
||||
/* See /usr/include/asm/hwcap.h on an ARM installation for the source of
|
||||
* these values. */
|
||||
// See /usr/include/asm/hwcap.h on an ARM installation for the source of
|
||||
// these values.
|
||||
#define HWCAP2_AES (1 << 0)
|
||||
#define HWCAP2_PMULL (1 << 1)
|
||||
#define HWCAP2_SHA1 (1 << 2)
|
||||
#define HWCAP2_SHA2 (1 << 3)
|
||||
|
||||
/* |getauxval| is not available on Android until API level 20. Link it as a weak
|
||||
* symbol and use other methods as fallback. */
|
||||
// |getauxval| is not available on Android until API level 20. Link it as a weak
|
||||
// symbol and use other methods as fallback.
|
||||
unsigned long getauxval(unsigned long type) __attribute__((weak));
|
||||
|
||||
static int open_eintr(const char *path, int flags) {
|
||||
@ -60,8 +60,8 @@ static ssize_t read_eintr(int fd, void *out, size_t len) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* read_full reads exactly |len| bytes from |fd| to |out|. On error or end of
|
||||
* file, it returns zero. */
|
||||
// read_full reads exactly |len| bytes from |fd| to |out|. On error or end of
|
||||
// file, it returns zero.
|
||||
static int read_full(int fd, void *out, size_t len) {
|
||||
while (len > 0) {
|
||||
ssize_t ret = read_eintr(fd, out, len);
|
||||
@ -74,9 +74,9 @@ static int read_full(int fd, void *out, size_t len) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* read_file opens |path| and reads until end-of-file. On success, it returns
|
||||
* one and sets |*out_ptr| and |*out_len| to a newly-allocated buffer with the
|
||||
* contents. Otherwise, it returns zero. */
|
||||
// read_file opens |path| and reads until end-of-file. On success, it returns
|
||||
// one and sets |*out_ptr| and |*out_len| to a newly-allocated buffer with the
|
||||
// contents. Otherwise, it returns zero.
|
||||
static int read_file(char **out_ptr, size_t *out_len, const char *path) {
|
||||
int fd = open_eintr(path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
@ -126,7 +126,7 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* getauxval_proc behaves like |getauxval| but reads from /proc/self/auxv. */
|
||||
// getauxval_proc behaves like |getauxval| but reads from /proc/self/auxv.
|
||||
static unsigned long getauxval_proc(unsigned long type) {
|
||||
int fd = open_eintr("/proc/self/auxv", O_RDONLY);
|
||||
if (fd < 0) {
|
||||
@ -162,16 +162,16 @@ static int STRING_PIECE_equals(const STRING_PIECE *a, const char *b) {
|
||||
return a->len == b_len && memcmp(a->data, b, b_len) == 0;
|
||||
}
|
||||
|
||||
/* STRING_PIECE_split finds the first occurence of |sep| in |in| and, if found,
|
||||
* sets |*out_left| and |*out_right| to |in| split before and after it. It
|
||||
* returns one if |sep| was found and zero otherwise. */
|
||||
// STRING_PIECE_split finds the first occurence of |sep| in |in| and, if found,
|
||||
// sets |*out_left| and |*out_right| to |in| split before and after it. It
|
||||
// returns one if |sep| was found and zero otherwise.
|
||||
static int STRING_PIECE_split(STRING_PIECE *out_left, STRING_PIECE *out_right,
|
||||
const STRING_PIECE *in, char sep) {
|
||||
const char *p = memchr(in->data, sep, in->len);
|
||||
if (p == NULL) {
|
||||
return 0;
|
||||
}
|
||||
/* |out_left| or |out_right| may alias |in|, so make a copy. */
|
||||
// |out_left| or |out_right| may alias |in|, so make a copy.
|
||||
STRING_PIECE in_copy = *in;
|
||||
out_left->data = in_copy.data;
|
||||
out_left->len = p - in_copy.data;
|
||||
@ -180,7 +180,7 @@ static int STRING_PIECE_split(STRING_PIECE *out_left, STRING_PIECE *out_right,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* STRING_PIECE_trim removes leading and trailing whitespace from |s|. */
|
||||
// STRING_PIECE_trim removes leading and trailing whitespace from |s|.
|
||||
static void STRING_PIECE_trim(STRING_PIECE *s) {
|
||||
while (s->len != 0 && (s->data[0] == ' ' || s->data[0] == '\t')) {
|
||||
s->data++;
|
||||
@ -192,12 +192,12 @@ static void STRING_PIECE_trim(STRING_PIECE *s) {
|
||||
}
|
||||
}
|
||||
|
||||
/* extract_cpuinfo_field extracts a /proc/cpuinfo field named |field| from
|
||||
* |in|. If found, it sets |*out| to the value and returns one. Otherwise, it
|
||||
* returns zero. */
|
||||
// extract_cpuinfo_field extracts a /proc/cpuinfo field named |field| from
|
||||
// |in|. If found, it sets |*out| to the value and returns one. Otherwise, it
|
||||
// returns zero.
|
||||
static int extract_cpuinfo_field(STRING_PIECE *out, const STRING_PIECE *in,
|
||||
const char *field) {
|
||||
/* Process |in| one line at a time. */
|
||||
// Process |in| one line at a time.
|
||||
STRING_PIECE remaining = *in, line;
|
||||
while (STRING_PIECE_split(&line, &remaining, &remaining, '\n')) {
|
||||
STRING_PIECE key, value;
|
||||
@ -222,8 +222,8 @@ static int cpuinfo_field_equals(const STRING_PIECE *cpuinfo, const char *field,
|
||||
STRING_PIECE_equals(&extracted, value);
|
||||
}
|
||||
|
||||
/* has_list_item treats |list| as a space-separated list of items and returns
|
||||
* one if |item| is contained in |list| and zero otherwise. */
|
||||
// has_list_item treats |list| as a space-separated list of items and returns
|
||||
// one if |item| is contained in |list| and zero otherwise.
|
||||
static int has_list_item(const STRING_PIECE *list, const char *item) {
|
||||
STRING_PIECE remaining = *list, feature;
|
||||
while (STRING_PIECE_split(&feature, &remaining, &remaining, ' ')) {
|
||||
@ -236,11 +236,11 @@ static int has_list_item(const STRING_PIECE *list, const char *item) {
|
||||
|
||||
static unsigned long get_hwcap_cpuinfo(const STRING_PIECE *cpuinfo) {
|
||||
if (cpuinfo_field_equals(cpuinfo, "CPU architecture", "8")) {
|
||||
/* This is a 32-bit ARM binary running on a 64-bit kernel. NEON is always
|
||||
* available on ARMv8. Linux omits required features, so reading the
|
||||
* "Features" line does not work. (For simplicity, use strict equality. We
|
||||
* assume everything running on future ARM architectures will have a
|
||||
* working |getauxval|.) */
|
||||
// This is a 32-bit ARM binary running on a 64-bit kernel. NEON is always
|
||||
// available on ARMv8. Linux omits required features, so reading the
|
||||
// "Features" line does not work. (For simplicity, use strict equality. We
|
||||
// assume everything running on future ARM architectures will have a
|
||||
// working |getauxval|.)
|
||||
return HWCAP_NEON;
|
||||
}
|
||||
|
||||
@ -274,8 +274,8 @@ static unsigned long get_hwcap2_cpuinfo(const STRING_PIECE *cpuinfo) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* has_broken_neon returns one if |in| matches a CPU known to have a broken
|
||||
* NEON unit. See https://crbug.com/341598. */
|
||||
// has_broken_neon returns one if |in| matches a CPU known to have a broken
|
||||
// NEON unit. See https://crbug.com/341598.
|
||||
static int has_broken_neon(const STRING_PIECE *cpuinfo) {
|
||||
return cpuinfo_field_equals(cpuinfo, "CPU implementer", "0x51") &&
|
||||
cpuinfo_field_equals(cpuinfo, "CPU architecture", "7") &&
|
||||
@ -298,13 +298,13 @@ void GFp_cpuid_setup(void) {
|
||||
cpuinfo.data = cpuinfo_data;
|
||||
cpuinfo.len = cpuinfo_len;
|
||||
|
||||
/* |getauxval| is not available on Android until API level 20. If it is
|
||||
* unavailable, read from /proc/self/auxv as a fallback. This is unreadable
|
||||
* on some versions of Android, so further fall back to /proc/cpuinfo.
|
||||
*
|
||||
* See
|
||||
* https://android.googlesource.com/platform/ndk/+/882ac8f3392858991a0e1af33b4b7387ec856bd2
|
||||
* and b/13679666 (Google-internal) for details. */
|
||||
// |getauxval| is not available on Android until API level 20. If it is
|
||||
// unavailable, read from /proc/self/auxv as a fallback. This is unreadable
|
||||
// on some versions of Android, so further fall back to /proc/cpuinfo.
|
||||
//
|
||||
// See
|
||||
// https://android.googlesource.com/platform/ndk/+/882ac8f3392858991a0e1af33b4b7387ec856bd2
|
||||
// and b/13679666 (Google-internal) for details.
|
||||
unsigned long hwcap = 0;
|
||||
if (getauxval != NULL) {
|
||||
hwcap = getauxval(AT_HWCAP);
|
||||
@ -316,18 +316,18 @@ void GFp_cpuid_setup(void) {
|
||||
hwcap = get_hwcap_cpuinfo(&cpuinfo);
|
||||
}
|
||||
|
||||
/* Clear NEON support if known broken. */
|
||||
// Clear NEON support if known broken.
|
||||
g_has_broken_neon = has_broken_neon(&cpuinfo);
|
||||
if (g_has_broken_neon) {
|
||||
hwcap &= ~HWCAP_NEON;
|
||||
}
|
||||
|
||||
/* Matching OpenSSL, only report other features if NEON is present. */
|
||||
// Matching OpenSSL, only report other features if NEON is present.
|
||||
if (hwcap & HWCAP_NEON) {
|
||||
GFp_armcap_P |= ARMV7_NEON;
|
||||
|
||||
/* Some ARMv8 Android devices don't expose AT_HWCAP2. Fall back to
|
||||
* /proc/cpuinfo. See https://crbug.com/596156. */
|
||||
// Some ARMv8 Android devices don't expose AT_HWCAP2. Fall back to
|
||||
// /proc/cpuinfo. See https://crbug.com/596156.
|
||||
unsigned long hwcap2 = 0;
|
||||
if (getauxval != NULL) {
|
||||
hwcap2 = getauxval(AT_HWCAP2);
|
||||
@ -355,4 +355,4 @@ void GFp_cpuid_setup(void) {
|
||||
|
||||
int GFp_has_broken_NEON(void) { return g_has_broken_neon; }
|
||||
|
||||
#endif /* OPENSSL_ARM && !OPENSSL_STATIC_ARMCAP */
|
||||
#endif // OPENSSL_ARM && !OPENSSL_STATIC_ARMCAP
|
||||
|
@ -71,9 +71,9 @@
|
||||
#include "internal.h"
|
||||
|
||||
|
||||
/* OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
|
||||
* is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
|
||||
* |*out_edx|. */
|
||||
// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
|
||||
// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
|
||||
// |*out_edx|.
|
||||
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
|
||||
uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
|
||||
#if defined(_MSC_VER)
|
||||
@ -84,8 +84,8 @@ static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
|
||||
*out_ecx = (uint32_t)tmp[2];
|
||||
*out_edx = (uint32_t)tmp[3];
|
||||
#elif defined(__pic__) && defined(OPENSSL_32_BIT)
|
||||
/* Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
|
||||
* See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602. */
|
||||
// Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
|
||||
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602.
|
||||
__asm__ volatile (
|
||||
"xor %%ecx, %%ecx\n"
|
||||
"mov %%ebx, %%edi\n"
|
||||
@ -104,11 +104,10 @@ static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
|
||||
#endif
|
||||
}
|
||||
|
||||
/* OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
|
||||
* Currently only XCR0 is defined by Intel so |xcr| should always be zero.
|
||||
*
|
||||
* See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
|
||||
*/
|
||||
// OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
|
||||
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
|
||||
//
|
||||
// See https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
|
||||
static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
|
||||
#if defined(_MSC_VER)
|
||||
return (uint64_t)_xgetbv(xcr);
|
||||
@ -120,7 +119,7 @@ static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
|
||||
}
|
||||
|
||||
void GFp_cpuid_setup(void) {
|
||||
/* Determine the vendor and maximum input value. */
|
||||
// Determine the vendor and maximum input value.
|
||||
uint32_t eax, ebx, ecx, edx;
|
||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0);
|
||||
|
||||
@ -140,23 +139,23 @@ void GFp_cpuid_setup(void) {
|
||||
extended_features = ebx;
|
||||
}
|
||||
|
||||
/* Determine the number of cores sharing an L1 data cache to adjust the
|
||||
* hyper-threading bit. */
|
||||
// Determine the number of cores sharing an L1 data cache to adjust the
|
||||
// hyper-threading bit.
|
||||
uint32_t cores_per_cache = 0;
|
||||
if (is_amd) {
|
||||
/* AMD CPUs never share an L1 data cache between threads but do set the HTT
|
||||
* bit on multi-core CPUs. */
|
||||
// AMD CPUs never share an L1 data cache between threads but do set the HTT
|
||||
// bit on multi-core CPUs.
|
||||
cores_per_cache = 1;
|
||||
} else if (num_ids >= 4) {
|
||||
/* TODO(davidben): The Intel manual says this CPUID leaf enumerates all
|
||||
* caches using ECX and doesn't say which is first. Does this matter? */
|
||||
// TODO(davidben): The Intel manual says this CPUID leaf enumerates all
|
||||
// caches using ECX and doesn't say which is first. Does this matter?
|
||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 4);
|
||||
cores_per_cache = 1 + ((eax >> 14) & 0xfff);
|
||||
}
|
||||
|
||||
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
|
||||
|
||||
/* Adjust the hyper-threading bit. */
|
||||
// Adjust the hyper-threading bit.
|
||||
if (edx & (1 << 28)) {
|
||||
uint32_t num_logical_cores = (ebx >> 16) & 0xff;
|
||||
if (cores_per_cache == 1 || num_logical_cores <= 1) {
|
||||
@ -164,17 +163,17 @@ void GFp_cpuid_setup(void) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Reserved bit #20 was historically repurposed to control the in-memory
|
||||
* representation of RC4 state. Always set it to zero. */
|
||||
// Reserved bit #20 was historically repurposed to control the in-memory
|
||||
// representation of RC4 state. Always set it to zero.
|
||||
edx &= ~(1 << 20);
|
||||
|
||||
/* Reserved bit #30 is repurposed to signal an Intel CPU. */
|
||||
// Reserved bit #30 is repurposed to signal an Intel CPU.
|
||||
if (is_intel) {
|
||||
edx |= (1 << 30);
|
||||
|
||||
/* Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
|
||||
* some Silvermont-specific codepaths which perform better. See OpenSSL
|
||||
* commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f. */
|
||||
// Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
|
||||
// some Silvermont-specific codepaths which perform better. See OpenSSL
|
||||
// commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
|
||||
if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
|
||||
(eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
|
||||
ecx &= ~(1 << 26);
|
||||
@ -183,36 +182,36 @@ void GFp_cpuid_setup(void) {
|
||||
edx &= ~(1 << 30);
|
||||
}
|
||||
|
||||
/* The SDBG bit is repurposed to denote AMD XOP support. */
|
||||
// The SDBG bit is repurposed to denote AMD XOP support.
|
||||
ecx &= ~(1 << 11);
|
||||
|
||||
uint64_t xcr0 = 0;
|
||||
if (ecx & (1 << 27)) {
|
||||
/* XCR0 may only be queried if the OSXSAVE bit is set. */
|
||||
// XCR0 may only be queried if the OSXSAVE bit is set.
|
||||
xcr0 = OPENSSL_xgetbv(0);
|
||||
}
|
||||
/* See Intel manual, volume 1, section 14.3. */
|
||||
// See Intel manual, volume 1, section 14.3.
|
||||
if ((xcr0 & 6) != 6) {
|
||||
/* YMM registers cannot be used. */
|
||||
ecx &= ~(1 << 28); /* AVX */
|
||||
ecx &= ~(1 << 12); /* FMA */
|
||||
ecx &= ~(1 << 11); /* AMD XOP */
|
||||
/* Clear AVX2 and AVX512* bits.
|
||||
*
|
||||
* TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
|
||||
* doesn't clear those. */
|
||||
// YMM registers cannot be used.
|
||||
ecx &= ~(1 << 28); // AVX
|
||||
ecx &= ~(1 << 12); // FMA
|
||||
ecx &= ~(1 << 11); // AMD XOP
|
||||
// Clear AVX2 and AVX512* bits.
|
||||
//
|
||||
// TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
|
||||
// doesn't clear those.
|
||||
extended_features &=
|
||||
~((1 << 5) | (1 << 16) | (1 << 21) | (1 << 30) | (1 << 31));
|
||||
}
|
||||
/* See Intel manual, volume 1, section 15.2. */
|
||||
// See Intel manual, volume 1, section 15.2.
|
||||
if ((xcr0 & 0xe6) != 0xe6) {
|
||||
/* Clear AVX512F. Note we don't touch other AVX512 extensions because they
|
||||
* can be used with YMM. */
|
||||
// Clear AVX512F. Note we don't touch other AVX512 extensions because they
|
||||
// can be used with YMM.
|
||||
extended_features &= ~(1 << 16);
|
||||
}
|
||||
|
||||
/* Disable ADX instructions on Knights Landing. See OpenSSL commit
|
||||
* 64d92d74985ebb3d0be58a9718f9e080a14a8e7f. */
|
||||
// Disable ADX instructions on Knights Landing. See OpenSSL commit
|
||||
// 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
|
||||
if ((ecx & (1 << 26)) == 0) {
|
||||
extended_features &= ~(1 << 19);
|
||||
}
|
||||
@ -223,4 +222,4 @@ void GFp_cpuid_setup(void) {
|
||||
GFp_ia32cap_P[3] = 0;
|
||||
}
|
||||
|
||||
#endif /* !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64) */
|
||||
#endif // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64)
|
||||
|
@ -42,13 +42,13 @@
|
||||
|
||||
|
||||
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
|
||||
/* This value must be explicitly initialised to zero in order to work around a
|
||||
* bug in libtool or the linker on OS X.
|
||||
*
|
||||
* If not initialised then it becomes a "common symbol". When put into an
|
||||
* archive, linking on OS X will fail to resolve common symbols. By
|
||||
* initialising it to zero, it becomes a "data symbol", which isn't so
|
||||
* affected. */
|
||||
// This value must be explicitly initialised to zero in order to work around a
|
||||
// bug in libtool or the linker on OS X.
|
||||
//
|
||||
// If not initialised then it becomes a "common symbol". When put into an
|
||||
// archive, linking on OS X will fail to resolve common symbols. By
|
||||
// initialising it to zero, it becomes a "data symbol", which isn't so
|
||||
// affected.
|
||||
uint32_t GFp_ia32cap_P[4] = {0};
|
||||
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
|
||||
|
||||
@ -82,9 +82,9 @@ uint32_t GFp_armcap_P = 0;
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
/* The getrandom syscall was added in Linux 3.17. For some important platforms,
|
||||
* we also support building against older kernels' headers. For other
|
||||
* platforms, the newer kernel's headers are required. */
|
||||
// The getrandom syscall was added in Linux 3.17. For some important platforms,
|
||||
// we also support building against older kernels' headers. For other
|
||||
// platforms, the newer kernel's headers are required. */
|
||||
#if !defined(SYS_getrandom)
|
||||
#if defined(OPENSSL_AARCH64)
|
||||
#define SYS_getrandom 278
|
||||
@ -102,8 +102,8 @@ uint32_t GFp_armcap_P = 0;
|
||||
const long GFp_SYS_GETRANDOM = SYS_getrandom;
|
||||
#endif
|
||||
|
||||
/* These allow tests in other languages to verify that their understanding of
|
||||
* the C types matches the C compiler's understanding. */
|
||||
// These allow tests in other languages to verify that their understanding of
|
||||
// the C types matches the C compiler's understanding.
|
||||
|
||||
#define DEFINE_METRICS(ty) \
|
||||
OPENSSL_EXPORT uint16_t GFp_##ty##_align = alignof(ty); \
|
||||
|
@ -12,12 +12,12 @@
|
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
/* This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
|
||||
* 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
|
||||
* public domain but this file has the ISC license just to keep licencing
|
||||
* simple.
|
||||
*
|
||||
* The field functions are shared by Ed25519 and X25519 where possible. */
|
||||
// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
|
||||
// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
|
||||
// public domain but this file has the ISC license just to keep licencing
|
||||
// simple.
|
||||
//
|
||||
// The field functions are shared by Ed25519 and X25519 where possible.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
@ -29,7 +29,7 @@
|
||||
#include "internal.h"
|
||||
|
||||
|
||||
/* Prevent -Wmissing-prototypes warnings. */
|
||||
// Prevent -Wmissing-prototypes warnings.
|
||||
void GFp_curve25519_scalar_mask(uint8_t a[32]);
|
||||
void GFp_fe_invert(fe out, const fe z);
|
||||
uint8_t GFp_fe_isnegative(const fe f);
|
||||
@ -68,7 +68,7 @@ static uint64_t load_4(const uint8_t *in) {
|
||||
}
|
||||
|
||||
static void fe_frombytes(fe h, const uint8_t *s) {
|
||||
/* Ignores top bit of h. */
|
||||
// Ignores top bit of h.
|
||||
int64_t h0 = load_4(s);
|
||||
int64_t h1 = load_3(s + 4) << 6;
|
||||
int64_t h2 = load_3(s + 7) << 5;
|
||||
@ -114,28 +114,28 @@ static void fe_frombytes(fe h, const uint8_t *s) {
|
||||
h[9] = (int32_t)h9;
|
||||
}
|
||||
|
||||
/* Preconditions:
|
||||
* |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
*
|
||||
* Write p=2^255-19; q=floor(h/p).
|
||||
* Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
|
||||
*
|
||||
* Proof:
|
||||
* Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
|
||||
* Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
|
||||
*
|
||||
* Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
|
||||
* Then 0<y<1.
|
||||
*
|
||||
* Write r=h-pq.
|
||||
* Have 0<=r<=p-1=2^255-20.
|
||||
* Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
|
||||
*
|
||||
* Write x=r+19(2^-255)r+y.
|
||||
* Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
|
||||
*
|
||||
* Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
|
||||
* so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q. */
|
||||
// Preconditions:
|
||||
// |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
//
|
||||
// Write p=2^255-19; q=floor(h/p).
|
||||
// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
|
||||
//
|
||||
// Proof:
|
||||
// Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
|
||||
// Also have |h-2^230 h9|<2^231 so |19 2^(-255)(h-2^230 h9)|<1/4.
|
||||
//
|
||||
// Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
|
||||
// Then 0<y<1.
|
||||
//
|
||||
// Write r=h-pq.
|
||||
// Have 0<=r<=p-1=2^255-20.
|
||||
// Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
|
||||
//
|
||||
// Write x=r+19(2^-255)r+y.
|
||||
// Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
|
||||
//
|
||||
// Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
|
||||
// so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
|
||||
void GFp_fe_tobytes(uint8_t *s, const fe h) {
|
||||
int32_t h0 = h[0];
|
||||
int32_t h1 = h[1];
|
||||
@ -161,9 +161,9 @@ void GFp_fe_tobytes(uint8_t *s, const fe h) {
|
||||
q = (h8 + q) >> 26;
|
||||
q = (h9 + q) >> 25;
|
||||
|
||||
/* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */
|
||||
// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
|
||||
h0 += 19 * q;
|
||||
/* Goal: Output h-2^255 q, which is between 0 and 2^255-20. */
|
||||
// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
|
||||
|
||||
h1 += h0 >> 26; h0 &= kBottom26Bits;
|
||||
h2 += h1 >> 25; h1 &= kBottom25Bits;
|
||||
@ -175,12 +175,12 @@ void GFp_fe_tobytes(uint8_t *s, const fe h) {
|
||||
h8 += h7 >> 25; h7 &= kBottom25Bits;
|
||||
h9 += h8 >> 26; h8 &= kBottom26Bits;
|
||||
h9 &= kBottom25Bits;
|
||||
/* h10 = carry9 */
|
||||
// h10 = carry9
|
||||
|
||||
/* Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
|
||||
* Have h0+...+2^230 h9 between 0 and 2^255-1;
|
||||
* evidently 2^255 h10-2^255 q = 0.
|
||||
* Goal: Output h0+...+2^230 h9. */
|
||||
// Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
|
||||
// Have h0+...+2^230 h9 between 0 and 2^255-1;
|
||||
// evidently 2^255 h10-2^255 q = 0.
|
||||
// Goal: Output h0+...+2^230 h9.
|
||||
|
||||
s[0] = (uint8_t)(h0 >> 0);
|
||||
s[1] = (uint8_t)(h0 >> 8);
|
||||
@ -216,29 +216,29 @@ void GFp_fe_tobytes(uint8_t *s, const fe h) {
|
||||
s[31] = (uint8_t)(h9 >> 18);
|
||||
}
|
||||
|
||||
/* h = f */
|
||||
// h = f
|
||||
static void fe_copy(fe h, const fe f) {
|
||||
memmove(h, f, sizeof(int32_t) * 10);
|
||||
}
|
||||
|
||||
/* h = 0 */
|
||||
// h = 0
|
||||
static void fe_0(fe h) { memset(h, 0, sizeof(int32_t) * 10); }
|
||||
|
||||
/* h = 1 */
|
||||
// h = 1
|
||||
static void fe_1(fe h) {
|
||||
memset(h, 0, sizeof(int32_t) * 10);
|
||||
h[0] = 1;
|
||||
}
|
||||
|
||||
/* h = f + g
|
||||
* Can overlap h with f or g.
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
* |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
|
||||
// h = f + g
|
||||
// Can overlap h with f or g.
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
// |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
static void fe_add(fe h, const fe f, const fe g) {
|
||||
unsigned i;
|
||||
for (i = 0; i < 10; i++) {
|
||||
@ -246,15 +246,15 @@ static void fe_add(fe h, const fe f, const fe g) {
|
||||
}
|
||||
}
|
||||
|
||||
/* h = f - g
|
||||
* Can overlap h with f or g.
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
* |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
|
||||
// h = f - g
|
||||
// Can overlap h with f or g.
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
// |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
static void fe_sub(fe h, const fe f, const fe g) {
|
||||
unsigned i;
|
||||
for (i = 0; i < 10; i++) {
|
||||
@ -262,33 +262,33 @@ static void fe_sub(fe h, const fe f, const fe g) {
|
||||
}
|
||||
}
|
||||
|
||||
/* h = f * g
|
||||
* Can overlap h with f or g.
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
* |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
||||
*
|
||||
* Notes on implementation strategy:
|
||||
*
|
||||
* Using schoolbook multiplication.
|
||||
* Karatsuba would save a little in some cost models.
|
||||
*
|
||||
* Most multiplications by 2 and 19 are 32-bit precomputations;
|
||||
* cheaper than 64-bit postcomputations.
|
||||
*
|
||||
* There is one remaining multiplication by 19 in the carry chain;
|
||||
* one *19 precomputation can be merged into this,
|
||||
* but the resulting data flow is considerably less clean.
|
||||
*
|
||||
* There are 12 carries below.
|
||||
* 10 of them are 2-way parallelizable and vectorizable.
|
||||
* Can get away with 11 carries, but then data flow is much deeper.
|
||||
*
|
||||
* With tighter constraints on inputs can squeeze carries into int32. */
|
||||
// h = f * g
|
||||
// Can overlap h with f or g.
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
// |g| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
||||
//
|
||||
// Notes on implementation strategy:
|
||||
//
|
||||
// Using schoolbook multiplication.
|
||||
// Karatsuba would save a little in some cost models.
|
||||
//
|
||||
// Most multiplications by 2 and 19 are 32-bit precomputations;
|
||||
// cheaper than 64-bit postcomputations.
|
||||
//
|
||||
// There is one remaining multiplication by 19 in the carry chain;
|
||||
// one *19 precomputation can be merged into this,
|
||||
// but the resulting data flow is considerably less clean.
|
||||
//
|
||||
// There are 12 carries below.
|
||||
// 10 of them are 2-way parallelizable and vectorizable.
|
||||
// Can get away with 11 carries, but then data flow is much deeper.
|
||||
//
|
||||
// With tighter constraints on inputs can squeeze carries into int32.
|
||||
void GFp_fe_mul(fe h, const fe f, const fe g) {
|
||||
int32_t f0 = f[0];
|
||||
int32_t f1 = f[1];
|
||||
@ -310,8 +310,8 @@ void GFp_fe_mul(fe h, const fe f, const fe g) {
|
||||
int32_t g7 = g[7];
|
||||
int32_t g8 = g[8];
|
||||
int32_t g9 = g[9];
|
||||
int32_t g1_19 = 19 * g1; /* 1.959375*2^29 */
|
||||
int32_t g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
|
||||
int32_t g1_19 = 19 * g1; // 1.959375*2^29
|
||||
int32_t g2_19 = 19 * g2; // 1.959375*2^30; still ok
|
||||
int32_t g3_19 = 19 * g3;
|
||||
int32_t g4_19 = 19 * g4;
|
||||
int32_t g5_19 = 19 * g5;
|
||||
@ -445,53 +445,53 @@ void GFp_fe_mul(fe h, const fe f, const fe g) {
|
||||
int64_t carry8;
|
||||
int64_t carry9;
|
||||
|
||||
/* |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
|
||||
* i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
|
||||
* |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
|
||||
* i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9 */
|
||||
// |h0| <= (1.65*1.65*2^52*(1+19+19+19+19)+1.65*1.65*2^50*(38+38+38+38+38))
|
||||
// i.e. |h0| <= 1.4*2^60; narrower ranges for h2, h4, h6, h8
|
||||
// |h1| <= (1.65*1.65*2^51*(1+1+19+19+19+19+19+19+19+19))
|
||||
// i.e. |h1| <= 1.7*2^59; narrower ranges for h3, h5, h7, h9
|
||||
|
||||
carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
|
||||
carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
|
||||
/* |h0| <= 2^25 */
|
||||
/* |h4| <= 2^25 */
|
||||
/* |h1| <= 1.71*2^59 */
|
||||
/* |h5| <= 1.71*2^59 */
|
||||
// |h0| <= 2^25
|
||||
// |h4| <= 2^25
|
||||
// |h1| <= 1.71*2^59
|
||||
// |h5| <= 1.71*2^59
|
||||
|
||||
carry1 = h1 + (1 << 24); h2 += carry1 >> 25; h1 -= carry1 & kTop39Bits;
|
||||
carry5 = h5 + (1 << 24); h6 += carry5 >> 25; h5 -= carry5 & kTop39Bits;
|
||||
/* |h1| <= 2^24; from now on fits into int32 */
|
||||
/* |h5| <= 2^24; from now on fits into int32 */
|
||||
/* |h2| <= 1.41*2^60 */
|
||||
/* |h6| <= 1.41*2^60 */
|
||||
// |h1| <= 2^24; from now on fits into int32
|
||||
// |h5| <= 2^24; from now on fits into int32
|
||||
// |h2| <= 1.41*2^60
|
||||
// |h6| <= 1.41*2^60
|
||||
|
||||
carry2 = h2 + (1 << 25); h3 += carry2 >> 26; h2 -= carry2 & kTop38Bits;
|
||||
carry6 = h6 + (1 << 25); h7 += carry6 >> 26; h6 -= carry6 & kTop38Bits;
|
||||
/* |h2| <= 2^25; from now on fits into int32 unchanged */
|
||||
/* |h6| <= 2^25; from now on fits into int32 unchanged */
|
||||
/* |h3| <= 1.71*2^59 */
|
||||
/* |h7| <= 1.71*2^59 */
|
||||
// |h2| <= 2^25; from now on fits into int32 unchanged
|
||||
// |h6| <= 2^25; from now on fits into int32 unchanged
|
||||
// |h3| <= 1.71*2^59
|
||||
// |h7| <= 1.71*2^59
|
||||
|
||||
carry3 = h3 + (1 << 24); h4 += carry3 >> 25; h3 -= carry3 & kTop39Bits;
|
||||
carry7 = h7 + (1 << 24); h8 += carry7 >> 25; h7 -= carry7 & kTop39Bits;
|
||||
/* |h3| <= 2^24; from now on fits into int32 unchanged */
|
||||
/* |h7| <= 2^24; from now on fits into int32 unchanged */
|
||||
/* |h4| <= 1.72*2^34 */
|
||||
/* |h8| <= 1.41*2^60 */
|
||||
// |h3| <= 2^24; from now on fits into int32 unchanged
|
||||
// |h7| <= 2^24; from now on fits into int32 unchanged
|
||||
// |h4| <= 1.72*2^34
|
||||
// |h8| <= 1.41*2^60
|
||||
|
||||
carry4 = h4 + (1 << 25); h5 += carry4 >> 26; h4 -= carry4 & kTop38Bits;
|
||||
carry8 = h8 + (1 << 25); h9 += carry8 >> 26; h8 -= carry8 & kTop38Bits;
|
||||
/* |h4| <= 2^25; from now on fits into int32 unchanged */
|
||||
/* |h8| <= 2^25; from now on fits into int32 unchanged */
|
||||
/* |h5| <= 1.01*2^24 */
|
||||
/* |h9| <= 1.71*2^59 */
|
||||
// |h4| <= 2^25; from now on fits into int32 unchanged
|
||||
// |h8| <= 2^25; from now on fits into int32 unchanged
|
||||
// |h5| <= 1.01*2^24
|
||||
// |h9| <= 1.71*2^59
|
||||
|
||||
carry9 = h9 + (1 << 24); h0 += (carry9 >> 25) * 19; h9 -= carry9 & kTop39Bits;
|
||||
/* |h9| <= 2^24; from now on fits into int32 unchanged */
|
||||
/* |h0| <= 1.1*2^39 */
|
||||
// |h9| <= 2^24; from now on fits into int32 unchanged
|
||||
// |h0| <= 1.1*2^39
|
||||
|
||||
carry0 = h0 + (1 << 25); h1 += carry0 >> 26; h0 -= carry0 & kTop38Bits;
|
||||
/* |h0| <= 2^25; from now on fits into int32 unchanged */
|
||||
/* |h1| <= 1.01*2^24 */
|
||||
// |h0| <= 2^25; from now on fits into int32 unchanged
|
||||
// |h1| <= 1.01*2^24
|
||||
|
||||
h[0] = (int32_t)h0;
|
||||
h[1] = (int32_t)h1;
|
||||
@ -505,14 +505,16 @@ void GFp_fe_mul(fe h, const fe f, const fe g) {
|
||||
h[9] = (int32_t)h9;
|
||||
}
|
||||
|
||||
/* h = f * f
|
||||
* Can overlap h with f.
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. */
|
||||
// h = f * f
|
||||
// Can overlap h with f.
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
||||
//
|
||||
// See fe_mul.c for discussion of implementation strategy.
|
||||
static void fe_sq(fe h, const fe f) {
|
||||
int32_t f0 = f[0];
|
||||
int32_t f1 = f[1];
|
||||
@ -532,11 +534,11 @@ static void fe_sq(fe h, const fe f) {
|
||||
int32_t f5_2 = 2 * f5;
|
||||
int32_t f6_2 = 2 * f6;
|
||||
int32_t f7_2 = 2 * f7;
|
||||
int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
|
||||
int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
|
||||
int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
|
||||
int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
|
||||
int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
|
||||
int32_t f5_38 = 38 * f5; // 1.959375*2^30
|
||||
int32_t f6_19 = 19 * f6; // 1.959375*2^30
|
||||
int32_t f7_38 = 38 * f7; // 1.959375*2^30
|
||||
int32_t f8_19 = 19 * f8; // 1.959375*2^30
|
||||
int32_t f9_38 = 38 * f9; // 1.959375*2^30
|
||||
int64_t f0f0 = f0 * (int64_t) f0;
|
||||
int64_t f0f1_2 = f0_2 * (int64_t) f1;
|
||||
int64_t f0f2_2 = f0_2 * (int64_t) f2;
|
||||
@ -702,13 +704,13 @@ void GFp_fe_invert(fe out, const fe z) {
|
||||
GFp_fe_mul(out, t1, t0);
|
||||
}
|
||||
|
||||
/* h = -f
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
|
||||
// h = -f
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
static void fe_neg(fe h, const fe f) {
|
||||
unsigned i;
|
||||
for (i = 0; i < 10; i++) {
|
||||
@ -716,10 +718,10 @@ static void fe_neg(fe h, const fe f) {
|
||||
}
|
||||
}
|
||||
|
||||
/* Replace (f,g) with (g,g) if b == 1;
|
||||
* replace (f,g) with (f,g) if b == 0.
|
||||
*
|
||||
* Preconditions: b in {0,1}. */
|
||||
// Replace (f,g) with (g,g) if b == 1;
|
||||
// replace (f,g) with (f,g) if b == 0.
|
||||
//
|
||||
// Preconditions: b in {0,1}.
|
||||
static void fe_cmov(fe f, const fe g, unsigned b) {
|
||||
b = 0-b;
|
||||
unsigned i;
|
||||
@ -730,11 +732,11 @@ static void fe_cmov(fe f, const fe g, unsigned b) {
|
||||
}
|
||||
}
|
||||
|
||||
/* return 0 if f == 0
|
||||
* return 1 if f != 0
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
|
||||
// return 0 if f == 0
|
||||
// return 1 if f != 0
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
static int fe_isnonzero(const fe f) {
|
||||
uint8_t s[32];
|
||||
GFp_fe_tobytes(s, f);
|
||||
@ -743,25 +745,27 @@ static int fe_isnonzero(const fe f) {
|
||||
return GFp_memcmp(s, zero, sizeof(zero)) != 0;
|
||||
}
|
||||
|
||||
/* return 1 if f is in {1,3,5,...,q-2}
|
||||
* return 0 if f is in {0,2,4,...,q-1}
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. */
|
||||
// return 1 if f is in {1,3,5,...,q-2}
|
||||
// return 0 if f is in {0,2,4,...,q-1}
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
uint8_t GFp_fe_isnegative(const fe f) {
|
||||
uint8_t s[32];
|
||||
GFp_fe_tobytes(s, f);
|
||||
return s[0] & 1;
|
||||
}
|
||||
|
||||
/* h = 2 * f * f
|
||||
* Can overlap h with f.
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc. */
|
||||
// h = 2 * f * f
|
||||
// Can overlap h with f.
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.
|
||||
//
|
||||
// See fe_mul.c for discussion of implementation strategy.
|
||||
static void fe_sq2(fe h, const fe f) {
|
||||
int32_t f0 = f[0];
|
||||
int32_t f1 = f[1];
|
||||
@ -781,11 +785,11 @@ static void fe_sq2(fe h, const fe f) {
|
||||
int32_t f5_2 = 2 * f5;
|
||||
int32_t f6_2 = 2 * f6;
|
||||
int32_t f7_2 = 2 * f7;
|
||||
int32_t f5_38 = 38 * f5; /* 1.959375*2^30 */
|
||||
int32_t f6_19 = 19 * f6; /* 1.959375*2^30 */
|
||||
int32_t f7_38 = 38 * f7; /* 1.959375*2^30 */
|
||||
int32_t f8_19 = 19 * f8; /* 1.959375*2^30 */
|
||||
int32_t f9_38 = 38 * f9; /* 1.959375*2^30 */
|
||||
int32_t f5_38 = 38 * f5; // 1.959375*2^30
|
||||
int32_t f6_19 = 19 * f6; // 1.959375*2^30
|
||||
int32_t f7_38 = 38 * f7; // 1.959375*2^30
|
||||
int32_t f8_19 = 19 * f8; // 1.959375*2^30
|
||||
int32_t f9_38 = 38 * f9; // 1.959375*2^30
|
||||
int64_t f0f0 = f0 * (int64_t) f0;
|
||||
int64_t f0f1_2 = f0_2 * (int64_t) f1;
|
||||
int64_t f0f2_2 = f0_2 * (int64_t) f2;
|
||||
@ -978,24 +982,24 @@ int GFp_x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) {
|
||||
fe_1(h->Z);
|
||||
fe_sq(u, h->Y);
|
||||
GFp_fe_mul(v, u, d);
|
||||
fe_sub(u, u, h->Z); /* u = y^2-1 */
|
||||
fe_add(v, v, h->Z); /* v = dy^2+1 */
|
||||
fe_sub(u, u, h->Z); // u = y^2-1
|
||||
fe_add(v, v, h->Z); // v = dy^2+1
|
||||
|
||||
fe_sq(v3, v);
|
||||
GFp_fe_mul(v3, v3, v); /* v3 = v^3 */
|
||||
GFp_fe_mul(v3, v3, v); // v3 = v^3
|
||||
fe_sq(h->X, v3);
|
||||
GFp_fe_mul(h->X, h->X, v);
|
||||
GFp_fe_mul(h->X, h->X, u); /* x = uv^7 */
|
||||
GFp_fe_mul(h->X, h->X, u); // x = uv^7
|
||||
|
||||
fe_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
|
||||
fe_pow22523(h->X, h->X); // x = (uv^7)^((q-5)/8)
|
||||
GFp_fe_mul(h->X, h->X, v3);
|
||||
GFp_fe_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
|
||||
GFp_fe_mul(h->X, h->X, u); // x = uv^3(uv^7)^((q-5)/8)
|
||||
|
||||
fe_sq(vxx, h->X);
|
||||
GFp_fe_mul(vxx, vxx, v);
|
||||
fe_sub(check, vxx, u); /* vx^2-u */
|
||||
if (fe_isnonzero(check)) {
|
||||
fe_add(check, vxx, u); /* vx^2+u */
|
||||
fe_add(check, vxx, u); // vx^2+u
|
||||
if (fe_isnonzero(check)) {
|
||||
return 0;
|
||||
}
|
||||
@ -1029,7 +1033,7 @@ static void ge_precomp_0(ge_precomp *h) {
|
||||
fe_0(h->xy2d);
|
||||
}
|
||||
|
||||
/* r = p */
|
||||
// r = p
|
||||
static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
|
||||
fe_copy(r->X, p->X);
|
||||
fe_copy(r->Y, p->Y);
|
||||
@ -1039,7 +1043,7 @@ static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
|
||||
static const fe d2 = {-21827239, -5839606, -30745221, 13898782, 229458,
|
||||
15978800, -12551817, -6495438, 29715968, 9444199};
|
||||
|
||||
/* r = p */
|
||||
// r = p
|
||||
static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
|
||||
fe_add(r->YplusX, p->Y, p->X);
|
||||
fe_sub(r->YminusX, p->Y, p->X);
|
||||
@ -1047,14 +1051,14 @@ static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
|
||||
GFp_fe_mul(r->T2d, p->T, d2);
|
||||
}
|
||||
|
||||
/* r = p */
|
||||
// r = p
|
||||
static void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
|
||||
GFp_fe_mul(r->X, p->X, p->T);
|
||||
GFp_fe_mul(r->Y, p->Y, p->Z);
|
||||
GFp_fe_mul(r->Z, p->Z, p->T);
|
||||
}
|
||||
|
||||
/* r = p */
|
||||
// r = p
|
||||
static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
|
||||
GFp_fe_mul(r->X, p->X, p->T);
|
||||
GFp_fe_mul(r->Y, p->Y, p->Z);
|
||||
@ -1062,7 +1066,7 @@ static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
|
||||
GFp_fe_mul(r->T, p->X, p->Y);
|
||||
}
|
||||
|
||||
/* r = 2 * p */
|
||||
// r = 2 * p
|
||||
static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
|
||||
fe t0;
|
||||
|
||||
@ -1077,14 +1081,14 @@ static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
|
||||
fe_sub(r->T, r->T, r->Z);
|
||||
}
|
||||
|
||||
/* r = 2 * p */
|
||||
// r = 2 * p
|
||||
static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
|
||||
ge_p2 q;
|
||||
ge_p3_to_p2(&q, p);
|
||||
ge_p2_dbl(r, &q);
|
||||
}
|
||||
|
||||
/* r = p + q */
|
||||
// r = p + q
|
||||
static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
|
||||
fe t0;
|
||||
|
||||
@ -1100,7 +1104,7 @@ static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
|
||||
fe_sub(r->T, t0, r->T);
|
||||
}
|
||||
|
||||
/* r = p - q */
|
||||
// r = p - q
|
||||
static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
|
||||
fe t0;
|
||||
|
||||
@ -1116,7 +1120,7 @@ static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
|
||||
fe_add(r->T, t0, r->T);
|
||||
}
|
||||
|
||||
/* r = p + q */
|
||||
// r = p + q
|
||||
static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
|
||||
fe t0;
|
||||
|
||||
@ -1133,7 +1137,7 @@ static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
|
||||
fe_sub(r->T, t0, r->T);
|
||||
}
|
||||
|
||||
/* r = p - q */
|
||||
// r = p - q
|
||||
static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
|
||||
fe t0;
|
||||
|
||||
@ -1153,10 +1157,10 @@ static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
|
||||
static uint8_t equal(signed char b, signed char c) {
|
||||
uint8_t ub = b;
|
||||
uint8_t uc = c;
|
||||
uint8_t x = ub ^ uc; /* 0: yes; 1..255: no */
|
||||
uint32_t y = x; /* 0: yes; 1..255: no */
|
||||
y -= 1; /* 4294967295: yes; 0..254: no */
|
||||
y >>= 31; /* 1: yes; 0: no */
|
||||
uint8_t x = ub ^ uc; // 0: yes; 1..255: no
|
||||
uint32_t y = x; // 0: yes; 1..255: no
|
||||
y -= 1; // 4294967295: yes; 0..254: no
|
||||
y >>= 31; // 1: yes; 0: no
|
||||
return (uint8_t)y;
|
||||
}
|
||||
|
||||
@ -1170,8 +1174,8 @@ static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
|
||||
|
||||
static void x25519_ge_scalarmult_small_precomp(
|
||||
ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) {
|
||||
/* precomp_table is first expanded into matching |ge_precomp|
|
||||
* elements. */
|
||||
// precomp_table is first expanded into matching |ge_precomp|
|
||||
// elements.
|
||||
ge_precomp multiples[15];
|
||||
|
||||
unsigned i;
|
||||
@ -1188,9 +1192,9 @@ static void x25519_ge_scalarmult_small_precomp(
|
||||
GFp_fe_mul(out->xy2d, out->xy2d, d2);
|
||||
}
|
||||
|
||||
/* See the comment above |k25519SmallPrecomp| about the structure of the
|
||||
* precomputed elements. This loop does 64 additions and 64 doublings to
|
||||
* calculate the result. */
|
||||
// See the comment above |k25519SmallPrecomp| about the structure of the
|
||||
// precomputed elements. This loop does 64 additions and 64 doublings to
|
||||
// calculate the result.
|
||||
ge_p3_0(h);
|
||||
|
||||
for (i = 63; i < 64; i--) {
|
||||
@ -1220,14 +1224,14 @@ static void x25519_ge_scalarmult_small_precomp(
|
||||
}
|
||||
}
|
||||
|
||||
/* This block of code replaces the standard base-point table with a much smaller
|
||||
* one. The standard table is 30,720 bytes while this one is just 960.
|
||||
*
|
||||
* This table contains 15 pairs of group elements, (x, y), where each field
|
||||
* element is serialised with |GFp_fe_tobytes|. If |i| is the index of the group
|
||||
* element then consider i+1 as a four-bit number: (i_0, i_1, i_2, i_3) where
|
||||
* i_0 is the most significant bit). The value of the group element is then
|
||||
* (i_x2^192 + i_1x2^128 + i_2x2^64 + i_3)G, where G is the generator. */
|
||||
// This block of code replaces the standard base-point table with a much smaller
|
||||
// one. The standard table is 30,720 bytes while this one is just 960.
|
||||
//
|
||||
// This table contains 15 pairs of group elements, (x, y), where each field
|
||||
// element is serialised with |fe_tobytes|. If |i| is the index of the group
|
||||
// element then consider i+1 as a four-bit number: (i₀, i₁, i₂, i₃) (where i₀
|
||||
// is the most significant bit). The value of the group element is then:
|
||||
// (i₀×2^192 + i₁×2^128 + i₂×2^64 + i₃)G, where G is the generator.
|
||||
static const uint8_t k25519SmallPrecomp[15 * 2 * 32] = {
|
||||
0x1a, 0xd5, 0x25, 0x8f, 0x60, 0x2d, 0x56, 0xc9, 0xb2, 0xa7, 0x25, 0x95,
|
||||
0x60, 0xc7, 0x2c, 0x69, 0x5c, 0xdc, 0xd6, 0xfd, 0x31, 0xe2, 0xa4, 0xc0,
|
||||
@ -1317,7 +1321,7 @@ void GFp_x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
|
||||
|
||||
#else
|
||||
|
||||
/* k25519Precomp[i][j] = (j+1)*256^i*B */
|
||||
// k25519Precomp[i][j] = (j+1)*256^i*B
|
||||
static const ge_precomp k25519Precomp[32][8] = {
|
||||
{
|
||||
{
|
||||
@ -3435,7 +3439,7 @@ static const ge_precomp k25519Precomp[32][8] = {
|
||||
|
||||
static uint8_t negative(signed char b) {
|
||||
uint32_t x = b;
|
||||
x >>= 31; /* 1: yes; 0: no */
|
||||
x >>= 31; // 1: yes; 0: no
|
||||
return (uint8_t)x;
|
||||
}
|
||||
|
||||
@ -3459,12 +3463,12 @@ static void table_select(ge_precomp *t, int pos, signed char b) {
|
||||
cmov(t, &minust, bnegative);
|
||||
}
|
||||
|
||||
/* h = a * B
|
||||
* where a = a[0]+256*a[1]+...+256^31 a[31]
|
||||
* B is the Ed25519 base point (x,4/5) with x positive.
|
||||
*
|
||||
* Preconditions:
|
||||
* a[31] <= 127 */
|
||||
// h = a * B
|
||||
// where a = a[0]+256*a[1]+...+256^31 a[31]
|
||||
// B is the Ed25519 base point (x,4/5) with x positive.
|
||||
//
|
||||
// Preconditions:
|
||||
// a[31] <= 127
|
||||
void GFp_x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
|
||||
signed char e[64];
|
||||
signed char carry;
|
||||
@ -3477,8 +3481,8 @@ void GFp_x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
|
||||
e[2 * i + 0] = (a[i] >> 0) & 15;
|
||||
e[2 * i + 1] = (a[i] >> 4) & 15;
|
||||
}
|
||||
/* each e[i] is between 0 and 15 */
|
||||
/* e[63] is between 0 and 7 */
|
||||
// each e[i] is between 0 and 15
|
||||
// e[63] is between 0 and 7
|
||||
|
||||
carry = 0;
|
||||
for (i = 0; i < 63; ++i) {
|
||||
@ -3488,7 +3492,7 @@ void GFp_x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
|
||||
e[i] -= carry << 4;
|
||||
}
|
||||
e[63] += carry;
|
||||
/* each e[i] is between -8 and 8 */
|
||||
// each e[i] is between -8 and 8
|
||||
|
||||
ge_p3_0(h);
|
||||
for (i = 1; i < 64; i += 2) {
|
||||
@ -3616,15 +3620,15 @@ static const ge_precomp Bi[8] = {
|
||||
},
|
||||
};
|
||||
|
||||
/* r = a * A + b * B
|
||||
* where a = a[0]+256*a[1]+...+256^31 a[31].
|
||||
* and b = b[0]+256*b[1]+...+256^31 b[31].
|
||||
* B is the Ed25519 base point (x,4/5) with x positive. */
|
||||
// r = a * A + b * B
|
||||
// where a = a[0]+256*a[1]+...+256^31 a[31].
|
||||
// and b = b[0]+256*b[1]+...+256^31 b[31].
|
||||
// B is the Ed25519 base point (x,4/5) with x positive.
|
||||
void GFp_ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
|
||||
const ge_p3 *A, const uint8_t *b) {
|
||||
signed char aslide[256];
|
||||
signed char bslide[256];
|
||||
ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
|
||||
ge_cached Ai[8]; // A,3A,5A,7A,9A,11A,13A,15A
|
||||
ge_p1p1 t;
|
||||
ge_p3 u;
|
||||
ge_p3 A2;
|
||||
@ -3689,16 +3693,16 @@ void GFp_ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a,
|
||||
}
|
||||
}
|
||||
|
||||
/* The set of scalars is \Z/l
|
||||
* where l = 2^252 + 27742317777372353535851937790883648493. */
|
||||
// The set of scalars is \Z/l
|
||||
// where l = 2^252 + 27742317777372353535851937790883648493.
|
||||
|
||||
/* Input:
|
||||
* s[0]+256*s[1]+...+256^63*s[63] = s
|
||||
*
|
||||
* Output:
|
||||
* s[0]+256*s[1]+...+256^31*s[31] = s mod l
|
||||
* where l = 2^252 + 27742317777372353535851937790883648493.
|
||||
* Overwrites s in place. */
|
||||
// Input:
|
||||
// s[0]+256*s[1]+...+256^63*s[63] = s
|
||||
//
|
||||
// Output:
|
||||
// s[0]+256*s[1]+...+256^31*s[31] = s mod l
|
||||
// where l = 2^252 + 27742317777372353535851937790883648493.
|
||||
// Overwrites s in place.
|
||||
void GFp_x25519_sc_reduce(uint8_t *s) {
|
||||
int64_t s0 = 2097151 & load_3(s);
|
||||
int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
|
||||
@ -4032,14 +4036,14 @@ void GFp_x25519_sc_reduce(uint8_t *s) {
|
||||
s[31] = (uint8_t)(s11 >> 17);
|
||||
}
|
||||
|
||||
/* Input:
|
||||
* a[0]+256*a[1]+...+256^31*a[31] = a
|
||||
* b[0]+256*b[1]+...+256^31*b[31] = b
|
||||
* c[0]+256*c[1]+...+256^31*c[31] = c
|
||||
*
|
||||
* Output:
|
||||
* s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
|
||||
* where l = 2^252 + 27742317777372353535851937790883648493. */
|
||||
// Input:
|
||||
// a[0]+256*a[1]+...+256^31*a[31] = a
|
||||
// b[0]+256*b[1]+...+256^31*b[31] = b
|
||||
// c[0]+256*c[1]+...+256^31*c[31] = c
|
||||
//
|
||||
// Output:
|
||||
// s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
|
||||
// where l = 2^252 + 27742317777372353535851937790883648493.
|
||||
void GFp_x25519_sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
|
||||
const uint8_t *c) {
|
||||
int64_t a0 = 2097151 & load_3(a);
|
||||
@ -4536,10 +4540,10 @@ void GFp_x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
|
||||
|
||||
#else
|
||||
|
||||
/* Replace (f,g) with (g,f) if b == 1;
|
||||
* replace (f,g) with (f,g) if b == 0.
|
||||
*
|
||||
* Preconditions: b in {0,1}. */
|
||||
// Replace (f,g) with (g,f) if b == 1;
|
||||
// replace (f,g) with (f,g) if b == 0.
|
||||
//
|
||||
// Preconditions: b in {0,1}.
|
||||
static void fe_cswap(fe f, fe g, unsigned int b) {
|
||||
b = 0-b;
|
||||
unsigned i;
|
||||
@ -4551,14 +4555,14 @@ static void fe_cswap(fe f, fe g, unsigned int b) {
|
||||
}
|
||||
}
|
||||
|
||||
/* h = f * 121666
|
||||
* Can overlap h with f.
|
||||
*
|
||||
* Preconditions:
|
||||
* |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
*
|
||||
* Postconditions:
|
||||
* |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. */
|
||||
// h = f * 121666
|
||||
// Can overlap h with f.
|
||||
//
|
||||
// Preconditions:
|
||||
// |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
|
||||
//
|
||||
// Postconditions:
|
||||
// |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
|
||||
static void GFp_fe_mul121666(fe h, fe f) {
|
||||
int32_t f0 = f[0];
|
||||
int32_t f1 = f[1];
|
||||
@ -4676,18 +4680,18 @@ void GFp_x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
|
||||
x25519_scalar_mult_generic(out, scalar, point);
|
||||
}
|
||||
|
||||
#endif /* BORINGSSL_X25519_X86_64 */
|
||||
#endif // BORINGSSL_X25519_X86_64
|
||||
|
||||
|
||||
/* Prototypes to avoid -Wmissing-prototypes warnings. */
|
||||
// Prototypes to avoid -Wmissing-prototypes warnings.
|
||||
void GFp_x25519_public_from_private(uint8_t out_public_value[32],
|
||||
const uint8_t private_key[32]);
|
||||
|
||||
#if defined(BORINGSSL_X25519_X86_64)
|
||||
|
||||
/* When |BORINGSSL_X25519_X86_64| is set, base point multiplication is done with
|
||||
* the Montgomery ladder because it's faster. Otherwise it's done using the
|
||||
* Ed25519 tables. */
|
||||
// When |BORINGSSL_X25519_X86_64| is set, base point multiplication is done with
|
||||
// the Montgomery ladder because it's faster. Otherwise it's done using the
|
||||
// Ed25519 tables.
|
||||
|
||||
void GFp_x25519_public_from_private(uint8_t out_public_value[32],
|
||||
const uint8_t private_key[32]) {
|
||||
@ -4714,8 +4718,8 @@ void GFp_x25519_public_from_private(uint8_t out_public_value[32],
|
||||
ge_p3 A;
|
||||
GFp_x25519_ge_scalarmult_base(&A, e);
|
||||
|
||||
/* We only need the u-coordinate of the curve25519 point. The map is
|
||||
* u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). */
|
||||
// We only need the u-coordinate of the curve25519 point. The map is
|
||||
// u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y).
|
||||
fe zplusy, zminusy, zminusy_inv;
|
||||
fe_add(zplusy, A.Z, A.Y);
|
||||
fe_sub(zminusy, A.Z, A.Y);
|
||||
@ -4724,4 +4728,4 @@ void GFp_x25519_public_from_private(uint8_t out_public_value[32],
|
||||
GFp_fe_tobytes(out_public_value, zplusy);
|
||||
}
|
||||
|
||||
#endif /* BORINGSSL_X25519_X86_64 */
|
||||
#endif // BORINGSSL_X25519_X86_64
|
||||
|
@ -27,8 +27,8 @@ extern "C" {
|
||||
|
||||
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_SMALL) && \
|
||||
!defined(OPENSSL_WINDOWS) && !defined(OPENSSL_NO_ASM)
|
||||
/* This isn't compatible with Windows because the asm code makes use of the red
|
||||
* zone, which Windows doesn't support. */
|
||||
// This isn't compatible with Windows because the asm code makes use of the red
|
||||
// zone, which Windows doesn't support.
|
||||
#define BORINGSSL_X25519_X86_64
|
||||
|
||||
void GFp_x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
|
||||
@ -39,17 +39,17 @@ void GFp_x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
|
||||
#if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_APPLE)
|
||||
#define BORINGSSL_X25519_NEON
|
||||
|
||||
/* x25519_NEON is defined in asm/x25519-arm.S. */
|
||||
// x25519_NEON is defined in asm/x25519-arm.S.
|
||||
void GFp_x25519_NEON(uint8_t out[32], const uint8_t scalar[32],
|
||||
const uint8_t point[32]);
|
||||
#endif
|
||||
|
||||
/* fe means field element. Here the field is \Z/(2^255-19). An element t,
|
||||
* entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
|
||||
* t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
|
||||
* context.
|
||||
*
|
||||
* Keep in sync with `Elem` and `ELEM_LIMBS` in curve25519/ops.rs. */
|
||||
// fe means field element. Here the field is \Z/(2^255-19). An element t,
|
||||
// entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
|
||||
// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
|
||||
// context.
|
||||
//
|
||||
// Keep in sync with `Elem` and `ELEM_LIMBS` in curve25519/ops.rs.
|
||||
typedef int32_t fe[10];
|
||||
|
||||
/* ge means group element.
|
||||
@ -62,10 +62,9 @@ typedef int32_t fe[10];
|
||||
* ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
|
||||
* ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
|
||||
* ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
|
||||
* ge_precomp (Duif): (y+x,y-x,2dxy)
|
||||
*/
|
||||
* ge_precomp (Duif): (y+x,y-x,2dxy) */
|
||||
|
||||
/* Keep in sync with `Point` in curve25519/ops.rs. */
|
||||
// Keep in sync with `Point` in curve25519/ops.rs.
|
||||
typedef struct {
|
||||
fe X;
|
||||
fe Y;
|
||||
@ -73,7 +72,7 @@ typedef struct {
|
||||
} ge_p2;
|
||||
|
||||
|
||||
/* Keep in sync with `ExtPoint` in curve25519/ops.rs. */
|
||||
// Keep in sync with `ExtPoint` in curve25519/ops.rs.
|
||||
typedef struct {
|
||||
fe X;
|
||||
fe Y;
|
||||
@ -103,7 +102,7 @@ typedef struct {
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_CURVE25519_INTERNAL_H */
|
||||
#endif // OPENSSL_HEADER_CURVE25519_INTERNAL_H
|
||||
|
@ -12,12 +12,12 @@
|
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
/* This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
|
||||
* 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
|
||||
* public domain but this file has the ISC license just to keep licencing
|
||||
* simple.
|
||||
*
|
||||
* The field functions are shared by Ed25519 and X25519 where possible. */
|
||||
// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
|
||||
// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
|
||||
// public domain but this file has the ISC license just to keep licencing
|
||||
// simple.
|
||||
//
|
||||
// The field functions are shared by Ed25519 and X25519 where possible.
|
||||
|
||||
#include <string.h>
|
||||
|
||||
@ -28,7 +28,7 @@
|
||||
|
||||
typedef struct { uint64_t v[5]; } fe25519;
|
||||
|
||||
/* These functions are defined in asm/x25519-x86_64.S */
|
||||
// These functions are defined in asm/x25519-x86_64.S.
|
||||
void GFp_x25519_x86_64_work_cswap(fe25519 *, uint64_t);
|
||||
void GFp_x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b);
|
||||
void GFp_x25519_x86_64_square(fe25519 *out, const fe25519 *a);
|
||||
@ -43,7 +43,7 @@ static void fe25519_setint(fe25519 *r, unsigned v) {
|
||||
r->v[4] = 0;
|
||||
}
|
||||
|
||||
/* Assumes input x being reduced below 2^255 */
|
||||
// Assumes input x being reduced below 2^255
|
||||
static void fe25519_pack(unsigned char r[32], const fe25519 *x) {
|
||||
fe25519 t;
|
||||
t = *x;
|
||||
@ -241,4 +241,4 @@ void GFp_x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
|
||||
fe25519_pack(out, &t);
|
||||
}
|
||||
|
||||
#endif /* BORINGSSL_X25519_X86_64 */
|
||||
#endif // BORINGSSL_X25519_X86_64
|
||||
|
@ -58,11 +58,10 @@
|
||||
#if defined(OPENSSL_NO_ASM) || \
|
||||
(!defined(OPENSSL_X86) && !defined(OPENSSL_X86_64) && !defined(OPENSSL_ARM))
|
||||
|
||||
/* Te0[x] = S [x].[02, 01, 01, 03];
|
||||
* Te1[x] = S [x].[03, 02, 01, 01];
|
||||
* Te2[x] = S [x].[01, 03, 02, 01];
|
||||
* Te3[x] = S [x].[01, 01, 03, 02];
|
||||
*/
|
||||
// Te0[x] = S [x].[02, 01, 01, 03];
|
||||
// Te1[x] = S [x].[03, 02, 01, 01];
|
||||
// Te2[x] = S [x].[01, 03, 02, 01];
|
||||
// Te3[x] = S [x].[01, 01, 03, 02];
|
||||
|
||||
static const uint32_t Te0[256] = {
|
||||
0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU,
|
||||
@ -283,7 +282,7 @@ static const uint32_t Te3[256] = {
|
||||
static const uint32_t rcon[] = {
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
|
||||
0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000,
|
||||
/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
|
||||
// for 128-bit blocks, Rijndael never uses more than 10 rcon values
|
||||
};
|
||||
|
||||
int GFp_AES_set_encrypt_key(const uint8_t *key, unsigned bits,
|
||||
@ -366,19 +365,19 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
|
||||
#ifndef FULL_UNROLL
|
||||
int r;
|
||||
#endif /* ?FULL_UNROLL */
|
||||
#endif // ?FULL_UNROLL
|
||||
|
||||
assert(in && out && key);
|
||||
rk = key->rd_key;
|
||||
|
||||
/* map byte array block to cipher state
|
||||
* and add initial round key: */
|
||||
// map byte array block to cipher state
|
||||
// and add initial round key:
|
||||
s0 = from_be_u32_ptr(in) ^ rk[0];
|
||||
s1 = from_be_u32_ptr(in + 4) ^ rk[1];
|
||||
s2 = from_be_u32_ptr(in + 8) ^ rk[2];
|
||||
s3 = from_be_u32_ptr(in + 12) ^ rk[3];
|
||||
#ifdef FULL_UNROLL
|
||||
/* round 1: */
|
||||
// round 1:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[4];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -387,7 +386,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[s1 & 0xff] ^ rk[6];
|
||||
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[s2 & 0xff] ^ rk[7];
|
||||
/* round 2: */
|
||||
// round 2:
|
||||
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
|
||||
Te3[t3 & 0xff] ^ rk[8];
|
||||
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
|
||||
@ -396,7 +395,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[t1 & 0xff] ^ rk[10];
|
||||
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[t2 & 0xff] ^ rk[11];
|
||||
/* round 3: */
|
||||
// round 3:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[12];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -405,7 +404,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[s1 & 0xff] ^ rk[14];
|
||||
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[s2 & 0xff] ^ rk[15];
|
||||
/* round 4: */
|
||||
// round 4:
|
||||
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
|
||||
Te3[t3 & 0xff] ^ rk[16];
|
||||
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
|
||||
@ -414,7 +413,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[t1 & 0xff] ^ rk[18];
|
||||
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[t2 & 0xff] ^ rk[19];
|
||||
/* round 5: */
|
||||
// round 5:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[20];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -423,7 +422,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[s1 & 0xff] ^ rk[22];
|
||||
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[s2 & 0xff] ^ rk[23];
|
||||
/* round 6: */
|
||||
// round 6:
|
||||
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
|
||||
Te3[t3 & 0xff] ^ rk[24];
|
||||
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
|
||||
@ -432,7 +431,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[t1 & 0xff] ^ rk[26];
|
||||
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[t2 & 0xff] ^ rk[27];
|
||||
/* round 7: */
|
||||
// round 7:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[28];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -441,7 +440,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[s1 & 0xff] ^ rk[30];
|
||||
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[s2 & 0xff] ^ rk[31];
|
||||
/* round 8: */
|
||||
// round 8:
|
||||
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
|
||||
Te3[t3 & 0xff] ^ rk[32];
|
||||
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
|
||||
@ -450,7 +449,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[t1 & 0xff] ^ rk[34];
|
||||
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[t2 & 0xff] ^ rk[35];
|
||||
/* round 9: */
|
||||
// round 9:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[36];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -460,7 +459,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[s2 & 0xff] ^ rk[39];
|
||||
if (key->rounds > 10) {
|
||||
/* round 10: */
|
||||
// round 10:
|
||||
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
|
||||
Te3[t3 & 0xff] ^ rk[40];
|
||||
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
|
||||
@ -469,7 +468,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[t1 & 0xff] ^ rk[42];
|
||||
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[t2 & 0xff] ^ rk[43];
|
||||
/* round 11: */
|
||||
// round 11:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[44];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -479,7 +478,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^
|
||||
Te3[s2 & 0xff] ^ rk[47];
|
||||
if (key->rounds > 12) {
|
||||
/* round 12: */
|
||||
// round 12:
|
||||
s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^
|
||||
Te3[t3 & 0xff] ^ rk[48];
|
||||
s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^
|
||||
@ -488,7 +487,7 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
Te3[t1 & 0xff] ^ rk[50];
|
||||
s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[t2 & 0xff] ^ rk[51];
|
||||
/* round 13: */
|
||||
// round 13:
|
||||
t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
Te3[s3 & 0xff] ^ rk[52];
|
||||
t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^
|
||||
@ -500,10 +499,8 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
}
|
||||
}
|
||||
rk += key->rounds << 2;
|
||||
#else /* !FULL_UNROLL */
|
||||
/*
|
||||
* Nr - 1 full rounds:
|
||||
*/
|
||||
#else // !FULL_UNROLL
|
||||
// Nr - 1 full rounds:
|
||||
r = key->rounds >> 1;
|
||||
for (;;) {
|
||||
t0 = Te0[(s0 >> 24)] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^
|
||||
@ -529,8 +526,8 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
s3 = Te0[(t3 >> 24)] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^
|
||||
Te3[(t2) & 0xff] ^ rk[3];
|
||||
}
|
||||
#endif /* ?FULL_UNROLL */
|
||||
/* apply last round and map cipher state to byte array block: */
|
||||
#endif // ?FULL_UNROLL
|
||||
// apply last round and map cipher state to byte array block:
|
||||
s0 = (Te2[(t0 >> 24)] & 0xff000000) ^ (Te3[(t1 >> 16) & 0xff] & 0x00ff0000) ^
|
||||
(Te0[(t2 >> 8) & 0xff] & 0x0000ff00) ^ (Te1[(t3) & 0xff] & 0x000000ff) ^
|
||||
rk[0];
|
||||
@ -551,10 +548,10 @@ void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
|
||||
#else
|
||||
|
||||
/* In this case several functions are provided by asm code. However, one cannot
|
||||
* control asm symbol visibility with command line flags and such so they are
|
||||
* always hidden and wrapped by these C functions, which can be so
|
||||
* controlled. */
|
||||
// In this case several functions are provided by asm code. However, one cannot
|
||||
// control asm symbol visibility with command line flags and such so they are
|
||||
// always hidden and wrapped by these C functions, which can be so
|
||||
// controlled.
|
||||
|
||||
void GFp_asm_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
|
||||
void GFp_AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
|
||||
@ -579,4 +576,4 @@ int GFp_AES_set_encrypt_key(const uint8_t *key, unsigned bits,
|
||||
return GFp_asm_AES_set_encrypt_key(key, bits, aeskey);
|
||||
}
|
||||
|
||||
#endif /* OPENSSL_NO_ASM || (!OPENSSL_X86 && !OPENSSL_X86_64 && !OPENSSL_ARM) */
|
||||
#endif // OPENSSL_NO_ASM || (!OPENSSL_X86 && !OPENSSL_X86_64 && !OPENSSL_ARM)
|
||||
|
@ -30,7 +30,7 @@ extern "C" {
|
||||
static int hwaes_capable(void) {
|
||||
return GFp_is_ARMv8_AES_capable();
|
||||
}
|
||||
#endif /* !NO_ASM && (AES || AARCH64) */
|
||||
#endif // !NO_ASM && (AES || AARCH64)
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_PPC64LE)
|
||||
#define HWAES
|
||||
@ -38,7 +38,7 @@ static int hwaes_capable(void) {
|
||||
static int hwaes_capable(void) {
|
||||
return GFp_is_PPC64LE_vcrypto_capable();
|
||||
}
|
||||
#endif /* !NO_ASM && PPC64LE */
|
||||
#endif // !NO_ASM && PPC64LE
|
||||
|
||||
|
||||
#if defined(HWAES)
|
||||
@ -57,7 +57,7 @@ void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
|
||||
#endif /* HWAES */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_AES_INTERNAL_H */
|
||||
#endif // OPENSSL_HEADER_AES_INTERNAL_H
|
||||
|
@ -69,8 +69,8 @@ void GFp_BN_init(BIGNUM *bn) {
|
||||
}
|
||||
|
||||
void GFp_BN_free(BIGNUM *bn) {
|
||||
/* Keep this in sync with the |Drop| impl for |BIGNUM| in
|
||||
* |ring::rsa::bigint|. */
|
||||
// Keep this in sync with the |Drop| impl for |BIGNUM| in
|
||||
// |ring::rsa::bigint|.
|
||||
|
||||
if (bn == NULL) {
|
||||
return;
|
||||
|
@ -133,10 +133,10 @@ int GFp_bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
|
||||
const BN_ULONG *n0, int num);
|
||||
#else
|
||||
|
||||
/* GFp_BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
|
||||
* layout so that accessing any of these table values shows the same access
|
||||
* pattern as far as cache lines are concerned. The following functions are
|
||||
* used to transfer a BIGNUM from/to that table. */
|
||||
// GFp_BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
|
||||
// layout so that accessing any of these table values shows the same access
|
||||
// pattern as far as cache lines are concerned. The following functions are
|
||||
// used to transfer a BIGNUM from/to that table.
|
||||
static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
|
||||
int window) {
|
||||
int i, j;
|
||||
@ -144,7 +144,7 @@ static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
|
||||
BN_ULONG *table = (BN_ULONG *) buf;
|
||||
|
||||
if (top > b->top) {
|
||||
top = b->top; /* this works because 'buf' is explicitly zeroed */
|
||||
top = b->top; // this works because 'buf' is explicitly zeroed
|
||||
}
|
||||
|
||||
for (i = 0, j = idx; i < top; i++, j += width) {
|
||||
@ -178,8 +178,8 @@ static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
|
||||
int xstride = 1 << (window - 2);
|
||||
BN_ULONG y0, y1, y2, y3;
|
||||
|
||||
i = idx >> (window - 2); /* equivalent of idx / xstride */
|
||||
idx &= xstride - 1; /* equivalent of idx % xstride */
|
||||
i = idx >> (window - 2); // equivalent of idx / xstride
|
||||
idx &= xstride - 1; // equivalent of idx % xstride
|
||||
|
||||
y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
|
||||
y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
|
||||
@ -205,25 +205,25 @@ static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
|
||||
}
|
||||
#endif
|
||||
|
||||
/* GFp_BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache
|
||||
* line width of the target processor is at least the following value. */
|
||||
// GFp_BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache
|
||||
// line width of the target processor is at least the following value.
|
||||
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
|
||||
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
|
||||
(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
|
||||
|
||||
#if !defined(OPENSSL_X86_64)
|
||||
|
||||
/* Window sizes optimized for fixed window size modular exponentiation
|
||||
* algorithm (GFp_BN_mod_exp_mont_consttime).
|
||||
*
|
||||
* To achieve the security goals of GFp_BN_mod_exp_mont_consttime, the maximum
|
||||
* size of the window must not exceed
|
||||
* log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
|
||||
*
|
||||
* Window size thresholds are defined for cache line sizes of 32 and 64, cache
|
||||
* line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
|
||||
* 7 should only be used on processors that have a 128 byte or greater cache
|
||||
* line size. */
|
||||
// Window sizes optimized for fixed window size modular exponentiation
|
||||
// algorithm (GFp_BN_mod_exp_mont_consttime).
|
||||
//
|
||||
// To achieve the security goals of GFp_BN_mod_exp_mont_consttime, the maximum
|
||||
// size of the window must not exceed
|
||||
// log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
|
||||
//
|
||||
// Window size thresholds are defined for cache line sizes of 32 and 64, cache
|
||||
// line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
|
||||
// 7 should only be used on processors that have a 128 byte or greater cache
|
||||
// line size.
|
||||
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
|
||||
|
||||
#define GFp_BN_window_bits_for_ctime_exponent_size(b) \
|
||||
@ -238,25 +238,25 @@ static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* defined(OPENSSL_X86_64) */
|
||||
#endif // defined(OPENSSL_X86_64)
|
||||
|
||||
/* Given a pointer value, compute the next address that is a cache line
|
||||
* multiple. */
|
||||
// Given a pointer value, compute the next address that is a cache line
|
||||
// multiple.
|
||||
#define MOD_EXP_CTIME_ALIGN(x_) \
|
||||
((unsigned char *)(x_) + \
|
||||
(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
|
||||
(((uintptr_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
|
||||
|
||||
/* This variant of GFp_BN_mod_exp_mont() uses fixed windows and the special
|
||||
* precomputation memory layout to limit data-dependency to a minimum
|
||||
* to protect secret exponents (cf. the hyper-threading timing attacks
|
||||
* pointed out by Colin Percival,
|
||||
* http://www.daemonology.net/hyperthreading-considered-harmful/).
|
||||
*
|
||||
* |p| must be positive. |a_mont| must in [0, m). |one_mont| must be
|
||||
* the value 1 Montgomery-encoded and fully reduced (mod m).
|
||||
*
|
||||
* Assumes 0 < a_mont < n, 0 < p, 0 < p_bits. */
|
||||
// This variant of GFp_BN_mod_exp_mont() uses fixed windows and the special
|
||||
// precomputation memory layout to limit data-dependency to a minimum
|
||||
// to protect secret exponents (cf. the hyper-threading timing attacks
|
||||
// pointed out by Colin Percival,
|
||||
// http://www.daemonology.net/hyperthreading-considered-harmful/).
|
||||
//
|
||||
// |p| must be positive. |a_mont| must in [0, m). |one_mont| must be
|
||||
// the value 1 Montgomery-encoded and fully reduced (mod m).
|
||||
//
|
||||
// Assumes 0 < a_mont < n, 0 < p, 0 < p_bits.
|
||||
int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
const BIGNUM *p, size_t p_bits,
|
||||
const BIGNUM *one_mont, const BIGNUM *n,
|
||||
@ -274,24 +274,23 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
BIGNUM tmp, am;
|
||||
|
||||
const int top = n->top;
|
||||
/* The |OPENSSL_BN_ASM_MONT5| code requires top > 1. */
|
||||
// The |OPENSSL_BN_ASM_MONT5| code requires top > 1.
|
||||
if (top <= 1) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
||||
/* Get the window size to use with size of p. */
|
||||
// Get the window size to use with size of p.
|
||||
#if defined(OPENSSL_BN_ASM_MONT5)
|
||||
static const int window = 5;
|
||||
/* reserve space for n->d[] copy */
|
||||
// reserve space for n->d[] copy
|
||||
powerbufLen += top * sizeof(n->d[0]);
|
||||
#else
|
||||
const int window = GFp_BN_window_bits_for_ctime_exponent_size(bits);
|
||||
#endif
|
||||
|
||||
/* Allocate a buffer large enough to hold all of the pre-computed
|
||||
* powers of am, am itself and tmp.
|
||||
*/
|
||||
// Allocate a buffer large enough to hold all of the pre-computed
|
||||
// powers of am, am itself and tmp.
|
||||
numPowers = 1 << window;
|
||||
powerbufLen +=
|
||||
sizeof(n->d[0]) *
|
||||
@ -317,27 +316,27 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
}
|
||||
#endif
|
||||
|
||||
/* lay down tmp and am right after powers table */
|
||||
// Lay down tmp and am right after powers table.
|
||||
tmp.d = (BN_ULONG *)(powerbuf + sizeof(n->d[0]) * top * numPowers);
|
||||
am.d = tmp.d + top;
|
||||
tmp.top = am.top = 0;
|
||||
tmp.dmax = am.dmax = top;
|
||||
tmp.flags = am.flags = BN_FLG_STATIC_DATA;
|
||||
|
||||
/* Copy a^0 and a^1. */
|
||||
// Copy a^0 and a^1.
|
||||
if (!GFp_BN_copy(&tmp, one_mont) ||
|
||||
!GFp_BN_copy(&am, a_mont)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
#if defined(OPENSSL_BN_ASM_MONT5)
|
||||
/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
|
||||
* specifically optimization of cache-timing attack countermeasures
|
||||
* and pre-computation optimization. */
|
||||
// This optimization uses ideas from http://eprint.iacr.org/2011/239,
|
||||
// specifically optimization of cache-timing attack countermeasures
|
||||
// and pre-computation optimization.
|
||||
{
|
||||
BN_ULONG *np;
|
||||
|
||||
/* copy n->d[] to improve cache locality */
|
||||
// copy n->d[] to improve cache locality
|
||||
for (np = am.d + top, i = 0; i < top; i++) {
|
||||
np[i] = n->d[i];
|
||||
}
|
||||
@ -347,7 +346,7 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
GFp_bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
|
||||
GFp_bn_scatter5(tmp.d, top, powerbuf, 2);
|
||||
|
||||
/* same as above, but uses squaring for 1/2 of operations */
|
||||
// same as above, but uses squaring for 1/2 of operations
|
||||
for (i = 4; i < 32; i *= 2) {
|
||||
GFp_bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
|
||||
GFp_bn_scatter5(tmp.d, top, powerbuf, i);
|
||||
@ -378,13 +377,12 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
}
|
||||
GFp_bn_gather5(tmp.d, top, powerbuf, wvalue);
|
||||
|
||||
/* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
|
||||
* that has not been read yet.) */
|
||||
// At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
|
||||
// that has not been read yet.)
|
||||
assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
|
||||
|
||||
/* Scan the exponent one window at a time starting from the most
|
||||
* significant bits.
|
||||
*/
|
||||
// Scan the exponent one window at a time starting from the most
|
||||
// significant bits.
|
||||
if (top & 7) {
|
||||
while (bits >= 0) {
|
||||
for (wvalue = 0, i = 0; i < 5; i++, bits--) {
|
||||
@ -402,16 +400,16 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
const uint8_t *p_bytes = (const uint8_t *)p->d;
|
||||
int max_bits = p->top * BN_BITS2;
|
||||
assert(bits < max_bits);
|
||||
/* |p = 0| has been handled as a special case, so |max_bits| is at least
|
||||
* one word. */
|
||||
// |p = 0| has been handled as a special case, so |max_bits| is at least
|
||||
// one word.
|
||||
assert(max_bits >= 64);
|
||||
|
||||
/* If the first bit to be read lands in the last byte, unroll the first
|
||||
* iteration to avoid reading past the bounds of |p->d|. (After the first
|
||||
* iteration, we are guaranteed to be past the last byte.) Note |bits|
|
||||
* here is the top bit, inclusive. */
|
||||
// If the first bit to be read lands in the last byte, unroll the first
|
||||
// iteration to avoid reading past the bounds of |p->d|. (After the first
|
||||
// iteration, we are guaranteed to be past the last byte.) Note |bits|
|
||||
// here is the top bit, inclusive.
|
||||
if (bits - 4 >= max_bits - 8) {
|
||||
/* Read five bits from |bits-4| through |bits|, inclusive. */
|
||||
// Read five bits from |bits-4| through |bits|, inclusive.
|
||||
wvalue = p_bytes[p->top * BN_BYTES - 1];
|
||||
wvalue >>= (bits - 4) & 7;
|
||||
wvalue &= 0x1f;
|
||||
@ -419,10 +417,10 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
GFp_bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
|
||||
}
|
||||
while (bits >= 0) {
|
||||
/* Read five bits from |bits-4| through |bits|, inclusive. */
|
||||
// Read five bits from |bits-4| through |bits|, inclusive.
|
||||
int first_bit = bits - 4;
|
||||
uint16_t val;
|
||||
/* Assumes little-endian. */
|
||||
// Assumes little-endian.
|
||||
memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val));
|
||||
val >>= first_bit & 7;
|
||||
val &= 0x1f;
|
||||
@ -447,18 +445,17 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* If the window size is greater than 1, then calculate
|
||||
* val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
|
||||
* (even powers could instead be computed as (a^(i/2))^2
|
||||
* to use the slight performance advantage of sqr over mul).
|
||||
*/
|
||||
// If the window size is greater than 1, then calculate
|
||||
// val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
|
||||
// (even powers could instead be computed as (a^(i/2))^2
|
||||
// to use the slight performance advantage of sqr over mul).
|
||||
if (window > 1) {
|
||||
if (!GFp_BN_mod_mul_mont(&tmp, &am, &am, n, n0) ||
|
||||
!copy_to_prebuf(&tmp, top, powerbuf, 2, window)) {
|
||||
goto err;
|
||||
}
|
||||
for (i = 3; i < numPowers; i++) {
|
||||
/* Calculate a^i = a^(i-1) * a */
|
||||
// Calculate a^i = a^(i-1) * a
|
||||
if (!GFp_BN_mod_mul_mont(&tmp, &am, &tmp, n, n0) ||
|
||||
!copy_to_prebuf(&tmp, top, powerbuf, i, window)) {
|
||||
goto err;
|
||||
@ -474,13 +471,13 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Scan the exponent one window at a time starting from the most
|
||||
// Scan the exponent one window at a time starting from the most
|
||||
// significant bits.
|
||||
|
||||
while (bits >= 0) {
|
||||
wvalue = 0; /* The 'value' of the window */
|
||||
wvalue = 0; // The 'value' of the window
|
||||
|
||||
/* Scan the window, squaring the result as we go */
|
||||
// Scan the window, squaring the result as we go
|
||||
for (i = 0; i < window; i++, bits--) {
|
||||
if (!GFp_BN_mod_mul_mont(&tmp, &tmp, &tmp, n, n0)) {
|
||||
goto err;
|
||||
@ -488,12 +485,12 @@ int GFp_BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a_mont,
|
||||
wvalue = (wvalue << 1) + GFp_BN_is_bit_set(p, bits);
|
||||
}
|
||||
|
||||
/* Fetch the appropriate pre-computed value from the pre-buf */
|
||||
// Fetch the appropriate pre-computed value from the pre-buf
|
||||
if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Multiply the result into the intermediate result */
|
||||
// Multiply the result into the intermediate result
|
||||
if (!GFp_BN_mod_mul_mont(&tmp, &tmp, &am, n, n0)) {
|
||||
goto err;
|
||||
}
|
||||
|
@ -141,7 +141,7 @@ extern "C" {
|
||||
#if defined(OPENSSL_64_BIT)
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
/* MSVC doesn't support two-word integers on 64-bit. */
|
||||
// MSVC doesn't support two-word integers on 64-bit.
|
||||
#define BN_ULLONG uint128_t
|
||||
#endif
|
||||
|
||||
@ -158,11 +158,11 @@ extern "C" {
|
||||
#define BN_BITS2 32
|
||||
#define BN_BYTES 4
|
||||
#define BN_MASK2 (0xffffffffUL)
|
||||
/* On some 32-bit platforms, Montgomery multiplication is done using 64-bit
|
||||
* arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
|
||||
* needs to be two words long. Only certain 32-bit platforms actually make use
|
||||
* of n0[1] and shorter R value would suffice for the others. However,
|
||||
* currently only the assembly files know which is which. */
|
||||
// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
|
||||
// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
|
||||
// needs to be two words long. Only certain 32-bit platforms actually make use
|
||||
// of n0[1] and shorter R value would suffice for the others. However,
|
||||
// currently only the assembly files know which is which.
|
||||
#define BN_MONT_CTX_N0_LIMBS 2
|
||||
#define BN_MONT_CTX_N0(hi, lo) TOBN(hi, lo)
|
||||
#define TOBN(hi, lo) (lo), (hi)
|
||||
@ -179,12 +179,12 @@ BN_ULONG GFp_bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
|
||||
BN_ULONG GFp_bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
int num);
|
||||
|
||||
/* |num| must be at least 4, at least on x86.
|
||||
*
|
||||
* In other forks, |bn_mul_mont| returns an |int| indicating whether it
|
||||
* actually did the multiplication. All our implementations always do the
|
||||
* multiplication, and forcing callers to deal with the possibility of it
|
||||
* failing just leads to further problems. */
|
||||
// |num| must be at least 4, at least on x86.
|
||||
//
|
||||
// In other forks, |bn_mul_mont| returns an |int| indicating whether it
|
||||
// actually did the multiplication. All our implementations always do the
|
||||
// multiplication, and forcing callers to deal with the possibility of it
|
||||
// failing just leads to further problems.
|
||||
void GFp_bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
||||
const BN_ULONG *np, const BN_ULONG *n0, int num);
|
||||
|
||||
@ -201,7 +201,7 @@ static inline void bn_umult_lohi(BN_ULONG *low_out, BN_ULONG *high_out,
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_BN_INTERNAL_H */
|
||||
#endif // OPENSSL_HEADER_BN_INTERNAL_H
|
||||
|
@ -118,7 +118,7 @@
|
||||
#include "../../internal.h"
|
||||
|
||||
|
||||
/* Avoid -Wmissing-prototypes warnings. */
|
||||
// Avoid -Wmissing-prototypes warnings.
|
||||
int GFp_BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, const BIGNUM *n,
|
||||
const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS]);
|
||||
|
||||
@ -140,7 +140,7 @@ int GFp_BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, const BIGNUM *n,
|
||||
return 1;
|
||||
}
|
||||
|
||||
max = (2 * nl); /* carry is stored separately */
|
||||
max = (2 * nl); // carry is stored separately
|
||||
if (!GFp_bn_wexpand(r, max)) {
|
||||
return 0;
|
||||
}
|
||||
@ -148,7 +148,7 @@ int GFp_BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, const BIGNUM *n,
|
||||
np = n->d;
|
||||
rp = r->d;
|
||||
|
||||
/* clear the top words of T */
|
||||
// clear the top words of T
|
||||
if (max > r->top) {
|
||||
memset(&rp[r->top], 0, (max - r->top) * sizeof(BN_ULONG));
|
||||
}
|
||||
@ -177,8 +177,8 @@ int GFp_BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, const BIGNUM *n,
|
||||
uintptr_t m;
|
||||
|
||||
v = GFp_bn_sub_words(rp, ap, np, nl) - carry;
|
||||
/* if subtraction result is real, then trick unconditional memcpy below to
|
||||
* perform in-place "refresh" instead of actual copy. */
|
||||
// if subtraction result is real, then trick unconditional memcpy below to
|
||||
// perform in-place "refresh" instead of actual copy.
|
||||
m = (0u - (uintptr_t)v);
|
||||
nrp = (BN_ULONG *)(((uintptr_t)rp & ~m) | ((uintptr_t)ap & m));
|
||||
|
||||
@ -210,13 +210,13 @@ int GFp_BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, const BIGNUM *n,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Assumes a < n and b < n */
|
||||
// Assumes a < n and b < n
|
||||
int GFp_BN_mod_mul_mont(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
||||
const BIGNUM *n,
|
||||
const BN_ULONG n0[BN_MONT_CTX_N0_LIMBS]) {
|
||||
int num = n->top;
|
||||
|
||||
/* GFp_bn_mul_mont requires at least four limbs, at least for x86. */
|
||||
// GFp_bn_mul_mont requires at least four limbs, at least for x86.
|
||||
if (num < 4) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -20,7 +20,7 @@
|
||||
#include "../../internal.h"
|
||||
|
||||
|
||||
/* Avoid -Wmissing-prototypes warnings. */
|
||||
// Avoid -Wmissing-prototypes warnings.
|
||||
uint64_t GFp_bn_neg_inv_mod_r_u64(uint64_t n);
|
||||
|
||||
OPENSSL_COMPILE_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
|
||||
@ -29,35 +29,35 @@ OPENSSL_COMPILE_ASSERT(sizeof(uint64_t) ==
|
||||
BN_MONT_CTX_N0_LIMBS * sizeof(BN_ULONG),
|
||||
BN_MONT_CTX_N0_LIMBS_DOES_NOT_MATCH_UINT64_T);
|
||||
|
||||
/* LG_LITTLE_R is log_2(r). */
|
||||
// LG_LITTLE_R is log_2(r).
|
||||
#define LG_LITTLE_R (BN_MONT_CTX_N0_LIMBS * BN_BITS2)
|
||||
|
||||
/* bn_neg_inv_r_mod_n_u64 calculates the -1/n mod r; i.e. it calculates |v|
|
||||
* such that u*r - v*n == 1. |r| is the constant defined in |bn_mont_n0|. |n|
|
||||
* must be odd.
|
||||
*
|
||||
* This is derived from |xbinGCD| in Henry S. Warren, Jr.'s "Montgomery
|
||||
* Multiplication" (http://www.hackersdelight.org/MontgomeryMultiplication.pdf).
|
||||
* It is very similar to the MODULAR-INVERSE function in Stephen R. Dussé's and
|
||||
* Burton S. Kaliski Jr.'s "A Cryptographic Library for the Motorola DSP56000"
|
||||
* (http://link.springer.com/chapter/10.1007%2F3-540-46877-3_21).
|
||||
*
|
||||
* This is inspired by Joppe W. Bos's "Constant Time Modular Inversion"
|
||||
* (http://www.joppebos.com/files/CTInversion.pdf) so that the inversion is
|
||||
* constant-time with respect to |n|. We assume uint64_t additions,
|
||||
* subtractions, shifts, and bitwise operations are all constant time, which
|
||||
* may be a large leap of faith on 32-bit targets. We avoid division and
|
||||
* multiplication, which tend to be the most problematic in terms of timing
|
||||
* leaks.
|
||||
*
|
||||
* Most GCD implementations return values such that |u*r + v*n == 1|, so the
|
||||
* caller would have to negate the resultant |v| for the purpose of Montgomery
|
||||
* multiplication. This implementation does the negation implicitly by doing
|
||||
* the computations as a difference instead of a sum. */
|
||||
// bn_neg_inv_r_mod_n_u64 calculates the -1/n mod r; i.e. it calculates |v|
|
||||
// such that u*r - v*n == 1. |r| is the constant defined in |bn_mont_n0|. |n|
|
||||
// must be odd.
|
||||
//
|
||||
// This is derived from |xbinGCD| in Henry S. Warren, Jr.'s "Montgomery
|
||||
// Multiplication" (http://www.hackersdelight.org/MontgomeryMultiplication.pdf).
|
||||
// It is very similar to the MODULAR-INVERSE function in Stephen R. Dussé's and
|
||||
// Burton S. Kaliski Jr.'s "A Cryptographic Library for the Motorola DSP56000"
|
||||
// (http://link.springer.com/chapter/10.1007%2F3-540-46877-3_21).
|
||||
//
|
||||
// This is inspired by Joppe W. Bos's "Constant Time Modular Inversion"
|
||||
// (http://www.joppebos.com/files/CTInversion.pdf) so that the inversion is
|
||||
// constant-time with respect to |n|. We assume uint64_t additions,
|
||||
// subtractions, shifts, and bitwise operations are all constant time, which
|
||||
// may be a large leap of faith on 32-bit targets. We avoid division and
|
||||
// multiplication, which tend to be the most problematic in terms of timing
|
||||
// leaks.
|
||||
//
|
||||
// Most GCD implementations return values such that |u*r + v*n == 1|, so the
|
||||
// caller would have to negate the resultant |v| for the purpose of Montgomery
|
||||
// multiplication. This implementation does the negation implicitly by doing
|
||||
// the computations as a difference instead of a sum.
|
||||
uint64_t GFp_bn_neg_inv_mod_r_u64(uint64_t n) {
|
||||
assert(n % 2 == 1);
|
||||
|
||||
/* alpha == 2**(lg r - 1) == r / 2. */
|
||||
// alpha == 2**(lg r - 1) == r / 2.
|
||||
static const uint64_t alpha = UINT64_C(1) << (LG_LITTLE_R - 1);
|
||||
|
||||
const uint64_t beta = n;
|
||||
@ -65,46 +65,46 @@ uint64_t GFp_bn_neg_inv_mod_r_u64(uint64_t n) {
|
||||
uint64_t u = 1;
|
||||
uint64_t v = 0;
|
||||
|
||||
/* The invariant maintained from here on is:
|
||||
* 2**(lg r - i) == u*2*alpha - v*beta. */
|
||||
// The invariant maintained from here on is:
|
||||
// 2**(lg r - i) == u*2*alpha - v*beta.
|
||||
for (size_t i = 0; i < LG_LITTLE_R; ++i) {
|
||||
#if BN_BITS2 == 64 && defined(BN_ULLONG)
|
||||
assert((BN_ULLONG)(1) << (LG_LITTLE_R - i) ==
|
||||
((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta));
|
||||
#endif
|
||||
|
||||
/* Delete a common factor of 2 in u and v if |u| is even. Otherwise, set
|
||||
* |u = (u + beta) / 2| and |v = (v / 2) + alpha|. */
|
||||
// Delete a common factor of 2 in u and v if |u| is even. Otherwise, set
|
||||
// |u = (u + beta) / 2| and |v = (v / 2) + alpha|.
|
||||
|
||||
uint64_t u_is_odd = UINT64_C(0) - (u & 1); /* Either 0xff..ff or 0. */
|
||||
uint64_t u_is_odd = UINT64_C(0) - (u & 1); // Either 0xff..ff or 0.
|
||||
|
||||
/* The addition can overflow, so use Dietz's method for it.
|
||||
*
|
||||
* Dietz calculates (x+y)/2 by (x xor y)>>1 + x&y. This is valid for all
|
||||
* (unsigned) x and y, even when x+y overflows. Evidence for 32-bit values
|
||||
* (embedded in 64 bits to so that overflow can be ignored):
|
||||
*
|
||||
* (declare-fun x () (_ BitVec 64))
|
||||
* (declare-fun y () (_ BitVec 64))
|
||||
* (assert (let (
|
||||
* (one (_ bv1 64))
|
||||
* (thirtyTwo (_ bv32 64)))
|
||||
* (and
|
||||
* (bvult x (bvshl one thirtyTwo))
|
||||
* (bvult y (bvshl one thirtyTwo))
|
||||
* (not (=
|
||||
* (bvadd (bvlshr (bvxor x y) one) (bvand x y))
|
||||
* (bvlshr (bvadd x y) one)))
|
||||
* )))
|
||||
* (check-sat) */
|
||||
uint64_t beta_if_u_is_odd = beta & u_is_odd; /* Either |beta| or 0. */
|
||||
// The addition can overflow, so use Dietz's method for it.
|
||||
//
|
||||
// Dietz calculates (x+y)/2 by (x xor y)>>1 + x&y. This is valid for all
|
||||
// (unsigned) x and y, even when x+y overflows. Evidence for 32-bit values
|
||||
// (embedded in 64 bits to so that overflow can be ignored):
|
||||
//
|
||||
// (declare-fun x () (_ BitVec 64))
|
||||
// (declare-fun y () (_ BitVec 64))
|
||||
// (assert (let (
|
||||
// (one (_ bv1 64))
|
||||
// (thirtyTwo (_ bv32 64)))
|
||||
// (and
|
||||
// (bvult x (bvshl one thirtyTwo))
|
||||
// (bvult y (bvshl one thirtyTwo))
|
||||
// (not (=
|
||||
// (bvadd (bvlshr (bvxor x y) one) (bvand x y))
|
||||
// (bvlshr (bvadd x y) one)))
|
||||
// )))
|
||||
// (check-sat)
|
||||
uint64_t beta_if_u_is_odd = beta & u_is_odd; // Either |beta| or 0.
|
||||
u = ((u ^ beta_if_u_is_odd) >> 1) + (u & beta_if_u_is_odd);
|
||||
|
||||
uint64_t alpha_if_u_is_odd = alpha & u_is_odd; /* Either |alpha| or 0. */
|
||||
v = (v >> 1) + alpha_if_u_is_odd;
|
||||
}
|
||||
|
||||
/* The invariant now shows that u*r - v*n == 1 since r == 2 * alpha. */
|
||||
// The invariant now shows that u*r - v*n == 1 since r == 2 * alpha.
|
||||
#if BN_BITS2 == 64 && defined(BN_ULLONG)
|
||||
assert(1 == ((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta));
|
||||
#endif
|
||||
|
@ -65,8 +65,8 @@
|
||||
#define EVP_AEAD_AES_GCM_NONCE_LEN 12
|
||||
#define EVP_AEAD_AES_GCM_TAG_LEN 16
|
||||
|
||||
/* Declarations for extern functions only called by Rust code, to avoid
|
||||
* -Wmissing-prototypes warnings. */
|
||||
// Declarations for extern functions only called by Rust code, to avoid
|
||||
// -Wmissing-prototypes warnings.
|
||||
int GFp_aes_gcm_init(void *ctx_buf, size_t ctx_buf_len, const uint8_t *key,
|
||||
size_t key_len);
|
||||
int GFp_aes_gcm_open(const void *ctx_buf, uint8_t *out, size_t in_out_len,
|
||||
@ -108,8 +108,8 @@ static char bsaes_capable(void) {
|
||||
#endif
|
||||
|
||||
#if defined(BSAES)
|
||||
/* On platforms where BSAES gets defined (just above), then these functions are
|
||||
* provided by asm. */
|
||||
// On platforms where BSAES gets defined (just above), then these functions are
|
||||
// provided by asm.
|
||||
void GFp_bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
|
||||
const AES_KEY *key, const uint8_t ivec[16]);
|
||||
#endif
|
||||
@ -128,8 +128,8 @@ static void aes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
|
||||
const uint8_t ivec[16]);
|
||||
|
||||
#if defined(VPAES)
|
||||
/* On platforms where VPAES gets defined (just above), then these functions are
|
||||
* provided by asm. */
|
||||
// On platforms where VPAES gets defined (just above), then these functions are
|
||||
// provided by asm.
|
||||
int GFp_vpaes_set_encrypt_key(const uint8_t *userKey, unsigned bits,
|
||||
AES_KEY *key);
|
||||
void GFp_vpaes_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
|
||||
@ -174,7 +174,7 @@ static aes_set_key_f aes_set_key(void) {
|
||||
return GFp_AES_set_encrypt_key;
|
||||
}
|
||||
|
||||
/* TODO(perf): Consider inlining this. */
|
||||
// TODO(perf): Consider inlining this.
|
||||
int GFp_aes_block_is_aesni_encrypt(aes_block_f block) {
|
||||
#if defined(AESNI)
|
||||
return block == GFp_aesni_encrypt;
|
||||
@ -185,7 +185,7 @@ int GFp_aes_block_is_aesni_encrypt(aes_block_f block) {
|
||||
}
|
||||
|
||||
static aes_block_f aes_block(void) {
|
||||
/* Keep this in sync with |set_set_key| and |aes_ctr|. */
|
||||
// Keep this in sync with |set_set_key| and |aes_ctr|.
|
||||
|
||||
#if defined(AESNI)
|
||||
if (aesni_capable()) {
|
||||
@ -216,7 +216,7 @@ static aes_block_f aes_block(void) {
|
||||
}
|
||||
|
||||
static aes_ctr_f aes_ctr(void) {
|
||||
/* Keep this in sync with |set_set_key| and |aes_block|. */
|
||||
// Keep this in sync with |set_set_key| and |aes_block|.
|
||||
|
||||
#if defined(AESNI)
|
||||
if (aesni_capable()) {
|
||||
@ -260,7 +260,7 @@ static void aes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
|
||||
for (size_t i = 0; i < 16; ++i) {
|
||||
out[i] = in[i] ^ counter_ciphertext[i];
|
||||
}
|
||||
/* The caller must ensure the counter won't wrap around. */
|
||||
// The caller must ensure the counter won't wrap around.
|
||||
++counter;
|
||||
assert(counter != 0);
|
||||
to_be_u32_ptr(&counter_plaintext[12], counter);
|
||||
@ -277,8 +277,8 @@ int GFp_aes_gcm_init(void *ctx_buf, size_t ctx_buf_len, const uint8_t *key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* XXX: Ignores return value. TODO: These functions should return |void|
|
||||
* anyway. */
|
||||
// XXX: Ignores return value. TODO: These functions should return |void|
|
||||
// anyway.
|
||||
(void)(aes_set_key())(key, (unsigned)key_len * 8, &ks);
|
||||
|
||||
GFp_gcm128_init_serialized((uint8_t *)ctx_buf + sizeof(ks), &ks, aes_block());
|
||||
|
@ -68,16 +68,16 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Preconditions for AEAD implementation methods. */
|
||||
// Preconditions for AEAD implementation methods.
|
||||
|
||||
/* aead_check_alias returns 0 if |out| points within the buffer determined by
|
||||
* |in| and |in_len| and 1 otherwise.
|
||||
*
|
||||
* When processing, there's only an issue if |out| points within in[:in_len]
|
||||
* and isn't equal to |in|. If that's the case then writing the output will
|
||||
* stomp input that hasn't been read yet.
|
||||
*
|
||||
* This function checks for that case. */
|
||||
// aead_check_alias returns 0 if |out| points within the buffer determined by
|
||||
// |in| and |in_len| and 1 otherwise.
|
||||
//
|
||||
// When processing, there's only an issue if |out| points within in[:in_len]
|
||||
// and isn't equal to |in|. If that's the case then writing the output will
|
||||
// stomp input that hasn't been read yet.
|
||||
//
|
||||
// This function checks for that case.
|
||||
static inline int aead_check_alias(const uint8_t *in, size_t in_len,
|
||||
const uint8_t *out) {
|
||||
if (out <= in) {
|
||||
@ -88,17 +88,17 @@ static inline int aead_check_alias(const uint8_t *in, size_t in_len,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* TODO: This isn't used yet, but it will probably be used soon, once
|
||||
* AES-GCM-SIV is integrated into *ring*.
|
||||
* */
|
||||
// TODO: This isn't used yet, but it will probably be used soon, once
|
||||
// AES-GCM-SIV is integrated into *ring*.
|
||||
//
|
||||
#if 0
|
||||
|
||||
/* aes_ctr_set_key initialises |*aes_key| using |key_bytes| bytes from |key|,
|
||||
* where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
|
||||
* set to a function that encrypts single blocks. If not NULL, |*gcm_ctx| is
|
||||
* initialised to do GHASH with the given key. It returns a function for
|
||||
* optimised CTR-mode, or NULL if CTR-mode should be built using
|
||||
* |*out_block|. */
|
||||
// aes_ctr_set_key initialises |*aes_key| using |key_bytes| bytes from |key|,
|
||||
// where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
|
||||
// set to a function that encrypts single blocks. If not NULL, |*gcm_ctx| is
|
||||
// initialised to do GHASH with the given key. It returns a function for
|
||||
// optimised CTR-mode, or NULL if CTR-mode should be built using
|
||||
// |*out_block|.
|
||||
ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx,
|
||||
block128_f *out_block, const uint8_t *key,
|
||||
size_t key_bytes);
|
||||
@ -106,7 +106,7 @@ ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_CONTEXT *gcm_ctx,
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_CIPHER_INTERNAL_H */
|
||||
#endif // OPENSSL_HEADER_CIPHER_INTERNAL_H
|
||||
|
@ -30,77 +30,77 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* This function looks at `w + 1` scalar bits (`w` current, 1 adjacent less
|
||||
* significant bit), and recodes them into a signed digit for use in fast point
|
||||
* multiplication: the use of signed rather than unsigned digits means that
|
||||
* fewer points need to be precomputed, given that point inversion is easy (a
|
||||
* precomputed point dP makes -dP available as well).
|
||||
*
|
||||
* BACKGROUND:
|
||||
*
|
||||
* Signed digits for multiplication were introduced by Booth ("A signed binary
|
||||
* multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV,
|
||||
* pt. 2 (1951), pp. 236-240), in that case for multiplication of integers.
|
||||
* Booth's original encoding did not generally improve the density of nonzero
|
||||
* digits over the binary representation, and was merely meant to simplify the
|
||||
* handling of signed factors given in two's complement; but it has since been
|
||||
* shown to be the basis of various signed-digit representations that do have
|
||||
* further advantages, including the wNAF, using the following general
|
||||
* approach:
|
||||
*
|
||||
* (1) Given a binary representation
|
||||
*
|
||||
* b_k ... b_2 b_1 b_0,
|
||||
*
|
||||
* of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
|
||||
* by using bit-wise subtraction as follows:
|
||||
*
|
||||
* b_k b_(k-1) ... b_2 b_1 b_0
|
||||
* - b_k ... b_3 b_2 b_1 b_0
|
||||
* -------------------------------------
|
||||
* s_k b_(k-1) ... s_3 s_2 s_1 s_0
|
||||
*
|
||||
* A left-shift followed by subtraction of the original value yields a new
|
||||
* representation of the same value, using signed bits s_i = b_(i+1) - b_i.
|
||||
* This representation from Booth's paper has since appeared in the
|
||||
* literature under a variety of different names including "reversed binary
|
||||
* form", "alternating greedy expansion", "mutual opposite form", and
|
||||
* "sign-alternating {+-1}-representation".
|
||||
*
|
||||
* An interesting property is that among the nonzero bits, values 1 and -1
|
||||
* strictly alternate.
|
||||
*
|
||||
* (2) Various window schemes can be applied to the Booth representation of
|
||||
* integers: for example, right-to-left sliding windows yield the wNAF
|
||||
* (a signed-digit encoding independently discovered by various researchers
|
||||
* in the 1990s), and left-to-right sliding windows yield a left-to-right
|
||||
* equivalent of the wNAF (independently discovered by various researchers
|
||||
* around 2004).
|
||||
*
|
||||
* To prevent leaking information through side channels in point multiplication,
|
||||
* we need to recode the given integer into a regular pattern: sliding windows
|
||||
* as in wNAFs won't do, we need their fixed-window equivalent -- which is a few
|
||||
* decades older: we'll be using the so-called "modified Booth encoding" due to
|
||||
* MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49
|
||||
* (1961), pp. 67-91), in a radix-2**w setting. That is, we always combine `w`
|
||||
* signed bits into a signed digit, e.g. (for `w == 5`):
|
||||
*
|
||||
* s_(4j + 4) s_(4j + 3) s_(4j + 2) s_(4j + 1) s_(4j)
|
||||
*
|
||||
* The sign-alternating property implies that the resulting digit values are
|
||||
* integers from `-2**(w-1)` to `2**(w-1)`, e.g. -16 to 16 for `w == 5`.
|
||||
*
|
||||
* Of course, we don't actually need to compute the signed digits s_i as an
|
||||
* intermediate step (that's just a nice way to see how this scheme relates
|
||||
* to the wNAF): a direct computation obtains the recoded digit from the
|
||||
* six bits b_(4j + 4) ... b_(4j - 1).
|
||||
*
|
||||
* This function takes those `w` bits as an integer, writing the recoded digit
|
||||
* to |*is_negative| (a mask for `constant_time_select_s`) and |*digit|
|
||||
* (absolute value, in the range 0 .. 2**(w-1). Note that this integer
|
||||
* essentially provides the input bits "shifted to the left" by one position.
|
||||
* For example, the input to compute the least significant recoded digit, given
|
||||
* that there's no bit b_-1, has to be b_4 b_3 b_2 b_1 b_0 0. */
|
||||
// This function looks at `w + 1` scalar bits (`w` current, 1 adjacent less
|
||||
// significant bit), and recodes them into a signed digit for use in fast point
|
||||
// multiplication: the use of signed rather than unsigned digits means that
|
||||
// fewer points need to be precomputed, given that point inversion is easy (a
|
||||
// precomputed point dP makes -dP available as well).
|
||||
//
|
||||
// BACKGROUND:
|
||||
//
|
||||
// Signed digits for multiplication were introduced by Booth ("A signed binary
|
||||
// multiplication technique", Quart. Journ. Mech. and Applied Math., vol. IV,
|
||||
// pt. 2 (1951), pp. 236-240), in that case for multiplication of integers.
|
||||
// Booth's original encoding did not generally improve the density of nonzero
|
||||
// digits over the binary representation, and was merely meant to simplify the
|
||||
// handling of signed factors given in two's complement; but it has since been
|
||||
// shown to be the basis of various signed-digit representations that do have
|
||||
// further advantages, including the wNAF, using the following general
|
||||
// approach:
|
||||
//
|
||||
// (1) Given a binary representation
|
||||
//
|
||||
// b_k ... b_2 b_1 b_0,
|
||||
//
|
||||
// of a nonnegative integer (b_k in {0, 1}), rewrite it in digits 0, 1, -1
|
||||
// by using bit-wise subtraction as follows:
|
||||
//
|
||||
// b_k b_(k-1) ... b_2 b_1 b_0
|
||||
// - b_k ... b_3 b_2 b_1 b_0
|
||||
// -------------------------------------
|
||||
// s_k b_(k-1) ... s_3 s_2 s_1 s_0
|
||||
//
|
||||
// A left-shift followed by subtraction of the original value yields a new
|
||||
// representation of the same value, using signed bits s_i = b_(i+1) - b_i.
|
||||
// This representation from Booth's paper has since appeared in the
|
||||
// literature under a variety of different names including "reversed binary
|
||||
// form", "alternating greedy expansion", "mutual opposite form", and
|
||||
// "sign-alternating {+-1}-representation".
|
||||
//
|
||||
// An interesting property is that among the nonzero bits, values 1 and -1
|
||||
// strictly alternate.
|
||||
//
|
||||
// (2) Various window schemes can be applied to the Booth representation of
|
||||
// integers: for example, right-to-left sliding windows yield the wNAF
|
||||
// (a signed-digit encoding independently discovered by various researchers
|
||||
// in the 1990s), and left-to-right sliding windows yield a left-to-right
|
||||
// equivalent of the wNAF (independently discovered by various researchers
|
||||
// around 2004).
|
||||
//
|
||||
// To prevent leaking information through side channels in point multiplication,
|
||||
// we need to recode the given integer into a regular pattern: sliding windows
|
||||
// as in wNAFs won't do, we need their fixed-window equivalent -- which is a few
|
||||
// decades older: we'll be using the so-called "modified Booth encoding" due to
|
||||
// MacSorley ("High-speed arithmetic in binary computers", Proc. IRE, vol. 49
|
||||
// (1961), pp. 67-91), in a radix-2**w setting. That is, we always combine `w`
|
||||
// signed bits into a signed digit, e.g. (for `w == 5`):
|
||||
//
|
||||
// s_(4j + 4) s_(4j + 3) s_(4j + 2) s_(4j + 1) s_(4j)
|
||||
//
|
||||
// The sign-alternating property implies that the resulting digit values are
|
||||
// integers from `-2**(w-1)` to `2**(w-1)`, e.g. -16 to 16 for `w == 5`.
|
||||
//
|
||||
// Of course, we don't actually need to compute the signed digits s_i as an
|
||||
// intermediate step (that's just a nice way to see how this scheme relates
|
||||
// to the wNAF): a direct computation obtains the recoded digit from the
|
||||
// six bits b_(4j + 4) ... b_(4j - 1).
|
||||
//
|
||||
// This function takes those `w` bits as an integer, writing the recoded digit
|
||||
// to |*is_negative| (a mask for `constant_time_select_s`) and |*digit|
|
||||
// (absolute value, in the range 0 .. 2**(w-1). Note that this integer
|
||||
// essentially provides the input bits "shifted to the left" by one position.
|
||||
// For example, the input to compute the least significant recoded digit, given
|
||||
// that there's no bit b_-1, has to be b_4 b_3 b_2 b_1 b_0 0.
|
||||
OPENSSL_COMPILE_ASSERT(sizeof(crypto_word_t) == sizeof(BN_ULONG),
|
||||
size_t_and_bn_ulong_are_different_sizes);
|
||||
static inline void booth_recode(BN_ULONG *is_negative, unsigned *digit,
|
||||
@ -108,8 +108,8 @@ static inline void booth_recode(BN_ULONG *is_negative, unsigned *digit,
|
||||
assert(w >= 2);
|
||||
assert(w <= 7);
|
||||
|
||||
/* Set all bits of `s` to MSB(in), similar to |constant_time_msb_s|,
|
||||
* but 'in' seen as (`w+1`)-bit value. */
|
||||
// Set all bits of `s` to MSB(in), similar to |constant_time_msb_s|,
|
||||
// but 'in' seen as (`w+1`)-bit value.
|
||||
BN_ULONG s = ~((in >> w) - 1);
|
||||
unsigned d;
|
||||
d = (1 << (w + 1)) - in - 1;
|
||||
@ -130,4 +130,4 @@ void gfp_little_endian_bytes_from_scalar(uint8_t str[], size_t str_len,
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_EC_ECP_NISTZ_H */
|
||||
#endif // OPENSSL_HEADER_EC_ECP_NISTZ_H
|
||||
|
@ -12,17 +12,17 @@
|
||||
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
|
||||
|
||||
/* This is the precomputed constant time access table for the code in
|
||||
* GFp_nistz256.c, for the default generator. The table consists of 37
|
||||
* subtables, each subtable contains 64 affine points. The affine points are
|
||||
* encoded as eight uint64's, four for the x coordinate and four for the y.
|
||||
* Both values are in little-endian order. There are 37 tables because a
|
||||
* signed, 6-bit recoding of the scalar is used and ceil(256/(6 + 1)) = 37.
|
||||
* Within each table there are 64 values because the 6-bit recoding can take
|
||||
* 64 values, ignoring the sign bit, which is implemented by performing a
|
||||
* negation of the affine point when required. We would like to align it to 2MB
|
||||
* in order to increase the chances of using a large page but that appears to
|
||||
* lead to invalid ELF files being produced. */
|
||||
// This is the precomputed constant time access table for the code in
|
||||
// GFp_nistz256.c, for the default generator. The table consists of 37
|
||||
// subtables, each subtable contains 64 affine points. The affine points are
|
||||
// encoded as eight uint64's, four for the x coordinate and four for the y.
|
||||
// Both values are in little-endian order. There are 37 tables because a
|
||||
// signed, 6-bit recoding of the scalar is used and ceil(256/(6 + 1)) = 37.
|
||||
// Within each table there are 64 values because the 6-bit recoding can take
|
||||
// 64 values, ignoring the sign bit, which is implemented by performing a
|
||||
// negation of the affine point when required. We would like to align it to 2MB
|
||||
// in order to increase the chances of using a large page but that appears to
|
||||
// lead to invalid ELF files being produced.
|
||||
|
||||
static const alignas(4096) BN_ULONG
|
||||
GFp_nistz256_precomputed[37][64 * sizeof(P256_POINT_AFFINE) /
|
||||
|
@ -37,7 +37,7 @@ typedef struct {
|
||||
} P384_POINT_AFFINE;
|
||||
|
||||
|
||||
/* Prototypes to avoid -Wmissing-prototypes warnings. */
|
||||
// Prototypes to avoid -Wmissing-prototypes warnings.
|
||||
void GFp_nistz384_point_double(P384_POINT *r, const P384_POINT *a);
|
||||
void GFp_nistz384_point_add(P384_POINT *r, const P384_POINT *a,
|
||||
const P384_POINT *b);
|
||||
@ -47,4 +47,4 @@ void GFp_nistz384_point_add(P384_POINT *r, const P384_POINT *a,
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_EC_ECP_NISTZ384_H */
|
||||
#endif // OPENSSL_HEADER_EC_ECP_NISTZ384_H
|
||||
|
@ -113,7 +113,7 @@ static void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) {
|
||||
Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
|
||||
|
||||
#if defined(GHASH_ASM) && defined(OPENSSL_ARM)
|
||||
/* ARM assembler expects specific dword order in Htable. */
|
||||
// ARM assembler expects specific dword order in Htable.
|
||||
{
|
||||
int j;
|
||||
|
||||
@ -190,11 +190,11 @@ static void GFp_gcm_gmult_4bit(uint8_t Xi[16], const u128 Htable[16]) {
|
||||
to_be_u64_ptr(Xi + 8, Z.lo);
|
||||
}
|
||||
|
||||
/* Streamed gcm_mult_4bit, see GFp_gcm128_[en|de]crypt for
|
||||
* details... Compiler-generated code doesn't seem to give any
|
||||
* performance improvement, at least not on x86[_64]. It's here
|
||||
* mostly as reference and a placeholder for possible future
|
||||
* non-trivial optimization[s]... */
|
||||
// Streamed gcm_mult_4bit, see GFp_gcm128_[en|de]crypt for
|
||||
// details... Compiler-generated code doesn't seem to give any
|
||||
// performance improvement, at least not on x86[_64]. It's here
|
||||
// mostly as reference and a placeholder for possible future
|
||||
// non-trivial optimization[s]...
|
||||
static void GFp_gcm_ghash_4bit(uint8_t Xi[16], const u128 Htable[16],
|
||||
const uint8_t *inp, size_t len) {
|
||||
u128 Z;
|
||||
@ -250,7 +250,7 @@ static void GFp_gcm_ghash_4bit(uint8_t Xi[16], const u128 Htable[16],
|
||||
Xi[1] = from_be_u64(Z.lo);
|
||||
} while (inp += 16, len -= 16);
|
||||
}
|
||||
#else /* GHASH_ASM */
|
||||
#else // GHASH_ASM
|
||||
void GFp_gcm_gmult_4bit(uint8_t Xi[16], const u128 Htable[16]);
|
||||
void GFp_gcm_ghash_4bit(uint8_t Xi[16], const u128 Htable[16],
|
||||
const uint8_t *inp, size_t len);
|
||||
@ -259,9 +259,9 @@ void GFp_gcm_ghash_4bit(uint8_t Xi[16], const u128 Htable[16],
|
||||
#define GCM_MUL(ctx, Xi) GFp_gcm_gmult_4bit((ctx)->Xi, (ctx)->Htable)
|
||||
#if defined(GHASH_ASM)
|
||||
#define GHASH(ctx, in, len) GFp_gcm_ghash_4bit((ctx)->Xi, (ctx)->Htable, in, len)
|
||||
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
|
||||
* trashing effect. In other words idea is to hash data while it's
|
||||
* still in L1 cache after encryption pass... */
|
||||
// GHASH_CHUNK is "stride parameter" missioned to mitigate cache
|
||||
// trashing effect. In other words idea is to hash data while it's
|
||||
// still in L1 cache after encryption pass...
|
||||
#define GHASH_CHUNK (3 * 1024)
|
||||
#endif
|
||||
|
||||
@ -310,7 +310,7 @@ void GFp_gcm_ghash_v8(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
|
||||
|
||||
#if defined(OPENSSL_ARM) && __ARM_MAX_ARCH__ >= 7
|
||||
#define GCM_FUNCREF_4BIT
|
||||
/* 32-bit ARM also has support for doing GCM with NEON instructions. */
|
||||
// 32-bit ARM also has support for doing GCM with NEON instructions.
|
||||
void GFp_gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
|
||||
void GFp_gcm_gmult_neon(uint8_t Xi[16], const u128 Htable[16]);
|
||||
void GFp_gcm_ghash_neon(uint8_t Xi[16], const u128 Htable[16],
|
||||
@ -324,9 +324,9 @@ void GFp_gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
|
||||
void GFp_gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
|
||||
void GFp_gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
|
||||
size_t len);
|
||||
#endif /* Platform */
|
||||
#endif // Platform
|
||||
|
||||
#endif /* GHASH_ASM */
|
||||
#endif // GHASH_ASM
|
||||
|
||||
#ifdef GCM_FUNCREF_4BIT
|
||||
#undef GCM_MUL
|
||||
@ -347,7 +347,7 @@ void GFp_gcm128_init_serialized(
|
||||
uint8_t H_be[16];
|
||||
(*block)(ZEROS, H_be, key);
|
||||
|
||||
/* H is stored in host byte order */
|
||||
// H is stored in host byte order
|
||||
alignas(16) uint64_t H[2];
|
||||
H[0] = from_be_u64_ptr(H_be);
|
||||
H[1] = from_be_u64_ptr(H_be + 8);
|
||||
@ -363,12 +363,12 @@ void GFp_gcm128_init_serialized(
|
||||
|
||||
static void gcm128_init_htable(u128 Htable[GCM128_HTABLE_LEN],
|
||||
const uint64_t H[2]) {
|
||||
/* Keep in sync with |gcm128_init_gmult_ghash|. */
|
||||
// Keep in sync with |gcm128_init_gmult_ghash|.
|
||||
|
||||
#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
|
||||
if (GFp_gcm_clmul_enabled()) {
|
||||
#if defined(GHASH_ASM_X86_64)
|
||||
if (((GFp_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
|
||||
if (((GFp_ia32cap_P[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE
|
||||
GFp_gcm_init_avx(Htable, H);
|
||||
return;
|
||||
}
|
||||
@ -403,12 +403,12 @@ static void gcm128_init_gmult_ghash(GCM128_CONTEXT *ctx,
|
||||
int is_aesni_encrypt) {
|
||||
(void)is_aesni_encrypt; // Unused
|
||||
|
||||
/* Keep in sync with |gcm128_init_htable|. */
|
||||
// Keep in sync with |gcm128_init_htable|.
|
||||
|
||||
#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
|
||||
if (GFp_gcm_clmul_enabled()) {
|
||||
#if defined(GHASH_ASM_X86_64)
|
||||
if (((GFp_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
|
||||
if (((GFp_ia32cap_P[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE
|
||||
ctx->gmult = GFp_gcm_gmult_avx;
|
||||
ctx->ghash = GFp_gcm_ghash_avx;
|
||||
ctx->use_aesni_gcm_crypt = is_aesni_encrypt ? 1 : 0;
|
||||
@ -522,8 +522,8 @@ int GFp_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
|
||||
|
||||
#if defined(AESNI_GCM)
|
||||
if (ctx->use_aesni_gcm_crypt) {
|
||||
/* |aesni_gcm_encrypt| may not process all the input given to it. It may
|
||||
* not process *any* of its input if it is deemed too small. */
|
||||
// |aesni_gcm_encrypt| may not process all the input given to it. It may
|
||||
// not process *any* of its input if it is deemed too small.
|
||||
size_t bulk = GFp_aesni_gcm_encrypt(in, out, len, key, ctx->Yi, ctx->Xi);
|
||||
in += bulk;
|
||||
out += bulk;
|
||||
@ -601,8 +601,8 @@ int GFp_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
|
||||
|
||||
#if defined(AESNI_GCM)
|
||||
if (ctx->use_aesni_gcm_crypt) {
|
||||
/* |aesni_gcm_decrypt| may not process all the input given to it. It may
|
||||
* not process *any* of its input if it is deemed too small. */
|
||||
// |aesni_gcm_decrypt| may not process all the input given to it. It may
|
||||
// not process *any* of its input if it is deemed too small.
|
||||
size_t bulk = GFp_aesni_gcm_decrypt(in, out, len, key, ctx->Yi, ctx->Xi);
|
||||
in += bulk;
|
||||
out += bulk;
|
||||
@ -688,8 +688,8 @@ void GFp_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t tag[16]) {
|
||||
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
|
||||
int GFp_gcm_clmul_enabled(void) {
|
||||
#ifdef GHASH_ASM
|
||||
return GFp_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
|
||||
GFp_ia32cap_P[1] & (1 << 1); /* check PCLMULQDQ bit */
|
||||
return GFp_ia32cap_P[0] & (1 << 24) && // check FXSR bit
|
||||
GFp_ia32cap_P[1] & (1 << 1); // check PCLMULQDQ bit
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
|
@ -58,12 +58,12 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* aes_block_f is a pointer to |AES_Encrypt| or a variant thereof. */
|
||||
// aes_block_f is a pointer to |AES_Encrypt| or a variant thereof.
|
||||
typedef void (*aes_block_f)(const uint8_t in[16], uint8_t out[16],
|
||||
const AES_KEY *key);
|
||||
int GFp_aes_block_is_aesni_encrypt(aes_block_f aes_block);
|
||||
|
||||
/* GCM definitions */
|
||||
// GCM definitions
|
||||
typedef struct { uint64_t hi,lo; } u128;
|
||||
|
||||
typedef void (*gcm128_gmult_f)(uint8_t Xi[16], const u128 Htable[16]);
|
||||
@ -74,11 +74,11 @@ typedef void (*gcm128_ghash_f)(uint8_t Xi[16], const u128 Htable[16],
|
||||
|
||||
#define GCM128_SERIALIZED_LEN (GCM128_HTABLE_LEN * 16)
|
||||
|
||||
/* This differs from OpenSSL's |gcm128_context| in that it does not have the
|
||||
* |key| pointer, in order to make it |memcpy|-friendly. See GFp/modes.h
|
||||
* for more info. */
|
||||
// This differs from OpenSSL's |gcm128_context| in that it does not have the
|
||||
// |key| pointer, in order to make it |memcpy|-friendly. See GFp/modes.h
|
||||
// for more info.
|
||||
struct gcm128_context {
|
||||
/* Following 6 names follow names in GCM specification */
|
||||
// Following 6 names follow names in GCM specification
|
||||
alignas(16) uint8_t Yi[16];
|
||||
alignas(16) uint8_t EKi[16];
|
||||
alignas(16) uint8_t EK0[16];
|
||||
@ -90,39 +90,39 @@ struct gcm128_context {
|
||||
uint64_t u[2];
|
||||
} H_unused;
|
||||
|
||||
/* Relative position of Xi, H and pre-computed Htable is used in some
|
||||
* assembler modules, i.e. don't change the order! */
|
||||
// Relative position of Xi, H and pre-computed Htable is used in some
|
||||
// assembler modules, i.e. don't change the order!
|
||||
u128 Htable[GCM128_HTABLE_LEN];
|
||||
|
||||
gcm128_gmult_f gmult;
|
||||
gcm128_ghash_f ghash;
|
||||
aes_block_f block;
|
||||
|
||||
/* use_aesni_gcm_crypt is true if this context should use the assembly
|
||||
* functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data. */
|
||||
// use_aesni_gcm_crypt is true if this context should use the assembly
|
||||
// functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data.
|
||||
unsigned use_aesni_gcm_crypt:1;
|
||||
};
|
||||
|
||||
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
|
||||
/* GFp_gcm_clmul_enabled returns one if the CLMUL implementation of GCM is
|
||||
* used. */
|
||||
// GFp_gcm_clmul_enabled returns one if the CLMUL implementation of GCM is
|
||||
// used.
|
||||
int GFp_gcm_clmul_enabled(void);
|
||||
#endif
|
||||
|
||||
|
||||
/* CTR. */
|
||||
// CTR.
|
||||
|
||||
/* aes_ctr_f is the type of a function that performs CTR-mode encryption with
|
||||
* AES. */
|
||||
// aes_ctr_f is the type of a function that performs CTR-mode encryption with
|
||||
// AES.
|
||||
typedef void (*aes_ctr_f)(const uint8_t *in, uint8_t *out, size_t blocks,
|
||||
const AES_KEY *key, const uint8_t ivec[16]);
|
||||
|
||||
/* GCM.
|
||||
*
|
||||
* This API differs from the OpenSSL API slightly. The |GCM128_CONTEXT| does
|
||||
* not have a |key| pointer that points to the key as OpenSSL's version does.
|
||||
* Instead, every function takes a |key| parameter. This way |GCM128_CONTEXT|
|
||||
* can be safely copied. */
|
||||
// GCM.
|
||||
//
|
||||
// This API differs from the OpenSSL API slightly. The |GCM128_CONTEXT| does
|
||||
// not have a |key| pointer that points to the key as OpenSSL's version does.
|
||||
// Instead, every function takes a |key| parameter. This way |GCM128_CONTEXT|
|
||||
// can be safely copied.
|
||||
|
||||
typedef struct gcm128_context GCM128_CONTEXT;
|
||||
|
||||
@ -134,31 +134,31 @@ OPENSSL_EXPORT void GFp_gcm128_init(
|
||||
GCM128_CONTEXT *ctx, const AES_KEY *key, aes_block_f block,
|
||||
const uint8_t serialized_ctx[GCM128_SERIALIZED_LEN], const uint8_t *iv);
|
||||
|
||||
/* GFp_gcm128_aad sets the authenticated data for an instance of GCM. This must
|
||||
* be called before and data is encrypted. It returns one on success and zero
|
||||
* otherwise. */
|
||||
// GFp_gcm128_aad sets the authenticated data for an instance of GCM. This must
|
||||
// be called before and data is encrypted. It returns one on success and zero
|
||||
// otherwise.
|
||||
OPENSSL_EXPORT int GFp_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad,
|
||||
size_t len);
|
||||
|
||||
/* GFp_gcm128_encrypt_ctr32 encrypts |len| bytes from |in| to |out| using a CTR
|
||||
* function that only handles the bottom 32 bits of the nonce, like
|
||||
* |GFp_ctr128_encrypt_ctr32|. The |key| must be the same key that was passed
|
||||
* to |GFp_gcm128_init|. It returns one on success and zero otherwise. */
|
||||
// GFp_gcm128_encrypt_ctr32 encrypts |len| bytes from |in| to |out| using a CTR
|
||||
// function that only handles the bottom 32 bits of the nonce, like
|
||||
// |GFp_ctr128_encrypt_ctr32|. The |key| must be the same key that was passed
|
||||
// to |GFp_gcm128_init|. It returns one on success and zero otherwise.
|
||||
OPENSSL_EXPORT int GFp_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
const AES_KEY *key,
|
||||
const uint8_t *in, uint8_t *out,
|
||||
size_t len, aes_ctr_f stream);
|
||||
|
||||
/* GFp_gcm128_decrypt_ctr32 decrypts |len| bytes from |in| to |out| using a CTR
|
||||
* function that only handles the bottom 32 bits of the nonce, like
|
||||
* |GFp_ctr128_encrypt_ctr32|. The |key| must be the same key that was passed
|
||||
* to |GFp_gcm128_init|. It returns one on success and zero otherwise. */
|
||||
// GFp_gcm128_decrypt_ctr32 decrypts |len| bytes from |in| to |out| using a CTR
|
||||
// function that only handles the bottom 32 bits of the nonce, like
|
||||
// |GFp_ctr128_encrypt_ctr32|. The |key| must be the same key that was passed
|
||||
// to |GFp_gcm128_init|. It returns one on success and zero otherwise.
|
||||
OPENSSL_EXPORT int GFp_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
|
||||
const AES_KEY *key,
|
||||
const uint8_t *in, uint8_t *out,
|
||||
size_t len, aes_ctr_f stream);
|
||||
|
||||
/* GFp_gcm128_tag calculates the authenticator and copies it into |tag|. */
|
||||
// GFp_gcm128_tag calculates the authenticator and copies it into |tag|.
|
||||
OPENSSL_EXPORT void GFp_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t tag[16]);
|
||||
|
||||
|
||||
@ -170,7 +170,7 @@ void GFp_aesni_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_MODES_INTERNAL_H */
|
||||
#endif // OPENSSL_HEADER_MODES_INTERNAL_H
|
||||
|
@ -21,16 +21,16 @@
|
||||
#include "../../internal.h"
|
||||
|
||||
|
||||
/* byte_reverse reverses the order of the bytes in |b->c|. */
|
||||
// byte_reverse reverses the order of the bytes in |b->c|.
|
||||
static void byte_reverse(polyval_block *b) {
|
||||
const uint64_t t = CRYPTO_bswap8(b->u[0]);
|
||||
b->u[0] = CRYPTO_bswap8(b->u[1]);
|
||||
b->u[1] = t;
|
||||
}
|
||||
|
||||
/* reverse_and_mulX_ghash interprets the bytes |b->c| as a reversed element of
|
||||
* the GHASH field, multiplies that by 'x' and serialises the result back into
|
||||
* |b|, but with GHASH's backwards bit ordering. */
|
||||
// reverse_and_mulX_ghash interprets the bytes |b->c| as a reversed element of
|
||||
// the GHASH field, multiplies that by 'x' and serialises the result back into
|
||||
// |b|, but with GHASH's backwards bit ordering.
|
||||
static void reverse_and_mulX_ghash(polyval_block *b) {
|
||||
uint64_t hi = b->u[0];
|
||||
uint64_t lo = b->u[1];
|
||||
@ -44,11 +44,11 @@ static void reverse_and_mulX_ghash(polyval_block *b) {
|
||||
b->u[1] = CRYPTO_bswap8(hi);
|
||||
}
|
||||
|
||||
/* POLYVAL(H, X_1, ..., X_n) =
|
||||
* ByteReverse(GHASH(mulX_GHASH(ByteReverse(H)), ByteReverse(X_1), ...,
|
||||
* ByteReverse(X_n))).
|
||||
*
|
||||
* See https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#appendix-A. */
|
||||
// POLYVAL(H, X_1, ..., X_n) =
|
||||
// ByteReverse(GHASH(mulX_GHASH(ByteReverse(H)), ByteReverse(X_1), ...,
|
||||
// ByteReverse(X_n))).
|
||||
//
|
||||
// See https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#appendix-A.
|
||||
|
||||
void CRYPTO_POLYVAL_init(struct polyval_ctx *ctx, const uint8_t key[16]) {
|
||||
polyval_block H;
|
||||
|
@ -143,7 +143,7 @@ extern "C" {
|
||||
|
||||
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
|
||||
defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
|
||||
/* OPENSSL_cpuid_setup initializes the platform-specific feature cache. */
|
||||
// GFp_cpuid_setup initializes the platform-specific feature cache.
|
||||
void GFp_cpuid_setup(void);
|
||||
#endif
|
||||
|
||||
@ -178,29 +178,29 @@ typedef __uint128_t uint128_t;
|
||||
#endif
|
||||
|
||||
|
||||
/* Constant-time utility functions.
|
||||
*
|
||||
* The following methods return a bitmask of all ones (0xff...f) for true and 0
|
||||
* for false. This is useful for choosing a value based on the result of a
|
||||
* conditional in constant time. For example,
|
||||
*
|
||||
* if (a < b) {
|
||||
* c = a;
|
||||
* } else {
|
||||
* c = b;
|
||||
* }
|
||||
*
|
||||
* can be written as
|
||||
*
|
||||
* crypto_word_t lt = constant_time_lt_w(a, b);
|
||||
* c = constant_time_select_w(lt, a, b); */
|
||||
// Constant-time utility functions.
|
||||
//
|
||||
// The following methods return a bitmask of all ones (0xff...f) for true and 0
|
||||
// for false. This is useful for choosing a value based on the result of a
|
||||
// conditional in constant time. For example,
|
||||
//
|
||||
// if (a < b) {
|
||||
// c = a;
|
||||
// } else {
|
||||
// c = b;
|
||||
// }
|
||||
//
|
||||
// can be written as
|
||||
//
|
||||
// crypto_word_t lt = constant_time_lt_w(a, b);
|
||||
// c = constant_time_select_w(lt, a, b);
|
||||
|
||||
/* crypto_word_t is the type that most constant-time functions use. Ideally we
|
||||
* would like it to be |size_t|, but NaCl builds in 64-bit mode with 32-bit
|
||||
* pointers, which means that |size_t| can be 32 bits when |BN_ULONG| is 64
|
||||
* bits. Since we want to be able to do constant-time operations on a
|
||||
* |BN_ULONG|, |crypto_word_t| is defined as an unsigned value with the native
|
||||
* word length. */
|
||||
// crypto_word_t is the type that most constant-time functions use. Ideally we
|
||||
// would like it to be |size_t|, but NaCl builds in 64-bit mode with 32-bit
|
||||
// pointers, which means that |size_t| can be 32 bits when |BN_ULONG| is 64
|
||||
// bits. Since we want to be able to do constant-time operations on a
|
||||
// |BN_ULONG|, |crypto_word_t| is defined as an unsigned value with the native
|
||||
// word length.
|
||||
#if defined(OPENSSL_64_BIT)
|
||||
typedef uint64_t crypto_word_t;
|
||||
#elif defined(OPENSSL_32_BIT)
|
||||
@ -214,26 +214,25 @@ typedef uint32_t crypto_word_t;
|
||||
#define CONSTTIME_TRUE_8 ((uint8_t)0xff)
|
||||
#define CONSTTIME_FALSE_8 ((uint8_t)0)
|
||||
|
||||
/* constant_time_msb_w returns the given value with the MSB copied to all the
|
||||
* other bits. */
|
||||
// constant_time_msb_w returns the given value with the MSB copied to all the
|
||||
// other bits.
|
||||
static inline crypto_word_t constant_time_msb_w(crypto_word_t a) {
|
||||
return 0u - (a >> (sizeof(a) * 8 - 1));
|
||||
}
|
||||
|
||||
/* constant_time_is_zero returns 0xff..f if a == 0 and 0 otherwise. */
|
||||
// constant_time_is_zero_w returns 0xff..f if a == 0 and 0 otherwise.
|
||||
static inline crypto_word_t constant_time_is_zero_w(crypto_word_t a) {
|
||||
/* Here is an SMT-LIB verification of this formula:
|
||||
*
|
||||
* (define-fun is_zero ((a (_ BitVec 32))) (_ BitVec 32)
|
||||
* (bvand (bvnot a) (bvsub a #x00000001))
|
||||
* )
|
||||
*
|
||||
* (declare-fun a () (_ BitVec 32))
|
||||
*
|
||||
* (assert (not (= (= #x00000001 (bvlshr (is_zero a) #x0000001f)) (= a #x00000000))))
|
||||
* (check-sat)
|
||||
* (get-model)
|
||||
*/
|
||||
// Here is an SMT-LIB verification of this formula:
|
||||
//
|
||||
// (define-fun is_zero ((a (_ BitVec 32))) (_ BitVec 32)
|
||||
// (bvand (bvnot a) (bvsub a #x00000001))
|
||||
// )
|
||||
//
|
||||
// (declare-fun a () (_ BitVec 32))
|
||||
//
|
||||
// (assert (not (= (= #x00000001 (bvlshr (is_zero a) #x0000001f)) (= a #x00000000))))
|
||||
// (check-sat)
|
||||
// (get-model)
|
||||
return constant_time_msb_w(~a & (a - 1));
|
||||
}
|
||||
|
||||
@ -241,42 +240,41 @@ static inline crypto_word_t constant_time_is_nonzero_w(crypto_word_t a) {
|
||||
return ~constant_time_is_zero_w(a);
|
||||
}
|
||||
|
||||
/* constant_time_is_zero_8 acts like |constant_time_is_zero_w| but returns an
|
||||
* 8-bit mask. */
|
||||
// constant_time_is_zero_8 acts like |constant_time_is_zero_w| but returns an
|
||||
// 8-bit mask.
|
||||
static inline uint8_t constant_time_is_zero_8(crypto_word_t a) {
|
||||
return (uint8_t)(constant_time_is_zero_w(a));
|
||||
}
|
||||
|
||||
/* constant_time_eq_s returns 0xff..f if a == b and 0 otherwise. */
|
||||
// constant_time_eq_w returns 0xff..f if a == b and 0 otherwise.
|
||||
static inline crypto_word_t constant_time_eq_w(crypto_word_t a,
|
||||
crypto_word_t b) {
|
||||
return constant_time_is_zero_w(a ^ b);
|
||||
}
|
||||
|
||||
/* constant_time_eq_int acts like |constant_time_eq_w| but works on int
|
||||
* values. */
|
||||
// constant_time_eq_int acts like |constant_time_eq_w| but works on int
|
||||
// values.
|
||||
static inline crypto_word_t constant_time_eq_int(int a, int b) {
|
||||
return constant_time_eq_w((crypto_word_t)(a), (crypto_word_t)(b));
|
||||
}
|
||||
|
||||
/* constant_time_select_w returns (mask & a) | (~mask & b). When |mask| is all
|
||||
* 1s or all 0s (as returned by the methods above), the select methods return
|
||||
* either |a| (if |mask| is nonzero) or |b| (if |mask| is zero). */
|
||||
// constant_time_select_w returns (mask & a) | (~mask & b). When |mask| is all
|
||||
// 1s or all 0s (as returned by the methods above), the select methods return
|
||||
// either |a| (if |mask| is nonzero) or |b| (if |mask| is zero).
|
||||
static inline crypto_word_t constant_time_select_w(crypto_word_t mask,
|
||||
crypto_word_t a,
|
||||
crypto_word_t b) {
|
||||
return (mask & a) | (~mask & b);
|
||||
}
|
||||
|
||||
/* from_be_u32_ptr returns the 32-bit big-endian-encoded value at |data|. */
|
||||
// from_be_u32_ptr returns the 32-bit big-endian-encoded value at |data|.
|
||||
static inline uint32_t from_be_u32_ptr(const uint8_t *data) {
|
||||
#if defined(__clang__) || defined(_MSC_VER)
|
||||
/* XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
* well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
* https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
* http://blog.regehr.org/archives/702, and
|
||||
* http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
*/
|
||||
// XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
// well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
// https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
// http://blog.regehr.org/archives/702, and
|
||||
// http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
uint32_t value;
|
||||
memcpy(&value, data, sizeof(value));
|
||||
#if OPENSSL_ENDIAN != OPENSSL_BIG_ENDIAN
|
||||
@ -292,15 +290,14 @@ static inline uint32_t from_be_u32_ptr(const uint8_t *data) {
|
||||
}
|
||||
|
||||
|
||||
/* from_be_u64_ptr returns the 64-bit big-endian-encoded value at |data|. */
|
||||
// from_be_u64_ptr returns the 64-bit big-endian-encoded value at |data|.
|
||||
static inline uint64_t from_be_u64_ptr(const uint8_t *data) {
|
||||
#if defined(__clang__) || defined(_MSC_VER)
|
||||
/* XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
* well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
* https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
* http://blog.regehr.org/archives/702, and
|
||||
* http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
*/
|
||||
// XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
// well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
// https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
// http://blog.regehr.org/archives/702, and
|
||||
// http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
uint64_t value;
|
||||
memcpy(&value, data, sizeof(value));
|
||||
#if OPENSSL_ENDIAN != OPENSSL_BIG_ENDIAN
|
||||
@ -319,16 +316,15 @@ static inline uint64_t from_be_u64_ptr(const uint8_t *data) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/* to_be_u32_ptr writes the value |x| to the location |out| in big-endian
|
||||
order. */
|
||||
// to_be_u32_ptr writes the value |x| to the location |out| in big-endian
|
||||
// order.
|
||||
static inline void to_be_u32_ptr(uint8_t *out, uint32_t value) {
|
||||
#if defined(__clang__) || defined(_MSC_VER)
|
||||
/* XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
* well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
* https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
* http://blog.regehr.org/archives/702, and
|
||||
* http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
*/
|
||||
// XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
// well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
// https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
// http://blog.regehr.org/archives/702, and
|
||||
// http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
#if OPENSSL_ENDIAN != OPENSSL_BIG_ENDIAN
|
||||
value = bswap_u32(value);
|
||||
#endif
|
||||
@ -341,16 +337,15 @@ static inline void to_be_u32_ptr(uint8_t *out, uint32_t value) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/* to_be_u64_ptr writes the value |value| to the location |out| in big-endian
|
||||
order. */
|
||||
// to_be_u64_ptr writes the value |value| to the location |out| in big-endian
|
||||
// order.
|
||||
static inline void to_be_u64_ptr(uint8_t *out, uint64_t value) {
|
||||
#if defined(__clang__) || defined(_MSC_VER)
|
||||
/* XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
* well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
* https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
* http://blog.regehr.org/archives/702, and
|
||||
* http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
*/
|
||||
// XXX: Unlike GCC, Clang doesn't optimize compliant access to unaligned data
|
||||
// well. See https://llvm.org/bugs/show_bug.cgi?id=20605,
|
||||
// https://llvm.org/bugs/show_bug.cgi?id=17603,
|
||||
// http://blog.regehr.org/archives/702, and
|
||||
// http://blog.regehr.org/archives/1055. MSVC seems to have similar problems.
|
||||
#if OPENSSL_ENDIAN != OPENSSL_BIG_ENDIAN
|
||||
value = bswap_u64(value);
|
||||
#endif
|
||||
@ -367,8 +362,8 @@ static inline void to_be_u64_ptr(uint8_t *out, uint64_t value) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/* from_be_u64 returns the native representation of the 64-bit
|
||||
* big-endian-encoded value |x|. */
|
||||
// from_be_u64 returns the native representation of the 64-bit
|
||||
// big-endian-encoded value |x|.
|
||||
static inline uint64_t from_be_u64(uint64_t x) {
|
||||
#if OPENSSL_ENDIAN != OPENSSL_BIG_ENDIAN
|
||||
x = bswap_u64(x);
|
||||
@ -378,7 +373,7 @@ static inline uint64_t from_be_u64(uint64_t x) {
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_CRYPTO_INTERNAL_H */
|
||||
#endif // OPENSSL_HEADER_CRYPTO_INTERNAL_H
|
||||
|
@ -54,7 +54,7 @@
|
||||
#define OPENSSL_HEADER_BASE_H
|
||||
|
||||
|
||||
/* This file should be the first included by all BoringSSL headers. */
|
||||
// This file should be the first included by all BoringSSL headers.
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
@ -92,10 +92,10 @@ extern "C" {
|
||||
#elif defined(__myriad2__)
|
||||
#define OPENSSL_32_BIT
|
||||
#else
|
||||
/* Note BoringSSL only supports standard 32-bit and 64-bit two's-complement,
|
||||
* little-endian architectures. Functions will not produce the correct answer
|
||||
* on other systems. Run the crypto_test binary, notably
|
||||
* crypto/compiler_test.cc, before adding a new architecture. */
|
||||
// Note BoringSSL only supports standard 32-bit and 64-bit two's-complement,
|
||||
// little-endian architectures. Functions will not produce the correct answer
|
||||
// on other systems. Run the crypto_test binary, notably
|
||||
// crypto/compiler_test.cc, before adding a new architecture.
|
||||
#error "Unknown target CPU"
|
||||
#endif
|
||||
|
||||
@ -111,9 +111,9 @@ extern "C" {
|
||||
#define OPENSSL_IS_RING
|
||||
#define OPENSSL_VERSION_NUMBER 0x10002000
|
||||
|
||||
/* *ring* doesn't support the `BORINGSSL_SHARED_LIBRARY` configuration, so
|
||||
* the default (usually "hidden") visibility is always used, even for exported
|
||||
* items. */
|
||||
// *ring* doesn't support the `BORINGSSL_SHARED_LIBRARY` configuration, so
|
||||
// the default (usually "hidden") visibility is always used, even for exported
|
||||
// items.
|
||||
#define OPENSSL_EXPORT
|
||||
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
@ -127,7 +127,7 @@ typedef struct bignum_st BIGNUM;
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern C */
|
||||
} // extern C
|
||||
#endif
|
||||
|
||||
#endif /* OPENSSL_HEADER_BASE_H */
|
||||
#endif // OPENSSL_HEADER_BASE_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user