Factor out gfp_little_endian_bytes_from_scalar from ecp_nistz256.

Factor out duplicate code. Parameterize the code on the size of the
scalar it works on and move it to a place that can be used by the
upcoming ecp_nistz384.
This commit is contained in:
Brian Smith 2016-07-07 21:13:23 -10:00
parent fecb4c95df
commit ed27bbb24d
5 changed files with 71 additions and 41 deletions

55
crypto/ec/ecp_nistz.c Normal file
View File

@ -0,0 +1,55 @@
/* Copyright (c) 2014, Intel Corporation.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include "ecp_nistz.h"
#include <assert.h>
#include <openssl/bn.h>
#include "../bn/internal.h"
/* Serializes |scalar| (an array of |num_limbs| limbs) into |str| as
 * little-endian bytes, then zero-pads |str| out to |str_len| bytes.
 * |str_len| must be exactly one byte larger than the minimal lossless
 * encoding of |num_limbs| limbs; the guaranteed-zero trailing byte lets the
 * caller read scalar windows of widths (5 or 7 bits) that don't divide 8
 * without running off the end of the buffer. */
void gfp_little_endian_bytes_from_scalar(uint8_t str[], size_t str_len,
                                         const BN_ULONG scalar[],
                                         size_t num_limbs) {
  assert(str_len == (num_limbs * BN_BYTES) + 1);

  /* Emit each limb least-significant byte first. The byte loop count is the
   * compile-time constant BN_BYTES, so this covers both 32- and 64-bit
   * limbs uniformly. */
  size_t out = 0;
  size_t limb;
  for (limb = 0; limb < num_limbs; limb++) {
    BN_ULONG d = scalar[limb];
    size_t byte;
    for (byte = 0; byte < BN_BYTES; byte++) {
      str[out] = (uint8_t)(d & 0xff);
      d >>= 8;
      out++;
    }
  }

  /* Zero the remainder, including the extra trailing byte. */
  while (out < str_len) {
    str[out] = 0;
    out++;
  }
}

View File

@ -121,6 +121,11 @@ static inline void booth_recode(BN_ULONG *is_negative, unsigned *digit,
}
void gfp_little_endian_bytes_from_scalar(uint8_t str[], size_t str_len,
const BN_ULONG scalar[],
size_t num_limbs);
#if defined(__cplusplus)
}
#endif

View File

@ -93,32 +93,14 @@ void ecp_nistz256_point_mul(P256_POINT *r, const BN_ULONG p_scalar[P256_LIMBS],
static const unsigned kWindowSize = 5;
static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
uint8_t p_str[(P256_LIMBS * BN_BYTES) + 1];
gfp_little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
p_scalar, P256_LIMBS);
/* A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
* add no more than 63 bytes of overhead. Thus, |table| should require
* ~1599 ((96 * 16) + 63) bytes of stack space. */
alignas(64) P256_POINT table[16];
uint8_t p_str[33];
int j;
for (j = 0; j < P256_LIMBS * BN_BYTES; j += BN_BYTES) {
BN_ULONG d = p_scalar[j / BN_BYTES];
p_str[j + 0] = d & 0xff;
p_str[j + 1] = (d >> 8) & 0xff;
p_str[j + 2] = (d >> 16) & 0xff;
p_str[j + 3] = (d >>= 24) & 0xff;
if (BN_BYTES == 8) {
d >>= 8;
p_str[j + 4] = d & 0xff;
p_str[j + 5] = (d >> 8) & 0xff;
p_str[j + 6] = (d >> 16) & 0xff;
p_str[j + 7] = (d >> 24) & 0xff;
}
}
for (; j < 33; j++) {
p_str[j] = 0;
}
/* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
* not stored. All other values are actually stored with an offset of -1 in
@ -211,6 +193,10 @@ void ecp_nistz256_point_mul_base(P256_POINT *r,
static const unsigned kWindowSize = 7;
static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
uint8_t p_str[(P256_LIMBS * BN_BYTES) + 1];
gfp_little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
g_scalar, P256_LIMBS);
typedef union {
P256_POINT p;
P256_POINT_AFFINE a;
@ -219,24 +205,6 @@ void ecp_nistz256_point_mul_base(P256_POINT *r,
alignas(32) P256_POINT_UNION p;
alignas(32) P256_POINT_UNION t;
uint8_t p_str[33] = {0};
int i;
for (i = 0; i < P256_LIMBS * BN_BYTES; i += BN_BYTES) {
BN_ULONG d = g_scalar[i / BN_BYTES];
p_str[i + 0] = d & 0xff;
p_str[i + 1] = (d >> 8) & 0xff;
p_str[i + 2] = (d >> 16) & 0xff;
p_str[i + 3] = (d >>= 24) & 0xff;
if (BN_BYTES == 8) {
d >>= 8;
p_str[i + 4] = d & 0xff;
p_str[i + 5] = (d >> 8) & 0xff;
p_str[i + 6] = (d >> 16) & 0xff;
p_str[i + 7] = (d >> 24) & 0xff;
}
}
/* First window */
unsigned index = kWindowSize;
@ -255,7 +223,7 @@ void ecp_nistz256_point_mul_base(P256_POINT *r,
memcpy(p.p.Z, ONE, sizeof(ONE));
for (i = 1; i < 37; i++) {
for (size_t i = 1; i < 37; i++) {
unsigned off = (index - 1) / 8;
raw_wvalue = p_str[off] | p_str[off + 1] << 8;
raw_wvalue = (raw_wvalue >> ((index - 1) % 8)) & kMask;

View File

@ -37,6 +37,7 @@
<ClCompile Include="ec\ec.c" />
<ClCompile Include="ec\ec_curves.c" />
<ClCompile Include="ec\ec_montgomery.c" />
<ClCompile Include="ec\ecp_nistz.c" />
<ClCompile Include="ec\ecp_nistz256.c" />
<ClCompile Include="ec\gfp_constant_time.c" />
<ClCompile Include="ec\gfp_p256.c" />

View File

@ -38,6 +38,7 @@ RING_SRCS = $(addprefix $(RING_PREFIX), \
crypto/ec/ec.c \
crypto/ec/ec_curves.c \
crypto/ec/ec_montgomery.c \
crypto/ec/ecp_nistz.c \
crypto/ec/ecp_nistz256.c \
crypto/ec/gfp_constant_time.c \
crypto/ec/gfp_p256.c \