Factor out gfp_little_endian_bytes_from_scalar from ecp_nistz256.

Factor out duplicate code. Parameterize the code on the size of the
scalar it works on and move it to a place that can be used by the
upcoming ecp_nistz384.
This commit is contained in:
Brian Smith 2016-07-07 21:13:23 -10:00
parent fecb4c95df
commit ed27bbb24d
5 changed files with 71 additions and 41 deletions

55
crypto/ec/ecp_nistz.c Normal file
View File

@ -0,0 +1,55 @@
/* Copyright (c) 2014, Intel Corporation.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include "ecp_nistz.h"
#include <assert.h>
#include <openssl/bn.h>
#include "../bn/internal.h"
/* Serializes |scalar| (an array of |num_limbs| limbs) into |str| as
 * little-endian bytes, then zero-pads |str| out to |str_len| bytes.
 * |str_len| must be exactly one byte larger than the minimal lossless
 * encoding of |num_limbs| limbs; the guaranteed-zero trailing byte lets the
 * caller read scalar windows of widths (5 or 7 bits) that don't divide 8
 * without running off the end of the buffer. */
void gfp_little_endian_bytes_from_scalar(uint8_t str[], size_t str_len,
                                         const BN_ULONG scalar[],
                                         size_t num_limbs) {
  assert(str_len == (num_limbs * BN_BYTES) + 1);

  /* Emit each limb least-significant byte first. The byte loop count is the
   * compile-time constant BN_BYTES, so this covers both 32- and 64-bit
   * limbs uniformly. */
  size_t out = 0;
  size_t limb;
  for (limb = 0; limb < num_limbs; limb++) {
    BN_ULONG d = scalar[limb];
    size_t byte;
    for (byte = 0; byte < BN_BYTES; byte++) {
      str[out] = (uint8_t)(d & 0xff);
      d >>= 8;
      out++;
    }
  }

  /* Zero the remainder, including the extra trailing byte. */
  while (out < str_len) {
    str[out] = 0;
    out++;
  }
}

View File

@ -121,6 +121,11 @@ static inline void booth_recode(BN_ULONG *is_negative, unsigned *digit,
}
void gfp_little_endian_bytes_from_scalar(uint8_t str[], size_t str_len,
const BN_ULONG scalar[],
size_t num_limbs);
#if defined(__cplusplus)
}
#endif

View File

@ -93,32 +93,14 @@ void ecp_nistz256_point_mul(P256_POINT *r, const BN_ULONG p_scalar[P256_LIMBS],
static const unsigned kWindowSize = 5;
static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
uint8_t p_str[(P256_LIMBS * BN_BYTES) + 1];
gfp_little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
p_scalar, P256_LIMBS);
/* A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
* add no more than 63 bytes of overhead. Thus, |table| should require
* ~1599 ((96 * 16) + 63) bytes of stack space. */
alignas(64) P256_POINT table[16];
uint8_t p_str[33];
int j;
for (j = 0; j < P256_LIMBS * BN_BYTES; j += BN_BYTES) {
BN_ULONG d = p_scalar[j / BN_BYTES];
p_str[j + 0] = d & 0xff;
p_str[j + 1] = (d >> 8) & 0xff;
p_str[j + 2] = (d >> 16) & 0xff;
p_str[j + 3] = (d >>= 24) & 0xff;
if (BN_BYTES == 8) {
d >>= 8;
p_str[j + 4] = d & 0xff;
p_str[j + 5] = (d >> 8) & 0xff;
p_str[j + 6] = (d >> 16) & 0xff;
p_str[j + 7] = (d >> 24) & 0xff;
}
}
for (; j < 33; j++) {
p_str[j] = 0;
}
/* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
* not stored. All other values are actually stored with an offset of -1 in
@ -211,6 +193,10 @@ void ecp_nistz256_point_mul_base(P256_POINT *r,
static const unsigned kWindowSize = 7;
static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
uint8_t p_str[(P256_LIMBS * BN_BYTES) + 1];
gfp_little_endian_bytes_from_scalar(p_str, sizeof(p_str) / sizeof(p_str[0]),
g_scalar, P256_LIMBS);
typedef union {
P256_POINT p;
P256_POINT_AFFINE a;
@ -219,24 +205,6 @@ void ecp_nistz256_point_mul_base(P256_POINT *r,
alignas(32) P256_POINT_UNION p;
alignas(32) P256_POINT_UNION t;
uint8_t p_str[33] = {0};
int i;
for (i = 0; i < P256_LIMBS * BN_BYTES; i += BN_BYTES) {
BN_ULONG d = g_scalar[i / BN_BYTES];
p_str[i + 0] = d & 0xff;
p_str[i + 1] = (d >> 8) & 0xff;
p_str[i + 2] = (d >> 16) & 0xff;
p_str[i + 3] = (d >>= 24) & 0xff;
if (BN_BYTES == 8) {
d >>= 8;
p_str[i + 4] = d & 0xff;
p_str[i + 5] = (d >> 8) & 0xff;
p_str[i + 6] = (d >> 16) & 0xff;
p_str[i + 7] = (d >> 24) & 0xff;
}
}
/* First window */
unsigned index = kWindowSize;
@ -255,7 +223,7 @@ void ecp_nistz256_point_mul_base(P256_POINT *r,
memcpy(p.p.Z, ONE, sizeof(ONE));
for (i = 1; i < 37; i++) {
for (size_t i = 1; i < 37; i++) {
unsigned off = (index - 1) / 8;
raw_wvalue = p_str[off] | p_str[off + 1] << 8;
raw_wvalue = (raw_wvalue >> ((index - 1) % 8)) & kMask;

View File

@ -37,6 +37,7 @@
<ClCompile Include="ec\ec.c" />
<ClCompile Include="ec\ec_curves.c" />
<ClCompile Include="ec\ec_montgomery.c" />
<ClCompile Include="ec\ecp_nistz.c" />
<ClCompile Include="ec\ecp_nistz256.c" />
<ClCompile Include="ec\gfp_constant_time.c" />
<ClCompile Include="ec\gfp_p256.c" />

View File

@ -38,6 +38,7 @@ RING_SRCS = $(addprefix $(RING_PREFIX), \
crypto/ec/ec.c \
crypto/ec/ec_curves.c \
crypto/ec/ec_montgomery.c \
crypto/ec/ecp_nistz.c \
crypto/ec/ecp_nistz256.c \
crypto/ec/gfp_constant_time.c \
crypto/ec/gfp_p256.c \