Merge BoringSSL 884086e: Remove x86_64 x25519 assembly.

2018-05-01 13:02:12 -10:00 · 2018-05-01 13:02:12 -10:00 · 82804e9460
commit 82804e9460
parent 8b99aa0cd8 884086e0e2
5 changed files with 0 additions and 2190 deletions
--- a/build.rs
+++ b/build.rs
@@ -91,8 +91,6 @@ const RING_SRCS: &'static [(&'static [&'static str], &'static str)] = &[
    (&[X86], "crypto/fipsmodule/sha/asm/sha256-586.pl"),
    (&[X86], "crypto/fipsmodule/sha/asm/sha512-586.pl"),

-    (&[X86_64], "crypto/curve25519/x25519-x86_64.c"),
-
    (&[X86_64], "crypto/fipsmodule/aes/asm/aes-x86_64.pl"),
    (&[X86_64], "crypto/fipsmodule/aes/asm/aesni-x86_64.pl"),
    (&[X86_64], "crypto/fipsmodule/aes/asm/bsaes-x86_64.pl"),
@@ -101,7 +99,6 @@ const RING_SRCS: &'static [(&'static [&'static str], &'static str)] = &[
    (&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"),
    (&[X86_64], "crypto/chacha/asm/chacha-x86_64.pl"),
    (&[NEVER], "crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl"),
-    (&[X86_64], "crypto/curve25519/asm/x25519-asm-x86_64.S"),
    (&[X86_64], "crypto/fipsmodule/ec/asm/ecp_nistz256-x86_64.pl"),
    (&[X86_64], "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"),
    (&[X86_64], "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"),
--- a/crypto/curve25519/asm/x25519-asm-x86_64.S
+++ b/crypto/curve25519/asm/x25519-asm-x86_64.S
--- a/crypto/curve25519/x25519-x86_64.c
+++ b/crypto/curve25519/x25519-x86_64.c
@@ -1,245 +0,0 @@
-/* Copyright (c) 2015, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
-// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
-// public domain but this file has the ISC license just to keep licencing
-// simple.
-//
-// The field functions are shared by Ed25519 and X25519 where possible.
-
-#include <string.h>
-
-#include "../internal.h"
-#include "../../third_party/fiat/internal.h"
-
-
-#if defined(BORINGSSL_X25519_X86_64)
-
-typedef struct { uint64_t v[5]; } fe25519;
-
-// These functions are defined in asm/x25519-asm-x86_64.S.
-void GFp_x25519_x86_64_work_cswap(fe25519 *, uint64_t);
-void GFp_x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b);
-void GFp_x25519_x86_64_square(fe25519 *out, const fe25519 *a);
-void GFp_x25519_x86_64_freeze(fe25519 *);
-void GFp_x25519_x86_64_ladderstep(fe25519 *work);
-
-static void fe25519_setint(fe25519 *r, unsigned v) {
-  r->v[0] = v;
-  r->v[1] = 0;
-  r->v[2] = 0;
-  r->v[3] = 0;
-  r->v[4] = 0;
-}
-
-// Assumes input x being reduced below 2^255
-static void fe25519_pack(unsigned char r[32], const fe25519 *x) {
-  fe25519 t;
-  t = *x;
-  GFp_x25519_x86_64_freeze(&t);
-
-  r[0] = (uint8_t)(t.v[0] & 0xff);
-  r[1] = (uint8_t)((t.v[0] >> 8) & 0xff);
-  r[2] = (uint8_t)((t.v[0] >> 16) & 0xff);
-  r[3] = (uint8_t)((t.v[0] >> 24) & 0xff);
-  r[4] = (uint8_t)((t.v[0] >> 32) & 0xff);
-  r[5] = (uint8_t)((t.v[0] >> 40) & 0xff);
-  r[6] = (uint8_t)((t.v[0] >> 48));
-
-  r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8);
-  r[7] = (uint8_t)((t.v[1] >> 5) & 0xff);
-  r[8] = (uint8_t)((t.v[1] >> 13) & 0xff);
-  r[9] = (uint8_t)((t.v[1] >> 21) & 0xff);
-  r[10] = (uint8_t)((t.v[1] >> 29) & 0xff);
-  r[11] = (uint8_t)((t.v[1] >> 37) & 0xff);
-  r[12] = (uint8_t)((t.v[1] >> 45));
-
-  r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0);
-  r[13] = (uint8_t)((t.v[2] >> 2) & 0xff);
-  r[14] = (uint8_t)((t.v[2] >> 10) & 0xff);
-  r[15] = (uint8_t)((t.v[2] >> 18) & 0xff);
-  r[16] = (uint8_t)((t.v[2] >> 26) & 0xff);
-  r[17] = (uint8_t)((t.v[2] >> 34) & 0xff);
-  r[18] = (uint8_t)((t.v[2] >> 42) & 0xff);
-  r[19] = (uint8_t)((t.v[2] >> 50));
-
-  r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe);
-  r[20] = (uint8_t)((t.v[3] >> 7) & 0xff);
-  r[21] = (uint8_t)((t.v[3] >> 15) & 0xff);
-  r[22] = (uint8_t)((t.v[3] >> 23) & 0xff);
-  r[23] = (uint8_t)((t.v[3] >> 31) & 0xff);
-  r[24] = (uint8_t)((t.v[3] >> 39) & 0xff);
-  r[25] = (uint8_t)((t.v[3] >> 47));
-
-  r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0);
-  r[26] = (uint8_t)((t.v[4] >> 4) & 0xff);
-  r[27] = (uint8_t)((t.v[4] >> 12) & 0xff);
-  r[28] = (uint8_t)((t.v[4] >> 20) & 0xff);
-  r[29] = (uint8_t)((t.v[4] >> 28) & 0xff);
-  r[30] = (uint8_t)((t.v[4] >> 36) & 0xff);
-  r[31] = (uint8_t)((t.v[4] >> 44));
-}
-
-static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) {
-  r->v[0] = x[0];
-  r->v[0] += (uint64_t)x[1] << 8;
-  r->v[0] += (uint64_t)x[2] << 16;
-  r->v[0] += (uint64_t)x[3] << 24;
-  r->v[0] += (uint64_t)x[4] << 32;
-  r->v[0] += (uint64_t)x[5] << 40;
-  r->v[0] += ((uint64_t)x[6] & 7) << 48;
-
-  r->v[1] = x[6] >> 3;
-  r->v[1] += (uint64_t)x[7] << 5;
-  r->v[1] += (uint64_t)x[8] << 13;
-  r->v[1] += (uint64_t)x[9] << 21;
-  r->v[1] += (uint64_t)x[10] << 29;
-  r->v[1] += (uint64_t)x[11] << 37;
-  r->v[1] += ((uint64_t)x[12] & 63) << 45;
-
-  r->v[2] = x[12] >> 6;
-  r->v[2] += (uint64_t)x[13] << 2;
-  r->v[2] += (uint64_t)x[14] << 10;
-  r->v[2] += (uint64_t)x[15] << 18;
-  r->v[2] += (uint64_t)x[16] << 26;
-  r->v[2] += (uint64_t)x[17] << 34;
-  r->v[2] += (uint64_t)x[18] << 42;
-  r->v[2] += ((uint64_t)x[19] & 1) << 50;
-
-  r->v[3] = x[19] >> 1;
-  r->v[3] += (uint64_t)x[20] << 7;
-  r->v[3] += (uint64_t)x[21] << 15;
-  r->v[3] += (uint64_t)x[22] << 23;
-  r->v[3] += (uint64_t)x[23] << 31;
-  r->v[3] += (uint64_t)x[24] << 39;
-  r->v[3] += ((uint64_t)x[25] & 15) << 47;
-
-  r->v[4] = x[25] >> 4;
-  r->v[4] += (uint64_t)x[26] << 4;
-  r->v[4] += (uint64_t)x[27] << 12;
-  r->v[4] += (uint64_t)x[28] << 20;
-  r->v[4] += (uint64_t)x[29] << 28;
-  r->v[4] += (uint64_t)x[30] << 36;
-  r->v[4] += ((uint64_t)x[31] & 127) << 44;
-}
-
-static void fe25519_invert(fe25519 *r, const fe25519 *x) {
-  fe25519 z2;
-  fe25519 z9;
-  fe25519 z11;
-  fe25519 z2_5_0;
-  fe25519 z2_10_0;
-  fe25519 z2_20_0;
-  fe25519 z2_50_0;
-  fe25519 z2_100_0;
-  fe25519 t;
-  int i;
-
-  /* 2 */ GFp_x25519_x86_64_square(&z2, x);
-  /* 4 */ GFp_x25519_x86_64_square(&t, &z2);
-  /* 8 */ GFp_x25519_x86_64_square(&t, &t);
-  /* 9 */ GFp_x25519_x86_64_mul(&z9, &t, x);
-  /* 11 */ GFp_x25519_x86_64_mul(&z11, &z9, &z2);
-  /* 22 */ GFp_x25519_x86_64_square(&t, &z11);
-  /* 2^5 - 2^0 = 31 */ GFp_x25519_x86_64_mul(&z2_5_0, &t, &z9);
-
-  /* 2^6 - 2^1 */ GFp_x25519_x86_64_square(&t, &z2_5_0);
-  /* 2^10 - 2^5 */ for (i = 1; i < 5; i++) { GFp_x25519_x86_64_square(&t, &t); }
-  /* 2^10 - 2^0 */ GFp_x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0);
-
-  /* 2^11 - 2^1 */ GFp_x25519_x86_64_square(&t, &z2_10_0);
-  /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { GFp_x25519_x86_64_square(&t, &t); }
-  /* 2^20 - 2^0 */ GFp_x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0);
-
-  /* 2^21 - 2^1 */ GFp_x25519_x86_64_square(&t, &z2_20_0);
-  /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { GFp_x25519_x86_64_square(&t, &t); }
-  /* 2^40 - 2^0 */ GFp_x25519_x86_64_mul(&t, &t, &z2_20_0);
-
-  /* 2^41 - 2^1 */ GFp_x25519_x86_64_square(&t, &t);
-  /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { GFp_x25519_x86_64_square(&t, &t); }
-  /* 2^50 - 2^0 */ GFp_x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0);
-
-  /* 2^51 - 2^1 */ GFp_x25519_x86_64_square(&t, &z2_50_0);
-  /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { GFp_x25519_x86_64_square(&t, &t); }
-  /* 2^100 - 2^0 */ GFp_x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0);
-
-  /* 2^101 - 2^1 */ GFp_x25519_x86_64_square(&t, &z2_100_0);
-  /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) {
-    GFp_x25519_x86_64_square(&t, &t);
-  }
-  /* 2^200 - 2^0 */ GFp_x25519_x86_64_mul(&t, &t, &z2_100_0);
-
-  /* 2^201 - 2^1 */ GFp_x25519_x86_64_square(&t, &t);
-  /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { GFp_x25519_x86_64_square(&t, &t); }
-  /* 2^250 - 2^0 */ GFp_x25519_x86_64_mul(&t, &t, &z2_50_0);
-
-  /* 2^251 - 2^1 */ GFp_x25519_x86_64_square(&t, &t);
-  /* 2^252 - 2^2 */ GFp_x25519_x86_64_square(&t, &t);
-  /* 2^253 - 2^3 */ GFp_x25519_x86_64_square(&t, &t);
-
-  /* 2^254 - 2^4 */ GFp_x25519_x86_64_square(&t, &t);
-
-  /* 2^255 - 2^5 */ GFp_x25519_x86_64_square(&t, &t);
-  /* 2^255 - 21 */ GFp_x25519_x86_64_mul(r, &t, &z11);
-}
-
-static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) {
-  fe25519 work[5];
-
-  work[0] = *xr;
-  fe25519_setint(work + 1, 1);
-  fe25519_setint(work + 2, 0);
-  work[3] = *xr;
-  fe25519_setint(work + 4, 1);
-
-  int i, j;
-  uint8_t prevbit = 0;
-
-  j = 6;
-  for (i = 31; i >= 0; i--) {
-    while (j >= 0) {
-      const uint8_t bit = 1 & (s[i] >> j);
-      const uint64_t swap = bit ^ prevbit;
-      prevbit = bit;
-      GFp_x25519_x86_64_work_cswap(work + 1, swap);
-      GFp_x25519_x86_64_ladderstep(work);
-      j -= 1;
-    }
-    j = 7;
-  }
-
-  *xr = work[1];
-  *zr = work[2];
-}
-
-void GFp_x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
-                       const uint8_t point[32]) {
-  uint8_t e[32];
-  memcpy(e, scalar, sizeof(e));
-
-  e[0] &= 248;
-  e[31] &= 127;
-  e[31] |= 64;
-
-  fe25519 t;
-  fe25519 z;
-  fe25519_unpack(&t, point);
-  mladder(&t, &z, e);
-  fe25519_invert(&z, &z);
-  GFp_x25519_x86_64_mul(&t, &t, &z);
-  fe25519_pack(out, &t);
-}
-
-#endif  // BORINGSSL_X25519_X86_64
--- a/third_party/fiat/curve25519.c
+++ b/third_party/fiat/curve25519.c
@@ -513,8 +513,6 @@ static void fe_sq_tt(fe *h, const fe *f) {
  fe_sqr_impl(h->v, f->v);
 }

-#if !defined(BORINGSSL_X25519_X86_64)
-
 // Replace (f,g) with (g,f) if b == 1;
 // replace (f,g) with (f,g) if b == 0.
 //
@@ -590,8 +588,6 @@ static void fe_mul121666(fe *h, const fe_loose *f) {
  assert_fe(h->v);
 }

-#endif  // !BORINGSSL_X25519_X86_64
-
 // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0.
 static void fe_neg_impl(uint64_t out[5], const uint64_t in2[5]) {
  { const uint64_t x10 = 0;
@@ -1202,8 +1198,6 @@ static void fe_sq_tt(fe *h, const fe *f) {
  fe_sqr_impl(h->v, f->v);
 }

-#if !defined(BORINGSSL_X25519_X86_64)
-
 // Replace (f,g) with (g,f) if b == 1;
 // replace (f,g) with (f,g) if b == 0.
 //
@@ -1343,8 +1337,6 @@ static void fe_mul121666(fe *h, const fe_loose *f) {
  assert_fe(h->v);
 }

-#endif  // !BORINGSSL_X25519_X86_64
-
 // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0.
 static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) {
  { const uint32_t x20 = 0;
@@ -2866,14 +2858,6 @@ static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b,
  s[31] = s11 >> 17;
 }

-#if defined(BORINGSSL_X25519_X86_64)
-
-static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
-                            const uint8_t point[32]) {
-  GFp_x25519_x86_64(out, scalar, point);
-}
-
-#else

 static void x25519_scalar_mult_generic(uint8_t out[32],
                                       const uint8_t scalar[32],
@@ -2966,8 +2950,6 @@ static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
  x25519_scalar_mult_generic(out, scalar, point);
 }

-#endif  // BORINGSSL_X25519_X86_64
-
 void GFp_x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
                            const uint8_t point[32]) {
  x25519_scalar_mult(out, scalar, point);
@@ -2977,20 +2959,6 @@ void GFp_x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
 void GFp_x25519_public_from_private(uint8_t out_public_value[32],
                                    const uint8_t private_key[32]);

-#if defined(BORINGSSL_X25519_X86_64)
-
-// When |BORINGSSL_X25519_X86_64| is set, base point multiplication is done with
-// the Montgomery ladder because it's faster. Otherwise it's done using the
-// Ed25519 tables.
-
-void GFp_x25519_public_from_private(uint8_t out_public_value[32],
-                                    const uint8_t private_key[32]) {
-  static const uint8_t kMongomeryBasePoint[32] = {9};
-  GFp_x25519_scalar_mult(out_public_value, private_key, kMongomeryBasePoint);
-}
-
-#else
-
 void GFp_x25519_public_from_private(uint8_t out_public_value[32],
                                    const uint8_t private_key[32]) {
 #if defined(BORINGSSL_X25519_NEON)
@@ -3019,8 +2987,6 @@ void GFp_x25519_public_from_private(uint8_t out_public_value[32],
  fe_tobytes(out_public_value, &zminusy_inv);
 }

-#endif  // BORINGSSL_X25519_X86_64
-
 void GFp_x25519_fe_invert(fe *out, const fe *z) {
  fe_invert(out, z);
 }
--- a/third_party/fiat/internal.h
+++ b/third_party/fiat/internal.h
@@ -37,17 +37,6 @@ extern "C" {
 #include "../../crypto/internal.h"


-#if defined(OPENSSL_X86_64) && !defined(OPENSSL_SMALL) && \
-    !defined(OPENSSL_WINDOWS) && !defined(OPENSSL_NO_ASM)
-// This isn't compatible with Windows because the asm code makes use of the red
-// zone, which Windows doesn't support.
-#define BORINGSSL_X25519_X86_64
-
-void GFp_x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
-                       const uint8_t point[32]);
-#endif
-
-
 #if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_APPLE)
 #define BORINGSSL_X25519_NEON