From 92b7c89e6e8ba82924b57153bea68241cc45f658 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 17 Jun 2019 20:26:24 +0000 Subject: [PATCH] Add a value barrier to constant-time selects. Clang recognizes the (mask & a) | (~mask & b) pattern as a select. While it often optimizes this into a cmov, it sometimes inserts branches instead, particularly when it detects a string of cmovs with the same condition. In the long term, we need language-level support for expressing our constraints. In the short term, introduce value barriers to prevent the compiler from reasoning about our bit tricks. Thanks to Chandler Carruth for suggesting this pattern. It should be reasonably robust, short of value-based PGO or the compiler learning to reason about empty inline assembly blocks. Apply barriers to our various constant-time selects. We should invest more in the valgrind-based tooling to figure out if there are other instances. Change-Id: Icc24ce36a61f7fec021a762c27197b9c5bd28c5d Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/36484 Reviewed-by: Chandler Carruth Reviewed-by: Adam Langley --- crypto/constant_time_test.cc | 16 ++++++++++++++ crypto/internal.h | 38 +++++++++++++++++++++++++++++++- third_party/fiat/curve25519_32.h | 8 ++++++- third_party/fiat/curve25519_64.h | 8 ++++++- third_party/fiat/p256_32.h | 8 ++++++- third_party/fiat/p256_64.h | 8 ++++++- 6 files changed, 81 insertions(+), 5 deletions(-) diff --git a/crypto/constant_time_test.cc b/crypto/constant_time_test.cc index 59a7bb187..ae80003eb 100644 --- a/crypto/constant_time_test.cc +++ b/crypto/constant_time_test.cc @@ -153,3 +153,19 @@ TEST(ConstantTimeTest, MemCmp) { } } } + +TEST(ConstantTimeTest, ValueBarrier) { + for (int i = 0; i < 10; i++) { + crypto_word_t word; + RAND_bytes(reinterpret_cast(&word), sizeof(word)); + EXPECT_EQ(word, value_barrier_w(word)); + + uint32_t u32; + RAND_bytes(reinterpret_cast(&u32), sizeof(u32)); + EXPECT_EQ(u32, value_barrier_u32(u32)); + + uint64_t u64; + RAND_bytes(reinterpret_cast(&u64), sizeof(u64)); + EXPECT_EQ(u64, value_barrier_u64(u64)); + } +} diff --git a/crypto/internal.h b/crypto/internal.h index b80f065ff..76a317aae 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -240,6 +240,36 @@ typedef uint32_t crypto_word_t; #define CONSTTIME_TRUE_8 ((uint8_t)0xff) #define CONSTTIME_FALSE_8 ((uint8_t)0) +// value_barrier_w returns |a|, but prevents GCC and Clang from reasoning about +// the returned value. This is used to mitigate compilers undoing constant-time +// code, until we can express our requirements directly in the language. +// +// Note the compiler is aware that |value_barrier_w| has no side effects and +// always has the same output for a given input. This allows it to eliminate +// dead code, move computations across loops, and vectorize. +static inline crypto_word_t value_barrier_w(crypto_word_t a) { +#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) + __asm__("" : "+r"(a) : /* no inputs */); +#endif + return a; +} + +// value_barrier_u32 behaves like |value_barrier_w| but takes a |uint32_t|. +static inline uint32_t value_barrier_u32(uint32_t a) { +#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) + __asm__("" : "+r"(a) : /* no inputs */); +#endif + return a; +} + +// value_barrier_u64 behaves like |value_barrier_w| but takes a |uint64_t|. +static inline uint64_t value_barrier_u64(uint64_t a) { +#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) + __asm__("" : "+r"(a) : /* no inputs */); +#endif + return a; +} + // constant_time_msb_w returns the given value with the MSB copied to all the // other bits. static inline crypto_word_t constant_time_msb_w(crypto_word_t a) { @@ -352,7 +382,13 @@ static inline uint8_t constant_time_eq_int_8(int a, int b) { static inline crypto_word_t constant_time_select_w(crypto_word_t mask, crypto_word_t a, crypto_word_t b) { - return (mask & a) | (~mask & b); + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + // + // Adding barriers to both |mask| and |~mask| breaks the relationship between + // the two, which makes the compiler stick with bitmasks. + return (value_barrier_w(mask) & a) | (value_barrier_w(~mask) & b); } // constant_time_select_8 acts like |constant_time_select| but operates on diff --git a/third_party/fiat/curve25519_32.h b/third_party/fiat/curve25519_32.h index 820a5c9bb..53772421d 100644 --- a/third_party/fiat/curve25519_32.h +++ b/third_party/fiat/curve25519_32.h @@ -90,7 +90,13 @@ static void fiat_25519_subborrowx_u25(uint32_t* out1, fiat_25519_uint1* out2, fi static void fiat_25519_cmovznz_u32(uint32_t* out1, fiat_25519_uint1 arg1, uint32_t arg2, uint32_t arg3) { fiat_25519_uint1 x1 = (!(!arg1)); uint32_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT32_C(0xffffffff)); - uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2)); *out1 = x3; } diff --git a/third_party/fiat/curve25519_64.h b/third_party/fiat/curve25519_64.h index 23bf361d8..7c31ff99c 100644 --- a/third_party/fiat/curve25519_64.h +++ b/third_party/fiat/curve25519_64.h @@ -58,7 +58,13 @@ static void fiat_25519_subborrowx_u51(uint64_t* out1, fiat_25519_uint1* out2, fi static void fiat_25519_cmovznz_u64(uint64_t* out1, fiat_25519_uint1 arg1, uint64_t arg2, uint64_t arg3) { fiat_25519_uint1 x1 = (!(!arg1)); uint64_t x2 = ((fiat_25519_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); - uint64_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2)); *out1 = x3; } diff --git a/third_party/fiat/p256_32.h b/third_party/fiat/p256_32.h index faaa0b04e..638eb5d9c 100644 --- a/third_party/fiat/p256_32.h +++ b/third_party/fiat/p256_32.h @@ -77,7 +77,13 @@ static void fiat_p256_mulx_u32(uint32_t* out1, uint32_t* out2, uint32_t arg1, ui static void fiat_p256_cmovznz_u32(uint32_t* out1, fiat_p256_uint1 arg1, uint32_t arg2, uint32_t arg3) { fiat_p256_uint1 x1 = (!(!arg1)); uint32_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT32_C(0xffffffff)); - uint32_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + uint32_t x3 = ((value_barrier_u32(x2) & arg3) | (value_barrier_u32(~x2) & arg2)); *out1 = x3; } diff --git a/third_party/fiat/p256_64.h b/third_party/fiat/p256_64.h index 8e449c6b9..7d97e0a09 100644 --- a/third_party/fiat/p256_64.h +++ b/third_party/fiat/p256_64.h @@ -79,7 +79,13 @@ static void fiat_p256_mulx_u64(uint64_t* out1, uint64_t* out2, uint64_t arg1, ui static void fiat_p256_cmovznz_u64(uint64_t* out1, fiat_p256_uint1 arg1, uint64_t arg2, uint64_t arg3) { fiat_p256_uint1 x1 = (!(!arg1)); uint64_t x2 = ((fiat_p256_int1)(0x0 - x1) & UINT64_C(0xffffffffffffffff)); - uint64_t x3 = ((x2 & arg3) | ((~x2) & arg2)); + // Note this line has been patched from the synthesized code to add value + // barriers. + // + // Clang recognizes this pattern as a select. While it usually transforms it + // to a cmov, it sometimes further transforms it into a branch, which we do + // not want. + uint64_t x3 = ((value_barrier_u64(x2) & arg3) | (value_barrier_u64(~x2) & arg2)); *out1 = x3; }