Merge pull request #1548 from briansmith/b/merge-boringssl-4
Merge BoringSSL fa3fbda: P-256 assembly optimisations for Aarch64.
This commit is contained in:
commit
383317656b
@ -64,6 +64,7 @@ include = [
|
||||
"crypto/fipsmodule/bn/internal.h",
|
||||
"crypto/fipsmodule/bn/montgomery.c",
|
||||
"crypto/fipsmodule/bn/montgomery_inv.c",
|
||||
"crypto/fipsmodule/ec/asm/p256-armv8-asm.pl",
|
||||
"crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl",
|
||||
"crypto/fipsmodule/ec/ecp_nistz.c",
|
||||
"crypto/fipsmodule/ec/ecp_nistz.h",
|
||||
@ -72,9 +73,9 @@ include = [
|
||||
"crypto/fipsmodule/ec/gfp_p256.c",
|
||||
"crypto/fipsmodule/ec/gfp_p384.c",
|
||||
"crypto/fipsmodule/ec/p256.c",
|
||||
"crypto/fipsmodule/ec/p256-x86_64-table.h",
|
||||
"crypto/fipsmodule/ec/p256-x86_64.c",
|
||||
"crypto/fipsmodule/ec/p256-x86_64.h",
|
||||
"crypto/fipsmodule/ec/p256-nistz-table.h",
|
||||
"crypto/fipsmodule/ec/p256-nistz.c",
|
||||
"crypto/fipsmodule/ec/p256-nistz.h",
|
||||
"crypto/fipsmodule/ec/p256_shared.h",
|
||||
"crypto/fipsmodule/ec/p256_table.h",
|
||||
"crypto/fipsmodule/ec/util.h",
|
||||
|
33
build.rs
33
build.rs
@ -60,7 +60,6 @@ const RING_SRCS: &[(&[&str], &str)] = &[
|
||||
(&[X86_64], "crypto/fipsmodule/aes/asm/vpaes-x86_64.pl"),
|
||||
(&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont.pl"),
|
||||
(&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"),
|
||||
(&[X86_64], "crypto/fipsmodule/ec/p256-x86_64.c"),
|
||||
(&[X86_64], "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"),
|
||||
(&[X86_64], "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"),
|
||||
(&[X86_64], "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"),
|
||||
@ -68,6 +67,8 @@ const RING_SRCS: &[(&[&str], &str)] = &[
|
||||
(&[X86_64], SHA512_X86_64),
|
||||
(&[X86_64], "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"),
|
||||
|
||||
(&[AARCH64, X86_64], "crypto/fipsmodule/ec/p256-nistz.c"),
|
||||
|
||||
(&[AARCH64, ARM], "crypto/fipsmodule/aes/asm/aesv8-armx.pl"),
|
||||
(&[AARCH64, ARM], "crypto/fipsmodule/modes/asm/ghashv8-armx.pl"),
|
||||
|
||||
@ -84,6 +85,7 @@ const RING_SRCS: &[(&[&str], &str)] = &[
|
||||
|
||||
(&[AARCH64], "crypto/fipsmodule/aes/asm/vpaes-armv8.pl"),
|
||||
(&[AARCH64], "crypto/fipsmodule/bn/asm/armv8-mont.pl"),
|
||||
(&[AARCH64], "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl"),
|
||||
(&[AARCH64], "crypto/chacha/asm/chacha-armv8.pl"),
|
||||
(&[AARCH64], "crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl"),
|
||||
(&[AARCH64], SHA512_ARMV8),
|
||||
@ -903,6 +905,18 @@ fn generate_prefix_symbols_header(
|
||||
}
|
||||
|
||||
fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
|
||||
// Rename some nistz256 assembly functions to match the names of their
|
||||
// polyfills.
|
||||
static SYMBOLS_TO_RENAME: &[(&str, &str)] = &[
|
||||
("ecp_nistz256_point_double", "p256_point_double"),
|
||||
("ecp_nistz256_point_add", "p256_point_add"),
|
||||
("ecp_nistz256_point_add_affine", "p256_point_add_affine"),
|
||||
("ecp_nistz256_ord_mul_mont", "p256_scalar_mul_mont"),
|
||||
("ecp_nistz256_ord_sqr_mont", "p256_scalar_sqr_rep_mont"),
|
||||
("ecp_nistz256_mul_mont", "p256_mul_mont"),
|
||||
("ecp_nistz256_sqr_mont", "p256_sqr_mont"),
|
||||
];
|
||||
|
||||
static SYMBOLS_TO_PREFIX: &[&str] = &[
|
||||
"CRYPTO_poly1305_finish",
|
||||
"CRYPTO_poly1305_finish_neon",
|
||||
@ -961,9 +975,9 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
|
||||
"gcm_init_neon",
|
||||
"limbs_mul_add_limb",
|
||||
"little_endian_bytes_from_scalar",
|
||||
"nistz256_neg",
|
||||
"nistz256_select_w5",
|
||||
"nistz256_select_w7",
|
||||
"ecp_nistz256_neg",
|
||||
"ecp_nistz256_select_w5",
|
||||
"ecp_nistz256_select_w7",
|
||||
"nistz384_point_add",
|
||||
"nistz384_point_double",
|
||||
"nistz384_point_mul",
|
||||
@ -1007,6 +1021,17 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
|
||||
|
||||
let mut out = String::new();
|
||||
|
||||
for (old, new) in SYMBOLS_TO_RENAME {
|
||||
let line = format!(
|
||||
"{pp}define {prefix_prefix}{old} {prefix_prefix}{new}\n",
|
||||
pp = pp,
|
||||
prefix_prefix = prefix_prefix,
|
||||
old = old,
|
||||
new = new
|
||||
);
|
||||
out += &line;
|
||||
}
|
||||
|
||||
for symbol in SYMBOLS_TO_PREFIX {
|
||||
let line = format!(
|
||||
"{pp}define {prefix_prefix}{symbol} {prefix_prefix}{prefix}{symbol}\n",
|
||||
|
1548
crypto/fipsmodule/ec/asm/p256-armv8-asm.pl
Normal file
1548
crypto/fipsmodule/ec/asm/p256-armv8-asm.pl
Normal file
File diff suppressed because it is too large
Load Diff
@ -90,11 +90,11 @@ my ($r_ptr,$a_ptr,$b_ptr)=("%rdi","%rsi","%rdx");
|
||||
$code.=<<___;
|
||||
|
||||
################################################################################
|
||||
# void nistz256_neg(uint64_t res[4], uint64_t a[4]);
|
||||
.globl nistz256_neg
|
||||
.type nistz256_neg,\@function,2
|
||||
# void ecp_nistz256_neg(uint64_t res[4], uint64_t a[4]);
|
||||
.globl ecp_nistz256_neg
|
||||
.type ecp_nistz256_neg,\@function,2
|
||||
.align 32
|
||||
nistz256_neg:
|
||||
ecp_nistz256_neg:
|
||||
.cfi_startproc
|
||||
push %r12
|
||||
.cfi_push %r12
|
||||
@ -143,7 +143,7 @@ nistz256_neg:
|
||||
.Lneg_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size nistz256_neg,.-nistz256_neg
|
||||
.size ecp_nistz256_neg,.-ecp_nistz256_neg
|
||||
___
|
||||
}
|
||||
{
|
||||
@ -154,15 +154,15 @@ my ($poly1,$poly3)=($acc6,$acc7);
|
||||
|
||||
$code.=<<___;
|
||||
################################################################################
|
||||
# void p256_scalar_mul_mont(
|
||||
# void ecp_nistz256_ord_mul_mont(
|
||||
# uint64_t res[4],
|
||||
# uint64_t a[4],
|
||||
# uint64_t b[4]);
|
||||
|
||||
.globl p256_scalar_mul_mont
|
||||
.type p256_scalar_mul_mont,\@function,3
|
||||
.globl ecp_nistz256_ord_mul_mont
|
||||
.type ecp_nistz256_ord_mul_mont,\@function,3
|
||||
.align 32
|
||||
p256_scalar_mul_mont:
|
||||
ecp_nistz256_ord_mul_mont:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -482,18 +482,18 @@ $code.=<<___;
|
||||
.Lord_mul_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_scalar_mul_mont,.-p256_scalar_mul_mont
|
||||
.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
|
||||
|
||||
################################################################################
|
||||
# void p256_scalar_sqr_rep_mont(
|
||||
# void ecp_nistz256_ord_sqr_mont(
|
||||
# uint64_t res[4],
|
||||
# uint64_t a[4],
|
||||
# uint64_t rep);
|
||||
|
||||
.globl p256_scalar_sqr_rep_mont
|
||||
.type p256_scalar_sqr_rep_mont,\@function,3
|
||||
.globl ecp_nistz256_ord_sqr_mont
|
||||
.type ecp_nistz256_ord_sqr_mont,\@function,3
|
||||
.align 32
|
||||
p256_scalar_sqr_rep_mont:
|
||||
ecp_nistz256_ord_sqr_mont:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -783,7 +783,7 @@ $code.=<<___;
|
||||
.Lord_sqr_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_scalar_sqr_rep_mont,.-p256_scalar_sqr_rep_mont
|
||||
.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
|
||||
___
|
||||
|
||||
$code.=<<___ if ($addx);
|
||||
@ -1235,15 +1235,15 @@ ___
|
||||
|
||||
$code.=<<___;
|
||||
################################################################################
|
||||
# void p256_mul_mont(
|
||||
# void ecp_nistz256_mul_mont(
|
||||
# uint64_t res[4],
|
||||
# uint64_t a[4],
|
||||
# uint64_t b[4]);
|
||||
|
||||
.globl p256_mul_mont
|
||||
.type p256_mul_mont,\@function,3
|
||||
.globl ecp_nistz256_mul_mont
|
||||
.type ecp_nistz256_mul_mont,\@function,3
|
||||
.align 32
|
||||
p256_mul_mont:
|
||||
ecp_nistz256_mul_mont:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -1315,7 +1315,7 @@ $code.=<<___;
|
||||
.Lmul_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_mul_mont,.-p256_mul_mont
|
||||
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
|
||||
|
||||
.type __ecp_nistz256_mul_montq,\@abi-omnipotent
|
||||
.align 32
|
||||
@ -1536,16 +1536,16 @@ __ecp_nistz256_mul_montq:
|
||||
.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
|
||||
|
||||
################################################################################
|
||||
# void p256_sqr_mont(
|
||||
# void ecp_nistz256_sqr_mont(
|
||||
# uint64_t res[4],
|
||||
# uint64_t a[4]);
|
||||
|
||||
# we optimize the square according to S.Gueron and V.Krasnov,
|
||||
# "Speeding up Big-Number Squaring"
|
||||
.globl p256_sqr_mont
|
||||
.type p256_sqr_mont,\@function,2
|
||||
.globl ecp_nistz256_sqr_mont
|
||||
.type ecp_nistz256_sqr_mont,\@function,2
|
||||
.align 32
|
||||
p256_sqr_mont:
|
||||
ecp_nistz256_sqr_mont:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -1612,7 +1612,7 @@ $code.=<<___;
|
||||
.Lsqr_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_sqr_mont,.-p256_sqr_mont
|
||||
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
|
||||
|
||||
.type __ecp_nistz256_sqr_montq,\@abi-omnipotent
|
||||
.align 32
|
||||
@ -2090,11 +2090,11 @@ my ($M1,$T2a,$T2b,$TMP2,$M2,$T2a,$T2b,$TMP2)=map("%xmm$_",(8..15));
|
||||
|
||||
$code.=<<___;
|
||||
################################################################################
|
||||
# void nistz256_select_w5(uint64_t *val, uint64_t *in_t, crypto_word index);
|
||||
.globl nistz256_select_w5
|
||||
.type nistz256_select_w5,\@abi-omnipotent
|
||||
# void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index);
|
||||
.globl ecp_nistz256_select_w5
|
||||
.type ecp_nistz256_select_w5,\@abi-omnipotent
|
||||
.align 32
|
||||
nistz256_select_w5:
|
||||
ecp_nistz256_select_w5:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
@ -2105,7 +2105,7 @@ $code.=<<___ if ($avx>1);
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0x88(%rsp), %rax
|
||||
.LSEH_begin_nistz256_select_w5:
|
||||
.LSEH_begin_ecp_nistz256_select_w5:
|
||||
.byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
|
||||
.byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax)
|
||||
.byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax)
|
||||
@ -2186,15 +2186,15 @@ ___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LSEH_end_nistz256_select_w5:
|
||||
.size nistz256_select_w5,.-nistz256_select_w5
|
||||
.LSEH_end_ecp_nistz256_select_w5:
|
||||
.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
|
||||
|
||||
################################################################################
|
||||
# void nistz256_select_w7(uint64_t *val, uint64_t *in_t, crypto_word index);
|
||||
.globl nistz256_select_w7
|
||||
.type nistz256_select_w7,\@abi-omnipotent
|
||||
# void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index);
|
||||
.globl ecp_nistz256_select_w7
|
||||
.type ecp_nistz256_select_w7,\@abi-omnipotent
|
||||
.align 32
|
||||
nistz256_select_w7:
|
||||
ecp_nistz256_select_w7:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
@ -2205,7 +2205,7 @@ $code.=<<___ if ($avx>1);
|
||||
___
|
||||
$code.=<<___ if ($win64);
|
||||
lea -0x88(%rsp), %rax
|
||||
.LSEH_begin_nistz256_select_w7:
|
||||
.LSEH_begin_ecp_nistz256_select_w7:
|
||||
.byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
|
||||
.byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax)
|
||||
.byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax)
|
||||
@ -2275,8 +2275,8 @@ ___
|
||||
$code.=<<___;
|
||||
ret
|
||||
.cfi_endproc
|
||||
.LSEH_end_nistz256_select_w7:
|
||||
.size nistz256_select_w7,.-nistz256_select_w7
|
||||
.LSEH_end_ecp_nistz256_select_w7:
|
||||
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
|
||||
___
|
||||
}
|
||||
if ($avx>1) {
|
||||
@ -2700,10 +2700,10 @@ sub gen_double () {
|
||||
$bias = 0;
|
||||
|
||||
$code.=<<___;
|
||||
.globl p256_point_double
|
||||
.type p256_point_double,\@function,2
|
||||
.globl ecp_nistz256_point_double
|
||||
.type ecp_nistz256_point_double,\@function,2
|
||||
.align 32
|
||||
p256_point_double:
|
||||
ecp_nistz256_point_double:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -2719,9 +2719,9 @@ ___
|
||||
$bias = 128;
|
||||
|
||||
$code.=<<___;
|
||||
.type p256_point_doublex,\@function,2
|
||||
.type ecp_nistz256_point_doublex,\@function,2
|
||||
.align 32
|
||||
p256_point_doublex:
|
||||
ecp_nistz256_point_doublex:
|
||||
.cfi_startproc
|
||||
.Lpoint_doublex:
|
||||
___
|
||||
@ -2931,7 +2931,7 @@ $code.=<<___;
|
||||
.Lpoint_double${x}_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_point_double$sfx,.-p256_point_double$sfx
|
||||
.size ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx
|
||||
___
|
||||
}
|
||||
&gen_double("q");
|
||||
@ -2952,10 +2952,10 @@ sub gen_add () {
|
||||
$bias = 0;
|
||||
|
||||
$code.=<<___;
|
||||
.globl p256_point_add
|
||||
.type p256_point_add,\@function,3
|
||||
.globl ecp_nistz256_point_add
|
||||
.type ecp_nistz256_point_add,\@function,3
|
||||
.align 32
|
||||
p256_point_add:
|
||||
ecp_nistz256_point_add:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -2971,9 +2971,9 @@ ___
|
||||
$bias = 128;
|
||||
|
||||
$code.=<<___;
|
||||
.type p256_point_addx,\@function,3
|
||||
.type ecp_nistz256_point_addx,\@function,3
|
||||
.align 32
|
||||
p256_point_addx:
|
||||
ecp_nistz256_point_addx:
|
||||
.cfi_startproc
|
||||
.Lpoint_addx:
|
||||
___
|
||||
@ -3330,7 +3330,7 @@ $code.=<<___;
|
||||
.Lpoint_add${x}_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_point_add$sfx,.-p256_point_add$sfx
|
||||
.size ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx
|
||||
___
|
||||
}
|
||||
&gen_add("q");
|
||||
@ -3350,10 +3350,10 @@ sub gen_add_affine () {
|
||||
$bias = 0;
|
||||
|
||||
$code.=<<___;
|
||||
.globl p256_point_add_affine
|
||||
.type p256_point_add_affine,\@function,3
|
||||
.globl ecp_nistz256_point_add_affine
|
||||
.type ecp_nistz256_point_add_affine,\@function,3
|
||||
.align 32
|
||||
p256_point_add_affine:
|
||||
ecp_nistz256_point_add_affine:
|
||||
.cfi_startproc
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
@ -3369,9 +3369,9 @@ ___
|
||||
$bias = 128;
|
||||
|
||||
$code.=<<___;
|
||||
.type p256_point_add_affinex,\@function,3
|
||||
.type ecp_nistz256_point_add_affinex,\@function,3
|
||||
.align 32
|
||||
p256_point_add_affinex:
|
||||
ecp_nistz256_point_add_affinex:
|
||||
.cfi_startproc
|
||||
.Lpoint_add_affinex:
|
||||
___
|
||||
@ -3655,7 +3655,7 @@ $code.=<<___;
|
||||
.Ladd_affine${x}_epilogue:
|
||||
ret
|
||||
.cfi_endproc
|
||||
.size p256_point_add_affine$sfx,.-p256_point_add_affine$sfx
|
||||
.size ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx
|
||||
___
|
||||
}
|
||||
&gen_add_affine("q");
|
||||
@ -3956,17 +3956,17 @@ full_handler:
|
||||
|
||||
.section .pdata
|
||||
.align 4
|
||||
.rva .LSEH_begin_nistz256_neg
|
||||
.rva .LSEH_end_nistz256_neg
|
||||
.rva .LSEH_info_nistz256_neg
|
||||
.rva .LSEH_begin_ecp_nistz256_neg
|
||||
.rva .LSEH_end_ecp_nistz256_neg
|
||||
.rva .LSEH_info_ecp_nistz256_neg
|
||||
|
||||
.rva .LSEH_begin_p256_scalar_mul_mont
|
||||
.rva .LSEH_end_p256_scalar_mul_mont
|
||||
.rva .LSEH_info_p256_scalar_mul_mont
|
||||
.rva .LSEH_begin_ecp_nistz256_ord_mul_mont
|
||||
.rva .LSEH_end_ecp_nistz256_ord_mul_mont
|
||||
.rva .LSEH_info_ecp_nistz256_ord_mul_mont
|
||||
|
||||
.rva .LSEH_begin_p256_scalar_sqr_rep_mont
|
||||
.rva .LSEH_end_p256_scalar_sqr_rep_mont
|
||||
.rva .LSEH_info_p256_scalar_sqr_rep_mont
|
||||
.rva .LSEH_begin_ecp_nistz256_ord_sqr_mont
|
||||
.rva .LSEH_end_ecp_nistz256_ord_sqr_mont
|
||||
.rva .LSEH_info_ecp_nistz256_ord_sqr_mont
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
.rva .LSEH_begin_ecp_nistz256_ord_mul_montx
|
||||
@ -3978,20 +3978,20 @@ $code.=<<___ if ($addx);
|
||||
.rva .LSEH_info_ecp_nistz256_ord_sqr_montx
|
||||
___
|
||||
$code.=<<___;
|
||||
.rva .LSEH_begin_p256_mul_mont
|
||||
.rva .LSEH_end_p256_mul_mont
|
||||
.rva .LSEH_info_p256_mul_mont
|
||||
.rva .LSEH_begin_ecp_nistz256_mul_mont
|
||||
.rva .LSEH_end_ecp_nistz256_mul_mont
|
||||
.rva .LSEH_info_ecp_nistz256_mul_mont
|
||||
|
||||
.rva .LSEH_begin_p256_sqr_mont
|
||||
.rva .LSEH_end_p256_sqr_mont
|
||||
.rva .LSEH_info_p256_sqr_mont
|
||||
.rva .LSEH_begin_ecp_nistz256_sqr_mont
|
||||
.rva .LSEH_end_ecp_nistz256_sqr_mont
|
||||
.rva .LSEH_info_ecp_nistz256_sqr_mont
|
||||
|
||||
.rva .LSEH_begin_nistz256_select_w5
|
||||
.rva .LSEH_end_nistz256_select_w5
|
||||
.rva .LSEH_begin_ecp_nistz256_select_w5
|
||||
.rva .LSEH_end_ecp_nistz256_select_w5
|
||||
.rva .LSEH_info_ecp_nistz256_select_wX
|
||||
|
||||
.rva .LSEH_begin_nistz256_select_w7
|
||||
.rva .LSEH_end_nistz256_select_w7
|
||||
.rva .LSEH_begin_ecp_nistz256_select_w7
|
||||
.rva .LSEH_end_ecp_nistz256_select_w7
|
||||
.rva .LSEH_info_ecp_nistz256_select_wX
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
@ -4004,45 +4004,45 @@ $code.=<<___ if ($avx>1);
|
||||
.rva .LSEH_info_ecp_nistz256_avx2_select_wX
|
||||
___
|
||||
$code.=<<___;
|
||||
.rva .LSEH_begin_p256_point_double
|
||||
.rva .LSEH_end_p256_point_double
|
||||
.rva .LSEH_info_p256_point_double
|
||||
.rva .LSEH_begin_ecp_nistz256_point_double
|
||||
.rva .LSEH_end_ecp_nistz256_point_double
|
||||
.rva .LSEH_info_ecp_nistz256_point_double
|
||||
|
||||
.rva .LSEH_begin_p256_point_add
|
||||
.rva .LSEH_end_p256_point_add
|
||||
.rva .LSEH_info_p256_point_add
|
||||
.rva .LSEH_begin_ecp_nistz256_point_add
|
||||
.rva .LSEH_end_ecp_nistz256_point_add
|
||||
.rva .LSEH_info_ecp_nistz256_point_add
|
||||
|
||||
.rva .LSEH_begin_p256_point_add_affine
|
||||
.rva .LSEH_end_p256_point_add_affine
|
||||
.rva .LSEH_info_p256_point_add_affine
|
||||
.rva .LSEH_begin_ecp_nistz256_point_add_affine
|
||||
.rva .LSEH_end_ecp_nistz256_point_add_affine
|
||||
.rva .LSEH_info_ecp_nistz256_point_add_affine
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
.rva .LSEH_begin_p256_point_doublex
|
||||
.rva .LSEH_end_p256_point_doublex
|
||||
.rva .LSEH_info_p256_point_doublex
|
||||
.rva .LSEH_begin_ecp_nistz256_point_doublex
|
||||
.rva .LSEH_end_ecp_nistz256_point_doublex
|
||||
.rva .LSEH_info_ecp_nistz256_point_doublex
|
||||
|
||||
.rva .LSEH_begin_p256_point_addx
|
||||
.rva .LSEH_end_p256_point_addx
|
||||
.rva .LSEH_info_p256_point_addx
|
||||
.rva .LSEH_begin_ecp_nistz256_point_addx
|
||||
.rva .LSEH_end_ecp_nistz256_point_addx
|
||||
.rva .LSEH_info_ecp_nistz256_point_addx
|
||||
|
||||
.rva .LSEH_begin_p256_point_add_affinex
|
||||
.rva .LSEH_end_p256_point_add_affinex
|
||||
.rva .LSEH_info_p256_point_add_affinex
|
||||
.rva .LSEH_begin_ecp_nistz256_point_add_affinex
|
||||
.rva .LSEH_end_ecp_nistz256_point_add_affinex
|
||||
.rva .LSEH_info_ecp_nistz256_point_add_affinex
|
||||
___
|
||||
$code.=<<___;
|
||||
|
||||
.section .xdata
|
||||
.align 8
|
||||
.LSEH_info_nistz256_neg:
|
||||
.LSEH_info_ecp_nistz256_neg:
|
||||
.byte 9,0,0,0
|
||||
.rva short_handler
|
||||
.rva .Lneg_body,.Lneg_epilogue # HandlerData[]
|
||||
.LSEH_info_p256_scalar_mul_mont:
|
||||
.LSEH_info_ecp_nistz256_ord_mul_mont:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lord_mul_body,.Lord_mul_epilogue # HandlerData[]
|
||||
.long 48,0
|
||||
.LSEH_info_p256_scalar_sqr_rep_mont:
|
||||
.LSEH_info_ecp_nistz256_ord_sqr_mont:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lord_sqr_body,.Lord_sqr_epilogue # HandlerData[]
|
||||
@ -4061,12 +4061,12 @@ $code.=<<___ if ($addx);
|
||||
.long 48,0
|
||||
___
|
||||
$code.=<<___;
|
||||
.LSEH_info_p256_mul_mont:
|
||||
.LSEH_info_ecp_nistz256_mul_mont:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lmul_body,.Lmul_epilogue # HandlerData[]
|
||||
.long 48,0
|
||||
.LSEH_info_p256_sqr_mont:
|
||||
.LSEH_info_ecp_nistz256_sqr_mont:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lsqr_body,.Lsqr_epilogue # HandlerData[]
|
||||
@ -4104,17 +4104,17 @@ $code.=<<___ if ($avx>1);
|
||||
.align 8
|
||||
___
|
||||
$code.=<<___;
|
||||
.LSEH_info_p256_point_double:
|
||||
.LSEH_info_ecp_nistz256_point_double:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lpoint_doubleq_body,.Lpoint_doubleq_epilogue # HandlerData[]
|
||||
.long 32*5+56,0
|
||||
.LSEH_info_p256_point_add:
|
||||
.LSEH_info_ecp_nistz256_point_add:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lpoint_addq_body,.Lpoint_addq_epilogue # HandlerData[]
|
||||
.long 32*18+56,0
|
||||
.LSEH_info_p256_point_add_affine:
|
||||
.LSEH_info_ecp_nistz256_point_add_affine:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Ladd_affineq_body,.Ladd_affineq_epilogue # HandlerData[]
|
||||
@ -4122,17 +4122,17 @@ $code.=<<___;
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
.align 8
|
||||
.LSEH_info_p256_point_doublex:
|
||||
.LSEH_info_ecp_nistz256_point_doublex:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lpoint_doublex_body,.Lpoint_doublex_epilogue # HandlerData[]
|
||||
.long 32*5+56,0
|
||||
.LSEH_info_p256_point_addx:
|
||||
.LSEH_info_ecp_nistz256_point_addx:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Lpoint_addx_body,.Lpoint_addx_epilogue # HandlerData[]
|
||||
.long 32*18+56,0
|
||||
.LSEH_info_p256_point_add_affinex:
|
||||
.LSEH_info_ecp_nistz256_point_add_affinex:
|
||||
.byte 9,0,0,0
|
||||
.rva full_handler
|
||||
.rva .Ladd_affinex_body,.Ladd_affinex_epilogue # HandlerData[]
|
||||
|
@ -23,8 +23,8 @@ import (
|
||||
)
|
||||
|
||||
func main() {
|
||||
if err := writeP256X86_64Table("p256-x86_64-table.h"); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error writing p256-x86_64-table.h: %s\n", err)
|
||||
if err := writeP256NistzTable("p256-nistz-table.h"); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error writing p256-nistz-table.h: %s\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@ -34,7 +34,7 @@ func main() {
|
||||
}
|
||||
}
|
||||
|
||||
func writeP256X86_64Table(path string) error {
|
||||
func writeP256NistzTable(path string) error {
|
||||
curve := elliptic.P256()
|
||||
tables := make([][][2]*big.Int, 0, 37)
|
||||
for shift := 0; shift < 256; shift += 7 {
|
||||
@ -59,7 +59,7 @@ func writeP256X86_64Table(path string) error {
|
||||
*/
|
||||
|
||||
// This is the precomputed constant time access table for the code in
|
||||
// p256-x86_64.c, for the default generator. The table consists of 37
|
||||
// p256-nistz.c, for the default generator. The table consists of 37
|
||||
// subtables, each subtable contains 64 affine points. The affine points are
|
||||
// encoded as eight uint64's, four for the x coordinate and four for the y.
|
||||
// Both values are in little-endian order. There are 37 tables because a
|
||||
|
@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
// This is the precomputed constant time access table for the code in
|
||||
// p256-x86_64.c, for the default generator. The table consists of 37
|
||||
// p256-nistz.c, for the default generator. The table consists of 37
|
||||
// subtables, each subtable contains 64 affine points. The affine points are
|
||||
// encoded as eight uint64's, four for the x coordinate and four for the y.
|
||||
// Both values are in little-endian order. There are 37 tables because a
|
@ -22,7 +22,7 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "p256-x86_64.h"
|
||||
#include "p256-nistz.h"
|
||||
|
||||
#if defined(OPENSSL_USE_NISTZ256)
|
||||
|
||||
@ -35,7 +35,7 @@ static const BN_ULONG ONE[P256_LIMBS] = {
|
||||
};
|
||||
|
||||
// Precomputed tables for the default generator
|
||||
#include "p256-x86_64-table.h"
|
||||
#include "p256-nistz-table.h"
|
||||
|
||||
// Recode window to a signed digit, see |nistp_recode_scalar_bits| in
|
||||
// util.c for details
|
||||
@ -168,7 +168,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
|
||||
crypto_word wvalue = p_str[(index - 1) / 8];
|
||||
wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
|
||||
|
||||
ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
|
||||
ecp_nistz256_select_w5(r, table, (int)(booth_recode_w5(wvalue) >> 1));
|
||||
|
||||
while (index >= 5) {
|
||||
if (index != 255) {
|
||||
@ -179,7 +179,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
|
||||
|
||||
wvalue = booth_recode_w5(wvalue);
|
||||
|
||||
ecp_nistz256_select_w5(&h, table, wvalue >> 1);
|
||||
ecp_nistz256_select_w5(&h, table, (int)(wvalue >> 1));
|
||||
|
||||
ecp_nistz256_neg(tmp, h.Y);
|
||||
copy_conditional(h.Y, tmp, (wvalue & 1));
|
||||
@ -202,7 +202,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
|
||||
|
||||
wvalue = booth_recode_w5(wvalue);
|
||||
|
||||
ecp_nistz256_select_w5(&h, table, wvalue >> 1);
|
||||
ecp_nistz256_select_w5(&h, table, (int)(wvalue >> 1));
|
||||
|
||||
ecp_nistz256_neg(tmp, h.Y);
|
||||
copy_conditional(h.Y, tmp, wvalue & 1);
|
||||
@ -258,7 +258,7 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
|
||||
size_t index = 0;
|
||||
crypto_word wvalue = calc_first_wvalue(&index, p_str);
|
||||
|
||||
ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
|
||||
ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], (int)(wvalue >> 1));
|
||||
ecp_nistz256_neg(p.p.Z, p.p.Y);
|
||||
copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
|
||||
|
||||
@ -271,7 +271,7 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
|
||||
for (int i = 1; i < 37; i++) {
|
||||
wvalue = calc_wvalue(&index, p_str);
|
||||
|
||||
ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1);
|
||||
ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], (int)(wvalue >> 1));
|
||||
|
||||
ecp_nistz256_neg(t.p.Z, t.a.Y);
|
||||
copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
|
@ -27,13 +27,6 @@
|
||||
|
||||
#if defined(OPENSSL_USE_NISTZ256)
|
||||
|
||||
#define ecp_nistz256_neg nistz256_neg
|
||||
#define ecp_nistz256_select_w5 nistz256_select_w5
|
||||
#define ecp_nistz256_select_w7 nistz256_select_w7
|
||||
#define ecp_nistz256_point_double p256_point_double
|
||||
#define ecp_nistz256_point_add p256_point_add
|
||||
#define ecp_nistz256_point_add_affine p256_point_add_affine
|
||||
|
||||
// ecp_nistz256_neg sets |res| to -|a| mod P.
|
||||
void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
|
||||
|
||||
@ -82,14 +75,14 @@ typedef struct {
|
||||
// and all zeros (the point at infinity) if |index| is 0. This is done in
|
||||
// constant time.
|
||||
void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],
|
||||
crypto_word index);
|
||||
int index);
|
||||
|
||||
// ecp_nistz256_select_w7 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 64
|
||||
// and all zeros (the point at infinity) if |index| is 0. This is done in
|
||||
// constant time.
|
||||
void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
|
||||
const P256_POINT_AFFINE in_t[64],
|
||||
crypto_word index);
|
||||
int index);
|
||||
|
||||
// ecp_nistz256_point_double sets |r| to |a| doubled.
|
||||
void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);
|
@ -23,7 +23,8 @@
|
||||
|
||||
#include "../bn/internal.h"
|
||||
|
||||
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
|
||||
#if !defined(OPENSSL_NO_ASM) && \
|
||||
(defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \
|
||||
!defined(OPENSSL_SMALL)
|
||||
# define OPENSSL_USE_NISTZ256
|
||||
#endif
|
||||
|
@ -584,16 +584,16 @@ mod tests {
|
||||
})
|
||||
}
|
||||
|
||||
// There is no `nistz256_neg` on other targets.
|
||||
// There is no `ecp_nistz256_neg` on other targets.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[test]
|
||||
fn p256_elem_neg_test() {
|
||||
prefixed_extern! {
|
||||
fn nistz256_neg(r: *mut Limb, a: *const Limb);
|
||||
fn ecp_nistz256_neg(r: *mut Limb, a: *const Limb);
|
||||
}
|
||||
elem_neg_test(
|
||||
&p256::COMMON_OPS,
|
||||
nistz256_neg,
|
||||
ecp_nistz256_neg,
|
||||
test_file!("ops/p256_elem_neg_tests.txt"),
|
||||
);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user