Merge pull request #1548 from briansmith/b/merge-boringssl-4

Merge BoringSSL fa3fbda: P-256 assembly optimisations for Aarch64.
This commit is contained in:
Brian Smith 2022-11-03 09:01:20 -07:00 committed by GitHub
commit 383317656b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 1705 additions and 137 deletions

View File

@ -64,6 +64,7 @@ include = [
"crypto/fipsmodule/bn/internal.h", "crypto/fipsmodule/bn/internal.h",
"crypto/fipsmodule/bn/montgomery.c", "crypto/fipsmodule/bn/montgomery.c",
"crypto/fipsmodule/bn/montgomery_inv.c", "crypto/fipsmodule/bn/montgomery_inv.c",
"crypto/fipsmodule/ec/asm/p256-armv8-asm.pl",
"crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl", "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl",
"crypto/fipsmodule/ec/ecp_nistz.c", "crypto/fipsmodule/ec/ecp_nistz.c",
"crypto/fipsmodule/ec/ecp_nistz.h", "crypto/fipsmodule/ec/ecp_nistz.h",
@ -72,9 +73,9 @@ include = [
"crypto/fipsmodule/ec/gfp_p256.c", "crypto/fipsmodule/ec/gfp_p256.c",
"crypto/fipsmodule/ec/gfp_p384.c", "crypto/fipsmodule/ec/gfp_p384.c",
"crypto/fipsmodule/ec/p256.c", "crypto/fipsmodule/ec/p256.c",
"crypto/fipsmodule/ec/p256-x86_64-table.h", "crypto/fipsmodule/ec/p256-nistz-table.h",
"crypto/fipsmodule/ec/p256-x86_64.c", "crypto/fipsmodule/ec/p256-nistz.c",
"crypto/fipsmodule/ec/p256-x86_64.h", "crypto/fipsmodule/ec/p256-nistz.h",
"crypto/fipsmodule/ec/p256_shared.h", "crypto/fipsmodule/ec/p256_shared.h",
"crypto/fipsmodule/ec/p256_table.h", "crypto/fipsmodule/ec/p256_table.h",
"crypto/fipsmodule/ec/util.h", "crypto/fipsmodule/ec/util.h",

View File

@ -60,7 +60,6 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[X86_64], "crypto/fipsmodule/aes/asm/vpaes-x86_64.pl"), (&[X86_64], "crypto/fipsmodule/aes/asm/vpaes-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont.pl"), (&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont.pl"),
(&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"), (&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"),
(&[X86_64], "crypto/fipsmodule/ec/p256-x86_64.c"),
(&[X86_64], "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"), (&[X86_64], "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"),
(&[X86_64], "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"), (&[X86_64], "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"), (&[X86_64], "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"),
@ -68,6 +67,8 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[X86_64], SHA512_X86_64), (&[X86_64], SHA512_X86_64),
(&[X86_64], "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"), (&[X86_64], "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"),
(&[AARCH64, X86_64], "crypto/fipsmodule/ec/p256-nistz.c"),
(&[AARCH64, ARM], "crypto/fipsmodule/aes/asm/aesv8-armx.pl"), (&[AARCH64, ARM], "crypto/fipsmodule/aes/asm/aesv8-armx.pl"),
(&[AARCH64, ARM], "crypto/fipsmodule/modes/asm/ghashv8-armx.pl"), (&[AARCH64, ARM], "crypto/fipsmodule/modes/asm/ghashv8-armx.pl"),
@ -84,6 +85,7 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[AARCH64], "crypto/fipsmodule/aes/asm/vpaes-armv8.pl"), (&[AARCH64], "crypto/fipsmodule/aes/asm/vpaes-armv8.pl"),
(&[AARCH64], "crypto/fipsmodule/bn/asm/armv8-mont.pl"), (&[AARCH64], "crypto/fipsmodule/bn/asm/armv8-mont.pl"),
(&[AARCH64], "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl"),
(&[AARCH64], "crypto/chacha/asm/chacha-armv8.pl"), (&[AARCH64], "crypto/chacha/asm/chacha-armv8.pl"),
(&[AARCH64], "crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl"), (&[AARCH64], "crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl"),
(&[AARCH64], SHA512_ARMV8), (&[AARCH64], SHA512_ARMV8),
@ -903,6 +905,18 @@ fn generate_prefix_symbols_header(
} }
fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
// Rename some nistz256 assembly functions to match the names of their
// polyfills.
static SYMBOLS_TO_RENAME: &[(&str, &str)] = &[
("ecp_nistz256_point_double", "p256_point_double"),
("ecp_nistz256_point_add", "p256_point_add"),
("ecp_nistz256_point_add_affine", "p256_point_add_affine"),
("ecp_nistz256_ord_mul_mont", "p256_scalar_mul_mont"),
("ecp_nistz256_ord_sqr_mont", "p256_scalar_sqr_rep_mont"),
("ecp_nistz256_mul_mont", "p256_mul_mont"),
("ecp_nistz256_sqr_mont", "p256_sqr_mont"),
];
static SYMBOLS_TO_PREFIX: &[&str] = &[ static SYMBOLS_TO_PREFIX: &[&str] = &[
"CRYPTO_poly1305_finish", "CRYPTO_poly1305_finish",
"CRYPTO_poly1305_finish_neon", "CRYPTO_poly1305_finish_neon",
@ -961,9 +975,9 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
"gcm_init_neon", "gcm_init_neon",
"limbs_mul_add_limb", "limbs_mul_add_limb",
"little_endian_bytes_from_scalar", "little_endian_bytes_from_scalar",
"nistz256_neg", "ecp_nistz256_neg",
"nistz256_select_w5", "ecp_nistz256_select_w5",
"nistz256_select_w7", "ecp_nistz256_select_w7",
"nistz384_point_add", "nistz384_point_add",
"nistz384_point_double", "nistz384_point_double",
"nistz384_point_mul", "nistz384_point_mul",
@ -1007,6 +1021,17 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
let mut out = String::new(); let mut out = String::new();
for (old, new) in SYMBOLS_TO_RENAME {
let line = format!(
"{pp}define {prefix_prefix}{old} {prefix_prefix}{new}\n",
pp = pp,
prefix_prefix = prefix_prefix,
old = old,
new = new
);
out += &line;
}
for symbol in SYMBOLS_TO_PREFIX { for symbol in SYMBOLS_TO_PREFIX {
let line = format!( let line = format!(
"{pp}define {prefix_prefix}{symbol} {prefix_prefix}{prefix}{symbol}\n", "{pp}define {prefix_prefix}{symbol} {prefix_prefix}{prefix}{symbol}\n",

File diff suppressed because it is too large Load Diff

View File

@ -90,11 +90,11 @@ my ($r_ptr,$a_ptr,$b_ptr)=("%rdi","%rsi","%rdx");
$code.=<<___; $code.=<<___;
################################################################################ ################################################################################
# void nistz256_neg(uint64_t res[4], uint64_t a[4]); # void ecp_nistz256_neg(uint64_t res[4], uint64_t a[4]);
.globl nistz256_neg .globl ecp_nistz256_neg
.type nistz256_neg,\@function,2 .type ecp_nistz256_neg,\@function,2
.align 32 .align 32
nistz256_neg: ecp_nistz256_neg:
.cfi_startproc .cfi_startproc
push %r12 push %r12
.cfi_push %r12 .cfi_push %r12
@ -143,7 +143,7 @@ nistz256_neg:
.Lneg_epilogue: .Lneg_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size nistz256_neg,.-nistz256_neg .size ecp_nistz256_neg,.-ecp_nistz256_neg
___ ___
} }
{ {
@ -154,15 +154,15 @@ my ($poly1,$poly3)=($acc6,$acc7);
$code.=<<___; $code.=<<___;
################################################################################ ################################################################################
# void p256_scalar_mul_mont( # void ecp_nistz256_ord_mul_mont(
# uint64_t res[4], # uint64_t res[4],
# uint64_t a[4], # uint64_t a[4],
# uint64_t b[4]); # uint64_t b[4]);
.globl p256_scalar_mul_mont .globl ecp_nistz256_ord_mul_mont
.type p256_scalar_mul_mont,\@function,3 .type ecp_nistz256_ord_mul_mont,\@function,3
.align 32 .align 32
p256_scalar_mul_mont: ecp_nistz256_ord_mul_mont:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -482,18 +482,18 @@ $code.=<<___;
.Lord_mul_epilogue: .Lord_mul_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_scalar_mul_mont,.-p256_scalar_mul_mont .size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
################################################################################ ################################################################################
# void p256_scalar_sqr_rep_mont( # void ecp_nistz256_ord_sqr_mont(
# uint64_t res[4], # uint64_t res[4],
# uint64_t a[4], # uint64_t a[4],
# uint64_t rep); # uint64_t rep);
.globl p256_scalar_sqr_rep_mont .globl ecp_nistz256_ord_sqr_mont
.type p256_scalar_sqr_rep_mont,\@function,3 .type ecp_nistz256_ord_sqr_mont,\@function,3
.align 32 .align 32
p256_scalar_sqr_rep_mont: ecp_nistz256_ord_sqr_mont:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -783,7 +783,7 @@ $code.=<<___;
.Lord_sqr_epilogue: .Lord_sqr_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_scalar_sqr_rep_mont,.-p256_scalar_sqr_rep_mont .size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -1235,15 +1235,15 @@ ___
$code.=<<___; $code.=<<___;
################################################################################ ################################################################################
# void p256_mul_mont( # void ecp_nistz256_mul_mont(
# uint64_t res[4], # uint64_t res[4],
# uint64_t a[4], # uint64_t a[4],
# uint64_t b[4]); # uint64_t b[4]);
.globl p256_mul_mont .globl ecp_nistz256_mul_mont
.type p256_mul_mont,\@function,3 .type ecp_nistz256_mul_mont,\@function,3
.align 32 .align 32
p256_mul_mont: ecp_nistz256_mul_mont:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -1315,7 +1315,7 @@ $code.=<<___;
.Lmul_epilogue: .Lmul_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_mul_mont,.-p256_mul_mont .size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
.type __ecp_nistz256_mul_montq,\@abi-omnipotent .type __ecp_nistz256_mul_montq,\@abi-omnipotent
.align 32 .align 32
@ -1536,16 +1536,16 @@ __ecp_nistz256_mul_montq:
.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq .size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
################################################################################ ################################################################################
# void p256_sqr_mont( # void ecp_nistz256_sqr_mont(
# uint64_t res[4], # uint64_t res[4],
# uint64_t a[4]); # uint64_t a[4]);
# we optimize the square according to S.Gueron and V.Krasnov, # we optimize the square according to S.Gueron and V.Krasnov,
# "Speeding up Big-Number Squaring" # "Speeding up Big-Number Squaring"
.globl p256_sqr_mont .globl ecp_nistz256_sqr_mont
.type p256_sqr_mont,\@function,2 .type ecp_nistz256_sqr_mont,\@function,2
.align 32 .align 32
p256_sqr_mont: ecp_nistz256_sqr_mont:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -1612,7 +1612,7 @@ $code.=<<___;
.Lsqr_epilogue: .Lsqr_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_sqr_mont,.-p256_sqr_mont .size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
.type __ecp_nistz256_sqr_montq,\@abi-omnipotent .type __ecp_nistz256_sqr_montq,\@abi-omnipotent
.align 32 .align 32
@ -2090,11 +2090,11 @@ my ($M1,$T2a,$T2b,$TMP2,$M2,$T2a,$T2b,$TMP2)=map("%xmm$_",(8..15));
$code.=<<___; $code.=<<___;
################################################################################ ################################################################################
# void nistz256_select_w5(uint64_t *val, uint64_t *in_t, crypto_word index); # void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index);
.globl nistz256_select_w5 .globl ecp_nistz256_select_w5
.type nistz256_select_w5,\@abi-omnipotent .type ecp_nistz256_select_w5,\@abi-omnipotent
.align 32 .align 32
nistz256_select_w5: ecp_nistz256_select_w5:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($avx>1); $code.=<<___ if ($avx>1);
@ -2105,7 +2105,7 @@ $code.=<<___ if ($avx>1);
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
lea -0x88(%rsp), %rax lea -0x88(%rsp), %rax
.LSEH_begin_nistz256_select_w5: .LSEH_begin_ecp_nistz256_select_w5:
.byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
.byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax) .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax)
.byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax) .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax)
@ -2186,15 +2186,15 @@ ___
$code.=<<___; $code.=<<___;
ret ret
.cfi_endproc .cfi_endproc
.LSEH_end_nistz256_select_w5: .LSEH_end_ecp_nistz256_select_w5:
.size nistz256_select_w5,.-nistz256_select_w5 .size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
################################################################################ ################################################################################
# void nistz256_select_w7(uint64_t *val, uint64_t *in_t, crypto_word index); # void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index);
.globl nistz256_select_w7 .globl ecp_nistz256_select_w7
.type nistz256_select_w7,\@abi-omnipotent .type ecp_nistz256_select_w7,\@abi-omnipotent
.align 32 .align 32
nistz256_select_w7: ecp_nistz256_select_w7:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($avx>1); $code.=<<___ if ($avx>1);
@ -2205,7 +2205,7 @@ $code.=<<___ if ($avx>1);
___ ___
$code.=<<___ if ($win64); $code.=<<___ if ($win64);
lea -0x88(%rsp), %rax lea -0x88(%rsp), %rax
.LSEH_begin_nistz256_select_w7: .LSEH_begin_ecp_nistz256_select_w7:
.byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
.byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax) .byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax)
.byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax) .byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax)
@ -2275,8 +2275,8 @@ ___
$code.=<<___; $code.=<<___;
ret ret
.cfi_endproc .cfi_endproc
.LSEH_end_nistz256_select_w7: .LSEH_end_ecp_nistz256_select_w7:
.size nistz256_select_w7,.-nistz256_select_w7 .size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
___ ___
} }
if ($avx>1) { if ($avx>1) {
@ -2700,10 +2700,10 @@ sub gen_double () {
$bias = 0; $bias = 0;
$code.=<<___; $code.=<<___;
.globl p256_point_double .globl ecp_nistz256_point_double
.type p256_point_double,\@function,2 .type ecp_nistz256_point_double,\@function,2
.align 32 .align 32
p256_point_double: ecp_nistz256_point_double:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -2719,9 +2719,9 @@ ___
$bias = 128; $bias = 128;
$code.=<<___; $code.=<<___;
.type p256_point_doublex,\@function,2 .type ecp_nistz256_point_doublex,\@function,2
.align 32 .align 32
p256_point_doublex: ecp_nistz256_point_doublex:
.cfi_startproc .cfi_startproc
.Lpoint_doublex: .Lpoint_doublex:
___ ___
@ -2931,7 +2931,7 @@ $code.=<<___;
.Lpoint_double${x}_epilogue: .Lpoint_double${x}_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_point_double$sfx,.-p256_point_double$sfx .size ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx
___ ___
} }
&gen_double("q"); &gen_double("q");
@ -2952,10 +2952,10 @@ sub gen_add () {
$bias = 0; $bias = 0;
$code.=<<___; $code.=<<___;
.globl p256_point_add .globl ecp_nistz256_point_add
.type p256_point_add,\@function,3 .type ecp_nistz256_point_add,\@function,3
.align 32 .align 32
p256_point_add: ecp_nistz256_point_add:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -2971,9 +2971,9 @@ ___
$bias = 128; $bias = 128;
$code.=<<___; $code.=<<___;
.type p256_point_addx,\@function,3 .type ecp_nistz256_point_addx,\@function,3
.align 32 .align 32
p256_point_addx: ecp_nistz256_point_addx:
.cfi_startproc .cfi_startproc
.Lpoint_addx: .Lpoint_addx:
___ ___
@ -3330,7 +3330,7 @@ $code.=<<___;
.Lpoint_add${x}_epilogue: .Lpoint_add${x}_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_point_add$sfx,.-p256_point_add$sfx .size ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx
___ ___
} }
&gen_add("q"); &gen_add("q");
@ -3350,10 +3350,10 @@ sub gen_add_affine () {
$bias = 0; $bias = 0;
$code.=<<___; $code.=<<___;
.globl p256_point_add_affine .globl ecp_nistz256_point_add_affine
.type p256_point_add_affine,\@function,3 .type ecp_nistz256_point_add_affine,\@function,3
.align 32 .align 32
p256_point_add_affine: ecp_nistz256_point_add_affine:
.cfi_startproc .cfi_startproc
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
@ -3369,9 +3369,9 @@ ___
$bias = 128; $bias = 128;
$code.=<<___; $code.=<<___;
.type p256_point_add_affinex,\@function,3 .type ecp_nistz256_point_add_affinex,\@function,3
.align 32 .align 32
p256_point_add_affinex: ecp_nistz256_point_add_affinex:
.cfi_startproc .cfi_startproc
.Lpoint_add_affinex: .Lpoint_add_affinex:
___ ___
@ -3655,7 +3655,7 @@ $code.=<<___;
.Ladd_affine${x}_epilogue: .Ladd_affine${x}_epilogue:
ret ret
.cfi_endproc .cfi_endproc
.size p256_point_add_affine$sfx,.-p256_point_add_affine$sfx .size ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx
___ ___
} }
&gen_add_affine("q"); &gen_add_affine("q");
@ -3956,17 +3956,17 @@ full_handler:
.section .pdata .section .pdata
.align 4 .align 4
.rva .LSEH_begin_nistz256_neg .rva .LSEH_begin_ecp_nistz256_neg
.rva .LSEH_end_nistz256_neg .rva .LSEH_end_ecp_nistz256_neg
.rva .LSEH_info_nistz256_neg .rva .LSEH_info_ecp_nistz256_neg
.rva .LSEH_begin_p256_scalar_mul_mont .rva .LSEH_begin_ecp_nistz256_ord_mul_mont
.rva .LSEH_end_p256_scalar_mul_mont .rva .LSEH_end_ecp_nistz256_ord_mul_mont
.rva .LSEH_info_p256_scalar_mul_mont .rva .LSEH_info_ecp_nistz256_ord_mul_mont
.rva .LSEH_begin_p256_scalar_sqr_rep_mont .rva .LSEH_begin_ecp_nistz256_ord_sqr_mont
.rva .LSEH_end_p256_scalar_sqr_rep_mont .rva .LSEH_end_ecp_nistz256_ord_sqr_mont
.rva .LSEH_info_p256_scalar_sqr_rep_mont .rva .LSEH_info_ecp_nistz256_ord_sqr_mont
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
.rva .LSEH_begin_ecp_nistz256_ord_mul_montx .rva .LSEH_begin_ecp_nistz256_ord_mul_montx
@ -3978,20 +3978,20 @@ $code.=<<___ if ($addx);
.rva .LSEH_info_ecp_nistz256_ord_sqr_montx .rva .LSEH_info_ecp_nistz256_ord_sqr_montx
___ ___
$code.=<<___; $code.=<<___;
.rva .LSEH_begin_p256_mul_mont .rva .LSEH_begin_ecp_nistz256_mul_mont
.rva .LSEH_end_p256_mul_mont .rva .LSEH_end_ecp_nistz256_mul_mont
.rva .LSEH_info_p256_mul_mont .rva .LSEH_info_ecp_nistz256_mul_mont
.rva .LSEH_begin_p256_sqr_mont .rva .LSEH_begin_ecp_nistz256_sqr_mont
.rva .LSEH_end_p256_sqr_mont .rva .LSEH_end_ecp_nistz256_sqr_mont
.rva .LSEH_info_p256_sqr_mont .rva .LSEH_info_ecp_nistz256_sqr_mont
.rva .LSEH_begin_nistz256_select_w5 .rva .LSEH_begin_ecp_nistz256_select_w5
.rva .LSEH_end_nistz256_select_w5 .rva .LSEH_end_ecp_nistz256_select_w5
.rva .LSEH_info_ecp_nistz256_select_wX .rva .LSEH_info_ecp_nistz256_select_wX
.rva .LSEH_begin_nistz256_select_w7 .rva .LSEH_begin_ecp_nistz256_select_w7
.rva .LSEH_end_nistz256_select_w7 .rva .LSEH_end_ecp_nistz256_select_w7
.rva .LSEH_info_ecp_nistz256_select_wX .rva .LSEH_info_ecp_nistz256_select_wX
___ ___
$code.=<<___ if ($avx>1); $code.=<<___ if ($avx>1);
@ -4004,45 +4004,45 @@ $code.=<<___ if ($avx>1);
.rva .LSEH_info_ecp_nistz256_avx2_select_wX .rva .LSEH_info_ecp_nistz256_avx2_select_wX
___ ___
$code.=<<___; $code.=<<___;
.rva .LSEH_begin_p256_point_double .rva .LSEH_begin_ecp_nistz256_point_double
.rva .LSEH_end_p256_point_double .rva .LSEH_end_ecp_nistz256_point_double
.rva .LSEH_info_p256_point_double .rva .LSEH_info_ecp_nistz256_point_double
.rva .LSEH_begin_p256_point_add .rva .LSEH_begin_ecp_nistz256_point_add
.rva .LSEH_end_p256_point_add .rva .LSEH_end_ecp_nistz256_point_add
.rva .LSEH_info_p256_point_add .rva .LSEH_info_ecp_nistz256_point_add
.rva .LSEH_begin_p256_point_add_affine .rva .LSEH_begin_ecp_nistz256_point_add_affine
.rva .LSEH_end_p256_point_add_affine .rva .LSEH_end_ecp_nistz256_point_add_affine
.rva .LSEH_info_p256_point_add_affine .rva .LSEH_info_ecp_nistz256_point_add_affine
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
.rva .LSEH_begin_p256_point_doublex .rva .LSEH_begin_ecp_nistz256_point_doublex
.rva .LSEH_end_p256_point_doublex .rva .LSEH_end_ecp_nistz256_point_doublex
.rva .LSEH_info_p256_point_doublex .rva .LSEH_info_ecp_nistz256_point_doublex
.rva .LSEH_begin_p256_point_addx .rva .LSEH_begin_ecp_nistz256_point_addx
.rva .LSEH_end_p256_point_addx .rva .LSEH_end_ecp_nistz256_point_addx
.rva .LSEH_info_p256_point_addx .rva .LSEH_info_ecp_nistz256_point_addx
.rva .LSEH_begin_p256_point_add_affinex .rva .LSEH_begin_ecp_nistz256_point_add_affinex
.rva .LSEH_end_p256_point_add_affinex .rva .LSEH_end_ecp_nistz256_point_add_affinex
.rva .LSEH_info_p256_point_add_affinex .rva .LSEH_info_ecp_nistz256_point_add_affinex
___ ___
$code.=<<___; $code.=<<___;
.section .xdata .section .xdata
.align 8 .align 8
.LSEH_info_nistz256_neg: .LSEH_info_ecp_nistz256_neg:
.byte 9,0,0,0 .byte 9,0,0,0
.rva short_handler .rva short_handler
.rva .Lneg_body,.Lneg_epilogue # HandlerData[] .rva .Lneg_body,.Lneg_epilogue # HandlerData[]
.LSEH_info_p256_scalar_mul_mont: .LSEH_info_ecp_nistz256_ord_mul_mont:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lord_mul_body,.Lord_mul_epilogue # HandlerData[] .rva .Lord_mul_body,.Lord_mul_epilogue # HandlerData[]
.long 48,0 .long 48,0
.LSEH_info_p256_scalar_sqr_rep_mont: .LSEH_info_ecp_nistz256_ord_sqr_mont:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lord_sqr_body,.Lord_sqr_epilogue # HandlerData[] .rva .Lord_sqr_body,.Lord_sqr_epilogue # HandlerData[]
@ -4061,12 +4061,12 @@ $code.=<<___ if ($addx);
.long 48,0 .long 48,0
___ ___
$code.=<<___; $code.=<<___;
.LSEH_info_p256_mul_mont: .LSEH_info_ecp_nistz256_mul_mont:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lmul_body,.Lmul_epilogue # HandlerData[] .rva .Lmul_body,.Lmul_epilogue # HandlerData[]
.long 48,0 .long 48,0
.LSEH_info_p256_sqr_mont: .LSEH_info_ecp_nistz256_sqr_mont:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lsqr_body,.Lsqr_epilogue # HandlerData[] .rva .Lsqr_body,.Lsqr_epilogue # HandlerData[]
@ -4104,17 +4104,17 @@ $code.=<<___ if ($avx>1);
.align 8 .align 8
___ ___
$code.=<<___; $code.=<<___;
.LSEH_info_p256_point_double: .LSEH_info_ecp_nistz256_point_double:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lpoint_doubleq_body,.Lpoint_doubleq_epilogue # HandlerData[] .rva .Lpoint_doubleq_body,.Lpoint_doubleq_epilogue # HandlerData[]
.long 32*5+56,0 .long 32*5+56,0
.LSEH_info_p256_point_add: .LSEH_info_ecp_nistz256_point_add:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lpoint_addq_body,.Lpoint_addq_epilogue # HandlerData[] .rva .Lpoint_addq_body,.Lpoint_addq_epilogue # HandlerData[]
.long 32*18+56,0 .long 32*18+56,0
.LSEH_info_p256_point_add_affine: .LSEH_info_ecp_nistz256_point_add_affine:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Ladd_affineq_body,.Ladd_affineq_epilogue # HandlerData[] .rva .Ladd_affineq_body,.Ladd_affineq_epilogue # HandlerData[]
@ -4122,17 +4122,17 @@ $code.=<<___;
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
.align 8 .align 8
.LSEH_info_p256_point_doublex: .LSEH_info_ecp_nistz256_point_doublex:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lpoint_doublex_body,.Lpoint_doublex_epilogue # HandlerData[] .rva .Lpoint_doublex_body,.Lpoint_doublex_epilogue # HandlerData[]
.long 32*5+56,0 .long 32*5+56,0
.LSEH_info_p256_point_addx: .LSEH_info_ecp_nistz256_point_addx:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Lpoint_addx_body,.Lpoint_addx_epilogue # HandlerData[] .rva .Lpoint_addx_body,.Lpoint_addx_epilogue # HandlerData[]
.long 32*18+56,0 .long 32*18+56,0
.LSEH_info_p256_point_add_affinex: .LSEH_info_ecp_nistz256_point_add_affinex:
.byte 9,0,0,0 .byte 9,0,0,0
.rva full_handler .rva full_handler
.rva .Ladd_affinex_body,.Ladd_affinex_epilogue # HandlerData[] .rva .Ladd_affinex_body,.Ladd_affinex_epilogue # HandlerData[]

View File

@ -23,8 +23,8 @@ import (
) )
func main() { func main() {
if err := writeP256X86_64Table("p256-x86_64-table.h"); err != nil { if err := writeP256NistzTable("p256-nistz-table.h"); err != nil {
fmt.Fprintf(os.Stderr, "Error writing p256-x86_64-table.h: %s\n", err) fmt.Fprintf(os.Stderr, "Error writing p256-nistz-table.h: %s\n", err)
os.Exit(1) os.Exit(1)
} }
@ -34,7 +34,7 @@ func main() {
} }
} }
func writeP256X86_64Table(path string) error { func writeP256NistzTable(path string) error {
curve := elliptic.P256() curve := elliptic.P256()
tables := make([][][2]*big.Int, 0, 37) tables := make([][][2]*big.Int, 0, 37)
for shift := 0; shift < 256; shift += 7 { for shift := 0; shift < 256; shift += 7 {
@ -59,7 +59,7 @@ func writeP256X86_64Table(path string) error {
*/ */
// This is the precomputed constant time access table for the code in // This is the precomputed constant time access table for the code in
// p256-x86_64.c, for the default generator. The table consists of 37 // p256-nistz.c, for the default generator. The table consists of 37
// subtables, each subtable contains 64 affine points. The affine points are // subtables, each subtable contains 64 affine points. The affine points are
// encoded as eight uint64's, four for the x coordinate and four for the y. // encoded as eight uint64's, four for the x coordinate and four for the y.
// Both values are in little-endian order. There are 37 tables because a // Both values are in little-endian order. There are 37 tables because a

View File

@ -9,7 +9,7 @@
*/ */
// This is the precomputed constant time access table for the code in // This is the precomputed constant time access table for the code in
// p256-x86_64.c, for the default generator. The table consists of 37 // p256-nistz.c, for the default generator. The table consists of 37
// subtables, each subtable contains 64 affine points. The affine points are // subtables, each subtable contains 64 affine points. The affine points are
// encoded as eight uint64's, four for the x coordinate and four for the y. // encoded as eight uint64's, four for the x coordinate and four for the y.
// Both values are in little-endian order. There are 37 tables because a // Both values are in little-endian order. There are 37 tables because a

View File

@ -22,7 +22,7 @@
#include <stdint.h> #include <stdint.h>
#include "p256-x86_64.h" #include "p256-nistz.h"
#if defined(OPENSSL_USE_NISTZ256) #if defined(OPENSSL_USE_NISTZ256)
@ -35,7 +35,7 @@ static const BN_ULONG ONE[P256_LIMBS] = {
}; };
// Precomputed tables for the default generator // Precomputed tables for the default generator
#include "p256-x86_64-table.h" #include "p256-nistz-table.h"
// Recode window to a signed digit, see |nistp_recode_scalar_bits| in // Recode window to a signed digit, see |nistp_recode_scalar_bits| in
// util.c for details // util.c for details
@ -168,7 +168,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
crypto_word wvalue = p_str[(index - 1) / 8]; crypto_word wvalue = p_str[(index - 1) / 8];
wvalue = (wvalue >> ((index - 1) % 8)) & kMask; wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1); ecp_nistz256_select_w5(r, table, (int)(booth_recode_w5(wvalue) >> 1));
while (index >= 5) { while (index >= 5) {
if (index != 255) { if (index != 255) {
@ -179,7 +179,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
wvalue = booth_recode_w5(wvalue); wvalue = booth_recode_w5(wvalue);
ecp_nistz256_select_w5(&h, table, wvalue >> 1); ecp_nistz256_select_w5(&h, table, (int)(wvalue >> 1));
ecp_nistz256_neg(tmp, h.Y); ecp_nistz256_neg(tmp, h.Y);
copy_conditional(h.Y, tmp, (wvalue & 1)); copy_conditional(h.Y, tmp, (wvalue & 1));
@ -202,7 +202,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
wvalue = booth_recode_w5(wvalue); wvalue = booth_recode_w5(wvalue);
ecp_nistz256_select_w5(&h, table, wvalue >> 1); ecp_nistz256_select_w5(&h, table, (int)(wvalue >> 1));
ecp_nistz256_neg(tmp, h.Y); ecp_nistz256_neg(tmp, h.Y);
copy_conditional(h.Y, tmp, wvalue & 1); copy_conditional(h.Y, tmp, wvalue & 1);
@ -258,7 +258,7 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
size_t index = 0; size_t index = 0;
crypto_word wvalue = calc_first_wvalue(&index, p_str); crypto_word wvalue = calc_first_wvalue(&index, p_str);
ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1); ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], (int)(wvalue >> 1));
ecp_nistz256_neg(p.p.Z, p.p.Y); ecp_nistz256_neg(p.p.Z, p.p.Y);
copy_conditional(p.p.Y, p.p.Z, wvalue & 1); copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
@ -271,7 +271,7 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
for (int i = 1; i < 37; i++) { for (int i = 1; i < 37; i++) {
wvalue = calc_wvalue(&index, p_str); wvalue = calc_wvalue(&index, p_str);
ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1); ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], (int)(wvalue >> 1));
ecp_nistz256_neg(t.p.Z, t.a.Y); ecp_nistz256_neg(t.p.Z, t.a.Y);
copy_conditional(t.a.Y, t.p.Z, wvalue & 1); copy_conditional(t.a.Y, t.p.Z, wvalue & 1);

View File

@ -27,13 +27,6 @@
#if defined(OPENSSL_USE_NISTZ256) #if defined(OPENSSL_USE_NISTZ256)
#define ecp_nistz256_neg nistz256_neg
#define ecp_nistz256_select_w5 nistz256_select_w5
#define ecp_nistz256_select_w7 nistz256_select_w7
#define ecp_nistz256_point_double p256_point_double
#define ecp_nistz256_point_add p256_point_add
#define ecp_nistz256_point_add_affine p256_point_add_affine
// ecp_nistz256_neg sets |res| to -|a| mod P. // ecp_nistz256_neg sets |res| to -|a| mod P.
void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
@ -82,14 +75,14 @@ typedef struct {
// and all zeros (the point at infinity) if |index| is 0. This is done in // and all zeros (the point at infinity) if |index| is 0. This is done in
// constant time. // constant time.
void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16], void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],
crypto_word index); int index);
// ecp_nistz256_select_w7 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 64 // ecp_nistz256_select_w7 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 64
// and all zeros (the point at infinity) if |index| is 0. This is done in // and all zeros (the point at infinity) if |index| is 0. This is done in
// constant time. // constant time.
void ecp_nistz256_select_w7(P256_POINT_AFFINE *val, void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
const P256_POINT_AFFINE in_t[64], const P256_POINT_AFFINE in_t[64],
crypto_word index); int index);
// ecp_nistz256_point_double sets |r| to |a| doubled. // ecp_nistz256_point_double sets |r| to |a| doubled.
void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a); void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);

View File

@ -23,7 +23,8 @@
#include "../bn/internal.h" #include "../bn/internal.h"
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \ #if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \
!defined(OPENSSL_SMALL) !defined(OPENSSL_SMALL)
# define OPENSSL_USE_NISTZ256 # define OPENSSL_USE_NISTZ256
#endif #endif

View File

@ -584,16 +584,16 @@ mod tests {
}) })
} }
// There is no `nistz256_neg` on other targets. // There is no `ecp_nistz256_neg` on other targets.
#[cfg(target_arch = "x86_64")] #[cfg(target_arch = "x86_64")]
#[test] #[test]
fn p256_elem_neg_test() { fn p256_elem_neg_test() {
prefixed_extern! { prefixed_extern! {
fn nistz256_neg(r: *mut Limb, a: *const Limb); fn ecp_nistz256_neg(r: *mut Limb, a: *const Limb);
} }
elem_neg_test( elem_neg_test(
&p256::COMMON_OPS, &p256::COMMON_OPS,
nistz256_neg, ecp_nistz256_neg,
test_file!("ops/p256_elem_neg_tests.txt"), test_file!("ops/p256_elem_neg_tests.txt"),
); );
} }