Merge pull request from briansmith/b/merge-boringssl-4

Merge BoringSSL fa3fbda: P-256 assembly optimisations for Aarch64.
This commit is contained in:
Brian Smith 2022-11-03 09:01:20 -07:00 committed by GitHub
commit 383317656b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 1705 additions and 137 deletions

@ -64,6 +64,7 @@ include = [
"crypto/fipsmodule/bn/internal.h",
"crypto/fipsmodule/bn/montgomery.c",
"crypto/fipsmodule/bn/montgomery_inv.c",
"crypto/fipsmodule/ec/asm/p256-armv8-asm.pl",
"crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl",
"crypto/fipsmodule/ec/ecp_nistz.c",
"crypto/fipsmodule/ec/ecp_nistz.h",
@ -72,9 +73,9 @@ include = [
"crypto/fipsmodule/ec/gfp_p256.c",
"crypto/fipsmodule/ec/gfp_p384.c",
"crypto/fipsmodule/ec/p256.c",
"crypto/fipsmodule/ec/p256-x86_64-table.h",
"crypto/fipsmodule/ec/p256-x86_64.c",
"crypto/fipsmodule/ec/p256-x86_64.h",
"crypto/fipsmodule/ec/p256-nistz-table.h",
"crypto/fipsmodule/ec/p256-nistz.c",
"crypto/fipsmodule/ec/p256-nistz.h",
"crypto/fipsmodule/ec/p256_shared.h",
"crypto/fipsmodule/ec/p256_table.h",
"crypto/fipsmodule/ec/util.h",

@ -60,7 +60,6 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[X86_64], "crypto/fipsmodule/aes/asm/vpaes-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont.pl"),
(&[X86_64], "crypto/fipsmodule/bn/asm/x86_64-mont5.pl"),
(&[X86_64], "crypto/fipsmodule/ec/p256-x86_64.c"),
(&[X86_64], "crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl"),
(&[X86_64], "crypto/fipsmodule/modes/asm/aesni-gcm-x86_64.pl"),
(&[X86_64], "crypto/fipsmodule/modes/asm/ghash-x86_64.pl"),
@ -68,6 +67,8 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[X86_64], SHA512_X86_64),
(&[X86_64], "crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl"),
(&[AARCH64, X86_64], "crypto/fipsmodule/ec/p256-nistz.c"),
(&[AARCH64, ARM], "crypto/fipsmodule/aes/asm/aesv8-armx.pl"),
(&[AARCH64, ARM], "crypto/fipsmodule/modes/asm/ghashv8-armx.pl"),
@ -84,6 +85,7 @@ const RING_SRCS: &[(&[&str], &str)] = &[
(&[AARCH64], "crypto/fipsmodule/aes/asm/vpaes-armv8.pl"),
(&[AARCH64], "crypto/fipsmodule/bn/asm/armv8-mont.pl"),
(&[AARCH64], "crypto/fipsmodule/ec/asm/p256-armv8-asm.pl"),
(&[AARCH64], "crypto/chacha/asm/chacha-armv8.pl"),
(&[AARCH64], "crypto/fipsmodule/modes/asm/ghash-neon-armv8.pl"),
(&[AARCH64], SHA512_ARMV8),
@ -903,6 +905,18 @@ fn generate_prefix_symbols_header(
}
fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
// Rename some nistz256 assembly functions to match the names of their
// polyfills.
//
// Each entry is `(old, new)`: `old` is the name the assembly source exports
// (e.g. `.globl ecp_nistz256_mul_mont`) and `new` is the `p256_*` name it is
// mapped to. One `define old new` line per entry is generated further down in
// this function, ahead of the defines produced from `SYMBOLS_TO_PREFIX`.
// NOTE(review): presumably every `new` name must also appear in
// `SYMBOLS_TO_PREFIX` so the renamed symbol still receives the crate prefix —
// confirm against the full list.
static SYMBOLS_TO_RENAME: &[(&str, &str)] = &[
("ecp_nistz256_point_double", "p256_point_double"),
("ecp_nistz256_point_add", "p256_point_add"),
("ecp_nistz256_point_add_affine", "p256_point_add_affine"),
("ecp_nistz256_ord_mul_mont", "p256_scalar_mul_mont"),
("ecp_nistz256_ord_sqr_mont", "p256_scalar_sqr_rep_mont"),
("ecp_nistz256_mul_mont", "p256_mul_mont"),
("ecp_nistz256_sqr_mont", "p256_sqr_mont"),
];
static SYMBOLS_TO_PREFIX: &[&str] = &[
"CRYPTO_poly1305_finish",
"CRYPTO_poly1305_finish_neon",
@ -961,9 +975,9 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
"gcm_init_neon",
"limbs_mul_add_limb",
"little_endian_bytes_from_scalar",
"nistz256_neg",
"nistz256_select_w5",
"nistz256_select_w7",
"ecp_nistz256_neg",
"ecp_nistz256_select_w5",
"ecp_nistz256_select_w7",
"nistz384_point_add",
"nistz384_point_double",
"nistz384_point_mul",
@ -1007,6 +1021,17 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String {
let mut out = String::new();
// Emit the rename defines: each `old` name in `SYMBOLS_TO_RENAME` is defined
// to expand to its `new` name, with `prefix_prefix` applied to both sides.
for &(old, new) in SYMBOLS_TO_RENAME {
    out.push_str(&format!(
        "{pp}define {prefix_prefix}{old} {prefix_prefix}{new}\n",
        pp = pp,
        prefix_prefix = prefix_prefix,
        old = old,
        new = new
    ));
}
for symbol in SYMBOLS_TO_PREFIX {
let line = format!(
"{pp}define {prefix_prefix}{symbol} {prefix_prefix}{prefix}{symbol}\n",

File diff suppressed because it is too large Load Diff

@ -90,11 +90,11 @@ my ($r_ptr,$a_ptr,$b_ptr)=("%rdi","%rsi","%rdx");
$code.=<<___;
################################################################################
# void nistz256_neg(uint64_t res[4], uint64_t a[4]);
.globl nistz256_neg
.type nistz256_neg,\@function,2
# void ecp_nistz256_neg(uint64_t res[4], uint64_t a[4]);
.globl ecp_nistz256_neg
.type ecp_nistz256_neg,\@function,2
.align 32
nistz256_neg:
ecp_nistz256_neg:
.cfi_startproc
push %r12
.cfi_push %r12
@ -143,7 +143,7 @@ nistz256_neg:
.Lneg_epilogue:
ret
.cfi_endproc
.size nistz256_neg,.-nistz256_neg
.size ecp_nistz256_neg,.-ecp_nistz256_neg
___
}
{
@ -154,15 +154,15 @@ my ($poly1,$poly3)=($acc6,$acc7);
$code.=<<___;
################################################################################
# void p256_scalar_mul_mont(
# void ecp_nistz256_ord_mul_mont(
# uint64_t res[4],
# uint64_t a[4],
# uint64_t b[4]);
.globl p256_scalar_mul_mont
.type p256_scalar_mul_mont,\@function,3
.globl ecp_nistz256_ord_mul_mont
.type ecp_nistz256_ord_mul_mont,\@function,3
.align 32
p256_scalar_mul_mont:
ecp_nistz256_ord_mul_mont:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -482,18 +482,18 @@ $code.=<<___;
.Lord_mul_epilogue:
ret
.cfi_endproc
.size p256_scalar_mul_mont,.-p256_scalar_mul_mont
.size ecp_nistz256_ord_mul_mont,.-ecp_nistz256_ord_mul_mont
################################################################################
# void p256_scalar_sqr_rep_mont(
# void ecp_nistz256_ord_sqr_mont(
# uint64_t res[4],
# uint64_t a[4],
# uint64_t rep);
.globl p256_scalar_sqr_rep_mont
.type p256_scalar_sqr_rep_mont,\@function,3
.globl ecp_nistz256_ord_sqr_mont
.type ecp_nistz256_ord_sqr_mont,\@function,3
.align 32
p256_scalar_sqr_rep_mont:
ecp_nistz256_ord_sqr_mont:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -783,7 +783,7 @@ $code.=<<___;
.Lord_sqr_epilogue:
ret
.cfi_endproc
.size p256_scalar_sqr_rep_mont,.-p256_scalar_sqr_rep_mont
.size ecp_nistz256_ord_sqr_mont,.-ecp_nistz256_ord_sqr_mont
___
$code.=<<___ if ($addx);
@ -1235,15 +1235,15 @@ ___
$code.=<<___;
################################################################################
# void p256_mul_mont(
# void ecp_nistz256_mul_mont(
# uint64_t res[4],
# uint64_t a[4],
# uint64_t b[4]);
.globl p256_mul_mont
.type p256_mul_mont,\@function,3
.globl ecp_nistz256_mul_mont
.type ecp_nistz256_mul_mont,\@function,3
.align 32
p256_mul_mont:
ecp_nistz256_mul_mont:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -1315,7 +1315,7 @@ $code.=<<___;
.Lmul_epilogue:
ret
.cfi_endproc
.size p256_mul_mont,.-p256_mul_mont
.size ecp_nistz256_mul_mont,.-ecp_nistz256_mul_mont
.type __ecp_nistz256_mul_montq,\@abi-omnipotent
.align 32
@ -1536,16 +1536,16 @@ __ecp_nistz256_mul_montq:
.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq
################################################################################
# void p256_sqr_mont(
# void ecp_nistz256_sqr_mont(
# uint64_t res[4],
# uint64_t a[4]);
# we optimize the square according to S.Gueron and V.Krasnov,
# "Speeding up Big-Number Squaring"
.globl p256_sqr_mont
.type p256_sqr_mont,\@function,2
.globl ecp_nistz256_sqr_mont
.type ecp_nistz256_sqr_mont,\@function,2
.align 32
p256_sqr_mont:
ecp_nistz256_sqr_mont:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -1612,7 +1612,7 @@ $code.=<<___;
.Lsqr_epilogue:
ret
.cfi_endproc
.size p256_sqr_mont,.-p256_sqr_mont
.size ecp_nistz256_sqr_mont,.-ecp_nistz256_sqr_mont
.type __ecp_nistz256_sqr_montq,\@abi-omnipotent
.align 32
@ -2090,11 +2090,11 @@ my ($M1,$T2a,$T2b,$TMP2,$M2,$T2a,$T2b,$TMP2)=map("%xmm$_",(8..15));
$code.=<<___;
################################################################################
# void nistz256_select_w5(uint64_t *val, uint64_t *in_t, crypto_word index);
.globl nistz256_select_w5
.type nistz256_select_w5,\@abi-omnipotent
# void ecp_nistz256_select_w5(uint64_t *val, uint64_t *in_t, int index);
.globl ecp_nistz256_select_w5
.type ecp_nistz256_select_w5,\@abi-omnipotent
.align 32
nistz256_select_w5:
ecp_nistz256_select_w5:
.cfi_startproc
___
$code.=<<___ if ($avx>1);
@ -2105,7 +2105,7 @@ $code.=<<___ if ($avx>1);
___
$code.=<<___ if ($win64);
lea -0x88(%rsp), %rax
.LSEH_begin_nistz256_select_w5:
.LSEH_begin_ecp_nistz256_select_w5:
.byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
.byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax)
.byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax)
@ -2186,15 +2186,15 @@ ___
$code.=<<___;
ret
.cfi_endproc
.LSEH_end_nistz256_select_w5:
.size nistz256_select_w5,.-nistz256_select_w5
.LSEH_end_ecp_nistz256_select_w5:
.size ecp_nistz256_select_w5,.-ecp_nistz256_select_w5
################################################################################
# void nistz256_select_w7(uint64_t *val, uint64_t *in_t, crypto_word index);
.globl nistz256_select_w7
.type nistz256_select_w7,\@abi-omnipotent
# void ecp_nistz256_select_w7(uint64_t *val, uint64_t *in_t, int index);
.globl ecp_nistz256_select_w7
.type ecp_nistz256_select_w7,\@abi-omnipotent
.align 32
nistz256_select_w7:
ecp_nistz256_select_w7:
.cfi_startproc
___
$code.=<<___ if ($avx>1);
@ -2205,7 +2205,7 @@ $code.=<<___ if ($avx>1);
___
$code.=<<___ if ($win64);
lea -0x88(%rsp), %rax
.LSEH_begin_nistz256_select_w7:
.LSEH_begin_ecp_nistz256_select_w7:
.byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax), %rsp
.byte 0x0f,0x29,0x70,0xe0 #movaps %xmm6, -0x20(%rax)
.byte 0x0f,0x29,0x78,0xf0 #movaps %xmm7, -0x10(%rax)
@ -2275,8 +2275,8 @@ ___
$code.=<<___;
ret
.cfi_endproc
.LSEH_end_nistz256_select_w7:
.size nistz256_select_w7,.-nistz256_select_w7
.LSEH_end_ecp_nistz256_select_w7:
.size ecp_nistz256_select_w7,.-ecp_nistz256_select_w7
___
}
if ($avx>1) {
@ -2700,10 +2700,10 @@ sub gen_double () {
$bias = 0;
$code.=<<___;
.globl p256_point_double
.type p256_point_double,\@function,2
.globl ecp_nistz256_point_double
.type ecp_nistz256_point_double,\@function,2
.align 32
p256_point_double:
ecp_nistz256_point_double:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -2719,9 +2719,9 @@ ___
$bias = 128;
$code.=<<___;
.type p256_point_doublex,\@function,2
.type ecp_nistz256_point_doublex,\@function,2
.align 32
p256_point_doublex:
ecp_nistz256_point_doublex:
.cfi_startproc
.Lpoint_doublex:
___
@ -2931,7 +2931,7 @@ $code.=<<___;
.Lpoint_double${x}_epilogue:
ret
.cfi_endproc
.size p256_point_double$sfx,.-p256_point_double$sfx
.size ecp_nistz256_point_double$sfx,.-ecp_nistz256_point_double$sfx
___
}
&gen_double("q");
@ -2952,10 +2952,10 @@ sub gen_add () {
$bias = 0;
$code.=<<___;
.globl p256_point_add
.type p256_point_add,\@function,3
.globl ecp_nistz256_point_add
.type ecp_nistz256_point_add,\@function,3
.align 32
p256_point_add:
ecp_nistz256_point_add:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -2971,9 +2971,9 @@ ___
$bias = 128;
$code.=<<___;
.type p256_point_addx,\@function,3
.type ecp_nistz256_point_addx,\@function,3
.align 32
p256_point_addx:
ecp_nistz256_point_addx:
.cfi_startproc
.Lpoint_addx:
___
@ -3330,7 +3330,7 @@ $code.=<<___;
.Lpoint_add${x}_epilogue:
ret
.cfi_endproc
.size p256_point_add$sfx,.-p256_point_add$sfx
.size ecp_nistz256_point_add$sfx,.-ecp_nistz256_point_add$sfx
___
}
&gen_add("q");
@ -3350,10 +3350,10 @@ sub gen_add_affine () {
$bias = 0;
$code.=<<___;
.globl p256_point_add_affine
.type p256_point_add_affine,\@function,3
.globl ecp_nistz256_point_add_affine
.type ecp_nistz256_point_add_affine,\@function,3
.align 32
p256_point_add_affine:
ecp_nistz256_point_add_affine:
.cfi_startproc
___
$code.=<<___ if ($addx);
@ -3369,9 +3369,9 @@ ___
$bias = 128;
$code.=<<___;
.type p256_point_add_affinex,\@function,3
.type ecp_nistz256_point_add_affinex,\@function,3
.align 32
p256_point_add_affinex:
ecp_nistz256_point_add_affinex:
.cfi_startproc
.Lpoint_add_affinex:
___
@ -3655,7 +3655,7 @@ $code.=<<___;
.Ladd_affine${x}_epilogue:
ret
.cfi_endproc
.size p256_point_add_affine$sfx,.-p256_point_add_affine$sfx
.size ecp_nistz256_point_add_affine$sfx,.-ecp_nistz256_point_add_affine$sfx
___
}
&gen_add_affine("q");
@ -3956,17 +3956,17 @@ full_handler:
.section .pdata
.align 4
.rva .LSEH_begin_nistz256_neg
.rva .LSEH_end_nistz256_neg
.rva .LSEH_info_nistz256_neg
.rva .LSEH_begin_ecp_nistz256_neg
.rva .LSEH_end_ecp_nistz256_neg
.rva .LSEH_info_ecp_nistz256_neg
.rva .LSEH_begin_p256_scalar_mul_mont
.rva .LSEH_end_p256_scalar_mul_mont
.rva .LSEH_info_p256_scalar_mul_mont
.rva .LSEH_begin_ecp_nistz256_ord_mul_mont
.rva .LSEH_end_ecp_nistz256_ord_mul_mont
.rva .LSEH_info_ecp_nistz256_ord_mul_mont
.rva .LSEH_begin_p256_scalar_sqr_rep_mont
.rva .LSEH_end_p256_scalar_sqr_rep_mont
.rva .LSEH_info_p256_scalar_sqr_rep_mont
.rva .LSEH_begin_ecp_nistz256_ord_sqr_mont
.rva .LSEH_end_ecp_nistz256_ord_sqr_mont
.rva .LSEH_info_ecp_nistz256_ord_sqr_mont
___
$code.=<<___ if ($addx);
.rva .LSEH_begin_ecp_nistz256_ord_mul_montx
@ -3978,20 +3978,20 @@ $code.=<<___ if ($addx);
.rva .LSEH_info_ecp_nistz256_ord_sqr_montx
___
$code.=<<___;
.rva .LSEH_begin_p256_mul_mont
.rva .LSEH_end_p256_mul_mont
.rva .LSEH_info_p256_mul_mont
.rva .LSEH_begin_ecp_nistz256_mul_mont
.rva .LSEH_end_ecp_nistz256_mul_mont
.rva .LSEH_info_ecp_nistz256_mul_mont
.rva .LSEH_begin_p256_sqr_mont
.rva .LSEH_end_p256_sqr_mont
.rva .LSEH_info_p256_sqr_mont
.rva .LSEH_begin_ecp_nistz256_sqr_mont
.rva .LSEH_end_ecp_nistz256_sqr_mont
.rva .LSEH_info_ecp_nistz256_sqr_mont
.rva .LSEH_begin_nistz256_select_w5
.rva .LSEH_end_nistz256_select_w5
.rva .LSEH_begin_ecp_nistz256_select_w5
.rva .LSEH_end_ecp_nistz256_select_w5
.rva .LSEH_info_ecp_nistz256_select_wX
.rva .LSEH_begin_nistz256_select_w7
.rva .LSEH_end_nistz256_select_w7
.rva .LSEH_begin_ecp_nistz256_select_w7
.rva .LSEH_end_ecp_nistz256_select_w7
.rva .LSEH_info_ecp_nistz256_select_wX
___
$code.=<<___ if ($avx>1);
@ -4004,45 +4004,45 @@ $code.=<<___ if ($avx>1);
.rva .LSEH_info_ecp_nistz256_avx2_select_wX
___
$code.=<<___;
.rva .LSEH_begin_p256_point_double
.rva .LSEH_end_p256_point_double
.rva .LSEH_info_p256_point_double
.rva .LSEH_begin_ecp_nistz256_point_double
.rva .LSEH_end_ecp_nistz256_point_double
.rva .LSEH_info_ecp_nistz256_point_double
.rva .LSEH_begin_p256_point_add
.rva .LSEH_end_p256_point_add
.rva .LSEH_info_p256_point_add
.rva .LSEH_begin_ecp_nistz256_point_add
.rva .LSEH_end_ecp_nistz256_point_add
.rva .LSEH_info_ecp_nistz256_point_add
.rva .LSEH_begin_p256_point_add_affine
.rva .LSEH_end_p256_point_add_affine
.rva .LSEH_info_p256_point_add_affine
.rva .LSEH_begin_ecp_nistz256_point_add_affine
.rva .LSEH_end_ecp_nistz256_point_add_affine
.rva .LSEH_info_ecp_nistz256_point_add_affine
___
$code.=<<___ if ($addx);
.rva .LSEH_begin_p256_point_doublex
.rva .LSEH_end_p256_point_doublex
.rva .LSEH_info_p256_point_doublex
.rva .LSEH_begin_ecp_nistz256_point_doublex
.rva .LSEH_end_ecp_nistz256_point_doublex
.rva .LSEH_info_ecp_nistz256_point_doublex
.rva .LSEH_begin_p256_point_addx
.rva .LSEH_end_p256_point_addx
.rva .LSEH_info_p256_point_addx
.rva .LSEH_begin_ecp_nistz256_point_addx
.rva .LSEH_end_ecp_nistz256_point_addx
.rva .LSEH_info_ecp_nistz256_point_addx
.rva .LSEH_begin_p256_point_add_affinex
.rva .LSEH_end_p256_point_add_affinex
.rva .LSEH_info_p256_point_add_affinex
.rva .LSEH_begin_ecp_nistz256_point_add_affinex
.rva .LSEH_end_ecp_nistz256_point_add_affinex
.rva .LSEH_info_ecp_nistz256_point_add_affinex
___
$code.=<<___;
.section .xdata
.align 8
.LSEH_info_nistz256_neg:
.LSEH_info_ecp_nistz256_neg:
.byte 9,0,0,0
.rva short_handler
.rva .Lneg_body,.Lneg_epilogue # HandlerData[]
.LSEH_info_p256_scalar_mul_mont:
.LSEH_info_ecp_nistz256_ord_mul_mont:
.byte 9,0,0,0
.rva full_handler
.rva .Lord_mul_body,.Lord_mul_epilogue # HandlerData[]
.long 48,0
.LSEH_info_p256_scalar_sqr_rep_mont:
.LSEH_info_ecp_nistz256_ord_sqr_mont:
.byte 9,0,0,0
.rva full_handler
.rva .Lord_sqr_body,.Lord_sqr_epilogue # HandlerData[]
@ -4061,12 +4061,12 @@ $code.=<<___ if ($addx);
.long 48,0
___
$code.=<<___;
.LSEH_info_p256_mul_mont:
.LSEH_info_ecp_nistz256_mul_mont:
.byte 9,0,0,0
.rva full_handler
.rva .Lmul_body,.Lmul_epilogue # HandlerData[]
.long 48,0
.LSEH_info_p256_sqr_mont:
.LSEH_info_ecp_nistz256_sqr_mont:
.byte 9,0,0,0
.rva full_handler
.rva .Lsqr_body,.Lsqr_epilogue # HandlerData[]
@ -4104,17 +4104,17 @@ $code.=<<___ if ($avx>1);
.align 8
___
$code.=<<___;
.LSEH_info_p256_point_double:
.LSEH_info_ecp_nistz256_point_double:
.byte 9,0,0,0
.rva full_handler
.rva .Lpoint_doubleq_body,.Lpoint_doubleq_epilogue # HandlerData[]
.long 32*5+56,0
.LSEH_info_p256_point_add:
.LSEH_info_ecp_nistz256_point_add:
.byte 9,0,0,0
.rva full_handler
.rva .Lpoint_addq_body,.Lpoint_addq_epilogue # HandlerData[]
.long 32*18+56,0
.LSEH_info_p256_point_add_affine:
.LSEH_info_ecp_nistz256_point_add_affine:
.byte 9,0,0,0
.rva full_handler
.rva .Ladd_affineq_body,.Ladd_affineq_epilogue # HandlerData[]
@ -4122,17 +4122,17 @@ $code.=<<___;
___
$code.=<<___ if ($addx);
.align 8
.LSEH_info_p256_point_doublex:
.LSEH_info_ecp_nistz256_point_doublex:
.byte 9,0,0,0
.rva full_handler
.rva .Lpoint_doublex_body,.Lpoint_doublex_epilogue # HandlerData[]
.long 32*5+56,0
.LSEH_info_p256_point_addx:
.LSEH_info_ecp_nistz256_point_addx:
.byte 9,0,0,0
.rva full_handler
.rva .Lpoint_addx_body,.Lpoint_addx_epilogue # HandlerData[]
.long 32*18+56,0
.LSEH_info_p256_point_add_affinex:
.LSEH_info_ecp_nistz256_point_add_affinex:
.byte 9,0,0,0
.rva full_handler
.rva .Ladd_affinex_body,.Ladd_affinex_epilogue # HandlerData[]

@ -23,8 +23,8 @@ import (
)
func main() {
if err := writeP256X86_64Table("p256-x86_64-table.h"); err != nil {
fmt.Fprintf(os.Stderr, "Error writing p256-x86_64-table.h: %s\n", err)
if err := writeP256NistzTable("p256-nistz-table.h"); err != nil {
fmt.Fprintf(os.Stderr, "Error writing p256-nistz-table.h: %s\n", err)
os.Exit(1)
}
@ -34,7 +34,7 @@ func main() {
}
}
func writeP256X86_64Table(path string) error {
func writeP256NistzTable(path string) error {
curve := elliptic.P256()
tables := make([][][2]*big.Int, 0, 37)
for shift := 0; shift < 256; shift += 7 {
@ -59,7 +59,7 @@ func writeP256X86_64Table(path string) error {
*/
// This is the precomputed constant time access table for the code in
// p256-x86_64.c, for the default generator. The table consists of 37
// p256-nistz.c, for the default generator. The table consists of 37
// subtables, each subtable contains 64 affine points. The affine points are
// encoded as eight uint64's, four for the x coordinate and four for the y.
// Both values are in little-endian order. There are 37 tables because a

@ -9,7 +9,7 @@
*/
// This is the precomputed constant time access table for the code in
// p256-x86_64.c, for the default generator. The table consists of 37
// p256-nistz.c, for the default generator. The table consists of 37
// subtables, each subtable contains 64 affine points. The affine points are
// encoded as eight uint64's, four for the x coordinate and four for the y.
// Both values are in little-endian order. There are 37 tables because a

@ -22,7 +22,7 @@
#include <stdint.h>
#include "p256-x86_64.h"
#include "p256-nistz.h"
#if defined(OPENSSL_USE_NISTZ256)
@ -35,7 +35,7 @@ static const BN_ULONG ONE[P256_LIMBS] = {
};
// Precomputed tables for the default generator
#include "p256-x86_64-table.h"
#include "p256-nistz-table.h"
// Recode window to a signed digit, see |nistp_recode_scalar_bits| in
// util.c for details
@ -168,7 +168,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
crypto_word wvalue = p_str[(index - 1) / 8];
wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
ecp_nistz256_select_w5(r, table, (int)(booth_recode_w5(wvalue) >> 1));
while (index >= 5) {
if (index != 255) {
@ -179,7 +179,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
wvalue = booth_recode_w5(wvalue);
ecp_nistz256_select_w5(&h, table, wvalue >> 1);
ecp_nistz256_select_w5(&h, table, (int)(wvalue >> 1));
ecp_nistz256_neg(tmp, h.Y);
copy_conditional(h.Y, tmp, (wvalue & 1));
@ -202,7 +202,7 @@ static void ecp_nistz256_windowed_mul(P256_POINT *r,
wvalue = booth_recode_w5(wvalue);
ecp_nistz256_select_w5(&h, table, wvalue >> 1);
ecp_nistz256_select_w5(&h, table, (int)(wvalue >> 1));
ecp_nistz256_neg(tmp, h.Y);
copy_conditional(h.Y, tmp, wvalue & 1);
@ -258,7 +258,7 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
size_t index = 0;
crypto_word wvalue = calc_first_wvalue(&index, p_str);
ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], (int)(wvalue >> 1));
ecp_nistz256_neg(p.p.Z, p.p.Y);
copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
@ -271,7 +271,7 @@ void p256_point_mul_base(P256_POINT *r, const Limb scalar[P256_LIMBS]) {
for (int i = 1; i < 37; i++) {
wvalue = calc_wvalue(&index, p_str);
ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], wvalue >> 1);
ecp_nistz256_select_w7(&t.a, ecp_nistz256_precomputed[i], (int)(wvalue >> 1));
ecp_nistz256_neg(t.p.Z, t.a.Y);
copy_conditional(t.a.Y, t.p.Z, wvalue & 1);

@ -27,13 +27,6 @@
#if defined(OPENSSL_USE_NISTZ256)
#define ecp_nistz256_neg nistz256_neg
#define ecp_nistz256_select_w5 nistz256_select_w5
#define ecp_nistz256_select_w7 nistz256_select_w7
#define ecp_nistz256_point_double p256_point_double
#define ecp_nistz256_point_add p256_point_add
#define ecp_nistz256_point_add_affine p256_point_add_affine
// ecp_nistz256_neg sets |res| to -|a| mod P.
void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
@ -82,14 +75,14 @@ typedef struct {
// and all zeros (the point at infinity) if |index| is 0. This is done in
// constant time.
void ecp_nistz256_select_w5(P256_POINT *val, const P256_POINT in_t[16],
crypto_word index);
int index);
// ecp_nistz256_select_w7 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 64
// and all zeros (the point at infinity) if |index| is 0. This is done in
// constant time.
void ecp_nistz256_select_w7(P256_POINT_AFFINE *val,
const P256_POINT_AFFINE in_t[64],
crypto_word index);
int index);
// ecp_nistz256_point_double sets |r| to |a| doubled.
void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);

@ -23,7 +23,8 @@
#include "../bn/internal.h"
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \
!defined(OPENSSL_SMALL)
# define OPENSSL_USE_NISTZ256
#endif

@ -584,16 +584,16 @@ mod tests {
})
}
// There is no `nistz256_neg` on other targets.
// There is no `ecp_nistz256_neg` on other targets.
#[cfg(target_arch = "x86_64")]
#[test]
fn p256_elem_neg_test() {
prefixed_extern! {
fn nistz256_neg(r: *mut Limb, a: *const Limb);
fn ecp_nistz256_neg(r: *mut Limb, a: *const Limb);
}
elem_neg_test(
&p256::COMMON_OPS,
nistz256_neg,
ecp_nistz256_neg,
test_file!("ops/p256_elem_neg_tests.txt"),
);
}