Merge BoringSSL '7ce5d41': Select SHA-256 vs SHA-512 explicitly in perlasm.

This commit is contained in:
Brian Smith 2023-09-29 17:25:30 -07:00
commit 2f9969b9aa
2 changed files with 36 additions and 39 deletions

View File

@ -39,21 +39,7 @@
# generated with -mgeneral-regs-only is significantly faster
# and the gap is only 40-90%.
# Command-line handling: the last argument is the output path and the one
# before it is the flavour (pop at file scope takes from @ARGV).
$output=pop;
$flavour=pop;
# For any flavour other than "void", pipe everything printed to STDOUT
# through arm-xlate.pl; otherwise write straight to $output.
if ($flavour && $flavour ne "void") {
# Directory part of this script's path (either slash style), kept with its
# trailing separator so it can be prefixed directly to a file name.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Look for the translator next to this script first, then in the perlasm
# directory three levels up; give up if neither file exists.
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
# Run the translator with the same perl binary ($^X) that runs this script.
# NOTE(review): the result of open is not checked here.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
} else {
# No translation requested: STDOUT goes directly to the output file.
# NOTE(review): the result of open is not checked here.
open OUT,">$output";
*STDOUT=*OUT;
}
my ($flavour, $output) = @ARGV;
if ($output =~ /sha512-armv8/) {
$BITS=512;
@ -75,6 +61,19 @@ if ($output =~ /sha512-armv8/) {
$reg_t="w";
}
# For any flavour other than "void", pipe everything printed to STDOUT
# through arm-xlate.pl; otherwise write straight to $output.
if ($flavour && $flavour ne "void") {
# Directory part of this script's path (either slash style), kept with its
# trailing separator so it can be prefixed directly to a file name.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Look for the translator next to this script first, then in the perlasm
# directory three levels up; give up if neither file exists.
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
# Run the translator with the same perl binary ($^X) that runs this script.
# NOTE(review): the result of open is not checked here.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
} else {
# No translation requested: STDOUT goes directly to the output file.
# NOTE(review): the result of open is not checked here.
open OUT,">$output";
*STDOUT=*OUT;
}
# Symbol name built from $BITS, e.g. "sha512_block_data_order" when $BITS is 512.
$func="sha${BITS}_block_data_order";
# Register names: $ctx is x0, $inp is x1, $num is x2 and $Ktbl is x30.
($ctx,$inp,$num,$Ktbl)=map("x$_",(0..2,30));

View File

@ -111,30 +111,7 @@
#
# Modified from upstream OpenSSL to remove the XOP code.
# Command-line handling: the first argument is the flavour and the second is
# the output path (shift at file scope takes from @ARGV).
$flavour = shift;
$output = shift;
# A first argument containing a dot is taken to be the output file name, in
# which case $flavour is left undefined.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
# $win64 is set for nasm/masm/mingw64 flavours or when the output ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
# Directory part of this script's path (either slash style), kept with its
# trailing separator so it can be prefixed directly to a file name.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Look for the translator next to this script first, then in the perlasm
# directory three levels up; give up if neither file exists.
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
# In upstream, this is controlled by shelling out to the compiler to check
# versions, but BoringSSL is intended to be used with pre-generated perlasm
# output, so this isn't useful anyway.
#
# This file also has an AVX2 implementation, controlled by setting $avx to 2.
# For now, we intentionally disable it. While it gives a 13-16% perf boost, the
# CFI annotations are wrong. It allocates stack in a loop and should be
# rewritten to avoid this.
$avx = 1;
$shaext = 1;
# Pipe everything printed to STDOUT through x86_64-xlate.pl, run with the same
# perl binary ($^X) that runs this script.
# NOTE(review): the result of open is not checked here.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
my ($flavour, $output) = @ARGV;
if ($output =~ /sha512-x86_64/) {
$func="sha512_block_data_order";
@ -162,6 +139,27 @@ if ($output =~ /sha512-x86_64/) {
$rounds=64;
}
# $win64 is set for nasm/masm/mingw64 flavours or when the output ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
# Directory part of this script's path (either slash style), kept with its
# trailing separator so it can be prefixed directly to a file name.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Look for the translator next to this script first, then in the perlasm
# directory three levels up; give up if neither file exists.
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
# In upstream, this is controlled by shelling out to the compiler to check
# versions, but BoringSSL is intended to be used with pre-generated perlasm
# output, so this isn't useful anyway.
#
# This file also has an AVX2 implementation, controlled by setting $avx to 2.
# For now, we intentionally disable it. While it gives a 13-16% perf boost, the
# CFI annotations are wrong. It allocates stack in a loop and should be
# rewritten to avoid this.
$avx = 1;
$shaext = 1;
# Pipe everything printed to STDOUT through x86_64-xlate.pl, run with the same
# perl binary ($^X) that runs this script.
# NOTE(review): the result of open is not checked here.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
$ctx="%rdi"; # 1st arg, zapped by $a3
$inp="%rsi"; # 2nd arg
$Tbl="%rbp";