Merge BoringSSL through af561c221d3af70bd0aa48024db4f1fcf1988eef.
commit feeca747e2
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -17,23 +17,31 @@
# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication.
#
# June 2014
-# Initial version was developed in tight cooperation with Ard Biesheuvel
-# of Linaro from bits-n-pieces from other assembly modules. Just like
-# aesv8-armx.pl this module supports both AArch32 and AArch64 execution modes.
#
+# Initial version was developed in tight cooperation with Ard
+# Biesheuvel of Linaro from bits-n-pieces from other assembly modules.
+# Just like aesv8-armx.pl this module supports both AArch32 and
+# AArch64 execution modes.
#
# July 2014
#
# Implement 2x aggregated reduction [see ghash-x86.pl for background
# information].
#
# November 2017
#
# AArch64 register bank to "accommodate" 4x aggregated reduction and
# improve performance by 20-70% depending on processor.
#
# Current performance in cycles per processed byte:
#
-#               PMULL[2]        32-bit NEON(*)
-# Apple A7      0.92            5.62
-# Cortex-A53    1.01            8.39
-# Cortex-A57    1.17            7.61
-# Denver        0.71            6.02
-# Mongoose      1.10            8.06
-# Kryo          1.16            8.00
+#               64-bit PMULL    32-bit PMULL    32-bit NEON(*)
+# Apple A7      0.58            0.92            5.62
+# Cortex-A53    0.85            1.01            8.39
+# Cortex-A57    0.73            1.17            7.61
+# Denver        0.51            0.65            6.02
+# Mongoose      0.65            1.10            8.06
+# Kryo          0.76            1.16            8.00
#
# (*) presented for reference/comparison purposes;
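The November 2017 note above is the heart of this change: the GHASH recurrence is unrolled so that four polynomial multiplications become independent and can overlap in the PMULL pipelines. A minimal sketch of the identity in Rust (not ring's code; gf128_mul() is a naive stand-in for a carryless multiply plus reduction, and the field's bit convention is immaterial to the algebra):

    // Naive GF(2^128) multiply (reduction polynomial x^128+x^7+x^2+x+1);
    // a stand-in for what PMULL plus the 0xc2-constant reduction computes.
    fn gf128_mul(mut a: u128, mut b: u128) -> u128 {
        let mut p = 0u128;
        for _ in 0..128 {
            if b & 1 == 1 {
                p ^= a;
            }
            b >>= 1;
            let carry = a >> 127;
            a <<= 1;
            if carry == 1 {
                a ^= 0x87; // x^128 = x^7 + x^2 + x + 1
            }
        }
        p
    }

    // One-at-a-time GHASH: a serial multiply-reduce chain.
    fn ghash_1x(xi: u128, h: u128, blocks: &[u128; 4]) -> u128 {
        let mut x = xi;
        for &b in blocks {
            x = gf128_mul(x ^ b, h);
        }
        x
    }

    // 4x aggregated form: expanding the chain gives
    //   (Xi^I0)*H^4 ^ I1*H^3 ^ I2*H^2 ^ I3*H,
    // four independent multiplies (the assembly additionally defers the
    // reduction until the four products have been summed).
    fn ghash_4x(xi: u128, h: u128, blocks: &[u128; 4]) -> u128 {
        let h2 = gf128_mul(h, h);
        let h3 = gf128_mul(h2, h);
        let h4 = gf128_mul(h3, h);
        gf128_mul(xi ^ blocks[0], h4)
            ^ gf128_mul(blocks[1], h3)
            ^ gf128_mul(blocks[2], h2)
            ^ gf128_mul(blocks[3], h)
    }

    fn main() {
        let (xi, h) = (5u128, 0x1234_5678_9abc_def0);
        let blocks = [1u128, 2, 3, 4];
        assert_eq!(ghash_1x(xi, h, &blocks), ghash_4x(xi, h, &blocks));
    }

gcm_init_clmul below precomputes the required powers of H once per key, which is why Htable grows from two stored entries to five in this commit.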
@@ -62,6 +70,7 @@ my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
$code=<<___;
#include <ring-core/arm_arch.h>

#if __ARM_MAX_ARCH__>=7
.text
___
+$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
@@ -129,8 +138,56 @@ gcm_init_clmul:
        vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing
        veor $t1,$t1,$H2
        vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed
-        vst1.64 {$Hhl-$H2},[x0] @ store Htable[1..2]
+        vst1.64 {$Hhl-$H2},[x0],#32 @ store Htable[1..2]
___
if ($flavour =~ /64/) {
my ($t3,$Yl,$Ym,$Yh) = map("q$_",(4..7));

$code.=<<___;
        @ calculate H^3 and H^4
        vpmull.p64 $Xl,$H, $H2
        vpmull.p64 $Yl,$H2,$H2
        vpmull2.p64 $Xh,$H, $H2
        vpmull2.p64 $Yh,$H2,$H2
        vpmull.p64 $Xm,$t0,$t1
        vpmull.p64 $Ym,$t1,$t1

        vext.8 $t0,$Xl,$Xh,#8 @ Karatsuba post-processing
        vext.8 $t1,$Yl,$Yh,#8
        veor $t2,$Xl,$Xh
        veor $Xm,$Xm,$t0
        veor $t3,$Yl,$Yh
        veor $Ym,$Ym,$t1
        veor $Xm,$Xm,$t2
        vpmull.p64 $t2,$Xl,$xC2 @ 1st phase
        veor $Ym,$Ym,$t3
        vpmull.p64 $t3,$Yl,$xC2

        vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
        vmov $Yh#lo,$Ym#hi
        vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
        vmov $Ym#hi,$Yl#lo
        veor $Xl,$Xm,$t2
        veor $Yl,$Ym,$t3

        vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase
        vext.8 $t3,$Yl,$Yl,#8
        vpmull.p64 $Xl,$Xl,$xC2
        vpmull.p64 $Yl,$Yl,$xC2
        veor $t2,$t2,$Xh
        veor $t3,$t3,$Yh
        veor $H, $Xl,$t2 @ H^3
        veor $H2,$Yl,$t3 @ H^4

        vext.8 $t0,$H, $H,#8 @ Karatsuba pre-processing
        vext.8 $t1,$H2,$H2,#8
        veor $t0,$t0,$H
        veor $t1,$t1,$H2
        vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed
        vst1.64 {$H-$H2},[x0] @ store Htable[3..5]
___
}
$code.=<<___;
        ret
.size gcm_init_clmul,.-gcm_init_clmul
___
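The "Karatsuba pre-processing" stored alongside each pair of key powers is the XOR of that power's two 64-bit halves; with it, each 128x128-bit carryless multiply costs three PMULLs instead of four. A sketch of the arithmetic, assuming a hypothetical clmul() in place of PMULL (this is the textbook identity, not a transcription of ring's code):

    // Stand-in for PMULL: 64x64 -> 128-bit carryless multiply.
    fn clmul(a: u64, b: u64) -> u128 {
        let mut r = 0u128;
        for i in 0..64 {
            if (b >> i) & 1 == 1 {
                r ^= (a as u128) << i;
            }
        }
        r
    }

    // 128x128 -> 256-bit carryless multiply via Karatsuba. In GF(2),
    // addition is XOR, so the middle term is one multiply of the XORed
    // halves ("pre-processing") plus two corrections ("post-processing"):
    //   mid = (ah^al)*(bh^bl) ^ hi ^ lo
    fn mul_256(a: (u64, u64), b: (u64, u64)) -> (u128, u128) {
        let (ah, al) = a;
        let (bh, bl) = b;
        let lo = clmul(al, bl);                  // vpmull.p64
        let hi = clmul(ah, bh);                  // vpmull2.p64
        let mid = clmul(ah ^ al, bh ^ bl) ^ hi ^ lo;
        (hi ^ (mid >> 64), lo ^ (mid << 64))     // fold mid into both halves
    }

In the assembly, ah^al for each power of H is computed once here at init time and packed into Htable, so the per-block hot path pays only for the three multiplies.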
@@ -202,6 +259,10 @@ $code.=<<___;
gcm_ghash_clmul:
        AARCH64_VALID_CALL_TARGET
___
+$code.=<<___ if ($flavour =~ /64/);
+        cmp $len,#64
+        b.hs .Lgcm_ghash_v8_4x
+___
$code.=<<___ if ($flavour !~ /64/);
        vstmdb sp!,{d8-d15} @ 32-bit ABI says so
___
@@ -349,10 +410,301 @@ $code.=<<___;
        ret
.size gcm_ghash_clmul,.-gcm_ghash_clmul
___

if ($flavour =~ /64/) { # 4x subroutine
my ($I0,$j1,$j2,$j3,
    $I1,$I2,$I3,$H3,$H34,$H4,$Yl,$Ym,$Yh) = map("q$_",(4..7,15..23));

$code.=<<___;
.type gcm_ghash_v8_4x,%function
.align 4
gcm_ghash_v8_4x:
.Lgcm_ghash_v8_4x:
        vld1.64 {$Xl},[$Xi] @ load [rotated] Xi
        vld1.64 {$H-$H2},[$Htbl],#48 @ load twisted H, ..., H^2
        vmov.i8 $xC2,#0xe1
        vld1.64 {$H3-$H4},[$Htbl] @ load twisted H^3, ..., H^4
        vshl.u64 $xC2,$xC2,#57 @ compose 0xc2.0 constant

        vld1.64 {$I0-$j3},[$inp],#64
#ifndef __ARMEB__
        vrev64.8 $Xl,$Xl
        vrev64.8 $j1,$j1
        vrev64.8 $j2,$j2
        vrev64.8 $j3,$j3
        vrev64.8 $I0,$I0
#endif
        vext.8 $I3,$j3,$j3,#8
        vext.8 $I2,$j2,$j2,#8
        vext.8 $I1,$j1,$j1,#8

        vpmull.p64 $Yl,$H,$I3 @ H·Ii+3
        veor $j3,$j3,$I3
        vpmull2.p64 $Yh,$H,$I3
        vpmull.p64 $Ym,$Hhl,$j3

        vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2
        veor $j2,$j2,$I2
        vpmull2.p64 $I2,$H2,$I2
        vpmull2.p64 $j2,$Hhl,$j2

        veor $Yl,$Yl,$t0
        veor $Yh,$Yh,$I2
        veor $Ym,$Ym,$j2

        vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1
        veor $j1,$j1,$I1
        vpmull2.p64 $I1,$H3,$I1
        vpmull.p64 $j1,$H34,$j1

        veor $Yl,$Yl,$j3
        veor $Yh,$Yh,$I1
        veor $Ym,$Ym,$j1

        subs $len,$len,#128
        b.lo .Ltail4x

        b .Loop4x

.align 4
.Loop4x:
        veor $t0,$I0,$Xl
        vld1.64 {$I0-$j3},[$inp],#64
        vext.8 $IN,$t0,$t0,#8
#ifndef __ARMEB__
        vrev64.8 $j1,$j1
        vrev64.8 $j2,$j2
        vrev64.8 $j3,$j3
        vrev64.8 $I0,$I0
#endif

        vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii)
        veor $t0,$t0,$IN
        vpmull2.p64 $Xh,$H4,$IN
        vext.8 $I3,$j3,$j3,#8
        vpmull2.p64 $Xm,$H34,$t0

        veor $Xl,$Xl,$Yl
        veor $Xh,$Xh,$Yh
        vext.8 $I2,$j2,$j2,#8
        veor $Xm,$Xm,$Ym
        vext.8 $I1,$j1,$j1,#8

        vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
        veor $t2,$Xl,$Xh
        vpmull.p64 $Yl,$H,$I3 @ H·Ii+3
        veor $j3,$j3,$I3
        veor $Xm,$Xm,$t1
        vpmull2.p64 $Yh,$H,$I3
        veor $Xm,$Xm,$t2
        vpmull.p64 $Ym,$Hhl,$j3

        vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
        vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
        vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
        vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2
        veor $j2,$j2,$I2
        vpmull2.p64 $I2,$H2,$I2
        veor $Xl,$Xm,$t2
        vpmull2.p64 $j2,$Hhl,$j2

        veor $Yl,$Yl,$t0
        veor $Yh,$Yh,$I2
        veor $Ym,$Ym,$j2

        vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
        vpmull.p64 $Xl,$Xl,$xC2
        vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1
        veor $j1,$j1,$I1
        veor $t2,$t2,$Xh
        vpmull2.p64 $I1,$H3,$I1
        vpmull.p64 $j1,$H34,$j1

        veor $Xl,$Xl,$t2
        veor $Yl,$Yl,$j3
        veor $Yh,$Yh,$I1
        vext.8 $Xl,$Xl,$Xl,#8
        veor $Ym,$Ym,$j1

        subs $len,$len,#64
        b.hs .Loop4x

.Ltail4x:
        veor $t0,$I0,$Xl
        vext.8 $IN,$t0,$t0,#8

        vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii)
        veor $t0,$t0,$IN
        vpmull2.p64 $Xh,$H4,$IN
        vpmull2.p64 $Xm,$H34,$t0

        veor $Xl,$Xl,$Yl
        veor $Xh,$Xh,$Yh
        veor $Xm,$Xm,$Ym

        adds $len,$len,#64
        b.eq .Ldone4x

        cmp $len,#32
        b.lo .Lone
        b.eq .Ltwo
.Lthree:
        vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
        veor $t2,$Xl,$Xh
        veor $Xm,$Xm,$t1
        vld1.64 {$I0-$j2},[$inp]
        veor $Xm,$Xm,$t2
#ifndef __ARMEB__
        vrev64.8 $j1,$j1
        vrev64.8 $j2,$j2
        vrev64.8 $I0,$I0
#endif

        vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
        vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
        vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
        vext.8 $I2,$j2,$j2,#8
        vext.8 $I1,$j1,$j1,#8
        veor $Xl,$Xm,$t2

        vpmull.p64 $Yl,$H,$I2 @ H·Ii+2
        veor $j2,$j2,$I2

        vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
        vpmull.p64 $Xl,$Xl,$xC2
        veor $t2,$t2,$Xh
        vpmull2.p64 $Yh,$H,$I2
        vpmull.p64 $Ym,$Hhl,$j2
        veor $Xl,$Xl,$t2
        vpmull.p64 $j3,$H2,$I1 @ H^2·Ii+1
        veor $j1,$j1,$I1
        vext.8 $Xl,$Xl,$Xl,#8

        vpmull2.p64 $I1,$H2,$I1
        veor $t0,$I0,$Xl
        vpmull2.p64 $j1,$Hhl,$j1
        vext.8 $IN,$t0,$t0,#8

        veor $Yl,$Yl,$j3
        veor $Yh,$Yh,$I1
        veor $Ym,$Ym,$j1

        vpmull.p64 $Xl,$H3,$IN @ H^3·(Xi+Ii)
        veor $t0,$t0,$IN
        vpmull2.p64 $Xh,$H3,$IN
        vpmull.p64 $Xm,$H34,$t0

        veor $Xl,$Xl,$Yl
        veor $Xh,$Xh,$Yh
        veor $Xm,$Xm,$Ym
        b .Ldone4x

.align 4
.Ltwo:
        vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
        veor $t2,$Xl,$Xh
        veor $Xm,$Xm,$t1
        vld1.64 {$I0-$j1},[$inp]
        veor $Xm,$Xm,$t2
#ifndef __ARMEB__
        vrev64.8 $j1,$j1
        vrev64.8 $I0,$I0
#endif

        vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
        vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
        vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
        vext.8 $I1,$j1,$j1,#8
        veor $Xl,$Xm,$t2

        vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
        vpmull.p64 $Xl,$Xl,$xC2
        veor $t2,$t2,$Xh
        veor $Xl,$Xl,$t2
        vext.8 $Xl,$Xl,$Xl,#8

        vpmull.p64 $Yl,$H,$I1 @ H·Ii+1
        veor $j1,$j1,$I1

        veor $t0,$I0,$Xl
        vext.8 $IN,$t0,$t0,#8

        vpmull2.p64 $Yh,$H,$I1
        vpmull.p64 $Ym,$Hhl,$j1

        vpmull.p64 $Xl,$H2,$IN @ H^2·(Xi+Ii)
        veor $t0,$t0,$IN
        vpmull2.p64 $Xh,$H2,$IN
        vpmull2.p64 $Xm,$Hhl,$t0

        veor $Xl,$Xl,$Yl
        veor $Xh,$Xh,$Yh
        veor $Xm,$Xm,$Ym
        b .Ldone4x

.align 4
.Lone:
        vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
        veor $t2,$Xl,$Xh
        veor $Xm,$Xm,$t1
        vld1.64 {$I0},[$inp]
        veor $Xm,$Xm,$t2
#ifndef __ARMEB__
        vrev64.8 $I0,$I0
#endif

        vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
        vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
        vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
        veor $Xl,$Xm,$t2

        vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
        vpmull.p64 $Xl,$Xl,$xC2
        veor $t2,$t2,$Xh
        veor $Xl,$Xl,$t2
        vext.8 $Xl,$Xl,$Xl,#8

        veor $t0,$I0,$Xl
        vext.8 $IN,$t0,$t0,#8

        vpmull.p64 $Xl,$H,$IN
        veor $t0,$t0,$IN
        vpmull2.p64 $Xh,$H,$IN
        vpmull.p64 $Xm,$Hhl,$t0

.Ldone4x:
        vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
        veor $t2,$Xl,$Xh
        veor $Xm,$Xm,$t1
        veor $Xm,$Xm,$t2

        vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
        vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
        vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
        veor $Xl,$Xm,$t2

        vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
        vpmull.p64 $Xl,$Xl,$xC2
        veor $t2,$t2,$Xh
        veor $Xl,$Xl,$t2
        vext.8 $Xl,$Xl,$Xl,#8

#ifndef __ARMEB__
        vrev64.8 $Xl,$Xl
#endif
        vst1.64 {$Xl},[$Xi] @ write out Xi

        ret
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
___

}
}

$code.=<<___;
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#endif
___

if ($flavour =~ /64/) { ######## 64-bit code
@@ -360,7 +712,8 @@ if ($flavour =~ /64/) { ######## 64-bit code
    my $arg=shift;

    $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
-    sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
+    sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
+        $3<8?$3:$3+8,($4 eq "lo")?0:1;
}
foreach(split("\n",$code)) {
        s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or

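Stepping back from the listing: together with the cmp/b.hs dispatch added to gcm_ghash_clmul, the new labels give gcm_ghash_v8_4x this overall shape — a 64-byte (4-block) main loop plus a 16/32/48-byte tail handled by .Lone/.Ltwo/.Lthree. A rough Rust sketch of that control flow (hypothetical helpers, not ring's API):

    // Hypothetical stand-ins for the PMULL sequences in the listing.
    fn absorb_4_blocks(_chunk: &[u8]) {} // .Loop4x body
    fn absorb_tail(_tail: &[u8]) {}      // .Lone / .Ltwo / .Lthree

    // Callers reach gcm_ghash_v8_4x only when len >= 64, and len is a
    // multiple of the 16-byte block size.
    fn ghash_v8_4x(input: &[u8]) {
        assert!(input.len() >= 64 && input.len() % 16 == 0);
        let mut chunks = input.chunks_exact(64);
        for chunk in &mut chunks {
            absorb_4_blocks(chunk);
        }
        let tail = chunks.remainder(); // 16, 32, or 48 bytes (.Ltail4x)
        if !tail.is_empty() {
            absorb_tail(tail);
        }
    }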
@@ -1,5 +1,5 @@
#! /usr/bin/env perl
-# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -27,6 +27,7 @@
# Denver        2.01            10.5 (+26%)     6.70 (+8%)
# X-Gene                        20.0 (+100%)    12.8 (+300%(***))
# Mongoose      2.36            13.0 (+50%)     8.36 (+33%)
# Kryo          1.92            17.4 (+30%)     11.2 (+8%)
#
# (*) Software SHA256 results are of lesser relevance, presented
#     mostly for informational purposes.
@@ -35,7 +36,7 @@
#       on Cortex-A53 (or by 4 cycles per round).
# (***) Super-impressive coefficients over gcc-generated code are
#       indication of some compiler "pathology", most notably code
-#       generated with -mgeneral-regs-only is significanty faster
+#       generated with -mgeneral-regs-only is significantly faster
#       and the gap is only 40-90%.

$output=pop;
@@ -89,7 +90,7 @@ my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
$T0=@X[$i+3] if ($i<11);

$code.=<<___ if ($i<16);
-#ifndef __ARMEB__
+#ifndef __AARCH64EB__
        rev @X[$i],@X[$i] // $i
#endif
___
@@ -449,12 +450,15 @@ close SELF;

foreach(split("\n",$code)) {

-        s/\`([^\`]*)\`/eval($1)/geo;
+        s/\`([^\`]*)\`/eval($1)/ge;

-        s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo;
+        s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;

-        s/\.\w?32\b//o and s/\.16b/\.4s/go;
-        m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
        s/\bq([0-9]+)\b/v$1.16b/g; # old->new registers

        s/\.[ui]?8(\s)/$1/;
+        s/\.\w?32\b// and s/\.16b/\.4s/g;
+        m/(ld|st)1[^\[]+\[0\]/ and s/\.4s/\.s/g;

        print $_,"\n";
}

@@ -1136,7 +1136,7 @@ my $endbranch = sub {
########################################################################

{
-    my $comment = "#";
+    my $comment = "//";
    $comment = ";" if ($masm || $nasm);
    print <<___;
$comment This file is generated from a similarly-named Perl script in the BoringSSL

@@ -275,7 +275,7 @@ sub ::asciz

sub ::asm_finish
{ &file_end();
-  my $comment = "#";
+  my $comment = "//";
  $comment = ";" if ($win32);
  print <<___;
$comment This file is generated from a similarly-named Perl script in the BoringSSL

@@ -138,7 +138,7 @@ fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
}

pub(super) fn init(xi: [u64; 2]) -> super::u128 {
-    // We implement GHASH in terms of POLYVAL, as described in RFC8452. This
+    // We implement GHASH in terms of POLYVAL, as described in RFC 8452. This
    // avoids a shift by 1 in the multiplication, needed to account for bit
    // reversal losing a bit after multiplication, that is,
    // rev128(X) * rev128(Y) = rev255(X*Y).
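The identity in that last comment line can be checked directly with a naive carryless multiply; a self-contained sketch (independent of ring's implementation), holding the 255-bit product as a (hi, lo) pair:

    // Naive 128x128 -> 255-bit carryless multiply, result as (hi, lo).
    fn clmul128(x: u128, y: u128) -> (u128, u128) {
        let (mut hi, mut lo) = (0u128, 0u128);
        for i in 0..128 {
            if (y >> i) & 1 == 1 {
                lo ^= x << i;
                if i > 0 {
                    hi ^= x >> (128 - i);
                }
            }
        }
        (hi, lo)
    }

    fn rev128(x: u128) -> u128 {
        x.reverse_bits()
    }

    // Reverse a 255-bit value: reverse all 256 bits, then drop the
    // always-zero top bit with a one-bit right shift.
    fn rev255(v: (u128, u128)) -> (u128, u128) {
        let (rh, rl) = (v.1.reverse_bits(), v.0.reverse_bits());
        (rh >> 1, (rl >> 1) | (rh << 127))
    }

    fn main() {
        let (x, y) = (0x0123_4567_89ab_cdef_u128, 0xfedc_ba98_7654_3210);
        assert_eq!(clmul128(rev128(x), rev128(y)), rev255(clmul128(x, y)));
    }

Because rev255 rather than rev128 comes back out, a multiply in GHASH's bit-reflected convention would need an extra shift by 1 every time; mapping to POLYVAL folds that correction into the key once, at init (cf. RFC 8452, Appendix A).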
@@ -10,6 +10,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F10111213
Output = 4C99FF0CB1B31BD33F8431DBAF4D17FCD356A807

+# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA1
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60616263
@@ -25,6 +26,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B
Output = E3D249A8CFB67EF8B7A169E9A0A599714A2CECBA65999A51BEB8FBBE

+# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA224
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60616263
@@ -40,6 +42,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F
Output = A28CF43130EE696A98F14A37678B56BCFCBDD9E5CF69717FECF5480F0EBDF790

+# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA256
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60616263
@@ -55,6 +58,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F
Output = 6EB242BDBB582CA17BEBFA481B1E23211464D2B7F8C20B9FF2201637B93646AF5AE9AC316E98DB45D9CAE773675EEED0

+# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA384
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
@@ -70,6 +74,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F
Output = FD44C18BDA0BB0A6CE0E82B031BF2818F6539BD56EC00BDC10A8A2D730B3634DE2545D639B0F2CF710D0692C72A1896F1F211C2B922D1A96C392E07E7EA9FEDC

+# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA512
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
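The comment added before each keylen=blocklen vector refers to HMAC's key rule (RFC 2104): a key longer than the hash's block size is first hashed, then zero-padded to the block size. The SHA-1 case above uses a 100-byte key against SHA-1's 64-byte block, so it really exercises the keylen>blocklen path; only the quoted message text says "keylen=blocklen". A sketch of the rule (generic helpers, not ring's API; block_len is 64 for SHA-1/224/256 and 128 for SHA-384/512):

    // Derive the padded key block K0 per RFC 2104 / FIPS 198-1.
    fn hmac_key_block(key: &[u8], block_len: usize, hash: &dyn Fn(&[u8]) -> Vec<u8>) -> Vec<u8> {
        let mut k0 = if key.len() > block_len {
            hash(key) // keys longer than the block size are hashed first
        } else {
            key.to_vec()
        };
        k0.resize(block_len, 0); // then zero-padded to the block size
        k0
    }

    // HMAC(K, m) = H((K0 ^ opad) || H((K0 ^ ipad) || m))
    fn hmac(key: &[u8], msg: &[u8], block_len: usize, hash: &dyn Fn(&[u8]) -> Vec<u8>) -> Vec<u8> {
        let k0 = hmac_key_block(key, block_len, hash);
        let ipad: Vec<u8> = k0.iter().map(|b| b ^ 0x36).collect();
        let opad: Vec<u8> = k0.iter().map(|b| b ^ 0x5c).collect();
        hash(&[opad, hash(&[ipad, msg.to_vec()].concat())].concat())
    }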